1 سال پیش · df6604a734
--- a/.github/workflows/check_no_chinese_comments.py
+++ b/.github/workflows/check_no_chinese_comments.py
@@ -20,7 +20,8 @@ def check_file_for_chinese_comments(file_path):
 
				 def main():
			
 
				     has_chinese = False
			
 
				     excluded_files = ["model_template.py", 'stopwords.py', 'commands.py',
			
 
				-                      'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py']
			
 
				+                      'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py',
			
 
				+                      'prompts.py']
			
 
				 
			
 
				     for root, _, files in os.walk("."):
			
 
				         for file in files:
			
--- a/api/core/generator/llm_generator.py
+++ b/api/core/generator/llm_generator.py
@@ -1,3 +1,4 @@
 
				+import json
			
 
				 import logging
			
 
				 
			
 
				 from langchain.schema import OutputParserException
			
@@ -22,18 +23,25 @@ class LLMGenerator:
 
				         if len(query) > 2000:
			
 
				             query = query[:300] + "...[TRUNCATED]..." + query[-300:]
			
 
				 
			
 
				-        prompt = prompt.format(query=query)
			
 
				+        query = query.replace("\n", " ")
			
 
				+
			
 
				+        prompt += query + "\n"
			
 
				 
			
 
				         model_instance = ModelFactory.get_text_generation_model(
			
 
				             tenant_id=tenant_id,
			
 
				             model_kwargs=ModelKwargs(
			
 
				-                max_tokens=50
			
 
				+                temperature=1,
			
 
				+                max_tokens=100
			
 
				             )
			
 
				         )
			
 
				 
			
 
				         prompts = [PromptMessage(content=prompt)]
			
 
				         response = model_instance.run(prompts)
			
 
				         answer = response.content
			
 
				+
			
 
				+        result_dict = json.loads(answer)
			
 
				+        answer = result_dict['Your Output']
			
 
				+
			
 
				         return answer.strip()
			
 
				 
			
 
				     @classmethod
			
--- a/api/core/prompt/prompts.py
+++ b/api/core/prompt/prompts.py
@@ -1,10 +1,64 @@
 
				-CONVERSATION_TITLE_PROMPT = (
			
 
				-    "Human:{query}\n-----\n"
			
 
				-    "Help me summarize the intent of what the human said and provide a title, the title should not exceed 20 words.\n"
			
 
				-    "If what the human said is conducted in English, you should only return an English title.\n" 
			
 
				-    "If what the human said is conducted in Chinese, you should only return a Chinese title.\n"
			
 
				-    "title:"
			
 
				-)
			
 
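				+# Written by YORKI MINAKO🤡. Few-shot prompt for conversation titles; it ends with an open "User Input: " line, and the caller presumably appends the query there and reads the title from the "Your Output" key of the JSON reply (verify against llm_generator.py).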
			
 
				+CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is. 
			
 
				+Notice: the language type user using is abundant, can be English, Chinese, Español, Arabic, Japanese, and etc.
			
 
				+MAKE SURE your output is the SAME language as the user's input!
			
 
				+Your output is restricted only to: (Input language) Intention + Subject(short as possible)
			
 
				+
			
 
				+Tip: When the user's question is directed at you (the language model), you can add an emoji to make it more fun.
			
 
				+
			
 
				+
			
 
				+example 1:
			
 
				+User Input: hi, yesterday i had some burgers.
			
 
				+{
			
 
				+  "Language Type": "The user's input is pure English",
			
 
				+  "Your Reasoning": "The language of my output must be pure English.",
			
 
				+  "Your Output": "sharing yesterday's food"
			
 
				+}
			
 
				+
			
 
				+example 2:
			
 
				+User Input: hello
			
 
				+{
			
 
				+  "Language Type": "The user's input is written in pure English",
			
 
				+  "Your Reasoning": "The language of my output must be pure English.",
			
 
				+  "Your Output": "Greeting myself☺️"
			
 
				+}
			
 
				+
			
 
				+
			
 
				+example 3:
			
 
				+User Input: why mmap file: oom
			
 
				+{
			
 
				+  "Language Type": "The user's input is written in pure English",
			
 
				+  "Your Reasoning": "The language of my output must be pure English.",
			
 
				+  "Your Output": "Asking about the reason for mmap file: oom"
			
 
				+}
			
 
				+
			
 
				+
			
 
				+example 4:
			
 
				+User Input: www.convinceme.yesterday-you-ate-seafood.tv讲了什么？
			
 
				+{
			
 
				+  "Language Type": "The user's input English-Chinese mixed",
			
 
				+  "Your Reasoning": "The English-part is an URL, the main intention is still written in Chinese, so the language of my output must be using Chinese.",
			
 
				+  "Your Output": "询问网站www.convinceme.yesterday-you-ate-seafood.tv"
			
 
				+}
			
 
				+
			
 
				+example 5:
			
 
				+User Input: why小红的年龄is老than小明？
			
 
				+{
			
 
				+  "Language Type": "The user's input is English-Chinese mixed",
			
 
				+  "Your Reasoning": "The English parts are subjective particles, the main intention is written in Chinese, besides, Chinese occupies a greater \"actual meaning\" than English, so the language of my output must be using Chinese.",
			
 
				+  "Your Output": "询问小红和小明的年龄"
			
 
				+}
			
 
				+
			
 
				+example 6:
			
 
				+User Input: yo, 你今天咋样？
			
 
				+{
			
 
				+  "Language Type": "The user's input is English-Chinese mixed",
			
 
				+  "Your Reasoning": "The English-part is a subjective particle, the main intention is written in Chinese, so the language of my output must be using Chinese.",
			
 
				+  "Your Output": "查询今日我的状态☺️"
			
 
				+}
			
 
				+
			
 
				+User Input: 
			
 
				+"""
			
 
				 
			
 
				 CONVERSATION_SUMMARY_PROMPT = (
			
 
				     "Please generate a short summary of the following conversation.\n"