Tuchuanhuhuhu committed
Commit 76a432f · 1 Parent(s): c9a9fba

feat: 保存更多参数 (save more parameters)

Files changed (2)
  1. modules/models/base_model.py +195 -93
  2. modules/utils.py +165 -66
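
In short: `save_file` previously persisted only the system prompt, the message history, and the chatbot pairs; this commit passes the model object itself into `save_file`, so the per-conversation generation parameters are saved alongside the history and restored on load. A minimal sketch of the payload the new `save_file` writes (the key names are taken from the diff below; the values shown are illustrative only):

# Illustrative example of the history JSON written after this commit.
# Keys mirror the json_s dict in modules/utils.py; the values are made up.
example_payload = {
    "system": "You are a helpful assistant.",
    "history": [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
    ],
    "chatbot": [["Hi", "Hello!"]],
    "single_turn": False,
    "temperature": 1.0,
    "top_p": None,
    "n_choices": 1,
    "stop_sequence": None,
    "max_generation_token": None,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "logit_bias": None,
    "user_identifier": "user",
    "metadata": {},
}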
modules/models/base_model.py CHANGED
@@ -70,13 +70,13 @@ class CallbackToIterator:


 def get_action_description(text):
-    match = re.search('```(.*?)```', text, re.S)
+    match = re.search("```(.*?)```", text, re.S)
     json_text = match.group(1)
     # Convert the JSON into a Python dict
     json_dict = json.loads(json_text)
     # Extract the values of 'action' and 'action_input'
-    action_name = json_dict['action']
-    action_input = json_dict['action_input']
+    action_name = json_dict["action"]
+    action_input = json_dict["action_input"]
     if action_name != "Final Answer":
         return f'<!-- S O PREFIX --><p class="agent-prefix">{action_name}: {action_input}\n</p><!-- E O PREFIX -->'
     else:
@@ -84,7 +84,6 @@ def get_action_description(text):


 class ChuanhuCallbackHandler(BaseCallbackHandler):
-
     def __init__(self, callback) -> None:
         """Initialize callback handler."""
         self.callback = callback
@@ -124,7 +123,12 @@ class ChuanhuCallbackHandler(BaseCallbackHandler):
         """Run on new LLM token. Only available when streaming is enabled."""
         self.callback(token)

-    def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], **kwargs: Any) -> Any:
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        **kwargs: Any,
+    ) -> Any:
         """Run when a chat model starts running."""
         pass

@@ -228,24 +232,26 @@ class BaseLLMModel:
         self.need_api_key = False
         self.single_turn = False
         self.history_file_path = get_first_history_name(user)
+        self.user_name = user

         self.temperature = temperature
         self.top_p = top_p
         self.n_choices = n_choices
         self.stop_sequence = stop
-        self.max_generation_token = None
+        self.max_generation_token = max_generation_token
         self.presence_penalty = presence_penalty
         self.frequency_penalty = frequency_penalty
         self.logit_bias = logit_bias
         self.user_identifier = user

+        self.metadata = {}
+
     def get_answer_stream_iter(self):
-        """stream predict, need to be implemented
-        conversations are stored in self.history, with the most recent question, in OpenAI format
-        should return a generator, each time give the next word (str) in the answer
+        """Implement stream prediction.
+        Conversations are stored in self.history, with the most recent question in OpenAI format.
+        Should return a generator that yields the next word (str) in the answer.
         """
-        logging.warning(
-            "stream predict not implemented, using at once predict instead")
+        logging.warning("Stream prediction is not implemented. Using at once prediction instead.")
         response, _ = self.get_answer_at_once()
         yield response

@@ -256,8 +262,7 @@ class BaseLLMModel:
         the answer (str)
         total token count (int)
         """
-        logging.warning(
-            "at once predict not implemented, using stream predict instead")
+        logging.warning("at once predict not implemented, using stream predict instead")
         response_iter = self.get_answer_stream_iter()
         count = 0
         for response in response_iter:
@@ -291,7 +296,9 @@ class BaseLLMModel:
         stream_iter = self.get_answer_stream_iter()

         if display_append:
-            display_append = '\n\n<hr class="append-display no-in-raw" />' + display_append
+            display_append = (
+                '\n\n<hr class="append-display no-in-raw" />' + display_append
+            )
         partial_text = ""
         token_increment = 1
         for partial_text in stream_iter:
@@ -322,11 +329,9 @@ class BaseLLMModel:
             self.history[-2] = construct_user(fake_input)
         chatbot[-1] = (chatbot[-1][0], ai_reply + display_append)
         if fake_input is not None:
-            self.all_token_counts[-1] += count_token(
-                construct_assistant(ai_reply))
+            self.all_token_counts[-1] += count_token(construct_assistant(ai_reply))
         else:
-            self.all_token_counts[-1] = total_token_count - \
-                sum(self.all_token_counts)
+            self.all_token_counts[-1] = total_token_count - sum(self.all_token_counts)
         status_text = self.token_message()
         return chatbot, status_text

@@ -349,46 +354,80 @@ class BaseLLMModel:
         from langchain.prompts import PromptTemplate
         from langchain.chat_models import ChatOpenAI
         from langchain.callbacks import StdOutCallbackHandler
-        prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
-        PROMPT = PromptTemplate(
-            template=prompt_template, input_variables=["text"])
+
+        prompt_template = (
+            "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN "
+            + language
+            + ":"
+        )
+        PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
         llm = ChatOpenAI()
         chain = load_summarize_chain(
-            llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
-        summary = chain({"input_documents": list(index.docstore.__dict__[
-            "_dict"].values())}, return_only_outputs=True)["output_text"]
+            llm,
+            chain_type="map_reduce",
+            return_intermediate_steps=True,
+            map_prompt=PROMPT,
+            combine_prompt=PROMPT,
+        )
+        summary = chain(
+            {"input_documents": list(index.docstore.__dict__["_dict"].values())},
+            return_only_outputs=True,
+        )["output_text"]
         print(i18n("总结") + f": {summary}")
-        chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
+        chatbot.append([i18n("上传了") + str(len(files)) + "个文件", summary])
         return chatbot, status

-    def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=True):
+    def prepare_inputs(
+        self,
+        real_inputs,
+        use_websearch,
+        files,
+        reply_language,
+        chatbot,
+        load_from_cache_if_possible=True,
+    ):
         display_append = []
         limited_context = False
         if type(real_inputs) == list:
-            fake_inputs = real_inputs[0]['text']
+            fake_inputs = real_inputs[0]["text"]
         else:
             fake_inputs = real_inputs
         if files:
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
             from langchain.vectorstores.base import VectorStoreRetriever
+
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
-            index = construct_index(self.api_key, file_src=files, load_from_cache_if_possible=load_from_cache_if_possible)
+            index = construct_index(
+                self.api_key,
+                file_src=files,
+                load_from_cache_if_possible=load_from_cache_if_possible,
+            )
             assert index is not None, "获取索引失败"
             msg = "索引获取成功,生成回答中……"
             logging.info(msg)
             with retrieve_proxy():
-                retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity", search_kwargs={"k": 6})
+                retriever = VectorStoreRetriever(
+                    vectorstore=index, search_type="similarity", search_kwargs={"k": 6}
+                )
                 # retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
                 #     "k": 6, "score_threshold": 0.2})
                 try:
-                    relevant_documents = retriever.get_relevant_documents(
-                        fake_inputs)
+                    relevant_documents = retriever.get_relevant_documents(fake_inputs)
                 except AssertionError:
-                    return self.prepare_inputs(fake_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=False)
-            reference_results = [[d.page_content.strip("�"), os.path.basename(
-                d.metadata["source"])] for d in relevant_documents]
+                    return self.prepare_inputs(
+                        fake_inputs,
+                        use_websearch,
+                        files,
+                        reply_language,
+                        chatbot,
+                        load_from_cache_if_possible=False,
+                    )
+            reference_results = [
+                [d.page_content.strip("�"), os.path.basename(d.metadata["source"])]
+                for d in relevant_documents
+            ]
             reference_results = add_source_numbers(reference_results)
             display_append = add_details(reference_results)
             display_append = "\n\n" + "".join(display_append)
@@ -415,16 +454,17 @@ class BaseLLMModel:
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}:{result}")
-                domain_name = urllib3.util.parse_url(result['href']).host
-                reference_results.append([result['body'], result['href']])
+                domain_name = urllib3.util.parse_url(result["href"]).host
+                reference_results.append([result["body"], result["href"]])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
                     f"<a href=\"{result['href']}\" target=\"_blank\">{idx+1}.&nbsp;{result['title']}</a>"
                 )
             reference_results = add_source_numbers(reference_results)
             # display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
-            display_append = '<div class = "source-a">' + \
-                "".join(display_append) + '</div>'
+            display_append = (
+                '<div class = "source-a">' + "".join(display_append) + "</div>"
+            )
         if type(real_inputs) == list:
             real_inputs[0]["text"] = (
                 replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
@@ -453,33 +493,54 @@ class BaseLLMModel:
         reply_language="中文",
         should_check_token_count=True,
     ): # repetition_penalty, top_k
-
         status_text = "开始生成回答……"
         if type(inputs) == list:
-            logging.info(
-                "用户" + f"{self.user_identifier}" + "的输入为:" +
-                colorama.Fore.BLUE + "(" + str(len(inputs)-1) + " images) " + f"{inputs[0]['text']}" + colorama.Style.RESET_ALL
+            logging.info(
+                "用户"
+                + f"{self.user_name}"
+                + "的输入为:"
+                + colorama.Fore.BLUE
+                + "("
+                + str(len(inputs) - 1)
+                + " images) "
+                + f"{inputs[0]['text']}"
+                + colorama.Style.RESET_ALL
             )
         else:
             logging.info(
-                "用户" + f"{self.user_identifier}" + "的输入为:" +
-                colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL
+                "用户"
+                + f"{self.user_name}"
+                + "的输入为:"
+                + colorama.Fore.BLUE
+                + f"{inputs}"
+                + colorama.Style.RESET_ALL
             )
         if should_check_token_count:
             if type(inputs) == list:
-                yield chatbot + [(inputs[0]['text'], "")], status_text
+                yield chatbot + [(inputs[0]["text"], "")], status_text
             else:
                 yield chatbot + [(inputs, "")], status_text
         if reply_language == "跟随问题语言(不稳定)":
             reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."

-        limited_context, fake_inputs, display_append, inputs, chatbot = self.prepare_inputs(
-            real_inputs=inputs, use_websearch=use_websearch, files=files, reply_language=reply_language, chatbot=chatbot)
+        (
+            limited_context,
+            fake_inputs,
+            display_append,
+            inputs,
+            chatbot,
+        ) = self.prepare_inputs(
+            real_inputs=inputs,
+            use_websearch=use_websearch,
+            files=files,
+            reply_language=reply_language,
+            chatbot=chatbot,
+        )
         yield chatbot + [(fake_inputs, "")], status_text

         if (
-            self.need_api_key and
-            self.api_key is None
+            self.need_api_key
+            and self.api_key is None
             and not shared.state.multi_api_key
         ):
             status_text = STANDARD_ERROR_MSG + NO_APIKEY_MSG
@@ -684,11 +745,16 @@ class BaseLLMModel:
         self.history = []
         self.all_token_counts = []
         self.interrupted = False
-        self.history_file_path = new_auto_history_filename(self.user_identifier)
+        self.history_file_path = new_auto_history_filename(self.user_name)
         history_name = self.history_file_path[:-5]
-        choices = [history_name] + get_history_names(self.user_identifier)
+        choices = [history_name] + get_history_names(self.user_name)
         system_prompt = self.system_prompt if remain_system_prompt else ""
-        return [], self.token_message([0]), gr.Radio.update(choices=choices, value=history_name), system_prompt
+        return (
+            [],
+            self.token_message([0]),
+            gr.Radio.update(choices=choices, value=history_name),
+            system_prompt,
+        )

     def delete_first_conversation(self):
         if self.history:
@@ -719,7 +785,12 @@ class BaseLLMModel:
         token_sum = 0
         for i in range(len(token_lst)):
             token_sum += sum(token_lst[: i + 1])
-        return i18n("Token 计数: ") + f"{sum(token_lst)}" + i18n(",本次对话累计消耗了 ") + f"{token_sum} tokens"
+        return (
+            i18n("Token 计数: ")
+            + f"{sum(token_lst)}"
+            + i18n(",本次对话累计消耗了 ")
+            + f"{token_sum} tokens"
+        )

     def rename_chat_history(self, filename, chatbot):
         if filename == "":
@@ -729,78 +800,103 @@ class BaseLLMModel:
         self.delete_chat_history(self.history_file_path)
         # Check for duplicate filenames
         repeat_file_index = 2
-        full_path = os.path.join(HISTORY_DIR, self.user_identifier, filename)
+        full_path = os.path.join(HISTORY_DIR, self.user_name, filename)
         while os.path.exists(full_path):
-            full_path = os.path.join(HISTORY_DIR, self.user_identifier, f"{repeat_file_index}_{filename}")
+            full_path = os.path.join(
+                HISTORY_DIR, self.user_name, f"{repeat_file_index}_{filename}"
+            )
             repeat_file_index += 1
         filename = os.path.basename(full_path)

         self.history_file_path = filename
-        save_file(filename, self.system_prompt, self.history, chatbot, self.user_identifier)
-        return init_history_list(self.user_identifier)
+        save_file(filename, self, chatbot)
+        return init_history_list(self.user_name)

-    def auto_name_chat_history(self, name_chat_method, user_question, chatbot, single_turn_checkbox):
+    def auto_name_chat_history(
+        self, name_chat_method, user_question, chatbot, single_turn_checkbox
+    ):
         if len(self.history) == 2 and not single_turn_checkbox:
             user_question = self.history[0]["content"]
             if type(user_question) == list:
                 user_question = user_question[0]["text"]
             filename = replace_special_symbols(user_question)[:16] + ".json"
-            return self.rename_chat_history(filename, chatbot, self.user_identifier)
+            return self.rename_chat_history(filename, chatbot, self.user_name)
         else:
             return gr.update()

     def auto_save(self, chatbot):
-        save_file(self.history_file_path, self.system_prompt,
-                  self.history, chatbot, self.user_identifier)
+        save_file(self.history_file_path, self, chatbot)

     def export_markdown(self, filename, chatbot):
         if filename == "":
             return
         if not filename.endswith(".md"):
             filename += ".md"
-        save_file(filename, self.system_prompt, self.history, chatbot, self.user_identifier)
+        save_file(filename, self, chatbot)

     def load_chat_history(self, new_history_file_path=None):
-        logging.debug(f"{self.user_identifier} 加载对话历史中……")
+        logging.debug(f"{self.user_name} 加载对话历史中……")
        if new_history_file_path is not None:
             if type(new_history_file_path) != str:
-                # copy file from new_history_file_path.name to os.path.join(HISTORY_DIR, self.user_identifier)
+                # copy file from new_history_file_path.name to os.path.join(HISTORY_DIR, self.user_name)
                 new_history_file_path = new_history_file_path.name
-                shutil.copyfile(new_history_file_path, os.path.join(
-                    HISTORY_DIR, self.user_identifier, os.path.basename(new_history_file_path)))
+                shutil.copyfile(
+                    new_history_file_path,
+                    os.path.join(
+                        HISTORY_DIR,
+                        self.user_name,
+                        os.path.basename(new_history_file_path),
+                    ),
+                )
                 self.history_file_path = os.path.basename(new_history_file_path)
             else:
                 self.history_file_path = new_history_file_path
         try:
             if self.history_file_path == os.path.basename(self.history_file_path):
                 history_file_path = os.path.join(
-                    HISTORY_DIR, self.user_identifier, self.history_file_path)
+                    HISTORY_DIR, self.user_name, self.history_file_path
+                )
             else:
                 history_file_path = self.history_file_path
             if not self.history_file_path.endswith(".json"):
                 history_file_path += ".json"
             with open(history_file_path, "r", encoding="utf-8") as f:
-                json_s = json.load(f)
+                saved_json = json.load(f)
             try:
-                if type(json_s["history"][0]) == str:
+                if type(saved_json["history"][0]) == str:
                     logging.info("历史记录格式为旧版,正在转换……")
                     new_history = []
-                    for index, item in enumerate(json_s["history"]):
+                    for index, item in enumerate(saved_json["history"]):
                         if index % 2 == 0:
                             new_history.append(construct_user(item))
                         else:
                             new_history.append(construct_assistant(item))
-                    json_s["history"] = new_history
+                    saved_json["history"] = new_history
                     logging.info(new_history)
             except:
                 pass
-            if len(json_s["chatbot"]) < len(json_s["history"])//2:
+            if len(saved_json["chatbot"]) < len(saved_json["history"]) // 2:
                 logging.info("Trimming corrupted history...")
-                json_s["history"] = json_s["history"][-len(json_s["chatbot"]):]
-                logging.info(f"Trimmed history: {json_s['history']}")
-            logging.debug(f"{self.user_identifier} 加载对话历史完毕")
-            self.history = json_s["history"]
-            return os.path.basename(self.history_file_path), json_s["system"], json_s["chatbot"]
+                saved_json["history"] = saved_json["history"][-len(saved_json["chatbot"]) :]
+                logging.info(f"Trimmed history: {saved_json['history']}")
+            logging.debug(f"{self.user_name} 加载对话历史完毕")
+            self.history = saved_json["history"]
+            self.single_turn = saved_json.get("single_turn", False)
+            self.temperature = saved_json.get("temperature", 1.0)
+            self.top_p = saved_json.get("top_p", None)
+            self.n_choices = saved_json.get("n_choices", 1)
+            self.stop_sequence = saved_json.get("stop_sequence", None)
+            self.max_generation_token = saved_json.get("max_generation_token", None)
+            self.presence_penalty = saved_json.get("presence_penalty", 0)
+            self.frequency_penalty = saved_json.get("frequency_penalty", 0)
+            self.logit_bias = saved_json.get("logit_bias", None)
+            self.user_identifier = saved_json.get("user_identifier", self.user_name)
+            self.metadata = saved_json.get("metadata", {})
+            return (
+                os.path.basename(self.history_file_path),
+                saved_json["system"],
+                saved_json["chatbot"],
+            )
         except:
             # No chat history, or failed to parse it
             logging.info(f"没有找到对话历史记录 {self.history_file_path}")
@@ -814,23 +910,28 @@ class BaseLLMModel:
         if not filename.endswith(".json"):
             filename += ".json"
         if filename == os.path.basename(filename):
-            history_file_path = os.path.join(HISTORY_DIR, self.user_identifier, filename)
+            history_file_path = os.path.join(
+                HISTORY_DIR, self.user_name, filename
+            )
         else:
             history_file_path = filename
         md_history_file_path = history_file_path[:-5] + ".md"
         try:
             os.remove(history_file_path)
             os.remove(md_history_file_path)
-            return i18n("删除对话历史成功"), get_history_list(self.user_identifier), []
+            return i18n("删除对话历史成功"), get_history_list(self.user_name), []
         except:
             logging.info(f"删除对话历史失败 {history_file_path}")
-            return i18n("对话历史")+filename+i18n("已经被删除啦"), get_history_list(self.user_identifier), []
+            return (
+                i18n("对话历史") + filename + i18n("已经被删除啦"),
+                get_history_list(self.user_name),
+                [],
+            )

     def auto_load(self):
-        filepath = get_history_filepath(self.user_identifier)
+        filepath = get_history_filepath(self.user_name)
         if not filepath:
-            self.history_file_path = new_auto_history_filename(
-                self.user_identifier)
+            self.history_file_path = new_auto_history_filename(self.user_name)
         else:
             self.history_file_path = filepath
         filename, system_prompt, chatbot = self.load_chat_history()
@@ -838,18 +939,15 @@ class BaseLLMModel:
         return filename, system_prompt, chatbot

     def like(self):
-        """like the last response, implement if needed
-        """
+        """like the last response, implement if needed"""
         return gr.update()

     def dislike(self):
-        """dislike the last response, implement if needed
-        """
+        """dislike the last response, implement if needed"""
         return gr.update()

     def deinitialize(self):
-        """deinitialize the model, implement if needed
-        """
+        """deinitialize the model, implement if needed"""
         pass


@@ -874,7 +972,8 @@ class Base_Chat_Langchain_Client(BaseLLMModel):

     def get_answer_at_once(self):
         assert isinstance(
-            self.model, BaseChatModel), "model is not instance of LangChain BaseChatModel"
+            self.model, BaseChatModel
+        ), "model is not instance of LangChain BaseChatModel"
         history = self._get_langchain_style_history()
         response = self.model.generate(history)
         return response.content, sum(response.content)
@@ -882,13 +981,16 @@ class Base_Chat_Langchain_Client(BaseLLMModel):
     def get_answer_stream_iter(self):
         it = CallbackToIterator()
         assert isinstance(
-            self.model, BaseChatModel), "model is not instance of LangChain BaseChatModel"
+            self.model, BaseChatModel
+        ), "model is not instance of LangChain BaseChatModel"
         history = self._get_langchain_style_history()

         def thread_func():
-            self.model(messages=history, callbacks=[
-                ChuanhuCallbackHandler(it.callback)])
+            self.model(
+                messages=history, callbacks=[ChuanhuCallbackHandler(it.callback)]
+            )
             it.finish()
+
         t = Thread(target=thread_func)
         t.start()
         partial_text = ""
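
Backward-compatibility note for the file above: on the load side, every newly persisted field is read with `saved_json.get(key, default)`, so history files written before this commit (which lack these keys) still load cleanly. A self-contained sketch of that pattern (`load_saved_params` is a hypothetical helper, not part of the codebase; the defaults mirror the ones used in `load_chat_history`):

import json

def load_saved_params(path):
    # Older files simply lack the new keys and fall back to the defaults,
    # exactly as load_chat_history does above with saved_json.get(...).
    defaults = {
        "single_turn": False,
        "temperature": 1.0,
        "top_p": None,
        "max_generation_token": None,
        "metadata": {},
    }
    with open(path, "r", encoding="utf-8") as f:
        saved_json = json.load(f)
    return {key: saved_json.get(key, default) for key, default in defaults.items()}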
modules/utils.py CHANGED
@@ -31,97 +31,127 @@ if TYPE_CHECKING:
     headers: List[str]
     data: List[List[str | int | bool]]

+
 def predict(current_model, *args):
     iter = current_model.predict(*args)
     for i in iter:
         yield i

+
 def billing_info(current_model):
     return current_model.billing_info()

+
 def set_key(current_model, *args):
     return current_model.set_key(*args)

+
 def load_chat_history(current_model, *args):
     return current_model.load_chat_history(*args)

+
 def delete_chat_history(current_model, *args):
     return current_model.delete_chat_history(*args)

+
 def interrupt(current_model, *args):
     return current_model.interrupt(*args)

+
 def reset(current_model, *args):
     return current_model.reset(*args)

+
 def retry(current_model, *args):
     iter = current_model.retry(*args)
     for i in iter:
         yield i

+
 def delete_first_conversation(current_model, *args):
     return current_model.delete_first_conversation(*args)

+
 def delete_last_conversation(current_model, *args):
     return current_model.delete_last_conversation(*args)

+
 def set_system_prompt(current_model, *args):
     return current_model.set_system_prompt(*args)

+
 def rename_chat_history(current_model, *args):
     return current_model.rename_chat_history(*args)

+
 def auto_name_chat_history(current_model, *args):
     return current_model.auto_name_chat_history(*args)

+
 def export_markdown(current_model, *args):
     return current_model.export_markdown(*args)

+
 def upload_chat_history(current_model, *args):
     return current_model.load_chat_history(*args)

+
 def set_token_upper_limit(current_model, *args):
     return current_model.set_token_upper_limit(*args)

+
 def set_temperature(current_model, *args):
     current_model.set_temperature(*args)

+
 def set_top_p(current_model, *args):
     current_model.set_top_p(*args)

+
 def set_n_choices(current_model, *args):
     current_model.set_n_choices(*args)

+
 def set_stop_sequence(current_model, *args):
     current_model.set_stop_sequence(*args)

+
 def set_max_tokens(current_model, *args):
     current_model.set_max_tokens(*args)

+
 def set_presence_penalty(current_model, *args):
     current_model.set_presence_penalty(*args)

+
 def set_frequency_penalty(current_model, *args):
     current_model.set_frequency_penalty(*args)

+
 def set_logit_bias(current_model, *args):
     current_model.set_logit_bias(*args)

+
 def set_user_identifier(current_model, *args):
     current_model.set_user_identifier(*args)

+
 def set_single_turn(current_model, *args):
     current_model.set_single_turn(*args)

+
 def handle_file_upload(current_model, *args):
     return current_model.handle_file_upload(*args)

+
 def handle_summarize_index(current_model, *args):
     return current_model.summarize_index(*args)

+
 def like(current_model, *args):
     return current_model.like(*args)

+
 def dislike(current_model, *args):
     return current_model.dislike(*args)

@@ -134,7 +164,7 @@ def count_token(input_str):
     return length


-def markdown_to_html_with_syntax_highlight(md_str): # deprecated
+def markdown_to_html_with_syntax_highlight(md_str):  # deprecated
     def replacer(match):
         lang = match.group(1) or "text"
         code = match.group(2)
@@ -156,7 +186,7 @@ def markdown_to_html_with_syntax_highlight(md_str): # deprecated
     return html_str


-def normalize_markdown(md_text: str) -> str: # deprecated
+def normalize_markdown(md_text: str) -> str:  # deprecated
     lines = md_text.split("\n")
     normalized_lines = []
     inside_list = False
@@ -180,7 +210,7 @@ def normalize_markdown(md_text: str) -> str: # deprecated
     return "\n".join(normalized_lines)


-def convert_mdtext(md_text): # deprecated
+def convert_mdtext(md_text):  # deprecated
     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
     code_blocks = code_block_pattern.findall(md_text)
@@ -209,16 +239,22 @@ def clip_rawtext(chat_message, need_escape=True):
     # first, clip hr line
     hr_pattern = r'\n\n<hr class="append-display no-in-raw" />(.*?)'
     hr_match = re.search(hr_pattern, chat_message, re.DOTALL)
-    message_clipped = chat_message[:hr_match.start()] if hr_match else chat_message
+    message_clipped = chat_message[: hr_match.start()] if hr_match else chat_message
     # second, avoid agent-prefix being escaped
-    agent_prefix_pattern = r'(<!-- S O PREFIX --><p class="agent-prefix">.*?<\/p><!-- E O PREFIX -->)'
+    agent_prefix_pattern = (
+        r'(<!-- S O PREFIX --><p class="agent-prefix">.*?<\/p><!-- E O PREFIX -->)'
+    )
     # agent_matches = re.findall(agent_prefix_pattern, message_clipped)
     agent_parts = re.split(agent_prefix_pattern, message_clipped, flags=re.DOTALL)
     final_message = ""
     for i, part in enumerate(agent_parts):
         if i % 2 == 0:
             if part != "" and part != "\n":
-                final_message += f'<pre class="fake-pre">{escape_markdown(part)}</pre>' if need_escape else f'<pre class="fake-pre">{part}</pre>'
+                final_message += (
+                    f'<pre class="fake-pre">{escape_markdown(part)}</pre>'
+                    if need_escape
+                    else f'<pre class="fake-pre">{part}</pre>'
+                )
             else:
                 final_message += part
     return final_message
@@ -248,51 +284,53 @@ def convert_bot_before_marked(chat_message):
     md = f'<div class="md-message">\n\n{result}\n</div>'
     return raw + md

+
 def convert_user_before_marked(chat_message):
     if '<div class="user-message">' in chat_message:
         return chat_message
     else:
         return f'<div class="user-message">{escape_markdown(chat_message)}</div>'

+
 def escape_markdown(text):
     """
     Escape Markdown special characters to HTML-safe equivalents.
     """
     escape_chars = {
         # ' ': '&nbsp;',
-        '_': '&#95;',
-        '*': '&#42;',
-        '[': '&#91;',
-        ']': '&#93;',
-        '(': '&#40;',
-        ')': '&#41;',
-        '{': '&#123;',
-        '}': '&#125;',
-        '#': '&#35;',
-        '+': '&#43;',
-        '-': '&#45;',
-        '.': '&#46;',
-        '!': '&#33;',
-        '`': '&#96;',
-        '>': '&#62;',
-        '<': '&#60;',
-        '|': '&#124;',
-        '$': '&#36;',
-        ':': '&#58;',
-        '\n': '<br>',
+        "_": "&#95;",
+        "*": "&#42;",
+        "[": "&#91;",
+        "]": "&#93;",
+        "(": "&#40;",
+        ")": "&#41;",
+        "{": "&#123;",
+        "}": "&#125;",
+        "#": "&#35;",
+        "+": "&#43;",
+        "-": "&#45;",
+        ".": "&#46;",
+        "!": "&#33;",
+        "`": "&#96;",
+        ">": "&#62;",
+        "<": "&#60;",
+        "|": "&#124;",
+        "$": "&#36;",
+        ":": "&#58;",
+        "\n": "<br>",
     }
-    text = text.replace(' ', '&nbsp;&nbsp;&nbsp;&nbsp;')
-    return ''.join(escape_chars.get(c, c) for c in text)
+    text = text.replace(" ", "&nbsp;&nbsp;&nbsp;&nbsp;")
+    return "".join(escape_chars.get(c, c) for c in text)


-def convert_asis(userinput): # deprecated
+def convert_asis(userinput):  # deprecated
     return (
         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
         + ALREADY_CONVERTED_MARK
     )


-def detect_converted_mark(userinput): # deprecated
+def detect_converted_mark(userinput):  # deprecated
     try:
         if userinput.endswith(ALREADY_CONVERTED_MARK):
             return True
@@ -302,7 +340,7 @@ def detect_converted_mark(userinput): # deprecated
         return True


-def detect_language(code): # deprecated
+def detect_language(code):  # deprecated
     if code.startswith("\n"):
         first_line = ""
     else:
@@ -328,7 +366,10 @@ def construct_assistant(text):
     return construct_text("assistant", text)


-def save_file(filename, system, history, chatbot, user_name):
+def save_file(filename, model, chatbot):
+    system = model.system_prompt
+    history = model.history
+    user_name = model.user_name
     os.makedirs(os.path.join(HISTORY_DIR, user_name), exist_ok=True)
     if filename is None:
         filename = new_auto_history_filename(user_name)
@@ -339,22 +380,38 @@ def save_file(filename, system, history, chatbot, user_name):
     if filename == ".json":
         raise Exception("文件名不能为空")

-    json_s = {"system": system, "history": history, "chatbot": chatbot}
-    repeat_file_index = 2
+    json_s = {
+        "system": system,
+        "history": history,
+        "chatbot": chatbot,
+        "single_turn": model.single_turn,
+        "temperature": model.temperature,
+        "top_p": model.top_p,
+        "n_choices": model.n_choices,
+        "stop_sequence": model.stop_sequence,
+        "max_generation_token": model.max_generation_token,
+        "presence_penalty": model.presence_penalty,
+        "frequency_penalty": model.frequency_penalty,
+        "logit_bias": model.logit_bias,
+        "user_identifier": model.user_identifier,
+        "metadata": model.metadata
+    }
     if not filename == os.path.basename(filename):
         history_file_path = filename
     else:
         history_file_path = os.path.join(HISTORY_DIR, user_name, filename)

-    with open(history_file_path, "w", encoding='utf-8') as f:
-        json.dump(json_s, f, ensure_ascii=False)
+    with open(history_file_path, "w", encoding="utf-8") as f:
+        json.dump(json_s, f, ensure_ascii=False, indent=4)

     filename = os.path.basename(filename)
     filename_md = filename[:-5] + ".md"
     md_s = f"system: \n- {system} \n"
     for data in history:
         md_s += f"\n{data['role']}: \n- {data['content']} \n"
-    with open(os.path.join(HISTORY_DIR, user_name, filename_md), "w", encoding="utf8") as f:
+    with open(
+        os.path.join(HISTORY_DIR, user_name, filename_md), "w", encoding="utf8"
+    ) as f:
         f.write(md_s)
     return os.path.join(HISTORY_DIR, user_name, filename)

@@ -362,8 +419,12 @@ def save_file(filename, system, history, chatbot, user_name):
 def sorted_by_pinyin(list):
     return sorted(list, key=lambda char: lazy_pinyin(char)[0][0])

+
 def sorted_by_last_modified_time(list, dir):
-    return sorted(list, key=lambda char: os.path.getctime(os.path.join(dir, char)), reverse=True)
+    return sorted(
+        list, key=lambda char: os.path.getctime(os.path.join(dir, char)), reverse=True
+    )
+

 def get_file_names_by_type(dir, filetypes=[".json"]):
     logging.debug(f"获取文件名列表,目录为{dir},文件类型为{filetypes}")
@@ -373,6 +434,7 @@ def get_file_names_by_type(dir, filetypes=[".json"]):
     logging.debug(f"files are:{files}")
     return files

+
 def get_file_names_by_pinyin(dir, filetypes=[".json"]):
     files = get_file_names_by_type(dir, filetypes)
     if files != [""]:
@@ -380,10 +442,12 @@ def get_file_names_by_pinyin(dir, filetypes=[".json"]):
     logging.debug(f"files are:{files}")
     return files

+
 def get_file_names_dropdown_by_pinyin(dir, filetypes=[".json"]):
     files = get_file_names_by_pinyin(dir, filetypes)
     return gr.Dropdown.update(choices=files)

+
 def get_file_names_by_last_modified_time(dir, filetypes=[".json"]):
     files = get_file_names_by_type(dir, filetypes)
     if files != [""]:
@@ -397,21 +461,29 @@ def get_history_names(user_name=""):
     if user_name == "" and hide_history_when_not_logged_in:
         return []
     else:
-        history_files = get_file_names_by_last_modified_time(os.path.join(HISTORY_DIR, user_name))
-        history_files = [f[:f.rfind(".")] for f in history_files]
+        history_files = get_file_names_by_last_modified_time(
+            os.path.join(HISTORY_DIR, user_name)
+        )
+        history_files = [f[: f.rfind(".")] for f in history_files]
         return history_files

+
 def get_first_history_name(user_name=""):
     history_names = get_history_names(user_name)
     return history_names[0] if history_names else None

+
 def get_history_list(user_name=""):
     history_names = get_history_names(user_name)
     return gr.Radio.update(choices=history_names)

+
 def init_history_list(user_name=""):
     history_names = get_history_names(user_name)
-    return gr.Radio.update(choices=history_names, value=history_names[0] if history_names else "")
+    return gr.Radio.update(
+        choices=history_names, value=history_names[0] if history_names else ""
+    )
+

 def filter_history(user_name, keyword):
     history_names = get_history_names(user_name)
@@ -421,6 +493,7 @@ def filter_history(user_name, keyword):
     except:
         return gr.update(choices=history_names)

+
 def load_template(filename, mode=0):
     logging.debug(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
     lines = []
@@ -441,15 +514,14 @@ def load_template(filename, mode=0):
         return {row[0]: row[1] for row in lines}
     else:
         choices = sorted_by_pinyin([row[0] for row in lines])
-        return {row[0]: row[1] for row in lines}, gr.Dropdown.update(
-            choices=choices
-        )
+        return {row[0]: row[1] for row in lines}, gr.Dropdown.update(choices=choices)


 def get_template_names():
     logging.debug("获取模板文件名列表")
     return get_file_names_by_pinyin(TEMPLATES_DIR, filetypes=[".csv", "json"])

+
 def get_template_dropdown():
     logging.debug("获取模板下拉菜单")
     template_names = get_template_names()
@@ -524,9 +596,7 @@ def get_geoip():
         if "error" in data.keys():
             logging.warning(f"无法获取IP地址信息。\n{data}")
             if data["reason"] == "RateLimited":
-                return (
-                    i18n("您的IP区域:未知。")
-                )
+                return i18n("您的IP区域:未知。")
             else:
                 return i18n("获取IP地理位置失败。原因:") + f"{data['reason']}" + i18n("。你仍然可以使用聊天功能。")
         else:
@@ -590,29 +660,36 @@ def update_chuanhu():
     if update_status == "success":
         logging.info("Successfully updated, restart needed")
         status = '<span id="update-status" class="hideK">success</span>'
-        return gr.Markdown.update(value=i18n("更新成功,请重启本程序")+status)
+        return gr.Markdown.update(value=i18n("更新成功,请重启本程序") + status)
     else:
         status = '<span id="update-status" class="hideK">failure</span>'
-        return gr.Markdown.update(value=i18n("更新失败,请尝试[手动更新](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/使用教程#手动更新)")+status)
+        return gr.Markdown.update(
+            value=i18n(
+                "更新失败,请尝试[手动更新](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/使用教程#手动更新)"
+            )
+            + status
+        )


-def add_source_numbers(lst, source_name = "Source", use_source = True):
+def add_source_numbers(lst, source_name="Source", use_source=True):
     if use_source:
-        return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
+        return [
+            f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}'
+            for idx, item in enumerate(lst)
+        ]
     else:
         return [f'[{idx+1}]\t "{item}"' for idx, item in enumerate(lst)]

+
 def add_details(lst):
     nodes = []
     for index, txt in enumerate(lst):
         brief = txt[:25].replace("\n", "")
-        nodes.append(
-            f"<details><summary>{brief}...</summary><p>{txt}</p></details>"
-        )
+        nodes.append(f"<details><summary>{brief}...</summary><p>{txt}</p></details>")
     return nodes


-def sheet_to_string(sheet, sheet_name = None):
+def sheet_to_string(sheet, sheet_name=None):
     result = []
     for index, row in sheet.iterrows():
         row_string = ""
@@ -623,59 +700,70 @@ def sheet_to_string(sheet, sheet_name = None):
         result.append(row_string)
     return result

+
 def excel_to_string(file_path):
     # Read every worksheet in the Excel file
-    excel_file = pd.read_excel(file_path, engine='openpyxl', sheet_name=None)
+    excel_file = pd.read_excel(file_path, engine="openpyxl", sheet_name=None)

     # Initialize the result
     result = []

     # Iterate over each worksheet
     for sheet_name, sheet_data in excel_file.items():
-
         # Process the current worksheet and append it to the result
         result += sheet_to_string(sheet_data, sheet_name=sheet_name)

-
     return result

+
 def get_last_day_of_month(any_day):
     # The day 28 exists in every month. 4 days later, it's always next month
     next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
     # subtracting the number of the current day brings us back one month
     return next_month - datetime.timedelta(days=next_month.day)

+
 def get_model_source(model_name, alternative_source):
     if model_name == "gpt2-medium":
         return "https://huggingface.co/gpt2-medium"

+
 def refresh_ui_elements_on_load(current_model, selected_model_name, user_name):
     current_model.set_user_identifier(user_name)
     return toggle_like_btn_visibility(selected_model_name), *current_model.auto_load()

+
 def toggle_like_btn_visibility(selected_model_name):
     if selected_model_name == "xmchat":
         return gr.update(visible=True)
     else:
         return gr.update(visible=False)

+
 def get_corresponding_file_type_by_model_name(selected_model_name):
     if selected_model_name in ["xmchat", "GPT4 Vision"]:
         return ["image"]
     else:
         return [".pdf", ".docx", ".pptx", ".epub", ".xlsx", ".txt", "text"]

+
 # def toggle_file_type(selected_model_name):
 #     return gr.Files.update(file_types=get_corresponding_file_type_by_model_name(selected_model_name))

+
 def new_auto_history_filename(username):
     latest_file = get_first_history_name(username)
     if latest_file:
-        with open(os.path.join(HISTORY_DIR, username, latest_file + ".json"), 'r', encoding="utf-8") as f:
+        with open(
+            os.path.join(HISTORY_DIR, username, latest_file + ".json"),
+            "r",
+            encoding="utf-8",
+        ) as f:
             if len(f.read()) == 0:
                 return latest_file
-    now = i18n("新对话 ") + datetime.datetime.now().strftime('%m-%d %H-%M')
-    return f'{now}.json'
+    now = i18n("新对话 ") + datetime.datetime.now().strftime("%m-%d %H-%M")
+    return f"{now}.json"
+

 def get_history_filepath(username):
     dirname = os.path.join(HISTORY_DIR, username)
@@ -687,20 +775,28 @@ def get_history_filepath(username):
         latest_file = os.path.join(dirname, latest_file)
     return latest_file

+
 def beautify_err_msg(err_msg):
-    if "insufficient_quota" in err_msg:
-        return i18n("剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
+    if "insufficient_quota" in err_msg:
+        return i18n(
+            "剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)"
+        )
     if "The model `gpt-4` does not exist" in err_msg:
-        return i18n("你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
+        return i18n(
+            "你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)"
+        )
     if "Resource not found" in err_msg:
         return i18n("请查看 config_example.json,配置 Azure OpenAI")
     return err_msg

+
 def auth_from_conf(username, password):
     try:
         with open("config.json", encoding="utf-8") as f:
             conf = json.load(f)
-        usernames, passwords = [i[0] for i in conf["users"]], [i[1] for i in conf["users"]]
+        usernames, passwords = [i[0] for i in conf["users"]], [
+            i[1] for i in conf["users"]
+        ]
         if username in usernames:
             if passwords[usernames.index(username)] == password:
                 return True
@@ -708,6 +804,7 @@ def auth_from_conf(username, password):
     except:
         return False

+
 def get_file_hash(file_src=None, file_paths=None):
     if file_src:
         file_paths = [x.name for x in file_src]
@@ -721,12 +818,14 @@ def get_file_hash(file_src=None, file_paths=None):

     return md5_hash.hexdigest()

+
 def myprint(**args):
     print(args)

+
 def replace_special_symbols(string, replace_string=" "):
     # Define a regex matching all special symbols
-    pattern = r'[!@#$%^&*()<>?/\|}{~:]'
+    pattern = r"[!@#$%^&*()<>?/\|}{~:]"

     new_string = re.sub(pattern, replace_string, string)
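
For downstream callers, the visible API change is the `save_file` signature: the model now carries its own state, and `save_file` reads what it needs from the object. A hypothetical stand-in showing the attributes the new `save_file` expects (the attribute names come from the diff above; the class itself is illustrative, not part of the codebase):

class StubModel:
    # Minimal hypothetical object exposing everything save_file() now reads.
    system_prompt = "You are a helpful assistant."
    history = []
    user_name = "user"
    single_turn = False
    temperature = 1.0
    top_p = None
    n_choices = 1
    stop_sequence = None
    max_generation_token = None
    presence_penalty = 0
    frequency_penalty = 0
    logit_bias = None
    user_identifier = "user"
    metadata = {}

# Old call shape: save_file(filename, system, history, chatbot, user_name)
# New call shape: save_file(filename, StubModel(), chatbot)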