import gradio as gr
from llama_cpp import Llama
import json

# Main chat model (NeuralHermes 2.5 Mistral 7B) and a small Qwen 1.8B model
# used only for translation. Both use the ChatML prompt format.
llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
            n_ctx=32768, n_threads=2, chat_format="chatml")
llm_for_translate = Llama(model_path="./qwen-1.8b-q5_k_m.gguf",
                          n_ctx=1024, n_threads=2, chat_format="chatml")

# In-memory list of (Chinese, English) sentence pairs, used to map chat
# history between the two languages in either direction.
chi_eng_dict = []


def get_dict_result(original_text):
    """Bidirectional lookup: return the paired translation, or None."""
    for d in chi_eng_dict:
        if d[0] == original_text:
            return d[1]
        elif d[1] == original_text:
            return d[0]
    return None


def stream_translate_into(message, language='English'):
    """Stream a translation of `message` from the small translation model."""
    return llm_for_translate.create_chat_completion(
        messages=[
            {"role": "system",
             "content": f"Translate words into {language}, regardless of the meaning!"},
            {"role": "user", "content": f"'{message}'"},
        ],
        stream=True,
        stop=['\n\n'],
    )


def chat_completion(message, history, system_prompt):
    """Non-streaming variant; kept for reference but not wired to the UI."""
    messages_prompts = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages_prompts.append({"role": "user", "content": human})
        messages_prompts.append({"role": "assistant", "content": assistant})
    messages_prompts.append({"role": "user", "content": message})
    response = llm.create_chat_completion(messages=messages_prompts, stream=False)
    print(json.dumps(response, ensure_ascii=False, indent=2))
    return response['choices'][0]['message']['content']


def chat_stream_completion(message, history, system_prompt, translate_check):
    messages_prompts = [{"role": "system", "content": system_prompt}]
    if translate_check:
        # Rebuild the history in English from the translation dictionary.
        if len(history) > 0:
            for human, assistant in history:
                human_repl = get_dict_result(human)
                assistant_repl = get_dict_result(assistant)
                if human_repl is None or assistant_repl is None:
                    print(chi_eng_dict)
                    raise gr.Error("The history is missing translation-dictionary "
                                   "entries; do not toggle translation mid-conversation!")
                messages_prompts.append({"role": "user", "content": human_repl})
                messages_prompts.append({"role": "assistant", "content": assistant_repl})
        # Translate the new user message into English before querying the main model.
        message_repl = ""
        for chunk in stream_translate_into(message, language='English'):
            delta = chunk['choices'][0]["delta"]
            if len(delta) != 0 and "content" in delta:
                message_repl += delta["content"]
        chi_eng_dict.append((message, message_repl))
        messages_prompts.append({"role": "user", "content": message_repl})
        print(messages_prompts)
        response = llm.create_chat_completion(
            messages=messages_prompts, stream=False, stop=['\n\n'])
        print(json.dumps(response, ensure_ascii=False, indent=2))
        result = response['choices'][0]['message']['content']
        # Stream the answer back to the user, translated into Chinese.
        result_repl = ""
        for chunk in stream_translate_into(result, language='Chinese'):
            delta = chunk['choices'][0]["delta"]
            if len(delta) != 0 and "content" in delta:
                result_repl += delta["content"]
                yield result_repl
        chi_eng_dict.append((result, result_repl))
    else:
        # Plain pass-through: forward history and message to the main model
        # and stream its tokens directly.
        for human, assistant in history:
            messages_prompts.append({"role": "user", "content": human})
            messages_prompts.append({"role": "assistant", "content": assistant})
        messages_prompts.append({"role": "user", "content": message})
        response = llm.create_chat_completion(messages=messages_prompts, stream=True)
        message_repl = ""
        for chunk in response:
            delta = chunk['choices'][0]["delta"]
            if len(delta) != 0 and "content" in delta:
                message_repl += delta["content"]
                yield message_repl


gr.ChatInterface(
    chat_stream_completion,
    additional_inputs=[
        gr.Textbox("You are a helpful AI.", label="System Prompt"),
        gr.Checkbox(label="Translate?"),
    ],
).queue().launch(server_name="0.0.0.0")
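# --- Usage sketch (assumptions, not part of the original script) ---
# The two GGUF files referenced above are assumed to sit next to this script;
# any ChatML-format chat model and any small translation-capable model can be
# substituted via model_path.
#   pip install gradio llama-cpp-python
#   python app.py          # "app.py" is a hypothetical filename for this script
# Gradio then serves the chat UI on http://0.0.0.0:7860 (its default port).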