"""Streamlit app for 悟了悟了 (wulewule), an AI assistant for Black Myth: Wukong."""
import hydra
from hydra.core.global_hydra import GlobalHydra
from omegaconf import DictConfig, OmegaConf
import streamlit as st
from PIL import Image
import os
import sys

sys.path.append(os.path.dirname(__file__))
from download_models import download_model


@st.cache_resource
def load_simple_rag(config, used_lmdeploy=False):
    ## load config
    data_source_dir = config["data_source_dir"]
    db_persist_directory = config["db_persist_directory"]
    llm_model = config["llm_model"]
    embeddings_model = config["embeddings_model"]
    reranker_model = config["reranker_model"]
    llm_system_prompt = config["llm_system_prompt"]
    rag_prompt_template = config["rag_prompt_template"]

    from rag.simple_rag import WuleRAG

    if not used_lmdeploy:
        from rag.simple_rag import InternLM
        base_model = InternLM(model_path=llm_model, llm_system_prompt=llm_system_prompt)
    else:
        from deploy.lmdeploy_model import LmdeployLM
        cache_max_entry_count = config.get("cache_max_entry_count", 0.2)
        base_model = LmdeployLM(
            model_path=llm_model,
            llm_system_prompt=llm_system_prompt,
            cache_max_entry_count=cache_max_entry_count,
        )

    ## load final RAG model
    wulewule_rag = WuleRAG(
        data_source_dir,
        db_persist_directory,
        base_model,
        embeddings_model,
        reranker_model,
        rag_prompt_template,
    )
    return wulewule_rag


@st.cache_resource
def load_wulewule_agent(config):
    from agent.wulewule_agent import MultiModalAssistant, Settings

    use_remote = config["use_remote"]
    SiliconFlow_api = config["SiliconFlow_api"]
    data_source_dir = config["data_source_dir"]
    # fall back to the environment variable when the configured key looks too short to be valid
    if len(SiliconFlow_api) < 51 and os.environ.get("SiliconFlow_api", ""):
        SiliconFlow_api = os.environ.get("SiliconFlow_api")

    print("======= loading llm =======")
    if use_remote:
        from llama_index.llms.siliconflow import SiliconFlow
        from llama_index.embeddings.siliconflow import SiliconFlowEmbedding

        api_base_url = "https://api.siliconflow.cn/v1/chat/completions"
        # model = "Qwen/Qwen2.5-72B-Instruct"
        # model = "deepseek-ai/DeepSeek-V2.5"
        remote_llm = config["remote_llm"]
        remote_embeddings_model = config["remote_embeddings_model"]
        llm = SiliconFlow(
            model=remote_llm,
            base_url=api_base_url,
            api_key=SiliconFlow_api,
            max_tokens=4096,
        )
        embed_model = SiliconFlowEmbedding(
            model=remote_embeddings_model,
            api_key=SiliconFlow_api,
        )
    else:
        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
        from llama_index.llms.huggingface import HuggingFaceLLM

        local_llm = config["llm_model"]
        local_embeddings_model = config["agent_embeddings_model"]
        llm = HuggingFaceLLM(
            model_name=local_llm,
            tokenizer_name=local_llm,
            model_kwargs={"trust_remote_code": True},
            tokenizer_kwargs={"trust_remote_code": True},
            # context_window=4096,
            # max_new_tokens=4096,
        )
        embed_model = HuggingFaceEmbedding(model_name=local_embeddings_model)

    # register the models on llama-index global Settings
    Settings.llm = llm
    Settings.embed_model = embed_model
    wulewule_assistant = MultiModalAssistant(data_source_dir, llm, SiliconFlow_api)
    print("======= finished loading! =======")
    return wulewule_assistant
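
# A minimal standalone sketch for exercising the loader above outside Streamlit
# (e.g. for debugging). It assumes `configs/model_cfg.yaml` exists with the keys
# read above, and relies on `WuleRAG.query()` returning a string, as used in
# `main()` below; the sample question is purely illustrative.
#
#   from omegaconf import OmegaConf
#   cfg = OmegaConf.to_container(OmegaConf.load("configs/model_cfg.yaml"), resolve=True)
#   rag = load_simple_rag(cfg, used_lmdeploy=False)
#   print(rag.query("黑神话悟空的主角是谁?"))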
=======") return wulewule_assistant GlobalHydra.instance().clear() @hydra.main(version_base=None, config_path="./configs", config_name="model_cfg") def main(cfg): # omegaconf.dictcfg.DictConfig 转换为普通字典 config_dict = OmegaConf.to_container(cfg, resolve=True) ## download model from modelscope if not config_dict["use_remote"] and not os.path.exists(config_dict["llm_model"]): download_model(llm_model_path =config_dict["llm_model"]) ## agent mode, used llama-index, rturn off lmdeloy and chroma rag if cfg.agent_mode: ## load wulewule agent wulewule_assistant = load_wulewule_agent(config_dict) cfg.use_rag = False cfg.use_lmdepoly = False if cfg.use_rag: ## load rag model wulewule_model = load_simple_rag(config_dict, used_lmdeploy=cfg.use_lmdepoly) elif ( cfg.use_lmdepoly): ## load lmdeploy model from deploy.lmdeploy_model import load_turbomind_model, GenerationConfig wulewule_model = load_turbomind_model(config_dict["llm_model"], config_dict["llm_system_prompt"], config_dict["cache_max_entry_count"]) ## streamlit setting if "messages" not in st.session_state: st.session_state["messages"] = [] # 在侧边栏中创建一个标题和一个链接 with st.sidebar: st.markdown("## 悟了悟了💡") logo_path = "assets/sd_wulewule.webp" if os.path.exists(logo_path): image = Image.open(logo_path) st.image(image, caption='wulewule') "[InternLM](https://github.com/InternLM)" "[悟了悟了](https://github.com/xzyun2011/wulewule.git)" # 创建一个标题 st.title("悟了悟了:黑神话悟空AI助手🐒") # 遍历session_state中的所有消息,并显示在聊天界面上 for msg in st.session_state.messages: st.chat_message("user").write(msg["user"]) assistant_res = msg["assistant"] if isinstance(assistant_res, str): st.chat_message("assistant").write(assistant_res) elif cfg.agent_mode and isinstance(assistant_res, dict): image_url = assistant_res["image_url"] audio_text = assistant_res["audio_text"] st.chat_message("assistant").write(assistant_res["response"]) if image_url: # 使用st.image展示URL图像,并设置使用列宽 st.image( image_url, width=256 ) if audio_text: # 使用st.audio函数播放音频 st.audio("audio.mp3") st.write(f"语音内容为: \n\n{audio_text}") # Get user input if prompt := st.chat_input("请输入你的问题,换行使用Shfit+Enter。"): # Display user input st.chat_message("user").write(prompt) ## 初始化完整的回答字符串 full_answer = "" if cfg.agent_mode: with st.chat_message('robot'): message_placeholder = st.empty() response_dict = wulewule_assistant.chat(prompt) image_url = response_dict["image_url"] audio_text = response_dict["audio_text"] for cur_response in response_dict["response"]: full_answer += cur_response # Display robot response in chat message container message_placeholder.markdown(full_answer + '▌') message_placeholder.markdown(full_answer) # 将问答结果添加到 session_state 的消息历史中 st.session_state.messages.append({"user": prompt, "assistant": response_dict}) if image_url: # 使用st.image展示URL图像,并设置使用列宽 st.image( image_url, width=256 ) if audio_text: # 使用st.audio函数播放音频 st.audio("audio.mp3") st.write(f"语音内容为: \n\n{audio_text}") # 流式显示, used streaming result else: if cfg.stream_response: # rag with st.chat_message('robot'): message_placeholder = st.empty() if cfg.use_rag: for cur_response in wulewule_model.query_stream(prompt): full_answer += cur_response # Display robot response in chat message container message_placeholder.markdown(full_answer + '▌') elif cfg.use_lmdepoly: # gen_config = GenerationConfig(top_p=0.8, # top_k=40, # temperature=0.8, # max_new_tokens=2048, # repetition_penalty=1.05) messages = [{'role': 'user', 'content': f'{prompt}'}] for response in wulewule_model.stream_infer(messages): full_answer += response.text # Display robot response in chat message 
if __name__ == "__main__":
    main()
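
# Since this is a Streamlit app, it is presumably launched with
# `streamlit run <this file>` rather than plain `python`; hydra then resolves
# ./configs/model_cfg.yaml relative to this file.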