from pathlib import Path

import gradio as gr
import langchain
import langchain_community
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_community.llms import LlamaCpp
from llama_cpp import Llama
| |
|
| | |
# Local path of the GGUF model file after download.
space_model_path = "./model/llama-3.2-1b-instruct-q8_0.gguf"
# Hugging Face repo id and file name of the quantized Llama 3.2 1B Instruct model.
model_path = "hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF"
file_name = "llama-3.2-1b-instruct-q8_0.gguf"

# Download the model only when it is not already present locally, so
# restarting the app does not re-fetch the (large) GGUF file every time.
if not Path(space_model_path).exists():
    Llama.from_pretrained(repo_id=model_path, filename=file_name, local_dir="./model")

# System prompt prepended to every conversation.
system_message = "You are a helpful assistant who acts like a pirate."

# LangChain wrapper around the local llama.cpp model.
llm = LlamaCpp(
    model_path=space_model_path,
    temperature=0.8,  # fairly creative sampling
    max_tokens=250,   # cap on generated tokens per reply
    top_p=0.6,
    verbose=True,
)
| |
|
| |
|
def stream_response(message, history):
    """Gradio ChatInterface callback: stream the assistant reply incrementally.

    Args:
        message: The user's latest message (str), or None.
        history: Prior turns, either as (human, ai) tuples (legacy Gradio
            tuple format) or as {"role": ..., "content": ...} dicts
            (Gradio "messages" format) — both are accepted.

    Yields:
        str: The partial assistant reply, growing as tokens stream in.
    """
    print(f"Input: {message}. History: {history}\n")

    # Rebuild the conversation in LangChain's message schema, always
    # starting with the pirate system prompt.
    chat_messages = [SystemMessage(content=system_message)]

    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: one dict per message.
            role = turn.get("role")
            content = turn.get("content")
            if role == "user":
                chat_messages.append(HumanMessage(content=content))
            elif role == "assistant":
                chat_messages.append(AIMessage(content=content))
        else:
            # Legacy tuple format: (user_text, assistant_text) per turn.
            human, ai = turn
            chat_messages.append(HumanMessage(content=human))
            chat_messages.append(AIMessage(content=ai))

    if message is not None:
        chat_messages.append(HumanMessage(content=message))

    # Stream tokens from llama.cpp, yielding the accumulated text each
    # time so the Gradio UI updates progressively.
    partial_message = ""
    for token in llm.stream(chat_messages):
        partial_message += token
        yield partial_message
| |
|
| |
|
# Input box for the chat UI: borderless, auto-scrolling, wide relative to
# the send button.
chat_textbox = gr.Textbox(
    placeholder="Send to the LLM...",
    container=False,
    autoscroll=True,
    scale=7,
)

# Wire the streaming callback into a Gradio chat interface.
demo_interface = gr.ChatInterface(
    stream_response,
    textbox=chat_textbox,
)

# Serve locally only (no public share link); debug=True surfaces errors
# in the console while the app runs.
demo_interface.launch(share=False, debug=True)