Update app.py
app.py CHANGED
@@ -29,25 +29,35 @@ print("Loading the model...")
 llm = Llama(model_path=model_path)
 print("Model loaded successfully!")
 
-def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
-
-
-
-    # history_langchain_format.append(AIMessage(content=ai))
-
-    # history_langchain_format.append(HumanMessage(content=message))
-
+def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
+    """
+    Generate a response based on the user's message and the conversation history.
+    """
     try:
-
-
+        conversation = ""
+        for msg in history:
+            role, content = msg
+            conversation += f"<start_of_turn>{role}\n{content}\n<end_of_turn>"
+
+        conversation += f"<start_of_turn>user\n{message}\n<end_of_turn>"
+
+        response = llm(conversation, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
+        return response["choices"][0]["text"].strip()
+
     except Exception as e:
-        return f"Error generating response: {e}"
+        return f"Error generating response: {str(e)}"
 
 if __name__ == "__main__":
-
+    gguf_demo = gr.ChatInterface(
         generate_response,
         title=title,
         description=description,
     )
 
-
+    gguf_demo.launch(share=True)
+
+    try:
+        if llm:
+            llm.close()
+    except Exception as e:
+        print(f"Error closing model: {e}")
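
For reference, the prompt-assembly loop added in this commit flattens the Gradio chat history into Gemma-style turn markers before calling the model. The sketch below shows the string that loop builds; the history pairs and message are invented sample data, and each history entry is unpacked as a (role, content) pair, as the committed code assumes:

    # Illustrative sketch of the prompt assembly in the updated generate_response.
    # The sample history and message are made up for demonstration.
    history = [("user", "Hi there!"), ("model", "Hello! How can I help?")]
    message = "Summarize GGUF in one sentence."

    conversation = ""
    for msg in history:
        role, content = msg  # assumed (role, content) pair
        conversation += f"<start_of_turn>{role}\n{content}\n<end_of_turn>"
    conversation += f"<start_of_turn>user\n{message}\n<end_of_turn>"

    print(conversation)

With that sample data, the printed prompt is:

    <start_of_turn>user
    Hi there!
    <end_of_turn><start_of_turn>model
    Hello! How can I help?
    <end_of_turn><start_of_turn>user
    Summarize GGUF in one sentence.
    <end_of_turn>

The llm(...) call then passes this string to llama-cpp-python's completion interface, which returns an OpenAI-style dict; that is why the handler reads response["choices"][0]["text"].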