Update app.py
app.py (CHANGED)
@@ -29,11 +29,13 @@ print("Loading the model...")
 llm = Llama(model_path=model_path)
 print("Model loaded successfully!")
 
+# Define the function to generate responses
 def generate_response(messages, temperature=0.7, top_p=1.0, max_tokens=256):
     """
     Generate a response from the model.
     Args:
-        messages (list): List of conversation history in
+        messages (list): List of conversation history in the format:
+            [{'role': 'user', 'content': '...'}, {'role': 'assistant', 'content': '...'}]
         temperature (float): Sampling temperature.
         top_p (float): Top-p sampling parameter.
         max_tokens (int): Maximum number of tokens to generate.
@@ -41,11 +43,16 @@ def generate_response(messages, temperature=0.7, top_p=1.0, max_tokens=256):
         str: The model's response.
     """
     prompt = ""
-    for
-
-
-
-
+    for message in messages:
+        role = message['role']
+        content = message['content']
+
+        if role == 'user':
+            prompt += f"<start_of_turn>user\n{content}\n<end_of_turn>"
+        elif role == 'assistant':
+            prompt += f"<start_of_turn>model\n{content}\n<end_of_turn>"
+
+    prompt += "<start_of_turn>user\n" + messages[-1]['content'] + "\n<end_of_turn>"
 
     try:
         response = llm(prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
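
For context, a minimal usage sketch of the updated generate_response follows. It is a sketch under stated assumptions rather than part of the commit: the `from app import generate_response` import, the example conversation, and the sampling values are illustrative, and it presumes the Llama model referenced by model_path loads successfully when app.py is imported.

# Usage sketch (assumptions: this revision of app.py is importable as `app`
# and its GGUF model file is available locally; the conversation below is
# illustrative only).
from app import generate_response

messages = [
    {'role': 'user', 'content': 'What is llama.cpp?'},
    {'role': 'assistant', 'content': 'A C/C++ library for running LLMs locally.'},
    {'role': 'user', 'content': 'Can it load GGUF models?'},
]

# generate_response flattens the history into Gemma-style <start_of_turn>
# blocks, re-appends the latest user turn, and samples a completion from llm.
reply = generate_response(messages, temperature=0.7, top_p=1.0, max_tokens=256)
print(reply)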