vislupus committed
Commit b3d7c0e · verified · 1 Parent(s): ed099ae

Update app.py

Files changed (1)
  1. app.py (+13 -6)
app.py CHANGED

@@ -29,11 +29,13 @@ print("Loading the model...")
 llm = Llama(model_path=model_path)
 print("Model loaded successfully!")
 
+# Define the function to generate responses
 def generate_response(messages, temperature=0.7, top_p=1.0, max_tokens=256):
     """
     Generate a response from the model.
     Args:
-        messages (list): List of conversation history in a tuple format (user, assistant).
+        messages (list): List of conversation history in the format:
+            [{'role': 'user', 'content': '...'}, {'role': 'assistant', 'content': '...'}]
         temperature (float): Sampling temperature.
         top_p (float): Top-p sampling parameter.
         max_tokens (int): Maximum number of tokens to generate.
@@ -41,11 +43,16 @@ def generate_response(messages, temperature=0.7, top_p=1.0, max_tokens=256):
         str: The model's response.
     """
     prompt = ""
-    for user_message, assistant_message in messages:
-        prompt += f"<start_of_turn>user\n{user_message}\n<end_of_turn>"
-        prompt += f"<start_of_turn>model\n{assistant_message}\n<end_of_turn>"
-
-    prompt += "<start_of_turn>user\n" + messages[-1][0] + "\n<end_of_turn>"
+    for message in messages:
+        role = message['role']
+        content = message['content']
+
+        if role == 'user':
+            prompt += f"<start_of_turn>user\n{content}\n<end_of_turn>"
+        elif role == 'assistant':
+            prompt += f"<start_of_turn>model\n{content}\n<end_of_turn>"
+
+    prompt += "<start_of_turn>user\n" + messages[-1]['content'] + "\n<end_of_turn>"
 
     try:
         response = llm(prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
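A minimal usage sketch of the updated signature, assuming the `llm` model above has already been loaded by app.py; the sample conversation and the final `print` call are illustrative only and are not part of the commit:

    # Conversation history in the new role/content dict format expected
    # by the updated generate_response.
    messages = [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "The capital of France is Paris."},
        {"role": "user", "content": "How large is its population?"},
    ]

    # Each dict is mapped to a Gemma-style <start_of_turn>user / <start_of_turn>model
    # turn inside generate_response before the prompt is passed to llama-cpp-python.
    reply = generate_response(messages, temperature=0.7, top_p=0.9, max_tokens=128)
    print(reply)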