Spaces:

daniloedu
/

chat_llm_v2

Sleeping

daniloedu committed on Aug 4, 2023

Commit

38124bc

1 Parent(s): abaf1a6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,7 +17,14 @@ def query(payload):
     return response.json()
 def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
     prompt = format_chat_prompt(message, instruction)
     response = query({"inputs": prompt})
     generated_text = response[0]['generated_text']
     assistant_message = generated_text.split("Assistant:")[-1]

     return response.json()
 def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
+    MAX_TOKENS = 1024  # limit for the model
     prompt = format_chat_prompt(message, instruction)
+    # Check if the prompt is too long and, if so, truncate it
+    num_tokens = len(tokenizer.encode(prompt))
+    if num_tokens > MAX_TOKENS:
+        # Truncate the prompt to fit within the token limit
+        prompt = tokenizer.decode(tokenizer.encode(prompt)[-MAX_TOKENS:])
     response = query({"inputs": prompt})
     generated_text = response[0]['generated_text']
     assistant_message = generated_text.split("Assistant:")[-1]