Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,14 @@ def query(payload):
|
|
17 |
return response.json()
|
18 |
|
19 |
def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
|
|
|
20 |
prompt = format_chat_prompt(message, instruction)
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
response = query({"inputs": prompt})
|
22 |
generated_text = response[0]['generated_text']
|
23 |
assistant_message = generated_text.split("Assistant:")[-1]
|
|
|
17 |
return response.json()
|
18 |
|
19 |
def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
|
20 |
+
MAX_TOKENS = 1024 # limit for the model
|
21 |
prompt = format_chat_prompt(message, instruction)
|
22 |
+
# Check if the prompt is too long and, if so, truncate it
|
23 |
+
num_tokens = len(tokenizer.encode(prompt))
|
24 |
+
if num_tokens > MAX_TOKENS:
|
25 |
+
# Truncate the prompt to fit within the token limit
|
26 |
+
prompt = tokenizer.decode(tokenizer.encode(prompt)[-MAX_TOKENS:])
|
27 |
+
|
28 |
response = query({"inputs": prompt})
|
29 |
generated_text = response[0]['generated_text']
|
30 |
assistant_message = generated_text.split("Assistant:")[-1]
|