Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
3 |
import json
|
|
|
4 |
|
5 |
# Initialize the pipeline with the new model
|
6 |
pipe = pipeline("text-generation", model="Blexus/Quble_test_model_v1_INSTRUCT_v1")
|
@@ -42,15 +43,18 @@ def generate(prompt, system, history, temperature=0.9, max_new_tokens=4096, top_
|
|
42 |
if formatted_prompt in database:
|
43 |
response_text = database[formatted_prompt]
|
44 |
else:
|
45 |
-
#
|
46 |
try:
|
47 |
-
|
48 |
-
|
49 |
-
assistant_response = response["generated_text"].split("ASSISTANT:")[-1].strip()
|
50 |
-
yield assistant_response
|
51 |
|
52 |
-
#
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
save_database(database)
|
55 |
except Exception as e:
|
56 |
print(f"Error generating response: {e}")
|
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
3 |
import json
|
4 |
+
import time
|
5 |
|
6 |
# Initialize the pipeline with the new model
|
7 |
pipe = pipeline("text-generation", model="Blexus/Quble_test_model_v1_INSTRUCT_v1")
|
|
|
43 |
if formatted_prompt in database:
|
44 |
response_text = database[formatted_prompt]
|
45 |
else:
|
46 |
+
# Generate the response without streaming
|
47 |
try:
|
48 |
+
response = pipe(formatted_prompt, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty)[0]["generated_text"]
|
49 |
+
response_text = response.split("ASSISTANT:")[-1].strip()
|
|
|
|
|
50 |
|
51 |
+
# Simulate streaming by yielding the response one character at a time
|
52 |
+
for char in response_text:
|
53 |
+
yield char
|
54 |
+
time.sleep(0.05) # Add a slight delay to simulate typing
|
55 |
+
|
56 |
+
# Cache the finished response so an identical prompt can be answered from the database
|
57 |
+
database[formatted_prompt] = response_text
|
58 |
save_database(database)
|
59 |
except Exception as e:
|
60 |
print(f"Error generating response: {e}")
|