dingusagar committed: async support
app.py CHANGED
@@ -5,9 +5,8 @@ from ollama import chat
 from ollama import ChatResponse
 
 # Default model
-# OLLAMA_MODEL = "llama3.2:3b-instruct-q3_K_M"
-# OLLAMA_MODEL = "llama3.2:1b"
 OLLAMA_MODEL = "llama3.2:3b"
+# OLLAMA_MODEL = "llama3.2:1b"
 
 # Load BERT MODEL
 from transformers import pipeline, DistilBertTokenizerFast
@@ -88,28 +87,40 @@ Use second person terms like you in the explanation.
         prompt = explain_only_prompt
 
     print(f"Prompt to llama : {prompt}")
-
+    stream = chat(model=OLLAMA_MODEL, messages=[
         {
             'role': 'user',
             'content': prompt,
         },
-    ])
-
-
-
+    ], stream=True)
+    response = ""
+    for chunk in stream:
+        response += chunk['message']['content']
+        yield response
+
+def gradio_bert_interface(prompt):
+    response, confidence = ask_bert(prompt)
+    return f"{response} with confidence {confidence}%"
+
+# Separate function for Ollama response
+def gradio_ollama_interface(prompt, bert_class=""):
+    return ask_ollama(prompt, expected_class=bert_class)
 def gradio_interface(prompt, selected_model):
     if selected_model == MODEL_CHOICE_LLAMA:
-
+        for chunk in ask_ollama(prompt):
+            yield chunk
     elif selected_model == MODEL_CHOICE_BERT:
         response, confidence = ask_bert(prompt)
         response = f"{response} with confidence {confidence}"
+        return response
     elif selected_model == MODEL_CHOICE_BERT_LLAMA:
+        label, confidence = ask_bert(prompt)
+        initial_response = f"BERT model says {label} with confidence {confidence}%\n\nGenerating explanation using Llama model...\n"
+        yield initial_response
+        for chunk in ask_ollama(prompt, expected_class=label):
+            yield initial_response + "\n" + chunk
     else:
-
-    return response
+        return "Something went wrong. Select the correct model configuration from settings. "
 
 MODEL_CHOICE_BERT_LLAMA = "Fine-tuned BERT (classification) + Llama 3.2 3B (explanation)"
 MODEL_CHOICE_BERT = "Fine-tuned BERT (classification only)"