q8 model
app.py  CHANGED

@@ -2,9 +2,14 @@ import gradio as gr
 
 from llama_cpp import Llama, LlamaTokenizer
 
+# llm = Llama.from_pretrained(
+#     repo_id="ID2223JR/gguf_model",
+#     filename="unsloth.Q4_K_M.gguf",
+# )
+
 llm = Llama.from_pretrained(
-    repo_id="ID2223JR/gguf_model",
-    filename="unsloth.Q4_K_M.gguf",
+    repo_id="ID2223JR/gguf_model_q8",
+    filename="unsloth.Q8_0.gguf",
 )
 
 
@@ -56,21 +61,11 @@ def submit_to_model():
     content = ""
 
     for partial_response in response:
-        [five deleted lines whose text did not survive this capture]
-        elif "message" in partial_response["choices"][0]:
-            content += partial_response["choices"][0]["message"].get(
-                "content", ""
-            )
-        else:
-            content += partial_response["choices"][0].get("text", "")
-        if content:
-            yield content
-        else:
-            yield "Unexpected response structure."
+
+        content += partial_response["choices"][0]["delta"].get("content", "")
+
+        if content:
+            yield content
 
     ingredients_list.clear()  # Reset list after generation
 
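The rewritten loop assumes response is a streamed chat completion, where every chunk carries an OpenAI-style "delta" rather than a full "message", which is why the old message/text fallback branches could be dropped. A minimal sketch of producing and consuming such a stream with llama-cpp-python; stream_reply and the prompt are illustrative names, not taken from the app:

from llama_cpp import Llama

def stream_reply(llm: Llama, prompt: str):
    """Yield the growing reply text, as the app's submit_to_model does."""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        stream=True,  # chunks now carry "delta" instead of "message"
    )
    content = ""
    for partial_response in response:
        # Some chunks (the initial role chunk, the final stop chunk) have a
        # delta without a "content" key, hence the .get() default of "".
        content += partial_response["choices"][0]["delta"].get("content", "")
        if content:
            yield content

Accumulating into content and yielding the whole string each time matches how Gradio streaming outputs expect the full text so far, not just the newest token.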