Spaces:
Runtime error
Update app.py
app.py  CHANGED
@@ -2,28 +2,52 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import os
+import threading
+import time
 
-filename = "llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf"
+repo_id = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
+filename = "ELYZA-japanese-Llama-2-7b-instruct-q4_K_M.gguf"
 
+CONTEXT_SIZE = 2048
+N_THREADS = min(os.cpu_count(), 4)
 
+llm = None
+model_loaded = False
+loading_progress = 0
 
+def load_model():
+    global llm, model_loaded, loading_progress
+    loading_progress = 0
+    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+    loading_progress = 50
+    llm = Llama(
+        model_path=model_path,
+        n_threads=N_THREADS,
+        n_batch=32,
+        verbose=False,
+        n_ctx=CONTEXT_SIZE,
+    )
+    loading_progress = 100
+    model_loaded = True
+
+def get_loading_status():
+    global loading_progress
+    return loading_progress
 
 def get_llama_response(prompt):
+    global llm, model_loaded
+    if not model_loaded:
+        return [{"choices": [{"text": "モデルを読み込んでいます。しばらくお待ちください..."}]}]
+    try:
+        return llm(prompt, max_tokens=1024, temperature=0.7, top_p=0.95, repeat_penalty=1.1, stream=True)
+    except Exception as e:
+        return [{"choices": [{"text": f"エラーが発生しました: {str(e)}"}]}]
 
 def greet(prompt, intensity):
+    global model_loaded
+    if not model_loaded:
+        return "モデルを読み込んでいます。しばらくお待ちください..."
+
     full_response = ""
     for output in get_llama_response(prompt):
         if len(output['choices']) > 0:
@@ -33,17 +57,25 @@ def greet(prompt, intensity):
 
     return full_response + "!" * int(intensity)
 
+# モデルを非同期で読み込む
+threading.Thread(target=load_model, daemon=True).start()
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Llama.cpp-python-sample (Streaming)")
+    gr.Markdown(f"MODEL: {filename} from {repo_id}")
+
+    with gr.Row():
+        input_text = gr.Textbox(label="Enter your prompt")
+        intensity = gr.Slider(minimum=0, maximum=10, step=1, label="Intensity")
+
+    output_text = gr.Textbox(label="Generated Response")
+    submit_button = gr.Button("Submit")
+
+    loading_bar = gr.progressbar(label="Model Loading Progress", max=100)
+
+    submit_button.click(fn=greet, inputs=[input_text, intensity], outputs=output_text)
+    demo.load(fn=get_loading_status, outputs=loading_bar, every=1)
 
 demo.queue()
+if __name__ == "__main__":
+    demo.launch()
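
Note on the "Runtime error" status: it is most plausibly caused by the new loading_bar = gr.progressbar(...) line. Gradio does not provide a gr.progressbar component (its gr.Progress helper is a tracker passed into event handlers, not a layout element), so building the Blocks context raises an AttributeError before the app can serve requests. For reference, the Japanese comment on the added line 60 means "load the model asynchronously", and the placeholder strings mean "The model is loading, please wait..." and "An error occurred:". Below is a minimal sketch of one way to keep the polled progress display, assuming a Gradio release where Blocks.load(..., every=...) is supported (the commit itself relies on it) and substituting a read-only gr.Slider for the non-existent component; the slider choice is this note's assumption, not the author's code.

import gradio as gr

loading_progress = 0  # stand-in for the module-level counter updated by load_model()

def get_loading_status():
    return loading_progress

with gr.Blocks() as demo:
    # A non-interactive slider doubles as a progress bar; gr.progressbar() does not exist.
    loading_bar = gr.Slider(minimum=0, maximum=100, value=0, interactive=False,
                            label="Model Loading Progress")
    # Poll the loading status once per second, mirroring the demo.load() call in the diff.
    demo.load(fn=get_loading_status, outputs=loading_bar, every=1)

demo.queue()
if __name__ == "__main__":
    demo.launch()

The rest of the wiring (the threaded load_model() call, submit_button.click) can stay exactly as committed; only the component constructor needs to change.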
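On the generation side, calling llm(prompt, ..., stream=True) in llama-cpp-python returns an iterator of partial chunks, each shaped like {"choices": [{"text": "..."}]}, which is why greet() loops over get_llama_response(prompt) and checks output['choices']. The lines that actually append each chunk to full_response (new lines 54-56) fall between the two hunks and are not shown in the diff, so the accumulate() helper below is only an illustration of how such a stream is typically consumed; fake_stream is a stand-in so the sketch runs without downloading a model.

# Sketch only: fake_stream stands in for the generator returned by
# llm(prompt, stream=True); the += line is an assumed reconstruction of the
# accumulation step that the diff does not show.
def accumulate(stream):
    full_response = ""
    for output in stream:
        if len(output["choices"]) > 0:
            full_response += output["choices"][0].get("text", "")
    return full_response

fake_stream = [
    {"choices": [{"text": "Hello"}]},
    {"choices": [{"text": ", world!"}]},
]
print(accumulate(fake_stream))  # prints "Hello, world!"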