Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -5,21 +5,20 @@ import os
 import threading
 import time
 
-repo_id = "
-filename = "
+repo_id = "ineair/llm-jp-3-3.7b-instruct-EZO-Humanities-gguf"
+filename = "llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf"
 
 CONTEXT_SIZE = 2048
 N_THREADS = min(os.cpu_count(), 4)
 
 llm = None
 model_loaded = False
-loading_progress = 0
 
-def load_model():
-    global llm, model_loaded
-
+def load_model(progress=gr.Progress()):
+    global llm, model_loaded
+    progress(0, desc="Starting model download")
     model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-
+    progress(0.5, desc="Loading model into memory")
     llm = Llama(
         model_path=model_path,
         n_threads=N_THREADS,
@@ -27,12 +26,9 @@ def load_model():
         verbose=False,
         n_ctx=CONTEXT_SIZE,
     )
-
+    progress(1, desc="Model loading complete")
     model_loaded = True
-
-def get_loading_status():
-    global loading_progress
-    return loading_progress, f"Model loading progress: {loading_progress}%"
+    return "Model loading has finished."
 
 def get_llama_response(prompt):
     global llm, model_loaded
@@ -57,13 +53,12 @@ def greet(prompt, intensity):
 
     return full_response + "!" * int(intensity)
 
-# Load the model asynchronously
-threading.Thread(target=load_model, daemon=True).start()
-
 with gr.Blocks() as demo:
     gr.Markdown("# Llama.cpp-python-sample (Streaming)")
     gr.Markdown(f"MODEL: {filename} from {repo_id}")
 
+    loading_status = gr.Textbox(label="Loading Status")
+
     with gr.Row():
         input_text = gr.Textbox(label="Enter your prompt")
         intensity = gr.Slider(minimum=0, maximum=10, step=1, label="Intensity")
@@ -71,12 +66,8 @@ with gr.Blocks() as demo:
     output_text = gr.Textbox(label="Generated Response")
     submit_button = gr.Button("Submit")
 
-    loading_progress = gr.Progress()
-    loading_status = gr.Textbox(label="Loading Status")
-
    submit_button.click(fn=greet, inputs=[input_text, intensity], outputs=output_text)
-    demo.load(fn=
+    demo.load(fn=load_model, outputs=loading_status)
 
 demo.queue()
-
-demo.launch()
+demo.launch()
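After this change, the model is no longer loaded on a background threading.Thread; instead demo.load runs load_model through Gradio's event queue whenever a client opens the page, and gr.Progress surfaces the download/load stages in the UI. A minimal sketch of the same pattern, assuming the repo_id/filename from this commit and the current gradio, huggingface_hub, and llama-cpp-python APIs (the "status" textbox name and the early-return guard are illustrative additions, not part of the commit):

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm = None  # populated once load_model has run

def load_model(progress=gr.Progress()):
    """Download the GGUF file and load it, reporting coarse progress."""
    global llm
    if llm is not None:
        return "Model already loaded."  # avoid reloading on every page open
    progress(0, desc="Starting model download")
    path = hf_hub_download(
        repo_id="ineair/llm-jp-3-3.7b-instruct-EZO-Humanities-gguf",
        filename="llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf",
    )
    progress(0.5, desc="Loading model into memory")
    llm = Llama(model_path=path, n_ctx=2048, verbose=False)
    progress(1, desc="Model loading complete")
    return "Model loading has finished."

with gr.Blocks() as demo:
    status = gr.Textbox(label="Loading Status")
    demo.load(fn=load_model, outputs=status)  # fires on each page load

demo.queue()
demo.launch()

One consequence of this wiring is that load_model is triggered per page load rather than once at startup, which is why a guard on an already-loaded model matters: without it, a multi-gigabyte f16 GGUF would be re-downloaded or re-read into memory on every visit.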
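The hunks skip over the body of get_llama_response, so only its signature and greet's final line are visible in this diff. Purely as a hypothetical reconstruction, consistent with those visible lines, the app's "(Streaming)" title, and llama-cpp-python's streaming completion API (calling the Llama object with stream=True yields chunks exposing choices[0]["text"]), it could look like the following; it assumes the module-level llm and model_loaded globals from app.py:

def get_llama_response(prompt):
    # Hypothetical body: the actual implementation is not shown in this diff.
    global llm, model_loaded
    if not model_loaded:
        yield "Model is still loading, please retry shortly."
        return
    # Stream completion chunks as they are generated.
    for chunk in llm(prompt, max_tokens=256, stream=True):
        yield chunk["choices"][0]["text"]

def greet(prompt, intensity):
    # Accumulate the streamed pieces, then append "!" * intensity,
    # matching the visible final line of greet in the diff.
    full_response = ""
    for piece in get_llama_response(prompt):
        full_response += piece
    return full_response + "!" * int(intensity)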