sawac committed
Commit 84e2d22 · verified · Parent(s): a7cb98d

Update app.py

Files changed (1):
  1. app.py (+12 -21)
app.py CHANGED

@@ -5,21 +5,20 @@ import os
 import threading
 import time
 
-repo_id = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
-filename = "ELYZA-japanese-Llama-2-7b-instruct-q4_K_M.gguf"
+repo_id = "ineair/llm-jp-3-3.7b-instruct-EZO-Humanities-gguf"
+filename = "llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf"
 
 CONTEXT_SIZE = 2048
 N_THREADS = min(os.cpu_count(), 4)
 
 llm = None
 model_loaded = False
-loading_progress = 0
 
-def load_model():
-    global llm, model_loaded, loading_progress
-    loading_progress = 0
+def load_model(progress=gr.Progress()):
+    global llm, model_loaded
+    progress(0, desc="モデルのダウンロードを開始")
     model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-    loading_progress = 50
+    progress(0.5, desc="モデルをメモリに読み込み中")
     llm = Llama(
         model_path=model_path,
         n_threads=N_THREADS,
@@ -27,12 +26,9 @@ def load_model():
         verbose=False,
         n_ctx=CONTEXT_SIZE,
     )
-    loading_progress = 100
+    progress(1, desc="モデルの読み込み完了")
     model_loaded = True
-
-def get_loading_status():
-    global loading_progress
-    return loading_progress, f"モデル読み込み進捗: {loading_progress}%"
+    return "モデルの読み込みが完了しました。"
 
 def get_llama_response(prompt):
     global llm, model_loaded
@@ -57,13 +53,12 @@ def greet(prompt, intensity):
 
     return full_response + "!" * int(intensity)
 
-# モデルを非同期で読み込む
-threading.Thread(target=load_model, daemon=True).start()
-
 with gr.Blocks() as demo:
     gr.Markdown("# Llama.cpp-python-sample (Streaming)")
     gr.Markdown(f"MODEL: {filename} from {repo_id}")
 
+    loading_status = gr.Textbox(label="Loading Status")
+
     with gr.Row():
         input_text = gr.Textbox(label="Enter your prompt")
         intensity = gr.Slider(minimum=0, maximum=10, step=1, label="Intensity")
@@ -71,12 +66,8 @@ with gr.Blocks() as demo:
     output_text = gr.Textbox(label="Generated Response")
     submit_button = gr.Button("Submit")
 
-    loading_progress = gr.Progress()
-    loading_status = gr.Textbox(label="Loading Status")
-
     submit_button.click(fn=greet, inputs=[input_text, intensity], outputs=output_text)
-    demo.load(fn=get_loading_status, outputs=[loading_progress, loading_status], every=1)
+    demo.load(fn=load_model, outputs=loading_status)
 
 demo.queue()
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
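
For readers following the change: the commit swaps the hand-rolled progress machinery (a polled `loading_progress` global fed to `demo.load(..., every=1)`, plus a daemon `threading.Thread` kicking off the load) for Gradio's built-in progress tracking. Declaring a `gr.Progress()` default parameter makes Gradio inject a live tracker into the handler, and `demo.load` runs that handler when a page session opens. Below is a minimal, self-contained sketch of the same pattern; the `time.sleep` calls are stand-ins for the real `hf_hub_download` and `Llama(...)` steps, and all names here are illustrative, not part of the commit:

```python
import time
import gradio as gr

def load_model(progress=gr.Progress()):
    # Gradio sees the gr.Progress() default parameter and injects a tracker;
    # each progress(...) call updates the bar rendered on the output component.
    progress(0, desc="Starting download")
    time.sleep(1)   # stand-in for hf_hub_download(repo_id=..., filename=...)
    progress(0.5, desc="Loading model into memory")
    time.sleep(1)   # stand-in for Llama(model_path=...)
    progress(1, desc="Done")
    return "Model loaded."  # final value lands in the Textbox below

with gr.Blocks() as demo:
    status = gr.Textbox(label="Loading Status")
    # demo.load fires once per browser session when the page opens,
    # so loading starts immediately without a button click.
    demo.load(fn=load_model, outputs=status)

demo.queue()   # kept in the commit; Gradio's progress bars rely on the queue
demo.launch()
```

One behavioral difference worth noting: the removed daemon thread loaded the model once at process start, whereas `demo.load` re-runs `load_model` on every page visit. `hf_hub_download` caches the file locally, but as written the `Llama(...)` construction repeats per visit, since the new `load_model` does not check `model_loaded` before reloading.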