vilarin committed
Commit fc09eb0 · verified · 1 Parent(s): 92c729c

Update app.py

Files changed (1): app.py (+46 −34)
app.py CHANGED
@@ -9,7 +9,6 @@ OLLAMA = os.path.expanduser("~/ollama")
 process = None
 OLLAMA_SERVICE_THREAD = None
 
-
 if not os.path.exists(OLLAMA):
     subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
     os.chmod(OLLAMA, 0o755)
@@ -27,7 +26,6 @@ def terminate():
     OLLAMA_SERVICE_THREAD.join()
     process = None
     OLLAMA_SERVICE_THREAD = None
-    os.system("systemctl stop ollama.service")
     return "Ollama service stopped."
 
 # Uncomment and modify the model to what you want locally
@@ -94,7 +92,7 @@ def ollama_func(command):
     else:
         return "No supported command."
 
-@spaces.GPU()
+
 def launch():
     global OLLAMA_SERVICE_THREAD
     OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
@@ -102,8 +100,41 @@ def launch():
     print("Giving ollama serve a moment")
     time.sleep(10)
 
+@spaces.GPU()
 def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f"message: {message}")
+    conversation = []
+    for prompt, answer in history:
+        conversation.extend([
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
+        ])
+    conversation.append({"role": "user", "content": message})
+
+    print(f"Conversation is -\n{conversation}")
+
+    response = client.chat(
+        model=model,
+        messages=conversation,
+        stream=True,
+        options={
+            'num_predict': max_new_tokens,
+            'temperature': temperature,
+            'top_p': top_p,
+            'top_k': top_k,
+            'repeat_penalty': penalty,
+            'low_vram': True,
+            "keep_alive": "60s",
+        },
+    )
+
+    buffer = ""
+    for chunk in response:
+        buffer += chunk["message"]["content"]
+        yield buffer
+
+
+def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     if message.startswith("/"):
         resp = ollama_func(message)
         yield resp
@@ -111,38 +142,19 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
         if not INIT_SIGN:
             yield "Please initialize Ollama"
         else:
-            launch()
-            conversation = []
-            for prompt, answer in history:
-                conversation.extend([
-                    {"role": "user", "content": prompt},
-                    {"role": "assistant", "content": answer},
-                ])
-            conversation.append({"role": "user", "content": message})
-
-            print(f"Conversation is -\n{conversation}")
-
-            response = client.chat(
-                model=model,
-                messages=conversation,
-                stream=True,
-                options={
-                    'num_predict': max_new_tokens,
-                    'temperature': temperature,
-                    'top_p': top_p,
-                    'top_k': top_k,
-                    'repeat_penalty': penalty,
-                    'low_vram': True,
-                },
+            if process:
+                launch()
+            stream_chat(
+                message,
+                history,
+                model,
+                temperature,
+                max_new_tokens,
+                top_p,
+                top_k,
+                penalty
             )
 
-            terminate()
-
-            buffer = ""
-            for chunk in response:
-                buffer += chunk["message"]["content"]
-                yield buffer
-
 
 chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
 
@@ -150,7 +162,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
-        fn=stream_chat,
+        fn=main,
         chatbot=chatbot,
         fill_height=True,
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
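
After this change, gr.ChatInterface calls main, which starts the Ollama service on demand and hands generation to the GPU-decorated stream_chat generator. A minimal sketch of that wrapper is below; it assumes the globals defined elsewhere in app.py (process, INIT_SIGN, ollama_func, launch, stream_chat), and the yield from relay plus the process-is-None check are assumptions about the intended behaviour, not the committed code.

# Hedged sketch of the main() wrapper, not the committed implementation.
# Assumes the app.py globals: process, INIT_SIGN, ollama_func, launch, stream_chat.
def main(message: str, history: list, model: str, temperature: float,
         max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    if message.startswith("/"):
        # Slash commands are routed to ollama_func.
        yield ollama_func(message)
        return
    if not INIT_SIGN:
        yield "Please initialize Ollama"
        return
    if process is None:
        # Start `ollama serve` only when it is not already running
        # (assumed intent of the `if process:` check in the commit).
        launch()
    # stream_chat() is a generator; iterating it forwards each partial
    # buffer so the Gradio chatbot streams tokens as they arrive.
    yield from stream_chat(message, history, model, temperature,
                           max_new_tokens, top_p, top_k, penalty)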