vilarin committed on
Commit 3a15f63 · verified · 1 parent: 85c845b

Update app.py

Files changed (1)
  1. app.py +25 -40
app.py CHANGED
@@ -92,18 +92,15 @@ def ollama_func(command):
         return "Running..."
     else:
         return "No supported command."
-
+
+@spaces.GPU()
 def launch():
     global OLLAMA_SERVICE_THREAD
     OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
     OLLAMA_SERVICE_THREAD.start()
-    print("Giving ollama serve a moment")
-    time.sleep(10)
 
-@spaces.GPU()
+
 async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-    if not process:
-        launch()
     print(f"message: {message}")
     conversation = []
     for prompt, answer in history:
@@ -111,28 +108,10 @@ async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": answer},
         ])
-    conversation.append({"role": "user", "content": message})
-
+    conversation.append({"role": "user", "content": message})
+
     print(f"Conversation is -\n{conversation}")
-
-    async for part in await client.chat(
-        model=model,
-        stream=True,
-        messages=conversation,
-        keep_alive="60s",
-        options={
-            'num_predict': max_new_tokens,
-            'temperature': temperature,
-            'top_p': top_p,
-            'top_k': top_k,
-            'repeat_penalty': penalty,
-            'low_vram': True,
-        },
-    ):
-        yield part['message']['content']
-
-
-async def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+
     if message.startswith("/"):
         resp = ollama_func(message)
         yield resp
@@ -140,19 +119,25 @@ async def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
         if not INIT_SIGN:
             yield "Please initialize Ollama"
         else:
-            async for response in stream_chat(
-                message,
-                history,
-                model,
-                temperature,
-                max_new_tokens,
-                top_p,
-                top_k,
-                penalty
-            ):
-                yield response
-
-
+            if not process:
+                launch()
+                print("Giving ollama serve a moment")
+                time.sleep(10)
+            async for part in await client.chat(
+                model=model,
+                stream=True,
+                messages=conversation,
+                keep_alive="60s",
+                options={
+                    'num_predict': max_new_tokens,
+                    'temperature': temperature,
+                    'top_p': top_p,
+                    'top_k': top_k,
+                    'repeat_penalty': penalty,
+                    'low_vram': True,
+                },
+            ):
+                yield part['message']['content']
 
 chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
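
For context on what the hot path looks like after this commit: the Ollama server is now launched lazily from inside stream_chat, and tokens are streamed straight back to the chatbot. Below is a minimal, self-contained sketch of that pattern. It assumes the ollama Python package and the ollama binary are available locally; the model tag, the subprocess-based service thread, and the sleep length are illustrative stand-ins, not code from this Space.

import asyncio
import subprocess
import threading
import time

import ollama

client = ollama.AsyncClient()
process = None  # stands in for the global that stream_chat checks with `if not process`


def ollama_service_thread():
    # Stand-in for the Space's own service thread: run `ollama serve` in the background.
    global process
    process = subprocess.Popen(["ollama", "serve"])
    process.wait()


def launch():
    threading.Thread(target=ollama_service_thread, daemon=True).start()


async def demo():
    # Lazy start, as in this commit: launch the server only on first use,
    # then give it a moment to come up before sending the first request.
    if not process:
        launch()
        time.sleep(10)

    conversation = [{"role": "user", "content": "Hello!"}]
    # Same call shape as the code this commit moves into stream_chat().
    async for part in await client.chat(
        model="llama3",  # placeholder model tag, not taken from this Space
        stream=True,
        messages=conversation,
        keep_alive="60s",
        options={"num_predict": 128, "temperature": 0.8},
    ):
        print(part["message"]["content"], end="", flush=True)


if __name__ == "__main__":
    asyncio.run(demo())

As the diff shows, the fixed 10-second warm-up and the now GPU-decorated launch() run only when the first real chat request arrives, rather than unconditionally at startup.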