Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -9,7 +9,6 @@ OLLAMA = os.path.expanduser("~/ollama")
 process = None
 OLLAMA_SERVICE_THREAD = None
 
-
 if not os.path.exists(OLLAMA):
     subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
     os.chmod(OLLAMA, 0o755)
@@ -27,7 +26,6 @@ def terminate():
     OLLAMA_SERVICE_THREAD.join()
     process = None
     OLLAMA_SERVICE_THREAD = None
-    os.system("systemctl stop ollama.service")
     return "Ollama service stopped."
 
 # Uncomment and modify the model to what you want locally
@@ -94,7 +92,7 @@ def ollama_func(command):
     else:
         return "No supported command."
 
-
+
 def launch():
     global OLLAMA_SERVICE_THREAD
     OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
@@ -102,8 +100,41 @@ def launch():
     print("Giving ollama serve a moment")
     time.sleep(10)
 
+@spaces.GPU()
 def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f"message: {message}")
+    conversation = []
+    for prompt, answer in history:
+        conversation.extend([
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
+        ])
+    conversation.append({"role": "user", "content": message})
+
+    print(f"Conversation is -\n{conversation}")
+
+    response = client.chat(
+        model=model,
+        messages=conversation,
+        stream=True,
+        options={
+            'num_predict': max_new_tokens,
+            'temperature': temperature,
+            'top_p': top_p,
+            'top_k': top_k,
+            'repeat_penalty': penalty,
+            'low_vram': True,
+            "keep_alive": "60s",
+        },
+    )
+
+    buffer = ""
+    for chunk in response:
+        buffer += chunk["message"]["content"]
+        yield buffer
+
+
+def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     if message.startswith("/"):
         resp = ollama_func(message)
         yield resp
@@ -111,38 +142,19 @@ def stream_chat(message: str, history: list, model: str, temperature: float, max
     if not INIT_SIGN:
         yield "Please initialize Ollama"
     else:
-
-
-
-
-
-
-
-
-
-
-
-        response = client.chat(
-            model=model,
-            messages=conversation,
-            stream=True,
-            options={
-                'num_predict': max_new_tokens,
-                'temperature': temperature,
-                'top_p': top_p,
-                'top_k': top_k,
-                'repeat_penalty': penalty,
-                'low_vram': True,
-            },
+        if process:
+            launch()
+        yield from stream_chat(
+            message,
+            history,
+            model,
+            temperature,
+            max_new_tokens,
+            top_p,
+            top_k,
+            penalty
         )
 
-        terminate()
-
-        buffer = ""
-        for chunk in response:
-            buffer += chunk["message"]["content"]
-            yield buffer
-
 
 chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
 
@@ -150,7 +162,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
-        fn=
+        fn=main,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
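For reference, below is a minimal sketch of the streaming pattern the updated stream_chat relies on: the `ollama` Python client (which this Space's `client` appears to be) called with `stream=True` and per-request sampling options. It assumes a locally running `ollama serve` and an already pulled model; the host, model name, option values, and the `stream_reply` helper are illustrative placeholders, not values taken from this Space.

import ollama

# Assumed default Ollama host/port; adjust if `ollama serve` listens elsewhere.
client = ollama.Client(host="http://127.0.0.1:11434")

def stream_reply(messages, model="llama3"):  # hypothetical model name
    response = client.chat(
        model=model,
        messages=messages,
        stream=True,                 # yields chunks instead of one full reply
        options={
            "num_predict": 256,      # cap on generated tokens
            "temperature": 0.8,
            "top_p": 0.9,
            "top_k": 40,
            "repeat_penalty": 1.1,
        },
        keep_alive="60s",            # recent ollama-python releases accept this to keep the model loaded briefly
    )
    buffer = ""
    for chunk in response:           # each chunk carries a partial assistant message
        buffer += chunk["message"]["content"]
        yield buffer                 # yield the accumulated text, as the Space does

# Usage: print the final accumulated reply.
if __name__ == "__main__":
    history = [{"role": "user", "content": "Hello!"}]
    final = ""
    for partial in stream_reply(history):
        final = partial
    print(final)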
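A companion sketch of the dispatcher wired into gr.ChatInterface: a plain generator screens slash commands and readiness, then forwards the streamed chunks from the GPU-decorated generator with `yield from`. The decorator stand-in, echo logic, and helper names below are placeholders for this sketch, not the Space's actual helpers.

import gradio as gr

def gpu_decorator(fn):
    # Stand-in for spaces.GPU() so the sketch runs outside a ZeroGPU Space.
    return fn

@gpu_decorator
def stream_chat(message, history):
    # Placeholder for the Ollama-backed streaming generator shown in the diff.
    reply = f"echo: {message}"
    for i in range(1, len(reply) + 1):
        yield reply[:i]              # emit the growing partial reply

def handle_command(message):
    # Placeholder for ollama_func-style slash-command handling.
    return f"handled {message}"

def main(message, history):
    if message.startswith("/"):
        yield handle_command(message)              # commands short-circuit the chat path
    else:
        yield from stream_chat(message, history)   # forward streamed chunks to the UI

demo = gr.ChatInterface(fn=main)
# demo.launch()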