Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -92,18 +92,15 @@ def ollama_func(command):
         return "Running..."
     else:
         return "No supported command."
-
+
+@spaces.GPU()
 def launch():
     global OLLAMA_SERVICE_THREAD
     OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
     OLLAMA_SERVICE_THREAD.start()
-    print("Giving ollama serve a moment")
-    time.sleep(10)
 
-
+
 async def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-    if not process:
-        launch()
     print(f"message: {message}")
     conversation = []
     for prompt, answer in history:
@@ -111,28 +108,10 @@ async def main(message: str, history: list, model: str, temperature: floa
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": answer},
         ])
-
-
+    conversation.append({"role": "user", "content": message})
+
     print(f"Conversation is -\n{conversation}")
-
-    async for part in await client.chat(
-        model=model,
-        stream=True,
-        messages=conversation,
-        keep_alive="60s",
-        options={
-            'num_predict': max_new_tokens,
-            'temperature': temperature,
-            'top_p': top_p,
-            'top_k': top_k,
-            'repeat_penalty': penalty,
-            'low_vram': True,
-        },
-    ):
-        yield part['message']['content']
-
-
-async def main(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+
     if message.startswith("/"):
         resp = ollama_func(message)
         yield resp
@@ -140,19 +119,25 @@ async def main(message: str, history: list, model: str, temperature: float, max_
     if not INIT_SIGN:
         yield "Please initialize Ollama"
     else:
-        [13 removed lines; their content did not survive the page extraction]
+        if not process:
+            launch()
+            print("Giving ollama serve a moment")
+            time.sleep(10)
+        async for part in await client.chat(
+            model=model,
+            stream=True,
+            messages=conversation,
+            keep_alive="60s",
+            options={
+                'num_predict': max_new_tokens,
+                'temperature': temperature,
+                'top_p': top_p,
+                'top_k': top_k,
+                'repeat_penalty': penalty,
+                'low_vram': True,
+            },
+        ):
+            yield part['message']['content']
 
 chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
 
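Read end to end, the commit does three things: it puts the ZeroGPU decorator @spaces.GPU() on launch() so the Space holds a GPU while ollama serve is started, it appends the current message to conversation before chatting, and it moves the warm-up (if not process: launch(), plus the ten-second sleep that used to live inside launch()) and the streaming client.chat(...) call down into the command/init dispatch that previously belonged to main(), collapsing main() into stream_chat(). The sketch below pieces that post-commit flow together from the new side of the hunks, with the sequential ifs condensed into one if/elif chain; everything outside the diff (imports, client, process, INIT_SIGN, ollama_func, ollama_service_thread) is not shown in this commit, so those definitions are assumptions added only to make the sketch self-contained and runnable.

# Post-commit flow of app.py lines 92-143, reconstructed from the new side
# of the diff above. Names marked "assumed" are not part of this commit.
import subprocess
import threading
import time

import ollama
import spaces

OLLAMA_SERVICE_THREAD = None
process = None                    # assumed: handle to the `ollama serve` process
INIT_SIGN = True                  # assumed: set once the user initializes Ollama
client = ollama.AsyncClient()     # assumed: ollama's async client


def ollama_service_thread():
    # Assumed implementation: keep `ollama serve` running in the background.
    global process
    process = subprocess.Popen(["ollama", "serve"])
    process.wait()


def ollama_func(command):
    # Stub for the "/..." command handler defined earlier in app.py.
    return "No supported command."


@spaces.GPU()
def launch():
    # On a ZeroGPU Space, a GPU is attached only while a @spaces.GPU()
    # function runs, hence the decorator added by this commit.
    global OLLAMA_SERVICE_THREAD
    OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
    OLLAMA_SERVICE_THREAD.start()


async def stream_chat(message: str, history: list, model: str,
                      temperature: float, max_new_tokens: int,
                      top_p: float, top_k: int, penalty: float):
    conversation = []
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})  # added by this commit

    if message.startswith("/"):
        yield ollama_func(message)
    elif not INIT_SIGN:
        yield "Please initialize Ollama"
    else:
        if not process:
            launch()
            print("Giving ollama serve a moment")
            time.sleep(10)  # crude readiness wait; see the polling sketch below
        # Stream tokens from Ollama as they are generated.
        async for part in await client.chat(
            model=model,
            stream=True,
            messages=conversation,
            keep_alive="60s",
            options={
                "num_predict": max_new_tokens,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "repeat_penalty": penalty,
                "low_vram": True,
            },
        ):
            yield part["message"]["content"]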
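The fixed time.sleep(10) is only a guess at how long ollama serve needs to come up. A common alternative, not part of this commit, is to poll the server's default endpoint (Ollama answers plain HTTP on 127.0.0.1:11434) until it responds; a minimal sketch:

import time
import urllib.request


def wait_for_ollama(url="http://127.0.0.1:11434/", timeout=30.0):
    # Poll Ollama's default endpoint until it answers, instead of a fixed sleep.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=1):
                return True
        except OSError:  # urllib's URLError subclasses OSError
            time.sleep(0.5)
    return False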
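The trailing context line chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION) suggests the widget is handed to a gr.ChatInterface that drives stream_chat. The wiring below is a hypothetical sketch only (the real setup sits outside this diff): DESCRIPTION, the model default, and the slider ranges are all made up for illustration, and stream_chat is the function from the sketch above.

import gradio as gr

DESCRIPTION = "Chat with models served by Ollama"  # assumed placeholder text

chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)

# Hypothetical wiring: stream_chat's extra parameters arrive via
# additional_inputs, in the same order as its signature.
demo = gr.ChatInterface(
    fn=stream_chat,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(value="llama3", label="model"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.8, label="temperature"),
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="max_new_tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="top_p"),
        gr.Slider(minimum=1, maximum=100, value=40, step=1, label="top_k"),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.1, label="penalty"),
    ],
)

if __name__ == "__main__":
    demo.launch()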