Flux.1-Fill-dev

Running on Zero

File size: 5,565 Bytes

cc5b602
6f619d7
 
 
7eeaa8f
6f619d7
 
 
 
 
 
 
 
7eeaa8f
 
 
 
6f619d7
7eeaa8f
6f619d7
7eeaa8f
 
 
 
417f21a
a9fe0e7
 
7eeaa8f
6f619d7
7eeaa8f
85585d6
e6367a7
51a7d9e
29c0142
86bea01
51a7d9e
 
e6367a7
86bea01
51a7d9e
bd34f0b
 
86bea01
bd34f0b
7eeaa8f
 
 
bd34f0b
 
 
51a7d9e
 
 
bd34f0b
 
 
 
 
 
 
51a7d9e
 
7eeaa8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86bea01
7eeaa8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51a7d9e
82b38de
51a7d9e
 
 
 
 
 
 
 
7eeaa8f
 
 
 
9c72529
51a7d9e
 
 
 
 
 
 
 
 
 
82b38de
51a7d9e
 
3569c20
51a7d9e
 
bd34f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51a7d9e

import os
import threading
import time
import subprocess
import spaces

# Absolute path where the ollama binary is expected / installed.
OLLAMA = os.path.expanduser("~/ollama")

if not os.path.exists(OLLAMA):
    # Download to a temporary name first and rename only on success, so an
    # interrupted transfer can never be mistaken for a valid binary on the
    # next start-up (the existence check above would otherwise skip it).
    _ollama_partial = OLLAMA + ".part"
    # --fail makes curl exit non-zero on HTTP errors and check=True turns that
    # into an exception instead of chmod-ing a missing or bogus file.
    subprocess.run(
        [
            "curl",
            "--fail",
            "-L",
            "https://ollama.com/download/ollama-linux-amd64",
            "-o",
            _ollama_partial,
        ],
        check=True,
    )
    os.chmod(_ollama_partial, 0o755)
    os.replace(_ollama_partial, OLLAMA)

def ollama_service_thread():
    """Run ``ollama serve`` as a child process and block until it exits.

    Intended as the target of a background thread. The ``Popen`` handle is
    published through the module-level ``process`` global so that other code
    can signal the server.
    """
    global process
    # Launch the binary directly (no intermediate shell) so ``process.pid`` is
    # the server itself, and give it its own session/process group: signalling
    # the child's group then cannot reach the Python process running this app.
    process = subprocess.Popen([OLLAMA, "serve"], start_new_session=True)
    process.wait()
    
# Background thread whose target runs `ollama serve` and waits for it to exit.
# It is only created here; it is not started at import time (the start call
# below is intentionally commented out) and is started later by init()/launch().
OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
# OLLAMA_SERVICE_THREAD.start()

def terminate():
    """Stop the ``ollama serve`` child process and wait for its thread.

    Safe to call when the server was never started or has already exited:
    in those cases nothing is signalled and nothing is joined.
    """
    # Imported locally because the module does not import ``signal``.
    import signal

    # ``process`` only exists once the service thread has run, so look it up
    # defensively instead of risking a NameError.
    proc = globals().get("process")
    if proc is not None and proc.poll() is None:
        try:
            pgid = os.getpgid(proc.pid)
            if pgid == os.getpgrp():
                # The child shares our process group; killpg() would also
                # terminate this application, so signal only the child.
                # NOTE(review): if the child is a shell wrapper, the actual
                # server may outlive it - confirm how the child is spawned.
                proc.terminate()
            else:
                os.killpg(pgid, signal.SIGTERM)
        except ProcessLookupError:
            # The child exited between poll() and the signal; nothing to do.
            pass

    # join() raises RuntimeError on a thread that was never started.
    if OLLAMA_SERVICE_THREAD.is_alive():
        OLLAMA_SERVICE_THREAD.join()

# Uncomment one of the lines below and set the model you want to use locally.
# model = "moondream" 
# model = os.environ.get("MODEL")

# subprocess.run(f"~/ollama pull {model}", shell=True)

import copy
import gradio as gr
from ollama import Client
# Shared client for the ollama server expected at localhost:11434.
client = Client(host="http://localhost:11434", timeout=120)

# Optional Hugging Face token taken from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Page heading rendered as raw HTML at the top of the UI.
TITLE = "<h1><center>ollama-Chat</center></h1>"

# Placeholder HTML shown in the empty chat window; lists the slash commands.
DESCRIPTION = """
<center>
<p>Feel free to test models with ollama.
<br>
Input <em>/pull model_name</em> to pull model.
<br>
Input <em>/list</em> to get model list.
</p>
</center>
"""

# Custom stylesheet passed to gr.Blocks.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Initialisation marker; the empty string means "not initialised yet".
INIT_SIGN = ""

def init():
    """Start the ollama server thread (if needed) and mark it initialised.

    Sets the module-level ``INIT_SIGN`` to ``"FINISHED"`` once the server has
    been given time to come up. Calling it again while the server thread is
    still running is a no-op apart from re-setting the flag.
    """
    # Without ``global`` the assignment below would only create a local
    # variable and the module-level flag would never change.
    global INIT_SIGN, OLLAMA_SERVICE_THREAD

    if not OLLAMA_SERVICE_THREAD.is_alive():
        if OLLAMA_SERVICE_THREAD.ident is not None:
            # A Thread object can be started at most once; after the previous
            # server exited a fresh Thread is required to start it again.
            OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
        OLLAMA_SERVICE_THREAD.start()
        print("Giving ollama serve a moment")
        # Fixed grace period for the server to start listening.
        time.sleep(10)

    INIT_SIGN = "FINISHED"

def ollama_func(command):
    """Execute one slash command typed into the chat box.

    Supported commands:
        ``/init``        start the ollama server
        ``/pull <name>`` pull the model ``<name>``
        ``/list``        list the locally available models
        ``/bye``         stop the ollama server

    Args:
        command: The raw message, e.g. ``"/pull qwen2:0.5b"``.

    Returns:
        Whatever the selected handler returns, or ``None`` when the command
        is unknown or incomplete (a hint is printed in that case).
    """
    # partition() copes with commands that have no argument ("/list") as well
    # as arguments containing further spaces; tuple-unpacking split(" ") only
    # worked for exactly two words.
    name, _, argument = command.strip().partition(" ")
    argument = argument.strip()

    # Handlers are invoked only inside the matching branch, so a single
    # command never triggers the side effects of the others.
    if name == "/init":
        return init()
    if name == "/pull":
        if not argument:
            print("Usage: /pull model_name")
            return None
        return client.pull(argument)
    if name == "/list":
        return client.list()
    if name == "/bye":
        return terminate()

    print("No supported command.")
    return None
        
@spaces.GPU()
def launch():
    """Make sure the ollama server thread is running.

    Does nothing when the thread is already alive. Otherwise starts it,
    creating a fresh ``Thread`` first if the previous one has already run,
    and then waits a fixed period for the server to come up.

    NOTE(review): how ``@spaces.GPU()`` executes this function is not visible
    in this file - confirm that module globals changed here are seen by the
    caller.
    """
    global OLLAMA_SERVICE_THREAD

    if OLLAMA_SERVICE_THREAD.is_alive():
        return

    if OLLAMA_SERVICE_THREAD.ident is not None:
        # start() may be called at most once per Thread object; calling it on
        # a thread that has already run raises RuntimeError.
        OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)

    OLLAMA_SERVICE_THREAD.start()
    print("Giving ollama serve a moment")
    time.sleep(10)

def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    """Chat handler: run a slash command or stream a model reply.

    Args:
        message: Text the user just submitted.
        history: Previous turns, iterated as ``(prompt, answer)`` pairs.
        model: Name of the ollama model to query.
        temperature: Passed to ollama as ``temperature``.
        max_new_tokens: Passed to ollama as ``num_predict``.
        top_p: Passed to ollama as ``top_p``.
        top_k: Passed to ollama as ``top_k``.
        penalty: Passed to ollama as ``repeat_penalty``.

    Yields:
        For a slash command, one status string. Otherwise the reply text
        accumulated so far, once per received chunk.
    """
    if message.startswith("/"):
        outcome = ollama_func(message)
        # This function is a generator, so something must be yielded for the
        # chat window to have a reply to show.
        yield f"Executed {message}" if outcome is None else str(outcome)
        return

    # Start the server on demand when its thread is not already running, and
    # remember that we did so it is shut down again afterwards. A server that
    # was already running (e.g. started via /init) is left untouched.
    started_here = not OLLAMA_SERVICE_THREAD.is_alive()
    if started_here:
        launch()

    conversation = []
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    print(f"Conversation is -\n{conversation}")

    try:
        response = client.chat(
            model=model,
            messages=conversation,
            stream=True,
            options={
                'num_predict': max_new_tokens,
                'temperature': temperature,
                'top_p': top_p,
                'top_k': top_k,
                'repeat_penalty': penalty,
                'low_vram': True,
            },
        )

        buffer = ""
        for chunk in response:
            buffer += chunk["message"]["content"]
            yield buffer
    finally:
        # The streamed response is consumed lazily, so the server may only be
        # stopped after the loop above has finished (or was abandoned).
        if started_here:
            terminate()


# Chat display component; created up front so it can be handed to ChatInterface.
# DESCRIPTION is shown as the placeholder while the conversation is empty.
chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)

# Page layout: title, a "duplicate this Space" button and the chat interface.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        # The order of these inputs must match the extra parameters of
        # stream_chat after (message, history):
        # model, temperature, max_new_tokens, top_p, top_k, penalty.
        additional_inputs=[
            # model
            gr.Textbox(
                value="qwen2:0.5b",
                label="Model",
                render=False,
            ),
            # temperature
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.8,
                label="Temperature",
                render=False,
            ),
            # max_new_tokens
            gr.Slider(
                minimum=128,
                maximum=2048,
                step=1,
                value=1024,
                label="Max New Tokens",
                render=False,
            ),
            # top_p
            gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.8,
                label="top_p",
                render=False,
            ),
            # top_k
            gr.Slider(
                minimum=1,
                maximum=20,
                step=1,
                value=20,
                label="top_k",
                render=False,
            ),
            # penalty
            gr.Slider(
                minimum=0.0,
                maximum=2.0,
                step=0.1,
                value=1.0,
                label="Repetition penalty",
                render=False,
            ),
        ],
        # Sample prompts offered to the user; each inner list is one example.
        examples=[
            ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
            ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
            ["Tell me a random fun fact about the Roman Empire."],
            ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
        ],
        cache_examples=False,
    )


# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()