"""Gradio chat UI backed by a local Ollama server running Microsoft Phi-2.

Run directly to start `ollama serve`, pull the model, and launch the web UI
on 0.0.0.0:7860. Importing this module no longer has side effects beyond
defining `interact` and `interface`.
"""

import subprocess
import time

import gradio as gr
from ollama import chat, ChatResponse

# Ollama model tag for Microsoft Phi-2 (2.7B params).
model_id = "phi"


def interact(message: str, history: list):
    """Stream a chat completion for *message* given the prior *history*.

    Args:
        message: The user's new message text.
        history: Prior conversation turns in Gradio "messages" format
            (list of {"role": ..., "content": ...} dicts).

    Yields:
        The accumulated response text so far, so Gradio renders the reply
        incrementally as tokens arrive.
    """
    # Copy rather than mutate Gradio's history list in place.
    chat_history = list(history)
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    for chunk in response:
        text_response += chunk["message"]["content"]
        yield text_response


interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Microsoft Phi Chat Interface",
    description="Model: Microsoft Phi-2 (2.7B params)",
)


def _bootstrap_ollama() -> None:
    """Start the Ollama server and pull the model before serving the UI."""
    print("\n\nStarting Ollama...\n\n")
    # Server runs in the background for the lifetime of this process.
    subprocess.Popen(["ollama", "serve"])
    # NOTE(review): fixed sleep is fragile — TODO: poll the Ollama health
    # endpoint instead of assuming the server is up after 10 seconds.
    time.sleep(10)
    print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
    # check=True: fail fast if the pull errors instead of launching a UI
    # against a missing model.
    subprocess.run(["ollama", "pull", model_id], check=True)
    time.sleep(5)
    print("\n\nMicrosoft Phi-2 started successfully!!\n\n")


if __name__ == "__main__":
    _bootstrap_ollama()
    interface.launch(server_name="0.0.0.0", server_port=7860)