"""Gradio chat UI backed by a local Ollama server running Microsoft Phi-2.

Run directly to start `ollama serve`, pull the model, and launch the web UI
on 0.0.0.0:7860. Importing this module no longer has side effects beyond
defining `interact` and `interface`.
"""

import subprocess
import time

import gradio as gr
from ollama import chat, ChatResponse

# Ollama model tag for Microsoft Phi-2 (2.7B params).
model_id = "phi"


def interact(message: str, history: list):
    """Stream a chat completion for *message* given the prior *history*.

    Args:
        message: The user's new message text.
        history: Prior conversation turns in Gradio "messages" format
            (list of {"role": ..., "content": ...} dicts).

    Yields:
        The accumulated response text so far, so Gradio renders the reply
        incrementally as tokens arrive.
    """
    # Copy rather than mutate Gradio's history list in place.
    chat_history = list(history)
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    for chunk in response:
        text_response += chunk["message"]["content"]
        yield text_response


interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Microsoft Phi Chat Interface",
    description="Model: Microsoft Phi-2 (2.7B params)",
)


def _bootstrap_ollama() -> None:
    """Start the Ollama server and pull the model before serving the UI."""
    print("\n\nStarting Ollama...\n\n")
    # Server runs in the background for the lifetime of this process.
    subprocess.Popen(["ollama", "serve"])
    # NOTE(review): fixed sleep is fragile — TODO: poll the Ollama health
    # endpoint instead of assuming the server is up after 10 seconds.
    time.sleep(10)
    print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
    # check=True: fail fast if the pull errors instead of launching a UI
    # against a missing model.
    subprocess.run(["ollama", "pull", model_id], check=True)
    time.sleep(5)
    print("\n\nMicrosoft Phi-2 started successfully!!\n\n")


if __name__ == "__main__":
    _bootstrap_ollama()
    interface.launch(server_name="0.0.0.0", server_port=7860)