from llama_cpp import Llama
import gradio as gr

# Load the GGUF model from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id="h3lmi/modelgguf",
    filename="unsloth.Q8_0.gguf",
)

# Define the fixed Alpaca-style prompt structure
base_prompt = """### Instruction:
You are a doctor. Answer the following query by a patient.

### Input:
{}

### Response:
"""

# Generate the model's completion for a patient query
def generate_response(patient_query):
    formatted_prompt = base_prompt.format(patient_query)
    output = llm(
        formatted_prompt,
        max_tokens=256,  # 32 tokens truncates replies mid-sentence; allow a full answer
        stop=["###"],    # stop before the model drifts into a new instruction block
        echo=False,      # return only the completion, not the prompt
    )
    return output["choices"][0]["text"].strip()

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_response,  # function that generates the response
    inputs=gr.Textbox(lines=8, placeholder="Describe your symptoms here..."),  # input box
    outputs=gr.Textbox(label="Doctor's Response"),  # output box
    title="Doctor AI Assistant",
    description="Enter your symptoms to get advice from the AI doctor.",
)

interface.launch()
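
# Once the app is running, the same endpoint can also be exercised
# programmatically. A minimal sketch using gradio_client, assuming the default
# local URL (http://127.0.0.1:7860) and that the gradio_client package is
# installed; the query string is purely hypothetical. Run this from a separate
# process while the server above is up:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "I have had a persistent dry cough for two weeks.",  # hypothetical query
#       api_name="/predict",  # default endpoint name for a single gr.Interface
#   )
#   print(result)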