import gradio as gr
from huggingface_hub import InferenceClient

# Resolve a model's display name to an InferenceClient for its Hub repository
def client_fn(model):
    """Return an ``InferenceClient`` for the model selected by display name.

    Args:
        model: Display name of the model, e.g. ``"Phi 3 mini"``.

    Returns:
        An ``InferenceClient`` constructed with the repository id mapped to
        ``model``. Names that are not in the table use the Mixtral 8x7B
        Instruct repository.
    """
    fallback_repo = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    repo_ids = {
        "Nous Hermes Mixtral 8x7B DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "StarChat2 15b": "HuggingFaceH4/starchat2-15b-v0.1",
        "Mistral 7B v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
        "Phi 3 mini": "microsoft/Phi-3-mini-4k-instruct",
        "Mixtral 8x7B": fallback_repo,
    }

    try:
        repo_id = repo_ids[model]
    except KeyError:
        # Unrecognised display name: use the default model.
        repo_id = fallback_repo
    return InferenceClient(repo_id)

# Prompt preamble prepended to every user question. Adjacent literals are
# concatenated at compile time, so this is a single string at runtime.
system_instructions = (
    "[SYSTEM] Your task is to Answer the question. "
    "Keep conversation very short, clear and concise. "
    "The expectation is that you will avoid introductions and start answering the query directly, "
    "Only answer the question asked by user, "
    "Do not say unnecessary things.[QUESTION]"
)

# Function to generate model responses
def models(text, model="Mixtral 8x7B"):
    """Generate a short answer to ``text`` with the selected model.

    The question is wrapped in the module-level ``system_instructions``
    preamble and an ``[ANSWER]`` marker, sent to the client returned by
    ``client_fn(model)``, and the streamed tokens are concatenated.

    Args:
        text: The user's question. ``None``, empty, or whitespace-only input
            short-circuits and no request is made.
        model: Display name of the model, forwarded to ``client_fn``.

    Returns:
        The generated text with a trailing ``"</s>"`` marker removed, or
        ``""`` when ``text`` is blank.
    """
    # Nothing to answer: avoid spending a remote generation call on a prompt
    # that contains no question.
    if not text or not text.strip():
        return ""

    client = client_fn(model)
    formatted_prompt = f"{system_instructions} {text} [ANSWER]"
    stream = client.text_generation(
        formatted_prompt,
        max_new_tokens=100,
        do_sample=True,
        stream=True,
        details=True,
        return_full_text=False,
    )

    eos_marker = "</s>"
    output = ""
    for response in stream:
        output += response.token.text
        # Drop the end-of-sequence marker as soon as it shows up at the tail.
        if output.endswith(eos_marker):
            output = output[: -len(eos_marker)]
    return output

# Gradio interface description (rendered as Markdown at the top of the page)
description = """# H GO
### Inspired from Google Go"""

# Creating the Gradio interface.
# gr.Blocks is used rather than gr.Interface so the "Send" button can be wired
# explicitly: event listeners such as .click() must be attached inside a
# Blocks context and need real components (not the string "text") as outputs.
with gr.Blocks() as demo:
    gr.Markdown(description)

    text_input = gr.Textbox(label="Enter your message here:")
    dropdown = gr.Dropdown(
        ['Mixtral 8x7B', 'Nous Hermes Mixtral 8x7B DPO', 'StarChat2 15b', 'Mistral 7B v0.3', 'Phi 3 mini'],
        value="Mistral 7B v0.3",
        label="Select Model",
    )
    submit_btn = gr.Button("Send")
    answer_output = gr.Textbox(label="Answer", interactive=False)

    # models() takes one question and one model name per call, so the events
    # are registered without batching. Requests are sent only on an explicit
    # submit (button click or Enter) instead of on every keystroke, which
    # would issue a remote generation call per character typed.
    submit_btn.click(fn=models, inputs=[text_input, dropdown], outputs=answer_output)
    text_input.submit(fn=models, inputs=[text_input, dropdown], outputs=answer_output)

# Queue and launch configuration for Gradio
demo.queue(max_size=300000)
demo.launch()