import gradio as gr
from huggingface_hub import InferenceClient

# Function to return the appropriate client based on the model selected
def client_fn(model):
    model_map = {
        "Nous Hermes Mixtral 8x7B DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "StarChat2 15b": "HuggingFaceH4/starchat2-15b-v0.1",
        "Mistral 7B v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
        "Phi 3 mini": "microsoft/Phi-3-mini-4k-instruct",
        "Mixtral 8x7B": "mistralai/Mixtral-8x7B-Instruct-v0.1"
    }
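    # Fall back to Mixtral 8x7B when an unrecognized model name is passed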
    return InferenceClient(model_map.get(model, "mistralai/Mixtral-8x7B-Instruct-v0.1"))

system_instructions = ("[SYSTEM] You are a chatbot named 'H go'. "
                       "Your task is to answer the user's question. "
                       "Keep the conversation short, clear, and concise. "
                       "Respond naturally and concisely to the user's queries. "
                       "Skip introductions and answer the query directly; only answer what the user asked and avoid unnecessary detail. "
                       "Begin with a greeting if the user initiates the conversation. "
                       "Here is the user's query: [QUESTION] ")

# Function to generate a model response by streaming tokens from the Inference API
def models(text, model="Mixtral 8x7B"):
    client = client_fn(model)
    generate_kwargs = {
        "max_new_tokens": 100,
        "do_sample": True,
    }

    formatted_prompt = f"{system_instructions} {text} [ANSWER]"
    stream = client.text_generation(formatted_prompt, **generate_kwargs,
                                    stream=True, details=True, return_full_text=False)

    # Accumulate streamed tokens, skipping special tokens (e.g. the </s>
    # end-of-sequence marker) so they never leak into the displayed answer
    output = ""
    for response in stream:
        if not response.token.special:
            output += response.token.text
    return output
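
# A quick local sanity check (hypothetical usage, not part of the app flow;
# assumes access to the Hugging Face Inference API, which may require a token):
#     print(models("What is the capital of France?", model="Phi 3 mini"))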

# Gradio interface description and configuration
description = """# H GO
### Inspired by Google Go"""

with gr.Blocks() as demo:
    gr.Markdown(description)
    
    text_input = gr.Textbox(label="Enter your message here:")
    dropdown = gr.Dropdown(
        ['Mixtral 8x7B', 'Nous Hermes Mixtral 8x7B DPO', 'StarChat2 15b',
         'Mistral 7B v0.3', 'Phi 3 mini'],
        value="Mistral 7B v0.3",
        label="Select Model",
    )
    submit_btn = gr.Button("Send")
    output_text = gr.Textbox(label="Response")
    
    submit_btn.click(fn=models, inputs=[text_input, dropdown], outputs=output_text)

# Queue and launch configuration for Gradio
demo.queue(max_size=300000)
demo.launch()