File size: 780 Bytes
c127950
0897689
c127950
6cbb926
c127950
41838e1
 
fa6182f
41838e1
 
6cbb926
41838e1
6cbb926
 
 
41838e1
6cbb926
 
 
 
41838e1
 
 
 
b25c66c
41838e1
 
91e8cf6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face Inference API client, pinned to the Gemma 1.1 2B instruct model.
# NOTE(review): requires network access and (typically) an HF token at runtime.
client = InferenceClient("google/gemma-1.1-2b-it")

def models(Query):
    """Stream a short, concise answer to *Query* from the Gemma chat model.

    Parameters
    ----------
    Query : str
        The user's question; it is embedded into a single combined
        system+user prompt message.

    Yields
    ------
    str
        The accumulated response text so far. Gradio re-renders the output
        on every yield, producing a streaming effect.
    """
    messages = [
        {
            "role": "user",
            "content": f"[SYSTEM] You are ASSISTANT who answer question asked by user in short and concise manner. [USER] {Query}",
        }
    ]

    Response = ""

    for message in client.chat_completion(
        messages,
        max_tokens=2048,
        stream=True,
    ):
        token = message.choices[0].delta.content

        # Streamed chunks (notably the final one) can carry
        # delta.content == None; concatenating that to a str raises
        # TypeError. Only accumulate and re-yield on real text.
        if token:
            Response += token
            yield Response

# Markdown header rendered above the Gradio interface.
description="# Chat GO\n### Enter your query and Press enter and get lightning fast response"

# Single text-in / text-out interface; `models` is a generator, so Gradio
# streams each yielded partial response into the output box.
demo = gr.Interface(description=description,fn=models, inputs=["text"], outputs="text")
# Very large queue cap — effectively "never reject a request at the queue".
demo.queue(max_size=300000)
demo.launch()