Spaces:
Sleeping
Sleeping
File size: 780 Bytes
c127950 0897689 c127950 6cbb926 c127950 41838e1 fa6182f 41838e1 6cbb926 41838e1 6cbb926 41838e1 6cbb926 41838e1 b25c66c 41838e1 91e8cf6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import gradio as gr
from huggingface_hub import InferenceClient
# Hosted-inference client for the instruction-tuned Gemma 1.1 2B model;
# all chat completions in this script stream through this one client.
client = InferenceClient("google/gemma-1.1-2b-it")
def models(Query):
    """Stream a concise answer to *Query* from the Gemma model.

    Yields the accumulated response text after each streamed token so the
    Gradio interface can render the answer incrementally.

    Parameters
    ----------
    Query : str
        The user's question (name is kept as-is: Gradio uses it as the
        input label).

    Yields
    ------
    str
        The partial response, growing with each received token.
    """
    messages = []
    messages.append({"role": "user", "content": f"[SYSTEM] You are ASSISTANT who answer question asked by user in short and concise manner. [USER] {Query}"})
    Response = ""
    for message in client.chat_completion(
        messages,
        max_tokens=2048,
        stream=True,
    ):
        token = message.choices[0].delta.content
        # The final streamed chunk commonly carries delta.content == None;
        # guard so the concatenation does not raise TypeError.
        if token is not None:
            Response += token
        yield Response
# Markdown header rendered above the input box in the Gradio UI.
description = "# Chat GO\n### Enter your query and Press enter and get lightning fast response"

# Streaming text-in / text-out interface backed by the `models` generator.
demo = gr.Interface(description=description, fn=models, inputs=["text"], outputs="text")
# Large queue so concurrent Space visitors are buffered rather than rejected.
demo.queue(max_size=300000)
demo.launch()