KingNish committed on
Commit
41838e1
1 Parent(s): 91e8cf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -18
app.py CHANGED
@@ -3,32 +3,26 @@ from huggingface_hub import InferenceClient
3
 
4
  client = InferenceClient("google/gemma-1.1-2b-it")
5
 
6
- def respond(
7
- message,
8
- history: list[tuple[str, str]],
9
- max_tokens
10
- ):
11
  messages = []
 
 
12
 
13
- for val in history:
14
- if val[0]:
15
- messages.append({"role": "user", "content": val[0]})
16
- if val[1]:
17
- messages.append({"role": "assistant", "content": val[1]})
18
-
19
- messages.append({"role": "user", "content": message})
20
-
21
- response = ""
22
 
23
  for message in client.chat_completion(
24
  messages,
25
- max_tokens=1024,
26
  stream=True
27
  ):
28
  token = message.choices[0].delta.content
29
- response += token
30
- yield response
31
 
32
- demo = gr.ChatInterface(respond, description="# Chat With AI faster than groq")
 
 
 
33
 
 
 
34
  demo.launch()
 
3
 
4
  client = InferenceClient("google/gemma-1.1-2b-it")
5
 
6
def models(Query):
    """Stream an answer to *Query* from the Gemma chat model.

    Builds a single user message that embeds a short-answer system prompt,
    then yields the cumulative response text after each streamed chunk so
    the Gradio interface can render it incrementally.

    Parameters
    ----------
    Query : str
        The user's question, inserted verbatim into the prompt.

    Yields
    ------
    str
        The response accumulated so far.
    """
    messages = []
    messages.append({"role": "user", "content": f"[SYSTEM] You are ASSISTANT who answer question asked by user in short and concise manner. [USER] {Query}"})

    Response = ""

    for message in client.chat_completion(
        messages,
        max_tokens=2048,
        stream=True,
    ):
        token = message.choices[0].delta.content
        # The final streamed chunk may carry delta.content == None;
        # guard so `Response += token` cannot raise TypeError.
        if token:
            Response += token
            yield Response
23
+
24
# Markdown shown above the input box (adjacent literals join byte-for-byte).
description = (
    "# Chat GO\n"
    "### Enter your query and Press enter and get lightning fast response"
)

# Single-turn interface: one text input streamed into one text output.
demo = gr.Interface(
    fn=models,
    inputs=["text"],
    outputs="text",
    description=description,
)
demo.queue(max_size=300000)
demo.launch()