wifix199 commited on
Commit
c088d8d
·
verified ·
1 Parent(s): 64b5005

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -25
app.py CHANGED
@@ -1,12 +1,18 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("meta-llama/Llama-3.2-1B")
8
- os.environ["HF_TOKEN"]
9
 
 
 
 
 
 
 
 
10
 
11
  def respond(
12
  message,
@@ -18,48 +24,51 @@ def respond(
18
  ):
19
  messages = [{"role": "system", "content": system_message}]
20
 
21
- for val in history:
22
- if val[0]:
23
- messages.append({"role": "user", "content": val[0]})
24
- if val[1]:
25
- messages.append({"role": "assistant", "content": val[1]})
26
 
27
  messages.append({"role": "user", "content": message})
28
 
29
  response = ""
30
 
31
- for message in client.chat_completion(
32
- messages,
33
- max_tokens=max_tokens,
34
- stream=True,
35
- temperature=temperature,
36
- top_p=top_p,
37
- ):
38
- token = message.choices[0].delta.content
39
-
40
- response += token
41
- yield response
42
-
 
 
43
 
44
  """
45
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
46
  """
47
  demo = gr.ChatInterface(
48
- respond,
49
  additional_inputs=[
50
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
  gr.Slider(
54
  minimum=0.1,
55
  maximum=1.0,
56
  value=0.95,
57
- step=0.05,
58
  label="Top-p (nucleus sampling)",
59
  ),
60
  ],
 
 
61
  )
62
 
63
-
64
  if __name__ == "__main__":
65
  demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
+
5
  """
6
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/en/guides/inference
7
  """
 
 
8
 
9
# Resolve the Hugging Face API token up front; the script refuses to start
# without one, since every inference call below needs it.
if not (hf_token := os.environ.get("HF_TOKEN")):
    raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face API token.")

# Shared inference client, bound to the instruct-tuned Llama 3.2 1B model.
client = InferenceClient("meta-llama/Llama-3.2-1B-Instruct", token=hf_token)
16
 
17
def respond(
    # NOTE(review): the middle of this parameter list was elided in the diff
    # hunk; names/order reconstructed from body usage and the order of
    # gr.ChatInterface additional_inputs — confirm against the full file.
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs from gr.ChatInterface.
        system_message: System prompt prepended to the conversation.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed token, so the UI
        can render the reply incrementally.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay prior turns, skipping empty slots (e.g. a turn whose assistant
    # reply does not exist yet).
    for user_input, assistant_response in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_response:
            messages.append({"role": "assistant", "content": assistant_response})

    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            # Fix: chat_completion's parameter is `max_tokens`;
            # `max_new_tokens` belongs to text_generation() and raises
            # TypeError here.
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Fix: streamed chunks are ChatCompletionStreamOutput objects —
            # the text lives at choices[0].delta.content (None on some
            # chunks), not at chunk.delta.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        # Boundary handler: surface inference failures in the chat UI
        # instead of crashing the app.
        yield f"Error during inference: {e}"
51
 
52
  """
53
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
54
  """
55
# Chat UI wiring: `respond` streams replies; the extra inputs let the user
# tune the system prompt and sampling parameters at runtime.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.01, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.01,
            label="Top-p (nucleus sampling)",
        ),
    ],
    # Fix: the title/description previously said "Llama 2", but the client
    # above is wired to meta-llama/Llama-3.2-1B-Instruct.
    title="Chat with Llama 3.2",
    description="A chat interface using the Llama 3.2 model via the Hugging Face Inference API.",
)
72
 
 
73
# Start the Gradio server only when this file is executed directly
# (not when imported, e.g. by a Space runner that calls launch itself).
if __name__ == "__main__":
    demo.launch()