Threatthriver committed on
Commit 66980c9
1 Parent(s): e18c985

Update app.py

Files changed (1)
  1. app.py +38 -17
app.py CHANGED
@@ -1,18 +1,13 @@
  import gradio as gr
  from huggingface_hub import InferenceClient

- # Initialize the InferenceClient with the model ID from Hugging Face
- client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
+ # Define available models and their Hugging Face IDs
+ available_models = {
+     "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
+     "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
+     # Add more models here as needed
+ }

- # Latest updates (you can replace this with actual update information)
- latest_updates = """
- **Zephyr 7B Beta Chatbot - Latest Updates:**
-
- * **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
- * **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
- * **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
- * **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
- """

  def respond(
      message: str,
@@ -21,6 +16,7 @@ def respond(
      max_tokens: int,
      temperature: float,
      top_p: float,
+     model_name: str,
  ):
      """
      Generates a response from the AI model based on the user's message and chat history.
@@ -32,10 +28,13 @@ def respond(
          max_tokens (int): The maximum number of tokens for the output.
          temperature (float): Sampling temperature for controlling the randomness.
          top_p (float): Top-p (nucleus sampling) for controlling diversity.
+         model_name (str): The name of the model to use.

      Yields:
          str: The AI's response as it is generated.
      """
+     # Initialize the InferenceClient with the selected model
+     client = InferenceClient(model=available_models[model_name])

      # Prepare the conversation history for the API call
      messages = [{"role": "system", "content": system_message}]
@@ -66,7 +65,8 @@ def respond(
      except Exception as e:
          yield f"**Error:** {str(e)}"

- def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
+
+ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
      """
      Shows the latest updates and then generates a response from the model based on the updates.
      """
@@ -78,6 +78,7 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
          max_tokens=max_tokens,
          temperature=temperature,
          top_p=top_p,
+         model_name=model_name,
      )
      history[-1] = ("User: ", "Show me the latest updates")
      history.append(("Assistant:", latest_updates))
@@ -88,10 +89,25 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
          max_tokens=max_tokens,
          temperature=temperature,
          top_p=top_p,
+         model_name=model_name,
      )

+
+ # Latest updates (you can replace this with actual update information)
+ latest_updates = """
+ **Chatbot - Latest Updates:**
+
+ * **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
+ * **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+ * **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
+ * **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
+ * **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
+ """
+
+
  # Define the Gradio interface with the Blocks context
  with gr.Blocks(css=".gradio-container {border: none;}") as demo:
+     chat_history = gr.State([])  # Initialize an empty chat history state
      chat_interface = gr.ChatInterface(
          fn=respond,
          additional_inputs=[
@@ -109,9 +125,15 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
                  step=0.05,
                  label="Top-p (nucleus sampling)",
              ),
+             gr.Dropdown(
+                 choices=list(available_models.keys()),
+                 value="Zephyr 7B Beta",
+                 label="Select Model",
+             ),
          ],
-         title="Zephyr 7B Beta Chatbot",
-         description="A customizable chatbot interface using Hugging Face's Zephyr 7B Beta model and Inference API.",
+         title="Multi-Model Chatbot",
+         description="A customizable chatbot interface using Hugging Face's Inference API.",
+         chat_history=chat_history,  # Pass the state to the ChatInterface
      )

      # Add the "Show Updates" button and output area
@@ -121,11 +143,10 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
      # Define the button's click event (now inside the Blocks context)
      updates_button.click(
          fn=show_updates_and_respond,
-         inputs=[chat_interface.chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")],
-         outputs=chat_interface.chat_history
+         inputs=[chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"), chat_interface.dropdown],
+         outputs=chat_history
      )

-
  # Launch the Gradio interface in full screen
  if __name__ == "__main__":
      demo.launch(share=True, fullscreen=True)
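A note on the design choice in the `@@ -32,10 +28,13 @@` hunk: constructing the `InferenceClient` inside `respond` rather than once at module scope is what makes the new model dropdown take effect per message. The client is a thin wrapper around an HTTP endpoint, so building one per request costs little and lets each call target `available_models[model_name]`.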
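The hunks skip the body of `respond` between old lines 41 and 66, which is where the streamed completion and the "Robust Chunk Handling" advertised in `latest_updates` live. Below is a minimal sketch of what such a loop typically looks like with `huggingface_hub`'s `chat_completion` streaming interface; it is a plausible reconstruction, not the commit's actual code, and the guard on `delta.content` is what keeps empty chunks from raising:

```python
from huggingface_hub import InferenceClient

# Illustrative reconstruction of the elided streaming loop (not from the commit).
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

response = ""
for chunk in client.chat_completion(
    messages,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
    stream=True,
):
    # Some streamed chunks carry no text in their delta; skip them.
    delta = chunk.choices[0].delta.content
    if delta:
        response += delta
        print(delta, end="", flush=True)
```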
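Two of the added lines lean on things `gr.ChatInterface` does not clearly expose: the `chat_history=chat_history` constructor argument and the `chat_interface.dropdown` attribute in the click wiring; the fresh `gr.Slider(...)` calls inside `inputs=[...]` also create new components rather than reading the ones rendered in the chat interface. A hedged alternative is to build the shared inputs first (with `render=False` so `ChatInterface` can place them in its accordion) and pass the same references to both the chat interface and the button; all names below are illustrative, not part of the commit:

```python
import gradio as gr

with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chat_history = gr.State([])  # shared history state for the button handler

    # Create the inputs once; render=False defers layout to ChatInterface.
    system_box = gr.Textbox(lines=3, label="System message", render=False)
    max_tokens_slider = gr.Slider(1, 2048, value=512, step=1,
                                  label="Max new tokens", render=False)
    temperature_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.1,
                                   label="Temperature", render=False)
    top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05,
                             label="Top-p (nucleus sampling)", render=False)
    model_dropdown = gr.Dropdown(choices=list(available_models.keys()),
                                 value="Zephyr 7B Beta", label="Select Model",
                                 render=False)

    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[system_box, max_tokens_slider,
                           temperature_slider, top_p_slider, model_dropdown],
        title="Multi-Model Chatbot",
        description="A customizable chatbot interface using Hugging Face's Inference API.",
    )

    updates_button = gr.Button("Show Updates")
    updates_button.click(
        fn=show_updates_and_respond,
        inputs=[chat_history, system_box, max_tokens_slider,
                temperature_slider, top_p_slider, model_dropdown],
        outputs=chat_history,
    )
```

Because both handlers receive the very same component references, the button reads whatever values the user last set in the chat interface's accordion, with no duplicate sliders created at wiring time.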