Threatthriver committed on
Commit 66980c9
1 Parent(s): e18c985

Update app.py

Files changed (1)
  1. app.py +38 -17
app.py CHANGED
@@ -1,18 +1,13 @@
  import gradio as gr
  from huggingface_hub import InferenceClient

- # Initialize the InferenceClient with the model ID from Hugging Face
- client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
+ # Define available models and their Hugging Face IDs
+ available_models = {
+     "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
+     "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
+     # Add more models here as needed
+ }

- # Latest updates (you can replace this with actual update information)
- latest_updates = """
- **Zephyr 7B Beta Chatbot - Latest Updates:**
-
- * **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
- * **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
- * **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
- * **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
- """

  def respond(
      message: str,
@@ -21,6 +16,7 @@ def respond(
      max_tokens: int,
      temperature: float,
      top_p: float,
+     model_name: str,
  ):
      """
      Generates a response from the AI model based on the user's message and chat history.
@@ -32,10 +28,13 @@ def respond(
          max_tokens (int): The maximum number of tokens for the output.
          temperature (float): Sampling temperature for controlling the randomness.
          top_p (float): Top-p (nucleus sampling) for controlling diversity.
+         model_name (str): The name of the model to use.

      Yields:
          str: The AI's response as it is generated.
      """
+     # Initialize the InferenceClient with the selected model
+     client = InferenceClient(model=available_models[model_name])

      # Prepare the conversation history for the API call
      messages = [{"role": "system", "content": system_message}]
@@ -66,7 +65,8 @@ def respond(
      except Exception as e:
          yield f"**Error:** {str(e)}"

- def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
+
+ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
      """
      Shows the latest updates and then generates a response from the model based on the updates.
      """
@@ -78,6 +78,7 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
          max_tokens=max_tokens,
          temperature=temperature,
          top_p=top_p,
+         model_name=model_name,
      )
      history[-1] = ("User: ", "Show me the latest updates")
      history.append(("Assistant:", latest_updates))
@@ -88,10 +89,25 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
          max_tokens=max_tokens,
          temperature=temperature,
          top_p=top_p,
+         model_name=model_name,
      )

+
+ # Latest updates (you can replace this with actual update information)
+ latest_updates = """
+ **Chatbot - Latest Updates:**
+
+ * **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
+ * **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+ * **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
+ * **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
+ * **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
+ """
+
+
  # Define the Gradio interface with the Blocks context
  with gr.Blocks(css=".gradio-container {border: none;}") as demo:
+     chat_history = gr.State([])  # Initialize an empty chat history state
      chat_interface = gr.ChatInterface(
          fn=respond,
          additional_inputs=[
@@ -109,9 +125,15 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
                  step=0.05,
                  label="Top-p (nucleus sampling)",
              ),
+             gr.Dropdown(
+                 choices=list(available_models.keys()),
+                 value="Zephyr 7B Beta",
+                 label="Select Model",
+             ),
          ],
-         title="Zephyr 7B Beta Chatbot",
-         description="A customizable chatbot interface using Hugging Face's Zephyr 7B Beta model and Inference API.",
+         title="Multi-Model Chatbot",
+         description="A customizable chatbot interface using Hugging Face's Inference API.",
+         chat_history=chat_history,  # Pass the state to the ChatInterface
      )

      # Add the "Show Updates" button and output area
@@ -121,11 +143,10 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
      # Define the button's click event (now inside the Blocks context)
      updates_button.click(
          fn=show_updates_and_respond,
-         inputs=[chat_interface.chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")],
-         outputs=chat_interface.chat_history
+         inputs=[chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"), chat_interface.dropdown],
+         outputs=chat_history
      )

-
  # Launch the Gradio interface in full screen
  if __name__ == "__main__":
      demo.launch(share=True, fullscreen=True)
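A note on the design choice in the `@@ -32,10 +28,13 @@` hunk: constructing the `InferenceClient` inside `respond` rather than once at module scope is what makes the new model dropdown take effect per message. The client is a thin wrapper around an HTTP endpoint, so building one per request costs little and lets each call target `available_models[model_name]`.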
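The hunks skip the body of `respond` between old lines 41 and 66, which is where the streamed completion and the "Robust Chunk Handling" advertised in `latest_updates` live. Below is a minimal sketch of what such a loop typically looks like with `huggingface_hub`'s `chat_completion` streaming interface; it is a plausible reconstruction, not the commit's actual code, and the guard on `delta.content` is what keeps empty chunks from raising:

```python
from huggingface_hub import InferenceClient

# Illustrative reconstruction of the elided streaming loop (not from the commit).
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

response = ""
for chunk in client.chat_completion(
    messages,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
    stream=True,
):
    # Some streamed chunks carry no text in their delta; skip them.
    delta = chunk.choices[0].delta.content
    if delta:
        response += delta
        print(delta, end="", flush=True)
```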
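Two of the added lines lean on things `gr.ChatInterface` does not clearly expose: the `chat_history=chat_history` constructor argument and the `chat_interface.dropdown` attribute in the click wiring; the fresh `gr.Slider(...)` calls inside `inputs=[...]` also create new components rather than reading the ones rendered in the chat interface. A hedged alternative is to build the shared inputs first (with `render=False` so `ChatInterface` can place them in its accordion) and pass the same references to both the chat interface and the button; all names below are illustrative, not part of the commit:

```python
import gradio as gr

with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chat_history = gr.State([])  # shared history state for the button handler

    # Create the inputs once; render=False defers layout to ChatInterface.
    system_box = gr.Textbox(lines=3, label="System message", render=False)
    max_tokens_slider = gr.Slider(1, 2048, value=512, step=1,
                                  label="Max new tokens", render=False)
    temperature_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.1,
                                   label="Temperature", render=False)
    top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05,
                             label="Top-p (nucleus sampling)", render=False)
    model_dropdown = gr.Dropdown(choices=list(available_models.keys()),
                                 value="Zephyr 7B Beta", label="Select Model",
                                 render=False)

    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[system_box, max_tokens_slider,
                           temperature_slider, top_p_slider, model_dropdown],
        title="Multi-Model Chatbot",
        description="A customizable chatbot interface using Hugging Face's Inference API.",
    )

    updates_button = gr.Button("Show Updates")
    updates_button.click(
        fn=show_updates_and_respond,
        inputs=[chat_history, system_box, max_tokens_slider,
                temperature_slider, top_p_slider, model_dropdown],
        outputs=chat_history,
    )
```

Because both handlers receive the very same component references, the button reads whatever values the user last set in the chat interface's accordion, with no duplicate sliders created at wiring time.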