Spaces:
Runtime error
Threatthriver committed
Commit c51870d
1 Parent(s): 66980c9
Update app.py
app.py
CHANGED
@@ -4,11 +4,9 @@ from huggingface_hub import InferenceClient
 # Define available models and their Hugging Face IDs
 available_models = {
     "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
-    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
     # Add more models here as needed
 }
 
-
 def respond(
     message: str,
     history: list[tuple[str, str]],
@@ -20,37 +18,17 @@ def respond(
 ):
     """
     Generates a response from the AI model based on the user's message and chat history.
-
-    Args:
-        message (str): The user's input message.
-        history (list): A list of tuples representing the conversation history (user, assistant).
-        system_message (str): A system-level message guiding the AI's behavior.
-        max_tokens (int): The maximum number of tokens for the output.
-        temperature (float): Sampling temperature for controlling the randomness.
-        top_p (float): Top-p (nucleus sampling) for controlling diversity.
-        model_name (str): The name of the model to use.
-
-    Yields:
-        str: The AI's response as it is generated.
     """
-    # Initialize the InferenceClient with the selected model
     client = InferenceClient(model=available_models[model_name])
 
-    # Prepare the conversation history for the API call
     messages = [{"role": "system", "content": system_message}]
-
     for user_input, assistant_response in history:
         messages.append({"role": "user", "content": user_input})
         messages.append({"role": "assistant", "content": assistant_response})
-
-    # Add the latest user message to the conversation
     messages.append({"role": "user", "content": message})
 
-    # Initialize an empty response
     streamed_response = ""
-
     try:
-        # Generate a response from the model with streaming
         for response in client.chat_completion(
             messages=messages,
             max_tokens=max_tokens,
@@ -61,15 +39,18 @@ def respond(
             chunk = response.choices[0].delta.get("content", "")
             streamed_response += chunk
             yield streamed_response
-
     except Exception as e:
         yield f"**Error:** {str(e)}"
 
+# Latest updates
+latest_updates = """
+**Chatbot - Latest Updates:**
+
+* **Multiple Model Support:** You can now choose from different models like Zephyr 7B.
+* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+"""
 
 def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
-    """
-    Shows the latest updates and then generates a response from the model based on the updates.
-    """
     history.append(("User: ", "Show me the latest updates"))
     yield from respond(
         message="Show me the latest updates",
@@ -92,19 +73,6 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
         model_name=model_name,
     )
 
-
-# Latest updates (you can replace this with actual update information)
-latest_updates = """
-**Chatbot - Latest Updates:**
-
-* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
-* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
-* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
-* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
-* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
-"""
-
-
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     chat_history = gr.State([]) # Initialize an empty chat history state
@@ -149,4 +117,4 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
 
 # Launch the Gradio interface in full screen
 if __name__ == "__main__":
-    demo.launch(share=True, fullscreen=True)
+    demo.launch(share=True, fullscreen=True)
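
The streaming loop in respond() relies on InferenceClient.chat_completion with stream=True and reads each chunk's delta with dict-style access. Below is a minimal standalone sketch of that pattern, not part of this commit; the model ID, prompt, and generation settings are illustrative placeholders, and the isinstance guard is an assumption to cover huggingface_hub versions where the streamed delta is an object rather than a dict:

# Hedged sketch of the streaming pattern used in respond(); not part of the commit.
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")  # placeholder model ID
messages = [
    {"role": "system", "content": "You are a friendly chatbot."},
    {"role": "user", "content": "Say hello."},
]

streamed_response = ""
for chunk in client.chat_completion(
    messages=messages,
    max_tokens=64,      # placeholder generation settings
    temperature=0.7,
    top_p=0.95,
    stream=True,        # yield partial responses as they are generated
):
    delta = chunk.choices[0].delta
    # Depending on the huggingface_hub version, delta may be a dict or an object;
    # guard the access so a chunk without content does not raise.
    text = delta.get("content") if isinstance(delta, dict) else getattr(delta, "content", None)
    streamed_response += text or ""
    print(text or "", end="", flush=True)  # echo tokens as they arrive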