Spaces:

IntellijMind
/

chat-llm

Runtime error

App Files Files Community

Threatthriver committed on Sep 20

Commit

29567f1

•

1 Parent(s): 3e4a10f

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -62

app.py CHANGED Viewed

@@ -1,82 +1,50 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-# Define available models and their Hugging Face IDs
-available_models = {
-    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
-    # Add more models here as needed
-}
-def respond(
-    message: str,
-    history: list[tuple[str, str]],
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    model_name: str,
-):
     """
-    Generates a response from the AI model based on the user's message and chat history.
     """
-    client = InferenceClient(model=available_models[model_name])
-    # Prepare the conversation history for the API call
-    messages = [{"role": "system", "content": system_message}]
-    for user_input, assistant_response in history:
-        messages.append({"role": "user", "content": user_input})
-        messages.append({"role": "assistant", "content": assistant_response})
-    messages.append({"role": "user", "content": message})
-    streamed_response = ""
-    try:
-        # Generate a response from the model with streaming
-        for response in client.chat_completion(
-            messages=messages,
-            max_tokens=max_tokens,
-            stream=True,
             temperature=temperature,
             top_p=top_p,
-        ):
-            chunk = response.choices[0].delta.get("content", "")
-            streamed_response += chunk
-            yield streamed_response
-    except Exception as e:
-        yield f"**Error:** {str(e)}"
-def update_chatbox(history, message, model_name, system_message, max_tokens, temperature, top_p):
     """
     Update the chat history and generate the next AI response.
     """
     history.append(("User", message))  # Add user message to history
-    ai_response = next(respond(
-        message=message,
-        history=history,
-        system_message=system_message,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        model_name=model_name
-    ))
-    history.append(("AI", ai_response))  # Add AI response to history
     return history, ""  # Return updated history and clear the user input
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     chat_history = gr.State([])  # Initialize an empty chat history state
-    system_message = gr.Textbox(
-        value="You are a friendly and helpful assistant.",
-        label="System message",
-        lines=2
-    )
-    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
     temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
-    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-    model_dropdown = gr.Dropdown(choices=list(available_models.keys()), value="Zephyr 7B Beta", label="Select Model")
     chatbot = gr.Chatbot(label="Character-like AI Chat")
     user_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
@@ -85,7 +53,7 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     # When the send button is clicked, update chat history
     send_button.click(
         fn=update_chatbox,
-        inputs=[chat_history, user_input, model_dropdown, system_message, max_tokens, temperature, top_p],
         outputs=[chatbot, user_input],  # Update chatbox and clear user input
         queue=True  # Ensure responses are shown in order
     )

 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load the tokenizer and model once, at module import time. Note: this is the
+# 8B-parameter Llama 3.1 Instruct checkpoint, which is not a lightweight model.
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
+# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+def generate_response(message, history, max_tokens, temperature, top_p):
     """
+    Generate a reply to ``message`` and append the exchange to ``history``.
+
+    Args:
+        message: The user's latest message.
+        history: List of 2-tuples; ``(message, response)`` is appended in place.
+        max_tokens: Maximum number of *new* tokens to generate for the reply.
+        temperature: Sampling temperature passed to ``model.generate``.
+        top_p: Nucleus-sampling probability mass passed to ``model.generate``.
+
+    Returns:
+        ``(history, "")`` -- the updated history and an empty string.
     """
+    # An Instruct checkpoint expects its chat format, so build the prompt with
+    # the tokenizer's chat template instead of concatenating raw text with EOS.
+    # NOTE: only the current message is sent; earlier turns in ``history`` are
+    # not included in the prompt.
+    inputs = tokenizer.apply_chat_template(
+        [{"role": "user", "content": message}],
+        add_generation_prompt=True,
+        return_dict=True,
+        return_tensors="pt",
+    ).to(device)
+    prompt_length = inputs["input_ids"].shape[-1]
+    # Generate the output using the model
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            # max_length would also count the prompt tokens, leaving little or
+            # no room for the reply; max_new_tokens bounds only the reply.
+            max_new_tokens=int(max_tokens),
+            # temperature / top_p only take effect when sampling is enabled.
+            do_sample=True,
             temperature=temperature,
             top_p=top_p,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+    # Decode only the newly generated tokens, not the echoed prompt.
+    response = tokenizer.decode(output[0, prompt_length:], skip_special_tokens=True)
+    history.append((message, response))
+    return history, ""
+def update_chatbox(history, message, max_tokens, temperature, top_p):
     """
     Update the chat history and generate the next AI response.
+
+    Args:
+        history: Chat history list; extended in place by ``generate_response``.
+        message: Text the user just submitted.
+        max_tokens, temperature, top_p: Generation settings forwarded unchanged.
+
+    Returns:
+        ``(history, "")`` -- the updated history and an empty string that
+        clears the input box.
     """
+    # Nothing to send: leave the history untouched and just clear the input.
+    if not message or not message.strip():
+        return history, ""
+    # generate_response appends the (message, response) pair itself, so no
+    # separate ("User", message) entry is added here -- that would show up as
+    # a spurious extra row in the chat.
+    history, _ = generate_response(message, history, max_tokens, temperature, top_p)
     return history, ""  # Return updated history and clear the user input
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     chat_history = gr.State([])  # Initialize an empty chat history state
+    max_tokens = gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens")
     temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
+    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p (nucleus sampling)")
     chatbot = gr.Chatbot(label="Character-like AI Chat")
     user_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
     # When the send button is clicked, update chat history
     send_button.click(
         fn=update_chatbox,
+        inputs=[chat_history, user_input, max_tokens, temperature, top_p],
         outputs=[chatbot, user_input],  # Update chatbox and clear user input
         queue=True  # Ensure responses are shown in order
     )