Spaces:

IntellijMind
/

chat-llm

Build error

File size: 2,793 Bytes

c67e035
 
 
ba6c3d0
 
c67e035
 
ba6c3d0
c67e035
ba6c3d0
 
 
 
c67e035
ba6c3d0
 
 
 
 
 
 
 
 
 
c67e035
ba6c3d0
 
 
 
 
 
 
 
 
 
 
 
c67e035
ba6c3d0
c67e035
 
ba6c3d0
c67e035
 
ba6c3d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c67e035
ba6c3d0
c67e035
 
 
 
 
 
 
 
 
 
 
 
ba6c3d0
 
c67e035
 
ba6c3d0
c67e035
ba6c3d0

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face model ID that every chat request is sent to.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# Module-level client shared by `respond` for all chat completions.
client = InferenceClient(model=MODEL_ID)

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Stream a chat reply to ``message`` given the prior conversation.

    Builds a role-tagged message list (the system prompt, then every non-empty
    user / assistant turn from ``history``, then ``message``) and sends it to
    the module-level ``client`` via ``chat_completion(stream=True)``.

    Args:
        message (str): The user's latest input message.
        history (list): Prior turns as ``(user, assistant)`` pairs; empty/None
            entries in a pair are skipped.
        system_message (str): System-level instruction placed first in the
            conversation.
        max_tokens (int): Forwarded as ``max_tokens`` to the completion call.
        temperature (float): Forwarded as ``temperature`` to the completion call.
        top_p (float): Forwarded as ``top_p`` to the completion call.

    Yields:
        str: The reply accumulated so far, once per streamed chunk that carries
        text. If anything raises while streaming, a single
        ``"An error occurred: ..."`` string is yielded and generation stops.
    """

    # Prepare the conversation history for the API call
    messages = [{"role": "system", "content": system_message}]

    for user_input, assistant_response in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_response:
            messages.append({"role": "assistant", "content": assistant_response})

    # Add the latest user message to the conversation
    messages.append({"role": "user", "content": message})

    # Accumulates the streamed tokens into the full reply
    response = ""

    try:
        # Named `chunk` (not `message`) so the loop does not shadow the
        # `message` parameter.
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some streamed chunks carry no choices at all; nothing to append.
            if not chunk.choices:
                continue

            token = chunk.choices[0].delta.content
            # `delta.content` can be None (or empty) on chunks without text;
            # concatenating None would raise TypeError and discard the partial
            # reply, so skip those chunks instead.
            if not token:
                continue

            response += token
            yield response

    except Exception as e:
        # UI boundary: surface any failure to the chat window rather than
        # crashing the generator.
        yield f"An error occurred: {str(e)}"


# Extra input components for user customization. They are listed in the same
# order as the extra parameters of `respond`:
# system_message, max_tokens, temperature, top_p.
additional_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Chat UI that calls `respond` and exposes the controls above.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=additional_controls,
    title="Chatbot Interface",
    description="A customizable chatbot interface using Hugging Face's Inference API.",
)

# Only start the app when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()