"""Gradio chat UI that sends text and/or an image to the Hyperbolic chat API."""

import base64
from io import BytesIO

import gradio as gr
import requests
from PIL import Image

API_ENDPOINT = "https://api.hyperbolic.xyz/v1/chat/completions"
MODEL_NAME = "Qwen/Qwen2-VL-72B-Instruct"
# Without a timeout, requests waits forever on a stalled connection and the
# UI callback never returns.
REQUEST_TIMEOUT_SECONDS = 120
NO_CONTENT_REPLY = "No response content."
# Pillow image modes that can be written as PNG without conversion.
_PNG_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})


def encode_image(img):
    """Encode a PIL image as a base64 string of its PNG serialization.

    Args:
        img: A ``PIL.Image.Image`` instance.

    Returns:
        The PNG bytes of ``img``, base64-encoded and decoded to ``str``.
    """
    if img.mode not in _PNG_MODES:
        # Modes such as CMYK cannot be written as PNG; fall back to RGB.
        img = img.convert("RGB")
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _load_image_as_base64(user_image):
    """Return the base64 PNG encoding of an uploaded image.

    ``user_image`` may be an already-loaded PIL image, or anything
    ``Image.open`` accepts (a file path or a binary file object).
    """
    if isinstance(user_image, Image.Image):
        return encode_image(user_image)
    # The context manager closes the underlying file once encoding is done.
    with Image.open(user_image) as img:
        return encode_image(img)


def _extract_reply(api_response):
    """Pull the assistant text out of a decoded chat-completions response.

    Returns ``NO_CONTENT_REPLY`` when the expected
    ``choices[0].message.content`` path is missing, empty, or not a string.
    """
    if not isinstance(api_response, dict):
        return NO_CONTENT_REPLY
    choices = api_response.get("choices")
    # An empty "choices" list must not be indexed.
    if not isinstance(choices, list) or not choices:
        return NO_CONTENT_REPLY
    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if isinstance(content, str) and content:
        return content
    return NO_CONTENT_REPLY


def _image_path(user_image):
    """Return a file path for the uploaded image, or ``None`` if unavailable.

    Accepts a plain path string, or an object exposing the path as ``.name``
    (e.g. a temporary-file wrapper).
    """
    if not user_image:
        return None
    if isinstance(user_image, str):
        return user_image
    return getattr(user_image, "name", None)


def get_api_response(api_key, user_message, user_image):
    """Send the user's text and/or image to the Hyperbolic API.

    Args:
        api_key: Bearer token for the API.
        user_message: Text typed by the user; may be empty.
        user_image: Uploaded image (file path, file object or PIL image),
            or a falsy value when no image was provided.

    Returns:
        ``{"response": <reply text>}`` on success, otherwise
        ``{"error": <description>}``. This function does not raise: every
        failure is reported through the ``"error"`` key.
    """
    if not api_key:
        return {"error": "API key is required."}
    if not user_message and not user_image:
        return {"error": "Please provide a text message, an image, or both."}

    try:
        content = []
        if user_message:
            content.append({"type": "text", "text": user_message})
        if user_image:
            base64_img = _load_image_as_base64(user_image)
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{base64_img}"},
            })

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        payload = {
            "messages": [{"role": "user", "content": content}],
            "model": MODEL_NAME,
            "max_tokens": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
        }

        response = requests.post(
            API_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return {"error": f"API Error: {response.status_code} - {response.text}"}
        return {"response": _extract_reply(response.json())}
    except requests.exceptions.Timeout:
        return {
            "error": f"The API request timed out after {REQUEST_TIMEOUT_SECONDS} seconds."
        }
    except Exception as e:
        # UI boundary: surface any other failure (unreadable image, network
        # error, invalid JSON) as a chat message instead of crashing the
        # callback.
        return {"error": str(e)}


def chatbot_response(api_key, user_message, user_image, history):
    """Run one chat turn and return the updated conversation history.

    The history is a list of two-element ``[user, assistant]`` entries, which
    is the pair format the ``gr.Chatbot`` component renders. An uploaded image
    is shown as a ``(filepath,)`` file message; when both text and an image
    are sent, the image gets its own row ahead of the text row.

    Args:
        api_key: Bearer token forwarded to ``get_api_response``.
        user_message: Text typed by the user; may be empty.
        user_image: Uploaded image or a falsy value.
        history: Previous conversation history (may be ``None``).

    Returns:
        A ``(history, history)`` tuple: one value for the chatbot display and
        one for the session state.
    """
    # Copy so the list held by the caller's state is not mutated in place.
    history = list(history or [])

    api_result = get_api_response(api_key, user_message, user_image)
    if "error" in api_result:
        ai_message = f"Error: {api_result['error']}"
    else:
        ai_message = api_result["response"]

    image_path = _image_path(user_image)
    if image_path and user_message:
        history.append([(image_path,), None])
        history.append([user_message, ai_message])
    elif image_path:
        history.append([(image_path,), ai_message])
    else:
        history.append([user_message or None, ai_message])

    return history, history


# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🖼️ Image Description Chatbot with Hyperbolic API
        Engage in a conversation with the AI by sending text messages and/or uploading images. Enter your Hyperbolic API key to get started.
        """
    )

    with gr.Row():
        api_key_input = gr.Textbox(
            label="🔑 Hyperbolic API Key",
            type="password",
            placeholder="Enter your API key here",
            interactive=True,
        )

    chatbot = gr.Chatbot(label="💬 Chatbot")

    with gr.Row():
        with gr.Column(scale=4):
            user_text = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=1,
            )
        with gr.Column(scale=1):
            user_image = gr.Image(
                label="Upload Image",
                # "filepath" hands the callback a path string, which both
                # Image.open and the chatbot's file messages can use.
                type="filepath",
                interactive=True,
            )

    send_button = gr.Button("📤 Send")

    # Hidden state to keep track of the conversation history
    state = gr.State([])

    send_button.click(
        fn=chatbot_response,
        inputs=[api_key_input, user_text, user_image, state],
        outputs=[chatbot, state],
    )

    gr.Markdown(
        """
        ---
        **Note:** Your API key is used only for this session and is not stored. Ensure you trust the environment in which you're running this application.
        """
    )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()