Spaces:
Running
Running
# import required packages | |
import google.generativeai as genai | |
import os | |
import PIL.Image | |
import gradio as gr | |
from gradio_multimodalchatbot import MultimodalChatbot | |
from gradio.data_classes import FileData | |
# For better security practices, retrieve sensitive information like API keys from environment variables. | |
# Fetch an environment variable. | |
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY') | |
genai.configure(api_key=GOOGLE_API_KEY) | |
# These codelines are just to verify if your api key is correct or not | |
# Use them when you clone the repo and build locally | |
#!curl \ | |
#-H 'Content-Type: application/json' \ | |
#-d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \ | |
#"https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key=<enter-your-key-here>" | |
# Initialize genai models | |
model = genai.GenerativeModel('gemini-pro') | |
modelvis = genai.GenerativeModel('gemini-pro-vision') | |
def gemini(input, file, chatbot=[]): | |
""" | |
Function to handle gemini model and gemini vision model interactions. | |
Parameters: | |
input (str): The input text. | |
file (File): An optional file object for image processing. | |
chatbot (list): A list to keep track of chatbot interactions. | |
Returns: | |
tuple: Updated chatbot interaction list, an empty string, and None. | |
""" | |
messages = [] | |
print(chatbot) | |
# Process previous chatbot messages if present | |
if len(chatbot) != 0: | |
for user, bot in chatbot: | |
user, bot = user.text, bot.text | |
messages.extend([ | |
{'role': 'user', 'parts': [user]}, | |
{'role': 'model', 'parts': [bot]} | |
]) | |
messages.append({'role': 'user', 'parts': [input]}) | |
else: | |
messages.append({'role': 'user', 'parts': [input]}) | |
try: | |
# Process image if file is provided | |
if file is not None: | |
with PIL.Image.open(file.name) as img: | |
message = [{'role': 'user', 'parts': [input, img]}] | |
response = modelvis.generate_content(message) | |
gemini_video_resp = response.text | |
messages.append({'role': 'model', 'parts': [gemini_video_resp]}) | |
# Construct list of messages in the required format | |
user_msg = {"text": input, "files": [{"file": FileData(path=file.name)}]} | |
bot_msg = {"text": gemini_video_resp, "files": []} | |
chatbot.append([user_msg, bot_msg]) | |
else: | |
response = model.generate_content(messages) | |
gemini_resp = response.text | |
# Construct list of messages in the required format | |
user_msg = {"text": input, "files": []} | |
bot_msg = {"text": gemini_resp, "files": []} | |
chatbot.append([user_msg, bot_msg]) | |
except Exception as e: | |
# Handling exceptions and raising error to the modal | |
print(f"An error occurred: {e}") | |
raise gr.Error(e) | |
return chatbot, "", None | |
# Define the Gradio Blocks interface | |
with gr.Blocks() as demo: | |
# # Add a centered header using HTML | |
# gr.HTML("<center><h1>Gemini-PRO & Gemini-PRO-Vision API</h1></center>") | |
# Initialize the MultimodalChatbot component | |
multi = MultimodalChatbot(value=[]) | |
with gr.Row(): | |
# Textbox for user input with increased scale for better visibility | |
tb = gr.Textbox(scale=5, placeholder='Message CortexChat with Vision...') | |
# Upload button for image files | |
up = gr.UploadButton("Attach File", file_types=["image"], scale=1) | |
# Define the behavior on text submission | |
tb.submit(gemini, [tb, up, multi], [multi, tb, up]) | |
# Define the behavior on image upload | |
# Using chained then() calls to update the upload button's state | |
up.upload(lambda: gr.UploadButton("Uploading Image..."), [], up) \ | |
.then(lambda: gr.UploadButton("Image Uploaded"), [], up) \ | |
.then(lambda: gr.UploadButton("Upload Image"), [], up) | |
# Launch the demo with a queue to handle multiple users | |
demo.queue().launch() |