Spaces:
Running
Running
File size: 3,792 Bytes
da443a0 1b4d595 da443a0 1b4d595 da443a0 1b4d595 b442a1d 1b4d595 8073098 1b4d595 11c7e42 1b4d595 da443a0 1b4d595 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# import required packages
import google.generativeai as genai
import os
import PIL.Image
import gradio as gr
from gradio_multimodalchatbot import MultimodalChatbot
from gradio.data_classes import FileData
# Read the Gemini API key from the environment so the secret never has to
# live in the source tree.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Optional sanity check for the key when running a local clone of this repo:
# issue the request below from a shell, substituting your own key.
#!curl \
#-H 'Content-Type: application/json' \
#-d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \
#"https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key=<enter-your-key-here>"

# Model handles shared by the request handler: `model` serves text-only turns,
# `modelvis` serves turns that carry an image.
model = genai.GenerativeModel('gemini-pro')
modelvis = genai.GenerativeModel('gemini-pro-vision')
def _message_text(message):
    """Return the text of a history entry given as a dict or as an object with ``.text``."""
    # This handler appends plain dicts to the history itself, while the
    # original code read entries through a ``.text`` attribute; accept both
    # so a list produced by an earlier call can be passed straight back in.
    if isinstance(message, dict):
        return message["text"]
    return message.text


def gemini(input, file, chatbot=None):
    """
    Send one user turn to Gemini and record the exchange in the chat history.

    Text-only turns are sent to the text model together with every previous
    turn found in ``chatbot``. Turns with an attached image are sent to the
    vision model, which receives only the current prompt and image; earlier
    turns are not forwarded in that case.

    Parameters:
        input (str): The user's prompt. The name shadows the builtin but is
            kept because it is part of the function's signature.
        file (File): An optional uploaded image; its ``name`` attribute is
            used as the path to open. ``None`` for a text-only turn.
        chatbot (list): Previous ``[user, bot]`` message pairs. A new list is
            created when omitted. The list is extended in place.

    Returns:
        tuple: The updated chatbot list, an empty string, and None.

    Raises:
        gr.Error: If opening the image, building the history, or the model
            call fails; the original exception is attached as the cause.
    """
    # Create the list per call: a list used as a default value is shared by
    # every call and would carry one conversation over into the next.
    if chatbot is None:
        chatbot = []

    try:
        if file is not None:
            # Keep the image open for the whole request, including reading
            # the reply text.
            with PIL.Image.open(file.name) as img:
                response = modelvis.generate_content(
                    [{'role': 'user', 'parts': [input, img]}]
                )
                reply = response.text
            attachments = [{"file": FileData(path=file.name)}]
        else:
            # Replay the conversation so far, then add the new user turn.
            history = []
            for user, bot in chatbot:
                history.append({'role': 'user', 'parts': [_message_text(user)]})
                history.append({'role': 'model', 'parts': [_message_text(bot)]})
            history.append({'role': 'user', 'parts': [input]})
            reply = model.generate_content(history).text
            attachments = []
    except Exception as e:
        # Log server-side, then surface the failure through Gradio's error type.
        print(f"An error occurred: {e}")
        raise gr.Error(str(e)) from e

    # Store the new exchange as a [user, bot] pair of text/files dicts.
    chatbot.append([
        {"text": input, "files": attachments},
        {"text": reply, "files": []},
    ])
    return chatbot, "", None
# Define the Gradio Blocks interface
with gr.Blocks() as demo:
    # # Add a centered header using HTML
    # gr.HTML("<center><h1>Gemini-PRO & Gemini-PRO-Vision API</h1></center>")

    # Chat display component; starts with an empty history
    multi = MultimodalChatbot(value=[])

    with gr.Row():
        # Prompt textbox, given a larger scale value than the upload button
        tb = gr.Textbox(scale=5, placeholder='Message CortexChat with Vision...')
        # Upload button restricted to image files
        up = gr.UploadButton("Attach File", file_types=["image"], scale=1)

    # On submit: call gemini with (text, uploaded file, history) and write its
    # three return values back to the chatbot, the textbox and the upload button.
    tb.submit(gemini, [tb, up, multi], [multi, tb, up])

    # On upload: relabel the button through three chained updates. The chain
    # ends on the "Upload Image" label.
    # NOTE(review): the steps do no work in between, so the intermediate
    # labels may not be visible for long - confirm this is intended.
    (
        up.upload(lambda: gr.UploadButton("Uploading Image..."), [], up)
        .then(lambda: gr.UploadButton("Image Uploaded"), [], up)
        .then(lambda: gr.UploadButton("Upload Image"), [], up)
    )

# Enable request queuing, then start the app
demo.queue().launch()