Spaces:

AI-ANK
/

PaLM-Kosmos-Vision-Gradio

Sleeping

App Files Files Community

AI-ANK committed on Dec 10, 2023

Commit

73ea806

1 Parent(s): e160018

Create app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+import requests
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from io import BytesIO
+import replicate
+from llama_index.llms.palm import PaLM
+from llama_index import ServiceContext, VectorStoreIndex, Document
+from llama_index.memory import ChatMemoryBuffer
+import os
+# Function to get a brief image caption via the Kosmos-2 model hosted on Replicate
+def get_image_caption(image_data):
+    input_data = {
+        "image": image_data,
+        "description_type": "Brief"
+    }
+    output = replicate.run(
+        "lucataco/kosmos-2:3e7b211c29c092f4bcc8853922cc986baa52efe255876b80cac2c2fbb4aff805",
+        input=input_data
+    )
+    # Split the output string on the newline character and take the first item
+    text_description = output.split('\n\n')[0]
+    return text_description
+# Function to create the chat engine for a given image description
+def create_chat_engine(img_desc, api_key):
+    llm = PaLM(api_key=api_key)
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
+    doc = Document(text=img_desc)
+    index = VectorStoreIndex.from_documents([doc], service_context=service_context)
+    chatmemory = ChatMemoryBuffer.from_defaults(token_limit=1500)
+    chat_engine = index.as_chat_engine(
+        chat_mode="context",
+        system_prompt=(
+            f"You are a chatbot, able to have normal interactions, as well as talk. "
+            "You always answer in great detail and are polite. Your responses always descriptive. "
+            "Your job is to talk about an image the user has uploaded. Image description: {img_desc}."
+        ),
+        verbose=True,
+        memory=chatmemory
+    )
+    return chat_engine
+# Function to handle image upload and chat interaction
+def process_image_and_chat(image_file, user_input):
+    if image_file is None:
+        return "Please upload an image."
+    image_data = BytesIO(image_file.read())
+    img_desc = get_image_caption(image_data)
+    chat_engine = create_chat_engine(img_desc, os.environ["GOOGLE_API_KEY"])
+    if user_input:
+        try:
+            response = chat_engine.chat(user_input)
+            return response
+        except Exception as e:
+            return f'An error occurred: {str(e)}'
+    else:
+        return "Ask me anything about the uploaded image."
+# Define Gradio interface
+image_input = gr.inputs.Image(type="file")
+text_input = gr.inputs.Textbox(label="Ask me about the image:")
+output_text = gr.outputs.Textbox(label="Response")
+iface = gr.Interface(
+    fn=process_image_and_chat,
+    inputs=[image_input, text_input],
+    outputs=output_text,
+    title="My version of ChatGPT vision",
+    description="You can upload an image and start chatting with the LLM about the image",
+    allow_flagging="never"
+)
+# Set Replicate and Google API keys
+os.environ['REPLICATE_API_TOKEN'] = 'your_replicate_api_token'  # Replace with your actual key
+os.environ["GOOGLE_API_KEY"] = 'your_google_api_key'  # Replace with your actual key
+# Launch the app
+iface.launch()