AI-ANK committed on
Commit
73ea806
·
1 Parent(s): e160018

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from PIL import Image
4
+ from transformers import AutoProcessor, AutoModelForVision2Seq
5
+ from io import BytesIO
6
+ import replicate
7
+ from llama_index.llms.palm import PaLM
8
+ from llama_index import ServiceContext, VectorStoreIndex, Document
9
+ from llama_index.memory import ChatMemoryBuffer
10
+ import os
11
+
12
# Caption an image with the Kosmos-2 model hosted on Replicate
def get_image_caption(image_data):
    """Return the leading paragraph of a Kosmos-2 "Brief" image description.

    Args:
        image_data: The image payload; it is forwarded unchanged as the
            ``image`` input of the Replicate model.

    Returns:
        The part of the model output that precedes the first blank line
        (two consecutive newline characters). If there is no blank line the
        whole output is returned.

    Note:
        Assumes ``replicate.run`` returns a single string for this model --
        TODO confirm against the model's output schema.
    """
    model_ref = "lucataco/kosmos-2:3e7b211c29c092f4bcc8853922cc986baa52efe255876b80cac2c2fbb4aff805"
    output = replicate.run(
        model_ref,
        input={"image": image_data, "description_type": "Brief"},
    )
    # Everything after the first blank line is discarded.
    first_paragraph, _, _ = output.partition('\n\n')
    return first_paragraph
25
+
26
# Function to create the chat engine for one image description
def create_chat_engine(img_desc, api_key):
    """Build a chat engine whose index and system prompt carry ``img_desc``.

    The description is wrapped in a single ``Document``, indexed with a
    ``VectorStoreIndex``, and exposed through ``index.as_chat_engine`` with
    ``chat_mode="context"`` and a chat memory buffer created with
    ``token_limit=1500``.

    Args:
        img_desc: Text description of the uploaded image.
        api_key: API key passed to the ``PaLM`` LLM wrapper.

    Returns:
        The chat engine object returned by ``index.as_chat_engine``.
    """
    llm = PaLM(api_key=api_key)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
    doc = Document(text=img_desc)
    index = VectorStoreIndex.from_documents([doc], service_context=service_context)
    chatmemory = ChatMemoryBuffer.from_defaults(token_limit=1500)

    # The f prefix applies per literal, not to the whole implicitly
    # concatenated string, so it must sit on the literal that contains the
    # {img_desc} placeholder. Previously only the first literal was an
    # f-string and the model received the text "{img_desc}" verbatim.
    system_prompt = (
        "You are a chatbot, able to have normal interactions, as well as talk. "
        "You always answer in great detail and are polite. Your responses always descriptive. "
        f"Your job is to talk about an image the user has uploaded. Image description: {img_desc}."
    )

    chat_engine = index.as_chat_engine(
        chat_mode="context",
        system_prompt=system_prompt,
        verbose=True,
        memory=chatmemory,
    )
    return chat_engine
45
+
46
# Function to handle image upload and chat interaction
def process_image_and_chat(image_file, user_input):
    """Answer a question about an uploaded image; used as the Gradio handler.

    Args:
        image_file: Object with a ``read()`` method returning the image
            bytes, or ``None`` when nothing was uploaded.
        user_input: The user's question; an empty value yields a prompt to
            ask something instead of an answer.

    Returns:
        A string: a hint when the image or the question is missing, the chat
        engine's reply converted with ``str``, or an
        ``'An error occurred: ...'`` message if any step raises.
    """
    if image_file is None:
        return "Please upload an image."

    # Check the question before doing any work: captioning calls a remote
    # model and building the engine creates an index, neither of which is
    # needed when there is nothing to answer.
    if not user_input:
        return "Ask me anything about the uploaded image."

    # This is the UI boundary, so every failure (unreadable upload, caption
    # service error, missing GOOGLE_API_KEY, engine construction, chat call)
    # is reported as text rather than propagating out of the handler.
    try:
        image_data = BytesIO(image_file.read())
        img_desc = get_image_caption(image_data)
        chat_engine = create_chat_engine(img_desc, os.environ["GOOGLE_API_KEY"])
        response = chat_engine.chat(user_input)
        # Convert to str so every return path yields the same type.
        return str(response)
    except Exception as e:
        return f'An error occurred: {str(e)}'
63
+
64
# Gradio UI: one image upload plus one question box in, one text box out.
# NOTE(review): the gr.inputs / gr.outputs namespaces look like the legacy
# Gradio API -- confirm the pinned Gradio version still provides them.
image_input = gr.inputs.Image(type="file")
text_input = gr.inputs.Textbox(label="Ask me about the image:")
output_text = gr.outputs.Textbox(label="Response")

# Wire the components to the handler; flagging is turned off.
iface = gr.Interface(
    title="My version of ChatGPT vision",
    description="You can upload an image and start chatting with the LLM about the image",
    fn=process_image_and_chat,
    inputs=[image_input, text_input],
    outputs=output_text,
    allow_flagging="never",
)
77
+
78
# Set Replicate and Google API keys.
# setdefault only fills in the placeholder when the variable is absent, so a
# real token already present in the environment (for example a deployment
# secret) is no longer overwritten by the placeholder text.
os.environ.setdefault('REPLICATE_API_TOKEN', 'your_replicate_api_token')  # Replace with your actual key
os.environ.setdefault("GOOGLE_API_KEY", 'your_google_api_key')  # Replace with your actual key

# Launch the app
iface.launch()