import os
import base64
from io import BytesIO
from PIL import Image
import streamlit as st
from langchain.memory import ConversationSummaryBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
load_dotenv()
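# KMP_DUPLICATE_LIB_OK=TRUE is a common workaround for the Intel OpenMP
# "libiomp5 already initialized" crash that some ML libraries can trigger.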
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# Define title and layout
st.set_page_config(page_title="Vision Bot", layout="wide")
# Read the Google API key from the environment (.env) rather than hardcoding it in source
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "")
st.title("Vision Bot")
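# Chat model: Gemini 1.5 Flash via LangChain's ChatGoogleGenerativeAI wrapper.
# max_tokens caps the length of each reply; the API key is picked up from the
# GOOGLE_API_KEY environment variable set above.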
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_tokens=4000,
)
IMAGE_SAVE_FOLDER = "./uploaded_images"
if not os.path.exists(IMAGE_SAVE_FOLDER):
    os.makedirs(IMAGE_SAVE_FOLDER)
st.markdown(
    """
    <style>
    .sidebar-content {
        background-color: #f1f3f6;
        padding: 20px;
        border-radius: 10px;
        text-align: left;
        box-shadow: 0px 0px 10px rgba(0,0,0,0.1);
    }
    .st-emotion-cache-janbn0 {
        flex-direction: row-reverse;
        text-align: right;
    }
    .uploaded-image {
        border: 2px solid #D1D1D1;
        border-radius: 8px;
        margin-top: 10px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# Initialize session states
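# st.session_state persists across Streamlit's script reruns for the lifetime of the
# browser session, so the chat history, LLM handle, and memory survive each interaction.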
if "messages" not in st.session_state:
st.session_state.messages = []
if "llm" not in st.session_state:
st.session_state.llm = llm
if "rag_memory" not in st.session_state:
st.session_state.rag_memory = ConversationSummaryBufferMemory(llm=st.session_state.llm, max_token_limit=5000)
if "current_image" not in st.session_state:
st.session_state.current_image = None
if "last_displayed_image" not in st.session_state:
st.session_state.last_displayed_image = None
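# Chat transcript area; all messages are rendered into this container on every rerun.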
container = st.container()
with st.sidebar:
    st.markdown(
        """
        <div class="sidebar-content">
            <h2>Vision Bot</h2>
            <p>This is Vision Bot, where you can ask questions about any image you upload. It can perform tasks such as:</p>
            <ul>
                <li><b>Image Captioning</b></li>
                <li><b>Answering text-related queries inside the image</b></li>
                <li><b>OCR (Optical Character Recognition)</b></li>
                <li><b>Image Analysis & Description</b></li>
            </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )
# Upload image
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png","webp"], key="image_uploader")
# Check if a new image is uploaded
if uploaded_image and uploaded_image != st.session_state.current_image:
    st.session_state.current_image = uploaded_image
    # Fix image size here
    st.image(uploaded_image, caption="Newly Uploaded Image", width=300)  # Adjust width to a smaller size
    # Add a system message to mark the new image in the conversation
    st.session_state.messages.append({
        "role": "system",
        "content": f"New image uploaded: {uploaded_image.name}",
        "image": uploaded_image,
    })
# Display messages
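# Streamlit reruns this script on every interaction, so the full chat history is
# re-rendered from session_state each time.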
for message in st.session_state.messages:
    with container.chat_message(message["role"]):
        if message["role"] == "system" and "image" in message:
            # Display image in chat history with fixed size
            st.image(message["image"], width=300)  # Adjust width to a smaller size
        st.write(message["content"])
# Take prompt
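# st.chat_input returns None until the user submits text, so the walrus operator
# keeps the whole request/response handler inside a single if-block.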
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)
    # Save user input in session state
    st.session_state.messages.append({"role": "user", "content": prompt})
    if st.session_state.current_image:
        # Save uploaded image to disk
        image = Image.open(st.session_state.current_image)
        current_date = datetime.now().strftime("%Y%m%d")
        image_name = f"{current_date}_{st.session_state.current_image.name}"
        image_path = os.path.join(IMAGE_SAVE_FOLDER, image_name)
        image.save(image_path)
        # Encode image in base64
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()
        # Send image and text to the model
        chat = HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_string}"}},
            ]
        )
    else:
        # Send only text to the model if no image is uploaded
        chat = HumanMessage(content=prompt)
    # Get AI response
    ai_msg = llm.invoke([chat]).content
    with container.chat_message("assistant"):
        st.write(ai_msg)
    # Save the conversation context in memory
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': ai_msg})
    # Append the assistant's message to the session state
    st.session_state.messages.append({"role": "assistant", "content": ai_msg})
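# Minimal local run sketch (assumed filename and setup, not part of the original file):
#   1. Put GOOGLE_API_KEY=<your key> in a .env file next to this script.
#   2. pip install streamlit langchain langchain-google-genai python-dotenv pillow
#   3. streamlit run vision_bot.py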