# Vision-bot / app.py
import os
import base64
from PIL import Image
import streamlit as st
from langchain.memory import ConversationSummaryBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
load_dotenv()
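# Workaround for the "duplicate OpenMP runtime" abort that some conda/PyTorch
# builds trigger; unrelated to the bot's own logic.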
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# Define title and layout
st.set_page_config(page_title="Vision Bot", layout="wide")
# Read the Google API key from the environment (populated from .env by load_dotenv)
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY is not set. Add it to your .env file or environment.")
    st.stop()
st.title("Vision Bot")
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_tokens=4000,
)
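# gemini-1.5-flash is multimodal, so it can accept the base64-encoded image
# parts constructed further below alongside plain text.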
IMAGE_SAVE_FOLDER = "./uploaded_images"
os.makedirs(IMAGE_SAVE_FOLDER, exist_ok=True)
st.markdown(
    """
    <style>
    .sidebar-content {
        background-color: #f1f3f6;
        padding: 20px;
        border-radius: 10px;
        text-align: left;
        box-shadow: 0px 0px 10px rgba(0,0,0,0.1);
    }
    /* Right-align user chat bubbles; this class name is auto-generated by
       Streamlit and may change between Streamlit versions. */
    .st-emotion-cache-janbn0 {
        flex-direction: row-reverse;
        text-align: right;
    }
    .uploaded-image {
        border: 2px solid #D1D1D1;
        border-radius: 8px;
        margin-top: 10px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# Initialize session state
if "messages" not in st.session_state:
    st.session_state.messages = []
if "llm" not in st.session_state:
    st.session_state.llm = llm
if "rag_memory" not in st.session_state:
    # Summarizes older turns once the buffer exceeds max_token_limit,
    # keeping recent turns verbatim
    st.session_state.rag_memory = ConversationSummaryBufferMemory(
        llm=st.session_state.llm, max_token_limit=5000
    )
if "current_image" not in st.session_state:
    st.session_state.current_image = None
if "last_displayed_image" not in st.session_state:
    st.session_state.last_displayed_image = None
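# Note: last_displayed_image is reserved for tracking the most recently
# rendered image; nothing below reads it yet.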
container = st.container()
with st.sidebar:
    st.markdown(
        """
        <div class="sidebar-content">
            <h2>Vision Bot</h2>
            <p>This is Vision Bot, where you can ask questions about any image. It can perform tasks such as:</p>
            <ul>
                <li><b>Image Captioning</b></li>
                <li><b>Answering text-related queries inside the image</b></li>
                <li><b>OCR (Optical Character Recognition)</b></li>
                <li><b>Image Analysis & Description</b></li>
            </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )
    # Upload image
    uploaded_image = st.file_uploader(
        "Upload an image", type=["jpg", "jpeg", "png", "webp"], key="image_uploader"
    )
    # Only react when a different file is uploaded, so the same image is not
    # re-announced on every Streamlit rerun
    if uploaded_image and uploaded_image != st.session_state.current_image:
        st.session_state.current_image = uploaded_image
        st.image(uploaded_image, caption="Newly Uploaded Image", width=300)
        # Record the new image as a system message in the conversation history
        st.session_state.messages.append({
            "role": "system",
            "content": f"New image uploaded: {uploaded_image.name}",
            "image": uploaded_image,
        })
# Display the conversation history
for message in st.session_state.messages:
    with container.chat_message(message["role"]):
        if message["role"] == "system" and "image" in message:
            # Re-render images from history at a fixed width
            st.image(message["image"], width=300)
        st.write(message["content"])
# Handle a new user prompt
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)
    # Save the user input in session state
    st.session_state.messages.append({"role": "user", "content": prompt})

    if st.session_state.current_image:
        # Persist the uploaded image to disk; rewind the buffer first, since
        # it may already have been consumed on a previous rerun
        st.session_state.current_image.seek(0)
        image = Image.open(st.session_state.current_image)
        current_date = datetime.now().strftime("%Y%m%d")
        image_name = f"{current_date}_{st.session_state.current_image.name}"
        image_path = os.path.join(IMAGE_SAVE_FOLDER, image_name)
        image.save(image_path)

        # Encode the image as base64 and build a multimodal message, using
        # the file's actual MIME type rather than assuming JPEG
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()
        mime_type = st.session_state.current_image.type or "image/jpeg"
        chat = HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{encoded_string}"}},
            ]
        )
    else:
        # Text-only message when no image has been uploaded
        chat = HumanMessage(content=prompt)

    # Get the AI response
    ai_msg = st.session_state.llm.invoke([chat]).content
    with container.chat_message("assistant"):
        st.write(ai_msg)

    # Save the turn in conversation memory and in the visible history
    st.session_state.rag_memory.save_context({"input": prompt}, {"output": ai_msg})
    st.session_state.messages.append({"role": "assistant", "content": ai_msg})
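# Note: rag_memory records each turn above but is never passed back to the
# model, so every invoke() sees only the latest message. A minimal sketch of
# one way to close that loop (assuming the memory's default "history" output
# key, which ConversationSummaryBufferMemory uses) would be to prepend the
# running summary to the prompt before building the HumanMessage:
#
#     history = st.session_state.rag_memory.load_memory_variables({}).get("history", "")
#     contextual_prompt = f"Conversation so far:\n{history}\n\nUser: {prompt}" if history else prompt
#
# and then use contextual_prompt in place of prompt when constructing chat.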