import os 
import base64
from io import BytesIO
from PIL import Image
import streamlit as st
from langchain.memory import ConversationSummaryBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from datetime import datetime
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
load_dotenv()

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# Define title and layout
st.set_page_config(page_title="Vision Bot", layout="wide")
# The Gemini API key is read from the environment (a GOOGLE_API_KEY entry in .env,
# loaded by load_dotenv above) rather than being hard-coded in source.
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()
st.title("Vision Bot")


llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_tokens=4000
)
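# Note: gemini-1.5-flash is a multimodal Gemini model, so this one client can handle
# both the text-only and the image+text prompts built further below.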

IMAGE_SAVE_FOLDER = "./uploaded_images"
os.makedirs(IMAGE_SAVE_FOLDER, exist_ok=True)

st.markdown(
    """
<style>
    .sidebar-content {
        background-color: #f1f3f6;
        padding: 20px;
        border-radius: 10px;
        text-align: left;
        box-shadow: 0px 0px 10px rgba(0,0,0,0.1);
    }
    .st-emotion-cache-janbn0 {
        flex-direction: row-reverse;
        text-align: right;
    }
    .uploaded-image {
        border: 2px solid #D1D1D1;
        border-radius: 8px;
        margin-top: 10px;
    }
</style>
""",
    unsafe_allow_html=True,
)
# Initialize session states
if "messages" not in st.session_state:
    st.session_state.messages = []
if "llm" not in st.session_state:
    st.session_state.llm = llm
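# ConversationSummaryBufferMemory keeps recent turns verbatim and summarizes older ones
# once max_token_limit is exceeded; it is filled via save_context() after each answer.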
if "rag_memory" not in st.session_state:
    st.session_state.rag_memory = ConversationSummaryBufferMemory(llm=st.session_state.llm, max_token_limit=5000)
if "current_image" not in st.session_state:
    st.session_state.current_image = None
if "last_displayed_image" not in st.session_state:
    st.session_state.last_displayed_image = None

container = st.container()
with st.sidebar:
    st.markdown(
        """
        <div class="sidebar-content">
        <h2>Vision Bot</h2>
        <p>This is Vision Bot where you can ask any question regarding any image. It can perform various tasks such as:</p>
        <ul>
            <li><b>Image Captioning</b></li>
            <li><b>Answering text-related queries inside the image</b></li>
            <li><b>OCR (Optical Character Recognition)</b></li>
            <li><b>Image Analysis & Description</b></li>
        </ul>
        </div>
        """,
        unsafe_allow_html=True,
    )
# Upload image
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png","webp"], key="image_uploader")

# Check if a new image is uploaded
if uploaded_image and uploaded_image != st.session_state.current_image:
    st.session_state.current_image = uploaded_image
    
    # Show the newly uploaded image at a reduced, fixed width
    st.image(uploaded_image, caption="Newly Uploaded Image", width=300)
    
    # Add a system message to mark the new image in the conversation
    st.session_state.messages.append({
        "role": "system", 
        "content": f"New image uploaded: {uploaded_image.name}",
        "image": uploaded_image
    })

# Display messages
for message in st.session_state.messages:
    with container.chat_message(message["role"]):
        if message["role"] == "system" and "image" in message:
            # Re-display the image in chat history at the same fixed width
            st.image(message["image"], width=300)
        st.write(message["content"])

# Take prompt
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)

    # Save user input in session state
    st.session_state.messages.append({"role": "user", "content": prompt})

    if st.session_state.current_image:
        # Save uploaded image to disk
        image = Image.open(st.session_state.current_image)
        current_date = datetime.now().strftime("%Y%m%d")
        image_name = f"{current_date}_{st.session_state.current_image.name}"
        image_path = os.path.join(IMAGE_SAVE_FOLDER, image_name)
        image.save(image_path)

        # Encode the image as base64; take the MIME type from the upload instead of
        # assuming JPEG, since PNG and WebP uploads are also accepted
        mime_type = st.session_state.current_image.type or "image/jpeg"
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()

        # Send image and text to the model
        chat = HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_string}"}},
            ]
        )
    else:
        # Send only text to the model if no image is uploaded
        chat = HumanMessage(content=prompt)

    # Get AI response
    ai_msg = st.session_state.llm.invoke([chat]).content
    with container.chat_message("assistant"): 
        st.write(ai_msg)

    # Save the conversation context in memory
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': ai_msg})
    
    # Append the assistant's message to the session state
    st.session_state.messages.append({"role": "assistant", "content": ai_msg})
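
# ---------------------------------------------------------------------------
# Possible extension (sketch only, not wired into the flow above): the summary
# stored in rag_memory is written after each turn but never read back, so every
# question is answered without prior conversational context. One way to include
# it would be to prepend the summarized history to the text part of the prompt
# before invoking the model, e.g.:
#
#   history = st.session_state.rag_memory.load_memory_variables({})["history"]
#   contextual_prompt = f"Conversation so far:\n{history}\n\nUser question: {prompt}"
#
# and then use `contextual_prompt` in place of `prompt` when building the
# HumanMessage. The exact key name ("history") and formatting depend on the
# LangChain version in use, so treat this as an illustrative sketch.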