import streamlit as st import torch from PIL import Image from qwen_vl_utils import process_vision_info from transformers import Qwen2VLForConditionalGeneration, AutoProcessor import time @st.cache_resource def load_model(): model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype=torch.float32).eval() processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True) return model, processor model, processor = load_model() st.title("Image Query App") uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) st.sidebar.title("Suggested Questions") predefined_questions = [ "What is the main object in this image?", "Describe the scene in the image.", "Are there any people in the image?", "What is the background of the image?" ] selected_question = st.sidebar.radio("Choose a question", predefined_questions) question = st.sidebar.text_input("Or ask your own question here:") submit_button = st.sidebar.button("Submit") response = "" if uploaded_file is not None: image = Image.open(uploaded_file) original_size = image.size st.write(f"Original image dimensions: {original_size}") max_size = (700, 700) if image.size[0] > 1000 or image.size[1] > 1000: image.thumbnail(max_size) resized_size = image.size st.write(f"Image resized to: {resized_size}") else: st.write("Image size is within acceptable limits.") if not question: question = selected_question if submit_button: st.sidebar.markdown("
{response}
", unsafe_allow_html=True) st.markdown(f"Time taken to fetch the answer: {elapsed_time:.2f} seconds
", unsafe_allow_html=True) if uploaded_file is not None: st.image(image, caption='Uploaded Image', use_column_width=True)