Kabilash10 committed
Commit
c54eb78
1 Parent(s): 7d41561

Upload app.py

Files changed (1)
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
+ import streamlit as st
+ from PIL import Image
+ import torch
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+
+ # Title of the app
+ st.title("OCR with Qwen2-VL-7B-Instruct")
+
+ # Load the processor and model once and cache them across Streamlit reruns
+ @st.cache_resource
+ def load_model():
+     model_id = "Qwen/Qwen2-VL-7B-Instruct"
+     processor = AutoProcessor.from_pretrained(model_id)
+     model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype="auto")
+     return processor, model
+
+ st.write("Loading model...")
+ processor, model = load_model()
+ st.write("Model loaded successfully!")
+
+ # Upload image section
+ uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
+
+ if uploaded_image is not None:
+     # Open the image and normalize it to RGB for the processor
+     image = Image.open(uploaded_image).convert("RGB")
+
+     # Display the uploaded image
+     st.image(image, caption="Uploaded Image", use_container_width=True)
+
+     # Process the image using the model
+     st.write("Processing the image...")
+
+     # Qwen2-VL expects a chat-style prompt that pairs the image with an instruction
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image"},
+                 {"type": "text", "text": "Extract all the text in this image."},
+             ],
+         }
+     ]
+     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = processor(text=[prompt], images=[image], return_tensors="pt")
+
+     # Generate text (OCR) from the image
+     with torch.no_grad():
+         generated_ids = model.generate(**inputs, max_new_tokens=512)
+
+     # Decode only the newly generated tokens, not the echoed prompt
+     generated_text = processor.batch_decode(
+         generated_ids[:, inputs["input_ids"].shape[1]:],
+         skip_special_tokens=True,
+     )[0]
+
+     # Display the OCR result
+     st.write("Extracted Text:")
+     st.text(generated_text)
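
Note: the file above loads the 7B checkpoint with default settings, which will run on CPU and be slow. A minimal sketch of a GPU-aware loader, assuming a CUDA device and the `accelerate` package are available; everything else would match the file above:

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

model_id = "Qwen/Qwen2-VL-7B-Instruct"
processor = AutoProcessor.from_pretrained(model_id)
# device_map="auto" places the model on available GPUs (requires `accelerate`);
# float16 roughly halves the memory footprint of the 7B weights
# (assumption: the GPU supports fp16).
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Inputs must then live on the same device as the model before generate(), e.g.:
# inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)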