Kabilash10 committed
Commit
c54eb78
1 Parent(s): 7d41561

Upload app.py

Files changed (1)
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
+ import streamlit as st
+ from PIL import Image
+ import torch
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+
+ # Title of the app
+ st.title("OCR with Qwen2-VL-7B-Instruct")
+
+ # Load the processor and model once and cache them across Streamlit reruns
+ @st.cache_resource
+ def load_model():
+     model_id = "Qwen/Qwen2-VL-7B-Instruct"
+     processor = AutoProcessor.from_pretrained(model_id)
+     model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype="auto")
+     return processor, model
+
+ st.write("Loading model...")
+ processor, model = load_model()
+ st.write("Model loaded successfully!")
+
+ # Upload image section
+ uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
+
+ if uploaded_image is not None:
+     # Open the image and normalize it to RGB for the processor
+     image = Image.open(uploaded_image).convert("RGB")
+
+     # Display the uploaded image
+     st.image(image, caption="Uploaded Image", use_container_width=True)
+
+     # Process the image using the model
+     st.write("Processing the image...")
+
+     # Qwen2-VL expects a chat-style prompt that pairs the image with an instruction
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image"},
+                 {"type": "text", "text": "Extract all the text in this image."},
+             ],
+         }
+     ]
+     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = processor(text=[prompt], images=[image], return_tensors="pt")
+
+     # Generate text (OCR) from the image
+     with torch.no_grad():
+         generated_ids = model.generate(**inputs, max_new_tokens=512)
+
+     # Decode only the newly generated tokens, not the echoed prompt
+     generated_text = processor.batch_decode(
+         generated_ids[:, inputs["input_ids"].shape[1]:],
+         skip_special_tokens=True,
+     )[0]
+
+     # Display the OCR result
+     st.write("Extracted Text:")
+     st.text(generated_text)
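
Note: the file above loads the 7B checkpoint with default settings, which will run on CPU and be slow. A minimal sketch of a GPU-aware loader, assuming a CUDA device and the `accelerate` package are available; everything else would match the file above:

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

model_id = "Qwen/Qwen2-VL-7B-Instruct"
processor = AutoProcessor.from_pretrained(model_id)
# device_map="auto" places the model on available GPUs (requires `accelerate`);
# float16 roughly halves the memory footprint of the 7B weights
# (assumption: the GPU supports fp16).
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Inputs must then live on the same device as the model before generate(), e.g.:
# inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)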