Saurabh1207 commited on
Commit
ff470d8
·
verified ·
1 Parent(s): d4217b8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq
3
+ from PIL import Image
4
+ import requests
5
+ import matplotlib.pyplot as plt
6
+
7
+ device = "cuda" if torch.cuda.is_available() else "cpu"
8
+
9
+ # Load processor and model
10
+ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
11
+ model = AutoModelForVision2Seq.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
12
+
13
+ def perform_ocr(image_path: str):
14
+ # Load image
15
+ image = Image.open(image_path).convert("RGB")
16
+
17
+ # Preprocess image
18
+ inputs = processor(images=image, return_tensors="pt").to(device)
19
+
20
+ # Generate text
21
+ with torch.no_grad():
22
+ generated_ids = model.generate(**inputs)
23
+
24
+ # Decode generated text
25
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
26
+ return extracted_text
27
+
28
+ # Example usage
29
+ if __name__ == "__main__":
30
+ IMAGE_PATH = "Images\Hindi-to-English-sentences-translation.jpg" # Replace with the path to your image
31
+
32
+ # Perform OCR
33
+ extracted_text = perform_ocr(IMAGE_PATH)
34
+
35
+ # Display results
36
+ print("Extracted Text:", extracted_text)
37
+
38
+ # Show image
39
+ img = Image.open(IMAGE_PATH)
40
+ plt.imshow(img)
41
+ plt.axis("off")
42
+ plt.show()