File size: 2,515 Bytes
390ca68
 
 
 
 
af39285
390ca68
 
 
 
 
 
 
 
 
 
 
af39285
390ca68
 
 
af39285
 
390ca68
 
 
 
 
 
af39285
 
390ca68
 
 
 
 
af39285
 
 
390ca68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80a1384
390ca68
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import torch
import gradio as gr
from read import text_recognizer
from model import Model
from utils import CTCLabelConverter
from ultralytics import YOLO
from PIL import ImageDraw

""" vocab / character number configuration """
file = open("UrduGlyphs.txt","r",encoding="utf-8")
content = file.readlines()
content = ''.join([str(elem).strip('\n') for elem in content])
content = content+" "
""" model configuration """
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
converter = CTCLabelConverter(content)
recognition_model = Model(num_class=len(converter.character), device=device)
recognition_model = recognition_model.to(device)
recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
recognition_model.eval()

detection_model = YOLO("yolov8m_UrduDoc.pt")

examples = ["1.jpg","2.jpg","3.jpg"]

input = gr.Image(type="pil",image_mode="RGB", label="Input Image")

def predict(input):
    "Line Detection"
    detection_results = detection_model.predict(source=input, conf=0.2, imgsz=1280, save=False, nms=True, device=device)
    bounding_boxes = detection_results[0].boxes.xyxy.cpu().numpy().tolist()
    bounding_boxes.sort(key=lambda x: x[1])
    
    "Draw the bounding boxes"
    draw = ImageDraw.Draw(input)
    for box in bounding_boxes:
        # draw rectangle outline with random color and width=5
        from numpy import random
        draw.rectangle(box, fill=None, outline=tuple(random.randint(0,255,3)), width=5)
    
    "Crop the detected lines"
    cropped_images = []
    for box in bounding_boxes:
        cropped_images.append(input.crop(box))
    len(cropped_images)
    
    "Recognize the text"
    texts = []
    for img in cropped_images:
        texts.append(text_recognizer(img, recognition_model, converter, device))
    
    "Join the text"
    text = "\n".join(texts)
    
    "Return the image with bounding boxes and the text"
    return input,text

output_image = gr.Image(type="pil",image_mode="RGB",label="Detected Lines")
output_text = gr.Textbox(label="Recognized Text",interactive=True,show_copy_button=True)

iface = gr.Interface(predict,
                     inputs=input,
                     outputs=[output_image,output_text],
                     title="End-to-End Urdu OCR",
                     description="Demo Web App For UTRNet\n(https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition)",
                     examples=examples,
                     allow_flagging="never")
iface.launch()