File size: 1,588 Bytes
5b765fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os

# NOTE(review): KMP_DUPLICATE_LIB_OK is read by the Intel OpenMP runtime; it is
# presumably set here, ahead of the remaining imports, so that libraries which
# each bundle their own OpenMP copy can be loaded together — confirm it is
# still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import time
import requests
from io import BytesIO

import utility
from detector import *
from recognizer import *

# Global detector and recognizer: built once when this module is loaded and
# reused by every apply_ocr() call.
# NOTE(review): utility.parse_args() presumably reads the command line, which
# would make importing this module depend on sys.argv — confirm.
args = utility.parse_args()
text_recognizer = TextRecognizer(args)
text_detector = TextDetector(args)


def apply_ocr(img):
    """Detect text regions in ``img``, recognize each one and join the text.

    Every detected polygon is reduced to its axis-aligned bounding rectangle,
    the rectangle is cropped out of ``img`` and the crops are handed to the
    global ``text_recognizer`` in reverse detection order.

    Args:
        img: Image array indexed as ``img[row, col]``. main() passes an RGB
            ``numpy.ndarray``; other layouts are untested (assumption).

    Returns:
        str: The recognized strings joined by single spaces, with leading and
        trailing whitespace stripped. An empty string is returned when no
        usable text region is found.
    """
    dt_boxes, _ = text_detector(img)

    # Explicit None/len check: truth-testing a multi-element numpy array
    # raises, and the detector may report "nothing found" as None.
    if dt_boxes is None or len(dt_boxes) == 0:
        return ""

    height, width = img.shape[:2]

    crops = []
    for box in dt_boxes:
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]

        # Clamp to the image: a negative start would otherwise be read by
        # slicing as an index counted from the far edge.
        left = max(int(min(xs)), 0)
        top = max(int(min(ys)), 0)
        right = min(int(max(xs)), width)
        bottom = min(int(max(ys)), height)

        # Skip degenerate rectangles; an empty crop carries no text.
        if right <= left or bottom <= top:
            continue

        # Copy only the cropped region instead of the whole image per box.
        crops.append(img[top:bottom, left:right].copy())

    if not crops:
        return ""

    # Crops are recognized in reverse detection order, as before.
    # NOTE(review): presumably this yields the intended reading order — confirm.
    crops.reverse()

    rec_res, _ = text_recognizer(crops)

    # Each recognition result carries its text at index 0.
    return " ".join(result[0] for result in rec_res).strip()


def main():
    """Download a sample image, run OCR on it and print timing and text.

    The image is fetched over HTTP, decoded to an RGB array, and passed to
    apply_ocr() ``epoch`` times; the average wall-clock time per run in
    milliseconds and the last OCR result are printed.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    image_url = "https://i.ibb.co/kQvHGjj/aewrg.png"

    # A timeout keeps an unresponsive host from hanging the script, and
    # raise_for_status() stops an HTTP error page from reaching the decoder.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()

    # NOTE(review): `np` and `Image` are not imported by name in this file;
    # presumably they are re-exported by the star imports — confirm.
    img = np.array(Image.open(BytesIO(response.content)).convert("RGB"))

    epoch = 1
    ocr_text = ""
    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    t0 = time.perf_counter()
    for _ in range(epoch):
        ocr_text = apply_ocr(img)
    print("Elapsed time:", (time.perf_counter() - t0) * 1000 / epoch, "ms")
    print("Output:", ocr_text)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()