File size: 1,746 Bytes
e029c8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149bc14
 
 
 
e029c8d
149bc14
e029c8d
149bc14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from cnocr import CnOcr
import pandas as pd

def check_telecode(input_string):
    """Return True if *input_string* is exactly 8 or 12 ASCII digits.

    An 8-character string corresponds to two 4-digit groups and a
    12-character string to three 4-digit groups (see ``extract_integers``
    for the splitting).

    Args:
        input_string: Candidate text, expected to already have spaces
            removed by the caller.

    Returns:
        True when the length is 8 or 12 and every character is one of
        ``0``-``9``; False otherwise.
    """
    if len(input_string) not in (8, 12):
        return False
    # Do not use int() as the validator: it also accepts a leading sign,
    # surrounding whitespace, "_" separators and non-ASCII digits, none of
    # which can be split into 4-digit groups and looked up afterwards.
    return all("0" <= ch <= "9" for ch in input_string)

def extract_integers(input_string):
    """Split a telecode string into consecutive 4-character groups.

    Despite the name, the groups are returned as strings (slices of the
    input), not as ``int`` values, so leading zeros are preserved.

    Args:
        input_string: A string of length 12 (three groups) or 8 (two groups).

    Returns:
        Always a 3-tuple ``(w1, w2, w3)``:
          * length 12 -> three 4-character strings;
          * length 8  -> two 4-character strings and ``None`` for ``w3``;
          * any other length -> ``(None, None, None)``.
    """
    if len(input_string) == 12:
        return input_string[:4], input_string[4:8], input_string[8:]
    if len(input_string) == 8:
        # Pad with None so callers can always unpack three values and test
        # ``w3 is not None``; a 2-tuple here would make that unpacking raise.
        return input_string[:4], input_string[4:], None
    return None, None, None

def get_chinese_name(path):
    """OCR an image and decode the first telecode line found into text.

    The image at *path* is passed to CnOcr. Each recognized text line has
    its spaces removed and is tested with ``check_telecode``. The first line
    that passes is split into 4-digit groups with ``extract_integers``; each
    group is looked up in the ``code`` column of ``hkTelecode.csv`` (read
    from the current working directory) and the matching ``word`` values
    are concatenated.

    Args:
        path: Image passed straight to ``CnOcr.ocr``.

    Returns:
        The concatenated ``word`` values for the first decodable line, or
        ``""`` if no line is a telecode whose groups are all in the table.
    """
    # NOTE: 'densenet_lite_136-fc' was previously tried as the recognition
    # model; 'en_PP-OCRv3' is the one in use.
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    recognized = ocr.ocr(path)

    table = pd.read_csv('hkTelecode.csv', dtype={'code': str}, index_col=False)
    # Build a code -> word mapping once; setdefault keeps the first row for
    # a duplicated code, matching a "first match" lookup.
    code_to_word = {}
    for code, word in zip(table['code'], table['word']):
        code_to_word.setdefault(code, word)

    for entry in recognized:
        text = entry['text'].replace(' ', '')
        if not check_telecode(text):
            continue

        # extract_integers may pad with None (two-group codes); drop the
        # padding so 8- and 12-digit codes are handled uniformly.
        groups = [g for g in extract_integers(text) if g is not None]
        if not groups:
            continue

        try:
            return "".join(code_to_word[g] for g in groups)
        except KeyError:
            # A group is not in the table (likely an OCR misread or an
            # unrelated number); try the next recognized line instead of
            # aborting the whole lookup.
            continue

    return ""

# print(get_chinese_name('image\hkid.jpg'))