Spaces:
Sleeping
Sleeping
File size: 1,746 Bytes
e029c8d 149bc14 e029c8d 149bc14 e029c8d 149bc14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
from cnocr import CnOcr
import pandas as pd
def check_telecode(input_string):
    """Return True if *input_string* looks like two or three 4-digit telecodes.

    The string is accepted only when it is exactly 8 or 12 characters long
    and every character is an ASCII digit.

    ``int()`` is deliberately not used for the check: it also accepts a
    leading sign, surrounding whitespace, underscores between digits and
    non-ASCII digits (e.g. ``"+1234567"`` or ``"1_234_56"``), none of which
    can be split into valid 4-digit codes.
    """
    return (
        len(input_string) in (8, 12)
        and input_string.isascii()
        and input_string.isdigit()
    )
def extract_integers(input_string):
    """Split a telecode string into its 4-character code groups.

    Always returns a 3-tuple ``(w1, w2, w3)`` so callers can unpack the
    result uniformly:

    * 12 characters -> three codes.
    * 8 characters  -> two codes, with ``w3`` set to ``None``.
    * any other length -> ``(None, None, None)``.

    The codes are slices of the input string (not ints), so leading zeros
    are preserved.
    """
    length = len(input_string)
    if length == 12:
        return input_string[:4], input_string[4:8], input_string[8:]
    if length == 8:
        # Pad with None so a three-way unpack followed by an
        # ``is not None`` check on the third value works for two-code input.
        return input_string[:4], input_string[4:], None
    return None, None, None
def _lookup_telecode_word(table, code):
    """Return the first ``word`` in *table* whose ``code`` equals *code*, or None."""
    matches = table.loc[table['code'] == code, 'word']
    if matches.empty:
        return None
    return matches.iloc[0]


def get_chinese_name(path):
    """Run OCR on the image at *path* and decode the first telecode line found.

    Each OCR result's ``'text'`` has its spaces removed and is tested with
    ``check_telecode``. The first text that passes, and whose 4-digit codes
    are all present in the ``code`` column of ``hkTelecode.csv``, is
    translated code by code via the ``word`` column and returned as one
    concatenated string.

    Returns:
        The decoded name, or ``""`` when no OCR line yields a fully
        decodable telecode.
    """
    # 'densenet_lite_136-fc' was previously tried here as an alternative
    # recognition model.
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    out = ocr.ocr(path)
    # Read codes as strings so leading zeros survive and compare equal to
    # the string slices produced by extract_integers.
    df = pd.read_csv('hkTelecode.csv', dtype={'code': str}, index_col=False)

    for data in out:
        text = data['text'].replace(' ', '')
        if not check_telecode(text):
            continue

        # Drop None placeholders instead of unpacking a fixed number of
        # values, so both two-code and three-code names are handled.
        codes = [code for code in extract_integers(text) if code is not None]
        words = [_lookup_telecode_word(df, code) for code in codes]

        # A digit run that is not fully covered by the table (e.g. some
        # other 8/12-digit number on the image) is skipped rather than
        # raising IndexError; later OCR lines may still hold the name.
        if any(word is None for word in words):
            continue

        return "".join(words)

    return ""
# Example usage: print(get_chinese_name(r'image\hkid.jpg'))
|