Spaces:
Sleeping
Sleeping
File size: 1,151 Bytes
e029c8d 149bc14 e029c8d 149bc14 e029c8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from cnocr import CnOcr
import openai
from dotenv import load_dotenv
import os
import json
def _parse_json_object(text):
    """Parse *text* as JSON, tolerating extra text around a JSON object.

    Chat models sometimes wrap the requested JSON in Markdown code fences or
    add a sentence of prose around it. Strict parsing is attempted first; if
    that fails, the span from the first ``{`` to the last ``}`` is parsed
    instead.

    Args:
        text: Raw reply text returned by the chat model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If neither attempt yields valid JSON. The error
            from the strict attempt is the one that propagates.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError as strict_error:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end < start:
            raise
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            # Report the failure against the full reply, not the trimmed span.
            raise strict_error from None


def get_chiname(path):
    """Extract the Chinese name from a Hong Kong ID card image.

    Runs CnOcr (``chinese_cht_PP-OCRv3`` recognition model) on ``path``,
    collects the recognized text fragments, and asks ``gpt-3.5-turbo`` to
    return the Chinese name as a JSON dictionary.

    Args:
        path: Image reference handed unchanged to ``CnOcr.ocr``.

    Returns:
        The decoded JSON reply from the model. The prompt asks for a
        dictionary holding the Chinese name; the exact keys are chosen by the
        model and are not validated here.

    Raises:
        json.JSONDecodeError: If the model reply contains no parseable JSON.
    """
    ocr = CnOcr(rec_model_name='chinese_cht_PP-OCRv3')
    ocr_items = ocr.ocr(path)

    load_dotenv()
    api_key = os.environ.get("data-extraction-api")
    if api_key:
        # Only override when the variable is actually set; assigning None
        # would overwrite any key already configured on the openai module.
        openai.api_key = api_key

    # Fragments that are only a lone space or comma carry no information.
    invalid_texts = (' ', ',')
    data_set_1 = [
        item['text'] for item in ocr_items if item['text'] not in invalid_texts
    ]

    # Adjacent literals are concatenated by the parser, so source indentation
    # can never leak into the prompt the way a backslash continuation allows.
    user_prompt = (
        f"Extract data from the following set of text: {data_set_1}. "
        "You only need to return a dictonary with Chinese name in Chinese. Use double quote."
    )
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": "You are an AI assistant for extracting Chinese name (usually in length of three) from Hong Kong ID card."},
            {"role": "user", "content": user_prompt},
        ],
    )
    data = completion['choices'][0]['message']['content']
    return _parse_json_object(data)