import json
import logging
import os

from cnocr import CnOcr
from dotenv import load_dotenv
import openai

logger = logging.getLogger(__name__)

# OCR fragments that carry no information and are dropped before prompting.
_INVALID_FRAGMENTS = (' ', ',')


def _parse_json_reply(reply):
    """Parse a chat reply as JSON, tolerating text around the JSON object.

    The reply is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which covers replies wrapped
    in a Markdown code fence or a short explanatory sentence.

    Raises:
        json.JSONDecodeError: if neither the reply nor its outermost
            ``{...}`` span is valid JSON.
    """
    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        start = reply.find('{')
        end = reply.rfind('}')
        if start == -1 or end <= start:
            # Nothing that looks like a JSON object: surface the original error.
            raise
        return json.loads(reply[start:end + 1])


def get_chiname(path):
    """Extract the Chinese name from an image of a Hong Kong ID card.

    The image is run through CnOcr, the recognised text fragments are sent
    to the OpenAI chat completion endpoint, and the model's reply is parsed
    as JSON.

    Args:
        path: Image location, passed straight to ``CnOcr.ocr``.

    Returns:
        The JSON-decoded reply of the model. The prompt asks for a dictionary
        holding the Chinese name, but the exact keys are chosen by the model.

    Raises:
        json.JSONDecodeError: if the model's reply contains no valid JSON.
    """
    ocr = CnOcr(rec_model_name='chinese_cht_PP-OCRv3')
    out = ocr.ocr(path)
    # OCR output of an ID card is personal data: keep it out of stdout and
    # only emit it when debug logging is explicitly enabled.
    logger.debug("OCR output: %s", out)

    # The API key is read from the environment (optionally populated from a
    # .env file) under the variable name "data-extraction-api".
    load_dotenv()
    openai.api_key = os.environ.get("data-extraction-api")

    data_set_1 = [
        item['text'] for item in out
        if item['text'] not in _INVALID_FRAGMENTS
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant for extracting Chinese name (usually in length of three) from Hong Kong ID card.",
            },
            {
                "role": "user",
                # Adjacent literals are concatenated, so no backslash or
                # source indentation leaks into the prompt text.
                "content": (
                    f"Extract data from the following set of text: {data_set_1}. "
                    "You only need to return a dictonary with Chinese name in Chinese. Use double quote."
                ),
            },
        ],
    )

    data = completion['choices'][0]['message']['content']
    return _parse_json_reply(data)