Spaces:

OneFi
/

hf-similarity-check

Sleeping

File size: 1,151 Bytes

from cnocr import CnOcr
import openai
from dotenv import load_dotenv
import os
import json

def _loads_json_object(raw):
    """Parse *raw* as JSON, tolerating extra text around a JSON object.

    Chat models sometimes wrap their answer in a Markdown code fence or add
    a sentence before/after it.  The strict parse is tried first; only if it
    fails is the outermost ``{...}`` span extracted and parsed instead.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            # No object-looking span to fall back on: surface the original error.
            raise
        return json.loads(raw[start:end + 1])


def get_chiname(path):
    """Extract the Chinese name from an ID card image via OCR and a chat model.

    The image is read with CnOcr (``chinese_cht_PP-OCRv3`` recognition
    model), the recognised text fragments are sent to ``gpt-3.5-turbo``, and
    the model's reply is parsed as JSON.

    Args:
        path: Image input forwarded unchanged to ``CnOcr.ocr``.

    Returns:
        The value decoded from the model's JSON reply.  The prompt asks for a
        dictionary containing the Chinese name; the exact key is chosen by
        the model and is not validated here.

    Raises:
        json.JSONDecodeError: if the model's reply contains no parseable JSON.
    """
    ocr = CnOcr(rec_model_name='chinese_cht_PP-OCRv3')
    ocr_items = ocr.ocr(path)

    load_dotenv()
    api_key = os.environ.get("data-extraction-api")
    if api_key:
        # Only override when the variable is set, so a key configured
        # elsewhere is not replaced by None.
        openai.api_key = api_key

    # Fragments that are only a separator carry no information for the model.
    ignored = (' ', ',')
    texts = [item['text'] for item in ocr_items if item['text'] not in ignored]

    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation embedded the source indentation into the prompt.
    user_prompt = (
        f"Extract data from the following set of text: {texts}. "
        "You only need to return a dictonary with Chinese name in Chinese. "
        "Use double quote."
    )

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": "You are an AI assistant for extracting Chinese name (usually in length of three) from Hong Kong ID card."},
            {"role": "user", "content": user_prompt},
        ],
    )

    reply = completion['choices'][0]['message']['content']
    return _loads_json_object(reply)