marcotam committed on
Commit
4101236
·
1 Parent(s): 9687104

Upload model0.py

Browse files
Files changed (1) hide show
  1. model0.py +20 -7
model0.py CHANGED
@@ -3,6 +3,7 @@ import openai
3
  from dotenv import load_dotenv
4
  import os
5
  import json
 
6
 
7
  def model0(path):
8
  ocr = CnOcr(rec_model_name='en_PP-OCRv3')
@@ -19,25 +20,29 @@ def model0(path):
19
  if item['text'] not in invalid_list:
20
  data_set_1.append(item['text'])
21
 
 
 
22
  completion = openai.ChatCompletion.create(
23
  model = "gpt-3.5-turbo",
24
  temperature = 0,
25
  messages = [
26
  {"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
27
- (name, HKID number, date of issue) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
28
  dictionary format"},
29
  {"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
30
  You have three types of data to extract. \
31
  1. id card holder full name (it noramlly is a chinese name, including surname and family \
32
  name in English spelling, and it may be separate in different fields in the data set for surname and family name \
33
  sometimes) \
34
- 2. issue date (should be a date with month and day, e.g. 19-97 is the required format, but 26-11-18 is not \
35
- because date of issue of have 5 characters) Only choose valid format!!! \
36
- 3. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
 
37
  (a) @ represents any one or two capital letters of the alphabet. \
38
  (b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
39
  Remember to include the check digit with () \
40
  Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
 
41
  ]
42
  )
43
 
@@ -46,9 +51,17 @@ def model0(path):
46
  print(data)
47
 
48
  id_data = json.loads(data)
 
 
 
 
 
 
 
 
 
 
49
 
50
  print(id_data)
51
- return
52
  # return [name, valid_hkid, hkid, issuedate]
53
-
54
- model0('dontTouchMe/IMG_4499.jpg')
 
3
  from dotenv import load_dotenv
4
  import os
5
  import json
6
+ import checkTool
7
 
8
  def model0(path):
9
  ocr = CnOcr(rec_model_name='en_PP-OCRv3')
 
20
  if item['text'] not in invalid_list:
21
  data_set_1.append(item['text'])
22
 
23
+ print(f'All data here: {data_set_1}')
24
+
25
  completion = openai.ChatCompletion.create(
26
  model = "gpt-3.5-turbo",
27
  temperature = 0,
28
  messages = [
29
  {"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
30
+ (name, date of birth, date of issue, HKID number) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
31
  dictionary format"},
32
  {"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
33
  You have three types of data to extract. \
34
  1. id card holder full name (it noramlly is a chinese name, including surname and family \
35
  name in English spelling, and it may be separate in different fields in the data set for surname and family name \
36
  sometimes) \
37
+ 2. date of birth (should be a date with year, month and day, e.g. 23-02-2003 is the required format, but 26-11 is not \
38
+ because date of birth should have 10 characters) Only choose valid format!!!\
39
+ 3. date of issue (a string with format xx-xx) \
40
+ 4. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
41
  (a) @ represents any one or two capital letters of the alphabet. \
42
  (b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
43
  Remember to include the check digit with () \
44
  Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
45
+
46
  ]
47
  )
48
 
 
51
  print(data)
52
 
53
  id_data = json.loads(data)
54
+
55
+ name = id_data["name"]
56
+ dateofbirth = id_data["date of birth"]
57
+ issuedate = id_data["date of issue"]
58
+ hkid = id_data["HKID number"]
59
+ if checkTool.validate_hkid(hkid=hkid):
60
+ valid_hkid = 'True'
61
+ else:
62
+ valid_hkid = 'False'
63
+ name = checkTool.seperate_name(name)
64
 
65
  print(id_data)
66
+ return [name, valid_hkid, hkid, issuedate, dateofbirth]
67
  # return [name, valid_hkid, hkid, issuedate]