Mitul Mohammad Abdullah Al Mukit committed on
Commit
9312707
·
1 Parent(s): ddf4d52
.gitignore CHANGED
@@ -1,5 +1,9 @@
1
- image/*
2
- saved/*
3
- image
4
- saved
5
- .DS_Store
 
 
 
 
 
1
+ image/hkid.jpg
2
+ saved/HKID.jpg
3
+ .DS_Store
4
+ bank_statement/*
5
+ bank_statement
6
+ data1.txt
7
+ .env
8
+ test.py
9
+ dontTouchMe
README.md CHANGED
@@ -1,13 +1,24 @@
1
- ---
2
- title: Similarity Check
3
- emoji: 👁
4
- colorFrom: purple
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.21.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Similarity_check
2
+
3
+ This application performs a similarity check on a user's name during registration.
4
+
5
+ ## Installation
6
+
7
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to install packages.
8
+ ```
9
+ pip install -r requirements.txt
10
+ ```
11
+
12
+ ## Usage
13
+
14
+ Run web UI
15
+ ```
16
+ streamlit run webapp.py
17
+ ```
18
+
19
+ ## Uploading Files
20
+
21
+ The HKID and the bank statement must be provided in jpg/jpeg format to run the application.
22
+
23
+ ## Connection to database
24
+ The connection to the database is made through API requests; the related code can be found in the transaction_api repository.
__pycache__/check_hkid_validity.cpython-311.pyc ADDED
Binary file (3.15 kB). View file
 
__pycache__/demo.cpython-311.pyc CHANGED
Binary files a/__pycache__/demo.cpython-311.pyc and b/__pycache__/demo.cpython-311.pyc differ
 
__pycache__/extract_pdf.cpython-311.pyc CHANGED
Binary files a/__pycache__/extract_pdf.cpython-311.pyc and b/__pycache__/extract_pdf.cpython-311.pyc differ
 
__pycache__/extraction_data.cpython-311.pyc ADDED
Binary file (4.21 kB). View file
 
__pycache__/imageSegmentation.cpython-311.pyc CHANGED
Binary files a/__pycache__/imageSegmentation.cpython-311.pyc and b/__pycache__/imageSegmentation.cpython-311.pyc differ
 
__pycache__/similarity_check.cpython-311.pyc CHANGED
Binary files a/__pycache__/similarity_check.cpython-311.pyc and b/__pycache__/similarity_check.cpython-311.pyc differ
 
check_hkid_validity.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnocr import CnOcr
2
+
3
+ def string_similarity(s1, s2): # Levenshtein distance algorithm
4
+
5
+ s1 = s1.replace(' ','')
6
+ s1 = s1.lower()
7
+ s2 = s2.replace(' ','')
8
+ s2 = s2.lower()
9
+
10
+ if s1 == s2:
11
+ return 100.0
12
+
13
+ len1 = len(s1)
14
+ len2 = len(s2)
15
+ matrix = [[0] * (len2 + 1) for _ in range(len1 + 1)]
16
+
17
+ for i in range(len1 + 1):
18
+ matrix[i][0] = i
19
+
20
+ for j in range(len2 + 1):
21
+ matrix[0][j] = j
22
+
23
+ for i in range(1, len1 + 1):
24
+ for j in range(1, len2 + 1):
25
+ if s1[i - 1] == s2[j - 1]:
26
+ cost = 0
27
+ else:
28
+ cost = 1
29
+ matrix[i][j] = min(matrix[i - 1][j] + 1, # deletion
30
+ matrix[i][j - 1] + 1, # insertion
31
+ matrix[i - 1][j - 1] + cost) # substitution
32
+
33
+ similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
34
+ return round(similarity, 1)
35
+
36
+ def is_good_subsequence(s1, s2):
37
+ len_s2 = len(s2)
38
+ len_s1 = len(s1)
39
+
40
+ s1 = s1.lower()
41
+ s2 = s2.lower()
42
+
43
+ if len_s2 > len_s1 + 10:
44
+ return False
45
+
46
+ # Initialize variables for counting matches
47
+ match_count = 0
48
+ s1_index = 0
49
+
50
+ # Iterate over each character in s2
51
+ for char in s2:
52
+ # Search for the character in s1 starting from the last matched index
53
+ while s1_index < len_s1:
54
+ if s1[s1_index] == char:
55
+ match_count += 1
56
+ s1_index += 1
57
+ break
58
+ s1_index += 1
59
+
60
+ # Check if the match count is at least 50% of s2 length
61
+ return match_count >= (0.5 * len_s2)
62
+
63
+ def check_hkid(path):
64
+ ocr = CnOcr(rec_model_name='en_PP-OCRv3')
65
+ # ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
66
+ out = ocr.ocr(path)
67
+
68
+ for data in out:
69
+ text = data['text']
70
+
71
+ if string_similarity('HONGKONGPERMANENTIDENTITYCARD', text) > 60:
72
+ return True
73
+
74
+ return False
75
+
76
+ # print(check_hkid('image/hkid.jpg'))
demo.py CHANGED
@@ -13,6 +13,8 @@ import Visualization_utilities as vis
13
  # Load a sample picture and learn how to recognize it.
14
 
15
  def get_face_encoding(path):
 
 
16
  HKID_cropped = imageSegmentation.auto_cropping(path)
17
  cv2.imwrite('saved/HKID.jpg', HKID_cropped)
18
  HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
@@ -63,6 +65,8 @@ def process_frame(frame, process_this_frame, face_locations, faces, face_names,
63
 
64
  hkid_face_encoding = get_face_encoding("image")
65
 
 
 
66
  known_face_encodings = [
67
  hkid_face_encoding
68
  ]
 
13
  # Load a sample picture and learn how to recognize it.
14
 
15
  def get_face_encoding(path):
16
+ print(f'path: {path}')
17
+ print('hello')
18
  HKID_cropped = imageSegmentation.auto_cropping(path)
19
  cv2.imwrite('saved/HKID.jpg', HKID_cropped)
20
  HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
 
65
 
66
  hkid_face_encoding = get_face_encoding("image")
67
 
68
+ print(f'encoding: {hkid_face_encoding}')
69
+
70
  known_face_encodings = [
71
  hkid_face_encoding
72
  ]
extract_pdf.py CHANGED
@@ -43,7 +43,7 @@ def get_info_from_bank(img_path, file_name):
43
  out = ocr.ocr(img_path)
44
  # Data
45
  bank_data = {
46
- "name_on_bs": "",
47
  "address": "",
48
  "bank": check_bank_name(file_name),
49
  "date": "",
@@ -67,8 +67,8 @@ def get_info_from_bank(img_path, file_name):
67
  pass
68
  elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
69
  if (raw_detected_text != ''): # name
70
- bank_data["name_on_bs"] += raw_detected_text
71
- bank_data["name_on_bs"] = check_mr(bank_data["name_on_bs"])
72
  elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
73
  if (raw_detected_text != ''): # position
74
  bank_data["address"] += raw_detected_text
@@ -100,13 +100,13 @@ def get_info_from_bank(img_path, file_name):
100
 
101
 
102
  # print('------------From bank statement------------')
103
- # print(f'Name: {bank_data["name_on_bs"]}')
104
  # print(f'Address: {bank_data["address"]}')
105
  # print(f'Bank: {bank_data["bank"]}')
106
  # print(f'Date: {bank_data["date"]}')
107
  # print(f'Asset: {asset_equa} = {bank_data["asset"]}')
108
  # print(f'Liabilities: {bank_data["liabilities"]}')
109
- # post_data(bank_data["bank"], bank_data["name_on_bs"], bank_data["address"], bank_data["asset"], bank_data["liabilities"], bank_data["date"])
110
  return bank_data
111
 
112
  ########## Posting data through API ############
 
43
  out = ocr.ocr(img_path)
44
  # Data
45
  bank_data = {
46
+ "nameStatement": "",
47
  "address": "",
48
  "bank": check_bank_name(file_name),
49
  "date": "",
 
67
  pass
68
  elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
69
  if (raw_detected_text != ''): # name
70
+ bank_data["nameStatement"] += raw_detected_text
71
+ bank_data["nameStatement"] = check_mr(bank_data["nameStatement"])
72
  elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
73
  if (raw_detected_text != ''): # position
74
  bank_data["address"] += raw_detected_text
 
100
 
101
 
102
  # print('------------From bank statement------------')
103
+ # print(f'Name: {bank_data["nameStatement"]}')
104
  # print(f'Address: {bank_data["address"]}')
105
  # print(f'Bank: {bank_data["bank"]}')
106
  # print(f'Date: {bank_data["date"]}')
107
  # print(f'Asset: {asset_equa} = {bank_data["asset"]}')
108
  # print(f'Liabilities: {bank_data["liabilities"]}')
109
+ # post_data(bank_data["bank"], bank_data["nameStatement"], bank_data["address"], bank_data["asset"], bank_data["liabilities"], bank_data["date"])
110
  return bank_data
111
 
112
  ########## Posting data through API ############
extraction_data.py CHANGED
@@ -2,6 +2,9 @@
2
  from cnocr import CnOcr
3
  from pdfquery import PDFQuery
4
  import openai
 
 
 
5
 
6
  def validate(text):
7
  invalid_list = [' ',',']
@@ -39,58 +42,40 @@ def check_mr(text):
39
  else:
40
  return text
41
 
42
- def get_info_from_bank(img_path, pdf_path):
43
  # Running the model
44
  ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
45
  out = ocr.ocr(img_path)
46
 
47
- # Data
48
- bank_data = {
49
- "name_on_bs": "",
50
- "address": "",
51
- "bank": "",
52
- "date": "",
53
- "asset": 0.0,
54
- "liabilities": ""
55
- }
56
-
57
- # {
58
- # "Customer Name": "MR CHIU CHUNG YIN",
59
- # "Address": "FLAT 13,8/F,OILOK HOUSE, YAU OI ESTATE, TUEN MUN NT",
60
- # "Bank Name": "HSBC",
61
- # "Statement Issue Date": "10 January 2023",
62
- # "Total Asset": "7,265.80",
63
- # "Total Liability": "7,265.80"
64
- # }
65
-
66
- openai.api_key = "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"
67
  invalid_list = [' ',',']
68
  data_set_1 = []
69
 
70
- pdf = PDFQuery(pdf_path)
71
- pdf.load(0)
72
- text_elements = pdf.pq('LTTextLineHorizontal').text()
73
- text_elements = text_elements.replace("cid:", "")
74
-
75
  for item in out:
76
  if item['text'] not in invalid_list:
77
  data_set_1.append(item['text'])
78
 
79
  completion = openai.ChatCompletion.create(
80
  model = "gpt-3.5-turbo",
81
- temperature = 0.2,
82
  messages = [
83
- {"role": "system", "content": "You are an AI assistant for extracting data from bank statements. Uppercase and lowercase letters are the same. List results in a dictionary format."},
84
- {"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1} and {text_elements}. (1.) Data that locate in the front part of the text: customer full name, address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month."},
85
- # {"role": "assistant", "content": "Q: How do you make 7 even? A: Take away the s."},
86
- # {"role": "user", "content": "Write one related to programmers."}
87
  ]
88
  )
89
- bs_data = completion['choices'][0]['message']['content']
90
- print(bs_data)
91
- return bs_data
92
 
93
- # get_info_from_bank('hangseng_page-0001.jpg','hangseng.pdf')
94
- # get_info_from_bank('hsbc_one_account_page-0001.jpg','hsbc_one_account.pdf')
95
- # get_info_from_bank('boch_consolidated.jpg','boch_consolidated.pdf')
96
- get_info_from_bank('hsbc_one_account_page-10001.jpg','hsbc_one_account_page-10001.pdf')
 
 
 
 
 
 
 
 
 
 
 
2
  from cnocr import CnOcr
3
  from pdfquery import PDFQuery
4
  import openai
5
+ import json
6
+ from dotenv import load_dotenv
7
+ import os
8
 
9
  def validate(text):
10
  invalid_list = [' ',',']
 
42
  else:
43
  return text
44
 
45
+ def get_info_from_bank(img_path):
46
  # Running the model
47
  ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
48
  out = ocr.ocr(img_path)
49
 
50
+ load_dotenv()
51
+ openai.api_key = os.environ.get("data-extraction-api")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  invalid_list = [' ',',']
53
  data_set_1 = []
54
 
 
 
 
 
 
55
  for item in out:
56
  if item['text'] not in invalid_list:
57
  data_set_1.append(item['text'])
58
 
59
  completion = openai.ChatCompletion.create(
60
  model = "gpt-3.5-turbo",
61
+ temperature = 0,
62
  messages = [
63
+ {"role": "system", "content": "You are an AI assistant for extracting data with following names(bank, nameStatement, address, totalAsset (only HKD and represent as one number), totalLiability, statementDate) from bank statements. Uppercase and lowercase letters are the same. Store the results in dictionary format"},
64
+ {"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1}. (1.) Data that locate in the front part of the text: customer full name (it should be a Chinese name in English spelling and two to three words), address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month."},
 
 
65
  ]
66
  )
 
 
 
67
 
68
+ # bs_data = completion['choices'][0]['message']
69
+ data = completion['choices'][0]['message']['content']
70
+ bs_data = json.loads(data)
71
+ # for data_item in bs_data:
72
+ # if 'name' in data_item:
73
+ # bs_data[''] = check_mr
74
+ # print(bs_data)
75
+ # new_name = check_mr(bs_data["nameStatement"])
76
+ bs_data["nameStatement"] = check_mr(bs_data["nameStatement"])
77
+ # bs_data["totalAsset"] = bs_data["totalAsset"].replace("HKD","")
78
+ # bs_data["totalLiability"] = bs_data["totalLiability"].replace("HKD","")
79
+ # bs_data["totalLiability"] = bs_data["totalLiability"].replace("DR","")
80
+ # print(bs_data)
81
+ return bs_data
image/DONT_DELETE.txt ADDED
File without changes
imageSegmentation.py CHANGED
@@ -22,34 +22,42 @@ detector = vision.FaceDetector.create_from_options(options)
22
  def crop(
23
  image,
24
  detection_result
25
- ) -> np.ndarray :
26
- annotated_image = image.copy()
27
- height, width, _ = image.shape
28
 
 
29
  # Here assume we only detect one face
30
  for detection in detection_result.detections:
31
  # Crop detected face
32
  bbox = detection.bounding_box
33
- cropped_img = image[bbox.origin_y - 90: bbox.origin_y + bbox.height + 30, bbox.origin_x - 80:bbox.origin_x + bbox.width + 35]
34
-
35
- return cropped_img
 
 
36
 
37
  def auto_cropping(dir):
38
 
39
  files = os.listdir(dir) # list of files in directory
40
 
 
41
  for file in files:
42
-
 
43
  file_dir = Path(dir + "/" + file)
44
  abs_path = file_dir.resolve()
45
 
46
  img = mp.Image.create_from_file(str(abs_path))
47
 
48
  detection_result = detector.detect(img)
49
- save_path = 'saved'
50
 
51
  image_copy = np.copy(img.numpy_view())
52
  annotated_image = crop(image_copy, detection_result)
 
 
 
 
53
  rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
54
 
55
  return rgb_annotated_image
@@ -57,4 +65,6 @@ def auto_cropping(dir):
57
  # auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
58
 
59
  # The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
60
- # my idea is to use another model to classify if the cropped image is real human face
 
 
 
22
  def crop(
23
  image,
24
  detection_result
25
+ ):
26
+ # annotated_image = image.copy()
27
+ # height, width, _ = image.shape
28
 
29
+ print(image.shape)
30
  # Here assume we only detect one face
31
  for detection in detection_result.detections:
32
  # Crop detected face
33
  bbox = detection.bounding_box
34
+ print(f'bbox {bbox}')
35
+ cropped_img = image[bbox.origin_y: bbox.origin_y + bbox.height, bbox.origin_x:bbox.origin_x + bbox.width]
36
+ # cropped_img = image[bbox.origin_y - 90: bbox.origin_y + bbox.height + 30, bbox.origin_x - 80:bbox.origin_x + bbox.width + 35]
37
+ print(f'crop: {cropped_img}')
38
+ return cropped_img
39
 
40
  def auto_cropping(dir):
41
 
42
  files = os.listdir(dir) # list of files in directory
43
 
44
+ print(files)
45
  for file in files:
46
+ if file == "DONT_DELETE.txt":
47
+ continue
48
  file_dir = Path(dir + "/" + file)
49
  abs_path = file_dir.resolve()
50
 
51
  img = mp.Image.create_from_file(str(abs_path))
52
 
53
  detection_result = detector.detect(img)
 
54
 
55
  image_copy = np.copy(img.numpy_view())
56
  annotated_image = crop(image_copy, detection_result)
57
+
58
+ print('hello')
59
+ print(annotated_image)
60
+
61
  rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
62
 
63
  return rgb_annotated_image
 
65
  # auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
66
 
67
  # The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
68
+ # my idea is to use another model to classify if the cropped image is real human face
69
+
70
+ # print(auto_cropping("image"))
model0.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnocr import CnOcr
2
+ import openai
3
+ from dotenv import load_dotenv
4
+ import os
5
+ import json
6
+
7
+ def model0(path):
8
+ ocr = CnOcr(rec_model_name='en_PP-OCRv3')
9
+ out = ocr.ocr(path)
10
+
11
+ print(out)
12
+
13
+ load_dotenv()
14
+ openai.api_key = os.environ.get("data-extraction-api")
15
+
16
+ invalid_list = [' ',',']
17
+ data_set_1 = []
18
+ for item in out:
19
+ if item['text'] not in invalid_list:
20
+ data_set_1.append(item['text'])
21
+
22
+ completion = openai.ChatCompletion.create(
23
+ model = "gpt-3.5-turbo",
24
+ temperature = 0,
25
+ messages = [
26
+ {"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
27
+ (name, HKID number, date of issue) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
28
+ dictionary format"},
29
+ {"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
30
+ You have three types of data to extract. \
31
+ 1. id card holder full name (it noramlly is a chinese name, including surname and family \
32
+ name in English spelling, and it may be separate in different fields in the data set for surname and family name \
33
+ sometimes) \
34
+ 2. issue date (should be a date with month and day, e.g. 19-97 is the required format, but 26-11-18 is not \
35
+ because date of issue of have 5 characters) Only choose valid format!!! \
36
+ 3. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
37
+ (a) @ represents any one or two capital letters of the alphabet. \
38
+ (b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
39
+ Remember to include the check digit with () \
40
+ Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
41
+ ]
42
+ )
43
+
44
+ data = completion['choices'][0]['message']['content']
45
+
46
+ print(data)
47
+
48
+ id_data = json.loads(data)
49
+
50
+ print(id_data)
51
+ return
52
+ # return [name, valid_hkid, hkid, issuedate]
53
+
54
+ model0('dontTouchMe/IMG_4499.jpg')
request_json/__pycache__/sbt_request_generator.cpython-311.pyc CHANGED
Binary files a/request_json/__pycache__/sbt_request_generator.cpython-311.pyc and b/request_json/__pycache__/sbt_request_generator.cpython-311.pyc differ
 
request_json/sbt_request_generator.py CHANGED
@@ -51,9 +51,8 @@ def generate_request(data):
51
 
52
 
53
  def split_data(data):
54
- request_id = "request1234"
55
- # token_id = generate_token_id(501)
56
- token_id = "12344321"
57
 
58
  f = open('data1.txt', 'r')
59
  with open('data1.txt') as f:
@@ -62,47 +61,26 @@ def split_data(data):
62
 
63
  if "avg_score" not in data.keys():
64
  data["avg_score"] = "0"
 
 
65
 
66
- legal_doc_data = {
67
  "endpoint": "SBT",
68
- "apiType": "store_legalDoc_verif",
69
- "requestId": "request_id_id",
70
  "date": get_today_date(), # a string
71
- "tokenID": token_id,# a string
72
  "docType": "HKID",
73
  "nameDoc": data["name_on_id"], # a string; lower case with space separate; e.g. san chi nan
74
  "docID": data["hkid"], # a string; with bracket (); e.g. G908833(1)
75
  "docValidity": data["validity"], # a string; "True" or "False"
76
  "dateOfIssue": data["issue_date"], # a string; month-year; e.g. 07-81
77
- "matchingScore": str(data["avg_score"]) # a string; e.g. "0.957"
 
 
 
 
 
 
78
  }
79
 
80
- bank_statement_data = {
81
- "endpoint": "SBT",
82
- "apiType": "store_statement_verif",
83
- "requestId": "request_id_bs",
84
- "date": get_today_date(), # a string
85
- "tokenID": token_id, # a string
86
- "bank":data["bank"], #
87
- "nameStatement":data["name_on_bs"], #
88
- "address":data["address"], #
89
- "asset": str(data["asset"]), # a string containing only numbers
90
- "liability": data["liabilities"], # a string containing only numbers
91
- "statementDate": data["date"], # a string
92
- }
93
-
94
- generate_request(legal_doc_data)
95
- generate_request(bank_statement_data)
96
-
97
-
98
- # demo structure of the data
99
- # {"password2": "chingfuilau", "username": "Allenlau1111", "password1": "Allen02118173", "date": "2023-03-03 00:00:00",
100
- # "credentialId": "testing123","requestID": "test_statements",
101
- # "userId": "7893456",
102
- # "endpoint": "SBT",
103
- # "apiType": "metadata",
104
- # 'tokenId':"500",
105
- # "ipfsLink1": ".",
106
- # "ipfsLink2": "..",
107
- # "ipfsLink3": "...",
108
- # "membershipStatus": "1"}
 
51
 
52
 
53
  def split_data(data):
54
+ # request_id = "request1234"
55
+ # token_id = "12344321"
 
56
 
57
  f = open('data1.txt', 'r')
58
  with open('data1.txt') as f:
 
61
 
62
  if "avg_score" not in data.keys():
63
  data["avg_score"] = "0"
64
+ elif "similarity_score" not in data.keys():
65
+ data["similarity_score"] = "0"
66
 
67
+ sbt_data = {
68
  "endpoint": "SBT",
69
+ "apiType": "store_img_verif",
70
+ "requestId": "request_id_1234",
71
  "date": get_today_date(), # a string
 
72
  "docType": "HKID",
73
  "nameDoc": data["name_on_id"], # a string; lower case with space separate; e.g. san chi nan
74
  "docID": data["hkid"], # a string; with bracket (); e.g. G908833(1)
75
  "docValidity": data["validity"], # a string; "True" or "False"
76
  "dateOfIssue": data["issue_date"], # a string; month-year; e.g. 07-81
77
+ "matchingScore": str(data["similarity_score"]), # a string; e.g. "0.957"
78
+ "bank":str(data["bank"]), #
79
+ "nameStatement":str(data["nameStatement"]), #
80
+ "address":str(data["address"]), #
81
+ "asset": str(data["totalAsset"]), # a string containing only numbers
82
+ "liability": str(data["totalLiability"]), # a string containing only numbers
83
+ "statementDate": str(data["statementDate"]), # a string
84
  }
85
 
86
+ generate_request(sbt_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
saved/DONT_DELETE.txt ADDED
File without changes
sbt_request.txt CHANGED
@@ -1,22 +1,34 @@
1
- endpoint: b'O5LDGUcUb6knu8lOvKTVd5EmjI/9yH6P4KraYx87dQQqJxJ2rmKUgk+qxavAJMFOWfhEuMwZsFB43Y3A+F6QoqBBXZfj0e4snOHWZKXEKeKynlmIR7c/Cy6bEK9oUrkrSOkPd5CZZE0/BswZMfh8XidW4GQTBo6bybHkBpSKYpNkS9W8GaJLWqnDEEKFI+KSS3gsE55PEOFvKQgB7s+icT+GF/2l5vOWKqzc8pHFFH9nyCF63zRLpAVe+ZEB52dAV148SJNlPNsr4ikQLOqOOPCYF35c5lbwGyZPbib0+6pc9Z4evxPUqzAAsrSmkwUr/c1Rqg+go4vnhJ3EfSjGbuMAeMJLdtLNzglYw/93rKYxG0Jc/w/4RWS98PHPGLA3GEvoD30leOfwS/yO8N0Cb7v9u3XwqIy/FNcdDOBii9GOmXCLJn6u8KvbyfhsBM4MtiqwGuPIBB/Wu9AVVqJGGT85YgrnWBnJfoMHvYlhEM3xEe5jSkeMz+G1h7vXye2wdKKc7jmt748i/8QRZ4TsFUcauU2V2OPD6eGcrjg4MKma0Nu7UKQyUJKXg9azDdn8YhMqzfGQ8vkoj3hQJNTIR7aOyXaACjqEpG+kWC4mCjPciYGowzp8AmY+QK+KSRjwpWJr5uvBCtqFH8OwUZuS5IAaqMbrBsufLdh7XIF48cc='
2
 
3
- apiType: b'Tg6rz5RJnLaseo/MDn6Dz6ui3MG2YKQYdbwteCT9WdN++AyLXRnxGepXuR1i5oEqmTpxdZsA5mUnJBEjIh6EDJAXgd3jG/0Dy5p7TGe62Rmla3EGOkdjHRNWBSoXYOeivLxdH+lTe6x3+bzc7/cgbDjdJ0auBkid+avLZM8KtV3vGtOjHey3D8I+7foBzALse9TI71LHThKIg3Hjeq6dj0SLkkaz/DiLFS90JXa9Ip5NZ+CGtJ5qmDLfqjBpOLLZ06aXhEoLgUVwk4vf/d5DAOxsRyhPfK3wao4UwaVtj+NO3bR4/tQfCQ2CnFNvrr7GQfXhqGx1Mh1yOoyClZPCj8I+r3hYYrmofAR9PQF58qkmuuxAiBTqNlZHyQGmbRDOUaNBPy2Tdr67MrM43zmAFysT8HJBUCcLXWv0FcGKCqFv941usFuboOaoRH9ib8hRqvt757arQPwdmIxyayNkOWgWF3kQTt9iSbleWk61l06EUgdX6qOnAdEDHLJxPYGdrGHVwuRlT7Sj0RgkSviaom+0xcU/T24auDgZj/OjMCibz/QrRz1Ap9RMnteBGjQoYXiIhyBPe7p3Q/6h0PZCyCqSH/IDj4I8ar0FNtynkuXKggr/GWUNV7xkLS/xujhXuziBwTedR1LYdmth99h40CzamqmYL5cj13WtUHfmFk4='
4
 
5
- requestId: b'Ew+AqWhPC5HmyJnAfltVVOcmDvYMRxV4dmBxlel2+AFQmnCVWY2gTz3KrTnvGan+xONkFyDRQV5Xcmm9coTRuXLqNHxQcNz7Iu/m8ipLUrUr2Kofa5/paAcD5x7I/nYgb7UNQ2qWzpXo/vfw6syY9vVK49ZhPfi2tLYY3gEqIQojQvmkhC3qzTrbQqKSQq8jWQ9crNGiDvrfHhPfPG/hdNhwhluiznhTFGJkyeh8d2lVwUtqKN8MMSinX/Jaqs7hyS3FIGILynPwgyASkKGLPFpqAmfemDPkbkzybHMThKRBcebUPdIMpll88nT73vBwslwr0bTvat4wOZQHjtVs/JDzWXnYw2zO/ljWzcl4RSNcJzKulUhEV6NInIolRQ30mTvxihJIojOT921zhlaHx6fp3FP2Qw2lHHgtAvY4L10R1Qxtmy0Xm7vx7oELrFjKZN9+w8h5ofvNqHdkE7HafF6tYiIPm0bWaWM1y1Z7+znrr0at7/DfEb+a+aUchC5xZ59zywi5cJCNGl0f1dEVQ8Ywl8+PMpb/e8PkEghl1MYXecbp++FhDbWK4Db5OL6+9n1WDSEkkaZ7wwo/SdSjcoSrIbTeVp6BSMTryMozADAWt2ETQzMPI3ts/LVBIbae1Qur8zb3tbx8H/Yeg8K4yLzho/PXDQnM1UNdR93uRps='
6
 
7
- date: b'CuIu6PiwXFE6zH9VRTYTmj0zht4nKQtD0CMB+hflSl4NhhzEEodA0ULRu5/ah+u+Uw24vuK+n8YrlGGC1m1j8s6axbHMwjJnfMY7ZmDcDr6ITHINKpjUpD8KSunkjNReqhQCsPnoyPFTPYc6lA/oHA9qURrE3x5OeaL3Bt9iCwTqLFslHOJzI4E6hPDoLUl0qUu7WDwYKN+eq1tsvO/eqe2Eh8TlYGCPXvipXIu3pHp+7blNmeUn4wqOEIOnUUwvQPRNemKeKZWaO/j9kGMO0T3d7CjKE6MlbPJbj0EfCEXM37qJyvQ4r685CZwcC5Xa8AkiYarbHg0adcdCBHi0GECAUzKvlgEqcTolHTTgZ5orFNmV9K6NYqteXkbIgPQiroHDImcWcu9tjTRsmkuX8eCUSUlnBB+1qD1ggHIrk6MbJhAyDSNl4+PPm0E81z7mMJm/Ho/4ikQANz9Y9cvXLhYVVOkwmzaD1RW0I9rSnGtmW2elJVvgOQfi2Oh+nHTFzZs6UzuiG0F5JrbRnT3A8AxcWbiK3mwM3ne+0NKiF+rMSC6HsnDpE7KCNpg1GtGNw/ChXBQBkLBCxiBA78FkTmG+n4UUbC7FpPPPDZkR16o/CjTOVKI7691EcxxTU+jYPpxkB9Ul1kBZcA5ayyOL+wC3BiXi5CqjDcCbwmFivRk='
8
 
9
- tokenID: b'cYLpI2tweuOUayPSzS/SIqWXPOMbpJinzXNTCBUo3Ew1taoLmb0jJQb4VKk1iRrD5CjnO9jS7CLyFJ1OLGsTNHyiN1dc/P2A5jGbCKyq5OOoKVd+F6IamfHy2YMTGjb4nDP8dMfeUlz5R0FYx3yiuQ3oY6CHYAm/TKYNLaSTu5JXpC4FrVlsVnExDzgsAlMZN52qtVNgKyaO4dC1NVzdlMMBA3Wqiu1+d77fupWl/mQTqWSw5BR2pbe4yZYEBskjW2R7d7xl22UxOOar2tY9yRYKbDB1K6M/00Lm9ApkFCXruz1K7/kbz8IPi/p79MSXpdBLQko4/Z5hxZ13+Jc6QeAqcgiENC5wjYdeaJMyYOGvCc/6XpFq4Pso3D3y3j/qytm/0S4QW4/OWwgQj/DggU9HU55I8HxP//SfHbO6CBVRwB86ym18YV8Y7z5pnK3urZBQYE3iVSfhB+dj2XOzAE07uKHNMKlouKzoBVq5voaYVvBFf2odVFDRbsoa1chJxGcr08dARLZ6lt8ZyKsXqNj+EKxVAy0bHOU+F4ZyIaQyVg8p2jUyOAH2WZwlsmJ8yg0f41ddlyHwIpurCuRa4L4wFlRblmrSH8hxjWIwKfqhz5CJSknvQGYksfzic0vv6nVRXMx8jcdq/HGNn6iiPZw4hluVPIwF8j+h4zsA/Wc='
10
 
11
- bank: b'LbIOvIo9qYAb4jJjczhspqbR1jPArtkyKqfE2oM2LuA/d1oZTGf1VGgQyCt5T8m5UAknAZu32A1eCOkPx9B0xLW5lYuEvnRgeyvNW46FM4Fx/iFrLns4lClV2ivqJ+cWV2BV57u8p1Tj0OoVbVPSl6v0T+Pxg5YFJvNjU6+w2lRL63Q4WiKFKVCHfdKbxzkfCE3UtK+GTm9D/d28O2ql9lRBFFnsBvRCYa9MhWSx50CNv81kC1J0tELyejtVxrlnW24SVVt+S5v6kV70TVHYfmkoeh5gd8XJlGEZ8Ww30UoYsos5pgaywvgGhcKMtHF5gI4cAY9Q4xlcEpUcyFQSMB76+HmX92fqUxAfVJRCG1iUrFP+vswZ+eWvx0LCXG8Wl7tYSWzBD9NmKADiam00ODV1f/woWFp5GPr7eupUSPZhI72/I3s4u+m3bvtFFEFJKIeCAX1QcmZL6r3KKsp6PSDR4G7iCk2lbhpLW2icasYy93ziDnUn5FozgPrnjasSRxDB83yOqw619AFHMTlFeMfsFEKAz16JzBUz0Bi6PujEfKZ/QDLFK6FyNHicNaAPtpsci4PlMLSvoOVV5Uf+lIj2yBNYyMfSYda4wYJ+djk8GhguN+Cx/UWZcnTP/C8CS0wk8/iVVkSk+oAKM4esLDDgocl4AhbpuLbfB4XIQ9Y='
12
 
13
- nameStatement: b'KoojlgYLT6dN8GI2DB3TDX8QKwZXgGYZUjX550icBht3gFn6hkoejR4dP4TivuFhaVb1muER0QTyjWog5N4yT9iWJHpfXV8j41SrFst4QcJO81yoxTY4nguLg3Ie56RYk6FQgz2dnKx2WPs/M+Hgc+d+mGlb7mjPtmD4zFVJJHyDaVeUdWWZWVVd79yIbFnNleCWxPjByfsGaDiIGdkoZApiLFknBn46q7/JR+McJXYfPajPZZK+pOExGHj8TKPS9nEuz+73POX1fz/faRWGYDtqIcpkoi1VoSKY1cN6G05NM1N2KAOmxPw4rXy5bYWhNNWreMQNH+P03QQQUiJgo94MMph5aL5G+dc/9ei/GLwZe7icMu3/O4S3y/ZTwOwt8nHu1QQukKEJxzvr11YChUGO/Hz9c3vGvxOvTzaYJhR9bSsf9tB0iVXtatsk6Ng6kOCFICatLFd8VDOxqiVzzW8gxP31AV0xsInDKbsjyOKdIvwxPwFX2+XEjWQ+YyTFuw16DzJniCPP3+eFN0VBK6PBf6OgeEJbtb1yM6hB00ny5JWoau55LIajHEV6/wp3fr81V151+0UdXzTtmmXCy+pTS0gqvc4Nm2O9Sar/iNpiwpyJqd5oxDb/RRcLExnFBGo21vh7vILAdR4ajeX7PPZhVcYeAMMvwOcqSit8G+o='
14
 
15
- address: b'bj5oVnpsoxk2n33yVxia8QhDEI8fRxKMFsLvMy6EaQNKwolxrp1d2at4t+u0CmvkmTfCcgYd1Wr9oaJ6COmJfJNIwgy/ftZL/s+6PMzfBgapt0yDSBSDM4vtRIlIKnz4+BXCSDwwpqpCMo5J0OdEJaa4K3qf/9S1B1gf2rotmbt2xo/Ipsf7lfG2Rk+gSiGcs2bsMBBPBIqtTgf7SxXKk7RkOU58nPz+mnZEjdQgIsrQFJqII6+Esw7puTmDtrgGLokTSL7IXqtPw5V7Z/XXbqzImSuEc7wpk7ky4LU48wJ+bGe3JNosOB+c7KEdbiz4gIC2LCu8lw8kWbBaqsfXUzX3QCw+9n5g7xAw0VdrF7B/e3G8pCrmgoJis4NQl5Jy30M5pe7bUbgxwLY9HGmWzBgMGHCcpmrPO0ADpOM7QwdTPmevA482ZZTHzlny6mp3wWJAbomnuXJ8NvFnYMWIq7eImPY9mAKxjYzrX2fQ2qVr6Yr/T1bqErekvKz0V9eTrD+xu4nLyqmXe7/sD5f9yPY9PcLVAMMunBwf0x+aCsX1bqWVLPe4HG0nZgecZizH+Qx2Qv8PYWqBugqXTAVYNcS7vGnrsgoUjnWjTUTTIB/VoZCCtxcb0oDazoZbqJ+8+kv/MRsO/qdQd7nDbnU1oYg/RLfb87PXGQWYk5I0qPc='
16
 
17
- asset: b'JAu5OeWuUZJAh7+8isnKhlUNaXKtXgmErqrUy0+YqOpY4P0nSKCT0DH7nmX1DxHn1ezZBMFkM+wcO/9a/9R4WM2CABl4nLYzuu/y20dzdl2w0MqzHla8OaIQIoBDms/XPaBiLByAVDjSm2UCt81NG25VXXn8LNMfmjvCXe8VBa0fBgZrNnPfR9YF8GRMBMcctSHiPrXZuvIHEvcHhbEgHa3zo7Gq5ApPqFrgiVwmdRkFjf5DxRvsUXPlqeUW7nKD0NLWIAUlM0L0/PnQpKLmhexYU5qgsSXngFH+vm7NBWas2AQXMKX4WNWCLqlkljIQ3vlHh+Q7lZZW2C9Y39tz0ghC8eLzygTGbnxxzT2bSk5YJMLy8Gm7toJTGhL4KoMEPO3LC/C3mnVGo9Qtdenkt634d616AILlMwWhf9SuhIPbygBegmrcxdtjdjDHbGopztn6Urcf7ai2+5dPVTcyWOiwZf6AXsGqf9TTmK/DLWVV/MSwRUPDY/HKCPwS6o2Bcm86MNJFOru4Ez7qPIArvjjgQMdYFXeO/C373oOX+EBFxLrui8Tz2t4SNzNGy+Tw9kVZDdJYUyoE5V6q0VFLmrE5J8TFKNsTz2LYIdkV7GIMITavvUqpieXfj5oJpNDA5CSTXl2OQmArLzbO10VETsizw5JPrFD5+LAGvsMLfMw='
18
 
19
- liability: b'ND9CxqaBZFqxPImrnQKpCsY5pW74y5cRIbfxsfwlrOTnPGKXlllPXNqgwOBlJwvK/5xJA3u7Taj/EGpyRwvnH0JUwiWRWt6i+JlKD6OuRwYEhbvYZwWZPR6EFkI0C97Yusk9dJHZd6AAm84h5tPSnx1p2Z4g8IthGgrP1eLKTqK1v/cc+f2ldkKxM/iaEDemFB7LzVespjkUaE+nhfoG3wk7v1+BPSZt5TK1zwYDj1JdGP5JyuzyY6Niwyk9ThcHnuyJ8qMby7qsod1drvdEESDI0d0MrwYzVJBekhYQpqiOOYpnbkxXAlVkFyI2L7DV4+ue66MeFZfsad19FGQA9DGvCdXMAx+CvigRJyRG2DFMOZ7BgiCAwFJQAj+ayma8C0mHpeJChrt0i/ZOskEG6rjVtLPeQVvdvuV1NX1PDSpVeFD6ml/ZjN0BmIJxofbxRm/CiffJArbtT9dpVPneY5/nJuRF7JrLSx4+zSlailj7RHdxEsy8fq7x78Zl3jPhmtn7P0Gnd8+epgo+66IqBIHOSUpy2vM12qq44xaxX+bVChg6CGBp8fgwo+49/7zqWQ0/hXi08/ZwAnhcwlXBVi/U7zJsW4vTS0ZaizX6uv9oy7MMsDaqgvmqUr+XHUV2xQPGjswrpUeNhJVjMd+598+fqsGeJ8AYXkSv34EET0Q='
20
 
21
- statementDate: b'LqkAFp9Li6qYamXMH8UUndrGhquJrtNXdMgG36EjZoFF72ire4Y0Q/S9q4HnKjOYlobVxxsxvkDdUiWxJa6uuuULMkabaozKLnDsvSfZCMDZcvEEkxrTqYU00fetcVCFieS74p+kS+a3a2Ohq2zlnHZEG6EmviiBPTP8Q3wGpQXYkJNQ8cPbum77dvYLabSUu61xUU0B6Dz5QHI5W/6NcFFgT6kB243eHoK4L3qJGaSm5cuB10iU62PY8HYoHTeKQ9Ve59qu5KPyNj9VwoD/tZebNpnveIRX68T4cmkGW3PW2FeyL27mxOsCMQrUCI5oxOFstO27a/Yfqg/CwRDnfYr+8Hm7vohB0bW7hSD6dfCFWfv4j6RdQ/T5ToF6jKmkBPiv6IGyR7LYAb/zr9drgSRxVaoR9gZMv+yq9xPJENQbDUV2P9N8ypRqS/+hNIMyx58TpnJc5st7J+hehWJT/5A2M59EjTVt1mwYLwbp9rjxevRoe5YZDxepeFCrS1gvyg8J0MJfE6J/RWogEAu9UTkgpks0s4NaXPGb/hMs4wMs9xsZooKjtH7p/sDTR8JlewVcxirl/xg1lxKELFd1qy4AVaFR2PAd8dj51t+7oZf+WCh+kODepzGgo6JOiBhAHAKuRd5lUT9J3gg5vZzaKEoGliBlEj2tNWDvVpbXUI8='
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
1
+ endpoint: b'NbBxyA/6/7XAzpk8HYqkN6D7k7yD8/c2e53wyblTHcVg0D1EgYnZodoTuvDpEMCkouCyk80xAhIynxYa0lsRfLsI4438OLHuad/gvZt29v1cYqxVYqs+udA/Hd2zbsr4zYKORBW+LJhj9u+8zefNEGCstMK215eeht0ZpxLlgQMQdApSkPsPJnYO3VLWmBiV7txwZGhkqtOvE+cfsUEsulSpdhpRvcj5IGMNrF/14gLKHWjxTKj45At2w1ToBzIwfPDPrF9QA4mw6kUs25UFoSOFOo5/OMbW2iSDpYFE4UHj74Z75hzYJWdWlDTKLZUDWxwOF19CGSbKNxZt5Fsr3W2k17W5D5CG0skkEl4lhCyepoyGNWJZCNJ65PRFcUYyT2JqWRQM7htIKgbs1lZlHl85go3Z9ULNf3CgJeIZGwo61a2jwDPB7lr9LmpVk6tz5HLUNaKpyGZaebiB2J/Fk7MyrYd/r5AB8qLgZXgp61YRFv7bFpZAKIRnMTZP1FH3ZLR8CE9sISWxsx5WipGmSI3FmTASg9Xf7PBdyx3+GChiP7bxmBCDhQ/BqOn45ULOIyHURQ+im4O9B8MEx0XlWhilK0Fz0LtXnT0PGG2GdTE/B9kaEd6g6u8oaT4+3WeyZTN3Jtae0yFQ9iKnwPoR45LtXejfvQgaqAjauUnFDBs='
2
 
3
+ apiType: b'fytDJK29mmABmyNae11+Sn21WO41siR45mS8s5fH0KrHcgXJPIobqC1LuhV640z0AmdKvr5m4uLjCQYG2vsKLPPT83S872E0R0W2/5pfwoTgn0ezMnhHe9Gcfc3dV9pHcg1JgxQ2+/RJcajzT79X8QkQnGMVUkYj+8vy0RE09to7kGkzjsnnzYgHkewsnt2IKLSsrMgVbQte7QdVSSguq/z3C6EinEKkwN9GjrOxeL3vFGgf2Qwpx62kLOkFVPIqqzAG+/HU3psAMHXUf9Rea35wRPLDzZh7REAgZChTQ03qwGYwYMqEBrUjh92/4bZkZYYQmE4vc14gZCEFMnZgfMpa7r2kpsoiwwb01BF1WErI50pfLm/eeBw1G7OpNmVfNi04/7kc1Xqvf1Co9iNXNm9sSnwk9Vs8F7LC+bFe2ZKaAcQ7WBqPJefirJzUvz/0J8dqmfwm1m3/jIpQ5sRg6lrxd1omo5jeAspyx1DLTFqu61/o+H/30CeakVCkcbmsh53s8DOpRml7MIGKJQa3+u1tKo3VaCuiuoeDaQgX6M9sJaFw4AIAAl01Va7feqi11P3xFVt8y7dQJmfTwCBQxmFQwFAU2g0Ts91yUkSnD0AKMn7S0watnKtcJnd+33yomMm2AFhcljrlp/U+bztBueVShU8VZyd8LcxWAcPaszA='
4
 
5
+ requestId: b'HngLFH+4oanRITJPMA1Ub9tV/fbI9i17SY27qmqoxaYYtK63S9OhUg+OOIMNBQMGsjJ6pHfWfAB89xFDJ0QM3fUvhd1BHiTpyGnL5mqwoqTPouTyRARyAX+6LgEwe/vOX/LJGE1GbGjzV6RNKNn2cHm3Va6zGMMvensZkuW3cs+GbYuv5+22pk1Sot4jDl6uX0/CayqDchU9XpMo5BgbXDth0Xve2LtfKMQrKtWQ3926Dr3G219F2T3xIaQCWcP6lo0scK2Uk4RJWdHDl9Znvx6pjh6NY+ygHzUvbdBnrjTII6OOgBTPIslmhiKr4u3QHwv/zqzoQcMrncJ8IlLvJ5RhcwK1gtty+r7ank88TDyhm8pfv3mD31ei503410OMuDveiHLUwkjE3GvLhcbQ3Ukxg8xP5S/DsvWYNkq4ZanLJFS9pkSaEhpz1SnY9idXxVy3mnaNfg/npGk/yUHUx9KZ/i8DkP1RtjnDWFen/pD8Op4/KIx4pMm/+wHsCRRSMmpoqM/SaeqKXjota728eO9fg+vuADHL0nIpB4bQk/mSR88MrAwKnYye4arhH85v5FvaBwttpsuHAopB57+UXvs6e8nD7WjRuWY/S3DU8cFZqw5SCz9GbjmXD2Kj5AA3HXrfwEPrbPKA39bqjg82gd+aCtlfmNAffxM53svwBMI='
6
 
7
+ userId: b'CeUitfymDZUWw3IHsufjBUMeNOovvQ8EA5fKgQ7TfHuYaiuLl2OMUVuTc/BaaCY1JFgCo2ek2gWFBFJ4fzRyYFR8+eYg9GEYIjuP9s1/PG+iKsYJKgPR1iCZPPLtcEi39Ch7J7c9swq3ce67v8omSPqLRA6mUGlNrDc1qJavgTJBqsqLIMDjFEU9AgrUiyoR3VH0HRggavF2t/lhy7i96K97jpTKaE1NCTUc3lrFQkFpg2iLVdQCb5FukAdZY7UsT5KgU/vjZOUdufj6VpAQQe38zatCdnYxgu5q/0UlxZdhGhg4DIktVzVFv350cpKrOaIsyoLNAynEhQRyPXMp01Oa2Ol0BfdQnsBI35s/mY3EgzcxGnGIBLe0uzXcy++/X8aj5d44Ct6sZwCfKe093fWrkT2JnoFiZJ/fzTAWAuQKNUF1V9oj7n7VT7FDc+K5STiPS5fwRKk9LazpB0CxY17I5yFEGTTG20iXzrlqRCXGJIvi3rbDqk3F1oHU15UqTiw6Xq9/d4CsDBQb6RBl1VE0T19EyCLL7SSyvV1N9onYQSGCAGeuQvwPijgDzqfYufHZZ5ET0dAWu5BzGIvEwQqvtrmsDBa+QHeJj3IS+UKQYozsNhRH1UI4mNpd+2xOSklbr82QKYAgqsf3x9/w6kmn9UBCfk+ERSQ2pNIeOC8='
8
 
9
+ date: b'XBZNHz0m2aqUlKkGKobCyEdg8wPcmGYPwFNAvKH+5CXYmjKAd0XsLliR7bDCa+9cHy1USvqJNfQctXQXogUvIqQUvzMYhThfF+MXZx8ZJbMquXwDGslooGDw477K5taMc4vBdjZ044CWkJoLn77hZgUFtFovHxLonNR9YUWYh+eu4k42hhg2pbTJwg5iCWohLS95VnZYAnjcn5zIlLPRdFhl/9o0Tl8vdH2QVRk33+KwK1fDXWJvbG45jD84+rFPEHJh5B8cEAgFRJv0wsB6rbl+WDo51Q1mcKvDj/ZB6nGswUA3LRl1rtE+/Kgl79qkQuUI0WMowEhm7zTVGpsOnQ+KMNSs0PwKeZLX4PSvmuGOAjUODdu6bmpRLY5EK2aAC/fqy86jO1q/0dAeEYuXzjJ21EEuDWUPBc6yVH/ANhq52bR+rD72JrMx5v/3A5xvmdzXqwIBkEJjmrz2YWIt0Fvbu83k9DiC1JcbgIGOgSimwQEtArgUU2Kv6drCZc9IVGM6FrIOkJaDgykwAMHVC432OzS0QIMYzFO3X3AAXN3WHO1iE2W0Ymbq9NX5w1EvlKF8v0LJ+FFwBTujr40eiajeBRhumg5mD77tWwnOoubSas2W2t9CgxnFc4poe3AALqkFbpKYscT401k8oLEQHE83sdhgzMmZkYX8PAuKx2E='
10
 
11
+ docType: b'ahk6WI86pWF6VtFolNg9m3+L7zMQO0cigoUbglHsOl6OpqY8/GlR7neLgFpo5N0DcVXS3icYJ6mNNpFn1Nt0I5V/L6vx8e4WOymskAxgXd+LjYTNKaXcnef1cZDR9P040cicPWNahhZ7ZrgNk9CkWWpa3GYGggLuOM9ZKOXbuQlDwFl8Hv1RH0erg15DuYUC7M6ugSQfRK1bK84iq2ZDDnNJNMQNQlzZ1KzzmXJIK4EWa/JJMwwkK9h4XwZIBExtNVvMAPtyrMIUuHtxay+V3+Qs7g+B4E5maZQBFCqkPbqaU5TmN7HChC95cQGU0mG7LIZ+UH+WbSa3lCijSEHkQ5VbDdPebXib7tA0uKkaICs5CA600p6WFTccj3wZvPpRF4mrMOwz+GgcqBKWJLSy+bjqmqRyKcFApWE4DRtsaL3aJ4uW5wDPbiLWX210EjHanff2FcS+Ab3AA2HM+BZaTsynAmJZ6QXfbtswn4xZr2GmzdYjw1yTFkUCF4NuA+vaaVk8GZHctMSY0PY0vXI7Tk2ZkV2YCClJ9+y8W5nrBQTlUmYuFR4c0SQOGJS7g88SXeEju/hKWo3eqbhZ+DFdizBrxrWn+Ysi04QQJElv9oHXw14Kq5S9DBkwR4AV0KH21gJqmyxSYWEPX+/ejrY5pM9MB+Kaf0P/0uqp0Eb+3jE='
12
 
13
+ nameDoc: b'eKA52KyqncSq/E3gOyL9mirv8h6vRGqRiiqXpD0fcF8g2KNwHKSh0z1VsXluWeIOpKMmPpsQv5+zoOCGt7CwIc0ZtkBauu3TYOyzOd6uc2v9K43Dj1adK+UrI46Sq2IQMwXvzdmJ4vBI8DXEFiIT7dzK+bCAKVBY0EAC37T7wEczsiRkiUta/dNWl+bxNmcO7hrUi0rj4M+sVKFhf3p+75jXiNi/QwomJWD2OQ84oh/n4rHl7D0OwdZ5K3KCaHPr30vUtC5JLFFKnEwbTxznXqfBes649RHyf4vxKf/gF4Z8yZwAKUUJ5Ez9hEKzSvG3htIOHDGD+E/LyiFoqJgXsh38XRjl7cQNPn+f9IAcsNQbDVtxQ6qdufRbnoMph3K/OyCckntUruf+lEkiBh7uRWGyHdIFHrBk7LPYtNMg4msLCsrpt1/IRnd7A07MWHRiiJ6K5I+cCOYdc5MHTdfxGiPdHcUkctKTkmmpJhCRsJbRksl+dkows60CaemHDW1aH/aygnhdmO9E9MfxdDqfleroV81zeUl+BtCjXVuFISIcXnOr+hJttV+k/9jvRJZiTfMR2PI8PQKBw0I8uytrI3kMMjGzIVr9uhcXtx0H+VP2hJQkuaLOJfgI1bxLUM4CLgHLxpPxYjr00sbe9LJroVBYgfKmwcyaiD3wu6azliQ='
14
 
15
+ docID: b'R3UEdGUG+omUgvioLJKdaJHYWVO4430ahCViGppAmh28g1S8emz5D12SmTSTiHansmeg1VQzC3AD+J7IcbZKLhtvxQ0fWCBfC6OctTp72IU7FbOZNnrFv8CdwcBFKhnqhV8YUWRFBKwn0GIPl6JeXAwptw6xn/DNQOIUdollyTSEyw5LyKKS5DFVRnYPKakSCm61oaSWech8C/0H8sqVPUHEqq98NIQWuU8fY1XprECBXpfszaADrPDpb34EoeOQnybGYNMiloY5quCUBqExeyOD5/seJWmh0fZuwp2p59YO693fPJOfIskIoWTHVVXev3+0UuFoXuBCQQFUdZ3rrR1aFG5ypD9LBx8MflnL7/W6tT0IiUWWpNk2sEvT7GZcecpt5biUZ6VoL+tINr3cOiRneri5lahh+nnUoAksEBtqRW4u096HsJHJ1SLyUEFV/no520ZjC0VtDN+ahakRn5VJZlKY66/Lwt6bKhXOkxwGLTYVvJLmSw5YQnAEryvHUeHYPyr18fFOZO0hcrYyADQi7Au2NFXlMZ3MTgMQKA5q6DFAGu6OjO75YpK0vJX+1cJgyOeaFe+Mih+0efw7M5LVrmEP+UXu4X3YZQrxZyYfD2ljLZrkeo3mASPP8QmWQGwoyE++BfZgVQo3ePZYg8R+I5zXMq5p/ED80z1Hq0U='
16
 
17
+ docValidity: b'OO6lVIP3D6UJflecoJm+OzNNE8PJW1xhqcDXqCL3SHOYtEYNGQvpZWa6Hr3TgGtLFuXSyPbokv38Pndb+QBv/49AXWZ1Szhj2QajUS6hIq6UImSNAL8p+z5ljiqRGc1G6jzLIyGabgBf3leY6oszJy5GfEwKGQdjKCDd+pZZUVEkHhOHAtpycmpHH7I3W+bkqnB75NU4PmNHK9PgmckY1xNsZrMAMB4MSr2Bix8rlPxkICitCP9WDYV/VXKwuDCXHG0mCCVUi1vvPcLhg1IZNO5//79bJd0FGlrZe13waj4fWvobjSNO9oV/D9kFsensaXDJUduR/ohdIYWQVL5Sx//oK/pcrJa/cwyl/K6UUtAymR88ysleqoCKgcKn/5C7ZxKn36ANKhu72l0Sopbtar7GZu0IxxjmNdkT93K4y+s8xdkzfpelZSM+vmeUZht7KAQYW+iQiyXjT4d6VNju7WtR6j1IfGtXwsp3tc+i5Bu7Kl7026FLqn6iKkC6fF0TFBWR09SHFS1dru4aSiI6qErxw4Y5rxU9FQT6dmVLMO3gXZEg2FiBVBz8T10MoTG2Gu0LvXjmbeC/EzM0bNm62J3N3hwa77W3+RWJBG1FPDmqee7fgNr1gzOAKiTa5Pn9qVctJuqBkBI0wfWVM8uguLK8ibkf/r/zNznpoyFXMYQ='
18
 
19
+ dateOfIssue: b'Rr7anhZoXVS8vmMss8WxyJnugeVgp7HqjrhMxO3raAF3tFSGKcn9cEzcTjSlB2l2LbSyJQCCF+5fqSK5uFLQfn6kcAhwL4zuPhzmxfZBhX6PSPvB3hd5bjWAT8N/mRbrCP/3eWReMgFw/UXYbf/DooD609EgBSyzMpGm81SvYVsQIqDLZNcEEsZEj1+HtfIFS6sTYPllgSn3Za4TgBvgxwEJm8G7b5QL5WCDRetRB4U+OndGl6byvUAFD7tU9n1DE8RxLV+hZN7sLwvkpqXP+6Bm7k4b2kI5gcL2Wyxg0ryCoa00u92aPrDnBH6D4hvFtbJEI9iqFviL2+K7qvgLN1B8S81A9KILrHctfcvn5prVI9pUmu4tpS/aW3QWFlgKxnAX0o0ytDijzLOm8fmiWctID/3ONjFMnEuaqqXdpUv4JaDW/XY/LTUIq4/BYwjaV4mFSY9Qa/FmOIyr2sRHlIJIa20W7MfZKTTQqW+lt0Q2EtsNoi5r4MfXHwy/qx6Huy5Mlyu8A2oLJNkNKIoXcmr+sSzqeHZHmiipqCC0ln/1rm4ZTx1wCYP8EemMyJn7rjad8Id7rMmsxSottVmm3LTzA7oS3IRQaEP/zkqZzSBlVXi44HuZW3EAEAVC6gtklV2avAMsbwBzg8ek1NDzfEG3Dv+KSniSGA0tuJrRVOs='
20
 
21
+ matchingScore: b'Qx3QwN0J3uFFHiS9XIp6P2zmvDTbNv2hXB1J/6fJDDNHBG2VCjhGiaYOQgkifMgu/Q5iBO19WWy0GAEekcaD9dGeacVk0dJRc5GvN4+RHJ25ESJeApuwGokHgweoEAJhydgjqPnnPxQkKTOUlETXdvfgF9QDvzhyhffbzU7rDzrzjhrYc+C1Zf+dsQfGjEboB8eLR9+IgL0lHDWt+BBk8yEk8tRA0XLFS+UNlha2GOajOVO8BcuxsvGg1wEbEYSJcQJXWza9WJ1Wk/wjzAQoGN2uNdE+A7mU+aRrpx4hcTvIIHfz4E0NyMUxLiLv0K+wtmMdjC7xLJD0T2O7VuvgBYgB4Y5wjWNtnD2IRilPMrV4/Dssyfvtl0v2maW9+nr4seXq6b8kGl4Rq42IVYQzZvqqWp1ghERQeRfgTZSPGe5l8aJ5U5Ejf9a9CzATrz018XHPpjevZyNTjmRo6b2+2YNVCTnCkMkrzkUJUowX7Qb+zbYG+30EvL5AJ/n9tjxHC7LwnaLftpMz/ppq0q+SFDEf411pe8I3lrEWoSCguReE5hsZlY0C/TDvN1khrABc+cttMFVnr1UDc3tQcF4Kgp9pRUZuyfUWiApOE+Km72iH2cNgYm/jtJq6kB39Ut3njBJUTn4hAylJ9m58PnQsWB40ZFDdaHJ++LA+YBLfeWQ='
22
+
23
+ bank: b'VoePsCXu7dmzZuwI0JCUGQXXDPOTS4i6g/ZYpp727Gx40wezdusISq59595JlIIQJ2FZCS5NVe3Gf946KiCxMupP+Iz1BsJTCjE2Ty5yjHSiUgwETBIriEEhGJaUHCxcwHUC5329weETUVSzS4tDXSTQlAALQ+6a7qsjrHZqSBUkb4W46LveMEavSgHgHGDvRGfr1sYAcVSeL1u1x7LFzUBQYqfV1RWqpu8oqcNi/ETJseeA2ni2NsFdcetBT/U46GsrS9RIeu0y6siXMJ8AR8/L7AaliRbtWWiMb1N5JklmvQ6GQcfy+cBZihzHNOr7JcalcrZ2BDVtRRl+1EGa4g04kyZcTV1js46tTtgitY6MFjYWq4Hv7KiwUTnu93LZU1V297gFMPaaOAqyGBDgf/4sy9pe+3H3+ITGZuoxsGa/k70YmlUuKVb8y6XEUN7tIJm0HC9l7FPcfmaE3Qc80YNdNLBNgzZD8zgMJ4qXuQxf2Wts97AjO3XkJIayKiWe8IAfHddFhsF+tWUY6Pyc6b2fWEPZemISXJNnOPkjX5NTuKWRYKZ66qITOQF0zMWguQoYPcuwDKvp+mMKM6jF9DUeC80PYpMhZNrx3eRlvQNTXpz0wrwJwI2Q+FVXok3vz25yXQFDGnFN/mt39R/8BmD2AFG5IoqvcypWF5CPZfw='
24
+
25
+ nameStatement: b'aCuZ8Tx2pYRJ6+uXF1WC/WwCSSY1LsKw5x9b6oCvUtorklPEQ5lVFpaGMrn4E2OxXHJ5vyU1IU9AlW44hxLaIYUBFkhaMIwkJQJ/E5hI7UcGiHvWcdmGHrqoVa3DhS4Uh+cxPOEz9KMMfD1j9utHONpcdOmfPRh8tCvHW5FlZcnUJZ0gGTUFsZhWGuJvq50gLQyihuJXT0B6gATCAZ377n0RqM0NvW3D+pgQvy8uESJ7dD57thVDi2UVHK8ej0Vuf5ZHtlx0MI+EnypXmNXA1D7PpVaGVMDKf82KoiYJcglQc53IP7ycdqw3PDhHG5MRVkhTZ6C055X4bTgqqSxSoRnZwzVdB0INFpZOKRgJobPeYMrDzD3s3Pg0jLF5S1mgs06JuG9bjppgudgkVrSE0wZf1+j8kkfKUo/hFj7dCxmkX+CuI8XTbZNzX117eJ4+aoX5M1cojByRoiyDZDtE0FrgGyDJJR7s54zNSxdyiRbw1kONFw7JX0Qmjd0e0gS+KJjXr69uE63XA93jwhv5CmVb95z7iQx8FHKWM96p78Rek1NC9r4YLhtCWj6SC3EbYUVqnidbnp3+A2Y0GmnBmKXwgdmdFSVcOR8Cpmxn8N+6bwPWcOTnqSeXKshRn7Uu7+VhIjVwudtTUSYr4hjoPrFp2/VyNXB4c5jHtE5x/qk='
26
+
27
+ address: b'XhC2JH0Txcj5K43hH3D99ZMzheaMZMSjJ1sRcXEhWqayh4eBJHDxiT1ZcUaPvhLvxlsmEyw5Q6DCGFPDbC8TidKPwrh8zvam3PSdN13omGFlHuGLhAwmClduY+2dzgW0via6zRHOz5tfpe2Sj/2mszmLa7Yf8/GOwKmsP3y2DYB1av4T/fpwosuBT9hVZDXQdUhlBekHYbW4aWEJLP+1Fq6SfxVxnw7eRlSrNjBzdpD3K7qnJxJl/jtICd9LlDZAdNIUv48prGtQ+XdgFmQOG7y0C2agi23dLO+LamPBn3vpJBoZJpjt3wej4wyqXPL6sMKcB5HS/6qY7Thh8qiI4AgnmjfJUFIlJBKdm89hu/BnvZeF/Y1sS+uqPT85ZoMidXs/+z7xYZ0d1buPZO6vaJ0bSImbTKV8CbFgBg0mCYjcH5Da91NWQ1tM/BE8xYPFb68ODGcJSiuI4+6jnrypiwFUHvxMLSt+tR6Mo6+n8XFTx0gQuD40gUmFtCZ7SEcpIXfFZRh8IFLqaCLB6hQGj3TXevxcdAeDvAMreF/LeBCf3rD6txQoQWaSEgIFKKfLpyOXEG04EIF2rOfwUT7Xhx3zRs051UuN1oa8GE2saBJodvzVVJAf9y/utdV61qP3DpMUGNwb93j1rxwf9j4HPv8d7d27I8bTN7xXEVU+bho='
28
+
29
+ asset: b'TSzUXkCvG7LBDUNb1K3ZYe/mlNsLjH0ylTY2nzkquaPM48PXeXxR9vOtAKzKQYk7Oq50rcDEntMd76CcS+PNhVvXbj9JTevNtPRHU7KjPgBmmSvuiDf6nSYGA049Q4U9Zf6AjrJi/hyKxXmi9mP8BkCSRsKhc4SwKW34VglgE6Ou2COds1rQ5+evNeRKMSpNI0rq+x+n+LftDUIa7phcqr+jtBB7Gq2KXcPmCloZcaiKyPQANNEPc8ZN1e9GiTy9ra0vqhuZjFxQdhJmcr6fWER1Xd6UYA9hNR88e08ldyUXZokJ6ssi0uNl69yR1ktZxrn3i2auyKvaaxMGmujvH/W6xSNHAIfx5PuQh87PmEmosodpK6VPvyjP1a9rDuRYbjWLI/kJe/OKWF5fCOx8+pj64vB4RZVYD6ZavfIsynGpAoY9y+ORmldi8DDesfDke87hbGcNyKyuISeIAkiBZz7TaHFiAgmMIxxcmyjLc0BB5B8UTyIF6p/gZBEcjqLtcqFBxPgUrgk4+1IuNaN8AgCk4xbq5X9a/QQ8sb59/NfkTB+DI62IqvCiXDKukNL1FRjJmpCqj138qI6bAiNY7OH1HeQrCvZ4FbZC3+cFhclSfBcRIEvsvtzrOkHIrScrPAhX0c1UZhU9XW7QQGyZqXs25pv14dWTTExu7/FntLs='
30
+
31
+ liability: b'F24LCecm2GPaWKHAWjrYIftVNCbbPYmvcwsMjruKqKfChBYSGQy+nrCOrIgp0wL5Asf4NF1qDuNeeQI5DuT4jTRxe5rR2Yxlqvzodhf6aGHWxzPz1VGZLaN7THRRq0EjL7ixNXz/6TeRKktP3Grhi+Jk5LUtGhIdUySGG1N96CINtE7a1zKfZBasyLyZ8PBqwYjOtSNX5naTAqPGCI6eGjqIY7fSKjb82EML8Xe+6uZVqqR2aNv2u8yzq8nglHH0J7i068SwpnDBT5jjdEH3pbyTsiLnmdHYQ8WGKG2SetSIMfxkkJqmCErbqH3F1uN/grzFsah8fHG6W/479yPcGsCnIFeUeCzPAihhHvUI21V39ADT9iG9V9sIXzPmo1BKT9JjKooRi+9+beOZT4YvjZz9pNbXanYL+mNW2qwimLDc3L4mrcoSQ0WlyXGefY2Sk8W9ePKtSrZRBIjzkRB10dAkicykCM0yts4sWxb2LMwFTJZRtXFV9Ay6xkiFx8mqeWHlZRThGx4aLcibIzTrwLBUzK3XeypUR/6ukpJvvaoDLlyoBQ1L+SE0lU0Wz8EDBYWMIrfw2dwP5jpT9ho0c+/bILr9DL93+1AVfwYqG8Uw1rVPPvLsZyOTIZqrUI2k8qZnsqIED41H9msI23YOVc665W/mvcafasjnQ9oTqL0='
32
+
33
+ statementDate: b'F0hBPc79PlZ3TE+jEY42dYDYQ3dzb5JrpLYmJaFgZS7Bumb7NjuyRKPPwLdILtYHfoSK2ZBQXLmplLY9J2k5UzMFwgQj/Oc/9tOJA6uHqCI0FjAx62Cu3PnECokXvKA1Cm1rNsY1IDt212eC5ghiiMF3pdKBW7jElIBsuDjJavtCnTF52QyXs7eVhAA0ZxhI4t0aP9TKEZINiGteczj36Of+UDgej3QR9/uK7Ds+FJFEQhMKWzNUM22AuXGGvhceAKiK2heOez8znCEtp/vvTRvYicYmRbNweQGjtrhCEVMeQ4UTGlOEqploOHuZ0ykU0TIUmjIPzPLUZsaKj8a4HK+ecEIntUWwCDtD76DHcbJ+ZzLLFYSfiIPTBTS88Pul2HqgnqilRufui1A6GEcBHluHQpnPrFth/FMB4Xuia8AvIkA5TfuKAneoq2Lwo3kD4/OrixfboF1d0NKm/tGmKhiCIWmhW6D1MtZv86rlVzsFrSS5UxjIQyHTuIYprsNerF7C0Uqy6llmdKkti2M9IJKc9T8pqReKevYmWxkkaQ9xd/orqpG9bPHYn3bcgj60FQ5x4zaytfF2rdHr6sTWCTfTb/Iz+yWBzmXS4kdV/V5lTPuGfp4uFyPI72nhPgGtQ6OCLuWkEmgjBK6VcZALY0TefZ8fByfsVLRNKJgnvVQ='
34
 
similarity_check.py CHANGED
@@ -2,10 +2,15 @@ from model1 import model1
2
  from model2 import model2
3
  import checkTool as ct
4
  import extract_pdf as pf
 
5
 
6
  # get info from hkid card
7
 
8
  def string_similarity(s1, s2): # Levenshtein distance algorithm
 
 
 
 
9
  if s1 == s2:
10
  return 100.0
11
 
@@ -32,7 +37,7 @@ def string_similarity(s1, s2): # Levenshtein distance algorithm
32
  similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
33
  return round(similarity, 1)
34
 
35
- def get_data(img1_path, img2_path, file_name):
36
 
37
  # img_fp = 'IMG_4495.jpg'
38
 
@@ -58,8 +63,9 @@ def get_data(img1_path, img2_path, file_name):
58
  # name = name.replace(' ', '')
59
  # name = name.lower()
60
 
61
- data = pf.get_info_from_bank(img2_path, file_name)
62
- name = data["name_on_bs"]
 
63
 
64
 
65
  ############# Similarity check ##############
 
2
  from model2 import model2
3
  import checkTool as ct
4
  import extract_pdf as pf
5
+ import extraction_data as ed
6
 
7
  # get info from hkid card
8
 
9
  def string_similarity(s1, s2): # Levenshtein distance algorithm
10
+ s1 = s1.replace(' ', '')
11
+ s1 = s1.lower()
12
+ s2 = s2.replace(' ', '')
13
+ s2 = s2.lower()
14
  if s1 == s2:
15
  return 100.0
16
 
 
37
  similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
38
  return round(similarity, 1)
39
 
40
+ def get_data(img1_path, img2_path):
41
 
42
  # img_fp = 'IMG_4495.jpg'
43
 
 
63
  # name = name.replace(' ', '')
64
  # name = name.lower()
65
 
66
+ # data = pf.get_info_from_bank(img2_path, file_name)
67
+ data = ed.get_info_from_bank(img2_path)
68
+ name = data["nameStatement"]
69
 
70
 
71
  ############# Similarity check ##############
test.py CHANGED
@@ -1,3 +1,4 @@
1
  import streamlit
 
2
 
3
- print(streamlit.__version__)
 
1
  import streamlit
2
+ import mediapipe
3
 
4
+ print(mediapipe.__version__)
test_ocr.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import easyocr
2
+ reader = easyocr.Reader(['en'])
3
+ result = reader.readtext('hangseng_page-0001.jpg', detail = 0)
4
+ print(result)
webapp.py CHANGED
@@ -11,12 +11,13 @@ import streamlit as st
11
  import requests
12
  import json
13
  import request_json.sbt_request_generator as sbt
 
 
 
14
 
15
- global data
16
- data = {}
17
-
18
 
19
  def main():
 
20
  # st.title("SBT Web Application")
21
  # today's date = get_today_date
22
 
@@ -30,22 +31,37 @@ def main():
30
  """
31
  st.markdown(html_temp, unsafe_allow_html=True)
32
 
 
 
 
 
 
 
33
  st.header("I. Similarity Check")
34
- image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], accept_multiple_files=True)
35
  if len(image_file) == 1:
36
- # print(image_file[0].name)
37
  image1 = Image.open(image_file[0])
38
  st.text("HKID card")
39
  st.image(image1)
 
 
 
 
 
 
 
40
  elif len(image_file) == 2:
41
  image1 = Image.open(image_file[0])
42
  st.text("HKID card")
43
  st.image(image1)
44
  image2 = Image.open(image_file[1])
45
- file_name = image_file[1].name
 
 
46
  st.text("Bank statement")
47
  st.image(image2)
48
-
 
49
  # if image_file2 is not None:
50
  # image2 = Image.open(image_file)
51
  # st.text("Bank statement")
@@ -60,15 +76,20 @@ def main():
60
  if st.button("Recognise"):
61
  with st.spinner('Wait for it...'):
62
  # global data
63
- data = sc.get_data(image1, image2, file_name)
64
-
65
- with open('data1.txt', 'w') as f:
66
- f.write(json.dumps(data))
67
  # data.update(sc.get_data(image1, image2, file_name))
68
- print(f'data inside {data}')
69
  # sbt.split_data(data)
 
 
70
  st.success('Done!')
71
- score = data["similarity_score"]
 
 
 
72
  #print(score)
73
  st.text(f'score: {score}')
74
  if (score>85):
@@ -76,6 +97,8 @@ def main():
76
  else:
77
  st.text(f'unmatched')
78
 
 
 
79
  st.header("IIa. HKID Data Extraction")
80
  st.text(f'Name: {data["name_on_id"]}') # name is without space
81
  st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
@@ -83,16 +106,21 @@ def main():
83
 
84
  st.header("IIb. Bank Statement Data Extraction")
85
  # st.write('------------From bank statement------------')
86
- st.text(f'Name: {data["name_on_bs"]}')
87
  st.text(f'Address: {data["address"]}')
88
  st.text(f'Bank: {data["bank"]}')
89
- st.text(f'Date: {data["date"]}')
90
- st.text(f'Asset: {data["asset"]} hkd')
91
- st.text(f'Liabilities: {data["liabilities"]} hkd')
92
  # result_img= detect_faces(our_image)
93
  # st.image(result_img)
94
  # print(f'data outside 1 {data}')
 
 
 
 
95
 
 
96
  st.header("II. Facial Recognition")
97
  run = st.checkbox('Run')
98
 
@@ -127,18 +155,27 @@ def main():
127
  print(score)
128
  if len(score) > 20:
129
  avg_score = sum(score) / len(score)
130
- st.write(f'{avg_score}')
131
- with open('data1.txt', 'w') as f:
132
- data_raw = f.read()
133
- data = json.loads(data_raw)
134
- data['avg_score'] = str(avg_score)
135
- f.write(json.dumps(data))
 
 
 
 
 
 
 
 
 
 
136
 
137
 
138
  # update_text(f'{demo.convert_distance_to_percentage(score, 0.45)}')
139
  else:
140
  st.write('Stopped')
141
-
142
 
143
  # print(f'the data is {data}')
144
 
@@ -149,7 +186,7 @@ def main():
149
 
150
  # st.header("IIIb. Bank Statement Data Extraction")
151
  # # st.write('------------From bank statement------------')
152
- # st.text(f'Name: {data["name_on_bs"]}')
153
  # st.text(f'Address: {data["address"]}')
154
  # st.text(f'Bank: {data["bank"]}')
155
  # st.text(f'Date: {data["date"]}')
@@ -160,7 +197,7 @@ def main():
160
  if st.button("Confirm"):
161
  # print(f'data outside 3 {data}')
162
  with st.spinner('Sending data...'):
163
- sbt.split_data(data)
164
  st.success('Done!')
165
 
166
  if __name__ == '__main__':
 
11
  import requests
12
  import json
13
  import request_json.sbt_request_generator as sbt
14
+ import pathlib
15
+ import os
16
+ import check_hkid_validity as chv
17
 
 
 
 
18
 
19
  def main():
20
+
21
  # st.title("SBT Web Application")
22
  # today's date = get_today_date
23
 
 
31
  """
32
  st.markdown(html_temp, unsafe_allow_html=True)
33
 
34
+ if 'hkid_image_validity' not in st.session_state:
35
+ st.session_state.hkid_image_validity = False
36
+
37
+ if 'data' not in st.session_state:
38
+ st.session_state['data'] = {}
39
+
40
  st.header("I. Similarity Check")
41
+ image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg', 'pdf'], accept_multiple_files=True)
42
  if len(image_file) == 1:
 
43
  image1 = Image.open(image_file[0])
44
  st.text("HKID card")
45
  st.image(image1)
46
+ image1.save('image/hkid.jpg', 'JPEG')
47
+ if chv.check_hkid('image/hkid.jpg'):
48
+ st.text("Valid HKID card.")
49
+ st.session_state.hkid_image_validity = True
50
+ else:
51
+ st.text("Invalid HKID card. Please upload again!")
52
+ st.session_state.hkid_image_validity = False
53
  elif len(image_file) == 2:
54
  image1 = Image.open(image_file[0])
55
  st.text("HKID card")
56
  st.image(image1)
57
  image2 = Image.open(image_file[1])
58
+ # image2 = image_file[1]
59
+ # image2.save('image/hkid.jpg', 'JPEG')
60
+ # file_name = image_file[1].name
61
  st.text("Bank statement")
62
  st.image(image2)
63
+
64
+ print(f"the id is: {st.session_state.hkid_image_validity}")
65
  # if image_file2 is not None:
66
  # image2 = Image.open(image_file)
67
  # st.text("Bank statement")
 
76
  if st.button("Recognise"):
77
  with st.spinner('Wait for it...'):
78
  # global data
79
+ data = sc.get_data(image1, image2)
80
+ # data = ed.get_info_from_bank('hsbc_one_account.pdf')
81
+ # with open('data1.txt', 'w') as f:
82
+ # f.write(json.dumps(data))
83
  # data.update(sc.get_data(image1, image2, file_name))
84
+ # print(f'data inside {data}')
85
  # sbt.split_data(data)
86
+ if 'data' in st.session_state:
87
+ st.session_state['data'] = data
88
  st.success('Done!')
89
+ # if "similarity_score" not in data.keys():
90
+ # data["similarity_score"] = "0"
91
+ score = int(st.session_state['data']['similarity_score'])
92
+ # score = int(data["similarity_score"])
93
  #print(score)
94
  st.text(f'score: {score}')
95
  if (score>85):
 
97
  else:
98
  st.text(f'unmatched')
99
 
100
+
101
+ data = st.session_state['data']
102
  st.header("IIa. HKID Data Extraction")
103
  st.text(f'Name: {data["name_on_id"]}') # name is without space
104
  st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
 
106
 
107
  st.header("IIb. Bank Statement Data Extraction")
108
  # st.write('------------From bank statement------------')
109
+ st.text(f'Name: {data["nameStatement"]}')
110
  st.text(f'Address: {data["address"]}')
111
  st.text(f'Bank: {data["bank"]}')
112
+ st.text(f'Date: {data["statementDate"]}')
113
+ st.text(f'Asset: {data["totalAsset"]} hkd')
114
+ st.text(f'Liabilities: {data["totalLiability"]} hkd')
115
  # result_img= detect_faces(our_image)
116
  # st.image(result_img)
117
  # print(f'data outside 1 {data}')
118
+
119
+ if 'data' in st.session_state:
120
+ tempout = st.session_state['data']
121
+ print(f'hello: {tempout}')
122
 
123
+
124
  st.header("II. Facial Recognition")
125
  run = st.checkbox('Run')
126
 
 
155
  print(score)
156
  if len(score) > 20:
157
  avg_score = sum(score) / len(score)
158
+ st.write(avg_score)
159
+ # st.write(f'{demo.convert_distance_to_percentage(avg_score, 0.45)}')
160
+ camera.release()
161
+ run = not run
162
+ st.session_state['data']['avg_score'] = str(avg_score)
163
+ # with open('data1.txt', 'r') as f:
164
+ # if f is not None:
165
+ # data_raw = f.read()
166
+ # data = json.loads(data_raw)
167
+ # data['avg_score'] = str(avg_score)
168
+ # else:
169
+ # data = {}
170
+
171
+
172
+ # with open('data1.txt', 'w') as f:
173
+ # f.write(json.dumps(data))
174
 
175
 
176
  # update_text(f'{demo.convert_distance_to_percentage(score, 0.45)}')
177
  else:
178
  st.write('Stopped')
 
179
 
180
  # print(f'the data is {data}')
181
 
 
186
 
187
  # st.header("IIIb. Bank Statement Data Extraction")
188
  # # st.write('------------From bank statement------------')
189
+ # st.text(f'Name: {data["nameStatement"]}')
190
  # st.text(f'Address: {data["address"]}')
191
  # st.text(f'Bank: {data["bank"]}')
192
  # st.text(f'Date: {data["date"]}')
 
197
  if st.button("Confirm"):
198
  # print(f'data outside 3 {data}')
199
  with st.spinner('Sending data...'):
200
+ sbt.split_data(st.session_state['data'])
201
  st.success('Done!')
202
 
203
  if __name__ == '__main__':