Mitul Mohammad Abdullah Al Mukit committed on
Commit
1f72938
·
1 Parent(s): ac8d65b

first commit

Browse files
Files changed (43) hide show
  1. .gitignore +5 -0
  2. Visualization_utilities.py +189 -0
  3. __pycache__/Visualization_utilities.cpython-311.pyc +0 -0
  4. __pycache__/Visualization_utilities.cpython-39.pyc +0 -0
  5. __pycache__/checkTool.cpython-311.pyc +0 -0
  6. __pycache__/checkTool.cpython-39.pyc +0 -0
  7. __pycache__/data_encryption.cpython-311.pyc +0 -0
  8. __pycache__/data_encryption.cpython-39.pyc +0 -0
  9. __pycache__/demo.cpython-311.pyc +0 -0
  10. __pycache__/demo.cpython-39.pyc +0 -0
  11. __pycache__/extract_pdf.cpython-311.pyc +0 -0
  12. __pycache__/extract_pdf.cpython-39.pyc +0 -0
  13. __pycache__/imageSegmentation.cpython-311.pyc +0 -0
  14. __pycache__/imageSegmentation.cpython-39.pyc +0 -0
  15. __pycache__/model1.cpython-311.pyc +0 -0
  16. __pycache__/model1.cpython-39.pyc +0 -0
  17. __pycache__/model2.cpython-311.pyc +0 -0
  18. __pycache__/model2.cpython-39.pyc +0 -0
  19. __pycache__/similarity_check.cpython-311.pyc +0 -0
  20. __pycache__/similarity_check.cpython-39.pyc +0 -0
  21. __pycache__/webapp.cpython-311.pyc +0 -0
  22. blaze_face_short_range.tflite +3 -0
  23. checkTool.py +227 -0
  24. data1.txt +1 -0
  25. data_encryption.py +12 -0
  26. demo.py +185 -0
  27. extract_pdf.py +139 -0
  28. extraction_data.py +96 -0
  29. imageSegmentation.py +60 -0
  30. model1.py +46 -0
  31. model2.py +46 -0
  32. pubkey.pem +13 -0
  33. request_json/__pycache__/sbt_request_generator.cpython-311.pyc +0 -0
  34. request_json/__pycache__/sbt_request_generator.cpython-39.pyc +0 -0
  35. request_json/request_legalDocument.json +75 -0
  36. request_json/sbt_request_generator.py +108 -0
  37. requirements.txt +15 -0
  38. sbt/deployment.py +3 -0
  39. sbt_request.txt +22 -0
  40. similarity_check.py +89 -0
  41. test.py +3 -0
  42. text_reader_v2.py +18 -0
  43. webapp.py +209 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ image/*
2
+ saved/*
3
+ image
4
+ saved
5
+ .DS_Store
Visualization_utilities.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import mediapipe as mp
3
+ from mediapipe import solutions
4
+ from mediapipe.framework.formats import landmark_pb2
5
+ import numpy as np
6
+ import math
7
+
8
+ # visualization libraries
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib.animation as animation
11
+ from matplotlib import style
12
+
13
def draw_eyes_on_image(rgb_image, detection_result):
    """Draw eye-opening lines on the image and return per-eye aspect ratios.

    For each detected face, the vertical distance (landmarks 159-145 for the
    left eye, 386-374 for the right) and the horizontal distance (33-133 and
    362-263) are measured in pixels and a line is drawn along each of them.

    NOTE(review): indentation was reconstructed from a flattened diff view;
    the extent of the face loop and the position of the final ``return`` are
    a best guess -- confirm against the real file.

    Args:
        rgb_image: image array whose ``shape`` is (height, width, channels).
        detection_result: object exposing ``face_landmarks``, one landmark
            sequence per face; each landmark has ``x``, ``y`` and ``z``
            (x/y are multiplied by width/height, so presumably normalized).

    Returns:
        ``(annotated_image, leftRatio, rightRatio)`` where each ratio is
        ``vertical distance / horizontal distance * 100``.
    """

    # return rgb_image, 0, 0

    # canonical_face_model_uv_visualization in the below link
    # https://github.com/google/mediapipe/blob/a908d668c730da128dfa8d9f6bd25d519d006692/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png
    # Eyelid landmark indices; only referenced by the commented-out code below.
    left_eyes_bottom_list = [33, 7, 163, 144, 145, 153, 154, 155, 133]
    left_eyes_top_list = [246, 161, 160, 159, 158, 157, 173]
    right_eyes_bottom_list = [362, 382, 381, 380, 374, 373, 390, 249, 263]
    right_eyes_top_list = [398, 384, 385, 386, 387, 388, 466]

    face_landmarks_list = detection_result.face_landmarks
    annotated_image = np.copy(rgb_image)

    # Image size, used to turn landmark coordinates into pixel positions
    # (original note: "We resize image to 640 * 360" -- presumably done by the caller).
    height, width, channels = rgb_image.shape

    # Loop through the detected faces to visualize. Actually, if we detect more than two faces, we will require user closer to the camera
    for idx in range(len(face_landmarks_list)):
        face_landmarks = face_landmarks_list[idx]

        # Pixel-space [x, y, z] triple for every landmark of this face.
        mlist = []
        for landmark in face_landmarks:
            mlist.append([int(landmark.x * width), int(landmark.y * height), landmark.z])

        narray = np.copy(mlist)

        # Vertical line
        #
        #
        # Pick the largest difference (middle of the eyes)
        leftUp = narray[159]
        leftDown = narray[145]
        rightUp = narray[386]
        rightDown = narray[374]

        # compute left eye distance (vertical)
        leftUp_x = int(leftUp[0])
        leftUp_y = int(leftUp[1])
        leftDown_x = int(leftDown[0])
        leftDown_y = int(leftDown[1])
        leftVerDis = math.dist([leftUp_x, leftUp_y],[leftDown_x, leftDown_y])

        # compute right eye distance (vertical)
        rightUp_x = int(rightUp[0])
        rightUp_y = int(rightUp[1])
        rightDown_x = int(rightDown[0])
        rightDown_y = int(rightDown[1])
        rightVerDis = math.dist([rightUp_x, rightUp_y],[rightDown_x, rightDown_y])

        # print(f'leftVerDis: {leftVerDis} and rightVerDis: {rightVerDis}')

        # draw a line from left eye top to bottom
        # NOTE(review): cv2.line is given rgb_image, not the copy made above, so
        # the caller's array is presumably modified and the copy discarded -- confirm.
        annotated_image = cv2.line(rgb_image, (int(leftUp_x), int(leftUp_y)), (int(leftDown_x), int(leftDown_y)), (0, 200, 0), 1)

        # draw a line from right eye top to bottom
        annotated_image = cv2.line(rgb_image, (int(rightUp_x), int(rightUp_y)), (int(rightDown_x), int(rightDown_y)), (0, 200, 0), 1)
        #
        #
        # Horizontal line
        #
        #
        # Pick the largest difference (eye corners)
        leftLeft = narray[33]
        leftRight = narray[133]
        rightLeft = narray[362]
        rightRight = narray[263]

        # compute left eye distance (horizontal)
        leftLeft_x = int(leftLeft[0])
        leftLeft_y = int(leftLeft[1])
        leftRight_x = int(leftRight[0])
        leftRight_y = int(leftRight[1])
        leftHorDis = math.dist([leftLeft_x, leftLeft_y],[leftRight_x, leftRight_y])

        # compute right eye distance (horizontal)
        rightLeft_x = int(rightLeft[0])
        rightLeft_y = int(rightLeft[1])
        rightRight_x = int(rightRight[0])
        rightRight_y = int(rightRight[1])
        rightHorDis = math.dist([rightLeft_x, rightLeft_y],[rightRight_x, rightRight_y])

        # print(f'leftHorDis: {leftHorDis} and rightHorDis: {rightHorDis}')

        # draw a line across the left eye, corner to corner
        annotated_image = cv2.line(rgb_image, (int(leftLeft_x), int(leftLeft_y)), (int(leftRight_x), int(leftRight_y)), (0, 200, 0), 1)

        # draw a line across the right eye, corner to corner
        annotated_image = cv2.line(rgb_image, (int(rightLeft_x), int(rightLeft_y)), (int(rightRight_x), int(rightRight_y)), (0, 200, 0), 1)
        #
        #
        #
        #
        # print(f'leftRatio: {leftVerDis/leftHorDis} and rightRatio: {rightVerDis/rightHorDis}')

        # Eye aspect ratio as a percentage of the eye width.
        # NOTE(review): a zero horizontal distance would raise ZeroDivisionError here.
        leftRatio = leftVerDis/leftHorDis*100
        rightRatio = rightVerDis/rightHorDis*100


        # left_eyes_bottom = [narray[x] for x in left_eyes_bottom_list]
        # left_eyes_top = [narray[x] for x in left_eyes_top_list]
        # right_eyes_bottom = [narray[x] for x in right_eyes_bottom_list]
        # right_eyes_top = [narray[x] for x in right_eyes_top_list]

        # for p in left_eyes_bottom:
        #     annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)

        # for p in left_eyes_top:
        #     annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)

        # for p in right_eyes_bottom:
        #     annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)

        # for p in right_eyes_top:
        #     annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)


    # NOTE(review): leftRatio/rightRatio are only assigned while processing a
    # face, so an empty face_landmarks list presumably fails here -- confirm.
    return annotated_image, leftRatio, rightRatio
131
+
132
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected face mesh drawn on it.

    Three layers are drawn per face, in this order: tesselation, contours
    and irises, each with MediaPipe's default connection style.
    """
    canvas = np.copy(rgb_image)

    # If more than one face shows up, the caller is expected to ask the user
    # to move closer to the camera; here every face is simply drawn.
    for face in detection_result.face_landmarks:
        # The drawing helper takes a NormalizedLandmarkList proto.
        mesh_proto = landmark_pb2.NormalizedLandmarkList()
        mesh_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in face
        ])

        mesh = mp.solutions.face_mesh
        styles = mp.solutions.drawing_styles
        layers = (
            (mesh.FACEMESH_TESSELATION, styles.get_default_face_mesh_tesselation_style),
            (mesh.FACEMESH_CONTOURS, styles.get_default_face_mesh_contours_style),
            (mesh.FACEMESH_IRISES, styles.get_default_face_mesh_iris_connections_style),
        )
        for connections, make_style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=canvas,
                landmark_list=mesh_proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style())

    return canvas
169
+
170
def plot_face_blendshapes_bar_graph(face_blendshapes):
    """Show a horizontal bar chart of blendshape scores.

    Each item of ``face_blendshapes`` must expose ``category_name`` and
    ``score``. Bars are listed top-to-bottom in input order and labelled
    with the score to four decimals.
    """
    names = [category.category_name for category in face_blendshapes]
    scores = [category.score for category in face_blendshapes]
    # Input is expected to be ordered by decreasing score already.
    ranks = range(len(names))

    fig, ax = plt.subplots(figsize=(12, 12))
    bars = ax.barh(ranks, scores, label=[str(rank) for rank in ranks])
    ax.set_yticks(ranks, names)
    ax.invert_yaxis()

    # Write the numeric value at the end of each bar.
    for patch, value in zip(bars.patches, scores):
        label_x = patch.get_x() + patch.get_width()
        plt.text(label_x, patch.get_y(), f"{value:.4f}", va="top")

    ax.set_xlabel('Score')
    ax.set_title("Face Blendshapes")
    plt.tight_layout()
    plt.show()
__pycache__/Visualization_utilities.cpython-311.pyc ADDED
Binary file (8.96 kB). View file
 
__pycache__/Visualization_utilities.cpython-39.pyc ADDED
Binary file (4.37 kB). View file
 
__pycache__/checkTool.cpython-311.pyc ADDED
Binary file (9.76 kB). View file
 
__pycache__/checkTool.cpython-39.pyc ADDED
Binary file (4.84 kB). View file
 
__pycache__/data_encryption.cpython-311.pyc ADDED
Binary file (1.11 kB). View file
 
__pycache__/data_encryption.cpython-39.pyc ADDED
Binary file (625 Bytes). View file
 
__pycache__/demo.cpython-311.pyc ADDED
Binary file (4.65 kB). View file
 
__pycache__/demo.cpython-39.pyc ADDED
Binary file (2.61 kB). View file
 
__pycache__/extract_pdf.cpython-311.pyc ADDED
Binary file (5.92 kB). View file
 
__pycache__/extract_pdf.cpython-39.pyc ADDED
Binary file (3.21 kB). View file
 
__pycache__/imageSegmentation.cpython-311.pyc ADDED
Binary file (2.55 kB). View file
 
__pycache__/imageSegmentation.cpython-39.pyc ADDED
Binary file (1.49 kB). View file
 
__pycache__/model1.cpython-311.pyc ADDED
Binary file (1.6 kB). View file
 
__pycache__/model1.cpython-39.pyc ADDED
Binary file (902 Bytes). View file
 
__pycache__/model2.cpython-311.pyc ADDED
Binary file (1.76 kB). View file
 
__pycache__/model2.cpython-39.pyc ADDED
Binary file (968 Bytes). View file
 
__pycache__/similarity_check.cpython-311.pyc ADDED
Binary file (3.12 kB). View file
 
__pycache__/similarity_check.cpython-39.pyc ADDED
Binary file (1.75 kB). View file
 
__pycache__/webapp.cpython-311.pyc ADDED
Binary file (4.58 kB). View file
 
blaze_face_short_range.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4578f35940bf5a1a655214a1cce5cab13eba73c1297cd78e1a04c2380b0152f
3
+ size 229746
checkTool.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def check_integer(string):
    """Return True when ``string`` contains at least one digit character."""
    return any(ch.isdigit() for ch in string)
11
+
12
def check_alpha(string):
    """Return True when ``string`` holds only ASCII letters and spaces.

    An empty string is accepted.
    """
    return all(
        ch == ' ' or 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'
        for ch in string
    )
17
+
18
def is_chinese_name(text):
    """Heuristically detect a romanised name such as ``'TAMKing Man'``.

    Text longer than 40 characters is rejected. Otherwise the first eight
    characters must hold at least three upper-case letters, two lower-case
    letters and one space. The counts can only grow as the prefix gets
    longer, so testing the 8-character prefix covers every shorter prefix.
    """
    if len(text) > 40:
        return False

    head = text[:8]
    capitals = sum(1 for ch in head if 'A' <= ch <= 'Z')
    smalls = sum(1 for ch in head if 'a' <= ch <= 'z')
    blanks = sum(1 for ch in head if ch == ' ')
    return capitals >= 3 and smalls >= 2 and blanks >= 1
39
+
40
def seperate_name(text):
    """Return ``text`` lower-cased with every space character removed.

    Despite the name, no word splitting is performed; the earlier
    word-splitting attempt was dead (commented-out) code.
    """
    return "".join(text.split(' ')).lower()
65
+
66
def validate_hkid(hkid):
    """Check the check digit of an HKID number (parentheses are optional).

    The number is one optional prefix letter, one letter, six digits and a
    check character (``0-9`` or ``A``). Characters are mapped to values
    (digits 0-9, letters A-Z to 10-35, a missing prefix to 36), weighted
    9 down to 1, and the weighted sum must be divisible by 11.
    """
    stripped = hkid.replace('(', '').replace(')', '')
    parsed = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', stripped)
    if parsed is None:
        return False

    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    prefix, letter, digits, check = parsed.groups()
    symbols = [prefix, letter, *digits, check]

    total = 0
    for weight, symbol in zip(range(9, 0, -1), symbols):
        # A missing prefix letter counts as 36, one past 'Z'.
        code = len(alphabet) if symbol is None else alphabet.index(symbol)
        total += code * weight
    return total % 11 == 0
82
+
83
def format_HKID(hkid):
    """Return ``hkid`` in canonical form with the check digit in parentheses.

    Existing parentheses are removed first, so ``'Y3321779'`` and
    ``'Y332177(9)'`` both give ``'Y332177(9)'``. Two-letter prefixes are
    supported (``'AB1234567'`` -> ``'AB123456(7)'``); the previous
    index-based loop put the opening parenthesis after the check digit and
    never closed it for those.

    Raises:
        AttributeError: if ``hkid`` does not have the HKID shape. Validate
            with ``validate_hkid`` first.
    """
    hkid = hkid.replace('(', '').replace(')', '')
    match = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', hkid)

    # The first group is None when the number has a single prefix letter.
    prefix, letter, digits, check_digit = match.groups()
    return f"{prefix or ''}{letter}{digits}({check_digit})"
106
+
107
def format_issuedate(issuedate):
    """Strip every ``(``, ``)`` and ``C`` character from an issue-date string.

    ``C`` is removed as well, presumably because OCR misreads the opening
    parenthesis as that letter.
    """
    unwanted = str.maketrans('', '', '()C')
    return issuedate.translate(unwanted)
111
+
112
def is_string_integer(string):
    """Return True when ``int(string)`` succeeds, False on ``ValueError``."""
    try:
        int(string)
    except ValueError:
        # Not parseable as an integer literal.
        return False
    else:
        return True
118
+
119
def check_issuedate(text):
    """Return True when ``text`` is an issue date shaped like ``DD-DD``.

    A leading ``(`` (or ``C``) and a trailing ``)`` are tolerated and
    stripped before checking. What remains must be exactly two ASCII digits,
    a dash, and two ASCII digits.

    Compared with the previous version, the contradictory length guard
    (``len < 5 and len > 7``, never true) is gone, and the digit check no
    longer goes through ``int()``, which accepted signs, surrounding
    whitespace and extra dashes (e.g. ``'+1-23'`` or ``'1---2'``).
    """
    if text[:1] == '(':
        text = text.replace('(', '')
    elif text[:1] == 'C':
        text = text.replace('C', '')
    if text[-1:] == ')':
        text = text.replace(')', '')

    if len(text) != 5 or text[2] != '-':
        return False

    head, tail = text[:2], text[3:]
    return all(part.isascii() and part.isdigit() for part in (head, tail))
136
+
137
def print_info(name, valid_hkid, hkid, issuedate):
    """Print the extracted ID-card fields, one per line, to stdout."""
    report = (
        f'Name: {name}',
        f'HKID: {hkid} and validity: {valid_hkid}',
        f'Date of issue: {issuedate}',
    )
    for line in report:
        print(line)
141
+
142
def is_comma_present(string):
    """Return True when ``string`` contains a comma."""
    found = ',' in string
    return found
144
+
145
def longest_common_subsequence(s1, s2):
    """Return one longest common subsequence of ``s1`` and ``s2`` as a string.

    Dynamic programming: ``table[r][c]`` is the LCS length of ``s1[:r]`` and
    ``s2[:c]``. The subsequence is then recovered by walking back from the
    bottom-right corner; on ties the walk moves left (drops a character of
    ``s2``).
    """
    rows, cols = len(s1), len(s2)
    table = [[0] * (cols + 1) for _ in range(rows + 1)]

    # Fill the length table row by row.
    for r, left in enumerate(s1, start=1):
        for c, right in enumerate(s2, start=1):
            if left == right:
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    # Walk back through the table, collecting matches last-to-first.
    picked = []
    r, c = rows, cols
    while r > 0 and c > 0:
        if s1[r - 1] == s2[c - 1]:
            picked.append(s1[r - 1])
            r -= 1
            c -= 1
        elif table[r - 1][c] > table[r][c - 1]:
            r -= 1
        else:
            c -= 1

    return ''.join(reversed(picked))
174
+
175
def combine_info(info1, info2):
    """Merge the ID-card fields extracted by two models into one record.

    Both inputs are ``[name, validity, hkid, issuedate]`` where ``validity``
    is the string ``'True'`` or ``'False'``. The result always has those
    four positions:

    * name: the shared value, the non-empty one, or otherwise the longest
      common subsequence of the two names;
    * validity / hkid: taken from whichever model validated the HKID; when
      both validated but disagree the pair is ``'False'`` /
      ``'Suspicous HKID'``; when neither validated the pair is ``'False'``
      plus the first non-empty HKID reading. The original code appended
      nothing in that last case, which left a two-item list and shifted
      the issue date into the wrong position;
    * issuedate: the shared value or ``'Unmatched issuedate'``.
    """
    combined_info = []

    # NOTE(review): these debug prints write the raw name and HKID to stdout.
    print(info1)
    print(info2)

    name1, name2 = info1[0], info2[0]
    if name1 == name2:
        combined_info.append(name1)
    elif name1 == '':
        combined_info.append(name2)
    elif name2 == '':
        combined_info.append(name1)
    else:
        combined_info.append(longest_common_subsequence(name1, name2))

    valid1, valid2 = info1[1], info2[1]
    if valid1 == 'True' and valid2 == 'False':
        combined_info.extend([valid1, info1[2]])
    elif valid1 == 'False' and valid2 == 'True':
        combined_info.extend([valid2, info2[2]])
    elif valid1 == 'True' and valid2 == 'True':
        if info1[2] == info2[2]:
            combined_info.extend([valid1, info1[2]])
        else:
            combined_info.extend(['False', 'Suspicous HKID'])
    else:
        # Neither model produced a valid HKID: keep the record four fields
        # long and report the first reading that is not empty.
        combined_info.extend(['False', info1[2] if info1[2] else info2[2]])

    if info1[3] == info2[3]:
        combined_info.append(info1[3])
    else:
        combined_info.append('Unmatched issuedate')

    return combined_info
213
+
214
+
215
+
216
+ # info1 = ['', 'True', 'Z683365(5)', '06-96']
217
+ # info2 = ['lok wing', 'False', 'Z68336505)', '06-96']
218
+ # info = combine_info(info1, info2)
219
+ # print_info(*info)
220
+
221
+
222
+ # text = 'TAMKing Man'
223
+ # if is_comma_present(text):
224
+ # text = text.replace(',', '')
225
+ # if not check_integer(text):
226
+ # if check_alpha(text) and is_chinese_name(text):
227
+ # name = seperate_name(text)
data1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name_on_bs": "laupakching", "address": "rm a,33/f, blk 2b ocean pride 100 tai ho road tsuen wan nt ", "bank": "hangseng", "date": "4feb 2023", "asset": 117923.2, "liabilities": "16965.04", "similarity_score": 100.0, "name_on_id": "laupakching", "hkid": "Y332177(9)", "validity": "True", "issue_date": "11-95"}
data_encryption.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import rsa
3
+
4
# Load the RSA public key once, at import time; encrypt() below uses it.
with open("pubkey.pem", 'rb') as key_file:
    pubKey = rsa.PublicKey.load_pkcs1(key_file.read())
6
+
7
def encrypt(data):
    """RSA-encrypt every value of ``data`` in place and return the dict.

    Each value must be a ``str``; it is UTF-8 encoded, encrypted with the
    module-level public key and stored back as base64 ``bytes``.

    The function used to return ``None`` although callers assign its
    result; it now returns the same (mutated) dict, which stays compatible
    with callers that ignored the return value.

    NOTE(review): RSA limits the plaintext length to roughly the key size
    minus padding, so long values presumably fail -- confirm against the
    key in pubkey.pem.
    """
    for key, value in data.items():
        value_bytes = value.encode("utf-8")
        encrypted_value = rsa.encrypt(value_bytes, pubKey)
        # Only existing keys are reassigned, which is safe while iterating.
        data[key] = base64.b64encode(encrypted_value)
    return data
demo.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import face_recognition
2
+ import cv2
3
+ import numpy as np
4
+
5
+ import imageSegmentation
6
+
7
+ from mediapipe.tasks.python import vision
8
+ import Visualization_utilities as vis
9
+
10
+ # Get a reference to webcam #0 (the default one)
11
+ # video_capture = cv2.VideoCapture(0)
12
+
13
+ # Load a sample picture and learn how to recognize it.
14
+
15
def get_face_encoding(path):
    """Crop the ID card found under ``path`` and return its first face encoding.

    The cropped image is written to ``saved/HKID.jpg`` and read back from
    there. Indexing ``[0]`` raises ``IndexError`` when no face is found in
    the cropped image.
    """
    cv2.imwrite('saved/HKID.jpg', imageSegmentation.auto_cropping(path))
    card_image = face_recognition.load_image_file("saved/HKID.jpg")
    encodings = face_recognition.face_encodings(card_image)
    return encodings[0]
21
+
22
+ # HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
23
+ # HKID_face_encoding = face_recognition.face_encodings(HKID_image)[0]
24
+
25
+ # Create arrays of known face encodings and their names
26
+ # known_face_encodings = [
27
+ # HKID_face_encoding
28
+ # ]
29
+ # known_face_names = [
30
+ # "Marco"
31
+ # ]
32
+
33
+ # Initialize some variables
34
+ # face_locations = []
35
+ # face_encodings = []
36
+ # face_names = []
37
+ # process_this_frame = True
38
+
39
+ # score = []
40
+
41
+ # faces = 0 # number of faces
42
+
43
+ # while True:
44
+ # # Grab a single frame of video
45
+ # ret, frame = video_capture.read()
46
+
47
+
48
+
49
+ # # # Draw a label with a name below the face
50
+ # # cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
51
+ # # font = cv2.FONT_HERSHEY_DUPLEX
52
+ # # cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
53
+
54
+ # # Display the resulting image
55
+ # cv2.imshow('Video', frame)
56
+
57
+ # # Hit 'q' on the keyboard to quit!
58
+ # if cv2.waitKey(1) & 0xFF == ord('q'):
59
+ # break
60
+
61
+
62
def process_frame(frame, process_this_frame, face_locations, faces, face_names, score):
    """Recognise faces in one video frame and draw the results on it.

    The face encoding of the ID card is used as the single known face,
    labelled "recognized". When ``process_this_frame`` is true, the frame is
    shrunk to 1/4 size, faces are located and compared with it, and each
    face is named "recognized" or "Unknown". Boxes, name labels and a
    warning for more than one face are then drawn on ``frame``.

    NOTE(review): indentation was reconstructed from a flattened diff view;
    in particular whether the drawing code sits inside ``if
    process_this_frame`` is a best guess -- confirm against the real file.

    Args:
        frame: image array; read as BGR for recognition.
        process_this_frame: whether to run detection on this frame.
        face_locations, faces, face_names: state from the previous call,
            reused when detection is skipped.
        score: list of face distances for accepted matches.

    Returns:
        ``(frame, process_this_frame, face_locations, faces, face_names, score)``.
    """

    # NOTE(review): this re-crops and re-encodes the ID image on every call.
    hkid_face_encoding = get_face_encoding("image")

    known_face_encodings = [
        hkid_face_encoding
    ]

    known_face_names = [
        "recognized"
    ]

    # Only process every other frame of video to save time
    if process_this_frame:
        face_names = []
        # Resize frame of video to 1/4 size for faster face recognition processing
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        # Find all the faces and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        faces = len(face_encodings) # number of faces

        for face_encoding in face_encodings:
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"

            # # If a match was found in known_face_encodings, just use the first one.
            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]

            # Or instead, use the known face with the smallest distance to the new face
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            print(face_distances)
            # Accept only matches closer than 0.45, stricter than compare_faces alone.
            if matches[best_match_index] and face_distances[best_match_index] < 0.45:
                score.append(face_distances[best_match_index])
                name = known_face_names[best_match_index]
            else:
                # A miss restarts the score history. This rebinds the local name
                # only; the caller's list is not cleared, it must use the returned one.
                score = []

            face_names.append(name)

    # if len(score) > 20:
    #     avg_score = sum(score) / len(score)

    # Display the results
    if faces > 1 :
        # Define the text and font properties
        text = "More than 1 person detected!"
        font = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 1
        font_thickness = 2

        # Calculate the text size
        window_height = frame.shape[0]
        window_width = frame.shape[1]
        text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness)

        # Calculate the text position (horizontally centred, at the bottom edge)
        text_x = int((window_width - text_size[0]) / 2)
        text_y = window_height - int(text_size[1] / 2)

        cv2.putText(frame, text, (text_x, text_y), font, font_scale, (255, 255, 255), font_thickness, cv2.LINE_AA)

    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Scale back up face locations since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (65, 181, 41), 4)

        # Define the name box properties (name_box_alpha is not used below)
        name_box_color = (44, 254, 0)
        name_box_alpha = 0.7
        name_box_thickness = -1

        # Define the text properties
        font = cv2.FONT_HERSHEY_TRIPLEX
        font_scale = 1
        font_thickness = 2
        text_color = (255, 255, 255)

        # Calculate the text size (the result is not used below)
        text_width, text_height = cv2.getTextSize(name, font, font_scale, font_thickness)[0]

        # Draw the name box (the same filled rectangle is drawn twice)
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom),
                      name_box_color, name_box_thickness)
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom),
                      name_box_color, cv2.FILLED)

        # Draw the name text
        cv2.putText(frame, name, (left + 70, bottom - 6), font, font_scale, text_color, font_thickness)

    # NOTE(review): self-assignment is a no-op; the flag is never toggled here,
    # so "every other frame" presumably has to be handled by the caller.
    process_this_frame = process_this_frame

    # Swap the channel order of the annotated frame before returning it.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    return frame, process_this_frame, face_locations, faces, face_names, score
170
+
171
def convert_distance_to_percentage(distance, threshold):
    """Map a face distance to a similarity percentage.

    Smaller distances mean a closer match. Distances below ``threshold``
    map linearly onto 80-100 (0 gives 100; values just under the threshold
    approach 80). Distances at or above the threshold give
    ``(1 - distance) * 100``.

    The previous formula added ``distance / 0.45 * 20`` to 80, so a perfect
    match scored 80 and a borderline one almost 100, and it ignored the
    ``threshold`` argument in favour of a hard-coded 0.45.
    """
    if distance < threshold:
        closeness = 1 - distance / threshold
        return 80 + closeness * 20
    return (1 - distance) * 100
178
+
179
+ # percent = convert_distance_to_percentage(avg_score, 0.45)
180
+
181
+ # print(f'avg_score = {percent:.2f}% : Approved!')
182
+
183
+ # # Release handle to the webcam
184
+ # video_capture.release()
185
+ # cv2.destroyAllWindows()
extract_pdf.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ################# cnocr ##################
2
+ from cnocr import CnOcr
3
+
4
def validate(text):
    """Return ``text`` with all spaces and commas removed."""
    return ''.join(ch for ch in text if ch not in (' ', ','))
9
+
10
def check_bank(text):
    """Return the first known bank key found in ``text``, or ``False``.

    Spaces are removed before matching, so ``'hang seng bank'`` matches
    ``'hangseng'``. Keys are tried in the order bankofchina, hangseng,
    hsbc, sc.

    Every key is now tested before giving up. On the literal reading of the
    old code the ``else: return False`` sat inside the loop, so only the
    first key (``'bankofchina'``) was ever checked.

    NOTE(review): ``'sc'`` is a very short token and matches as a plain
    substring (e.g. inside ``'description'``); callers that scan arbitrary
    OCR text may need a stricter rule for it.
    """
    compact = text.replace(' ', '')
    for bank in ('bankofchina', 'hangseng', 'hsbc', 'sc'):
        if bank in compact:
            return bank
    return False
18
+
19
def check_bank_name(img_path):
    """Guess the bank key from a statement file name, or return ``None``.

    A bank matches when either its key or its standard statement title
    occurs in ``str(img_path)``. Known titles:

    * BOCH - "Consolidated Statement 2023-01-01"
    * HangSeng - "Statement of Prestige Banking 2023-03-0" or
      "Statement of Preferred Banking 2023-03-07"
    * HSBC - "Statement - HSBC One Account 2023-02-10"
    * Standard Chartered - "statementOfAccount 2023-02-01"
    """
    path_text = str(img_path)
    known_titles = (
        ('boch', "Consolidated Statement"),
        ('hangseng', "Statement of"),
        ('hsbc', "Statement - HSBC One Account"),
        ('sc', "statementOfAccount"),
    )
    for bank_name, title in known_titles:
        if bank_name in path_text or title in path_text:
            return bank_name
    return None
31
+
32
def check_mr(text):
    """Drop a leading title (mr, ms, miss, mrs) and squash the rest.

    When the first word is a title, the remaining words are lower-cased and
    joined without spaces. Otherwise ``text`` is returned unchanged.
    """
    tokens = text.lower().split()
    if not tokens or tokens[0] not in ('mr', 'ms', 'miss', 'mrs'):
        return text
    return ''.join(tokens[1:])
39
+
40
def get_info_from_bank(img_path, file_name):
    """OCR a bank-statement page image and pull out the account holder's data.

    Each OCR item is assigned to a field purely by where its box lies on the
    page (hard-coded pixel windows), so the layout is presumably that of one
    specific statement template -- TODO confirm which.

    NOTE(review): indentation was reconstructed from a flattened diff view;
    the nesting of the inner ``if``/``elif`` pairs is a best guess.

    Args:
        img_path: image passed to ``CnOcr.ocr``.
        file_name: statement file name, used to pre-fill the bank via
            ``check_bank_name``.

    Returns:
        dict with keys ``name_on_bs``, ``address``, ``bank``, ``date``,
        ``asset`` (float sum) and ``liabilities`` (string).
    """
    # Running the model
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    out = ocr.ocr(img_path)
    # Data
    bank_data = {
        "name_on_bs": "",
        "address": "",
        "bank": check_bank_name(file_name),
        "date": "",
        "asset": 0.0,
        "liabilities": ""
    }

    # Vertical windows (pixels) for the asset and liabilities rows; both are
    # shifted downwards each time an asset row is consumed.
    asset_y = [722,747]
    asset_equa = ''  # human-readable sum, only used by the commented-out print
    asset_iterations = 2
    liabilities_y = [747,800]
    count = 0
    invalid_list = ['', ' ', ',']

    for item in out:
        detected_text = item['text']
        raw_detected_text = detected_text.lower()
        #raw_detected_text = detected_text
        # positions[0] and positions[2] are used as opposite box corners --
        # presumably top-left and bottom-right; confirm with the CnOcr docs.
        positions = item['position']
        if raw_detected_text in invalid_list or raw_detected_text is None:
            pass
        elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
            if (raw_detected_text != ''): # name
                bank_data["name_on_bs"] += raw_detected_text
                bank_data["name_on_bs"] = check_mr(bank_data["name_on_bs"])
        elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
            if (raw_detected_text != ''): # address
                bank_data["address"] += raw_detected_text
                bank_data["address"] += ' '
        elif ((positions[0][0] >= 996) and (positions[0][1] >= 289) and (positions[2][0] <= 1083) and (positions[2][1] <= 314)):
            if (raw_detected_text != ''): # statement date
                bank_data["date"] += raw_detected_text
        elif ((positions[0][0] >= 900) and (positions[0][1] >= asset_y[0]) and (positions[2][0] <= 1120) and (positions[2][1] <= asset_y[1])): #
            # take a look at the y0/y1 position
            # NOTE(review): the text was lower-cased above, so 'DR' can never be
            # found: the "not in" test is always true and the elif below cannot
            # fire. A debit row landing here presumably reaches float() and
            # raises ValueError.
            if (raw_detected_text != '' and count <= asset_iterations and ('DR' not in raw_detected_text)): # asset
                asset_equa += raw_detected_text
                asset_equa += '+'
                raw_detected_text = raw_detected_text.replace(',', '')
                #raw_detected_text = validate(raw_detected_text).lower()
                asset_float = float(raw_detected_text)
                bank_data["asset"] += asset_float
                # Move both windows down to the next row.
                asset_y[0] += 21
                asset_y[1] += 27
                liabilities_y[1] += 27
                count += 1
            elif 'DR' in raw_detected_text:
                bank_data["liabilities"] = validate(raw_detected_text)
        elif ((positions[0][0] >= 900) and (positions[0][1] >= liabilities_y[0]) and (positions[2][0] <= 1130) and (positions[2][1] <= liabilities_y[1])):
            if (raw_detected_text != '' and 'dr' in raw_detected_text): # liabilities
                raw_detected_text = raw_detected_text.replace('dr','')
                bank_data["liabilities"] = validate(raw_detected_text)
        elif check_bank(raw_detected_text) != False: # bank
            # Text outside every window may still name the bank; the last hit wins.
            bank_data["bank"] = check_bank(raw_detected_text)


    # print('------------From bank statement------------')
    # print(f'Name: {bank_data["name_on_bs"]}')
    # print(f'Address: {bank_data["address"]}')
    # print(f'Bank: {bank_data["bank"]}')
    # print(f'Date: {bank_data["date"]}')
    # print(f'Asset: {asset_equa} = {bank_data["asset"]}')
    # print(f'Liabilities: {bank_data["liabilities"]}')
    # post_data(bank_data["bank"], bank_data["name_on_bs"], bank_data["address"], bank_data["asset"], bank_data["liabilities"], bank_data["date"])
    return bank_data
111
+
112
+ ########## Posting data through API ############
113
+ import requests
114
+ import data_encryption
115
+ # POST /api/v1/users HTTP/1.1
116
+
117
def post_data(bank, name, address, asset, liabilities, date):
    """Build the statement-verification payload and encrypt it.

    The HTTP request itself is still commented out in this file, so nothing
    is sent and nothing is returned. ``requestId`` and ``userId`` are fixed
    placeholder values.
    """
    # endpoint = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT/api/v1/users'
    payload = {
        "endpoint": "/SBT",
        "apiType": "store_statement_verif",
        "requestId": 'request_1234',
        "userId": 'user1',
    }
    # Statement-specific fields; asset is sent as text like the others.
    payload["bank"] = bank
    payload["nameStatement"] = name
    payload["address"] = address
    payload["asset"] = str(asset)
    payload["liability"] = liabilities
    payload["statementDate"] = date

    encrypted_data = data_encryption.encrypt(payload)
133
+
134
+ # request = requests.post(url=endpoint, data=encrypted_data)
135
+
136
+ # def extract_pdf_data(img_path='hangseng_page-0001.jpg'):
137
+ # page_number = 1
138
+ # images = f'hangseng_page-000{page_number}.jpg'
139
+ # get_info_from_bank(img_path)
extraction_data.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ################# cnocr ##################
2
+ from cnocr import CnOcr
3
+ from pdfquery import PDFQuery
4
+ import openai
5
+
6
def validate(text):
    """Return *text* with every space and comma removed."""
    return text.replace(' ', '').replace(',', '')
11
+
12
def check_bank(text):
    """Return the first known bank keyword found in *text*, else ``False``.

    Spaces are stripped from *text* before matching, so ``"hang seng"``
    matches ``"hangseng"``. Matching is case-sensitive.

    Every candidate is tested before giving up; ``False`` is only returned
    once no keyword matched, so a miss on the first keyword cannot end the
    search early.
    """
    compact = text.replace(' ', '')
    for bank in ('bankofchina', 'hangseng', 'hsbc', 'sc'):
        if bank in compact:
            return bank
    return False
20
+
21
def check_bank_name(img_path):
    """Guess the bank code from a statement file name.

    A bank matches when either its short code or its standard statement
    title occurs in ``str(img_path)``. Candidates are tried in the order
    boch, hangseng, hsbc, sc; ``None`` is returned when nothing matches.
    """
    # BOCH - "Consolidated Statement 2023-01-01"
    # HangSeng - "Statement of Prestige Banking 2023-03-0" OR "Statement of Preferred Banking 2023-03-07"
    # HSBC - "Statement - HSBC One Account 2023-02-10"
    # Standard Chartered - "statementOfAccount 2023-02-01"
    candidates = (
        ('boch', "Consolidated Statement"),
        ('hangseng', "Statement of"),
        ('hsbc', "Statement - HSBC One Account"),
        ('sc', "statementOfAccount"),
    )
    path_text = str(img_path)
    return next(
        (code for code, title in candidates
         if code in path_text or title in path_text),
        None,
    )
33
+
34
def check_mr(text):
    """Strip a leading title (mr/ms/miss/mrs) from a name.

    When the first whitespace-separated word, lower-cased, is one of the
    titles, the remaining words are returned lower-cased and joined with
    no separator. Otherwise *text* is returned unchanged.
    """
    tokens = text.lower().split()
    if not tokens or tokens[0] not in ('mr', 'ms', 'miss', 'mrs'):
        return text
    return ''.join(tokens[1:])
41
+
42
def get_info_from_bank(img_path, pdf_path):
    """Extract bank-statement fields by sending OCR and PDF text to OpenAI.

    The statement image is read with CnOcr and the first loaded PDF page
    with PDFQuery; both text sets are embedded in a chat prompt asking the
    model for the customer name, address, bank, issue date, total asset and
    total liability.

    Args:
        img_path: Path of the statement page image passed to the OCR model.
        pdf_path: Path of the statement PDF passed to PDFQuery.

    Returns:
        The text content of the first completion choice (also printed).

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is
            not set.
    """
    # Local import keeps this block self-contained; the API key must come
    # from the environment rather than being committed to source control.
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
    openai.api_key = api_key

    # Running the model
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    out = ocr.ocr(img_path)

    # Example of the kind of result the prompt is asking for:
    # {
    #     "Customer Name": "MR CHIU CHUNG YIN",
    #     "Address": "FLAT 13,8/F,OILOK HOUSE, YAU OI ESTATE, TUEN MUN NT",
    #     "Bank Name": "HSBC",
    #     "Statement Issue Date": "10 January 2023",
    #     "Total Asset": "7,265.80",
    #     "Total Liability": "7,265.80"
    # }

    # Drop OCR fragments that are only a lone space or comma.
    invalid_list = [' ', ',']
    data_set_1 = [item['text'] for item in out if item['text'] not in invalid_list]

    pdf = PDFQuery(pdf_path)
    pdf.load(0)
    text_elements = pdf.pq('LTTextLineHorizontal').text()
    text_elements = text_elements.replace("cid:", "")

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.2,
        messages=[
            {"role": "system", "content": "You are an AI assistant for extracting data from bank statements. Uppercase and lowercase letters are the same. List results in a dictionary format."},
            {"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1} and {text_elements}. (1.) Data that locate in the front part of the text: customer full name, address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month."},
        ],
    )
    bs_data = completion['choices'][0]['message']['content']
    print(bs_data)
    return bs_data
92
+
93
+ # get_info_from_bank('hangseng_page-0001.jpg','hangseng.pdf')
94
+ # get_info_from_bank('hsbc_one_account_page-0001.jpg','hsbc_one_account.pdf')
95
+ # get_info_from_bank('boch_consolidated.jpg','boch_consolidated.pdf')
96
if __name__ == "__main__":
    # Manual smoke run. Guarded so that importing this module does not
    # start OCR and an OpenAI request as an import side effect.
    get_info_from_bank('hsbc_one_account_page-10001.jpg','hsbc_one_account_page-10001.pdf')
imageSegmentation.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This program is designed to auto crop the face on a given image
2
+ # It is required to change the image into gray format to satisfy the pre-trained model requirement
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import os
7
+
8
+ import mediapipe as mp
9
+ from mediapipe.tasks import python
10
+ from mediapipe.tasks.python import vision
11
+
12
+ import cv2
13
+
14
+ from pathlib import Path
15
+
16
+ # auto crop the image in the given dir
17
+
18
# Module-level face detector, created once at import time from the
# BlazeFace model file and reused by auto_cropping().
base_options = python.BaseOptions(
    model_asset_path='blaze_face_short_range.tflite',
)
options = vision.FaceDetectorOptions(
    base_options=base_options,
)
detector = vision.FaceDetector.create_from_options(options)
21
+
22
def crop(
    image,
    detection_result
) -> np.ndarray:
    """Crop the detected face, plus a fixed margin, out of *image*.

    The margin is 90 px above, 30 px below, 80 px left and 35 px right of
    the detection's bounding box. The top and left edges are clamped to 0:
    a negative slice start would otherwise be read as an offset from the
    far edge and give an empty or wrong crop when the face is near the
    image border. Slice ends past the image are clipped by the array.

    Args:
        image: Image array indexed as ``[row, column, ...]``.
        detection_result: Object whose ``detections`` is a sequence of
            items carrying a ``bounding_box`` with ``origin_x``,
            ``origin_y``, ``width`` and ``height``.

    Returns:
        The cropped region of *image*.

    Raises:
        ValueError: If ``detection_result.detections`` is empty.
    """
    detections = detection_result.detections
    if not detections:
        raise ValueError("no face detected in image")

    # Only one face is expected; if several are reported the last one wins.
    bbox = detections[-1].bounding_box
    top = max(bbox.origin_y - 90, 0)
    left = max(bbox.origin_x - 80, 0)
    bottom = bbox.origin_y + bbox.height + 30
    right = bbox.origin_x + bbox.width + 35
    return image[top:bottom, left:right]
36
+
37
def auto_cropping(dir):
    """Run face detection and cropping on every file in *dir*.

    Each file is loaded as a MediaPipe image, passed through the
    module-level ``detector``, cropped with ``crop`` and colour-converted
    with ``cv2.COLOR_BGR2RGB``.

    NOTE(review): only the result for the last file processed is returned,
    and an empty directory leaves the result unbound - confirm this is the
    intended behaviour.
    """
    for entry in os.listdir(dir):  # every file name in the directory
        source = Path(f"{dir}/{entry}").resolve()
        mp_image = mp.Image.create_from_file(str(source))

        faces = detector.detect(mp_image)

        # Work on a copy of the pixel data rather than the MediaPipe view.
        pixels = np.copy(mp_image.numpy_view())
        face_crop = crop(pixels, faces)
        rgb_annotated_image = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

    return rgb_annotated_image
56
+
57
+ # auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
58
+
59
+ # The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
60
+ # my idea is to use another model to classify if the cropped image is real human face
model1.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnocr import CnOcr
2
+ import pandas as pd
3
+ import checkTool
4
+
5
def model1(path):
    """Read name, HKID number and issue date from an ID image.

    Uses the CnOcr ``en_PP-OCRv3`` recognition model. The HKID number is
    taken from the last recognised text line; name and issue date are the
    last lines that pass the corresponding ``checkTool`` checks.

    Args:
        path: Image path passed to ``CnOcr.ocr``.

    Returns:
        ``[name, valid_hkid, hkid, issuedate]`` where ``valid_hkid`` is the
        string ``'True'`` or ``'False'`` and ``hkid`` is formatted with
        ``checkTool.format_HKID`` only when it validates.

    NOTE(review): the issue-date check is applied to every OCR line,
    independently of the name check - confirm against the intended layout.
    """
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    # ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
    out = ocr.ocr(path)

    # The last OCR line is assumed to hold the HKID number.
    raw_hkid = out[-1]['text']
    name = ''
    issuedate = ''

    for data in out:
        text = data['text']

        if (not checkTool.check_integer(text)
                and checkTool.check_alpha(text)
                and checkTool.is_chinese_name(text)):
            name = checkTool.seperate_name(text)

        # check if the data is issuedate
        if checkTool.check_issuedate(text):
            issuedate = checkTool.format_issuedate(text)

    if checkTool.validate_hkid(hkid=raw_hkid):
        return [name, 'True', checkTool.format_HKID(raw_hkid), issuedate]
    return [name, 'False', raw_hkid, issuedate]
42
+
43
+ # example for testing
44
+ # info = model1('IMG_4495.jpg')
45
+ # print(info)
46
+ # checkTool.print_info(*info)
model2.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnocr import CnOcr
2
+ import pandas as pd
3
+ import checkTool
4
+
5
+ # img_fp = 'IMG_4499.jpg'
6
+
7
def model2(path):
    """Read name, HKID number and issue date from an ID image.

    Uses the CnOcr ``densenet_lite_136-gru`` recognition model. Commas are
    removed from each OCR line before it is checked. The HKID number is
    taken from the last recognised text line; name and issue date are the
    last lines that pass the corresponding ``checkTool`` checks.

    Args:
        path: Image path passed to ``CnOcr.ocr``.

    Returns:
        ``[name, valid_hkid, hkid, issuedate]`` where ``valid_hkid`` is the
        string ``'True'`` or ``'False'`` and ``hkid`` is formatted with
        ``checkTool.format_HKID`` only when it validates.

    NOTE(review): the issue-date check is applied to every OCR line,
    independently of the name check - confirm against the intended layout.
    """
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    # ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
    out = ocr.ocr(path)

    # The last OCR line is assumed to hold the HKID number.
    raw_hkid = out[-1]['text']
    name = ''
    issuedate = ''

    for data in out:
        text = data['text']

        if checkTool.is_comma_present(text):
            text = text.replace(',', '')

        if (not checkTool.check_integer(text)
                and checkTool.check_alpha(text)
                and checkTool.is_chinese_name(text)):
            name = checkTool.seperate_name(text)

        # check if the data is issuedate
        if checkTool.check_issuedate(text):
            issuedate = checkTool.format_issuedate(text)

    if checkTool.validate_hkid(hkid=raw_hkid):
        return [name, 'True', checkTool.format_HKID(raw_hkid), issuedate]
    return [name, 'False', raw_hkid, issuedate]
42
+
43
+ # # example for testing
44
+ # info = model2('IMG_4496.jpg')
45
+ # print(info)
46
+ # checkTool.print_info(*info)
pubkey.pem ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN RSA PUBLIC KEY-----
2
+ MIICCgKCAgEAgTkWgzMVHIX/mYOZ5F6GIeZ5WastU7LWNmCSi2kTZQr2OjGsATCU
3
+ uD/ZrxVObpZPw4vvXax0LGkIyDx7QG4psVEKx26IUtvn7Br+CJyATmK2dW9sCkwY
4
+ N4x/67F9a1N8yOKhEvkcBtplphZfqZTCZ3d4VUShBt9gYGlO4odeXZ3cZLm+N9Hc
5
+ MEP6qMIoH1KBNjhcx60BvLbODHkYRup7YAcOh/cOEC/WNkZqQPPYomcVyXat6UKS
6
+ L1Vf/s1RnhOStu4JmYS1se39LRAxKI+xADZ7D+y7bhcBGykT7evEPGCwUAh++y6y
7
+ Wolj9HS5oIkcxq+Rj3HLlm7ofDubeBpuOWF2xVh0jYpSFHUYkVChssmfb0WFwxrt
8
+ YQj8aqX2C9taoWQpHdCcANJSvaM1YvLRPe8pHRpCjm/BrvxddxMNY1gCWpBCP7ym
9
+ WAuJShb/kkdDnQ+exS9n/UbzRMzYoHnKroQL9CPn26mbzlEO7mMOj1h34rQZeTD1
10
+ OAFEC1JFBL8LCMRkh+RT3UVpHTSFn/Oc2Gq912MivUrHbeK5Y8lPZOrEmvvxeqDB
11
+ uOPOMpkh9LWEoGlO4GLvnMhhbINt1OnuUIRCqvOh3jXUXoseVnAMWv1QTRyreq4h
12
+ d8GlMUR5U4dmc2XHncy1riVDVV4FYSAL2N94utNDgztKUkGL6i2Z5AECAwEAAQ==
13
+ -----END RSA PUBLIC KEY-----
request_json/__pycache__/sbt_request_generator.cpython-311.pyc ADDED
Binary file (4.72 kB). View file
 
request_json/__pycache__/sbt_request_generator.cpython-39.pyc ADDED
Binary file (2.56 kB). View file
 
request_json/request_legalDocument.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "request": {
3
+ "method": "POST",
4
+ "header": [],
5
+ "body": {
6
+ "mode": "formdata",
7
+ "formdata": [
8
+ {
9
+ "key": "requestId",
10
+ "value": "AhuqYhSgE9IQGpGeylwlD+zt9Q1o9typDou/GI0AIIxkoLL4tP8YmA41oEs3iz0UGtw2NzMjGSi/rmZogBuie8QlhOaEER+mysk/JRSj1YmMMnnxrfTBAzaa/FWF9MQ1OwpHbQc+TJuLBnuW/HnrVB/uMcl2klk0KyxD/rOgCRGW5W9ANZuWtBKRoO5ZuTKvcB3uGRc6h2iPuarUE6OcPyRL4byn2fi3ZAdAo3Uh61nCy090Gywf0qQRl54GP9uqv7R136Ilb2RNwE6cccC0F+C43eEXdnXkMdK3+P/ZNvNtsNTOMn2YfxhqaoxLef25dhukNWwA4k0JfR2waMXeMPrUxpzlkRHtN7V3os8q12mPvV2h/CSZelDw5GZWlUZW3A4rKWnLSX72/T05e1LdJ5mplkGPhSEoLUlEOkDOPmODBn6euy4mTAwAKUYLlVgbaHuG8CAMuKz3eQ6wD8RKajJF8+Y7BjnUH/fWZqLE6+QtiHjxGczKv7wxzD4kfNTj1DeZ9SHMf70IBb91URRMET1n9DTsq05FueI+/qm6VvV+TG1nbxuBBEC+yyTkB1Zjc4wxRUuqfZ7Rql4f7xfgQRYn28r0dEmgnGluqDMp7eDL6Kl1Xe7Hm0qblwf6jB4OxYY+sSCqhyIevGYJZQ1J9NwAuRhaZrwd4EwWs0G0efQ=",
11
+ "description": "statement",
12
+ "type": "text"
13
+ },
14
+ {
15
+ "key": "userId",
16
+ "value": "XV6W6EroodCeMfcuIt/X0Cw16NEHYj1ntBRHvKBxFHxroIGW4bzjIx/Z52zvOYhCu8p9iFODnq6diMZ5YjPmix34sUauhAU+Crthkh7pxa0nYsZs+ZgCGx3gVaPN4z+MmKA8PHofTZ0cH59xXLL0v5hwtVEUmQ3K5AH/7bUYl87e5J+75ml9JeoI+a/iPkAfTvUk8Q4hEeVhc0TWkbRT+u9kzMD0Ej3tPDgHILw/PifP7LYNDi1VZ9sHcaPA4irRztVwOR+9swYd56BZ2ROeN4zqkd32E+Hy8EcAGguextnZe0gWcZVbuUF/M/Jt3PSYua4rJTvQaZEeK8CKOrdnby3ZEYnh5Q7meD8zd+H0myTB9xzIi+/03gqDAbF68MTPfA3Ur8c0TE3vIBkOVub7YM0hQZMKgjEbyUSWlkxqjpYgR/fTZHxd9nk7zSfRAHXypuiH/GyQf0v7k1hbDmCeSL0995iTFM00VS3nj/Ik4b61ioFALyluxrMO/Mz4UJOJD5TlrTIgqme0ibez0tSb7OReHxGa8X2OooRYftckpQs5YxdO5ifQleGg05ZV/oUP9kzW6k7SvIIqyEuRWgAVbMcm2AJcJG5/PZR7CK2HiT0thK6wHAdKxO5gsRdxvC7tk80SSWJ892nIBxp77iY5TAnbBu5cNYBZcKiWtu6U8RM="
17
+ },
18
+ {
19
+ "key": "endpoint",
20
+ "value": "X5Ut0e359J19WgOjS/N4ui9HIr7ol/1cMhuYjB7BSiquCG/xt6dvSVtM3mbPD2cu+7FN/EoM8RNOfTq5kXO/8naiZ5HKc+LPcZfrLJDPrL33kwyw4uKxY/NEtQBc8pskTcSH5RPBX1/6Xmdu0Nb29GtAJqyZk/F65xPBrxY11W2SJlg8s58R72F6Y9urQTrankAa4xt58jL+mo6DQJjJLw+pLu5RiYU2lfp8OyDPAPfpRfMvmHT11sox1ia5RcWeKosQB0AU09JWxcbtpNYQZP+evZElkVWtOkUoflR+6ZBppjJwnu5QyTZr2wKASTmG2v2PTqeqbtns2Hagrmr8DkLUH/YNIcU7L193ffaYIoBGq21d+ysU80Glri1Xi1jO6jmJye66ansHkSVi1CPRiZPwhKB7PNo65VXIA2fYaGMnAEUOfRfZ8/XbhNOPOUX54N/gqvmiv/IQCwHvzGyHpXBJN3yFheSnk7T4d/prz7ginwUGnBm3R/+IhWvqIg8owxMqUayVmZnBVJ6UuTHDYGmg+lZ9A+R0eOKfSwl6W3uD2enKR1XBC7PkLZUA8hUdx4ZWEbkCcR5VnmsN32iLqVdrX8EdTl4kFkD+fHH0tjJT1RpL/9s8J9V7UPERC+V49OlQYLOaxGuQqHxp5KAhfGJjXCCzvo/Ikj1WTs8zvgg=",
21
+ "description": "SBT",
22
+ "type": "text"
23
+ },
24
+ {
25
+ "key": "apiType",
26
+ "value": "OZoO3DjuNYXElX/9GSRKvvnKOLLK47Ps0q77iX6kvEABqbvpDc0SZiSaOLYyx8ZpWFbhO0jzBxCCVdoJsC1POxn4hjYhc22zhSKHL203YZ38ROIOJzvB4vYoBwy4DjLXQmMf8XbuDjug/tQ9qCCI6pLIaQSvlnAh6z5zjtD0ovqN6BGX+UZxwvheMkXI3yv35NGE0HWvCPwAj2Xv6ZJ3UYATv60j9nZjH9ih6+P8WgtDvDXdzlOFmjwPGiFMvzLSERkhvoFxq/liaXSAGORW/gBpfX4Tp6Gf6cWz/5ochu9XL9ojwN/JXkKs60iN8Vrha1nIjF5SHyFhsI9y6sVeipU/gsrAKETmE3CpF6rFBoFDm+3VuZnKDFFDQ4U+J3f0UFOjj6diUrzOiq4nH6YgYLP7fn/mDuSweIVF53/vbc7PPbpgipy1K9xqlaRw9uD6vwew+NZOm9VwuqSZFmFdlch9yi3FA82dHN1Fb7HvopUpBzvSVXgW79oyIVZwzw3ifng2rxqoBbUTzg2w7qy6JoWIxoo2IZTYlLNEP+XEUpNxjyCLtuRgoliYowAZABVwBQEQufqAqrZ9OrjQlf8y5Ar92tgfW25BZ+BgwVW3ILA/wthWWHTQvMiEEZ03BvYOXDM7vNrZRvlQ3Nge9LGTmsQimslxzViTPHIYL5gES6U=",
27
+ "description": "store_legalDoc_verif",
28
+ "type": "text"
29
+ },
30
+ {
31
+ "key": "docType",
32
+ "value": "aGuIy/yK6V76wmGj/glbbEHn6j3CyEhmOJdqRsq9KIx8W8lcTpOLe7JlJIV7guBYBBOyno/4rmLWXxc6f5VaMIk8e698FiaVpGtxtsXT7sbXNPH1lEuCLKDVxpl4TxZGTsEVfOPQ4Gg9Nnu1VwamJvYCODuzGUKjgFQdq00DSIDAmBWRYSuYRdM6qVTk9lo6rtqjO7N8yw0iZQBIa9C4p/hDnzbrojcYhIadrXo98olMe4qrm53Qr3+B/YqAehu04Rt0hNbA/ZLKpsn9GZul7w1H26mQUaDPWApKsCvuzny523r6ejqpEDRDPajhjOLaztJCWCWy1yIXGq3SScxK3Iy7FCWiDc9frvR0G+ra0ar1h/KKoATjm4cjkvyRhHRvT583MjPWWFshka/prRU4Gaoff1v0//qAicN8wPjh4mhAehznJ3XpSU0GWwbYCmkgDbVnx6dCh43j6DYAYA24hnbRtbYLcJoYhasQfCbWeHs0BrXFe2eN7zyIRlrsHCjwILkrVlrZC1bhLEC7TcaV6GGDLhVWTx0+KCT50/yZb159xNXjksK8PCqh4W9afAu0cMtgAQT/V35V7zTpmRZRHn35dPlLYR6KtAUTxR7XCd32wLfyUPdAFYaLA/Ks5psUQ7SChRRhPh8k+kUUOA2uxI95YMyD3tjX03Emnga+0ZA=",
33
+ "description": "HKID",
34
+ "type": "text"
35
+ },
36
+ {
37
+ "key": "nameDoc",
38
+ "value": "UMsvHPUFARGpvZsFGFIjZD8VQ7ft54rizY2klq/nVrnguWfgIMFcKYFQ9b31UYj98l8f7v5s1h2rbY8thlvKoXcS60heeW1G56LKcXnJ0UFOOLJdyCY9Jbrt3gv3EkwrNb+GMaeh93dFmO5w7XSMAKtFyjAESpO6E+Kr/T/U3VQL/TctMOeGOKN3Cv4N954Kxg4mSetAJiYrPhNJnmewFKQRtawOv9Y5YKY7fhLDpb4VXpFeEA7g92KqgpSmnXa6AkJhhphkshakpgbfVeLU2y5n6YDnv9BBQua9CayqMU7rnI9rtal/SubT0G+HEdJyyzhu/ZmD1wm3BO+QWIieCyU6+GO4ymNNSugb8pPOd+l+e/ritxwCmvJCWeDB+qKTJSvkgBNLG2ICS6+SrQMVFgwmLWbD0oZO3ru4oY35g4akPIP6BkEZ++P75kTOlvKWkZe05Yy6DqFvIUaxicaooTOg1NWxAKWfEizyZ50scGP88pG4+XLW3IUGCniUA78j9Z4SkPnVUTrKR/RnJsWN4jdSubN/loatYhiJZATYjlQrftRM3NFFpmfUcm8wIErT6mBMDb8oT/n2n0YOsld1nQIXartLn7wM0egibLkuihOBGlZ+1lasnSBRSY9YMyI9msUnA9bslu+k/kga5qIp0bMp7f1rEKcaR6taS8hW5gY=",
39
+ "description": "allen lau",
40
+ "type": "text"
41
+ },
42
+ {
43
+ "key": "docID",
44
+ "value": "Pz9QeLnCZxZzG1qb1F3FQ7Dqy0Dbra0Z7GTVgfk89nQz/K6Ui7MXuqa6WXNKc0mDYEPmpuOmVu165GYs9Ws2+L7yXrnwcg8mePs/MpwtdvCr6YYeMQexgOzOA4oFjjPMOVMP9Z70znUQeJSVGJcaazswagwp8gRIyb+fhbNjbDY9+aPGKAxzlzlzhTAp7ZihWg7XW/9kRLKrGK1EdKFEDUq1grZpszDS7SwGYME3nCah753JNU2Zt2jJn1yd5Rs+OeSgSuByR1YEboK7MyLnxTqlU0DJvg5GNs7L2AGOWZtFPjUdMKUuPw0teaAUtTC+sms5/vHghRhIzTIr6S6/5PcId3TUCC+qGphIrVf4MqR9wQoJuU4RDYMUSf2Hgod3yMPe7jcokJaug17ToukriaBVWJ7nZoBM7Wa5JkD7wge4qNeBivS/fytikcWsMK36TpZPd7cEoRVn2lmADXJEl4DpMAMUPqKILl41wjXgPvIs5YzfoBC45yEXn6DpsKyU4v7RnnpRT6PnGmKSI5h9H55jFS8vg4f3GEMoXnbRiRn/QePx4W3fBNh8Sfh4oEtnbtC5tGxVhDrPxlxSK1ySXNlGLEMAQCaCeQidKIO3gWgBdJyRhgWlmuexUh5N4jrNlnRgwbUkxhgpcrsweYlNiJjhGUOULABDFJjLSAd49Jo=",
45
+ "description": "G908833(1)",
46
+ "type": "text"
47
+ },
48
+ {
49
+ "key": "docValidity",
50
+ "value": "bp2OFdPn7t7YfIaIKIyzt3jBFjPNb0WtxyzzzV2CX/R1shy+/Klx5yEQ60Rn8HbcInh9Dopbp3K0JWB2rPvjyYhwassyVjsbGcGd7QUJntL49XYnimd4nLhCsI0eVYNDFWQk4lfzgXqH6GD7V/8xxHZdS7FnWp1zf82s3m8PwqmfTD2XartaQh5GVf2woin6YhCN/XuPMbcZCxhDXOedDz5hf095rk83jWyMnn6lsC1loeOGoVza744chQXRph3XWAXuP2m2ZG3zMxMxomGL2AmZj5tmJ4DNWCPP4Qthi6ZTlSapmb05xMX47xWsSZA3Xd5RIlbVd+Y1iNR+7Qs6oaf0qR3UYaV3BA+Fh83StZKLkkZK9bk+esuRvHQa0Q4aqvBa1S3YAF0soN+ba/UM8+AmlMs2OSzFYN7fv8VR1Xwclw1NJQqvIg6aZyFZdk2RbvmNgkedsuxtye21yawHpsXtcHy415YR8wn5aefsfb+DAwnKZ/6gZUlpCKb9iZ+9xlzXocadLInCKBiTyHYMtxj/VPngdDDZQQZcJlZTDr0ZMq2fdmcYZbPVQmX3UVaa2Zu2Uqlm85pWH1EKvgxoYSWx/FT90hZU8r9DHfjOthOJwwCgAe/mvpNDs/JzGMYUr89qOVNtDycnkJvcC3OYwvNgElSL1pipXS4KM/3W3kU=",
51
+ "description": "True",
52
+ "type": "text"
53
+ },
54
+ {
55
+ "key": "dateOfIssue",
56
+ "value": "V1Qw8e7grhgy7kQxYTivhMYnBDixs6KOe+wE55uJ4orkrP/0sBjZWtty7bUYZ7wBzg0bujLHPPmmWRnS/UqSwhYr4D105b/J5ZY+w/Q5g9gKkWEPnWcJPtJ4ATt1KxvbCFN7AFT67+cxKaGDf0VE6HBPuNbSufOJ/55X4Vaslb/TpTiyXSSg8I29Y9vVfZ3m9vZwtLAIGgLo/HCoEuSfx8a8ntKqJOr3MPjOTN/Ml9kISDpnNqpsYnav9ZXCVfMZaTxXVIegvct2bJzVmf1gAMXry2EpDOocjugfAxY1ODEFFmXiHHIRDNL7NMy8XeU4iOQcTdzcTKlmUR6YYGk5pHOIYaBZOx56ge/EIP5+D+0Lv3R/KoY8GJllFtnK3EO21GrT6sRLWB22CV+cGhN6xkSCl+wIMN/X625p1zf2kPo0hwnOHPsCWwzoWIDUIl2K8aBBPckl190da9Yb/8SPJEZrsFzK6JgYMOGsTQ1J7+Jsn6KXV6c0Mfb3SAoyREIpZwJQKWHqQOHJVJVcZ8wA3fHAQKsUxSElDsIfxI0Jbp3WvqO9pIdAX/dAnEmtw8ajH21efx8vWlV+8GtRQ5QCLhqF7ioPMLQt2Kkl3kvs3FvbryzME1+Y3Bomz1LaS+gMa4s/dLcG+Q1pBSUfP0WYMFjXqCDMkPLZnhJycGXx7+k=",
57
+ "description": "07-81",
58
+ "type": "text"
59
+ },
60
+ {
61
+ "key": "matchingScore",
62
+ "value": "OjG09vdihB12ZPb8C3oqVcQMhjMbBSIRjKtzDzr96QCDodSx4W9uIjn9e6zWHW7RyOXbm0hiqKae6d8ZS7hPpqyIF7Yfl1OAfnuEkQ0WfLm8yuUvCA4oFSsH86gx1EYeDUYM44aZEJ5qqPK/IUKvXsHSGgsoIZ0QniIHn71Q1K28zc79iZ0UCkkRhhitoF68JNY0Qik8hiX6ES7yiU0daq7vFIbw0Tg9JLr8fpw/81+Fm7zVfhAx5T2LT2cag04da+YLKSUIjsksQ/CCiRQSDRfdbOdZ9Os0tiXPZdYoIU/dxVerlUJLlmiMYYnVte4m/8pMlw57bc8oKE+qka83R8E4hH9Wu1uOcWHjdPYyUJdunyXByXtM5igrUHgmLvvUQ7eBfnrK5+HZlhHC9tpiTVvcWJd086lS9/hi8UPp0XgSc3h9TcQU2EiC2rgnC3PbPIdjo0Evb7M9P2T0xlA68Na3uW94hBoHzoyS1VmLFTo5alR4LteFBhZ/sCeMp0m1LYs4ZXUOCS85FRwK1x8WnomXpxOAFvBai5JwiLLqkNyBG90LEKQRFFyqk4dceBjOGk+YIq2fTWIMGNTNrtIltV7tf9GJS+LwLBDGsofieYceGeS6ekGDgnNXQJpIRTLccD6qX4FNl+W2K7g1M0xVMNeJ/B7LV3Pb/7uD+s2ASyo=",
63
+ "description": "0.957",
64
+ "type": "text"
65
+ }
66
+ ]
67
+ },
68
+ "url": {
69
+ "raw": "http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT",
70
+ "protocol": "http",
71
+ "host": ["ipygg-api-test-env", "ap-east-1", "elasticbeanstalk", "com"],
72
+ "path": ["SBT"]
73
+ }
74
+ }
75
+ }
request_json/sbt_request_generator.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import rsa
4
+ from datetime import date
5
+ import secrets
6
+ import string
7
+ import requests
8
+ import json
9
+
10
def generate_token_id(length):
    """Return a random token of *length* ASCII letters and digits.

    Characters are drawn with ``secrets.choice``.
    """
    alphabet = string.ascii_letters + string.digits  # + string.punctuation
    picks = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(picks)
14
+
15
+ # Examples of tokens if string.punctuation were enabled (as written, only ASCII letters and digits are produced)
16
+ # 5!bA9H2f1q^...
17
+ # Xe7uM$4d9@...
18
+ # &3yTb1*8Z#...
19
+ # %pWqN7!6zX...
20
+ # @9oV!s6Rd2...
21
+
22
def get_today_date():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    return date.today().isoformat()
25
+
26
+ # Example for what will be returned
27
+ # 2023-06-29
28
+
29
def generate_request(data):
    """RSA-encrypt every field of *data*, log it to a file and POST it.

    Each value is UTF-8 encoded, encrypted with the public key stored in
    ``../pubkey.pem`` (relative to this module) and base64-encoded. The
    encrypted fields are written to ``sbt_request.txt`` in the current
    directory and sent as form data to the SBT endpoint. The response body
    is printed.

    The caller's dictionary is left untouched, so the same dictionary can
    safely be passed again.

    Args:
        data: Mapping of field name to string value.
    """
    url = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT'

    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')

    with open(pubkey_path, 'rb') as key_file:
        pub_key = rsa.PublicKey.load_pkcs1(key_file.read())

    # Build a new mapping instead of overwriting the caller's values.
    encrypted = {
        key: base64.b64encode(rsa.encrypt(value.encode("utf-8"), pub_key))
        for key, value in data.items()
    }

    # Write the encrypted and encoded values to a file
    with open("sbt_request.txt", "w") as out_file:
        out_file.writelines(
            f"{key}: {value}\n\n" for key, value in encrypted.items()
        )

    # Post the encrypted fields to the API
    response = requests.post(url, data=encrypted)
    try:
        # json is a method; it must be called to obtain the decoded body.
        print(response.json())
    except ValueError:
        # Body was not valid JSON; show the raw text instead.
        print(response.text)
51
+
52
+
53
def split_data(data):
    """Build the legal-document and bank-statement requests and send both.

    NOTE(review): the *data* argument is not used; the values are always
    reloaded from ``data1.txt`` in the current directory. This matches the
    existing behaviour - confirm it is intended.

    A missing ``avg_score`` defaults to ``"0"``. The two payloads are sent
    through ``generate_request``, legal document first.
    """
    # token_id = generate_token_id(501)
    token_id = "12344321"

    # Single managed open; the handle is closed when the block exits.
    with open('data1.txt') as f:
        data = json.loads(f.read())

    data.setdefault("avg_score", "0")

    today = get_today_date()

    legal_doc_data = {
        "endpoint": "SBT",
        "apiType": "store_legalDoc_verif",
        "requestId": "request_id_id",
        "date": today,  # a string
        "tokenID": token_id,  # a string
        "docType": "HKID",
        "nameDoc": data["name_on_id"],  # a string; lower case with space separate; e.g. san chi nan
        "docID": data["hkid"],  # a string; with bracket (); e.g. G908833(1)
        "docValidity": data["validity"],  # a string; "True" or "False"
        "dateOfIssue": data["issue_date"],  # a string; month-year; e.g. 07-81
        "matchingScore": str(data["avg_score"]),  # a string; e.g. "0.957"
    }

    bank_statement_data = {
        "endpoint": "SBT",
        "apiType": "store_statement_verif",
        "requestId": "request_id_bs",
        "date": today,  # a string
        "tokenID": token_id,  # a string
        "bank": data["bank"],
        "nameStatement": data["name_on_bs"],
        "address": data["address"],
        "asset": str(data["asset"]),  # a string containing only numbers
        "liability": data["liabilities"],  # a string containing only numbers
        "statementDate": data["date"],  # a string
    }

    generate_request(legal_doc_data)
    generate_request(bank_statement_data)
96
+
97
+
98
+ # demo structure of the data
99
+ # {"password2": "chingfuilau", "username": "Allenlau1111", "password1": "Allen02118173", "date": "2023-03-03 00:00:00",
100
+ # "credentialId": "testing123","requestID": "test_statements",
101
+ # "userId": "7893456",
102
+ # "endpoint": "SBT",
103
+ # "apiType": "metadata",
104
+ # 'tokenId':"500",
105
+ # "ipfsLink1": ".",
106
+ # "ipfsLink2": "..",
107
+ # "ipfsLink3": "...",
108
+ # "membershipStatus": "1"}
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cnocr==2.2.2.3
2
+ face_recognition==1.3.0
3
+ matplotlib==3.7.1
4
+ mediapipe==0.10.1
5
+ numpy==1.25.0
6
+ opencv_contrib_python==4.7.0.72
7
+ opencv_python==4.7.0.72
8
+ opencv_python_headless==4.7.0.72
9
+ pandas==2.0.2
10
+ Pillow==9.5.0
11
+ Pillow==9.5.0
12
+ Requests==2.31.0
13
+ rsa==4.9
14
+ streamlit==1.24.0
15
+ streamlit_webrtc==0.45.1
sbt/deployment.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from web3 import Web3
2
+ # test userID: 1001001
3
+ #
sbt_request.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ endpoint: b'O5LDGUcUb6knu8lOvKTVd5EmjI/9yH6P4KraYx87dQQqJxJ2rmKUgk+qxavAJMFOWfhEuMwZsFB43Y3A+F6QoqBBXZfj0e4snOHWZKXEKeKynlmIR7c/Cy6bEK9oUrkrSOkPd5CZZE0/BswZMfh8XidW4GQTBo6bybHkBpSKYpNkS9W8GaJLWqnDEEKFI+KSS3gsE55PEOFvKQgB7s+icT+GF/2l5vOWKqzc8pHFFH9nyCF63zRLpAVe+ZEB52dAV148SJNlPNsr4ikQLOqOOPCYF35c5lbwGyZPbib0+6pc9Z4evxPUqzAAsrSmkwUr/c1Rqg+go4vnhJ3EfSjGbuMAeMJLdtLNzglYw/93rKYxG0Jc/w/4RWS98PHPGLA3GEvoD30leOfwS/yO8N0Cb7v9u3XwqIy/FNcdDOBii9GOmXCLJn6u8KvbyfhsBM4MtiqwGuPIBB/Wu9AVVqJGGT85YgrnWBnJfoMHvYlhEM3xEe5jSkeMz+G1h7vXye2wdKKc7jmt748i/8QRZ4TsFUcauU2V2OPD6eGcrjg4MKma0Nu7UKQyUJKXg9azDdn8YhMqzfGQ8vkoj3hQJNTIR7aOyXaACjqEpG+kWC4mCjPciYGowzp8AmY+QK+KSRjwpWJr5uvBCtqFH8OwUZuS5IAaqMbrBsufLdh7XIF48cc='
2
+
3
+ apiType: b'Tg6rz5RJnLaseo/MDn6Dz6ui3MG2YKQYdbwteCT9WdN++AyLXRnxGepXuR1i5oEqmTpxdZsA5mUnJBEjIh6EDJAXgd3jG/0Dy5p7TGe62Rmla3EGOkdjHRNWBSoXYOeivLxdH+lTe6x3+bzc7/cgbDjdJ0auBkid+avLZM8KtV3vGtOjHey3D8I+7foBzALse9TI71LHThKIg3Hjeq6dj0SLkkaz/DiLFS90JXa9Ip5NZ+CGtJ5qmDLfqjBpOLLZ06aXhEoLgUVwk4vf/d5DAOxsRyhPfK3wao4UwaVtj+NO3bR4/tQfCQ2CnFNvrr7GQfXhqGx1Mh1yOoyClZPCj8I+r3hYYrmofAR9PQF58qkmuuxAiBTqNlZHyQGmbRDOUaNBPy2Tdr67MrM43zmAFysT8HJBUCcLXWv0FcGKCqFv941usFuboOaoRH9ib8hRqvt757arQPwdmIxyayNkOWgWF3kQTt9iSbleWk61l06EUgdX6qOnAdEDHLJxPYGdrGHVwuRlT7Sj0RgkSviaom+0xcU/T24auDgZj/OjMCibz/QrRz1Ap9RMnteBGjQoYXiIhyBPe7p3Q/6h0PZCyCqSH/IDj4I8ar0FNtynkuXKggr/GWUNV7xkLS/xujhXuziBwTedR1LYdmth99h40CzamqmYL5cj13WtUHfmFk4='
4
+
5
+ requestId: b'Ew+AqWhPC5HmyJnAfltVVOcmDvYMRxV4dmBxlel2+AFQmnCVWY2gTz3KrTnvGan+xONkFyDRQV5Xcmm9coTRuXLqNHxQcNz7Iu/m8ipLUrUr2Kofa5/paAcD5x7I/nYgb7UNQ2qWzpXo/vfw6syY9vVK49ZhPfi2tLYY3gEqIQojQvmkhC3qzTrbQqKSQq8jWQ9crNGiDvrfHhPfPG/hdNhwhluiznhTFGJkyeh8d2lVwUtqKN8MMSinX/Jaqs7hyS3FIGILynPwgyASkKGLPFpqAmfemDPkbkzybHMThKRBcebUPdIMpll88nT73vBwslwr0bTvat4wOZQHjtVs/JDzWXnYw2zO/ljWzcl4RSNcJzKulUhEV6NInIolRQ30mTvxihJIojOT921zhlaHx6fp3FP2Qw2lHHgtAvY4L10R1Qxtmy0Xm7vx7oELrFjKZN9+w8h5ofvNqHdkE7HafF6tYiIPm0bWaWM1y1Z7+znrr0at7/DfEb+a+aUchC5xZ59zywi5cJCNGl0f1dEVQ8Ywl8+PMpb/e8PkEghl1MYXecbp++FhDbWK4Db5OL6+9n1WDSEkkaZ7wwo/SdSjcoSrIbTeVp6BSMTryMozADAWt2ETQzMPI3ts/LVBIbae1Qur8zb3tbx8H/Yeg8K4yLzho/PXDQnM1UNdR93uRps='
6
+
7
+ date: b'CuIu6PiwXFE6zH9VRTYTmj0zht4nKQtD0CMB+hflSl4NhhzEEodA0ULRu5/ah+u+Uw24vuK+n8YrlGGC1m1j8s6axbHMwjJnfMY7ZmDcDr6ITHINKpjUpD8KSunkjNReqhQCsPnoyPFTPYc6lA/oHA9qURrE3x5OeaL3Bt9iCwTqLFslHOJzI4E6hPDoLUl0qUu7WDwYKN+eq1tsvO/eqe2Eh8TlYGCPXvipXIu3pHp+7blNmeUn4wqOEIOnUUwvQPRNemKeKZWaO/j9kGMO0T3d7CjKE6MlbPJbj0EfCEXM37qJyvQ4r685CZwcC5Xa8AkiYarbHg0adcdCBHi0GECAUzKvlgEqcTolHTTgZ5orFNmV9K6NYqteXkbIgPQiroHDImcWcu9tjTRsmkuX8eCUSUlnBB+1qD1ggHIrk6MbJhAyDSNl4+PPm0E81z7mMJm/Ho/4ikQANz9Y9cvXLhYVVOkwmzaD1RW0I9rSnGtmW2elJVvgOQfi2Oh+nHTFzZs6UzuiG0F5JrbRnT3A8AxcWbiK3mwM3ne+0NKiF+rMSC6HsnDpE7KCNpg1GtGNw/ChXBQBkLBCxiBA78FkTmG+n4UUbC7FpPPPDZkR16o/CjTOVKI7691EcxxTU+jYPpxkB9Ul1kBZcA5ayyOL+wC3BiXi5CqjDcCbwmFivRk='
8
+
9
+ tokenID: b'cYLpI2tweuOUayPSzS/SIqWXPOMbpJinzXNTCBUo3Ew1taoLmb0jJQb4VKk1iRrD5CjnO9jS7CLyFJ1OLGsTNHyiN1dc/P2A5jGbCKyq5OOoKVd+F6IamfHy2YMTGjb4nDP8dMfeUlz5R0FYx3yiuQ3oY6CHYAm/TKYNLaSTu5JXpC4FrVlsVnExDzgsAlMZN52qtVNgKyaO4dC1NVzdlMMBA3Wqiu1+d77fupWl/mQTqWSw5BR2pbe4yZYEBskjW2R7d7xl22UxOOar2tY9yRYKbDB1K6M/00Lm9ApkFCXruz1K7/kbz8IPi/p79MSXpdBLQko4/Z5hxZ13+Jc6QeAqcgiENC5wjYdeaJMyYOGvCc/6XpFq4Pso3D3y3j/qytm/0S4QW4/OWwgQj/DggU9HU55I8HxP//SfHbO6CBVRwB86ym18YV8Y7z5pnK3urZBQYE3iVSfhB+dj2XOzAE07uKHNMKlouKzoBVq5voaYVvBFf2odVFDRbsoa1chJxGcr08dARLZ6lt8ZyKsXqNj+EKxVAy0bHOU+F4ZyIaQyVg8p2jUyOAH2WZwlsmJ8yg0f41ddlyHwIpurCuRa4L4wFlRblmrSH8hxjWIwKfqhz5CJSknvQGYksfzic0vv6nVRXMx8jcdq/HGNn6iiPZw4hluVPIwF8j+h4zsA/Wc='
10
+
11
+ bank: b'LbIOvIo9qYAb4jJjczhspqbR1jPArtkyKqfE2oM2LuA/d1oZTGf1VGgQyCt5T8m5UAknAZu32A1eCOkPx9B0xLW5lYuEvnRgeyvNW46FM4Fx/iFrLns4lClV2ivqJ+cWV2BV57u8p1Tj0OoVbVPSl6v0T+Pxg5YFJvNjU6+w2lRL63Q4WiKFKVCHfdKbxzkfCE3UtK+GTm9D/d28O2ql9lRBFFnsBvRCYa9MhWSx50CNv81kC1J0tELyejtVxrlnW24SVVt+S5v6kV70TVHYfmkoeh5gd8XJlGEZ8Ww30UoYsos5pgaywvgGhcKMtHF5gI4cAY9Q4xlcEpUcyFQSMB76+HmX92fqUxAfVJRCG1iUrFP+vswZ+eWvx0LCXG8Wl7tYSWzBD9NmKADiam00ODV1f/woWFp5GPr7eupUSPZhI72/I3s4u+m3bvtFFEFJKIeCAX1QcmZL6r3KKsp6PSDR4G7iCk2lbhpLW2icasYy93ziDnUn5FozgPrnjasSRxDB83yOqw619AFHMTlFeMfsFEKAz16JzBUz0Bi6PujEfKZ/QDLFK6FyNHicNaAPtpsci4PlMLSvoOVV5Uf+lIj2yBNYyMfSYda4wYJ+djk8GhguN+Cx/UWZcnTP/C8CS0wk8/iVVkSk+oAKM4esLDDgocl4AhbpuLbfB4XIQ9Y='
12
+
13
+ nameStatement: b'KoojlgYLT6dN8GI2DB3TDX8QKwZXgGYZUjX550icBht3gFn6hkoejR4dP4TivuFhaVb1muER0QTyjWog5N4yT9iWJHpfXV8j41SrFst4QcJO81yoxTY4nguLg3Ie56RYk6FQgz2dnKx2WPs/M+Hgc+d+mGlb7mjPtmD4zFVJJHyDaVeUdWWZWVVd79yIbFnNleCWxPjByfsGaDiIGdkoZApiLFknBn46q7/JR+McJXYfPajPZZK+pOExGHj8TKPS9nEuz+73POX1fz/faRWGYDtqIcpkoi1VoSKY1cN6G05NM1N2KAOmxPw4rXy5bYWhNNWreMQNH+P03QQQUiJgo94MMph5aL5G+dc/9ei/GLwZe7icMu3/O4S3y/ZTwOwt8nHu1QQukKEJxzvr11YChUGO/Hz9c3vGvxOvTzaYJhR9bSsf9tB0iVXtatsk6Ng6kOCFICatLFd8VDOxqiVzzW8gxP31AV0xsInDKbsjyOKdIvwxPwFX2+XEjWQ+YyTFuw16DzJniCPP3+eFN0VBK6PBf6OgeEJbtb1yM6hB00ny5JWoau55LIajHEV6/wp3fr81V151+0UdXzTtmmXCy+pTS0gqvc4Nm2O9Sar/iNpiwpyJqd5oxDb/RRcLExnFBGo21vh7vILAdR4ajeX7PPZhVcYeAMMvwOcqSit8G+o='
14
+
15
+ address: b'bj5oVnpsoxk2n33yVxia8QhDEI8fRxKMFsLvMy6EaQNKwolxrp1d2at4t+u0CmvkmTfCcgYd1Wr9oaJ6COmJfJNIwgy/ftZL/s+6PMzfBgapt0yDSBSDM4vtRIlIKnz4+BXCSDwwpqpCMo5J0OdEJaa4K3qf/9S1B1gf2rotmbt2xo/Ipsf7lfG2Rk+gSiGcs2bsMBBPBIqtTgf7SxXKk7RkOU58nPz+mnZEjdQgIsrQFJqII6+Esw7puTmDtrgGLokTSL7IXqtPw5V7Z/XXbqzImSuEc7wpk7ky4LU48wJ+bGe3JNosOB+c7KEdbiz4gIC2LCu8lw8kWbBaqsfXUzX3QCw+9n5g7xAw0VdrF7B/e3G8pCrmgoJis4NQl5Jy30M5pe7bUbgxwLY9HGmWzBgMGHCcpmrPO0ADpOM7QwdTPmevA482ZZTHzlny6mp3wWJAbomnuXJ8NvFnYMWIq7eImPY9mAKxjYzrX2fQ2qVr6Yr/T1bqErekvKz0V9eTrD+xu4nLyqmXe7/sD5f9yPY9PcLVAMMunBwf0x+aCsX1bqWVLPe4HG0nZgecZizH+Qx2Qv8PYWqBugqXTAVYNcS7vGnrsgoUjnWjTUTTIB/VoZCCtxcb0oDazoZbqJ+8+kv/MRsO/qdQd7nDbnU1oYg/RLfb87PXGQWYk5I0qPc='
16
+
17
+ asset: b'JAu5OeWuUZJAh7+8isnKhlUNaXKtXgmErqrUy0+YqOpY4P0nSKCT0DH7nmX1DxHn1ezZBMFkM+wcO/9a/9R4WM2CABl4nLYzuu/y20dzdl2w0MqzHla8OaIQIoBDms/XPaBiLByAVDjSm2UCt81NG25VXXn8LNMfmjvCXe8VBa0fBgZrNnPfR9YF8GRMBMcctSHiPrXZuvIHEvcHhbEgHa3zo7Gq5ApPqFrgiVwmdRkFjf5DxRvsUXPlqeUW7nKD0NLWIAUlM0L0/PnQpKLmhexYU5qgsSXngFH+vm7NBWas2AQXMKX4WNWCLqlkljIQ3vlHh+Q7lZZW2C9Y39tz0ghC8eLzygTGbnxxzT2bSk5YJMLy8Gm7toJTGhL4KoMEPO3LC/C3mnVGo9Qtdenkt634d616AILlMwWhf9SuhIPbygBegmrcxdtjdjDHbGopztn6Urcf7ai2+5dPVTcyWOiwZf6AXsGqf9TTmK/DLWVV/MSwRUPDY/HKCPwS6o2Bcm86MNJFOru4Ez7qPIArvjjgQMdYFXeO/C373oOX+EBFxLrui8Tz2t4SNzNGy+Tw9kVZDdJYUyoE5V6q0VFLmrE5J8TFKNsTz2LYIdkV7GIMITavvUqpieXfj5oJpNDA5CSTXl2OQmArLzbO10VETsizw5JPrFD5+LAGvsMLfMw='
18
+
19
+ liability: b'ND9CxqaBZFqxPImrnQKpCsY5pW74y5cRIbfxsfwlrOTnPGKXlllPXNqgwOBlJwvK/5xJA3u7Taj/EGpyRwvnH0JUwiWRWt6i+JlKD6OuRwYEhbvYZwWZPR6EFkI0C97Yusk9dJHZd6AAm84h5tPSnx1p2Z4g8IthGgrP1eLKTqK1v/cc+f2ldkKxM/iaEDemFB7LzVespjkUaE+nhfoG3wk7v1+BPSZt5TK1zwYDj1JdGP5JyuzyY6Niwyk9ThcHnuyJ8qMby7qsod1drvdEESDI0d0MrwYzVJBekhYQpqiOOYpnbkxXAlVkFyI2L7DV4+ue66MeFZfsad19FGQA9DGvCdXMAx+CvigRJyRG2DFMOZ7BgiCAwFJQAj+ayma8C0mHpeJChrt0i/ZOskEG6rjVtLPeQVvdvuV1NX1PDSpVeFD6ml/ZjN0BmIJxofbxRm/CiffJArbtT9dpVPneY5/nJuRF7JrLSx4+zSlailj7RHdxEsy8fq7x78Zl3jPhmtn7P0Gnd8+epgo+66IqBIHOSUpy2vM12qq44xaxX+bVChg6CGBp8fgwo+49/7zqWQ0/hXi08/ZwAnhcwlXBVi/U7zJsW4vTS0ZaizX6uv9oy7MMsDaqgvmqUr+XHUV2xQPGjswrpUeNhJVjMd+598+fqsGeJ8AYXkSv34EET0Q='
20
+
21
+ statementDate: b'LqkAFp9Li6qYamXMH8UUndrGhquJrtNXdMgG36EjZoFF72ire4Y0Q/S9q4HnKjOYlobVxxsxvkDdUiWxJa6uuuULMkabaozKLnDsvSfZCMDZcvEEkxrTqYU00fetcVCFieS74p+kS+a3a2Ohq2zlnHZEG6EmviiBPTP8Q3wGpQXYkJNQ8cPbum77dvYLabSUu61xUU0B6Dz5QHI5W/6NcFFgT6kB243eHoK4L3qJGaSm5cuB10iU62PY8HYoHTeKQ9Ve59qu5KPyNj9VwoD/tZebNpnveIRX68T4cmkGW3PW2FeyL27mxOsCMQrUCI5oxOFstO27a/Yfqg/CwRDnfYr+8Hm7vohB0bW7hSD6dfCFWfv4j6RdQ/T5ToF6jKmkBPiv6IGyR7LYAb/zr9drgSRxVaoR9gZMv+yq9xPJENQbDUV2P9N8ypRqS/+hNIMyx58TpnJc5st7J+hehWJT/5A2M59EjTVt1mwYLwbp9rjxevRoe5YZDxepeFCrS1gvyg8J0MJfE6J/RWogEAu9UTkgpks0s4NaXPGb/hMs4wMs9xsZooKjtH7p/sDTR8JlewVcxirl/xg1lxKELFd1qy4AVaFR2PAd8dj51t+7oZf+WCh+kODepzGgo6JOiBhAHAKuRd5lUT9J3gg5vZzaKEoGliBlEj2tNWDvVpbXUI8='
22
+
similarity_check.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model1 import model1
2
+ from model2 import model2
3
+ import checkTool as ct
4
+ import extract_pdf as pf
5
+
6
+ # get info from hkid card
7
+
8
def string_similarity(s1, s2):  # Levenshtein distance algorithm
    """Return the similarity of two strings as a percentage.

    The score is ``(1 - distance / longer_length) * 100`` rounded to one
    decimal, where *distance* is the Levenshtein edit distance. Equal
    inputs return ``100.0`` immediately.
    """
    if s1 == s2:
        return 100.0

    longest = max(len(s1), len(s2))

    # Rolling rows: `previous` holds the distances for the prior prefix of s1.
    previous = list(range(len(s2) + 1))
    for row, ch1 in enumerate(s1, start=1):
        current = [row]
        for col, ch2 in enumerate(s2, start=1):
            deletion = previous[col] + 1
            insertion = current[col - 1] + 1
            substitution = previous[col - 1] + (0 if ch1 == ch2 else 1)
            current.append(min(deletion, insertion, substitution))
        previous = current

    distance = previous[-1]
    return round((1 - distance / longest) * 100, 1)
34
+
35
def get_data(img1_path, img2_path, file_name):
    """Combine ID-card and bank-statement data with a name-similarity score.

    Both OCR models are run on the ID image and merged with
    ``ct.combine_info``; the bank-statement data comes from
    ``pf.get_info_from_bank``. The statement name is compared with the ID
    name using ``string_similarity``.

    Args:
        img1_path: Path of the ID-card image.
        img2_path: Path of the bank-statement image.
        file_name: Second argument forwarded to ``pf.get_info_from_bank``.

    Returns:
        The bank-statement mapping extended with ``similarity_score``,
        ``name_on_id``, ``hkid``, ``validity`` and ``issue_date``.
    """
    # get info from hkid card
    info1 = model1(img1_path)
    info2 = model2(img1_path)
    cinfo = ct.combine_info(info1, info2)

    # get info from bank
    data = pf.get_info_from_bank(img2_path, file_name)
    name = data["name_on_bs"]

    ############# Similarity check ##############
    name1 = cinfo[0]
    similarity_score = string_similarity(name, name1)

    data["similarity_score"] = similarity_score
    data["name_on_id"] = name1
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]

    return data
85
+
86
+
87
+ # path1 = 'IMG_4495.jpg'
88
+ # path2 = 'hangseng_page-0001.jpg'
89
+ # print(get_score(path1, path2))
test.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Environment sanity check: report which Streamlit release is installed.
from streamlit import __version__ as streamlit_version

print(streamlit_version)
text_reader_v2.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model1 import model1
2
+ from model2 import model2
3
+ import checkTool
4
+
5
def textreader(path):
    """Return the holder name read from the ID card image at *path*.

    The image is processed by both ``model1`` and ``model2``; their results
    are merged by ``checkTool.combine_info`` and the first element of the
    merged result (the name) is returned.

    Args:
        path: The ID card image, passed unchanged to both models.

    Returns:
        Element 0 of the combined info, i.e. the extracted name.
    """
    info1 = model1(path)
    info2 = model2(path)

    cinfo = checkTool.combine_info(info1, info2)

    return cinfo[0]
webapp.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import similarity_check as sc
3
+ import cv2
4
+ from PIL import Image
5
+ import numpy as np
6
+ import tempfile
7
+ from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
8
+ import demo
9
+ import time
10
+ import streamlit as st
11
+ import requests
12
+ import json
13
+ import request_json.sbt_request_generator as sbt
14
+
15
# Module-level placeholder dict. A `global` statement at module scope has no
# effect, so only the binding is kept; main() works on its own local `data`.
data = {}
17
+
18
+
19
# JSON scratch file used to carry extracted data across Streamlit reruns.
_DATA_FILE = 'data1.txt'


def _load_saved_data(path=_DATA_FILE):
    """Return the dict stored at *path*, or ``{}`` if it is missing or not valid JSON."""
    try:
        with open(path, 'r') as f:
            return json.loads(f.read())
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def _save_data(data, path=_DATA_FILE):
    """Serialise *data* as JSON into *path*, replacing any previous content."""
    with open(path, 'w') as f:
        f.write(json.dumps(data))


def main():
    """Render the SBT Streamlit page.

    The page has three parts:

    1. Similarity check: the user uploads the HKID card image first and the
       bank statement second; "Recognise" runs ``sc.get_data`` on them, saves
       the result to ``data1.txt`` and displays the extracted fields.
    2. Facial recognition: while the "Run" checkbox is ticked, webcam frames
       are fed to ``demo.process_frame``; once more than 20 scores have been
       collected their average is stored under ``avg_score`` in ``data1.txt``.
    3. "Confirm" hands the collected data to ``sbt.split_data``.
    """
    html_temp = """
    <body style="background-color:red;">
    <div style="background-color:teal ;padding:10px">
    <h2 style="color:white;text-align:center;">SBT Web Application</h2>
    </div>
    </body>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    st.header("I. Similarity Check")
    image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], accept_multiple_files=True)

    # Pre-bind so that pressing "Recognise" without two uploads cannot raise NameError.
    image1 = image2 = file_name = None
    if len(image_file) in (1, 2):
        image1 = Image.open(image_file[0])
        st.text("HKID card")
        st.image(image1)
    if len(image_file) == 2:
        image2 = Image.open(image_file[1])
        file_name = image_file[1].name
        st.text("Bank statement")
        st.image(image2)

    data = {}
    if st.button("Recognise"):
        if image1 is None or image2 is None:
            st.error('Please upload exactly two images: the HKID card first, then the bank statement.')
        else:
            with st.spinner('Wait for it...'):
                # NOTE(review): PIL images are passed where get_data's parameters
                # are named *_path -- confirm the models accept image objects.
                data = sc.get_data(image1, image2, file_name)
                # Persist the result: `data` is reset on every Streamlit rerun.
                _save_data(data)
                print(f'data inside {data}')
            st.success('Done!')
            similarity = data["similarity_score"]
            st.text(f'score: {similarity}')
            if similarity > 85:
                st.text(f'matched')
            else:
                st.text(f'unmatched')

            st.header("IIa. HKID Data Extraction")
            st.text(f'Name: {data["name_on_id"]}')  # name is without space
            st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
            st.text(f'Date of issue: {data["issue_date"]}')

            st.header("IIb. Bank Statement Data Extraction")
            st.text(f'Name: {data["name_on_bs"]}')
            st.text(f'Address: {data["address"]}')
            st.text(f'Bank: {data["bank"]}')
            st.text(f'Date: {data["date"]}')
            st.text(f'Asset: {data["asset"]} hkd')
            st.text(f'Liabilities: {data["liabilities"]} hkd')

    st.header("II. Facial Recognition")
    run = st.checkbox('Run')

    # State threaded through demo.process_frame from one frame to the next.
    face_locations = []
    face_names = []
    process_this_frame = True
    face_scores = []
    faces = 0

    frame_window = st.image([])

    if run:
        camera = cv2.VideoCapture(0)
        try:
            while True:
                # Grab a single frame of video.
                ret, frame = camera.read()
                if not ret:
                    st.warning('Could not read a frame from the camera.')
                    break

                (result, process_this_frame, face_locations, faces,
                 face_names, face_scores) = demo.process_frame(
                    frame, process_this_frame, face_locations, faces,
                    face_names, face_scores)
                # Display the resulting image.
                frame_window.image(result)

                print(face_scores)
                if len(face_scores) > 20:
                    avg_score = sum(face_scores) / len(face_scores)
                    st.write(f'{avg_score}')
                    # Read first, then write: opening with 'w' before reading
                    # would truncate the file and make read() fail.
                    data = _load_saved_data()
                    data['avg_score'] = str(avg_score)
                    _save_data(data)
                    # Enough samples collected; stop so the rest of the page renders.
                    break
        finally:
            # Always free the webcam, even if frame processing raises.
            camera.release()
    else:
        st.write('Stopped')

    if st.button("Confirm"):
        with st.spinner('Sending data...'):
            # A button click reruns the script, so `data` may be empty here;
            # fall back to what an earlier run saved to disk.
            sbt.split_data(data or _load_saved_data())
        st.success('Done!')


if __name__ == '__main__':
    main()
168
+
169
+
170
+
171
+ # def save_image(image):
172
+ # try:
173
+ # temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
174
+ # Image.save(temp_file.name)
175
+ # return temp_file.name
176
+ # except IOError:
177
+ # print("Unable to save image to temporary file")
178
+ # return None
179
+
180
+ # json_file = 'request json\request_legalDocument.json'
181
+ # file = open(json_file, 'r')
182
+ # data = json.load(file)
183
+ # file.close()
184
+ # # Update data
185
+ # data.update(new_data)
186
+ # file = open(json_file, 'w')
187
+ # for item in data['request']['body']['formdata']:
188
+ # if item["key"] == "requestId":
189
+ # item["value"] = ""
190
+ # elif item["key"] == "userId":
191
+ # item["value"] = generate_token_id(2048)
192
+ # elif item["key"] == "endpoint":
193
+ # item["value"] = ""
194
+ # elif item["key"] == "apiType":
195
+ # item["value"] = ""
196
+ # elif item["key"] == "docType":
197
+ # item["value"] = "HKID"
198
+ # elif item["key"] == "nameDoc":
199
+ # item["value"] = new_data["name_on_id"]
200
+ # elif item["key"] == "docID":
201
+ # item["value"] = new_data["name_on_id"]
202
+ # elif item["key"] == "docValidity":
203
+ # item["value"] = new_data["validity"]
204
+ # elif item["key"] == "dateOfIssue":
205
+ # item["value"] = new_data["date_issue"]
206
+ # elif item["key"] == "matchingScore":
207
+ # item["value"] = new_data["similarity_score"]
208
+ # json.dump(data, file)
209
+ # file.close()