Spaces:
Sleeping
Sleeping
Mitul Mohammad Abdullah Al Mukit
committed on
Commit
·
1f72938
1
Parent(s):
ac8d65b
first commit
Browse files- .gitignore +5 -0
- Visualization_utilities.py +189 -0
- __pycache__/Visualization_utilities.cpython-311.pyc +0 -0
- __pycache__/Visualization_utilities.cpython-39.pyc +0 -0
- __pycache__/checkTool.cpython-311.pyc +0 -0
- __pycache__/checkTool.cpython-39.pyc +0 -0
- __pycache__/data_encryption.cpython-311.pyc +0 -0
- __pycache__/data_encryption.cpython-39.pyc +0 -0
- __pycache__/demo.cpython-311.pyc +0 -0
- __pycache__/demo.cpython-39.pyc +0 -0
- __pycache__/extract_pdf.cpython-311.pyc +0 -0
- __pycache__/extract_pdf.cpython-39.pyc +0 -0
- __pycache__/imageSegmentation.cpython-311.pyc +0 -0
- __pycache__/imageSegmentation.cpython-39.pyc +0 -0
- __pycache__/model1.cpython-311.pyc +0 -0
- __pycache__/model1.cpython-39.pyc +0 -0
- __pycache__/model2.cpython-311.pyc +0 -0
- __pycache__/model2.cpython-39.pyc +0 -0
- __pycache__/similarity_check.cpython-311.pyc +0 -0
- __pycache__/similarity_check.cpython-39.pyc +0 -0
- __pycache__/webapp.cpython-311.pyc +0 -0
- blaze_face_short_range.tflite +3 -0
- checkTool.py +227 -0
- data1.txt +1 -0
- data_encryption.py +12 -0
- demo.py +185 -0
- extract_pdf.py +139 -0
- extraction_data.py +96 -0
- imageSegmentation.py +60 -0
- model1.py +46 -0
- model2.py +46 -0
- pubkey.pem +13 -0
- request_json/__pycache__/sbt_request_generator.cpython-311.pyc +0 -0
- request_json/__pycache__/sbt_request_generator.cpython-39.pyc +0 -0
- request_json/request_legalDocument.json +75 -0
- request_json/sbt_request_generator.py +108 -0
- requirements.txt +15 -0
- sbt/deployment.py +3 -0
- sbt_request.txt +22 -0
- similarity_check.py +89 -0
- test.py +3 -0
- text_reader_v2.py +18 -0
- webapp.py +209 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
image/*
|
2 |
+
saved/*
|
3 |
+
image
|
4 |
+
saved
|
5 |
+
.DS_Store
|
Visualization_utilities.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import mediapipe as mp
|
3 |
+
from mediapipe import solutions
|
4 |
+
from mediapipe.framework.formats import landmark_pb2
|
5 |
+
import numpy as np
|
6 |
+
import math
|
7 |
+
|
8 |
+
# visualization libraries
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
import matplotlib.animation as animation
|
11 |
+
from matplotlib import style
|
12 |
+
|
13 |
+
def draw_eyes_on_image(rgb_image, detection_result):
    """Draw eye guide lines and measure how open each eye is.

    For every face in ``detection_result.face_landmarks`` a vertical and a
    horizontal line is drawn over each eye, and the vertical/horizontal
    length ratio is computed as a percentage.

    Args:
        rgb_image: Image array whose first two dimensions are (height, width).
            It is not modified; drawing happens on a copy.
        detection_result: Object exposing ``face_landmarks``, a sequence of
            faces, each an iterable of landmarks with normalized ``x`` and
            ``y`` attributes.

    Returns:
        Tuple ``(annotated_image, left_ratio, right_ratio)``. The ratios are
        those of the last face processed. Both are 0 when no face is present,
        and a ratio is 0 when that eye's horizontal extent is zero pixels.
    """
    # Landmark indices follow the canonical face model shown in
    # https://github.com/google/mediapipe/blob/a908d668c730da128dfa8d9f6bd25d519d006692/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png
    # Each pair is the (start, end) landmark of one measured segment,
    # taken at the middle of the eye where the opening is largest.
    left_vertical, right_vertical = (159, 145), (386, 374)
    left_horizontal, right_horizontal = (33, 133), (362, 263)
    line_color = (0, 200, 0)

    annotated_image = np.copy(rgb_image)
    height, width = rgb_image.shape[:2]

    # Values returned when no face is detected.
    left_ratio = right_ratio = 0

    def measure(points, segment):
        """Draw one segment on the annotated copy and return its pixel length."""
        start, end = points[segment[0]], points[segment[1]]
        cv2.line(annotated_image, start, end, line_color, 1)
        return math.dist(start, end)

    def as_percentage(vertical, horizontal):
        """Return vertical/horizontal as a percentage, or 0 for a degenerate eye."""
        return vertical / horizontal * 100 if horizontal else 0

    # If several faces are detected, every face is annotated; the caller is
    # expected to ask the user to move closer to the camera in that case.
    for face_landmarks in detection_result.face_landmarks:
        # Convert normalized landmark coordinates to integer pixel positions.
        points = [
            (int(landmark.x * width), int(landmark.y * height))
            for landmark in face_landmarks
        ]

        left_vertical_len = measure(points, left_vertical)
        right_vertical_len = measure(points, right_vertical)
        left_horizontal_len = measure(points, left_horizontal)
        right_horizontal_len = measure(points, right_horizontal)

        left_ratio = as_percentage(left_vertical_len, left_horizontal_len)
        right_ratio = as_percentage(right_vertical_len, right_horizontal_len)

    return annotated_image, left_ratio, right_ratio
|
131 |
+
|
132 |
+
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with face-mesh overlays for each face.

    For every face in ``detection_result.face_landmarks`` the tesselation,
    contour and iris connections are drawn with MediaPipe's default styles.
    The input image is left untouched.
    """
    canvas = np.copy(rgb_image)

    # If several faces are detected, every face is drawn; the caller is
    # expected to ask the user to move closer to the camera in that case.
    for face in detection_result.face_landmarks:
        # Repackage the landmarks into the proto type the drawing helper takes.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in face
        ])

        mesh = mp.solutions.face_mesh
        styles = mp.solutions.drawing_styles
        layers = (
            (mesh.FACEMESH_TESSELATION, styles.get_default_face_mesh_tesselation_style),
            (mesh.FACEMESH_CONTOURS, styles.get_default_face_mesh_contours_style),
            (mesh.FACEMESH_IRISES, styles.get_default_face_mesh_iris_connections_style),
        )
        for connections, make_style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=canvas,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style())

    return canvas
|
169 |
+
|
170 |
+
def plot_face_blendshapes_bar_graph(face_blendshapes):
    """Show a horizontal bar chart of blendshape scores.

    Each item of ``face_blendshapes`` must expose ``category_name`` and
    ``score``. Bars are plotted in the order given, first item at the top.
    """
    names = [category.category_name for category in face_blendshapes]
    scores = [category.score for category in face_blendshapes]
    # One row per blendshape, in input order.
    ranks = range(len(names))

    fig, ax = plt.subplots(figsize=(12, 12))
    bars = ax.barh(ranks, scores, label=list(map(str, ranks)))
    ax.set_yticks(ranks, names)
    ax.invert_yaxis()

    # Write the numeric score at the end of each bar.
    for value, rect in zip(scores, bars.patches):
        plt.text(rect.get_x() + rect.get_width(), rect.get_y(), f"{value:.4f}", va="top")

    ax.set_xlabel('Score')
    ax.set_title("Face Blendshapes")
    plt.tight_layout()
    plt.show()
|
__pycache__/Visualization_utilities.cpython-311.pyc
ADDED
Binary file (8.96 kB). View file
|
|
__pycache__/Visualization_utilities.cpython-39.pyc
ADDED
Binary file (4.37 kB). View file
|
|
__pycache__/checkTool.cpython-311.pyc
ADDED
Binary file (9.76 kB). View file
|
|
__pycache__/checkTool.cpython-39.pyc
ADDED
Binary file (4.84 kB). View file
|
|
__pycache__/data_encryption.cpython-311.pyc
ADDED
Binary file (1.11 kB). View file
|
|
__pycache__/data_encryption.cpython-39.pyc
ADDED
Binary file (625 Bytes). View file
|
|
__pycache__/demo.cpython-311.pyc
ADDED
Binary file (4.65 kB). View file
|
|
__pycache__/demo.cpython-39.pyc
ADDED
Binary file (2.61 kB). View file
|
|
__pycache__/extract_pdf.cpython-311.pyc
ADDED
Binary file (5.92 kB). View file
|
|
__pycache__/extract_pdf.cpython-39.pyc
ADDED
Binary file (3.21 kB). View file
|
|
__pycache__/imageSegmentation.cpython-311.pyc
ADDED
Binary file (2.55 kB). View file
|
|
__pycache__/imageSegmentation.cpython-39.pyc
ADDED
Binary file (1.49 kB). View file
|
|
__pycache__/model1.cpython-311.pyc
ADDED
Binary file (1.6 kB). View file
|
|
__pycache__/model1.cpython-39.pyc
ADDED
Binary file (902 Bytes). View file
|
|
__pycache__/model2.cpython-311.pyc
ADDED
Binary file (1.76 kB). View file
|
|
__pycache__/model2.cpython-39.pyc
ADDED
Binary file (968 Bytes). View file
|
|
__pycache__/similarity_check.cpython-311.pyc
ADDED
Binary file (3.12 kB). View file
|
|
__pycache__/similarity_check.cpython-39.pyc
ADDED
Binary file (1.75 kB). View file
|
|
__pycache__/webapp.cpython-311.pyc
ADDED
Binary file (4.58 kB). View file
|
|
blaze_face_short_range.tflite
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4578f35940bf5a1a655214a1cce5cab13eba73c1297cd78e1a04c2380b0152f
|
3 |
+
size 229746
|
checkTool.py
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def check_integer(string):
    """Return True if ``string`` contains at least one digit character."""
    return any(char.isdigit() for char in string)
|
11 |
+
|
12 |
+
def check_alpha(string):
    """Return True if ``string`` holds only ASCII letters and spaces.

    An empty string counts as valid.
    """
    return all(
        'a' <= char <= 'z' or 'A' <= char <= 'Z' or char == ' '
        for char in string
    )
|
17 |
+
|
18 |
+
def is_chinese_name(text):
    """Heuristically decide whether ``text`` looks like a romanized name.

    The text must be at most 40 characters long, and its first eight
    characters must contain at least three uppercase ASCII letters, two
    lowercase ASCII letters and one space.
    """
    if len(text) > 40:
        return False

    # The counts only grow along a prefix, so checking the longest prefix
    # (8 characters) covers every shorter one.
    head = text[:8]
    uppers = sum(1 for char in head if 'A' <= char <= 'Z')
    lowers = sum(1 for char in head if 'a' <= char <= 'z')
    blanks = sum(1 for char in head if char == ' ')

    return uppers >= 3 and lowers >= 2 and blanks >= 1
|
39 |
+
|
40 |
+
def seperate_name(text):
    """Return ``text`` with all spaces removed, converted to lowercase."""
    return text.replace(' ', '').lower()
|
65 |
+
|
66 |
+
def validate_hkid(hkid):
    """Check the format and check digit of a HKID number.

    Parentheses around the check digit are optional and ignored. Returns
    False when the text does not have the form of one or two uppercase
    letters, six digits and a check character (0-9 or A), or when the
    weighted sum of its characters is not a multiple of 11.
    """
    stripped = hkid.replace('(', '').replace(')', '')

    parsed = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', stripped)
    if parsed is None:
        return False

    # Character values: digits 0-9, letters 10-35, and 36 for a missing
    # first letter (represented by None).
    alphabet = list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ') + [None]

    symbols = []
    for group in parsed.groups():
        if group:
            symbols.extend(group)
        else:
            symbols.append(group)

    checksum = 0
    for symbol, weight in zip(symbols, (9, 8, 7, 6, 5, 4, 3, 2, 1)):
        checksum += alphabet.index(symbol) * weight

    return checksum % 11 == 0
|
82 |
+
|
83 |
+
def format_HKID(hkid):
    """Return ``hkid`` in canonical form with the check digit in parentheses.

    Existing parentheses are ignored, so ``'Y3321779'`` and ``'Y332177(9)'``
    both yield ``'Y332177(9)'``. Both one- and two-letter prefixes are
    supported.

    Args:
        hkid: HKID text, with or without parentheses.

    Returns:
        The formatted HKID, or ``hkid`` unchanged when it does not consist of
        one or two uppercase letters, six digits and a check character
        (0-9 or A).
    """
    stripped = hkid.replace('(', '').replace(')', '')

    parsed = re.fullmatch(r'([A-Z]{1,2}[0-9]{6})([0-9A])', stripped)
    if parsed is None:
        # Unparseable text cannot be formatted; hand it back as received.
        return hkid

    body, check_digit = parsed.groups()
    return f'{body}({check_digit})'
|
106 |
+
|
107 |
+
def format_issuedate(issuedate):
    """Return ``issuedate`` with every '(', ')' and 'C' character removed."""
    return ''.join(char for char in issuedate if char not in '()C')
|
111 |
+
|
112 |
+
def is_string_integer(string):
    """Return True if ``int(string)`` succeeds, False if it raises ValueError."""
    try:
        int(string)
    except ValueError:
        # Not parseable as an integer.
        return False
    else:
        return True
|
118 |
+
|
119 |
+
def check_issuedate(text):
    """Return True if ``text`` is an issue date of the form ``DD-DD``.

    One leading '(' or 'C' and one trailing ')' are tolerated, so the raw
    text must be 5 to 7 characters long. After removing them, exactly two
    ASCII digits, a dash and two ASCII digits must remain.

    Args:
        text: Candidate issue-date string.

    Returns:
        True for a well-formed date, otherwise False.
    """
    # Anything outside 5..7 characters cannot be a date with optional wrappers.
    if len(text) < 5 or len(text) > 7:
        return False

    if text[0] in ('(', 'C'):
        text = text[1:]
    if text and text[-1] == ')':
        text = text[:-1]

    if len(text) != 5 or text[2] != '-':
        return False

    # Require plain digits; int() would also accept signs, underscores and
    # surrounding whitespace.
    digits = text[:2] + text[3:]
    return digits.isascii() and digits.isdigit()
|
136 |
+
|
137 |
+
def print_info(name, valid_hkid, hkid, issuedate):
    """Print the extracted name, HKID (with its validity) and issue date."""
    report = [
        f'Name: {name}',
        f'HKID: {hkid} and validity: {valid_hkid}',
        f'Date of issue: {issuedate}',
    ]
    print('\n'.join(report))
|
141 |
+
|
142 |
+
def is_comma_present(string):
    """Return True if ``string`` contains a comma."""
    return any(char == ',' for char in string)
|
144 |
+
|
145 |
+
def longest_common_subsequence(s1, s2):
    """Return a longest common subsequence of ``s1`` and ``s2`` as a string."""
    rows, cols = len(s1), len(s2)

    # table[r][c] is the LCS length of s1[:r] and s2[:c].
    table = [[0 for _ in range(cols + 1)] for _ in range(rows + 1)]
    for r, left_char in enumerate(s1, start=1):
        for c, right_char in enumerate(s2, start=1):
            if left_char == right_char:
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    # Walk back from the bottom-right corner, collecting matched characters
    # in reverse order.
    picked = []
    r, c = rows, cols
    while r and c:
        if s1[r - 1] == s2[c - 1]:
            picked.append(s1[r - 1])
            r -= 1
            c -= 1
        elif table[r][c - 1] >= table[r - 1][c]:
            c -= 1
        else:
            r -= 1

    return ''.join(reversed(picked))
|
174 |
+
|
175 |
+
def combine_info(info1, info2):
    """Merge the ``[name, validity, hkid, issuedate]`` results of two models.

    Args:
        info1: Result of the first model; validity is the string 'True' or
            'False'.
        info2: Result of the second model, same layout.

    Returns:
        A four-item list ``[name, validity, hkid, issuedate]``:

        * name: the shared name, the non-empty one if the other is empty,
          otherwise the longest common subsequence of both names.
        * validity / hkid: taken from the model that reports 'True'. If both
          report 'True' but disagree on the HKID, or neither reports 'True'
          and they disagree, the pair is ``'False', 'Suspicous HKID'``. If
          neither reports 'True' and they agree, the pair is 'False' plus the
          shared HKID text.
        * issuedate: the shared date, or 'Unmatched issuedate'.
    """
    print(info1)
    print(info2)

    name1, valid1, hkid1, date1 = info1[0], info1[1], info1[2], info1[3]
    name2, valid2, hkid2, date2 = info2[0], info2[1], info2[2], info2[3]

    if name1 == name2 or name2 == '':
        name = name1
    elif name1 == '':
        name = name2
    else:
        name = longest_common_subsequence(name1, name2)

    first_valid = valid1 == 'True'
    second_valid = valid2 == 'True'
    if first_valid and not second_valid:
        validity, hkid = valid1, hkid1
    elif second_valid and not first_valid:
        validity, hkid = valid2, hkid2
    elif hkid1 == hkid2:
        # Both models agree on the HKID text; 'True' only if both validated it.
        validity, hkid = ('True' if first_valid else 'False'), hkid1
    else:
        # Same validity verdict but different HKID text: trust neither.
        # Always emitting this pair keeps the result four items long.
        validity, hkid = 'False', 'Suspicous HKID'

    issuedate = date1 if date1 == date2 else 'Unmatched issuedate'

    return [name, validity, hkid, issuedate]
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
# info1 = ['', 'True', 'Z683365(5)', '06-96']
|
217 |
+
# info2 = ['lok wing', 'False', 'Z68336505)', '06-96']
|
218 |
+
# info = combine_info(info1, info2)
|
219 |
+
# print_info(*info)
|
220 |
+
|
221 |
+
|
222 |
+
# text = 'TAMKing Man'
|
223 |
+
# if is_comma_present(text):
|
224 |
+
# text = text.replace(',', '')
|
225 |
+
# if not check_integer(text):
|
226 |
+
# if check_alpha(text) and is_chinese_name(text):
|
227 |
+
# name = seperate_name(text)
|
data1.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"name_on_bs": "laupakching", "address": "rm a,33/f, blk 2b ocean pride 100 tai ho road tsuen wan nt ", "bank": "hangseng", "date": "4feb 2023", "asset": 117923.2, "liabilities": "16965.04", "similarity_score": 100.0, "name_on_id": "laupakching", "hkid": "Y332177(9)", "validity": "True", "issue_date": "11-95"}
|
data_encryption.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import rsa
|
3 |
+
|
4 |
+
# Load the RSA public key once, when the module is imported. The path is
# relative, so it is resolved against the current working directory.
with open("pubkey.pem", 'rb') as key_file:
    pubKey = rsa.PublicKey.load_pkcs1(key_file.read())
|
6 |
+
|
7 |
+
def encrypt(data):
    """Encrypt every value of ``data`` in place with the module's public key.

    Each value is UTF-8 encoded, RSA-encrypted with ``pubKey`` and stored
    back under the same key as base64-encoded bytes.

    Args:
        data: Mutable mapping to encrypt. String values are encoded as-is,
            ``bytes`` values are used directly, and any other value (for
            example a float) is converted with ``str()`` first.

    Returns:
        None; ``data`` is modified in place.
    """
    # Only existing keys are reassigned, so mutating while iterating is safe.
    for key, value in data.items():
        if isinstance(value, bytes):
            value_bytes = value
        else:
            # str() is a no-op for strings and lets numeric fields through
            # instead of failing on the missing .encode attribute.
            value_bytes = str(value).encode("utf-8")
        # NOTE(review): rsa.encrypt limits the message size by key length;
        # long values may raise - confirm against the key in pubkey.pem.
        data[key] = base64.b64encode(rsa.encrypt(value_bytes, pubKey))
|
demo.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import face_recognition
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
import imageSegmentation
|
6 |
+
|
7 |
+
from mediapipe.tasks.python import vision
|
8 |
+
import Visualization_utilities as vis
|
9 |
+
|
10 |
+
# Get a reference to webcam #0 (the default one)
|
11 |
+
# video_capture = cv2.VideoCapture(0)
|
12 |
+
|
13 |
+
# Load a sample picture and learn how to recognize it.
|
14 |
+
|
15 |
+
def get_face_encoding(path):
    """Crop the HKID image at ``path`` and return the first face encoding.

    The cropped image is written to 'saved/HKID.jpg' and read back before
    encoding.
    """
    crop_path = 'saved/HKID.jpg'
    cv2.imwrite(crop_path, imageSegmentation.auto_cropping(path))

    encodings = face_recognition.face_encodings(
        face_recognition.load_image_file(crop_path)
    )
    # NOTE(review): indexing fails if no encoding is returned for the crop;
    # presumably that means no face was found - confirm with the library docs.
    return encodings[0]
|
21 |
+
|
22 |
+
# HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
|
23 |
+
# HKID_face_encoding = face_recognition.face_encodings(HKID_image)[0]
|
24 |
+
|
25 |
+
# Create arrays of known face encodings and their names
|
26 |
+
# known_face_encodings = [
|
27 |
+
# HKID_face_encoding
|
28 |
+
# ]
|
29 |
+
# known_face_names = [
|
30 |
+
# "Marco"
|
31 |
+
# ]
|
32 |
+
|
33 |
+
# Initialize some variables
|
34 |
+
# face_locations = []
|
35 |
+
# face_encodings = []
|
36 |
+
# face_names = []
|
37 |
+
# process_this_frame = True
|
38 |
+
|
39 |
+
# score = []
|
40 |
+
|
41 |
+
# faces = 0 # number of faces
|
42 |
+
|
43 |
+
# while True:
|
44 |
+
# # Grab a single frame of video
|
45 |
+
# ret, frame = video_capture.read()
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
# # # Draw a label with a name below the face
|
50 |
+
# # cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
|
51 |
+
# # font = cv2.FONT_HERSHEY_DUPLEX
|
52 |
+
# # cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
|
53 |
+
|
54 |
+
# # Display the resulting image
|
55 |
+
# cv2.imshow('Video', frame)
|
56 |
+
|
57 |
+
# # Hit 'q' on the keyboard to quit!
|
58 |
+
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
59 |
+
# break
|
60 |
+
|
61 |
+
|
62 |
+
def process_frame(frame, process_this_frame, face_locations, faces, face_names, score):
    """Recognize faces in ``frame`` against the HKID photo and annotate it.

    Args:
        frame: Video frame to analyse and draw on (modified in place before
            the final colour conversion).
        process_this_frame: When true, detection and matching run on this
            frame; otherwise the incoming ``face_locations``, ``faces`` and
            ``face_names`` are reused for drawing.
        face_locations: Face boxes from a previous call, as
            ``(top, right, bottom, left)`` at quarter scale.
        faces: Number of faces found in a previous call.
        face_names: Labels matching ``face_locations``.
        score: List collecting the distances of accepted matches; replaced by
            a new empty list whenever a face fails to match.

    Returns:
        Tuple ``(frame, process_this_frame, face_locations, faces,
        face_names, score)``. ``frame`` is the annotated image after an
        RGB->BGR channel swap; ``process_this_frame`` is returned unchanged.
    """
    # NOTE(review): the reference encoding is rebuilt from "image" on every
    # call, which redoes cropping and encoding per frame - consider caching.
    known_face_encodings = [get_face_encoding("image")]
    known_face_names = ["recognized"]

    if process_this_frame:
        face_names = []

        # Work on a quarter-size frame for faster face recognition.
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # OpenCV uses BGR; face_recognition expects RGB.
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        faces = len(face_encodings)

        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)

            # Pick the known face with the smallest distance to this face.
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            print(face_distances)

            if matches[best_match_index] and face_distances[best_match_index] < 0.45:
                score.append(face_distances[best_match_index])
                name = known_face_names[best_match_index]
            else:
                # A failed match resets the collected scores.
                score = []
                name = "Unknown"

            face_names.append(name)

    if faces > 1:
        # Warn at the bottom centre of the frame.
        warning = "More than 1 person detected!"
        warning_font = cv2.FONT_HERSHEY_DUPLEX
        warning_scale = 1
        warning_thickness = 2

        frame_height, frame_width = frame.shape[0], frame.shape[1]
        warning_size, _ = cv2.getTextSize(warning, warning_font, warning_scale, warning_thickness)

        warning_x = int((frame_width - warning_size[0]) / 2)
        warning_y = frame_height - int(warning_size[1] / 2)

        cv2.putText(frame, warning, (warning_x, warning_y), warning_font, warning_scale,
                    (255, 255, 255), warning_thickness, cv2.LINE_AA)

    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Detection ran at 1/4 size, so scale the box back up.
        top, right, bottom, left = top * 4, right * 4, bottom * 4, left * 4

        # Box around the face.
        cv2.rectangle(frame, (left, top), (right, bottom), (65, 181, 41), 4)

        # Filled label strip along the bottom of the box.
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (44, 254, 0), cv2.FILLED)

        # Label text inside the strip.
        cv2.putText(frame, name, (left + 70, bottom - 6), cv2.FONT_HERSHEY_TRIPLEX, 1,
                    (255, 255, 255), 2)

    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    return frame, process_this_frame, face_locations, faces, face_names, score
|
170 |
+
|
171 |
+
def convert_distance_to_percentage(distance, threshold):
    """Convert a face distance into a similarity percentage.

    Smaller distances mean a better match. Distances below ``threshold`` map
    linearly onto 100 (distance 0) down to 80 (just under the threshold).
    Distances at or above it yield ``(1 - distance) * 100``.

    Args:
        distance: Face distance, where 0 means identical.
        threshold: Distance below which the faces count as a match.

    Returns:
        The similarity score as a number (percentage).
    """
    if distance < threshold:
        # Scale by the caller's threshold and make closer matches score higher.
        return 80 + (1 - distance / threshold) * 20
    return (1 - distance) * 100
|
178 |
+
|
179 |
+
# percent = convert_distance_to_percentage(avg_score, 0.45)
|
180 |
+
|
181 |
+
# print(f'avg_score = {percent:.2f}% : Approved!')
|
182 |
+
|
183 |
+
# # Release handle to the webcam
|
184 |
+
# video_capture.release()
|
185 |
+
# cv2.destroyAllWindows()
|
extract_pdf.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
################# cnocr ##################
|
2 |
+
from cnocr import CnOcr
|
3 |
+
|
4 |
+
def validate(text):
    """Return ``text`` with every space and comma removed."""
    return text.replace(' ', '').replace(',', '')
|
9 |
+
|
10 |
+
def check_bank(text):
    """Return the first known bank keyword found in ``text``.

    Spaces are removed from ``text`` before searching, and every keyword is
    tried in order before giving up.

    Args:
        text: Text to search; matching is case-sensitive against lowercase
            keywords.

    Returns:
        The matching keyword ('bankofchina', 'hangseng', 'hsbc' or 'sc'), or
        False when none occurs in the text.
    """
    compact = text.replace(' ', '')
    bank_list = ['bankofchina', 'hangseng', 'hsbc', 'sc']
    return next((bank for bank in bank_list if bank in compact), False)
|
18 |
+
|
19 |
+
def check_bank_name(img_path):
    """Guess the bank from a statement's file name or path.

    Returns the first bank code whose code or standard statement title
    appears in ``str(img_path)``, or None when nothing matches.
    """
    # Standard statement titles per bank, for example:
    #   BOCH               - "Consolidated Statement 2023-01-01"
    #   HangSeng           - "Statement of Prestige Banking 2023-03-0" or
    #                        "Statement of Preferred Banking 2023-03-07"
    #   HSBC               - "Statement - HSBC One Account 2023-02-10"
    #   Standard Chartered - "statementOfAccount 2023-02-01"
    title_by_bank = {
        'boch': "Consolidated Statement",
        'hangseng': "Statement of",
        'hsbc': "Statement - HSBC One Account",
        'sc': "statementOfAccount",
    }

    path_text = str(img_path)
    for bank_code, title in title_by_bank.items():
        if bank_code in path_text or title in path_text:
            return bank_code
    return None
|
31 |
+
|
32 |
+
def check_mr(text):
    """Strip a leading courtesy title (mr/ms/miss/mrs) from *text*.

    The title is recognised only when it is the first whitespace-separated
    word (compared case-insensitively).

    Args:
        text: Name text, possibly starting with a title.

    Returns:
        When a title is found: the remaining words, lower-cased and joined
        *without* separators (``"mr chan tai man"`` -> ``"chantaiman"``).
        Otherwise *text* is returned unchanged.
    """
    openings = ['mr', 'ms', 'miss', 'mrs']
    words = text.lower().split()
    if not words or words[0] not in openings:
        return text
    return ''.join(words[1:])
|
39 |
+
|
40 |
+
def _box_within(positions, left, top, right, bottom):
    """Return True when corner 0 is at/after (left, top) and corner 2 is at/before (right, bottom)."""
    return (positions[0][0] >= left and positions[0][1] >= top
            and positions[2][0] <= right and positions[2][1] <= bottom)


def get_info_from_bank(img_path, file_name):
    """OCR a bank-statement image and pick fields out of fixed page regions.

    Each OCR item is lower-cased and routed by the pixel position of its
    box (``item['position']``; corners 0 and 2 are compared against
    hard-coded rectangles - presumably top-left and bottom-right, confirm
    against the cnocr output format).

    Args:
        img_path: Image passed to ``CnOcr.ocr``.
        file_name: Original file name, used by ``check_bank_name`` for the
            initial bank guess.

    Returns:
        dict with keys ``name_on_bs``, ``address``, ``bank``, ``date``
        (strings), ``asset`` (float sum of the parsed asset lines) and
        ``liabilities`` (string).

    Raises:
        ValueError: if a non-debit text in the asset region is not a number.
    """
    # Running the model
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    out = ocr.ocr(img_path)

    # Data
    bank_data = {
        "name_on_bs": "",
        "address": "",
        "bank": check_bank_name(file_name),
        "date": "",
        "asset": 0.0,
        "liabilities": ""
    }

    asset_y = [722, 747]        # vertical window of the next asset line
    asset_iterations = 2        # ``count <= 2`` -> at most three asset lines
    liabilities_y = [747, 800]  # vertical window of the liabilities line
    count = 0
    invalid_list = ['', ' ', ',']

    for item in out:
        raw_detected_text = item['text'].lower()
        positions = item['position']

        if raw_detected_text in invalid_list:
            continue

        if _box_within(positions, 147, 265, 400, 295):  # name
            bank_data["name_on_bs"] += raw_detected_text
            bank_data["name_on_bs"] = check_mr(bank_data["name_on_bs"])
        elif _box_within(positions, 113, 291, 500, 381):  # address
            bank_data["address"] += raw_detected_text
            bank_data["address"] += ' '
        elif _box_within(positions, 996, 289, 1083, 314):  # statement date
            bank_data["date"] += raw_detected_text
        elif _box_within(positions, 900, asset_y[0], 1120, asset_y[1]):
            # The text was lower-cased above, so the debit marker must be
            # looked up as 'dr'. Testing for 'DR' never matched, which sent
            # debit amounts into float() below and made it raise.
            if 'dr' in raw_detected_text:  # debit entry -> liabilities
                bank_data["liabilities"] = validate(raw_detected_text.replace('dr', ''))
            elif count <= asset_iterations:  # asset
                bank_data["asset"] += float(raw_detected_text.replace(',', ''))
                # Slide both windows down to the next statement row.
                asset_y[0] += 21
                asset_y[1] += 27
                liabilities_y[1] += 27
                count += 1
        elif _box_within(positions, 900, liabilities_y[0], 1130, liabilities_y[1]):
            if 'dr' in raw_detected_text:  # liabilities
                raw_detected_text = raw_detected_text.replace('dr', '')
                bank_data["liabilities"] = validate(raw_detected_text)
        else:
            # Any other text may name the bank; this overrides the file-name guess.
            bank = check_bank(raw_detected_text)
            if bank != False:
                bank_data["bank"] = bank

    return bank_data
|
111 |
+
|
112 |
+
########## Posting data through API ############
|
113 |
+
import requests
|
114 |
+
import data_encryption
|
115 |
+
# POST /api/v1/users HTTP/1.1
|
116 |
+
|
117 |
+
def post_data(bank, name, address, asset, liabilities, date):
    """Assemble the statement-verification payload and encrypt it.

    The HTTP POST is currently disabled (kept below as a comment), so this
    function only builds the payload and hands it to
    ``data_encryption.encrypt``. It returns ``None``.

    Args:
        bank: Bank identifier.
        name: Account-holder name as printed on the statement.
        address: Address as printed on the statement.
        asset: Total assets; converted with ``str``.
        liabilities: Total liabilities.
        date: Statement date.
    """
    # endpoint = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT/api/v1/users'
    data = {
        "endpoint": "/SBT",
        "apiType": "store_statement_verif",
        "requestId": 'request_1234',
        "userId": 'user1',
        "bank": bank,
        "nameStatement": name,
        "address": address,
        "asset": str(asset),
        "liability": liabilities,
        "statementDate": date
    }

    encrypted_data = data_encryption.encrypt(data)

    # request = requests.post(url=endpoint, data=encrypted_data)
|
135 |
+
|
136 |
+
# def extract_pdf_data(img_path='hangseng_page-0001.jpg'):
|
137 |
+
# page_number = 1
|
138 |
+
# images = f'hangseng_page-000{page_number}.jpg'
|
139 |
+
# get_info_from_bank(img_path)
|
extraction_data.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
################# cnocr ##################
|
2 |
+
from cnocr import CnOcr
|
3 |
+
from pdfquery import PDFQuery
|
4 |
+
import openai
|
5 |
+
|
6 |
+
def validate(text):
    """Return *text* with all spaces and commas stripped out.

    Args:
        text: String to clean.

    Returns:
        *text* without any ``' '`` or ``','`` characters.
    """
    invalid_chars = ' ,'
    return ''.join(ch for ch in text if ch not in invalid_chars)
|
11 |
+
|
12 |
+
def check_bank(text):
    """Look for a known bank identifier inside *text* (spaces ignored).

    Args:
        text: Text to search; matching is case-sensitive.

    Returns:
        The first identifier from the bank list found in *text*, or
        ``False`` if none is present.
    """
    compact = text.replace(' ', '')
    # NOTE(review): 'sc' also matches unrelated words containing "sc".
    bank_list = ['bankofchina', 'hangseng', 'hsbc', 'sc']
    # ``False`` is the fallback only after the whole list was scanned; an
    # ``else: return False`` on the first iteration would skip later banks.
    return next((bank for bank in bank_list if bank in compact), False)
|
20 |
+
|
21 |
+
def check_bank_name(img_path):
    """Infer the bank from the file name or path of a statement.

    Args:
        img_path: File name or path; converted with ``str`` before matching.

    Returns:
        The short bank key (``'boch'``, ``'hangseng'``, ``'hsbc'`` or
        ``'sc'``) whose key or standard statement title occurs in the path
        (case-sensitive, first match wins), or ``None`` if nothing matches.
    """
    # BOCH - "Consolidated Statement 2023-01-01"
    # HangSeng - "Statement of Prestige Banking 2023-03-0" OR "Statement of Preferred Banking 2023-03-07"
    # HSBC - "Statement - HSBC One Account 2023-02-10"
    # Standard Chartered - "statementOfAccount 2023-02-01"
    standard_names = {
        'boch': "Consolidated Statement",
        'hangseng': "Statement of",
        'hsbc': "Statement - HSBC One Account",
        'sc': "statementOfAccount",
    }
    haystack = str(img_path)
    for key, title in standard_names.items():
        if key in haystack or title in haystack:
            return key
    return None
|
33 |
+
|
34 |
+
def check_mr(text):
    """Remove a leading title word (mr, ms, miss, mrs) from *text*.

    Args:
        text: Name text.

    Returns:
        If the first whitespace-separated word is a title (case-insensitive),
        the remaining words lower-cased and concatenated without spaces;
        otherwise *text* unchanged.
    """
    openings = ['mr', 'ms', 'miss', 'mrs']
    first, *rest = text.lower().split() or ['']
    if first in openings:
        return ''.join(rest)
    return text
|
41 |
+
|
42 |
+
def get_info_from_bank(img_path, pdf_path):
    """Extract bank-statement fields by sending OCR and PDF text to an OpenAI chat model.

    The page image is OCR'd with cnocr and the text lines of the first PDF
    page are read with pdfquery. Both texts are embedded in a prompt asking
    the model for customer name, address, bank name, issue date, total
    asset and total liability.

    Args:
        img_path: Image of the statement page, passed to ``CnOcr.ocr``.
        pdf_path: PDF of the same statement, passed to ``PDFQuery``.

    Returns:
        The model's reply as a string. The prompt asks for a dictionary-like
        layout, but the reply is not parsed or validated here.

    Raises:
        RuntimeError: if the ``OPENAI_API_KEY`` environment variable is unset.
    """
    import os  # local import so this block does not depend on the file's import header

    # SECURITY: a secret key used to be hard-coded on this line and was
    # committed to the repository. That key must be treated as leaked and
    # revoked; the key is now taken from the environment instead.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
    openai.api_key = api_key

    # Running the model
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    out = ocr.ocr(img_path)

    # Keep every OCR text except lone separators.
    invalid_list = [' ', ',']
    data_set_1 = [item['text'] for item in out if item['text'] not in invalid_list]

    # Text lines of the first PDF page, with "cid:" extraction markers removed.
    pdf = PDFQuery(pdf_path)
    pdf.load(0)
    text_elements = pdf.pq('LTTextLineHorizontal').text()
    text_elements = text_elements.replace("cid:", "")

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.2,
        messages=[
            {"role": "system", "content": "You are an AI assistant for extracting data from bank statements. Uppercase and lowercase letters are the same. List results in a dictionary format."},
            {"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1} and {text_elements}. (1.) Data that locate in the front part of the text: customer full name, address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month."},
        ]
    )
    bs_data = completion['choices'][0]['message']['content']
    print(bs_data)
    return bs_data
|
92 |
+
|
93 |
+
# get_info_from_bank('hangseng_page-0001.jpg','hangseng.pdf')
|
94 |
+
# get_info_from_bank('hsbc_one_account_page-0001.jpg','hsbc_one_account.pdf')
|
95 |
+
# get_info_from_bank('boch_consolidated.jpg','boch_consolidated.pdf')
|
96 |
+
if __name__ == "__main__":
    # Manual smoke run. Guarded so that merely importing this module does
    # not start an OCR pass and a billable API request.
    get_info_from_bank('hsbc_one_account_page-10001.jpg','hsbc_one_account_page-10001.pdf')
|
imageSegmentation.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This program is designed to auto crop the face on a given image
|
2 |
+
# It is required to change the image into gray format to satisfy the pre-trained model requirement
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import os
|
7 |
+
|
8 |
+
import mediapipe as mp
|
9 |
+
from mediapipe.tasks import python
|
10 |
+
from mediapipe.tasks.python import vision
|
11 |
+
|
12 |
+
import cv2
|
13 |
+
|
14 |
+
from pathlib import Path
|
15 |
+
|
16 |
+
# auto crop the image in the given dir
|
17 |
+
|
18 |
+
# Build the MediaPipe face detector once, at import time. The model file is
# looked up next to this module rather than in the current working
# directory, so the module also works when the app is started elsewhere.
base_options = python.BaseOptions(
    model_asset_path=str(Path(__file__).with_name('blaze_face_short_range.tflite')))
options = vision.FaceDetectorOptions(base_options=base_options)
detector = vision.FaceDetector.create_from_options(options)
|
21 |
+
|
22 |
+
def crop(
    image,
    detection_result
) -> np.ndarray:
    """Cut the detected face, plus a fixed margin, out of *image*.

    The margin is 90 px above, 30 px below, 80 px left and 35 px right of
    the detection's bounding box. Only one face is expected; if several are
    reported, the last one is used.

    Args:
        image: Array indexed as ``image[row, column, ...]``.
        detection_result: Object with a ``detections`` sequence whose items
            expose ``bounding_box`` with ``origin_x``, ``origin_y``,
            ``width`` and ``height``.

    Returns:
        The cropped region (a slice of *image*).

    Raises:
        ValueError: if ``detection_result.detections`` is empty.
    """
    detections = detection_result.detections
    if not detections:
        raise ValueError("no face detected in image")

    # Here assume we only detect one face
    bbox = detections[-1].bounding_box

    # Clamp the start indices at 0: a negative slice start counts from the
    # far edge in NumPy, which gave an empty or wrong crop for faces close
    # to the top/left border. Stops past the edge are truncated by slicing.
    top = max(0, bbox.origin_y - 90)
    left = max(0, bbox.origin_x - 80)
    bottom = bbox.origin_y + bbox.height + 30
    right = bbox.origin_x + bbox.width + 35

    return image[top:bottom, left:right]
|
36 |
+
|
37 |
+
def auto_cropping(dir):
    """Detect and crop the face in every file of *dir*; return the last crop.

    Each entry returned by ``os.listdir`` is loaded as a MediaPipe image,
    run through the module-level ``detector`` and cropped with ``crop``.
    Only the result for the last listed file is returned.

    Args:
        dir: Directory containing the image file(s).

    Returns:
        The cropped image of the last file, after the channel swap below.

    Raises:
        ValueError: if *dir* contains no files.
    """
    rgb_annotated_image = None

    for file in os.listdir(dir):  # list of files in directory
        abs_path = (Path(dir) / file).resolve()

        img = mp.Image.create_from_file(str(abs_path))
        detection_result = detector.detect(img)

        image_copy = np.copy(img.numpy_view())
        annotated_image = crop(image_copy, detection_result)
        # NOTE(review): this swaps the first and third channel. Whether the
        # result is RGB or BGR depends on the channel order MediaPipe
        # delivers - confirm against what the caller expects.
        rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)

    if rgb_annotated_image is None:
        # Previously surfaced as an UnboundLocalError on the return line.
        raise ValueError(f"no files found in {dir!r}")

    return rgb_annotated_image
|
56 |
+
|
57 |
+
# auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
|
58 |
+
|
59 |
+
# The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
|
60 |
+
# my idea is to use another model to classify if the cropped image is real human face
|
model1.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
import pandas as pd
|
3 |
+
import checkTool
|
4 |
+
|
5 |
+
def model1(path):
    """OCR an ID-document image with the ``en_PP-OCRv3`` recogniser and extract its fields.

    The last OCR item is taken as the HKID number; name and issue date are
    whichever OCR texts pass the corresponding ``checkTool`` checks (a later
    match overwrites an earlier one).

    Args:
        path: Image passed to ``CnOcr.ocr``.

    Returns:
        ``[name, valid_hkid, hkid, issuedate]`` where ``valid_hkid`` is the
        string ``'True'`` or ``'False'``. ``hkid`` is formatted with
        ``checkTool.format_HKID`` only when it validated; otherwise it is
        the raw OCR text. If OCR finds no text at all, the result is
        ``['', 'False', '', '']``.
    """
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    # ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
    out = ocr.ocr(path)

    if not out:
        # Nothing recognised: report an invalid document instead of failing
        # with IndexError on ``out[-1]`` below.
        return ['', 'False', '', '']

    name = ''
    issuedate = ''
    last_text = out[-1]['text']
    hkid = last_text

    for data in out:
        text = data['text']

        if not checkTool.check_integer(text):
            if checkTool.check_alpha(text) and checkTool.is_chinese_name(text):
                name = checkTool.seperate_name(text)

        # check if the data is issuedate
        # NOTE(review): the original indentation was lost in this listing;
        # this check is assumed to run for every text, not only for
        # non-integer ones - confirm against the original file.
        if checkTool.check_issuedate(text):
            issuedate = checkTool.format_issuedate(text)

    if checkTool.validate_hkid(hkid=hkid):
        valid_hkid = 'True'
        hkid = checkTool.format_HKID(last_text)
    else:
        valid_hkid = 'False'

    # checkTool.print_info(name, hkid, valid_hkid, issuedate)

    return [name, valid_hkid, hkid, issuedate]
|
42 |
+
|
43 |
+
# example for testing
|
44 |
+
# info = model1('IMG_4495.jpg')
|
45 |
+
# print(info)
|
46 |
+
# checkTool.print_info(*info)
|
model2.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
import pandas as pd
|
3 |
+
import checkTool
|
4 |
+
|
5 |
+
# img_fp = 'IMG_4499.jpg'
|
6 |
+
|
7 |
+
def model2(path):
    """OCR an ID-document image with the ``densenet_lite_136-gru`` recogniser and extract its fields.

    Commas are removed from every OCR text before it is examined. The last
    OCR item (unmodified) is taken as the HKID number; name and issue date
    are whichever texts pass the corresponding ``checkTool`` checks (a later
    match overwrites an earlier one).

    Args:
        path: Image passed to ``CnOcr.ocr``.

    Returns:
        ``[name, valid_hkid, hkid, issuedate]`` where ``valid_hkid`` is the
        string ``'True'`` or ``'False'``. ``hkid`` is formatted with
        ``checkTool.format_HKID`` only when it validated; otherwise it is
        the raw OCR text. If OCR finds no text at all, the result is
        ``['', 'False', '', '']``.
    """
    ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
    # ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
    out = ocr.ocr(path)

    if not out:
        # Nothing recognised: report an invalid document instead of failing
        # with IndexError on ``out[-1]`` below.
        return ['', 'False', '', '']

    name = ''
    issuedate = ''
    last_text = out[-1]['text']
    hkid = last_text

    for data in out:
        text = data['text']

        if checkTool.is_comma_present(text):
            text = text.replace(',', '')

        if not checkTool.check_integer(text):
            if checkTool.check_alpha(text) and checkTool.is_chinese_name(text):
                name = checkTool.seperate_name(text)

        # check if the data is issuedate
        # NOTE(review): the original indentation was lost in this listing;
        # this check is assumed to run for every text, not only for
        # non-integer ones - confirm against the original file.
        if checkTool.check_issuedate(text):
            issuedate = checkTool.format_issuedate(text)

    if checkTool.validate_hkid(hkid=hkid):
        valid_hkid = 'True'
        hkid = checkTool.format_HKID(last_text)
    else:
        valid_hkid = 'False'

    # checkTool.print_info(name, hkid, valid_hkid, issuedate)

    return [name, valid_hkid, hkid, issuedate]
|
42 |
+
|
43 |
+
# # example for testing
|
44 |
+
# info = model2('IMG_4496.jpg')
|
45 |
+
# print(info)
|
46 |
+
# checkTool.print_info(*info)
|
pubkey.pem
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-----BEGIN RSA PUBLIC KEY-----
|
2 |
+
MIICCgKCAgEAgTkWgzMVHIX/mYOZ5F6GIeZ5WastU7LWNmCSi2kTZQr2OjGsATCU
|
3 |
+
uD/ZrxVObpZPw4vvXax0LGkIyDx7QG4psVEKx26IUtvn7Br+CJyATmK2dW9sCkwY
|
4 |
+
N4x/67F9a1N8yOKhEvkcBtplphZfqZTCZ3d4VUShBt9gYGlO4odeXZ3cZLm+N9Hc
|
5 |
+
MEP6qMIoH1KBNjhcx60BvLbODHkYRup7YAcOh/cOEC/WNkZqQPPYomcVyXat6UKS
|
6 |
+
L1Vf/s1RnhOStu4JmYS1se39LRAxKI+xADZ7D+y7bhcBGykT7evEPGCwUAh++y6y
|
7 |
+
Wolj9HS5oIkcxq+Rj3HLlm7ofDubeBpuOWF2xVh0jYpSFHUYkVChssmfb0WFwxrt
|
8 |
+
YQj8aqX2C9taoWQpHdCcANJSvaM1YvLRPe8pHRpCjm/BrvxddxMNY1gCWpBCP7ym
|
9 |
+
WAuJShb/kkdDnQ+exS9n/UbzRMzYoHnKroQL9CPn26mbzlEO7mMOj1h34rQZeTD1
|
10 |
+
OAFEC1JFBL8LCMRkh+RT3UVpHTSFn/Oc2Gq912MivUrHbeK5Y8lPZOrEmvvxeqDB
|
11 |
+
uOPOMpkh9LWEoGlO4GLvnMhhbINt1OnuUIRCqvOh3jXUXoseVnAMWv1QTRyreq4h
|
12 |
+
d8GlMUR5U4dmc2XHncy1riVDVV4FYSAL2N94utNDgztKUkGL6i2Z5AECAwEAAQ==
|
13 |
+
-----END RSA PUBLIC KEY-----
|
request_json/__pycache__/sbt_request_generator.cpython-311.pyc
ADDED
Binary file (4.72 kB). View file
|
|
request_json/__pycache__/sbt_request_generator.cpython-39.pyc
ADDED
Binary file (2.56 kB). View file
|
|
request_json/request_legalDocument.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"request": {
|
3 |
+
"method": "POST",
|
4 |
+
"header": [],
|
5 |
+
"body": {
|
6 |
+
"mode": "formdata",
|
7 |
+
"formdata": [
|
8 |
+
{
|
9 |
+
"key": "requestId",
|
10 |
+
"value": "AhuqYhSgE9IQGpGeylwlD+zt9Q1o9typDou/GI0AIIxkoLL4tP8YmA41oEs3iz0UGtw2NzMjGSi/rmZogBuie8QlhOaEER+mysk/JRSj1YmMMnnxrfTBAzaa/FWF9MQ1OwpHbQc+TJuLBnuW/HnrVB/uMcl2klk0KyxD/rOgCRGW5W9ANZuWtBKRoO5ZuTKvcB3uGRc6h2iPuarUE6OcPyRL4byn2fi3ZAdAo3Uh61nCy090Gywf0qQRl54GP9uqv7R136Ilb2RNwE6cccC0F+C43eEXdnXkMdK3+P/ZNvNtsNTOMn2YfxhqaoxLef25dhukNWwA4k0JfR2waMXeMPrUxpzlkRHtN7V3os8q12mPvV2h/CSZelDw5GZWlUZW3A4rKWnLSX72/T05e1LdJ5mplkGPhSEoLUlEOkDOPmODBn6euy4mTAwAKUYLlVgbaHuG8CAMuKz3eQ6wD8RKajJF8+Y7BjnUH/fWZqLE6+QtiHjxGczKv7wxzD4kfNTj1DeZ9SHMf70IBb91URRMET1n9DTsq05FueI+/qm6VvV+TG1nbxuBBEC+yyTkB1Zjc4wxRUuqfZ7Rql4f7xfgQRYn28r0dEmgnGluqDMp7eDL6Kl1Xe7Hm0qblwf6jB4OxYY+sSCqhyIevGYJZQ1J9NwAuRhaZrwd4EwWs0G0efQ=",
|
11 |
+
"description": "statement",
|
12 |
+
"type": "text"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"key": "userId",
|
16 |
+
"value": "XV6W6EroodCeMfcuIt/X0Cw16NEHYj1ntBRHvKBxFHxroIGW4bzjIx/Z52zvOYhCu8p9iFODnq6diMZ5YjPmix34sUauhAU+Crthkh7pxa0nYsZs+ZgCGx3gVaPN4z+MmKA8PHofTZ0cH59xXLL0v5hwtVEUmQ3K5AH/7bUYl87e5J+75ml9JeoI+a/iPkAfTvUk8Q4hEeVhc0TWkbRT+u9kzMD0Ej3tPDgHILw/PifP7LYNDi1VZ9sHcaPA4irRztVwOR+9swYd56BZ2ROeN4zqkd32E+Hy8EcAGguextnZe0gWcZVbuUF/M/Jt3PSYua4rJTvQaZEeK8CKOrdnby3ZEYnh5Q7meD8zd+H0myTB9xzIi+/03gqDAbF68MTPfA3Ur8c0TE3vIBkOVub7YM0hQZMKgjEbyUSWlkxqjpYgR/fTZHxd9nk7zSfRAHXypuiH/GyQf0v7k1hbDmCeSL0995iTFM00VS3nj/Ik4b61ioFALyluxrMO/Mz4UJOJD5TlrTIgqme0ibez0tSb7OReHxGa8X2OooRYftckpQs5YxdO5ifQleGg05ZV/oUP9kzW6k7SvIIqyEuRWgAVbMcm2AJcJG5/PZR7CK2HiT0thK6wHAdKxO5gsRdxvC7tk80SSWJ892nIBxp77iY5TAnbBu5cNYBZcKiWtu6U8RM="
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"key": "endpoint",
|
20 |
+
"value": "X5Ut0e359J19WgOjS/N4ui9HIr7ol/1cMhuYjB7BSiquCG/xt6dvSVtM3mbPD2cu+7FN/EoM8RNOfTq5kXO/8naiZ5HKc+LPcZfrLJDPrL33kwyw4uKxY/NEtQBc8pskTcSH5RPBX1/6Xmdu0Nb29GtAJqyZk/F65xPBrxY11W2SJlg8s58R72F6Y9urQTrankAa4xt58jL+mo6DQJjJLw+pLu5RiYU2lfp8OyDPAPfpRfMvmHT11sox1ia5RcWeKosQB0AU09JWxcbtpNYQZP+evZElkVWtOkUoflR+6ZBppjJwnu5QyTZr2wKASTmG2v2PTqeqbtns2Hagrmr8DkLUH/YNIcU7L193ffaYIoBGq21d+ysU80Glri1Xi1jO6jmJye66ansHkSVi1CPRiZPwhKB7PNo65VXIA2fYaGMnAEUOfRfZ8/XbhNOPOUX54N/gqvmiv/IQCwHvzGyHpXBJN3yFheSnk7T4d/prz7ginwUGnBm3R/+IhWvqIg8owxMqUayVmZnBVJ6UuTHDYGmg+lZ9A+R0eOKfSwl6W3uD2enKR1XBC7PkLZUA8hUdx4ZWEbkCcR5VnmsN32iLqVdrX8EdTl4kFkD+fHH0tjJT1RpL/9s8J9V7UPERC+V49OlQYLOaxGuQqHxp5KAhfGJjXCCzvo/Ikj1WTs8zvgg=",
|
21 |
+
"description": "SBT",
|
22 |
+
"type": "text"
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"key": "apiType",
|
26 |
+
"value": "OZoO3DjuNYXElX/9GSRKvvnKOLLK47Ps0q77iX6kvEABqbvpDc0SZiSaOLYyx8ZpWFbhO0jzBxCCVdoJsC1POxn4hjYhc22zhSKHL203YZ38ROIOJzvB4vYoBwy4DjLXQmMf8XbuDjug/tQ9qCCI6pLIaQSvlnAh6z5zjtD0ovqN6BGX+UZxwvheMkXI3yv35NGE0HWvCPwAj2Xv6ZJ3UYATv60j9nZjH9ih6+P8WgtDvDXdzlOFmjwPGiFMvzLSERkhvoFxq/liaXSAGORW/gBpfX4Tp6Gf6cWz/5ochu9XL9ojwN/JXkKs60iN8Vrha1nIjF5SHyFhsI9y6sVeipU/gsrAKETmE3CpF6rFBoFDm+3VuZnKDFFDQ4U+J3f0UFOjj6diUrzOiq4nH6YgYLP7fn/mDuSweIVF53/vbc7PPbpgipy1K9xqlaRw9uD6vwew+NZOm9VwuqSZFmFdlch9yi3FA82dHN1Fb7HvopUpBzvSVXgW79oyIVZwzw3ifng2rxqoBbUTzg2w7qy6JoWIxoo2IZTYlLNEP+XEUpNxjyCLtuRgoliYowAZABVwBQEQufqAqrZ9OrjQlf8y5Ar92tgfW25BZ+BgwVW3ILA/wthWWHTQvMiEEZ03BvYOXDM7vNrZRvlQ3Nge9LGTmsQimslxzViTPHIYL5gES6U=",
|
27 |
+
"description": "store_legalDoc_verif",
|
28 |
+
"type": "text"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"key": "docType",
|
32 |
+
"value": "aGuIy/yK6V76wmGj/glbbEHn6j3CyEhmOJdqRsq9KIx8W8lcTpOLe7JlJIV7guBYBBOyno/4rmLWXxc6f5VaMIk8e698FiaVpGtxtsXT7sbXNPH1lEuCLKDVxpl4TxZGTsEVfOPQ4Gg9Nnu1VwamJvYCODuzGUKjgFQdq00DSIDAmBWRYSuYRdM6qVTk9lo6rtqjO7N8yw0iZQBIa9C4p/hDnzbrojcYhIadrXo98olMe4qrm53Qr3+B/YqAehu04Rt0hNbA/ZLKpsn9GZul7w1H26mQUaDPWApKsCvuzny523r6ejqpEDRDPajhjOLaztJCWCWy1yIXGq3SScxK3Iy7FCWiDc9frvR0G+ra0ar1h/KKoATjm4cjkvyRhHRvT583MjPWWFshka/prRU4Gaoff1v0//qAicN8wPjh4mhAehznJ3XpSU0GWwbYCmkgDbVnx6dCh43j6DYAYA24hnbRtbYLcJoYhasQfCbWeHs0BrXFe2eN7zyIRlrsHCjwILkrVlrZC1bhLEC7TcaV6GGDLhVWTx0+KCT50/yZb159xNXjksK8PCqh4W9afAu0cMtgAQT/V35V7zTpmRZRHn35dPlLYR6KtAUTxR7XCd32wLfyUPdAFYaLA/Ks5psUQ7SChRRhPh8k+kUUOA2uxI95YMyD3tjX03Emnga+0ZA=",
|
33 |
+
"description": "HKID",
|
34 |
+
"type": "text"
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"key": "nameDoc",
|
38 |
+
"value": "UMsvHPUFARGpvZsFGFIjZD8VQ7ft54rizY2klq/nVrnguWfgIMFcKYFQ9b31UYj98l8f7v5s1h2rbY8thlvKoXcS60heeW1G56LKcXnJ0UFOOLJdyCY9Jbrt3gv3EkwrNb+GMaeh93dFmO5w7XSMAKtFyjAESpO6E+Kr/T/U3VQL/TctMOeGOKN3Cv4N954Kxg4mSetAJiYrPhNJnmewFKQRtawOv9Y5YKY7fhLDpb4VXpFeEA7g92KqgpSmnXa6AkJhhphkshakpgbfVeLU2y5n6YDnv9BBQua9CayqMU7rnI9rtal/SubT0G+HEdJyyzhu/ZmD1wm3BO+QWIieCyU6+GO4ymNNSugb8pPOd+l+e/ritxwCmvJCWeDB+qKTJSvkgBNLG2ICS6+SrQMVFgwmLWbD0oZO3ru4oY35g4akPIP6BkEZ++P75kTOlvKWkZe05Yy6DqFvIUaxicaooTOg1NWxAKWfEizyZ50scGP88pG4+XLW3IUGCniUA78j9Z4SkPnVUTrKR/RnJsWN4jdSubN/loatYhiJZATYjlQrftRM3NFFpmfUcm8wIErT6mBMDb8oT/n2n0YOsld1nQIXartLn7wM0egibLkuihOBGlZ+1lasnSBRSY9YMyI9msUnA9bslu+k/kga5qIp0bMp7f1rEKcaR6taS8hW5gY=",
|
39 |
+
"description": "allen lau",
|
40 |
+
"type": "text"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"key": "docID",
|
44 |
+
"value": "Pz9QeLnCZxZzG1qb1F3FQ7Dqy0Dbra0Z7GTVgfk89nQz/K6Ui7MXuqa6WXNKc0mDYEPmpuOmVu165GYs9Ws2+L7yXrnwcg8mePs/MpwtdvCr6YYeMQexgOzOA4oFjjPMOVMP9Z70znUQeJSVGJcaazswagwp8gRIyb+fhbNjbDY9+aPGKAxzlzlzhTAp7ZihWg7XW/9kRLKrGK1EdKFEDUq1grZpszDS7SwGYME3nCah753JNU2Zt2jJn1yd5Rs+OeSgSuByR1YEboK7MyLnxTqlU0DJvg5GNs7L2AGOWZtFPjUdMKUuPw0teaAUtTC+sms5/vHghRhIzTIr6S6/5PcId3TUCC+qGphIrVf4MqR9wQoJuU4RDYMUSf2Hgod3yMPe7jcokJaug17ToukriaBVWJ7nZoBM7Wa5JkD7wge4qNeBivS/fytikcWsMK36TpZPd7cEoRVn2lmADXJEl4DpMAMUPqKILl41wjXgPvIs5YzfoBC45yEXn6DpsKyU4v7RnnpRT6PnGmKSI5h9H55jFS8vg4f3GEMoXnbRiRn/QePx4W3fBNh8Sfh4oEtnbtC5tGxVhDrPxlxSK1ySXNlGLEMAQCaCeQidKIO3gWgBdJyRhgWlmuexUh5N4jrNlnRgwbUkxhgpcrsweYlNiJjhGUOULABDFJjLSAd49Jo=",
|
45 |
+
"description": "G908833(1)",
|
46 |
+
"type": "text"
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"key": "docValidity",
|
50 |
+
"value": "bp2OFdPn7t7YfIaIKIyzt3jBFjPNb0WtxyzzzV2CX/R1shy+/Klx5yEQ60Rn8HbcInh9Dopbp3K0JWB2rPvjyYhwassyVjsbGcGd7QUJntL49XYnimd4nLhCsI0eVYNDFWQk4lfzgXqH6GD7V/8xxHZdS7FnWp1zf82s3m8PwqmfTD2XartaQh5GVf2woin6YhCN/XuPMbcZCxhDXOedDz5hf095rk83jWyMnn6lsC1loeOGoVza744chQXRph3XWAXuP2m2ZG3zMxMxomGL2AmZj5tmJ4DNWCPP4Qthi6ZTlSapmb05xMX47xWsSZA3Xd5RIlbVd+Y1iNR+7Qs6oaf0qR3UYaV3BA+Fh83StZKLkkZK9bk+esuRvHQa0Q4aqvBa1S3YAF0soN+ba/UM8+AmlMs2OSzFYN7fv8VR1Xwclw1NJQqvIg6aZyFZdk2RbvmNgkedsuxtye21yawHpsXtcHy415YR8wn5aefsfb+DAwnKZ/6gZUlpCKb9iZ+9xlzXocadLInCKBiTyHYMtxj/VPngdDDZQQZcJlZTDr0ZMq2fdmcYZbPVQmX3UVaa2Zu2Uqlm85pWH1EKvgxoYSWx/FT90hZU8r9DHfjOthOJwwCgAe/mvpNDs/JzGMYUr89qOVNtDycnkJvcC3OYwvNgElSL1pipXS4KM/3W3kU=",
|
51 |
+
"description": "True",
|
52 |
+
"type": "text"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"key": "dateOfIssue",
|
56 |
+
"value": "V1Qw8e7grhgy7kQxYTivhMYnBDixs6KOe+wE55uJ4orkrP/0sBjZWtty7bUYZ7wBzg0bujLHPPmmWRnS/UqSwhYr4D105b/J5ZY+w/Q5g9gKkWEPnWcJPtJ4ATt1KxvbCFN7AFT67+cxKaGDf0VE6HBPuNbSufOJ/55X4Vaslb/TpTiyXSSg8I29Y9vVfZ3m9vZwtLAIGgLo/HCoEuSfx8a8ntKqJOr3MPjOTN/Ml9kISDpnNqpsYnav9ZXCVfMZaTxXVIegvct2bJzVmf1gAMXry2EpDOocjugfAxY1ODEFFmXiHHIRDNL7NMy8XeU4iOQcTdzcTKlmUR6YYGk5pHOIYaBZOx56ge/EIP5+D+0Lv3R/KoY8GJllFtnK3EO21GrT6sRLWB22CV+cGhN6xkSCl+wIMN/X625p1zf2kPo0hwnOHPsCWwzoWIDUIl2K8aBBPckl190da9Yb/8SPJEZrsFzK6JgYMOGsTQ1J7+Jsn6KXV6c0Mfb3SAoyREIpZwJQKWHqQOHJVJVcZ8wA3fHAQKsUxSElDsIfxI0Jbp3WvqO9pIdAX/dAnEmtw8ajH21efx8vWlV+8GtRQ5QCLhqF7ioPMLQt2Kkl3kvs3FvbryzME1+Y3Bomz1LaS+gMa4s/dLcG+Q1pBSUfP0WYMFjXqCDMkPLZnhJycGXx7+k=",
|
57 |
+
"description": "07-81",
|
58 |
+
"type": "text"
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"key": "matchingScore",
|
62 |
+
"value": "OjG09vdihB12ZPb8C3oqVcQMhjMbBSIRjKtzDzr96QCDodSx4W9uIjn9e6zWHW7RyOXbm0hiqKae6d8ZS7hPpqyIF7Yfl1OAfnuEkQ0WfLm8yuUvCA4oFSsH86gx1EYeDUYM44aZEJ5qqPK/IUKvXsHSGgsoIZ0QniIHn71Q1K28zc79iZ0UCkkRhhitoF68JNY0Qik8hiX6ES7yiU0daq7vFIbw0Tg9JLr8fpw/81+Fm7zVfhAx5T2LT2cag04da+YLKSUIjsksQ/CCiRQSDRfdbOdZ9Os0tiXPZdYoIU/dxVerlUJLlmiMYYnVte4m/8pMlw57bc8oKE+qka83R8E4hH9Wu1uOcWHjdPYyUJdunyXByXtM5igrUHgmLvvUQ7eBfnrK5+HZlhHC9tpiTVvcWJd086lS9/hi8UPp0XgSc3h9TcQU2EiC2rgnC3PbPIdjo0Evb7M9P2T0xlA68Na3uW94hBoHzoyS1VmLFTo5alR4LteFBhZ/sCeMp0m1LYs4ZXUOCS85FRwK1x8WnomXpxOAFvBai5JwiLLqkNyBG90LEKQRFFyqk4dceBjOGk+YIq2fTWIMGNTNrtIltV7tf9GJS+LwLBDGsofieYceGeS6ekGDgnNXQJpIRTLccD6qX4FNl+W2K7g1M0xVMNeJ/B7LV3Pb/7uD+s2ASyo=",
|
63 |
+
"description": "0.957",
|
64 |
+
"type": "text"
|
65 |
+
}
|
66 |
+
]
|
67 |
+
},
|
68 |
+
"url": {
|
69 |
+
"raw": "http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT",
|
70 |
+
"protocol": "http",
|
71 |
+
"host": ["ipygg-api-test-env", "ap-east-1", "elasticbeanstalk", "com"],
|
72 |
+
"path": ["SBT"]
|
73 |
+
}
|
74 |
+
}
|
75 |
+
}
|
request_json/sbt_request_generator.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import os
|
3 |
+
import rsa
|
4 |
+
from datetime import date
|
5 |
+
import secrets
|
6 |
+
import string
|
7 |
+
import requests
|
8 |
+
import json
|
9 |
+
|
10 |
+
def generate_token_id(length):
    """Return a random token of *length* ASCII letters and digits.

    Characters are drawn with ``secrets.choice``.

    Args:
        length: Number of characters to generate; ``0`` yields ``''``.

    Returns:
        The token string.
    """
    characters = string.ascii_letters + string.digits  # + string.punctuation
    return ''.join(secrets.choice(characters) for _ in range(length))
|
14 |
+
|
15 |
+
# Examples of tokens if string.punctuation were also enabled (currently only ASCII letters and digits are used)
|
16 |
+
# 5!bA9H2f1q^...
|
17 |
+
# Xe7uM$4d9@...
|
18 |
+
# &3yTb1*8Z#...
|
19 |
+
# %pWqN7!6zX...
|
20 |
+
# @9oV!s6Rd2...
|
21 |
+
|
22 |
+
def get_today_date():
    """Return today's local date as an ISO ``YYYY-MM-DD`` string."""
    return str(date.today())
|
25 |
+
|
26 |
+
# Example for what will be returned
|
27 |
+
# 2023-06-29
|
28 |
+
|
29 |
+
def generate_request(data):
    """RSA-encrypt every field of *data* and POST the result to the SBT test API.

    Each value is encrypted with the public key in ``../pubkey.pem``
    (relative to this file) and base64-encoded. The encrypted fields are
    also dumped to ``sbt_request.txt`` in the working directory, then sent
    as form data. The server's reply is printed.

    Args:
        data: Mapping of field name to value. Values are converted with
            ``str`` before encryption; the mapping itself is not modified.

    Returns:
        None.

    Raises:
        OverflowError: from ``rsa.encrypt`` if a value is too long for the key.
        requests.RequestException: on connection failure or timeout.
    """
    # NOTE(review): plain-HTTP endpoint; fields are RSA-encrypted but the
    # transport is not - confirm this is intended outside of testing.
    url = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT'

    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')

    with open(pubkey_path, 'rb') as f:
        pubKey = rsa.PublicKey.load_pkcs1(f.read())

    # Build a separate dict so the caller's data is not overwritten with
    # ciphertext as a side effect.
    encrypted = {
        key: base64.b64encode(rsa.encrypt(str(value).encode("utf-8"), pubKey))
        for key, value in data.items()
    }

    # Write the encrypted and encoded values to a file
    with open("sbt_request.txt", "w") as f:
        for key, value in encrypted.items():
            f.write(f"{key}: {value}\n\n")

    # Post the encrypted fields as form data. A timeout keeps a dead
    # server from blocking the caller forever.
    r = requests.post(url, data=encrypted, timeout=30)
    try:
        # ``r.json`` without parentheses only printed the bound method.
        print(r.json())
    except ValueError:
        # Reply was not JSON (e.g. an HTML error page): show it verbatim.
        print(r.text)
|
51 |
+
|
52 |
+
|
53 |
+
def split_data(data):
    """Load the collected KYC data and submit it as two SBT API requests.

    The data is read from ``data1.txt`` (JSON, in the working directory)
    and split into a legal-document request and a bank-statement request,
    each sent with ``generate_request``.

    Args:
        data: NOTE(review): currently ignored - the value is replaced by
            the contents of ``data1.txt``. Confirm whether callers expect
            their argument to be used.

    Raises:
        KeyError: if the file lacks one of the fields read below.
    """
    # token_id = generate_token_id(501)
    token_id = "12344321"

    # Single managed open; a second bare ``open`` here was never closed.
    with open('data1.txt') as f:
        data = json.loads(f.read())

    # A face-matching score is optional; default to "0" when absent.
    data.setdefault("avg_score", "0")

    legal_doc_data = {
        "endpoint": "SBT",
        "apiType": "store_legalDoc_verif",
        "requestId": "request_id_id",
        "date": get_today_date(),  # a string
        "tokenID": token_id,  # a string
        "docType": "HKID",
        "nameDoc": data["name_on_id"],  # a string; lower case with space separate; e.g. san chi nan
        "docID": data["hkid"],  # a string; with bracket (); e.g. G908833(1)
        "docValidity": data["validity"],  # a string; "True" or "False"
        "dateOfIssue": data["issue_date"],  # a string; month-year; e.g. 07-81
        "matchingScore": str(data["avg_score"])  # a string; e.g. "0.957"
    }

    bank_statement_data = {
        "endpoint": "SBT",
        "apiType": "store_statement_verif",
        "requestId": "request_id_bs",
        "date": get_today_date(),  # a string
        "tokenID": token_id,  # a string
        "bank": data["bank"],
        "nameStatement": data["name_on_bs"],
        "address": data["address"],
        "asset": str(data["asset"]),  # a string containing only numbers
        "liability": data["liabilities"],  # a string containing only numbers
        "statementDate": data["date"],  # a string
    }

    generate_request(legal_doc_data)
    generate_request(bank_statement_data)
|
96 |
+
|
97 |
+
|
98 |
+
# demo structure of the data
|
99 |
+
# {"password2": "chingfuilau", "username": "Allenlau1111", "password1": "Allen02118173", "date": "2023-03-03 00:00:00",
|
100 |
+
# "credentialId": "testing123","requestID": "test_statements",
|
101 |
+
# "userId": "7893456",
|
102 |
+
# "endpoint": "SBT",
|
103 |
+
# "apiType": "metadata",
|
104 |
+
# 'tokenId':"500",
|
105 |
+
# "ipfsLink1": ".",
|
106 |
+
# "ipfsLink2": "..",
|
107 |
+
# "ipfsLink3": "...",
|
108 |
+
# "membershipStatus": "1"}
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cnocr==2.2.2.3
|
2 |
+
face_recognition==1.3.0
|
3 |
+
matplotlib==3.7.1
|
4 |
+
mediapipe==0.10.1
|
5 |
+
numpy==1.25.0
|
6 |
+
opencv_contrib_python==4.7.0.72
|
7 |
+
opencv_python==4.7.0.72
|
8 |
+
opencv_python_headless==4.7.0.72
|
9 |
+
pandas==2.0.2
|
10 |
+
Pillow==9.5.0
|
11 |
+
Pillow==9.5.0
|
12 |
+
Requests==2.31.0
|
13 |
+
rsa==4.9
|
14 |
+
streamlit==1.24.0
|
15 |
+
streamlit_webrtc==0.45.1
|
sbt/deployment.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from web3 import Web3
|
2 |
+
# test userID: 1001001
|
3 |
+
#
|
sbt_request.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
endpoint: b'O5LDGUcUb6knu8lOvKTVd5EmjI/9yH6P4KraYx87dQQqJxJ2rmKUgk+qxavAJMFOWfhEuMwZsFB43Y3A+F6QoqBBXZfj0e4snOHWZKXEKeKynlmIR7c/Cy6bEK9oUrkrSOkPd5CZZE0/BswZMfh8XidW4GQTBo6bybHkBpSKYpNkS9W8GaJLWqnDEEKFI+KSS3gsE55PEOFvKQgB7s+icT+GF/2l5vOWKqzc8pHFFH9nyCF63zRLpAVe+ZEB52dAV148SJNlPNsr4ikQLOqOOPCYF35c5lbwGyZPbib0+6pc9Z4evxPUqzAAsrSmkwUr/c1Rqg+go4vnhJ3EfSjGbuMAeMJLdtLNzglYw/93rKYxG0Jc/w/4RWS98PHPGLA3GEvoD30leOfwS/yO8N0Cb7v9u3XwqIy/FNcdDOBii9GOmXCLJn6u8KvbyfhsBM4MtiqwGuPIBB/Wu9AVVqJGGT85YgrnWBnJfoMHvYlhEM3xEe5jSkeMz+G1h7vXye2wdKKc7jmt748i/8QRZ4TsFUcauU2V2OPD6eGcrjg4MKma0Nu7UKQyUJKXg9azDdn8YhMqzfGQ8vkoj3hQJNTIR7aOyXaACjqEpG+kWC4mCjPciYGowzp8AmY+QK+KSRjwpWJr5uvBCtqFH8OwUZuS5IAaqMbrBsufLdh7XIF48cc='
|
2 |
+
|
3 |
+
apiType: b'Tg6rz5RJnLaseo/MDn6Dz6ui3MG2YKQYdbwteCT9WdN++AyLXRnxGepXuR1i5oEqmTpxdZsA5mUnJBEjIh6EDJAXgd3jG/0Dy5p7TGe62Rmla3EGOkdjHRNWBSoXYOeivLxdH+lTe6x3+bzc7/cgbDjdJ0auBkid+avLZM8KtV3vGtOjHey3D8I+7foBzALse9TI71LHThKIg3Hjeq6dj0SLkkaz/DiLFS90JXa9Ip5NZ+CGtJ5qmDLfqjBpOLLZ06aXhEoLgUVwk4vf/d5DAOxsRyhPfK3wao4UwaVtj+NO3bR4/tQfCQ2CnFNvrr7GQfXhqGx1Mh1yOoyClZPCj8I+r3hYYrmofAR9PQF58qkmuuxAiBTqNlZHyQGmbRDOUaNBPy2Tdr67MrM43zmAFysT8HJBUCcLXWv0FcGKCqFv941usFuboOaoRH9ib8hRqvt757arQPwdmIxyayNkOWgWF3kQTt9iSbleWk61l06EUgdX6qOnAdEDHLJxPYGdrGHVwuRlT7Sj0RgkSviaom+0xcU/T24auDgZj/OjMCibz/QrRz1Ap9RMnteBGjQoYXiIhyBPe7p3Q/6h0PZCyCqSH/IDj4I8ar0FNtynkuXKggr/GWUNV7xkLS/xujhXuziBwTedR1LYdmth99h40CzamqmYL5cj13WtUHfmFk4='
|
4 |
+
|
5 |
+
requestId: b'Ew+AqWhPC5HmyJnAfltVVOcmDvYMRxV4dmBxlel2+AFQmnCVWY2gTz3KrTnvGan+xONkFyDRQV5Xcmm9coTRuXLqNHxQcNz7Iu/m8ipLUrUr2Kofa5/paAcD5x7I/nYgb7UNQ2qWzpXo/vfw6syY9vVK49ZhPfi2tLYY3gEqIQojQvmkhC3qzTrbQqKSQq8jWQ9crNGiDvrfHhPfPG/hdNhwhluiznhTFGJkyeh8d2lVwUtqKN8MMSinX/Jaqs7hyS3FIGILynPwgyASkKGLPFpqAmfemDPkbkzybHMThKRBcebUPdIMpll88nT73vBwslwr0bTvat4wOZQHjtVs/JDzWXnYw2zO/ljWzcl4RSNcJzKulUhEV6NInIolRQ30mTvxihJIojOT921zhlaHx6fp3FP2Qw2lHHgtAvY4L10R1Qxtmy0Xm7vx7oELrFjKZN9+w8h5ofvNqHdkE7HafF6tYiIPm0bWaWM1y1Z7+znrr0at7/DfEb+a+aUchC5xZ59zywi5cJCNGl0f1dEVQ8Ywl8+PMpb/e8PkEghl1MYXecbp++FhDbWK4Db5OL6+9n1WDSEkkaZ7wwo/SdSjcoSrIbTeVp6BSMTryMozADAWt2ETQzMPI3ts/LVBIbae1Qur8zb3tbx8H/Yeg8K4yLzho/PXDQnM1UNdR93uRps='
|
6 |
+
|
7 |
+
date: b'CuIu6PiwXFE6zH9VRTYTmj0zht4nKQtD0CMB+hflSl4NhhzEEodA0ULRu5/ah+u+Uw24vuK+n8YrlGGC1m1j8s6axbHMwjJnfMY7ZmDcDr6ITHINKpjUpD8KSunkjNReqhQCsPnoyPFTPYc6lA/oHA9qURrE3x5OeaL3Bt9iCwTqLFslHOJzI4E6hPDoLUl0qUu7WDwYKN+eq1tsvO/eqe2Eh8TlYGCPXvipXIu3pHp+7blNmeUn4wqOEIOnUUwvQPRNemKeKZWaO/j9kGMO0T3d7CjKE6MlbPJbj0EfCEXM37qJyvQ4r685CZwcC5Xa8AkiYarbHg0adcdCBHi0GECAUzKvlgEqcTolHTTgZ5orFNmV9K6NYqteXkbIgPQiroHDImcWcu9tjTRsmkuX8eCUSUlnBB+1qD1ggHIrk6MbJhAyDSNl4+PPm0E81z7mMJm/Ho/4ikQANz9Y9cvXLhYVVOkwmzaD1RW0I9rSnGtmW2elJVvgOQfi2Oh+nHTFzZs6UzuiG0F5JrbRnT3A8AxcWbiK3mwM3ne+0NKiF+rMSC6HsnDpE7KCNpg1GtGNw/ChXBQBkLBCxiBA78FkTmG+n4UUbC7FpPPPDZkR16o/CjTOVKI7691EcxxTU+jYPpxkB9Ul1kBZcA5ayyOL+wC3BiXi5CqjDcCbwmFivRk='
|
8 |
+
|
9 |
+
tokenID: b'cYLpI2tweuOUayPSzS/SIqWXPOMbpJinzXNTCBUo3Ew1taoLmb0jJQb4VKk1iRrD5CjnO9jS7CLyFJ1OLGsTNHyiN1dc/P2A5jGbCKyq5OOoKVd+F6IamfHy2YMTGjb4nDP8dMfeUlz5R0FYx3yiuQ3oY6CHYAm/TKYNLaSTu5JXpC4FrVlsVnExDzgsAlMZN52qtVNgKyaO4dC1NVzdlMMBA3Wqiu1+d77fupWl/mQTqWSw5BR2pbe4yZYEBskjW2R7d7xl22UxOOar2tY9yRYKbDB1K6M/00Lm9ApkFCXruz1K7/kbz8IPi/p79MSXpdBLQko4/Z5hxZ13+Jc6QeAqcgiENC5wjYdeaJMyYOGvCc/6XpFq4Pso3D3y3j/qytm/0S4QW4/OWwgQj/DggU9HU55I8HxP//SfHbO6CBVRwB86ym18YV8Y7z5pnK3urZBQYE3iVSfhB+dj2XOzAE07uKHNMKlouKzoBVq5voaYVvBFf2odVFDRbsoa1chJxGcr08dARLZ6lt8ZyKsXqNj+EKxVAy0bHOU+F4ZyIaQyVg8p2jUyOAH2WZwlsmJ8yg0f41ddlyHwIpurCuRa4L4wFlRblmrSH8hxjWIwKfqhz5CJSknvQGYksfzic0vv6nVRXMx8jcdq/HGNn6iiPZw4hluVPIwF8j+h4zsA/Wc='
|
10 |
+
|
11 |
+
bank: b'LbIOvIo9qYAb4jJjczhspqbR1jPArtkyKqfE2oM2LuA/d1oZTGf1VGgQyCt5T8m5UAknAZu32A1eCOkPx9B0xLW5lYuEvnRgeyvNW46FM4Fx/iFrLns4lClV2ivqJ+cWV2BV57u8p1Tj0OoVbVPSl6v0T+Pxg5YFJvNjU6+w2lRL63Q4WiKFKVCHfdKbxzkfCE3UtK+GTm9D/d28O2ql9lRBFFnsBvRCYa9MhWSx50CNv81kC1J0tELyejtVxrlnW24SVVt+S5v6kV70TVHYfmkoeh5gd8XJlGEZ8Ww30UoYsos5pgaywvgGhcKMtHF5gI4cAY9Q4xlcEpUcyFQSMB76+HmX92fqUxAfVJRCG1iUrFP+vswZ+eWvx0LCXG8Wl7tYSWzBD9NmKADiam00ODV1f/woWFp5GPr7eupUSPZhI72/I3s4u+m3bvtFFEFJKIeCAX1QcmZL6r3KKsp6PSDR4G7iCk2lbhpLW2icasYy93ziDnUn5FozgPrnjasSRxDB83yOqw619AFHMTlFeMfsFEKAz16JzBUz0Bi6PujEfKZ/QDLFK6FyNHicNaAPtpsci4PlMLSvoOVV5Uf+lIj2yBNYyMfSYda4wYJ+djk8GhguN+Cx/UWZcnTP/C8CS0wk8/iVVkSk+oAKM4esLDDgocl4AhbpuLbfB4XIQ9Y='
|
12 |
+
|
13 |
+
nameStatement: b'KoojlgYLT6dN8GI2DB3TDX8QKwZXgGYZUjX550icBht3gFn6hkoejR4dP4TivuFhaVb1muER0QTyjWog5N4yT9iWJHpfXV8j41SrFst4QcJO81yoxTY4nguLg3Ie56RYk6FQgz2dnKx2WPs/M+Hgc+d+mGlb7mjPtmD4zFVJJHyDaVeUdWWZWVVd79yIbFnNleCWxPjByfsGaDiIGdkoZApiLFknBn46q7/JR+McJXYfPajPZZK+pOExGHj8TKPS9nEuz+73POX1fz/faRWGYDtqIcpkoi1VoSKY1cN6G05NM1N2KAOmxPw4rXy5bYWhNNWreMQNH+P03QQQUiJgo94MMph5aL5G+dc/9ei/GLwZe7icMu3/O4S3y/ZTwOwt8nHu1QQukKEJxzvr11YChUGO/Hz9c3vGvxOvTzaYJhR9bSsf9tB0iVXtatsk6Ng6kOCFICatLFd8VDOxqiVzzW8gxP31AV0xsInDKbsjyOKdIvwxPwFX2+XEjWQ+YyTFuw16DzJniCPP3+eFN0VBK6PBf6OgeEJbtb1yM6hB00ny5JWoau55LIajHEV6/wp3fr81V151+0UdXzTtmmXCy+pTS0gqvc4Nm2O9Sar/iNpiwpyJqd5oxDb/RRcLExnFBGo21vh7vILAdR4ajeX7PPZhVcYeAMMvwOcqSit8G+o='
|
14 |
+
|
15 |
+
address: b'bj5oVnpsoxk2n33yVxia8QhDEI8fRxKMFsLvMy6EaQNKwolxrp1d2at4t+u0CmvkmTfCcgYd1Wr9oaJ6COmJfJNIwgy/ftZL/s+6PMzfBgapt0yDSBSDM4vtRIlIKnz4+BXCSDwwpqpCMo5J0OdEJaa4K3qf/9S1B1gf2rotmbt2xo/Ipsf7lfG2Rk+gSiGcs2bsMBBPBIqtTgf7SxXKk7RkOU58nPz+mnZEjdQgIsrQFJqII6+Esw7puTmDtrgGLokTSL7IXqtPw5V7Z/XXbqzImSuEc7wpk7ky4LU48wJ+bGe3JNosOB+c7KEdbiz4gIC2LCu8lw8kWbBaqsfXUzX3QCw+9n5g7xAw0VdrF7B/e3G8pCrmgoJis4NQl5Jy30M5pe7bUbgxwLY9HGmWzBgMGHCcpmrPO0ADpOM7QwdTPmevA482ZZTHzlny6mp3wWJAbomnuXJ8NvFnYMWIq7eImPY9mAKxjYzrX2fQ2qVr6Yr/T1bqErekvKz0V9eTrD+xu4nLyqmXe7/sD5f9yPY9PcLVAMMunBwf0x+aCsX1bqWVLPe4HG0nZgecZizH+Qx2Qv8PYWqBugqXTAVYNcS7vGnrsgoUjnWjTUTTIB/VoZCCtxcb0oDazoZbqJ+8+kv/MRsO/qdQd7nDbnU1oYg/RLfb87PXGQWYk5I0qPc='
|
16 |
+
|
17 |
+
asset: b'JAu5OeWuUZJAh7+8isnKhlUNaXKtXgmErqrUy0+YqOpY4P0nSKCT0DH7nmX1DxHn1ezZBMFkM+wcO/9a/9R4WM2CABl4nLYzuu/y20dzdl2w0MqzHla8OaIQIoBDms/XPaBiLByAVDjSm2UCt81NG25VXXn8LNMfmjvCXe8VBa0fBgZrNnPfR9YF8GRMBMcctSHiPrXZuvIHEvcHhbEgHa3zo7Gq5ApPqFrgiVwmdRkFjf5DxRvsUXPlqeUW7nKD0NLWIAUlM0L0/PnQpKLmhexYU5qgsSXngFH+vm7NBWas2AQXMKX4WNWCLqlkljIQ3vlHh+Q7lZZW2C9Y39tz0ghC8eLzygTGbnxxzT2bSk5YJMLy8Gm7toJTGhL4KoMEPO3LC/C3mnVGo9Qtdenkt634d616AILlMwWhf9SuhIPbygBegmrcxdtjdjDHbGopztn6Urcf7ai2+5dPVTcyWOiwZf6AXsGqf9TTmK/DLWVV/MSwRUPDY/HKCPwS6o2Bcm86MNJFOru4Ez7qPIArvjjgQMdYFXeO/C373oOX+EBFxLrui8Tz2t4SNzNGy+Tw9kVZDdJYUyoE5V6q0VFLmrE5J8TFKNsTz2LYIdkV7GIMITavvUqpieXfj5oJpNDA5CSTXl2OQmArLzbO10VETsizw5JPrFD5+LAGvsMLfMw='
|
18 |
+
|
19 |
+
liability: b'ND9CxqaBZFqxPImrnQKpCsY5pW74y5cRIbfxsfwlrOTnPGKXlllPXNqgwOBlJwvK/5xJA3u7Taj/EGpyRwvnH0JUwiWRWt6i+JlKD6OuRwYEhbvYZwWZPR6EFkI0C97Yusk9dJHZd6AAm84h5tPSnx1p2Z4g8IthGgrP1eLKTqK1v/cc+f2ldkKxM/iaEDemFB7LzVespjkUaE+nhfoG3wk7v1+BPSZt5TK1zwYDj1JdGP5JyuzyY6Niwyk9ThcHnuyJ8qMby7qsod1drvdEESDI0d0MrwYzVJBekhYQpqiOOYpnbkxXAlVkFyI2L7DV4+ue66MeFZfsad19FGQA9DGvCdXMAx+CvigRJyRG2DFMOZ7BgiCAwFJQAj+ayma8C0mHpeJChrt0i/ZOskEG6rjVtLPeQVvdvuV1NX1PDSpVeFD6ml/ZjN0BmIJxofbxRm/CiffJArbtT9dpVPneY5/nJuRF7JrLSx4+zSlailj7RHdxEsy8fq7x78Zl3jPhmtn7P0Gnd8+epgo+66IqBIHOSUpy2vM12qq44xaxX+bVChg6CGBp8fgwo+49/7zqWQ0/hXi08/ZwAnhcwlXBVi/U7zJsW4vTS0ZaizX6uv9oy7MMsDaqgvmqUr+XHUV2xQPGjswrpUeNhJVjMd+598+fqsGeJ8AYXkSv34EET0Q='
|
20 |
+
|
21 |
+
statementDate: b'LqkAFp9Li6qYamXMH8UUndrGhquJrtNXdMgG36EjZoFF72ire4Y0Q/S9q4HnKjOYlobVxxsxvkDdUiWxJa6uuuULMkabaozKLnDsvSfZCMDZcvEEkxrTqYU00fetcVCFieS74p+kS+a3a2Ohq2zlnHZEG6EmviiBPTP8Q3wGpQXYkJNQ8cPbum77dvYLabSUu61xUU0B6Dz5QHI5W/6NcFFgT6kB243eHoK4L3qJGaSm5cuB10iU62PY8HYoHTeKQ9Ve59qu5KPyNj9VwoD/tZebNpnveIRX68T4cmkGW3PW2FeyL27mxOsCMQrUCI5oxOFstO27a/Yfqg/CwRDnfYr+8Hm7vohB0bW7hSD6dfCFWfv4j6RdQ/T5ToF6jKmkBPiv6IGyR7LYAb/zr9drgSRxVaoR9gZMv+yq9xPJENQbDUV2P9N8ypRqS/+hNIMyx58TpnJc5st7J+hehWJT/5A2M59EjTVt1mwYLwbp9rjxevRoe5YZDxepeFCrS1gvyg8J0MJfE6J/RWogEAu9UTkgpks0s4NaXPGb/hMs4wMs9xsZooKjtH7p/sDTR8JlewVcxirl/xg1lxKELFd1qy4AVaFR2PAd8dj51t+7oZf+WCh+kODepzGgo6JOiBhAHAKuRd5lUT9J3gg5vZzaKEoGliBlEj2tNWDvVpbXUI8='
|
22 |
+
|
similarity_check.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from model1 import model1
|
2 |
+
from model2 import model2
|
3 |
+
import checkTool as ct
|
4 |
+
import extract_pdf as pf
|
5 |
+
|
6 |
+
# get info from hkid card
|
7 |
+
|
8 |
+
def string_similarity(s1, s2):  # Levenshtein distance algorithm
    """Return how similar two strings are, as a percentage from 0.0 to 100.0.

    The score is ``(1 - distance / max(len(s1), len(s2))) * 100`` rounded to
    one decimal place, where ``distance`` is the Levenshtein edit distance
    (unit-cost insertion, deletion and substitution). The comparison is
    case- and whitespace-sensitive.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        100.0 when the inputs compare equal (including two empty strings),
        otherwise the rounded percentage described above.
    """
    if s1 == s2:
        return 100.0

    len1 = len(s1)
    len2 = len(s2)

    # Only the previous row of the edit-distance table is needed to compute
    # the next one, so keep two rows instead of the full matrix.
    previous = list(range(len2 + 1))
    for i in range(1, len1 + 1):
        current = [i] + [0] * len2
        for j in range(1, len2 + 1):
            cost = 0 if s1[i - 1] == s2[j - 1] else 1
            current[j] = min(previous[j] + 1,         # deletion
                             current[j - 1] + 1,      # insertion
                             previous[j - 1] + cost)  # substitution
        previous = current

    # The inputs differ here, so at least one of them is non-empty and the
    # denominator cannot be zero.
    similarity = (1 - previous[len2] / max(len1, len2)) * 100
    return round(similarity, 1)
|
34 |
+
|
35 |
+
def get_data(img1_path, img2_path, file_name):
    """Extract HKID and bank-statement details and score how well the names match.

    Args:
        img1_path: HKID card image, passed unchanged to ``model1`` and ``model2``.
        img2_path: Bank statement image, passed to ``pf.get_info_from_bank``.
        file_name: Name of the bank statement file, passed to
            ``pf.get_info_from_bank``.

    Returns:
        The dict returned by ``pf.get_info_from_bank`` with these keys added:
        ``similarity_score``, ``name_on_id``, ``hkid``, ``validity`` and
        ``issue_date``.
    """
    # Both OCR models read the same HKID image; their results are merged.
    info1 = model1(img1_path)
    info2 = model2(img1_path)
    # NOTE(review): the indices used below imply cinfo is ordered
    # (name, validity, hkid, issue date) - confirm against ct.combine_info.
    cinfo = ct.combine_info(info1, info2)

    data = pf.get_info_from_bank(img2_path, file_name)
    name_on_statement = data["name_on_bs"]
    name_on_id = cinfo[0]

    data["similarity_score"] = string_similarity(name_on_statement, name_on_id)
    data["name_on_id"] = name_on_id
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]

    return data
|
85 |
+
|
86 |
+
|
87 |
+
# path1 = 'IMG_4495.jpg'
|
88 |
+
# path2 = 'hangseng_page-0001.jpg'
|
89 |
+
# print(get_score(path1, path2))
|
test.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit
|
2 |
+
|
3 |
+
print(streamlit.__version__)
|
text_reader_v2.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from model1 import model1
|
2 |
+
from model2 import model2
|
3 |
+
import checkTool
|
4 |
+
|
5 |
+
def textreader(path):
    """Return the first field of the combined OCR result for the image at ``path``.

    Both ``model1`` and ``model2`` are run on the same input and their outputs
    are merged with ``checkTool.combine_info``.

    Args:
        path: Image input forwarded unchanged to ``model1`` and ``model2``.

    Returns:
        ``cinfo[0]`` of the merged result.
        NOTE(review): presumably the holder's name - confirm against
        ``checkTool.combine_info``.
    """
    info1 = model1(path)
    info2 = model2(path)

    cinfo = checkTool.combine_info(info1, info2)

    return cinfo[0]
|
17 |
+
|
18 |
+
# print_info(*cinfo)
|
webapp.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import similarity_check as sc
|
3 |
+
import cv2
|
4 |
+
from PIL import Image
|
5 |
+
import numpy as np
|
6 |
+
import tempfile
|
7 |
+
from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
|
8 |
+
import demo
|
9 |
+
import time
|
10 |
+
import streamlit as st
|
11 |
+
import requests
|
12 |
+
import json
|
13 |
+
import request_json.sbt_request_generator as sbt
|
14 |
+
|
15 |
+
global data
|
16 |
+
data = {}
|
17 |
+
|
18 |
+
|
19 |
+
# File used to carry the extracted data across Streamlit reruns.
_DATA_FILE = 'data1.txt'


def _load_saved_data():
    """Return the dict stored in the data file, or {} if it is missing or invalid."""
    try:
        with open(_DATA_FILE, 'r') as f:
            return json.loads(f.read())
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def _save_data(data):
    """Overwrite the data file with ``data`` serialised as JSON."""
    with open(_DATA_FILE, 'w') as f:
        f.write(json.dumps(data))


def main():
    """Render the SBT web application page.

    The page has three steps:

    1. Similarity check: upload the HKID card image first and the bank
       statement image second, then press "Recognise" to extract the data,
       show it, and save it to the data file.
    2. Facial recognition: while "Run" is ticked, camera frames are passed to
       ``demo.process_frame``; once more than 20 scores are collected their
       average is shown and stored as ``avg_score`` in the data file.
    3. "Confirm" sends the saved data through ``sbt.split_data``.

    Streamlit re-executes this function on every interaction, so local
    variables do not survive between button clicks; the data file is what
    links the steps together.
    """
    html_temp = """
    <body style="background-color:red;">
    <div style="background-color:teal ;padding:10px">
    <h2 style="color:white;text-align:center;">SBT Web Application</h2>
    </div>
    </body>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    st.header("I. Similarity Check")
    image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], accept_multiple_files=True)

    # Stay None unless the matching upload exists, so "Recognise" can tell
    # whether it has everything it needs.
    image1 = None
    image2 = None
    file_name = None
    if len(image_file) in (1, 2):
        image1 = Image.open(image_file[0])
        st.text("HKID card")
        st.image(image1)
    if len(image_file) == 2:
        image2 = Image.open(image_file[1])
        file_name = image_file[1].name
        st.text("Bank statement")
        st.image(image2)

    data = {}
    if st.button("Recognise"):
        if image1 is None or image2 is None:
            st.error('Please upload exactly two images: the HKID card first, then the bank statement.')
        else:
            with st.spinner('Wait for it...'):
                data = sc.get_data(image1, image2, file_name)
                _save_data(data)
                print(f'data inside {data}')
            st.success('Done!')
            score = data["similarity_score"]
            st.text(f'score: {score}')
            if score > 85:
                st.text(f'matched')
            else:
                st.text(f'unmatched')

            st.header("IIa. HKID Data Extraction")
            st.text(f'Name: {data["name_on_id"]}')  # name is without space
            st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
            st.text(f'Date of issue: {data["issue_date"]}')

            st.header("IIb. Bank Statement Data Extraction")
            st.text(f'Name: {data["name_on_bs"]}')
            st.text(f'Address: {data["address"]}')
            st.text(f'Bank: {data["bank"]}')
            st.text(f'Date: {data["date"]}')
            st.text(f'Asset: {data["asset"]} hkd')
            st.text(f'Liabilities: {data["liabilities"]} hkd')

    st.header("II. Facial Recognition")
    run = st.checkbox('Run')

    # State threaded through demo.process_frame from one frame to the next.
    face_locations = []
    face_names = []
    process_this_frame = True
    score = []
    faces = 0

    FRAME_WINDOW = st.image([])

    if not run:
        st.write('Stopped')
    else:
        camera = cv2.VideoCapture(0)
        try:
            # `run` cannot change during a single script execution; the loop
            # ends on a failed read or when the script run is interrupted.
            while True:
                # Grab a single frame of video
                ret, frame = camera.read()
                if not ret:
                    st.error('Unable to read a frame from the camera.')
                    break

                result, process_this_frame, face_locations, faces, face_names, score = demo.process_frame(frame, process_this_frame, face_locations, faces, face_names, score)
                # Display the resulting image
                FRAME_WINDOW.image(result)

                print(score)
                if len(score) > 20:
                    avg_score = sum(score) / len(score)
                    st.write(f'{avg_score}')
                    # Read first, then rewrite: opening the file for writing
                    # before reading would truncate the saved data.
                    data = _load_saved_data()
                    data['avg_score'] = str(avg_score)
                    _save_data(data)
        finally:
            camera.release()

    if st.button("Confirm"):
        # "Recognise" is False on this rerun, so the in-memory dict is empty;
        # send what the earlier steps saved instead.
        saved = _load_saved_data()
        if not saved:
            st.error('Nothing to send: run "Recognise" first.')
        elif "avg_score" not in saved:
            st.error('Nothing to send: complete the facial recognition step first.')
        else:
            with st.spinner('Sending data...'):
                sbt.split_data(saved)
            st.success('Done!')
|
165 |
+
|
166 |
+
if __name__ == '__main__':
|
167 |
+
main()
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
+
# def save_image(image):
|
172 |
+
# try:
|
173 |
+
# temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
|
174 |
+
# Image.save(temp_file.name)
|
175 |
+
# return temp_file.name
|
176 |
+
# except IOError:
|
177 |
+
# print("Unable to save image to temporary file")
|
178 |
+
# return None
|
179 |
+
|
180 |
+
# json_file = 'request json\request_legalDocument.json'
|
181 |
+
# file = open(json_file, 'r')
|
182 |
+
# data = json.load(file)
|
183 |
+
# file.close()
|
184 |
+
# # Update data
|
185 |
+
# data.update(new_data)
|
186 |
+
# file = open(json_file, 'w')
|
187 |
+
# for item in data['request']['body']['formdata']:
|
188 |
+
# if item["key"] == "requestId":
|
189 |
+
# item["value"] = ""
|
190 |
+
# elif item["key"] == "userId":
|
191 |
+
# item["value"] = generate_token_id(2048)
|
192 |
+
# elif item["key"] == "endpoint":
|
193 |
+
# item["value"] = ""
|
194 |
+
# elif item["key"] == "apiType":
|
195 |
+
# item["value"] = ""
|
196 |
+
# elif item["key"] == "docType":
|
197 |
+
# item["value"] = "HKID"
|
198 |
+
# elif item["key"] == "nameDoc":
|
199 |
+
# item["value"] = new_data["name_on_id"]
|
200 |
+
# elif item["key"] == "docID":
|
201 |
+
# item["value"] = new_data["name_on_id"]
|
202 |
+
# elif item["key"] == "docValidity":
|
203 |
+
# item["value"] = new_data["validity"]
|
204 |
+
# elif item["key"] == "dateOfIssue":
|
205 |
+
# item["value"] = new_data["date_issue"]
|
206 |
+
# elif item["key"] == "matchingScore":
|
207 |
+
# item["value"] = new_data["similarity_score"]
|
208 |
+
# json.dump(data, file)
|
209 |
+
# file.close()
|