romanbredehoft-zama
committed on
Commit
·
b47829b
1
Parent(s):
060d7fa
Improve explainability inner workings
Browse files- app.py +82 -23
- backend.py +77 -136
- data/data.csv +47 -47
- data/gpt_data.csv +0 -57
- deployment_files/explain_model/versions.json +0 -1
- deployment_files/{approval_model → model}/client.zip +2 -2
- deployment_files/{approval_model/server.zip → model/pre_processor_bank.pkl} +2 -2
- deployment_files/{explain_model/client.zip → model/pre_processor_third_party.pkl} +2 -2
- deployment_files/model/pre_processor_user.pkl +3 -0
- deployment_files/{explain_model → model}/server.zip +2 -2
- deployment_files/{approval_model → model}/versions.json +0 -0
- deployment_files/pre_processor_third_party.pkl +2 -2
- development.py +16 -68
- server.py +2 -2
- settings.py +15 -13
- utils/pre_processing.py +1 -3
app.py
CHANGED
@@ -10,13 +10,15 @@ from settings import (
|
|
10 |
CHILDREN_MIN_MAX,
|
11 |
INCOME_MIN_MAX,
|
12 |
AGE_MIN_MAX,
|
13 |
-
EMPLOYED_MIN_MAX,
|
14 |
FAMILY_MIN_MAX,
|
15 |
INCOME_TYPES,
|
16 |
OCCUPATION_TYPES,
|
17 |
HOUSING_TYPES,
|
18 |
EDUCATION_TYPES,
|
19 |
FAMILY_STATUS,
|
|
|
|
|
|
|
20 |
)
|
21 |
from backend import (
|
22 |
keygen_send,
|
@@ -25,7 +27,7 @@ from backend import (
|
|
25 |
pre_process_encrypt_send_third_party,
|
26 |
run_fhe,
|
27 |
get_output_and_decrypt,
|
28 |
-
|
29 |
)
|
30 |
|
31 |
|
@@ -97,18 +99,67 @@ with demo:
|
|
97 |
with gr.Row():
|
98 |
with gr.Column():
|
99 |
gr.Markdown("### User")
|
100 |
-
bool_inputs = gr.CheckboxGroup(
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
with gr.Column():
|
107 |
-
income_type = gr.Dropdown(
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
with gr.Column():
|
114 |
encrypt_button_user = gr.Button("Encrypt the inputs and send to server.")
|
@@ -120,7 +171,12 @@ with demo:
|
|
120 |
with gr.Row():
|
121 |
with gr.Column(scale=2):
|
122 |
gr.Markdown("### Bank ")
|
123 |
-
account_age = gr.Slider(
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
with gr.Column():
|
126 |
encrypt_button_bank = gr.Button("Encrypt the inputs and send to server.")
|
@@ -133,7 +189,12 @@ with demo:
|
|
133 |
with gr.Column(scale=2):
|
134 |
gr.Markdown("### Third party ")
|
135 |
employed = gr.Radio(["Yes", "No"], label="Is the person employed ?", value="Yes")
|
136 |
-
years_employed = gr.
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
with gr.Column():
|
139 |
encrypt_button_third_party = gr.Button("Encrypt the inputs and send to server.")
|
@@ -163,7 +224,7 @@ with demo:
|
|
163 |
# client side to the server
|
164 |
encrypt_button_third_party.click(
|
165 |
pre_process_encrypt_send_third_party,
|
166 |
-
inputs=[client_id,
|
167 |
outputs=[encrypted_input_third_party],
|
168 |
)
|
169 |
|
@@ -230,20 +291,18 @@ with demo:
|
|
230 |
parties, runs the new prediction in FHE and decrypts the output.
|
231 |
"""
|
232 |
)
|
233 |
-
|
234 |
"Encrypt the inputs, compute in FHE and decrypt the output."
|
235 |
)
|
236 |
-
|
237 |
label="Additional years of employed required.", max_lines=1, interactive=False
|
238 |
)
|
239 |
|
240 |
# Button to explain the prediction
|
241 |
-
|
242 |
-
|
243 |
-
inputs=[client_id, prediction_output,
|
244 |
-
|
245 |
-
housing_type, account_age, employed, years_employed],
|
246 |
-
outputs=[years_employed_prediction],
|
247 |
)
|
248 |
|
249 |
gr.Markdown(
|
|
|
10 |
CHILDREN_MIN_MAX,
|
11 |
INCOME_MIN_MAX,
|
12 |
AGE_MIN_MAX,
|
|
|
13 |
FAMILY_MIN_MAX,
|
14 |
INCOME_TYPES,
|
15 |
OCCUPATION_TYPES,
|
16 |
HOUSING_TYPES,
|
17 |
EDUCATION_TYPES,
|
18 |
FAMILY_STATUS,
|
19 |
+
YEARS_EMPLOYED_BINS,
|
20 |
+
INCOME_VALUE,
|
21 |
+
AGE_VALUE,
|
22 |
)
|
23 |
from backend import (
|
24 |
keygen_send,
|
|
|
27 |
pre_process_encrypt_send_third_party,
|
28 |
run_fhe,
|
29 |
get_output_and_decrypt,
|
30 |
+
explain_encrypt_run_decrypt,
|
31 |
)
|
32 |
|
33 |
|
|
|
99 |
with gr.Row():
|
100 |
with gr.Column():
|
101 |
gr.Markdown("### User")
|
102 |
+
bool_inputs = gr.CheckboxGroup(
|
103 |
+
["Car", "Property", "Mobile phone"],
|
104 |
+
label="Which of the following do you actively hold or own?"
|
105 |
+
)
|
106 |
+
num_children = gr.Slider(
|
107 |
+
**CHILDREN_MIN_MAX,
|
108 |
+
step=1,
|
109 |
+
label="Number of children",
|
110 |
+
info="How many children do you have ?"
|
111 |
+
)
|
112 |
+
household_size = gr.Slider(
|
113 |
+
**FAMILY_MIN_MAX,
|
114 |
+
step=1,
|
115 |
+
label="Household size",
|
116 |
+
info="How many members does your household have ?"
|
117 |
+
)
|
118 |
+
total_income = gr.Slider(
|
119 |
+
**INCOME_MIN_MAX,
|
120 |
+
value=INCOME_VALUE,
|
121 |
+
label="Income",
|
122 |
+
info="What's you total yearly income (in euros) ?"
|
123 |
+
)
|
124 |
+
age = gr.Slider(
|
125 |
+
**AGE_MIN_MAX,
|
126 |
+
value=AGE_VALUE,
|
127 |
+
step=1,
|
128 |
+
label="Age",
|
129 |
+
info="How old are you ?"
|
130 |
+
)
|
131 |
|
132 |
with gr.Column():
|
133 |
+
income_type = gr.Dropdown(
|
134 |
+
choices=INCOME_TYPES,
|
135 |
+
value=INCOME_TYPES[0],
|
136 |
+
label="Income type",
|
137 |
+
info="What is your main type of income ?"
|
138 |
+
)
|
139 |
+
education_type = gr.Dropdown(
|
140 |
+
choices=EDUCATION_TYPES,
|
141 |
+
value=EDUCATION_TYPES[0],
|
142 |
+
label="Education",
|
143 |
+
info="What is your education background ?"
|
144 |
+
)
|
145 |
+
family_status = gr.Dropdown(
|
146 |
+
choices=FAMILY_STATUS,
|
147 |
+
value=FAMILY_STATUS[0],
|
148 |
+
label="Family",
|
149 |
+
info="What is your family status ?"
|
150 |
+
)
|
151 |
+
occupation_type = gr.Dropdown(
|
152 |
+
choices=OCCUPATION_TYPES,
|
153 |
+
value=OCCUPATION_TYPES[0],
|
154 |
+
label="Occupation",
|
155 |
+
info="What is your main occupation ?"
|
156 |
+
)
|
157 |
+
housing_type = gr.Dropdown(
|
158 |
+
choices=HOUSING_TYPES,
|
159 |
+
value=HOUSING_TYPES[0],
|
160 |
+
label="Housing",
|
161 |
+
info="In what type of housing do you live ?"
|
162 |
+
)
|
163 |
|
164 |
with gr.Column():
|
165 |
encrypt_button_user = gr.Button("Encrypt the inputs and send to server.")
|
|
|
171 |
with gr.Row():
|
172 |
with gr.Column(scale=2):
|
173 |
gr.Markdown("### Bank ")
|
174 |
+
account_age = gr.Slider(
|
175 |
+
**ACCOUNT_MIN_MAX,
|
176 |
+
step=1,
|
177 |
+
label="Account age (months)",
|
178 |
+
info="How long have this person had this bank account (in months) ?"
|
179 |
+
)
|
180 |
|
181 |
with gr.Column():
|
182 |
encrypt_button_bank = gr.Button("Encrypt the inputs and send to server.")
|
|
|
189 |
with gr.Column(scale=2):
|
190 |
gr.Markdown("### Third party ")
|
191 |
employed = gr.Radio(["Yes", "No"], label="Is the person employed ?", value="Yes")
|
192 |
+
years_employed = gr.Dropdown(
|
193 |
+
choices=YEARS_EMPLOYED_BINS,
|
194 |
+
value=YEARS_EMPLOYED_BINS[0],
|
195 |
+
label="Years of employment",
|
196 |
+
info="How long have this person been employed (in years) ?"
|
197 |
+
)
|
198 |
|
199 |
with gr.Column():
|
200 |
encrypt_button_third_party = gr.Button("Encrypt the inputs and send to server.")
|
|
|
224 |
# client side to the server
|
225 |
encrypt_button_third_party.click(
|
226 |
pre_process_encrypt_send_third_party,
|
227 |
+
inputs=[client_id, years_employed, employed],
|
228 |
outputs=[encrypted_input_third_party],
|
229 |
)
|
230 |
|
|
|
291 |
parties, runs the new prediction in FHE and decrypts the output.
|
292 |
"""
|
293 |
)
|
294 |
+
explain_button = gr.Button(
|
295 |
"Encrypt the inputs, compute in FHE and decrypt the output."
|
296 |
)
|
297 |
+
explain_prediction = gr.Textbox(
|
298 |
label="Additional years of employed required.", max_lines=1, interactive=False
|
299 |
)
|
300 |
|
301 |
# Button to explain the prediction
|
302 |
+
explain_button.click(
|
303 |
+
explain_encrypt_run_decrypt,
|
304 |
+
inputs=[client_id, prediction_output, years_employed, employed],
|
305 |
+
outputs=[explain_prediction],
|
|
|
|
|
306 |
)
|
307 |
|
308 |
gr.Markdown(
|
backend.py
CHANGED
@@ -14,26 +14,22 @@ from settings import (
|
|
14 |
FHE_KEYS,
|
15 |
CLIENT_FILES,
|
16 |
SERVER_FILES,
|
17 |
-
|
18 |
-
|
19 |
-
APPROVAL_PROCESSED_INPUT_SHAPE,
|
20 |
-
EXPLAIN_PROCESSED_INPUT_SHAPE,
|
21 |
INPUT_INDEXES,
|
22 |
-
|
23 |
-
EXPLAIN_INPUT_SLICES,
|
24 |
PRE_PROCESSOR_USER_PATH,
|
25 |
PRE_PROCESSOR_BANK_PATH,
|
26 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
27 |
CLIENT_TYPES,
|
28 |
USER_COLUMNS,
|
29 |
BANK_COLUMNS,
|
30 |
-
|
|
|
|
|
31 |
)
|
32 |
|
33 |
-
from utils.client_server_interface import MultiInputsFHEModelClient
|
34 |
-
|
35 |
-
# Load the server used for explaining the prediction
|
36 |
-
EXPLAIN_FHE_SERVER = MultiInputsFHEModelServer(EXPLAIN_DEPLOYMENT_PATH)
|
37 |
|
38 |
# Load pre-processor instances
|
39 |
with (
|
@@ -93,22 +89,18 @@ def clean_temporary_files(n_keys=20):
|
|
93 |
shutil.rmtree(directory)
|
94 |
|
95 |
|
96 |
-
def _get_client(client_id
|
97 |
"""Get the client instance.
|
98 |
|
99 |
Args:
|
100 |
client_id (int): The client ID to consider.
|
101 |
-
is_approval (bool): If client is representing the 'approval' model (else, it is
|
102 |
-
representing the 'explain' model). Default to True.
|
103 |
|
104 |
Returns:
|
105 |
FHEModelClient: The client instance.
|
106 |
"""
|
107 |
-
|
108 |
-
key_dir = FHE_KEYS / f"{client_id}_{key_suffix}"
|
109 |
-
client_dir = APPROVAL_DEPLOYMENT_PATH if is_approval else EXPLAIN_DEPLOYMENT_PATH
|
110 |
|
111 |
-
return MultiInputsFHEModelClient(
|
112 |
|
113 |
|
114 |
def _get_client_file_path(name, client_id, client_type=None):
|
@@ -206,7 +198,7 @@ def keygen_send():
|
|
206 |
return client_id, evaluation_key_short, gr.update(value="Keys are generated and evaluation key is sent ✅")
|
207 |
|
208 |
|
209 |
-
def _encrypt_send(client_id, inputs, client_type
|
210 |
"""Encrypt the given inputs for a specific client and send it to the server.
|
211 |
|
212 |
Args:
|
@@ -227,8 +219,8 @@ def _encrypt_send(client_id, inputs, client_type, app_mode=True):
|
|
227 |
encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
|
228 |
inputs,
|
229 |
input_index=INPUT_INDEXES[client_type],
|
230 |
-
processed_input_shape=
|
231 |
-
input_slice=
|
232 |
)
|
233 |
|
234 |
file_name = "encrypted_inputs"
|
@@ -248,14 +240,15 @@ def _encrypt_send(client_id, inputs, client_type, app_mode=True):
|
|
248 |
return encrypted_inputs_short
|
249 |
|
250 |
|
251 |
-
def
|
252 |
-
"""Pre-process the user inputs.
|
253 |
|
254 |
Args:
|
|
|
255 |
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
256 |
|
257 |
Returns:
|
258 |
-
(
|
259 |
"""
|
260 |
bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
|
261 |
family_status, occupation_type, housing_type = inputs
|
@@ -284,32 +277,18 @@ def _pre_process_user(*inputs):
|
|
284 |
|
285 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
286 |
|
287 |
-
return preprocessed_user_inputs
|
288 |
-
|
289 |
-
|
290 |
-
def pre_process_encrypt_send_user(client_id, *inputs):
|
291 |
-
"""Pre-process, encrypt and send the user inputs for a specific client to the server.
|
292 |
-
|
293 |
-
Args:
|
294 |
-
client_id (str): The current client ID to consider.
|
295 |
-
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
296 |
-
|
297 |
-
Returns:
|
298 |
-
(str): A short representation of the encrypted input to send in hex.
|
299 |
-
"""
|
300 |
-
preprocessed_user_inputs = _pre_process_user(*inputs)
|
301 |
-
|
302 |
return _encrypt_send(client_id, preprocessed_user_inputs, "user")
|
303 |
|
304 |
|
305 |
-
def
|
306 |
-
"""Pre-process the bank inputs.
|
307 |
|
308 |
Args:
|
|
|
309 |
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
310 |
|
311 |
Returns:
|
312 |
-
(
|
313 |
"""
|
314 |
account_age = inputs[0]
|
315 |
|
@@ -321,52 +300,7 @@ def _pre_process_bank(*inputs):
|
|
321 |
|
322 |
preprocessed_bank_inputs = PRE_PROCESSOR_BANK.transform(bank_inputs)
|
323 |
|
324 |
-
return preprocessed_bank_inputs
|
325 |
-
|
326 |
-
|
327 |
-
def pre_process_encrypt_send_bank(client_id, *inputs):
|
328 |
-
"""Pre-process, encrypt and send the bank inputs for a specific client to the server.
|
329 |
-
|
330 |
-
Args:
|
331 |
-
client_id (str): The current client ID to consider.
|
332 |
-
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
333 |
-
|
334 |
-
Returns:
|
335 |
-
(str): A short representation of the encrypted input to send in hex.
|
336 |
-
"""
|
337 |
-
preprocessed_bank_inputs = _pre_process_bank(*inputs)
|
338 |
-
|
339 |
return _encrypt_send(client_id, preprocessed_bank_inputs, "bank")
|
340 |
-
|
341 |
-
|
342 |
-
def _pre_process_third_party(*inputs):
|
343 |
-
"""Pre-process the third party inputs.
|
344 |
-
|
345 |
-
Args:
|
346 |
-
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
347 |
-
|
348 |
-
Returns:
|
349 |
-
(numpy.ndarray): The pre-processed inputs.
|
350 |
-
"""
|
351 |
-
third_party_data = {}
|
352 |
-
if len(inputs) == 1:
|
353 |
-
employed = inputs[0]
|
354 |
-
else:
|
355 |
-
employed, years_employed = inputs
|
356 |
-
third_party_data["Years_employed"] = [years_employed]
|
357 |
-
|
358 |
-
is_employed = employed == "Yes"
|
359 |
-
third_party_data["Employed"] = [is_employed]
|
360 |
-
|
361 |
-
third_party_inputs = pandas.DataFrame(third_party_data)
|
362 |
-
|
363 |
-
if len(inputs) == 1:
|
364 |
-
preprocessed_third_party_inputs = third_party_inputs.to_numpy()
|
365 |
-
else:
|
366 |
-
third_party_inputs = third_party_inputs.reindex(APPROVAL_THIRD_PARTY_COLUMNS, axis=1)
|
367 |
-
preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
|
368 |
-
|
369 |
-
return preprocessed_third_party_inputs
|
370 |
|
371 |
|
372 |
def pre_process_encrypt_send_third_party(client_id, *inputs):
|
@@ -379,7 +313,18 @@ def pre_process_encrypt_send_third_party(client_id, *inputs):
|
|
379 |
Returns:
|
380 |
(str): A short representation of the encrypted input to send in hex.
|
381 |
"""
|
382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
|
384 |
return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")
|
385 |
|
@@ -459,7 +404,7 @@ def get_output_and_decrypt(client_id):
|
|
459 |
raise gr.Error("Please run the FHE execution first and wait for it to be completed.")
|
460 |
|
461 |
|
462 |
-
def
|
463 |
"""Pre-process and encrypt the inputs, run the prediction in FHE and decrypt the output.
|
464 |
|
465 |
Args:
|
@@ -470,8 +415,7 @@ def years_employed_encrypt_run_decrypt(client_id, prediction_output, *inputs):
|
|
470 |
|
471 |
Returns:
|
472 |
(str): A message indicating the number of additional years of employment that could be
|
473 |
-
required in order to increase the chance of
|
474 |
-
credit card approval.
|
475 |
"""
|
476 |
|
477 |
if "approved" in prediction_output:
|
@@ -479,63 +423,60 @@ def years_employed_encrypt_run_decrypt(client_id, prediction_output, *inputs):
|
|
479 |
"Explaining the prediction can only be done if the credit card is likely to be denied."
|
480 |
)
|
481 |
|
482 |
-
# Retrieve the
|
483 |
-
|
484 |
|
485 |
-
#
|
486 |
-
|
|
|
487 |
|
488 |
-
#
|
489 |
-
|
|
|
|
|
490 |
|
491 |
-
|
492 |
-
|
|
|
|
|
|
|
|
|
|
|
493 |
|
494 |
-
|
495 |
-
|
496 |
-
family_status, occupation_type, housing_type,
|
497 |
-
)
|
498 |
-
preprocessed_bank_inputs = _pre_process_bank(account_age)
|
499 |
-
preprocessed_third_party_inputs = _pre_process_third_party(employed)
|
500 |
|
501 |
-
|
502 |
-
|
503 |
-
preprocessed_bank_inputs,
|
504 |
-
preprocessed_third_party_inputs
|
505 |
-
]
|
506 |
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
)
|
516 |
-
encrypted_inputs.append(encrypted_input)
|
517 |
-
|
518 |
-
# Run the FHE computation
|
519 |
-
encrypted_output = EXPLAIN_FHE_SERVER.run(
|
520 |
-
*encrypted_inputs,
|
521 |
-
serialized_evaluation_keys=evaluation_key
|
522 |
-
)
|
523 |
|
524 |
-
|
525 |
-
|
|
|
526 |
|
527 |
-
|
528 |
-
|
|
|
|
|
529 |
|
530 |
-
if years_employed_diff > 0:
|
531 |
return (
|
532 |
-
f"
|
533 |
-
"
|
|
|
534 |
)
|
535 |
|
536 |
return (
|
537 |
-
"
|
538 |
-
"might
|
|
|
539 |
)
|
540 |
-
|
541 |
|
|
|
14 |
FHE_KEYS,
|
15 |
CLIENT_FILES,
|
16 |
SERVER_FILES,
|
17 |
+
DEPLOYMENT_PATH,
|
18 |
+
PROCESSED_INPUT_SHAPE,
|
|
|
|
|
19 |
INPUT_INDEXES,
|
20 |
+
INPUT_SLICES,
|
|
|
21 |
PRE_PROCESSOR_USER_PATH,
|
22 |
PRE_PROCESSOR_BANK_PATH,
|
23 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
24 |
CLIENT_TYPES,
|
25 |
USER_COLUMNS,
|
26 |
BANK_COLUMNS,
|
27 |
+
THIRD_PARTY_COLUMNS,
|
28 |
+
YEARS_EMPLOYED_BINS,
|
29 |
+
YEARS_EMPLOYED_BIN_NAME_TO_INDEX,
|
30 |
)
|
31 |
|
32 |
+
from utils.client_server_interface import MultiInputsFHEModelClient
|
|
|
|
|
|
|
33 |
|
34 |
# Load pre-processor instances
|
35 |
with (
|
|
|
89 |
shutil.rmtree(directory)
|
90 |
|
91 |
|
92 |
+
def _get_client(client_id):
|
93 |
"""Get the client instance.
|
94 |
|
95 |
Args:
|
96 |
client_id (int): The client ID to consider.
|
|
|
|
|
97 |
|
98 |
Returns:
|
99 |
FHEModelClient: The client instance.
|
100 |
"""
|
101 |
+
key_dir = FHE_KEYS / f"{client_id}"
|
|
|
|
|
102 |
|
103 |
+
return MultiInputsFHEModelClient(DEPLOYMENT_PATH, key_dir=key_dir, nb_inputs=len(CLIENT_TYPES))
|
104 |
|
105 |
|
106 |
def _get_client_file_path(name, client_id, client_type=None):
|
|
|
198 |
return client_id, evaluation_key_short, gr.update(value="Keys are generated and evaluation key is sent ✅")
|
199 |
|
200 |
|
201 |
+
def _encrypt_send(client_id, inputs, client_type):
|
202 |
"""Encrypt the given inputs for a specific client and send it to the server.
|
203 |
|
204 |
Args:
|
|
|
219 |
encrypted_inputs = client.quantize_encrypt_serialize_multi_inputs(
|
220 |
inputs,
|
221 |
input_index=INPUT_INDEXES[client_type],
|
222 |
+
processed_input_shape=PROCESSED_INPUT_SHAPE,
|
223 |
+
input_slice=INPUT_SLICES[client_type],
|
224 |
)
|
225 |
|
226 |
file_name = "encrypted_inputs"
|
|
|
240 |
return encrypted_inputs_short
|
241 |
|
242 |
|
243 |
+
def pre_process_encrypt_send_user(client_id, *inputs):
|
244 |
+
"""Pre-process, encrypt and send the user inputs for a specific client to the server.
|
245 |
|
246 |
Args:
|
247 |
+
client_id (str): The current client ID to consider.
|
248 |
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
249 |
|
250 |
Returns:
|
251 |
+
(str): A short representation of the encrypted input to send in hex.
|
252 |
"""
|
253 |
bool_inputs, num_children, household_size, total_income, age, income_type, education_type, \
|
254 |
family_status, occupation_type, housing_type = inputs
|
|
|
277 |
|
278 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
return _encrypt_send(client_id, preprocessed_user_inputs, "user")
|
281 |
|
282 |
|
283 |
+
def pre_process_encrypt_send_bank(client_id, *inputs):
|
284 |
+
"""Pre-process, encrypt and send the bank inputs for a specific client to the server.
|
285 |
|
286 |
Args:
|
287 |
+
client_id (str): The current client ID to consider.
|
288 |
*inputs (Tuple[numpy.ndarray]): The inputs to pre-process.
|
289 |
|
290 |
Returns:
|
291 |
+
(str): A short representation of the encrypted input to send in hex.
|
292 |
"""
|
293 |
account_age = inputs[0]
|
294 |
|
|
|
300 |
|
301 |
preprocessed_bank_inputs = PRE_PROCESSOR_BANK.transform(bank_inputs)
|
302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
return _encrypt_send(client_id, preprocessed_bank_inputs, "bank")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
|
305 |
|
306 |
def pre_process_encrypt_send_third_party(client_id, *inputs):
|
|
|
313 |
Returns:
|
314 |
(str): A short representation of the encrypted input to send in hex.
|
315 |
"""
|
316 |
+
years_employed_bin, employed = inputs
|
317 |
+
|
318 |
+
years_employed = YEARS_EMPLOYED_BIN_NAME_TO_INDEX[years_employed_bin]
|
319 |
+
is_employed = employed == "Yes"
|
320 |
+
|
321 |
+
third_party_inputs = pandas.DataFrame({
|
322 |
+
"Years_employed": [years_employed],
|
323 |
+
"Employed": [is_employed],
|
324 |
+
})
|
325 |
+
|
326 |
+
third_party_inputs = third_party_inputs.reindex(THIRD_PARTY_COLUMNS, axis=1)
|
327 |
+
preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
|
328 |
|
329 |
return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")
|
330 |
|
|
|
404 |
raise gr.Error("Please run the FHE execution first and wait for it to be completed.")
|
405 |
|
406 |
|
407 |
+
def explain_encrypt_run_decrypt(client_id, prediction_output, *inputs):
|
408 |
"""Pre-process and encrypt the inputs, run the prediction in FHE and decrypt the output.
|
409 |
|
410 |
Args:
|
|
|
415 |
|
416 |
Returns:
|
417 |
(str): A message indicating the number of additional years of employment that could be
|
418 |
+
required in order to increase the chance of credit card approval.
|
|
|
419 |
"""
|
420 |
|
421 |
if "approved" in prediction_output:
|
|
|
423 |
"Explaining the prediction can only be done if the credit card is likely to be denied."
|
424 |
)
|
425 |
|
426 |
+
# Retrieve the third party inputs
|
427 |
+
years_employed, employed = inputs
|
428 |
|
429 |
+
# Years_employed is divided into several ordered bins. Here, we retrieve the index representing
|
430 |
+
# the bin from the input
|
431 |
+
bin_index = YEARS_EMPLOYED_BIN_NAME_TO_INDEX[years_employed]
|
432 |
|
433 |
+
# If the bin is not the last (representing the most years of employment), we run the model in
|
434 |
+
# FHE for each bin "older" than the given bin, in order. Then, we retrieve the first bin that
|
435 |
+
# changes the model's prediction to "approval" and display it to the user.
|
436 |
+
if bin_index != len(YEARS_EMPLOYED_BINS) - 1:
|
437 |
|
438 |
+
output_predictions = []
|
439 |
+
|
440 |
+
# Loop over the bins "older" than the input one
|
441 |
+
for years_employed_bin in YEARS_EMPLOYED_BINS[bin_index+1:]:
|
442 |
+
|
443 |
+
# Send the new encrypted input
|
444 |
+
pre_process_encrypt_send_third_party(client_id, years_employed_bin, employed)
|
445 |
|
446 |
+
# Run the model in FHE
|
447 |
+
run_fhe(client_id)
|
|
|
|
|
|
|
|
|
448 |
|
449 |
+
# Retrieve the new prediction
|
450 |
+
output_prediction = get_output_and_decrypt(client_id)
|
|
|
|
|
|
|
451 |
|
452 |
+
is_approved = "approved" in output_prediction[0]
|
453 |
+
output_predictions.append(is_approved)
|
454 |
+
|
455 |
+
# Re-send the initial third party inputs in order to avoid unwanted conflict (as sending
|
456 |
+
# some inputs basically re-writes the associated file on the server side)
|
457 |
+
pre_process_encrypt_send_third_party(client_id, years_employed, employed)
|
458 |
+
|
459 |
+
# In case the model predicted at least one approval
|
460 |
+
if any(output_predictions):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
|
462 |
+
# Retrieve the first bin that made the model predict an approval
|
463 |
+
first_approved_prediction_index = numpy.argmax(output_predictions)
|
464 |
+
years_employed_bin_needed = YEARS_EMPLOYED_BINS[first_approved_prediction_index + bin_index + 1]
|
465 |
|
466 |
+
return (
|
467 |
+
f"Having at least {years_employed_bin_needed} more years of employment would "
|
468 |
+
"increase your chance of having your credit card approved."
|
469 |
+
)
|
470 |
|
|
|
471 |
return (
|
472 |
+
f"Increasing the number of years of employment up to {YEARS_EMPLOYED_BINS[-1]} years "
|
473 |
+
"does not seem to be enough to get an approval based on the given inputs. Other inputs "
|
474 |
+
"like the income or the account's age might have bigger impact in this particular case."
|
475 |
)
|
476 |
|
477 |
return (
|
478 |
+
f"You already have the maximum amount of years of employment ({years_employed} years). "
|
479 |
+
"Other inputs like the income or the account's age might have bigger impact in this "
|
480 |
+
"particular case."
|
481 |
)
|
|
|
482 |
|
data/data.csv
CHANGED
@@ -1,57 +1,57 @@
|
|
1 |
Own_car,Own_property,Mobile_phone,Num_children,Household_size,Total_income,Age,Income_type,Education_type,Family_status,Occupation_type,Housing_type,Account_age,Employed,Years_employed,Target
|
2 |
-
1,1,1,2,4,27500.000000000004,35,Salaried,Higher education,Married,Engineer,House / apartment,36,1,
|
3 |
0,0,1,0,1,11000.0,50,Pensioner,Secondary,Widow,Retired,Rented apartment,12,0,0,0
|
4 |
-
1,1,0,1,3,41250.0,40,Self-employed,Higher education,Married,Business Owner,House / apartment,48,1,
|
5 |
-
0,1,0,1,2,16500.0,28,Salaried,Secondary,Single,Teacher,With parents,24,1,
|
6 |
-
1,0,1,0,1,24750.000000000004,32,Self-employed,Higher education,Divorced,Entrepreneur,Rented apartment,60,1,
|
7 |
-
0,1,1,3,5,19250.0,45,Salaried,Incomplete higher,Married,Office Worker,House / apartment,30,1,
|
8 |
1,0,0,0,1,13750.000000000002,55,Pensioner,Lower secondary,Widow,Retired,House / apartment,120,0,0,0
|
9 |
-
0,1,1,2,3,33000.0,37,Salaried,Higher education,Married,Manager,House / apartment,36,1,
|
10 |
-
1,1,1,0,1,44000.0,29,Self-employed,Higher education,Single,Entrepreneur,House / apartment,48,1,
|
11 |
-
0,1,1,4,6,22000.0,38,Salaried,Secondary,Married,Salesperson,With parents,60,1,
|
12 |
-
1,0,0,1,2,30250.000000000004,26,Self-employed,Higher education,Married,Designer,Rented apartment,12,1,
|
13 |
0,0,0,0,1,16500.0,60,Pensioner,Lower secondary,Widow,Retired,House / apartment,180,0,0,0
|
14 |
-
1,1,1,2,4,38500.0,45,Salaried,Higher education,Married,Manager,House / apartment,72,1,
|
15 |
-
0,1,1,3,5,35750.0,50,Salaried,Incomplete higher,Divorced,Teacher,House / apartment,96,1,
|
16 |
-
1,0,1,0,1,46750.00000000001,33,Self-employed,Higher education,Single,Consultant,Rented apartment,36,1,
|
17 |
0,1,0,1,3,17600.0,55,Pensioner,Secondary,Widow,Retired,House / apartment,150,0,0,0
|
18 |
-
1,1,1,2,4,49500.00000000001,41,Salaried,Higher education,Married,Doctor,House / apartment,60,1,
|
19 |
0,0,1,0,1,12650.000000000002,24,Student,Incomplete higher,Single,Student,With parents,6,0,0,0
|
20 |
-
1,0,0,1,3,26400.000000000004,35,Self-employed,Secondary,Divorced,Cooking Staff,Rented apartment,24,1,
|
21 |
-
0,1,1,3,5,30250.000000000004,50,Salaried,Higher education,Widow,Engineer,House / apartment,120,1,
|
22 |
-
1,0,1,0,2,38500.0,30,Salaried,Higher education,Married,Lawyer,Rented apartment,36,1,
|
23 |
-
0,1,0,2,4,17600.0,40,Self-employed,Secondary,Married,Carpenter,House / apartment,48,1,
|
24 |
1,0,1,1,2,14300.000000000002,29,Student,Higher education,Single,Student,With parents,12,0,0,0
|
25 |
0,1,1,0,1,24750.000000000004,55,Pensioner,Lower secondary,Divorced,Retired,House / apartment,180,0,0,0
|
26 |
-
1,1,1,1,3,44000.0,46,Salaried,Higher education,Married,Architect,House / apartment,72,1,
|
27 |
-
0,0,0,2,5,20350.0,33,Self-employed,Secondary,Single,Plumber,With parents,24,1,
|
28 |
-
1,1,1,0,1,35750.0,28,Self-employed,Higher education,Single,Designer,Rented apartment,36,1,
|
29 |
0,0,0,1,2,15950.000000000002,53,Pensioner,Incomplete higher,Widow,Retired,House / apartment,144,0,0,0
|
30 |
-
1,1,0,3,6,31900.000000000004,39,Salaried,Secondary,Divorced,Teacher,House / apartment,60,1,
|
31 |
-
0,1,1,0,1,22000.0,31,Salaried,Higher education,Single,Nurse,Rented apartment,48,1,
|
32 |
-
1,0,1,2,4,28050.000000000004,42,Self-employed,Higher education,Married,Business Owner,House / apartment,60,1,
|
33 |
-
0,1,0,1,3,18700.0,47,Salaried,Secondary,Married,Factory Worker,With parents,72,1,
|
34 |
-
0,0,1,0,1,46750.00000000001,25,Self-employed,Higher education,Single,Entrepreneur,Rented apartment,12,1,
|
35 |
-
1,1,0,3,5,16500.0,55,Salaried,Secondary,Married,Teacher,House / apartment,240,1,
|
36 |
-
0,0,1,1,2,24750.000000000004,40,Salaried,Higher education,Divorced,Nurse,With parents,36,1,
|
37 |
1,0,1,0,1,11000.0,50,Pensioner,Lower secondary,Widow,Retired,Rented apartment,60,0,0,0
|
38 |
-
0,1,0,2,4,19250.0,30,Salaried,Higher education,Married,Office Worker,House / apartment,24,1,
|
39 |
-
1,0,1,1,3,33000.0,33,Self-employed,Secondary,Single,Entrepreneur,Rented apartment,48,1,
|
40 |
-
0,0,0,0,1,41250.0,28,Salaried,Higher education,Single,Manager,With parents,12,1,
|
41 |
-
1,1,1,3,6,22000.0,45,Salaried,Incomplete higher,Married,Salesperson,House / apartment,96,1,
|
42 |
-
1,0,1,0,1,16500.0,65,Pensioner,Higher education,Widow,Retired,House / apartment,420,0,
|
43 |
-
0,1,0,1,2,12100.000000000002,26,Salaried,Secondary,Single,Office Worker,With parents,24,1,
|
44 |
-
1,0,1,2,4,9900.0,35,Salaried,Lower secondary,Married,Factory Worker,House / apartment,60,1,
|
45 |
-
0,0,1,0,1,27500.000000000004,55,Self-employed,Higher education,Divorced,Consultant,Rented apartment,180,1,
|
46 |
-
1,1,0,3,5,13750.000000000002,40,Salaried,Incomplete higher,Married,Cooking Staff,House / apartment,120,1,
|
47 |
-
0,1,1,1,2,19250.0,28,Self-employed,Secondary,Single,Designer,Rented apartment,36,1,
|
48 |
-
1,0,0,0,1,24750.000000000004,30,Salaried,Higher education,Married,Engineer,With parents,48,1,
|
49 |
-
0,1,1,2,3,14850.000000000002,48,Salaried,Secondary,Divorced,Salesperson,House / apartment,240,1,
|
50 |
-
1,0,1,2,4,22000.0,45,Self-employed,Secondary,Married,Artist,House / apartment,120,1,
|
51 |
-
0,1,0,0,1,30250.000000000004,50,Salaried,Higher education,Widow,Scientist,Rented apartment,300,0,
|
52 |
-
1,0,1,1,2,14300.000000000002,27,Salaried,Incomplete higher,Single,Accountant,With parents,36,1,
|
53 |
-
0,0,0,3,6,12100.000000000002,38,Salaried,Lower secondary,Divorced,Bus Driver,House / apartment,72,1,
|
54 |
-
1,1,0,0,1,17600.0,55,Pensioner,Higher education,Married,Retired,House / apartment,420,0,
|
55 |
-
0,1,1,2,3,15950.000000000002,33,Self-employed,Secondary,Married,Business Owner,Rented apartment,60,1,
|
56 |
-
1,0,1,1,2,37400.0,29,Salaried,Higher education,Single,Software Developer,With parents,48,1,
|
57 |
-
0,0,0,0,1,19800.0,42,Self-employed,Incomplete higher,Divorced,Entrepreneur,House / apartment,96,1,
|
|
|
1 |
Own_car,Own_property,Mobile_phone,Num_children,Household_size,Total_income,Age,Income_type,Education_type,Family_status,Occupation_type,Housing_type,Account_age,Employed,Years_employed,Target
|
2 |
+
1,1,1,2,4,27500.000000000004,35,Salaried,Higher education,Married,Engineer,House / apartment,36,1,1,1
|
3 |
0,0,1,0,1,11000.0,50,Pensioner,Secondary,Widow,Retired,Rented apartment,12,0,0,0
|
4 |
+
1,1,0,1,3,41250.0,40,Self-employed,Higher education,Married,Business Owner,House / apartment,48,1,3,1
|
5 |
+
0,1,0,1,2,16500.0,28,Salaried,Secondary,Single,Teacher,With parents,24,1,1,1
|
6 |
+
1,0,1,0,1,24750.000000000004,32,Self-employed,Higher education,Divorced,Entrepreneur,Rented apartment,60,1,2,1
|
7 |
+
0,1,1,3,5,19250.0,45,Salaried,Incomplete higher,Married,Office Worker,House / apartment,30,1,3,1
|
8 |
1,0,0,0,1,13750.000000000002,55,Pensioner,Lower secondary,Widow,Retired,House / apartment,120,0,0,0
|
9 |
+
0,1,1,2,3,33000.0,37,Salaried,Higher education,Married,Manager,House / apartment,36,1,4,1
|
10 |
+
1,1,1,0,1,44000.0,29,Self-employed,Higher education,Single,Entrepreneur,House / apartment,48,1,2,1
|
11 |
+
0,1,1,4,6,22000.0,38,Salaried,Secondary,Married,Salesperson,With parents,60,1,4,1
|
12 |
+
1,0,0,1,2,30250.000000000004,26,Self-employed,Higher education,Married,Designer,Rented apartment,12,1,1,1
|
13 |
0,0,0,0,1,16500.0,60,Pensioner,Lower secondary,Widow,Retired,House / apartment,180,0,0,0
|
14 |
+
1,1,1,2,4,38500.0,45,Salaried,Higher education,Married,Manager,House / apartment,72,1,5,1
|
15 |
+
0,1,1,3,5,35750.0,50,Salaried,Incomplete higher,Divorced,Teacher,House / apartment,96,1,5,1
|
16 |
+
1,0,1,0,1,46750.00000000001,33,Self-employed,Higher education,Single,Consultant,Rented apartment,36,1,3,1
|
17 |
0,1,0,1,3,17600.0,55,Pensioner,Secondary,Widow,Retired,House / apartment,150,0,0,0
|
18 |
+
1,1,1,2,4,49500.00000000001,41,Salaried,Higher education,Married,Doctor,House / apartment,60,1,4,1
|
19 |
0,0,1,0,1,12650.000000000002,24,Student,Incomplete higher,Single,Student,With parents,6,0,0,0
|
20 |
+
1,0,0,1,3,26400.000000000004,35,Self-employed,Secondary,Divorced,Cooking Staff,Rented apartment,24,1,1,1
|
21 |
+
0,1,1,3,5,30250.000000000004,50,Salaried,Higher education,Widow,Engineer,House / apartment,120,1,5,1
|
22 |
+
1,0,1,0,2,38500.0,30,Salaried,Higher education,Married,Lawyer,Rented apartment,36,1,2,1
|
23 |
+
0,1,0,2,4,17600.0,40,Self-employed,Secondary,Married,Carpenter,House / apartment,48,1,4,1
|
24 |
1,0,1,1,2,14300.000000000002,29,Student,Higher education,Single,Student,With parents,12,0,0,0
|
25 |
0,1,1,0,1,24750.000000000004,55,Pensioner,Lower secondary,Divorced,Retired,House / apartment,180,0,0,0
|
26 |
+
1,1,1,1,3,44000.0,46,Salaried,Higher education,Married,Architect,House / apartment,72,1,4,1
|
27 |
+
0,0,0,2,5,20350.0,33,Self-employed,Secondary,Single,Plumber,With parents,24,1,2,1
|
28 |
+
1,1,1,0,1,35750.0,28,Self-employed,Higher education,Single,Designer,Rented apartment,36,1,1,1
|
29 |
0,0,0,1,2,15950.000000000002,53,Pensioner,Incomplete higher,Widow,Retired,House / apartment,144,0,0,0
|
30 |
+
1,1,0,3,6,31900.000000000004,39,Salaried,Secondary,Divorced,Teacher,House / apartment,60,1,3,1
|
31 |
+
0,1,1,0,1,22000.0,31,Salaried,Higher education,Single,Nurse,Rented apartment,48,1,2,1
|
32 |
+
1,0,1,2,4,28050.000000000004,42,Self-employed,Higher education,Married,Business Owner,House / apartment,60,1,4,1
|
33 |
+
0,1,0,1,3,18700.0,47,Salaried,Secondary,Married,Factory Worker,With parents,72,1,3,0
|
34 |
+
0,0,1,0,1,46750.00000000001,25,Self-employed,Higher education,Single,Entrepreneur,Rented apartment,12,1,0,1
|
35 |
+
1,1,0,3,5,16500.0,55,Salaried,Secondary,Married,Teacher,House / apartment,240,1,5,1
|
36 |
+
0,0,1,1,2,24750.000000000004,40,Salaried,Higher education,Divorced,Nurse,With parents,36,1,3,1
|
37 |
1,0,1,0,1,11000.0,50,Pensioner,Lower secondary,Widow,Retired,Rented apartment,60,0,0,0
|
38 |
+
0,1,0,2,4,19250.0,30,Salaried,Higher education,Married,Office Worker,House / apartment,24,1,1,1
|
39 |
+
1,0,1,1,3,33000.0,33,Self-employed,Secondary,Single,Entrepreneur,Rented apartment,48,1,2,0
|
40 |
+
0,0,0,0,1,41250.0,28,Salaried,Higher education,Single,Manager,With parents,12,1,1,1
|
41 |
+
1,1,1,3,6,22000.0,45,Salaried,Incomplete higher,Married,Salesperson,House / apartment,96,1,4,1
|
42 |
+
1,0,1,0,1,16500.0,65,Pensioner,Higher education,Widow,Retired,House / apartment,420,0,5,1
|
43 |
+
0,1,0,1,2,12100.000000000002,26,Salaried,Secondary,Single,Office Worker,With parents,24,1,0,0
|
44 |
+
1,0,1,2,4,9900.0,35,Salaried,Lower secondary,Married,Factory Worker,House / apartment,60,1,3,1
|
45 |
+
0,0,1,0,1,27500.000000000004,55,Self-employed,Higher education,Divorced,Consultant,Rented apartment,180,1,5,1
|
46 |
+
1,1,0,3,5,13750.000000000002,40,Salaried,Incomplete higher,Married,Cooking Staff,House / apartment,120,1,4,0
|
47 |
+
0,1,1,1,2,19250.0,28,Self-employed,Secondary,Single,Designer,Rented apartment,36,1,1,1
|
48 |
+
1,0,0,0,1,24750.000000000004,30,Salaried,Higher education,Married,Engineer,With parents,48,1,2,1
|
49 |
+
0,1,1,2,3,14850.000000000002,48,Salaried,Secondary,Divorced,Salesperson,House / apartment,240,1,5,0
|
50 |
+
1,0,1,2,4,22000.0,45,Self-employed,Secondary,Married,Artist,House / apartment,120,1,4,1
|
51 |
+
0,1,0,0,1,30250.000000000004,50,Salaried,Higher education,Widow,Scientist,Rented apartment,300,0,5,1
|
52 |
+
1,0,1,1,2,14300.000000000002,27,Salaried,Incomplete higher,Single,Accountant,With parents,36,1,1,0
|
53 |
+
0,0,0,3,6,12100.000000000002,38,Salaried,Lower secondary,Divorced,Bus Driver,House / apartment,72,1,3,1
|
54 |
+
1,1,0,0,1,17600.0,55,Pensioner,Higher education,Married,Retired,House / apartment,420,0,5,0
|
55 |
+
0,1,1,2,3,15950.000000000002,33,Self-employed,Secondary,Married,Business Owner,Rented apartment,60,1,2,1
|
56 |
+
1,0,1,1,2,37400.0,29,Salaried,Higher education,Single,Software Developer,With parents,48,1,2,1
|
57 |
+
0,0,0,0,1,19800.0,42,Self-employed,Incomplete higher,Divorced,Entrepreneur,House / apartment,96,1,4,0
|
data/gpt_data.csv
DELETED
@@ -1,57 +0,0 @@
|
|
1 |
-
ID,Car,Property,Work phone,Phone,Email,Number of children,Household size,Income,Age,Income type,Education,Family,Occupation,Housing,Account age (months),Employed,Years of employment,Credit card Approval
|
2 |
-
0,Yes,Yes,No,Yes,Yes,2,4,50000,35,Salaried,Higher education,Married,Engineer,House / apartment,36,Yes,5,Yes
|
3 |
-
1,No,No,Yes,Yes,No,0,1,20000,50,Pensioner,Secondary,Widow,Retired,Rented apartment,12,No,0,No
|
4 |
-
2,Yes,Yes,No,No,Yes,1,3,75000,40,Self-employed,Higher education,Civil marriage,Business Owner,House / apartment,48,Yes,10,Yes
|
5 |
-
3,No,Yes,Yes,No,No,1,2,30000,28,Salaried,Secondary,Single,Teacher,With parents,24,Yes,3,Yes
|
6 |
-
4,Yes,No,No,Yes,Yes,0,1,45000,32,Self-employed,Higher education,Divorced,Freelancer,Rented apartment,60,Yes,7,Yes
|
7 |
-
5,No,Yes,Yes,Yes,No,3,5,35000,45,Salaried,Incomplete higher,Married,Clerk,House / apartment,30,Yes,10,Yes
|
8 |
-
6,Yes,No,No,No,Yes,0,1,25000,55,Pensioner,Lower secondary,Widow,Retired,House / apartment,120,No,0,No
|
9 |
-
7,No,Yes,Yes,Yes,Yes,2,3,60000,37,Salaried,Higher education,Civil marriage,Manager,House / apartment,36,Yes,12,Yes
|
10 |
-
8,Yes,Yes,Yes,Yes,Yes,0,1,80000,29,Self-employed,Higher education,Single,Entrepreneur,House / apartment,48,Yes,6,Yes
|
11 |
-
9,No,Yes,No,Yes,No,4,6,40000,38,Salaried,Secondary,Married,Salesperson,With parents,60,Yes,15,Yes
|
12 |
-
10,Yes,No,Yes,No,Yes,1,2,55000,26,Self-employed,Higher education,Civil marriage,Designer,Rented apartment,12,Yes,3,Yes
|
13 |
-
11,No,No,No,No,No,0,1,30000,60,Pensioner,Lower secondary,Widow,Retired,House / apartment,180,No,0,No
|
14 |
-
12,Yes,Yes,Yes,Yes,Yes,2,4,70000,45,Salaried,Higher education,Married,Manager,House / apartment,72,Yes,20,Yes
|
15 |
-
13,No,Yes,Yes,Yes,Yes,3,5,65000,50,Salaried,Incomplete higher,Divorced,Teacher,House / apartment,96,Yes,25,Yes
|
16 |
-
14,Yes,No,Yes,Yes,No,0,1,85000,33,Self-employed,Higher education,Single,Consultant,Rented apartment,36,Yes,10,Yes
|
17 |
-
15,No,Yes,No,No,Yes,1,3,32000,55,Pensioner,Secondary,Widow,Retired,House / apartment,150,No,0,No
|
18 |
-
16,Yes,Yes,Yes,Yes,Yes,2,4,90000,41,Salaried,Higher education,Married,Doctor,House / apartment,60,Yes,15,Yes
|
19 |
-
17,No,No,No,Yes,Yes,0,1,23000,24,Student,Incomplete higher,Single,Student,With parents,6,No,0,No
|
20 |
-
18,Yes,No,Yes,No,Yes,1,3,48000,35,Self-employed,Secondary,Divorced,Chef,Rented apartment,24,Yes,5,Yes
|
21 |
-
19,No,Yes,No,Yes,No,3,5,55000,50,Salaried,Higher education,Widow,Engineer,House / apartment,120,Yes,20,Yes
|
22 |
-
20,Yes,No,Yes,Yes,Yes,0,2,70000,30,Salaried,Higher education,Civil marriage,Lawyer,Rented apartment,36,Yes,8,Yes
|
23 |
-
21,No,Yes,Yes,No,Yes,2,4,32000,40,Self-employed,Secondary,Married,Carpenter,House / apartment,48,Yes,12,Yes
|
24 |
-
22,Yes,No,No,Yes,No,1,2,26000,29,Student,Higher education,Single,Student,With parents,12,No,0,No
|
25 |
-
23,No,Yes,Yes,Yes,Yes,0,1,45000,55,Pensioner,Lower secondary,Divorced,Retired,House / apartment,180,No,0,No
|
26 |
-
24,Yes,Yes,No,Yes,Yes,1,3,80000,46,Salaried,Higher education,Married,Architect,House / apartment,72,Yes,18,Yes
|
27 |
-
25,No,No,Yes,No,No,2,5,37000,33,Self-employed,Secondary,Single,Plumber,With parents,24,Yes,6,Yes
|
28 |
-
26,Yes,Yes,Yes,Yes,No,0,1,65000,28,Self-employed,Higher education,Single,Graphic Designer,Rented apartment,36,Yes,4,Yes
|
29 |
-
27,No,No,No,No,Yes,1,2,29000,53,Pensioner,Incomplete higher,Widow,Retired,House / apartment,144,No,0,No
|
30 |
-
28,Yes,Yes,Yes,No,Yes,3,6,58000,39,Salaried,Secondary,Divorced,Teacher,House / apartment,60,Yes,10,Yes
|
31 |
-
29,No,Yes,No,Yes,No,0,1,40000,31,Salaried,Higher education,Single,Nurse,Rented apartment,48,Yes,7,Yes
|
32 |
-
30,Yes,No,Yes,Yes,Yes,2,4,51000,42,Self-employed,Higher education,Civil marriage,Business Owner,House / apartment,60,Yes,14,Yes
|
33 |
-
31,No,Yes,No,No,No,1,3,34000,47,Salaried,Secondary,Married,Factory Worker,With parents,72,Yes,9,No
|
34 |
-
32,No,No,Yes,Yes,Yes,0,1,85000,25,Self-employed,Higher education,Single,Entrepreneur,Rented apartment,12,Yes,2,Yes
|
35 |
-
33,Yes,Yes,No,No,No,3,5,30000,55,Salaried,Secondary,Married,Teacher,House / apartment,240,Yes,30,Yes
|
36 |
-
34,No,No,Yes,Yes,Yes,1,2,45000,40,Salaried,Higher education,Divorced,Nurse,With parents,36,Yes,10,Yes
|
37 |
-
35,Yes,No,No,Yes,No,0,1,20000,50,Pensioner,Lower secondary,Widow,Retired,Rented apartment,60,No,0,No
|
38 |
-
36,No,Yes,Yes,No,Yes,2,4,35000,30,Salaried,Higher education,Civil marriage,Clerk,House / apartment,24,Yes,5,Yes
|
39 |
-
37,Yes,No,Yes,Yes,No,1,3,60000,33,Self-employed,Secondary,Single,Freelancer,Rented apartment,48,Yes,7,No
|
40 |
-
38,No,No,No,No,Yes,0,1,75000,28,Salaried,Higher education,Single,Manager,With parents,12,Yes,3,Yes
|
41 |
-
39,Yes,Yes,Yes,Yes,No,3,6,40000,45,Salaried,Incomplete higher,Married,Salesperson,House / apartment,96,Yes,15,Yes
|
42 |
-
40,Yes,No,Yes,Yes,Yes,0,1,30000,65,Pensioner,Higher education,Widow,Retired,House / apartment,420,No,40,Yes
|
43 |
-
41,No,Yes,No,No,No,1,2,22000,26,Salaried,Secondary,Single,Junior Clerk,With parents,24,Yes,2,No
|
44 |
-
42,Yes,No,Yes,Yes,No,2,4,18000,35,Salaried,Lower secondary,Married,Factory Worker,House / apartment,60,Yes,10,Yes
|
45 |
-
43,No,No,No,Yes,Yes,0,1,50000,55,Self-employed,Higher education,Divorced,Consultant,Rented apartment,180,Yes,30,Yes
|
46 |
-
44,Yes,Yes,Yes,No,No,3,5,25000,40,Salaried,Incomplete higher,Married,Cook,House / apartment,120,Yes,15,No
|
47 |
-
45,No,Yes,No,Yes,Yes,1,2,35000,28,Self-employed,Secondary,Single,Graphic Designer,Rented apartment,36,Yes,5,Yes
|
48 |
-
46,Yes,No,Yes,No,Yes,0,1,45000,30,Salaried,Higher education,Civil marriage,Engineer,With parents,48,Yes,8,Yes
|
49 |
-
47,No,Yes,Yes,Yes,No,2,3,27000,48,Salaried,Secondary,Divorced,Salesperson,House / apartment,240,Yes,20,No
|
50 |
-
48,Yes,No,Yes,Yes,No,2,4,40000,45,Self-employed,Secondary,Married,Artist,House / apartment,120,Yes,15,Yes
|
51 |
-
49,No,Yes,No,No,Yes,0,1,55000,50,Salaried,Higher education,Widow,Scientist,Rented apartment,300,No,25,Yes
|
52 |
-
50,Yes,No,Yes,Yes,Yes,1,2,26000,27,Salaried,Incomplete higher,Single,Junior Accountant,With parents,36,Yes,3,No
|
53 |
-
51,No,No,No,No,No,3,6,22000,38,Salaried,Lower secondary,Divorced,Bus Driver,House / apartment,72,Yes,10,Yes
|
54 |
-
52,Yes,Yes,Yes,No,Yes,0,1,32000,55,Pensioner,Higher education,Married,Retired,House / apartment,420,No,30,No
|
55 |
-
53,No,Yes,No,Yes,No,2,3,29000,33,Self-employed,Secondary,Civil marriage,Shop Owner,Rented apartment,60,Yes,7,Yes
|
56 |
-
54,Yes,No,Yes,Yes,Yes,1,2,68000,29,Salaried,Higher education,Single,Software Developer,With parents,48,Yes,6,Yes
|
57 |
-
55,No,No,No,No,Yes,0,1,36000,42,Self-employed,Incomplete higher,Divorced,Freelancer,House / apartment,96,Yes,12,No
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deployment_files/explain_model/versions.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"concrete-python": "2.5.0rc1", "concrete-ml": "1.3.0", "python": "3.10.11"}
|
|
|
|
deployment_files/{approval_model β model}/client.zip
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09a8c2ae9e2e2e18a7e48123222312e3de32ef3b751bf88e8e090739f92c12f3
|
3 |
+
size 28549
|
deployment_files/{approval_model/server.zip β model/pre_processor_bank.pkl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c76179d6cb576a61f6d57e29f45545d8c4dd2d86a3818730f5c6bdd35faebe4a
|
3 |
+
size 1098
|
deployment_files/{explain_model/client.zip β model/pre_processor_third_party.pkl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a8d320eea9a0b2d28b9ee46b5b02df1dc85176746385d64e4c742edc99339f4
|
3 |
+
size 647
|
deployment_files/model/pre_processor_user.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4300414ee222355b2ac9e0f5e4dc21b50f0a4de8e17e0ed4121dc766231d010
|
3 |
+
size 3340
|
deployment_files/{explain_model β model}/server.zip
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87df3659c986799920676e358c316fc99f8c65951b71ccb9288c94af90c8a57b
|
3 |
+
size 1775
|
deployment_files/{approval_model β model}/versions.json
RENAMED
File without changes
|
deployment_files/pre_processor_third_party.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4ac1351641df7d247a18c15405b22926584c88aa9640857cb1943a2bb7ee32e
|
3 |
+
size 1098
|
development.py
CHANGED
@@ -6,46 +6,34 @@ import pandas
|
|
6 |
import pickle
|
7 |
|
8 |
from settings import (
|
9 |
-
|
10 |
-
EXPLAIN_DEPLOYMENT_PATH,
|
11 |
DATA_PATH,
|
12 |
-
|
13 |
-
EXPLAIN_INPUT_SLICES,
|
14 |
PRE_PROCESSOR_USER_PATH,
|
15 |
PRE_PROCESSOR_BANK_PATH,
|
16 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
17 |
USER_COLUMNS,
|
18 |
BANK_COLUMNS,
|
19 |
-
|
20 |
-
EXPLAIN_THIRD_PARTY_COLUMNS,
|
21 |
)
|
22 |
from utils.client_server_interface import MultiInputsFHEModelDev
|
23 |
from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
|
24 |
from utils.pre_processing import get_pre_processors
|
25 |
|
26 |
|
27 |
-
def get_multi_inputs(data
|
28 |
"""Get inputs for all three parties from the input data, using fixed slices.
|
29 |
|
30 |
Args:
|
31 |
data (numpy.ndarray): The input data to consider.
|
32 |
-
is_approval (bool): If the data should be used for the 'approval' model (else, otherwise for
|
33 |
-
the 'explain' model).
|
34 |
|
35 |
Returns:
|
36 |
(Tuple[numpy.ndarray]): The inputs for all three parties.
|
37 |
"""
|
38 |
-
if is_approval:
|
39 |
-
return (
|
40 |
-
data[:, APPROVAL_INPUT_SLICES["user"]],
|
41 |
-
data[:, APPROVAL_INPUT_SLICES["bank"]],
|
42 |
-
data[:, APPROVAL_INPUT_SLICES["third_party"]]
|
43 |
-
)
|
44 |
-
|
45 |
return (
|
46 |
-
data[:,
|
47 |
-
data[:,
|
48 |
-
data[:,
|
49 |
)
|
50 |
|
51 |
|
@@ -61,7 +49,7 @@ data_y = data_x.pop("Target").copy().to_frame()
|
|
61 |
# Get data from all parties
|
62 |
data_user = data_x[USER_COLUMNS].copy()
|
63 |
data_bank = data_x[BANK_COLUMNS].copy()
|
64 |
-
data_third_party = data_x[
|
65 |
|
66 |
# Feature engineer the data
|
67 |
pre_processor_user, pre_processor_bank, pre_processor_third_party = get_pre_processors()
|
@@ -75,23 +63,23 @@ preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_da
|
|
75 |
|
76 |
print("\nTrain and compile the model")
|
77 |
|
78 |
-
|
79 |
|
80 |
-
|
81 |
|
82 |
-
multi_inputs_train = get_multi_inputs(preprocessed_data_x
|
83 |
|
84 |
-
|
85 |
|
86 |
print("\nSave deployment files")
|
87 |
|
88 |
# Delete the deployment folder and its content if it already exists
|
89 |
-
if
|
90 |
-
shutil.rmtree(
|
91 |
|
92 |
# Save files needed for deployment (and enable cross-platform deployment)
|
93 |
-
|
94 |
-
|
95 |
|
96 |
# Save pre-processors
|
97 |
with (
|
@@ -103,44 +91,4 @@ with (
|
|
103 |
pickle.dump(pre_processor_bank, file_bank)
|
104 |
pickle.dump(pre_processor_third_party, file_third_party)
|
105 |
|
106 |
-
|
107 |
-
print("\nLoad, train, compile and save files for the 'explain' model")
|
108 |
-
|
109 |
-
# Define input and target data
|
110 |
-
data_x = data.copy()
|
111 |
-
data_y = data_x.pop("Years_employed").copy().to_frame()
|
112 |
-
target_values = data_x.pop("Target").copy()
|
113 |
-
|
114 |
-
# Get all data points whose target value is True (credit card has been approved)
|
115 |
-
approved_mask = target_values == 1
|
116 |
-
data_x_approved = data_x[approved_mask]
|
117 |
-
data_y_approved = data_y[approved_mask]
|
118 |
-
|
119 |
-
# Get data from all parties
|
120 |
-
data_user = data_x_approved[USER_COLUMNS].copy()
|
121 |
-
data_bank = data_x_approved[BANK_COLUMNS].copy()
|
122 |
-
data_third_party = data_x_approved[EXPLAIN_THIRD_PARTY_COLUMNS].copy()
|
123 |
-
|
124 |
-
preprocessed_data_user = pre_processor_user.transform(data_user)
|
125 |
-
preprocessed_data_bank = pre_processor_bank.transform(data_bank)
|
126 |
-
preprocessed_data_third_party = data_third_party.to_numpy()
|
127 |
-
|
128 |
-
preprocessed_data_x = numpy.concatenate((preprocessed_data_user, preprocessed_data_bank, preprocessed_data_third_party), axis=1)
|
129 |
-
|
130 |
-
model_explain = MultiInputDecisionTreeRegressor()
|
131 |
-
|
132 |
-
model_explain, sklearn_model_explain = model_explain.fit_benchmark(preprocessed_data_x, data_y_approved)
|
133 |
-
|
134 |
-
multi_inputs_train = get_multi_inputs(preprocessed_data_x, is_approval=False)
|
135 |
-
|
136 |
-
model_explain.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
|
137 |
-
|
138 |
-
# Delete the deployment folder and its content if it already exists
|
139 |
-
if EXPLAIN_DEPLOYMENT_PATH.is_dir():
|
140 |
-
shutil.rmtree(EXPLAIN_DEPLOYMENT_PATH)
|
141 |
-
|
142 |
-
# Save files needed for deployment (and enable cross-platform deployment)
|
143 |
-
fhe_model_dev_explain = MultiInputsFHEModelDev(EXPLAIN_DEPLOYMENT_PATH, model_explain)
|
144 |
-
fhe_model_dev_explain.save(via_mlir=True)
|
145 |
-
|
146 |
print("\nDone !")
|
|
|
6 |
import pickle
|
7 |
|
8 |
from settings import (
|
9 |
+
DEPLOYMENT_PATH,
|
|
|
10 |
DATA_PATH,
|
11 |
+
INPUT_SLICES,
|
|
|
12 |
PRE_PROCESSOR_USER_PATH,
|
13 |
PRE_PROCESSOR_BANK_PATH,
|
14 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
15 |
USER_COLUMNS,
|
16 |
BANK_COLUMNS,
|
17 |
+
THIRD_PARTY_COLUMNS,
|
|
|
18 |
)
|
19 |
from utils.client_server_interface import MultiInputsFHEModelDev
|
20 |
from utils.model import MultiInputDecisionTreeClassifier, MultiInputDecisionTreeRegressor
|
21 |
from utils.pre_processing import get_pre_processors
|
22 |
|
23 |
|
24 |
+
def get_multi_inputs(data):
|
25 |
"""Get inputs for all three parties from the input data, using fixed slices.
|
26 |
|
27 |
Args:
|
28 |
data (numpy.ndarray): The input data to consider.
|
|
|
|
|
29 |
|
30 |
Returns:
|
31 |
(Tuple[numpy.ndarray]): The inputs for all three parties.
|
32 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
return (
|
34 |
+
data[:, INPUT_SLICES["user"]],
|
35 |
+
data[:, INPUT_SLICES["bank"]],
|
36 |
+
data[:, INPUT_SLICES["third_party"]]
|
37 |
)
|
38 |
|
39 |
|
|
|
49 |
# Get data from all parties
|
50 |
data_user = data_x[USER_COLUMNS].copy()
|
51 |
data_bank = data_x[BANK_COLUMNS].copy()
|
52 |
+
data_third_party = data_x[THIRD_PARTY_COLUMNS].copy()
|
53 |
|
54 |
# Feature engineer the data
|
55 |
pre_processor_user, pre_processor_bank, pre_processor_third_party = get_pre_processors()
|
|
|
63 |
|
64 |
print("\nTrain and compile the model")
|
65 |
|
66 |
+
model = MultiInputDecisionTreeClassifier()
|
67 |
|
68 |
+
model, sklearn_model = model.fit_benchmark(preprocessed_data_x, data_y)
|
69 |
|
70 |
+
multi_inputs_train = get_multi_inputs(preprocessed_data_x)
|
71 |
|
72 |
+
model.compile(*multi_inputs_train, inputs_encryption_status=["encrypted", "encrypted", "encrypted"])
|
73 |
|
74 |
print("\nSave deployment files")
|
75 |
|
76 |
# Delete the deployment folder and its content if it already exists
|
77 |
+
if DEPLOYMENT_PATH.is_dir():
|
78 |
+
shutil.rmtree(DEPLOYMENT_PATH)
|
79 |
|
80 |
# Save files needed for deployment (and enable cross-platform deployment)
|
81 |
+
fhe_model_dev = MultiInputsFHEModelDev(DEPLOYMENT_PATH, model)
|
82 |
+
fhe_model_dev.save(via_mlir=True)
|
83 |
|
84 |
# Save pre-processors
|
85 |
with (
|
|
|
91 |
pickle.dump(pre_processor_bank, file_bank)
|
92 |
pickle.dump(pre_processor_third_party, file_third_party)
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
print("\nDone !")
|
server.py
CHANGED
@@ -5,11 +5,11 @@ from typing import List, Optional
|
|
5 |
from fastapi import FastAPI, File, Form, UploadFile
|
6 |
from fastapi.responses import JSONResponse, Response
|
7 |
|
8 |
-
from settings import
|
9 |
from utils.client_server_interface import MultiInputsFHEModelServer
|
10 |
|
11 |
# Load the server
|
12 |
-
FHE_SERVER = MultiInputsFHEModelServer(
|
13 |
|
14 |
|
15 |
def _get_server_file_path(name, client_id, client_type=None):
|
|
|
5 |
from fastapi import FastAPI, File, Form, UploadFile
|
6 |
from fastapi.responses import JSONResponse, Response
|
7 |
|
8 |
+
from settings import DEPLOYMENT_PATH, SERVER_FILES, CLIENT_TYPES
|
9 |
from utils.client_server_interface import MultiInputsFHEModelServer
|
10 |
|
11 |
# Load the server
|
12 |
+
FHE_SERVER = MultiInputsFHEModelServer(DEPLOYMENT_PATH)
|
13 |
|
14 |
|
15 |
def _get_server_file_path(name, client_id, client_type=None):
|
settings.py
CHANGED
@@ -13,8 +13,7 @@ CLIENT_FILES = REPO_DIR / "client_files"
|
|
13 |
SERVER_FILES = REPO_DIR / "server_files"
|
14 |
|
15 |
# All deployment directories
|
16 |
-
|
17 |
-
EXPLAIN_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "explain_model"
|
18 |
|
19 |
# Path targeting pre-processor saved files
|
20 |
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
|
@@ -33,8 +32,7 @@ SERVER_URL = "http://localhost:8000/"
|
|
33 |
DATA_PATH = "data/data.csv"
|
34 |
|
35 |
# Development settings
|
36 |
-
|
37 |
-
EXPLAIN_PROCESSED_INPUT_SHAPE = (1, 38)
|
38 |
|
39 |
CLIENT_TYPES = ["user", "bank", "third_party"]
|
40 |
INPUT_INDEXES = {
|
@@ -42,16 +40,11 @@ INPUT_INDEXES = {
|
|
42 |
"bank": 1,
|
43 |
"third_party": 2,
|
44 |
}
|
45 |
-
|
46 |
"user": slice(0, 36), # First position: start from 0
|
47 |
"bank": slice(36, 37), # Second position: start from n_feature_user
|
48 |
"third_party": slice(37, 39), # Third position: start from n_feature_user + n_feature_bank
|
49 |
}
|
50 |
-
EXPLAIN_INPUT_SLICES = {
|
51 |
-
"user": slice(0, 36), # First position: start from 0
|
52 |
-
"bank": slice(36, 37), # Second position: start from n_feature_user
|
53 |
-
"third_party": slice(37, 38), # Third position: start from n_feature_user + n_feature_bank
|
54 |
-
}
|
55 |
|
56 |
# Fix column order for pre-processing steps
|
57 |
USER_COLUMNS = [
|
@@ -60,8 +53,7 @@ USER_COLUMNS = [
|
|
60 |
'Occupation_type',
|
61 |
]
|
62 |
BANK_COLUMNS = ["Account_age"]
|
63 |
-
|
64 |
-
EXPLAIN_THIRD_PARTY_COLUMNS = ["Employed"]
|
65 |
|
66 |
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
67 |
|
@@ -77,13 +69,23 @@ ACCOUNT_MIN_MAX = get_min_max(_data, "Account_age")
|
|
77 |
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
|
78 |
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
|
79 |
AGE_MIN_MAX = get_min_max(_data, "Age")
|
80 |
-
EMPLOYED_MIN_MAX = get_min_max(_data, "Years_employed")
|
81 |
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
|
82 |
|
|
|
|
|
|
|
|
|
83 |
# App data choices
|
84 |
INCOME_TYPES = list(_data["Income_type"].unique())
|
85 |
OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
|
86 |
HOUSING_TYPES = list(_data["Housing_type"].unique())
|
87 |
EDUCATION_TYPES = list(_data["Education_type"].unique())
|
88 |
FAMILY_STATUS = list(_data["Family_status"].unique())
|
|
|
|
|
|
|
|
|
89 |
|
|
|
|
|
|
|
|
13 |
SERVER_FILES = REPO_DIR / "server_files"
|
14 |
|
15 |
# All deployment directories
|
16 |
+
DEPLOYMENT_PATH = DEPLOYMENT_PATH / "model"
|
|
|
17 |
|
18 |
# Path targeting pre-processor saved files
|
19 |
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
|
|
|
32 |
DATA_PATH = "data/data.csv"
|
33 |
|
34 |
# Development settings
|
35 |
+
PROCESSED_INPUT_SHAPE = (1, 39)
|
|
|
36 |
|
37 |
CLIENT_TYPES = ["user", "bank", "third_party"]
|
38 |
INPUT_INDEXES = {
|
|
|
40 |
"bank": 1,
|
41 |
"third_party": 2,
|
42 |
}
|
43 |
+
INPUT_SLICES = {
|
44 |
"user": slice(0, 36), # First position: start from 0
|
45 |
"bank": slice(36, 37), # Second position: start from n_feature_user
|
46 |
"third_party": slice(37, 39), # Third position: start from n_feature_user + n_feature_bank
|
47 |
}
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# Fix column order for pre-processing steps
|
50 |
USER_COLUMNS = [
|
|
|
53 |
'Occupation_type',
|
54 |
]
|
55 |
BANK_COLUMNS = ["Account_age"]
|
56 |
+
THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]
|
|
|
57 |
|
58 |
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
59 |
|
|
|
69 |
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
|
70 |
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
|
71 |
AGE_MIN_MAX = get_min_max(_data, "Age")
|
|
|
72 |
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
|
73 |
|
74 |
+
# Default values
|
75 |
+
INCOME_VALUE = 12000
|
76 |
+
AGE_VALUE = 30
|
77 |
+
|
78 |
# App data choices
|
79 |
INCOME_TYPES = list(_data["Income_type"].unique())
|
80 |
OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
|
81 |
HOUSING_TYPES = list(_data["Housing_type"].unique())
|
82 |
EDUCATION_TYPES = list(_data["Education_type"].unique())
|
83 |
FAMILY_STATUS = list(_data["Family_status"].unique())
|
84 |
+
YEARS_EMPLOYED_BINS = ['0-2', '2-5', '5-8', '8-11', '11-18', '18+']
|
85 |
+
|
86 |
+
# Years_employed bin order
|
87 |
+
YEARS_EMPLOYED_BIN_NAME_TO_INDEX = {bin_name: i for i, bin_name in enumerate(YEARS_EMPLOYED_BINS)}
|
88 |
|
89 |
+
assert len(YEARS_EMPLOYED_BINS) == len(list(_data["Years_employed"].unique())), (
|
90 |
+
"Years_employed bins are not matching the expected list"
|
91 |
+
)
|
utils/pre_processing.py
CHANGED
@@ -56,9 +56,7 @@ def get_pre_processors():
|
|
56 |
)
|
57 |
|
58 |
pre_processor_third_party = ColumnTransformer(
|
59 |
-
transformers=[
|
60 |
-
('standard_scaler', StandardScaler(), ['Years_employed']),
|
61 |
-
],
|
62 |
remainder='passthrough',
|
63 |
verbose_feature_names_out=False,
|
64 |
)
|
|
|
56 |
)
|
57 |
|
58 |
pre_processor_third_party = ColumnTransformer(
|
59 |
+
transformers=[],
|
|
|
|
|
60 |
remainder='passthrough',
|
61 |
verbose_feature_names_out=False,
|
62 |
)
|