brestok committed on
Commit
40749a1
·
1 Parent(s): 7373a4a

Add encryption and decryption for sensitive message data

Browse files

Integrated mechanisms to encrypt sensitive words in messages and decrypt them when retrieving. This ensures better data protection while handling sensitive user information in message flows.

test.py CHANGED
@@ -1,13 +1,53 @@
1
- import asyncio
2
-
3
- import numpy as np
4
-
5
- from trauma.api.message.ai.openai_request import convert_value_to_embeddings
6
- from trauma.core.config import settings
7
-
8
-
9
- async def main():
10
- entities = await settings.DB_CLIENT
11
-
12
- if __name__ == '__main__':
13
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import re
2
+ #
3
+ # def xor_cipher(text: str, key: str) -> str:
4
+ # key_bytes = key.encode('utf-8')
5
+ # text_bytes = text.encode('utf-8')
6
+ # key_len = len(key_bytes)
7
+ # encrypted_bytes = bytes([
8
+ # text_bytes[i] ^ key_bytes[i % key_len]
9
+ # for i in range(len(text_bytes))
10
+ # ])
11
+ # return encrypted_bytes.hex()
12
+ #
13
+ # def encrypt_sensitive_data(text: str, words_to_encrypt: list[str], secret_key: str) -> str:
14
+ # result = text
15
+ # for word in words_to_encrypt:
16
+ # if word in result:
17
+ # encrypted = xor_cipher(word, secret_key)
18
+ # result = result.replace(word, f"[{encrypted}]")
19
+ # return result
20
+ #
21
+ # def decrypt_sensitive_data(text: str, secret_key: str) -> str:
22
+ #
23
+ # def decrypt_match(match):
24
+ # encrypted_hex = match.group(1)
25
+ # # Convert hex back to bytes
26
+ # encrypted_bytes = bytes.fromhex(encrypted_hex)
27
+ # # XOR with key to decrypt
28
+ # key_bytes = secret_key.encode('utf-8')
29
+ # decrypted_bytes = bytes([
30
+ # encrypted_bytes[i] ^ key_bytes[i % len(key_bytes)]
31
+ # for i in range(len(encrypted_bytes))
32
+ # ])
33
+ # return decrypted_bytes.decode('utf-8')
34
+ #
35
+ # # Find all [encrypted] patterns and decrypt them
36
+ # pattern = r'\[([\da-fA-F]+)\]'
37
+ # return re.sub(pattern, decrypt_match, text)
38
+ #
39
+ # # Example usage:
40
+ # if __name__ == "__main__":
41
+ # SECRET_KEY = "dda7db64674d3cbc571ccedfdb4321818ba642b8dd3ddbdd80d1ce2b2a4a3546"
42
+ #
43
+ # # Test encryption
44
+ # original_text = "Привет! Меня зовут John, я живу в Moscow, мой email: [email protected]"
45
+ # sensitive_words = []
46
+ #
47
+ # encrypted_text = encrypt_sensitive_data(original_text, sensitive_words, SECRET_KEY)
48
+ # print("Encrypted:", encrypted_text)
49
+ #
50
+ # # Test decryption
51
+ # decrypted_text = decrypt_sensitive_data(encrypted_text, SECRET_KEY)
52
+ # print("Decrypted:", decrypted_text)
53
+ #
trauma/api/message/ai/engine.py CHANGED
@@ -5,7 +5,7 @@ import numpy as np
5
  from trauma.api.chat.dto import EntityData
6
  from trauma.api.chat.model import ChatModel
7
  from trauma.api.data.model import EntityModel, EntityModelExtended
8
- from trauma.api.message.ai.openai_request import (update_entity_data_with_ai,
9
  generate_next_question,
10
  generate_search_request,
11
  generate_final_response,
@@ -28,25 +28,26 @@ from trauma.api.message.utils import (decode_treatment_letters,
28
  prepare_final_entities_str,
29
  pick_empty_field_instructions,
30
  find_matching_age_group,
31
- search_changed_field_inst)
 
32
  from trauma.core.config import settings
33
 
34
 
35
  async def search_entities(
36
  user_message: str, messages: list[MessageModel], chat: ChatModel
37
  ) -> CreateMessageResponse:
38
- decoded_message = decode_treatment_letters(user_message)
39
- message_history_str = prepare_message_history_str(messages, decoded_message)
40
 
41
  entity_data, is_valid = await asyncio.gather(
42
- update_entity_data_with_ai(chat.entityData, decoded_message, messages[-1].text),
43
- check_is_valid_request(decoded_message, message_history_str)
44
  )
45
  final_entities, fields_changed_inst = None, search_changed_field_inst(entity_data, chat.entityData)
46
 
47
  if not is_valid:
48
  empty_field = retrieve_empty_field_from_entity_data(chat.entityData.model_dump(mode='json'))
49
- response = await generate_invalid_response(decoded_message, message_history_str, empty_field)
50
  final_entities = messages[-1].entities if messages else None
51
 
52
  else:
@@ -57,7 +58,7 @@ async def search_entities(
57
  if empty_field == 'age':
58
  response = await generate_next_question(empty_field_instructions, message_history_str)
59
  else:
60
- user_messages_str = prepare_user_messages_str(decoded_message, messages)
61
  possible_entity_indexes, search_request = await asyncio.gather(
62
  filter_entities_by_age_location(entity_data),
63
  generate_search_request(user_messages_str, entity_data)
@@ -68,16 +69,17 @@ async def search_entities(
68
  final_entities_str = prepare_final_entities_str(final_entities)
69
  if final_entities:
70
  response = await generate_final_response(
71
- final_entities_str, decoded_message, message_history_str, empty_field_instructions
72
  )
73
  else:
74
  response = await generate_empty_final_response(
75
- decoded_message, message_history_str, fields_changed_inst
76
  )
77
 
78
- user_message = MessageModel(chatId=chat.id, author=Author.User, text=decoded_message)
79
  assistant_message = MessageModel(chatId=chat.id, author=Author.Assistant, text=response, entities=final_entities)
80
- asyncio.create_task(save_assistant_user_message(user_message, assistant_message))
 
81
  return assistant_message
82
 
83
 
@@ -134,3 +136,13 @@ async def set_entities_score(entities: list[EntityModelExtended], search_request
134
  if score > 0.72:
135
  final_entities.append(entity)
136
  return sorted(final_entities, key=lambda x: x.score, reverse=True)
 
 
 
 
 
 
 
 
 
 
 
5
  from trauma.api.chat.dto import EntityData
6
  from trauma.api.chat.model import ChatModel
7
  from trauma.api.data.model import EntityModel, EntityModelExtended
8
+ from trauma.api.message.ai.openai_request import (get_sensitive_words, update_entity_data_with_ai,
9
  generate_next_question,
10
  generate_search_request,
11
  generate_final_response,
 
28
  prepare_final_entities_str,
29
  pick_empty_field_instructions,
30
  find_matching_age_group,
31
+ search_changed_field_inst,
32
+ encrypt_message)
33
  from trauma.core.config import settings
34
 
35
 
36
  async def search_entities(
37
  user_message: str, messages: list[MessageModel], chat: ChatModel
38
  ) -> CreateMessageResponse:
39
+ user_message = decode_treatment_letters(user_message)
40
+ message_history_str = prepare_message_history_str(messages, user_message)
41
 
42
  entity_data, is_valid = await asyncio.gather(
43
+ update_entity_data_with_ai(chat.entityData, user_message, messages[-1].text),
44
+ check_is_valid_request(user_message, message_history_str)
45
  )
46
  final_entities, fields_changed_inst = None, search_changed_field_inst(entity_data, chat.entityData)
47
 
48
  if not is_valid:
49
  empty_field = retrieve_empty_field_from_entity_data(chat.entityData.model_dump(mode='json'))
50
+ response = await generate_invalid_response(user_message, message_history_str, empty_field)
51
  final_entities = messages[-1].entities if messages else None
52
 
53
  else:
 
58
  if empty_field == 'age':
59
  response = await generate_next_question(empty_field_instructions, message_history_str)
60
  else:
61
+ user_messages_str = prepare_user_messages_str(user_message, messages)
62
  possible_entity_indexes, search_request = await asyncio.gather(
63
  filter_entities_by_age_location(entity_data),
64
  generate_search_request(user_messages_str, entity_data)
 
69
  final_entities_str = prepare_final_entities_str(final_entities)
70
  if final_entities:
71
  response = await generate_final_response(
72
+ final_entities_str, user_message, message_history_str, empty_field_instructions
73
  )
74
  else:
75
  response = await generate_empty_final_response(
76
+ user_message, message_history_str, fields_changed_inst
77
  )
78
 
79
+ user_message = MessageModel(chatId=chat.id, author=Author.User, text=user_message)
80
  assistant_message = MessageModel(chatId=chat.id, author=Author.Assistant, text=response, entities=final_entities)
81
+ user_message_enc, assistant_message_enc = await encrypt_messages([user_message, assistant_message])
82
+ asyncio.create_task(save_assistant_user_message(user_message_enc, assistant_message_enc))
83
  return assistant_message
84
 
85
 
 
136
  if score > 0.72:
137
  final_entities.append(entity)
138
  return sorted(final_entities, key=lambda x: x.score, reverse=True)
139
+
140
+
141
async def encrypt_messages(messages: list[MessageModel]) -> list[MessageModel]:
    """Return copies of *messages* whose text has its sensitive words masked.

    Sensitive words are requested for every message concurrently via
    ``get_sensitive_words``; each message is then rebuilt from its own
    ``model_dump()`` and the copy's ``text`` is replaced with the output of
    ``encrypt_message``. The input models are not modified.

    Args:
        messages: Messages to protect, in the order they should be returned.

    Returns:
        New ``MessageModel`` instances, one per input message, same order.
    """
    # One detection request per message, awaited together.
    detection_jobs = (get_sensitive_words(item.text) for item in messages)
    words_per_message = await asyncio.gather(*detection_jobs)

    protected = []
    for original, words in zip(messages, words_per_message):
        # Rebuild from the dumped fields so the caller's instance keeps its
        # plaintext.
        clone = MessageModel(**original.model_dump())
        clone.text = encrypt_message(original.text, words)
        protected.append(clone)
    return protected
trauma/api/message/ai/openai_request.py CHANGED
@@ -209,5 +209,13 @@ async def generate_searched_entity_response(user_query: str, facility: EntityMod
209
  return messages
210
 
211
 
212
- if __name__ == '__main__':
213
- asyncio.run(retrieve_semantic_answer('I want to know more about Praktijk Hermens'))
 
 
 
 
 
 
 
 
 
209
  return messages
210
 
211
 
212
@openai_wrapper(is_json=True, return_='words')
async def get_sensitive_words(text: str):
    """Build the chat payload that asks the model to list sensitive words.

    The body only produces the message list: a single system message whose
    content is ``TraumaPrompts.get_sensitive_words`` with every ``{text}``
    placeholder replaced by *text*.

    NOTE(review): the function is wrapped by ``openai_wrapper`` with
    ``is_json=True`` and ``return_='words'`` - presumably the wrapper sends
    the request and returns the ``words`` field of the JSON reply; confirm
    against the wrapper's implementation.

    Args:
        text: Raw message text to scan for sensitive data.
    """
    prompt = TraumaPrompts.get_sensitive_words.replace("{text}", text)
    system_message = {"role": "system", "content": prompt}
    return [system_message]
trauma/api/message/ai/prompts.py CHANGED
@@ -382,3 +382,33 @@ Your response must be in the following JSON format:
382
  }
383
  ```
384
  - **score**: A floating-point number between **0.00 and 1.00**, representing the degree of relevance."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  }
383
  ```
384
  - **score**: A floating-point number between **0.00 and 1.00**, representing the degree of relevance."""
385
+ get_sensitive_words = """## Task
386
+
387
+ You must find all sensitive words or word combinations in the text `{text}` and return them in a JSON object. Sensitive data includes:
388
+ - Personal data
389
+ - Name and surname
390
+ - Email
391
+ - Phone number
392
+ - Date of birth
393
+ - Address (street, house number, postal code)
394
+ - IP-address
395
+ - Civil registration number
396
+
397
+ ## Data
398
+
399
+ **Text**:
400
+ ```
401
+ {text}
402
+ ```
403
+
404
+ ## JSON Response Format
405
+
406
+ ```json
407
+ {
408
+ "words": ["string", "string"]
409
+ }
410
+ ```
411
+
412
+ ## Important Notes
413
+
414
+ - The words must be in the same case and language as they appear in the text."""
trauma/api/message/utils.py CHANGED
@@ -6,7 +6,7 @@ from trauma.api.data.dto import AgeGroup
6
  from trauma.api.data.model import EntityModel
7
  from trauma.api.message.dto import Author
8
  from trauma.api.message.model import MessageModel
9
-
10
 
11
  def transform_messages_to_openai(messages: list[MessageModel]) -> list[dict]:
12
  openai_messages = []
@@ -268,3 +268,40 @@ def search_changed_field_inst(entity_data: dict, old_entity_data: EntityData) ->
268
  real_key = key if key!="treatmentArea" else "traumaType"
269
  changed_fields[real_key] = instruction_map[key]
270
  return changed_fields
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from trauma.api.data.model import EntityModel
7
  from trauma.api.message.dto import Author
8
  from trauma.api.message.model import MessageModel
9
+ from trauma.core.config import settings
10
 
11
  def transform_messages_to_openai(messages: list[MessageModel]) -> list[dict]:
12
  openai_messages = []
 
268
  real_key = key if key!="treatmentArea" else "traumaType"
269
  changed_fields[real_key] = instruction_map[key]
270
  return changed_fields
271
+
272
+
273
def xor_cipher(text: str, key: str) -> str:
    """XOR the UTF-8 bytes of *text* with a repeating *key* and hex-encode.

    Byte ``i`` of the encoded text is combined with byte ``i % len(key)`` of
    the encoded key, so the key is reused cyclically when the text is longer
    than the key.

    NOTE(review): repeating-key XOR with a static key is obfuscation rather
    than encryption - one known plaintext/ciphertext pair reveals the key
    bytes it covers. Consider an authenticated cipher for real protection.

    Args:
        text: Plaintext to transform.
        key: Secret key; must be non-empty.

    Returns:
        Lowercase hexadecimal string, two characters per input byte.

    Raises:
        ValueError: If *key* is empty (previously surfaced as an opaque
            ``ZeroDivisionError`` from the modulo).
    """
    key_bytes = key.encode('utf-8')
    if not key_bytes:
        raise ValueError("key must not be empty")
    key_len = len(key_bytes)
    text_bytes = text.encode('utf-8')
    encrypted_bytes = bytes(
        byte ^ key_bytes[index % key_len]
        for index, byte in enumerate(text_bytes)
    )
    return encrypted_bytes.hex()
282
+
283
def encrypt_message(text: str, words_to_encrypt: list[str]) -> str:
    """Replace every sensitive word in *text* with a ``[hex]`` token.

    Each occurrence of a word from *words_to_encrypt* is passed through
    ``xor_cipher`` with ``settings.SECRET_KEY`` and the hex result is wrapped
    in square brackets. Matching is exact and case-sensitive.

    All words are substituted in a single left-to-right pass, which fixes
    three problems of replacing word by word:

    * an empty string in the list no longer inserts a token between every
      character of the text;
    * a short word (e.g. a house number) can no longer match inside the hex
      of a token produced for an earlier word and corrupt it;
    * when one word is contained in another ("John" / "John Smith"), the
      longer phrase wins instead of being left half in plaintext.

    Args:
        text: Message text to protect.
        words_to_encrypt: Words or phrases to mask. ``None``, empty strings
            and non-string entries are ignored.

    Returns:
        The text with each matched word replaced by ``[<hex>]``; the text
        unchanged when there is nothing to mask.
    """
    # Local import keeps this function independent of the module's import
    # list.
    import re

    # Deduplicate and put longer phrases first so the alternation prefers
    # them; the secondary key only makes the pattern order deterministic.
    candidates = sorted(
        {word for word in (words_to_encrypt or ()) if isinstance(word, str) and word},
        key=lambda word: (-len(word), word),
    )
    if not candidates:
        return text

    pattern = re.compile("|".join(re.escape(word) for word in candidates))
    return pattern.sub(
        lambda match: f"[{xor_cipher(match.group(0), settings.SECRET_KEY)}]",
        text,
    )
290
+
291
def decrypt_messages(messages: list[MessageModel]) -> list[MessageModel]:
    """Restore ``[hex]`` tokens in each message's text to their plaintext.

    Every ``[<hex digits>]`` span is hex-decoded, XOR-ed with the repeating
    bytes of ``settings.SECRET_KEY`` and decoded as UTF-8. The messages are
    updated in place and the same list is returned.

    Bracketed text that merely looks like a token (for example ``[1]`` or
    ``[abc]``) used to raise from ``bytes.fromhex`` or ``.decode``. Such spans
    are now left exactly as they were.

    NOTE(review): a bracketed, even-length hex string that happens to decode
    to valid UTF-8 is still rewritten; telling it apart from a real token
    would need a marker in the stored format.

    Args:
        messages: Messages whose ``text`` may contain encrypted tokens.

    Returns:
        The same list object, with each message's ``text`` decrypted.
    """
    # Encode the key once instead of on every match.
    key_bytes = settings.SECRET_KEY.encode('utf-8')
    key_len = len(key_bytes)
    pattern = re.compile(r'\[([\da-fA-F]+)\]')

    def decrypt_match(match):
        try:
            encrypted_bytes = bytes.fromhex(match.group(1))
            decrypted_bytes = bytes(
                byte ^ key_bytes[index % key_len]
                for index, byte in enumerate(encrypted_bytes)
            )
            return decrypted_bytes.decode('utf-8')
        except ValueError:
            # Odd-length hex or bytes that are not valid UTF-8
            # (UnicodeDecodeError is a ValueError): not one of our tokens,
            # keep the original text.
            return match.group(0)

    for message in messages:
        message.text = pattern.sub(decrypt_match, message.text)
    return messages
trauma/api/message/views.py CHANGED
@@ -13,7 +13,7 @@ from trauma.api.message.schemas import (AllMessageWrapper,
13
  CreateMessageRequest)
14
  from trauma.core.security import PermissionDependency
15
  from trauma.core.wrappers import TraumaResponseWrapper
16
-
17
 
18
  @message_router.get('/{chatId}/all')
19
  async def get_all_chat_messages(
@@ -35,6 +35,7 @@ async def create_message(
35
  account: AccountModel = Depends(PermissionDependency([AccountType.Admin, AccountType.User]))
36
  ) -> TraumaResponseWrapper[MessageModel]:
37
  messages, chat = await get_all_chat_messages_obj(chatId, account)
 
38
  response = await search_entities(message_data.text, messages, chat)
39
  return TraumaResponseWrapper(data=response)
40
 
 
13
  CreateMessageRequest)
14
  from trauma.core.security import PermissionDependency
15
  from trauma.core.wrappers import TraumaResponseWrapper
16
+ from trauma.api.message.utils import decrypt_messages
17
 
18
  @message_router.get('/{chatId}/all')
19
  async def get_all_chat_messages(
 
35
  account: AccountModel = Depends(PermissionDependency([AccountType.Admin, AccountType.User]))
36
  ) -> TraumaResponseWrapper[MessageModel]:
37
  messages, chat = await get_all_chat_messages_obj(chatId, account)
38
+ messages = decrypt_messages(messages)
39
  response = await search_entities(message_data.text, messages, chat)
40
  return TraumaResponseWrapper(data=response)
41