Spaces:
Running
Running
Add encryption and decryption for sensitive message data
Browse files
Integrated mechanisms to encrypt sensitive words in messages and to decrypt them on retrieval. This improves data protection when handling sensitive user information in message flows.
- test.py +53 -13
- trauma/api/message/ai/engine.py +24 -12
- trauma/api/message/ai/openai_request.py +10 -2
- trauma/api/message/ai/prompts.py +30 -0
- trauma/api/message/utils.py +38 -1
- trauma/api/message/views.py +2 -1
test.py
CHANGED
@@ -1,13 +1,53 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import re
|
2 |
+
#
|
3 |
+
# def xor_cipher(text: str, key: str) -> str:
|
4 |
+
# key_bytes = key.encode('utf-8')
|
5 |
+
# text_bytes = text.encode('utf-8')
|
6 |
+
# key_len = len(key_bytes)
|
7 |
+
# encrypted_bytes = bytes([
|
8 |
+
# text_bytes[i] ^ key_bytes[i % key_len]
|
9 |
+
# for i in range(len(text_bytes))
|
10 |
+
# ])
|
11 |
+
# return encrypted_bytes.hex()
|
12 |
+
#
|
13 |
+
# def encrypt_sensitive_data(text: str, words_to_encrypt: list[str], secret_key: str) -> str:
|
14 |
+
# result = text
|
15 |
+
# for word in words_to_encrypt:
|
16 |
+
# if word in result:
|
17 |
+
# encrypted = xor_cipher(word, secret_key)
|
18 |
+
# result = result.replace(word, f"[{encrypted}]")
|
19 |
+
# return result
|
20 |
+
#
|
21 |
+
# def decrypt_sensitive_data(text: str, secret_key: str) -> str:
|
22 |
+
#
|
23 |
+
# def decrypt_match(match):
|
24 |
+
# encrypted_hex = match.group(1)
|
25 |
+
# # Convert hex back to bytes
|
26 |
+
# encrypted_bytes = bytes.fromhex(encrypted_hex)
|
27 |
+
# # XOR with key to decrypt
|
28 |
+
# key_bytes = secret_key.encode('utf-8')
|
29 |
+
# decrypted_bytes = bytes([
|
30 |
+
# encrypted_bytes[i] ^ key_bytes[i % len(key_bytes)]
|
31 |
+
# for i in range(len(encrypted_bytes))
|
32 |
+
# ])
|
33 |
+
# return decrypted_bytes.decode('utf-8')
|
34 |
+
#
|
35 |
+
# # Find all [encrypted] patterns and decrypt them
|
36 |
+
# pattern = r'\[([\da-fA-F]+)\]'
|
37 |
+
# return re.sub(pattern, decrypt_match, text)
|
38 |
+
#
|
39 |
+
# # Example usage:
|
40 |
+
# if __name__ == "__main__":
|
41 |
+
# SECRET_KEY = "dda7db64674d3cbc571ccedfdb4321818ba642b8dd3ddbdd80d1ce2b2a4a3546"
|
42 |
+
#
|
43 |
+
# # Test encryption
|
44 |
+
# original_text = "Привет! Меня зовут John, я живу в Moscow, мой email: [email protected]"
|
45 |
+
# sensitive_words = []
|
46 |
+
#
|
47 |
+
# encrypted_text = encrypt_sensitive_data(original_text, sensitive_words, SECRET_KEY)
|
48 |
+
# print("Encrypted:", encrypted_text)
|
49 |
+
#
|
50 |
+
# # Test decryption
|
51 |
+
# decrypted_text = decrypt_sensitive_data(encrypted_text, SECRET_KEY)
|
52 |
+
# print("Decrypted:", decrypted_text)
|
53 |
+
#
|
trauma/api/message/ai/engine.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
|
|
5 |
from trauma.api.chat.dto import EntityData
|
6 |
from trauma.api.chat.model import ChatModel
|
7 |
from trauma.api.data.model import EntityModel, EntityModelExtended
|
8 |
-
from trauma.api.message.ai.openai_request import (update_entity_data_with_ai,
|
9 |
generate_next_question,
|
10 |
generate_search_request,
|
11 |
generate_final_response,
|
@@ -28,25 +28,26 @@ from trauma.api.message.utils import (decode_treatment_letters,
|
|
28 |
prepare_final_entities_str,
|
29 |
pick_empty_field_instructions,
|
30 |
find_matching_age_group,
|
31 |
-
search_changed_field_inst
|
|
|
32 |
from trauma.core.config import settings
|
33 |
|
34 |
|
35 |
async def search_entities(
|
36 |
user_message: str, messages: list[MessageModel], chat: ChatModel
|
37 |
) -> CreateMessageResponse:
|
38 |
-
|
39 |
-
message_history_str = prepare_message_history_str(messages,
|
40 |
|
41 |
entity_data, is_valid = await asyncio.gather(
|
42 |
-
update_entity_data_with_ai(chat.entityData,
|
43 |
-
check_is_valid_request(
|
44 |
)
|
45 |
final_entities, fields_changed_inst = None, search_changed_field_inst(entity_data, chat.entityData)
|
46 |
|
47 |
if not is_valid:
|
48 |
empty_field = retrieve_empty_field_from_entity_data(chat.entityData.model_dump(mode='json'))
|
49 |
-
response = await generate_invalid_response(
|
50 |
final_entities = messages[-1].entities if messages else None
|
51 |
|
52 |
else:
|
@@ -57,7 +58,7 @@ async def search_entities(
|
|
57 |
if empty_field == 'age':
|
58 |
response = await generate_next_question(empty_field_instructions, message_history_str)
|
59 |
else:
|
60 |
-
user_messages_str = prepare_user_messages_str(
|
61 |
possible_entity_indexes, search_request = await asyncio.gather(
|
62 |
filter_entities_by_age_location(entity_data),
|
63 |
generate_search_request(user_messages_str, entity_data)
|
@@ -68,16 +69,17 @@ async def search_entities(
|
|
68 |
final_entities_str = prepare_final_entities_str(final_entities)
|
69 |
if final_entities:
|
70 |
response = await generate_final_response(
|
71 |
-
final_entities_str,
|
72 |
)
|
73 |
else:
|
74 |
response = await generate_empty_final_response(
|
75 |
-
|
76 |
)
|
77 |
|
78 |
-
user_message = MessageModel(chatId=chat.id, author=Author.User, text=
|
79 |
assistant_message = MessageModel(chatId=chat.id, author=Author.Assistant, text=response, entities=final_entities)
|
80 |
-
|
|
|
81 |
return assistant_message
|
82 |
|
83 |
|
@@ -134,3 +136,13 @@ async def set_entities_score(entities: list[EntityModelExtended], search_request
|
|
134 |
if score > 0.72:
|
135 |
final_entities.append(entity)
|
136 |
return sorted(final_entities, key=lambda x: x.score, reverse=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from trauma.api.chat.dto import EntityData
|
6 |
from trauma.api.chat.model import ChatModel
|
7 |
from trauma.api.data.model import EntityModel, EntityModelExtended
|
8 |
+
from trauma.api.message.ai.openai_request import (get_sensitive_words, update_entity_data_with_ai,
|
9 |
generate_next_question,
|
10 |
generate_search_request,
|
11 |
generate_final_response,
|
|
|
28 |
prepare_final_entities_str,
|
29 |
pick_empty_field_instructions,
|
30 |
find_matching_age_group,
|
31 |
+
search_changed_field_inst,
|
32 |
+
encrypt_message)
|
33 |
from trauma.core.config import settings
|
34 |
|
35 |
|
36 |
async def search_entities(
|
37 |
user_message: str, messages: list[MessageModel], chat: ChatModel
|
38 |
) -> CreateMessageResponse:
|
39 |
+
user_message = decode_treatment_letters(user_message)
|
40 |
+
message_history_str = prepare_message_history_str(messages, user_message)
|
41 |
|
42 |
entity_data, is_valid = await asyncio.gather(
|
43 |
+
update_entity_data_with_ai(chat.entityData, user_message, messages[-1].text),
|
44 |
+
check_is_valid_request(user_message, message_history_str)
|
45 |
)
|
46 |
final_entities, fields_changed_inst = None, search_changed_field_inst(entity_data, chat.entityData)
|
47 |
|
48 |
if not is_valid:
|
49 |
empty_field = retrieve_empty_field_from_entity_data(chat.entityData.model_dump(mode='json'))
|
50 |
+
response = await generate_invalid_response(user_message, message_history_str, empty_field)
|
51 |
final_entities = messages[-1].entities if messages else None
|
52 |
|
53 |
else:
|
|
|
58 |
if empty_field == 'age':
|
59 |
response = await generate_next_question(empty_field_instructions, message_history_str)
|
60 |
else:
|
61 |
+
user_messages_str = prepare_user_messages_str(user_message, messages)
|
62 |
possible_entity_indexes, search_request = await asyncio.gather(
|
63 |
filter_entities_by_age_location(entity_data),
|
64 |
generate_search_request(user_messages_str, entity_data)
|
|
|
69 |
final_entities_str = prepare_final_entities_str(final_entities)
|
70 |
if final_entities:
|
71 |
response = await generate_final_response(
|
72 |
+
final_entities_str, user_message, message_history_str, empty_field_instructions
|
73 |
)
|
74 |
else:
|
75 |
response = await generate_empty_final_response(
|
76 |
+
user_message, message_history_str, fields_changed_inst
|
77 |
)
|
78 |
|
79 |
+
user_message = MessageModel(chatId=chat.id, author=Author.User, text=user_message)
|
80 |
assistant_message = MessageModel(chatId=chat.id, author=Author.Assistant, text=response, entities=final_entities)
|
81 |
+
user_message_enc, assistant_message_enc = await encrypt_messages([user_message, assistant_message])
|
82 |
+
asyncio.create_task(save_assistant_user_message(user_message_enc, assistant_message_enc))
|
83 |
return assistant_message
|
84 |
|
85 |
|
|
|
136 |
if score > 0.72:
|
137 |
final_entities.append(entity)
|
138 |
return sorted(final_entities, key=lambda x: x.score, reverse=True)
|
139 |
+
|
140 |
+
|
141 |
+
async def encrypt_messages(messages: list[MessageModel]) -> list[MessageModel]:
    """Return copies of ``messages`` whose text has its sensitive words masked.

    ``get_sensitive_words`` is awaited for every message text concurrently
    through ``asyncio.gather``; each result is then handed to
    ``encrypt_message`` together with the corresponding text.

    Args:
        messages: Messages whose ``text`` should be masked.

    Returns:
        A new list, in the same order as ``messages``, of freshly built
        ``MessageModel`` objects. The masked text is assigned to the copies
        only; the ``text`` of the input objects is not reassigned here.
    """
    lookups = [get_sensitive_words(item.text) for item in messages]
    words_per_message = await asyncio.gather(*lookups)

    def _masked_copy(source, words):
        # Rebuild the model from its dump so the caller's instance keeps
        # the plain text.
        clone = MessageModel(**source.model_dump())
        clone.text = encrypt_message(source.text, words)
        return clone

    return [
        _masked_copy(source, words)
        for source, words in zip(messages, words_per_message)
    ]
|
trauma/api/message/ai/openai_request.py
CHANGED
@@ -209,5 +209,13 @@ async def generate_searched_entity_response(user_query: str, facility: EntityMod
|
|
209 |
return messages
|
210 |
|
211 |
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
return messages
|
210 |
|
211 |
|
212 |
+
@openai_wrapper(is_json=True, return_='words')
async def get_sensitive_words(text: str):
    """Build the chat payload that asks the model to list sensitive words in ``text``.

    The body only fills the ``{text}`` placeholder of
    ``TraumaPrompts.get_sensitive_words`` and returns a one-element list
    holding a single system message.

    NOTE(review): sending the request and extracting the ``words`` field of
    the JSON answer is presumably done by ``openai_wrapper`` -- confirm
    against the decorator.
    """
    prompt = TraumaPrompts.get_sensitive_words.replace("{text}", text)
    system_message = {"role": "system", "content": prompt}
    return [system_message]
|
trauma/api/message/ai/prompts.py
CHANGED
@@ -382,3 +382,33 @@ Your response must be in the following JSON format:
|
|
382 |
}
|
383 |
```
|
384 |
- **score**: A floating-point number between **0.00 and 1.00**, representing the degree of relevance."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
}
|
383 |
```
|
384 |
- **score**: A floating-point number between **0.00 and 1.00**, representing the degree of relevance."""
|
385 |
+
get_sensitive_words = """## Task
|
386 |
+
|
387 |
+
You must find all sensitive words or word combinations in the text `{text}` and return them in a JSON object. Sensitive data includes:
|
388 |
+
- Personal data
|
389 |
+
- Name and surname
|
390 |
+
- Email
|
391 |
+
- Phone number
|
392 |
+
- Date of birth
|
393 |
+
- Address (street, house number, postal code)
|
394 |
+
- IP-address
|
395 |
+
- Civil registration number
|
396 |
+
|
397 |
+
## Data
|
398 |
+
|
399 |
+
**Text**:
|
400 |
+
```
|
401 |
+
{text}
|
402 |
+
```
|
403 |
+
|
404 |
+
## JSON Response Format
|
405 |
+
|
406 |
+
```json
|
407 |
+
{
|
408 |
+
"words": ["string", "string"]
|
409 |
+
}
|
410 |
+
```
|
411 |
+
|
412 |
+
## Important Notes
|
413 |
+
|
414 |
+
- The words must be in the same case and language as they appear in the text."""
|
trauma/api/message/utils.py
CHANGED
@@ -6,7 +6,7 @@ from trauma.api.data.dto import AgeGroup
|
|
6 |
from trauma.api.data.model import EntityModel
|
7 |
from trauma.api.message.dto import Author
|
8 |
from trauma.api.message.model import MessageModel
|
9 |
-
|
10 |
|
11 |
def transform_messages_to_openai(messages: list[MessageModel]) -> list[dict]:
|
12 |
openai_messages = []
|
@@ -268,3 +268,40 @@ def search_changed_field_inst(entity_data: dict, old_entity_data: EntityData) ->
|
|
268 |
real_key = key if key!="treatmentArea" else "traumaType"
|
269 |
changed_fields[real_key] = instruction_map[key]
|
270 |
return changed_fields
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from trauma.api.data.model import EntityModel
|
7 |
from trauma.api.message.dto import Author
|
8 |
from trauma.api.message.model import MessageModel
|
9 |
+
from trauma.core.config import settings
|
10 |
|
11 |
def transform_messages_to_openai(messages: list[MessageModel]) -> list[dict]:
|
12 |
openai_messages = []
|
|
|
268 |
real_key = key if key!="treatmentArea" else "traumaType"
|
269 |
changed_fields[real_key] = instruction_map[key]
|
270 |
return changed_fields
|
271 |
+
|
272 |
+
|
273 |
+
def xor_cipher(text: str, key: str) -> str:
    """XOR the UTF-8 bytes of ``text`` with a repeating ``key`` and hex-encode the result.

    Args:
        text: Plain text to mask. It is encoded as UTF-8 before masking.
        key: Secret whose UTF-8 bytes are repeated cyclically over the text.

    Returns:
        The masked bytes as a lowercase hexadecimal string (two characters
        per byte); an empty ``text`` yields an empty string.

    Raises:
        ValueError: If ``key`` is empty. Without this guard the modulo below
            would fail with an opaque ``ZeroDivisionError``.

    NOTE(review): repeating-key XOR is obfuscation rather than strong
    encryption -- consider an authenticated cipher if this protects real
    personal data.
    """
    key_bytes = key.encode('utf-8')
    if not key_bytes:
        raise ValueError("key must not be empty")

    key_len = len(key_bytes)
    masked = bytes(
        byte ^ key_bytes[index % key_len]
        for index, byte in enumerate(text.encode('utf-8'))
    )
    return masked.hex()
|
282 |
+
|
283 |
+
def encrypt_message(text: str, words_to_encrypt: list[str]) -> str:
    """Replace every occurrence of the given words in ``text`` with ``[<hex>]`` tokens.

    Each matched word is passed to ``xor_cipher`` with ``settings.SECRET_KEY``
    and the resulting hex string is wrapped in square brackets.

    Args:
        text: Message text to mask.
        words_to_encrypt: Words or word combinations to mask. Empty strings,
            non-string entries and duplicates are ignored; ``None`` is
            treated as an empty list.

    Returns:
        ``text`` with all occurrences of the words replaced by tokens, or
        ``text`` unchanged when there is nothing to mask.
    """
    import re  # local import: does not rely on a module-level ``import re``

    # An empty word would match between every character of the text, so it
    # is dropped together with anything that is not a string.
    candidates = {
        word for word in (words_to_encrypt or [])
        if isinstance(word, str) and word
    }
    if not candidates:
        return text

    # Longest first, so a word that is contained in a longer sensitive phrase
    # does not split that phrase into a token plus leftover plain text.
    ordered = sorted(candidates, key=lambda word: (-len(word), word))
    pattern = re.compile("|".join(re.escape(word) for word in ordered))

    def _to_token(match):
        return f"[{xor_cipher(match.group(0), settings.SECRET_KEY)}]"

    # One pass over the original text: already produced hex tokens are never
    # rescanned, so a short word (e.g. "a" or "1") cannot corrupt them.
    return pattern.sub(_to_token, text)
|
290 |
+
|
291 |
+
def decrypt_messages(messages: list[MessageModel]) -> list[MessageModel]:
    """Replace ``[<hex>]`` tokens in each message's text with their decrypted value.

    A token is a pair of square brackets around an even number of ASCII hex
    digits. Its bytes are XOR-ed with the repeating UTF-8 bytes of
    ``settings.SECRET_KEY`` and decoded as UTF-8.

    Args:
        messages: Messages to process. Their ``text`` attribute is updated
            in place.

    Returns:
        The same list object that was passed in.

    Raises:
        ValueError: If a token is found while ``settings.SECRET_KEY`` is empty.
    """
    import re  # local import: does not rely on a module-level ``import re``

    # Whole bytes only: bracketed text such as "[1]" or "[abc]" is ordinary
    # message content and used to crash ``bytes.fromhex``.
    token_pattern = re.compile(r'\[((?:[0-9a-fA-F]{2})+)\]')

    def decrypt_match(match):
        key_bytes = settings.SECRET_KEY.encode('utf-8')
        if not key_bytes:
            raise ValueError("SECRET_KEY must not be empty")
        key_len = len(key_bytes)
        encrypted_bytes = bytes.fromhex(match.group(1))
        decrypted_bytes = bytes(
            byte ^ key_bytes[index % key_len]
            for index, byte in enumerate(encrypted_bytes)
        )
        try:
            return decrypted_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # Bracketed hex that does not decode to valid UTF-8 is kept
            # verbatim instead of failing the whole request.
            return match.group(0)

    for message in messages:
        message.text = token_pattern.sub(decrypt_match, message.text)
    return messages
|
trauma/api/message/views.py
CHANGED
@@ -13,7 +13,7 @@ from trauma.api.message.schemas import (AllMessageWrapper,
|
|
13 |
CreateMessageRequest)
|
14 |
from trauma.core.security import PermissionDependency
|
15 |
from trauma.core.wrappers import TraumaResponseWrapper
|
16 |
-
|
17 |
|
18 |
@message_router.get('/{chatId}/all')
|
19 |
async def get_all_chat_messages(
|
@@ -35,6 +35,7 @@ async def create_message(
|
|
35 |
account: AccountModel = Depends(PermissionDependency([AccountType.Admin, AccountType.User]))
|
36 |
) -> TraumaResponseWrapper[MessageModel]:
|
37 |
messages, chat = await get_all_chat_messages_obj(chatId, account)
|
|
|
38 |
response = await search_entities(message_data.text, messages, chat)
|
39 |
return TraumaResponseWrapper(data=response)
|
40 |
|
|
|
13 |
CreateMessageRequest)
|
14 |
from trauma.core.security import PermissionDependency
|
15 |
from trauma.core.wrappers import TraumaResponseWrapper
|
16 |
+
from trauma.api.message.utils import decrypt_messages
|
17 |
|
18 |
@message_router.get('/{chatId}/all')
|
19 |
async def get_all_chat_messages(
|
|
|
35 |
account: AccountModel = Depends(PermissionDependency([AccountType.Admin, AccountType.User]))
|
36 |
) -> TraumaResponseWrapper[MessageModel]:
|
37 |
messages, chat = await get_all_chat_messages_obj(chatId, account)
|
38 |
+
messages = decrypt_messages(messages)
|
39 |
response = await search_entities(message_data.text, messages, chat)
|
40 |
return TraumaResponseWrapper(data=response)
|
41 |
|