Spaces:
Runtime error
Runtime error
arogeriogel
committed on
including metadata
Browse files
app.py
CHANGED
@@ -3,7 +3,8 @@ import streamlit as st
|
|
3 |
import re
|
4 |
import logging
|
5 |
from presidio_anonymizer import AnonymizerEngine
|
6 |
-
from presidio_analyzer import AnalyzerEngine,
|
|
|
7 |
from annotated_text import annotated_text
|
8 |
from flair_recognizer import FlairRecognizer
|
9 |
|
@@ -36,9 +37,17 @@ def analyzer_engine():
|
|
36 |
|
37 |
def analyze(**kwargs):
|
38 |
"""Analyze input using Analyzer engine and input arguments (kwargs)."""
|
|
|
|
|
39 |
if "entities" not in kwargs or "All" in kwargs["entities"]:
|
40 |
kwargs["entities"] = None
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
def annotate():
|
44 |
text = st.session_state.text
|
@@ -88,8 +97,8 @@ def analyze_text():
|
|
88 |
return_decision_process=False,
|
89 |
)
|
90 |
|
91 |
-
if st.session_state.excluded_words:
|
92 |
-
|
93 |
|
94 |
if st.session_state.allowed_words:
|
95 |
analyze_results = exclude_manual_input(analyze_results)
|
@@ -99,29 +108,29 @@ def analyze_text():
|
|
99 |
logging.info(
|
100 |
f"analyse results: {st.session_state.analyze_results}\n"
|
101 |
)
|
102 |
-
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
def exclude_manual_input(analyze_results):
|
127 |
analyze_results_fltered=[]
|
|
|
3 |
import re
|
4 |
import logging
|
5 |
from presidio_anonymizer import AnonymizerEngine
|
6 |
+
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
|
7 |
+
|
8 |
from annotated_text import annotated_text
|
9 |
from flair_recognizer import FlairRecognizer
|
10 |
|
|
|
37 |
|
38 |
def analyze(**kwargs):
    """Analyze input using Analyzer engine and input arguments (kwargs).

    If ``entities`` is missing from ``kwargs`` or contains ``"All"``, it is
    replaced with ``None`` before the call. When
    ``st.session_state.excluded_words`` is truthy, a ``PatternRecognizer``
    for the ``"MANUAL ADD"`` entity is built from it (as a deny list) and
    added to the engine's registry.

    Args:
        **kwargs: Keyword arguments forwarded unchanged (apart from the
            ``entities`` normalization above) to the engine's ``analyze``.

    Returns:
        Whatever the engine's ``analyze(**kwargs)`` call returns.
    """
    # Bind the engine to a distinct local name. Assigning to
    # `analyzer_engine` inside this function would make that name local for
    # the whole function body, so calling it on the right-hand side would
    # raise UnboundLocalError before the module-level factory is reached.
    engine = analyzer_engine()

    if "entities" not in kwargs or "All" in kwargs["entities"]:
        kwargs["entities"] = None

    if st.session_state.excluded_words:
        excluded_words_recognizer = PatternRecognizer(
            supported_entity="MANUAL ADD",
            deny_list=st.session_state.excluded_words,
        )
        # NOTE(review): a recognizer is added on every call. If
        # analyzer_engine() returns a shared or cached engine, recognizers
        # from earlier calls would remain registered -- confirm against the
        # factory's definition.
        engine.registry.add_recognizer(excluded_words_recognizer)

    return engine.analyze(**kwargs)
|
51 |
|
52 |
def annotate():
|
53 |
text = st.session_state.text
|
|
|
97 |
return_decision_process=False,
|
98 |
)
|
99 |
|
100 |
+
# if st.session_state.excluded_words:
|
101 |
+
# analyze_results = include_manual_input(analyze_results)
|
102 |
|
103 |
if st.session_state.allowed_words:
|
104 |
analyze_results = exclude_manual_input(analyze_results)
|
|
|
108 |
logging.info(
|
109 |
f"analyse results: {st.session_state.analyze_results}\n"
|
110 |
)
|
|
|
111 |
|
112 |
+
|
113 |
+
# def include_manual_input(analyze_results):
|
114 |
+
# analyze_results_extended=analyze_results
|
115 |
+
# logging.info(
|
116 |
+
# f"analyse results before adding extra words: {analyze_results}\n"
|
117 |
+
# )
|
118 |
+
# for word in st.session_state.excluded_words:
|
119 |
+
# if word in st.session_state.text:
|
120 |
+
# r = re.compile(word)
|
121 |
+
# index_entries = [[m.start(),m.end()] for m in r.finditer(st.session_state.text)]
|
122 |
+
# for entry in index_entries:
|
123 |
+
# start=entry[0]
|
124 |
+
# end=entry[1]
|
125 |
|
126 |
+
# analyze_results_extended.append({"type": "MANUAL ADD", "start": start, "end": end, "score": 1.0})
|
127 |
+
# logging.info(
|
128 |
+
# f"analyse results after adding allowed words: {analyze_results_extended}\n"
|
129 |
+
# )
|
130 |
+
# logging.info(
|
131 |
+
# f"type of entries in results: {type(analyze_results[0])}\n"
|
132 |
+
# )
|
133 |
+
# return analyze_results_extended
|
134 |
|
135 |
def exclude_manual_input(analyze_results):
|
136 |
analyze_results_fltered=[]
|