umarigan committed on
Commit
0cb91b5
1 Parent(s): f2dc65a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -38
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import spacy
4
  from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
5
  import PyPDF2
6
  import docx
@@ -92,6 +91,7 @@ def entity_comb(output):
92
  else:
93
  output_comb.append(entity)
94
  return output_comb
 
95
  def create_mask_dict(entities):
96
  mask_dict = {}
97
  entity_counters = {}
@@ -104,14 +104,13 @@ def create_mask_dict(entities):
104
  entity_counters[entity['entity_group']] += 1
105
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
106
  return mask_dict
 
107
  def create_masked_text(input_text, entities):
108
- # Create the mask dictionary
109
  mask_dict = create_mask_dict(entities)
110
 
111
  masked_text = input_text
112
  for entity in sorted(entities, key=lambda x: x['start'], reverse=True):
113
  if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
114
- # Replace the entity with its entity group from the mask dictionary
115
  masked_text = (
116
  masked_text[:entity['start']] +
117
  f"<{mask_dict[entity['word']]}> " + # Use angle brackets for clarity
@@ -140,47 +139,17 @@ if Run_Button and input_text:
140
  entity['end'] += offset
141
 
142
  all_outputs.extend(output)
143
-
144
 
145
  # Combine entities
146
-
147
  output_comb = entity_comb(all_outputs)
148
 
149
- # Create mask dictionary
150
- mask_dict = create_mask_dict(output_comb)
151
-
152
  masked_text = create_masked_text(input_text, output_comb)
153
-
154
- # Apply masking and add masked_word column
155
- for entity in output_comb:
156
- if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
157
- entity['masked_word'] = mask_dict.get(entity['word'], entity['word'])
158
- else:
159
- entity['masked_word'] = entity['word']
160
- print("output_comb", output_comb)
161
- #df = pd.DataFrame.from_dict(output_comb)
162
- #cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
163
- #df_final = df[cols_to_keep].loc[:,~df.columns.duplicated()].copy()
164
-
165
- #st.subheader("Recognized Entities")
166
- #st.dataframe(df_final)
167
 
168
-
169
-
170
- # Spacy display logic with entity numbering
171
- spacy_display = {"ents": [], "text": input_text, "title": None}
172
- for entity in output_comb:
173
- if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
174
- label = f"{entity['entity_group']}_{mask_dict[entity['word']].split('_')[1]}"
175
- else:
176
- label = entity['entity_group']
177
- spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
178
-
179
- html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
180
- st.write(html, unsafe_allow_html=True)
181
 
182
  st.subheader("Masking Dictionary")
183
  st.json(mask_dict)
184
-
185
- st.subheader("Masked Text Preview")
186
- st.text(masked_text)
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
4
  import PyPDF2
5
  import docx
 
91
  else:
92
  output_comb.append(entity)
93
  return output_comb
94
+
95
  def create_mask_dict(entities):
96
  mask_dict = {}
97
  entity_counters = {}
 
104
  entity_counters[entity['entity_group']] += 1
105
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
106
  return mask_dict
107
+
108
  def create_masked_text(input_text, entities):
 
109
  mask_dict = create_mask_dict(entities)
110
 
111
  masked_text = input_text
112
  for entity in sorted(entities, key=lambda x: x['start'], reverse=True):
113
  if entity['entity_group'] not in ['CARDINAL', 'EVENT']:
 
114
  masked_text = (
115
  masked_text[:entity['start']] +
116
  f"<{mask_dict[entity['word']]}> " + # Use angle brackets for clarity
 
139
  entity['end'] += offset
140
 
141
  all_outputs.extend(output)
 
142
 
143
  # Combine entities
 
144
  output_comb = entity_comb(all_outputs)
145
 
146
+ # Create masked text and masking dictionary
 
 
147
  masked_text = create_masked_text(input_text, output_comb)
148
+ mask_dict = create_mask_dict(output_comb)
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
+ # Display the masked text and masking dictionary
151
+ st.subheader("Masked Text Preview")
152
+ st.text(masked_text)
 
 
 
 
 
 
 
 
 
 
153
 
154
  st.subheader("Masking Dictionary")
155
  st.json(mask_dict)