"""Streamlit mini-app that removes named entities from user-supplied text.

Entities are detected with the flair ``flair/ner-english-large`` sequence
tagger and every occurrence of a detected entity is stripped from the text.
"""
import logging
import re

import streamlit as st
from flair.data import Sentence
from flair.models import SequenceTagger

# Number of successful requests a session may make before it is throttled.
MAX_REQUESTS_PER_SESSION = 50
# Text substituted for every detected entity (empty string: entity is removed).
REDACTION_TOKEN = ""

# Render Streamlit page
st.title("Anonymise your text!")
st.markdown(
    "This mini-app anonymises text using Bert. You can find the code on [GitHub(WIP)](#)"
)

# Configure logger
logging.basicConfig(format="\n%(asctime)s\n%(message)s", level=logging.INFO, force=True)


# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource`` -- confirm the pinned Streamlit version before switching.
@st.cache(suppress_st_warning=True)
def load_tagger():
    """Load the flair NER tagger; cached so the model is not reloaded on every call."""
    return SequenceTagger.load("flair/ner-english-large")


def _redact_entities(text: str, entities, replacement: str = REDACTION_TOKEN) -> str:
    """Return *text* with every occurrence of each entity string replaced.

    Entities are de-duplicated and tried longest-first: regex alternation picks
    the first alternative that matches, so a short entity that is a prefix of a
    longer one ("New York" vs "New York City") would otherwise leave part of
    the longer entity in the output.
    """
    unique_entities = {entity for entity in entities if entity}
    if not unique_entities:
        # Nothing to redact; avoid compiling an empty pattern.
        return text
    ordered = sorted(unique_entities, key=lambda entity: (-len(entity), entity))
    pattern = re.compile("|".join(map(re.escape, ordered)))
    return pattern.sub(replacement, text)


def anonymise_text(text: str, metadata: str = "", white_listed_words: str = "") -> None:
    """Anonymise *text* and publish the outcome through ``st.session_state``.

    Used as the ``on_click`` callback of the "Anonymise text" button. On
    success the redacted text is stored in ``st.session_state.text_anon``; on
    failure a message is stored in ``st.session_state.text_error``.

    Args:
        text: The text to anonymise.
        metadata: Value of the "Data to be redacted" input.
        white_listed_words: Value of the "Data to be ignored" input.

    NOTE(review): ``metadata`` and ``white_listed_words`` are currently only
    written to the log; they do not influence the redaction. TODO: decide on an
    input format and apply them.
    """
    if st.session_state.n_requests >= MAX_REQUESTS_PER_SESSION:
        st.session_state.text_error = (
            "Too many requests. Please wait a few seconds before anonymising more text."
        )
        logging.info(f"Session request limit reached: {st.session_state.n_requests}")
        st.session_state.n_requests = 1
        return

    st.session_state.text = ""
    st.session_state.text_error = ""
    # Drop the previous result so a stale anonymised text is never rendered
    # next to a fresh error message.
    st.session_state.text_anon = ""

    if not text:
        st.session_state.text_error = "Please enter your text"
        return

    tagger = load_tagger()

    # ``text_spinner_placeholder`` is a module-level placeholder created further
    # down the script, after the button that triggers this callback.
    with text_spinner_placeholder, st.spinner(
        "Please wait while your text is being anonymised..."
    ):
        sentence = Sentence(text)
        # Predict NER tags in place on the sentence.
        tagger.predict(sentence)
        entities = [span.text for span in sentence.get_spans("ner")]
        text_anon = _redact_entities(text, entities)

    st.session_state.text_error = ""
    st.session_state.n_requests += 1
    st.session_state.text_anon = text_anon
    # NOTE(review): this logs the raw, un-anonymised input -- confirm that is
    # acceptable for this deployment.
    logging.info(
        f"text: {text}{metadata}{white_listed_words}\n"
        f"text anonymised: {st.session_state.text_anon}"
    )


# Initialise per-session state on first run.
if "text" not in st.session_state:
    st.session_state.text = ""
if "text_error" not in st.session_state:
    st.session_state.text_error = ""
if "text_anon" not in st.session_state:
    st.session_state.text_anon = ""
if "n_requests" not in st.session_state:
    st.session_state.n_requests = 0

text = st.text_input(label="Text to be anonymised", placeholder="Write your text here")
metadata = st.text_input(
    label="Data to be redacted (optional)",
    placeholder="inspirational",
)
white_listed_words = st.text_input(
    label="Data to be ignored (optional)",
    placeholder="inspirational",
)

# st.button returns True when clicked; the work itself happens in the callback.
anonymise_now = st.button(
    label="Anonymise text",
    type="primary",
    on_click=anonymise_text,
    args=(text, metadata, white_listed_words),
)

# Slot in which the callback renders its progress spinner.
text_spinner_placeholder = st.empty()
if st.session_state.text_error:
    st.error(st.session_state.text_error)

if st.session_state.text_anon:
    st.markdown("""---""")
    st.text_area(label="Text anonymised", value=st.session_state.text_anon, height=100)