"""Streamlit demo app: French Part-Of-Speech tagging with a Flair model.

The page lets the user pick a checkpoint, enter a text, and displays the
predicted tags as plain text, as annotated text and as JSON.
"""

import json
import time
from typing import Dict, List, Tuple

import streamlit as st
from annotated_text import annotated_text
import matplotlib
from flair.data import Sentence
from flair.models import SequenceTagger

# Model identifiers offered in the "Choose model" select box.
checkpoints = [
    "qanastek/pos-french",
]

# Values of matplotlib's named-colour table (not referenced in this file).
colors = list(matplotlib.colors.cnames.values())


# NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
# favour of ``st.cache_resource`` -- confirm the pinned Streamlit version
# before migrating.
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_model(model_name):
    """Load the Flair sequence tagger identified by *model_name*.

    The result is cached by Streamlit so the model is not reloaded on every
    script rerun.
    """
    return SequenceTagger.load(model_name)  # Load the model


def getPos(s: Sentence) -> Tuple[List[str], List[str]]:
    """Collect token texts and their predicted label values from *s*.

    For every token, one entry is emitted per annotation layer present on
    that token, using the first label of the layer. A token carrying several
    layers therefore appears several times.

    Returns:
        Two parallel lists ``(texts, labels)`` of equal length.
    """
    texts = []
    labels = []
    for token in s.tokens:
        for layer in token.annotation_layers:
            layer_labels = token.get_labels(layer)
            # Skip a layer without labels instead of raising IndexError.
            if not layer_labels:
                continue
            texts.append(token.text)
            labels.append(layer_labels[0].value)
    return texts, labels


def getDictFromPOS(texts, labels) -> List[Dict[str, str]]:
    """Pair *texts* and *labels* into ``{"text": ..., "label": ...}`` dicts."""
    return [{"text": text, "label": label} for text, label in zip(texts, labels)]


def getAnnotatedFromPOS(texts, labels) -> List[Tuple[str, str, str]]:
    """Pair *texts* and *labels* into ``(text, label, "#8ef")`` tuples.

    The tuples are later unpacked into ``annotated_text``; ``"#8ef"`` is the
    same fixed third element for every token.
    """
    return [(text, label, "#8ef") for text, label in zip(texts, labels)]


def _render_results(texts, labels) -> None:
    """Display the tags as plain text, as annotated text and as JSON."""
    st.header("Labels:")
    st.write(" ".join(labels))

    st.header("Annotated text:")
    anns = getAnnotatedFromPOS(texts, labels)
    annotated_text(*anns)

    st.header("JSON:")
    st.json(getDictFromPOS(texts, labels))


def main():
    """Build the Streamlit page and run the tagger when requested."""
    st.title("🥖 French Part-Of-Speech Tagging")

    checkpoint = st.selectbox("Choose model", checkpoints)
    model = get_model(checkpoint)

    default_text = "George Washington est allé à Washington"
    input_text = st.text_area(
        label="Original text",
        value=default_text,
    )

    # Stays None unless a prediction is actually started, so the timing line
    # at the bottom is only shown after a run.
    start = None
    if st.button("🧠 Compute"):
        if not input_text.strip():
            # Nothing to tag: tell the user instead of calling the model.
            st.warning("Please enter some text to tag.")
        else:
            start = time.time()
            with st.spinner("Search for Part-Of-Speech Tags 🔍"):
                try:
                    # Build Sentence
                    s = Sentence(input_text)

                    # predict tags
                    model.predict(s)

                    texts, labels = getPos(s)
                    _render_results(texts, labels)
                except Exception as e:
                    # UI boundary: report the failure on the page and halt
                    # this script run.
                    st.error(f"Some error occurred! {e}")
                    st.stop()

    st.write("---")
    st.markdown(
        "Built by [Yanis Labrak](https://www.linkedin.com/in/yanis-labrak-8a7412145/) 🚀"
    )
    st.markdown(
        "_Source code made with [FlairNLP](https://github.com/flairNLP/flair)_"
    )

    if start is not None:
        st.text(f"prediction took {time.time() - start:.2f}s")


if __name__ == "__main__":
    main()