ben-epstein's picture
catch bad spans, propogate error message to UI
11710d1 unverified
raw
history blame
1.47 kB
import json
from json import JSONDecodeError
import streamlit as st
from datasets import Dataset
from spacy_to_hf import spacy_to_hf
demo_option = [
{
"text": "Planned to go to the Apple Storefront on Tuesday",
"spans": [
{"start": 0, "end": 7, "label": "Action"},
{"start": 21, "end": 37, "label": "Loc"},
{"start": 41, "end": 48, "label": "Date"},
],
}
]
tokenizers = [
"bert-base-uncased",
"bert-base-cased",
"distilbert-base-uncased",
"distilbert-base-cased",
"roberta-base",
]
st.title("Spacy to HuggingFace converter")
st.markdown("[Homepage](https://github.com/ben-epstein/spacy-to-hf)")
tok = st.selectbox("Pick a tokenizer", tokenizers)
spacy_data = st.text_area("Input your NER Span data here")
if spacy_data or st.button("Or try an example"):
run_data = None
if spacy_data:
try:
run_data = json.loads(spacy_data)
except JSONDecodeError as e:
st.warning(f"Invalid JSON data, try again\n{str(e)}")
else:
run_data = demo_option
if run_data:
st.write("Spacy input data:")
st.json(run_data)
try:
hf_data = spacy_to_hf(run_data, tok)
df = Dataset.from_dict(hf_data).to_pandas()
st.write("Output huggingface format:")
st.dataframe(df)
except Exception as e:
st.error(f"Failed to create tokens and tags: {str(e)}")