# ner_comparation / app.py
# Roland Szabo
# Add example and description
# 5e4308a
import gradio as gr
import spacy
from botocore.exceptions import ClientError
from transformers import pipeline
import boto3
# Both models are loaded once at import time so that every request handled by
# ``greet`` reuses the same in-memory instances.
nlp = spacy.load("en_core_web_sm")

# ``aggregation_strategy="simple"`` already merges word pieces into whole
# entities. The deprecated ``grouped_entities=True`` flag resolved to the same
# strategy and only produced a deprecation warning, so it is not passed.
ner_pipeline = pipeline(
    "ner",
    model="Jean-Baptiste/roberta-large-ner-english",
    aggregation_strategy="simple",
)
def _spacy_entities(text):
    """Tag *text* with the spaCy model and return ``(token_text, label)`` pairs.

    Tokens that are not part of an entity get ``None`` as their label.
    """
    # spaCy reports "no entity" as an empty string; ``or None`` turns that
    # into the ``None`` label used for unhighlighted tokens.
    return [(token.text, token.ent_type_ or None) for token in nlp(text)]


def _roberta_entities(text):
    """Run the Hugging Face NER pipeline on *text* and return highlight data."""
    output = ner_pipeline(text)
    print(output)
    return {
        "text": text,
        "entities": [
            {
                "word": entity["word"],
                "entity": entity["entity_group"],
                "start": entity["start"],
                "end": entity["end"],
            }
            for entity in output
        ],
    }


def _comprehend_language(client, text):
    """Return the language code Comprehend detects for *text*, or ``'en'``.

    Falls back to ``'en'`` when the detection call raises ``ClientError`` or
    when the response contains no languages.
    """
    try:
        response = client.detect_dominant_language(Text=text)
    except ClientError:
        print("Couldn't detect languages.")
        return 'en'
    languages = response['Languages']
    # ``print`` does not interpolate %-style placeholders, so format explicitly.
    print(f"Detected {len(languages)} languages.")
    if not languages:
        # Nothing detected: indexing [0] would raise IndexError.
        return 'en'
    return languages[0]['LanguageCode']


def _comprehend_entities(text):
    """Detect entities in *text* with AWS Comprehend and return highlight data."""
    if not text:
        # Comprehend requires a non-empty ``Text``; skip the remote calls and
        # report "no entities" instead of failing the request.
        return {"text": text, "entities": []}
    client = boto3.client('comprehend')
    language = _comprehend_language(client, text)
    response = client.detect_entities(Text=text, LanguageCode=language)
    print(response)
    return {
        "text": text,
        "entities": [
            {
                "word": entity["Text"],
                "entity": entity["Type"],
                "start": entity['BeginOffset'],
                "end": entity['EndOffset'],
            }
            for entity in response["Entities"]
        ],
    }


def greet(model_type, text):
    """Run named-entity recognition on *text* with the selected backend.

    Args:
        model_type: One of ``"Spacy"``, ``"Roberta"`` or ``"AWS Comprehend"``.
        text: The text to analyse.

    Returns:
        For ``"Spacy"``, a list of ``(token_text, label_or_None)`` tuples.
        For ``"Roberta"`` and ``"AWS Comprehend"``, a dict with the original
        ``"text"`` and a list of ``"entities"``, each holding ``"word"``,
        ``"entity"``, ``"start"`` and ``"end"``.
        ``None`` when *model_type* is not one of the known backends.
    """
    if model_type == "Spacy":
        return _spacy_entities(text)
    if model_type == "Roberta":
        return _roberta_entities(text)
    if model_type == "AWS Comprehend":
        return _comprehend_entities(text)
    # Unknown or unselected backend: keep the original "no result" behaviour.
    return None
# Explanatory text passed as the ``description`` argument of the Gradio
# interface built below. The line breaks inside the literal are part of the
# string.
description = """Compare the NER outputs of Spacy, HuggingFace Roberta and AWS Comprehend.
These models are off the shelf models, which have not been finetuned. This is just to show a baseline,
before we start finetuning the models. All of them can be finetuned (including AWS Comprehend).
AWS Comprehend can be finetuned using Entity lists, without having to annotate full documents by hand."""
# Radio selector whose options match the ``model_type`` values that ``greet``
# compares against.
_model_selector = gr.Radio(["Spacy", "Roberta", "AWS Comprehend"])

# Sample input offered as a ready-made example in the UI.
_example_text = """We hereby issue in your favour this documentary credit which is available by
negotiation of your drafts at sight drawn on L/C Openers Bank at Chennai on account of
M/s.TANGEDCO Limited bearing the number, date of the documentary credit and the
name of the issuing bank of this credit for 100% invoice value accompanied by the
following documents."""

# Wire ``greet`` to a two-input form (model choice + free text); the result is
# handed to the component selected by the "highlight" output shortcut.
demo = gr.Interface(
    fn=greet,
    inputs=[_model_selector, "text"],
    outputs="highlight",
    title="Comparison of NER Options",
    description=description,
    examples=[["AWS Comprehend", _example_text]],
)

# Start serving the interface.
demo.launch()