# NerRoB-czech / app.py
# Last commit: 1709ba8 "change layout" (AlzbetaStrompova)
import json
import gradio as gr
from website_script import load, run, gaz
# Load the tokenizer and model once at import time; ner() reuses them on
# every call.
tokenizer, model = load()

# Gazetteers handed to run() for matching; add_gazetteers() extends them
# in place with user-supplied entries.
gazetteers_for_matching = gaz()

# Sample Czech sentences passed to the NER interface as ready-made examples.
examples = [
    "Masarykova univerzita se nachází v Brně .",
    "Barack Obama navštívil Prahu minulý týden .",
    "Angela Merkelová se setkala s francouzským prezidentem v Paříži .",
    "Nobelova cena za fyziku byla udělena týmu vědců z MIT .",
]
def add_gazetteers(new_gazetteers):
    """Merge user-supplied entries into the module-level gazetteers.

    For every ``label -> entries`` pair the label is upper-cased. Each
    gazetteer dictionary that already holds that label among its values
    then gets every entry mapped to the label. Labels that no gazetteer
    contains are silently ignored.

    Args:
        new_gazetteers: Mapping from a label string to an iterable of
            entries to register under that label.

    Returns:
        None. ``gazetteers_for_matching`` is updated in place.
    """
    for raw_label, entries in new_gazetteers.items():
        label = raw_label.upper()
        # Lazy filter: each gazetteer is tested right before it is updated,
        # exactly as a nested loop with an inner ``if`` would do.
        matching = (
            gazetteer
            for gazetteer in gazetteers_for_matching
            if label in gazetteer.values()
        )
        for gazetteer in matching:
            for entry in entries:
                gazetteer[entry] = label
def ner(text):
    """Run entity recognition on ``text`` and package it for display.

    Prints the size of every gazetteer before delegating to ``run`` with
    the module-level tokenizer, model and gazetteers.

    Args:
        text: The input text to analyse.

    Returns:
        A dict with the original ``"text"`` and the ``"entities"`` value
        returned by ``run``.
    """
    # Debug output: number of entries currently held by each gazetteer.
    for gazetteer in gazetteers_for_matching:
        print(len(gazetteer))

    entities = run(tokenizer, model, gazetteers_for_matching, text)
    return dict(text=text, entities=entities)
def load_gazetters(file_names):
    """Load uploaded JSON gazetteer files and merge them into the matcher.

    Each file is parsed as JSON and passed to ``add_gazetteers``, which
    updates the module-level gazetteers in place.

    Args:
        file_names: Iterable of paths to JSON files. ``None`` or an empty
            collection (nothing uploaded) is accepted and does nothing.

    Returns:
        None.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    print(file_names)
    if not file_names:
        # Nothing was uploaded; iterating over None would raise TypeError.
        return

    for file_name in file_names:
        # Read explicitly as UTF-8 so non-ASCII entries (e.g. Czech
        # diacritics) do not depend on the platform's default encoding.
        with open(file_name, "r", encoding="utf-8") as file:
            data = json.load(file)
        # add_gazetteers works in place and returns None, so its result is
        # deliberately not bound to anything.
        add_gazetteers(data)
# Assemble the web page: a single Blocks container holding the NER
# interface followed by the gazetteer-upload interface. The CSS rule hides
# the page footer.
with gr.Blocks(
    css="footer{display:none !important}",
    theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky"),
) as demo:
    # Entity tagging: free text in, highlighted PER/ORG/LOC spans out.
    gr.Interface(
        fn=ner,
        inputs=gr.Textbox(lines=10, placeholder="Enter sentence here..."),
        outputs=gr.HighlightedText(
            show_legend=True,
            color_map={"PER": "red", "ORG": "green", "LOC": "blue"},
        ),
        examples=examples,
        title="NerROB-czech",
        description="This is an implementation of a Named Entity Recognition model for the Czech language using gazetteers.",
        allow_flagging="never",
    )
    # Gazetteer upload: accepts several JSON files and has no output component.
    gr.Interface(
        fn=load_gazetters,
        inputs=gr.File(
            label="Upload a JSON file",
            file_count="multiple",
            file_types=[".json"],
        ),
        outputs=None,
        allow_flagging="never",
        description="Here you can upload your own gazetteers.",
    )

if __name__ == "__main__":
    demo.launch()