turkish-named-entity-recognition-tests

Runtime error

App Files Community

akdeniz27 committed on Jun 25, 2022

Commit

4874aa0

1 Parent(s): 557a1ee

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -10

app.py CHANGED Viewed

@@ -30,19 +30,22 @@ model_checkpoint = st.sidebar.radio("", model_list)
 st.sidebar.write("For details of models: 'https://huggingface.co/akdeniz27/")
 st.sidebar.write("")
-xlm_agg_strategy_info = "'aggregation_strategy' can be selected as 'simple' or 'none' for 'xlm-roberta' because of the RoBERTa model's tokenization approach."
-st.sidebar.header("Select Aggregation Strategy Type")
 if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
-    aggregation = st.sidebar.radio("", ('simple', 'none'))
-    st.sidebar.write(xlm_agg_strategy_info)
-elif model_checkpoint == "xlm-roberta-large-finetuned-conll03-english": # or model_checkpoint == "tner/tner-xlm-roberta-base-ontonotes5":
-    aggregation = st.sidebar.radio("", ('simple', 'none'))
-    st.sidebar.write(xlm_agg_strategy_info)
     st.sidebar.write("")
     st.sidebar.write("This English NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta.")
 else:
-    aggregation = st.sidebar.radio("", ('first', 'simple', 'average', 'max', 'none'))
 st.sidebar.write("Please refer 'https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html' for entity grouping with aggregation_strategy parameter.")
@@ -73,8 +76,18 @@ if Run_Button == True:
     ner_pipeline = setModel(model_checkpoint, aggregation)
     output = ner_pipeline(input_text)
-    df = pd.DataFrame.from_dict(output)
     if aggregation != "none":
         cols_to_keep = ['word','entity_group','score','start','end']
     else:
@@ -90,7 +103,7 @@ if Run_Button == True:
     spacy_display["text"] = input_text
     spacy_display["title"] = None
-    for entity in output:
         if aggregation != "none":
             spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
         else:

 st.sidebar.write("For details of models: 'https://huggingface.co/akdeniz27/")
 st.sidebar.write("")
+# xlm_agg_strategy_info = "'aggregation_strategy' can be selected as 'simple' or 'none' for 'xlm-roberta' because of the RoBERTa model's tokenization approach."
+# st.sidebar.header("Select Aggregation Strategy Type")
 if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
+    aggregation = "simple"
+    # aggregation = st.sidebar.radio("", ('simple', 'none'))
+    # st.sidebar.write(xlm_agg_strategy_info)
+elif model_checkpoint == "xlm-roberta-large-finetuned-conll03-english" or model_checkpoint == "tner/tner-xlm-roberta-base-ontonotes5":
+    aggregation = "simple"
+    # aggregation = st.sidebar.radio("", ('simple', 'none'))
+    # st.sidebar.write(xlm_agg_strategy_info)
     st.sidebar.write("")
     st.sidebar.write("This English NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta.")
 else:
+    aggregation = "first"
+    # aggregation = st.sidebar.radio("", ('first', 'simple', 'average', 'max', 'none'))
 st.sidebar.write("Please refer 'https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html' for entity grouping with aggregation_strategy parameter.")
     ner_pipeline = setModel(model_checkpoint, aggregation)
     output = ner_pipeline(input_text)
+    output_comb = []
+    for ind, entity in enumerate(output):
+        if ind == 0:
+            output_comb.append(entity)
+        elif output[ind]["start"] == output[ind-1]["end"]:
+            output_comb[ind-1]["entity"] = output_comb[ind-1]["entity"] + output[ind]["entity"]
+            output_comb[ind-1]["end"] = output[ind]["end"]
+        else:
+            output_comb.append(entity)
+    df = pd.DataFrame.from_dict(output_comb)
     if aggregation != "none":
         cols_to_keep = ['word','entity_group','score','start','end']
     else:
     spacy_display["text"] = input_text
     spacy_display["title"] = None
+    for entity in output_comb:
         if aggregation != "none":
             spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
         else: