Spaces:

Montazerh82
/

albert-fa-zwnj-base-v2-ner

Sleeping

App Files Files Community

Montazerh82 committed on Jul 25, 2024

Commit

a029051

1 Parent(s): 266ec2a

add evaluate file

Browse files

Files changed (3) hide show

app.py +1 -1
evaluate.py +73 -0
result.csv +2 -0

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ entity_mapping = {'DAT': 'تاریخ',
                   'LOC': 'مکان',
                   'MON': 'پول',
                   'ORG': 'سازمان',
-                  'PCT': 'شخصیت',
                   'PER': 'شخص',
                   'PRO': 'محصول',
                   'TIM': 'زمان'}

                   'LOC': 'مکان',
                   'MON': 'پول',
                   'ORG': 'سازمان',
+                  'PCT': 'درصد',
                   'PER': 'شخص',
                   'PRO': 'محصول',
                   'TIM': 'زمان'}

evaluate.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import pandas as pd
+from seqeval.metrics import f1_score, precision_score, recall_score
+from transformers import pipeline, AutoTokenizer
+from datasets import load_dataset
+# Tokenizer of the NER checkpoint under evaluation; predict() uses it to map
+# sub-word tokens back to word positions.
+tokenizer = AutoTokenizer.from_pretrained(
+    "HooshvareLab/albert-fa-zwnj-base-v2-ner")
+# Only the 'test' split is loaded. NOTE(review): trust_remote_code=True
+# presumably allows the dataset repository's own loading script to run -
+# confirm that this is acceptable for this repo.
+dataset = load_dataset('HaniehPoostchi/persian_ner', split='test', trust_remote_code=True)
+# tag_to_num = {'O':0, 'I-EVE':1, 'I-FAC':2, 'I-LOC':3, 'I-ORG':4, 'I-PER':5, 'I-PRO':6, 'B-EVE':7, 'B-FAC':8, 'B-LOC':9, 'B-ORG':10, 'B-PER':11, 'B-PRO':12}
+num_to_tags = {0: 'O',
+                1: 'I-EVE',
+                2: 'I-FAC',
+                3: 'I-LOC',
+                4: 'I-ORG',
+                5: 'I-PER',
+                6: 'I-PRO',
+                7: 'B-EVE',
+                8: 'B-FAC',
+                9: 'B-LOC',
+                10: 'B-ORG',
+                11: 'B-PER',
+                12: 'B-PRO'}
+def add_text(examples):
+    results = {'text': [' '.join(example) for example in examples['tokens']],
+            #    'ner_tags': [[num_to_tags[tag] for tag in example] for example in examples['ner_tags']]
+               }
+    return results
+# Add the space-joined 'text' column; add_text receives whole batches.
+dataset = dataset.map(add_text, batched=True)
+# Evaluate on a 100-example subset: shuffle with a fixed seed, then keep the
+# first 100 rows.
+dataset = dataset.shuffle(seed=42).select(range(100))
+# Token-classification pipeline for the same checkpoint name the tokenizer
+# was loaded from.
+pipe = pipeline("token-classification",
+                model="HooshvareLab/albert-fa-zwnj-base-v2-ner")
+def predict(example):
+    tokenized = tokenizer(example['text'])
+    words = set(tokenized.word_ids())
+    words.remove(None)
+    words_num = len(words)
+    result = pipe(example['text'])
+    predictions = ['O'] * words_num
+    for entity in result:
+        word_id = tokenized.token_to_word(entity['index'])
+        if predictions[word_id] == 'O':
+            # if entity['entity'] not in tag_to_num.keys():
+            #     predictions[word_id] = 1
+            #     continue
+            predictions[word_id] = entity['entity']
+    return {'predictions': predictions}
+dataset = dataset.map(predict)
+true_labels = [[num_to_tags[tag] for tag in example] for example in dataset['ner_tags']]
+# true_labels = dataset['ner_tags']
+predicted_labels = dataset['predictions']
+result = {'model': "HooshvareLab/albert-fa-zwnj-base-v2-ner",
+          'evaluation_dataset': 'HaniehPoostchi/persian_ner',
+          'Recall': recall_score(true_labels, predicted_labels),
+          'Precision': precision_score(true_labels, predicted_labels),
+          'F1': f1_score(true_labels, predicted_labels)}
+result = pd.DataFrame([result])
+result.to_csv('result.csv', index=False)

result.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ model,evaluation_dataset,Recall,Precision,F1
2	+ HooshvareLab/albert-fa-zwnj-base-v2-ner,HaniehPoostchi/persian_ner,0.9745222929936306,0.9272727272727272,0.9503105590062112