Spaces:

Montazerh82
/

albert-fa-zwnj-base-v2-ner

Sleeping

App Files Files Community

albert-fa-zwnj-base-v2-ner / evaluate.py

Montazerh82

add evaluate file

a029051 11 months ago

raw

history blame contribute delete

2.5 kB

	import pandas as pd
	from seqeval.metrics import f1_score, precision_score, recall_score
	from transformers import pipeline, AutoTokenizer
	from datasets import load_dataset

	# Tokenizer of the NER checkpoint under evaluation; predict() uses it to
	# map sub-word token indices back to word positions.
	tokenizer = AutoTokenizer.from_pretrained(
	    "HooshvareLab/albert-fa-zwnj-base-v2-ner",
	)

	# Test split of the Persian NER dataset used as the evaluation set.
	dataset = load_dataset(
	    'HaniehPoostchi/persian_ner',
	    split='test',
	    trust_remote_code=True,
	)

	# Integer tag id -> tag name, used to turn the dataset's ``ner_tags`` ids
	# into the label strings that seqeval scores.
	# NOTE(review): the id order presumably mirrors the dataset's label
	# definition -- confirm against the dataset card.
	num_to_tags = dict(enumerate((
	    'O',
	    'I-EVE',
	    'I-FAC',
	    'I-LOC',
	    'I-ORG',
	    'I-PER',
	    'I-PRO',
	    'B-EVE',
	    'B-FAC',
	    'B-LOC',
	    'B-ORG',
	    'B-PER',
	    'B-PRO',
	)))

	def add_text(examples):
	    """Build a ``'text'`` column by space-joining each example's tokens.

	    Args:
	        examples: Batch mapping whose ``'tokens'`` entry is a list of
	            token lists, one per example.

	    Returns:
	        ``{'text': [...]}`` with one joined string per example.
	    """
	    sentences = []
	    for tokens in examples['tokens']:
	        sentences.append(' '.join(tokens))
	    return {'text': sentences}

	# Add the joined 'text' column, then keep a fixed, seeded sample of
	# 100 examples so the evaluation is reproducible.
	dataset = dataset.map(add_text, batched=True)
	dataset = dataset.shuffle(seed=42)
	dataset = dataset.select(range(100))

	# Token-classification pipeline for the checkpoint being evaluated.
	pipe = pipeline(
	    "token-classification",
	    model="HooshvareLab/albert-fa-zwnj-base-v2-ner",
	)


	def predict(example):
	    """Predict one NER label per word of ``example['text']``.

	    The text is tokenized once to learn how tokens group into words and
	    is then run through the token-classification pipeline. Each word
	    takes the label of the first entity token the pipeline reports for
	    it; words with no reported entity token stay ``'O'``.

	    Args:
	        example: Mapping with a ``'text'`` entry holding the sentence.

	    Returns:
	        ``{'predictions': [...]}`` with one label string per distinct
	        word id of the tokenized text.
	    """
	    tokenized = tokenizer(example['text'])

	    # Tokens that belong to no word carry a None word id; filter them
	    # out instead of calling set.remove(None), which raises KeyError
	    # when the encoding happens to contain no such token.
	    word_ids = {
	        word_id
	        for word_id in tokenized.word_ids()
	        if word_id is not None
	    }
	    predictions = ['O'] * len(word_ids)

	    for entity in pipe(example['text']):
	        word_id = tokenized.token_to_word(entity['index'])
	        if word_id is None:
	            # Token maps to no word, so there is no slot to label.
	            continue
	        # Only the first labelled token of a word decides its label.
	        if predictions[word_id] == 'O':
	            predictions[word_id] = entity['entity']
	    return {'predictions': predictions}

	# Run the model over every sampled example; adds a 'predictions' column.
	dataset = dataset.map(predict)

	# The metrics below compare label strings, so convert the integer
	# ner_tags ids to tag names first.
	true_labels = [
	    [num_to_tags[tag_id] for tag_id in tag_ids]
	    for tag_ids in dataset['ner_tags']
	]
	predicted_labels = dataset['predictions']

	# Key order here is the column order of the CSV written below.
	result = {
	    'model': "HooshvareLab/albert-fa-zwnj-base-v2-ner",
	    'evaluation_dataset': 'HaniehPoostchi/persian_ner',
	}
	result['Recall'] = recall_score(true_labels, predicted_labels)
	result['Precision'] = precision_score(true_labels, predicted_labels)
	result['F1'] = f1_score(true_labels, predicted_labels)

	# One-row table with the scores, written next to the script.
	result = pd.DataFrame([result])
	result.to_csv('result.csv', index=False)