Spaces:
Sleeping
Sleeping
File size: 1,518 Bytes
a7fbbb7 9b3af2e a7fbbb7 c59cf35 a7fbbb7 c59cf35 a7fbbb7 c59cf35 668f6af c59cf35 668f6af a7fbbb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
def analyze(model_name: str, text: str, top_k: int = 1) -> list:
    """Run sentiment/toxicity classification on *text* with a Hugging Face model.

    Args:
        model_name: Model id on the Hub (or local path) passed to ``from_pretrained``.
        text: The input text to classify.
        top_k: Number of highest-scoring labels to return per input.

    Returns:
        The pipeline output: a list with one entry per input; with ``top_k`` set,
        each entry is itself a list of ``{'label': ..., 'score': ...}`` dicts
        (callers index it as ``result[0][0]['label']``).
    """
    # NOTE: the model, tokenizer, and pipeline are rebuilt on every call;
    # cache them outside this function if classifying many texts.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
    return classifier(text)
# Ad-hoc inputs for a quick manual check of `analyze`.
user_input = "Go fuck yourself"  # deliberately toxic sample; unused by the loop below
user_model = "andyqin18/test-finetuned"  # fine-tuned toxicity classifier on the HF Hub
# result = analyze(user_model, user_input, top_k=2)
# print(result[0][0]['label'])
import pandas as pd
import numpy as np
# Load the evaluation comments and draw a random sample of 10 distinct texts.
# No RNG seed is set, so each run selects a different subset.
test_df = pd.read_csv("milestone3/comp/test_comment.csv")
comment_pool = test_df["comment_text"].values
sample_texts = np.random.choice(comment_pool, size=10, replace=False)
# Classify each sampled comment and tabulate its two highest-scoring labels
# in a column-oriented dict (one list per eventual table column).
init_table_dict = {
    "Text": [],
    "Highest Toxicity Class": [],
    "Highest Score": [],
    "Second Highest Toxicity Class": [],
    "Second Highest Score": [],
}
for comment in sample_texts:
    # analyze(...) returns one entry per input; take that entry's ranked labels.
    ranked = analyze(user_model, comment, top_k=2)[0]
    best, runner_up = ranked[0], ranked[1]
    init_table_dict["Text"].append(comment[:50])  # truncate for display
    init_table_dict["Highest Toxicity Class"].append(best['label'])
    init_table_dict["Highest Score"].append(best['score'])
    init_table_dict["Second Highest Toxicity Class"].append(runner_up['label'])
    init_table_dict["Second Highest Score"].append(runner_up['score'])
print(init_table_dict)