from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
from fastapi import FastAPI

app = FastAPI()

# Load the training split of the multiclass sentiment dataset from the Hugging Face Hub
# (reading "hf://" paths with pandas requires the huggingface_hub package to be installed)
splits = {'train': 'train_df.csv', 'validation': 'val_df.csv', 'test': 'test_df.csv'}
df = pd.read_csv("hf://datasets/Sp1786/multiclass-sentiment-analysis-dataset/" + splits["train"])

# Load the pre-trained multilingual sentiment model and its tokenizer
model_name = "tabularisai/multilingual-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def predict_sentiment(texts):
    # Tokenize the batch of texts, truncating/padding to the model's maximum length
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # Convert logits to class probabilities and map each predicted class index to its label
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]

texts = [
    "I absolutely love the new design of this app!",
    "Cooking microwave pizzas, yummy.",
    "The weather is fine, nothing special.",
]

print(predict_sentiment(texts))
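
The FastAPI app is instantiated above but no route is defined in this snippet. A minimal sketch of how predict_sentiment could be exposed through that app follows; the /predict path and the SentimentRequest request model are assumptions added for illustration, not part of the original code.

from pydantic import BaseModel

class SentimentRequest(BaseModel):
    # Hypothetical request body: a list of texts to classify
    texts: list[str]

@app.post("/predict")  # hypothetical route name
def predict(request: SentimentRequest):
    # Run the batch through the model loaded above and return one label per text
    return {"sentiments": predict_sentiment(request.texts)}

Assuming the file is saved as main.py, the service could then be started with uvicorn main:app --reload.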
|