Spaces:
Sleeping
Sleeping
Update model.py
Browse files
model.py
CHANGED
@@ -25,17 +25,19 @@ stopwords = stopwords_list()
|
|
25 |
# Load the BERT model for sentiment analysis
|
26 |
dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
|
27 |
|
28 |
-
config = AutoConfig.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2")
|
30 |
-
model = BertForSequenceClassification.from_pretrained("HooshvareLab/albert-fa-zwnj-base-v2", num_labels=3)
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
33 |
model.to(device)
|
34 |
|
35 |
-
|
36 |
# Tokenization function for sentiment analysis
|
37 |
def tokenize_function(examples):
|
38 |
-
return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=
|
39 |
|
40 |
# Sentiment prediction function
|
41 |
def predict_sentiment(batch):
|
@@ -49,8 +51,13 @@ def predict_sentiment(batch):
|
|
49 |
return {'sentiment': predictions.cpu()}
|
50 |
|
51 |
# Mapping sentiment labels
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# Adding sentiment prediction to tokenized dataset
|
56 |
def predict_sentiment_labels(text):
|
@@ -58,7 +65,7 @@ def predict_sentiment_labels(text):
|
|
58 |
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
59 |
predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
|
60 |
sentiment = predicted_sentiments[0]['sentiment']
|
61 |
-
return
|
62 |
|
63 |
|
64 |
|
|
|
25 |
# Load the BERT model for sentiment analysis
|
26 |
dataset = Dataset.from_pandas(pd.DataFrame({"Comment": []}))
|
27 |
|
|
|
|
|
|
|
28 |
|
29 |
+
# Load the model and tokenizer
|
30 |
+
model_name = "m3hrdadfi/albert-fa-base-v2-sentiment-deepsentipers-multi"
|
31 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5, ignore_mismatched_sizes=True)
|
32 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
33 |
+
|
34 |
+
# Select the device (GPU or CPU)
|
35 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
36 |
model.to(device)
|
37 |
|
|
|
38 |
# Tokenization function for sentiment analysis
|
39 |
def tokenize_function(examples):
|
40 |
+
return tokenizer(examples["Comment"], padding="max_length", truncation=True, max_length=128, return_tensors='pt')
|
41 |
|
42 |
# Sentiment prediction function
|
43 |
def predict_sentiment(batch):
|
|
|
51 |
return {'sentiment': predictions.cpu()}
|
52 |
|
53 |
# Mapping sentiment labels
|
54 |
+
sentiment_labels = {
|
55 |
+
0: 'Furious',
|
56 |
+
1: 'Angry',
|
57 |
+
2: 'Neutral',
|
58 |
+
3: 'Happy',
|
59 |
+
4: 'Delighted'
|
60 |
+
}
|
61 |
|
62 |
# Adding sentiment prediction to tokenized dataset
|
63 |
def predict_sentiment_labels(text):
|
|
|
65 |
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
66 |
predicted_sentiments = tokenized_dataset.map(predict_sentiment, batched=True)
|
67 |
sentiment = predicted_sentiments[0]['sentiment']
|
68 |
+
return sentiment_labels.get(sentiment, 'نامشخص')
|
69 |
|
70 |
|
71 |
|