danielcd99 committed on
Commit
1ba6bc3
·
1 Parent(s): e8059ec

added symbolic model

Browse files
Files changed (3) hide show
  1. app.py +4 -2
  2. requirements.txt +2 -1
  3. wordnet.py +80 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  from preprocess_data import preprocess_text,get_stopwords
4
  from datasets import load_dataset
5
  from transformers import pipeline
 
6
 
7
  dataset = load_dataset('danielcd99/imdb')
8
 
@@ -45,9 +46,10 @@ if st.button('Encontre exemplos!'):
45
  else:
46
  predictions.append('Positive')
47
 
48
- df['predictions'] = predictions
 
49
 
50
- cols = ['review','sentiment', 'predictions']
51
 
52
  st.table(df[cols])
53
 
 
3
  from preprocess_data import preprocess_text,get_stopwords
4
  from datasets import load_dataset
5
  from transformers import pipeline
6
+ from wordnet import wordnet_pipeline
7
 
8
  dataset = load_dataset('danielcd99/imdb')
9
 
 
46
  else:
47
  predictions.append('Positive')
48
 
49
+ df['bert_results'] = predictions
50
+ df['wordnet_results'] = wordnet_pipeline(df, 'preprocessed_review')
51
 
52
+ cols = ['review','sentiment', 'bert_results', 'wordnet_results']
53
 
54
  st.table(df[cols])
55
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  nltk
2
  transformers==4.28.0
3
- torch
 
 
1
  nltk
2
  transformers==4.28.0
3
+ torch
4
+ numpy
wordnet.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import nltk
3
+ from nltk.corpus import sentiwordnet as swn
4
+ from nltk.corpus import stopwords
5
+
6
def flatten(l):  # noqa: E741 - parameter name kept from the original lambda
    """Flatten one level of nesting: an iterable of iterables becomes a list."""
    return [item for sublist in l for item in sublist]


# Maps the first two characters of a POS tag to the single-letter part of
# speech that get_sentiment passes to swn.senti_synsets. Tags whose prefix is
# not listed here are skipped by get_sentiment.
tagsswn = {
    "NN": "n",
    "VB": "v",
    "JJ": "a",
    "RB": "r",
}
14
+
15
def get_sentiment(aval, stopwords):
    """
    Compute the sentiment scores of a text using SentiWordNet.

    The text is split into sentences, tokenized and POS-tagged. Tokens whose
    lowercase form is in ``stopwords`` are ignored, as are tokens whose tag
    prefix has no entry in ``tagsswn`` and tokens for which SentiWordNet
    returns no synsets. Only the first synset returned for a token is scored.

    Args:
        aval (str): Text to analyse.
        stopwords: Collection of lowercase words to ignore; only membership
            tests are performed on it. Inside this function the name shadows
            the module-level ``stopwords`` corpus import.

    Returns:
        tuple: ``(positive_total, negative_total)``, the sums of the positive
        and negative scores of the scored tokens. Each total is ``0`` when no
        token was scored.
    """
    pos_scores = []
    neg_scores = []

    sentences = nltk.sent_tokenize(aval)
    sentence_words = [nltk.word_tokenize(sentence) for sentence in sentences]
    # Tagging happens on the full sentences, before stop words are dropped,
    # and the per-sentence results are merged into one list of (word, tag).
    tagged_words = flatten(nltk.pos_tag_sents(sentence_words))

    for word, pos in tagged_words:
        lowered = word.lower()
        if lowered in stopwords:
            continue

        # Only the first two characters of the tag select the SentiWordNet
        # part of speech; tags without a mapping are not scored.
        swn_pos = tagsswn.get(pos[:2], None)
        if not swn_pos:
            continue

        synsets = list(swn.senti_synsets(lowered, swn_pos))
        if not synsets:
            continue

        # Score the token with the first synset SentiWordNet returns.
        first_synset = synsets[0]
        pos_scores.append(first_synset.pos_score())
        neg_scores.append(first_synset.neg_score())

    sump = np.sum(pos_scores) if pos_scores else 0
    sumn = np.sum(neg_scores) if neg_scores else 0

    return sump, sumn
52
+
53
def classify_sentiment(aval, stopwords):
    """
    Classify a text as positive or negative from its sentiment scores.

    Args:
        aval (str): Text to classify.
        stopwords: Collection of words to ignore, forwarded unchanged to
            ``get_sentiment``.

    Returns:
        str: ``"positive"`` when the positive score is strictly greater than
        the negative score, ``"negative"`` otherwise (ties included).
    """
    pos_score, neg_score = get_sentiment(aval, stopwords)
    return "positive" if pos_score > neg_score else "negative"
65
+
66
+
67
def wordnet_pipeline(df, column):
    """
    Classify every text in ``df[column]`` with the SentiWordNet classifier.

    Args:
        df: Object indexable by column name whose selected column can be
            iterated (presumably a pandas DataFrame; confirm against caller).
        column (str): Name of the column holding the texts to classify.

    Returns:
        list: One label per entry of ``df[column]``, in iteration order, as
        returned by ``classify_sentiment``.
    """
    # The NLTK resources are requested on every call, in this order.
    # NOTE(review): newer NLTK releases may look up 'punkt_tab' and
    # 'averaged_perceptron_tagger_eng' instead of the last two names;
    # confirm against the NLTK version actually installed.
    for resource in (
        'sentiwordnet',
        'wordnet',
        'stopwords',
        'punkt',
        'averaged_perceptron_tagger',
    ):
        nltk.download(resource)

    # Build the stop-word set once so each membership test is O(1).
    stpwrds = set(stopwords.words("english"))

    return [classify_sentiment(review, stpwrds) for review in df[column]]