IMDB_Reviews

Sleeping

App Files Files Community

IMDB_Reviews / wordnet.py

danielcd99

added symbolic model

1ba6bc3 about 1 year ago

raw

history blame contribute delete

2.11 kB

	import numpy as np
	import nltk
	from nltk.corpus import sentiwordnet as swn
	from nltk.corpus import stopwords

	def flatten(l):
	    """Return a single list holding the items of every sub-iterable of ``l``, in order."""
	    flat = []
	    for sublist in l:
	        flat.extend(sublist)
	    return flat


	# Maps the first two characters of a POS tag (as sliced by get_sentiment)
	# to the part-of-speech letter passed to SentiWordNet lookups.
	tagsswn = {
	    "NN": "n",  # noun
	    "VB": "v",  # verb
	    "JJ": "a",  # adjective
	    "RB": "r",  # adverb
	}

	def get_sentiment(aval, stopwords):
	    """
	    Compute the SentiWordNet sentiment totals of a text.

	    The text is split into sentences, tokenized and POS-tagged with NLTK.
	    Tokens whose lowercase form is in ``stopwords`` are skipped, as are
	    tokens whose tag prefix has no entry in ``tagsswn``. For each remaining
	    token only the first synset returned by SentiWordNet is scored.

	    Input:
	        aval (str): Text to analyse.
	        stopwords: Collection of lowercase words to ignore (tested with
	            ``in``). Note that inside this function the name shadows the
	            ``nltk.corpus.stopwords`` import.

	    Output:
	        tuple: Sum of the positive scores and sum of the negative scores.
	        Each sum is 0 when no token was scored.
	    """
	    tokenized = [nltk.word_tokenize(sent) for sent in nltk.sent_tokenize(aval)]

	    # pos_tag_sents returns one list of (token, tag) pairs per sentence;
	    # collapse them into a single flat list of pairs.
	    tagged_tokens = [
	        pair
	        for tagged_sentence in nltk.pos_tag_sents(tokenized)
	        for pair in tagged_sentence
	    ]

	    positives, negatives = [], []
	    for token, tag in tagged_tokens:
	        if token.lower() in stopwords:
	            continue

	        # Only the first two characters of the tag are used for the lookup,
	        # so e.g. "NNS" and "NN" share the same entry.
	        wordnet_pos = tagsswn.get(tag[:2])
	        if not wordnet_pos:
	            continue

	        candidates = list(swn.senti_synsets(token.lower(), wordnet_pos))
	        if not candidates:
	            continue

	        # Score only the first synset SentiWordNet returns for the token.
	        first = candidates[0]
	        positives.append(first.pos_score())
	        negatives.append(first.neg_score())

	    total_pos = np.sum(positives) if positives else 0
	    total_neg = np.sum(negatives) if negatives else 0
	    return total_pos, total_neg

	def classify_sentiment(aval, stopwords):
	    """
	    Label a text as positive or negative from its sentiment totals.

	    Input:
	        aval (str): Text to classify.
	        stopwords: Collection of words to ignore, forwarded unchanged to
	            ``get_sentiment``.

	    Output:
	        str: "positive" when the positive total is strictly greater than
	        the negative total, "negative" otherwise (ties included).
	    """
	    positive_total, negative_total = get_sentiment(aval, stopwords)
	    if positive_total > negative_total:
	        return "positive"
	    # Equal totals fall through to the negative label.
	    return "negative"


	def wordnet_pipeline(df, column):
	    """
	    Classify every text in ``df[column]`` with the SentiWordNet classifier.

	    Downloads the NLTK resources the classifier relies on, builds the
	    English stopword set once, then labels each entry with
	    ``classify_sentiment``.

	    Input:
	        df: Object indexable by ``column`` whose result can be iterated
	            (presumably a pandas DataFrame; verify against the caller).
	        column: Key selecting the texts to classify.

	    Output:
	        list: One "positive"/"negative" label per entry of ``df[column]``,
	        in iteration order.
	    """
	    # NLTK >= 3.9 loads the tokenizer and tagger models from "punkt_tab" and
	    # "averaged_perceptron_tagger_eng"; older releases use the un-suffixed
	    # names. Fetch both so the pipeline works regardless of the installed
	    # NLTK version.
	    resources = (
	        'sentiwordnet',
	        'wordnet',
	        'stopwords',
	        'punkt',
	        'punkt_tab',
	        'averaged_perceptron_tagger',
	        'averaged_perceptron_tagger_eng',
	    )
	    for resource in resources:
	        nltk.download(resource)

	    # Build the set once so each membership test in the classifier is O(1).
	    stpwrds = set(stopwords.words("english"))

	    return [classify_sentiment(review, stpwrds) for review in df[column]]