rmayormartins
commited on
Commit
•
aa5f929
1
Parent(s):
d6cc6b2
Adicionados app.py e requirements.txt; modificado README.md
Browse files- README.md +44 -7
- app.py +107 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1,13 +1,50 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.12.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: ecl-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: sentiment-analysis-committee
|
3 |
+
emoji: 👥
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: "4.12.0"
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
+
|
13 |
+
# Sentiment Analysis Committee
|
14 |
+
|
15 |
+
A comprehensive sentiment analysis tool using multiple methods, including BERT (Base and Large), DistilBERT, SiEBERT, TextBlob, VADER, and AFINN.
|
16 |
+
|
17 |
+
## How to Use
|
18 |
+
|
19 |
+
Enter text into the interface to receive sentiment analyses from various methods. The committee's decision is based on the majority of votes among the methods.
|
20 |
+
|
21 |
+
## Technical Details
|
22 |
+
|
23 |
+
This project leverages various natural language processing models to evaluate the sentiment of entered text:
|
24 |
+
|
25 |
+
- **BERT Base and BERT Large**: Transformer-based models providing sentiment scores and labels. BERT Large is a larger variant of BERT with more layers, potentially offering more nuanced sentiment analysis.
|
26 |
+
- **DistilBERT**: A distilled version of BERT, optimized for speed and efficiency.
|
27 |
+
- **SiEBERT**: A RoBERTa-based model fine-tuned for sentiment analysis.
|
28 |
+
- **TextBlob**: Uses TextBlob's default pattern-based (lexicon) sentiment analyzer, which returns a polarity score between -1 and 1.
|
29 |
+
- **VADER**: Designed for social media and short texts, giving a compound sentiment score.
|
30 |
+
- **AFINN**: A lexical method assigning scores to words, indicating sentiment intensity.
|
31 |
+
|
32 |
+
The final decision of the committee is determined by a majority vote approach, providing a balanced sentiment analysis.
|
33 |
+
|
34 |
+
## Additional Information
|
35 |
+
|
36 |
+
- Developed by Ramon Mayor Martins (2023)
|
37 |
+
- E-mail: [[email protected]](mailto:[email protected])
|
38 |
+
- Homepage: [https://rmayormartins.github.io/](https://rmayormartins.github.io/)
|
39 |
+
- Twitter: [@rmayormartins](https://twitter.com/rmayormartins)
|
40 |
+
- GitHub: [https://github.com/rmayormartins](https://github.com/rmayormartins)
|
41 |
+
|
42 |
+
## Notes
|
43 |
+
|
44 |
+
- The committee's decision is democratic, based on the majority vote from the utilized methods.
|
45 |
+
- The project is implemented in Python and hosted on Hugging Face Spaces.
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
app.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import gradio as gr
|
3 |
+
from textblob import TextBlob
|
4 |
+
import numpy as np
|
5 |
+
import nltk
|
6 |
+
from nltk.sentiment import SentimentIntensityAnalyzer
|
7 |
+
from afinn import Afinn
|
8 |
+
|
9 |
+
|
10 |
+
# Lexicon-based analyzers: VADER (backed by the NLTK 'vader_lexicon'
# resource, fetched here before the analyzer is built) and AFINN.
nltk.download('vader_lexicon')
vader = SentimentIntensityAnalyzer()
afinn = Afinn()

# Hugging Face pipelines, one per transformer model on the committee.
# NOTE(review): the three "*-uncased" names look like base pre-trained
# checkpoints rather than sentiment fine-tuned ones; if so, their
# classification heads are untrained and their votes are arbitrary - confirm.
bert_model = pipeline(
    task="sentiment-analysis",
    model="bert-base-uncased",
)
# BERT Large
bert_large_model = pipeline(
    task="sentiment-analysis",
    model="bert-large-uncased",
)
distilbert_model = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased",
)
siebert_model = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)
|
21 |
+
|
22 |
+
|
23 |
+
def normalize_score(score, range_min, range_max):
    """Linearly rescale ``score`` from ``[range_min, range_max]`` onto ``[0, 1]``.

    ``range_min`` maps to 0 and ``range_max`` maps to 1. No clamping is
    applied, so a score outside the given range yields a value outside
    ``[0, 1]``. Equal bounds make the divisor zero and the division raises.
    """
    span = range_max - range_min
    offset = score - range_min
    return offset / span
|
25 |
+
|
26 |
+
|
27 |
+
def analyze_with_bert(text):
    """Classify ``text`` with the BERT Base pipeline.

    Returns a ``(label, score)`` tuple built from the first prediction:
    the raw label translated by ``map_label`` and the score exactly as the
    pipeline reported it.
    """
    top_prediction = bert_model(text)[0]
    return map_label(top_prediction['label']), top_prediction['score']
|
31 |
+
|
32 |
+
|
33 |
+
def analyze_with_bert_large(text):
    """Classify ``text`` with the BERT Large pipeline.

    Returns a ``(label, score)`` tuple built from the first prediction:
    the raw label translated by ``map_label`` and the score exactly as the
    pipeline reported it.
    """
    top_prediction = bert_large_model(text)[0]
    return map_label(top_prediction['label']), top_prediction['score']
|
37 |
+
|
38 |
+
def analyze_with_distilbert(text):
    """Classify ``text`` with the DistilBERT pipeline.

    Returns a ``(label, score)`` tuple built from the first prediction:
    the raw label translated by ``map_label`` and the score exactly as the
    pipeline reported it.
    """
    top_prediction = distilbert_model(text)[0]
    return map_label(top_prediction['label']), top_prediction['score']
|
42 |
+
|
43 |
+
def analyze_with_siebert(text):
    """Classify ``text`` with the SiEBERT pipeline.

    Returns the first prediction's ``(label, score)`` as reported by the
    pipeline; unlike the BERT helpers, the label is not passed through
    ``map_label``.
    """
    top_prediction = siebert_model(text)[0]
    label = top_prediction['label']
    score = top_prediction['score']
    return label, score
|
46 |
+
|
47 |
+
def analyze_with_textblob(text):
    """Score ``text`` with TextBlob's default sentiment analysis.

    The label follows the sign of the polarity (zero is "NEUTRAL"). The
    returned score is the polarity rescaled from ``[-1, 1]`` to ``[0, 1]``.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        label = "POSITIVE"
    elif polarity < 0:
        label = "NEGATIVE"
    else:
        label = "NEUTRAL"
    return label, normalize_score(polarity, -1, 1)
|
52 |
+
|
53 |
+
def analyze_with_vader(text):
    """Score ``text`` with VADER's compound sentiment score.

    A compound score above 0.05 is "POSITIVE", below -0.05 is "NEGATIVE",
    and anything in between is "NEUTRAL". The returned score is the
    compound value rescaled from ``[-1, 1]`` to ``[0, 1]``.
    """
    compound = vader.polarity_scores(text)['compound']
    if compound > 0.05:
        label = "POSITIVE"
    elif compound < -0.05:
        label = "NEGATIVE"
    else:
        label = "NEUTRAL"
    return label, normalize_score(compound, -1, 1)
|
58 |
+
|
59 |
+
def analyze_with_afinn(text):
    """Score ``text`` with the AFINN lexicon.

    The label follows the sign of the AFINN score (zero is "NEUTRAL").
    The returned score is the AFINN score rescaled from ``[-5, 5]`` to
    ``[0, 1]`` and then clamped to that interval.
    """
    score = afinn.score(text)
    label = "POSITIVE" if score > 0 else "NEGATIVE" if score < 0 else "NEUTRAL"
    normalized_score = normalize_score(score, -5, 5)
    # -5..5 is the valence range of a single word, but the score of a whole
    # text is accumulated over its words and can exceed it; clamp so the
    # result stays on the same 0..1 scale as the other analyzers.
    normalized_score = max(0.0, min(1.0, normalized_score))
    return label, normalized_score
|
64 |
+
|
65 |
+
# Label mapping for BERT and DistilBERT
def map_label(label):
    """Translate a raw pipeline label into POSITIVE / NEGATIVE / NEUTRAL.

    ``LABEL_0`` maps to "NEGATIVE" and ``LABEL_1`` to "POSITIVE". Labels
    that are already spelled "NEGATIVE" or "POSITIVE" are passed through
    instead of being collapsed to "NEUTRAL". Matching is case-insensitive.
    Any other label maps to "NEUTRAL".
    """
    known_labels = {
        "LABEL_0": "NEGATIVE",
        "LABEL_1": "POSITIVE",
        "NEGATIVE": "NEGATIVE",
        "POSITIVE": "POSITIVE",
    }
    # str() keeps the lookup safe for non-string or unhashable inputs.
    return known_labels.get(str(label).upper(), "NEUTRAL")
|
73 |
+
|
74 |
+
|
75 |
+
# Committee
def calculate_committee_decision(results):
    """Pick the label with the most votes among the analyzers' results.

    ``results`` maps an analyzer name to a ``(label, score)`` pair; only
    the label is used for voting. Returns ``(winning_label, vote_ratio)``
    where ``vote_ratio`` is the winner's share of all votes.

    Ties go to the first label in the order POSITIVE, NEGATIVE, NEUTRAL,
    followed by any unexpected labels in order of first appearance. An
    empty ``results`` yields ``("NEUTRAL", 0.0)`` instead of dividing by
    zero.
    """
    if not results:
        return "NEUTRAL", 0.0

    # Count votes
    vote_count = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    for label, _score in results.values():
        # .get() lets an unexpected label be counted rather than raising.
        vote_count[label] = vote_count.get(label, 0) + 1

    # Majority of votes
    final_label = max(vote_count, key=vote_count.get)
    return final_label, vote_count[final_label] / len(results)
|
85 |
+
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
def analyze_text(text):
    """Run every analyzer on ``text`` and append the committee's verdict.

    Returns a dict keyed by analyzer name whose values are the
    ``(label, score)`` pairs from each analyzer, plus a
    "Committee Decision" entry holding the winning label and its vote ratio.
    """
    analyzers = (
        ("BERT Base", analyze_with_bert),
        ("BERT Large", analyze_with_bert_large),
        ("DistilBERT", analyze_with_distilbert),
        ("SiEBERT", analyze_with_siebert),
        ("TextBlob", analyze_with_textblob),
        ("VADER", analyze_with_vader),
        ("AFINN", analyze_with_afinn),
    )
    results = {name: analyzer(text) for name, analyzer in analyzers}

    # The vote is taken before the committee entry is added, so it only
    # ever sees the individual analyzers.
    committee_label, committee_ratio = calculate_committee_decision(results)
    results["Committee Decision"] = {
        "label": committee_label,
        "vote_ratio": committee_ratio,
    }
    return results
|
103 |
+
|
104 |
+
|
105 |
+
# Gradio: free-text input, JSON output produced by analyze_text.
iface = gr.Interface(
    fn=analyze_text,
    inputs="text",
    outputs="json",
)
iface.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
gradio
|
3 |
+
textblob
|
4 |
+
nltk
|
5 |
+
afinn
|