luisespinosa
committed on
Commit
•
199ac16
1
Parent(s):
0b3eea5
Update README.md
Browse files
README.md
CHANGED
@@ -1,19 +1,9 @@
|
|
1 |
# twitter-XLM-roBERTa-base for Sentiment Analysis
|
2 |
|
|
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.
|
14 |
-
|
15 |
-
- Paper: [_TweetEval_ benchmark (Findings of EMNLP 2020)](https://arxiv.org/pdf/2010.12421.pdf).
|
16 |
-
- Git Repo: [Tweeteval official repository](https://github.com/cardiffnlp/tweeteval).
|
17 |
|
18 |
## Example of classification
|
19 |
|
@@ -37,22 +27,17 @@ def preprocess(text):
|
|
37 |
new_text.append(t)
|
38 |
return " ".join(new_text)
|
39 |
|
40 |
-
# Tasks:
|
41 |
-
# emoji, emotion, hate, irony, offensive, sentiment
|
42 |
-
# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
|
43 |
-
|
44 |
-
task='sentiment'
|
45 |
-
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
|
46 |
|
|
|
47 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
48 |
|
49 |
# download label mapping
|
50 |
labels=[]
|
51 |
-
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/
|
52 |
with urllib.request.urlopen(mapping_link) as f:
|
53 |
-
html = f.read().decode('utf-8').split("
|
54 |
")
|
55 |
-
csvreader = csv.reader(html, delimiter='
|
56 |
labels = [row[1] for row in csvreader if len(row) > 1]
|
57 |
|
58 |
# PT
|
@@ -88,8 +73,8 @@ for i in range(scores.shape[0]):
|
|
88 |
Output:
|
89 |
|
90 |
```
|
91 |
-
1) positive 0.
|
92 |
-
2) neutral 0.
|
93 |
-
3) negative 0.
|
94 |
```
|
95 |
|
|
|
1 |
# twitter-XLM-roBERTa-base for Sentiment Analysis
|
2 |
|
3 |
+
This is an XLM-roBERTa-base model trained on ~198M tweets and finetuned for sentiment analysis in
|
4 |
|
5 |
+
- Paper: [XLM-T: A Multilingual Language Model Toolkit for Twitter](https://...).
|
6 |
+
- Git Repo: [XLM-T official repository](https://github.com/cardiffnlp/xlm-t).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
## Example of classification
|
9 |
|
|
|
27 |
new_text.append(t)
|
28 |
return " ".join(new_text)
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
MODEL = f"cardiffnlp/twitter-xlm-roberta-base-sentiment"
|
32 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
33 |
|
34 |
# download label mapping
|
35 |
labels=[]
|
36 |
+
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
|
37 |
with urllib.request.urlopen(mapping_link) as f:
|
38 |
+
html = f.read().decode('utf-8').split("\\
|
39 |
")
|
40 |
+
csvreader = csv.reader(html, delimiter='\\\\t')
|
41 |
labels = [row[1] for row in csvreader if len(row) > 1]
|
42 |
|
43 |
# PT
|
|
|
73 |
Output:
|
74 |
|
75 |
```
|
76 |
+
1) positive 0.76726073
|
77 |
+
2) neutral 0.201
|
78 |
+
3) negative 0.0312
|
79 |
```
|
80 |
|