Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,10 @@ from transformers import BertTokenizer, BertModel
|
|
3 |
from huggingface_hub import PyTorchModelHubMixin
|
4 |
import numpy as np
|
5 |
import gradio as gr
|
|
|
|
|
|
|
|
|
6 |
|
7 |
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
8 |
device
|
@@ -33,7 +37,30 @@ tokenizer = BertTokenizer.from_pretrained('digitalepidemiologylab/covid-twitter-
|
|
33 |
MAX_LEN = 256
|
34 |
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
def return_vec(text):
|
|
|
37 |
encodings = tokenizer.encode_plus(
|
38 |
text,
|
39 |
None,
|
|
|
3 |
from huggingface_hub import PyTorchModelHubMixin
|
4 |
import numpy as np
|
5 |
import gradio as gr
|
6 |
+
import nltk
|
7 |
+
nltk.download('stopwords')
|
8 |
+
from nltk.corpus import stopwords
|
9 |
+
import re
|
10 |
|
11 |
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
12 |
device
|
|
|
37 |
MAX_LEN = 256
|
38 |
|
39 |
|
40 |
+
def rmTrash(raw_string, remuser, remstop, remurls):
    """Clean a raw text string before it is handed to the tokenizer.

    The steps run in this fixed order:

    1. If ``remuser`` is truthy, drop every whitespace-separated token that
       contains ``@``.
    2. Lowercase the text and delete every character that is neither a word
       character nor whitespace.
    3. If ``remurls`` is truthy, delete every run of non-whitespace that
       starts with ``http``.
    4. If ``remstop`` is truthy, drop every token found in NLTK's English
       stopword list.

    Args:
        raw_string: The text to clean.
        remuser: Whether to remove tokens containing ``@``.
        remstop: Whether to remove English stopwords.
        remurls: Whether to remove ``http``-prefixed words.

    Returns:
        The cleaned string. Whenever tokens are re-joined (``remuser`` or
        ``remstop`` enabled) each kept token is prefixed by one space, so
        the result starts with a space and whitespace runs are collapsed.
    """
    if remuser:
        # Prefix every kept token with a space (rather than " ".join) so the
        # output is identical to the historical accumulate-in-a-loop result.
        cleaned = ''.join(
            ' ' + token for token in raw_string.split() if '@' not in token
        )
    else:
        cleaned = raw_string

    cleaned = re.sub(r'[^\w\s]', '', cleaned.lower())

    if remurls:
        # Punctuation has already been stripped, so a URL is by now a single
        # "http..." word; the text is already lowercase at this point.
        cleaned = re.sub(r'http\S+', '', cleaned)

    if not remstop:
        return cleaned

    # Fetch the stopword list once and use a set: the lookup is
    # loop-invariant, and calling stopwords.words() per token is wasteful.
    english_stopwords = frozenset(stopwords.words('english'))
    return ''.join(
        ' ' + token
        for token in cleaned.split()
        if token not in english_stopwords
    )
|
60 |
+
|
61 |
+
|
62 |
def return_vec(text):
|
63 |
+
text = rmTrash(text,True,True,True)
|
64 |
encodings = tokenizer.encode_plus(
|
65 |
text,
|
66 |
None,
|