Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import nltk
|
3 |
+
import keras
|
4 |
+
import spacy
|
5 |
+
import string
|
6 |
+
import pickle
|
7 |
+
import tempfile
|
8 |
+
import numpy as np
|
9 |
+
import gradio as gr
|
10 |
+
import contractions
|
11 |
+
import tensorflow as tf
|
12 |
+
from nltk.stem import WordNetLemmatizer
|
13 |
+
from nltk.tokenize import word_tokenize
|
14 |
+
from nltk.corpus import stopwords, wordnet
|
15 |
+
from tensorflow.keras.layers import Layer
|
16 |
+
from tensorflow.keras import backend as K
|
17 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
18 |
+
|
19 |
+
|
20 |
+
class Attention(Layer):
    """Attention layer that re-weights its input along axis 1.

    For every position along axis 1 a score ``tanh(x . W + b)`` is computed,
    the scores are normalised with a softmax over axis 1, and the input is
    multiplied by the resulting weights.

    Args:
        return_sequences: When true (the default) the re-weighted input is
            returned as is; otherwise it is summed over axis 1 first.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, return_sequences=True, **kwargs):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the score kernel ``W`` and the per-position bias ``b``."""
        # One score weight per entry of the last input axis.
        self.W = self.add_weight(
            name="att_weight",
            shape=(input_shape[-1], 1),
            initializer="normal",
        )
        # The bias has one entry per position along axis 1, so that
        # dimension must be known when the layer is built.
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[1], 1),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, x):
        """Apply the attention weights to ``x``.

        Returns:
            ``x * a`` when ``return_sequences`` is true, otherwise the sum of
            ``x * a`` over axis 1, where ``a`` is the softmax of the scores.
        """
        # presumably x is (batch, timesteps, features) -- TODO confirm
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a

        if self.return_sequences:
            return output

        return K.sum(output, axis=1)

    def get_config(self):
        """Return the layer config, including ``return_sequences``.

        Without this entry the flag is not part of the serialised layer and
        a reloaded model falls back to the constructor default.
        """
        config = super().get_config()
        config.update({"return_sequences": self.return_sequences})
        return config
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
def load_tokenizer(path):
    """Unpickle and return the object stored in the file at ``path``.

    Args:
        path: Location of the pickle file; it is opened in binary mode.

    Returns:
        Whatever object the pickle file contains.
    """
    # NOTE: pickle can run arbitrary code while loading; only use trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)
|
52 |
+
|
53 |
+
|
54 |
+
def cleaning(text):
    """Normalise raw text into a string of lower-cased lemmas.

    Steps, in order: expand contractions, lower-case, replace @mentions and
    #hashtags with a space, drop possessive ``'s``, replace every character
    that is not an ASCII letter, digit or whitespace with a space, replace
    digits with a space, collapse whitespace, remove English stop words and
    lemmatise each remaining token according to its part-of-speech tag.

    Args:
        text: The raw text to clean.

    Returns:
        The lemmas of the surviving tokens joined by single spaces.
    """
    text = contractions.fix(text).lower()

    # Mentions and hashtags become a space so neighbouring words do not fuse.
    text = re.sub(r'@\w*', ' ', text)
    text = re.sub(r'#\w*', ' ', text)
    text = re.sub(r"'s\b", "", text)

    # Only ASCII letters, digits and whitespace survive this substitution,
    # which is why no separate punctuation-stripping pass is required.
    text = re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
    text = re.sub(r'[\d]', ' ', text)

    # Collapse whitespace runs. The previous pattern, '^\S', instead
    # overwrote the first character of the text with a space.
    text = re.sub(r'\s+', ' ', text).strip()

    stop_words = set(stopwords.words('english'))
    tokens = [tok for tok in word_tokenize(text)
              if tok.lower() not in stop_words]

    # First letter of the NLTK POS tag -> WordNet POS; anything else is
    # treated as a noun.
    tag_map = {
        'N': wordnet.NOUN,
        'V': wordnet.VERB,
        'R': wordnet.ADV,
        'J': wordnet.ADJ,
    }
    lemmatizer = WordNetLemmatizer()
    lemmas = [
        lemmatizer.lemmatize(tok, tag_map.get(tag[0].upper(), wordnet.NOUN))
        for tok, tag in nltk.pos_tag(tokens)
    ]
    return " ".join(lemmas)
|
127 |
+
|
128 |
+
|
129 |
+
def label_tweet(test_review):
    """Score one text with the module-level ``model`` and return 0 or 1.

    The text is encoded with the module-level ``tokenizer``, passed through
    ``pad_sequences`` with ``maxlen=44`` and ``padding='post'``, and fed to
    ``model.predict``.

    Args:
        test_review: The text to classify.

    Returns:
        1 when the prediction is at least 0.5, otherwise 0.
    """
    encoded = tokenizer.texts_to_sequences([test_review])[0]
    batch = pad_sequences([encoded], maxlen=44, padding='post')
    score = model.predict(batch, verbose=0)
    return 1 if score >= 0.5 else 0
|
137 |
+
|
138 |
+
|
139 |
+
def analyze_text(comment):
    """Return "Negative" or "Positive" for a raw comment.

    The comment is run through ``cleaning`` and the result through
    ``label_tweet``; a label of 0 maps to "Negative", any other label to
    "Positive".
    """
    label = label_tweet(cleaning(comment))
    return "Negative" if label == 0 else "Positive"
|
147 |
+
|
148 |
+
|
149 |
+
# Restore the saved network. The custom Attention class is passed in so Keras
# can resolve it while rebuilding the model.
model = keras.models.load_model(
    "twitter_sentiment.keras",
    custom_objects={'Attention': Attention},
)

# Tokenizer unpickled from disk; label_tweet uses it to encode text.
tokenizer = load_tokenizer('tokenizer.pkl')

# NOTE(review): gr.inputs.Textbox and the 'darkhuggingface' theme look like
# older Gradio APIs -- confirm the pinned Gradio version before upgrading.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.inputs.Textbox(
        lines=2,
        placeholder='Enter a positive or negative tweet here...',
    ),
    outputs='text',
    title='Twitter Sentimental Analysis',
    theme='darkhuggingface',
)
interface.launch(inline=False)
|