Delete app.py
app.py
DELETED
@@ -1,151 +0,0 @@
-import re
-import nltk
-nltk.download('all')
-import keras
-import spacy
-import string
-import pickle
-import tempfile
-import numpy as np
-import gradio as gr
-import contractions
-import tensorflow as tf
-from nltk.stem import WordNetLemmatizer
-from nltk.tokenize import word_tokenize
-from nltk.corpus import wordnet
-from tensorflow.keras.layers import Layer
-from tensorflow.keras import backend as K
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-
-
-class Attention(Layer):
-
-    def __init__(self, return_sequences=True, **kwargs):
-        self.return_sequences = return_sequences
-        super(Attention, self).__init__(**kwargs)
-
-    def build(self, input_shape):
-
-        self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
-                               initializer="normal")
-        self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
-                               initializer="zeros")
-
-        super(Attention,self).build(input_shape)
-
-    def call(self, x):
-
-        e = K.tanh(K.dot(x,self.W)+self.b)
-        a = K.softmax(e, axis=1)
-        output = x*a
-
-        if self.return_sequences:
-            return output
-
-        return K.sum(output, axis=1)
-
-
-
-def load_tokenizer(path):
-    with open(path, 'rb') as f:
-        tokenizer = pickle.load(f)
-    return tokenizer
-
-
-def cleaning(text):
-    # Punctuation symbols to remove
-    exclude = string.punctuation
-
-    def expand_contractions(text): return contractions.fix(text)
-    text = expand_contractions(text)
-
-    text = text.lower()
-
-    def remove_tags(text): return re.sub(r'@\w*', ' ' , text)
-    text = remove_tags(text)
-
-    def remove_hashtags(text): return re.sub(r'#\w*', ' ' , text)
-    text = remove_hashtags(text)
-
-    def remove_apostrophe(text): return re.sub(r"'s\b", "", text)
-    text = remove_apostrophe(text)
-
-    def remove_special_chars(text): return re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
-    text = remove_special_chars(text)
-
-    def remove_number(text): return re.sub(r'[\d]', ' ', text)
-    text = remove_number(text)
-
-    def remove_punc(text): return ''.join([c for c in text if c not in exclude])
-    text = remove_punc(text)
-
-    def remove_extra_spaces(text): return re.sub('^\S', ' ', text)
-    text = remove_extra_spaces(text)
-
-    def map_pos_tags(pos_tags):
-        # Map NLTK POS tags to WordNet tags
-        tag_map = {
-            'N': wordnet.NOUN,
-            'V': wordnet.VERB,
-            'R': wordnet.ADV,
-            'J': wordnet.ADJ
-        }
-
-        mapped_tags = []
-        for token, tag in pos_tags:
-            mapped_tag = tag[0].upper()
-            if mapped_tag in tag_map:
-                mapped_tag = tag_map[mapped_tag]
-            else:
-                mapped_tag = wordnet.NOUN # Default to noun if no mapping found
-            mapped_tags.append(mapped_tag)
-
-        return mapped_tags
-
-    def pos_tag_and_lemmatize(text):
-        tokens = word_tokenize(text)
-        pos_tags = nltk.pos_tag(tokens)
-
-        # Map POS tags to WordNet tags
-        wordnet_tags = map_pos_tags(pos_tags)
-
-        # Lemmatize based on POS tags
-        lemmatizer = WordNetLemmatizer()
-        lemmas = " ".join([lemmatizer.lemmatize(token, tag) for token, tag in zip(tokens, wordnet_tags)])
-
-        return lemmas
-    text = pos_tag_and_lemmatize(text)
-
-    return text
-
-
-def label_tweet(test_review):
-    token_list = tokenizer.texts_to_sequences([test_review])[0]
-    token_list = pad_sequences([token_list], maxlen=44, padding='post')
-    predicted = model.predict(token_list, verbose=0)
-    if predicted >= 0.5:
-        return 1
-    else:
-        return 0
-
-
-def analyze_text(comment):
-    comment = cleaning(comment)
-    result = label_tweet(comment)
-    if result == 0:
-        text = "Negative"
-    else:
-        text = "Positive"
-    return text
-
-
-# It can be used to reconstruct the model identically.
-model = keras.models.load_model("twitter_sentiment.keras",
-                                custom_objects={'Attention': Attention})
-
-# Load tokenizer
-tokenizer = load_tokenizer('tokenizer.pkl')
-
-interface = gr.Interface(fn=analyze_text, inputs=gr.inputs.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
-                         outputs='text',title='Twitter Sentimental Analysis', theme='darkhuggingface')
-interface.launch(inline=False)
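Note: the deleted file uses the legacy `gr.inputs` namespace and string themes, which were removed in Gradio 3.x. A minimal sketch of an equivalent interface definition under current Gradio versions (assuming the same `analyze_text` function; the legacy `darkhuggingface` theme string has no direct modern counterpart and is omitted):

import gradio as gr

# Hypothetical Gradio 3.x+ equivalent of the deleted interface definition.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
    outputs='text',
    title='Twitter Sentimental Analysis',
)
interface.launch()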
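For reference, the custom Attention layer deleted above can be smoke-tested in isolation; a minimal sketch assuming TensorFlow 2.x, with the timestep count chosen to match the model's maxlen of 44:

import tensorflow as tf

# With return_sequences=False the layer collapses the time axis:
# (batch, timesteps, features) -> (batch, features).
layer = Attention(return_sequences=False)
x = tf.random.normal((2, 44, 64))  # hypothetical batch of 2, 44 timesteps, 64 features
print(layer(x).shape)  # expected: (2, 64)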