Spaces:

Puyush
/

Twitter_Sentimental_Analysis

Sleeping

App Files Files Community

Puyush committed on Jul 8, 2023

Commit

0197d35

•

1 Parent(s): a844ecc

Upload app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import re
+import nltk
+import keras
+import spacy
+import string
+import pickle
+import tempfile
+import numpy as np
+import gradio as gr
+import contractions
+import tensorflow as tf
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords, wordnet
+from tensorflow.keras.layers import Layer
+from tensorflow.keras import backend as K
+from tensorflow.keras.preprocessing.sequence import pad_sequences
class Attention(Layer):
    """Attention layer that re-weights a sequence along axis 1.

    A score is computed for every position as ``tanh(x . W + b)``, the scores
    are normalised with a softmax over axis 1, and the input is multiplied by
    the resulting weights.

    Args:
        return_sequences: If true, return the re-weighted input unchanged in
            shape; otherwise sum the re-weighted input over axis 1.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, return_sequences=True, **kwargs):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the score kernel and the per-position bias."""
        self.W = self.add_weight(
            name="att_weight",
            shape=(input_shape[-1], 1),
            initializer="normal",
        )
        # The bias has one entry per index of axis 1, so that dimension of
        # the input must be known when the layer is built.
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[1], 1),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, x):
        """Apply the attention weights to ``x``."""
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        weighted = x * weights
        if self.return_sequences:
            return weighted
        return K.sum(weighted, axis=1)

    def get_config(self):
        """Return the layer config, including ``return_sequences``.

        Without this override the constructor argument is not part of the
        serialized config, so a re-loaded layer would silently fall back to
        the default value.
        """
        config = super().get_config()
        config.update({"return_sequences": self.return_sequences})
        return config
def load_tokenizer(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so ``path`` must only ever
    point at a trusted file.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)
# Patterns used by ``cleaning``; compiled once instead of on every call.
_MENTION_RE = re.compile(r'@\w*')
_HASHTAG_RE = re.compile(r'#\w*')
_POSSESSIVE_RE = re.compile(r"'s\b")
# Anything that is not an ASCII letter or whitespace (covers punctuation,
# digits and other symbols in a single pass).
_NON_LETTER_RE = re.compile(r"[^a-zA-Z\s]")
_WHITESPACE_RE = re.compile(r"\s+")

# Filled on first use so that the NLTK corpus is only needed at call time.
_STOP_WORDS = None


def _english_stop_words():
    """Return the cached set of NLTK English stop words."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def _remove_stopwords(text):
    """Tokenize ``text`` and drop every token that is an English stop word."""
    stop_words = _english_stop_words()
    return ' '.join(
        word for word in word_tokenize(text) if word.lower() not in stop_words
    )


def _lemmatize(text):
    """POS-tag ``text`` and lemmatize each token with its WordNet POS."""
    # Map the first letter of the NLTK POS tag to a WordNet tag; anything
    # unmapped is treated as a noun.
    tag_map = {
        'N': wordnet.NOUN,
        'V': wordnet.VERB,
        'R': wordnet.ADV,
        'J': wordnet.ADJ,
    }
    lemmatizer = WordNetLemmatizer()
    tagged = nltk.pos_tag(word_tokenize(text))
    return " ".join(
        lemmatizer.lemmatize(token, tag_map.get(tag[:1].upper(), wordnet.NOUN))
        for token, tag in tagged
    )


def cleaning(text):
    """Normalise a raw tweet into a space-separated string of lemmas.

    Steps, in order: expand contractions, lowercase, blank out @mentions and
    #hashtags, strip possessive ``'s``, replace every character that is not
    an ASCII letter or whitespace with a space, collapse whitespace, remove
    English stop words, and lemmatize using POS tags.

    Args:
        text: The raw tweet text.

    Returns:
        The cleaned text as a single string.
    """
    text = contractions.fix(text).lower()
    text = _MENTION_RE.sub(' ', text)
    text = _HASHTAG_RE.sub(' ', text)
    text = _POSSESSIVE_RE.sub('', text)
    text = _NON_LETTER_RE.sub(' ', text)
    # Collapse whitespace runs. The previous pattern ('^\S') instead replaced
    # the first character of the text with a space, corrupting the first word.
    text = _WHITESPACE_RE.sub(' ', text).strip()
    text = _remove_stopwords(text)
    return _lemmatize(text)
def label_tweet(test_review, max_len=44, threshold=0.5):
    """Classify an already-cleaned tweet with the module-level model.

    Args:
        test_review: Cleaned tweet text.
        max_len: Length the token sequence is post-padded to. The default of
            44 presumably matches the length used when the model was trained
            (TODO confirm against the training code).
        threshold: Minimum predicted score that is labelled as class 1.

    Returns:
        1 if the predicted score is at least ``threshold``, otherwise 0.

    Raises:
        ValueError: If the model output does not contain exactly one value.
    """
    sequence = tokenizer.texts_to_sequences([test_review])[0]
    padded = pad_sequences([sequence], maxlen=max_len, padding='post')
    prediction = model.predict(padded, verbose=0)
    # Pull out the single score explicitly instead of relying on the implicit
    # truth value of an array; ``item()`` rejects multi-element outputs.
    score = float(np.asarray(prediction).item())
    return 1 if score >= threshold else 0
def analyze_text(comment):
    """Clean ``comment``, classify it and return "Negative" or "Positive"."""
    label = label_tweet(cleaning(comment))
    return "Negative" if label == 0 else "Positive"
# Restore the saved model; the custom Attention class is passed in so that
# Keras can rebuild that layer when deserializing.
model = keras.models.load_model(
    "twitter_sentiment.keras",
    custom_objects={'Attention': Attention},
)
# Load tokenizer
tokenizer = load_tokenizer('tokenizer.pkl')

# ``gr.inputs`` is a deprecated namespace that newer Gradio releases removed.
# Prefer the top-level component and fall back only on old versions.
_textbox_cls = getattr(gr, 'Textbox', None) or gr.inputs.Textbox
interface = gr.Interface(
    fn=analyze_text,
    inputs=_textbox_cls(
        lines=2,
        placeholder='Enter a positive or negative tweet here...',
    ),
    outputs='text',
    title='Twitter Sentimental Analysis',
    theme='darkhuggingface',
)
interface.launch(inline=False)