Puyush committed on
Commit 6415799
1 Parent(s): a79dedc

Delete app.py

Files changed (1)
  1. app.py +0 -151
app.py DELETED
@@ -1,151 +0,0 @@
-import re
-import nltk
-nltk.download('all')
-import keras
-import spacy
-import string
-import pickle
-import tempfile
-import numpy as np
-import gradio as gr
-import contractions
-import tensorflow as tf
-from nltk.stem import WordNetLemmatizer
-from nltk.tokenize import word_tokenize
-from nltk.corpus import wordnet
-from tensorflow.keras.layers import Layer
-from tensorflow.keras import backend as K
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-
-
-class Attention(Layer):
-
-    def __init__(self, return_sequences=True, **kwargs):
-        self.return_sequences = return_sequences
-        super(Attention, self).__init__(**kwargs)
-
-    def build(self, input_shape):
-
-        self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
-                               initializer="normal")
-        self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
-                               initializer="zeros")
-
-        super(Attention,self).build(input_shape)
-
-    def call(self, x):
-
-        e = K.tanh(K.dot(x,self.W)+self.b)
-        a = K.softmax(e, axis=1)
-        output = x*a
-
-        if self.return_sequences:
-            return output
-
-        return K.sum(output, axis=1)
-
-
-
-def load_tokenizer(path):
-    with open(path, 'rb') as f:
-        tokenizer = pickle.load(f)
-    return tokenizer
-
-
-def cleaning(text):
-    # Punctuation symbols to remove
-    exclude = string.punctuation
-
-    def expand_contractions(text): return contractions.fix(text)
-    text = expand_contractions(text)
-
-    text = text.lower()
-
-    def remove_tags(text): return re.sub(r'@\w*', ' ' , text)
-    text = remove_tags(text)
-
-    def remove_hashtags(text): return re.sub(r'#\w*', ' ' , text)
-    text = remove_hashtags(text)
-
-    def remove_apostrophe(text): return re.sub(r"'s\b", "", text)
-    text = remove_apostrophe(text)
-
-    def remove_special_chars(text): return re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
-    text = remove_special_chars(text)
-
-    def remove_number(text): return re.sub(r'[\d]', ' ', text)
-    text = remove_number(text)
-
-    def remove_punc(text): return ''.join([c for c in text if c not in exclude])
-    text = remove_punc(text)
-
-    def remove_extra_spaces(text): return re.sub('^\S', ' ', text)
-    text = remove_extra_spaces(text)
-
-    def map_pos_tags(pos_tags):
-        # Map NLTK POS tags to WordNet tags
-        tag_map = {
-            'N': wordnet.NOUN,
-            'V': wordnet.VERB,
-            'R': wordnet.ADV,
-            'J': wordnet.ADJ
-        }
-
-        mapped_tags = []
-        for token, tag in pos_tags:
-            mapped_tag = tag[0].upper()
-            if mapped_tag in tag_map:
-                mapped_tag = tag_map[mapped_tag]
-            else:
-                mapped_tag = wordnet.NOUN  # Default to noun if no mapping found
-            mapped_tags.append(mapped_tag)
-
-        return mapped_tags
-
-    def pos_tag_and_lemmatize(text):
-        tokens = word_tokenize(text)
-        pos_tags = nltk.pos_tag(tokens)
-
-        # Map POS tags to WordNet tags
-        wordnet_tags = map_pos_tags(pos_tags)
-
-        # Lemmatize based on POS tags
-        lemmatizer = WordNetLemmatizer()
-        lemmas = " ".join([lemmatizer.lemmatize(token, tag) for token, tag in zip(tokens, wordnet_tags)])
-
-        return lemmas
-    text = pos_tag_and_lemmatize(text)
-
-    return text
-
-
-def label_tweet(test_review):
-    token_list = tokenizer.texts_to_sequences([test_review])[0]
-    token_list = pad_sequences([token_list], maxlen=44, padding='post')
-    predicted = model.predict(token_list, verbose=0)
-    if predicted >= 0.5:
-        return 1
-    else:
-        return 0
-
-
-def analyze_text(comment):
-    comment = cleaning(comment)
-    result = label_tweet(comment)
-    if result == 0:
-        text = "Negative"
-    else:
-        text = "Positive"
-    return text
-
-
-# It can be used to reconstruct the model identically.
-model = keras.models.load_model("twitter_sentiment.keras",
-                                custom_objects={'Attention': Attention})
-
-# Load tokenizer
-tokenizer = load_tokenizer('tokenizer.pkl')
-
-interface = gr.Interface(fn=analyze_text, inputs=gr.inputs.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
-                         outputs='text', title='Twitter Sentimental Analysis', theme='darkhuggingface')
-interface.launch(inline=False)