sashtech committed on
Commit
cfaf614
·
verified ·
1 Parent(s): c824c99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +278 -23
app.py CHANGED
@@ -1,24 +1,279 @@
 
1
  import gradio as gr
2
- from ginger import correct_sentence # Import the correct_sentence function from ginger.py
3
-
4
- def grammar_corrector(text):
5
- """
6
- This function calls the Ginger API function to correct the text.
7
- """
8
- return correct_sentence(text)
9
-
10
- def main():
11
- # Create the Gradio interface
12
- interface = gr.Interface(
13
- fn=grammar_corrector, # Function to correct the grammar
14
- inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."), # Input box for sentence
15
- outputs=gr.Textbox(label="Corrected Sentence"), # Output box for corrected sentence
16
- title="Grammar Correction App", # Title for the app
17
- description="Enter a sentence and click 'Submit' to see the corrected version.", # Brief description
18
- )
19
-
20
- # Launch the interface
21
- interface.launch()
22
-
23
- if __name__ == "__main__":
24
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import wordnet
from spellchecker import SpellChecker
from transformers import pipeline
9
+
10
# Initialize the English text classification pipeline for AI detection.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker.
spell = SpellChecker()

# Ensure necessary NLTK data is downloaded (quiet=True keeps startup logs clean).
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Ensure the SpaCy model is installed. Use sys.executable so the download
# runs under the same interpreter that is executing this app ("python" may
# resolve to a different interpreter, or not exist at all, on the host);
# check=True surfaces a failed download instead of crashing on the reload.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
26
+
27
# Function to predict the label and score for English text (AI Detection)
def predict_en(text):
    """Classify *text* with the AI-detection pipeline.

    Returns the (label, score) pair of the top classification result.
    """
    top = pipeline_en(text)[0]
    label, score = top['label'], top['score']
    return label, score
31
+
32
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return synonym strings for *word* restricted to part of speech *pos*.

    Uses the first (most common) WordNet synset. WordNet joins multi-word
    lemmas with underscores (e.g. 'look_into'); those are converted to
    spaces so callers can splice synonyms into running text. Returns []
    when WordNet has no synset for the word/pos pair.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    # .replace('_', ' ') keeps multi-word lemmas readable in output text.
    return [lemma.name().replace('_', ' ') for lemma in synsets[0].lemmas()]
39
+
40
# Function to remove redundant and meaningless words
def remove_redundant_words(text):
    """Strip common filler words ("actually", "very", ...) from *text*."""
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    kept = []
    for token in nlp(text):
        if token.text.lower() in fillers:
            continue
        kept.append(token.text)
    return ' '.join(kept)
46
+
47
# Function to fix spacing around punctuation
def fix_punctuation_spacing(text):
    """Collapse line breaks and remove stray whitespace before punctuation.

    Joins all lines of *text* into one space-separated line, then deletes
    any run of whitespace that immediately precedes , . ; : ? or !.
    (The original chained .replace() calls only handled a single space
    before , . and ; — multiple spaces and ? ! : slipped through.)
    """
    text = ' '.join(part.strip() for part in text.splitlines())
    return re.sub(r'\s+([,.;:?!])', r'\1', text)
52
+
53
# Updated function to capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Capitalize the first word of each sentence and every proper noun.

    Punctuation spacing is normalized first so spaCy's sentence boundary
    detection is reliable. Only the first letter of a word is upper-cased
    (str.capitalize() lowercases the remainder, which mangled acronyms
    such as "USA" into "Usa").
    """
    def _cap_first(word):
        # Upper-case only the leading character; leave the rest intact.
        return word[0].upper() + word[1:] if word else word

    text = fix_punctuation_spacing(text)
    doc = nlp(text)
    corrected_sentences = []
    for sent in doc.sents:
        words = []
        for token in sent:
            # First word of the sentence, or any proper noun.
            if token.i == sent.start or token.pos_ == "PROPN":
                words.append(_cap_first(token.text))
            else:
                words.append(token.text)
        corrected_sentences.append(' '.join(words))
    return ' '.join(corrected_sentences)
71
+
72
# Function to handle possessive forms
def correct_possessives(text):
    """Append "'s" to possessive tokens whose head is a noun.

    NOTE(review): spaCy typically splits "John's" into "John" + "'s";
    this pass assumes the possessive marker is absent from the token —
    confirm against real pipeline output.
    """
    doc = nlp(text)
    rebuilt = [
        f"{token.text}'s"
        if token.dep_ == 'poss' and token.head.pos_ == 'NOUN'
        else token.text
        for token in doc
    ]
    return ' '.join(rebuilt)
84
+
85
# Function to correct tense errors in a sentence
def correct_tense_errors(text):
    """Reduce auxiliary verbs to their base (lemma) form via WordNet morphy."""
    words = []
    for token in nlp(text):
        word = token.text
        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
            # morphy returns None for unknown forms; keep the original then.
            word = wordnet.morphy(word, wordnet.VERB) or word
        words.append(word)
    return ' '.join(words)
96
+
97
# Function to correct singular/plural errors
def correct_singular_plural_errors(text):
    """Fix noun number flagged by nearby quantifiers.

    "many/several/few" among the head's children pluralizes a singular
    noun (NN); "a/one" singularizes a plural noun (NNS). All other
    tokens pass through unchanged.
    """
    doc = nlp(text)
    result = []
    for token in doc:
        if token.pos_ != "NOUN":
            result.append(token.text)
            continue
        head_words = {child.text.lower() for child in token.head.children}
        if token.tag_ == "NN" and head_words & {'many', 'several', 'few'}:
            result.append(token.lemma_ + 's')   # quantifier implies plural
        elif token.tag_ == "NNS" and head_words & {'a', 'one'}:
            result.append(token.lemma_)         # article implies singular
        else:
            result.append(token.text)
    return ' '.join(result)
118
+
119
# Function to check and correct article errors
def correct_article_errors(text):
    """Swap "a"/"an" based on whether the following word starts with a vowel.

    The article is left unchanged when it is the final token: the original
    code called token.nbor(1) unconditionally, which raises IndexError for
    a trailing "a"/"an".
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.text in ('a', 'an') and token.i + 1 < len(doc):
            next_token = token.nbor(1)
            starts_with_vowel = next_token.text[0].lower() in "aeiou"
            if token.text == "a" and starts_with_vowel:
                corrected_text.append("an")
            elif token.text == "an" and not starts_with_vowel:
                corrected_text.append("a")
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
135
+
136
# Function to get the correct synonym while maintaining verb form
def replace_with_synonym(token):
    """Return a synonym for *token*, crudely re-inflected to match its tag.

    Falls back to the token's own text when WordNet offers no synonyms.
    NOTE: suffix concatenation is naive ("run" + "ing" -> "runing").
    """
    pos_map = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    pos = pos_map.get(token.pos_)
    synonyms = get_synonyms_nltk(token.lemma_, pos)
    if not synonyms:
        return token.text

    synonym = synonyms[0]
    if token.tag_ == "VBG":            # present participle (e.g. running)
        synonym += 'ing'
    elif token.tag_ in ("VBD", "VBN"):  # past tense / past participle
        synonym += 'ed'
    elif token.tag_ == "VBZ":          # third-person singular present
        synonym += 's'
    return synonym
160
+
161
# Function to check for and avoid double negatives
def correct_double_negatives(text):
    """Replace "not" with "always" when its head also governs a "never"."""
    tokens = []
    for token in nlp(text):
        is_double_negative = (
            token.text.lower() == "not"
            and any(child.text.lower() == "never" for child in token.head.children)
        )
        tokens.append("always" if is_double_negative else token.text)
    return ' '.join(tokens)
171
+
172
# Function to ensure subject-verb agreement
def ensure_subject_verb_agreement(text):
    """Adjust verbs so they agree in number with their noun subject.

    A singular subject (NN) forces the third-person-singular verb form;
    a plural subject (NNS) strips an erroneous -s (VBZ) form. The original
    implementation appended the corrected verb next to the *subject* while
    still emitting the verb token in its own position, duplicating and
    reordering words; here corrections are recorded by token index and
    every token is emitted exactly once, in order.
    """
    doc = nlp(text)
    corrections = {}  # verb token index -> corrected verb form
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                # Singular subject: use singular verb form.
                corrections[token.head.i] = token.head.lemma_ + "s"
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                # Plural subject: drop the singular -s form.
                corrections[token.head.i] = token.head.lemma_
    return ' '.join(corrections.get(tok.i, tok.text) for tok in doc)
184
+
185
# Function to correct spelling errors
def correct_spelling(text):
    """Spell-correct each whitespace-separated word of *text*.

    Words the checker cannot correct (correction() returns None) are
    kept unchanged.
    """
    def _fix(word):
        suggestion = spell.correction(word)
        return word if suggestion is None else suggestion

    return ' '.join(_fix(word) for word in text.split())
196
+
197
# Function to rephrase text and replace words with their synonyms while maintaining form
def rephrase_with_synonyms(text):
    """Swap each content word for its first WordNet synonym, keeping form.

    Verbs are crudely re-inflected from their fine-grained tag; plural
    nouns get a trailing 's'. Tokens with no mapped part of speech or no
    synonyms pass through unchanged.
    """
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    doc = nlp(text)
    rephrased = []
    for token in doc:
        pos_tag = pos_map.get(token.pos_)
        if pos_tag is None:
            rephrased.append(token.text)
            continue
        synonyms = get_synonyms_nltk(token.text, pos_tag)
        if not synonyms:
            rephrased.append(token.text)
            continue
        synonym = synonyms[0]  # first synonym only, for simplicity
        if token.pos_ == "VERB":
            if token.tag_ == "VBG":             # present participle (e.g. running)
                synonym += 'ing'
            elif token.tag_ in ("VBD", "VBN"):  # past tense / past participle
                synonym += 'ed'
            elif token.tag_ == "VBZ":           # third-person singular present
                synonym += 's'
        elif token.pos_ == "NOUN" and token.tag_ == "NNS" and not synonym.endswith('s'):
            synonym += 's'                      # keep plural marking
        rephrased.append(synonym)
    return ' '.join(rephrased)
233
+
234
# Function to paraphrase and correct grammar with enhanced accuracy
def paraphrase_and_correct(text):
    """Run the full cleanup pipeline over *text* and return the result.

    Stages run in a fixed order: filler removal, sentence/proper-noun
    capitalization, possessives, article fixes, singular/plural fixes,
    tense fixes, double-negative repair, subject-verb agreement, synonym
    rephrasing, and finally spell checking.
    """
    pipeline_steps = (
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_possessives,
        correct_article_errors,
        correct_singular_plural_errors,
        correct_tense_errors,
        correct_double_negatives,
        ensure_subject_verb_agreement,
        rephrase_with_synonyms,
        correct_spelling,
    )
    result = text
    for step in pipeline_steps:
        result = step(result)
    return result
259
+
260
# Gradio app setup with two tabs
with gr.Blocks() as demo:
    with gr.Tab("AI Detection"):
        detect_input = gr.Textbox(lines=5, label='Text')
        detect_button = gr.Button("🤖 Predict!")
        detect_label = gr.Textbox(lines=1, label='Predicted Label 🎃')
        detect_score = gr.Textbox(lines=1, label='Prob')

        # Run the AI-detection classifier on click.
        detect_button.click(fn=predict_en, inputs=detect_input,
                            outputs=[detect_label, detect_score])

    with gr.Tab("Paraphrasing & Grammar Correction"):
        correct_input = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
        correct_button = gr.Button("🔄 Paraphrase and Correct")
        correct_output = gr.Textbox(lines=5, label='Corrected Text')

        # Run the full correction pipeline on click.
        correct_button.click(fn=paraphrase_and_correct, inputs=correct_input,
                             outputs=correct_output)

demo.launch(share=True)  # share=True creates a public link