sashtech committed on
Commit
c7c1d09
·
verified ·
1 Parent(s): fdbab88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -39
app.py CHANGED
@@ -17,12 +17,10 @@ nltk.download('punkt')
17
  nltk.download('stopwords')
18
  nltk.download('averaged_perceptron_tagger')
19
  nltk.download('averaged_perceptron_tagger_eng')
20
-
21
  nltk.download('wordnet')
22
  nltk.download('omw-1.4')
23
  nltk.download('punkt_tab')
24
 
25
-
26
  # Initialize stopwords
27
  stop_words = set(stopwords.words("english"))
28
 
@@ -45,7 +43,6 @@ except OSError:
45
 
46
  def plagiarism_removal(text):
47
  def plagiarism_remover(word):
48
- # Handle stopwords, punctuation, and excluded words
49
  if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
50
  return word
51
 
@@ -53,60 +50,48 @@ def plagiarism_removal(text):
53
  synonyms = set()
54
  for syn in wordnet.synsets(word):
55
  for lemma in syn.lemmas():
56
- # Exclude overly technical synonyms or words with underscores
57
  if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
58
  synonyms.add(lemma.name())
59
 
60
- # Get part of speech for word and filter synonyms with the same POS
61
  pos_tag_word = nltk.pos_tag([word])[0]
62
-
63
- # Avoid replacing certain parts of speech
64
  if pos_tag_word[1] in exclude_tags:
65
  return word
66
 
67
  filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
68
 
69
- # Return original word if no appropriate synonyms found
70
  if not filtered_synonyms:
71
  return word
72
 
73
- # Select a random synonym from the filtered list
74
  synonym_choice = random.choice(filtered_synonyms)
75
 
76
- # Retain original capitalization
77
  if word.istitle():
78
  return synonym_choice.title()
79
  return synonym_choice
80
 
81
- # Tokenize, replace words, and join them back
82
  para_split = word_tokenize(text)
83
  final_text = [plagiarism_remover(word) for word in para_split]
84
 
85
- # Handle spacing around punctuation correctly
86
  corrected_text = []
87
  for i in range(len(final_text)):
88
  if final_text[i] in string.punctuation and i > 0:
89
- corrected_text[-1] += final_text[i] # Append punctuation to previous word
90
  else:
91
  corrected_text.append(final_text[i])
92
 
93
  return " ".join(corrected_text)
94
 
95
- # Function to predict the label and score for English text (AI Detection)
96
  def predict_en(text):
97
  res = pipeline_en(text)[0]
98
  return res['label'], res['score']
99
 
100
- # Function to remove redundant and meaningless words
101
  def remove_redundant_words(text):
102
  doc = nlp(text)
103
  meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
104
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
105
  return ' '.join(filtered_text)
106
 
107
- # Function to fix spacing before punctuation
108
  def fix_punctuation_spacing(text):
109
- # Split the text into words and punctuation
110
  words = text.split(' ')
111
  cleaned_words = []
112
  punctuation_marks = {',', '.', "'", '!', '?', ':'}
@@ -120,12 +105,10 @@ def fix_punctuation_spacing(text):
120
  return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
121
  .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
122
 
123
- # Function to fix possessives like "Earth's"
124
  def fix_possessives(text):
125
  text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
126
  return text
127
 
128
- # Function to capitalize the first letter of sentences and proper nouns
129
  def capitalize_sentences_and_nouns(text):
130
  doc = nlp(text)
131
  corrected_text = []
@@ -143,7 +126,6 @@ def capitalize_sentences_and_nouns(text):
143
 
144
  return ' '.join(corrected_text)
145
 
146
- # Function to force capitalization of the first letter of every sentence and ensure full stops
147
  def force_first_letter_capital(text):
148
  sentences = re.split(r'(?<=\w[.!?])\s+', text)
149
  capitalized_sentences = []
@@ -157,7 +139,6 @@ def force_first_letter_capital(text):
157
 
158
  return " ".join(capitalized_sentences)
159
 
160
- # Function to correct tense errors in a sentence
161
  def correct_tense_errors(text):
162
  doc = nlp(text)
163
  corrected_text = []
@@ -169,7 +150,6 @@ def correct_tense_errors(text):
169
  corrected_text.append(token.text)
170
  return ' '.join(corrected_text)
171
 
172
- # Function to check and correct article errors
173
  def correct_article_errors(text):
174
  doc = nlp(text)
175
  corrected_text = []
@@ -186,7 +166,6 @@ def correct_article_errors(text):
186
  corrected_text.append(token.text)
187
  return ' '.join(corrected_text)
188
 
189
- # Function to ensure subject-verb agreement
190
  def ensure_subject_verb_agreement(text):
191
  doc = nlp(text)
192
  corrected_text = []
@@ -199,7 +178,6 @@ def ensure_subject_verb_agreement(text):
199
  corrected_text.append(token.text)
200
  return ' '.join(corrected_text)
201
 
202
- # Function to correct spelling errors
203
  def correct_spelling(text):
204
  words = text.split()
205
  corrected_words = []
@@ -211,21 +189,25 @@ def correct_spelling(text):
211
  corrected_words.append(word)
212
  return ' '.join(corrected_words)
213
 
214
- # Main function for paraphrasing and grammar correction
215
  def paraphrase_and_correct(text):
216
- # Add synonym replacement here
217
- cleaned_text = remove_redundant_words(text)
218
- plag_removed = plagiarism_removal(cleaned_text)
219
- paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
220
- paraphrased_text = force_first_letter_capital(paraphrased_text)
221
- paraphrased_text = correct_article_errors(paraphrased_text)
222
- paraphrased_text = correct_tense_errors(paraphrased_text)
223
- paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
224
- paraphrased_text = fix_possessives(paraphrased_text)
225
- paraphrased_text = correct_spelling(paraphrased_text)
226
- paraphrased_text = fix_punctuation_spacing(paraphrased_text)
227
-
228
- return paraphrased_text
 
 
 
 
 
229
 
230
  # Gradio app setup
231
  with gr.Blocks() as demo:
@@ -244,4 +226,4 @@ with gr.Blocks() as demo:
244
 
245
  button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
246
 
247
- demo.launch(share=True)
 
17
  nltk.download('stopwords')
18
  nltk.download('averaged_perceptron_tagger')
19
  nltk.download('averaged_perceptron_tagger_eng')
 
20
  nltk.download('wordnet')
21
  nltk.download('omw-1.4')
22
  nltk.download('punkt_tab')
23
 
 
24
  # Initialize stopwords
25
  stop_words = set(stopwords.words("english"))
26
 
 
43
 
44
  def plagiarism_removal(text):
45
  def plagiarism_remover(word):
 
46
  if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
47
  return word
48
 
 
50
  synonyms = set()
51
  for syn in wordnet.synsets(word):
52
  for lemma in syn.lemmas():
 
53
  if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
54
  synonyms.add(lemma.name())
55
 
 
56
  pos_tag_word = nltk.pos_tag([word])[0]
57
+
 
58
  if pos_tag_word[1] in exclude_tags:
59
  return word
60
 
61
  filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
62
 
 
63
  if not filtered_synonyms:
64
  return word
65
 
 
66
  synonym_choice = random.choice(filtered_synonyms)
67
 
 
68
  if word.istitle():
69
  return synonym_choice.title()
70
  return synonym_choice
71
 
 
72
  para_split = word_tokenize(text)
73
  final_text = [plagiarism_remover(word) for word in para_split]
74
 
 
75
  corrected_text = []
76
  for i in range(len(final_text)):
77
  if final_text[i] in string.punctuation and i > 0:
78
+ corrected_text[-1] += final_text[i]
79
  else:
80
  corrected_text.append(final_text[i])
81
 
82
  return " ".join(corrected_text)
83
 
 
84
  def predict_en(text):
85
  res = pipeline_en(text)[0]
86
  return res['label'], res['score']
87
 
 
88
  def remove_redundant_words(text):
89
  doc = nlp(text)
90
  meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
91
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
92
  return ' '.join(filtered_text)
93
 
 
94
  def fix_punctuation_spacing(text):
 
95
  words = text.split(' ')
96
  cleaned_words = []
97
  punctuation_marks = {',', '.', "'", '!', '?', ':'}
 
105
  return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
106
  .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
107
 
 
108
  def fix_possessives(text):
109
  text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
110
  return text
111
 
 
112
  def capitalize_sentences_and_nouns(text):
113
  doc = nlp(text)
114
  corrected_text = []
 
126
 
127
  return ' '.join(corrected_text)
128
 
 
129
  def force_first_letter_capital(text):
130
  sentences = re.split(r'(?<=\w[.!?])\s+', text)
131
  capitalized_sentences = []
 
139
 
140
  return " ".join(capitalized_sentences)
141
 
 
142
  def correct_tense_errors(text):
143
  doc = nlp(text)
144
  corrected_text = []
 
150
  corrected_text.append(token.text)
151
  return ' '.join(corrected_text)
152
 
 
153
  def correct_article_errors(text):
154
  doc = nlp(text)
155
  corrected_text = []
 
166
  corrected_text.append(token.text)
167
  return ' '.join(corrected_text)
168
 
 
169
  def ensure_subject_verb_agreement(text):
170
  doc = nlp(text)
171
  corrected_text = []
 
178
  corrected_text.append(token.text)
179
  return ' '.join(corrected_text)
180
 
 
181
  def correct_spelling(text):
182
  words = text.split()
183
  corrected_words = []
 
189
  corrected_words.append(word)
190
  return ' '.join(corrected_words)
191
 
 
192
  def paraphrase_and_correct(text):
193
+ paragraphs = text.split("\n\n") # Split by paragraphs
194
+
195
+ # Process each paragraph separately
196
+ processed_paragraphs = []
197
+ for paragraph in paragraphs:
198
+ cleaned_text = remove_redundant_words(paragraph)
199
+ plag_removed = plagiarism_removal(cleaned_text)
200
+ paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
201
+ paraphrased_text = force_first_letter_capital(paraphrased_text)
202
+ paraphrased_text = correct_article_errors(paraphrased_text)
203
+ paraphrased_text = correct_tense_errors(paraphrased_text)
204
+ paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
205
+ paraphrased_text = fix_possessives(paraphrased_text)
206
+ paraphrased_text = correct_spelling(paraphrased_text)
207
+ paraphrased_text = fix_punctuation_spacing(paraphrased_text)
208
+ processed_paragraphs.append(paraphrased_text)
209
+
210
+ return "\n\n".join(processed_paragraphs) # Reassemble the text with paragraphs
211
 
212
  # Gradio app setup
213
  with gr.Blocks() as demo:
 
226
 
227
  button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
228
 
229
+ demo.launch(share=True)