sashtech committed on
Commit e00f367 · verified · 1 Parent(s): f036c05

Update app.py
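
In short: this commit moves the AI-detection pipeline setup ahead of the Humanifier setup, broadens get_synonyms_nltk to collect synonyms from all synsets instead of only the first, and adds four correction passes (articles, capitalization, singular/plural, tense) that paraphrase_and_correct now chains after paraphrasing.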

Files changed (1): app.py +117 -26
app.py CHANGED

@@ -5,33 +5,124 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
+from collections import defaultdict

-# Ensure necessary NLTK data is downloaded
+# Initialize the English text classification pipeline for AI detection
+pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
+
+# Function to predict the label and score for English text (AI Detection)
+def predict_en(text):
+    res = pipeline_en(text)[0]
+    return res['label'], res['score']
+
+# Ensure necessary NLTK data is downloaded for Humanifier
 nltk.download('wordnet')
 nltk.download('omw-1.4')

-# Ensure the SpaCy model is installed
+# Ensure the SpaCy model is installed for Humanifier
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")

-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-
-# Function to predict the label and score for English text (AI Detection)
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
-
 # Function to get synonyms using NLTK WordNet (Humanifier)
 def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
-    if synsets:
-        lemmas = synsets[0].lemmas()
-        return [lemma.name() for lemma in lemmas]
-    return []
+    synonyms = set()
+    for synset in synsets:
+        for lemma in synset.lemmas():
+            if lemma.name() != word:
+                synonyms.add(lemma.name())
+    return list(synonyms)
+
+# Function to capitalize the first letter of sentences and proper nouns (Humanifier)
+def capitalize_sentences_and_nouns(text):
+    doc = nlp(text)
+    corrected_text = []
+
+    for sent in doc.sents:
+        sentence = []
+        for token in sent:
+            if token.i == sent.start:  # First word of the sentence
+                sentence.append(token.text.capitalize())
+            elif token.pos_ == "PROPN":  # Proper noun
+                sentence.append(token.text.capitalize())
+            else:
+                sentence.append(token.text)
+        corrected_text.append(' '.join(sentence))
+
+    return ' '.join(corrected_text)
+
+# Function to correct tense errors in a sentence (Tense Correction)
+def correct_tense_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+
+    for token in doc:
+        if token.pos_ == "VERB":
+            # Check if verb is in its base form
+            if token.tag_ == "VB" and token.text.lower() not in ["be", "have", "do"]:
+                # Attempt to correct verb form based on sentence context
+                context = " ".join([t.text for t in doc if t.i != token.i])
+                corrected_text.append(token.lemma_)
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+
+    return ' '.join(corrected_text)
+
+# Function to correct singular/plural errors (Singular/Plural Correction)
+def correct_singular_plural_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+
+    # Create a context dictionary for singular/plural determination
+    context = defaultdict(int)
+    for token in doc:
+        if token.pos_ == "NOUN":
+            # Track context for noun usage
+            if token.tag_ == "NNS":
+                context['plural'] += 1
+            elif token.tag_ == "NN":
+                context['singular'] += 1
+
+    for token in doc:
+        if token.pos_ == "NOUN":
+            if token.tag_ == "NN":  # Singular noun
+                if context['plural'] > context['singular']:
+                    corrected_text.append(token.lemma_ + 's')
+                else:
+                    corrected_text.append(token.text)
+            elif token.tag_ == "NNS":  # Plural noun
+                if context['singular'] > context['plural']:
+                    corrected_text.append(token.lemma_)
+                else:
+                    corrected_text.append(token.text)
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+
+    return ' '.join(corrected_text)
+
+# Function to check and correct article errors
+def correct_article_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        if token.text in ['a', 'an']:
+            next_token = token.nbor(1)
+            if token.text == "a" and next_token.text[0].lower() in "aeiou":
+                corrected_text.append("an")
+            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
+                corrected_text.append("a")
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)

 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
@@ -41,37 +132,37 @@ def paraphrase_with_spacy_nltk(text):
     for token in doc:
         # Map SpaCy POS tags to WordNet POS tags
         pos = None
-        if token.pos_ in {"NOUN"}:
+        if token.pos_ == "NOUN":
             pos = wordnet.NOUN
-        elif token.pos_ in {"VERB"}:
+        elif token.pos_ == "VERB":
             pos = wordnet.VERB
-        elif token.pos_ in {"ADJ"}:
+        elif token.pos_ == "ADJ":
             pos = wordnet.ADJ
-        elif token.pos_ in {"ADV"}:
+        elif token.pos_ == "ADV":
             pos = wordnet.ADV

         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []

         # Replace with a synonym only if it makes sense
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
+        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)

-    # Join the words back into a sentence
-    paraphrased_sentence = ' '.join(paraphrased_words)
-
-    return paraphrased_sentence
+    return ' '.join(paraphrased_words)

 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)

-    # Additional steps (grammar correction, capitalization) can go here...
-    # For now, we'll return the paraphrased text as an example.
-
-    return paraphrased_text
+    # Step 2: Apply grammatical corrections on the paraphrased text
+    corrected_text = correct_article_errors(paraphrased_text)
+    corrected_text = capitalize_sentences_and_nouns(corrected_text)
+    corrected_text = correct_singular_plural_errors(corrected_text)
+    final_text = correct_tense_errors(corrected_text)
+
+    return final_text

 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
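
For quick verification, here is a minimal smoke test of the two entry points this commit touches, run outside the Gradio UI. It is an illustrative sketch, not part of the commit: the sample strings and the __main__ guard are invented, and the exact label strings predict_en returns depend on the detector model's config.

if __name__ == "__main__":
    # Humanifier chain: paraphrase, then the article -> capitalization ->
    # singular/plural -> tense correction passes added in this commit
    print(paraphrase_and_correct("a apple fall from the trees ."))

    # AI detection: pipeline_en returns a label and a confidence score
    label, score = predict_en("This is a short sample paragraph to classify.")
    print(label, score)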