sashtech committed on
Commit
51568dc
·
verified ·
1 Parent(s): c163eb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -41
app.py CHANGED
@@ -1,68 +1,150 @@
1
  import os
2
  import gradio as gr
3
  import spacy
 
4
  import nltk
5
  from nltk.corpus import wordnet
6
- from nltk.stem import WordNetLemmatizer
7
- from collections import defaultdict
8
 
9
- # Ensure necessary NLTK data is downloaded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  nltk.download('wordnet')
11
- nltk.download('averaged_perceptron_tagger')
12
- nltk.download('punkt')
13
 
14
- # Ensure the SpaCy model is installed for POS tagging
15
  try:
16
  nlp = spacy.load("en_core_web_sm")
17
  except OSError:
18
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
19
  nlp = spacy.load("en_core_web_sm")
20
 
21
- # Initialize lemmatizer
22
- lemmatizer = WordNetLemmatizer()
23
-
24
- # Helper function to map nltk POS tags to wordnet POS tags
25
- def get_wordnet_pos(treebank_tag):
26
- if treebank_tag.startswith('J'):
27
- return wordnet.ADJ
28
- elif treebank_tag.startswith('V'):
29
- return wordnet.VERB
30
- elif treebank_tag.startswith('N'):
31
- return wordnet.NOUN
32
- elif treebank_tag.startswith('R'):
33
- return wordnet.ADV
34
- else:
35
- return None
36
-
37
- # Function to correct tense, singular/plural, and verb forms
38
- def grammar_correction(text):
39
- words = nltk.word_tokenize(text)
40
- tagged = nltk.pos_tag(words)
41
-
42
  corrected_text = []
43
- for word, tag in tagged:
44
- wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
45
- lemma = lemmatizer.lemmatize(word, pos=wordnet_pos)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- # Apply basic rules for common errors
48
- if tag.startswith('VB') and word.lower() != lemma: # Verb tense correction
49
- corrected_text.append(lemma)
50
- elif tag.startswith('NNS') and word.lower() == lemma: # Singular/plural correction
51
- corrected_text.append(word + 's')
52
  else:
53
- corrected_text.append(word)
54
 
55
- return ' '.join(corrected_text)
 
 
 
 
 
 
56
 
57
- # Gradio app setup
 
 
 
 
 
 
 
 
 
 
58
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  with gr.Tab("Grammar Correction"):
60
  grammar_input = gr.Textbox(lines=5, label="Input Text")
61
  grammar_button = gr.Button("Correct Grammar")
62
  grammar_output = gr.Textbox(label="Corrected Text")
63
 
64
- # Connect the grammar correction function to the button
65
- grammar_button.click(grammar_correction, inputs=grammar_input, outputs=grammar_output)
66
 
67
- # Launch the app
68
  demo.launch()
 
1
  import os
2
  import gradio as gr
3
  import spacy
4
+ import subprocess
5
  import nltk
6
  from nltk.corpus import wordnet
 
 
7
 
8
+ # Clone and install CorrectLy
9
def install_correctly():
    """Fetch CorrectLy, install its requirements, and make it importable.

    The repository is cloned only when no ``CorrectLy`` path exists in the
    current working directory. Its ``requirements.txt`` is then installed
    with the pip of the interpreter running this script, and the absolute
    path of the checkout is added to ``sys.path``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``pip install``
            exits with a non-zero status (both are run with ``check=True``).
    """
    # ``sys`` is not among this file's top-level imports, so import it here;
    # without it the ``sys.executable`` / ``sys.path`` uses raise NameError.
    import sys

    if not os.path.exists('CorrectLy'):
        print("Cloning CorrectLy repository...")
        subprocess.run(["git", "clone", "https://github.com/rounakdatta/CorrectLy.git"], check=True)

    # Install dependencies from CorrectLy
    # NOTE(review): assumed to run on every start, not only after a fresh clone - confirm.
    subprocess.run([sys.executable, "-m", "pip", "install", "-r", "CorrectLy/requirements.txt"], check=True)

    # Add CorrectLy to Python path (skip if it is already registered)
    repo_path = os.path.abspath('CorrectLy')
    if repo_path not in sys.path:
        sys.path.append(repo_path)
19
+
20
+ # Install CorrectLy
21
+ install_correctly()
22
+
23
+ # Import CorrectLy after installation
24
+ from CorrectLy.correctly import CorrectLy
25
+
26
+ # Initialize CorrectLy for grammar correction
27
+ corrector = CorrectLy()
28
+
29
+ # Initialize the English text classification pipeline for AI detection
30
+ from transformers import pipeline
31
+ pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
32
+
33
+ # Function to predict the label and score for English text (AI Detection)
34
def predict_en(text):
    """Run the AI-detection pipeline on ``text`` and return ``(label, score)``.

    Only the first entry of the pipeline's result list is used.
    """
    top_result = pipeline_en(text)[0]
    label = top_result['label']
    score = top_result['score']
    return label, score
37
+
38
+ # Ensure necessary NLTK data is downloaded for Humanifier
39
  nltk.download('wordnet')
40
+ nltk.download('omw-1.4')
 
41
 
42
# Ensure the SpaCy model is installed for Humanifier
import sys  # needed for sys.executable below; not part of the top-level imports

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Loading failed: download the model with the interpreter that is running
    # this app (a bare "python" may resolve to a different environment), and
    # fail loudly if the download itself does not succeed.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
48
 
49
+ # Function to correct grammar using CorrectLy
50
def correct_grammar_with_correctly(text):
    """Pass ``text`` to the module-level CorrectLy ``corrector`` and return its output."""
    corrected = corrector.correct(text)
    return corrected
52
+
53
+ # Function to get synonyms using NLTK WordNet (Humanifier)
54
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for ``word``.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant passed to ``wordnet.synsets``.

    Returns:
        A list with the name of every lemma in the first synset, or an empty
        list when WordNet returns no synsets for the word/POS pair.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    return [entry.name() for entry in matches[0].lemmas()]
60
+
61
+ # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
62
def capitalize_sentences_and_nouns(text):
    """Uppercase the first letter of each sentence and of each proper noun.

    The text is parsed with the module-level spaCy pipeline ``nlp``. The first
    token of every sentence and every token tagged ``PROPN`` gets its first
    character uppercased; all other characters and the original whitespace
    between tokens are kept as they were.

    Args:
        text: The input string.

    Returns:
        The text with the capitalization applied.
    """
    def _upper_first(word):
        # Unlike str.capitalize(), this leaves the rest of the word untouched,
        # so acronyms and mixed-case names ("NASA", "iPhone") are not mangled.
        return word[:1].upper() + word[1:]

    doc = nlp(text)
    pieces = []

    for sent in doc.sents:
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                # First word of the sentence, or a proper noun
                pieces.append(_upper_first(token.text) + token.whitespace_)
            else:
                pieces.append(token.text_with_ws)

    # Each piece carries its own trailing whitespace, so punctuation stays
    # attached to the preceding word instead of being space-separated.
    return ''.join(pieces)
78
+
79
+ # Paraphrasing function using SpaCy and NLTK (Humanifier)
80
def paraphrase_with_spacy_nltk(text):
    """Paraphrase ``text`` by swapping content words for a WordNet synonym.

    Each token is tagged with the module-level spaCy pipeline ``nlp``. Nouns,
    verbs, adjectives and adverbs are looked up with ``get_synonyms_nltk``;
    when the first returned lemma differs from the lowercased token, it
    replaces the token. Everything else is kept unchanged. The rebuilt text is
    then passed through ``capitalize_sentences_and_nouns``.

    Args:
        text: The input string.

    Returns:
        The paraphrased, capitalized text.
    """
    # Map SpaCy POS tags to WordNet POS tags
    spacy_to_wordnet = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    pieces = []

    for token in doc:
        replacement = token.text
        wn_pos = spacy_to_wordnet.get(token.pos_)
        if wn_pos is not None:
            lowered = token.text.lower()
            synonyms = get_synonyms_nltk(lowered, wn_pos)
            # Replace only when the top candidate is actually a different word
            if synonyms and synonyms[0] != lowered:
                # WordNet joins multi-word lemmas with underscores
                replacement = synonyms[0].replace('_', ' ')
        # Re-attach the token's original trailing whitespace so punctuation
        # is not pushed away from the word it follows.
        pieces.append(replacement + token.whitespace_)

    paraphrased_sentence = ''.join(pieces)

    # Capitalize sentences and proper nouns
    return capitalize_sentences_and_nouns(paraphrased_sentence)
111
 
112
+ # Combined function: Paraphrase -> Capitalization (Humanifier)
113
def paraphrase_and_correct(text):
    """Paraphrase ``text`` and return it with sentences and proper nouns capitalized.

    ``paraphrase_with_spacy_nltk`` already passes its result through
    ``capitalize_sentences_and_nouns`` before returning, so no further
    capitalization pass is run here; repeating it would only re-parse the
    text a second time.

    Args:
        text: The input string.

    Returns:
        The paraphrased, capitalized text.
    """
    return paraphrase_with_spacy_nltk(text)
121
+
122
# Gradio app setup with three tabs
with gr.Blocks() as demo:
    # Tab 1: classify the input text with predict_en and show label + score
    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(lines=5, label='Text')
        button1 = gr.Button("🤖 Predict!")
        label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
        score1 = gr.Textbox(lines=1, label='Prob')

        # Connect the prediction function to the button
        button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')

    # Tab 2: synonym-based paraphrasing followed by capitalization
    with gr.Tab("Humanifier"):
        text_input = gr.Textbox(lines=5, label="Input Text")
        paraphrase_button = gr.Button("Paraphrase & Correct")
        output_text = gr.Textbox(label="Paraphrased Text")

        # Connect the paraphrasing function to the button
        paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

    # Tab 3: grammar correction through the CorrectLy wrapper
    with gr.Tab("Grammar Correction"):
        grammar_input = gr.Textbox(lines=5, label="Input Text")
        grammar_button = gr.Button("Correct Grammar")
        grammar_output = gr.Textbox(label="Corrected Text")

        # Connect the CorrectLy grammar correction function to the button
        grammar_button.click(correct_grammar_with_correctly, inputs=grammar_input, outputs=grammar_output)

# Launch the app with all functionalities
demo.launch()