sashtech committed on
Commit
c163eb2
·
verified ·
1 Parent(s): 0eb1430

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -71
app.py CHANGED
@@ -1,92 +1,68 @@
1
  import os
2
- import subprocess
3
- import sys
4
  import gradio as gr
5
- from transformers import pipeline
6
  import spacy
7
  import nltk
8
  from nltk.corpus import wordnet
 
 
9
 
10
- # Function to install GECToR
11
- def install_gector():
12
- if not os.path.exists('gector'):
13
- print("Cloning GECToR repository...")
14
- subprocess.run(["git", "clone", "https://github.com/grammarly/gector.git"], check=True)
15
-
16
- # Install dependencies from GECToR requirements
17
- subprocess.run([sys.executable, "-m", "pip", "install", "-r", "gector/requirements.txt"], check=True)
18
-
19
- # Manually add GECToR to the Python path
20
- sys.path.append(os.path.abspath('gector'))
21
-
22
- # Install and import GECToR
23
- install_gector()
24
-
25
- # Ensure the gector module path is added correctly
26
- sys.path.insert(0, os.path.abspath('./gector'))
27
-
28
- # Import GECToR after installation
29
- from gector.gec_model import GecBERTModel
30
-
31
- # Initialize GECToR model for grammar correction
32
- gector_model = GecBERTModel(vocab_path='gector/data/output_vocabulary',
33
- model_paths=['https://grammarly-nlp-data.s3.amazonaws.com/gector/roberta_1_gector.th'],
34
- is_ensemble=False)
35
-
36
- # Initialize the English text classification pipeline for AI detection
37
- pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
38
-
39
- # Function to predict the label and score for English text (AI Detection)
40
- def predict_en(text):
41
- res = pipeline_en(text)[0]
42
- return res['label'], res['score']
43
-
44
- # Ensure necessary NLTK data is downloaded for Humanifier
45
  nltk.download('wordnet')
46
- nltk.download('omw-1.4')
 
47
 
48
- # Ensure the SpaCy model is installed for Humanifier
49
  try:
50
  nlp = spacy.load("en_core_web_sm")
51
  except OSError:
52
- subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
53
  nlp = spacy.load("en_core_web_sm")
54
 
55
- # Function to correct grammar using GECToR
56
- def correct_grammar_with_gector(text):
57
- corrected_sentences = []
58
- sentences = [text]
59
- for sentence in sentences:
60
- preds = gector_model.handle_batch([sentence])
61
- corrected_sentences.append(preds[0])
62
- return ' '.join(corrected_sentences)
63
-
64
- # Gradio app setup with three tabs
65
- with gr.Blocks() as demo:
66
- with gr.Tab("AI Detection"):
67
- t1 = gr.Textbox(lines=5, label='Text')
68
- button1 = gr.Button("🤖 Predict!")
69
- label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
70
- score1 = gr.Textbox(lines=1, label='Prob')
71
-
72
- # Connect the prediction function to the button
73
- button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
 
74
 
75
- with gr.Tab("Humanifier"):
76
- text_input = gr.Textbox(lines=5, label="Input Text")
77
- paraphrase_button = gr.Button("Paraphrase & Correct")
78
- output_text = gr.Textbox(label="Paraphrased Text")
79
-
80
- # Connect the paraphrasing function to the button
81
- paraphrase_button.click(correct_grammar_with_gector, inputs=text_input, outputs=output_text)
 
 
 
 
 
82
 
 
 
 
 
83
  with gr.Tab("Grammar Correction"):
84
  grammar_input = gr.Textbox(lines=5, label="Input Text")
85
  grammar_button = gr.Button("Correct Grammar")
86
  grammar_output = gr.Textbox(label="Corrected Text")
87
 
88
- # Connect the GECToR grammar correction function to the button
89
- grammar_button.click(correct_grammar_with_gector, inputs=grammar_input, outputs=grammar_output)
90
 
91
- # Launch the app with all functionalities
92
- demo.launch()
 
1
  import os
 
 
2
  import gradio as gr
 
3
  import spacy
4
  import nltk
5
  from nltk.corpus import wordnet
6
+ from nltk.stem import WordNetLemmatizer
7
+ from collections import defaultdict
8
 
9
# Fetch the NLTK resources this app relies on (no-op when already present).
for _resource in ('wordnet', 'averaged_perceptron_tagger', 'punkt'):
    nltk.download(_resource)
13
 
14
# Load the small English spaCy pipeline, installing it on demand.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Imported here so this fallback does not rely on module-level imports
    # of `subprocess`/`sys`, which the file's import block does not provide.
    import subprocess
    import sys

    # Run the download with the interpreter that is executing this script
    # (a bare "python" may resolve to a different environment), and let
    # check=True raise if the download command itself fails.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
20
 
21
+ # Initialize lemmatizer
22
+ lemmatizer = WordNetLemmatizer()
23
+
24
def get_wordnet_pos(treebank_tag):
    """Translate an NLTK POS tag into the matching WordNet POS constant.

    The decision is made on the tag's first character only:
    ``J`` -> ``wordnet.ADJ``, ``V`` -> ``wordnet.VERB``,
    ``N`` -> ``wordnet.NOUN``, ``R`` -> ``wordnet.ADV``.

    Returns ``None`` for any other tag, including the empty string.
    """
    # Map the leading letter to the name of the constant on `wordnet`; the
    # attribute is only resolved when a mapping actually exists.
    constant_name = {
        'J': 'ADJ',
        'V': 'VERB',
        'N': 'NOUN',
        'R': 'ADV',
    }.get(treebank_tag[:1])
    return getattr(wordnet, constant_name) if constant_name else None
36
+
37
def grammar_correction(text):
    """Apply simple lemma-based rewrite rules to *text*, token by token.

    The text is tokenized with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``. Each token is lemmatized using the WordNet POS derived
    from its tag (falling back to noun), then:

    * a token tagged ``VB*`` whose lowercase form differs from its lemma is
      replaced by the lemma;
    * a token tagged ``NNS*`` whose lowercase form equals its lemma gets an
      ``'s'`` appended;
    * every other token is kept unchanged.

    Returns the resulting tokens joined by single spaces.
    """
    def _rewrite(token, pos_tag):
        # Lemmatize the token as-is; only the comparison below is lowercased.
        lemma = lemmatizer.lemmatize(
            token, pos=get_wordnet_pos(pos_tag) or wordnet.NOUN
        )
        lowered = token.lower()
        if pos_tag.startswith('VB') and lowered != lemma:
            return lemma
        if pos_tag.startswith('NNS') and lowered == lemma:
            return token + 's'
        return token

    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    return ' '.join(_rewrite(token, pos_tag) for token, pos_tag in tagged_tokens)
56
+
57
# Gradio app setup: a single "Grammar Correction" tab with an input box,
# a trigger button, and an output box.
# NOTE(review): the nesting below was reconstructed from a diff view that
# lost indentation -- confirm against the real app.py.
with gr.Blocks() as demo:
    with gr.Tab("Grammar Correction"):
        grammar_input = gr.Textbox(lines=5, label="Input Text")
        grammar_button = gr.Button("Correct Grammar")
        grammar_output = gr.Textbox(label="Corrected Text")

        # Connect the grammar correction function to the button: clicking
        # passes the input box to grammar_correction and shows its return
        # value in the output box.
        grammar_button.click(grammar_correction, inputs=grammar_input, outputs=grammar_output)

# Launch the app
demo.launch()