sashtech committed on
Commit
7feda08
·
verified ·
1 Parent(s): 6b18ba5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -40
app.py CHANGED
@@ -1,16 +1,25 @@
1
  # Import dependencies
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
4
  import torch
5
  import nltk
6
  from nltk.corpus import wordnet
7
- import subprocess
 
 
 
8
 
9
  # Download NLTK data (if not already downloaded)
10
  nltk.download('punkt')
11
  nltk.download('stopwords')
12
  nltk.download('wordnet') # Download WordNet
13
 
 
 
 
 
 
 
14
  # Check for GPU and set the device accordingly
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
 
@@ -18,10 +27,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
19
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
20
 
21
- # Load Parrot Paraphraser model and tokenizer for humanizing text
22
- paraphrase_tokenizer = T5Tokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
23
- paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(device)
24
-
25
  # AI detection function using DistilBERT
26
  def detect_ai_generated(text):
27
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -31,43 +36,52 @@ def detect_ai_generated(text):
31
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
32
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
33
 
34
- # Humanize the AI-detected text using the Parrot Paraphraser model
35
- def humanize_text(AI_text):
36
- inputs = paraphrase_tokenizer(AI_text, return_tensors="pt", max_length=512, truncation=True).to(device)
37
- with torch.no_grad(): # Avoid gradient calculations for faster inference
38
- paraphrased_ids = paraphrase_model.generate(
39
- inputs['input_ids'],
40
- max_length=inputs['input_ids'].shape[-1] + 20, # Slightly more than the original input length
41
- num_beams=4,
42
- early_stopping=True,
43
- length_penalty=1.0,
44
- no_repeat_ngram_size=3,
45
- )
46
- paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
47
- return f"Humanized Text:\n{paraphrased_text}"
48
 
49
- # Gradio interface definition
50
- ai_detection_interface = gr.Interface(
51
- fn=detect_ai_generated,
52
- inputs="textbox",
53
- outputs="text",
54
- title="AI Text Detection",
55
- description="Enter text to determine the probability of it being AI-generated."
56
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- humanization_interface = gr.Interface(
59
- fn=humanize_text,
60
- inputs="textbox",
61
- outputs="text",
62
- title="Text Humanizer",
63
- description="Enter text to get a human-written version, paraphrased for natural output."
64
- )
 
 
 
65
 
66
- # Combine both interfaces into a single Gradio app with tabs
67
- interface = gr.TabbedInterface(
68
- [ai_detection_interface, humanization_interface],
69
- ["AI Detection", "Humanization"]
70
- )
71
 
72
  # Launch the Gradio app
73
  interface.launch(debug=False)
 
1
  # Import dependencies
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import torch
5
  import nltk
6
  from nltk.corpus import wordnet
7
+ import spacy
8
+ from gensim.models import KeyedVectors
9
+ from gensim import downloader as api
10
+ from nltk.tokenize import word_tokenize
11
 
12
  # Download NLTK data (if not already downloaded)
13
  nltk.download('punkt')
14
  nltk.download('stopwords')
15
  nltk.download('wordnet') # Download WordNet
16
 
17
+ # Load spaCy model
18
+ nlp = spacy.load("en_core_web_sm")
19
+
20
+ # Load the small pre-trained GloVe vectors ("glove-wiki-gigaword-50") through Gensim's downloader
21
+ word_vectors = api.load("glove-wiki-gigaword-50")
22
+
23
  # Check for GPU and set the device accordingly
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
 
 
27
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
28
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
29
 
 
 
 
 
30
  # AI detection function using DistilBERT
31
  def detect_ai_generated(text):
32
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
 
36
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
37
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
38
 
39
# Function to get replacement candidates from the Gensim word vectors
def get_synonyms_gensim(word, topn=5):
    """Return the words closest to ``word`` in the loaded embedding.

    Candidates come from ``word_vectors.most_similar``, i.e. they are the
    nearest neighbours by vector similarity. They are related words and are
    not guaranteed to be true synonyms.

    Args:
        word: Token to look up. Callers in this file pass it lower-cased.
        topn: Maximum number of neighbours to return. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        A list of neighbour words, best match first, or an empty list when
        ``word`` is not in the embedding vocabulary.
    """
    try:
        neighbours = word_vectors.most_similar(positive=[word], topn=topn)
    except KeyError:
        # Out-of-vocabulary word: report "no candidates" instead of failing.
        return []
    # most_similar yields (word, similarity) pairs; only the word is needed.
    return [candidate for candidate, _similarity in neighbours]
 
 
 
 
 
 
 
46
 
47
# Paraphrasing function using Gensim for synonym replacement
def paraphrase_with_gensim(text):
    """Paraphrase ``text`` by swapping words for their nearest neighbour.

    The text is split with NLTK's ``word_tokenize``. Each purely alphabetic
    token is looked up (lower-cased) with ``get_synonyms_gensim`` and replaced
    by the first candidate when one exists. All other tokens are kept as-is.

    Args:
        text: Input string to paraphrase.

    Returns:
        The resulting tokens joined by single spaces.
    """
    paraphrased_words = []
    for word in word_tokenize(text):
        # Punctuation, numbers and clitics such as "n't" are never looked up:
        # if the embedding has an entry for them, swapping them for a
        # neighbour would corrupt the sentence rather than paraphrase it.
        synonyms = get_synonyms_gensim(word.lower()) if word.isalpha() else []
        paraphrased_words.append(synonyms[0] if synonyms else word)
    return ' '.join(paraphrased_words)
58
+
59
# Paraphrasing function using spaCy for synonym replacement
def paraphrase_with_spacy(text):
    """Paraphrase ``text``, replacing only nouns, verbs, adjectives and adverbs.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Tokens
    whose coarse part-of-speech tag is NOUN, VERB, ADJ or ADV are looked up
    (lower-cased) with ``get_synonyms_gensim`` and replaced by the first
    candidate when one exists. Every other token is kept unchanged.

    Args:
        text: Input string to paraphrase.

    Returns:
        The resulting tokens joined by single spaces.
    """
    replaceable_pos = {"NOUN", "VERB", "ADJ", "ADV"}
    paraphrased_words = []
    for token in nlp(text):
        replacement = token.text
        # Check the part of speech first so the nearest-neighbour search is
        # only run for tokens that are actually eligible for replacement.
        if token.pos_ in replaceable_pos:
            synonyms = get_synonyms_gensim(token.text.lower())
            if synonyms:
                replacement = synonyms[0]
        paraphrased_words.append(replacement)
    return ' '.join(paraphrased_words)
70
 
71
# Gradio UI: one shared input box, three action buttons, one shared output box
with gr.Blocks() as interface:
    with gr.Row():
        # Left column: the text to analyse plus the available actions
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
            paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
        # Right column: result of whichever action was clicked last
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Every button reads the same input box and writes to the same output box,
    # so the handlers are wired in one pass, in the original order.
    for button, handler in (
        (detect_button, detect_ai_generated),
        (paraphrase_gensim_button, paraphrase_with_gensim),
        (paraphrase_spacy_button, paraphrase_with_spacy),
    ):
        button.click(handler, inputs=text_input, outputs=output_text)
 
 
85
 
86
  # Launch the Gradio app
87
  interface.launch(debug=False)