Spaces:

Zeamays3427
/

fake-news-debunker

Running

App Files Files Community

Zeamays3427 commited on Oct 16, 2024

Commit

c8e5a0b

verified ·

1 Parent(s): e96ec01

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -164

app.py CHANGED Viewed

@@ -1,164 +1,166 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-import openai
-import os
-import spacy
-import subprocess
-import sys
-import pandas as pd
-from sklearn.feature_extraction.text import TfidfVectorizer
-# Set OpenAI API key from environment variables
-openai.api_key = os.getenv("OPENAI_API_KEY")
-# Load the tokenizer and the pretrained classification model
-tokenizer = AutoTokenizer.from_pretrained("hamzab/roberta-fake-news-classification")
-model = AutoModelForSequenceClassification.from_pretrained("hamzab/roberta-fake-news-classification")
-# Load spaCy model for keyword extraction
-try:
-    nlp = spacy.load('en_core_web_sm')
-except:
-    # If spaCy model is not available, download it
-    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load('en_core_web_sm')
-# Load the WELFake dataset and extract top 500 TF-IDF keywords
-def load_data():
-    # Load WELFake dataset from CSV file
-    wel_fake_data = pd.read_csv('WELFake_Dataset.csv')
-    wel_fake_data.dropna(subset=['text'], inplace=True)  # Remove rows with missing 'text'
-    # Create a TF-IDF vectorizer and fit it on the dataset's text column
-    vectorizer = TfidfVectorizer(max_features=500, stop_words='english')
-    X = vectorizer.fit_transform(wel_fake_data['text'])
-    # Get the top 500 keywords from the dataset
-    top_keywords = vectorizer.get_feature_names_out()
-    return top_keywords
-# Load top TF-IDF keywords from the WELFake dataset
-top_keywords = load_data()
-# Function to extract keywords using spaCy and matching them with TF-IDF keywords
-def extract_keywords(text):
-    # Use spaCy to extract keywords (nouns and proper nouns)
-    doc = nlp(text)
-    spacy_keywords = [token.text for token in doc if
-                      token.is_alpha and not token.is_stop and token.pos_ in ['NOUN', 'PROPN']]
-    # Use TF-IDF to match keywords in the input text with the top keywords from the dataset
-    tfidf_keywords = [kw for kw in top_keywords if kw.lower() in text.lower()]
-    # Combine the keywords from both sources and remove duplicates
-    all_keywords = list(set(spacy_keywords + tfidf_keywords))
-    return all_keywords
-# Function to predict whether the news is real or fake using the classification model
-def predict(title, text):
-    # Combine the title and text as input to the model
-    input_text = title + " " + text
-    # Tokenize the input and prepare it for the model
-    inputs = tokenizer.encode_plus(
-        input_text,
-        add_special_tokens=True,
-        max_length=512,
-        truncation=True,
-        padding='max_length',
-        return_tensors="pt"
-    )
-    # Set the model to evaluation mode
-    model.eval()
-    # Perform the prediction using the model
-    with torch.no_grad():
-        outputs = model(**inputs)
-        logits = outputs.logits
-        probabilities = torch.softmax(logits, dim=1)
-        prediction_value = torch.argmax(probabilities, dim=1).item()
-    # Map the model's output to 'Fake' or 'Real'
-    if prediction_value == 0:
-        label = 'Fake'
-    else:
-        label = 'Real'
-    # Extract keywords from the input text
-    keywords = extract_keywords(text)
-    return label, keywords
-# Function to generate fact-checking suggestions using OpenAI's GPT model
-def generate_suggestions(title, text, keywords):
-    # Construct the prompt for GPT based on the title, text, and keywords
-    prompt = f"""
-You are a specialist in fact-checking. Based on the title, text, and keywords of the fake news, please suggest some ways to know more about the facts. Please give recommendations that are easy to accept.
-Keywords: {', '.join(keywords)}
-Title: {title}
-Text: {text}
-"""
-    try:
-        # Call the OpenAI API to generate suggestions
-        response = openai.Completion.create(
-            engine="gpt-4-2024-08-06",
-            prompt=prompt,
-            max_tokens=1000,
-            temperature=0.7,
-        )
-        suggestions = response.choices[0].text.strip()
-    except Exception as e:
-        suggestions = "Unable to generate suggestions at this time."
-        print(f"Error generating suggestions: {e}")
-    return suggestions
-# Main function that predicts and explains the results
-def predict_and_explain(title, text):
-    # Predict whether the news is real or fake, and extract keywords
-    label, keywords = predict(title, text)
-    # If the news is classified as fake, generate suggestions
-    if label == 'Fake':
-        suggestions = generate_suggestions(title, text, keywords)
-        return f"""
-**Prediction**: Fake News
-**Keywords**: {', '.join(keywords)}
-**Suggestions**:
-{suggestions}
-"""
-    else:
-        # If the news is real, just show the prediction and keywords
-        return f"""
-**Prediction**: Real News
-**Keywords**: {', '.join(keywords)}
-"""
-# Gradio interface setup
-iface = gr.Interface(
-    fn=predict_and_explain,  # The function to handle user input and return predictions
-    inputs=[
-        gr.Textbox(label="Title"),  # Textbox for the news title
-        gr.Textbox(label="Text", lines=10)  # Textbox for the news content
-    ],
-    outputs="markdown",  # Output format is markdown
-    title="Fake News Detector with Suggestions",  # Title of the Gradio app
-    description="Enter the news title and content to check if it's fake. If fake, get suggestions on how to know more about the facts.",
-    # Description of the app
-)
-# Launch the Gradio app
-iface.launch()

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import openai
+import os
+import spacy
+import subprocess
+import sys
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+# Set OpenAI API key from environment variables
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# Load the tokenizer and the pretrained classification model
+tokenizer = AutoTokenizer.from_pretrained("hamzab/roberta-fake-news-classification")
+model = AutoModelForSequenceClassification.from_pretrained("hamzab/roberta-fake-news-classification")
+# Load spaCy model for keyword extraction
+import spacy.cli
+try:
+    nlp = spacy.load('en_core_web_sm')
+except OSError:
+    # If spaCy model is not available, download it
+    spacy.cli.download("en_core_web_sm")
+    nlp = spacy.load('en_core_web_sm')
+# Load the WELFake dataset and extract top 500 TF-IDF keywords
+def load_data():
+    # Load WELFake dataset from CSV file
+    wel_fake_data = pd.read_csv('WELFake_Dataset.csv')
+    wel_fake_data.dropna(subset=['text'], inplace=True)  # Remove rows with missing 'text'
+    # Create a TF-IDF vectorizer and fit it on the dataset's text column
+    vectorizer = TfidfVectorizer(max_features=500, stop_words='english')
+    X = vectorizer.fit_transform(wel_fake_data['text'])
+    # Get the top 500 keywords from the dataset
+    top_keywords = vectorizer.get_feature_names_out()
+    return top_keywords
+# Load top TF-IDF keywords from the WELFake dataset
+top_keywords = load_data()
+# Function to extract keywords using spaCy and matching them with TF-IDF keywords
+def extract_keywords(text):
+    # Use spaCy to extract keywords (nouns and proper nouns)
+    doc = nlp(text)
+    spacy_keywords = [token.text for token in doc if
+                      token.is_alpha and not token.is_stop and token.pos_ in ['NOUN', 'PROPN']]
+    # Use TF-IDF to match keywords in the input text with the top keywords from the dataset
+    tfidf_keywords = [kw for kw in top_keywords if kw.lower() in text.lower()]
+    # Combine the keywords from both sources and remove duplicates
+    all_keywords = list(set(spacy_keywords + tfidf_keywords))
+    return all_keywords
+# Function to predict whether the news is real or fake using the classification model
+def predict(title, text):
+    # Combine the title and text as input to the model
+    input_text = title + " " + text
+    # Tokenize the input and prepare it for the model
+    inputs = tokenizer.encode_plus(
+        input_text,
+        add_special_tokens=True,
+        max_length=512,
+        truncation=True,
+        padding='max_length',
+        return_tensors="pt"
+    )
+    # Set the model to evaluation mode
+    model.eval()
+    # Perform the prediction using the model
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        probabilities = torch.softmax(logits, dim=1)
+        prediction_value = torch.argmax(probabilities, dim=1).item()
+    # Map the model's output to 'Fake' or 'Real'
+    if prediction_value == 0:
+        label = 'Fake'
+    else:
+        label = 'Real'
+    # Extract keywords from the input text
+    keywords = extract_keywords(text)
+    return label, keywords
+# Function to generate fact-checking suggestions using OpenAI's GPT model
+def generate_suggestions(title, text, keywords):
+    # Construct the prompt for GPT based on the title, text, and keywords
+    prompt = f"""
+You are a specialist in fact-checking. Based on the title, text, and keywords of the fake news, please suggest some ways to know more about the facts. Please give recommendations that are easy to accept.
+Keywords: {', '.join(keywords)}
+Title: {title}
+Text: {text}
+"""
+    try:
+        # Call the OpenAI API to generate suggestions
+        response = openai.Completion.create(
+            engine="gpt-4-2024-08-06",
+            prompt=prompt,
+            max_tokens=1000,
+            temperature=0.7,
+        )
+        suggestions = response.choices[0].text.strip()
+    except Exception as e:
+        suggestions = "Unable to generate suggestions at this time."
+        print(f"Error generating suggestions: {e}")
+    return suggestions
+# Main function that predicts and explains the results
+def predict_and_explain(title, text):
+    # Predict whether the news is real or fake, and extract keywords
+    label, keywords = predict(title, text)
+    # If the news is classified as fake, generate suggestions
+    if label == 'Fake':
+        suggestions = generate_suggestions(title, text, keywords)
+        return f"""
+**Prediction**: Fake News
+**Keywords**: {', '.join(keywords)}
+**Suggestions**:
+{suggestions}
+"""
+    else:
+        # If the news is real, just show the prediction and keywords
+        return f"""
+**Prediction**: Real News
+**Keywords**: {', '.join(keywords)}
+"""
+# Gradio interface setup
+iface = gr.Interface(
+    fn=predict_and_explain,  # The function to handle user input and return predictions
+    inputs=[
+        gr.Textbox(label="Title"),  # Textbox for the news title
+        gr.Textbox(label="Text", lines=10)  # Textbox for the news content
+    ],
+    outputs="markdown",  # Output format is markdown
+    title="Fake News Detector with Suggestions",  # Title of the Gradio app
+    description="Enter the news title and content to check if it's fake. If fake, get suggestions on how to know more about the facts.",
+    # Description of the app
+)
+# Launch the Gradio app
+iface.launch()