Spaces:

Zeamays3427
/

fake-news-debunker

Running

App Files Files Community

Zeamays3427 committed on Oct 16, 2024

Commit

65ad974

verified ·

1 Parent(s): 495e52e

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
LICENSE +18 -0
WELFake_Dataset.csv +3 -0
app.py +164 -0
requirements.txt +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+WELFake_Dataset.csv filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

	@@ -0,0 +1,18 @@

+MIT License
+Copyright (c) 2024 DengPeng
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
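+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.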
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

WELFake_Dataset.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:665331424230fc452e9482c3547a6a199a2c29745ade8d236950d1d105223773
+size 245086152

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import openai
+import os
+import spacy
+import subprocess
+import sys
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+# Set OpenAI API key from environment variables
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# Load the tokenizer and the pretrained classification model
+tokenizer = AutoTokenizer.from_pretrained("hamzab/roberta-fake-news-classification")
+model = AutoModelForSequenceClassification.from_pretrained("hamzab/roberta-fake-news-classification")
+# Load spaCy model for keyword extraction
+try:
+    nlp = spacy.load('en_core_web_sm')
+except:
+    # If spaCy model is not available, download it
+    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
+    nlp = spacy.load('en_core_web_sm')
+# Load the WELFake dataset and extract top 500 TF-IDF keywords
+def load_data():
+    # Load WELFake dataset from CSV file
+    wel_fake_data = pd.read_csv('WELFake_Dataset.csv')
+    wel_fake_data.dropna(subset=['text'], inplace=True)  # Remove rows with missing 'text'
+    # Create a TF-IDF vectorizer and fit it on the dataset's text column
+    vectorizer = TfidfVectorizer(max_features=500, stop_words='english')
+    X = vectorizer.fit_transform(wel_fake_data['text'])
+    # Get the top 500 keywords from the dataset
+    top_keywords = vectorizer.get_feature_names_out()
+    return top_keywords
+# Load top TF-IDF keywords from the WELFake dataset
+top_keywords = load_data()
+# Function to extract keywords using spaCy and matching them with TF-IDF keywords
+def extract_keywords(text):
+    # Use spaCy to extract keywords (nouns and proper nouns)
+    doc = nlp(text)
+    spacy_keywords = [token.text for token in doc if
+                      token.is_alpha and not token.is_stop and token.pos_ in ['NOUN', 'PROPN']]
+    # Use TF-IDF to match keywords in the input text with the top keywords from the dataset
+    tfidf_keywords = [kw for kw in top_keywords if kw.lower() in text.lower()]
+    # Combine the keywords from both sources and remove duplicates
+    all_keywords = list(set(spacy_keywords + tfidf_keywords))
+    return all_keywords
+# Function to predict whether the news is real or fake using the classification model
+def predict(title, text):
+    # Combine the title and text as input to the model
+    input_text = title + " " + text
+    # Tokenize the input and prepare it for the model
+    inputs = tokenizer.encode_plus(
+        input_text,
+        add_special_tokens=True,
+        max_length=512,
+        truncation=True,
+        padding='max_length',
+        return_tensors="pt"
+    )
+    # Set the model to evaluation mode
+    model.eval()
+    # Perform the prediction using the model
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        probabilities = torch.softmax(logits, dim=1)
+        prediction_value = torch.argmax(probabilities, dim=1).item()
+    # Map the model's output to 'Fake' or 'Real'
+    if prediction_value == 0:
+        label = 'Fake'
+    else:
+        label = 'Real'
+    # Extract keywords from the input text
+    keywords = extract_keywords(text)
+    return label, keywords
+# Function to generate fact-checking suggestions using OpenAI's GPT model
+def generate_suggestions(title, text, keywords):
+    # Construct the prompt for GPT based on the title, text, and keywords
+    prompt = f"""
+You are a specialist in fact-checking. Based on the title, text, and keywords of the fake news, please suggest some ways to know more about the facts. Please give recommendations that are easy to accept.
+Keywords: {', '.join(keywords)}
+Title: {title}
+Text: {text}
+"""
+    try:
+        # Call the OpenAI API to generate suggestions
+        response = openai.Completion.create(
+            engine="text-davinci-003",
+            prompt=prompt,
+            max_tokens=150,
+            temperature=0.7,
+        )
+        suggestions = response.choices[0].text.strip()
+    except Exception as e:
+        suggestions = "Unable to generate suggestions at this time."
+        print(f"Error generating suggestions: {e}")
+    return suggestions
+# Main function that predicts and explains the results
+def predict_and_explain(title, text):
+    # Predict whether the news is real or fake, and extract keywords
+    label, keywords = predict(title, text)
+    # If the news is classified as fake, generate suggestions
+    if label == 'Fake':
+        suggestions = generate_suggestions(title, text, keywords)
+        return f"""
+**Prediction**: Fake News
+**Keywords**: {', '.join(keywords)}
+**Suggestions**:
+{suggestions}
+"""
+    else:
+        # If the news is real, just show the prediction and keywords
+        return f"""
+**Prediction**: Real News
+**Keywords**: {', '.join(keywords)}
+"""
+# Gradio interface setup
+iface = gr.Interface(
+    fn=predict_and_explain,  # The function to handle user input and return predictions
+    inputs=[
+        gr.Textbox(label="Title"),  # Textbox for the news title
+        gr.Textbox(label="Text", lines=10)  # Textbox for the news content
+    ],
+    outputs="markdown",  # Output format is markdown
+    title="Fake News Detector with Suggestions",  # Title of the Gradio app
+    description="Enter the news title and content to check if it's fake. If fake, get suggestions on how to know more about the facts.",
+    # Description of the app
+)
+# Launch the Gradio app
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+transformers
+torch
+gradio
+openai
+spacy
+en-core-web-sm
+pandas
+scikit-learn