# Hugging Face Spaces app. (The scraped page-status header "Spaces: Sleeping"
# was converted to this comment so the file parses as Python.)
import re

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
# Define your functions and logic here
def load_and_prepare_data():
    """Load the WELFake CSV, drop incomplete rows, and add a cleaned text column.

    Returns:
        The prepared DataFrame with a new ``clean_text`` column, or an
        error-message string on failure (callers check ``isinstance(result, str)``).
    """
    try:
        file_path = 'WELFake_Dataset.csv'  # Ensure this is the correct path
        dataset = pd.read_csv(file_path)
        print(f"Dataset loaded with {dataset.shape[0]} records")
        # The published CSV ships with a stray index column; errors='ignore'
        # keeps the loader working on re-exported copies that lack it.
        dataset = dataset.drop(columns=['Unnamed: 0'], errors='ignore')
        # Rows missing either the title or the body are unusable for training.
        dataset = dataset.dropna(subset=['title', 'text'])
        dataset['clean_text'] = dataset['text'].apply(clean_text)
        print(f"Dataset cleaned. Records after cleaning: {dataset.shape[0]}")
        return dataset
    except Exception as e:
        return f"Error loading and preparing data: {e}"
def clean_text(text):
    """Normalize raw article text for TF-IDF vectorization.

    Replaces non-word characters with spaces, strips digits, collapses
    whitespace, and lowercases. Digits are removed BEFORE whitespace is
    collapsed; the previous order left double spaces where a digit sat
    between words (e.g. "a 1 b" -> "a  b").

    Returns:
        The cleaned string, or an error-message string on failure
        (matches the error-as-string convention used file-wide).
    """
    try:
        text = re.sub(r'\W', ' ', text)   # punctuation/symbols -> spaces
        text = re.sub(r'\d', '', text)    # drop digits before collapsing
        text = re.sub(r'\s+', ' ', text)  # collapse runs of whitespace
        return text.lower().strip()
    except Exception as e:
        return f"Error cleaning text: {e}"
def train_model(dataset):
    """Train a TF-IDF + logistic-regression fake-news classifier.

    Args:
        dataset: DataFrame with ``clean_text`` and ``label`` columns.

    Returns:
        Tuple ``(vectorizer, label_binarizer, model, test_accuracy, test_f1)``,
        or an error-message string on failure.
    """
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            dataset['clean_text'], dataset['label'],
            test_size=0.2, random_state=42)
        print(f"Training data size: {X_train.shape[0]}, Test data size: {X_test.shape[0]}")
        vectorizer = TfidfVectorizer(max_features=10000)
        X_train_tfidf = vectorizer.fit_transform(X_train)
        X_test_tfidf = vectorizer.transform(X_test)
        # Fitted only so callers receive a usable binarizer in the return
        # tuple; the model itself trains on the raw labels. (The previous
        # version also computed binarized label arrays that were never used.)
        lb = LabelBinarizer()
        lb.fit(y_train)
        log_reg_model = LogisticRegression(max_iter=1000)
        log_reg_model.fit(X_train_tfidf, y_train)
        # Report train metrics to spot over/under-fitting in the Space logs.
        y_pred_log_reg_train = log_reg_model.predict(X_train_tfidf)
        train_accuracy_log_reg = accuracy_score(y_train, y_pred_log_reg_train)
        train_f1_log_reg = f1_score(y_train, y_pred_log_reg_train)
        y_pred_log_reg = log_reg_model.predict(X_test_tfidf)
        accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
        f1_log_reg = f1_score(y_test, y_pred_log_reg)
        print(f"Train Accuracy: {train_accuracy_log_reg}, Train F1 Score: {train_f1_log_reg}")
        print(f"Test Accuracy: {accuracy_log_reg}, Test F1 Score: {f1_log_reg}")
        return vectorizer, lb, log_reg_model, accuracy_log_reg, f1_log_reg
    except Exception as e:
        return f"Error training model: {e}"
def fake_news_detection(text):
    """Classify news text as Real or Fake.

    Trains the pipeline once on the first request and caches the fitted
    vectorizer and model on the function object, so subsequent requests
    only vectorize and predict instead of retraining from scratch.

    Args:
        text: Raw news headline or article text.

    Returns:
        "Prediction: Real"/"Prediction: Fake", or an error-message string.
    """
    try:
        cache = getattr(fake_news_detection, '_cache', None)
        if cache is None:
            dataset = load_and_prepare_data()
            if isinstance(dataset, str):  # error from data loading
                return dataset
            # train_model returns an error *string* on failure, so capture
            # the result before unpacking -- unpacking a string into five
            # names would raise ValueError and mask the real error message.
            trained = train_model(dataset)
            if isinstance(trained, str):
                return trained
            vectorizer, _lb, log_reg_model, _accuracy, _f1 = trained
            cache = (vectorizer, log_reg_model)
            fake_news_detection._cache = cache
        vectorizer, log_reg_model = cache
        clean_text_input = clean_text(text)
        text_tfidf = vectorizer.transform([clean_text_input])
        prediction = log_reg_model.predict_proba(text_tfidf)
        # Column 1 is the probability of label 1 (presumably "real" in
        # WELFake -- matches the original threshold logic).
        result = "Real" if prediction[0][1] >= 0.5 else "Fake"
        return f"Prediction: {result}"
    except Exception as e:
        return f"Error in fake news detection: {e}"
# Gradio UI wiring: one free-text input box in, a prediction string out.
news_input = gr.Textbox(lines=2, placeholder="Enter news text here...")

iface = gr.Interface(
    fn=fake_news_detection,
    inputs=news_input,
    outputs="text",
    title="Fake News Detector",
    description="Enter a news headline or article text to check if it is fake or real.",
)

# Launch the app only when executed directly as a script.
if __name__ == "__main__":
    iface.launch()