Spaces:
Sleeping
Sleeping
app.py
Browse files
app.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
from sklearn.preprocessing import LabelBinarizer
|
7 |
+
from sklearn.linear_model import LogisticRegression
|
8 |
+
from sklearn.metrics import accuracy_score, f1_score
|
9 |
+
|
10 |
+
# Define your functions and logic here
|
11 |
+
def load_and_prepare_data(file_path='WELFake_Dataset.csv'):
    """Load the news CSV and add a normalized ``clean_text`` column.

    Args:
        file_path: Path of the CSV file to read. Defaults to
            ``'WELFake_Dataset.csv'`` relative to the working directory.

    Returns:
        The prepared ``DataFrame`` on success. On any failure an error
        message string is returned instead of raising, so callers must
        check ``isinstance(result, str)`` before using the result.
    """
    try:
        dataset = pd.read_csv(file_path)
        print(f"Dataset loaded with {dataset.shape[0]} records")
        # 'Unnamed: 0' is presumably a saved index column (confirm against the
        # CSV); errors='ignore' keeps the load working when it is absent.
        dataset = dataset.drop(columns=['Unnamed: 0'], errors='ignore')
        # Rows missing either field are discarded before cleaning.
        dataset = dataset.dropna(subset=['title', 'text'])
        dataset['clean_text'] = dataset['text'].apply(clean_text)
        print(f"Dataset cleaned. Records after cleaning: {dataset.shape[0]}")
        return dataset
    except Exception as e:
        # Failures are reported as a message string rather than raised.
        return f"Error loading and preparing data: {e}"
|
23 |
+
|
24 |
+
def clean_text(text):
    """Normalize raw text for vectorization.

    Digits are removed, every run of non-word characters (punctuation and
    whitespace) becomes a single space, and the result is lowercased and
    stripped of leading/trailing spaces.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. If ``text`` is not a string, an error message
        beginning with ``"Error cleaning text:"`` is returned rather than
        raising.
    """
    try:
        # Remove digits before collapsing separators; doing it afterwards
        # leaves a double space wherever a standalone number stood.
        text = re.sub(r'\d', '', text)
        # Whitespace is itself non-word, so a single pass both replaces
        # punctuation and collapses whitespace runs.
        text = re.sub(r'\W+', ' ', text)
        return text.lower().strip()
    except TypeError as e:
        # re.sub raises TypeError for non-string input such as None.
        return f"Error cleaning text: {e}"
|
33 |
+
|
34 |
+
def train_model(dataset):
    """Train a TF-IDF + logistic-regression classifier on ``dataset``.

    Args:
        dataset: DataFrame with a ``clean_text`` column (features) and a
            ``label`` column (targets).

    Returns:
        On success, the tuple ``(vectorizer, lb, log_reg_model,
        accuracy_log_reg, f1_log_reg)``; the two scores are computed on the
        held-out 20% split. On any failure an error message string is
        returned instead of raising.
    """
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            dataset['clean_text'],
            dataset['label'],
            test_size=0.2,
            random_state=42,
        )
        print(f"Training data size: {X_train.shape[0]}, Test data size: {X_test.shape[0]}")

        # Fit the vocabulary on the training split only, then reuse it for
        # the test split.
        vectorizer = TfidfVectorizer(max_features=10000)
        X_train_tfidf = vectorizer.fit_transform(X_train)
        X_test_tfidf = vectorizer.transform(X_test)

        # The binarizer is fitted because it is part of the return value;
        # the classifier itself is trained on the raw labels.
        lb = LabelBinarizer()
        lb.fit(y_train)

        log_reg_model = LogisticRegression(max_iter=1000)
        log_reg_model.fit(X_train_tfidf, y_train)

        # Training-split scores are only printed, not returned.
        y_pred_log_reg_train = log_reg_model.predict(X_train_tfidf)
        train_accuracy_log_reg = accuracy_score(y_train, y_pred_log_reg_train)
        train_f1_log_reg = f1_score(y_train, y_pred_log_reg_train)

        y_pred_log_reg = log_reg_model.predict(X_test_tfidf)
        accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
        f1_log_reg = f1_score(y_test, y_pred_log_reg)

        print(f"Train Accuracy: {train_accuracy_log_reg}, Train F1 Score: {train_f1_log_reg}")
        print(f"Test Accuracy: {accuracy_log_reg}, Test F1 Score: {f1_log_reg}")

        return vectorizer, lb, log_reg_model, accuracy_log_reg, f1_log_reg
    except Exception as e:
        # Failures are reported as a message string rather than raised.
        return f"Error training model: {e}"
|
64 |
+
|
65 |
+
# Fitted (vectorizer, model) pair, stored after the first successful training
# run so later predictions do not reload the CSV and retrain the model.
_TRAINED_ARTIFACTS = {}


def fake_news_detection(text):
    """Classify ``text`` as real or fake news.

    The dataset is loaded and the model trained on the first call; the
    fitted vectorizer and classifier are then reused for the rest of the
    process. Failed attempts are not cached, so a later call retries.

    Args:
        text: The headline or article text to classify.

    Returns:
        ``"Prediction: Real"`` or ``"Prediction: Fake"`` on success,
        otherwise an error message string (this function never raises).
    """
    try:
        if "pair" not in _TRAINED_ARTIFACTS:
            dataset = load_and_prepare_data()
            if isinstance(dataset, str):  # loader reports failure as a message
                return dataset

            trained = train_model(dataset)
            # Check before unpacking: an error string cannot be unpacked into
            # five names and would otherwise hide the real training error.
            if isinstance(trained, str):
                return trained

            vectorizer, _lb, log_reg_model, _accuracy, _f1 = trained
            _TRAINED_ARTIFACTS["pair"] = (vectorizer, log_reg_model)

        vectorizer, log_reg_model = _TRAINED_ARTIFACTS["pair"]

        clean_text_input = clean_text(text)
        text_tfidf = vectorizer.transform([clean_text_input])
        prediction = log_reg_model.predict_proba(text_tfidf)
        # Column 1 is the probability of the classifier's second class;
        # presumably label 1 means "real" -- confirm against the dataset.
        result = "Real" if prediction[0][1] >= 0.5 else "Fake"

        return f"Prediction: {result}"
    except Exception as e:
        return f"Error in fake news detection: {e}"
|
82 |
+
|
83 |
+
# Gradio front end: one text box in, the classifier's verdict string out.
iface = gr.Interface(
    title="Fake News Detector",
    description="Enter a news headline or article text to check if it is fake or real.",
    fn=fake_news_detection,
    inputs=gr.Textbox(lines=2, placeholder="Enter news text here..."),
    outputs="text",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|