# Hugging Face Space file header (web-UI residue, preserved as a comment):
# author: sercetexam9 — commit: "Update app.py" — revision 3dbdad1 (verified) — 3.39 kB
# --- Standard library ---------------------------------------------------
import gc
import pickle
import re
import string
import warnings

# --- Third party --------------------------------------------------------
import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
from fastapi import FastAPI, Request
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from tokenizers import (
    Tokenizer,
    decoders,
    models,
    normalizers,
    pre_tokenizers,
    processors,
    trainers,
)
from wordcloud import WordCloud, STOPWORDS

# Silence library deprecation chatter in the hosted demo.
warnings.filterwarnings("ignore")

# One-time NLTK resource downloads required by the tokenize/stopword helpers.
nltk.download('punkt')
nltk.download('stopwords')

# Load the serialized fake-news model.
# BUG FIX: the original `pickle.load(open(...))` leaked the file handle;
# a `with` block closes it deterministically.
# NOTE(security): pickle.load executes arbitrary code — only ever load a
# trusted, locally-controlled file here.
with open("/content/fakenewsdetection/fakenews.sav", 'rb') as _model_file:
    model = pickle.load(_model_file)
def predict(text):
    """Classify a single news text as fake or real.

    Cleans and lemmatizes *text* with the module-level preprocessing helpers,
    encodes it with the transformer `tokenizer`, runs one forward pass of the
    loaded `model`, and returns a human-readable verdict string.

    Parameters
    ----------
    text : str
        Raw article text entered in the Gradio textbox.

    Returns
    -------
    str
        "This may be a fake news." or "This may be a real news."

    NOTE(review): relies on names defined elsewhere in the project
    (`wordpre`, `lower_and_tokenize`, `lemmatizer`, `tokenizer`,
    `max_seq_len`, `batch_size`, `seed_val`, `device`, `torch`,
    `TensorDataset`, `SequentialSampler`, `DataLoader`, `random`) —
    confirm they are imported/defined in the full file.
    """
    # Wrap the raw string in a Series so the existing pandas-based cleaning
    # helpers can be reused unchanged.
    series = pd.DataFrame([text], columns=["text"])["text"]
    series = series.apply(wordpre)
    series = lower_and_tokenize(series)
    series = series.apply(lambda toks: ' '.join(lemmatizer.lemmatize(w) for w in toks))

    # Tokenize and encode the (single-element) batch for the transformer.
    tokens_text = tokenizer.batch_encode_plus(
        series.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )

    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    # Dummy label: the model's forward signature expects `labels`, but the
    # value is irrelevant at inference time.
    text_y = torch.tensor([0])

    text_data = TensorDataset(text_seq, text_mask, text_y)
    text_dataloader = DataLoader(
        text_data,
        sampler=SequentialSampler(text_data),
        batch_size=batch_size,
    )

    # Fix seeds for reproducible inference.
    random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    model.eval()
    y_pred = []
    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        labels = batch[2].to(device)
        with torch.no_grad():
            out = model(input_ids, input_mask, labels=labels)
        # BUG FIX: the original cloned logits via torch.tensor(logits) and
        # computed a loss against the dummy label — both dead work for
        # prediction; argmax over the logits is all that is needed.
        preds = torch.argmax(out.logits, dim=1)
        y_pred.extend(preds.flatten().tolist())
        del input_ids, input_mask, out, preds
        gc.collect()

    # BUG FIX: the original built the verdict in a local named `predict`
    # (shadowing this function) and then returned `result`, which was always
    # the empty string — the app displayed nothing. It also compared
    # `y_pred == [1]` (a list of tensors) to a list of ints, which never
    # matched. Compare the actual predicted class id and return the message.
    if y_pred and y_pred[0] == 1:
        result = "This may be a fake news."
    else:
        result = "This may be a real news."
    return result
# Gradio UI: a single text box in, a single verdict text box out.
text_input = gr.Textbox(label="Text", lines=3)
verdict_output = gr.Textbox(label="Predict", lines=1)

demo = gr.Interface(
    fn=predict,
    inputs=[text_input],
    outputs=[verdict_output],
)

if __name__ == "__main__":
    # share=True publishes a temporary public URL for the demo.
    demo.launch(share=True)