Spaces:
Runtime error
Runtime error
import re | |
import string | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score, confusion_matrix | |
from sklearn import feature_extraction, linear_model, model_selection, preprocessing | |
from sklearn.metrics import accuracy_score,precision_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.pipeline import Pipeline | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem import PorterStemmer | |
from nltk.tokenize import sent_tokenize, word_tokenize | |
from wordcloud import WordCloud, STOPWORDS | |
from tokenizers import ( | |
decoders, | |
models, | |
normalizers, | |
pre_tokenizers, | |
processors, | |
trainers, | |
Tokenizer, | |
) | |
import gc | |
import warnings | |
# Suppress every warning category for the rest of the process.
warnings.filterwarnings("ignore")

# Download the NLTK resources requested by this script at import time.
for _nltk_resource in ("punkt", "stopwords"):
    nltk.download(_nltk_resource)
from fastapi import FastAPI, Request | |
import pickle | |
# Load the pickled classifier once at import time.
# The context manager closes the file handle as soon as unpickling finishes.
# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load artifacts that come from a trusted source.
# NOTE(review): this is a hard-coded absolute path -- confirm the file exists
# at this location in the deployment environment.
with open("/content/fakenewsdetection/fakenews.sav", "rb") as _model_file:
    model = pickle.load(_model_file)
import gradio as gr | |
def predict(text):
    """Classify a single news text and return a human-readable verdict.

    The text is cleaned with ``wordpre``, lower-cased/tokenized with
    ``lower_and_tokenize``, lemmatized, re-joined into one string, encoded
    with ``tokenizer`` and passed through ``model``. The arg-max over the
    model's logits is the predicted class.

    Relies on names that are not defined inside this function and must be
    provided at module level: ``wordpre``, ``lower_and_tokenize``,
    ``lemmatizer``, ``tokenizer``, ``max_seq_len``, ``batch_size``,
    ``seed_val``, ``listmodel``, ``bestidx``, ``device`` and ``model``.

    Args:
        text: The raw text to classify (a single string).

    Returns:
        ``"This may be a fake news."`` when the only prediction is class 1,
        otherwise ``"This may be a real news."``.
    """
    # Function-scope imports so these names resolve even if the module
    # header does not import them; re-importing a loaded module is cheap.
    import random

    import torch
    from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

    # Run the single input through the preprocessing chain as a one-row Series.
    frame = pd.DataFrame([text], columns=["text"])
    cleaned = frame["text"].apply(wordpre)
    cleaned = lower_and_tokenize(cleaned)
    cleaned = cleaned.apply(
        lambda words: [lemmatizer.lemmatize(word) for word in words]
    )
    cleaned = cleaned.apply(lambda words: ' '.join(words))

    # Tokenize and encode, padding/truncating to a fixed sequence length.
    tokens_text = tokenizer.batch_encode_plus(
        cleaned.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )

    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    # Placeholder label: the model is called with ``labels=...`` below, but
    # the true class of user input is unknown.
    text_y = torch.tensor([0])

    # Wrap the tensors and iterate over them in order.
    text_data = TensorDataset(text_seq, text_mask, text_y)
    text_dataloader = DataLoader(
        text_data,
        sampler=SequentialSampler(text_data),
        batch_size=batch_size,
    )

    # Seed the Python and torch RNGs before inference.
    random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # NOTE(review): eval mode is set on ``listmodel[bestidx]`` while inference
    # below runs on the module-level ``model`` -- confirm these are meant to
    # be the same object.
    use = listmodel[bestidx]
    use.eval()

    y_pred = []
    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        labels = batch[2].to(device)
        with torch.no_grad():
            out = model(input_ids, input_mask, labels=labels)
        pred = torch.argmax(out.logits, dim=1)
        # Store plain ints so the verdict check below is a simple list compare.
        y_pred.extend(pred.flatten().tolist())
        # Release per-batch tensors eagerly to keep memory usage down.
        del input_ids, input_mask, labels, out, pred
        gc.collect()

    # Return the verdict itself; assigning it to a local called ``predict``
    # would shadow this function and leave the returned value empty.
    if y_pred == [1]:
        result = "This may be a fake news."
    else:
        result = "This may be a real news."
    return result
# UI components: a three-line text input and a one-line text output.
_text_input = gr.Textbox(lines=3, label="Text")
_verdict_output = gr.Textbox(lines=1, label="Predict")

# Wire the components to predict(): the input text is passed to the function
# and its return value is shown in the output box.
demo = gr.Interface(
    fn=predict,
    inputs=[_text_input],
    outputs=[_verdict_output],
)

# Launch only when run as a script, not when the module is imported.
if __name__ == "__main__":
    demo.launch(share=True)