# Hugging Face Space file header (web-UI residue, preserved as a comment):
# author: sercetexam9 — commit: "Update app.py" — revision 3dbdad1 (verified) — 3.39 kB
# --- Standard library ---------------------------------------------------
import gc
import pickle
import re
import string
import warnings

# --- Third party --------------------------------------------------------
import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
from fastapi import FastAPI, Request
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from tokenizers import (
    Tokenizer,
    decoders,
    models,
    normalizers,
    pre_tokenizers,
    processors,
    trainers,
)
from wordcloud import WordCloud, STOPWORDS

# Silence library deprecation chatter in the hosted demo.
warnings.filterwarnings("ignore")

# One-time NLTK resource downloads required by the tokenize/stopword helpers.
nltk.download('punkt')
nltk.download('stopwords')

# Load the serialized fake-news model.
# BUG FIX: the original `pickle.load(open(...))` leaked the file handle;
# a `with` block closes it deterministically.
# NOTE(security): pickle.load executes arbitrary code — only ever load a
# trusted, locally-controlled file here.
with open("/content/fakenewsdetection/fakenews.sav", 'rb') as _model_file:
    model = pickle.load(_model_file)
def predict(text):
    """Classify a single news text as fake or real.

    Cleans and lemmatizes *text* with the module-level preprocessing helpers,
    encodes it with the transformer `tokenizer`, runs one forward pass of the
    loaded `model`, and returns a human-readable verdict string.

    Parameters
    ----------
    text : str
        Raw article text entered in the Gradio textbox.

    Returns
    -------
    str
        "This may be a fake news." or "This may be a real news."

    NOTE(review): relies on names defined elsewhere in the project
    (`wordpre`, `lower_and_tokenize`, `lemmatizer`, `tokenizer`,
    `max_seq_len`, `batch_size`, `seed_val`, `device`, `torch`,
    `TensorDataset`, `SequentialSampler`, `DataLoader`, `random`) —
    confirm they are imported/defined in the full file.
    """
    # Wrap the raw string in a Series so the existing pandas-based cleaning
    # helpers can be reused unchanged.
    series = pd.DataFrame([text], columns=["text"])["text"]
    series = series.apply(wordpre)
    series = lower_and_tokenize(series)
    series = series.apply(lambda toks: ' '.join(lemmatizer.lemmatize(w) for w in toks))

    # Tokenize and encode the (single-element) batch for the transformer.
    tokens_text = tokenizer.batch_encode_plus(
        series.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )

    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    # Dummy label: the model's forward signature expects `labels`, but the
    # value is irrelevant at inference time.
    text_y = torch.tensor([0])

    text_data = TensorDataset(text_seq, text_mask, text_y)
    text_dataloader = DataLoader(
        text_data,
        sampler=SequentialSampler(text_data),
        batch_size=batch_size,
    )

    # Fix seeds for reproducible inference.
    random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    model.eval()
    y_pred = []
    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        labels = batch[2].to(device)
        with torch.no_grad():
            out = model(input_ids, input_mask, labels=labels)
        # BUG FIX: the original cloned logits via torch.tensor(logits) and
        # computed a loss against the dummy label — both dead work for
        # prediction; argmax over the logits is all that is needed.
        preds = torch.argmax(out.logits, dim=1)
        y_pred.extend(preds.flatten().tolist())
        del input_ids, input_mask, out, preds
        gc.collect()

    # BUG FIX: the original built the verdict in a local named `predict`
    # (shadowing this function) and then returned `result`, which was always
    # the empty string — the app displayed nothing. It also compared
    # `y_pred == [1]` (a list of tensors) to a list of ints, which never
    # matched. Compare the actual predicted class id and return the message.
    if y_pred and y_pred[0] == 1:
        result = "This may be a fake news."
    else:
        result = "This may be a real news."
    return result
# Gradio UI: a single text box in, a single verdict text box out.
text_input = gr.Textbox(label="Text", lines=3)
verdict_output = gr.Textbox(label="Predict", lines=1)

demo = gr.Interface(
    fn=predict,
    inputs=[text_input],
    outputs=[verdict_output],
)

if __name__ == "__main__":
    # share=True publishes a temporary public URL for the demo.
    demo.launch(share=True)