import re
import string
import gc
import random
import pickle
import warnings

import pandas as pd
import torch
from torch.utils.data import TensorDataset, SequentialSampler, DataLoader

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

import gradio as gr

warnings.filterwarnings("ignore")
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the fine-tuned fake-news classifier saved during training.
model = pickle.load(open("/content/fakenewsdetection/fakenews.sav", 'rb'))
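
# --- Assumed training-time helpers -------------------------------------------
# predict() below references wordpre, lower_and_tokenize, lemmatizer, tokenizer,
# max_seq_len, batch_size, seed_val, and device, which were defined in the
# training notebook rather than in this file. The definitions here are minimal
# stand-ins so the app runs on its own; the exact cleaning rules, tokenizer
# checkpoint, and hyperparameter values are assumptions and should be kept in
# sync with whatever was actually used for training.
from transformers import AutoTokenizer

def wordpre(text):
    # Basic cleaning: strip URLs, HTML tags, punctuation, and digits.
    text = re.sub(r"https?://\S+|www\.\S+", " ", text)
    text = re.sub(r"<.*?>", " ", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"\d+", " ", text)
    return text

stop_words = set(stopwords.words("english"))

def lower_and_tokenize(series):
    # Lower-case, split on whitespace, and drop English stop words.
    return series.apply(
        lambda t: [w for w in t.lower().split() if w not in stop_words]
    )

lemmatizer = WordNetLemmatizer()

# Tokenizer checkpoint, sequence length, and batch size are assumed values.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
max_seq_len = 128
batch_size = 32
seed_val = 42

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# ------------------------------------------------------------------------------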
def predict(text):
    # Apply the same preprocessing used at training time:
    # cleaning, lower-casing/tokenizing, lemmatizing, then re-joining.
    text = pd.DataFrame([text], columns=["text"])
    text = text["text"]
    text = text.apply(wordpre)
    text = lower_and_tokenize(text)
    text = text.apply(lambda x: [lemmatizer.lemmatize(word) for word in x])
    text = text.apply(lambda x: ' '.join(x))

    # Tokenize and encode the cleaned text for the transformer model.
    tokens_text = tokenizer.batch_encode_plus(
        text.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )
    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    text_y = torch.tensor([0])  # dummy label, only needed to build the dataset

    # Wrap the tensors in a DataLoader so inference mirrors the evaluation loop.
    text_data = TensorDataset(text_seq, text_mask, text_y)
    text_sampler = SequentialSampler(text_data)
    text_dataloader = DataLoader(text_data, sampler=text_sampler, batch_size=batch_size)

    # Fix the seeds so repeated calls behave deterministically.
    random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    model.eval()
    y_pred = []

    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        labels = batch[2].to(device)
        with torch.no_grad():
            out = model(input_ids, input_mask, labels=labels)
            logits = out.logits
        pred = torch.argmax(logits, dim=1)
        y_pred.extend(pred.flatten().tolist())
        del input_ids, input_mask, out, logits, pred
        gc.collect()

    # Label 1 means "fake", label 0 means "real".
    if y_pred and y_pred[0] == 1:
        result = "This may be fake news."
    else:
        result = "This may be real news."
    return result

demo = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="Text", lines=3)],
    outputs=[gr.Textbox(label="Prediction", lines=1)],
)
if __name__ == "__main__":
    demo.launch(share=True)
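
# Example usage (illustrative; the actual label depends on the trained model):
#   predict("Some headline or article text to check")
#   returns either "This may be fake news." or "This may be real news."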