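"""Flask app for Telugu cyberbullying detection.

Input text is tokenized with IndicBERT (ai4bharat/indic-bert); the padded
token-ID sequence is fed to a pickled XGBoost classifier, which labels it
as cyberbullying (0) or non-cyberbullying.
"""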
from flask import Flask, request, render_template, current_app, abort
from tqdm import tqdm
import numpy as np
# import nbformat
# from nbconvert import PythonExporter
# import os
import torch
from transformers import AutoTokenizer
import pickle
from xgboost import XGBClassifier  # class of the pickled classifiers below

app = Flask(__name__)

# Load the default classifier once at application startup.
# Flask removed before_first_request in 2.3 (deprecated since 2.2), so the
# loader is called directly inside an application context instead.
def load_model():
    try:
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {e}")
        abort(500)  # Internal Server Error

with app.app_context():
    load_model()

def model_extract(input_string):
    """Tokenize a single string with IndicBERT and return padded token IDs as a tensor."""
    param = {'maxLen': 256}
    # Only the tokenizer is needed here: the classifier consumes raw token-ID
    # sequences, so the IndicBERT model itself is never run.
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0):
        padded_sequences = []
        for seq in sequences:
            # Truncate first so sequences longer than maxlen do not produce a
            # negative pad width (np.pad would raise a ValueError).
            if truncating == 'pre':
                seq = seq[-maxlen:]
            elif truncating == 'post':
                seq = seq[:maxlen]
            else:
                raise ValueError("Truncating should be 'pre' or 'post'.")

            pad_width = maxlen - len(seq)
            if padding == 'pre':
                padded_seq = np.pad(seq, (pad_width, 0), 'constant', constant_values=value)
            elif padding == 'post':
                padded_seq = np.pad(seq, (0, pad_width), 'constant', constant_values=value)
            else:
                raise ValueError("Padding should be 'pre' or 'post'.")

            padded_sequences.append(padded_seq)

        return np.array(padded_sequences, dtype=dtype)
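
    # e.g. pad_sequences([[5, 6, 7]], maxlen=5, padding='post')
    #      -> array([[5, 6, 7, 0, 0]], dtype=int32)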


    def create_attention_masks(input_ids):
        # 1.0 marks a real token, 0.0 marks padding (token ID 0)
        attention_masks = []
        for seq in tqdm(input_ids):
            seq_mask = [float(i > 0) for i in seq]
            attention_masks.append(seq_mask)
        return np.array(attention_masks)

    def getFeaturesandLabel(single_string, label):
        # Wrap the single string in a list
        sentences = ["[CLS] " + single_string + " [SEP]"]

        # Tokenize and preprocess
        tokenizer_texts = list(map(lambda t: tokenizer.tokenize(t)[:512], tqdm(sentences)))
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tqdm(tokenizer_texts)]

        # Pad sequences and create attention masks
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'], dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)

        # Convert to torch tensors
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)

        return X_data, attention_masks_data, y_data

    text_input = input_string
    label_input = [0]  # dummy label; only the token-ID features are used downstream
    X_data, attention_masks_data, y_data = getFeaturesandLabel(text_input, label_input)
    return X_data
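
# Example (hypothetical input): model_extract("ఒక తెలుగు వాక్యం") returns a
# (1, 256) LongTensor of token IDs; call .numpy() on it before clf.predict.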


# def model_heart():
#     # Path to the notebook file
#     notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
#     # Read the notebook content
#     with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
#         notebook_content = nbformat.read(notebook_file, as_version=4)
#     # Create a PythonExporter
#     python_exporter = PythonExporter()
#     # Convert the notebook to a Python script
#     python_script, _ = python_exporter.from_notebook_node(notebook_content)
#     print(python_script)
#     # Execute the Python script
#     exec(python_script)

# model_heart()
# Now you can use the variables and functions defined in the notebook in your app.py
# `match` is an iterable of characters defined in tempCodeRunnerFile.py;
# any input containing one of them bypasses the model in predict() below.
from tempCodeRunnerFile import match
@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST', 'GET'])
def predict():
    input_string = request.form['text']
    print('text: ', input_string)
    # This loads a different pickle per request than the one cached on
    # current_app at startup; reusing current_app.clf would avoid re-reading
    # the file on every request.
    with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
        clf = pickle.load(file)

    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        # XGBoost expects array-like input, so convert the torch tensor to numpy
        prediction = clf.predict(ans.numpy())

    print('prediction=', prediction)
    # prediction[0] is the class label: 0 = cyberbullying
    # ('వాక్యం' is Telugu for 'sentence')
    if prediction[0] == 0:
        return render_template('index.html', pred='Cyberbullying Text', question='వాక్యం -   ' + input_string)
    else:
        return render_template('index.html', pred='Non-Cyberbullying Text', question='వాక్యం -   ' + input_string)

if __name__ == "__main__":
    app.run(debug=True, port=8001)
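
# Example request (hypothetical input; the form field is named 'text'):
#   curl -X POST --data-urlencode 'text=ఒక తెలుగు వాక్యం' http://localhost:8001/predict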

# To create a pickle file:
#   with open('classifier.pkl', 'wb') as file:
#       pickle.dump(xgb, file)