from flask import Flask, request, render_template, url_for, current_app, abort
from tqdm import tqdm
import numpy as np
# import nbformat
# from nbconvert import PythonExporter
# import os
import torch
from transformers import AutoModel, AutoTokenizer
import pickle
from xgboost import XGBClassifier
app = Flask(__name__)
# Load the model during the application startup
# @before_first_request
def load_model():
    try:
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error

app.before_first_request(load_model)
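# Compatibility note: before_first_request was removed in Flask 2.3, so on newer
# Flask versions the same one-time load can instead be done at startup, e.g.:
#
#     with app.app_context():
#         load_model()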

def model_extract(input_string):
    param = {'maxLen': 256}
    # NOTE: the AutoModel is loaded here but never called below; only the
    # tokenizer output (padded token IDs) is returned to the classifier.
    model = AutoModel.from_pretrained("ai4bharat/indic-bert")
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0):
        padded_sequences = []
        for seq in sequences:
            # Clamp to zero so sequences longer than maxlen do not produce a
            # negative pad width (np.pad rejects negative widths); overlong
            # sequences are handled by the truncating step below.
            pad_width = max(maxlen - len(seq), 0)
            if padding == 'pre':
                padded_seq = np.pad(seq, (pad_width, 0), 'constant', constant_values=value)
            elif padding == 'post':
                padded_seq = np.pad(seq, (0, pad_width), 'constant', constant_values=value)
            else:
                raise ValueError("Padding should be 'pre' or 'post'.")
            if truncating == 'pre':
                padded_seq = padded_seq[-maxlen:]
            elif truncating == 'post':
                padded_seq = padded_seq[:maxlen]
            else:
                raise ValueError("Truncating should be 'pre' or 'post'.")
            padded_sequences.append(padded_seq)
        return np.array(padded_sequences, dtype=dtype)
    def create_attention_masks(input_ids):
        attention_masks = []
        for seq in tqdm(input_ids):
            seq_mask = [float(i > 0) for i in seq]
            attention_masks.append(seq_mask)
        return np.array(attention_masks)
    def getFeaturesandLabel(single_string, label):
        # Wrap the single string in a list
        sentences = ["[CLS] " + single_string + " [SEP]"]
        # Tokenize and preprocess
        tokenizer_texts = list(map(lambda t: tokenizer.tokenize(t)[:512], tqdm(sentences)))
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tqdm(tokenizer_texts)]
        # Pad sequences and create attention masks
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'], dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)
        # Convert to torch tensors
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)
        return X_data, attention_masks_data, y_data
    text_input = input_string
    label_input = [0]
    X_data, attention_masks_data, y_data = getFeaturesandLabel(text_input, label_input)
    return X_data
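# Usage note: model_extract(text) returns a (1, maxLen) torch.LongTensor of padded
# token IDs; predict() below passes that tensor straight to the pickled XGBoost
# classifier, without running the loaded IndicBERT model over it.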
# def model_heart():
#     # Path to the notebook file
#     notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
#     # Read the notebook content
#     with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
#         notebook_content = nbformat.read(notebook_file, as_version=4)
#     # Create a PythonExporter
#     python_exporter = PythonExporter()
#     # Convert the notebook to a Python script
#     python_script, _ = python_exporter.from_notebook_node(notebook_content)
#     print(python_script)
#     # Execute the Python script
#     exec(python_script)
# model_heart()
# Now you can use the variables and functions defined in the notebook in your app.py
# `match` is assumed to be an iterable of characters/patterns defined in
# tempCodeRunnerFile.py; it drives the rule-based check in predict() below.
from tempCodeRunnerFile import match
@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods=['POST', 'GET'])
def predict():
    input_string = request.form['text']
    print('text: ', input_string)
    with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
        clf = pickle.load(file)
    # Rule-based shortcut: if the text contains any of the flagged patterns in
    # `match`, label it as cyberbullying directly; otherwise run the model pipeline.
    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)
    print('prediction=', prediction)
    if prediction[0] == 0:
        return render_template('index.html', pred='Cyberbullying Text', question='వాక్యం - ' + input_string)
    else:
        return render_template('index.html', pred='Non-Cyberbullying Text', question='వాక్యం - ' + input_string)

if __name__ == "__main__":
    app.run(debug=True, port=8001)
# For creating a pickle file:
# with open('classifier.pkl', 'wb') as file:
#     pickle.dump(xgb, file)
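# A hypothetical sketch of how such a pickle could be produced (X_train and y_train
# stand in for the feature matrix and labels used during training; they are not
# defined anywhere in this app):
#
#     xgb = XGBClassifier()
#     xgb.fit(X_train, y_train)
#     with open('classifier.pkl', 'wb') as file:
#         pickle.dump(xgb, file)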