# beds_pipeline_beds/views.py
from flask import render_template, request
from app import app
import pandas as pd
import numpy as np
import torch
# explicit imports for the classes used below
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from functions import *
import json
# {% autoescape off %} ... {% endautoescape %}  Jinja block: code inside renders as raw HTML
# https://jsfiddle.net/onury/kBQdS/  how to create a JS list
# variable definitions
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
torch.manual_seed(0)
# sub_loss = ['loss', 'newaccidentl', 'newreescreverl', 'newcondl', 'newnotl', 'environment'] #10->5
# sub_hazard = ['hazard', 'newreescreverh', 'newaccidenth', 'fail', 'newnoth', 'newcondh'] #11->5
# sub_constraint = ['prevent', 'recommendation', 'mitigate', 'detect', 'newnotc', 'newreescreverc'] #5->3
step1_labels = ['loss', 'hazard', 'constraint']
step2_labels = ['correct', 'incorrect']
step3_labels = ['rewrite', 'not', 'condition', 'accident', 'correct']
# sub_loss_incorreto = sub_loss.copy()
# sub_loss_incorreto.remove('loss')
# sub_hazard_incorreto = sub_hazard.copy()
# sub_hazard_incorreto.remove('hazard')
# sub_constraint_incorreto = sub_constraint.copy()
# for item in ['prevent', 'detect', 'mitigate']:
# sub_constraint_incorreto.remove(item)
def label_to_int(df, name, labels):
    # map the text labels in column `name` to their integer index in `labels`, in place
    for i in range(len(df[name])):
        df.loc[i, name] = labels.index(df.loc[i, name])
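# e.g. label_to_int(df, 'label', step1_labels) rewrites 'loss'->0, 'hazard'->1, 'constraint'->2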
#path = './models/8020/'
#path = './models/7030/experimental/menos_erros1/'
path = 'andreyunic23/'
# load the fine-tuned models
tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
# model 1 (step 1): classify each sentence as loss / hazard / constraint
num_label1 = 3
model_path1 = path + 'beds_step1'
#tokenizer = AutoTokenizer.from_pretrained(model_path1)
model_step1 = AutoModelForSequenceClassification.from_pretrained(model_path1, num_labels=num_label1).to(device)
# models 2 (step 2): one correct/incorrect classifier per category
num_label2 = 2
model_path2l = path + 'beds_step2_loss'
model_path2h = path + 'beds_step2_hazard'
model_path2c = path + 'beds_step2_constraint'
#tokenizer_loss = AutoTokenizer.from_pretrained(model_path2l)
#tokenizer_hazard = AutoTokenizer.from_pretrained(model_path2h)
#tokenizer_constraint = AutoTokenizer.from_pretrained(model_path2c)
model_step2_loss = AutoModelForSequenceClassification.from_pretrained(model_path2l, num_labels=num_label2).to(device)
model_step2_hazard = AutoModelForSequenceClassification.from_pretrained(model_path2h, num_labels=num_label2).to(device)
model_step2_constraint = AutoModelForSequenceClassification.from_pretrained(model_path2c, num_labels=num_label2).to(device)
#labels_corretos = ['loss', 'hazard', 'prevent', 'detect', 'mitigate']
labels_corretos = ['loss', 'hazard', 'constraint']
# models 3 (step 3): classify the kind of error in incorrect sentences
# #num_label = x
model_path3l = path+'beds_step3_loss'
model_path3h = path+'beds_step3_hazard'
model_path3c = path+'beds_step3_constraint'
#tokenizer_loss_incorreto = AutoTokenizer.from_pretrained(model_path3l)
#tokenizer_hazard_incorreto = AutoTokenizer.from_pretrained(model_path3h)
#tokenizer_constraint_incorreto = AutoTokenizer.from_pretrained(model_path3c)
model_step3_loss = AutoModelForSequenceClassification.from_pretrained(model_path3l, num_labels=4).to(device)
model_step3_hazard = AutoModelForSequenceClassification.from_pretrained(model_path3h, num_labels=4).to(device)
model_step3_constraint = AutoModelForSequenceClassification.from_pretrained(model_path3c, num_labels=2).to(device)
# model 4 (step 4): sentence-similarity model
model_path4 = path+'beds_step4'
model_step4 = SentenceTransformer(model_path4)
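# pipeline overview, as wired in inferencia() below: step 1 labels each
# requirement as loss/hazard/constraint; step 2 flags incorrectly written
# sentences per category; step 3 names the kind of error; step 4 retrieves
# similar correctly written examples.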
# NOTE: an earlier version wrongly used x_loss_correto here
correct_example_path = "./datasets/"
#correct_example_path = "/home/ATOunic/mysite/datasets/"
correct_loss_df = pd.read_csv(correct_example_path+'correct_loss_reference.csv')
correct_hazard_df = pd.read_csv(correct_example_path+'correct_hazard_reference.csv')
correct_constraint_df = pd.read_csv(correct_example_path+'correct_constraint_reference.csv')
examples_correct_loss = format_examples(correct_loss_df.squeeze().tolist())
examples_correct_hazard = format_examples(correct_hazard_df.squeeze().tolist())
examples_correct_constraint = format_examples(correct_constraint_df.squeeze().tolist())
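# each reference CSV is assumed to hold a single column of correct example
# sentences, so squeeze() yields a Series and tolist() a flat list of strings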
def inferencia(df):
    """Run the four-step pipeline over df[['id', 'req', ('label')]] and render the results."""
    result_parte1 = []
    result_incorrect_loss = []
    result_incorrect_hazard = []
    result_incorrect_constraint = []
    result_list_sim_loss = []
    result_list_sim_hazard = []
    result_list_sim_constraint = []
    result_list_erro_loss = []
    result_list_erro_hazard = []
    result_list_erro_constraint = []
    x_test = df['req'].to_list()
    if len(df.columns) == 2:
        input_type = 'unlabeled'
    if len(df.columns) > 2:
        y_test = convert_label(df['label'])
        input_type = 'labeled'
    # step 1 inference
    with torch.no_grad():
        encodings = tokenizer(x_test, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
        results = model_step1(encodings['input_ids'].to(device), encodings['attention_mask'].to(device))
        predictions = np.argmax(results.logits.cpu(), axis=-1)
    # TODO: instead of printing, store the predictions in a list [id, sent, orig, pred]
    # TODO: not sure whether to report the probability; for now, a top probability below 70% should probably raise a warning
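    # a sketch of that confidence check (hypothetical names, not wired into the pipeline):
    #   probs = torch.softmax(results.logits, dim=-1)
    #   low_confidence = probs.max(dim=-1).values < 0.7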
    if input_type == 'labeled':
        df_parte1 = df_with_pred(y_test, predictions, df)
    elif input_type == 'unlabeled':
        df['label'] = predictions.numpy().tolist()
        df_parte1 = df_with_pred(predictions.numpy().tolist(), predictions, df)
    result_parte1 = df_parte1.to_json(orient="records", default_handler=str)
    # organize predictions: instead of a df, just lists of sentences
    # (could be done both with and without labels)
    #list_classif_loss, list_classif_hazard, list_classif_constraint = organize_predictions_list(predictions, df_parte1) #return df[id, req]
    list_loss = df[df['label'] == 0]
    list_hazard = df[df['label'] == 1]
    list_constraint = df[df['label'] == 2]
    # step 2 inference: loss
    if list_loss['req'].to_list():
        with torch.no_grad():
            test_loss = tokenizer(list_loss['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_loss = model_step2_loss(test_loss['input_ids'].to(device), test_loss['attention_mask'].to(device))
            predictions_loss = np.argmax(results_loss.logits.cpu(), axis=-1)
        incorrect_loss = get_incorrect(predictions_loss, list_loss)  # returns df:[id, req]
        result_incorrect_loss = incorrect_loss.to_json(orient='records', default_handler=str)
        list_sim_loss = check_similarity_return2(incorrect_loss, examples_correct_loss, model_step4)
        result_list_sim_loss = json.dumps(list_sim_loss, default=int)
        # step 3 inference: loss
        list_incorrect_loss = incorrect_loss['req'].to_list()
        if list_incorrect_loss:
            with torch.no_grad():
                test_loss_incorrect = tokenizer(list_incorrect_loss, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_loss_incorrect = model_step3_loss(test_loss_incorrect['input_ids'].to(device), test_loss_incorrect['attention_mask'].to(device))
            list_erro_loss = list_erro_with_pred(results_loss_incorrect, incorrect_loss, step3_labels)
            result_list_erro_loss = json.dumps(list_erro_loss, default=int)
    # step 2 inference: hazard
    if list_hazard['req'].to_list():
        with torch.no_grad():
            test_hazard = tokenizer(list_hazard['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_hazard = model_step2_hazard(test_hazard['input_ids'].to(device), test_hazard['attention_mask'].to(device))
            predictions_hazard = np.argmax(results_hazard.logits.cpu(), axis=-1)
        incorrect_hazard = get_incorrect(predictions_hazard, list_hazard)
        result_incorrect_hazard = incorrect_hazard.to_json(orient='records', default_handler=str)
        list_sim_hazard = check_similarity_return2(incorrect_hazard, examples_correct_hazard, model_step4)
        result_list_sim_hazard = json.dumps(list_sim_hazard, default=int)
        # step 3 inference: hazard
        list_incorrect_hazard = incorrect_hazard['req'].to_list()
        if list_incorrect_hazard:
            with torch.no_grad():
                test_hazard_incorrect = tokenizer(list_incorrect_hazard, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_hazard_incorrect = model_step3_hazard(test_hazard_incorrect['input_ids'].to(device), test_hazard_incorrect['attention_mask'].to(device))
            list_erro_hazard = list_erro_with_pred(results_hazard_incorrect, incorrect_hazard, step3_labels)
            result_list_erro_hazard = json.dumps(list_erro_hazard, default=int)
    # step 2 inference: constraint
    if list_constraint['req'].to_list():
        with torch.no_grad():
            test_constraint = tokenizer(list_constraint['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_constraint = model_step2_constraint(test_constraint['input_ids'].to(device), test_constraint['attention_mask'].to(device))
            predictions_constraint = np.argmax(results_constraint.logits.cpu(), axis=-1)
        incorrect_constraint = get_incorrect(predictions_constraint, list_constraint)
        result_incorrect_constraint = incorrect_constraint.to_json(orient='records', default_handler=str)
        list_sim_constraint = check_similarity_return2(incorrect_constraint, examples_correct_constraint, model_step4)
        result_list_sim_constraint = json.dumps(list_sim_constraint, default=int)
        # step 3 inference: constraint
        list_incorrect_constraint = incorrect_constraint['req'].to_list()
        if list_incorrect_constraint:
            with torch.no_grad():
                test_constraint_incorrect = tokenizer(list_incorrect_constraint, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_constraint_incorrect = model_step3_constraint(test_constraint_incorrect['input_ids'].to(device), test_constraint_incorrect['attention_mask'].to(device))
            list_erro_constraint = list_erro_with_pred(results_constraint_incorrect, incorrect_constraint, step3_labels)
            result_list_erro_constraint = json.dumps(list_erro_constraint, default=int)
    # step 4 results (similarity lists) were computed above; render everything
    return render_template("interface.html",
                           result_parte1=result_parte1,
                           result_incorrect_loss=result_incorrect_loss,
                           result_incorrect_hazard=result_incorrect_hazard,
                           result_incorrect_constraint=result_incorrect_constraint,
                           result_list_sim_loss=result_list_sim_loss,
                           result_list_sim_hazard=result_list_sim_hazard,
                           result_list_sim_constraint=result_list_sim_constraint,
                           result_list_erro_loss=result_list_erro_loss,
                           result_list_erro_hazard=result_list_erro_hazard,
                           result_list_erro_constraint=result_list_erro_constraint)
@app.route('/')
def homepage():
    return render_template('index.html', name='', file=None)

@app.route('/interface3', methods=['POST'])  # homepage test endpoint
def teste():
    return render_template('interface.html', name='', file=None)
@app.route('/interface', methods=['POST'])
def success():
    if request.method == 'POST':
        f = request.files['file']
        # expected upload: headerless CSV with one column (req) or two (req, label)
        df = pd.read_csv(f, sep=',', header=None, on_bad_lines='skip')
        if len(df.columns) == 1:
            df.columns = ['req']
        elif len(df.columns) > 1:
            df.columns = ['req', 'label']
        df.insert(0, 'id', range(len(df)))
        return inferencia(df)
def parse_text(text):
    # split textarea input into non-empty lines, stripping carriage returns
    sentences = []
    for item in text.split('\n'):
        item = item.replace('\r', '')
        if item != '':
            sentences.append(item)
    return sentences
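# e.g. parse_text('A\r\nB\r\n\r\n') -> ['A', 'B']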
@app.route('/interface2', methods=['POST'])
def success2():
    if request.method == 'POST':
        form_loss = parse_text(request.form['text_loss'])
        form_hazard = parse_text(request.form['text_hazard'])
        form_constraint = parse_text(request.form['text_constraint'])
        fill_loss = ['loss'] * len(form_loss)
        fill_hazard = ['hazard'] * len(form_hazard)
        fill_constraint = ['constraint'] * len(form_constraint)
        dict_loss = {'req': form_loss, 'label': fill_loss}
        dict_hazard = {'req': form_hazard, 'label': fill_hazard}
        dict_constraint = {'req': form_constraint, 'label': fill_constraint}
        df = pd.DataFrame(dict_loss)
        df = pd.concat([df, pd.DataFrame(dict_hazard)])
        df = pd.concat([df, pd.DataFrame(dict_constraint)])
        df.insert(0, 'id', range(len(df)))
        df.reset_index(drop=True, inplace=True)
        return inferencia(df)
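# Minimal smoke test for the step 1 classifier -- a sketch, not part of the app
# (the example sentence is made up; run `python views.py` to try it):
if __name__ == '__main__':
    with torch.no_grad():
        enc = tokenizer(['The operator is exposed to a harmful dose of radiation.'],
                        truncation=True, padding='max_length', max_length=512, return_tensors="pt")
        logits = model_step1(enc['input_ids'].to(device), enc['attention_mask'].to(device)).logits
        print(step1_labels[int(np.argmax(logits.cpu(), axis=-1)[0])])  # -> 'loss', 'hazard', or 'constraint'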