from flask import render_template, request, redirect
from app import app
import pandas as pd
import numpy as np
import torch
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from functions import *
#{% autoescape off %}  #code inside renders as HTML
#{% endautoescape %}
#https://jsfiddle.net/onury/kBQdS/  how to create a list in JS
#variable definitions
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
torch.manual_seed(0)
# sub_loss = ['loss', 'newaccidentl', 'newreescreverl', 'newcondl', 'newnotl', 'environment'] #10->5
# sub_hazard = ['hazard', 'newreescreverh', 'newaccidenth', 'fail', 'newnoth', 'newcondh'] #11->5
# sub_constraint = ['prevent', 'recommendation', 'mitigate', 'detect', 'newnotc', 'newreescreverc'] #5->3
step1_labels = ['loss', 'hazard', 'constraint']
step2_labels = ['correct', 'incorrect']
step3_labels = ['rewrite', 'not', 'condition', 'accident', 'correct']
# sub_loss_incorreto = sub_loss.copy()
# sub_loss_incorreto.remove('loss')
# sub_hazard_incorreto = sub_hazard.copy()
# sub_hazard_incorreto.remove('hazard')
# sub_constraint_incorreto = sub_constraint.copy()
# for item in ['prevent', 'detect', 'mitigate']:
#     sub_constraint_incorreto.remove(item)
def label_to_int(df, name, labels):
    """Replace string labels in column `name` with their integer index in `labels`, in place."""
    for i in range(len(df[name])):
        df.loc[i, name] = labels.index(df.loc[i, name])
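# Illustrative usage (hypothetical df): with step1_labels above,
# label_to_int(df, 'label', step1_labels) rewrites 'loss'/'hazard'/'constraint' to 0/1/2.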
#path = './models/8020/'
#path = './models/7030/experimental/menos_erros1/'
path = 'andreyunic23/'
#load models
tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
#model 1
num_label1 = 3  #ok
model_path1 = path + 'beds_step1'  #ok
#tokenizer = AutoTokenizer.from_pretrained(model_path1)  #ok
model_step1 = AutoModelForSequenceClassification.from_pretrained(model_path1, num_labels=num_label1).to(device)  #ok
#models 2
num_label2 = 2
model_path2l = path + 'beds_step2_loss'
model_path2h = path + 'beds_step2_hazard'
model_path2c = path + 'beds_step2_constraint'
#tokenizer_loss = AutoTokenizer.from_pretrained(model_path2l)
#tokenizer_hazard = AutoTokenizer.from_pretrained(model_path2h)
#tokenizer_constraint = AutoTokenizer.from_pretrained(model_path2c)
model_step2_loss = AutoModelForSequenceClassification.from_pretrained(model_path2l, num_labels=num_label2).to(device)  #ok
model_step2_hazard = AutoModelForSequenceClassification.from_pretrained(model_path2h, num_labels=num_label2).to(device)  #ok
model_step2_constraint = AutoModelForSequenceClassification.from_pretrained(model_path2c, num_labels=num_label2).to(device)  #ok
#labels_corretos = ['loss', 'hazard', 'prevent', 'detect', 'mitigate']
labels_corretos = ['loss', 'hazard', 'constraint']
#model 3
#num_label = x
model_path3l = path + 'beds_step3_loss'
model_path3h = path + 'beds_step3_hazard'
model_path3c = path + 'beds_step3_constraint'
#tokenizer_loss_incorreto = AutoTokenizer.from_pretrained(model_path3l)
#tokenizer_hazard_incorreto = AutoTokenizer.from_pretrained(model_path3h)
#tokenizer_constraint_incorreto = AutoTokenizer.from_pretrained(model_path3c)
model_step3_loss = AutoModelForSequenceClassification.from_pretrained(model_path3l, num_labels=4).to(device)
model_step3_hazard = AutoModelForSequenceClassification.from_pretrained(model_path3h, num_labels=4).to(device)
model_step3_constraint = AutoModelForSequenceClassification.from_pretrained(model_path3c, num_labels=2).to(device)
#model 4
model_path4 = path + 'beds_step4'
model_step4 = SentenceTransformer(model_path4)
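# For reference, a minimal similarity check with the sentence-transformers API
# (illustrative only; the real comparison below happens in check_similarity_return2):
#   from sentence_transformers import util
#   emb = model_step4.encode(['sentence a', 'sentence b'])
#   score = util.cos_sim(emb[0], emb[1])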
#note: this used to (wrongly) be named x_loss_correto
correct_example_path = "./datasets/"
#correct_example_path = "/home/ATOunic/mysite/datasets/"
correct_loss_df = pd.read_csv(correct_example_path + 'correct_loss_reference.csv')
correct_hazard_df = pd.read_csv(correct_example_path + 'correct_hazard_reference.csv')
correct_constraint_df = pd.read_csv(correct_example_path + 'correct_constraint_reference.csv')
examples_correct_loss = format_examples(correct_loss_df.squeeze().tolist())
examples_correct_hazard = format_examples(correct_hazard_df.squeeze().tolist())
examples_correct_constraint = format_examples(correct_constraint_df.squeeze().tolist())
def inferencia(df):
    result_parte1 = []
    result_incorrect_loss = []
    result_incorrect_hazard = []
    result_incorrect_constraint = []
    result_list_sim_loss = []
    result_list_sim_hazard = []
    result_list_sim_constraint = []
    result_list_erro_loss = []
    result_list_erro_hazard = []
    result_list_erro_constraint = []
    x_test = df['req'].to_list()
    if len(df.columns) == 2:
        input_type = 'unlabeled'
    if len(df.columns) > 2:
        y_test = convert_label(df['label'])
        input_type = 'labeled'
    #part 1 test
    with torch.no_grad():
        encodings = tokenizer(x_test, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
        results = model_step1(encodings['input_ids'].to(device), encodings['attention_mask'].to(device))
        predictions = np.argmax(results.logits.cpu(), axis=-1)
    #### instead of printing, store the predictions in a list [id, sent, orig, pred]
    ### not sure whether to include the probability; when the top probability is below 70%, that is probably a warning sign worth raising. for now:
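    # A possible sketch of that idea (not wired in; the 0.70 threshold is an assumption):
    # probs = torch.softmax(results.logits, dim=-1)   # per-class probabilities
    # low_conf = probs.max(dim=-1).values < 0.70      # rows whose top probability is under 70%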
    if input_type == 'labeled':
        df_parte1 = df_with_pred(y_test, predictions, df)
    elif input_type == 'unlabeled':
        df['label'] = predictions.tolist()  # .tolist() works for both tensors and ndarrays
        df_parte1 = df_with_pred(predictions.tolist(), predictions, df)
    result_parte1 = df_parte1.to_json(orient="records", default_handler=str)
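    # orient="records" serializes the dataframe as a JSON array with one object
    # per row, e.g. [{"id": 0, "req": "...", ...}], for the template to consume.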
    #load model 2
    #organize predictions: instead of a df, just a list of sentences
    ##could do a labeled and an unlabeled version
    #list_classif_loss, list_classif_hazard, list_classif_constraint = organize_predictions_list(predictions, df_parte1)  #return df[id, req]
    list_loss = df[df['label'] == 0]
    list_hazard = df[df['label'] == 1]
    list_constraint = df[df['label'] == 2]
    #part 2 test: loss
    #x_loss = list_loss['req'].to_list()
    if list_loss['req'].to_list():
        with torch.no_grad():
            test_loss = tokenizer(list_loss['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_loss = model_step2_loss(test_loss['input_ids'].to(device), test_loss['attention_mask'].to(device))
            predictions_loss = np.argmax(results_loss.logits.cpu(), axis=-1)
        incorrect_loss = get_incorrect(predictions_loss, list_loss)  #returns df:[id, req]
        result_incorrect_loss = incorrect_loss.to_json(orient='records', default_handler=str)
        list_sim_loss = check_similarity_return2(incorrect_loss, examples_correct_loss, model_step4)
        result_list_sim_loss = json.dumps(list_sim_loss, default=int)
        #part 3 test: loss
        list_incorrect_loss = incorrect_loss['req'].to_list()
        if list_incorrect_loss:
            with torch.no_grad():
                test_loss_incorrect = tokenizer(list_incorrect_loss, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_loss_incorrect = model_step3_loss(test_loss_incorrect['input_ids'].to(device), test_loss_incorrect['attention_mask'].to(device))
            list_erro_loss = list_erro_with_pred(results_loss_incorrect, incorrect_loss, step3_labels)
            result_list_erro_loss = json.dumps(list_erro_loss, default=int)
    #part 2 test: hazard
    #x_hazard = list_classif_hazard['req'].to_list()
    if list_hazard['req'].to_list():
        with torch.no_grad():
            test_hazard = tokenizer(list_hazard['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_hazard = model_step2_hazard(test_hazard['input_ids'].to(device), test_hazard['attention_mask'].to(device))
            predictions_hazard = np.argmax(results_hazard.logits.cpu(), axis=-1)
        incorrect_hazard = get_incorrect(predictions_hazard, list_hazard)
        result_incorrect_hazard = incorrect_hazard.to_json(orient='records', default_handler=str)
        list_sim_hazard = check_similarity_return2(incorrect_hazard, examples_correct_hazard, model_step4)
        result_list_sim_hazard = json.dumps(list_sim_hazard, default=int)
        #part 3 test: hazard
        list_incorrect_hazard = incorrect_hazard['req'].to_list()
        if list_incorrect_hazard:
            with torch.no_grad():
                test_hazard_incorrect = tokenizer(list_incorrect_hazard, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_hazard_incorrect = model_step3_hazard(test_hazard_incorrect['input_ids'].to(device), test_hazard_incorrect['attention_mask'].to(device))
            list_erro_hazard = list_erro_with_pred(results_hazard_incorrect, incorrect_hazard, step3_labels)
            result_list_erro_hazard = json.dumps(list_erro_hazard, default=int)
    #part 2 test: constraint
    #x_constraint = list_classif_constraint['req'].to_list()
    if list_constraint['req'].to_list():
        with torch.no_grad():
            test_constraint = tokenizer(list_constraint['req'].to_list(), truncation=True, padding='max_length', max_length=512, return_tensors="pt")
            results_constraint = model_step2_constraint(test_constraint['input_ids'].to(device), test_constraint['attention_mask'].to(device))
            predictions_constraint = np.argmax(results_constraint.logits.cpu(), axis=-1)
        incorrect_constraint = get_incorrect(predictions_constraint, list_constraint)
        result_incorrect_constraint = incorrect_constraint.to_json(orient='records', default_handler=str)
        list_sim_constraint = check_similarity_return2(incorrect_constraint, examples_correct_constraint, model_step4)
        result_list_sim_constraint = json.dumps(list_sim_constraint, default=int)
        #part 3 test: constraint
        list_incorrect_constraint = incorrect_constraint['req'].to_list()
        if list_incorrect_constraint:
            with torch.no_grad():
                test_constraint_incorrect = tokenizer(list_incorrect_constraint, truncation=True, padding='max_length', max_length=512, return_tensors="pt")
                results_constraint_incorrect = model_step3_constraint(test_constraint_incorrect['input_ids'].to(device), test_constraint_incorrect['attention_mask'].to(device))
            list_erro_constraint = list_erro_with_pred(results_constraint_incorrect, incorrect_constraint, step3_labels)
            result_list_erro_constraint = json.dumps(list_erro_constraint, default=int)
    #part 4
    return render_template("interface.html",
                           #data = result,
                           result_parte1=result_parte1,
                           result_incorrect_loss=result_incorrect_loss,
                           result_incorrect_hazard=result_incorrect_hazard,
                           result_incorrect_constraint=result_incorrect_constraint,
                           result_list_sim_loss=result_list_sim_loss,
                           result_list_sim_hazard=result_list_sim_hazard,
                           result_list_sim_constraint=result_list_sim_constraint,
                           result_list_erro_loss=result_list_erro_loss,
                           result_list_erro_hazard=result_list_erro_hazard,
                           result_list_erro_constraint=result_list_erro_constraint)
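# A possible refactor (sketch, not used above; names are hypothetical): the
# loss/hazard/constraint branches in inferencia are identical up to the model
# involved, so a shared helper could run each classification pass.
# def _classify(sentences, model):
#     """Tokenize `sentences` and return the argmax class index per sentence."""
#     with torch.no_grad():
#         enc = tokenizer(sentences, truncation=True, padding='max_length',
#                         max_length=512, return_tensors="pt")
#         logits = model(enc['input_ids'].to(device), enc['attention_mask'].to(device)).logits
#     return logits.cpu().argmax(dim=-1).tolist()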
def homepage():
    return render_template('index.html', name='', file=None)
#for testing the homepage
def teste():
    return render_template('interface.html', name='', file=None)
#/success
def success():
    if request.method == 'POST':
        f = request.files['file']
        #df = pd.read_csv(f, names=['req','label'], sep=',', header=None, on_bad_lines='skip')
        df = pd.read_csv(f, sep=',', header=None, on_bad_lines='skip')
        if len(df.columns) == 1:
            df.columns = ['req']
        elif len(df.columns) > 1:
            df.columns = ['req', 'label']
        df.insert(0, 'id', range(len(df)))
        #result = df.to_json(orient="records")
        #f.save(f.filename)
        return inferencia(df)
def parse_text(text):
    """Split textarea input into a list of non-empty lines."""
    sentences = []
    for item in text.split('\n'):
        item = item.replace('\r', '')
        if item != '':
            sentences.append(item)
    return sentences
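# Example: parse_text('L1\r\nL2\n\nL3') returns ['L1', 'L2', 'L3'].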
def success2():
    if request.method == 'POST':
        form_loss = parse_text(request.form['text_loss'])
        form_hazard = parse_text(request.form['text_hazard'])
        form_constraint = parse_text(request.form['text_constraint'])
        #fill_loss = ['loss' for x in range(len(form_loss))]
        fill_loss = ['loss'] * len(form_loss)
        fill_hazard = ['hazard'] * len(form_hazard)
        fill_constraint = ['constraint'] * len(form_constraint)
        dict_loss = {'req': form_loss, 'label': fill_loss}
        dict_hazard = {'req': form_hazard, 'label': fill_hazard}
        dict_constraint = {'req': form_constraint, 'label': fill_constraint}
        df = pd.DataFrame(dict_loss)
        df = pd.concat([df, pd.DataFrame(dict_hazard)])
        df = pd.concat([df, pd.DataFrame(dict_constraint)])
        df.insert(0, 'id', range(len(df)))
        df.reset_index(drop=True, inplace=True)
        return inferencia(df)
    #return render_template("interface.html", name='', file=None)
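# Note: no @app.route decorators appear in this file, so the views are presumably
# registered elsewhere. If not, something like the following would wire them up
# (paths assumed; '/success' taken from the comment above success()):
#   app.add_url_rule('/', 'homepage', homepage)
#   app.add_url_rule('/success', 'success', success, methods=['POST'])
#   app.add_url_rule('/success2', 'success2', success2, methods=['POST'])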