Spaces:
Runtime error
Runtime error
import os | |
import re | |
import contractions | |
import unicodedata | |
import spacy | |
import keras | |
import requests | |
import shutil | |
import json | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
from PIL import Image | |
from keras import backend as K | |
from keras.utils.data_utils import pad_sequences | |
from gensim.models import Word2Vec | |
from gensim.models.callbacks import CallbackAny2Vec | |
import nltk | |
# Fetch the NLTK resources used further down: the tokenizer data behind
# nltk.word_tokenize and the English stop-word list.
nltk.download('punkt')
nltk.download('stopwords')

# Install the small English spaCy pipeline at start-up so that the import
# right below can succeed (the original line had a stray `]` after the call,
# which is a syntax error).
os.system('python -m spacy download en_core_web_sm')
import en_core_web_sm

# Shared spaCy pipeline used by spacy_lemmatize_text.
nlp = en_core_web_sm.load()
def recall_m(y_true, y_pred):
    """Keras metric: recall computed over all elements of the given tensors.

    Both counts are obtained by clipping to [0, 1], rounding and summing, so
    `y_pred` is effectively thresholded by rounding.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        Scalar tensor: true positives / (actual positives + epsilon).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() keeps the division defined when there are no positives.
    return hits / (actual_positives + K.epsilon())
def precision_m(y_true, y_pred):
    """Keras metric: precision computed over all elements of the given tensors.

    Both counts are obtained by clipping to [0, 1], rounding and summing, so
    `y_pred` is effectively thresholded by rounding.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        Scalar tensor: true positives / (predicted positives + epsilon).
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positive = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # K.epsilon() keeps the division defined when nothing is predicted positive.
    return hits / (flagged_positive + K.epsilon())
def f1_m(y_true, y_pred):
    """Keras metric: F1 score built from precision_m and recall_m.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        Scalar tensor: 2 * (p * r) / (p + r + epsilon).
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # K.epsilon() guards against 0/0 when both precision and recall are zero.
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio
#initialise callback class | |
class callback(CallbackAny2Vec):
    """Word2Vec training callback that reports the training loss.

    The loss is printed on every 100th epoch (epoch 0, 100, 200, ...). The
    printed value is the difference between the latest loss reported by the
    model and the value recorded at the end of the previous epoch.
    """

    def __init__(self):
        # gensim's reported loss is cumulative, so the previous total is kept
        # in order to print only the increment.
        self.loss_previous_step = 0
        self.epoch = 0

    def on_epoch_end(self, model):
        """Print the loss increment when due, then update the bookkeeping."""
        cumulative_loss = model.get_latest_training_loss()
        due_for_report = self.epoch % 100 == 0
        if due_for_report:
            delta = cumulative_loss - self.loss_previous_step
            print(f'Loss after epoch {self.epoch}: {delta}')
        # Bookkeeping runs on every epoch, whether or not a line was printed.
        self.loss_previous_step = cumulative_loss
        self.epoch += 1
def spacy_lemmatize_text(text):
    """Replace every token of `text` with its spaCy lemma.

    Tokens whose lemma is the placeholder '-PRON-' keep their original text.

    Args:
        text: string to lemmatize with the module-level `nlp` pipeline.

    Returns:
        The lemmas joined with single spaces.
    """
    lemmas = []
    for token in nlp(text):
        if token.lemma_ == '-PRON-':
            lemmas.append(token.text)
        else:
            lemmas.append(token.lemma_)
    return ' '.join(lemmas)
def remove_accented_chars(text):
    """Strip accents from `text`, keeping the underlying ASCII letters.

    The text is decomposed with NFKD so that an accented letter becomes its
    base letter followed by combining marks; encoding to ASCII with
    errors='ignore' then drops the marks and keeps the base letter. The
    previous 'NFC' form composes characters instead, which made the ASCII
    step delete the whole accented letter ("café" -> "caf").

    Args:
        text: input string.

    Returns:
        ASCII-only string. Characters with no ASCII decomposition are removed.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.encode('ascii', 'ignore').decode('utf-8', 'ignore')
def remove_special_characters(text, remove_digits=False):
    """Delete every character that is not an ASCII letter, digit or whitespace.

    Args:
        text: input string.
        remove_digits: when true, digits are deleted as well.

    Returns:
        The filtered string; removed characters are not replaced by anything.
    """
    if remove_digits:
        disallowed = r'[^a-zA-Z\s]'
    else:
        disallowed = r'[^a-zA-Z0-9\s]'
    return re.sub(disallowed, '', text)
def remove_stopwords(text, is_lower_case=False, stopwords=None):
    """Tokenize `text` with NLTK and drop the tokens that are stop words.

    Args:
        text: input string.
        is_lower_case: when true, tokens are compared as they are; otherwise
            each token is lower-cased for the comparison only.
        stopwords: collection of stop words. When falsy (None or empty) the
            NLTK English stop-word list is used.

    Returns:
        The surviving tokens (in their original casing) joined with spaces.
    """
    if not stopwords:
        stopwords = nltk.corpus.stopwords.words('english')

    kept = []
    for raw_token in nltk.word_tokenize(text):
        token = raw_token.strip()
        candidate = token if is_lower_case else token.lower()
        if candidate not in stopwords:
            kept.append(token)
    return ' '.join(kept)
def pre_process(sentence=None):
    """Run the full text-normalization pipeline and return the tokens.

    Steps, in order: strip accents, expand contractions, remove every
    character that is not a letter or whitespace, lower-case, lemmatize with
    spaCy, remove stop words, and tokenize with NLTK.

    Args:
        sentence: text to normalize. The original function took no argument
            and read a module-level `sentence`, which this file never
            defines; for backward compatibility that global is still used
            when the argument is omitted.

    Returns:
        List of token strings.

    Raises:
        ValueError: if no sentence is passed and no module-level `sentence`
            exists.
    """
    if sentence is None:
        # Legacy call style: pre_process() with the text in a module global.
        sentence = globals().get("sentence")
        if sentence is None:
            raise ValueError(
                "pre_process() needs the text to process: pass it as `sentence`"
            )

    without_accents = remove_accented_chars(sentence)
    expanded = contractions.fix(without_accents)
    letters_only = remove_special_characters(expanded, remove_digits=True)
    lowered = letters_only.lower()
    lemmatized = spacy_lemmatize_text(lowered)
    without_stopwords = remove_stopwords(lemmatized, is_lower_case=False)
    return nltk.word_tokenize(without_stopwords)
def classify(new_column=True, sentence=None):
    """Classify one pre-processed sentence with the Word2Vec + CNN models.

    Each word is looked up in the module-level Word2Vec model, the resulting
    sequence of vectors is left-padded to length 2726, and the module-level
    CNN predicts a score. A score of at least 0.5 selects "elegivel.png",
    anything lower selects "inelegivel.png".

    Args:
        new_column: currently unused by this function; kept for callers.
        sentence: either a list/tuple of words, or a string holding a list
            literal such as "['word', 'other']". The original function took
            no such argument and read a module-level `sentence`, which this
            file never defines; that global is still used when the argument
            is omitted.

    Returns:
        PIL image opened from "elegivel.png" or "inelegivel.png".

    Raises:
        ValueError: if no sentence is passed and no module-level `sentence`
            exists.
        KeyError: presumably raised by the Word2Vec lookup for words missing
            from its vocabulary - verify against the gensim version in use.
    """
    if sentence is None:
        # Legacy call style: classify(...) with the text in a module global.
        sentence = globals().get("sentence")
        if sentence is None:
            raise ValueError(
                "classify() needs the words to classify: pass them as `sentence`"
            )

    if isinstance(sentence, str):
        # Single quotes are swapped for double quotes so that a Python-style
        # list literal can be parsed as JSON.
        sentence_words = json.loads(sentence.replace("'", '"'))
    else:
        sentence_words = list(sentence)

    word_vectors = [reloaded_w2v_model.wv[word] for word in sentence_words]

    # NOTE(review): pad_sequences is documented to default to dtype='int32';
    # with float word vectors that would truncate the values. Confirm that
    # this matches how the CNN was trained before changing it.
    model_input = pad_sequences([word_vectors], maxlen=2726, padding='pre')
    value = reconstructed_model_CNN.predict(model_input)[0]

    image_path = r"elegivel.png" if value >= 0.5 else r"inelegivel.png"
    return Image.open(image_path)
def gen_output(data=None):
    """Return the path of the spreadsheet offered for download.

    Args:
        data: currently ignored. It is optional because the callers in this
            file invoke gen_output() without arguments, which raised a
            TypeError while the parameter was required.

    Returns:
        The fixed file name "output.xlsx".
    """
    return "output.xlsx"
# Word2Vec model used by classify() to turn words into vectors.
reloaded_w2v_model = Word2Vec.load('word2vec_xp8.model')

# CNN classifier; the custom metric functions are passed to load_model under
# the names given here.
reconstructed_model_CNN = keras.models.load_model(
    "best weights CNN.h5",
    custom_objects=dict(f1_m=f1_m, precision_m=precision_m, recall_m=recall_m),
)
def app(operacao, resultado, dados):
    """Gradio handler: run the selected operation and return a spreadsheet.

    A fixed sample table is written to "output.xlsx" on every call, then the
    chosen operation is executed and the path from gen_output is returned.

    Args:
        operacao: one of "Pré-processamento + Classificação",
            "Apenas Pré-processamento" or "Apenas Classificação".
        resultado: "Nova Coluna" makes classify() receive True, any other
            value makes it receive False.
        dados: uploaded file; not read by this function yet.

    Returns:
        The output file path, or None when `operacao` matches no known
        option (same as the original fall-through).
    """
    # NOTE(review): the exported table is hard-coded sample data and `dados`
    # is never read - presumably a placeholder; confirm the intended source.
    boxes = {
        'Color': ['Green', 'Green', 'Green', 'Blue', 'Blue', 'Red', 'Red', 'Red'],
        'Shape': ['Rectangle', 'Rectangle', 'Square', 'Rectangle', 'Square',
                  'Square', 'Square', 'Rectangle'],
        'Price': [10, 15, 5, 5, 10, 15, 15, 5],
    }
    df = pd.DataFrame(boxes, columns=['Color', 'Shape', 'Price'])
    df.to_excel("output.xlsx")

    new_column = resultado == "Nova Coluna"

    # NOTE(review): pre_process() and classify() are called without any text;
    # they depend on a module-level `sentence` that this file does not
    # define. Confirm how the uploaded spreadsheet is meant to feed them.
    # The comparisons used `===`, which is not a Python operator.
    if operacao == "Pré-processamento + Classificação":
        pre_process()
        classify(new_column)
    elif operacao == "Apenas Pré-processamento":
        pre_process()
    elif operacao == "Apenas Classificação":
        classify(new_column)
    else:
        return None

    # gen_output was previously called without its argument.
    return gen_output(df)
# Web UI: two radio selectors (operation and result mode) plus a file upload,
# wired to app(); the handler's return value is offered as a file.
iface = gr.Interface(
    fn=app,
    inputs=[
        gr.Radio(
            choices=[
                "Pré-processamento + Classificação",
                "Apenas Pré-processamento",
                "Apenas Classificação",
            ]
        ),
        gr.Radio(choices=["Nova Coluna", "Filtrar planilha"]),
        "file",
    ],
    outputs="file",
)

iface.launch()