Spaces:
Running
Running
import numpy as np | |
import transformers | |
from sklearn import metrics | |
import pandas as pd | |
import streamlit as st | |
def ignitor_load():
    """Read ``tinyignitorfile.json`` into a pandas DataFrame and return it.

    The path is relative, so the file is looked up in the current working
    directory of the process.
    """
    return pd.read_json('tinyignitorfile.json')
def appendor(thex):
    """Return the reference rows with the text at index label 21 set to *thex*.

    The frame returned by ``ignitor_load()`` is cut down to the ``text`` and
    ``index`` columns for index labels up to and including 21 (``.loc``
    slicing is label-based and inclusive), and the ``text`` cell at label 21
    is then overwritten with ``thex``.

    Args:
        thex: The text to place in the ``text`` column at label 21.

    Returns:
        A new DataFrame; the frame returned by ``ignitor_load()`` is not
        modified.
    """
    gaa = ignitor_load()
    # Take an explicit copy before assigning: writing into a .loc selection of
    # another frame is the chained-assignment pattern pandas warns about
    # (SettingWithCopyWarning), and whether the write lands is version-dependent.
    shortt = gaa.loc[:21, ['text', 'index']].copy()
    shortt.loc[21, 'text'] = thex
    return shortt
# Tokenizer and TensorFlow model for the 'distilbert-base-uncased' checkpoint.
# Both are created once, when this module is first executed, and are shared by
# every function below.
tokenizerr = transformers.DistilBertTokenizer.from_pretrained(
    'distilbert-base-uncased'
)
modell = transformers.TFDistilBertModel.from_pretrained(
    'distilbert-base-uncased'
)

# Module-level list of per-row text embeddings, filled in by allll().
encod = []
def _mean_token_embedding(text):
    """Return the mean hidden-state vector that ``modell`` produces for *text*.

    ``text`` is handed to ``tokenizerr.encode`` unchanged; callers in this file
    pass either one string or a list of keyword strings.
    """
    # The model is fed a batch of one, hence the added leading axis.
    token_ids = np.array(tokenizerr.encode(text))[np.newaxis, :]
    # First [0]: first model output; second [0]: the single batch entry.
    hidden_states = modell(token_ids)[0][0]
    # [1:-1] drops the first and last positions before averaging
    # (presumably the [CLS] / [SEP] special tokens -- TODO confirm).
    return np.array(hidden_states[1:-1]).mean(0)


def allll(df):
    """Tag the last row of *df* with the best-matching label category.

    Every value in ``df['text']`` is embedded, compared by cosine similarity
    with one embedding per label category, and the similarities are then
    normalised per category and rescaled with the ``cyr1`` .. ``cyrN`` weights
    from ``st.secrets``.  Only the scores of the last row decide the result;
    the other rows take part in the normalisation only.

    Args:
        df: DataFrame with a ``text`` column.  Rows are processed in their
            positional order.

    Returns:
        The string ``'None'`` when the last row's highest score does not
        exceed the ``threshhold`` secret; otherwise the integer position of
        the winning category in definition order (0 = SALARY, 1 = COLLEAGUES,
        2 = SUPERVISION, 3 = TIMEDAY, 4 = TIMEDAYNOMONDAY).

    Raises:
        ValueError: If ``df`` has no rows.
    """
    texts = list(df['text'])
    if not texts:
        raise ValueError("allll() needs at least one row of text to tag")

    # Work on a local list.  The previous implementation appended straight to
    # the module-level ``encod``, so embeddings from earlier calls piled up
    # and leaked into the normalisation of later calls.
    doc_vectors = [_mean_token_embedding(text) for text in texts]
    # Still refresh the module-level list in place, so anything reading
    # ``encod`` after a call sees the embeddings of the latest frame.
    encod[:] = doc_vectors

    labs = {}
    labs["SALARY"] = ['underpay','underpaid','overpay','overpaying','payments','wage','payroll','pay','paycheck']
    labs["COLLEAGUES"] = ['colleague','employee','staff' ,'coworker','co-worker','colleagues']
    labs["SUPERVISION"] = ['boss','supervisors','manager','supervisor']
    labs["TIMEDAY"] = ['monday','weekday','day','weekend']
    labs["TIMEDAYNOMONDAY"] = ['weekday','day','weekend']

    # NOTE(review): each keyword *list* is passed to tokenizerr.encode as-is;
    # confirm against the transformers docs that a list of words is handled
    # the way this code expects (as opposed to a single string).
    label_vectors = [_mean_token_embedding(words) for words in labs.values()]

    # Rows are documents, columns are label categories.
    sim = metrics.pairwise.cosine_similarity(
        np.array(doc_vectors), np.array(label_vectors)
    )

    # One weight per category, stored as the secrets "cyr1", "cyr2", ...
    # in the same order as the categories above.
    referirv = np.array(
        [float(st.secrets[f"cyr{k}"]) for k in range(1, len(labs) + 1)]
    )
    threshhold = float(st.secrets["threshhold"])

    # Make each category's column sum to 1, then rescale the column so that
    # its mean equals the category's configured weight.
    sim = sim / sim.sum(axis=0)
    zarayeb = referirv / sim.mean(axis=0)
    sim = sim * zarayeb

    # Only the last row's tag was ever returned, so only it is evaluated.
    last_scores = sim[-1]
    if np.amax(last_scores) <= threshhold:
        return 'None'
    return np.argmax(last_scores)