File size: 2,688 Bytes
0307f7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
import transformers
from sklearn import metrics
import pandas as pd
import streamlit as st
def ignitor_load():
    """Load the seed/reference dataset from the local JSON file.

    Returns:
        pandas.DataFrame: contents of 'tinyignitorfile.json'.
    """
    return pd.read_json('tinyignitorfile.json')

def appendor(thex):
    """Return the first 22 seed rows with the last row's text replaced.

    Loads the seed DataFrame, keeps rows 0..21 of the 'text' and 'index'
    columns (``.loc[:21]`` is label-based and inclusive, so 22 rows with a
    default RangeIndex — TODO confirm the file always has a RangeIndex),
    then overwrites row 21's text with the caller-supplied string so the
    new text can be embedded alongside the fixed reference texts.

    Args:
        thex: text to place in the final row.

    Returns:
        pandas.DataFrame: 22-row frame with columns ['text', 'index'].
    """
    seed = ignitor_load()
    # .copy() makes the slice an independent frame: without it, the
    # .loc assignment below writes into a view of `seed` and triggers
    # pandas' SettingWithCopyWarning / chained-assignment pitfall.
    shortt = seed.loc[:21, ['text', 'index']].copy()
    shortt.loc[21, 'text'] = thex
    return shortt



# Load the pretrained DistilBERT tokenizer and TensorFlow model once at
# import time; both are reused for every embedding computed below.
tokenizerr = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
modell = transformers.TFDistilBertModel.from_pretrained('distilbert-base-uncased')


# Module-level accumulator of text embeddings, filled by allll() below.
# NOTE(review): this is never cleared, so repeated calls to allll()
# accumulate embeddings from earlier calls — verify this is intended.
encod=[]
def allll(df):
    """Tag the last row of *df* with the best-matching topic label.

    Embeds every 'text' entry of *df* with DistilBERT (appending each mean
    token embedding to the module-level ``encod`` list), embeds the keyword
    lists in ``labs``, rescales the cosine similarities with calibration
    constants read from Streamlit secrets, and returns the tag chosen for
    the final row: the string 'None' when no label clears the threshold,
    otherwise the integer index of the winning label.

    NOTE(review): ``encod`` is module-level and only ever appended to, so a
    second call sees stale embeddings from the first — confirm single-call
    usage before reuse.
    """
    for i in range(len(df)):
        # assumes df has a default RangeIndex so positional i works with .loc
        # — TODO confirm against appendor(), which appears to produce one
        v=df.loc[i,'text']
        # mean of the token embeddings, dropping the first and last token
        # positions (1:-1) — presumably [CLS]/[SEP]; verify tokenizer output
        embed=np.array(np.array(modell(np.array(tokenizerr.encode(v))[np.newaxis,:])[0][0][1:-1]).mean(0))
        encod.append(embed)
#allll(shortt)


    # NOTE(review): everything below is STILL inside allll() — the
    # unindented comment line above does not terminate a Python suite, so
    # the function body resumes here and runs through `return tags[-1]`.

    # Keyword lists whose mean embedding defines each candidate label.
    labs = {}
    
    labs["SALARY"] = ['underpay','underpaid','overpay','overpaying','payments','wage','payroll','pay','paycheck']
    labs["COLLEAGUES"] = ['colleague','employee','staff' ,'coworker','co-worker','colleagues']
    labs["SUPERVISION"] = ['boss','supervisors','manager','supervisor']
    labs["TIMEDAY"] = ['monday','weekday','day','weekend']
    labs["TIMEDAYNOMONDAY"] = ['weekday','day','weekend']
    # One mean embedding per label, computed from its full keyword list
    # encoded as a single sequence.
    emblabs={}
    emblabss=[]
    keyy=[]
    for key,v in labs.items():
        keyy.append(key)
        embed=np.array(np.array(modell(np.array(tokenizerr.encode(v))[np.newaxis,:])[0][0][1:-1]).mean(0))
        emblabss.append(embed)
        
    for i in range(len(keyy)):
        emblabs[keyy[i]] = emblabss[i]
        
    # Reshape each label embedding to a column vector; transposed back to a
    # single row below for cosine_similarity.
    hamme=[]
    for a,z in emblabs.items():
        jj=z.reshape(-1, 1)
        hamme.append(jj)
        
        
    # sim[i] has shape (len(encod), 1): similarity of every text embedding
    # to label i.
    sim=[]
    for i in range(len(hamme)):
        zz=metrics.pairwise.cosine_similarity(encod, hamme[i].T)
        sim.append(zz)
        
    sim=np.array(sim)
    # cyr1..cyr5 are per-label calibration constants from Streamlit secrets;
    # presumably reference mean similarities used to rescale each label's
    # scores — TODO confirm how they were derived.
    cyr1=st.secrets["cyr1"]
    cyr1=float(cyr1)
    cyr2=st.secrets["cyr2"]
    cyr2=float(cyr2)
    cyr3=st.secrets["cyr3"]
    cyr3=float(cyr3)
    cyr4=st.secrets["cyr4"]
    cyr4=float(cyr4)
    cyr5=st.secrets["cyr5"]
    cyr5=float(cyr5)

    referirv=[cyr1,cyr2,cyr3,cyr4,cyr5]
    meanss=[]
    labels = list(emblabs.keys())  # NOTE(review): computed but never used — see note in the tag loop below
    for i in range(len(sim)):
        # normalize each label's similarity column to sum to 1 over the
        # samples, then record its mean
        sim[i] = sim[i] / sum(sim[i])
        meanss.append(sim[i].mean())
    # per-label scaling factor: reference value / observed mean
    zarayeb = [ii / jj for ii, jj in zip(referirv, meanss)]
    
    for i in range(len(sim)):
    
        sim[i] = (sim[i])*zarayeb[i]
    
    threshhold=st.secrets["threshhold"]
    threshhold=float(threshhold)    
    tags=[]
    for j in range(len(sim[0])):
        # Below threshold -> the string 'None'; otherwise the INTEGER index
        # of the best label. NOTE(review): the unused `labels` list above
        # suggests labels[np.argmax(...)] may have been intended — verify
        # what callers expect before changing.
        if np.amax([sim[:,j,0]]) <= threshhold:
            label='None'
            tags.append(label)
        else:
            label=np.argmax([sim[:,j,0]])
            tags.append(label)
            
    # Only the tag of the last sample (the caller-appended text) is returned.
    return tags[-1]