In [None]:
import os

import torch
from transformers import pipeline

# The checkpoint path below is relative to the parent directory (presumably
# the project root). Only step up when the checkpoint folder is not already
# visible, so re-running this cell -- or running it before the other
# `os.chdir('..')` cell further down -- does not climb a second level.
if not os.path.isdir("finetuned_entity_categorical_classification"):
    os.chdir('..')

# NOTE(review): the manual-inference section below loads checkpoint-3338 from
# the same folder; confirm which checkpoint is the intended one.
classifier = pipeline(
    "text-classification",
    model="finetuned_entity_categorical_classification/checkpoint-23355",
    # Prefer the GPU, but keep the notebook runnable on CPU-only machines.
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [None]:
# Pipeline probe: a product phrase that combines an animal word with an electronics item.
classifier(
 'cat ear shaped headphones'
)

In [None]:
# Pipeline probe: a single compound word with no separating space.
classifier(
 'catfood'
)

In [None]:
# Pipeline probe: the bare product noun, as a baseline for the 'cat ear shaped headphones' query.
classifier(
 'headphones'
)

## Inference Without the `pipeline` Helper

In [1]:
import os; os.chdir('..')
%pwd


'/home/ubuntu/SentenceStructureComparision'

In [2]:
import json

# Category name -> integer class id, exactly as stored in the categories file.
# The context manager closes the file handle as soon as parsing is done.
with open('data/categories_refined.json', 'r') as categories_file:
    label2id = json.load(categories_file)

# Inverse lookup: integer class id -> category name.
id2label = {class_id: name for name, class_id in label2id.items()}
 

In [3]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import torch
from torch.nn import functional as F
import numpy as np



# Checkpoint directory, resolved relative to the current working directory.
# NOTE(review): the pipeline cell at the top of the notebook loads
# checkpoint-23355 from the same folder -- confirm which one is intended.
model_name= "finetuned_entity_categorical_classification/checkpoint-3338"
# Tokenizer and sequence-classification model are both loaded from that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name)


 from .autonotebook import tqdm as notebook_tqdm


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# probabilities = 1 / (1 + np.exp(-logit_score))
def logit2prob(logit):
    """Apply the logistic sigmoid to ``logit`` and round to 3 decimals.

    Operates element-wise on NumPy arrays as well as on scalars, so every
    value is squashed into [0, 1] independently of the others.
    """
    denominator = 1 + np.exp(-logit)
    return np.round(1 / denominator, 3)




def predict(sentence: str):
    '''
    Score one sentence against every category of the loaded classifier.

    The sentence is tokenized with the notebook-level ``tokenizer`` and run
    through the notebook-level ``model`` with gradients disabled. Each raw
    logit is then converted on its own by ``logit2prob`` (a sigmoid), so the
    returned scores are independent per class and do NOT sum to 1.

    Side effect: prints the arg-max class name (looked up in
    ``model.config.id2label``) together with its score.

    Args:
        sentence: the text to classify.

    Returns:
        dict mapping ``'P(<category>)'`` to that category's sigmoid score,
        in class-id order. Category names come from the notebook-level
        ``id2label`` mapping built from the categories JSON file.
    '''
    inputs = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single sentence yields a single row of logits; work on that row only.
    row_logits = logits[0]
    predicted_class_id = row_logits.argmax().item()

    # Per-class sigmoid scores as a NumPy array.
    # NOTE(review): a sigmoid per logit is only a calibrated probability if the
    # checkpoint was trained with a multi-label objective -- confirm.
    individual_probabilities_scores = logit2prob(row_logits.cpu().numpy())

    # Take the class count from the model output rather than a fixed number,
    # so a checkpoint with a different label set still works.
    d_ind = {
        f'P({id2label[i]})': individual_probabilities_scores[i]
        for i in range(len(individual_probabilities_scores))
    }

    print("Predicted Class: ", model.config.id2label[predicted_class_id], f"\nprobabilities_scores: {individual_probabilities_scores[predicted_class_id]}\n")
    return d_ind
 
 
 

In [5]:
# Animal word + electronics product. Recorded run: Computers_and_Electronics 1.0, Pets_and_Animals 0.005.
predict("cat ear headphones")

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.071,
 'P(News)': 0.004,
 'P(Science)': 0.02,
 'P(Autos_and_Vehicles)': 0.023,
 'P(Health)': 0.012,
 'P(Pets_and_Animals)': 0.005,
 'P(Adult)': 0.062,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.076,
 'P(Beauty_and_Fitness)': 0.013,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.003,
 'P(Reference)': 0.043,
 'P(Shopping)': 0.233,
 'P(Travel_and_Transportation)': 0.003,
 'P(Food_and_Drink)': 0.013,
 'P(Law_and_Government)': 0.06,
 'P(Books_and_Literature)': 0.004,
 'P(Finance)': 0.035,
 'P(Games)': 0.044,
 'P(Home_and_Garden)': 0.013,
 'P(Jobs_and_Education)': 0.003,
 'P(Arts_and_Entertainment)': 0.012,
 'P(Sensitive Subjects)': 0.003,
 'P(Real Estate)': 0.027,
 'P(Internet_and_Telecom)': 0.045,
 'P(Sports)': 0.016}

In [6]:
# Single compound word. Recorded run: Food_and_Drink 1.0, Pets_and_Animals 0.148.
predict('catfood')

Predicted Class: Food_and_Drink 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.025,
 'P(News)': 0.137,
 'P(Science)': 0.023,
 'P(Autos_and_Vehicles)': 0.01,
 'P(Health)': 0.272,
 'P(Pets_and_Animals)': 0.148,
 'P(Adult)': 0.005,
 'P(Computers_and_Electronics)': 0.089,
 'P(Online Communities)': 0.072,
 'P(Beauty_and_Fitness)': 0.105,
 'P(People_and_Society)': 0.005,
 'P(Business_and_Industrial)': 0.011,
 'P(Reference)': 0.011,
 'P(Shopping)': 0.037,
 'P(Travel_and_Transportation)': 0.016,
 'P(Food_and_Drink)': 1.0,
 'P(Law_and_Government)': 0.006,
 'P(Books_and_Literature)': 0.024,
 'P(Finance)': 0.013,
 'P(Games)': 0.044,
 'P(Home_and_Garden)': 0.012,
 'P(Jobs_and_Education)': 0.011,
 'P(Arts_and_Entertainment)': 0.161,
 'P(Sensitive Subjects)': 0.032,
 'P(Real Estate)': 0.006,
 'P(Internet_and_Telecom)': 0.009,
 'P(Sports)': 0.02}

In [37]:
# Spelled-out variant of 'catfood'. Recorded run: Food_and_Drink 1.0, Pets_and_Animals 0.006.
predict("food for cats")

Predicted Class: Food_and_Drink 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.048,
 'P(News)': 0.202,
 'P(Science)': 0.025,
 'P(Autos_and_Vehicles)': 0.095,
 'P(Health)': 0.094,
 'P(Pets_and_Animals)': 0.006,
 'P(Adult)': 0.016,
 'P(Computers_and_Electronics)': 0.129,
 'P(Online Communities)': 0.078,
 'P(Beauty_and_Fitness)': 0.122,
 'P(People_and_Society)': 0.008,
 'P(Business_and_Industrial)': 0.022,
 'P(Reference)': 0.014,
 'P(Shopping)': 0.046,
 'P(Travel_and_Transportation)': 0.024,
 'P(Food_and_Drink)': 1.0,
 'P(Law_and_Government)': 0.013,
 'P(Books_and_Literature)': 0.038,
 'P(Finance)': 0.026,
 'P(Games)': 0.091,
 'P(Home_and_Garden)': 0.025,
 'P(Jobs_and_Education)': 0.033,
 'P(Arts_and_Entertainment)': 0.233,
 'P(Sensitive Subjects)': 0.022,
 'P(Real Estate)': 0.005,
 'P(Internet_and_Telecom)': 0.003,
 'P(Sports)': 0.039}

In [38]:
# Another rewording of the cat-food query. Recorded run: Food_and_Drink 0.998, Pets_and_Animals 0.444.
predict('cat edible foods')

Predicted Class: Food_and_Drink 
probabilities_scores: 0.9980000257492065



{'P(Hobbies_and_Leisure)': 0.113,
 'P(News)': 0.037,
 'P(Science)': 0.024,
 'P(Autos_and_Vehicles)': 0.05,
 'P(Health)': 0.039,
 'P(Pets_and_Animals)': 0.444,
 'P(Adult)': 0.003,
 'P(Computers_and_Electronics)': 0.022,
 'P(Online Communities)': 0.12,
 'P(Beauty_and_Fitness)': 0.114,
 'P(People_and_Society)': 0.001,
 'P(Business_and_Industrial)': 0.008,
 'P(Reference)': 0.003,
 'P(Shopping)': 0.014,
 'P(Travel_and_Transportation)': 0.009,
 'P(Food_and_Drink)': 0.998,
 'P(Law_and_Government)': 0.005,
 'P(Books_and_Literature)': 0.006,
 'P(Finance)': 0.009,
 'P(Games)': 0.052,
 'P(Home_and_Garden)': 0.006,
 'P(Jobs_and_Education)': 0.005,
 'P(Arts_and_Entertainment)': 0.199,
 'P(Sensitive Subjects)': 0.033,
 'P(Real Estate)': 0.003,
 'P(Internet_and_Telecom)': 0.001,
 'P(Sports)': 0.123}

In [39]:
# 'feline' substituted for 'cat'. Recorded run: still Computers_and_Electronics 1.0, Pets_and_Animals 0.006.
predict('feline ear shaped headphones')

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.134,
 'P(News)': 0.002,
 'P(Science)': 0.027,
 'P(Autos_and_Vehicles)': 0.061,
 'P(Health)': 0.008,
 'P(Pets_and_Animals)': 0.006,
 'P(Adult)': 0.069,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.16,
 'P(Beauty_and_Fitness)': 0.015,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.003,
 'P(Reference)': 0.019,
 'P(Shopping)': 0.147,
 'P(Travel_and_Transportation)': 0.005,
 'P(Food_and_Drink)': 0.023,
 'P(Law_and_Government)': 0.115,
 'P(Books_and_Literature)': 0.007,
 'P(Finance)': 0.037,
 'P(Games)': 0.042,
 'P(Home_and_Garden)': 0.032,
 'P(Jobs_and_Education)': 0.003,
 'P(Arts_and_Entertainment)': 0.01,
 'P(Sensitive Subjects)': 0.003,
 'P(Real Estate)': 0.012,
 'P(Internet_and_Telecom)': 0.016,
 'P(Sports)': 0.015}

In [40]:
# Ambiguous word (the literal carries a trailing space). Recorded run: Food_and_Drink 0.991 vs Computers_and_Electronics 0.989.
predict("apple ")

Predicted Class: Food_and_Drink 
probabilities_scores: 0.9909999966621399



{'P(Hobbies_and_Leisure)': 0.02,
 'P(News)': 0.017,
 'P(Science)': 0.008,
 'P(Autos_and_Vehicles)': 0.06,
 'P(Health)': 0.032,
 'P(Pets_and_Animals)': 0.004,
 'P(Adult)': 0.022,
 'P(Computers_and_Electronics)': 0.989,
 'P(Online Communities)': 0.056,
 'P(Beauty_and_Fitness)': 0.026,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.008,
 'P(Reference)': 0.052,
 'P(Shopping)': 0.105,
 'P(Travel_and_Transportation)': 0.012,
 'P(Food_and_Drink)': 0.991,
 'P(Law_and_Government)': 0.007,
 'P(Books_and_Literature)': 0.009,
 'P(Finance)': 0.014,
 'P(Games)': 0.284,
 'P(Home_and_Garden)': 0.015,
 'P(Jobs_and_Education)': 0.017,
 'P(Arts_and_Entertainment)': 0.031,
 'P(Sensitive Subjects)': 0.014,
 'P(Real Estate)': 0.003,
 'P(Internet_and_Telecom)': 0.003,
 'P(Sports)': 0.021}

In [41]:
# Adding a product word to 'apple'. Recorded run: Computers_and_Electronics 1.0, Food_and_Drink drops to 0.172.
predict('apple iphone')

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.054,
 'P(News)': 0.003,
 'P(Science)': 0.011,
 'P(Autos_and_Vehicles)': 0.122,
 'P(Health)': 0.01,
 'P(Pets_and_Animals)': 0.004,
 'P(Adult)': 0.054,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.081,
 'P(Beauty_and_Fitness)': 0.016,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.005,
 'P(Reference)': 0.064,
 'P(Shopping)': 0.224,
 'P(Travel_and_Transportation)': 0.006,
 'P(Food_and_Drink)': 0.172,
 'P(Law_and_Government)': 0.051,
 'P(Books_and_Literature)': 0.006,
 'P(Finance)': 0.025,
 'P(Games)': 0.138,
 'P(Home_and_Garden)': 0.03,
 'P(Jobs_and_Education)': 0.006,
 'P(Arts_and_Entertainment)': 0.008,
 'P(Sensitive Subjects)': 0.003,
 'P(Real Estate)': 0.006,
 'P(Internet_and_Telecom)': 0.004,
 'P(Sports)': 0.018}

In [42]:
# Brand and model name only, no generic product noun. Recorded run: Computers_and_Electronics 1.0.
predict(
 'razer kraken'
)

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.077,
 'P(News)': 0.005,
 'P(Science)': 0.009,
 'P(Autos_and_Vehicles)': 0.077,
 'P(Health)': 0.015,
 'P(Pets_and_Animals)': 0.003,
 'P(Adult)': 0.073,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.086,
 'P(Beauty_and_Fitness)': 0.022,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.004,
 'P(Reference)': 0.021,
 'P(Shopping)': 0.203,
 'P(Travel_and_Transportation)': 0.003,
 'P(Food_and_Drink)': 0.241,
 'P(Law_and_Government)': 0.009,
 'P(Books_and_Literature)': 0.003,
 'P(Finance)': 0.029,
 'P(Games)': 0.195,
 'P(Home_and_Garden)': 0.044,
 'P(Jobs_and_Education)': 0.004,
 'P(Arts_and_Entertainment)': 0.013,
 'P(Sensitive Subjects)': 0.003,
 'P(Real Estate)': 0.012,
 'P(Internet_and_Telecom)': 0.004,
 'P(Sports)': 0.017}

In [43]:
# Single site name. Recorded run: Online Communities 0.999, with Computers_and_Electronics at 0.536.
predict("facebook")

Predicted Class: Online Communities 
probabilities_scores: 0.9990000128746033



{'P(Hobbies_and_Leisure)': 0.009,
 'P(News)': 0.037,
 'P(Science)': 0.014,
 'P(Autos_and_Vehicles)': 0.004,
 'P(Health)': 0.007,
 'P(Pets_and_Animals)': 0.048,
 'P(Adult)': 0.287,
 'P(Computers_and_Electronics)': 0.536,
 'P(Online Communities)': 0.999,
 'P(Beauty_and_Fitness)': 0.002,
 'P(People_and_Society)': 0.001,
 'P(Business_and_Industrial)': 0.002,
 'P(Reference)': 0.006,
 'P(Shopping)': 0.038,
 'P(Travel_and_Transportation)': 0.016,
 'P(Food_and_Drink)': 0.012,
 'P(Law_and_Government)': 0.024,
 'P(Books_and_Literature)': 0.059,
 'P(Finance)': 0.001,
 'P(Games)': 0.025,
 'P(Home_and_Garden)': 0.377,
 'P(Jobs_and_Education)': 0.018,
 'P(Arts_and_Entertainment)': 0.028,
 'P(Sensitive Subjects)': 0.072,
 'P(Real Estate)': 0.002,
 'P(Internet_and_Telecom)': 0.003,
 'P(Sports)': 0.006}

In [44]:
# Repeat of the earlier 'apple iphone' query; the recorded output is identical to that cell's.
predict('apple iphone')

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.054,
 'P(News)': 0.003,
 'P(Science)': 0.011,
 'P(Autos_and_Vehicles)': 0.122,
 'P(Health)': 0.01,
 'P(Pets_and_Animals)': 0.004,
 'P(Adult)': 0.054,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.081,
 'P(Beauty_and_Fitness)': 0.016,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.005,
 'P(Reference)': 0.064,
 'P(Shopping)': 0.224,
 'P(Travel_and_Transportation)': 0.006,
 'P(Food_and_Drink)': 0.172,
 'P(Law_and_Government)': 0.051,
 'P(Books_and_Literature)': 0.006,
 'P(Finance)': 0.025,
 'P(Games)': 0.138,
 'P(Home_and_Garden)': 0.03,
 'P(Jobs_and_Education)': 0.006,
 'P(Arts_and_Entertainment)': 0.008,
 'P(Sensitive Subjects)': 0.003,
 'P(Real Estate)': 0.006,
 'P(Internet_and_Telecom)': 0.004,
 'P(Sports)': 0.018}

In [45]:
# Short two-word query. Recorded run: Computers_and_Electronics 1.0, with Autos_and_Vehicles at 0.512.
predict('best vr')

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.186,
 'P(News)': 0.003,
 'P(Science)': 0.009,
 'P(Autos_and_Vehicles)': 0.512,
 'P(Health)': 0.002,
 'P(Pets_and_Animals)': 0.002,
 'P(Adult)': 0.039,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.061,
 'P(Beauty_and_Fitness)': 0.003,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.001,
 'P(Reference)': 0.015,
 'P(Shopping)': 0.274,
 'P(Travel_and_Transportation)': 0.002,
 'P(Food_and_Drink)': 0.009,
 'P(Law_and_Government)': 0.058,
 'P(Books_and_Literature)': 0.002,
 'P(Finance)': 0.033,
 'P(Games)': 0.151,
 'P(Home_and_Garden)': 0.027,
 'P(Jobs_and_Education)': 0.002,
 'P(Arts_and_Entertainment)': 0.005,
 'P(Sensitive Subjects)': 0.001,
 'P(Real Estate)': 0.035,
 'P(Internet_and_Telecom)': 0.001,
 'P(Sports)': 0.008}

In [46]:
# Repeat of the previous 'best vr' query (only the quote style differs); the recorded output is identical.
predict("best vr")

Predicted Class: Computers_and_Electronics 
probabilities_scores: 1.0



{'P(Hobbies_and_Leisure)': 0.186,
 'P(News)': 0.003,
 'P(Science)': 0.009,
 'P(Autos_and_Vehicles)': 0.512,
 'P(Health)': 0.002,
 'P(Pets_and_Animals)': 0.002,
 'P(Adult)': 0.039,
 'P(Computers_and_Electronics)': 1.0,
 'P(Online Communities)': 0.061,
 'P(Beauty_and_Fitness)': 0.003,
 'P(People_and_Society)': 0.0,
 'P(Business_and_Industrial)': 0.001,
 'P(Reference)': 0.015,
 'P(Shopping)': 0.274,
 'P(Travel_and_Transportation)': 0.002,
 'P(Food_and_Drink)': 0.009,
 'P(Law_and_Government)': 0.058,
 'P(Books_and_Literature)': 0.002,
 'P(Finance)': 0.033,
 'P(Games)': 0.151,
 'P(Home_and_Garden)': 0.027,
 'P(Jobs_and_Education)': 0.002,
 'P(Arts_and_Entertainment)': 0.005,
 'P(Sensitive Subjects)': 0.001,
 'P(Real Estate)': 0.035,
 'P(Internet_and_Telecom)': 0.001,
 'P(Sports)': 0.008}

In [47]:
# Low-margin case. Recorded run: Adult 0.715 vs Hobbies_and_Leisure 0.684.
predict("pa best views")

Predicted Class: Adult 
probabilities_scores: 0.7149999737739563



{'P(Hobbies_and_Leisure)': 0.684,
 'P(News)': 0.009,
 'P(Science)': 0.001,
 'P(Autos_and_Vehicles)': 0.004,
 'P(Health)': 0.001,
 'P(Pets_and_Animals)': 0.0,
 'P(Adult)': 0.715,
 'P(Computers_and_Electronics)': 0.274,
 'P(Online Communities)': 0.246,
 'P(Beauty_and_Fitness)': 0.003,
 'P(People_and_Society)': 0.001,
 'P(Business_and_Industrial)': 0.0,
 'P(Reference)': 0.0,
 'P(Shopping)': 0.022,
 'P(Travel_and_Transportation)': 0.001,
 'P(Food_and_Drink)': 0.002,
 'P(Law_and_Government)': 0.021,
 'P(Books_and_Literature)': 0.007,
 'P(Finance)': 0.003,
 'P(Games)': 0.012,
 'P(Home_and_Garden)': 0.178,
 'P(Jobs_and_Education)': 0.002,
 'P(Arts_and_Entertainment)': 0.01,
 'P(Sensitive Subjects)': 0.001,
 'P(Real Estate)': 0.026,
 'P(Internet_and_Telecom)': 0.0,
 'P(Sports)': 0.02}

In [52]:
# Longer lodging-style query. Recorded run: Hobbies_and_Leisure 0.995, Travel_and_Transportation 0.003.
predict(
 "best ac dharmashala in vrindavan"
)

Predicted Class: Hobbies_and_Leisure 
probabilities_scores: 0.9950000047683716



{'P(Hobbies_and_Leisure)': 0.995,
 'P(News)': 0.003,
 'P(Science)': 0.008,
 'P(Autos_and_Vehicles)': 0.026,
 'P(Health)': 0.003,
 'P(Pets_and_Animals)': 0.003,
 'P(Adult)': 0.075,
 'P(Computers_and_Electronics)': 0.127,
 'P(Online Communities)': 0.156,
 'P(Beauty_and_Fitness)': 0.026,
 'P(People_and_Society)': 0.001,
 'P(Business_and_Industrial)': 0.0,
 'P(Reference)': 0.0,
 'P(Shopping)': 0.046,
 'P(Travel_and_Transportation)': 0.003,
 'P(Food_and_Drink)': 0.002,
 'P(Law_and_Government)': 0.041,
 'P(Books_and_Literature)': 0.012,
 'P(Finance)': 0.011,
 'P(Games)': 0.002,
 'P(Home_and_Garden)': 0.062,
 'P(Jobs_and_Education)': 0.003,
 'P(Arts_and_Entertainment)': 0.029,
 'P(Sensitive Subjects)': 0.0,
 'P(Real Estate)': 0.146,
 'P(Internet_and_Telecom)': 0.0,
 'P(Sports)': 0.007}

In [10]:
# Run the forward pass by hand (outside predict) so the raw logits can be inspected.
# This binds notebook-level `inputs` and `logits`, which the following cells read.
inputs = tokenizer("best cat ear headphones", return_tensors="pt")
with torch.no_grad():
 logits = model(**inputs).logits

In [14]:
# First row of the logits tensor as a NumPy array (the recorded output shows 27 float32 values).
l= logits.numpy()[0]
l

array([-1.353771 , -5.8301578, -4.050355 , -1.9018538, -5.129807 ,
 -5.2707334, -2.696651 , 8.821061 , -2.0982835, -4.4173856,
 -9.076361 , -5.888918 , -3.7155762, -1.0305756, -5.5817475,
 -3.987473 , -2.4096951, -5.1136127, -3.217719 , -2.938894 ,
 -3.7113686, -5.8976064, -4.788314 , -6.4181705, -3.5685277,
 -4.5266075, -4.3206973], dtype=float32)

In [18]:
# logit2prob <- function(logit){
# odds <- exp(logit)
# prob <- odds / (1 + odds)
# return(prob)
# }
def logit2prob(logit):
    """Convert logit(s) to sigmoid probabilities, rounded to 2 decimals.

    Accepts a scalar or a NumPy array and works element-wise.

    The sigmoid is evaluated as ``exp(-log(1 + exp(-logit)))`` through
    ``np.logaddexp``. The direct ``exp(logit) / (1 + exp(logit))`` form
    returns NaN once ``exp(logit)`` overflows to infinity; this form stays
    finite for arbitrarily large positive or negative logits.

    NOTE: this redefines the ``logit2prob`` declared earlier in the notebook,
    which rounds to 3 decimals. Once this cell has run, ``predict`` uses this
    2-decimal version.
    """
    prob = np.exp(-np.logaddexp(0, -logit))
    return np.round(prob, 2)

In [17]:
# Print the sigmoid score of every logit in `l`, one per line.
# NOTE(review): this cell's execution count (17) is lower than that of the
# logit2prob definition just above it (18), so the recorded output was
# produced by whichever logit2prob was live at the time, hence the extra round().
for i in l:
 print(round(logit2prob(i), 2))

0.21
0.0
0.02
0.13
0.01
0.01
0.06
1.0
0.11
0.01
0.0
0.0
0.02
0.26
0.0
0.02
0.08
0.01
0.04
0.05
0.02
0.0
0.01
0.0
0.03
0.01
0.01


In [19]:
# Same conversion applied to the whole array in one call; the recorded output matches the per-element loop.
logit2prob(l)

array([0.21, 0. , 0.02, 0.13, 0.01, 0.01, 0.06, 1. , 0.11, 0.01, 0. ,
 0. , 0.02, 0.26, 0. , 0.02, 0.08, 0.01, 0.04, 0.05, 0.02, 0. ,
 0.01, 0. , 0.03, 0.01, 0.01], dtype=float32)