# Using the pipeline function

In [4]:
from transformers import pipeline

classifier = pipeline(task="sentiment-analysis")

inputs = ["This was so bad I couldnĀ“t finish it. The actresses are so bad at acting it feels like a bad comedy from minute one. The high rated reviews is obviously from friend/family and is pure BS.",
 "I thought the cast was great. Brianna and Emma were exceptionaly talented in thier characters. Fun film."]

outputs = classifier(inputs)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [2]:
outputs

[{'label': 'NEGATIVE', 'score': 0.9995231628417969},
 {'label': 'POSITIVE', 'score': 0.9998352527618408}]

# Defining tokenizer and model manually

## Tokenizer

In [3]:
from transformers import AutoTokenizer

checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)



In [23]:
from pprint import pprint
tokenized_inputs = tokenizer(
 inputs, padding=True, truncation=True, return_tensors="pt")

In [24]:
print(tokenized_inputs["input_ids"][0], tokenized_inputs["attention_mask"][0], sep = "\n")

tensor([ 101, 2023, 2001, 2061, 2919, 1045, 2481, 29658, 2102, 3926,
 2009, 1012, 1996, 19910, 2024, 2061, 2919, 2012, 3772, 2009,
 5683, 2066, 1037, 2919, 4038, 2013, 3371, 2028, 1012, 1996,
 2152, 6758, 4391, 2003, 5525, 2013, 2767, 1013, 2155, 1998,
 2003, 5760, 18667, 1012, 102])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


In [25]:
print(tokenized_inputs["input_ids"][1], tokenized_inputs["attention_mask"][1], sep = "\n")

tensor([ 101, 1045, 2245, 1996, 3459, 2001, 2307, 1012, 25558, 1998,
 5616, 2020, 11813, 2100, 10904, 1999, 16215, 3771, 3494, 1012,
 4569, 2143, 1012, 102, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [26]:
len(tokenized_inputs["input_ids"][0]), len(tokenized_inputs["input_ids"][1])

(45, 45)

## Model

In [56]:
from transformers import AutoModelForSequenceClassification
import torch
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model.eval();



In [57]:
with torch.no_grad():
 outputs = model(**tokenized_inputs)

In [58]:
print(dir(outputs))

['__annotations__', '__class__', '__class_getitem__', '__contains__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__ior__', '__iter__', '__le__', '__len__', '__lt__', '__match_args__', '__module__', '__ne__', '__new__', '__or__', '__post_init__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__ror__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'attentions', 'clear', 'copy', 'fromkeys', 'get', 'hidden_states', 'items', 'keys', 'logits', 'loss', 'move_to_end', 'pop', 'popitem', 'setdefault', 'to_tuple', 'update', 'values']


In [59]:
outputs.logits

tensor([[ 4.2415, -3.4063],
 [-4.1783, 4.5328]])

In [60]:
import torch.nn.functional as F
F.softmax(outputs.logits, dim = -1)

tensor([[9.9952e-01, 4.7686e-04],
 [1.6471e-04, 9.9984e-01]])

In [66]:
predictions = outputs.logits.argmax(dim = -1)
pred_probas = F.softmax(outputs.logits, dim = -1).max(dim = -1).values

preds = []
for p, pp in zip(predictions, pred_probas):
 preds.append({'label': model.config.id2label[p.item()], 'score': pp.item()})

In [67]:
preds

[{'label': 'NEGATIVE', 'score': 0.9995231628417969},
 {'label': 'POSITIVE', 'score': 0.9998352527618408}]

```

Reference Output

---

[{'label': 'NEGATIVE', 'score': 0.9995231628417969},
 {'label': 'POSITIVE', 'score': 0.9998352527618408}]
````