gauneg
/

roberta-base-absa-ate-sentiment

Token Classification

Inference Endpoints

Model card Files Files and versions Community

gauneg committed on 4 days ago

Commit

8122c96

•

1 Parent(s): 06626f3

Update README.md

Files changed (1) hide show

README.md +57 -7

README.md CHANGED Viewed

@@ -20,24 +20,74 @@ This model has been trained on the following datasets:
 # Use
-* Importing the libraries and loading the models and the pipeline
 ```python
 from transformers import AutoTokenizer, AutoModelForTokenClassification
-from transformers import pipeline
 model_id = "gauneg/roberta-base-absa-ate-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForTokenClassification.from_pretrained(model_id)
-ate_sent_pipeline = pipeline(task='ner',
-                  aggregation_strategy='simple',
-                  tokenizer=tokenizer,
-                  model=model)
 ```
-* Using the pipeline object:
 ```python
 text_input = "Been here a few times and food has always been good but service really suffers when it gets crowded."
 ate_sent_pipeline(text_input)
 ```

 # Use
+* Making token level inferences with Auto classes
 ```python
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 model_id = "gauneg/roberta-base-absa-ate-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForTokenClassification.from_pretrained(model_id)
+# the sequence of labels used during training
+labels = {"B-neu": 1, "I-neu": 2, "O": 0, "B-neg": 4, "B-con": 5, "I-pos": 6, "B-pos": 7, "I-con": 8, "I-neg": 9, "X": -100}
+id2lab = {idx: lab for lab, idx in labels.items()}
+lab2id = {lab: idx for lab, idx in labels.items()}
+# making one prediction at a time (should be padded/batched and truncated for efficiency)
+text_input = "Been here a few times and food has always been good but service really suffers when it gets crowded."
+tok_inputs = tokenizer(text_input, return_tensors="pt")
+y_pred = model(**tok_inputs) # predicting the logits
+y_pred_fin = y_pred.logits.argmax(dim=-1)[0] # selecting the most favoured labels for each token from the logits
+decoded_pred = [id2lab[logx.item()] for logx in y_pred_fin]
+## displaying the input tokens with predictions and skipping <s> and </s> tokens at the beginning and the end respectively
+tok_level_pred = list(zip(tokenizer.convert_ids_to_tokens(tok_inputs['input_ids'][0]), decoded_pred))[1:-1]
 ```
+* results in `tok_level_pred` variable
+```bash
+[('Be', 'O'),
+ ('en', 'O'),
+ ('Ġhere', 'O'),
+ ('Ġa', 'O'),
+ ('Ġfew', 'O'),
+ ('Ġtimes', 'O'),
+ ('Ġand', 'O'),
+ ('Ġfood', 'B-pos'),
+ ('Ġhas', 'O'),
+ ('Ġalways', 'O'),
+ ('Ġbeen', 'O'),
+ ('Ġgood', 'O'),
+ ('Ġbut', 'O'),
+ ('Ġservice', 'B-neg'),
+ ('Ġreally', 'O'),
+ ('Ġsuffers', 'O'),
+ ('Ġwhen', 'O'),
+ ('Ġit', 'O'),
+ ('Ġgets', 'O'),
+ ('Ġcrowded', 'O'),
+ ('.', 'O')]
+```
+# OR
+* Using the pipeline directly for end-to-end inference:
 ```python
+from transformers import pipeline
+ate_sent_pipeline = pipeline(task='ner',
+                  aggregation_strategy='simple',
+                  model="gauneg/roberta-base-absa-ate-sentiment")
 text_input = "Been here a few times and food has always been good but service really suffers when it gets crowded."
 ate_sent_pipeline(text_input)
 ```