# Spaces: Running  (appears to be page-status text captured with the script; not part of the program)
# Script: load a T5 tokenizer, decode a fixed list of token IDs as a whole
# sequence and one ID at a time, then look up the token string for ID 3.
from transformers import T5TokenizerFast

# Initialize the tokenizer from the "google/t5-v1_1-xxl" checkpoint.
tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)

# The specific token IDs to inspect.
token_ids = [3, 23, 31, 51, 3, 12775, 3768, 5, 1]

# Decode the full sequence in one call, skipping special tokens.
full_text = tokenizer.decode(token_ids, skip_special_tokens=True)
print("\nFull decoded text:", full_text)

# Decode each ID individually and print its text value. Unlike the full
# decode above, special tokens are NOT skipped here.
for token_id in token_ids:
    token_text = tokenizer.decode([token_id], skip_special_tokens=False)
    print(f"Decoded token {token_id}: {token_text}")

# Convert token ID 3 to its token string via convert_ids_to_tokens()
# (a vocabulary lookup, as opposed to the decode() calls used above).
token_3_name = tokenizer.convert_ids_to_tokens(3)
print(f"Token ID 3 corresponds to: {token_3_name}")