File size: 772 Bytes
ed8e391
5b879f4
ed8e391
 
5b879f4
ed8e391
 
5b879f4
ed8e391
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from transformers import T5TokenizerFast

# Load the T5 v1.1 XXL fast tokenizer; legacy=False selects the updated
# tokenization behavior rather than the legacy SentencePiece handling.
tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)

# The fixed sequence of token IDs under inspection.
token_ids = [3, 23, 31, 51, 3, 12775, 3768, 5, 1]

# First, decode the whole sequence in one call, dropping special tokens.
full_text = tokenizer.decode(token_ids, skip_special_tokens=True)
print("\nFull decoded text:", full_text)

# Then decode token by token, this time keeping special tokens visible so
# each ID's exact text contribution can be seen.
for tid in token_ids:
    piece = tokenizer.decode([tid], skip_special_tokens=False)
    print(f"Decoded token {tid}: {piece}")

# Finally, map ID 3 back to its raw vocabulary entry (the token string
# itself, as opposed to its decoded text).
token_3_name = tokenizer.convert_ids_to_tokens(3)
print(f"Token ID 3 corresponds to: {token_3_name}")