# torchnet / tests /tokenize_test.py
# milselarch's picture
# push to main
# df07554
# raw
# history blame contribute delete
# 338 Bytes
from transformers import BertTokenizer
# Sample passage to tokenize; .strip() drops the leading and trailing
# newlines that the triple-quoted layout introduces.
text = """
The proletariat is the social class of wage-earners who are
those members of a society whose only possession of significant
economic value is their labour power
""".strip()

# Build the tokenizer from the "bert-base-uncased" pretrained checkpoint.
# NOTE(review): presumably this fetches or reads cached vocabulary files,
# so the script needs network or a warm cache -- confirm.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Split the passage into tokens and show them for manual inspection.
tokens = tokenizer.tokenize(text)
print(tokens)