# Demo: split a short English passage into tokens with a pretrained BERT
# tokenizer and print the resulting token list.
from transformers import BertTokenizer

# Load the tokenizer associated with the "bert-base-uncased" checkpoint.
# NOTE: from_pretrained may need network access (or a populated local cache)
# the first time it is called for a given checkpoint name.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Sample passage. The surrounding newlines introduced by the triple-quoted
# literal are removed with .strip(); the interior line breaks are kept.
text = """
The proletariat is the social class of wage-earners who are
those members of a society whose only possession of significant
economic value is their labour power
""".strip()

# Tokenize the passage and show the tokens produced.
tokens = tokenizer.tokenize(text)
print(tokens)