Update README.md
Browse files
README.md
CHANGED
@@ -32,30 +32,33 @@ Please see the [official repository](https://github.com/GU-DataLab/stance-detect
|
|
32 |
from transformers import BertTokenizer, BertForMaskedLM, pipeline
|
33 |
import torch
|
34 |
|
35 |
-
#
|
36 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
37 |
|
38 |
-
#
|
39 |
pretrained_LM_path = "kornosk/bert-political-election2020-twitter-mlm"
|
40 |
|
41 |
-
#
|
42 |
tokenizer = BertTokenizer.from_pretrained(pretrained_LM_path)
|
43 |
model = BertForMaskedLM.from_pretrained(pretrained_LM_path)
|
44 |
|
45 |
-
#
|
46 |
example = "Trump is the [MASK] of USA"
|
47 |
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
|
|
|
|
|
|
|
48 |
|
49 |
outputs = fill_mask(example)
|
50 |
print(outputs)
|
51 |
|
52 |
-
#
|
53 |
inputs = tokenizer(example, return_tensors="pt")
|
54 |
outputs = model(**inputs)
|
55 |
print(outputs)
|
56 |
|
57 |
# OR you can use this model to train on your downstream task!
|
58 |
-
#
|
59 |
```
|
60 |
|
61 |
# Reference
|
|
|
32 |
from transformers import BertTokenizer, BertForMaskedLM, pipeline
|
33 |
import torch
|
34 |
|
35 |
+
# Choose GPU if available
|
36 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
37 |
|
38 |
+
# Select model path here
|
39 |
pretrained_LM_path = "kornosk/bert-political-election2020-twitter-mlm"
|
40 |
|
41 |
+
# Load model
|
42 |
tokenizer = BertTokenizer.from_pretrained(pretrained_LM_path)
|
43 |
model = BertForMaskedLM.from_pretrained(pretrained_LM_path)
|
44 |
|
45 |
+
# Fill mask
|
46 |
example = "Trump is the [MASK] of USA"
|
47 |
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
|
48 |
+
# Use the following line instead if the one above does not work.
|
49 |
+
# Hugging Face Transformers has been updated; newer versions accept the model name as a string instead.
|
50 |
+
fill_mask = pipeline('fill-mask', model=pretrained_LM_path, tokenizer=tokenizer)
|
51 |
|
52 |
outputs = fill_mask(example)
|
53 |
print(outputs)
|
54 |
|
55 |
+
# See raw model outputs (masked-LM prediction logits)
|
56 |
inputs = tokenizer(example, return_tensors="pt")
|
57 |
outputs = model(**inputs)
|
58 |
print(outputs)
|
59 |
|
60 |
# OR you can use this model to train on your downstream task!
|
61 |
+
# Please consider citing our paper if you find this useful :)
|
62 |
```
|
63 |
|
64 |
# Reference
|