fix tokenizer loading to decode digits (#3)
Browse files- fix tokenizer loading to decode digits (ca2257f10f9916abb571823991a97dbfffcd35b6)
Co-authored-by: Makoto Shing <[email protected]>
README.md
CHANGED
@@ -42,7 +42,7 @@ Then start generating text with `japanese-stablelm-base-alpha-7b` by using the f
|
|
42 |
import torch
|
43 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
44 |
|
45 |
-
tokenizer = LlamaTokenizer.from_pretrained("novelai/nerdstash-tokenizer-v1")
|
46 |
|
47 |
model = AutoModelForCausalLM.from_pretrained(
|
48 |
"stabilityai/japanese-stablelm-base-alpha-7b",
|
@@ -76,7 +76,7 @@ tokens = model.generate(
|
|
76 |
do_sample=True,
|
77 |
)
|
78 |
|
79 |
-
out = tokenizer.decode(tokens[0], skip_special_tokens=True)
|
80 |
print(out)
|
81 |
"""
|
82 |
AI で科学研究を加速するには、データ駆動型文化が必要であることが明らかになってきています。研究のあらゆる側面で、データがより重要になっているのです。
|
|
|
42 |
import torch
|
43 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
44 |
|
45 |
+
tokenizer = LlamaTokenizer.from_pretrained("novelai/nerdstash-tokenizer-v1", additional_special_tokens=['▁▁'])
|
46 |
|
47 |
model = AutoModelForCausalLM.from_pretrained(
|
48 |
"stabilityai/japanese-stablelm-base-alpha-7b",
|
|
|
76 |
do_sample=True,
|
77 |
)
|
78 |
|
79 |
+
out = tokenizer.decode(tokens[0], skip_special_tokens=True)
|
80 |
print(out)
|
81 |
"""
|
82 |
AI で科学研究を加速するには、データ駆動型文化が必要であることが明らかになってきています。研究のあらゆる側面で、データがより重要になっているのです。
|