Spaces:
Running
Running
update
Browse files
- vocab/moss/README.md +6 -0
- vocab/moss/__init__.py +4 -4
vocab/moss/README.md
CHANGED
@@ -25,3 +25,9 @@ moss的
|
|
25 |
text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
|
26 |
return text
|
27 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
|
26 |
return text
|
27 |
```
|
28 |
+
|
29 |
+
|
30 |
+
## troubleshooting
|
31 |
+
|
32 |
+
|
33 |
+
|
vocab/moss/__init__.py
CHANGED
@@ -2,11 +2,11 @@
|
|
2 |
import os
|
3 |
from transformers import AutoTokenizer, BloomTokenizerFast
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
|
9 |
-
tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
|
10 |
|
11 |
# vocab_size = len(tokenizer.get_vocab())
|
12 |
# vocab_size = tokenizer.vocab_size
|
|
|
2 |
import os
|
3 |
from transformers import AutoTokenizer, BloomTokenizerFast
|
4 |
|
5 |
+
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
6 |
+
TOKENIZER_DIR = os.path.join(CURRENT_DIR, "moss-moon-003-sft")
|
7 |
+
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
|
8 |
|
9 |
+
# tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
|
10 |
|
11 |
# vocab_size = len(tokenizer.get_vocab())
|
12 |
# vocab_size = tokenizer.vocab_size
|