xu-song committed on
Commit
da93e39
1 Parent(s): 2d550af
Files changed (2) hide show
  1. vocab/moss/README.md +6 -0
  2. vocab/moss/__init__.py +4 -4
vocab/moss/README.md CHANGED
@@ -25,3 +25,9 @@ moss的
25
  text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
26
  return text
27
  ```
 
 
 
 
 
 
 
25
  text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
26
  return text
27
  ```
28
+
29
+
30
+ ## troubleshooting
31
+
32
+
33
+
vocab/moss/__init__.py CHANGED
@@ -2,11 +2,11 @@
2
  import os
3
  from transformers import AutoTokenizer, BloomTokenizerFast
4
 
5
- # CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
6
- # TOKENIZER_DIR = os.path.join(CURRENT_DIR, "moss-moon-003-sft")
7
- # tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
8
 
9
- tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
10
 
11
  # vocab_size = len(tokenizer.get_vocab())
12
  # vocab_size = tokenizer.vocab_size
 
2
  import os
3
  from transformers import AutoTokenizer, BloomTokenizerFast
4
 
5
+ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
6
+ TOKENIZER_DIR = os.path.join(CURRENT_DIR, "moss-moon-003-sft")
7
+ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
8
 
9
+ # tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
10
 
11
  # vocab_size = len(tokenizer.get_vocab())
12
  # vocab_size = tokenizer.vocab_size