Liang-Su committed on
Commit 18187e5
1 parent: 5c4969a

Upload 74 files

This view is limited to 50 files because it contains too many changes; see the raw diff for the full set of 74 files.
Files changed (50)
  1. user-baichuan2-13b-v2-3.6/README.md +23 -0
  2. user-baichuan2-13b-v2-3.6/adapter_config.json +24 -0
  3. user-baichuan2-13b-v2-3.6/adapter_model.safetensors +3 -0
  4. user-baichuan2-13b-v2-3.6/all_results.json +7 -0
  5. user-baichuan2-13b-v2-3.6/checkpoint-200/README.md +53 -0
  6. user-baichuan2-13b-v2-3.6/checkpoint-200/adapter_config.json +24 -0
  7. user-baichuan2-13b-v2-3.6/checkpoint-200/adapter_model.safetensors +3 -0
  8. user-baichuan2-13b-v2-3.6/checkpoint-200/optimizer.pt +3 -0
  9. user-baichuan2-13b-v2-3.6/checkpoint-200/rng_state.pth +3 -0
  10. user-baichuan2-13b-v2-3.6/checkpoint-200/scheduler.pt +3 -0
  11. user-baichuan2-13b-v2-3.6/checkpoint-200/special_tokens_map.json +30 -0
  12. user-baichuan2-13b-v2-3.6/checkpoint-200/tokenization_baichuan.py +258 -0
  13. user-baichuan2-13b-v2-3.6/checkpoint-200/tokenizer.model +3 -0
  14. user-baichuan2-13b-v2-3.6/checkpoint-200/tokenizer_config.json +44 -0
  15. user-baichuan2-13b-v2-3.6/checkpoint-200/trainer_state.json +161 -0
  16. user-baichuan2-13b-v2-3.6/checkpoint-200/training_args.bin +3 -0
  17. user-baichuan2-13b-v2-3.6/checkpoint-300/README.md +23 -0
  18. user-baichuan2-13b-v2-3.6/checkpoint-300/adapter_config.json +24 -0
  19. user-baichuan2-13b-v2-3.6/checkpoint-300/adapter_model.safetensors +3 -0
  20. user-baichuan2-13b-v2-3.6/checkpoint-300/optimizer.pt +3 -0
  21. user-baichuan2-13b-v2-3.6/checkpoint-300/rng_state.pth +3 -0
  22. user-baichuan2-13b-v2-3.6/checkpoint-300/scheduler.pt +3 -0
  23. user-baichuan2-13b-v2-3.6/checkpoint-300/special_tokens_map.json +30 -0
  24. user-baichuan2-13b-v2-3.6/checkpoint-300/tokenization_baichuan.py +258 -0
  25. user-baichuan2-13b-v2-3.6/checkpoint-300/tokenizer.model +3 -0
  26. user-baichuan2-13b-v2-3.6/checkpoint-300/tokenizer_config.json +44 -0
  27. user-baichuan2-13b-v2-3.6/checkpoint-300/trainer_state.json +231 -0
  28. user-baichuan2-13b-v2-3.6/checkpoint-300/training_args.bin +3 -0
  29. user-baichuan2-13b-v2-3.6/checkpoint-400/README.md +23 -0
  30. user-baichuan2-13b-v2-3.6/checkpoint-400/adapter_config.json +24 -0
  31. user-baichuan2-13b-v2-3.6/checkpoint-400/adapter_model.safetensors +3 -0
  32. user-baichuan2-13b-v2-3.6/checkpoint-400/optimizer.pt +3 -0
  33. user-baichuan2-13b-v2-3.6/checkpoint-400/rng_state.pth +3 -0
  34. user-baichuan2-13b-v2-3.6/checkpoint-400/scheduler.pt +3 -0
  35. user-baichuan2-13b-v2-3.6/checkpoint-400/special_tokens_map.json +30 -0
  36. user-baichuan2-13b-v2-3.6/checkpoint-400/tokenization_baichuan.py +258 -0
  37. user-baichuan2-13b-v2-3.6/checkpoint-400/tokenizer.model +3 -0
  38. user-baichuan2-13b-v2-3.6/checkpoint-400/tokenizer_config.json +44 -0
  39. user-baichuan2-13b-v2-3.6/checkpoint-400/trainer_state.json +301 -0
  40. user-baichuan2-13b-v2-3.6/checkpoint-400/training_args.bin +3 -0
  41. user-baichuan2-13b-v2-3.6/runs/Mar06_16-02-53_u/events.out.tfevents.1709741241.u.349083.0 +3 -0
  42. user-baichuan2-13b-v2-3.6/runs/Mar06_16-15-19_u/events.out.tfevents.1709741991.u.349593.0 +3 -0
  43. user-baichuan2-13b-v2-3.6/runs/Mar06_16-27-57_u/events.out.tfevents.1709742755.u.350734.0 +3 -0
  44. user-baichuan2-13b-v2-3.6/runs/Mar06_16-37-25_u/events.out.tfevents.1709743386.u.351776.0 +3 -0
  45. user-baichuan2-13b-v2-3.6/runs/Mar06_16-46-23_u/events.out.tfevents.1709743925.u.352180.0 +3 -0
  46. user-baichuan2-13b-v2-3.6/runs/Mar06_16-55-14_u/events.out.tfevents.1709744402.u.352650.0 +3 -0
  47. user-baichuan2-13b-v2-3.6/runs/Mar06_17-03-22_u/events.out.tfevents.1709744890.u.353116.0 +3 -0
  48. user-baichuan2-13b-v2-3.6/runs/Mar06_17-13-29_u/events.out.tfevents.1709745516.u.353684.0 +3 -0
  49. user-baichuan2-13b-v2-3.6/runs/Mar06_17-30-51_u/events.out.tfevents.1709746552.u.354572.0 +3 -0
  50. user-baichuan2-13b-v2-3.6/runs/Mar06_17-42-56_u/events.out.tfevents.1709747302.u.355650.0 +3 -0
user-baichuan2-13b-v2-3.6/README.md ADDED
@@ -0,0 +1,23 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+ ### Framework versions
+
+
+ - PEFT 0.4.0
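The README above lists the 4-bit NF4 `bitsandbytes` setup the adapter was trained with. As a rough sketch (not part of this commit; the adapter path and package versions are assumptions), that config maps onto loading the base model and this LoRA adapter roughly like so:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# 4-bit NF4 config mirroring the values listed in the README
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

base = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan2-13B-Chat",
    quantization_config=bnb_config,
    trust_remote_code=True,  # Baichuan2 ships custom modeling/tokenization code
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(
    "baichuan-inc/Baichuan2-13B-Chat", trust_remote_code=True
)

# Attach the LoRA adapter uploaded in this commit (local path is illustrative)
model = PeftModel.from_pretrained(base, "user-baichuan2-13b-v2-3.6")
model.eval()
```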
user-baichuan2-13b-v2-3.6/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/jiakangxiang/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "W_pack",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
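For reference, a hedged sketch of how the LoRA settings in this `adapter_config.json` would be expressed as a `peft` `LoraConfig` when reproducing the training setup (field values follow the JSON above; the import assumes a recent `peft` release):

```python
from peft import LoraConfig

# Mirrors adapter_config.json: rank-16 LoRA on Baichuan2's fused QKV (W_pack),
# attention output projection, and the MLP projections.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["o_proj", "W_pack", "down_proj", "up_proj", "gate_proj"],
    task_type="CAUSAL_LM",
)
```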
user-baichuan2-13b-v2-3.6/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fa2928717257a823e5ece47fa40497bfd62df2de1dad1d22b7189be1eaae1fc
+ size 223203704
user-baichuan2-13b-v2-3.6/all_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.5017403132133569,
+ "train_runtime": 75900.5046,
+ "train_samples_per_second": 0.102,
+ "train_steps_per_second": 0.006
+ }
user-baichuan2-13b-v2-3.6/checkpoint-200/README.md ADDED
@@ -0,0 +1,53 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+ ### Framework versions
+
+ - PEFT 0.4.0
+ - PEFT 0.4.0
+
+ - PEFT 0.4.0
user-baichuan2-13b-v2-3.6/checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/jiakangxiang/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "W_pack",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
user-baichuan2-13b-v2-3.6/checkpoint-200/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38a30f766c18c946f733b21a20bbed87b6c9fc7fcd352632d9f10275b9bcafec
+ size 223203704
user-baichuan2-13b-v2-3.6/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:244f09da90f8637279a30da6c1ab06000f824a132e5721b107eae147dab5ec76
+ size 446541509
user-baichuan2-13b-v2-3.6/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82057afe356e5d07dcf306e6c4328a22809f76704092cbfcfc589f5f6ca4ecfb
+ size 14575
user-baichuan2-13b-v2-3.6/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7d91431adcc602a99f3b7dfb114d8f98fcd9283452a4e00f4d0e20d836e409d
+ size 627
user-baichuan2-13b-v2-3.6/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "pad_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ }
+ }
user-baichuan2-13b-v2-3.6/checkpoint-200/tokenization_baichuan.py ADDED
@@ -0,0 +1,258 @@
1
+ # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
2
+
3
+ import os
4
+ from shutil import copyfile
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import sentencepiece as spm
8
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
9
+ from transformers.utils import logging
10
+
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
+ VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
15
+
16
+ PRETRAINED_VOCAB_FILES_MAP = {
17
+ "vocab_file": {},
18
+ "tokenizer_file": {},
19
+ }
20
+ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
21
+
22
+
23
+ class BaichuanTokenizer(PreTrainedTokenizer):
24
+ """
25
+ Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
26
+
27
+ Args:
28
+ vocab_file (`str`):
29
+ Path to the vocabulary file.
30
+ """
31
+
32
+ vocab_files_names = VOCAB_FILES_NAMES
33
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
34
+ max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
+ model_input_names = ["input_ids", "attention_mask"]
36
+
37
+ def __init__(
38
+ self,
39
+ vocab_file,
40
+ unk_token="<unk>",
41
+ bos_token="<s>",
42
+ eos_token="</s>",
43
+ pad_token=None,
44
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
45
+ add_bos_token=True,
46
+ add_eos_token=False,
47
+ clean_up_tokenization_spaces=False,
48
+ **kwargs,
49
+ ):
50
+ self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
51
+ bos_token = (
52
+ AddedToken(bos_token, lstrip=False, rstrip=False)
53
+ if isinstance(bos_token, str)
54
+ else bos_token
55
+ )
56
+ eos_token = (
57
+ AddedToken(eos_token, lstrip=False, rstrip=False)
58
+ if isinstance(eos_token, str)
59
+ else eos_token
60
+ )
61
+ unk_token = (
62
+ AddedToken(unk_token, lstrip=False, rstrip=False)
63
+ if isinstance(unk_token, str)
64
+ else unk_token
65
+ )
66
+ pad_token = (
67
+ AddedToken(pad_token, lstrip=False, rstrip=False)
68
+ if isinstance(pad_token, str)
69
+ else pad_token
70
+ )
71
+ self.vocab_file = vocab_file
72
+ self.add_bos_token = add_bos_token
73
+ self.add_eos_token = add_eos_token
74
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
75
+ self.sp_model.Load(vocab_file)
76
+ super().__init__(
77
+ bos_token=bos_token,
78
+ eos_token=eos_token,
79
+ unk_token=unk_token,
80
+ pad_token=pad_token,
81
+ add_bos_token=add_bos_token,
82
+ add_eos_token=add_eos_token,
83
+ sp_model_kwargs=self.sp_model_kwargs,
84
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
85
+ **kwargs,
86
+ )
87
+
88
+ def __getstate__(self):
89
+ state = self.__dict__.copy()
90
+ state["sp_model"] = None
91
+ return state
92
+
93
+ def __setstate__(self, d):
94
+ self.__dict__ = d
95
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
96
+ self.sp_model.Load(self.vocab_file)
97
+
98
+ @property
99
+ def vocab_size(self):
100
+ """Returns vocab size"""
101
+ return self.sp_model.get_piece_size()
102
+
103
+ def get_vocab(self):
104
+ """Returns vocab as a dict"""
105
+ vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
106
+ vocab.update(self.added_tokens_encoder)
107
+ return vocab
108
+
109
+ def _tokenize(self, text):
110
+ """Returns a tokenized string."""
111
+ return self.sp_model.encode(text, out_type=str)
112
+
113
+ def _convert_token_to_id(self, token):
114
+ """Converts a token (str) in an id using the vocab."""
115
+ return self.sp_model.piece_to_id(token)
116
+
117
+ def _convert_id_to_token(self, index):
118
+ """Converts an index (integer) in a token (str) using the vocab."""
119
+ token = self.sp_model.IdToPiece(index)
120
+ return token
121
+
122
+ def convert_tokens_to_string(self, tokens):
123
+ """Converts a sequence of tokens (string) in a single string."""
124
+ current_sub_tokens = []
125
+ out_string = ""
126
+ prev_is_special = False
127
+ for i, token in enumerate(tokens):
128
+ # make sure that special tokens are not decoded using sentencepiece model
129
+ if token in self.all_special_tokens:
130
+ if not prev_is_special and i != 0:
131
+ out_string += " "
132
+ out_string += self.sp_model.decode(current_sub_tokens) + token
133
+ prev_is_special = True
134
+ current_sub_tokens = []
135
+ else:
136
+ current_sub_tokens.append(token)
137
+ prev_is_special = False
138
+ out_string += self.sp_model.decode(current_sub_tokens)
139
+ return out_string
140
+
141
+ def save_vocabulary(
142
+ self, save_directory, filename_prefix: Optional[str] = None
143
+ ) -> Tuple[str]:
144
+ """
145
+ Save the vocabulary and special tokens file to a directory.
146
+
147
+ Args:
148
+ save_directory (`str`):
149
+ The directory in which to save the vocabulary.
150
+
151
+ Returns:
152
+ `Tuple(str)`: Paths to the files saved.
153
+ """
154
+ if not os.path.isdir(save_directory):
155
+ logger.error(f"Vocabulary path ({save_directory}) should be a directory")
156
+ return
157
+ out_vocab_file = os.path.join(
158
+ save_directory,
159
+ (filename_prefix + "-" if filename_prefix else "")
160
+ + VOCAB_FILES_NAMES["vocab_file"],
161
+ )
162
+
163
+ if os.path.abspath(self.vocab_file) != os.path.abspath(
164
+ out_vocab_file
165
+ ) and os.path.isfile(self.vocab_file):
166
+ copyfile(self.vocab_file, out_vocab_file)
167
+ elif not os.path.isfile(self.vocab_file):
168
+ with open(out_vocab_file, "wb") as fi:
169
+ content_spiece_model = self.sp_model.serialized_model_proto()
170
+ fi.write(content_spiece_model)
171
+
172
+ return (out_vocab_file,)
173
+
174
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
175
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
176
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
177
+
178
+ output = bos_token_id + token_ids_0 + eos_token_id
179
+
180
+ if token_ids_1 is not None:
181
+ output = output + bos_token_id + token_ids_1 + eos_token_id
182
+
183
+ return output
184
+
185
+ def get_special_tokens_mask(
186
+ self,
187
+ token_ids_0: List[int],
188
+ token_ids_1: Optional[List[int]] = None,
189
+ already_has_special_tokens: bool = False,
190
+ ) -> List[int]:
191
+ """
192
+ Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
193
+ special tokens using the tokenizer `prepare_for_model` method.
194
+
195
+ Args:
196
+ token_ids_0 (`List[int]`):
197
+ List of IDs.
198
+ token_ids_1 (`List[int]`, *optional*):
199
+ Optional second list of IDs for sequence pairs.
200
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
201
+ Whether or not the token list is already formatted with special tokens for the model.
202
+
203
+ Returns:
204
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
205
+ """
206
+ if already_has_special_tokens:
207
+ return super().get_special_tokens_mask(
208
+ token_ids_0=token_ids_0,
209
+ token_ids_1=token_ids_1,
210
+ already_has_special_tokens=True,
211
+ )
212
+
213
+ bos_token_id = [1] if self.add_bos_token else []
214
+ eos_token_id = [1] if self.add_eos_token else []
215
+
216
+ if token_ids_1 is None:
217
+ return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
218
+ return (
219
+ bos_token_id
220
+ + ([0] * len(token_ids_0))
221
+ + eos_token_id
222
+ + bos_token_id
223
+ + ([0] * len(token_ids_1))
224
+ + eos_token_id
225
+ )
226
+
227
+ def create_token_type_ids_from_sequences(
228
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
229
+ ) -> List[int]:
230
+ """
231
+ Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
232
+ sequence pair mask has the following format:
233
+
234
+ ```
235
+ 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
236
+ | first sequence | second sequence |
237
+ ```
238
+
239
+ if token_ids_1 is None, only returns the first portion of the mask (0s).
240
+
241
+ Args:
242
+ token_ids_0 (`List[int]`):
243
+ List of ids.
244
+ token_ids_1 (`List[int]`, *optional*):
245
+ Optional second list of IDs for sequence pairs.
246
+
247
+ Returns:
248
+ `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
249
+ """
250
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
251
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
252
+
253
+ output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
254
+
255
+ if token_ids_1 is not None:
256
+ output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
257
+
258
+ return output
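`tokenization_baichuan.py` is custom tokenizer code, so it is resolved through the `auto_map` entry in `tokenizer_config.json` and requires `trust_remote_code=True`. A minimal usage sketch (the checkpoint path is illustrative, not confirmed by this commit):

```python
from transformers import AutoTokenizer

# Resolves to BaichuanTokenizer via tokenizer_config.json's auto_map;
# trust_remote_code is required because the tokenizer class ships with the repo.
tokenizer = AutoTokenizer.from_pretrained(
    "user-baichuan2-13b-v2-3.6/checkpoint-200", trust_remote_code=True
)
ids = tokenizer("hello world").input_ids
print(tokenizer.convert_ids_to_tokens(ids))
```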
user-baichuan2-13b-v2-3.6/checkpoint-200/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
+ size 2001107
user-baichuan2-13b-v2-3.6/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ }
+ },
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_baichuan.BaichuanTokenizer",
+ null
+ ]
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "model_max_length": 4096,
+ "pad_token": "<unk>",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "BaichuanTokenizer",
+ "unk_token": "<unk>"
+ }
user-baichuan2-13b-v2-3.6/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,161 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.41450777202072536,
5
+ "eval_steps": 500,
6
+ "global_step": 200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.99941873550415,
14
+ "learning_rate": 2e-05,
15
+ "loss": 9.9329,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.741065502166748,
21
+ "learning_rate": 4e-05,
22
+ "loss": 11.0746,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.4727320671081543,
28
+ "learning_rate": 6e-05,
29
+ "loss": 2.7159,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.1335960477590561,
35
+ "learning_rate": 8e-05,
36
+ "loss": 0.3969,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.0014472692273557186,
42
+ "learning_rate": 0.0001,
43
+ "loss": 0.0032,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.0010780546581372619,
49
+ "learning_rate": 0.0001,
50
+ "loss": 0.0002,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.15,
55
+ "grad_norm": 1.03132963180542,
56
+ "learning_rate": 0.0001,
57
+ "loss": 0.0002,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.17,
62
+ "grad_norm": 0.008827299810945988,
63
+ "learning_rate": 0.0001,
64
+ "loss": 0.0,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.19,
69
+ "grad_norm": 0.0002956670359708369,
70
+ "learning_rate": 0.0001,
71
+ "loss": 0.0001,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.21,
76
+ "grad_norm": 0.0003419867134653032,
77
+ "learning_rate": 0.0001,
78
+ "loss": 0.0,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.23,
83
+ "grad_norm": 0.0003681881644297391,
84
+ "learning_rate": 0.0001,
85
+ "loss": 0.0,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.25,
90
+ "grad_norm": 0.0002884200366679579,
91
+ "learning_rate": 0.0001,
92
+ "loss": 0.0,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.27,
97
+ "grad_norm": 0.00011985149467363954,
98
+ "learning_rate": 0.0001,
99
+ "loss": 0.0,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.29,
104
+ "grad_norm": 0.0003195986500941217,
105
+ "learning_rate": 0.0001,
106
+ "loss": 0.0,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.31,
111
+ "grad_norm": 0.00010149635636480525,
112
+ "learning_rate": 0.0001,
113
+ "loss": 0.0,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.33,
118
+ "grad_norm": 0.00010508792183827609,
119
+ "learning_rate": 0.0001,
120
+ "loss": 0.0,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.35,
125
+ "grad_norm": 0.00011793687008321285,
126
+ "learning_rate": 0.0001,
127
+ "loss": 0.006,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.37,
132
+ "grad_norm": 8.076676749624312e-05,
133
+ "learning_rate": 0.0001,
134
+ "loss": 0.0,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.39,
139
+ "grad_norm": 0.0007808339432813227,
140
+ "learning_rate": 0.0001,
141
+ "loss": 0.006,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.41,
146
+ "grad_norm": 0.11711683869361877,
147
+ "learning_rate": 0.0001,
148
+ "loss": 0.003,
149
+ "step": 200
150
+ }
151
+ ],
152
+ "logging_steps": 10,
153
+ "max_steps": 482,
154
+ "num_input_tokens_seen": 0,
155
+ "num_train_epochs": 1,
156
+ "save_steps": 100,
157
+ "total_flos": 3.335841878562816e+17,
158
+ "train_batch_size": 1,
159
+ "trial_name": null,
160
+ "trial_params": null
161
+ }
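`trainer_state.json` stores the HF `Trainer` log history (loss, grad norm and learning rate every 10 steps). A small sketch for pulling the loss curve out of a file like the one above (the path is illustrative):

```python
import json

# Print the step/loss/learning-rate triples logged by the Trainer
with open("user-baichuan2-13b-v2-3.6/checkpoint-200/trainer_state.json") as f:
    state = json.load(f)

for record in state["log_history"]:
    print(record["step"], record["loss"], record["learning_rate"])
```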
user-baichuan2-13b-v2-3.6/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1abab34fc571ab2be46c8abdf765b96b9a09ab4144528e95270f1af465c0f19c
+ size 4475
user-baichuan2-13b-v2-3.6/checkpoint-300/README.md ADDED
@@ -0,0 +1,23 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+ ### Framework versions
+
+
+ - PEFT 0.4.0
user-baichuan2-13b-v2-3.6/checkpoint-300/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/jiakangxiang/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "W_pack",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
user-baichuan2-13b-v2-3.6/checkpoint-300/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3abb43148c7b50ee5cbd1d6ca51b35ce5a55e74d837bf441cb247d3ea6a7c56a
+ size 223203704
user-baichuan2-13b-v2-3.6/checkpoint-300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6626b117cfcf7dfb7c1d3cf79bd87cac30bfdbb5b6d6138c5282bda6d59f86cb
+ size 446541893
user-baichuan2-13b-v2-3.6/checkpoint-300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:939982b34814a29d20630db8d8bb9ca8ffaaca659852333f9bf40ca5f715ffd0
+ size 14575
user-baichuan2-13b-v2-3.6/checkpoint-300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47e1286a69d6cf9d5865b0808d9a438a85cffe270c5273c3518b2a5557084aa5
+ size 627
user-baichuan2-13b-v2-3.6/checkpoint-300/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "pad_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ }
+ }
user-baichuan2-13b-v2-3.6/checkpoint-300/tokenization_baichuan.py ADDED
@@ -0,0 +1,258 @@
1
+ # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
2
+
3
+ import os
4
+ from shutil import copyfile
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import sentencepiece as spm
8
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
9
+ from transformers.utils import logging
10
+
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
+ VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
15
+
16
+ PRETRAINED_VOCAB_FILES_MAP = {
17
+ "vocab_file": {},
18
+ "tokenizer_file": {},
19
+ }
20
+ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
21
+
22
+
23
+ class BaichuanTokenizer(PreTrainedTokenizer):
24
+ """
25
+ Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
26
+
27
+ Args:
28
+ vocab_file (`str`):
29
+ Path to the vocabulary file.
30
+ """
31
+
32
+ vocab_files_names = VOCAB_FILES_NAMES
33
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
34
+ max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
+ model_input_names = ["input_ids", "attention_mask"]
36
+
37
+ def __init__(
38
+ self,
39
+ vocab_file,
40
+ unk_token="<unk>",
41
+ bos_token="<s>",
42
+ eos_token="</s>",
43
+ pad_token=None,
44
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
45
+ add_bos_token=True,
46
+ add_eos_token=False,
47
+ clean_up_tokenization_spaces=False,
48
+ **kwargs,
49
+ ):
50
+ self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
51
+ bos_token = (
52
+ AddedToken(bos_token, lstrip=False, rstrip=False)
53
+ if isinstance(bos_token, str)
54
+ else bos_token
55
+ )
56
+ eos_token = (
57
+ AddedToken(eos_token, lstrip=False, rstrip=False)
58
+ if isinstance(eos_token, str)
59
+ else eos_token
60
+ )
61
+ unk_token = (
62
+ AddedToken(unk_token, lstrip=False, rstrip=False)
63
+ if isinstance(unk_token, str)
64
+ else unk_token
65
+ )
66
+ pad_token = (
67
+ AddedToken(pad_token, lstrip=False, rstrip=False)
68
+ if isinstance(pad_token, str)
69
+ else pad_token
70
+ )
71
+ self.vocab_file = vocab_file
72
+ self.add_bos_token = add_bos_token
73
+ self.add_eos_token = add_eos_token
74
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
75
+ self.sp_model.Load(vocab_file)
76
+ super().__init__(
77
+ bos_token=bos_token,
78
+ eos_token=eos_token,
79
+ unk_token=unk_token,
80
+ pad_token=pad_token,
81
+ add_bos_token=add_bos_token,
82
+ add_eos_token=add_eos_token,
83
+ sp_model_kwargs=self.sp_model_kwargs,
84
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
85
+ **kwargs,
86
+ )
87
+
88
+ def __getstate__(self):
89
+ state = self.__dict__.copy()
90
+ state["sp_model"] = None
91
+ return state
92
+
93
+ def __setstate__(self, d):
94
+ self.__dict__ = d
95
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
96
+ self.sp_model.Load(self.vocab_file)
97
+
98
+ @property
99
+ def vocab_size(self):
100
+ """Returns vocab size"""
101
+ return self.sp_model.get_piece_size()
102
+
103
+ def get_vocab(self):
104
+ """Returns vocab as a dict"""
105
+ vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
106
+ vocab.update(self.added_tokens_encoder)
107
+ return vocab
108
+
109
+ def _tokenize(self, text):
110
+ """Returns a tokenized string."""
111
+ return self.sp_model.encode(text, out_type=str)
112
+
113
+ def _convert_token_to_id(self, token):
114
+ """Converts a token (str) in an id using the vocab."""
115
+ return self.sp_model.piece_to_id(token)
116
+
117
+ def _convert_id_to_token(self, index):
118
+ """Converts an index (integer) in a token (str) using the vocab."""
119
+ token = self.sp_model.IdToPiece(index)
120
+ return token
121
+
122
+ def convert_tokens_to_string(self, tokens):
123
+ """Converts a sequence of tokens (string) in a single string."""
124
+ current_sub_tokens = []
125
+ out_string = ""
126
+ prev_is_special = False
127
+ for i, token in enumerate(tokens):
128
+ # make sure that special tokens are not decoded using sentencepiece model
129
+ if token in self.all_special_tokens:
130
+ if not prev_is_special and i != 0:
131
+ out_string += " "
132
+ out_string += self.sp_model.decode(current_sub_tokens) + token
133
+ prev_is_special = True
134
+ current_sub_tokens = []
135
+ else:
136
+ current_sub_tokens.append(token)
137
+ prev_is_special = False
138
+ out_string += self.sp_model.decode(current_sub_tokens)
139
+ return out_string
140
+
141
+ def save_vocabulary(
142
+ self, save_directory, filename_prefix: Optional[str] = None
143
+ ) -> Tuple[str]:
144
+ """
145
+ Save the vocabulary and special tokens file to a directory.
146
+
147
+ Args:
148
+ save_directory (`str`):
149
+ The directory in which to save the vocabulary.
150
+
151
+ Returns:
152
+ `Tuple(str)`: Paths to the files saved.
153
+ """
154
+ if not os.path.isdir(save_directory):
155
+ logger.error(f"Vocabulary path ({save_directory}) should be a directory")
156
+ return
157
+ out_vocab_file = os.path.join(
158
+ save_directory,
159
+ (filename_prefix + "-" if filename_prefix else "")
160
+ + VOCAB_FILES_NAMES["vocab_file"],
161
+ )
162
+
163
+ if os.path.abspath(self.vocab_file) != os.path.abspath(
164
+ out_vocab_file
165
+ ) and os.path.isfile(self.vocab_file):
166
+ copyfile(self.vocab_file, out_vocab_file)
167
+ elif not os.path.isfile(self.vocab_file):
168
+ with open(out_vocab_file, "wb") as fi:
169
+ content_spiece_model = self.sp_model.serialized_model_proto()
170
+ fi.write(content_spiece_model)
171
+
172
+ return (out_vocab_file,)
173
+
174
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
175
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
176
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
177
+
178
+ output = bos_token_id + token_ids_0 + eos_token_id
179
+
180
+ if token_ids_1 is not None:
181
+ output = output + bos_token_id + token_ids_1 + eos_token_id
182
+
183
+ return output
184
+
185
+ def get_special_tokens_mask(
186
+ self,
187
+ token_ids_0: List[int],
188
+ token_ids_1: Optional[List[int]] = None,
189
+ already_has_special_tokens: bool = False,
190
+ ) -> List[int]:
191
+ """
192
+ Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
193
+ special tokens using the tokenizer `prepare_for_model` method.
194
+
195
+ Args:
196
+ token_ids_0 (`List[int]`):
197
+ List of IDs.
198
+ token_ids_1 (`List[int]`, *optional*):
199
+ Optional second list of IDs for sequence pairs.
200
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
201
+ Whether or not the token list is already formatted with special tokens for the model.
202
+
203
+ Returns:
204
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
205
+ """
206
+ if already_has_special_tokens:
207
+ return super().get_special_tokens_mask(
208
+ token_ids_0=token_ids_0,
209
+ token_ids_1=token_ids_1,
210
+ already_has_special_tokens=True,
211
+ )
212
+
213
+ bos_token_id = [1] if self.add_bos_token else []
214
+ eos_token_id = [1] if self.add_eos_token else []
215
+
216
+ if token_ids_1 is None:
217
+ return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
218
+ return (
219
+ bos_token_id
220
+ + ([0] * len(token_ids_0))
221
+ + eos_token_id
222
+ + bos_token_id
223
+ + ([0] * len(token_ids_1))
224
+ + eos_token_id
225
+ )
226
+
227
+ def create_token_type_ids_from_sequences(
228
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
229
+ ) -> List[int]:
230
+ """
231
+ Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
232
+ sequence pair mask has the following format:
233
+
234
+ ```
235
+ 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
236
+ | first sequence | second sequence |
237
+ ```
238
+
239
+ if token_ids_1 is None, only returns the first portion of the mask (0s).
240
+
241
+ Args:
242
+ token_ids_0 (`List[int]`):
243
+ List of ids.
244
+ token_ids_1 (`List[int]`, *optional*):
245
+ Optional second list of IDs for sequence pairs.
246
+
247
+ Returns:
248
+ `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
249
+ """
250
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
251
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
252
+
253
+ output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
254
+
255
+ if token_ids_1 is not None:
256
+ output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
257
+
258
+ return output
user-baichuan2-13b-v2-3.6/checkpoint-300/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
+ size 2001107
user-baichuan2-13b-v2-3.6/checkpoint-300/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true,
+ "special": true
+ }
+ },
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_baichuan.BaichuanTokenizer",
+ null
+ ]
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "model_max_length": 4096,
+ "pad_token": "<unk>",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "BaichuanTokenizer",
+ "unk_token": "<unk>"
+ }
user-baichuan2-13b-v2-3.6/checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,231 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.6217616580310881,
5
+ "eval_steps": 500,
6
+ "global_step": 300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.99941873550415,
14
+ "learning_rate": 2e-05,
15
+ "loss": 9.9329,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.741065502166748,
21
+ "learning_rate": 4e-05,
22
+ "loss": 11.0746,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.4727320671081543,
28
+ "learning_rate": 6e-05,
29
+ "loss": 2.7159,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.1335960477590561,
35
+ "learning_rate": 8e-05,
36
+ "loss": 0.3969,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.0014472692273557186,
42
+ "learning_rate": 0.0001,
43
+ "loss": 0.0032,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.0010780546581372619,
49
+ "learning_rate": 0.0001,
50
+ "loss": 0.0002,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.15,
55
+ "grad_norm": 1.03132963180542,
56
+ "learning_rate": 0.0001,
57
+ "loss": 0.0002,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.17,
62
+ "grad_norm": 0.008827299810945988,
63
+ "learning_rate": 0.0001,
64
+ "loss": 0.0,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.19,
69
+ "grad_norm": 0.0002956670359708369,
70
+ "learning_rate": 0.0001,
71
+ "loss": 0.0001,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.21,
76
+ "grad_norm": 0.0003419867134653032,
77
+ "learning_rate": 0.0001,
78
+ "loss": 0.0,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.23,
83
+ "grad_norm": 0.0003681881644297391,
84
+ "learning_rate": 0.0001,
85
+ "loss": 0.0,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.25,
90
+ "grad_norm": 0.0002884200366679579,
91
+ "learning_rate": 0.0001,
92
+ "loss": 0.0,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.27,
97
+ "grad_norm": 0.00011985149467363954,
98
+ "learning_rate": 0.0001,
99
+ "loss": 0.0,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.29,
104
+ "grad_norm": 0.0003195986500941217,
105
+ "learning_rate": 0.0001,
106
+ "loss": 0.0,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.31,
111
+ "grad_norm": 0.00010149635636480525,
112
+ "learning_rate": 0.0001,
113
+ "loss": 0.0,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.33,
118
+ "grad_norm": 0.00010508792183827609,
119
+ "learning_rate": 0.0001,
120
+ "loss": 0.0,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.35,
125
+ "grad_norm": 0.00011793687008321285,
126
+ "learning_rate": 0.0001,
127
+ "loss": 0.006,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.37,
132
+ "grad_norm": 8.076676749624312e-05,
133
+ "learning_rate": 0.0001,
134
+ "loss": 0.0,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.39,
139
+ "grad_norm": 0.0007808339432813227,
140
+ "learning_rate": 0.0001,
141
+ "loss": 0.006,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.41,
146
+ "grad_norm": 0.11711683869361877,
147
+ "learning_rate": 0.0001,
148
+ "loss": 0.003,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.44,
153
+ "grad_norm": 0.0002039404644165188,
154
+ "learning_rate": 0.0001,
155
+ "loss": 0.0001,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.46,
160
+ "grad_norm": 0.00873592495918274,
161
+ "learning_rate": 0.0001,
162
+ "loss": 0.0209,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.48,
167
+ "grad_norm": 3.0506539344787598,
168
+ "learning_rate": 0.0001,
169
+ "loss": 0.0201,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.5,
174
+ "grad_norm": 0.05903371796011925,
175
+ "learning_rate": 0.0001,
176
+ "loss": 0.0026,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.52,
181
+ "grad_norm": 0.0002484666183590889,
182
+ "learning_rate": 0.0001,
183
+ "loss": 0.0001,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.54,
188
+ "grad_norm": 0.0003493047261144966,
189
+ "learning_rate": 0.0001,
190
+ "loss": 0.0001,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.56,
195
+ "grad_norm": 0.0008058947860263288,
196
+ "learning_rate": 0.0001,
197
+ "loss": 0.0001,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.58,
202
+ "grad_norm": 0.0004198936221655458,
203
+ "learning_rate": 0.0001,
204
+ "loss": 0.0001,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.6,
209
+ "grad_norm": 0.0002983050071634352,
210
+ "learning_rate": 0.0001,
211
+ "loss": 0.0001,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.62,
216
+ "grad_norm": 0.0002279053587699309,
217
+ "learning_rate": 0.0001,
218
+ "loss": 0.0001,
219
+ "step": 300
220
+ }
221
+ ],
222
+ "logging_steps": 10,
223
+ "max_steps": 482,
224
+ "num_input_tokens_seen": 0,
225
+ "num_train_epochs": 1,
226
+ "save_steps": 100,
227
+ "total_flos": 5.004587702980301e+17,
228
+ "train_batch_size": 1,
229
+ "trial_name": null,
230
+ "trial_params": null
231
+ }
user-baichuan2-13b-v2-3.6/checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1abab34fc571ab2be46c8abdf765b96b9a09ab4144528e95270f1af465c0f19c
+ size 4475
user-baichuan2-13b-v2-3.6/checkpoint-400/README.md ADDED
@@ -0,0 +1,23 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - _load_in_8bit: False
+ - _load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: float16
+ - load_in_4bit: True
+ - load_in_8bit: False
+ ### Framework versions
+
+
+ - PEFT 0.4.0
user-baichuan2-13b-v2-3.6/checkpoint-400/adapter_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/jiakangxiang/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "W_pack",
+ "down_proj",
+ "up_proj",
+ "gate_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
user-baichuan2-13b-v2-3.6/checkpoint-400/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a761a3ff3848cad96ebcb4d93d82af85ae86c2703ec031d10338d35d93aff15
+ size 223203704
user-baichuan2-13b-v2-3.6/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b55a4a50d2c66907dc3b95e3f979aac9b320a415e3a978859a220e686a76d9a
+ size 446541893
user-baichuan2-13b-v2-3.6/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54a11743bad40439e1bbf592fb8f66b1a3c8dbde2539b8897aec5e85c29fcc1c
+ size 14575
user-baichuan2-13b-v2-3.6/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6536a67a17be52e1d2b7b314f3abff272bd0f976aca319628b666d64bd161a64
+ size 627
user-baichuan2-13b-v2-3.6/checkpoint-400/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "pad_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ }
+ }
user-baichuan2-13b-v2-3.6/checkpoint-400/tokenization_baichuan.py ADDED
@@ -0,0 +1,258 @@
1
+ # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
2
+
3
+ import os
4
+ from shutil import copyfile
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import sentencepiece as spm
8
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
9
+ from transformers.utils import logging
10
+
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
+ VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
15
+
16
+ PRETRAINED_VOCAB_FILES_MAP = {
17
+ "vocab_file": {},
18
+ "tokenizer_file": {},
19
+ }
20
+ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
21
+
22
+
23
+ class BaichuanTokenizer(PreTrainedTokenizer):
24
+ """
25
+ Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.
26
+
27
+ Args:
28
+ vocab_file (`str`):
29
+ Path to the vocabulary file.
30
+ """
31
+
32
+ vocab_files_names = VOCAB_FILES_NAMES
33
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
34
+ max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
+ model_input_names = ["input_ids", "attention_mask"]
36
+
37
+ def __init__(
38
+ self,
39
+ vocab_file,
40
+ unk_token="<unk>",
41
+ bos_token="<s>",
42
+ eos_token="</s>",
43
+ pad_token=None,
44
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
45
+ add_bos_token=True,
46
+ add_eos_token=False,
47
+ clean_up_tokenization_spaces=False,
48
+ **kwargs,
49
+ ):
50
+ self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
51
+ bos_token = (
52
+ AddedToken(bos_token, lstrip=False, rstrip=False)
53
+ if isinstance(bos_token, str)
54
+ else bos_token
55
+ )
56
+ eos_token = (
57
+ AddedToken(eos_token, lstrip=False, rstrip=False)
58
+ if isinstance(eos_token, str)
59
+ else eos_token
60
+ )
61
+ unk_token = (
62
+ AddedToken(unk_token, lstrip=False, rstrip=False)
63
+ if isinstance(unk_token, str)
64
+ else unk_token
65
+ )
66
+ pad_token = (
67
+ AddedToken(pad_token, lstrip=False, rstrip=False)
68
+ if isinstance(pad_token, str)
69
+ else pad_token
70
+ )
71
+ self.vocab_file = vocab_file
72
+ self.add_bos_token = add_bos_token
73
+ self.add_eos_token = add_eos_token
74
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
75
+ self.sp_model.Load(vocab_file)
76
+ super().__init__(
77
+ bos_token=bos_token,
78
+ eos_token=eos_token,
79
+ unk_token=unk_token,
80
+ pad_token=pad_token,
81
+ add_bos_token=add_bos_token,
82
+ add_eos_token=add_eos_token,
83
+ sp_model_kwargs=self.sp_model_kwargs,
84
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
85
+ **kwargs,
86
+ )
87
+
88
+ def __getstate__(self):
89
+ state = self.__dict__.copy()
90
+ state["sp_model"] = None
91
+ return state
92
+
93
+ def __setstate__(self, d):
94
+ self.__dict__ = d
95
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
96
+ self.sp_model.Load(self.vocab_file)
97
+
98
+ @property
99
+ def vocab_size(self):
100
+ """Returns vocab size"""
101
+ return self.sp_model.get_piece_size()
102
+
103
+ def get_vocab(self):
104
+ """Returns vocab as a dict"""
105
+ vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
106
+ vocab.update(self.added_tokens_encoder)
107
+ return vocab
108
+
109
+ def _tokenize(self, text):
110
+ """Returns a tokenized string."""
111
+ return self.sp_model.encode(text, out_type=str)
112
+
113
+ def _convert_token_to_id(self, token):
114
+ """Converts a token (str) in an id using the vocab."""
115
+ return self.sp_model.piece_to_id(token)
116
+
117
+ def _convert_id_to_token(self, index):
118
+ """Converts an index (integer) in a token (str) using the vocab."""
119
+ token = self.sp_model.IdToPiece(index)
120
+ return token
121
+
122
+ def convert_tokens_to_string(self, tokens):
123
+ """Converts a sequence of tokens (string) in a single string."""
124
+ current_sub_tokens = []
125
+ out_string = ""
126
+ prev_is_special = False
127
+ for i, token in enumerate(tokens):
128
+ # make sure that special tokens are not decoded using sentencepiece model
129
+ if token in self.all_special_tokens:
130
+ if not prev_is_special and i != 0:
131
+ out_string += " "
132
+ out_string += self.sp_model.decode(current_sub_tokens) + token
133
+ prev_is_special = True
134
+ current_sub_tokens = []
135
+ else:
136
+ current_sub_tokens.append(token)
137
+ prev_is_special = False
138
+ out_string += self.sp_model.decode(current_sub_tokens)
139
+ return out_string
140
+
141
+ def save_vocabulary(
142
+ self, save_directory, filename_prefix: Optional[str] = None
143
+ ) -> Tuple[str]:
144
+ """
145
+ Save the vocabulary and special tokens file to a directory.
146
+
147
+ Args:
148
+ save_directory (`str`):
149
+ The directory in which to save the vocabulary.
150
+
151
+ Returns:
152
+ `Tuple(str)`: Paths to the files saved.
153
+ """
154
+ if not os.path.isdir(save_directory):
155
+ logger.error(f"Vocabulary path ({save_directory}) should be a directory")
156
+ return
157
+ out_vocab_file = os.path.join(
158
+ save_directory,
159
+ (filename_prefix + "-" if filename_prefix else "")
160
+ + VOCAB_FILES_NAMES["vocab_file"],
161
+ )
162
+
163
+ if os.path.abspath(self.vocab_file) != os.path.abspath(
164
+ out_vocab_file
165
+ ) and os.path.isfile(self.vocab_file):
166
+ copyfile(self.vocab_file, out_vocab_file)
167
+ elif not os.path.isfile(self.vocab_file):
168
+ with open(out_vocab_file, "wb") as fi:
169
+ content_spiece_model = self.sp_model.serialized_model_proto()
170
+ fi.write(content_spiece_model)
171
+
172
+ return (out_vocab_file,)
173
+
174
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
175
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
176
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
177
+
178
+ output = bos_token_id + token_ids_0 + eos_token_id
179
+
180
+ if token_ids_1 is not None:
181
+ output = output + bos_token_id + token_ids_1 + eos_token_id
182
+
183
+ return output
184
+
185
+ def get_special_tokens_mask(
186
+ self,
187
+ token_ids_0: List[int],
188
+ token_ids_1: Optional[List[int]] = None,
189
+ already_has_special_tokens: bool = False,
190
+ ) -> List[int]:
191
+ """
192
+ Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
193
+ special tokens using the tokenizer `prepare_for_model` method.
194
+
195
+ Args:
196
+ token_ids_0 (`List[int]`):
197
+ List of IDs.
198
+ token_ids_1 (`List[int]`, *optional*):
199
+ Optional second list of IDs for sequence pairs.
200
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
201
+ Whether or not the token list is already formatted with special tokens for the model.
202
+
203
+ Returns:
204
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
205
+ """
206
+ if already_has_special_tokens:
207
+ return super().get_special_tokens_mask(
208
+ token_ids_0=token_ids_0,
209
+ token_ids_1=token_ids_1,
210
+ already_has_special_tokens=True,
211
+ )
212
+
213
+ bos_token_id = [1] if self.add_bos_token else []
214
+ eos_token_id = [1] if self.add_eos_token else []
215
+
216
+ if token_ids_1 is None:
217
+ return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
218
+ return (
219
+ bos_token_id
220
+ + ([0] * len(token_ids_0))
221
+ + eos_token_id
222
+ + bos_token_id
223
+ + ([0] * len(token_ids_1))
224
+ + eos_token_id
225
+ )
226
+
227
+ def create_token_type_ids_from_sequences(
228
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
229
+ ) -> List[int]:
230
+ """
231
+ Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
232
+ sequence pair mask has the following format:
233
+
234
+ ```
235
+ 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
236
+ | first sequence | second sequence |
237
+ ```
238
+
239
+ if token_ids_1 is None, only returns the first portion of the mask (0s).
240
+
241
+ Args:
242
+ token_ids_0 (`List[int]`):
243
+ List of ids.
244
+ token_ids_1 (`List[int]`, *optional*):
245
+ Optional second list of IDs for sequence pairs.
246
+
247
+ Returns:
248
+ `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
249
+ """
250
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
251
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
252
+
253
+ output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
254
+
255
+ if token_ids_1 is not None:
256
+ output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
257
+
258
+ return output
user-baichuan2-13b-v2-3.6/checkpoint-400/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79452955be6b419a65984273a9f08af86042e1c2a75ee3ba989cbf620a133cc2
+ size 2001107
user-baichuan2-13b-v2-3.6/checkpoint-400/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": true,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": true,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": true,
+      "special": true
+    }
+  },
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_baichuan.BaichuanTokenizer",
+      null
+    ]
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "model_max_length": 4096,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "BaichuanTokenizer",
+  "unk_token": "<unk>"
+}
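A minimal loading sketch for this configuration, assuming the checkpoint directory has been downloaded locally with its real tokenizer.model (the entry above is only an LFS pointer) and a recent transformers release is installed; the local path is an assumption:

```python
# Sketch only: load the custom BaichuanTokenizer declared in auto_map above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "user-baichuan2-13b-v2-3.6/checkpoint-400",  # assumed local path to the checkpoint
    trust_remote_code=True,                      # required to import tokenization_baichuan.py
)

# With add_bos_token / add_eos_token both false, encode() adds no special tokens,
# <unk> doubles as the padding token, and inputs are capped at 4096 tokens.
print(tok.pad_token, tok.model_max_length)
```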
user-baichuan2-13b-v2-3.6/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,301 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8290155440414507,
+  "eval_steps": 500,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "grad_norm": 4.99941873550415,
+      "learning_rate": 2e-05,
+      "loss": 9.9329,
+      "step": 10
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.741065502166748,
+      "learning_rate": 4e-05,
+      "loss": 11.0746,
+      "step": 20
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.4727320671081543,
+      "learning_rate": 6e-05,
+      "loss": 2.7159,
+      "step": 30
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.1335960477590561,
+      "learning_rate": 8e-05,
+      "loss": 0.3969,
+      "step": 40
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.0014472692273557186,
+      "learning_rate": 0.0001,
+      "loss": 0.0032,
+      "step": 50
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.0010780546581372619,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 60
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 1.03132963180542,
+      "learning_rate": 0.0001,
+      "loss": 0.0002,
+      "step": 70
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 0.008827299810945988,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 0.0002956670359708369,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 90
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 0.0003419867134653032,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 0.0003681881644297391,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 110
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 0.0002884200366679579,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.27,
+      "grad_norm": 0.00011985149467363954,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 0.0003195986500941217,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.00010149635636480525,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.00010508792183827609,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 0.00011793687008321285,
+      "learning_rate": 0.0001,
+      "loss": 0.006,
+      "step": 170
+    },
+    {
+      "epoch": 0.37,
+      "grad_norm": 8.076676749624312e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 0.0007808339432813227,
+      "learning_rate": 0.0001,
+      "loss": 0.006,
+      "step": 190
+    },
+    {
+      "epoch": 0.41,
+      "grad_norm": 0.11711683869361877,
+      "learning_rate": 0.0001,
+      "loss": 0.003,
+      "step": 200
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 0.0002039404644165188,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 210
+    },
+    {
+      "epoch": 0.46,
+      "grad_norm": 0.00873592495918274,
+      "learning_rate": 0.0001,
+      "loss": 0.0209,
+      "step": 220
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 3.0506539344787598,
+      "learning_rate": 0.0001,
+      "loss": 0.0201,
+      "step": 230
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.05903371796011925,
+      "learning_rate": 0.0001,
+      "loss": 0.0026,
+      "step": 240
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 0.0002484666183590889,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 250
+    },
+    {
+      "epoch": 0.54,
+      "grad_norm": 0.0003493047261144966,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 260
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.0008058947860263288,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 270
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 0.0004198936221655458,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 280
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.0002983050071634352,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 290
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 0.0002279053587699309,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 300
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.00015332824841607362,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 310
+    },
+    {
+      "epoch": 0.66,
+      "grad_norm": 0.00011723622446879745,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 320
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 0.0001235378731507808,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 330
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 0.00010625163122313097,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 340
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 7.50239341869019e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 350
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.00010148331784876063,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 360
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 7.368126534856856e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 370
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.00012744461128022522,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 380
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 5.87971335335169e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 390
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 6.405858584912494e-05,
+      "learning_rate": 0.0001,
+      "loss": 0.0001,
+      "step": 400
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 482,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "total_flos": 6.625291354656461e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
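The log above can also be read programmatically, for example to dump the loss curve; a small sketch, assuming the same local checkpoint path as before:

```python
# Sketch: print step / loss / learning rate from this checkpoint's trainer_state.json.
import json

with open("user-baichuan2-13b-v2-3.6/checkpoint-400/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    print(entry["step"], entry["loss"], entry["learning_rate"])
```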
user-baichuan2-13b-v2-3.6/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1abab34fc571ab2be46c8abdf765b96b9a09ab4144528e95270f1af465c0f19c
+size 4475
user-baichuan2-13b-v2-3.6/runs/Mar06_16-02-53_u/events.out.tfevents.1709741241.u.349083.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3014282355673d8fdcaa570d4c5296d240c5dd75bf87a842af9ec4b76a4e9116
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_16-15-19_u/events.out.tfevents.1709741991.u.349593.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9058c4ffc24a76a37dcc37c3d6b8fc0c8aa81301fb6b58fe0305a01b7081d8f2
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_16-27-57_u/events.out.tfevents.1709742755.u.350734.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a35a2b472b2864493ef9958235b77f8c4c7af27b4b88b478d703f99b4f10f718
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_16-37-25_u/events.out.tfevents.1709743386.u.351776.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c388e6f60bbcf55c3e512b8af87941e03d9a7648390ab77bfe714ffdc48c7bac
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_16-46-23_u/events.out.tfevents.1709743925.u.352180.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a087c12a11786c7f7b12c642ee0a6c4098d3859d1e2df9abf3e218f2e3365ec
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_16-55-14_u/events.out.tfevents.1709744402.u.352650.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aca9a5ecc84224df331de22088bfa3fe20a25c6903fd402a0be863a9566699d
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_17-03-22_u/events.out.tfevents.1709744890.u.353116.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfea604250d4aac14bc3ed7c865337977033589c98558e199805f39e062fa8d8
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_17-13-29_u/events.out.tfevents.1709745516.u.353684.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b3e44490b8dd88a5a161c7a6b9a835026ef7a8218a9fb171acaa8737033db7
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_17-30-51_u/events.out.tfevents.1709746552.u.354572.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2a7c82c63e3cb38c34975b02f2f0e377684b0dfa72aa1dfcfaad954da7c93e2
+size 5162
user-baichuan2-13b-v2-3.6/runs/Mar06_17-42-56_u/events.out.tfevents.1709747302.u.355650.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf7786e2daf0473ba392d547dc007aaa018e30f7c97ce3bca26d196f68798142
+size 5164