add the 70k checkpoint
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 70000/checkpoint_metadata.json +9 -0
- 70000/config.yaml +139 -0
- 70000/lr_scheduler/lr_scheduler.pt +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors +3 -0
- 70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors +3 -0
70000/checkpoint_metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dp": 8,
|
3 |
+
"metas": {
|
4 |
+
"consumed_train_samples": 35840000,
|
5 |
+
"last_train_step": 70000
|
6 |
+
},
|
7 |
+
"tp": 8,
|
8 |
+
"version": "1.2"
|
9 |
+
}
|
70000/config.yaml
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
checkpoints:
|
2 |
+
checkpoint_interval: 5000
|
3 |
+
checkpoints_path: /fsx/phuc/checkpoints/doremi/big-run-02/reference-2.8b-llama-tuned-weights_with_100k_proxy
|
4 |
+
checkpoints_path_is_shared_file_system: true
|
5 |
+
resume_checkpoint_path: null
|
6 |
+
save_initial_state: false
|
7 |
+
data:
|
8 |
+
dataset:
|
9 |
+
dataset_overwrite_cache: false
|
10 |
+
dataset_processing_num_proc_per_process: 1
|
11 |
+
hf_dataset_config_name: null
|
12 |
+
hf_dataset_or_datasets: /fsx/phuc/project_data/doremi/datasets/the_pile_raw/tokenized_data/train
|
13 |
+
hf_dataset_splits: train
|
14 |
+
text_column_name: text
|
15 |
+
num_loading_workers: 1
|
16 |
+
seed: 42
|
17 |
+
doremi:
|
18 |
+
domain_names:
|
19 |
+
- Pile-CC
|
20 |
+
- Github
|
21 |
+
- OpenWebText2
|
22 |
+
- StackExchange
|
23 |
+
- Wikipedia (en)
|
24 |
+
- PubMed Abstracts
|
25 |
+
- USPTO Backgrounds
|
26 |
+
- FreeLaw
|
27 |
+
- PubMed Central
|
28 |
+
- Enron Emails
|
29 |
+
- HackerNews
|
30 |
+
- NIH ExPorter
|
31 |
+
- Books3
|
32 |
+
- ArXiv
|
33 |
+
- DM Mathematics
|
34 |
+
- OpenSubtitles
|
35 |
+
- Gutenberg (PG-19)
|
36 |
+
- Ubuntu IRC
|
37 |
+
- BookCorpus2
|
38 |
+
- EuroParl
|
39 |
+
- YoutubeSubtitles
|
40 |
+
- PhilPapers
|
41 |
+
domain_weights:
|
42 |
+
- 0.2333
|
43 |
+
- 0.07
|
44 |
+
- 0.1154
|
45 |
+
- 0.0528
|
46 |
+
- 0.0665
|
47 |
+
- 0.067
|
48 |
+
- 0.0366
|
49 |
+
- 0.0571
|
50 |
+
- 0.0451
|
51 |
+
- 0.0036
|
52 |
+
- 0.0087
|
53 |
+
- 0.0078
|
54 |
+
- 0.0708
|
55 |
+
- 0.0656
|
56 |
+
- 0.0034
|
57 |
+
- 0.0048
|
58 |
+
- 0.0222
|
59 |
+
- 0.0084
|
60 |
+
- 0.0038
|
61 |
+
- 0.0186
|
62 |
+
- 0.0149
|
63 |
+
- 0.0235
|
64 |
+
ref_model_checkpoint_path: null
|
65 |
+
ref_model_resume_checkpoint_path: null
|
66 |
+
general:
|
67 |
+
benchmark_csv_path: null
|
68 |
+
consumed_train_samples: 35840000
|
69 |
+
ignore_sanity_checks: true
|
70 |
+
project: nanotron
|
71 |
+
run: train_tuned_2.8b_model
|
72 |
+
seed: 42
|
73 |
+
step: 70000
|
74 |
+
logging:
|
75 |
+
iteration_step_info_interval: 1
|
76 |
+
log_level: info
|
77 |
+
log_level_replica: info
|
78 |
+
model:
|
79 |
+
ddp_bucket_cap_mb: 120
|
80 |
+
dtype: bfloat16
|
81 |
+
init_method:
|
82 |
+
std: 0.025
|
83 |
+
make_vocab_size_divisible_by: 1
|
84 |
+
model_config:
|
85 |
+
bos_token_id: 1
|
86 |
+
eos_token_id: 2
|
87 |
+
hidden_act: silu
|
88 |
+
hidden_size: 4096
|
89 |
+
initializer_range: 0.02
|
90 |
+
intermediate_size: 24576
|
91 |
+
is_llama_config: true
|
92 |
+
max_position_embeddings: 1024
|
93 |
+
num_attention_heads: 32
|
94 |
+
num_hidden_layers: 6
|
95 |
+
num_key_value_heads: 16
|
96 |
+
pad_token_id: null
|
97 |
+
pretraining_tp: 1
|
98 |
+
rms_norm_eps: 1.0e-05
|
99 |
+
rope_scaling: null
|
100 |
+
tie_word_embeddings: true
|
101 |
+
use_cache: true
|
102 |
+
vocab_size: 49152
|
103 |
+
optimizer:
|
104 |
+
accumulate_grad_in_fp32: true
|
105 |
+
adam_beta1: 0.9
|
106 |
+
adam_beta2: 0.95
|
107 |
+
adam_eps: 1.0e-08
|
108 |
+
clip_grad: 1.0
|
109 |
+
learning_rate_scheduler:
|
110 |
+
learning_rate: 0.0003
|
111 |
+
lr_decay_steps: 8
|
112 |
+
lr_decay_style: cosine
|
113 |
+
lr_warmup_steps: 2
|
114 |
+
lr_warmup_style: linear
|
115 |
+
min_decay_lr: 1.0e-05
|
116 |
+
torch_adam_is_fused: true
|
117 |
+
weight_decay: 0.01
|
118 |
+
zero_stage: 0
|
119 |
+
parallelism:
|
120 |
+
dp: 8
|
121 |
+
pp: 1
|
122 |
+
pp_engine: 1f1b
|
123 |
+
recompute_granularity: SELECTIVE
|
124 |
+
tp: 8
|
125 |
+
tp_linear_async_communication: true
|
126 |
+
tp_mode: REDUCE_SCATTER
|
127 |
+
profiler: null
|
128 |
+
tokenizer:
|
129 |
+
tokenizer_max_length: null
|
130 |
+
tokenizer_name_or_path: gpt2
|
131 |
+
tokenizer_revision: null
|
132 |
+
tokens:
|
133 |
+
batch_accumulation_per_replica: 1
|
134 |
+
limit_test_batches: 0
|
135 |
+
limit_val_batches: 8
|
136 |
+
micro_batch_size: 64
|
137 |
+
sequence_length: 1024
|
138 |
+
train_steps: 70000
|
139 |
+
val_check_interval: -1
|
70000/lr_scheduler/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f44f89c4642a0011361e62b7717a7243492b6f41d8aa83936b9c4e75cdab7cf4
|
3 |
+
size 1012
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b924abc2c694bab96dd718980d2fc41374bb6a98c2a2a7ef033ce2084a80d34a
|
3 |
+
size 4194536
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:befcaf368c5f7006f3f85a23ebe7ac2b8383f0e2dcb54f514485e450a803011a
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634a0e7e3a8f6e98e7bfbd8bdb9e09663e6155177fe47a5b8dae9741aee93b56
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42cbc6fda656a9efb9254ff1fb4b6aac1b1f53d9ffd08632df492a83e0578c7
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:571e457e7da01bab5afb26c12f45d31383b2b6b45d7930defd16c444b9d20705
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9aec92ba7b1146b18237c31eba3ccd2704ce4c9b877e9e98310cf6b7c8c2daa6
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84491e45c7d80cd47f542836ee82aec17a0f332d0a0bde0a3785aa20b2fe2ca3
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86ef426782938777a098a005c96184ad66e88b5559b32946096db74daeeec93
|
3 |
+
size 4194544
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5080436a11827555f2464b505bd293a4b5f6edefd3512b10581a67fa4fb7735
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9df3affd4284d543fe4ef9242de56e222a3bd61417b67b1c3437c2ddb471b0d
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4e1a2f0195db7bb0875718ad3aefc0e7fb36cc81d42e65c309aefee6d1b0d6b
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff413c42cbaa4def157792ac531370732e092e372cbccbb98f99138b48275c01
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f81d26513bf6ddd9a53ba9c7c925dfb53d47f36596426ff7402b1160a90a6e86
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0840250899782f7a0f5f246773ae560dc3bb0004d779dd30cdee0e133634da1
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d6e73a67f24e9dd5c03ffc7549fbdbc809a70a8f60d4636ca691a7fa1901efb
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:860fd9e1811888cf0dc8d38c1296edef0ae33fffe775a7f964dc7dd4f529db37
|
3 |
+
size 8388960
|
70000/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77aa2d98fa003f17cacd3e302f52690baf3dc7d1d51a1bcadc5c4b69b9765d8e
|
3 |
+
size 8288
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:388b1e2644d45b43de282701f8fd15b4bdc9657ccf6680d6021e511c66b57a5e
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:539857dae12a301c64f1cdcee5b9e755a3adf02b5f2e124e9dc3b2f9e248048e
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:536398da36df23920039973a73d140566043cc53ee2a0d914a8844372a64aff9
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d14cc8eddd75e465d397973a37b0782cb4904becec6aaf91a4367678873677c
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ce4f3f6650309f13f3e6d544de1e20e355a6f08806eef191cfd501c8ff2acbc
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a7fd52544d2ce19fd2ae3abd5446bf450e9dfee065f42e0f181afb2e5413a86
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7bfd2dcd8cf9b2c47812d070d52dfe7de4e44087e51ca1e1d579f2f5f334260
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64b15490e5736a45fc3703acb705c86fb53bf08913287d1dc7d64b223537e01a
|
3 |
+
size 25166064
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dcbe0259c3f13e64c37658cc5bed072e1b14d1c1fd7bd920a7737f5df08d69a
|
3 |
+
size 50331944
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12c42c59c00b0a680a2da7797fe742a81dc60b471dc8e98dc735110c7e5a2ad2
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0b40fd72f645c955728741df102ce92daa359c789b8bc35a7c009c49ce80396
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:347f3971a29037808915a992d49172b4f8aafe5142ed8517ef74cacab96238ca
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d72a03fe0dfbe9c614e25c2a339979ad70cdb71ab3d8292c146bccabf9e0afb6
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ec84771ce31f81f1ec995a024d49b497bba9e2024aeac5faadc8ffedf4458c7
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23f4fc83195f52847195a215dae1e010ca868000095092e64f69825ad050ef79
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c53af868894ef04ad4253fbf68c121f01599134ce660e637b8a5e3f883334413
|
3 |
+
size 50331952
|
70000/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cfef55c64df70d52293e020ab6059676279dee0eb6ec8b70021c2004340b1e8
|
3 |
+
size 8288
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6844d86e43c3eeeb2f837a178d03f1d4ee2000fca9030a908828fbd259198f8c
|
3 |
+
size 4194536
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf465add9144127313ccf1daed2830c00e80971ef8801b35f1a40c5432c800bd
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:241fae3f5d988a9ed1547709a7bfbdb80adb33f62fc19465ebdb646996cc689e
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc7f9f5371026139f947dfbfb09a8f81bf04d0cf6851aa863edbd6846f240c29
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04d9cddd3c55dec8bf5d342e89a9704aa996387f3b7cf015152a7379498b3879
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-5-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:077701a3bda055d412b631689fd87725d2a46a5d3b2cbff7ef86d01c81374018
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-6-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7806c296fe37fe4b2e5f83a948a6a7dace14e112b3ed5493804f9cfc2438010
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-7-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dda24af303369e36c8aadf2f141f24e8286f63212cc4be01b58eeea199137347
|
3 |
+
size 4194544
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ed20f3337c7ba5f80b3edc443d296071f232178daf676a330b0c420332f2f0
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-1-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0b8192b996f3fb5b193e441fabc579468aac681a98385d81b84bb1d0c48c57b
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-2-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88a8b9b3e76e271956e023d8d16968883902b927e1ea476ea0fe25a539b55ec5
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-3-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:806c1d4682253e4d0d7da1cbfe787dc50b60732f14f26aaefba8a5eddf088adf
|
3 |
+
size 8388960
|
70000/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-4-of-8.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60b6c0c58e3321fe8f82ed19d2a1529e652949a61570ef3c931af659920291c8
|
3 |
+
size 8388960
|