MNC-LLM committed on
Commit 2215056
1 Parent(s): 6c7a5bf

Model save
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05
+base_model: allenai/tulu-2-dpo-7b
 tags:
 - generated_from_trainer
 model-index:
@@ -12,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # batch1_epochs4_lr1e-05_paged_adamw_32bit_cosine_length2048_warmup_0.05_max_grad1.0_grad_accu16
 
-This model is a fine-tuned version of [MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05](https://huggingface.co/MNC-LLM/Mistral-7B-NWS-u2k-eng-cot-ep4-lr1e-05) on the None dataset.
+This model is a fine-tuned version of [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) on the None dataset.
 
 ## Model description
 
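The README change points the model card at a new base model. As a rough illustration (not part of the commit), this is how the shards saved here would typically be loaded with transformers; the local path `./checkpoint` is a hypothetical stand-in for wherever this repo is cloned:

```python
# A minimal sketch, assuming the committed files sit in a local checkpoint
# directory; the path "./checkpoint" is hypothetical, not part of the commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer from the new base model named in README.md.
tokenizer = AutoTokenizer.from_pretrained("allenai/tulu-2-dpo-7b")

# from_pretrained reads pytorch_model.bin.index.json and loads both shards.
model = AutoModelForCausalLM.from_pretrained("./checkpoint")
```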
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7b8fc3789fea6baf58f5d72da2e19647d7414a396d0ebf00dcecbd982d5b073
-size 9943030860
+oid sha256:7ef37ff4d66c97c6bb73bb5521e597806c657f411f38b1c52bd239ed5404ca2b
+size 9976623130
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf2d671c0dae4495933e3a0483a303d0a97a033151bc76d0cc622fcd3961efe2
-size 4540536863
+oid sha256:cd764682257415e54346b4259f6e67bb17cd77ec924e8d07ce3755975bc54998
+size 3500311811
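These two files are Git LFS pointers: each stores the SHA-256 ("oid") and byte size of the real weight blob, so a download can be verified locally. A minimal sketch (not part of the commit) of that check:

```python
# Verify a downloaded shard against its Git LFS pointer fields.
import hashlib
import os

def verify_lfs_blob(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    # Size comparison first; it is the cheap check.
    if os.path.getsize(blob_path) != expected_size:
        return False
    # Stream the file through SHA-256 to avoid loading ~10 GB into memory.
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid

# oid and size taken from the new pointer for the first shard above.
ok = verify_lfs_blob(
    "pytorch_model-00001-of-00002.bin",
    "7ef37ff4d66c97c6bb73bb5521e597806c657f411f38b1c52bd239ed5404ca2b",
    9976623130,
)
```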
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14483464192
+    "total_size": 13476831232
   },
   "weight_map": {
     "lm_head.weight": "pytorch_model-00002-of-00002.bin",
@@ -140,24 +140,24 @@
     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
-    "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
-    "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
     "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
     "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",