clowman committed
Commit ad5cc76
1 parent: 51ca443

Train 5 epochs on mistral formatted data

config.json CHANGED
@@ -1,9 +1,8 @@
 {
   "_name_or_path": "imone/Mistral_7B_with_EOT_token",
   "architectures": [
-    "LlamaForCausalLM"
+    "MistralForCausalLM"
   ],
-  "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 32000,
@@ -12,13 +11,11 @@
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 8192,
-  "model_type": "llama",
+  "model_type": "mistral",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "tie_word_embeddings": false,
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:888f2931f401ea6f5eda9bf2d4d1433dd1eefa8579b0a07b192863f54fc0e141
-size 4943178728
+oid sha256:1122c9732f0b84b9ca3ac69038cbbd40a27d479c8fc734d04870d0656851c772
+size 4943178720
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9ca2b2200d9195216d05f7142c3824cf7c6a9d9562e525badb79ff6385c28fb
+oid sha256:b9a3b45bdebf30ffcd44cb9eca12f2a1f4ea8731da36caa0fed1de449af62e44
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01e461b9ada5cb3839ad3d3fbc68e321fadfeaf0cfa3d9d924a4ba8f7fc5f7fe
+oid sha256:e342e33392e1657134f1a131291bd5e7851acdc8e489173fcaa32ece9883ceb9
 size 4540532728
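These three shard files are Git LFS pointers, so only the oid (the SHA-256 of the shard contents) and size change in this commit. A quick sketch for verifying a downloaded shard against the new oid shown above:

# Sketch: check a downloaded shard against the sha256 oid in its LFS pointer.
# Uses the third shard and its new oid from this commit as the example.
import hashlib

expected = "e342e33392e1657134f1a131291bd5e7851acdc8e489173fcaa32ece9883ceb9"
h = hashlib.sha256()
with open("model-00003-of-00003.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print("ok" if h.hexdigest() == expected else "hash mismatch")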
model.safetensors.index.json CHANGED
@@ -24,9 +24,9 @@
     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
-    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
openchat.json CHANGED
@@ -1 +1 @@
-{"local_rank": 0, "model_path": "imone/Mistral_7B_with_EOT_token", "data_prefix": "./data/", "save_path": "./checkpoints/mistral-7b/", "save_every": 1, "batch_max_len": 77824, "epochs": 10, "lr": 0.00010565752123252648, "lr_min_ratio": 0.1, "lr_warmup_ratio": 0.05, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "eps": 1e-05, "deepspeed": true, "deepspeed_config": "deepspeed_config.json", "deepscale": false, "deepscale_config": null, "model_type": "openchat_v3.2", "device": "<non-serializable>", "epoch": 9}
+{"local_rank": 0, "model_path": "imone/Mistral_7B_with_EOT_token", "data_prefix": "./data/mistral/", "save_path": "./checkpoints/mistral-7b/", "save_every": 1, "batch_max_len": 77824, "epochs": 5, "lr": 1.7583295075430884e-05, "lr_min_ratio": 0.1, "lr_warmup_ratio": 0.05, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "eps": 1e-05, "deepspeed": true, "deepspeed_config": "deepspeed_config.json", "deepscale": false, "deepscale_config": null, "model_type": "openchat_v3.2_mistral", "device": "<non-serializable>", "epoch": 4}