binoy370sk committed
Commit ad27dfb · verified · 1 Parent(s): f64c140

Upload 9 files

configs/410M-og-pythia-gptj-chatbbot.yml ADDED
@@ -0,0 +1,112 @@
+ {
+
+ "train_data_paths": ["/workspace/harshit_data/ChatbotData/bin_data/train/bin__text_document"],
+ "valid_data_paths": ["/workspace/harshit_data/ChatbotData/bin_data/val/bin__text_document"],
+ "test_data_paths": ["/workspace/harshit_data/ChatbotData/bin_data/test/bin__text_document"],
+
+ "tokenizer_type": "HFTokenizer",
+ "vocab_file": "/workspace/harshit_data/ChatbotData/harshit_chatbot_tokenizer.json",
+
+
+ "save": "checkpoints/410M_og_pythia_gpt_j_chatbot",
+ "load": "checkpoints/410M_og_pythia_gpt_j_chatbot",
+ "tensorboard_dir": "tensorboard/410M_og_pythia_gpt_j_chatbot",
+ "log_dir": "logs/410M_og_pythia_gpt_j_chatbot",
+
+
+ "use_wandb": False,
+ # parallelism settings
+ "pipe_parallel_size": 1,
+ "model_parallel_size": 4,
+
+ # model settings
+ "num_layers": 24,
+ "hidden_size": 1024,
+ "num_attention_heads": 16,
+ "seq_length": 2048,
+ "max_position_embeddings": 2048,
+ "pos_emb": "rotary",
+ "rotary_pct": 0.25,
+ "no_weight_tying": true,
+ "gpt_j_residual": true,
+ "output_layer_parallelism": "column",
+
+ "attention_config": [[["flash"], 24]],
+
+ "scaled_upper_triang_masked_softmax_fusion": true,
+ "bias_gelu_fusion": true,
+
+ # init methods
+ "init_method": "small_init",
+ "output_layer_init_method": "wang_init",
+
+ "optimizer": {
+ "type": "Adam",
+ "params": {
+ "lr": 0.0003,
+ "betas": [0.9, 0.95],
+ "eps": 1.0e-8
+ }
+ },
+ "min_lr": 0.00003,
+
+ "zero_optimization": {
+ "stage": 1,
+ "allgather_partitions": true,
+ "allgather_bucket_size": 500000000,
+ "overlap_comm": true,
+ "reduce_scatter": true,
+ "reduce_bucket_size": 500000000,
+ "contiguous_gradients": true,
+ "cpu_offload": false
+ },
+
+ # batch size (trained on 32 gpus)
+ "train_micro_batch_size_per_gpu": 32,
+ "data_impl": "mmap",
+ "num_workers": 1,
+
+ # activation checkpointing
+ "checkpoint_activations": true,
+ "checkpoint_num_layers": 1,
+ "partition_activations": true,
+ "synchronize_each_layer": true,
+
+ # regularization
+ "gradient_clipping": 1.0,
+ "weight_decay": 0.1,
+ "hidden_dropout": 0,
+ "attention_dropout": 0,
+
+ # precision settings
+ "fp16": {
+ "fp16": true,
+ "enabled": true,
+ "loss_scale": 0,
+ "loss_scale_window": 1000,
+ "initial_scale_power": 12,
+ "hysteresis": 2,
+ "min_loss_scale": 1,
+ },
+
+ # misc. training settings
+ "train_iters": 100000,
+ "lr_decay_iters": 100000,
+ "distributed_backend": "nccl",
+ "lr_decay_style": "cosine",
+ "warmup": 0.00,
+ "checkpoint_factor": 5000,
+ "eval_interval": 5000,
+ "eval_iters": 10,
+ "do_test": true,
+ "extra_save_iters": [10,100,500,1000],
+
+ # logging
+ "log_interval": 100,
+ "steps_per_print": 10,
+ "keep_last_n_checkpoints": 10,
+ "wall_clock_breakdown": true,
+
+
+
+ }
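
This is essentially the stock GPT-NeoX/Pythia 410M recipe (24 layers, hidden size 1024, 16 heads, rotary embeddings on 25% of the head dims, GPT-J-style parallel residual, flash attention) retargeted at custom chatbot data and an HF tokenizer. A minimal sketch of sanity-checking the file before training — assuming PyYAML, whose YAML 1.1 parser reads the Python-style `False` as a boolean and is lenient about the trailing commas in the flow mappings:

```python
# Minimal sketch: parse the config and estimate the non-embedding
# parameter count. Assumes PyYAML and the path from this commit.
import yaml

with open("configs/410M-og-pythia-gptj-chatbbot.yml") as f:
    cfg = yaml.safe_load(f)

L, H, A = cfg["num_layers"], cfg["hidden_size"], cfg["num_attention_heads"]
assert H % A == 0, "hidden_size must divide evenly across attention heads"

# Standard rough estimate: ~12 * L * H^2 non-embedding parameters,
# about 302M here; embeddings bring a Pythia-410M-shaped model to ~410M.
print(f"{L} layers, hidden {H}, {A} heads")
print(f"~{12 * L * H**2 / 1e6:.0f}M non-embedding parameters")
```

In GPT-NeoX this file would typically be passed to the launcher as `python ./deepy.py train.py <config>.yml`; with `"model_parallel_size": 4` a run produces the four `mp_rank_0*` shards below.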
mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c6c4291dc827a50a8a20ea845c81172368ec66542d9596251ad615b8a59ee098
+ size 184500188
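
The checkpoint tensors themselves live in Git LFS; what the repo stores is a three-line pointer carrying the blob's sha256 and byte size, as above. A small sketch (stdlib only, hypothetical paths) of checking a downloaded shard against its pointer:

```python
# Sketch: verify a downloaded blob against its git-lfs pointer file.
# Both paths are hypothetical stand-ins for illustration.
import hashlib
import os

def parse_pointer(path):
    fields = dict(line.strip().split(" ", 1) for line in open(path))
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify(pointer_path, blob_path, chunk=1 << 20):
    oid, size = parse_pointer(pointer_path)
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        while block := f.read(chunk):
            digest.update(block)
    return digest.hexdigest() == oid and os.path.getsize(blob_path) == size

print(verify("mp_rank_00.pointer", "mp_rank_00_model_states.pt"))
```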
mp_rank_01_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d6acb47d996e5d795cb02143bfcbf7a381d7ae064c38c6599301e66190723de
+ size 184500188
mp_rank_02_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:491c8dcb7d42ea18dc50b0329b4951877d38fdf0125ad60c6c1d5063f2543ae5
+ size 184500188
mp_rank_03_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a17f3481845d6c342773c3dd294a5ffd3a9f6d13e63b71dcaf5cd2fd86ef2be8
+ size 184500188
zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c884e2ec8fa83a4d45115d02f11f704a6c6dae93984f6e2ebeb38244642054c5
+ size 1106487998
zero_pp_rank_0_mp_rank_01_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcf22cacc45cd8882f734587e6cd4284861f5aa8bf41aee17bda3e6b795a855c
+ size 1106487998
zero_pp_rank_0_mp_rank_02_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e22abaddcb2b1b279db5880059c2f94da84d6ef9c0234fc013985826cc42dda
+ size 1106487998
zero_pp_rank_0_mp_rank_03_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21c84c0951d327c81fc46967a55bc9662286ee027a6637af72cf279dfb32d2c4
+ size 1106487998
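
The four `mp_rank_0{0..3}` files match `"model_parallel_size": 4` — one fp16 model shard per tensor-parallel rank — and each `zero_pp_rank_0_mp_rank_0*` file is that rank's ZeRO stage-1 optimizer partition. The sizes line up: 184,500,188 bytes is ~92.25M fp16 parameters per shard, and 1,106,487,998 bytes is roughly 12 bytes per parameter, i.e. Adam's fp32 master weights plus two fp32 moments. A sketch of peeking inside one shard; the exact keys in the pickled dict depend on the DeepSpeed/GPT-NeoX versions that wrote it (an assumption here):

```python
# Sketch: inspect a tensor-parallel model shard on CPU. Key names are
# version-dependent (assumption); 'module' usually holds the state_dict.
# weights_only=False is required on newer PyTorch, which defaults to True.
import torch

state = torch.load("mp_rank_00_model_states.pt",
                   map_location="cpu", weights_only=False)
print(list(state.keys()))

module = state.get("module", state)
for name, t in list(module.items())[:5]:
    if torch.is_tensor(t):
        print(name, tuple(t.shape), t.dtype)
```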