diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index be0424d9c1da1d5ebed05a465b6bf7e422b1fa72..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,41 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -checkpoint-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-12500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-15000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-20000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-25000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 92a48a4e2c09fdf099a6b79ea5725f4c44bf5bda..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,23 +0,0 @@ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ diff --git a/all_results.json b/all_results.json deleted file mode 100644 index 4145b3d3ef673f7afed721e01c4ac4fa05eda6f2..0000000000000000000000000000000000000000 --- a/all_results.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "epoch": 19.86, - "eval_loss": 3.520382881164551, - "eval_runtime": 518.4337, - "eval_samples": 4906, - "eval_samples_per_second": 9.463, - "eval_steps_per_second": 4.732, - "perplexity": 33.79736636184415, - "train_loss": 3.6973518359375, - "train_runtime": 53135.1163, - "train_samples": 10000, - "train_samples_per_second": 3.764, - "train_steps_per_second": 0.47 -} \ No newline at end of file diff --git a/checkpoint-10000/config.json b/checkpoint-10000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-10000/config.json +++ 
/dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-10000/optimizer.pt b/checkpoint-10000/optimizer.pt deleted file mode 100644 index 0747125f263afa45742a1c4d3b5e0cfe9c48ab0b..0000000000000000000000000000000000000000 --- a/checkpoint-10000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf7f64bb919c1b75c964f253c50125ccf1e1684c0c9979785ab5f879599cba19 -size 2254269 diff --git a/checkpoint-10000/pytorch_model.bin b/checkpoint-10000/pytorch_model.bin deleted file mode 100644 index 343fb03eac16f1229009a1af50f21dbfef09bee8..0000000000000000000000000000000000000000 --- a/checkpoint-10000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe3c7b0341df858cc1beabbbceb6eed9e315110b3e54ad2cb52d600ec787b536 -size 2236955191 diff --git a/checkpoint-10000/rng_state.pth b/checkpoint-10000/rng_state.pth deleted file mode 100644 index 74e5a9440ebca137b01696d05621b0f1a46a0418..0000000000000000000000000000000000000000 --- a/checkpoint-10000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff0b7d688f72cecc3d6db8a5558dfafab2669c75076ea7f5e8d0318cba901963 -size 14503 diff --git a/checkpoint-10000/scheduler.pt b/checkpoint-10000/scheduler.pt deleted file mode 100644 index 0278cf8d1585ae197bad514a213741000bcf63c6..0000000000000000000000000000000000000000 --- a/checkpoint-10000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebae5cf74f470a9dc57b090feb9de29d57aa2d381061d1a61fd32b3c3221556b -size 623 diff --git a/checkpoint-10000/special_tokens_map.json b/checkpoint-10000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-10000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-10000/tokenizer.json b/checkpoint-10000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-10000/tokenizer_config.json b/checkpoint-10000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer_config.json +++ 
/dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-10000/trainer_state.json b/checkpoint-10000/trainer_state.json deleted file mode 100644 index 0f7bb83f4a4128fa53e9d637f184894dd61268ed..0000000000000000000000000000000000000000 --- a/checkpoint-10000/trainer_state.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "best_metric": 3.64797043800354, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-10000", - "epoch": 7.942250446517166, - "global_step": 10000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 9e-05, - "loss": 4.249, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 8e-05, - "loss": 3.9172, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.8250739574432373, - "eval_runtime": 519.4242, - "eval_samples_per_second": 9.445, - "eval_steps_per_second": 4.723, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 7e-05, - "loss": 3.7702, - "step": 7500 - }, - { - "epoch": 7.94, - "learning_rate": 6e-05, - "loss": 3.6827, - "step": 10000 - }, - { - "epoch": 7.94, - "eval_loss": 3.64797043800354, - "eval_runtime": 519.0865, - "eval_samples_per_second": 9.451, - "eval_steps_per_second": 4.726, - "step": 10000 - } - ], - "max_steps": 25000, - "num_train_epochs": 20, - "total_flos": 1.4865812724645888e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-10000/training_args.bin b/checkpoint-10000/training_args.bin deleted file mode 100644 index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000 --- a/checkpoint-10000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945 -size 3375 diff --git a/checkpoint-10000/wikiann-az-results.txt b/checkpoint-10000/wikiann-az-results.txt deleted file mode 100644 index 69acae7dcb4c7167f1474d29c1cdc0b230e684ac..0000000000000000000000000000000000000000 --- a/checkpoint-10000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-10000 -[0.3831710709318498, 0.36646621860629647, 0.37618879887284257, 0.36916695471828553, 0.39875173370319006, 0.3918592618144188, 0.3883292810003473, 0.3762723762723763, 0.37241379310344824, 0.37391001046389954] -37.97 -1.00 -================================================== \ No newline at end of file diff --git a/checkpoint-12500/config.json b/checkpoint-12500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-12500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - 
"attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-12500/optimizer.pt b/checkpoint-12500/optimizer.pt deleted file mode 100644 index 8ca81fd4fc8d53616522b561c2a32b8e58583520..0000000000000000000000000000000000000000 --- a/checkpoint-12500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ba0462ba8ad328e587ce96f04f4c5e6f69394ff3582a4828bf6d85660bfe06b -size 2254269 diff --git a/checkpoint-12500/pytorch_model.bin b/checkpoint-12500/pytorch_model.bin deleted file mode 100644 index af46535ed6b9bb2bc1b24c7dcabc2afe95ff5307..0000000000000000000000000000000000000000 --- a/checkpoint-12500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee64dfd5244f8dc74878e78f209466dc68d8ea2c528c3719121c12794dc9ec09 -size 2236955191 diff --git a/checkpoint-12500/rng_state.pth b/checkpoint-12500/rng_state.pth deleted file mode 100644 index 2645397a74a6b6a3bffa474dcf496600b7c24935..0000000000000000000000000000000000000000 --- a/checkpoint-12500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0217759d118441b4ad07c71fd1be8c09ef99c4c1b9ca5ace0ae209a8872fb667 -size 14503 diff --git a/checkpoint-12500/scheduler.pt b/checkpoint-12500/scheduler.pt deleted file mode 100644 index 20fe61e31c8ad93792e2966e7a5aadf8fdfeb769..0000000000000000000000000000000000000000 --- a/checkpoint-12500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d270c6e3000cbdb534f7db7e774ca17393c2523690c8058754d752dd5b11a93a -size 623 diff --git a/checkpoint-12500/special_tokens_map.json b/checkpoint-12500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-12500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-12500/tokenizer.json b/checkpoint-12500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-12500/tokenizer_config.json b/checkpoint-12500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git 
a/checkpoint-12500/trainer_state.json b/checkpoint-12500/trainer_state.json deleted file mode 100644 index 53f4f862b59c9206bffcfb6e22ce21cdd0786de2..0000000000000000000000000000000000000000 --- a/checkpoint-12500/trainer_state.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "best_metric": 3.1985085010528564, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-12500", - "epoch": 9.927961897201826, - "global_step": 12500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 0.0008, - "loss": 3.6925, - "step": 2500 - }, - { - "epoch": 1.99, - "eval_loss": 3.425896406173706, - "eval_runtime": 510.13, - "eval_samples_per_second": 9.617, - "eval_steps_per_second": 4.809, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 0.0006, - "loss": 3.3514, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.297409772872925, - "eval_runtime": 509.9866, - "eval_samples_per_second": 9.62, - "eval_steps_per_second": 4.81, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 0.0004, - "loss": 3.2632, - "step": 7500 - }, - { - "epoch": 5.96, - "eval_loss": 3.2419724464416504, - "eval_runtime": 509.6021, - "eval_samples_per_second": 9.627, - "eval_steps_per_second": 4.814, - "step": 7500 - }, - { - "epoch": 7.94, - "learning_rate": 0.0002, - "loss": 3.2189, - "step": 10000 - }, - { - "epoch": 7.94, - "eval_loss": 3.210970163345337, - "eval_runtime": 509.6579, - "eval_samples_per_second": 9.626, - "eval_steps_per_second": 4.813, - "step": 10000 - }, - { - "epoch": 9.93, - "learning_rate": 0.0, - "loss": 3.1953, - "step": 12500 - }, - { - "epoch": 9.93, - "eval_loss": 3.1985085010528564, - "eval_runtime": 509.537, - "eval_samples_per_second": 9.628, - "eval_steps_per_second": 4.814, - "step": 12500 - } - ], - "max_steps": 12500, - "num_train_epochs": 10, - "total_flos": 1.8582498082553856e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-12500/training_args.bin b/checkpoint-12500/training_args.bin deleted file mode 100644 index 691ea3fe0403fa3bc75e8b21ff0aeb01fb29c7ed..0000000000000000000000000000000000000000 --- a/checkpoint-12500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5a0101fd8f6900559ae4c013c2312163c0f239c35b8f9e14dabf6d0adafea84 -size 3375 diff --git a/checkpoint-12500/wikiann-az-results.txt b/checkpoint-12500/wikiann-az-results.txt deleted file mode 100644 index b2692f433d17351067f158783e837ab491f7a5e9..0000000000000000000000000000000000000000 --- a/checkpoint-12500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-12500 -[0.3931564245810055, 0.3662171753016323, 0.37123627346794197, 0.37279944770452195, 0.3948646773074254, 0.3877904960110995, 0.37873523280055593, 0.3800491745697225, 0.3798690106859704, 0.37486910994764405] -38.00 -0.90 -================================================== \ No newline at end of file diff --git a/checkpoint-15000/config.json b/checkpoint-15000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-15000/config.json +++ /dev/null @@ -1,39 
+0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-15000/optimizer.pt b/checkpoint-15000/optimizer.pt deleted file mode 100644 index fcb11720ad766f4d24107c982df605c7a4e3b7b0..0000000000000000000000000000000000000000 --- a/checkpoint-15000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93338749e626564421d833643e2dd16d8515d99d53a300869d23436882e97cf2 -size 2254269 diff --git a/checkpoint-15000/pytorch_model.bin b/checkpoint-15000/pytorch_model.bin deleted file mode 100644 index aa89130d7d36ee909de07cb8b6d116906dcd7701..0000000000000000000000000000000000000000 --- a/checkpoint-15000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e3f96f3abaf7a0f9af4115907c15df4c5565ed49672ae92cf29d31269fb6e7f -size 2236955191 diff --git a/checkpoint-15000/rng_state.pth b/checkpoint-15000/rng_state.pth deleted file mode 100644 index 2645397a74a6b6a3bffa474dcf496600b7c24935..0000000000000000000000000000000000000000 --- a/checkpoint-15000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0217759d118441b4ad07c71fd1be8c09ef99c4c1b9ca5ace0ae209a8872fb667 -size 14503 diff --git a/checkpoint-15000/scheduler.pt b/checkpoint-15000/scheduler.pt deleted file mode 100644 index 8d3d39ad4b2183a90165fa1731ec456fda1ee5c9..0000000000000000000000000000000000000000 --- a/checkpoint-15000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:301727affc0c0a4c1f25106f7fd12c059ede0526ba52733c25be949ad3bc04d7 -size 623 diff --git a/checkpoint-15000/special_tokens_map.json b/checkpoint-15000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-15000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-15000/tokenizer.json b/checkpoint-15000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-15000/tokenizer_config.json b/checkpoint-15000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 
@@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-15000/trainer_state.json b/checkpoint-15000/trainer_state.json deleted file mode 100644 index 09f94a5d59a1f3eb1a3feed94d15a7a198f1980f..0000000000000000000000000000000000000000 --- a/checkpoint-15000/trainer_state.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "best_metric": 3.5690791606903076, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-15000", - "epoch": 11.913673347886485, - "global_step": 15000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 9e-05, - "loss": 4.249, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 8e-05, - "loss": 3.9172, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.8250739574432373, - "eval_runtime": 519.4242, - "eval_samples_per_second": 9.445, - "eval_steps_per_second": 4.723, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 7e-05, - "loss": 3.7702, - "step": 7500 - }, - { - "epoch": 7.94, - "learning_rate": 6e-05, - "loss": 3.6827, - "step": 10000 - }, - { - "epoch": 7.94, - "eval_loss": 3.64797043800354, - "eval_runtime": 519.0865, - "eval_samples_per_second": 9.451, - "eval_steps_per_second": 4.726, - "step": 10000 - }, - { - "epoch": 9.93, - "learning_rate": 5e-05, - "loss": 3.624, - "step": 12500 - }, - { - "epoch": 11.91, - "learning_rate": 4e-05, - "loss": 3.585, - "step": 15000 - }, - { - "epoch": 11.91, - "eval_loss": 3.5690791606903076, - "eval_runtime": 519.0226, - "eval_samples_per_second": 9.452, - "eval_steps_per_second": 4.726, - "step": 15000 - } - ], - "max_steps": 25000, - "num_train_epochs": 20, - "total_flos": 2.2299183440461824e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-15000/training_args.bin b/checkpoint-15000/training_args.bin deleted file mode 100644 index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000 --- a/checkpoint-15000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945 -size 3375 diff --git a/checkpoint-20000/config.json b/checkpoint-20000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-20000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - 
"transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt deleted file mode 100644 index 8af13cdaa0e645f88f3dde5aa694f05e51e6bb23..0000000000000000000000000000000000000000 --- a/checkpoint-20000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a002623dc1f3a7f26188c4a378e00394c1a949760f52844efbdd6f502ebdad5e -size 2254269 diff --git a/checkpoint-20000/pytorch_model.bin b/checkpoint-20000/pytorch_model.bin deleted file mode 100644 index 817e7d5e36d120a0e8227cbb0eb9de390109f781..0000000000000000000000000000000000000000 --- a/checkpoint-20000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70afd8810d9ba16c3568302d0d9d5f78dcba8edba783672ac9c11160a556f4f3 -size 2236955191 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth deleted file mode 100644 index 17e114315addd3b9d99707b37203994031fdd092..0000000000000000000000000000000000000000 --- a/checkpoint-20000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:528887aeaf571c1dd9d1789c0fad11e336830c7f10d9174d25b3f236cf9a2aa4 -size 14503 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt deleted file mode 100644 index 0fce858fc59e1c04346ec17a91eea84ca7634ec2..0000000000000000000000000000000000000000 --- a/checkpoint-20000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69341a1831197b0345e31eaac56abf9ad4527cc56eba4b526818b4ffb6ef6dad -size 623 diff --git a/checkpoint-20000/special_tokens_map.json b/checkpoint-20000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-20000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-20000/tokenizer.json b/checkpoint-20000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-20000/tokenizer_config.json b/checkpoint-20000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json deleted file mode 100644 index 7a8d4d43228be197a59eee666c88081a4ef65d48..0000000000000000000000000000000000000000 --- a/checkpoint-20000/trainer_state.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "best_metric": 3.531822443008423, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-20000", - "epoch": 15.885096249255804, - "global_step": 20000, - 
"is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 9e-05, - "loss": 4.249, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 8e-05, - "loss": 3.9172, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.8250739574432373, - "eval_runtime": 519.4242, - "eval_samples_per_second": 9.445, - "eval_steps_per_second": 4.723, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 7e-05, - "loss": 3.7702, - "step": 7500 - }, - { - "epoch": 7.94, - "learning_rate": 6e-05, - "loss": 3.6827, - "step": 10000 - }, - { - "epoch": 7.94, - "eval_loss": 3.64797043800354, - "eval_runtime": 519.0865, - "eval_samples_per_second": 9.451, - "eval_steps_per_second": 4.726, - "step": 10000 - }, - { - "epoch": 9.93, - "learning_rate": 5e-05, - "loss": 3.624, - "step": 12500 - }, - { - "epoch": 11.91, - "learning_rate": 4e-05, - "loss": 3.585, - "step": 15000 - }, - { - "epoch": 11.91, - "eval_loss": 3.5690791606903076, - "eval_runtime": 519.0226, - "eval_samples_per_second": 9.452, - "eval_steps_per_second": 4.726, - "step": 15000 - }, - { - "epoch": 13.9, - "learning_rate": 3e-05, - "loss": 3.5588, - "step": 17500 - }, - { - "epoch": 15.89, - "learning_rate": 2e-05, - "loss": 3.54, - "step": 20000 - }, - { - "epoch": 15.89, - "eval_loss": 3.531822443008423, - "eval_runtime": 518.4812, - "eval_samples_per_second": 9.462, - "eval_steps_per_second": 4.731, - "step": 20000 - } - ], - "max_steps": 25000, - "num_train_epochs": 20, - "total_flos": 2.973255415627776e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin deleted file mode 100644 index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000 --- a/checkpoint-20000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945 -size 3375 diff --git a/checkpoint-2500/config.json b/checkpoint-2500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-2500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt deleted file mode 100644 index f0273a55ea432ab07f965e9f795674f8ba5993f5..0000000000000000000000000000000000000000 --- a/checkpoint-2500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:41760dbdcd7c2a0f19e33dbacd6b48ccb3c940c26ead21843a4bffcb5bff4393 -size 2254269 diff --git a/checkpoint-2500/pytorch_model.bin b/checkpoint-2500/pytorch_model.bin deleted file mode 100644 index 9f56eb88ab1e71a3b7e104f79bc147d2c400a873..0000000000000000000000000000000000000000 --- a/checkpoint-2500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dc2b7d0a79254f7841b16b221ab8c6c6bb887c251fc400c22639ffa8cd17539c -size 2236955191 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth deleted file mode 100644 index 0e16a98c64ebc66b52ccb831ca38ca1d8b8a6933..0000000000000000000000000000000000000000 --- a/checkpoint-2500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:674676e662eeb93778c2b153ffad13aa90b43355da1956ce0b1e01e72f48c8d7 -size 14503 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt deleted file mode 100644 index 39f754e4e40932a231da06fd74e846d4e0c1c2a3..0000000000000000000000000000000000000000 --- a/checkpoint-2500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8848be77d5e16f8ad560a7262091b3d3fcd8d0f3fa50682054480c93bc684fe6 -size 623 diff --git a/checkpoint-2500/special_tokens_map.json b/checkpoint-2500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-2500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-2500/tokenizer.json b/checkpoint-2500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-2500/tokenizer_config.json b/checkpoint-2500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json deleted file mode 100644 index e7f78c3e65223268f6339765786817f4747f8683..0000000000000000000000000000000000000000 --- a/checkpoint-2500/trainer_state.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "best_metric": 3.425896406173706, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-2500", - "epoch": 1.9851160944631872, - "global_step": 2500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 0.0008, - "loss": 3.6925, - "step": 2500 - }, - { - "epoch": 1.99, - "eval_loss": 3.425896406173706, - "eval_runtime": 510.13, - "eval_samples_per_second": 9.617, - "eval_steps_per_second": 4.809, - "step": 2500 - } - ], - "max_steps": 12500, - "num_train_epochs": 10, - "total_flos": 
3.715756650921984e+16, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin deleted file mode 100644 index 691ea3fe0403fa3bc75e8b21ff0aeb01fb29c7ed..0000000000000000000000000000000000000000 --- a/checkpoint-2500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5a0101fd8f6900559ae4c013c2312163c0f239c35b8f9e14dabf6d0adafea84 -size 3375 diff --git a/checkpoint-2500/wikiann-az-results.txt b/checkpoint-2500/wikiann-az-results.txt deleted file mode 100644 index 838d8623fe33fae20318c4d80a7c6a00311da1e3..0000000000000000000000000000000000000000 --- a/checkpoint-2500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-2500 -[0.38242530755711773, 0.359518754423213, 0.37277486910994767, 0.3864902506963789, 0.38790406673618355, 0.4061196105702364, 0.3722397476340694, 0.38269433696799154, 0.3635784781916587, 0.3857690966166725] -38.00 -1.28 -================================================== \ No newline at end of file diff --git a/checkpoint-25000/config.json b/checkpoint-25000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-25000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-25000/optimizer.pt b/checkpoint-25000/optimizer.pt deleted file mode 100644 index 604fc48c647119f915d3838a2a52e054db20a025..0000000000000000000000000000000000000000 --- a/checkpoint-25000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e5ce5a41ad17beb7a5f2c048600fe879a5c7f4d9d198cde90d18b45011661b0 -size 2254269 diff --git a/checkpoint-25000/pytorch_model.bin b/checkpoint-25000/pytorch_model.bin deleted file mode 100644 index 563a0b3966e9ff128f1b1817695da4e6c2833cc3..0000000000000000000000000000000000000000 --- a/checkpoint-25000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff22586eea4fb06e03ee9ab88ff9dcf9845667b34e4c607443fa5bda35685958 -size 2236955191 diff --git a/checkpoint-25000/rng_state.pth b/checkpoint-25000/rng_state.pth deleted file mode 100644 index 91674c3f22f32df4347a54e6e7cd5d9b9a0848e2..0000000000000000000000000000000000000000 --- a/checkpoint-25000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:0c2a3ff852d2aa963610ba6a5b806297a1c3c1ead923d2820e04e29706d893bd -size 14503 diff --git a/checkpoint-25000/scheduler.pt b/checkpoint-25000/scheduler.pt deleted file mode 100644 index 7ad3b44dd75ce7d8d5e7e1e604001842c0cc94ff..0000000000000000000000000000000000000000 --- a/checkpoint-25000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d68cbb33fa1a2e013758e6ff8a1c4cb984da09f9cb40986c80de7fb857fd18f -size 623 diff --git a/checkpoint-25000/special_tokens_map.json b/checkpoint-25000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-25000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-25000/tokenizer.json b/checkpoint-25000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-25000/tokenizer_config.json b/checkpoint-25000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-25000/trainer_state.json b/checkpoint-25000/trainer_state.json deleted file mode 100644 index 127f980a4ab49525b0a9484c08b2371dd4013723..0000000000000000000000000000000000000000 --- a/checkpoint-25000/trainer_state.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "best_metric": 3.520382881164551, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-25000", - "epoch": 19.856519150625125, - "global_step": 25000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 9e-05, - "loss": 4.249, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 8e-05, - "loss": 3.9172, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.8250739574432373, - "eval_runtime": 519.4242, - "eval_samples_per_second": 9.445, - "eval_steps_per_second": 4.723, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 7e-05, - "loss": 3.7702, - "step": 7500 - }, - { - "epoch": 7.94, - "learning_rate": 6e-05, - "loss": 3.6827, - "step": 10000 - }, - { - "epoch": 7.94, - "eval_loss": 3.64797043800354, - "eval_runtime": 519.0865, - "eval_samples_per_second": 9.451, - "eval_steps_per_second": 4.726, - "step": 10000 - }, - { - "epoch": 9.93, - "learning_rate": 5e-05, - "loss": 3.624, - "step": 12500 - }, - { - "epoch": 11.91, - "learning_rate": 4e-05, - "loss": 3.585, - "step": 15000 - }, - { - "epoch": 11.91, - "eval_loss": 3.5690791606903076, - "eval_runtime": 519.0226, - "eval_samples_per_second": 9.452, - "eval_steps_per_second": 4.726, - "step": 15000 - }, - { - "epoch": 13.9, - "learning_rate": 
3e-05, - "loss": 3.5588, - "step": 17500 - }, - { - "epoch": 15.89, - "learning_rate": 2e-05, - "loss": 3.54, - "step": 20000 - }, - { - "epoch": 15.89, - "eval_loss": 3.531822443008423, - "eval_runtime": 518.4812, - "eval_samples_per_second": 9.462, - "eval_steps_per_second": 4.731, - "step": 20000 - }, - { - "epoch": 17.87, - "learning_rate": 1e-05, - "loss": 3.5249, - "step": 22500 - }, - { - "epoch": 19.86, - "learning_rate": 0.0, - "loss": 3.5218, - "step": 25000 - }, - { - "epoch": 19.86, - "eval_loss": 3.520382881164551, - "eval_runtime": 518.5767, - "eval_samples_per_second": 9.461, - "eval_steps_per_second": 4.73, - "step": 25000 - } - ], - "max_steps": 25000, - "num_train_epochs": 20, - "total_flos": 3.7165924872093696e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-25000/training_args.bin b/checkpoint-25000/training_args.bin deleted file mode 100644 index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000 --- a/checkpoint-25000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945 -size 3375 diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-5000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt deleted file mode 100644 index b257225b3a6570ac291f31780d0e2dd0de477a65..0000000000000000000000000000000000000000 --- a/checkpoint-5000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b158c0a1013b9dda055cea446889ca2a0761db52f5dfb77c566194c2b8d75d10 -size 2254269 diff --git a/checkpoint-5000/pytorch_model.bin b/checkpoint-5000/pytorch_model.bin deleted file mode 100644 index a6447fab240b09e235d751ea475673f347201c0c..0000000000000000000000000000000000000000 --- a/checkpoint-5000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd827040e31d8ae675b528e478e25bd48e0aa68fa5fa8cf6506c1b3c0ab0e777 -size 2236955191 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth deleted file mode 100644 index 6a0bae8e3547586750ea7ecba200270d8296068b..0000000000000000000000000000000000000000 --- a/checkpoint-5000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69a15e581e7afd6fd12d6dddef3da31e19b3dd058003b5c5d00781b54e093f7c -size 14503 diff --git 
a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt deleted file mode 100644 index 90bfa33aa2e57caff6083bf68c3b38db47518ccd..0000000000000000000000000000000000000000 --- a/checkpoint-5000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4841973343260138ab6da195f6b9590db0a8465a2275f311ddcc72346193a56 -size 623 diff --git a/checkpoint-5000/special_tokens_map.json b/checkpoint-5000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-5000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-5000/tokenizer.json b/checkpoint-5000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-5000/tokenizer_config.json b/checkpoint-5000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json deleted file mode 100644 index be20ed950abaafaf042fb8b884924891d215ca29..0000000000000000000000000000000000000000 --- a/checkpoint-5000/trainer_state.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "best_metric": 3.8250739574432373, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-5000", - "epoch": 3.9708275451478467, - "global_step": 5000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 9e-05, - "loss": 4.249, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 8e-05, - "loss": 3.9172, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.8250739574432373, - "eval_runtime": 519.4242, - "eval_samples_per_second": 9.445, - "eval_steps_per_second": 4.723, - "step": 5000 - } - ], - "max_steps": 25000, - "num_train_epochs": 20, - "total_flos": 7.432442008829952e+16, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin deleted file mode 100644 index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000 --- a/checkpoint-5000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945 -size 3375 diff --git a/checkpoint-5000/wikiann-az-results.txt b/checkpoint-5000/wikiann-az-results.txt deleted file mode 100644 index 52093f6a5a116481915ebdca93a6dbe228c47392..0000000000000000000000000000000000000000 --- a/checkpoint-5000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results 
-================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-5000 -[0.38795518207282914, 0.3600990449239476, 0.37191249117854625, 0.37989601386481797, 0.3945908460471567, 0.400695652173913, 0.38208537438768375, 0.3857294242317202, 0.3677725118483412, 0.3835137967167307] -38.14 -1.16 -================================================== \ No newline at end of file diff --git a/checkpoint-7500/config.json b/checkpoint-7500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-7500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-7500/optimizer.pt b/checkpoint-7500/optimizer.pt deleted file mode 100644 index 1bfa5eef97ad2b3920267a6eb7ad35dfef65e0d0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c583b268a8f05fe1ae0dc99d02c8aa192530e4872049177ae77305b636100d06 -size 2254269 diff --git a/checkpoint-7500/pytorch_model.bin b/checkpoint-7500/pytorch_model.bin deleted file mode 100644 index cc61994f89c0738e73f5b41870b9628d90d9a874..0000000000000000000000000000000000000000 --- a/checkpoint-7500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:567df95e93a3842184376f3e6f3a687f3e191723c458f258ed4cc9c2fa9fe8ec -size 2236955191 diff --git a/checkpoint-7500/rng_state.pth b/checkpoint-7500/rng_state.pth deleted file mode 100644 index d22ef2d8c468e8478a6b014595fdd078590453a0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4ec08f7c8006113b1a3cfff394ab26b2537112941e13a3692aeea177f161c92b -size 14503 diff --git a/checkpoint-7500/scheduler.pt b/checkpoint-7500/scheduler.pt deleted file mode 100644 index 92a7351ba15122d445557f2ca342b5c2a8f66242..0000000000000000000000000000000000000000 --- a/checkpoint-7500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3645d67727f305ed9f285de92859169b9ced76729a97a8143c6ece6d98a161d3 -size 623 diff --git a/checkpoint-7500/special_tokens_map.json b/checkpoint-7500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-7500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", 
"pad_token": ""} \ No newline at end of file diff --git a/checkpoint-7500/tokenizer.json b/checkpoint-7500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-7500/tokenizer_config.json b/checkpoint-7500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-7500/trainer_state.json b/checkpoint-7500/trainer_state.json deleted file mode 100644 index af6c7a30de05b5050a04efeb401f24dcb22e4598..0000000000000000000000000000000000000000 --- a/checkpoint-7500/trainer_state.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "best_metric": 3.2419724464416504, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-7500", - "epoch": 5.956538995832506, - "global_step": 7500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 1.99, - "learning_rate": 0.0008, - "loss": 3.6925, - "step": 2500 - }, - { - "epoch": 1.99, - "eval_loss": 3.425896406173706, - "eval_runtime": 510.13, - "eval_samples_per_second": 9.617, - "eval_steps_per_second": 4.809, - "step": 2500 - }, - { - "epoch": 3.97, - "learning_rate": 0.0006, - "loss": 3.3514, - "step": 5000 - }, - { - "epoch": 3.97, - "eval_loss": 3.297409772872925, - "eval_runtime": 509.9866, - "eval_samples_per_second": 9.62, - "eval_steps_per_second": 4.81, - "step": 5000 - }, - { - "epoch": 5.96, - "learning_rate": 0.0004, - "loss": 3.2632, - "step": 7500 - }, - { - "epoch": 5.96, - "eval_loss": 3.2419724464416504, - "eval_runtime": 509.6021, - "eval_samples_per_second": 9.627, - "eval_steps_per_second": 4.814, - "step": 7500 - } - ], - "max_steps": 12500, - "num_train_epochs": 10, - "total_flos": 1.114912736673792e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-7500/training_args.bin b/checkpoint-7500/training_args.bin deleted file mode 100644 index 691ea3fe0403fa3bc75e8b21ff0aeb01fb29c7ed..0000000000000000000000000000000000000000 --- a/checkpoint-7500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5a0101fd8f6900559ae4c013c2312163c0f239c35b8f9e14dabf6d0adafea84 -size 3375 diff --git a/checkpoint-7500/wikiann-az-results.txt b/checkpoint-7500/wikiann-az-results.txt deleted file mode 100644 index e3d25729c9da796318334aa05631d808a94264e5..0000000000000000000000000000000000000000 --- a/checkpoint-7500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-7500 -[0.3938547486033519, 0.3604938271604939, 0.37385159010600705, 0.37478470547709275, 
0.38933148597159684, 0.3983459682977257, 0.38442822384428216, 0.3738580463808854, 0.37841577308889657, 0.36306620209059237]
-37.90
-1.18
-==================================================
\ No newline at end of file
diff --git a/config.json b/config.json
deleted file mode 100644
index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000
--- a/config.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "_name_or_path": "bigscience/bloom-350m",
- "adapters": {
- "adapters": {},
- "config_map": {},
- "fusion_config_map": {},
- "fusions": {}
- },
- "apply_residual_connection_post_layernorm": false,
- "architectures": [
- "BloomForCausalLM"
- ],
- "attention_dropout": 0.0,
- "attention_softmax_in_fp32": true,
- "bias_dropout_fusion": true,
- "bos_token_id": 1,
- "eos_token_id": 2,
- "hidden_dropout": 0.0,
- "hidden_size": 1024,
- "initializer_range": 0.02,
- "layer_norm_epsilon": 1e-05,
- "masked_softmax_fusion": true,
- "model_type": "bloom",
- "n_head": 16,
- "n_inner": null,
- "n_layer": 24,
- "offset_alibi": 100,
- "pad_token_id": 3,
- "pretraining_tp": 1,
- "seq_length": 2048,
- "skip_bias_add": true,
- "skip_bias_add_qkv": false,
- "slow_but_exact": false,
- "torch_dtype": "float32",
- "transformers_version": "4.20.0.dev0",
- "unk_token_id": 0,
- "use_cache": true,
- "vocab_size": 250880
-}
diff --git a/eval_results.json b/eval_results.json
deleted file mode 100644
index c488dd38113d8e8736b31493cfff5b9a9027657a..0000000000000000000000000000000000000000
--- a/eval_results.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
- "epoch": 19.86,
- "eval_loss": 3.520382881164551,
- "eval_runtime": 518.4337,
- "eval_samples": 4906,
- "eval_samples_per_second": 9.463,
- "eval_steps_per_second": 4.732,
- "perplexity": 33.79736636184415
-}
\ No newline at end of file
diff --git a/pytorch_model.bin b/pytorch_model.bin
deleted file mode 100644
index 563a0b3966e9ff128f1b1817695da4e6c2833cc3..0000000000000000000000000000000000000000
--- a/pytorch_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ff22586eea4fb06e03ee9ab88ff9dcf9845667b34e4c607443fa5bda35685958
-size 2236955191
diff --git a/special_tokens_map.json b/special_tokens_map.json
deleted file mode 100644
index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000
--- a/special_tokens_map.json
+++ /dev/null
@@ -1 +0,0 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
\ No newline at end of file
diff --git a/tokenizer.json b/tokenizer.json
deleted file mode 100644
index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000
--- a/tokenizer.json
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5
-size 14500443
diff --git a/tokenizer_config.json b/tokenizer_config.json
deleted file mode 100644
index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000
--- a/tokenizer_config.json
+++ /dev/null
@@ -1 +0,0 @@
-{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"}
\ No newline at end of file
diff --git a/train_results.json b/train_results.json
deleted file mode 100644
index 4924f969d9ee5690963a05c8f51dca526feb49f6..0000000000000000000000000000000000000000
--- a/train_results.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "epoch": 19.86,
- "train_loss": 3.6973518359375,
- "train_runtime": 53135.1163,
- "train_samples": 10000,
- "train_samples_per_second": 3.764,
- "train_steps_per_second": 0.47
-}
\ No newline at end of file
diff --git a/trainer_state.json b/trainer_state.json
deleted file mode 100644
index 05bd61b15d5df9f27825f4194efb14b714bd16f6..0000000000000000000000000000000000000000
--- a/trainer_state.json
+++ /dev/null
@@ -1,125 +0,0 @@
-{
- "best_metric": 3.520382881164551,
- "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/checkpoint-25000",
- "epoch": 19.856519150625125,
- "global_step": 25000,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 1.99,
- "learning_rate": 9e-05,
- "loss": 4.249,
- "step": 2500
- },
- {
- "epoch": 3.97,
- "learning_rate": 8e-05,
- "loss": 3.9172,
- "step": 5000
- },
- {
- "epoch": 3.97,
- "eval_loss": 3.8250739574432373,
- "eval_runtime": 519.4242,
- "eval_samples_per_second": 9.445,
- "eval_steps_per_second": 4.723,
- "step": 5000
- },
- {
- "epoch": 5.96,
- "learning_rate": 7e-05,
- "loss": 3.7702,
- "step": 7500
- },
- {
- "epoch": 7.94,
- "learning_rate": 6e-05,
- "loss": 3.6827,
- "step": 10000
- },
- {
- "epoch": 7.94,
- "eval_loss": 3.64797043800354,
- "eval_runtime": 519.0865,
- "eval_samples_per_second": 9.451,
- "eval_steps_per_second": 4.726,
- "step": 10000
- },
- {
- "epoch": 9.93,
- "learning_rate": 5e-05,
- "loss": 3.624,
- "step": 12500
- },
- {
- "epoch": 11.91,
- "learning_rate": 4e-05,
- "loss": 3.585,
- "step": 15000
- },
- {
- "epoch": 11.91,
- "eval_loss": 3.5690791606903076,
- "eval_runtime": 519.0226,
- "eval_samples_per_second": 9.452,
- "eval_steps_per_second": 4.726,
- "step": 15000
- },
- {
- "epoch": 13.9,
- "learning_rate": 3e-05,
- "loss": 3.5588,
- "step": 17500
- },
- {
- "epoch": 15.89,
- "learning_rate": 2e-05,
- "loss": 3.54,
- "step": 20000
- },
- {
- "epoch": 15.89,
- "eval_loss": 3.531822443008423,
- "eval_runtime": 518.4812,
- "eval_samples_per_second": 9.462,
- "eval_steps_per_second": 4.731,
- "step": 20000
- },
- {
- "epoch": 17.87,
- "learning_rate": 1e-05,
- "loss": 3.5249,
- "step": 22500
- },
- {
- "epoch": 19.86,
- "learning_rate": 0.0,
- "loss": 3.5218,
- "step": 25000
- },
- {
- "epoch": 19.86,
- "eval_loss": 3.520382881164551,
- "eval_runtime": 518.5767,
- "eval_samples_per_second": 9.461,
- "eval_steps_per_second": 4.73,
- "step": 25000
- },
- {
- "epoch": 19.86,
- "step": 25000,
- "total_flos": 3.7165924872093696e+17,
- "train_loss": 3.6973518359375,
- "train_runtime": 53135.1163,
- "train_samples_per_second": 3.764,
- "train_steps_per_second": 0.47
- }
- ],
- "max_steps": 25000,
- "num_train_epochs": 20,
- "total_flos": 3.7165924872093696e+17,
- "trial_name": null,
- "trial_params": null
-}
diff --git a/training_args.bin b/training_args.bin
deleted file mode 100644
index ac02d917397c1472d05a48c4ea5f1aa01e63810c..0000000000000000000000000000000000000000
--- a/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0cd06de07bc00b172f616a37bcac81b46cefe15d7481c457bed5477be8eb5945
-size 3375
diff --git a/wikiann-az-results.txt b/wikiann-az-results.txt
deleted file mode 100644
index 096ce47587d3502ff38e98b9f32cdd0f40b5560e..0000000000000000000000000000000000000000
--- a/wikiann-az-results.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-==================================================
-Results
-==================================================
-Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_10000samples_-1vocab_original-frozen/
-[0.3899018232819075, 0.37265320580942257, 0.36829182742897226, 0.38707411310151035, 0.38603839441535776, 0.3918169209431346, 0.3767813694820994, 0.3875750088245676, 0.37196391394864675, 0.3844011142061281]
-38.16
-0.80
-==================================================
\ No newline at end of file
diff --git a/word_embeddings.pt b/word_embeddings.pt
deleted file mode 100644
index 2a4cbda005e5d2ade008b1110876ca6a87706b17..0000000000000000000000000000000000000000
--- a/word_embeddings.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:174ed618237771e5906be0e8d70c568de63633f3bb5e8a1e303bbdbaeaedc1ca
-size 1027605867
diff --git a/word_embeddings_layernorm.pt b/word_embeddings_layernorm.pt
deleted file mode 100644
index 1b8759b2a378472c0f17a4292a2a6276b8a3e07e..0000000000000000000000000000000000000000
--- a/word_embeddings_layernorm.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:10917f86841a4f322406bd72ba2e4ae8e4780aaf462c98a76eca01e0c5fbc893
-size 9703
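
Editorial note on the deleted metrics files: the numbers are internally consistent. The "perplexity" stored in eval_results.json is exp(eval_loss), as expected for a causal language model, and the two summary lines at the bottom of wikiann-az-results.txt (38.16 and 0.80) appear to be the mean and population standard deviation, in percentage points, of the ten per-run F1 scores listed above them. A minimal Python sketch that reproduces both figures, using only values quoted in this diff (no other data is assumed):

import math

# eval_loss as stored in the deleted eval_results.json
eval_loss = 3.520382881164551
print(math.exp(eval_loss))  # ~33.7974, matching the stored "perplexity" field

# Per-run WikiANN-az F1 scores from the deleted wikiann-az-results.txt
f1 = [0.3899018232819075, 0.37265320580942257, 0.36829182742897226,
      0.38707411310151035, 0.38603839441535776, 0.3918169209431346,
      0.3767813694820994, 0.3875750088245676, 0.37196391394864675,
      0.3844011142061281]
mean = 100 * sum(f1) / len(f1)
std = (sum((100 * x - mean) ** 2 for x in f1) / len(f1)) ** 0.5
print(f"{mean:.2f} {std:.2f}")  # 38.16 0.80, matching the file's summary lines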