diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin index 43a34bff6604f26cbe9799377031a3f351b7bf22..ff582c62b46dc2070b86a97ac449220c9ce4f4d2 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd47cb9e4d59e5592dc86a4825506634b4b81810e2c8cce8d4ee00b654660681 +oid sha256:900b929352d7b9864cf5124eb4c572a8b6dae37682d249b2ab7580664a91b940 size 104973389 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt b/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt index f71ab05a043e24abe7409c59ecdc058feae959e2..9ef09dd0819983933b02f6826d8a1a3442ad7af8 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89cb77b938fcbec8ef8fc937366b88a4eb9b3dda7246bcdf5841e8d1ff389eb7 +oid sha256:9906cf6b230090f82e564d944ed89f86f7b93fbea51a96f60d93b92105d169fe size 209984517 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth b/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth index 80a4b24ac39d2e283b0d0adbe575e539e7cb8444..1786ce235fbc332da55bd278b2c8e6b93bcabe56 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b683f78ced98226c01d86c04f25e6a3295aa86e50560601c917694a914d68aad +oid sha256:5e482f68aee4b354cad62400db9fdeb5b976710c5407b5ccbc9fb83983e947b7 size 14575 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt b/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt index 879d7ab75d8479e9604f08d5ff328f1b267075dc..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb69dbf6f829f506b60594816da07bc6940e3d9adce52ab976bf73b294ac5127 +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json b/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json index b8bacf4cac67d85ed54a334a42d91410fb2beef5..a1549b4a642d990d05bb710877e7bbf0be9c6d96 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 0.7364377379417419, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/coin_flip/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 4.770002365112305, - "eval_runtime": 0.7512, - "eval_samples_per_second": 26.626, - "eval_steps_per_second": 3.994, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 4.635828971862793, - "eval_runtime": 0.7546, - "eval_samples_per_second": 26.505, - "eval_steps_per_second": 3.976, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 4.354025363922119, - "eval_runtime": 0.7548, - "eval_samples_per_second": 26.499, - "eval_steps_per_second": 3.975, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 4.5475, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 3.5884, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 3.8307082653045654, - "eval_runtime": 0.7548, - "eval_samples_per_second": 26.498, - "eval_steps_per_second": 3.975, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.8993, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 3.6256070137023926, - "eval_runtime": 0.7542, - "eval_samples_per_second": 26.519, - "eval_steps_per_second": 3.978, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 2.8008601665496826, - "eval_runtime": 0.7532, - "eval_samples_per_second": 26.552, - "eval_steps_per_second": 3.983, - "step": 15 + "learning_rate": 4e-05, + "loss": 0.2492, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 1.8803138732910156, - "eval_runtime": 0.7625, - "eval_samples_per_second": 26.229, - "eval_steps_per_second": 3.934, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1766, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011399999999999999, - "loss": 2.7123, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.1544, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 0.7364377379417419, - "eval_runtime": 0.7535, - "eval_samples_per_second": 26.542, - "eval_steps_per_second": 3.981, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.1371, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 2393497935544320.0, + "total_flos": 3582829037813760.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin b/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin index cca1d50ab2c5b9626b78953f59c1d21278960586..388aae6fb47e2410d1ceebb47de27141d86c9935 100644 --- a/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/coin_flip/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfd2f888c4bb95dd2b9338dcfeb755b0ed743f36bb3535194839914be97ca407 +oid sha256:1dac19ece9926ce0df65478a28bc7c2376d0aab79aeef27eb692db5c74efbeb6 size 4091 diff --git a/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin index f15e5e26d98e66d031af59fce1b51b20006f0b67..b900ce027bc49c3c856bfd22879242d64b67f821 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/cola/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1223c9b19186bca2d1f9a40d1afb7b9d667bb02a7b7fdf5a29875a3ba0ad2c8f +oid sha256:7cf6eaeebbd72ab6814da833d9eda14981f05692b2765a3de982dfd71c1fc537 size 104973389 diff --git a/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt b/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt index 2bd8e3fbb9fc30c3147c92e091603ec921a195de..f288cf9547e6c6387e20d3ecbd0754f60602d4b7 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/cola/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52d58812c7b440cb5008d9896abfb5be121a3021dd1ec3e407cf5ece524078cb +oid sha256:c5d16a56b4d5cf1511d315501d9eaabcd5aa34e6693772060a35cbe3e2cdd9c1 size 209984517 diff --git a/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth b/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth index 411f311183b61678aeb511e12775b50c9f09b1bf..ebdc6fa01f3850390a2a66bed2c8846c042e888f 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/cola/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44 +oid sha256:577ca82244635df762528677a0c1f397652f06dfb861feea220b11b2a5c0b33a size 14575 diff --git a/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt b/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..efccd4737b2851b7ee4ca7f64dcef03b0c02d3a4 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/cola/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:495734eb861c1697fb12451c91ebf183750105f664f8401e6e4afa76e8c58d35 size 627 diff --git a/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json b/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json index a4b2df95bc9f3a5231487274377d693b7b50f521..765423119049770e5658ad16d777b92a8e5cde8e 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/cola/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 3.2643978595733643, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/cola/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 7.496130466461182, - "eval_runtime": 0.651, - "eval_samples_per_second": 30.721, - "eval_steps_per_second": 4.608, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 7.389729976654053, - "eval_runtime": 0.6506, - "eval_samples_per_second": 30.74, - "eval_steps_per_second": 4.611, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 7.235182285308838, - "eval_runtime": 0.6495, - "eval_samples_per_second": 30.792, - "eval_steps_per_second": 4.619, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 7.3487, + "epoch": 1.6, + "learning_rate": 6.8e-05, + "loss": 6.9013, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 6.905265808105469, - "eval_runtime": 0.6482, - "eval_samples_per_second": 30.855, - "eval_steps_per_second": 4.628, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.466666666666667e-05, + "loss": 4.5097, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 6.568731784820557, - "eval_runtime": 0.6499, - "eval_samples_per_second": 30.774, - "eval_steps_per_second": 4.616, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 5.829730987548828, - "eval_runtime": 0.6495, - "eval_samples_per_second": 30.791, - "eval_steps_per_second": 4.619, - "step": 15 + "learning_rate": 4.133333333333334e-05, + "loss": 1.3419, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 5.088259220123291, - "eval_runtime": 0.6485, - "eval_samples_per_second": 30.84, - "eval_steps_per_second": 4.626, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.8e-05, + "loss": 0.312, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 5.6641, - "step": 20 + "learning_rate": 1.4666666666666666e-05, + "loss": 0.1887, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 3.2643978595733643, - "eval_runtime": 0.6478, - "eval_samples_per_second": 30.872, - "eval_steps_per_second": 4.631, - "step": 20 + "epoch": 9.6, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.1659, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 862945020149760.0, + "total_flos": 1397031336345600.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/cola/best_model/training_args.bin b/low-shot-task-specific-100-ex/cola/best_model/training_args.bin index 4a37896dcde66f6bf0b41321830abc936c5c42ec..9d99b4e14b98185e6dd4375c5fcd61e900a50e89 100644 --- a/low-shot-task-specific-100-ex/cola/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/cola/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7584dccd05bece2a9ec3f622ce5aa37500e33036a484978bd2e232bdb92a42bd +oid sha256:030d874224160392a528da785805f84b87012297066180d07cb7f334c83a3b1c size 4091 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin index a4da00c0b315fec7a594473ba0df88a8a17210e4..01b93a415b3f7aacebf4d77a1d061f106086e810 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f63d75b63e7b225c36c047b776e506742a0eb06a2cb34d6ac32cbc8faea89420 +oid sha256:949e2fce99ab8e025bccb82e82cde96dfefff4db63f88191a2dc513f466f95fe size 104973389 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt b/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt index 766f0661a1d679216d2d8ff1b310ed3035de7d12..f741ea166279d98fe907494f266e63052ccbab3b 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fdd8a85dbed2d83977bbce5185352392812b7736e12294c276e5e9046d46672 +oid sha256:a3ae1e782342c243ae71ef5ed3bce12577030d9ac5b0ee3f3216294b93571507 size 209984517 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth b/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth index 2cd9ca3bea976d1f3c3e6223b8d4dbc35042bcc9..655eb053a8a71d57dac31e0ed3f4239d6965598f 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e7d44b7a4cdc1eb16e6709c586b407ac6e8b61b77e11a4182c69dce6b3efbf4 +oid sha256:b9b4046a77d21f3c9945d5e736c8c40a3c0242857a18a73f733de62410972bcf size 14575 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt b/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json b/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json index 2e9b1545afdbb64590d3888db8909a566c358f7a..41d89b1a761c8a4c31d31719b34bd5cd2d31cb3d 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 1.0019607543945312, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/commonsense_qa/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 5.86135196685791, - "eval_runtime": 0.8193, - "eval_samples_per_second": 24.41, - "eval_steps_per_second": 3.662, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 5.789961814880371, - "eval_runtime": 0.8276, - "eval_samples_per_second": 24.167, - "eval_steps_per_second": 3.625, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 5.628936767578125, - "eval_runtime": 0.8224, - "eval_samples_per_second": 24.32, - "eval_steps_per_second": 3.648, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 5.5941, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 4.5215, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 4.905825614929199, - "eval_runtime": 0.8285, - "eval_samples_per_second": 24.14, - "eval_steps_per_second": 3.621, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 1.6265, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 4.051631927490234, - "eval_runtime": 0.8299, - "eval_samples_per_second": 24.099, - "eval_steps_per_second": 3.615, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 3.0537314414978027, - "eval_runtime": 0.8237, - "eval_samples_per_second": 24.28, - "eval_steps_per_second": 3.642, - "step": 15 + "learning_rate": 4e-05, + "loss": 0.5894, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 2.2741684913635254, - "eval_runtime": 0.8216, - "eval_samples_per_second": 24.344, - "eval_steps_per_second": 3.652, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.4951, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 3.1561, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.359, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 1.0019607543945312, - "eval_runtime": 0.8235, - "eval_samples_per_second": 24.287, - "eval_steps_per_second": 3.643, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.3024, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 3078414183628800.0, + "total_flos": 4569405149675520.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin b/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin index 265b74d9fafc8f7f824bf4da4a2d7247f2888a72..e7263ceb2d9cbdfa7b972935a0c15d12e0a5fe6d 100644 --- a/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/commonsense_qa/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcaa3c829539bb3746b5f7e0dafb15dbfccde57e3d5650c68eb42e3fbbacfa3e +oid sha256:893639fde84c03a31eac95c7cc32efd5ac331dcc6aad3a9d8c1cc4820826b76c size 4091 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin index c013d8b3d8472cd3f790e9de0e3092238a10f4e3..edacccbd231425ad660be9f4a861d54d3c440e1b 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/emotion/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23178e724f805ef456b15b82726bde7476bb7f85943ce21b94a72364d19f8459 +oid sha256:79a36fbadf1693a95d51fb67815f543304b8f28ac83311b4deb4e9e2e6fd4fd2 size 104973389 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt b/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt index 0131cbd4a3dc71d5fbad3200d6611eccd0cd1cc3..ba2223a44d21c4733c589b47e9dfecbb37ebce29 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/emotion/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73979bacc3db61859edeb2672ed3234900273717c07987eb84d60c9ebb4e30cc +oid sha256:6d8e89ae6be170509d3aede5b9db1253d03aa028322d13df82e8eb65a2fc4e08 size 209984517 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth b/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth index 6069cccc74ba7a762067123d6dd43795e852a812..70811e74d1aeb87bf97a109589029d4b64431152 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/emotion/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a608d60cb0a08df4efb2f7fa2cd8590824d7f4a718f101d397116dc4b5272878 +oid sha256:9ceb0596676a94d8e2213650ba935fa425b8b167f3a10682ab367ee5d59bf39e size 14575 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt b/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/emotion/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json b/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json index 3b9c1fbef0340dbd00e8dc48234d836ad4430c31..2bce39be06d1ac4901487d33f619e61229d254f7 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/emotion/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 3.7403335571289062, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/emotion/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 6.759609222412109, - "eval_runtime": 0.7242, - "eval_samples_per_second": 27.617, - "eval_steps_per_second": 4.143, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 6.701653957366943, - "eval_runtime": 0.7245, - "eval_samples_per_second": 27.606, - "eval_steps_per_second": 4.141, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 6.61182165145874, - "eval_runtime": 0.7269, - "eval_samples_per_second": 27.515, - "eval_steps_per_second": 4.127, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 6.5973, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 6.0546, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 6.34386682510376, - "eval_runtime": 0.7251, - "eval_samples_per_second": 27.583, - "eval_steps_per_second": 4.137, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 4.0909, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 5.928864479064941, - "eval_runtime": 0.7256, - "eval_samples_per_second": 27.565, - "eval_steps_per_second": 4.135, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 5.030377388000488, - "eval_runtime": 0.7248, - "eval_samples_per_second": 27.593, - "eval_steps_per_second": 4.139, - "step": 15 + "learning_rate": 4e-05, + "loss": 2.2994, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 4.649694442749023, - "eval_runtime": 0.7259, - "eval_samples_per_second": 27.554, - "eval_steps_per_second": 4.133, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.9901, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 5.1116, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5193, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 3.7403335571289062, - "eval_runtime": 0.7246, - "eval_samples_per_second": 27.602, - "eval_steps_per_second": 4.14, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.4048, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 2252558490992640.0, + "total_flos": 3335566854389760.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin b/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin index e87b4c75f75da916f77116eb0307ea835da6d074..3163fe033236dede2f0efd4dcaf0f12747d3f7ae 100644 --- a/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/emotion/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cec69ccd4746da9921e4026850479fcbb626150ed87fa80dc42db47d3486b211 +oid sha256:a9176c654caac25b5d0276963fccddd09a64e358e438dedb0a65065e09d8fd8c size 4091 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin index 52344dc0bb0985fe987f51ee88dd4152fd3ab084..6b72293860f8d1011468e10b1128247a546fd944 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef60f33031060f9266bfe61d170eb7c269e012245e37652a45d7c3131c8d3add +oid sha256:f3baf0333809d650d988f3171ce163a104f77f0e24849b73d92595f523c450a1 size 104973389 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt b/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt index 494b1dd20be0861d0affb9f8875a792f9d811f1c..4f5d2fcf44724064415a8797026c2ee6441d05b6 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abbeb1912756da0877b31d08f01c2fd522bbb55dc819144a235dd9eae39560ec +oid sha256:54d9b70c62c40e51621ac1d0f737b9f4fe7817f8322bafec0256d8c4acc22f68 size 209984517 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth b/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth index 82786a327b2ffca4e7e788c6a2882ecaa836bf13..5a251acc71a085abe8025f631125fc1a39b174eb 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ab0262a43d8b8116e7c1b8d394e85687822d3581f13149fbc20dd3f065e85a6 +oid sha256:2bcf9f292ce90305d3f64ff1cc106681764cf8693b49bb19dd2577e6cf765e6e size 14575 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt b/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json b/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json index bffe5fbe450f94a29147bbf72491e20ef61973e9..33543a1e4d039fae9464c33fc964ae2ee10db63a 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 0.821982741355896, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/social_i_qa/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 6.487459659576416, - "eval_runtime": 0.8925, - "eval_samples_per_second": 22.408, - "eval_steps_per_second": 3.361, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 6.3755621910095215, - "eval_runtime": 0.897, - "eval_samples_per_second": 22.297, - "eval_steps_per_second": 3.345, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 6.16649866104126, - "eval_runtime": 0.8963, - "eval_samples_per_second": 22.314, - "eval_steps_per_second": 3.347, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 6.1511, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 4.9972, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 5.644223690032959, - "eval_runtime": 0.8985, - "eval_samples_per_second": 22.259, - "eval_steps_per_second": 3.339, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 1.4853, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 4.884535312652588, - "eval_runtime": 0.9023, - "eval_samples_per_second": 22.166, - "eval_steps_per_second": 3.325, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 3.0946044921875, - "eval_runtime": 0.8991, - "eval_samples_per_second": 22.245, - "eval_steps_per_second": 3.337, - "step": 15 + "learning_rate": 4e-05, + "loss": 0.4584, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 2.1555423736572266, - "eval_runtime": 0.9036, - "eval_samples_per_second": 22.133, - "eval_steps_per_second": 3.32, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.3353, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 3.3011, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.2728, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 0.821982741355896, - "eval_runtime": 0.8939, - "eval_samples_per_second": 22.373, - "eval_steps_per_second": 3.356, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.2213, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 3338039476224000.0, + "total_flos": 4962552021319680.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin b/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin index dd47b3dd7b4b1b9240e4c9bb5f73e480a555a266..63283c8bad1ba9e4c49157373aaed8726226cafc 100644 --- a/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/social_i_qa/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b54b052f5cac8f07391027a3b0c53c8bffff1028fc30595e86e27a07c6b887e +oid sha256:8af11791e6cdf16263699f5c07021c5d83c28dff3223886bef6b07b99808437e size 4091 diff --git a/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin index dc2e7d8a3b7651d93dfc40765b488c8706f670f8..624af1cdf2408dc143ee62398d7d2bbc38ef4342 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/sst/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0ab51ab7c25129d35e69c6a5c9f19421507d7aea82814b2fbf3fbb9e27e0120 +oid sha256:8d4a8082792fc849393266eaa10c1e85cc9c3ae5e398cfcaec5bc226efa12578 size 104973389 diff --git a/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt b/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt index ae601c20ca349aeaac8ffc25b67793bbfc5c66c8..97f2b32686aa1935340a18212802adb84406720c 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/sst/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75b2aab283007757d7da6dabd8a4d1a3e6a88f4f3b3c863b51c6af953abd76f6 +oid sha256:5f13f5bf513a01427c8ef493fc671d1afae75974286dc054d4310620eed43190 size 209984517 diff --git a/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth b/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth index 3a7c4ffda07384c11f19852f28450dcb1eebe30b..ca43f41ab0ba0f2d3c29dada850722cbc9896b22 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/sst/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd5c668f408d63f497259a79124b33e953210b9fb047bdfd418e598fe5018820 +oid sha256:34aa090882640627386ae3817671b84253ebaab4311765be68e81e144e48ef55 size 14575 diff --git a/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt b/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt index 920b76ebb92522ccb9b9a6f0fea021ad71358e06..6af8f5eb46978a06dba498020aa9dc6dfadd6db6 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/sst/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:daa948d8422c0ff5c84b04b3a278a49a8c2106994063cdc84b33b076944943d4 +oid sha256:f16485f213295154c0d69518bb66258b478c3854316b9ff741b7b323a6b7d753 size 627 diff --git a/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json b/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json index 53314106524931b4dac2ac4cb000909c7e3466e9..8e94758ffef25123f6c1b16feb20f9204adf51e8 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/sst/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 4.71249532699585, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sst/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 7.7774858474731445, - "eval_runtime": 0.6674, - "eval_samples_per_second": 29.965, - "eval_steps_per_second": 4.495, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 7.758080959320068, - "eval_runtime": 0.6648, - "eval_samples_per_second": 30.083, - "eval_steps_per_second": 4.512, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 7.7222137451171875, - "eval_runtime": 0.6677, - "eval_samples_per_second": 29.954, - "eval_steps_per_second": 4.493, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 4.2e-05, - "loss": 7.716, + "epoch": 1.6, + "learning_rate": 7.066666666666667e-05, + "loss": 7.3041, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 7.386101722717285, - "eval_runtime": 0.6691, - "eval_samples_per_second": 29.892, - "eval_steps_per_second": 4.484, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.7333333333333336e-05, + "loss": 4.82, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 7.176695346832275, - "eval_runtime": 0.6671, - "eval_samples_per_second": 29.982, - "eval_steps_per_second": 4.497, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 6.573421478271484, - "eval_runtime": 0.6678, - "eval_samples_per_second": 29.951, - "eval_steps_per_second": 4.493, - "step": 15 + "learning_rate": 4.4000000000000006e-05, + "loss": 1.2888, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 6.066993236541748, - "eval_runtime": 0.6693, - "eval_samples_per_second": 29.881, - "eval_steps_per_second": 4.482, - "step": 17 + "epoch": 6.4, + "learning_rate": 3.066666666666667e-05, + "loss": 0.2708, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.000102, - "loss": 6.5057, - "step": 20 + "learning_rate": 1.7333333333333336e-05, + "loss": 0.1504, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 4.71249532699585, - "eval_runtime": 0.6679, - "eval_samples_per_second": 29.944, - "eval_steps_per_second": 4.492, - "step": 20 + "epoch": 9.6, + "learning_rate": 4.000000000000001e-06, + "loss": 0.1077, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 1760506745978880.0, + "total_flos": 2554218354769920.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/sst/best_model/training_args.bin b/low-shot-task-specific-100-ex/sst/best_model/training_args.bin index cf1ab3fc037dc8dc2c606c56b7f3e1e9fa0ff075..d059280f4f74c1538130ed9ba06faaca2887d937 100644 --- a/low-shot-task-specific-100-ex/sst/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/sst/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82db56540ff893c3d36fa725b2ccaa3a282af73561fa81b1cd27f7673a28d02f +oid sha256:d6b68d5494fd59a8931fe75179461097ab12bf65feb3b85581dd03a1bc109cd9 size 4091 diff --git a/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin index 567056a0d5ef8311e0f4a4652ddf71405394affd..6a8b52317899903978e8d4d9de7f27c8a0c6a29b 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/sum/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7cc16d00544954ce77b8cae15ccb1f3d592abfd2e8a630f9f4afd1fecf5a7be +oid sha256:2d2c2c1869e16917f0713fe4958b0fd90a395bc1551cdd4c805a036b409ee11e size 104973389 diff --git a/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt b/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt index 64a3a073ab327ab9ebc672cd8cab68b7901b6f2b..1d94be60015d24fe6d29f3c9d33800471a4a1ffa 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/sum/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a11eba00d1ce3e5f0d03227932d4e26433f2f6b6a7c8dc1f7f157da65ae61a16 +oid sha256:0e1bc3f72790a64267d5724bc6229edf90cbb987e421f8a994a903cd30c1b21f size 209984517 diff --git a/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth b/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth index 411f311183b61678aeb511e12775b50c9f09b1bf..17facdd080be3ab630cffef47a382061568f7c91 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/sum/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cfb4b1b5009a9b015311afd9778b0ef0a4a8bc389c04629a1d2e68a84aeea44 +oid sha256:3a52c225a78208eb165a8c30467227762e3ced5a053ad4bfc3a92262e3ccc33c size 14575 diff --git a/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt b/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/sum/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json b/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json index f505dad02db5ae5dcecca92c17803301bdb6dcfd..0f7dff7542fed317fc83bfb791029ce1878b9506 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/sum/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 1.7415841817855835, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/sum/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 3.6972098350524902, - "eval_runtime": 0.6541, - "eval_samples_per_second": 30.578, - "eval_steps_per_second": 4.587, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 3.5827407836914062, - "eval_runtime": 0.6516, - "eval_samples_per_second": 30.695, - "eval_steps_per_second": 4.604, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 3.435373306274414, - "eval_runtime": 0.6534, - "eval_samples_per_second": 30.611, - "eval_steps_per_second": 4.592, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 3.5457, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 3.0337, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 3.144989013671875, - "eval_runtime": 0.6516, - "eval_samples_per_second": 30.692, - "eval_steps_per_second": 4.604, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 1.7695, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 2.9391090869903564, - "eval_runtime": 0.6511, - "eval_samples_per_second": 30.718, - "eval_steps_per_second": 4.608, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 2.312290668487549, - "eval_runtime": 0.6505, - "eval_samples_per_second": 30.745, - "eval_steps_per_second": 4.612, - "step": 15 + "learning_rate": 4e-05, + "loss": 0.506, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 1.938306450843811, - "eval_runtime": 0.6508, - "eval_samples_per_second": 30.732, - "eval_steps_per_second": 4.61, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2987, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 2.4305, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.2552, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 1.7415841817855835, - "eval_runtime": 0.6535, - "eval_samples_per_second": 30.603, - "eval_steps_per_second": 4.59, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.2156, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 1186858480435200.0, + "total_flos": 1780287720652800.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/sum/best_model/training_args.bin b/low-shot-task-specific-100-ex/sum/best_model/training_args.bin index 5f47ff9df2b2338467f66974fee13bb6446c709a..83676d8ec846bf8854c272f241a567140ad35608 100644 --- a/low-shot-task-specific-100-ex/sum/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/sum/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9b167b4b835edf4fa5e13863bdad1f68e733f002739a766fe1eb9a9eb6f48df +oid sha256:eed2a9d96b9d414d97522c11e4a4005166f181c7e4ce8763f264a4da637a347b size 4091 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin index de4d065bce814b54125559ba6465e6c94d71dc49..9fe44251d3c4616d2dfc686337dd5034b974ceb9 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/svamp/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f29211dc7f547a0a871873375eefed004d258e206cecfee346d1c18ddb963d3 +oid sha256:c7c988598d575c2d615f27babef066bdb1465845d830f01d1b757b90f57a8f98 size 104973389 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt b/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt index 2255368a029550268469229d48f8da09b16e27b8..5c41cc7194603a4d98ab1de3c374c2133dd690cd 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/svamp/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2b859941698531e737b7ef66017873e12852369c66d25abd3373ec677259117 +oid sha256:e2008f46e96a2efc77627c37dc472e78315d8d260cbc110fc22b8b7648959ecc size 209984517 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth b/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth index 91b504fd87b8370f2678923974333a9a245291ee..9797afe9f59e988ba26bd2248e5f11f20634eb6b 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/svamp/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5077750d4ebc29a06666f871f496d7ff06c1cb14dbd8f8954de69896bf4426ab +oid sha256:d69303b7c1b8eda88caba2f66d02f7e4952ea247079f1347de1a9cdad90a623e size 14575 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt b/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/svamp/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json b/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json index 9e63ddf622281a1ebae94d6b0818fe08d6111dd4..1b4207bb54f6f50dd2c7a02cf6d5dd0c1b306070 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/svamp/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 1.6565091609954834, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/svamp/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 5.190371036529541, - "eval_runtime": 0.8961, - "eval_samples_per_second": 22.319, - "eval_steps_per_second": 3.348, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 5.100682735443115, - "eval_runtime": 0.9032, - "eval_samples_per_second": 22.144, - "eval_steps_per_second": 3.322, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 4.914952278137207, - "eval_runtime": 0.9065, - "eval_samples_per_second": 22.062, - "eval_steps_per_second": 3.309, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 4.937, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 4.0111, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 4.304265022277832, - "eval_runtime": 0.9048, - "eval_samples_per_second": 22.105, - "eval_steps_per_second": 3.316, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 2.0745, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 3.561671733856201, - "eval_runtime": 0.9075, - "eval_samples_per_second": 22.04, - "eval_steps_per_second": 3.306, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 2.8418495655059814, - "eval_runtime": 0.9069, - "eval_samples_per_second": 22.052, - "eval_steps_per_second": 3.308, - "step": 15 + "learning_rate": 4e-05, + "loss": 1.1402, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 2.3915348052978516, - "eval_runtime": 0.9035, - "eval_samples_per_second": 22.136, - "eval_steps_per_second": 3.32, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.845, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 3.0207, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.6969, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 1.6565091609954834, - "eval_runtime": 0.905, - "eval_samples_per_second": 22.099, - "eval_steps_per_second": 3.315, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.6297, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 3347929963560960.0, + "total_flos": 5063929516523520.0, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin b/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin index 785156c063423357186f06ea80e2ca6be6fce34d..420bbcc9d161ee9516712336ef7cdb59bd8ab4d4 100644 --- a/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/svamp/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b132a2d5ba1c5592adec662b49a14662d09b6d2d689b790811b0c67535815c7d +oid sha256:89187712b0b3f8571d4b05a1d9a0c621cb121c6f3768816a8223cffde0f5134d size 4091 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin index d05555687d58d80a9ed4c7765b75a028df73a1ab..ee5626ac1220e152042db8c33c2f46addf35b4a1 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c3c5a2be24d9f05a68e2795dcb390c54ada897a16d601b0dd2b44cab21718f0 +oid sha256:f174bcbd27519b73a922329f1e5ef20f0a3dbc18de78fb9fbd15133e827c9f40 size 104973389 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt b/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt index e0270bac01057e744fcc9121afc064d938b54e7b..a85812e8b70304cd12aaff7c5b66c6f0fd6009cf 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d4146bb863afab1cb22cde74559a41111b3d731420d88faada97d9deff22af9 +oid sha256:9a5e3e7e5726726d88384ef53fe84a880ea76e1d731099ce9851fe02e8f326e2 size 209984517 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth b/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth index 9b4d383cdfd8717456d5b802d5ec9ba07a37a9cd..b2f8941ad5fda2513e44300ebb34a63e22e8debe 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28c03a1d71c19dc35634d295a6d0b4ae49b617b7f63b6772fee61d4c6abab573 +oid sha256:17ebbe15250705722a059f62ad7b9f81e3e45bf6f03e89e568cd932ead3b1f2e size 14575 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt b/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt index 10701e0231c43fd7fbee03df6bea6a9e4d381788..97af97ef41be68d37872e3e436854f56a12660f7 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b63e6b896793d947452e2a5278b1333dd1dcc71d94b37f92a47c6766156ba1e +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 size 627 diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json b/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json index fd7bbbbd5e763361e260a3d8d594b2ceba8121ef..2270c3a09735b2f80bcc9e4163871d1371598002 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/trainer_state.json @@ -1,95 +1,55 @@ { - "best_metric": 0.0560903362929821, - "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-100-ex/word-sorting/checkpoint-20", - "epoch": 8.0, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.6, "eval_steps": 500, - "global_step": 20, + "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.8, - "eval_loss": 0.3754672408103943, - "eval_runtime": 1.8736, - "eval_samples_per_second": 10.675, - "eval_steps_per_second": 1.601, - "step": 2 - }, - { - "epoch": 2.0, - "eval_loss": 0.35889530181884766, - "eval_runtime": 1.8792, - "eval_samples_per_second": 10.643, - "eval_steps_per_second": 1.596, - "step": 5 - }, - { - "epoch": 2.8, - "eval_loss": 0.3372827470302582, - "eval_runtime": 1.8739, - "eval_samples_per_second": 10.673, - "eval_steps_per_second": 1.601, - "step": 7 - }, - { - "epoch": 4.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 0.3948, + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 0.2757, "step": 10 }, { - "epoch": 4.0, - "eval_loss": 0.272649347782135, - "eval_runtime": 1.8766, - "eval_samples_per_second": 10.658, - "eval_steps_per_second": 1.599, - "step": 10 + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.0592, + "step": 20 }, { "epoch": 4.8, - "eval_loss": 0.2144305408000946, - "eval_runtime": 1.8768, - "eval_samples_per_second": 10.656, - "eval_steps_per_second": 1.598, - "step": 12 - }, - { - "epoch": 6.0, - "eval_loss": 0.11929650604724884, - "eval_runtime": 1.8692, - "eval_samples_per_second": 10.7, - "eval_steps_per_second": 1.605, - "step": 15 + "learning_rate": 4e-05, + "loss": 0.0387, + "step": 30 }, { - "epoch": 6.8, - "eval_loss": 0.06970790028572083, - "eval_runtime": 1.869, - "eval_samples_per_second": 10.701, - "eval_steps_per_second": 1.605, - "step": 17 + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.0309, + "step": 40 }, { "epoch": 8.0, - "learning_rate": 0.00011999999999999999, - "loss": 0.1551, - "step": 20 + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0243, + "step": 50 }, { - "epoch": 8.0, - "eval_loss": 0.0560903362929821, - "eval_runtime": 1.8656, - "eval_samples_per_second": 10.721, - "eval_steps_per_second": 1.608, - "step": 20 + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.0245, + "step": 60 } ], "logging_steps": 10, - "max_steps": 20, + "max_steps": 60, "num_train_epochs": 10, "save_steps": 500, - "total_flos": 8184378271334400.0, + "total_flos": 1.239525325504512e+16, "trial_name": null, "trial_params": null } diff --git a/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin b/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin index d1bbfa1c279a135c632e653e5d383d86e036022f..4bdf508c3d2c59417b6c951ff9b16e5fc52d2ce5 100644 --- a/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin +++ b/low-shot-task-specific-100-ex/word-sorting/best_model/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc2a8b2e2f9ddeafce47d8123a31984865c2c00940312ef44f0f086aa55882d8 +oid sha256:cbb3c5697845134906855e707ca29a29c08328d8aaae5e277e6a2a225c3dc3c0 size 4091