kpriyanshu256 commited on
Commit
42b7c0f
1 Parent(s): f978574

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/config.json +33 -0
  2. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/merges.txt +0 -0
  3. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/optimizer.pt +3 -0
  4. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/pytorch_model.bin +3 -0
  5. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/rng_state.pth +3 -0
  6. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/scaler.pt +3 -0
  7. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/scheduler.pt +3 -0
  8. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/special_tokens_map.json +1 -0
  9. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/tokenizer.json +0 -0
  10. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/tokenizer_config.json +1 -0
  11. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/trainer_state.json +43 -0
  12. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/training_args.bin +3 -0
  13. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/vocab.json +0 -0
  14. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/config.json +33 -0
  15. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/merges.txt +0 -0
  16. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/optimizer.pt +3 -0
  17. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/pred.pkl +3 -0
  18. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/pytorch_model.bin +3 -0
  19. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/rng_state.pth +3 -0
  20. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/scaler.pt +3 -0
  21. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/scheduler.pt +3 -0
  22. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/special_tokens_map.json +1 -0
  23. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/tokenizer.json +0 -0
  24. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/tokenizer_config.json +1 -0
  25. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/trainer_state.json +391 -0
  26. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/training_args.bin +3 -0
  27. qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/vocab.json +0 -0
  28. qa_sp_codet5p-220m_s2_latex_bs_lr/pred.pkl +3 -0
  29. qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-47-49_babel-3-9/1712836075.2354758/events.out.tfevents.1712836075.babel-3-9 +3 -0
  30. qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-47-49_babel-3-9/events.out.tfevents.1712836075.babel-3-9 +3 -0
  31. qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_15-55-12_babel-0-19/1712865318.2280471/events.out.tfevents.1712865318.babel-0-19 +3 -0
  32. qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_15-55-12_babel-0-19/events.out.tfevents.1712865318.babel-0-19 +3 -0
  33. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/config.json +33 -0
  34. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/merges.txt +0 -0
  35. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/optimizer.pt +3 -0
  36. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/pytorch_model.bin +3 -0
  37. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/rng_state.pth +3 -0
  38. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/scaler.pt +3 -0
  39. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/scheduler.pt +3 -0
  40. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/special_tokens_map.json +1 -0
  41. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/tokenizer.json +0 -0
  42. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/tokenizer_config.json +1 -0
  43. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/trainer_state.json +1762 -0
  44. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/training_args.bin +3 -0
  45. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/vocab.json +0 -0
  46. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/config.json +33 -0
  47. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/merges.txt +0 -0
  48. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/optimizer.pt +3 -0
  49. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/pytorch_model.bin +3 -0
  50. qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/rng_state.pth +3 -0
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2b199d835b3a9c417f2f6bb151c52386f73058fe8ca132fc04d80294b787db
3
+ size 1783209146
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf97e52835a8c401265c24d62e79559510665a39a97915ad2b269f7f68d863ed
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7a15d3701b734f711ba9447109b1ac15c08b4c5e2a22f3d2f28e6556658a05
3
+ size 14244
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00d3917cbd509e7f582529318322380daef6622a72b5be923aad777d57ea55c
3
+ size 988
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11233ca0b65b0c788f9190d2470adbe1dc249e59c48e9d1326f16463ee00f585
3
+ size 1064
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/trainer_state.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 33.3333,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678",
4
+ "epoch": 1.0,
5
+ "global_step": 1678,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0428,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.042,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0417,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 33.3333,
31
+ "eval_loss": 0.0927141085267067,
32
+ "eval_runtime": 1116.0162,
33
+ "eval_samples_per_second": 0.883,
34
+ "eval_steps_per_second": 0.221,
35
+ "step": 1678
36
+ }
37
+ ],
38
+ "max_steps": 100680,
39
+ "num_train_epochs": 60,
40
+ "total_flos": 8174650722877440.0,
41
+ "trial_name": null,
42
+ "trial_params": null
43
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126b84637d570a341b86590116a6a3f0ff46b7abb34f5227dff6f68fb07b880f
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-1678/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f11006904fbece98a0f21f4c2c3f036b07feb2ea0f252a55ebb32780f4d847a
3
+ size 1783209146
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/pred.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6076e044061a19f809efbe5d1357e8ef53cacf56006f799aeb0f41d45ecf455
3
+ size 555166
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ff01a9e0078e9d58afb2861369404cfb54bfc2ef9e63df6b5b34b16f7187b5
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64880422f8358043601b73ad01183a70814fcd156d286919679ba4d388340b32
3
+ size 14244
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65884eef3b444cabeea783dae6d489dafb8b53451285c0591213f079c6d79782
3
+ size 988
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc934e9992a6d47c53c16dc69684e8a27831af98ed6adb63f90cc0459435964
3
+ size 1064
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/trainer_state.json ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 30.8943,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-10068",
4
+ "epoch": 13.0,
5
+ "global_step": 21814,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0427,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.0447,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0341,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 28.8618,
31
+ "eval_loss": 0.09518733620643616,
32
+ "eval_runtime": 1968.4827,
33
+ "eval_samples_per_second": 0.5,
34
+ "eval_steps_per_second": 0.125,
35
+ "step": 1678
36
+ },
37
+ {
38
+ "epoch": 1.19,
39
+ "learning_rate": 4.900874056416369e-05,
40
+ "loss": 0.0411,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 1.49,
45
+ "learning_rate": 4.876092570520461e-05,
46
+ "loss": 0.0328,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 1.79,
51
+ "learning_rate": 4.8513110846245534e-05,
52
+ "loss": 0.0331,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_exact_match": 30.1829,
58
+ "eval_loss": 0.10444394499063492,
59
+ "eval_runtime": 1280.8756,
60
+ "eval_samples_per_second": 0.769,
61
+ "eval_steps_per_second": 0.193,
62
+ "step": 3356
63
+ },
64
+ {
65
+ "epoch": 2.09,
66
+ "learning_rate": 4.826529598728646e-05,
67
+ "loss": 0.03,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 4.8017481128327376e-05,
73
+ "loss": 0.0294,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 2.68,
78
+ "learning_rate": 4.77696662693683e-05,
79
+ "loss": 0.0283,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "learning_rate": 4.752185141040922e-05,
85
+ "loss": 0.027,
86
+ "step": 5000
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_exact_match": 30.7927,
91
+ "eval_loss": 0.10687928646802902,
92
+ "eval_runtime": 1121.635,
93
+ "eval_samples_per_second": 0.878,
94
+ "eval_steps_per_second": 0.22,
95
+ "step": 5034
96
+ },
97
+ {
98
+ "epoch": 3.28,
99
+ "learning_rate": 4.727403655145014e-05,
100
+ "loss": 0.0279,
101
+ "step": 5500
102
+ },
103
+ {
104
+ "epoch": 3.58,
105
+ "learning_rate": 4.7026221692491066e-05,
106
+ "loss": 0.02,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 3.87,
111
+ "learning_rate": 4.677840683353199e-05,
112
+ "loss": 0.0196,
113
+ "step": 6500
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "eval_exact_match": 30.3862,
118
+ "eval_loss": 0.10539323091506958,
119
+ "eval_runtime": 1476.3167,
120
+ "eval_samples_per_second": 0.667,
121
+ "eval_steps_per_second": 0.167,
122
+ "step": 6712
123
+ },
124
+ {
125
+ "epoch": 4.17,
126
+ "learning_rate": 4.653059197457291e-05,
127
+ "loss": 0.0207,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 4.47,
132
+ "learning_rate": 4.628277711561383e-05,
133
+ "loss": 0.0216,
134
+ "step": 7500
135
+ },
136
+ {
137
+ "epoch": 4.77,
138
+ "learning_rate": 4.6035457886372666e-05,
139
+ "loss": 0.0173,
140
+ "step": 8000
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_exact_match": 30.4878,
145
+ "eval_loss": 0.11340699344873428,
146
+ "eval_runtime": 1428.9544,
147
+ "eval_samples_per_second": 0.689,
148
+ "eval_steps_per_second": 0.173,
149
+ "step": 8390
150
+ },
151
+ {
152
+ "epoch": 5.07,
153
+ "learning_rate": 4.578764302741359e-05,
154
+ "loss": 0.016,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 5.36,
159
+ "learning_rate": 4.553982816845451e-05,
160
+ "loss": 0.0162,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 5.66,
165
+ "learning_rate": 4.529201330949543e-05,
166
+ "loss": 0.0174,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 5.96,
171
+ "learning_rate": 4.504419845053635e-05,
172
+ "loss": 0.0141,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 6.0,
177
+ "eval_exact_match": 30.8943,
178
+ "eval_loss": 0.11525732278823853,
179
+ "eval_runtime": 1327.6592,
180
+ "eval_samples_per_second": 0.742,
181
+ "eval_steps_per_second": 0.186,
182
+ "step": 10068
183
+ },
184
+ {
185
+ "epoch": 6.26,
186
+ "learning_rate": 4.479638359157728e-05,
187
+ "loss": 0.0143,
188
+ "step": 10500
189
+ },
190
+ {
191
+ "epoch": 6.56,
192
+ "learning_rate": 4.45485687326182e-05,
193
+ "loss": 0.0133,
194
+ "step": 11000
195
+ },
196
+ {
197
+ "epoch": 6.85,
198
+ "learning_rate": 4.430174513309496e-05,
199
+ "loss": 0.0137,
200
+ "step": 11500
201
+ },
202
+ {
203
+ "epoch": 7.0,
204
+ "eval_exact_match": 29.7764,
205
+ "eval_loss": 0.12133623659610748,
206
+ "eval_runtime": 1913.2501,
207
+ "eval_samples_per_second": 0.515,
208
+ "eval_steps_per_second": 0.129,
209
+ "step": 11746
210
+ },
211
+ {
212
+ "epoch": 7.15,
213
+ "learning_rate": 4.405393027413588e-05,
214
+ "loss": 0.013,
215
+ "step": 12000
216
+ },
217
+ {
218
+ "epoch": 7.45,
219
+ "learning_rate": 4.38061154151768e-05,
220
+ "loss": 0.0108,
221
+ "step": 12500
222
+ },
223
+ {
224
+ "epoch": 7.75,
225
+ "learning_rate": 4.355830055621772e-05,
226
+ "loss": 0.0098,
227
+ "step": 13000
228
+ },
229
+ {
230
+ "epoch": 8.0,
231
+ "eval_exact_match": 29.065,
232
+ "eval_loss": 0.12475814670324326,
233
+ "eval_runtime": 1494.3981,
234
+ "eval_samples_per_second": 0.659,
235
+ "eval_steps_per_second": 0.165,
236
+ "step": 13424
237
+ },
238
+ {
239
+ "epoch": 8.05,
240
+ "learning_rate": 4.331048569725865e-05,
241
+ "loss": 0.0108,
242
+ "step": 13500
243
+ },
244
+ {
245
+ "epoch": 8.34,
246
+ "learning_rate": 4.3062670838299565e-05,
247
+ "loss": 0.0097,
248
+ "step": 14000
249
+ },
250
+ {
251
+ "epoch": 8.64,
252
+ "learning_rate": 4.281485597934049e-05,
253
+ "loss": 0.0106,
254
+ "step": 14500
255
+ },
256
+ {
257
+ "epoch": 8.94,
258
+ "learning_rate": 4.2567041120381406e-05,
259
+ "loss": 0.0079,
260
+ "step": 15000
261
+ },
262
+ {
263
+ "epoch": 9.0,
264
+ "eval_exact_match": 30.6911,
265
+ "eval_loss": 0.12646353244781494,
266
+ "eval_runtime": 1623.2792,
267
+ "eval_samples_per_second": 0.607,
268
+ "eval_steps_per_second": 0.152,
269
+ "step": 15102
270
+ },
271
+ {
272
+ "epoch": 9.24,
273
+ "learning_rate": 4.231922626142233e-05,
274
+ "loss": 0.0081,
275
+ "step": 15500
276
+ },
277
+ {
278
+ "epoch": 9.54,
279
+ "learning_rate": 4.207141140246325e-05,
280
+ "loss": 0.0087,
281
+ "step": 16000
282
+ },
283
+ {
284
+ "epoch": 9.83,
285
+ "learning_rate": 4.182359654350417e-05,
286
+ "loss": 0.0077,
287
+ "step": 16500
288
+ },
289
+ {
290
+ "epoch": 10.0,
291
+ "eval_exact_match": 28.8618,
292
+ "eval_loss": 0.12802913784980774,
293
+ "eval_runtime": 2008.241,
294
+ "eval_samples_per_second": 0.49,
295
+ "eval_steps_per_second": 0.123,
296
+ "step": 16780
297
+ },
298
+ {
299
+ "epoch": 10.13,
300
+ "learning_rate": 4.1576277314263014e-05,
301
+ "loss": 0.008,
302
+ "step": 17000
303
+ },
304
+ {
305
+ "epoch": 10.43,
306
+ "learning_rate": 4.132846245530394e-05,
307
+ "loss": 0.0061,
308
+ "step": 17500
309
+ },
310
+ {
311
+ "epoch": 10.73,
312
+ "learning_rate": 4.1080647596344856e-05,
313
+ "loss": 0.0069,
314
+ "step": 18000
315
+ },
316
+ {
317
+ "epoch": 11.0,
318
+ "eval_exact_match": 28.7602,
319
+ "eval_loss": 0.13084228336811066,
320
+ "eval_runtime": 1715.562,
321
+ "eval_samples_per_second": 0.574,
322
+ "eval_steps_per_second": 0.144,
323
+ "step": 18458
324
+ },
325
+ {
326
+ "epoch": 11.03,
327
+ "learning_rate": 4.083283273738578e-05,
328
+ "loss": 0.007,
329
+ "step": 18500
330
+ },
331
+ {
332
+ "epoch": 11.32,
333
+ "learning_rate": 4.05850178784267e-05,
334
+ "loss": 0.0064,
335
+ "step": 19000
336
+ },
337
+ {
338
+ "epoch": 11.62,
339
+ "learning_rate": 4.033720301946762e-05,
340
+ "loss": 0.0052,
341
+ "step": 19500
342
+ },
343
+ {
344
+ "epoch": 11.92,
345
+ "learning_rate": 4.008938816050854e-05,
346
+ "loss": 0.006,
347
+ "step": 20000
348
+ },
349
+ {
350
+ "epoch": 12.0,
351
+ "eval_exact_match": 27.7439,
352
+ "eval_loss": 0.1305484175682068,
353
+ "eval_runtime": 1246.1348,
354
+ "eval_samples_per_second": 0.79,
355
+ "eval_steps_per_second": 0.198,
356
+ "step": 20136
357
+ },
358
+ {
359
+ "epoch": 12.22,
360
+ "learning_rate": 3.984157330154946e-05,
361
+ "loss": 0.004,
362
+ "step": 20500
363
+ },
364
+ {
365
+ "epoch": 12.51,
366
+ "learning_rate": 3.959375844259039e-05,
367
+ "loss": 0.0048,
368
+ "step": 21000
369
+ },
370
+ {
371
+ "epoch": 12.81,
372
+ "learning_rate": 3.934594358363131e-05,
373
+ "loss": 0.0046,
374
+ "step": 21500
375
+ },
376
+ {
377
+ "epoch": 13.0,
378
+ "eval_exact_match": 29.4715,
379
+ "eval_loss": 0.13410894572734833,
380
+ "eval_runtime": 1841.0978,
381
+ "eval_samples_per_second": 0.535,
382
+ "eval_steps_per_second": 0.134,
383
+ "step": 21814
384
+ }
385
+ ],
386
+ "max_steps": 100680,
387
+ "num_train_epochs": 60,
388
+ "total_flos": 1.0627045939740672e+17,
389
+ "trial_name": null,
390
+ "trial_params": null
391
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db2ad5c152c1d55d4570bf9ba2a2c3f13830b4b5dea3e90709ee118a48b1299
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr/checkpoint-21814/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr/pred.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6076e044061a19f809efbe5d1357e8ef53cacf56006f799aeb0f41d45ecf455
3
+ size 555166
qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-47-49_babel-3-9/1712836075.2354758/events.out.tfevents.1712836075.babel-3-9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a618e80f44b9fa9beb34b1ec14c048ea7a53bade681b91ffdd2ce42c626f45
3
+ size 4981
qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-47-49_babel-3-9/events.out.tfevents.1712836075.babel-3-9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9281584df95d61044ddff8797e2f329b53cf8da63a67053a063f6d53646569
3
+ size 14856
qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_15-55-12_babel-0-19/1712865318.2280471/events.out.tfevents.1712865318.babel-0-19 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237f10a966cbe64a9e25504a0a62c39bff638fe3b8c3eb48cccdf6b55b45d90c
3
+ size 4982
qa_sp_codet5p-220m_s2_latex_bs_lr/runs/Apr11_15-55-12_babel-0-19/events.out.tfevents.1712865318.babel-0-19 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd465648f217e9a3c8d387e7977345264a61a8bd2abd92f3b0cd048fa3b8ebcb
3
+ size 4922
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d432afae039e0950348b78b190886d3039edec39a31bb81a481d2e3fd7c3df9
3
+ size 1783209658
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4589d0aef1a90459ddc2e00d3c565c4b1516f2d4a5384fad944a3a707f544f78
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be86d8d3bdd062828c6b5f3a37a96d2097dd2fb78acb56db133a00c734927a42
3
+ size 14244
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb622555c982d63251635173b31c3aae643dda15a03f9fcc9c073f60b3efba82
3
+ size 988
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0f79d3ceaee3a179770e08eab9dabf3aef7e6160c3bc8173db58196e48f6d5
3
+ size 1064
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/trainer_state.json ADDED
@@ -0,0 +1,1762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 31.9106,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170",
4
+ "epoch": 60.0,
5
+ "global_step": 100680,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0415,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.0459,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0402,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 28.7602,
31
+ "eval_loss": 0.09363532811403275,
32
+ "eval_runtime": 1196.6462,
33
+ "eval_samples_per_second": 0.823,
34
+ "eval_steps_per_second": 0.206,
35
+ "step": 1678
36
+ },
37
+ {
38
+ "epoch": 1.19,
39
+ "learning_rate": 4.900874056416369e-05,
40
+ "loss": 0.0363,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 1.49,
45
+ "learning_rate": 4.876092570520461e-05,
46
+ "loss": 0.0353,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 1.79,
51
+ "learning_rate": 4.8513110846245534e-05,
52
+ "loss": 0.032,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_exact_match": 30.9959,
58
+ "eval_loss": 0.10178329050540924,
59
+ "eval_runtime": 1048.7858,
60
+ "eval_samples_per_second": 0.939,
61
+ "eval_steps_per_second": 0.236,
62
+ "step": 3356
63
+ },
64
+ {
65
+ "epoch": 2.09,
66
+ "learning_rate": 4.826529598728646e-05,
67
+ "loss": 0.0306,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 4.8017481128327376e-05,
73
+ "loss": 0.0256,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 2.68,
78
+ "learning_rate": 4.777016189908622e-05,
79
+ "loss": 0.0257,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "learning_rate": 4.7522347040127135e-05,
85
+ "loss": 0.0305,
86
+ "step": 5000
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_exact_match": 30.8943,
91
+ "eval_loss": 0.10773847997188568,
92
+ "eval_runtime": 1007.6522,
93
+ "eval_samples_per_second": 0.978,
94
+ "eval_steps_per_second": 0.245,
95
+ "step": 5034
96
+ },
97
+ {
98
+ "epoch": 3.28,
99
+ "learning_rate": 4.727453218116806e-05,
100
+ "loss": 0.0266,
101
+ "step": 5500
102
+ },
103
+ {
104
+ "epoch": 3.58,
105
+ "learning_rate": 4.7026717322208976e-05,
106
+ "loss": 0.0227,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 3.87,
111
+ "learning_rate": 4.67789024632499e-05,
112
+ "loss": 0.0193,
113
+ "step": 6500
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "eval_exact_match": 30.2846,
118
+ "eval_loss": 0.1102108359336853,
119
+ "eval_runtime": 1099.1827,
120
+ "eval_samples_per_second": 0.896,
121
+ "eval_steps_per_second": 0.225,
122
+ "step": 6712
123
+ },
124
+ {
125
+ "epoch": 4.17,
126
+ "learning_rate": 4.6531087604290825e-05,
127
+ "loss": 0.0218,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 4.47,
132
+ "learning_rate": 4.628327274533175e-05,
133
+ "loss": 0.0202,
134
+ "step": 7500
135
+ },
136
+ {
137
+ "epoch": 4.77,
138
+ "learning_rate": 4.6035457886372666e-05,
139
+ "loss": 0.0169,
140
+ "step": 8000
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_exact_match": 28.7602,
145
+ "eval_loss": 0.11473795026540756,
146
+ "eval_runtime": 925.7323,
147
+ "eval_samples_per_second": 1.064,
148
+ "eval_steps_per_second": 0.267,
149
+ "step": 8390
150
+ },
151
+ {
152
+ "epoch": 5.07,
153
+ "learning_rate": 4.578764302741359e-05,
154
+ "loss": 0.018,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 5.36,
159
+ "learning_rate": 4.554032379817243e-05,
160
+ "loss": 0.0136,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 5.66,
165
+ "learning_rate": 4.529250893921335e-05,
166
+ "loss": 0.0152,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 5.96,
171
+ "learning_rate": 4.5044694080254274e-05,
172
+ "loss": 0.0164,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 6.0,
177
+ "eval_exact_match": 30.3862,
178
+ "eval_loss": 0.11702010035514832,
179
+ "eval_runtime": 731.1347,
180
+ "eval_samples_per_second": 1.347,
181
+ "eval_steps_per_second": 0.338,
182
+ "step": 10068
183
+ },
184
+ {
185
+ "epoch": 6.26,
186
+ "learning_rate": 4.479687922129519e-05,
187
+ "loss": 0.0143,
188
+ "step": 10500
189
+ },
190
+ {
191
+ "epoch": 6.56,
192
+ "learning_rate": 4.4549064362336116e-05,
193
+ "loss": 0.0168,
194
+ "step": 11000
195
+ },
196
+ {
197
+ "epoch": 6.85,
198
+ "learning_rate": 4.430124950337704e-05,
199
+ "loss": 0.0107,
200
+ "step": 11500
201
+ },
202
+ {
203
+ "epoch": 7.0,
204
+ "eval_exact_match": 30.3862,
205
+ "eval_loss": 0.11999832093715668,
206
+ "eval_runtime": 829.5941,
207
+ "eval_samples_per_second": 1.187,
208
+ "eval_steps_per_second": 0.298,
209
+ "step": 11746
210
+ },
211
+ {
212
+ "epoch": 7.15,
213
+ "learning_rate": 4.4053434644417964e-05,
214
+ "loss": 0.0129,
215
+ "step": 12000
216
+ },
217
+ {
218
+ "epoch": 7.45,
219
+ "learning_rate": 4.38061154151768e-05,
220
+ "loss": 0.0101,
221
+ "step": 12500
222
+ },
223
+ {
224
+ "epoch": 7.75,
225
+ "learning_rate": 4.355830055621772e-05,
226
+ "loss": 0.0106,
227
+ "step": 13000
228
+ },
229
+ {
230
+ "epoch": 8.0,
231
+ "eval_exact_match": 31.3008,
232
+ "eval_loss": 0.12672394514083862,
233
+ "eval_runtime": 812.293,
234
+ "eval_samples_per_second": 1.213,
235
+ "eval_steps_per_second": 0.304,
236
+ "step": 13424
237
+ },
238
+ {
239
+ "epoch": 8.05,
240
+ "learning_rate": 4.331048569725865e-05,
241
+ "loss": 0.0111,
242
+ "step": 13500
243
+ },
244
+ {
245
+ "epoch": 8.34,
246
+ "learning_rate": 4.3062670838299565e-05,
247
+ "loss": 0.0105,
248
+ "step": 14000
249
+ },
250
+ {
251
+ "epoch": 8.64,
252
+ "learning_rate": 4.281485597934049e-05,
253
+ "loss": 0.0087,
254
+ "step": 14500
255
+ },
256
+ {
257
+ "epoch": 8.94,
258
+ "learning_rate": 4.2567041120381406e-05,
259
+ "loss": 0.0097,
260
+ "step": 15000
261
+ },
262
+ {
263
+ "epoch": 9.0,
264
+ "eval_exact_match": 31.1992,
265
+ "eval_loss": 0.12798507511615753,
266
+ "eval_runtime": 856.8147,
267
+ "eval_samples_per_second": 1.15,
268
+ "eval_steps_per_second": 0.288,
269
+ "step": 15102
270
+ },
271
+ {
272
+ "epoch": 9.24,
273
+ "learning_rate": 4.231922626142233e-05,
274
+ "loss": 0.0086,
275
+ "step": 15500
276
+ },
277
+ {
278
+ "epoch": 9.54,
279
+ "learning_rate": 4.207141140246325e-05,
280
+ "loss": 0.0067,
281
+ "step": 16000
282
+ },
283
+ {
284
+ "epoch": 9.83,
285
+ "learning_rate": 4.1824092173222096e-05,
286
+ "loss": 0.008,
287
+ "step": 16500
288
+ },
289
+ {
290
+ "epoch": 10.0,
291
+ "eval_exact_match": 29.9797,
292
+ "eval_loss": 0.12833493947982788,
293
+ "eval_runtime": 1066.2236,
294
+ "eval_samples_per_second": 0.924,
295
+ "eval_steps_per_second": 0.232,
296
+ "step": 16780
297
+ },
298
+ {
299
+ "epoch": 10.13,
300
+ "learning_rate": 4.1576277314263014e-05,
301
+ "loss": 0.0075,
302
+ "step": 17000
303
+ },
304
+ {
305
+ "epoch": 10.43,
306
+ "learning_rate": 4.132846245530394e-05,
307
+ "loss": 0.0058,
308
+ "step": 17500
309
+ },
310
+ {
311
+ "epoch": 10.73,
312
+ "learning_rate": 4.1080647596344856e-05,
313
+ "loss": 0.0073,
314
+ "step": 18000
315
+ },
316
+ {
317
+ "epoch": 11.0,
318
+ "eval_exact_match": 28.9634,
319
+ "eval_loss": 0.13248516619205475,
320
+ "eval_runtime": 725.665,
321
+ "eval_samples_per_second": 1.357,
322
+ "eval_steps_per_second": 0.34,
323
+ "step": 18458
324
+ },
325
+ {
326
+ "epoch": 11.03,
327
+ "learning_rate": 4.083283273738578e-05,
328
+ "loss": 0.0076,
329
+ "step": 18500
330
+ },
331
+ {
332
+ "epoch": 11.32,
333
+ "learning_rate": 4.05850178784267e-05,
334
+ "loss": 0.0055,
335
+ "step": 19000
336
+ },
337
+ {
338
+ "epoch": 11.62,
339
+ "learning_rate": 4.033720301946762e-05,
340
+ "loss": 0.0048,
341
+ "step": 19500
342
+ },
343
+ {
344
+ "epoch": 11.92,
345
+ "learning_rate": 4.008938816050854e-05,
346
+ "loss": 0.006,
347
+ "step": 20000
348
+ },
349
+ {
350
+ "epoch": 12.0,
351
+ "eval_exact_match": 27.8455,
352
+ "eval_loss": 0.13396009802818298,
353
+ "eval_runtime": 912.143,
354
+ "eval_samples_per_second": 1.08,
355
+ "eval_steps_per_second": 0.271,
356
+ "step": 20136
357
+ },
358
+ {
359
+ "epoch": 12.22,
360
+ "learning_rate": 3.984206893126738e-05,
361
+ "loss": 0.0051,
362
+ "step": 20500
363
+ },
364
+ {
365
+ "epoch": 12.51,
366
+ "learning_rate": 3.959425407230831e-05,
367
+ "loss": 0.0043,
368
+ "step": 21000
369
+ },
370
+ {
371
+ "epoch": 12.81,
372
+ "learning_rate": 3.934643921334923e-05,
373
+ "loss": 0.0052,
374
+ "step": 21500
375
+ },
376
+ {
377
+ "epoch": 13.0,
378
+ "eval_exact_match": 29.065,
379
+ "eval_loss": 0.1362345665693283,
380
+ "eval_runtime": 900.0703,
381
+ "eval_samples_per_second": 1.094,
382
+ "eval_steps_per_second": 0.274,
383
+ "step": 21814
384
+ },
385
+ {
386
+ "epoch": 13.11,
387
+ "learning_rate": 3.9099119984108064e-05,
388
+ "loss": 0.0055,
389
+ "step": 22000
390
+ },
391
+ {
392
+ "epoch": 13.41,
393
+ "learning_rate": 3.885130512514899e-05,
394
+ "loss": 0.0045,
395
+ "step": 22500
396
+ },
397
+ {
398
+ "epoch": 13.71,
399
+ "learning_rate": 3.860349026618991e-05,
400
+ "loss": 0.004,
401
+ "step": 23000
402
+ },
403
+ {
404
+ "epoch": 14.0,
405
+ "eval_exact_match": 28.8618,
406
+ "eval_loss": 0.1380210816860199,
407
+ "eval_runtime": 947.4267,
408
+ "eval_samples_per_second": 1.04,
409
+ "eval_steps_per_second": 0.261,
410
+ "step": 23492
411
+ },
412
+ {
413
+ "epoch": 14.0,
414
+ "learning_rate": 3.835567540723083e-05,
415
+ "loss": 0.0036,
416
+ "step": 23500
417
+ },
418
+ {
419
+ "epoch": 14.3,
420
+ "learning_rate": 3.8107860548271754e-05,
421
+ "loss": 0.0045,
422
+ "step": 24000
423
+ },
424
+ {
425
+ "epoch": 14.6,
426
+ "learning_rate": 3.786004568931268e-05,
427
+ "loss": 0.0029,
428
+ "step": 24500
429
+ },
430
+ {
431
+ "epoch": 14.9,
432
+ "learning_rate": 3.761272646007151e-05,
433
+ "loss": 0.0034,
434
+ "step": 25000
435
+ },
436
+ {
437
+ "epoch": 15.0,
438
+ "eval_exact_match": 31.9106,
439
+ "eval_loss": 0.13853412866592407,
440
+ "eval_runtime": 978.5191,
441
+ "eval_samples_per_second": 1.007,
442
+ "eval_steps_per_second": 0.252,
443
+ "step": 25170
444
+ },
445
+ {
446
+ "epoch": 15.2,
447
+ "learning_rate": 3.736491160111244e-05,
448
+ "loss": 0.0021,
449
+ "step": 25500
450
+ },
451
+ {
452
+ "epoch": 15.49,
453
+ "learning_rate": 3.7117096742153355e-05,
454
+ "loss": 0.0038,
455
+ "step": 26000
456
+ },
457
+ {
458
+ "epoch": 15.79,
459
+ "learning_rate": 3.686928188319428e-05,
460
+ "loss": 0.0029,
461
+ "step": 26500
462
+ },
463
+ {
464
+ "epoch": 16.0,
465
+ "eval_exact_match": 29.5732,
466
+ "eval_loss": 0.1424587368965149,
467
+ "eval_runtime": 1082.2714,
468
+ "eval_samples_per_second": 0.91,
469
+ "eval_steps_per_second": 0.228,
470
+ "step": 26848
471
+ },
472
+ {
473
+ "epoch": 16.09,
474
+ "learning_rate": 3.6621467024235196e-05,
475
+ "loss": 0.0026,
476
+ "step": 27000
477
+ },
478
+ {
479
+ "epoch": 16.39,
480
+ "learning_rate": 3.637365216527613e-05,
481
+ "loss": 0.0027,
482
+ "step": 27500
483
+ },
484
+ {
485
+ "epoch": 16.69,
486
+ "learning_rate": 3.6125837306317045e-05,
487
+ "loss": 0.0031,
488
+ "step": 28000
489
+ },
490
+ {
491
+ "epoch": 16.98,
492
+ "learning_rate": 3.587802244735797e-05,
493
+ "loss": 0.0026,
494
+ "step": 28500
495
+ },
496
+ {
497
+ "epoch": 17.0,
498
+ "eval_exact_match": 27.7439,
499
+ "eval_loss": 0.14451779425144196,
500
+ "eval_runtime": 1009.6445,
501
+ "eval_samples_per_second": 0.976,
502
+ "eval_steps_per_second": 0.245,
503
+ "step": 28526
504
+ },
505
+ {
506
+ "epoch": 17.28,
507
+ "learning_rate": 3.5630207588398886e-05,
508
+ "loss": 0.0022,
509
+ "step": 29000
510
+ },
511
+ {
512
+ "epoch": 17.58,
513
+ "learning_rate": 3.538288835915773e-05,
514
+ "loss": 0.0022,
515
+ "step": 29500
516
+ },
517
+ {
518
+ "epoch": 17.88,
519
+ "learning_rate": 3.513507350019865e-05,
520
+ "loss": 0.0023,
521
+ "step": 30000
522
+ },
523
+ {
524
+ "epoch": 18.0,
525
+ "eval_exact_match": 29.4715,
526
+ "eval_loss": 0.14251776039600372,
527
+ "eval_runtime": 887.5576,
528
+ "eval_samples_per_second": 1.11,
529
+ "eval_steps_per_second": 0.278,
530
+ "step": 30204
531
+ },
532
+ {
533
+ "epoch": 18.18,
534
+ "learning_rate": 3.488725864123957e-05,
535
+ "loss": 0.0024,
536
+ "step": 30500
537
+ },
538
+ {
539
+ "epoch": 18.47,
540
+ "learning_rate": 3.4639443782280494e-05,
541
+ "loss": 0.0018,
542
+ "step": 31000
543
+ },
544
+ {
545
+ "epoch": 18.77,
546
+ "learning_rate": 3.439162892332141e-05,
547
+ "loss": 0.0024,
548
+ "step": 31500
549
+ },
550
+ {
551
+ "epoch": 19.0,
552
+ "eval_exact_match": 31.3008,
553
+ "eval_loss": 0.14469841122627258,
554
+ "eval_runtime": 877.9702,
555
+ "eval_samples_per_second": 1.122,
556
+ "eval_steps_per_second": 0.281,
557
+ "step": 31882
558
+ },
559
+ {
560
+ "epoch": 19.07,
561
+ "learning_rate": 3.414381406436234e-05,
562
+ "loss": 0.0021,
563
+ "step": 32000
564
+ },
565
+ {
566
+ "epoch": 19.37,
567
+ "learning_rate": 3.389649483512118e-05,
568
+ "loss": 0.0018,
569
+ "step": 32500
570
+ },
571
+ {
572
+ "epoch": 19.67,
573
+ "learning_rate": 3.36486799761621e-05,
574
+ "loss": 0.0018,
575
+ "step": 33000
576
+ },
577
+ {
578
+ "epoch": 19.96,
579
+ "learning_rate": 3.340136074692094e-05,
580
+ "loss": 0.0021,
581
+ "step": 33500
582
+ },
583
+ {
584
+ "epoch": 20.0,
585
+ "eval_exact_match": 28.9634,
586
+ "eval_loss": 0.14738105237483978,
587
+ "eval_runtime": 873.8193,
588
+ "eval_samples_per_second": 1.127,
589
+ "eval_steps_per_second": 0.283,
590
+ "step": 33560
591
+ },
592
+ {
593
+ "epoch": 20.26,
594
+ "learning_rate": 3.315354588796186e-05,
595
+ "loss": 0.0016,
596
+ "step": 34000
597
+ },
598
+ {
599
+ "epoch": 20.56,
600
+ "learning_rate": 3.2905731029002785e-05,
601
+ "loss": 0.0017,
602
+ "step": 34500
603
+ },
604
+ {
605
+ "epoch": 20.86,
606
+ "learning_rate": 3.26579161700437e-05,
607
+ "loss": 0.0017,
608
+ "step": 35000
609
+ },
610
+ {
611
+ "epoch": 21.0,
612
+ "eval_exact_match": 29.6748,
613
+ "eval_loss": 0.14746956527233124,
614
+ "eval_runtime": 750.7375,
615
+ "eval_samples_per_second": 1.312,
616
+ "eval_steps_per_second": 0.329,
617
+ "step": 35238
618
+ },
619
+ {
620
+ "epoch": 21.16,
621
+ "learning_rate": 3.241010131108463e-05,
622
+ "loss": 0.0015,
623
+ "step": 35500
624
+ },
625
+ {
626
+ "epoch": 21.45,
627
+ "learning_rate": 3.216228645212555e-05,
628
+ "loss": 0.0016,
629
+ "step": 36000
630
+ },
631
+ {
632
+ "epoch": 21.75,
633
+ "learning_rate": 3.1914471593166475e-05,
634
+ "loss": 0.0014,
635
+ "step": 36500
636
+ },
637
+ {
638
+ "epoch": 22.0,
639
+ "eval_exact_match": 28.1504,
640
+ "eval_loss": 0.14879275858402252,
641
+ "eval_runtime": 1523.8109,
642
+ "eval_samples_per_second": 0.646,
643
+ "eval_steps_per_second": 0.162,
644
+ "step": 36916
645
+ },
646
+ {
647
+ "epoch": 22.05,
648
+ "learning_rate": 3.166665673420739e-05,
649
+ "loss": 0.0015,
650
+ "step": 37000
651
+ },
652
+ {
653
+ "epoch": 22.35,
654
+ "learning_rate": 3.1418841875248316e-05,
655
+ "loss": 0.0014,
656
+ "step": 37500
657
+ },
658
+ {
659
+ "epoch": 22.65,
660
+ "learning_rate": 3.1171027016289234e-05,
661
+ "loss": 0.0013,
662
+ "step": 38000
663
+ },
664
+ {
665
+ "epoch": 22.94,
666
+ "learning_rate": 3.092321215733016e-05,
667
+ "loss": 0.0014,
668
+ "step": 38500
669
+ },
670
+ {
671
+ "epoch": 23.0,
672
+ "eval_exact_match": 28.1504,
673
+ "eval_loss": 0.149652898311615,
674
+ "eval_runtime": 1033.6604,
675
+ "eval_samples_per_second": 0.953,
676
+ "eval_steps_per_second": 0.239,
677
+ "step": 38594
678
+ },
679
+ {
680
+ "epoch": 23.24,
681
+ "learning_rate": 3.0675397298371075e-05,
682
+ "loss": 0.0011,
683
+ "step": 39000
684
+ },
685
+ {
686
+ "epoch": 23.54,
687
+ "learning_rate": 3.042807806912991e-05,
688
+ "loss": 0.0016,
689
+ "step": 39500
690
+ },
691
+ {
692
+ "epoch": 23.84,
693
+ "learning_rate": 3.0180263210170838e-05,
694
+ "loss": 0.0012,
695
+ "step": 40000
696
+ },
697
+ {
698
+ "epoch": 24.0,
699
+ "eval_exact_match": 28.6585,
700
+ "eval_loss": 0.15041251480579376,
701
+ "eval_runtime": 944.0607,
702
+ "eval_samples_per_second": 1.043,
703
+ "eval_steps_per_second": 0.262,
704
+ "step": 40272
705
+ },
706
+ {
707
+ "epoch": 24.14,
708
+ "learning_rate": 2.9932448351211762e-05,
709
+ "loss": 0.001,
710
+ "step": 40500
711
+ },
712
+ {
713
+ "epoch": 24.43,
714
+ "learning_rate": 2.9684633492252683e-05,
715
+ "loss": 0.0011,
716
+ "step": 41000
717
+ },
718
+ {
719
+ "epoch": 24.73,
720
+ "learning_rate": 2.9436818633293607e-05,
721
+ "loss": 0.0012,
722
+ "step": 41500
723
+ },
724
+ {
725
+ "epoch": 25.0,
726
+ "eval_exact_match": 28.9634,
727
+ "eval_loss": 0.1517140120267868,
728
+ "eval_runtime": 1034.4416,
729
+ "eval_samples_per_second": 0.952,
730
+ "eval_steps_per_second": 0.239,
731
+ "step": 41950
732
+ },
733
+ {
734
+ "epoch": 25.03,
735
+ "learning_rate": 2.918900377433452e-05,
736
+ "loss": 0.0011,
737
+ "step": 42000
738
+ },
739
+ {
740
+ "epoch": 25.33,
741
+ "learning_rate": 2.8941188915375445e-05,
742
+ "loss": 0.0012,
743
+ "step": 42500
744
+ },
745
+ {
746
+ "epoch": 25.63,
747
+ "learning_rate": 2.8693374056416366e-05,
748
+ "loss": 0.0011,
749
+ "step": 43000
750
+ },
751
+ {
752
+ "epoch": 25.92,
753
+ "learning_rate": 2.844555919745729e-05,
754
+ "loss": 0.0009,
755
+ "step": 43500
756
+ },
757
+ {
758
+ "epoch": 26.0,
759
+ "eval_exact_match": 26.8293,
760
+ "eval_loss": 0.15073440968990326,
761
+ "eval_runtime": 956.8294,
762
+ "eval_samples_per_second": 1.029,
763
+ "eval_steps_per_second": 0.258,
764
+ "step": 43628
765
+ },
766
+ {
767
+ "epoch": 26.22,
768
+ "learning_rate": 2.819823996821613e-05,
769
+ "loss": 0.0011,
770
+ "step": 44000
771
+ },
772
+ {
773
+ "epoch": 26.52,
774
+ "learning_rate": 2.795042510925705e-05,
775
+ "loss": 0.0011,
776
+ "step": 44500
777
+ },
778
+ {
779
+ "epoch": 26.82,
780
+ "learning_rate": 2.7702610250297977e-05,
781
+ "loss": 0.0008,
782
+ "step": 45000
783
+ },
784
+ {
785
+ "epoch": 27.0,
786
+ "eval_exact_match": 28.8618,
787
+ "eval_loss": 0.1522374302148819,
788
+ "eval_runtime": 1118.1764,
789
+ "eval_samples_per_second": 0.881,
790
+ "eval_steps_per_second": 0.221,
791
+ "step": 45306
792
+ },
793
+ {
794
+ "epoch": 27.12,
795
+ "learning_rate": 2.74547953913389e-05,
796
+ "loss": 0.001,
797
+ "step": 45500
798
+ },
799
+ {
800
+ "epoch": 27.41,
801
+ "learning_rate": 2.7207476162097733e-05,
802
+ "loss": 0.0008,
803
+ "step": 46000
804
+ },
805
+ {
806
+ "epoch": 27.71,
807
+ "learning_rate": 2.6959661303138657e-05,
808
+ "loss": 0.0009,
809
+ "step": 46500
810
+ },
811
+ {
812
+ "epoch": 28.0,
813
+ "eval_exact_match": 29.065,
814
+ "eval_loss": 0.1529761403799057,
815
+ "eval_runtime": 885.6394,
816
+ "eval_samples_per_second": 1.112,
817
+ "eval_steps_per_second": 0.279,
818
+ "step": 46984
819
+ },
820
+ {
821
+ "epoch": 28.01,
822
+ "learning_rate": 2.671184644417957e-05,
823
+ "loss": 0.0007,
824
+ "step": 47000
825
+ },
826
+ {
827
+ "epoch": 28.31,
828
+ "learning_rate": 2.64640315852205e-05,
829
+ "loss": 0.0007,
830
+ "step": 47500
831
+ },
832
+ {
833
+ "epoch": 28.61,
834
+ "learning_rate": 2.6216216726261423e-05,
835
+ "loss": 0.0008,
836
+ "step": 48000
837
+ },
838
+ {
839
+ "epoch": 28.9,
840
+ "learning_rate": 2.596889749702026e-05,
841
+ "loss": 0.0008,
842
+ "step": 48500
843
+ },
844
+ {
845
+ "epoch": 29.0,
846
+ "eval_exact_match": 29.065,
847
+ "eval_loss": 0.15399600565433502,
848
+ "eval_runtime": 1034.2005,
849
+ "eval_samples_per_second": 0.952,
850
+ "eval_steps_per_second": 0.239,
851
+ "step": 48662
852
+ },
853
+ {
854
+ "epoch": 29.2,
855
+ "learning_rate": 2.5721082638061182e-05,
856
+ "loss": 0.0007,
857
+ "step": 49000
858
+ },
859
+ {
860
+ "epoch": 29.5,
861
+ "learning_rate": 2.54732677791021e-05,
862
+ "loss": 0.0008,
863
+ "step": 49500
864
+ },
865
+ {
866
+ "epoch": 29.8,
867
+ "learning_rate": 2.5225948549860945e-05,
868
+ "loss": 0.0007,
869
+ "step": 50000
870
+ },
871
+ {
872
+ "epoch": 30.0,
873
+ "eval_exact_match": 29.2683,
874
+ "eval_loss": 0.1526673436164856,
875
+ "eval_runtime": 1244.1541,
876
+ "eval_samples_per_second": 0.792,
877
+ "eval_steps_per_second": 0.199,
878
+ "step": 50340
879
+ },
880
+ {
881
+ "epoch": 30.1,
882
+ "learning_rate": 2.4978133690901865e-05,
883
+ "loss": 0.0007,
884
+ "step": 50500
885
+ },
886
+ {
887
+ "epoch": 30.39,
888
+ "learning_rate": 2.4730318831942786e-05,
889
+ "loss": 0.0007,
890
+ "step": 51000
891
+ },
892
+ {
893
+ "epoch": 30.69,
894
+ "learning_rate": 2.4482999602701628e-05,
895
+ "loss": 0.0007,
896
+ "step": 51500
897
+ },
898
+ {
899
+ "epoch": 30.99,
900
+ "learning_rate": 2.423518474374255e-05,
901
+ "loss": 0.0006,
902
+ "step": 52000
903
+ },
904
+ {
905
+ "epoch": 31.0,
906
+ "eval_exact_match": 27.8455,
907
+ "eval_loss": 0.15256962180137634,
908
+ "eval_runtime": 917.3541,
909
+ "eval_samples_per_second": 1.074,
910
+ "eval_steps_per_second": 0.269,
911
+ "step": 52018
912
+ },
913
+ {
914
+ "epoch": 31.29,
915
+ "learning_rate": 2.3987369884783473e-05,
916
+ "loss": 0.0007,
917
+ "step": 52500
918
+ },
919
+ {
920
+ "epoch": 31.59,
921
+ "learning_rate": 2.3739555025824394e-05,
922
+ "loss": 0.0005,
923
+ "step": 53000
924
+ },
925
+ {
926
+ "epoch": 31.88,
927
+ "learning_rate": 2.3491740166865315e-05,
928
+ "loss": 0.0006,
929
+ "step": 53500
930
+ },
931
+ {
932
+ "epoch": 32.0,
933
+ "eval_exact_match": 28.3537,
934
+ "eval_loss": 0.15374213457107544,
935
+ "eval_runtime": 1281.8169,
936
+ "eval_samples_per_second": 0.768,
937
+ "eval_steps_per_second": 0.193,
938
+ "step": 53696
939
+ },
940
+ {
941
+ "epoch": 32.18,
942
+ "learning_rate": 2.324392530790624e-05,
943
+ "loss": 0.0005,
944
+ "step": 54000
945
+ },
946
+ {
947
+ "epoch": 32.48,
948
+ "learning_rate": 2.299611044894716e-05,
949
+ "loss": 0.0005,
950
+ "step": 54500
951
+ },
952
+ {
953
+ "epoch": 32.78,
954
+ "learning_rate": 2.274829558998808e-05,
955
+ "loss": 0.0006,
956
+ "step": 55000
957
+ },
958
+ {
959
+ "epoch": 33.0,
960
+ "eval_exact_match": 28.1504,
961
+ "eval_loss": 0.15262377262115479,
962
+ "eval_runtime": 1067.8103,
963
+ "eval_samples_per_second": 0.922,
964
+ "eval_steps_per_second": 0.231,
965
+ "step": 55374
966
+ },
967
+ {
968
+ "epoch": 33.08,
969
+ "learning_rate": 2.2500480731029e-05,
970
+ "loss": 0.0006,
971
+ "step": 55500
972
+ },
973
+ {
974
+ "epoch": 33.37,
975
+ "learning_rate": 2.2252665872069922e-05,
976
+ "loss": 0.0005,
977
+ "step": 56000
978
+ },
979
+ {
980
+ "epoch": 33.67,
981
+ "learning_rate": 2.2004851013110846e-05,
982
+ "loss": 0.0005,
983
+ "step": 56500
984
+ },
985
+ {
986
+ "epoch": 33.97,
987
+ "learning_rate": 2.1757036154151767e-05,
988
+ "loss": 0.0006,
989
+ "step": 57000
990
+ },
991
+ {
992
+ "epoch": 34.0,
993
+ "eval_exact_match": 28.1504,
994
+ "eval_loss": 0.1557828038930893,
995
+ "eval_runtime": 795.8492,
996
+ "eval_samples_per_second": 1.238,
997
+ "eval_steps_per_second": 0.31,
998
+ "step": 57052
999
+ },
1000
+ {
1001
+ "epoch": 34.27,
1002
+ "learning_rate": 2.150971692491061e-05,
1003
+ "loss": 0.0004,
1004
+ "step": 57500
1005
+ },
1006
+ {
1007
+ "epoch": 34.56,
1008
+ "learning_rate": 2.126190206595153e-05,
1009
+ "loss": 0.0005,
1010
+ "step": 58000
1011
+ },
1012
+ {
1013
+ "epoch": 34.86,
1014
+ "learning_rate": 2.101408720699245e-05,
1015
+ "loss": 0.0004,
1016
+ "step": 58500
1017
+ },
1018
+ {
1019
+ "epoch": 35.0,
1020
+ "eval_exact_match": 29.4715,
1021
+ "eval_loss": 0.15758809447288513,
1022
+ "eval_runtime": 1178.0996,
1023
+ "eval_samples_per_second": 0.836,
1024
+ "eval_steps_per_second": 0.21,
1025
+ "step": 58730
1026
+ },
1027
+ {
1028
+ "epoch": 35.16,
1029
+ "learning_rate": 2.076627234803337e-05,
1030
+ "loss": 0.0005,
1031
+ "step": 59000
1032
+ },
1033
+ {
1034
+ "epoch": 35.46,
1035
+ "learning_rate": 2.0518457489074292e-05,
1036
+ "loss": 0.0004,
1037
+ "step": 59500
1038
+ },
1039
+ {
1040
+ "epoch": 35.76,
1041
+ "learning_rate": 2.0270642630115213e-05,
1042
+ "loss": 0.0004,
1043
+ "step": 60000
1044
+ },
1045
+ {
1046
+ "epoch": 36.0,
1047
+ "eval_exact_match": 30.9959,
1048
+ "eval_loss": 0.15651802718639374,
1049
+ "eval_runtime": 827.9283,
1050
+ "eval_samples_per_second": 1.19,
1051
+ "eval_steps_per_second": 0.298,
1052
+ "step": 60408
1053
+ },
1054
+ {
1055
+ "epoch": 36.05,
1056
+ "learning_rate": 2.0022827771156134e-05,
1057
+ "loss": 0.0004,
1058
+ "step": 60500
1059
+ },
1060
+ {
1061
+ "epoch": 36.35,
1062
+ "learning_rate": 1.977501291219706e-05,
1063
+ "loss": 0.0004,
1064
+ "step": 61000
1065
+ },
1066
+ {
1067
+ "epoch": 36.65,
1068
+ "learning_rate": 1.9527198053237982e-05,
1069
+ "loss": 0.0004,
1070
+ "step": 61500
1071
+ },
1072
+ {
1073
+ "epoch": 36.95,
1074
+ "learning_rate": 1.9279878823996824e-05,
1075
+ "loss": 0.0004,
1076
+ "step": 62000
1077
+ },
1078
+ {
1079
+ "epoch": 37.0,
1080
+ "eval_exact_match": 29.2683,
1081
+ "eval_loss": 0.15583892166614532,
1082
+ "eval_runtime": 962.3074,
1083
+ "eval_samples_per_second": 1.024,
1084
+ "eval_steps_per_second": 0.257,
1085
+ "step": 62086
1086
+ },
1087
+ {
1088
+ "epoch": 37.25,
1089
+ "learning_rate": 1.9032063965037745e-05,
1090
+ "loss": 0.0004,
1091
+ "step": 62500
1092
+ },
1093
+ {
1094
+ "epoch": 37.54,
1095
+ "learning_rate": 1.8784249106078665e-05,
1096
+ "loss": 0.0003,
1097
+ "step": 63000
1098
+ },
1099
+ {
1100
+ "epoch": 37.84,
1101
+ "learning_rate": 1.8536434247119586e-05,
1102
+ "loss": 0.0003,
1103
+ "step": 63500
1104
+ },
1105
+ {
1106
+ "epoch": 38.0,
1107
+ "eval_exact_match": 29.9797,
1108
+ "eval_loss": 0.15561942756175995,
1109
+ "eval_runtime": 1021.3453,
1110
+ "eval_samples_per_second": 0.964,
1111
+ "eval_steps_per_second": 0.242,
1112
+ "step": 63764
1113
+ },
1114
+ {
1115
+ "epoch": 38.14,
1116
+ "learning_rate": 1.8288619388160507e-05,
1117
+ "loss": 0.0003,
1118
+ "step": 64000
1119
+ },
1120
+ {
1121
+ "epoch": 38.44,
1122
+ "learning_rate": 1.804130015891935e-05,
1123
+ "loss": 0.0003,
1124
+ "step": 64500
1125
+ },
1126
+ {
1127
+ "epoch": 38.74,
1128
+ "learning_rate": 1.779348529996027e-05,
1129
+ "loss": 0.0003,
1130
+ "step": 65000
1131
+ },
1132
+ {
1133
+ "epoch": 39.0,
1134
+ "eval_exact_match": 31.1992,
1135
+ "eval_loss": 0.1581706553697586,
1136
+ "eval_runtime": 924.9891,
1137
+ "eval_samples_per_second": 1.065,
1138
+ "eval_steps_per_second": 0.267,
1139
+ "step": 65442
1140
+ },
1141
+ {
1142
+ "epoch": 39.03,
1143
+ "learning_rate": 1.754567044100119e-05,
1144
+ "loss": 0.0003,
1145
+ "step": 65500
1146
+ },
1147
+ {
1148
+ "epoch": 39.33,
1149
+ "learning_rate": 1.729785558204211e-05,
1150
+ "loss": 0.0003,
1151
+ "step": 66000
1152
+ },
1153
+ {
1154
+ "epoch": 39.63,
1155
+ "learning_rate": 1.7050040723083035e-05,
1156
+ "loss": 0.0003,
1157
+ "step": 66500
1158
+ },
1159
+ {
1160
+ "epoch": 39.93,
1161
+ "learning_rate": 1.6802225864123956e-05,
1162
+ "loss": 0.0002,
1163
+ "step": 67000
1164
+ },
1165
+ {
1166
+ "epoch": 40.0,
1167
+ "eval_exact_match": 29.6748,
1168
+ "eval_loss": 0.15863555669784546,
1169
+ "eval_runtime": 1123.2159,
1170
+ "eval_samples_per_second": 0.877,
1171
+ "eval_steps_per_second": 0.22,
1172
+ "step": 67120
1173
+ },
1174
+ {
1175
+ "epoch": 40.23,
1176
+ "learning_rate": 1.655441100516488e-05,
1177
+ "loss": 0.0002,
1178
+ "step": 67500
1179
+ },
1180
+ {
1181
+ "epoch": 40.52,
1182
+ "learning_rate": 1.63065961462058e-05,
1183
+ "loss": 0.0002,
1184
+ "step": 68000
1185
+ },
1186
+ {
1187
+ "epoch": 40.82,
1188
+ "learning_rate": 1.605927691696464e-05,
1189
+ "loss": 0.0002,
1190
+ "step": 68500
1191
+ },
1192
+ {
1193
+ "epoch": 41.0,
1194
+ "eval_exact_match": 30.0813,
1195
+ "eval_loss": 0.15939126908779144,
1196
+ "eval_runtime": 960.6758,
1197
+ "eval_samples_per_second": 1.025,
1198
+ "eval_steps_per_second": 0.257,
1199
+ "step": 68798
1200
+ },
1201
+ {
1202
+ "epoch": 41.12,
1203
+ "learning_rate": 1.581146205800556e-05,
1204
+ "loss": 0.0002,
1205
+ "step": 69000
1206
+ },
1207
+ {
1208
+ "epoch": 41.42,
1209
+ "learning_rate": 1.556364719904648e-05,
1210
+ "loss": 0.0002,
1211
+ "step": 69500
1212
+ },
1213
+ {
1214
+ "epoch": 41.72,
1215
+ "learning_rate": 1.5315832340087405e-05,
1216
+ "loss": 0.0002,
1217
+ "step": 70000
1218
+ },
1219
+ {
1220
+ "epoch": 42.0,
1221
+ "eval_exact_match": 30.4878,
1222
+ "eval_loss": 0.15988801419734955,
1223
+ "eval_runtime": 887.1545,
1224
+ "eval_samples_per_second": 1.11,
1225
+ "eval_steps_per_second": 0.278,
1226
+ "step": 70476
1227
+ },
1228
+ {
1229
+ "epoch": 42.01,
1230
+ "learning_rate": 1.5068513110846244e-05,
1231
+ "loss": 0.0002,
1232
+ "step": 70500
1233
+ },
1234
+ {
1235
+ "epoch": 42.31,
1236
+ "learning_rate": 1.4820698251887166e-05,
1237
+ "loss": 0.0002,
1238
+ "step": 71000
1239
+ },
1240
+ {
1241
+ "epoch": 42.61,
1242
+ "learning_rate": 1.4572883392928085e-05,
1243
+ "loss": 0.0003,
1244
+ "step": 71500
1245
+ },
1246
+ {
1247
+ "epoch": 42.91,
1248
+ "learning_rate": 1.4325068533969013e-05,
1249
+ "loss": 0.0002,
1250
+ "step": 72000
1251
+ },
1252
+ {
1253
+ "epoch": 43.0,
1254
+ "eval_exact_match": 30.5894,
1255
+ "eval_loss": 0.15938729047775269,
1256
+ "eval_runtime": 1072.2899,
1257
+ "eval_samples_per_second": 0.919,
1258
+ "eval_steps_per_second": 0.23,
1259
+ "step": 72154
1260
+ },
1261
+ {
1262
+ "epoch": 43.21,
1263
+ "learning_rate": 1.4077253675009935e-05,
1264
+ "loss": 0.0001,
1265
+ "step": 72500
1266
+ },
1267
+ {
1268
+ "epoch": 43.5,
1269
+ "learning_rate": 1.3829438816050854e-05,
1270
+ "loss": 0.0002,
1271
+ "step": 73000
1272
+ },
1273
+ {
1274
+ "epoch": 43.8,
1275
+ "learning_rate": 1.3581623957091777e-05,
1276
+ "loss": 0.0002,
1277
+ "step": 73500
1278
+ },
1279
+ {
1280
+ "epoch": 44.0,
1281
+ "eval_exact_match": 30.8943,
1282
+ "eval_loss": 0.16005010902881622,
1283
+ "eval_runtime": 908.7909,
1284
+ "eval_samples_per_second": 1.084,
1285
+ "eval_steps_per_second": 0.272,
1286
+ "step": 73832
1287
+ },
1288
+ {
1289
+ "epoch": 44.1,
1290
+ "learning_rate": 1.3334304727850615e-05,
1291
+ "loss": 0.0002,
1292
+ "step": 74000
1293
+ },
1294
+ {
1295
+ "epoch": 44.4,
1296
+ "learning_rate": 1.3086489868891538e-05,
1297
+ "loss": 0.0002,
1298
+ "step": 74500
1299
+ },
1300
+ {
1301
+ "epoch": 44.7,
1302
+ "learning_rate": 1.2838675009932457e-05,
1303
+ "loss": 0.0002,
1304
+ "step": 75000
1305
+ },
1306
+ {
1307
+ "epoch": 44.99,
1308
+ "learning_rate": 1.259086015097338e-05,
1309
+ "loss": 0.0001,
1310
+ "step": 75500
1311
+ },
1312
+ {
1313
+ "epoch": 45.0,
1314
+ "eval_exact_match": 30.1829,
1315
+ "eval_loss": 0.16123907268047333,
1316
+ "eval_runtime": 1046.4568,
1317
+ "eval_samples_per_second": 0.941,
1318
+ "eval_steps_per_second": 0.236,
1319
+ "step": 75510
1320
+ },
1321
+ {
1322
+ "epoch": 45.29,
1323
+ "learning_rate": 1.23430452920143e-05,
1324
+ "loss": 0.0002,
1325
+ "step": 76000
1326
+ },
1327
+ {
1328
+ "epoch": 45.59,
1329
+ "learning_rate": 1.2095230433055228e-05,
1330
+ "loss": 0.0002,
1331
+ "step": 76500
1332
+ },
1333
+ {
1334
+ "epoch": 45.89,
1335
+ "learning_rate": 1.1847415574096149e-05,
1336
+ "loss": 0.0001,
1337
+ "step": 77000
1338
+ },
1339
+ {
1340
+ "epoch": 46.0,
1341
+ "eval_exact_match": 30.9959,
1342
+ "eval_loss": 0.161320760846138,
1343
+ "eval_runtime": 1052.5853,
1344
+ "eval_samples_per_second": 0.936,
1345
+ "eval_steps_per_second": 0.235,
1346
+ "step": 77188
1347
+ },
1348
+ {
1349
+ "epoch": 46.19,
1350
+ "learning_rate": 1.159960071513707e-05,
1351
+ "loss": 0.0001,
1352
+ "step": 77500
1353
+ },
1354
+ {
1355
+ "epoch": 46.48,
1356
+ "learning_rate": 1.135228148589591e-05,
1357
+ "loss": 0.0001,
1358
+ "step": 78000
1359
+ },
1360
+ {
1361
+ "epoch": 46.78,
1362
+ "learning_rate": 1.110446662693683e-05,
1363
+ "loss": 0.0001,
1364
+ "step": 78500
1365
+ },
1366
+ {
1367
+ "epoch": 47.0,
1368
+ "eval_exact_match": 29.065,
1369
+ "eval_loss": 0.16340066492557526,
1370
+ "eval_runtime": 1034.6142,
1371
+ "eval_samples_per_second": 0.952,
1372
+ "eval_steps_per_second": 0.239,
1373
+ "step": 78866
1374
+ },
1375
+ {
1376
+ "epoch": 47.08,
1377
+ "learning_rate": 1.0856651767977751e-05,
1378
+ "loss": 0.0001,
1379
+ "step": 79000
1380
+ },
1381
+ {
1382
+ "epoch": 47.38,
1383
+ "learning_rate": 1.0608836909018672e-05,
1384
+ "loss": 0.0001,
1385
+ "step": 79500
1386
+ },
1387
+ {
1388
+ "epoch": 47.68,
1389
+ "learning_rate": 1.0361022050059594e-05,
1390
+ "loss": 0.0001,
1391
+ "step": 80000
1392
+ },
1393
+ {
1394
+ "epoch": 47.97,
1395
+ "learning_rate": 1.0113207191100515e-05,
1396
+ "loss": 0.0001,
1397
+ "step": 80500
1398
+ },
1399
+ {
1400
+ "epoch": 48.0,
1401
+ "eval_exact_match": 30.5894,
1402
+ "eval_loss": 0.1627337783575058,
1403
+ "eval_runtime": 1084.3069,
1404
+ "eval_samples_per_second": 0.908,
1405
+ "eval_steps_per_second": 0.228,
1406
+ "step": 80544
1407
+ },
1408
+ {
1409
+ "epoch": 48.27,
1410
+ "learning_rate": 9.865392332141441e-06,
1411
+ "loss": 0.0001,
1412
+ "step": 81000
1413
+ },
1414
+ {
1415
+ "epoch": 48.57,
1416
+ "learning_rate": 9.618073102900281e-06,
1417
+ "loss": 0.0001,
1418
+ "step": 81500
1419
+ },
1420
+ {
1421
+ "epoch": 48.87,
1422
+ "learning_rate": 9.370753873659116e-06,
1423
+ "loss": 0.0002,
1424
+ "step": 82000
1425
+ },
1426
+ {
1427
+ "epoch": 49.0,
1428
+ "eval_exact_match": 29.3699,
1429
+ "eval_loss": 0.16392475366592407,
1430
+ "eval_runtime": 1147.3956,
1431
+ "eval_samples_per_second": 0.858,
1432
+ "eval_steps_per_second": 0.215,
1433
+ "step": 82222
1434
+ },
1435
+ {
1436
+ "epoch": 49.17,
1437
+ "learning_rate": 9.122939014700044e-06,
1438
+ "loss": 0.0001,
1439
+ "step": 82500
1440
+ },
1441
+ {
1442
+ "epoch": 49.46,
1443
+ "learning_rate": 8.875124155740965e-06,
1444
+ "loss": 0.0001,
1445
+ "step": 83000
1446
+ },
1447
+ {
1448
+ "epoch": 49.76,
1449
+ "learning_rate": 8.627309296781885e-06,
1450
+ "loss": 0.0001,
1451
+ "step": 83500
1452
+ },
1453
+ {
1454
+ "epoch": 50.0,
1455
+ "eval_exact_match": 28.6585,
1456
+ "eval_loss": 0.16398130357265472,
1457
+ "eval_runtime": 1157.2252,
1458
+ "eval_samples_per_second": 0.851,
1459
+ "eval_steps_per_second": 0.213,
1460
+ "step": 83900
1461
+ },
1462
+ {
1463
+ "epoch": 50.06,
1464
+ "learning_rate": 8.379494437822806e-06,
1465
+ "loss": 0.0001,
1466
+ "step": 84000
1467
+ },
1468
+ {
1469
+ "epoch": 50.36,
1470
+ "learning_rate": 8.131679578863727e-06,
1471
+ "loss": 0.0001,
1472
+ "step": 84500
1473
+ },
1474
+ {
1475
+ "epoch": 50.66,
1476
+ "learning_rate": 7.884360349622567e-06,
1477
+ "loss": 0.0001,
1478
+ "step": 85000
1479
+ },
1480
+ {
1481
+ "epoch": 50.95,
1482
+ "learning_rate": 7.636545490663488e-06,
1483
+ "loss": 0.0001,
1484
+ "step": 85500
1485
+ },
1486
+ {
1487
+ "epoch": 51.0,
1488
+ "eval_exact_match": 29.5732,
1489
+ "eval_loss": 0.16443894803524017,
1490
+ "eval_runtime": 974.5681,
1491
+ "eval_samples_per_second": 1.011,
1492
+ "eval_steps_per_second": 0.253,
1493
+ "step": 85578
1494
+ },
1495
+ {
1496
+ "epoch": 51.25,
1497
+ "learning_rate": 7.388730631704409e-06,
1498
+ "loss": 0.0001,
1499
+ "step": 86000
1500
+ },
1501
+ {
1502
+ "epoch": 51.55,
1503
+ "learning_rate": 7.140915772745329e-06,
1504
+ "loss": 0.0001,
1505
+ "step": 86500
1506
+ },
1507
+ {
1508
+ "epoch": 51.85,
1509
+ "learning_rate": 6.893100913786256e-06,
1510
+ "loss": 0.0001,
1511
+ "step": 87000
1512
+ },
1513
+ {
1514
+ "epoch": 52.0,
1515
+ "eval_exact_match": 29.9797,
1516
+ "eval_loss": 0.16551026701927185,
1517
+ "eval_runtime": 1068.6565,
1518
+ "eval_samples_per_second": 0.922,
1519
+ "eval_steps_per_second": 0.231,
1520
+ "step": 87256
1521
+ },
1522
+ {
1523
+ "epoch": 52.15,
1524
+ "learning_rate": 6.645286054827177e-06,
1525
+ "loss": 0.0001,
1526
+ "step": 87500
1527
+ },
1528
+ {
1529
+ "epoch": 52.44,
1530
+ "learning_rate": 6.397471195868098e-06,
1531
+ "loss": 0.0001,
1532
+ "step": 88000
1533
+ },
1534
+ {
1535
+ "epoch": 52.74,
1536
+ "learning_rate": 6.1496563369090186e-06,
1537
+ "loss": 0.0001,
1538
+ "step": 88500
1539
+ },
1540
+ {
1541
+ "epoch": 53.0,
1542
+ "eval_exact_match": 29.3699,
1543
+ "eval_loss": 0.1660846322774887,
1544
+ "eval_runtime": 1143.5908,
1545
+ "eval_samples_per_second": 0.861,
1546
+ "eval_steps_per_second": 0.216,
1547
+ "step": 88934
1548
+ },
1549
+ {
1550
+ "epoch": 53.04,
1551
+ "learning_rate": 5.90184147794994e-06,
1552
+ "loss": 0.0001,
1553
+ "step": 89000
1554
+ },
1555
+ {
1556
+ "epoch": 53.34,
1557
+ "learning_rate": 5.654026618990861e-06,
1558
+ "loss": 0.0001,
1559
+ "step": 89500
1560
+ },
1561
+ {
1562
+ "epoch": 53.64,
1563
+ "learning_rate": 5.406211760031782e-06,
1564
+ "loss": 0.0001,
1565
+ "step": 90000
1566
+ },
1567
+ {
1568
+ "epoch": 53.93,
1569
+ "learning_rate": 5.158396901072703e-06,
1570
+ "loss": 0.0001,
1571
+ "step": 90500
1572
+ },
1573
+ {
1574
+ "epoch": 54.0,
1575
+ "eval_exact_match": 30.0813,
1576
+ "eval_loss": 0.16615071892738342,
1577
+ "eval_runtime": 1061.5964,
1578
+ "eval_samples_per_second": 0.928,
1579
+ "eval_steps_per_second": 0.233,
1580
+ "step": 90612
1581
+ },
1582
+ {
1583
+ "epoch": 54.23,
1584
+ "learning_rate": 4.910582042113629e-06,
1585
+ "loss": 0.0001,
1586
+ "step": 91000
1587
+ },
1588
+ {
1589
+ "epoch": 54.53,
1590
+ "learning_rate": 4.6632628128724694e-06,
1591
+ "loss": 0.0,
1592
+ "step": 91500
1593
+ },
1594
+ {
1595
+ "epoch": 54.83,
1596
+ "learning_rate": 4.415447953913391e-06,
1597
+ "loss": 0.0001,
1598
+ "step": 92000
1599
+ },
1600
+ {
1601
+ "epoch": 55.0,
1602
+ "eval_exact_match": 30.0813,
1603
+ "eval_loss": 0.16646790504455566,
1604
+ "eval_runtime": 1058.7707,
1605
+ "eval_samples_per_second": 0.93,
1606
+ "eval_steps_per_second": 0.233,
1607
+ "step": 92290
1608
+ },
1609
+ {
1610
+ "epoch": 55.13,
1611
+ "learning_rate": 4.167633094954312e-06,
1612
+ "loss": 0.0,
1613
+ "step": 92500
1614
+ },
1615
+ {
1616
+ "epoch": 55.42,
1617
+ "learning_rate": 3.919818235995233e-06,
1618
+ "loss": 0.0,
1619
+ "step": 93000
1620
+ },
1621
+ {
1622
+ "epoch": 55.72,
1623
+ "learning_rate": 3.672499006754073e-06,
1624
+ "loss": 0.0001,
1625
+ "step": 93500
1626
+ },
1627
+ {
1628
+ "epoch": 56.0,
1629
+ "eval_exact_match": 30.6911,
1630
+ "eval_loss": 0.16832616925239563,
1631
+ "eval_runtime": 960.3954,
1632
+ "eval_samples_per_second": 1.026,
1633
+ "eval_steps_per_second": 0.257,
1634
+ "step": 93968
1635
+ },
1636
+ {
1637
+ "epoch": 56.02,
1638
+ "learning_rate": 3.424684147794994e-06,
1639
+ "loss": 0.0,
1640
+ "step": 94000
1641
+ },
1642
+ {
1643
+ "epoch": 56.32,
1644
+ "learning_rate": 3.1768692888359152e-06,
1645
+ "loss": 0.0,
1646
+ "step": 94500
1647
+ },
1648
+ {
1649
+ "epoch": 56.62,
1650
+ "learning_rate": 2.929054429876836e-06,
1651
+ "loss": 0.0,
1652
+ "step": 95000
1653
+ },
1654
+ {
1655
+ "epoch": 56.91,
1656
+ "learning_rate": 2.6812395709177573e-06,
1657
+ "loss": 0.0,
1658
+ "step": 95500
1659
+ },
1660
+ {
1661
+ "epoch": 57.0,
1662
+ "eval_exact_match": 30.1829,
1663
+ "eval_loss": 0.16693390905857086,
1664
+ "eval_runtime": 1143.9699,
1665
+ "eval_samples_per_second": 0.861,
1666
+ "eval_steps_per_second": 0.216,
1667
+ "step": 95646
1668
+ },
1669
+ {
1670
+ "epoch": 57.21,
1671
+ "learning_rate": 2.433424711958678e-06,
1672
+ "loss": 0.0,
1673
+ "step": 96000
1674
+ },
1675
+ {
1676
+ "epoch": 57.51,
1677
+ "learning_rate": 2.185609852999605e-06,
1678
+ "loss": 0.0,
1679
+ "step": 96500
1680
+ },
1681
+ {
1682
+ "epoch": 57.81,
1683
+ "learning_rate": 1.9377949940405256e-06,
1684
+ "loss": 0.0,
1685
+ "step": 97000
1686
+ },
1687
+ {
1688
+ "epoch": 58.0,
1689
+ "eval_exact_match": 30.3862,
1690
+ "eval_loss": 0.1675402671098709,
1691
+ "eval_runtime": 1086.4675,
1692
+ "eval_samples_per_second": 0.907,
1693
+ "eval_steps_per_second": 0.227,
1694
+ "step": 97324
1695
+ },
1696
+ {
1697
+ "epoch": 58.1,
1698
+ "learning_rate": 1.6899801350814468e-06,
1699
+ "loss": 0.0,
1700
+ "step": 97500
1701
+ },
1702
+ {
1703
+ "epoch": 58.4,
1704
+ "learning_rate": 1.4421652761223676e-06,
1705
+ "loss": 0.0,
1706
+ "step": 98000
1707
+ },
1708
+ {
1709
+ "epoch": 58.7,
1710
+ "learning_rate": 1.1943504171632889e-06,
1711
+ "loss": 0.0,
1712
+ "step": 98500
1713
+ },
1714
+ {
1715
+ "epoch": 59.0,
1716
+ "learning_rate": 9.465355582042097e-07,
1717
+ "loss": 0.0,
1718
+ "step": 99000
1719
+ },
1720
+ {
1721
+ "epoch": 59.0,
1722
+ "eval_exact_match": 30.3862,
1723
+ "eval_loss": 0.16781553626060486,
1724
+ "eval_runtime": 1066.9264,
1725
+ "eval_samples_per_second": 0.923,
1726
+ "eval_steps_per_second": 0.232,
1727
+ "step": 99002
1728
+ },
1729
+ {
1730
+ "epoch": 59.3,
1731
+ "learning_rate": 6.987206992451308e-07,
1732
+ "loss": 0.0,
1733
+ "step": 99500
1734
+ },
1735
+ {
1736
+ "epoch": 59.59,
1737
+ "learning_rate": 4.5140147000397107e-07,
1738
+ "loss": 0.0,
1739
+ "step": 100000
1740
+ },
1741
+ {
1742
+ "epoch": 59.89,
1743
+ "learning_rate": 2.0358661104489208e-07,
1744
+ "loss": 0.0,
1745
+ "step": 100500
1746
+ },
1747
+ {
1748
+ "epoch": 60.0,
1749
+ "eval_exact_match": 30.3862,
1750
+ "eval_loss": 0.1676684468984604,
1751
+ "eval_runtime": 1061.5636,
1752
+ "eval_samples_per_second": 0.928,
1753
+ "eval_steps_per_second": 0.233,
1754
+ "step": 100680
1755
+ }
1756
+ ],
1757
+ "max_steps": 100680,
1758
+ "num_train_epochs": 60,
1759
+ "total_flos": 4.904790433726464e+17,
1760
+ "trial_name": null,
1761
+ "trial_params": null
1762
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20f658430ea88865a64ab08fd004f7991d946c8cc912106a118905f420111df6
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-100680/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795d24ca862f010d2d312c57b19515e49878fe85d5bc82fe273be03bfa3080f4
3
+ size 1783209146
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3b5b01635dc5bdae85585c41221f41b93df4791c1d93f7d4ac7e6aaa6f8d25
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-25170/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c34251f0812da27b0eb801a1b09215c75f1457bab88dec9b4f22b7d55953603
3
+ size 14244