kpriyanshu256 commited on
Commit
c80828e
1 Parent(s): 42b7c0f

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/config.json +33 -0
  2. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/merges.txt +0 -0
  3. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/optimizer.pt +3 -0
  4. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/pytorch_model.bin +3 -0
  5. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/rng_state.pth +3 -0
  6. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/scaler.pt +3 -0
  7. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/scheduler.pt +3 -0
  8. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/special_tokens_map.json +1 -0
  9. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/tokenizer.json +0 -0
  10. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/tokenizer_config.json +1 -0
  11. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/trainer_state.json +190 -0
  12. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/training_args.bin +3 -0
  13. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/vocab.json +0 -0
  14. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/config.json +33 -0
  15. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/merges.txt +0 -0
  16. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/optimizer.pt +3 -0
  17. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/pytorch_model.bin +3 -0
  18. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/rng_state.pth +3 -0
  19. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/scaler.pt +3 -0
  20. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/scheduler.pt +3 -0
  21. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/special_tokens_map.json +1 -0
  22. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/tokenizer.json +0 -0
  23. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/tokenizer_config.json +1 -0
  24. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/trainer_state.json +1762 -0
  25. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/training_args.bin +3 -0
  26. qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/vocab.json +0 -0
  27. qa_sp_codet5p-220m_s2_latex_bs_lr_47/config.json +33 -0
  28. qa_sp_codet5p-220m_s2_latex_bs_lr_47/merges.txt +0 -0
  29. qa_sp_codet5p-220m_s2_latex_bs_lr_47/metric.txt +2 -0
  30. qa_sp_codet5p-220m_s2_latex_bs_lr_47/pytorch_model.bin +3 -0
  31. qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/1712868779.2058115/events.out.tfevents.1712868779.babel-3-7 +3 -0
  32. qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/events.out.tfevents.1712868779.babel-3-7 +3 -0
  33. qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/events.out.tfevents.1713018580.babel-3-7 +3 -0
  34. qa_sp_codet5p-220m_s2_latex_bs_lr_47/special_tokens_map.json +1 -0
  35. qa_sp_codet5p-220m_s2_latex_bs_lr_47/tokenizer.json +0 -0
  36. qa_sp_codet5p-220m_s2_latex_bs_lr_47/tokenizer_config.json +1 -0
  37. qa_sp_codet5p-220m_s2_latex_bs_lr_47/trainer_state.json +1771 -0
  38. qa_sp_codet5p-220m_s2_latex_bs_lr_47/training_args.bin +3 -0
  39. qa_sp_codet5p-220m_s2_latex_bs_lr_47/vocab.json +0 -0
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9674e278e197a9b6b8459f124a6c2aa2c67dbbc2406089c24fbb90a601a791
3
+ size 1783209146
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026afe1a84a57c4a30090ca8ada15a90b6d7e311cc4a2298533aa81813ff7696
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b1dae2b00feb3784011419a3e17cb7c6ab5b1e60bbc391eaecbb49c94002d4
3
+ size 14244
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e430ef18dd3c6267c7c8743ad9b9a0e10abb2af2cdd111b15257c26a3fb4765
3
+ size 988
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6822903680fb5beb35cdb87640dc9cc2a2e36ffddac0df9c941d6f4f79d812e
3
+ size 1064
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/trainer_state.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 30.8943,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068",
4
+ "epoch": 6.0,
5
+ "global_step": 10068,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0427,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.0447,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0341,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 28.8618,
31
+ "eval_loss": 0.09518733620643616,
32
+ "eval_runtime": 1894.6128,
33
+ "eval_samples_per_second": 0.52,
34
+ "eval_steps_per_second": 0.13,
35
+ "step": 1678
36
+ },
37
+ {
38
+ "epoch": 1.19,
39
+ "learning_rate": 4.900874056416369e-05,
40
+ "loss": 0.0411,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 1.49,
45
+ "learning_rate": 4.876092570520461e-05,
46
+ "loss": 0.0328,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 1.79,
51
+ "learning_rate": 4.8513110846245534e-05,
52
+ "loss": 0.0331,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_exact_match": 30.1829,
58
+ "eval_loss": 0.10444394499063492,
59
+ "eval_runtime": 1252.1912,
60
+ "eval_samples_per_second": 0.787,
61
+ "eval_steps_per_second": 0.197,
62
+ "step": 3356
63
+ },
64
+ {
65
+ "epoch": 2.09,
66
+ "learning_rate": 4.826529598728646e-05,
67
+ "loss": 0.03,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 4.8017481128327376e-05,
73
+ "loss": 0.0294,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 2.68,
78
+ "learning_rate": 4.77696662693683e-05,
79
+ "loss": 0.0283,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "learning_rate": 4.752185141040922e-05,
85
+ "loss": 0.027,
86
+ "step": 5000
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_exact_match": 30.7927,
91
+ "eval_loss": 0.10687928646802902,
92
+ "eval_runtime": 1090.2564,
93
+ "eval_samples_per_second": 0.903,
94
+ "eval_steps_per_second": 0.227,
95
+ "step": 5034
96
+ },
97
+ {
98
+ "epoch": 3.28,
99
+ "learning_rate": 4.727403655145014e-05,
100
+ "loss": 0.0279,
101
+ "step": 5500
102
+ },
103
+ {
104
+ "epoch": 3.58,
105
+ "learning_rate": 4.7026221692491066e-05,
106
+ "loss": 0.02,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 3.87,
111
+ "learning_rate": 4.677840683353199e-05,
112
+ "loss": 0.0196,
113
+ "step": 6500
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "eval_exact_match": 30.3862,
118
+ "eval_loss": 0.10539323091506958,
119
+ "eval_runtime": 1426.0185,
120
+ "eval_samples_per_second": 0.691,
121
+ "eval_steps_per_second": 0.173,
122
+ "step": 6712
123
+ },
124
+ {
125
+ "epoch": 4.17,
126
+ "learning_rate": 4.653059197457291e-05,
127
+ "loss": 0.0207,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 4.47,
132
+ "learning_rate": 4.628277711561383e-05,
133
+ "loss": 0.0216,
134
+ "step": 7500
135
+ },
136
+ {
137
+ "epoch": 4.77,
138
+ "learning_rate": 4.6035457886372666e-05,
139
+ "loss": 0.0173,
140
+ "step": 8000
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_exact_match": 30.4878,
145
+ "eval_loss": 0.11340699344873428,
146
+ "eval_runtime": 1351.563,
147
+ "eval_samples_per_second": 0.729,
148
+ "eval_steps_per_second": 0.183,
149
+ "step": 8390
150
+ },
151
+ {
152
+ "epoch": 5.07,
153
+ "learning_rate": 4.578764302741359e-05,
154
+ "loss": 0.016,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 5.36,
159
+ "learning_rate": 4.553982816845451e-05,
160
+ "loss": 0.0162,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 5.66,
165
+ "learning_rate": 4.529201330949543e-05,
166
+ "loss": 0.0174,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 5.96,
171
+ "learning_rate": 4.504419845053635e-05,
172
+ "loss": 0.0141,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 6.0,
177
+ "eval_exact_match": 30.8943,
178
+ "eval_loss": 0.11525732278823853,
179
+ "eval_runtime": 1238.1999,
180
+ "eval_samples_per_second": 0.796,
181
+ "eval_steps_per_second": 0.199,
182
+ "step": 10068
183
+ }
184
+ ],
185
+ "max_steps": 100680,
186
+ "num_train_epochs": 60,
187
+ "total_flos": 4.904790433726464e+16,
188
+ "trial_name": null,
189
+ "trial_params": null
190
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007e3036f07603b9a502394d794913f87be2edaed83eed9aa3cc900a0bd8473f
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:762fcc87b24a1351f4270517a0bd80e4c778d3807a6d32b9478e803faa369a60
3
+ size 1783209658
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59352cfc8ff8ea2e5ab2b00fff4242041ae4b9cc5e669617fa20ea57fefada3d
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cf8e78cbace91b2d39b04c34e1b72726db4f4e833559ab7473fc6d1d86cdde5
3
+ size 14244
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44524137a877ad2b626bb19fd9317e9bf063b15c299a9500bb062b6b8c0b3246
3
+ size 988
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2cbcd92c335c2fc28e94b123183178db5c7bbb33aa259230c50a3f351aa0e35
3
+ size 1064
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/trainer_state.json ADDED
@@ -0,0 +1,1762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 30.8943,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068",
4
+ "epoch": 60.0,
5
+ "global_step": 100680,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0427,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.0447,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0341,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 28.8618,
31
+ "eval_loss": 0.09518733620643616,
32
+ "eval_runtime": 1894.6128,
33
+ "eval_samples_per_second": 0.52,
34
+ "eval_steps_per_second": 0.13,
35
+ "step": 1678
36
+ },
37
+ {
38
+ "epoch": 1.19,
39
+ "learning_rate": 4.900874056416369e-05,
40
+ "loss": 0.0411,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 1.49,
45
+ "learning_rate": 4.876092570520461e-05,
46
+ "loss": 0.0328,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 1.79,
51
+ "learning_rate": 4.8513110846245534e-05,
52
+ "loss": 0.0331,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_exact_match": 30.1829,
58
+ "eval_loss": 0.10444394499063492,
59
+ "eval_runtime": 1252.1912,
60
+ "eval_samples_per_second": 0.787,
61
+ "eval_steps_per_second": 0.197,
62
+ "step": 3356
63
+ },
64
+ {
65
+ "epoch": 2.09,
66
+ "learning_rate": 4.826529598728646e-05,
67
+ "loss": 0.03,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 4.8017481128327376e-05,
73
+ "loss": 0.0294,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 2.68,
78
+ "learning_rate": 4.77696662693683e-05,
79
+ "loss": 0.0283,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "learning_rate": 4.752185141040922e-05,
85
+ "loss": 0.027,
86
+ "step": 5000
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_exact_match": 30.7927,
91
+ "eval_loss": 0.10687928646802902,
92
+ "eval_runtime": 1090.2564,
93
+ "eval_samples_per_second": 0.903,
94
+ "eval_steps_per_second": 0.227,
95
+ "step": 5034
96
+ },
97
+ {
98
+ "epoch": 3.28,
99
+ "learning_rate": 4.727403655145014e-05,
100
+ "loss": 0.0279,
101
+ "step": 5500
102
+ },
103
+ {
104
+ "epoch": 3.58,
105
+ "learning_rate": 4.7026221692491066e-05,
106
+ "loss": 0.02,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 3.87,
111
+ "learning_rate": 4.677840683353199e-05,
112
+ "loss": 0.0196,
113
+ "step": 6500
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "eval_exact_match": 30.3862,
118
+ "eval_loss": 0.10539323091506958,
119
+ "eval_runtime": 1426.0185,
120
+ "eval_samples_per_second": 0.691,
121
+ "eval_steps_per_second": 0.173,
122
+ "step": 6712
123
+ },
124
+ {
125
+ "epoch": 4.17,
126
+ "learning_rate": 4.653059197457291e-05,
127
+ "loss": 0.0207,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 4.47,
132
+ "learning_rate": 4.628277711561383e-05,
133
+ "loss": 0.0216,
134
+ "step": 7500
135
+ },
136
+ {
137
+ "epoch": 4.77,
138
+ "learning_rate": 4.6035457886372666e-05,
139
+ "loss": 0.0173,
140
+ "step": 8000
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_exact_match": 30.4878,
145
+ "eval_loss": 0.11340699344873428,
146
+ "eval_runtime": 1351.563,
147
+ "eval_samples_per_second": 0.729,
148
+ "eval_steps_per_second": 0.183,
149
+ "step": 8390
150
+ },
151
+ {
152
+ "epoch": 5.07,
153
+ "learning_rate": 4.578764302741359e-05,
154
+ "loss": 0.016,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 5.36,
159
+ "learning_rate": 4.553982816845451e-05,
160
+ "loss": 0.0162,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 5.66,
165
+ "learning_rate": 4.529201330949543e-05,
166
+ "loss": 0.0174,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 5.96,
171
+ "learning_rate": 4.504419845053635e-05,
172
+ "loss": 0.0141,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 6.0,
177
+ "eval_exact_match": 30.8943,
178
+ "eval_loss": 0.11525732278823853,
179
+ "eval_runtime": 1238.1999,
180
+ "eval_samples_per_second": 0.796,
181
+ "eval_steps_per_second": 0.199,
182
+ "step": 10068
183
+ },
184
+ {
185
+ "epoch": 6.26,
186
+ "learning_rate": 4.479638359157728e-05,
187
+ "loss": 0.0143,
188
+ "step": 10500
189
+ },
190
+ {
191
+ "epoch": 6.56,
192
+ "learning_rate": 4.45485687326182e-05,
193
+ "loss": 0.0133,
194
+ "step": 11000
195
+ },
196
+ {
197
+ "epoch": 6.85,
198
+ "learning_rate": 4.430174513309496e-05,
199
+ "loss": 0.0137,
200
+ "step": 11500
201
+ },
202
+ {
203
+ "epoch": 7.0,
204
+ "eval_exact_match": 29.7764,
205
+ "eval_loss": 0.12133623659610748,
206
+ "eval_runtime": 1798.1818,
207
+ "eval_samples_per_second": 0.548,
208
+ "eval_steps_per_second": 0.137,
209
+ "step": 11746
210
+ },
211
+ {
212
+ "epoch": 7.15,
213
+ "learning_rate": 4.405393027413588e-05,
214
+ "loss": 0.013,
215
+ "step": 12000
216
+ },
217
+ {
218
+ "epoch": 7.45,
219
+ "learning_rate": 4.38061154151768e-05,
220
+ "loss": 0.0108,
221
+ "step": 12500
222
+ },
223
+ {
224
+ "epoch": 7.75,
225
+ "learning_rate": 4.355830055621772e-05,
226
+ "loss": 0.0098,
227
+ "step": 13000
228
+ },
229
+ {
230
+ "epoch": 8.0,
231
+ "eval_exact_match": 29.065,
232
+ "eval_loss": 0.12475814670324326,
233
+ "eval_runtime": 1418.154,
234
+ "eval_samples_per_second": 0.695,
235
+ "eval_steps_per_second": 0.174,
236
+ "step": 13424
237
+ },
238
+ {
239
+ "epoch": 8.05,
240
+ "learning_rate": 4.331048569725865e-05,
241
+ "loss": 0.0108,
242
+ "step": 13500
243
+ },
244
+ {
245
+ "epoch": 8.34,
246
+ "learning_rate": 4.3062670838299565e-05,
247
+ "loss": 0.0097,
248
+ "step": 14000
249
+ },
250
+ {
251
+ "epoch": 8.64,
252
+ "learning_rate": 4.281485597934049e-05,
253
+ "loss": 0.0106,
254
+ "step": 14500
255
+ },
256
+ {
257
+ "epoch": 8.94,
258
+ "learning_rate": 4.2567041120381406e-05,
259
+ "loss": 0.0079,
260
+ "step": 15000
261
+ },
262
+ {
263
+ "epoch": 9.0,
264
+ "eval_exact_match": 30.6911,
265
+ "eval_loss": 0.12646353244781494,
266
+ "eval_runtime": 1557.2757,
267
+ "eval_samples_per_second": 0.633,
268
+ "eval_steps_per_second": 0.159,
269
+ "step": 15102
270
+ },
271
+ {
272
+ "epoch": 9.24,
273
+ "learning_rate": 4.231922626142233e-05,
274
+ "loss": 0.0081,
275
+ "step": 15500
276
+ },
277
+ {
278
+ "epoch": 9.54,
279
+ "learning_rate": 4.207141140246325e-05,
280
+ "loss": 0.0087,
281
+ "step": 16000
282
+ },
283
+ {
284
+ "epoch": 9.83,
285
+ "learning_rate": 4.182359654350417e-05,
286
+ "loss": 0.0077,
287
+ "step": 16500
288
+ },
289
+ {
290
+ "epoch": 10.0,
291
+ "eval_exact_match": 28.8618,
292
+ "eval_loss": 0.12802913784980774,
293
+ "eval_runtime": 1843.86,
294
+ "eval_samples_per_second": 0.534,
295
+ "eval_steps_per_second": 0.134,
296
+ "step": 16780
297
+ },
298
+ {
299
+ "epoch": 10.13,
300
+ "learning_rate": 4.1576277314263014e-05,
301
+ "loss": 0.008,
302
+ "step": 17000
303
+ },
304
+ {
305
+ "epoch": 10.43,
306
+ "learning_rate": 4.132846245530394e-05,
307
+ "loss": 0.0061,
308
+ "step": 17500
309
+ },
310
+ {
311
+ "epoch": 10.73,
312
+ "learning_rate": 4.1080647596344856e-05,
313
+ "loss": 0.0069,
314
+ "step": 18000
315
+ },
316
+ {
317
+ "epoch": 11.0,
318
+ "eval_exact_match": 28.7602,
319
+ "eval_loss": 0.13084228336811066,
320
+ "eval_runtime": 1640.7111,
321
+ "eval_samples_per_second": 0.6,
322
+ "eval_steps_per_second": 0.151,
323
+ "step": 18458
324
+ },
325
+ {
326
+ "epoch": 11.03,
327
+ "learning_rate": 4.083283273738578e-05,
328
+ "loss": 0.007,
329
+ "step": 18500
330
+ },
331
+ {
332
+ "epoch": 11.32,
333
+ "learning_rate": 4.05850178784267e-05,
334
+ "loss": 0.0064,
335
+ "step": 19000
336
+ },
337
+ {
338
+ "epoch": 11.62,
339
+ "learning_rate": 4.033720301946762e-05,
340
+ "loss": 0.0052,
341
+ "step": 19500
342
+ },
343
+ {
344
+ "epoch": 11.92,
345
+ "learning_rate": 4.008938816050854e-05,
346
+ "loss": 0.006,
347
+ "step": 20000
348
+ },
349
+ {
350
+ "epoch": 12.0,
351
+ "eval_exact_match": 27.7439,
352
+ "eval_loss": 0.1305484175682068,
353
+ "eval_runtime": 1207.7361,
354
+ "eval_samples_per_second": 0.816,
355
+ "eval_steps_per_second": 0.205,
356
+ "step": 20136
357
+ },
358
+ {
359
+ "epoch": 12.22,
360
+ "learning_rate": 3.984157330154946e-05,
361
+ "loss": 0.004,
362
+ "step": 20500
363
+ },
364
+ {
365
+ "epoch": 12.51,
366
+ "learning_rate": 3.959375844259039e-05,
367
+ "loss": 0.0048,
368
+ "step": 21000
369
+ },
370
+ {
371
+ "epoch": 12.81,
372
+ "learning_rate": 3.934594358363131e-05,
373
+ "loss": 0.0046,
374
+ "step": 21500
375
+ },
376
+ {
377
+ "epoch": 13.0,
378
+ "eval_exact_match": 29.4715,
379
+ "eval_loss": 0.13410894572734833,
380
+ "eval_runtime": 1779.2713,
381
+ "eval_samples_per_second": 0.554,
382
+ "eval_steps_per_second": 0.139,
383
+ "step": 21814
384
+ },
385
+ {
386
+ "epoch": 13.11,
387
+ "learning_rate": 3.909812872467223e-05,
388
+ "loss": 0.005,
389
+ "step": 22000
390
+ },
391
+ {
392
+ "epoch": 13.41,
393
+ "learning_rate": 3.885031386571315e-05,
394
+ "loss": 0.0039,
395
+ "step": 22500
396
+ },
397
+ {
398
+ "epoch": 13.71,
399
+ "learning_rate": 3.860249900675407e-05,
400
+ "loss": 0.0052,
401
+ "step": 23000
402
+ },
403
+ {
404
+ "epoch": 14.0,
405
+ "eval_exact_match": 29.5732,
406
+ "eval_loss": 0.13682885468006134,
407
+ "eval_runtime": 1195.0104,
408
+ "eval_samples_per_second": 0.824,
409
+ "eval_steps_per_second": 0.207,
410
+ "step": 23492
411
+ },
412
+ {
413
+ "epoch": 14.0,
414
+ "learning_rate": 3.835517977751291e-05,
415
+ "loss": 0.0039,
416
+ "step": 23500
417
+ },
418
+ {
419
+ "epoch": 14.3,
420
+ "learning_rate": 3.8107364918553836e-05,
421
+ "loss": 0.0039,
422
+ "step": 24000
423
+ },
424
+ {
425
+ "epoch": 14.6,
426
+ "learning_rate": 3.786004568931268e-05,
427
+ "loss": 0.0032,
428
+ "step": 24500
429
+ },
430
+ {
431
+ "epoch": 14.9,
432
+ "learning_rate": 3.7612230830353595e-05,
433
+ "loss": 0.0034,
434
+ "step": 25000
435
+ },
436
+ {
437
+ "epoch": 15.0,
438
+ "eval_exact_match": 28.1504,
439
+ "eval_loss": 0.13723917305469513,
440
+ "eval_runtime": 1941.6161,
441
+ "eval_samples_per_second": 0.507,
442
+ "eval_steps_per_second": 0.127,
443
+ "step": 25170
444
+ },
445
+ {
446
+ "epoch": 15.2,
447
+ "learning_rate": 3.736441597139452e-05,
448
+ "loss": 0.0034,
449
+ "step": 25500
450
+ },
451
+ {
452
+ "epoch": 15.49,
453
+ "learning_rate": 3.7116601112435444e-05,
454
+ "loss": 0.0032,
455
+ "step": 26000
456
+ },
457
+ {
458
+ "epoch": 15.79,
459
+ "learning_rate": 3.686878625347636e-05,
460
+ "loss": 0.003,
461
+ "step": 26500
462
+ },
463
+ {
464
+ "epoch": 16.0,
465
+ "eval_exact_match": 28.8618,
466
+ "eval_loss": 0.13947974145412445,
467
+ "eval_runtime": 1579.4418,
468
+ "eval_samples_per_second": 0.624,
469
+ "eval_steps_per_second": 0.156,
470
+ "step": 26848
471
+ },
472
+ {
473
+ "epoch": 16.09,
474
+ "learning_rate": 3.6621467024235196e-05,
475
+ "loss": 0.003,
476
+ "step": 27000
477
+ },
478
+ {
479
+ "epoch": 16.39,
480
+ "learning_rate": 3.637365216527613e-05,
481
+ "loss": 0.0024,
482
+ "step": 27500
483
+ },
484
+ {
485
+ "epoch": 16.69,
486
+ "learning_rate": 3.6125837306317045e-05,
487
+ "loss": 0.0028,
488
+ "step": 28000
489
+ },
490
+ {
491
+ "epoch": 16.98,
492
+ "learning_rate": 3.587802244735797e-05,
493
+ "loss": 0.0025,
494
+ "step": 28500
495
+ },
496
+ {
497
+ "epoch": 17.0,
498
+ "eval_exact_match": 28.7602,
499
+ "eval_loss": 0.1384698450565338,
500
+ "eval_runtime": 1809.0785,
501
+ "eval_samples_per_second": 0.544,
502
+ "eval_steps_per_second": 0.137,
503
+ "step": 28526
504
+ },
505
+ {
506
+ "epoch": 17.28,
507
+ "learning_rate": 3.5630207588398886e-05,
508
+ "loss": 0.0025,
509
+ "step": 29000
510
+ },
511
+ {
512
+ "epoch": 17.58,
513
+ "learning_rate": 3.538239272943982e-05,
514
+ "loss": 0.0022,
515
+ "step": 29500
516
+ },
517
+ {
518
+ "epoch": 17.88,
519
+ "learning_rate": 3.5134577870480735e-05,
520
+ "loss": 0.0024,
521
+ "step": 30000
522
+ },
523
+ {
524
+ "epoch": 18.0,
525
+ "eval_exact_match": 29.5732,
526
+ "eval_loss": 0.14079181849956512,
527
+ "eval_runtime": 1472.2748,
528
+ "eval_samples_per_second": 0.669,
529
+ "eval_steps_per_second": 0.168,
530
+ "step": 30204
531
+ },
532
+ {
533
+ "epoch": 18.18,
534
+ "learning_rate": 3.488676301152166e-05,
535
+ "loss": 0.0021,
536
+ "step": 30500
537
+ },
538
+ {
539
+ "epoch": 18.47,
540
+ "learning_rate": 3.4638948152562576e-05,
541
+ "loss": 0.0024,
542
+ "step": 31000
543
+ },
544
+ {
545
+ "epoch": 18.77,
546
+ "learning_rate": 3.43911332936035e-05,
547
+ "loss": 0.0019,
548
+ "step": 31500
549
+ },
550
+ {
551
+ "epoch": 19.0,
552
+ "eval_exact_match": 28.7602,
553
+ "eval_loss": 0.14403322339057922,
554
+ "eval_runtime": 1414.577,
555
+ "eval_samples_per_second": 0.696,
556
+ "eval_steps_per_second": 0.175,
557
+ "step": 31882
558
+ },
559
+ {
560
+ "epoch": 19.07,
561
+ "learning_rate": 3.414331843464442e-05,
562
+ "loss": 0.002,
563
+ "step": 32000
564
+ },
565
+ {
566
+ "epoch": 19.37,
567
+ "learning_rate": 3.389550357568534e-05,
568
+ "loss": 0.002,
569
+ "step": 32500
570
+ },
571
+ {
572
+ "epoch": 19.67,
573
+ "learning_rate": 3.3648184346444184e-05,
574
+ "loss": 0.0021,
575
+ "step": 33000
576
+ },
577
+ {
578
+ "epoch": 19.96,
579
+ "learning_rate": 3.34003694874851e-05,
580
+ "loss": 0.0017,
581
+ "step": 33500
582
+ },
583
+ {
584
+ "epoch": 20.0,
585
+ "eval_exact_match": 28.6585,
586
+ "eval_loss": 0.14581115543842316,
587
+ "eval_runtime": 1719.5939,
588
+ "eval_samples_per_second": 0.573,
589
+ "eval_steps_per_second": 0.144,
590
+ "step": 33560
591
+ },
592
+ {
593
+ "epoch": 20.26,
594
+ "learning_rate": 3.315255462852603e-05,
595
+ "loss": 0.0016,
596
+ "step": 34000
597
+ },
598
+ {
599
+ "epoch": 20.56,
600
+ "learning_rate": 3.290523539928487e-05,
601
+ "loss": 0.0017,
602
+ "step": 34500
603
+ },
604
+ {
605
+ "epoch": 20.86,
606
+ "learning_rate": 3.2657420540325785e-05,
607
+ "loss": 0.0017,
608
+ "step": 35000
609
+ },
610
+ {
611
+ "epoch": 21.0,
612
+ "eval_exact_match": 29.6748,
613
+ "eval_loss": 0.1457909643650055,
614
+ "eval_runtime": 1424.642,
615
+ "eval_samples_per_second": 0.691,
616
+ "eval_steps_per_second": 0.173,
617
+ "step": 35238
618
+ },
619
+ {
620
+ "epoch": 21.16,
621
+ "learning_rate": 3.240960568136671e-05,
622
+ "loss": 0.0016,
623
+ "step": 35500
624
+ },
625
+ {
626
+ "epoch": 21.45,
627
+ "learning_rate": 3.2161790822407626e-05,
628
+ "loss": 0.0015,
629
+ "step": 36000
630
+ },
631
+ {
632
+ "epoch": 21.75,
633
+ "learning_rate": 3.191397596344855e-05,
634
+ "loss": 0.0014,
635
+ "step": 36500
636
+ },
637
+ {
638
+ "epoch": 22.0,
639
+ "eval_exact_match": 28.7602,
640
+ "eval_loss": 0.14809423685073853,
641
+ "eval_runtime": 1802.4045,
642
+ "eval_samples_per_second": 0.546,
643
+ "eval_steps_per_second": 0.137,
644
+ "step": 36916
645
+ },
646
+ {
647
+ "epoch": 22.05,
648
+ "learning_rate": 3.1666161104489475e-05,
649
+ "loss": 0.0014,
650
+ "step": 37000
651
+ },
652
+ {
653
+ "epoch": 22.35,
654
+ "learning_rate": 3.14183462455304e-05,
655
+ "loss": 0.0014,
656
+ "step": 37500
657
+ },
658
+ {
659
+ "epoch": 22.65,
660
+ "learning_rate": 3.1170531386571316e-05,
661
+ "loss": 0.0015,
662
+ "step": 38000
663
+ },
664
+ {
665
+ "epoch": 22.94,
666
+ "learning_rate": 3.092271652761224e-05,
667
+ "loss": 0.0013,
668
+ "step": 38500
669
+ },
670
+ {
671
+ "epoch": 23.0,
672
+ "eval_exact_match": 29.878,
673
+ "eval_loss": 0.14681237936019897,
674
+ "eval_runtime": 1291.1919,
675
+ "eval_samples_per_second": 0.763,
676
+ "eval_steps_per_second": 0.191,
677
+ "step": 38594
678
+ },
679
+ {
680
+ "epoch": 23.24,
681
+ "learning_rate": 3.0675397298371075e-05,
682
+ "loss": 0.0013,
683
+ "step": 39000
684
+ },
685
+ {
686
+ "epoch": 23.54,
687
+ "learning_rate": 3.0427582439411996e-05,
688
+ "loss": 0.0014,
689
+ "step": 39500
690
+ },
691
+ {
692
+ "epoch": 23.84,
693
+ "learning_rate": 3.0179767580452917e-05,
694
+ "loss": 0.0012,
695
+ "step": 40000
696
+ },
697
+ {
698
+ "epoch": 24.0,
699
+ "eval_exact_match": 27.1341,
700
+ "eval_loss": 0.14823853969573975,
701
+ "eval_runtime": 1532.4551,
702
+ "eval_samples_per_second": 0.643,
703
+ "eval_steps_per_second": 0.161,
704
+ "step": 40272
705
+ },
706
+ {
707
+ "epoch": 24.14,
708
+ "learning_rate": 2.9931952721493838e-05,
709
+ "loss": 0.001,
710
+ "step": 40500
711
+ },
712
+ {
713
+ "epoch": 24.43,
714
+ "learning_rate": 2.9685129121970594e-05,
715
+ "loss": 0.0011,
716
+ "step": 41000
717
+ },
718
+ {
719
+ "epoch": 24.73,
720
+ "learning_rate": 2.9437314263011518e-05,
721
+ "loss": 0.001,
722
+ "step": 41500
723
+ },
724
+ {
725
+ "epoch": 25.0,
726
+ "eval_exact_match": 29.1667,
727
+ "eval_loss": 0.1513073891401291,
728
+ "eval_runtime": 1636.7658,
729
+ "eval_samples_per_second": 0.602,
730
+ "eval_steps_per_second": 0.151,
731
+ "step": 41950
732
+ },
733
+ {
734
+ "epoch": 25.03,
735
+ "learning_rate": 2.918949940405244e-05,
736
+ "loss": 0.0011,
737
+ "step": 42000
738
+ },
739
+ {
740
+ "epoch": 25.33,
741
+ "learning_rate": 2.8941684545093363e-05,
742
+ "loss": 0.001,
743
+ "step": 42500
744
+ },
745
+ {
746
+ "epoch": 25.63,
747
+ "learning_rate": 2.869386968613429e-05,
748
+ "loss": 0.001,
749
+ "step": 43000
750
+ },
751
+ {
752
+ "epoch": 25.92,
753
+ "learning_rate": 2.8446550456893122e-05,
754
+ "loss": 0.0011,
755
+ "step": 43500
756
+ },
757
+ {
758
+ "epoch": 26.0,
759
+ "eval_exact_match": 29.3699,
760
+ "eval_loss": 0.15289542078971863,
761
+ "eval_runtime": 1668.6258,
762
+ "eval_samples_per_second": 0.59,
763
+ "eval_steps_per_second": 0.148,
764
+ "step": 43628
765
+ },
766
+ {
767
+ "epoch": 26.22,
768
+ "learning_rate": 2.819873559793405e-05,
769
+ "loss": 0.0013,
770
+ "step": 44000
771
+ },
772
+ {
773
+ "epoch": 26.52,
774
+ "learning_rate": 2.7950920738974974e-05,
775
+ "loss": 0.0009,
776
+ "step": 44500
777
+ },
778
+ {
779
+ "epoch": 26.82,
780
+ "learning_rate": 2.7703105880015888e-05,
781
+ "loss": 0.0008,
782
+ "step": 45000
783
+ },
784
+ {
785
+ "epoch": 27.0,
786
+ "eval_exact_match": 28.7602,
787
+ "eval_loss": 0.1510438770055771,
788
+ "eval_runtime": 1512.0615,
789
+ "eval_samples_per_second": 0.651,
790
+ "eval_steps_per_second": 0.163,
791
+ "step": 45306
792
+ },
793
+ {
794
+ "epoch": 27.12,
795
+ "learning_rate": 2.7455291021056812e-05,
796
+ "loss": 0.0008,
797
+ "step": 45500
798
+ },
799
+ {
800
+ "epoch": 27.41,
801
+ "learning_rate": 2.7207476162097733e-05,
802
+ "loss": 0.0009,
803
+ "step": 46000
804
+ },
805
+ {
806
+ "epoch": 27.71,
807
+ "learning_rate": 2.6959661303138657e-05,
808
+ "loss": 0.0009,
809
+ "step": 46500
810
+ },
811
+ {
812
+ "epoch": 28.0,
813
+ "eval_exact_match": 29.5732,
814
+ "eval_loss": 0.15132220089435577,
815
+ "eval_runtime": 1581.6848,
816
+ "eval_samples_per_second": 0.623,
817
+ "eval_steps_per_second": 0.156,
818
+ "step": 46984
819
+ },
820
+ {
821
+ "epoch": 28.01,
822
+ "learning_rate": 2.671184644417957e-05,
823
+ "loss": 0.0008,
824
+ "step": 47000
825
+ },
826
+ {
827
+ "epoch": 28.31,
828
+ "learning_rate": 2.64640315852205e-05,
829
+ "loss": 0.0007,
830
+ "step": 47500
831
+ },
832
+ {
833
+ "epoch": 28.61,
834
+ "learning_rate": 2.621671235597934e-05,
835
+ "loss": 0.0009,
836
+ "step": 48000
837
+ },
838
+ {
839
+ "epoch": 28.9,
840
+ "learning_rate": 2.596889749702026e-05,
841
+ "loss": 0.0008,
842
+ "step": 48500
843
+ },
844
+ {
845
+ "epoch": 29.0,
846
+ "eval_exact_match": 29.6748,
847
+ "eval_loss": 0.15183404088020325,
848
+ "eval_runtime": 1477.4592,
849
+ "eval_samples_per_second": 0.667,
850
+ "eval_steps_per_second": 0.167,
851
+ "step": 48662
852
+ },
853
+ {
854
+ "epoch": 29.2,
855
+ "learning_rate": 2.5721082638061182e-05,
856
+ "loss": 0.0008,
857
+ "step": 49000
858
+ },
859
+ {
860
+ "epoch": 29.5,
861
+ "learning_rate": 2.5473763408820024e-05,
862
+ "loss": 0.0008,
863
+ "step": 49500
864
+ },
865
+ {
866
+ "epoch": 29.8,
867
+ "learning_rate": 2.5225948549860945e-05,
868
+ "loss": 0.0006,
869
+ "step": 50000
870
+ },
871
+ {
872
+ "epoch": 30.0,
873
+ "eval_exact_match": 28.252,
874
+ "eval_loss": 0.15343768894672394,
875
+ "eval_runtime": 1700.6429,
876
+ "eval_samples_per_second": 0.579,
877
+ "eval_steps_per_second": 0.145,
878
+ "step": 50340
879
+ },
880
+ {
881
+ "epoch": 30.1,
882
+ "learning_rate": 2.4979124950337704e-05,
883
+ "loss": 0.0008,
884
+ "step": 50500
885
+ },
886
+ {
887
+ "epoch": 30.39,
888
+ "learning_rate": 2.4731310091378625e-05,
889
+ "loss": 0.0007,
890
+ "step": 51000
891
+ },
892
+ {
893
+ "epoch": 30.69,
894
+ "learning_rate": 2.4483495232419546e-05,
895
+ "loss": 0.0005,
896
+ "step": 51500
897
+ },
898
+ {
899
+ "epoch": 30.99,
900
+ "learning_rate": 2.4235680373460466e-05,
901
+ "loss": 0.0008,
902
+ "step": 52000
903
+ },
904
+ {
905
+ "epoch": 31.0,
906
+ "eval_exact_match": 30.1829,
907
+ "eval_loss": 0.15716703236103058,
908
+ "eval_runtime": 1647.0959,
909
+ "eval_samples_per_second": 0.598,
910
+ "eval_steps_per_second": 0.15,
911
+ "step": 52018
912
+ },
913
+ {
914
+ "epoch": 31.29,
915
+ "learning_rate": 2.3987865514501387e-05,
916
+ "loss": 0.0006,
917
+ "step": 52500
918
+ },
919
+ {
920
+ "epoch": 31.59,
921
+ "learning_rate": 2.3740050655542308e-05,
922
+ "loss": 0.0006,
923
+ "step": 53000
924
+ },
925
+ {
926
+ "epoch": 31.88,
927
+ "learning_rate": 2.3492235796583236e-05,
928
+ "loss": 0.0006,
929
+ "step": 53500
930
+ },
931
+ {
932
+ "epoch": 32.0,
933
+ "eval_exact_match": 28.6585,
934
+ "eval_loss": 0.15596693754196167,
935
+ "eval_runtime": 1847.1524,
936
+ "eval_samples_per_second": 0.533,
937
+ "eval_steps_per_second": 0.134,
938
+ "step": 53696
939
+ },
940
+ {
941
+ "epoch": 32.18,
942
+ "learning_rate": 2.3244420937624156e-05,
943
+ "loss": 0.0005,
944
+ "step": 54000
945
+ },
946
+ {
947
+ "epoch": 32.48,
948
+ "learning_rate": 2.2996606078665077e-05,
949
+ "loss": 0.0006,
950
+ "step": 54500
951
+ },
952
+ {
953
+ "epoch": 32.78,
954
+ "learning_rate": 2.2748791219705998e-05,
955
+ "loss": 0.0005,
956
+ "step": 55000
957
+ },
958
+ {
959
+ "epoch": 33.0,
960
+ "eval_exact_match": 28.4553,
961
+ "eval_loss": 0.15410810708999634,
962
+ "eval_runtime": 1607.5799,
963
+ "eval_samples_per_second": 0.613,
964
+ "eval_steps_per_second": 0.154,
965
+ "step": 55374
966
+ },
967
+ {
968
+ "epoch": 33.08,
969
+ "learning_rate": 2.250097636074692e-05,
970
+ "loss": 0.0006,
971
+ "step": 55500
972
+ },
973
+ {
974
+ "epoch": 33.37,
975
+ "learning_rate": 2.225316150178784e-05,
976
+ "loss": 0.0005,
977
+ "step": 56000
978
+ },
979
+ {
980
+ "epoch": 33.67,
981
+ "learning_rate": 2.200534664282876e-05,
982
+ "loss": 0.0005,
983
+ "step": 56500
984
+ },
985
+ {
986
+ "epoch": 33.97,
987
+ "learning_rate": 2.175753178386968e-05,
988
+ "loss": 0.0005,
989
+ "step": 57000
990
+ },
991
+ {
992
+ "epoch": 34.0,
993
+ "eval_exact_match": 28.4553,
994
+ "eval_loss": 0.15642932057380676,
995
+ "eval_runtime": 1995.8486,
996
+ "eval_samples_per_second": 0.494,
997
+ "eval_steps_per_second": 0.124,
998
+ "step": 57052
999
+ },
1000
+ {
1001
+ "epoch": 34.27,
1002
+ "learning_rate": 2.150971692491061e-05,
1003
+ "loss": 0.0005,
1004
+ "step": 57500
1005
+ },
1006
+ {
1007
+ "epoch": 34.56,
1008
+ "learning_rate": 2.126190206595153e-05,
1009
+ "loss": 0.0004,
1010
+ "step": 58000
1011
+ },
1012
+ {
1013
+ "epoch": 34.86,
1014
+ "learning_rate": 2.101408720699245e-05,
1015
+ "loss": 0.0004,
1016
+ "step": 58500
1017
+ },
1018
+ {
1019
+ "epoch": 35.0,
1020
+ "eval_exact_match": 28.7602,
1021
+ "eval_loss": 0.15773606300354004,
1022
+ "eval_runtime": 1522.3573,
1023
+ "eval_samples_per_second": 0.647,
1024
+ "eval_steps_per_second": 0.162,
1025
+ "step": 58730
1026
+ },
1027
+ {
1028
+ "epoch": 35.16,
1029
+ "learning_rate": 2.0766767977751292e-05,
1030
+ "loss": 0.0005,
1031
+ "step": 59000
1032
+ },
1033
+ {
1034
+ "epoch": 35.46,
1035
+ "learning_rate": 2.051944874851013e-05,
1036
+ "loss": 0.0004,
1037
+ "step": 59500
1038
+ },
1039
+ {
1040
+ "epoch": 35.76,
1041
+ "learning_rate": 2.027163388955105e-05,
1042
+ "loss": 0.0004,
1043
+ "step": 60000
1044
+ },
1045
+ {
1046
+ "epoch": 36.0,
1047
+ "eval_exact_match": 28.252,
1048
+ "eval_loss": 0.15739889442920685,
1049
+ "eval_runtime": 1470.8721,
1050
+ "eval_samples_per_second": 0.67,
1051
+ "eval_steps_per_second": 0.168,
1052
+ "step": 60408
1053
+ },
1054
+ {
1055
+ "epoch": 36.05,
1056
+ "learning_rate": 2.0024314660309893e-05,
1057
+ "loss": 0.0005,
1058
+ "step": 60500
1059
+ },
1060
+ {
1061
+ "epoch": 36.35,
1062
+ "learning_rate": 1.9776499801350814e-05,
1063
+ "loss": 0.0004,
1064
+ "step": 61000
1065
+ },
1066
+ {
1067
+ "epoch": 36.65,
1068
+ "learning_rate": 1.9528684942391735e-05,
1069
+ "loss": 0.0004,
1070
+ "step": 61500
1071
+ },
1072
+ {
1073
+ "epoch": 36.95,
1074
+ "learning_rate": 1.9280870083432655e-05,
1075
+ "loss": 0.0004,
1076
+ "step": 62000
1077
+ },
1078
+ {
1079
+ "epoch": 37.0,
1080
+ "eval_exact_match": 26.7276,
1081
+ "eval_loss": 0.15798313915729523,
1082
+ "eval_runtime": 1545.6144,
1083
+ "eval_samples_per_second": 0.637,
1084
+ "eval_steps_per_second": 0.16,
1085
+ "step": 62086
1086
+ },
1087
+ {
1088
+ "epoch": 37.25,
1089
+ "learning_rate": 1.9033055224473576e-05,
1090
+ "loss": 0.0003,
1091
+ "step": 62500
1092
+ },
1093
+ {
1094
+ "epoch": 37.54,
1095
+ "learning_rate": 1.8785735995232418e-05,
1096
+ "loss": 0.0004,
1097
+ "step": 63000
1098
+ },
1099
+ {
1100
+ "epoch": 37.84,
1101
+ "learning_rate": 1.853792113627334e-05,
1102
+ "loss": 0.0004,
1103
+ "step": 63500
1104
+ },
1105
+ {
1106
+ "epoch": 38.0,
1107
+ "eval_exact_match": 29.3699,
1108
+ "eval_loss": 0.159205362200737,
1109
+ "eval_runtime": 2036.6907,
1110
+ "eval_samples_per_second": 0.484,
1111
+ "eval_steps_per_second": 0.121,
1112
+ "step": 63764
1113
+ },
1114
+ {
1115
+ "epoch": 38.14,
1116
+ "learning_rate": 1.8290106277314266e-05,
1117
+ "loss": 0.0003,
1118
+ "step": 64000
1119
+ },
1120
+ {
1121
+ "epoch": 38.44,
1122
+ "learning_rate": 1.8043778307508936e-05,
1123
+ "loss": 0.0004,
1124
+ "step": 64500
1125
+ },
1126
+ {
1127
+ "epoch": 38.74,
1128
+ "learning_rate": 1.779596344854986e-05,
1129
+ "loss": 0.0003,
1130
+ "step": 65000
1131
+ },
1132
+ {
1133
+ "epoch": 39.0,
1134
+ "eval_exact_match": 29.065,
1135
+ "eval_loss": 0.15887553989887238,
1136
+ "eval_runtime": 1474.6108,
1137
+ "eval_samples_per_second": 0.668,
1138
+ "eval_steps_per_second": 0.168,
1139
+ "step": 65442
1140
+ },
1141
+ {
1142
+ "epoch": 39.03,
1143
+ "learning_rate": 1.754814858959078e-05,
1144
+ "loss": 0.0004,
1145
+ "step": 65500
1146
+ },
1147
+ {
1148
+ "epoch": 39.33,
1149
+ "learning_rate": 1.7300333730631702e-05,
1150
+ "loss": 0.0002,
1151
+ "step": 66000
1152
+ },
1153
+ {
1154
+ "epoch": 39.63,
1155
+ "learning_rate": 1.7052518871672626e-05,
1156
+ "loss": 0.0003,
1157
+ "step": 66500
1158
+ },
1159
+ {
1160
+ "epoch": 39.93,
1161
+ "learning_rate": 1.6804704012713547e-05,
1162
+ "loss": 0.0003,
1163
+ "step": 67000
1164
+ },
1165
+ {
1166
+ "epoch": 40.0,
1167
+ "eval_exact_match": 28.4553,
1168
+ "eval_loss": 0.16181451082229614,
1169
+ "eval_runtime": 1902.6668,
1170
+ "eval_samples_per_second": 0.518,
1171
+ "eval_steps_per_second": 0.13,
1172
+ "step": 67120
1173
+ },
1174
+ {
1175
+ "epoch": 40.23,
1176
+ "learning_rate": 1.6556889153754468e-05,
1177
+ "loss": 0.0003,
1178
+ "step": 67500
1179
+ },
1180
+ {
1181
+ "epoch": 40.52,
1182
+ "learning_rate": 1.6309074294795392e-05,
1183
+ "loss": 0.0002,
1184
+ "step": 68000
1185
+ },
1186
+ {
1187
+ "epoch": 40.82,
1188
+ "learning_rate": 1.6061259435836313e-05,
1189
+ "loss": 0.0002,
1190
+ "step": 68500
1191
+ },
1192
+ {
1193
+ "epoch": 41.0,
1194
+ "eval_exact_match": 29.4715,
1195
+ "eval_loss": 0.1619289368391037,
1196
+ "eval_runtime": 1699.0156,
1197
+ "eval_samples_per_second": 0.58,
1198
+ "eval_steps_per_second": 0.145,
1199
+ "step": 68798
1200
+ },
1201
+ {
1202
+ "epoch": 41.12,
1203
+ "learning_rate": 1.5813444576877234e-05,
1204
+ "loss": 0.0003,
1205
+ "step": 69000
1206
+ },
1207
+ {
1208
+ "epoch": 41.42,
1209
+ "learning_rate": 1.5565629717918155e-05,
1210
+ "loss": 0.0002,
1211
+ "step": 69500
1212
+ },
1213
+ {
1214
+ "epoch": 41.72,
1215
+ "learning_rate": 1.5317814858959076e-05,
1216
+ "loss": 0.0002,
1217
+ "step": 70000
1218
+ },
1219
+ {
1220
+ "epoch": 42.0,
1221
+ "eval_exact_match": 27.6423,
1222
+ "eval_loss": 0.15936070680618286,
1223
+ "eval_runtime": 1667.2618,
1224
+ "eval_samples_per_second": 0.591,
1225
+ "eval_steps_per_second": 0.148,
1226
+ "step": 70476
1227
+ },
1228
+ {
1229
+ "epoch": 42.01,
1230
+ "learning_rate": 1.5070000000000003e-05,
1231
+ "loss": 0.0003,
1232
+ "step": 70500
1233
+ },
1234
+ {
1235
+ "epoch": 42.31,
1236
+ "learning_rate": 1.4822185141040922e-05,
1237
+ "loss": 0.0002,
1238
+ "step": 71000
1239
+ },
1240
+ {
1241
+ "epoch": 42.61,
1242
+ "learning_rate": 1.4574370282081845e-05,
1243
+ "loss": 0.0002,
1244
+ "step": 71500
1245
+ },
1246
+ {
1247
+ "epoch": 42.91,
1248
+ "learning_rate": 1.4326555423122764e-05,
1249
+ "loss": 0.0002,
1250
+ "step": 72000
1251
+ },
1252
+ {
1253
+ "epoch": 43.0,
1254
+ "eval_exact_match": 27.8455,
1255
+ "eval_loss": 0.16002264618873596,
1256
+ "eval_runtime": 2084.2416,
1257
+ "eval_samples_per_second": 0.473,
1258
+ "eval_steps_per_second": 0.119,
1259
+ "step": 72154
1260
+ },
1261
+ {
1262
+ "epoch": 43.21,
1263
+ "learning_rate": 1.4078740564163686e-05,
1264
+ "loss": 0.0002,
1265
+ "step": 72500
1266
+ },
1267
+ {
1268
+ "epoch": 43.5,
1269
+ "learning_rate": 1.3830925705204605e-05,
1270
+ "loss": 0.0002,
1271
+ "step": 73000
1272
+ },
1273
+ {
1274
+ "epoch": 43.8,
1275
+ "learning_rate": 1.3583110846245531e-05,
1276
+ "loss": 0.0002,
1277
+ "step": 73500
1278
+ },
1279
+ {
1280
+ "epoch": 44.0,
1281
+ "eval_exact_match": 29.5732,
1282
+ "eval_loss": 0.1613691747188568,
1283
+ "eval_runtime": 1688.4017,
1284
+ "eval_samples_per_second": 0.583,
1285
+ "eval_steps_per_second": 0.146,
1286
+ "step": 73832
1287
+ },
1288
+ {
1289
+ "epoch": 44.1,
1290
+ "learning_rate": 1.3335295987286456e-05,
1291
+ "loss": 0.0002,
1292
+ "step": 74000
1293
+ },
1294
+ {
1295
+ "epoch": 44.4,
1296
+ "learning_rate": 1.3087481128327375e-05,
1297
+ "loss": 0.0001,
1298
+ "step": 74500
1299
+ },
1300
+ {
1301
+ "epoch": 44.7,
1302
+ "learning_rate": 1.2839666269368297e-05,
1303
+ "loss": 0.0002,
1304
+ "step": 75000
1305
+ },
1306
+ {
1307
+ "epoch": 44.99,
1308
+ "learning_rate": 1.2591851410409216e-05,
1309
+ "loss": 0.0002,
1310
+ "step": 75500
1311
+ },
1312
+ {
1313
+ "epoch": 45.0,
1314
+ "eval_exact_match": 30.4878,
1315
+ "eval_loss": 0.16062164306640625,
1316
+ "eval_runtime": 1456.9794,
1317
+ "eval_samples_per_second": 0.676,
1318
+ "eval_steps_per_second": 0.17,
1319
+ "step": 75510
1320
+ },
1321
+ {
1322
+ "epoch": 45.29,
1323
+ "learning_rate": 1.2344036551450139e-05,
1324
+ "loss": 0.0001,
1325
+ "step": 76000
1326
+ },
1327
+ {
1328
+ "epoch": 45.59,
1329
+ "learning_rate": 1.209671732220898e-05,
1330
+ "loss": 0.0002,
1331
+ "step": 76500
1332
+ },
1333
+ {
1334
+ "epoch": 45.89,
1335
+ "learning_rate": 1.1848902463249901e-05,
1336
+ "loss": 0.0002,
1337
+ "step": 77000
1338
+ },
1339
+ {
1340
+ "epoch": 46.0,
1341
+ "eval_exact_match": 28.4553,
1342
+ "eval_loss": 0.16334177553653717,
1343
+ "eval_runtime": 1424.3349,
1344
+ "eval_samples_per_second": 0.692,
1345
+ "eval_steps_per_second": 0.173,
1346
+ "step": 77188
1347
+ },
1348
+ {
1349
+ "epoch": 46.19,
1350
+ "learning_rate": 1.1601087604290822e-05,
1351
+ "loss": 0.0001,
1352
+ "step": 77500
1353
+ },
1354
+ {
1355
+ "epoch": 46.48,
1356
+ "learning_rate": 1.1353272745331743e-05,
1357
+ "loss": 0.0001,
1358
+ "step": 78000
1359
+ },
1360
+ {
1361
+ "epoch": 46.78,
1362
+ "learning_rate": 1.1105457886372669e-05,
1363
+ "loss": 0.0001,
1364
+ "step": 78500
1365
+ },
1366
+ {
1367
+ "epoch": 47.0,
1368
+ "eval_exact_match": 29.7764,
1369
+ "eval_loss": 0.16305780410766602,
1370
+ "eval_runtime": 1532.407,
1371
+ "eval_samples_per_second": 0.643,
1372
+ "eval_steps_per_second": 0.161,
1373
+ "step": 78866
1374
+ },
1375
+ {
1376
+ "epoch": 47.08,
1377
+ "learning_rate": 1.0858138657131504e-05,
1378
+ "loss": 0.0001,
1379
+ "step": 79000
1380
+ },
1381
+ {
1382
+ "epoch": 47.38,
1383
+ "learning_rate": 1.0610323798172431e-05,
1384
+ "loss": 0.0001,
1385
+ "step": 79500
1386
+ },
1387
+ {
1388
+ "epoch": 47.68,
1389
+ "learning_rate": 1.0362508939213352e-05,
1390
+ "loss": 0.0001,
1391
+ "step": 80000
1392
+ },
1393
+ {
1394
+ "epoch": 47.97,
1395
+ "learning_rate": 1.0114694080254273e-05,
1396
+ "loss": 0.0001,
1397
+ "step": 80500
1398
+ },
1399
+ {
1400
+ "epoch": 48.0,
1401
+ "eval_exact_match": 28.7602,
1402
+ "eval_loss": 0.1653144359588623,
1403
+ "eval_runtime": 1420.3224,
1404
+ "eval_samples_per_second": 0.694,
1405
+ "eval_steps_per_second": 0.174,
1406
+ "step": 80544
1407
+ },
1408
+ {
1409
+ "epoch": 48.27,
1410
+ "learning_rate": 9.866879221295194e-06,
1411
+ "loss": 0.0001,
1412
+ "step": 81000
1413
+ },
1414
+ {
1415
+ "epoch": 48.57,
1416
+ "learning_rate": 9.619064362336115e-06,
1417
+ "loss": 0.0001,
1418
+ "step": 81500
1419
+ },
1420
+ {
1421
+ "epoch": 48.87,
1422
+ "learning_rate": 9.371249503377035e-06,
1423
+ "loss": 0.0001,
1424
+ "step": 82000
1425
+ },
1426
+ {
1427
+ "epoch": 49.0,
1428
+ "eval_exact_match": 28.5569,
1429
+ "eval_loss": 0.16365119814872742,
1430
+ "eval_runtime": 1693.8317,
1431
+ "eval_samples_per_second": 0.582,
1432
+ "eval_steps_per_second": 0.146,
1433
+ "step": 82222
1434
+ },
1435
+ {
1436
+ "epoch": 49.17,
1437
+ "learning_rate": 9.124921533571714e-06,
1438
+ "loss": 0.0001,
1439
+ "step": 82500
1440
+ },
1441
+ {
1442
+ "epoch": 49.46,
1443
+ "learning_rate": 8.877106674612636e-06,
1444
+ "loss": 0.0001,
1445
+ "step": 83000
1446
+ },
1447
+ {
1448
+ "epoch": 49.76,
1449
+ "learning_rate": 8.629291815653557e-06,
1450
+ "loss": 0.0001,
1451
+ "step": 83500
1452
+ },
1453
+ {
1454
+ "epoch": 50.0,
1455
+ "eval_exact_match": 28.7602,
1456
+ "eval_loss": 0.1648363322019577,
1457
+ "eval_runtime": 1491.6451,
1458
+ "eval_samples_per_second": 0.66,
1459
+ "eval_steps_per_second": 0.166,
1460
+ "step": 83900
1461
+ },
1462
+ {
1463
+ "epoch": 50.06,
1464
+ "learning_rate": 8.381476956694478e-06,
1465
+ "loss": 0.0001,
1466
+ "step": 84000
1467
+ },
1468
+ {
1469
+ "epoch": 50.36,
1470
+ "learning_rate": 8.133662097735399e-06,
1471
+ "loss": 0.0001,
1472
+ "step": 84500
1473
+ },
1474
+ {
1475
+ "epoch": 50.66,
1476
+ "learning_rate": 7.88584723877632e-06,
1477
+ "loss": 0.0001,
1478
+ "step": 85000
1479
+ },
1480
+ {
1481
+ "epoch": 50.95,
1482
+ "learning_rate": 7.638032379817246e-06,
1483
+ "loss": 0.0001,
1484
+ "step": 85500
1485
+ },
1486
+ {
1487
+ "epoch": 51.0,
1488
+ "eval_exact_match": 29.5732,
1489
+ "eval_loss": 0.16499294340610504,
1490
+ "eval_runtime": 1816.3605,
1491
+ "eval_samples_per_second": 0.542,
1492
+ "eval_steps_per_second": 0.136,
1493
+ "step": 85578
1494
+ },
1495
+ {
1496
+ "epoch": 51.25,
1497
+ "learning_rate": 7.390217520858167e-06,
1498
+ "loss": 0.0001,
1499
+ "step": 86000
1500
+ },
1501
+ {
1502
+ "epoch": 51.55,
1503
+ "learning_rate": 7.142402661899088e-06,
1504
+ "loss": 0.0001,
1505
+ "step": 86500
1506
+ },
1507
+ {
1508
+ "epoch": 51.85,
1509
+ "learning_rate": 6.894587802940009e-06,
1510
+ "loss": 0.0001,
1511
+ "step": 87000
1512
+ },
1513
+ {
1514
+ "epoch": 52.0,
1515
+ "eval_exact_match": 30.0813,
1516
+ "eval_loss": 0.1663326919078827,
1517
+ "eval_runtime": 1639.0771,
1518
+ "eval_samples_per_second": 0.601,
1519
+ "eval_steps_per_second": 0.151,
1520
+ "step": 87256
1521
+ },
1522
+ {
1523
+ "epoch": 52.15,
1524
+ "learning_rate": 6.64677294398093e-06,
1525
+ "loss": 0.0001,
1526
+ "step": 87500
1527
+ },
1528
+ {
1529
+ "epoch": 52.44,
1530
+ "learning_rate": 6.3989580850218504e-06,
1531
+ "loss": 0.0001,
1532
+ "step": 88000
1533
+ },
1534
+ {
1535
+ "epoch": 52.74,
1536
+ "learning_rate": 6.151143226062771e-06,
1537
+ "loss": 0.0001,
1538
+ "step": 88500
1539
+ },
1540
+ {
1541
+ "epoch": 53.0,
1542
+ "eval_exact_match": 29.2683,
1543
+ "eval_loss": 0.1655188500881195,
1544
+ "eval_runtime": 1906.3649,
1545
+ "eval_samples_per_second": 0.517,
1546
+ "eval_steps_per_second": 0.13,
1547
+ "step": 88934
1548
+ },
1549
+ {
1550
+ "epoch": 53.04,
1551
+ "learning_rate": 5.903328367103692e-06,
1552
+ "loss": 0.0001,
1553
+ "step": 89000
1554
+ },
1555
+ {
1556
+ "epoch": 53.34,
1557
+ "learning_rate": 5.655513508144619e-06,
1558
+ "loss": 0.0001,
1559
+ "step": 89500
1560
+ },
1561
+ {
1562
+ "epoch": 53.64,
1563
+ "learning_rate": 5.40769864918554e-06,
1564
+ "loss": 0.0001,
1565
+ "step": 90000
1566
+ },
1567
+ {
1568
+ "epoch": 53.93,
1569
+ "learning_rate": 5.159883790226461e-06,
1570
+ "loss": 0.0,
1571
+ "step": 90500
1572
+ },
1573
+ {
1574
+ "epoch": 54.0,
1575
+ "eval_exact_match": 29.4715,
1576
+ "eval_loss": 0.16723661124706268,
1577
+ "eval_runtime": 1751.4211,
1578
+ "eval_samples_per_second": 0.562,
1579
+ "eval_steps_per_second": 0.141,
1580
+ "step": 90612
1581
+ },
1582
+ {
1583
+ "epoch": 54.23,
1584
+ "learning_rate": 4.912564560985301e-06,
1585
+ "loss": 0.0,
1586
+ "step": 91000
1587
+ },
1588
+ {
1589
+ "epoch": 54.53,
1590
+ "learning_rate": 4.664749702026222e-06,
1591
+ "loss": 0.0,
1592
+ "step": 91500
1593
+ },
1594
+ {
1595
+ "epoch": 54.83,
1596
+ "learning_rate": 4.416934843067143e-06,
1597
+ "loss": 0.0,
1598
+ "step": 92000
1599
+ },
1600
+ {
1601
+ "epoch": 55.0,
1602
+ "eval_exact_match": 30.1829,
1603
+ "eval_loss": 0.1663067787885666,
1604
+ "eval_runtime": 1758.7727,
1605
+ "eval_samples_per_second": 0.56,
1606
+ "eval_steps_per_second": 0.14,
1607
+ "step": 92290
1608
+ },
1609
+ {
1610
+ "epoch": 55.13,
1611
+ "learning_rate": 4.169615613825984e-06,
1612
+ "loss": 0.0,
1613
+ "step": 92500
1614
+ },
1615
+ {
1616
+ "epoch": 55.42,
1617
+ "learning_rate": 3.921800754866905e-06,
1618
+ "loss": 0.0,
1619
+ "step": 93000
1620
+ },
1621
+ {
1622
+ "epoch": 55.72,
1623
+ "learning_rate": 3.6739858959078254e-06,
1624
+ "loss": 0.0,
1625
+ "step": 93500
1626
+ },
1627
+ {
1628
+ "epoch": 56.0,
1629
+ "eval_exact_match": 30.0813,
1630
+ "eval_loss": 0.16646040976047516,
1631
+ "eval_runtime": 1568.4573,
1632
+ "eval_samples_per_second": 0.628,
1633
+ "eval_steps_per_second": 0.157,
1634
+ "step": 93968
1635
+ },
1636
+ {
1637
+ "epoch": 56.02,
1638
+ "learning_rate": 3.4261710369487462e-06,
1639
+ "loss": 0.0,
1640
+ "step": 94000
1641
+ },
1642
+ {
1643
+ "epoch": 56.32,
1644
+ "learning_rate": 3.178356177989667e-06,
1645
+ "loss": 0.0,
1646
+ "step": 94500
1647
+ },
1648
+ {
1649
+ "epoch": 56.62,
1650
+ "learning_rate": 2.930541319030594e-06,
1651
+ "loss": 0.0,
1652
+ "step": 95000
1653
+ },
1654
+ {
1655
+ "epoch": 56.91,
1656
+ "learning_rate": 2.682726460071515e-06,
1657
+ "loss": 0.0,
1658
+ "step": 95500
1659
+ },
1660
+ {
1661
+ "epoch": 57.0,
1662
+ "eval_exact_match": 29.5732,
1663
+ "eval_loss": 0.16705289483070374,
1664
+ "eval_runtime": 1688.4241,
1665
+ "eval_samples_per_second": 0.583,
1666
+ "eval_steps_per_second": 0.146,
1667
+ "step": 95646
1668
+ },
1669
+ {
1670
+ "epoch": 57.21,
1671
+ "learning_rate": 2.4349116011124362e-06,
1672
+ "loss": 0.0,
1673
+ "step": 96000
1674
+ },
1675
+ {
1676
+ "epoch": 57.51,
1677
+ "learning_rate": 2.187096742153357e-06,
1678
+ "loss": 0.0,
1679
+ "step": 96500
1680
+ },
1681
+ {
1682
+ "epoch": 57.81,
1683
+ "learning_rate": 1.9397775129121975e-06,
1684
+ "loss": 0.0,
1685
+ "step": 97000
1686
+ },
1687
+ {
1688
+ "epoch": 58.0,
1689
+ "eval_exact_match": 29.3699,
1690
+ "eval_loss": 0.16738204658031464,
1691
+ "eval_runtime": 1770.453,
1692
+ "eval_samples_per_second": 0.556,
1693
+ "eval_steps_per_second": 0.14,
1694
+ "step": 97324
1695
+ },
1696
+ {
1697
+ "epoch": 58.1,
1698
+ "learning_rate": 1.6919626539531185e-06,
1699
+ "loss": 0.0,
1700
+ "step": 97500
1701
+ },
1702
+ {
1703
+ "epoch": 58.4,
1704
+ "learning_rate": 1.4446434247119588e-06,
1705
+ "loss": 0.0,
1706
+ "step": 98000
1707
+ },
1708
+ {
1709
+ "epoch": 58.7,
1710
+ "learning_rate": 1.1968285657528796e-06,
1711
+ "loss": 0.0,
1712
+ "step": 98500
1713
+ },
1714
+ {
1715
+ "epoch": 59.0,
1716
+ "learning_rate": 9.490137067938009e-07,
1717
+ "loss": 0.0,
1718
+ "step": 99000
1719
+ },
1720
+ {
1721
+ "epoch": 59.0,
1722
+ "eval_exact_match": 29.5732,
1723
+ "eval_loss": 0.16720303893089294,
1724
+ "eval_runtime": 1631.7334,
1725
+ "eval_samples_per_second": 0.604,
1726
+ "eval_steps_per_second": 0.151,
1727
+ "step": 99002
1728
+ },
1729
+ {
1730
+ "epoch": 59.3,
1731
+ "learning_rate": 7.011988478347218e-07,
1732
+ "loss": 0.0,
1733
+ "step": 99500
1734
+ },
1735
+ {
1736
+ "epoch": 59.59,
1737
+ "learning_rate": 4.5338398887564833e-07,
1738
+ "loss": 0.0,
1739
+ "step": 100000
1740
+ },
1741
+ {
1742
+ "epoch": 59.89,
1743
+ "learning_rate": 2.0556912991656932e-07,
1744
+ "loss": 0.0,
1745
+ "step": 100500
1746
+ },
1747
+ {
1748
+ "epoch": 60.0,
1749
+ "eval_exact_match": 29.5732,
1750
+ "eval_loss": 0.16741037368774414,
1751
+ "eval_runtime": 1682.2541,
1752
+ "eval_samples_per_second": 0.586,
1753
+ "eval_steps_per_second": 0.147,
1754
+ "step": 100680
1755
+ }
1756
+ ],
1757
+ "max_steps": 100680,
1758
+ "num_train_epochs": 60,
1759
+ "total_flos": 4.904790433726464e+17,
1760
+ "trial_name": null,
1761
+ "trial_params": null
1762
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007e3036f07603b9a502394d794913f87be2edaed83eed9aa3cc900a0bd8473f
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-100680/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/metric.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Val {'eval_loss': 0.11525732278823853, 'eval_exact_match': 30.8943, 'eval_runtime': 1239.2856, 'eval_samples_per_second': 0.795, 'eval_steps_per_second': 0.199, 'epoch': 60.0}
2
+ Test 0
qa_sp_codet5p-220m_s2_latex_bs_lr_47/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026afe1a84a57c4a30090ca8ada15a90b6d7e311cc4a2298533aa81813ff7696
3
+ size 891647438
qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/1712868779.2058115/events.out.tfevents.1712868779.babel-3-7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6df3faceeec4fb3153aaed02df20d287e621ced9840d7c0a1198aa7a0fd9e6d
3
+ size 4990
qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/events.out.tfevents.1712868779.babel-3-7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e4cf271423be84d53ff91b4eb20be5bb2d1b8c7fc65722a3ee70bfd4e45579
3
+ size 56109
qa_sp_codet5p-220m_s2_latex_bs_lr_47/runs/Apr11_16-52-53_babel-3-7/events.out.tfevents.1713018580.babel-3-7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92258917bc38b7295c233138947bbce4f52c5633c0db273db8fe7438b0f21ea
3
+ size 372
qa_sp_codet5p-220m_s2_latex_bs_lr_47/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_sp_codet5p-220m_s2_latex_bs_lr_47/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_sp_codet5p-220m_s2_latex_bs_lr_47/trainer_state.json ADDED
@@ -0,0 +1,1771 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 30.8943,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_sp_codet5p-220m_s2_latex_bs_lr_47/checkpoint-10068",
4
+ "epoch": 60.0,
5
+ "global_step": 100680,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.3,
12
+ "learning_rate": 4.9752185141040927e-05,
13
+ "loss": 0.0427,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.6,
18
+ "learning_rate": 4.9504370282081844e-05,
19
+ "loss": 0.0447,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.89,
24
+ "learning_rate": 4.925655542312277e-05,
25
+ "loss": 0.0341,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_exact_match": 28.8618,
31
+ "eval_loss": 0.09518733620643616,
32
+ "eval_runtime": 1894.6128,
33
+ "eval_samples_per_second": 0.52,
34
+ "eval_steps_per_second": 0.13,
35
+ "step": 1678
36
+ },
37
+ {
38
+ "epoch": 1.19,
39
+ "learning_rate": 4.900874056416369e-05,
40
+ "loss": 0.0411,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 1.49,
45
+ "learning_rate": 4.876092570520461e-05,
46
+ "loss": 0.0328,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 1.79,
51
+ "learning_rate": 4.8513110846245534e-05,
52
+ "loss": 0.0331,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_exact_match": 30.1829,
58
+ "eval_loss": 0.10444394499063492,
59
+ "eval_runtime": 1252.1912,
60
+ "eval_samples_per_second": 0.787,
61
+ "eval_steps_per_second": 0.197,
62
+ "step": 3356
63
+ },
64
+ {
65
+ "epoch": 2.09,
66
+ "learning_rate": 4.826529598728646e-05,
67
+ "loss": 0.03,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 4.8017481128327376e-05,
73
+ "loss": 0.0294,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 2.68,
78
+ "learning_rate": 4.77696662693683e-05,
79
+ "loss": 0.0283,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "learning_rate": 4.752185141040922e-05,
85
+ "loss": 0.027,
86
+ "step": 5000
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_exact_match": 30.7927,
91
+ "eval_loss": 0.10687928646802902,
92
+ "eval_runtime": 1090.2564,
93
+ "eval_samples_per_second": 0.903,
94
+ "eval_steps_per_second": 0.227,
95
+ "step": 5034
96
+ },
97
+ {
98
+ "epoch": 3.28,
99
+ "learning_rate": 4.727403655145014e-05,
100
+ "loss": 0.0279,
101
+ "step": 5500
102
+ },
103
+ {
104
+ "epoch": 3.58,
105
+ "learning_rate": 4.7026221692491066e-05,
106
+ "loss": 0.02,
107
+ "step": 6000
108
+ },
109
+ {
110
+ "epoch": 3.87,
111
+ "learning_rate": 4.677840683353199e-05,
112
+ "loss": 0.0196,
113
+ "step": 6500
114
+ },
115
+ {
116
+ "epoch": 4.0,
117
+ "eval_exact_match": 30.3862,
118
+ "eval_loss": 0.10539323091506958,
119
+ "eval_runtime": 1426.0185,
120
+ "eval_samples_per_second": 0.691,
121
+ "eval_steps_per_second": 0.173,
122
+ "step": 6712
123
+ },
124
+ {
125
+ "epoch": 4.17,
126
+ "learning_rate": 4.653059197457291e-05,
127
+ "loss": 0.0207,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 4.47,
132
+ "learning_rate": 4.628277711561383e-05,
133
+ "loss": 0.0216,
134
+ "step": 7500
135
+ },
136
+ {
137
+ "epoch": 4.77,
138
+ "learning_rate": 4.6035457886372666e-05,
139
+ "loss": 0.0173,
140
+ "step": 8000
141
+ },
142
+ {
143
+ "epoch": 5.0,
144
+ "eval_exact_match": 30.4878,
145
+ "eval_loss": 0.11340699344873428,
146
+ "eval_runtime": 1351.563,
147
+ "eval_samples_per_second": 0.729,
148
+ "eval_steps_per_second": 0.183,
149
+ "step": 8390
150
+ },
151
+ {
152
+ "epoch": 5.07,
153
+ "learning_rate": 4.578764302741359e-05,
154
+ "loss": 0.016,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 5.36,
159
+ "learning_rate": 4.553982816845451e-05,
160
+ "loss": 0.0162,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 5.66,
165
+ "learning_rate": 4.529201330949543e-05,
166
+ "loss": 0.0174,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 5.96,
171
+ "learning_rate": 4.504419845053635e-05,
172
+ "loss": 0.0141,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 6.0,
177
+ "eval_exact_match": 30.8943,
178
+ "eval_loss": 0.11525732278823853,
179
+ "eval_runtime": 1238.1999,
180
+ "eval_samples_per_second": 0.796,
181
+ "eval_steps_per_second": 0.199,
182
+ "step": 10068
183
+ },
184
+ {
185
+ "epoch": 6.26,
186
+ "learning_rate": 4.479638359157728e-05,
187
+ "loss": 0.0143,
188
+ "step": 10500
189
+ },
190
+ {
191
+ "epoch": 6.56,
192
+ "learning_rate": 4.45485687326182e-05,
193
+ "loss": 0.0133,
194
+ "step": 11000
195
+ },
196
+ {
197
+ "epoch": 6.85,
198
+ "learning_rate": 4.430174513309496e-05,
199
+ "loss": 0.0137,
200
+ "step": 11500
201
+ },
202
+ {
203
+ "epoch": 7.0,
204
+ "eval_exact_match": 29.7764,
205
+ "eval_loss": 0.12133623659610748,
206
+ "eval_runtime": 1798.1818,
207
+ "eval_samples_per_second": 0.548,
208
+ "eval_steps_per_second": 0.137,
209
+ "step": 11746
210
+ },
211
+ {
212
+ "epoch": 7.15,
213
+ "learning_rate": 4.405393027413588e-05,
214
+ "loss": 0.013,
215
+ "step": 12000
216
+ },
217
+ {
218
+ "epoch": 7.45,
219
+ "learning_rate": 4.38061154151768e-05,
220
+ "loss": 0.0108,
221
+ "step": 12500
222
+ },
223
+ {
224
+ "epoch": 7.75,
225
+ "learning_rate": 4.355830055621772e-05,
226
+ "loss": 0.0098,
227
+ "step": 13000
228
+ },
229
+ {
230
+ "epoch": 8.0,
231
+ "eval_exact_match": 29.065,
232
+ "eval_loss": 0.12475814670324326,
233
+ "eval_runtime": 1418.154,
234
+ "eval_samples_per_second": 0.695,
235
+ "eval_steps_per_second": 0.174,
236
+ "step": 13424
237
+ },
238
+ {
239
+ "epoch": 8.05,
240
+ "learning_rate": 4.331048569725865e-05,
241
+ "loss": 0.0108,
242
+ "step": 13500
243
+ },
244
+ {
245
+ "epoch": 8.34,
246
+ "learning_rate": 4.3062670838299565e-05,
247
+ "loss": 0.0097,
248
+ "step": 14000
249
+ },
250
+ {
251
+ "epoch": 8.64,
252
+ "learning_rate": 4.281485597934049e-05,
253
+ "loss": 0.0106,
254
+ "step": 14500
255
+ },
256
+ {
257
+ "epoch": 8.94,
258
+ "learning_rate": 4.2567041120381406e-05,
259
+ "loss": 0.0079,
260
+ "step": 15000
261
+ },
262
+ {
263
+ "epoch": 9.0,
264
+ "eval_exact_match": 30.6911,
265
+ "eval_loss": 0.12646353244781494,
266
+ "eval_runtime": 1557.2757,
267
+ "eval_samples_per_second": 0.633,
268
+ "eval_steps_per_second": 0.159,
269
+ "step": 15102
270
+ },
271
+ {
272
+ "epoch": 9.24,
273
+ "learning_rate": 4.231922626142233e-05,
274
+ "loss": 0.0081,
275
+ "step": 15500
276
+ },
277
+ {
278
+ "epoch": 9.54,
279
+ "learning_rate": 4.207141140246325e-05,
280
+ "loss": 0.0087,
281
+ "step": 16000
282
+ },
283
+ {
284
+ "epoch": 9.83,
285
+ "learning_rate": 4.182359654350417e-05,
286
+ "loss": 0.0077,
287
+ "step": 16500
288
+ },
289
+ {
290
+ "epoch": 10.0,
291
+ "eval_exact_match": 28.8618,
292
+ "eval_loss": 0.12802913784980774,
293
+ "eval_runtime": 1843.86,
294
+ "eval_samples_per_second": 0.534,
295
+ "eval_steps_per_second": 0.134,
296
+ "step": 16780
297
+ },
298
+ {
299
+ "epoch": 10.13,
300
+ "learning_rate": 4.1576277314263014e-05,
301
+ "loss": 0.008,
302
+ "step": 17000
303
+ },
304
+ {
305
+ "epoch": 10.43,
306
+ "learning_rate": 4.132846245530394e-05,
307
+ "loss": 0.0061,
308
+ "step": 17500
309
+ },
310
+ {
311
+ "epoch": 10.73,
312
+ "learning_rate": 4.1080647596344856e-05,
313
+ "loss": 0.0069,
314
+ "step": 18000
315
+ },
316
+ {
317
+ "epoch": 11.0,
318
+ "eval_exact_match": 28.7602,
319
+ "eval_loss": 0.13084228336811066,
320
+ "eval_runtime": 1640.7111,
321
+ "eval_samples_per_second": 0.6,
322
+ "eval_steps_per_second": 0.151,
323
+ "step": 18458
324
+ },
325
+ {
326
+ "epoch": 11.03,
327
+ "learning_rate": 4.083283273738578e-05,
328
+ "loss": 0.007,
329
+ "step": 18500
330
+ },
331
+ {
332
+ "epoch": 11.32,
333
+ "learning_rate": 4.05850178784267e-05,
334
+ "loss": 0.0064,
335
+ "step": 19000
336
+ },
337
+ {
338
+ "epoch": 11.62,
339
+ "learning_rate": 4.033720301946762e-05,
340
+ "loss": 0.0052,
341
+ "step": 19500
342
+ },
343
+ {
344
+ "epoch": 11.92,
345
+ "learning_rate": 4.008938816050854e-05,
346
+ "loss": 0.006,
347
+ "step": 20000
348
+ },
349
+ {
350
+ "epoch": 12.0,
351
+ "eval_exact_match": 27.7439,
352
+ "eval_loss": 0.1305484175682068,
353
+ "eval_runtime": 1207.7361,
354
+ "eval_samples_per_second": 0.816,
355
+ "eval_steps_per_second": 0.205,
356
+ "step": 20136
357
+ },
358
+ {
359
+ "epoch": 12.22,
360
+ "learning_rate": 3.984157330154946e-05,
361
+ "loss": 0.004,
362
+ "step": 20500
363
+ },
364
+ {
365
+ "epoch": 12.51,
366
+ "learning_rate": 3.959375844259039e-05,
367
+ "loss": 0.0048,
368
+ "step": 21000
369
+ },
370
+ {
371
+ "epoch": 12.81,
372
+ "learning_rate": 3.934594358363131e-05,
373
+ "loss": 0.0046,
374
+ "step": 21500
375
+ },
376
+ {
377
+ "epoch": 13.0,
378
+ "eval_exact_match": 29.4715,
379
+ "eval_loss": 0.13410894572734833,
380
+ "eval_runtime": 1779.2713,
381
+ "eval_samples_per_second": 0.554,
382
+ "eval_steps_per_second": 0.139,
383
+ "step": 21814
384
+ },
385
+ {
386
+ "epoch": 13.11,
387
+ "learning_rate": 3.909812872467223e-05,
388
+ "loss": 0.005,
389
+ "step": 22000
390
+ },
391
+ {
392
+ "epoch": 13.41,
393
+ "learning_rate": 3.885031386571315e-05,
394
+ "loss": 0.0039,
395
+ "step": 22500
396
+ },
397
+ {
398
+ "epoch": 13.71,
399
+ "learning_rate": 3.860249900675407e-05,
400
+ "loss": 0.0052,
401
+ "step": 23000
402
+ },
403
+ {
404
+ "epoch": 14.0,
405
+ "eval_exact_match": 29.5732,
406
+ "eval_loss": 0.13682885468006134,
407
+ "eval_runtime": 1195.0104,
408
+ "eval_samples_per_second": 0.824,
409
+ "eval_steps_per_second": 0.207,
410
+ "step": 23492
411
+ },
412
+ {
413
+ "epoch": 14.0,
414
+ "learning_rate": 3.835517977751291e-05,
415
+ "loss": 0.0039,
416
+ "step": 23500
417
+ },
418
+ {
419
+ "epoch": 14.3,
420
+ "learning_rate": 3.8107364918553836e-05,
421
+ "loss": 0.0039,
422
+ "step": 24000
423
+ },
424
+ {
425
+ "epoch": 14.6,
426
+ "learning_rate": 3.786004568931268e-05,
427
+ "loss": 0.0032,
428
+ "step": 24500
429
+ },
430
+ {
431
+ "epoch": 14.9,
432
+ "learning_rate": 3.7612230830353595e-05,
433
+ "loss": 0.0034,
434
+ "step": 25000
435
+ },
436
+ {
437
+ "epoch": 15.0,
438
+ "eval_exact_match": 28.1504,
439
+ "eval_loss": 0.13723917305469513,
440
+ "eval_runtime": 1941.6161,
441
+ "eval_samples_per_second": 0.507,
442
+ "eval_steps_per_second": 0.127,
443
+ "step": 25170
444
+ },
445
+ {
446
+ "epoch": 15.2,
447
+ "learning_rate": 3.736441597139452e-05,
448
+ "loss": 0.0034,
449
+ "step": 25500
450
+ },
451
+ {
452
+ "epoch": 15.49,
453
+ "learning_rate": 3.7116601112435444e-05,
454
+ "loss": 0.0032,
455
+ "step": 26000
456
+ },
457
+ {
458
+ "epoch": 15.79,
459
+ "learning_rate": 3.686878625347636e-05,
460
+ "loss": 0.003,
461
+ "step": 26500
462
+ },
463
+ {
464
+ "epoch": 16.0,
465
+ "eval_exact_match": 28.8618,
466
+ "eval_loss": 0.13947974145412445,
467
+ "eval_runtime": 1579.4418,
468
+ "eval_samples_per_second": 0.624,
469
+ "eval_steps_per_second": 0.156,
470
+ "step": 26848
471
+ },
472
+ {
473
+ "epoch": 16.09,
474
+ "learning_rate": 3.6621467024235196e-05,
475
+ "loss": 0.003,
476
+ "step": 27000
477
+ },
478
+ {
479
+ "epoch": 16.39,
480
+ "learning_rate": 3.637365216527613e-05,
481
+ "loss": 0.0024,
482
+ "step": 27500
483
+ },
484
+ {
485
+ "epoch": 16.69,
486
+ "learning_rate": 3.6125837306317045e-05,
487
+ "loss": 0.0028,
488
+ "step": 28000
489
+ },
490
+ {
491
+ "epoch": 16.98,
492
+ "learning_rate": 3.587802244735797e-05,
493
+ "loss": 0.0025,
494
+ "step": 28500
495
+ },
496
+ {
497
+ "epoch": 17.0,
498
+ "eval_exact_match": 28.7602,
499
+ "eval_loss": 0.1384698450565338,
500
+ "eval_runtime": 1809.0785,
501
+ "eval_samples_per_second": 0.544,
502
+ "eval_steps_per_second": 0.137,
503
+ "step": 28526
504
+ },
505
+ {
506
+ "epoch": 17.28,
507
+ "learning_rate": 3.5630207588398886e-05,
508
+ "loss": 0.0025,
509
+ "step": 29000
510
+ },
511
+ {
512
+ "epoch": 17.58,
513
+ "learning_rate": 3.538239272943982e-05,
514
+ "loss": 0.0022,
515
+ "step": 29500
516
+ },
517
+ {
518
+ "epoch": 17.88,
519
+ "learning_rate": 3.5134577870480735e-05,
520
+ "loss": 0.0024,
521
+ "step": 30000
522
+ },
523
+ {
524
+ "epoch": 18.0,
525
+ "eval_exact_match": 29.5732,
526
+ "eval_loss": 0.14079181849956512,
527
+ "eval_runtime": 1472.2748,
528
+ "eval_samples_per_second": 0.669,
529
+ "eval_steps_per_second": 0.168,
530
+ "step": 30204
531
+ },
532
+ {
533
+ "epoch": 18.18,
534
+ "learning_rate": 3.488676301152166e-05,
535
+ "loss": 0.0021,
536
+ "step": 30500
537
+ },
538
+ {
539
+ "epoch": 18.47,
540
+ "learning_rate": 3.4638948152562576e-05,
541
+ "loss": 0.0024,
542
+ "step": 31000
543
+ },
544
+ {
545
+ "epoch": 18.77,
546
+ "learning_rate": 3.43911332936035e-05,
547
+ "loss": 0.0019,
548
+ "step": 31500
549
+ },
550
+ {
551
+ "epoch": 19.0,
552
+ "eval_exact_match": 28.7602,
553
+ "eval_loss": 0.14403322339057922,
554
+ "eval_runtime": 1414.577,
555
+ "eval_samples_per_second": 0.696,
556
+ "eval_steps_per_second": 0.175,
557
+ "step": 31882
558
+ },
559
+ {
560
+ "epoch": 19.07,
561
+ "learning_rate": 3.414331843464442e-05,
562
+ "loss": 0.002,
563
+ "step": 32000
564
+ },
565
+ {
566
+ "epoch": 19.37,
567
+ "learning_rate": 3.389550357568534e-05,
568
+ "loss": 0.002,
569
+ "step": 32500
570
+ },
571
+ {
572
+ "epoch": 19.67,
573
+ "learning_rate": 3.3648184346444184e-05,
574
+ "loss": 0.0021,
575
+ "step": 33000
576
+ },
577
+ {
578
+ "epoch": 19.96,
579
+ "learning_rate": 3.34003694874851e-05,
580
+ "loss": 0.0017,
581
+ "step": 33500
582
+ },
583
+ {
584
+ "epoch": 20.0,
585
+ "eval_exact_match": 28.6585,
586
+ "eval_loss": 0.14581115543842316,
587
+ "eval_runtime": 1719.5939,
588
+ "eval_samples_per_second": 0.573,
589
+ "eval_steps_per_second": 0.144,
590
+ "step": 33560
591
+ },
592
+ {
593
+ "epoch": 20.26,
594
+ "learning_rate": 3.315255462852603e-05,
595
+ "loss": 0.0016,
596
+ "step": 34000
597
+ },
598
+ {
599
+ "epoch": 20.56,
600
+ "learning_rate": 3.290523539928487e-05,
601
+ "loss": 0.0017,
602
+ "step": 34500
603
+ },
604
+ {
605
+ "epoch": 20.86,
606
+ "learning_rate": 3.2657420540325785e-05,
607
+ "loss": 0.0017,
608
+ "step": 35000
609
+ },
610
+ {
611
+ "epoch": 21.0,
612
+ "eval_exact_match": 29.6748,
613
+ "eval_loss": 0.1457909643650055,
614
+ "eval_runtime": 1424.642,
615
+ "eval_samples_per_second": 0.691,
616
+ "eval_steps_per_second": 0.173,
617
+ "step": 35238
618
+ },
619
+ {
620
+ "epoch": 21.16,
621
+ "learning_rate": 3.240960568136671e-05,
622
+ "loss": 0.0016,
623
+ "step": 35500
624
+ },
625
+ {
626
+ "epoch": 21.45,
627
+ "learning_rate": 3.2161790822407626e-05,
628
+ "loss": 0.0015,
629
+ "step": 36000
630
+ },
631
+ {
632
+ "epoch": 21.75,
633
+ "learning_rate": 3.191397596344855e-05,
634
+ "loss": 0.0014,
635
+ "step": 36500
636
+ },
637
+ {
638
+ "epoch": 22.0,
639
+ "eval_exact_match": 28.7602,
640
+ "eval_loss": 0.14809423685073853,
641
+ "eval_runtime": 1802.4045,
642
+ "eval_samples_per_second": 0.546,
643
+ "eval_steps_per_second": 0.137,
644
+ "step": 36916
645
+ },
646
+ {
647
+ "epoch": 22.05,
648
+ "learning_rate": 3.1666161104489475e-05,
649
+ "loss": 0.0014,
650
+ "step": 37000
651
+ },
652
+ {
653
+ "epoch": 22.35,
654
+ "learning_rate": 3.14183462455304e-05,
655
+ "loss": 0.0014,
656
+ "step": 37500
657
+ },
658
+ {
659
+ "epoch": 22.65,
660
+ "learning_rate": 3.1170531386571316e-05,
661
+ "loss": 0.0015,
662
+ "step": 38000
663
+ },
664
+ {
665
+ "epoch": 22.94,
666
+ "learning_rate": 3.092271652761224e-05,
667
+ "loss": 0.0013,
668
+ "step": 38500
669
+ },
670
+ {
671
+ "epoch": 23.0,
672
+ "eval_exact_match": 29.878,
673
+ "eval_loss": 0.14681237936019897,
674
+ "eval_runtime": 1291.1919,
675
+ "eval_samples_per_second": 0.763,
676
+ "eval_steps_per_second": 0.191,
677
+ "step": 38594
678
+ },
679
+ {
680
+ "epoch": 23.24,
681
+ "learning_rate": 3.0675397298371075e-05,
682
+ "loss": 0.0013,
683
+ "step": 39000
684
+ },
685
+ {
686
+ "epoch": 23.54,
687
+ "learning_rate": 3.0427582439411996e-05,
688
+ "loss": 0.0014,
689
+ "step": 39500
690
+ },
691
+ {
692
+ "epoch": 23.84,
693
+ "learning_rate": 3.0179767580452917e-05,
694
+ "loss": 0.0012,
695
+ "step": 40000
696
+ },
697
+ {
698
+ "epoch": 24.0,
699
+ "eval_exact_match": 27.1341,
700
+ "eval_loss": 0.14823853969573975,
701
+ "eval_runtime": 1532.4551,
702
+ "eval_samples_per_second": 0.643,
703
+ "eval_steps_per_second": 0.161,
704
+ "step": 40272
705
+ },
706
+ {
707
+ "epoch": 24.14,
708
+ "learning_rate": 2.9931952721493838e-05,
709
+ "loss": 0.001,
710
+ "step": 40500
711
+ },
712
+ {
713
+ "epoch": 24.43,
714
+ "learning_rate": 2.9685129121970594e-05,
715
+ "loss": 0.0011,
716
+ "step": 41000
717
+ },
718
+ {
719
+ "epoch": 24.73,
720
+ "learning_rate": 2.9437314263011518e-05,
721
+ "loss": 0.001,
722
+ "step": 41500
723
+ },
724
+ {
725
+ "epoch": 25.0,
726
+ "eval_exact_match": 29.1667,
727
+ "eval_loss": 0.1513073891401291,
728
+ "eval_runtime": 1636.7658,
729
+ "eval_samples_per_second": 0.602,
730
+ "eval_steps_per_second": 0.151,
731
+ "step": 41950
732
+ },
733
+ {
734
+ "epoch": 25.03,
735
+ "learning_rate": 2.918949940405244e-05,
736
+ "loss": 0.0011,
737
+ "step": 42000
738
+ },
739
+ {
740
+ "epoch": 25.33,
741
+ "learning_rate": 2.8941684545093363e-05,
742
+ "loss": 0.001,
743
+ "step": 42500
744
+ },
745
+ {
746
+ "epoch": 25.63,
747
+ "learning_rate": 2.869386968613429e-05,
748
+ "loss": 0.001,
749
+ "step": 43000
750
+ },
751
+ {
752
+ "epoch": 25.92,
753
+ "learning_rate": 2.8446550456893122e-05,
754
+ "loss": 0.0011,
755
+ "step": 43500
756
+ },
757
+ {
758
+ "epoch": 26.0,
759
+ "eval_exact_match": 29.3699,
760
+ "eval_loss": 0.15289542078971863,
761
+ "eval_runtime": 1668.6258,
762
+ "eval_samples_per_second": 0.59,
763
+ "eval_steps_per_second": 0.148,
764
+ "step": 43628
765
+ },
766
+ {
767
+ "epoch": 26.22,
768
+ "learning_rate": 2.819873559793405e-05,
769
+ "loss": 0.0013,
770
+ "step": 44000
771
+ },
772
+ {
773
+ "epoch": 26.52,
774
+ "learning_rate": 2.7950920738974974e-05,
775
+ "loss": 0.0009,
776
+ "step": 44500
777
+ },
778
+ {
779
+ "epoch": 26.82,
780
+ "learning_rate": 2.7703105880015888e-05,
781
+ "loss": 0.0008,
782
+ "step": 45000
783
+ },
784
+ {
785
+ "epoch": 27.0,
786
+ "eval_exact_match": 28.7602,
787
+ "eval_loss": 0.1510438770055771,
788
+ "eval_runtime": 1512.0615,
789
+ "eval_samples_per_second": 0.651,
790
+ "eval_steps_per_second": 0.163,
791
+ "step": 45306
792
+ },
793
+ {
794
+ "epoch": 27.12,
795
+ "learning_rate": 2.7455291021056812e-05,
796
+ "loss": 0.0008,
797
+ "step": 45500
798
+ },
799
+ {
800
+ "epoch": 27.41,
801
+ "learning_rate": 2.7207476162097733e-05,
802
+ "loss": 0.0009,
803
+ "step": 46000
804
+ },
805
+ {
806
+ "epoch": 27.71,
807
+ "learning_rate": 2.6959661303138657e-05,
808
+ "loss": 0.0009,
809
+ "step": 46500
810
+ },
811
+ {
812
+ "epoch": 28.0,
813
+ "eval_exact_match": 29.5732,
814
+ "eval_loss": 0.15132220089435577,
815
+ "eval_runtime": 1581.6848,
816
+ "eval_samples_per_second": 0.623,
817
+ "eval_steps_per_second": 0.156,
818
+ "step": 46984
819
+ },
820
+ {
821
+ "epoch": 28.01,
822
+ "learning_rate": 2.671184644417957e-05,
823
+ "loss": 0.0008,
824
+ "step": 47000
825
+ },
826
+ {
827
+ "epoch": 28.31,
828
+ "learning_rate": 2.64640315852205e-05,
829
+ "loss": 0.0007,
830
+ "step": 47500
831
+ },
832
+ {
833
+ "epoch": 28.61,
834
+ "learning_rate": 2.621671235597934e-05,
835
+ "loss": 0.0009,
836
+ "step": 48000
837
+ },
838
+ {
839
+ "epoch": 28.9,
840
+ "learning_rate": 2.596889749702026e-05,
841
+ "loss": 0.0008,
842
+ "step": 48500
843
+ },
844
+ {
845
+ "epoch": 29.0,
846
+ "eval_exact_match": 29.6748,
847
+ "eval_loss": 0.15183404088020325,
848
+ "eval_runtime": 1477.4592,
849
+ "eval_samples_per_second": 0.667,
850
+ "eval_steps_per_second": 0.167,
851
+ "step": 48662
852
+ },
853
+ {
854
+ "epoch": 29.2,
855
+ "learning_rate": 2.5721082638061182e-05,
856
+ "loss": 0.0008,
857
+ "step": 49000
858
+ },
859
+ {
860
+ "epoch": 29.5,
861
+ "learning_rate": 2.5473763408820024e-05,
862
+ "loss": 0.0008,
863
+ "step": 49500
864
+ },
865
+ {
866
+ "epoch": 29.8,
867
+ "learning_rate": 2.5225948549860945e-05,
868
+ "loss": 0.0006,
869
+ "step": 50000
870
+ },
871
+ {
872
+ "epoch": 30.0,
873
+ "eval_exact_match": 28.252,
874
+ "eval_loss": 0.15343768894672394,
875
+ "eval_runtime": 1700.6429,
876
+ "eval_samples_per_second": 0.579,
877
+ "eval_steps_per_second": 0.145,
878
+ "step": 50340
879
+ },
880
+ {
881
+ "epoch": 30.1,
882
+ "learning_rate": 2.4979124950337704e-05,
883
+ "loss": 0.0008,
884
+ "step": 50500
885
+ },
886
+ {
887
+ "epoch": 30.39,
888
+ "learning_rate": 2.4731310091378625e-05,
889
+ "loss": 0.0007,
890
+ "step": 51000
891
+ },
892
+ {
893
+ "epoch": 30.69,
894
+ "learning_rate": 2.4483495232419546e-05,
895
+ "loss": 0.0005,
896
+ "step": 51500
897
+ },
898
+ {
899
+ "epoch": 30.99,
900
+ "learning_rate": 2.4235680373460466e-05,
901
+ "loss": 0.0008,
902
+ "step": 52000
903
+ },
904
+ {
905
+ "epoch": 31.0,
906
+ "eval_exact_match": 30.1829,
907
+ "eval_loss": 0.15716703236103058,
908
+ "eval_runtime": 1647.0959,
909
+ "eval_samples_per_second": 0.598,
910
+ "eval_steps_per_second": 0.15,
911
+ "step": 52018
912
+ },
913
+ {
914
+ "epoch": 31.29,
915
+ "learning_rate": 2.3987865514501387e-05,
916
+ "loss": 0.0006,
917
+ "step": 52500
918
+ },
919
+ {
920
+ "epoch": 31.59,
921
+ "learning_rate": 2.3740050655542308e-05,
922
+ "loss": 0.0006,
923
+ "step": 53000
924
+ },
925
+ {
926
+ "epoch": 31.88,
927
+ "learning_rate": 2.3492235796583236e-05,
928
+ "loss": 0.0006,
929
+ "step": 53500
930
+ },
931
+ {
932
+ "epoch": 32.0,
933
+ "eval_exact_match": 28.6585,
934
+ "eval_loss": 0.15596693754196167,
935
+ "eval_runtime": 1847.1524,
936
+ "eval_samples_per_second": 0.533,
937
+ "eval_steps_per_second": 0.134,
938
+ "step": 53696
939
+ },
940
+ {
941
+ "epoch": 32.18,
942
+ "learning_rate": 2.3244420937624156e-05,
943
+ "loss": 0.0005,
944
+ "step": 54000
945
+ },
946
+ {
947
+ "epoch": 32.48,
948
+ "learning_rate": 2.2996606078665077e-05,
949
+ "loss": 0.0006,
950
+ "step": 54500
951
+ },
952
+ {
953
+ "epoch": 32.78,
954
+ "learning_rate": 2.2748791219705998e-05,
955
+ "loss": 0.0005,
956
+ "step": 55000
957
+ },
958
+ {
959
+ "epoch": 33.0,
960
+ "eval_exact_match": 28.4553,
961
+ "eval_loss": 0.15410810708999634,
962
+ "eval_runtime": 1607.5799,
963
+ "eval_samples_per_second": 0.613,
964
+ "eval_steps_per_second": 0.154,
965
+ "step": 55374
966
+ },
967
+ {
968
+ "epoch": 33.08,
969
+ "learning_rate": 2.250097636074692e-05,
970
+ "loss": 0.0006,
971
+ "step": 55500
972
+ },
973
+ {
974
+ "epoch": 33.37,
975
+ "learning_rate": 2.225316150178784e-05,
976
+ "loss": 0.0005,
977
+ "step": 56000
978
+ },
979
+ {
980
+ "epoch": 33.67,
981
+ "learning_rate": 2.200534664282876e-05,
982
+ "loss": 0.0005,
983
+ "step": 56500
984
+ },
985
+ {
986
+ "epoch": 33.97,
987
+ "learning_rate": 2.175753178386968e-05,
988
+ "loss": 0.0005,
989
+ "step": 57000
990
+ },
991
+ {
992
+ "epoch": 34.0,
993
+ "eval_exact_match": 28.4553,
994
+ "eval_loss": 0.15642932057380676,
995
+ "eval_runtime": 1995.8486,
996
+ "eval_samples_per_second": 0.494,
997
+ "eval_steps_per_second": 0.124,
998
+ "step": 57052
999
+ },
1000
+ {
1001
+ "epoch": 34.27,
1002
+ "learning_rate": 2.150971692491061e-05,
1003
+ "loss": 0.0005,
1004
+ "step": 57500
1005
+ },
1006
+ {
1007
+ "epoch": 34.56,
1008
+ "learning_rate": 2.126190206595153e-05,
1009
+ "loss": 0.0004,
1010
+ "step": 58000
1011
+ },
1012
+ {
1013
+ "epoch": 34.86,
1014
+ "learning_rate": 2.101408720699245e-05,
1015
+ "loss": 0.0004,
1016
+ "step": 58500
1017
+ },
1018
+ {
1019
+ "epoch": 35.0,
1020
+ "eval_exact_match": 28.7602,
1021
+ "eval_loss": 0.15773606300354004,
1022
+ "eval_runtime": 1522.3573,
1023
+ "eval_samples_per_second": 0.647,
1024
+ "eval_steps_per_second": 0.162,
1025
+ "step": 58730
1026
+ },
1027
+ {
1028
+ "epoch": 35.16,
1029
+ "learning_rate": 2.0766767977751292e-05,
1030
+ "loss": 0.0005,
1031
+ "step": 59000
1032
+ },
1033
+ {
1034
+ "epoch": 35.46,
1035
+ "learning_rate": 2.051944874851013e-05,
1036
+ "loss": 0.0004,
1037
+ "step": 59500
1038
+ },
1039
+ {
1040
+ "epoch": 35.76,
1041
+ "learning_rate": 2.027163388955105e-05,
1042
+ "loss": 0.0004,
1043
+ "step": 60000
1044
+ },
1045
+ {
1046
+ "epoch": 36.0,
1047
+ "eval_exact_match": 28.252,
1048
+ "eval_loss": 0.15739889442920685,
1049
+ "eval_runtime": 1470.8721,
1050
+ "eval_samples_per_second": 0.67,
1051
+ "eval_steps_per_second": 0.168,
1052
+ "step": 60408
1053
+ },
1054
+ {
1055
+ "epoch": 36.05,
1056
+ "learning_rate": 2.0024314660309893e-05,
1057
+ "loss": 0.0005,
1058
+ "step": 60500
1059
+ },
1060
+ {
1061
+ "epoch": 36.35,
1062
+ "learning_rate": 1.9776499801350814e-05,
1063
+ "loss": 0.0004,
1064
+ "step": 61000
1065
+ },
1066
+ {
1067
+ "epoch": 36.65,
1068
+ "learning_rate": 1.9528684942391735e-05,
1069
+ "loss": 0.0004,
1070
+ "step": 61500
1071
+ },
1072
+ {
1073
+ "epoch": 36.95,
1074
+ "learning_rate": 1.9280870083432655e-05,
1075
+ "loss": 0.0004,
1076
+ "step": 62000
1077
+ },
1078
+ {
1079
+ "epoch": 37.0,
1080
+ "eval_exact_match": 26.7276,
1081
+ "eval_loss": 0.15798313915729523,
1082
+ "eval_runtime": 1545.6144,
1083
+ "eval_samples_per_second": 0.637,
1084
+ "eval_steps_per_second": 0.16,
1085
+ "step": 62086
1086
+ },
1087
+ {
1088
+ "epoch": 37.25,
1089
+ "learning_rate": 1.9033055224473576e-05,
1090
+ "loss": 0.0003,
1091
+ "step": 62500
1092
+ },
1093
+ {
1094
+ "epoch": 37.54,
1095
+ "learning_rate": 1.8785735995232418e-05,
1096
+ "loss": 0.0004,
1097
+ "step": 63000
1098
+ },
1099
+ {
1100
+ "epoch": 37.84,
1101
+ "learning_rate": 1.853792113627334e-05,
1102
+ "loss": 0.0004,
1103
+ "step": 63500
1104
+ },
1105
+ {
1106
+ "epoch": 38.0,
1107
+ "eval_exact_match": 29.3699,
1108
+ "eval_loss": 0.159205362200737,
1109
+ "eval_runtime": 2036.6907,
1110
+ "eval_samples_per_second": 0.484,
1111
+ "eval_steps_per_second": 0.121,
1112
+ "step": 63764
1113
+ },
1114
+ {
1115
+ "epoch": 38.14,
1116
+ "learning_rate": 1.8290106277314266e-05,
1117
+ "loss": 0.0003,
1118
+ "step": 64000
1119
+ },
1120
+ {
1121
+ "epoch": 38.44,
1122
+ "learning_rate": 1.8043778307508936e-05,
1123
+ "loss": 0.0004,
1124
+ "step": 64500
1125
+ },
1126
+ {
1127
+ "epoch": 38.74,
1128
+ "learning_rate": 1.779596344854986e-05,
1129
+ "loss": 0.0003,
1130
+ "step": 65000
1131
+ },
1132
+ {
1133
+ "epoch": 39.0,
1134
+ "eval_exact_match": 29.065,
1135
+ "eval_loss": 0.15887553989887238,
1136
+ "eval_runtime": 1474.6108,
1137
+ "eval_samples_per_second": 0.668,
1138
+ "eval_steps_per_second": 0.168,
1139
+ "step": 65442
1140
+ },
1141
+ {
1142
+ "epoch": 39.03,
1143
+ "learning_rate": 1.754814858959078e-05,
1144
+ "loss": 0.0004,
1145
+ "step": 65500
1146
+ },
1147
+ {
1148
+ "epoch": 39.33,
1149
+ "learning_rate": 1.7300333730631702e-05,
1150
+ "loss": 0.0002,
1151
+ "step": 66000
1152
+ },
1153
+ {
1154
+ "epoch": 39.63,
1155
+ "learning_rate": 1.7052518871672626e-05,
1156
+ "loss": 0.0003,
1157
+ "step": 66500
1158
+ },
1159
+ {
1160
+ "epoch": 39.93,
1161
+ "learning_rate": 1.6804704012713547e-05,
1162
+ "loss": 0.0003,
1163
+ "step": 67000
1164
+ },
1165
+ {
1166
+ "epoch": 40.0,
1167
+ "eval_exact_match": 28.4553,
1168
+ "eval_loss": 0.16181451082229614,
1169
+ "eval_runtime": 1902.6668,
1170
+ "eval_samples_per_second": 0.518,
1171
+ "eval_steps_per_second": 0.13,
1172
+ "step": 67120
1173
+ },
1174
+ {
1175
+ "epoch": 40.23,
1176
+ "learning_rate": 1.6556889153754468e-05,
1177
+ "loss": 0.0003,
1178
+ "step": 67500
1179
+ },
1180
+ {
1181
+ "epoch": 40.52,
1182
+ "learning_rate": 1.6309074294795392e-05,
1183
+ "loss": 0.0002,
1184
+ "step": 68000
1185
+ },
1186
+ {
1187
+ "epoch": 40.82,
1188
+ "learning_rate": 1.6061259435836313e-05,
1189
+ "loss": 0.0002,
1190
+ "step": 68500
1191
+ },
1192
+ {
1193
+ "epoch": 41.0,
1194
+ "eval_exact_match": 29.4715,
1195
+ "eval_loss": 0.1619289368391037,
1196
+ "eval_runtime": 1699.0156,
1197
+ "eval_samples_per_second": 0.58,
1198
+ "eval_steps_per_second": 0.145,
1199
+ "step": 68798
1200
+ },
1201
+ {
1202
+ "epoch": 41.12,
1203
+ "learning_rate": 1.5813444576877234e-05,
1204
+ "loss": 0.0003,
1205
+ "step": 69000
1206
+ },
1207
+ {
1208
+ "epoch": 41.42,
1209
+ "learning_rate": 1.5565629717918155e-05,
1210
+ "loss": 0.0002,
1211
+ "step": 69500
1212
+ },
1213
+ {
1214
+ "epoch": 41.72,
1215
+ "learning_rate": 1.5317814858959076e-05,
1216
+ "loss": 0.0002,
1217
+ "step": 70000
1218
+ },
1219
+ {
1220
+ "epoch": 42.0,
1221
+ "eval_exact_match": 27.6423,
1222
+ "eval_loss": 0.15936070680618286,
1223
+ "eval_runtime": 1667.2618,
1224
+ "eval_samples_per_second": 0.591,
1225
+ "eval_steps_per_second": 0.148,
1226
+ "step": 70476
1227
+ },
1228
+ {
1229
+ "epoch": 42.01,
1230
+ "learning_rate": 1.5070000000000003e-05,
1231
+ "loss": 0.0003,
1232
+ "step": 70500
1233
+ },
1234
+ {
1235
+ "epoch": 42.31,
1236
+ "learning_rate": 1.4822185141040922e-05,
1237
+ "loss": 0.0002,
1238
+ "step": 71000
1239
+ },
1240
+ {
1241
+ "epoch": 42.61,
1242
+ "learning_rate": 1.4574370282081845e-05,
1243
+ "loss": 0.0002,
1244
+ "step": 71500
1245
+ },
1246
+ {
1247
+ "epoch": 42.91,
1248
+ "learning_rate": 1.4326555423122764e-05,
1249
+ "loss": 0.0002,
1250
+ "step": 72000
1251
+ },
1252
+ {
1253
+ "epoch": 43.0,
1254
+ "eval_exact_match": 27.8455,
1255
+ "eval_loss": 0.16002264618873596,
1256
+ "eval_runtime": 2084.2416,
1257
+ "eval_samples_per_second": 0.473,
1258
+ "eval_steps_per_second": 0.119,
1259
+ "step": 72154
1260
+ },
1261
+ {
1262
+ "epoch": 43.21,
1263
+ "learning_rate": 1.4078740564163686e-05,
1264
+ "loss": 0.0002,
1265
+ "step": 72500
1266
+ },
1267
+ {
1268
+ "epoch": 43.5,
1269
+ "learning_rate": 1.3830925705204605e-05,
1270
+ "loss": 0.0002,
1271
+ "step": 73000
1272
+ },
1273
+ {
1274
+ "epoch": 43.8,
1275
+ "learning_rate": 1.3583110846245531e-05,
1276
+ "loss": 0.0002,
1277
+ "step": 73500
1278
+ },
1279
+ {
1280
+ "epoch": 44.0,
1281
+ "eval_exact_match": 29.5732,
1282
+ "eval_loss": 0.1613691747188568,
1283
+ "eval_runtime": 1688.4017,
1284
+ "eval_samples_per_second": 0.583,
1285
+ "eval_steps_per_second": 0.146,
1286
+ "step": 73832
1287
+ },
1288
+ {
1289
+ "epoch": 44.1,
1290
+ "learning_rate": 1.3335295987286456e-05,
1291
+ "loss": 0.0002,
1292
+ "step": 74000
1293
+ },
1294
+ {
1295
+ "epoch": 44.4,
1296
+ "learning_rate": 1.3087481128327375e-05,
1297
+ "loss": 0.0001,
1298
+ "step": 74500
1299
+ },
1300
+ {
1301
+ "epoch": 44.7,
1302
+ "learning_rate": 1.2839666269368297e-05,
1303
+ "loss": 0.0002,
1304
+ "step": 75000
1305
+ },
1306
+ {
1307
+ "epoch": 44.99,
1308
+ "learning_rate": 1.2591851410409216e-05,
1309
+ "loss": 0.0002,
1310
+ "step": 75500
1311
+ },
1312
+ {
1313
+ "epoch": 45.0,
1314
+ "eval_exact_match": 30.4878,
1315
+ "eval_loss": 0.16062164306640625,
1316
+ "eval_runtime": 1456.9794,
1317
+ "eval_samples_per_second": 0.676,
1318
+ "eval_steps_per_second": 0.17,
1319
+ "step": 75510
1320
+ },
1321
+ {
1322
+ "epoch": 45.29,
1323
+ "learning_rate": 1.2344036551450139e-05,
1324
+ "loss": 0.0001,
1325
+ "step": 76000
1326
+ },
1327
+ {
1328
+ "epoch": 45.59,
1329
+ "learning_rate": 1.209671732220898e-05,
1330
+ "loss": 0.0002,
1331
+ "step": 76500
1332
+ },
1333
+ {
1334
+ "epoch": 45.89,
1335
+ "learning_rate": 1.1848902463249901e-05,
1336
+ "loss": 0.0002,
1337
+ "step": 77000
1338
+ },
1339
+ {
1340
+ "epoch": 46.0,
1341
+ "eval_exact_match": 28.4553,
1342
+ "eval_loss": 0.16334177553653717,
1343
+ "eval_runtime": 1424.3349,
1344
+ "eval_samples_per_second": 0.692,
1345
+ "eval_steps_per_second": 0.173,
1346
+ "step": 77188
1347
+ },
1348
+ {
1349
+ "epoch": 46.19,
1350
+ "learning_rate": 1.1601087604290822e-05,
1351
+ "loss": 0.0001,
1352
+ "step": 77500
1353
+ },
1354
+ {
1355
+ "epoch": 46.48,
1356
+ "learning_rate": 1.1353272745331743e-05,
1357
+ "loss": 0.0001,
1358
+ "step": 78000
1359
+ },
1360
+ {
1361
+ "epoch": 46.78,
1362
+ "learning_rate": 1.1105457886372669e-05,
1363
+ "loss": 0.0001,
1364
+ "step": 78500
1365
+ },
1366
+ {
1367
+ "epoch": 47.0,
1368
+ "eval_exact_match": 29.7764,
1369
+ "eval_loss": 0.16305780410766602,
1370
+ "eval_runtime": 1532.407,
1371
+ "eval_samples_per_second": 0.643,
1372
+ "eval_steps_per_second": 0.161,
1373
+ "step": 78866
1374
+ },
1375
+ {
1376
+ "epoch": 47.08,
1377
+ "learning_rate": 1.0858138657131504e-05,
1378
+ "loss": 0.0001,
1379
+ "step": 79000
1380
+ },
1381
+ {
1382
+ "epoch": 47.38,
1383
+ "learning_rate": 1.0610323798172431e-05,
1384
+ "loss": 0.0001,
1385
+ "step": 79500
1386
+ },
1387
+ {
1388
+ "epoch": 47.68,
1389
+ "learning_rate": 1.0362508939213352e-05,
1390
+ "loss": 0.0001,
1391
+ "step": 80000
1392
+ },
1393
+ {
1394
+ "epoch": 47.97,
1395
+ "learning_rate": 1.0114694080254273e-05,
1396
+ "loss": 0.0001,
1397
+ "step": 80500
1398
+ },
1399
+ {
1400
+ "epoch": 48.0,
1401
+ "eval_exact_match": 28.7602,
1402
+ "eval_loss": 0.1653144359588623,
1403
+ "eval_runtime": 1420.3224,
1404
+ "eval_samples_per_second": 0.694,
1405
+ "eval_steps_per_second": 0.174,
1406
+ "step": 80544
1407
+ },
1408
+ {
1409
+ "epoch": 48.27,
1410
+ "learning_rate": 9.866879221295194e-06,
1411
+ "loss": 0.0001,
1412
+ "step": 81000
1413
+ },
1414
+ {
1415
+ "epoch": 48.57,
1416
+ "learning_rate": 9.619064362336115e-06,
1417
+ "loss": 0.0001,
1418
+ "step": 81500
1419
+ },
1420
+ {
1421
+ "epoch": 48.87,
1422
+ "learning_rate": 9.371249503377035e-06,
1423
+ "loss": 0.0001,
1424
+ "step": 82000
1425
+ },
1426
+ {
1427
+ "epoch": 49.0,
1428
+ "eval_exact_match": 28.5569,
1429
+ "eval_loss": 0.16365119814872742,
1430
+ "eval_runtime": 1693.8317,
1431
+ "eval_samples_per_second": 0.582,
1432
+ "eval_steps_per_second": 0.146,
1433
+ "step": 82222
1434
+ },
1435
+ {
1436
+ "epoch": 49.17,
1437
+ "learning_rate": 9.124921533571714e-06,
1438
+ "loss": 0.0001,
1439
+ "step": 82500
1440
+ },
1441
+ {
1442
+ "epoch": 49.46,
1443
+ "learning_rate": 8.877106674612636e-06,
1444
+ "loss": 0.0001,
1445
+ "step": 83000
1446
+ },
1447
+ {
1448
+ "epoch": 49.76,
1449
+ "learning_rate": 8.629291815653557e-06,
1450
+ "loss": 0.0001,
1451
+ "step": 83500
1452
+ },
1453
+ {
1454
+ "epoch": 50.0,
1455
+ "eval_exact_match": 28.7602,
1456
+ "eval_loss": 0.1648363322019577,
1457
+ "eval_runtime": 1491.6451,
1458
+ "eval_samples_per_second": 0.66,
1459
+ "eval_steps_per_second": 0.166,
1460
+ "step": 83900
1461
+ },
1462
+ {
1463
+ "epoch": 50.06,
1464
+ "learning_rate": 8.381476956694478e-06,
1465
+ "loss": 0.0001,
1466
+ "step": 84000
1467
+ },
1468
+ {
1469
+ "epoch": 50.36,
1470
+ "learning_rate": 8.133662097735399e-06,
1471
+ "loss": 0.0001,
1472
+ "step": 84500
1473
+ },
1474
+ {
1475
+ "epoch": 50.66,
1476
+ "learning_rate": 7.88584723877632e-06,
1477
+ "loss": 0.0001,
1478
+ "step": 85000
1479
+ },
1480
+ {
1481
+ "epoch": 50.95,
1482
+ "learning_rate": 7.638032379817246e-06,
1483
+ "loss": 0.0001,
1484
+ "step": 85500
1485
+ },
1486
+ {
1487
+ "epoch": 51.0,
1488
+ "eval_exact_match": 29.5732,
1489
+ "eval_loss": 0.16499294340610504,
1490
+ "eval_runtime": 1816.3605,
1491
+ "eval_samples_per_second": 0.542,
1492
+ "eval_steps_per_second": 0.136,
1493
+ "step": 85578
1494
+ },
1495
+ {
1496
+ "epoch": 51.25,
1497
+ "learning_rate": 7.390217520858167e-06,
1498
+ "loss": 0.0001,
1499
+ "step": 86000
1500
+ },
1501
+ {
1502
+ "epoch": 51.55,
1503
+ "learning_rate": 7.142402661899088e-06,
1504
+ "loss": 0.0001,
1505
+ "step": 86500
1506
+ },
1507
+ {
1508
+ "epoch": 51.85,
1509
+ "learning_rate": 6.894587802940009e-06,
1510
+ "loss": 0.0001,
1511
+ "step": 87000
1512
+ },
1513
+ {
1514
+ "epoch": 52.0,
1515
+ "eval_exact_match": 30.0813,
1516
+ "eval_loss": 0.1663326919078827,
1517
+ "eval_runtime": 1639.0771,
1518
+ "eval_samples_per_second": 0.601,
1519
+ "eval_steps_per_second": 0.151,
1520
+ "step": 87256
1521
+ },
1522
+ {
1523
+ "epoch": 52.15,
1524
+ "learning_rate": 6.64677294398093e-06,
1525
+ "loss": 0.0001,
1526
+ "step": 87500
1527
+ },
1528
+ {
1529
+ "epoch": 52.44,
1530
+ "learning_rate": 6.3989580850218504e-06,
1531
+ "loss": 0.0001,
1532
+ "step": 88000
1533
+ },
1534
+ {
1535
+ "epoch": 52.74,
1536
+ "learning_rate": 6.151143226062771e-06,
1537
+ "loss": 0.0001,
1538
+ "step": 88500
1539
+ },
1540
+ {
1541
+ "epoch": 53.0,
1542
+ "eval_exact_match": 29.2683,
1543
+ "eval_loss": 0.1655188500881195,
1544
+ "eval_runtime": 1906.3649,
1545
+ "eval_samples_per_second": 0.517,
1546
+ "eval_steps_per_second": 0.13,
1547
+ "step": 88934
1548
+ },
1549
+ {
1550
+ "epoch": 53.04,
1551
+ "learning_rate": 5.903328367103692e-06,
1552
+ "loss": 0.0001,
1553
+ "step": 89000
1554
+ },
1555
+ {
1556
+ "epoch": 53.34,
1557
+ "learning_rate": 5.655513508144619e-06,
1558
+ "loss": 0.0001,
1559
+ "step": 89500
1560
+ },
1561
+ {
1562
+ "epoch": 53.64,
1563
+ "learning_rate": 5.40769864918554e-06,
1564
+ "loss": 0.0001,
1565
+ "step": 90000
1566
+ },
1567
+ {
1568
+ "epoch": 53.93,
1569
+ "learning_rate": 5.159883790226461e-06,
1570
+ "loss": 0.0,
1571
+ "step": 90500
1572
+ },
1573
+ {
1574
+ "epoch": 54.0,
1575
+ "eval_exact_match": 29.4715,
1576
+ "eval_loss": 0.16723661124706268,
1577
+ "eval_runtime": 1751.4211,
1578
+ "eval_samples_per_second": 0.562,
1579
+ "eval_steps_per_second": 0.141,
1580
+ "step": 90612
1581
+ },
1582
+ {
1583
+ "epoch": 54.23,
1584
+ "learning_rate": 4.912564560985301e-06,
1585
+ "loss": 0.0,
1586
+ "step": 91000
1587
+ },
1588
+ {
1589
+ "epoch": 54.53,
1590
+ "learning_rate": 4.664749702026222e-06,
1591
+ "loss": 0.0,
1592
+ "step": 91500
1593
+ },
1594
+ {
1595
+ "epoch": 54.83,
1596
+ "learning_rate": 4.416934843067143e-06,
1597
+ "loss": 0.0,
1598
+ "step": 92000
1599
+ },
1600
+ {
1601
+ "epoch": 55.0,
1602
+ "eval_exact_match": 30.1829,
1603
+ "eval_loss": 0.1663067787885666,
1604
+ "eval_runtime": 1758.7727,
1605
+ "eval_samples_per_second": 0.56,
1606
+ "eval_steps_per_second": 0.14,
1607
+ "step": 92290
1608
+ },
1609
+ {
1610
+ "epoch": 55.13,
1611
+ "learning_rate": 4.169615613825984e-06,
1612
+ "loss": 0.0,
1613
+ "step": 92500
1614
+ },
1615
+ {
1616
+ "epoch": 55.42,
1617
+ "learning_rate": 3.921800754866905e-06,
1618
+ "loss": 0.0,
1619
+ "step": 93000
1620
+ },
1621
+ {
1622
+ "epoch": 55.72,
1623
+ "learning_rate": 3.6739858959078254e-06,
1624
+ "loss": 0.0,
1625
+ "step": 93500
1626
+ },
1627
+ {
1628
+ "epoch": 56.0,
1629
+ "eval_exact_match": 30.0813,
1630
+ "eval_loss": 0.16646040976047516,
1631
+ "eval_runtime": 1568.4573,
1632
+ "eval_samples_per_second": 0.628,
1633
+ "eval_steps_per_second": 0.157,
1634
+ "step": 93968
1635
+ },
1636
+ {
1637
+ "epoch": 56.02,
1638
+ "learning_rate": 3.4261710369487462e-06,
1639
+ "loss": 0.0,
1640
+ "step": 94000
1641
+ },
1642
+ {
1643
+ "epoch": 56.32,
1644
+ "learning_rate": 3.178356177989667e-06,
1645
+ "loss": 0.0,
1646
+ "step": 94500
1647
+ },
1648
+ {
1649
+ "epoch": 56.62,
1650
+ "learning_rate": 2.930541319030594e-06,
1651
+ "loss": 0.0,
1652
+ "step": 95000
1653
+ },
1654
+ {
1655
+ "epoch": 56.91,
1656
+ "learning_rate": 2.682726460071515e-06,
1657
+ "loss": 0.0,
1658
+ "step": 95500
1659
+ },
1660
+ {
1661
+ "epoch": 57.0,
1662
+ "eval_exact_match": 29.5732,
1663
+ "eval_loss": 0.16705289483070374,
1664
+ "eval_runtime": 1688.4241,
1665
+ "eval_samples_per_second": 0.583,
1666
+ "eval_steps_per_second": 0.146,
1667
+ "step": 95646
1668
+ },
1669
+ {
1670
+ "epoch": 57.21,
1671
+ "learning_rate": 2.4349116011124362e-06,
1672
+ "loss": 0.0,
1673
+ "step": 96000
1674
+ },
1675
+ {
1676
+ "epoch": 57.51,
1677
+ "learning_rate": 2.187096742153357e-06,
1678
+ "loss": 0.0,
1679
+ "step": 96500
1680
+ },
1681
+ {
1682
+ "epoch": 57.81,
1683
+ "learning_rate": 1.9397775129121975e-06,
1684
+ "loss": 0.0,
1685
+ "step": 97000
1686
+ },
1687
+ {
1688
+ "epoch": 58.0,
1689
+ "eval_exact_match": 29.3699,
1690
+ "eval_loss": 0.16738204658031464,
1691
+ "eval_runtime": 1770.453,
1692
+ "eval_samples_per_second": 0.556,
1693
+ "eval_steps_per_second": 0.14,
1694
+ "step": 97324
1695
+ },
1696
+ {
1697
+ "epoch": 58.1,
1698
+ "learning_rate": 1.6919626539531185e-06,
1699
+ "loss": 0.0,
1700
+ "step": 97500
1701
+ },
1702
+ {
1703
+ "epoch": 58.4,
1704
+ "learning_rate": 1.4446434247119588e-06,
1705
+ "loss": 0.0,
1706
+ "step": 98000
1707
+ },
1708
+ {
1709
+ "epoch": 58.7,
1710
+ "learning_rate": 1.1968285657528796e-06,
1711
+ "loss": 0.0,
1712
+ "step": 98500
1713
+ },
1714
+ {
1715
+ "epoch": 59.0,
1716
+ "learning_rate": 9.490137067938009e-07,
1717
+ "loss": 0.0,
1718
+ "step": 99000
1719
+ },
1720
+ {
1721
+ "epoch": 59.0,
1722
+ "eval_exact_match": 29.5732,
1723
+ "eval_loss": 0.16720303893089294,
1724
+ "eval_runtime": 1631.7334,
1725
+ "eval_samples_per_second": 0.604,
1726
+ "eval_steps_per_second": 0.151,
1727
+ "step": 99002
1728
+ },
1729
+ {
1730
+ "epoch": 59.3,
1731
+ "learning_rate": 7.011988478347218e-07,
1732
+ "loss": 0.0,
1733
+ "step": 99500
1734
+ },
1735
+ {
1736
+ "epoch": 59.59,
1737
+ "learning_rate": 4.5338398887564833e-07,
1738
+ "loss": 0.0,
1739
+ "step": 100000
1740
+ },
1741
+ {
1742
+ "epoch": 59.89,
1743
+ "learning_rate": 2.0556912991656932e-07,
1744
+ "loss": 0.0,
1745
+ "step": 100500
1746
+ },
1747
+ {
1748
+ "epoch": 60.0,
1749
+ "eval_exact_match": 29.5732,
1750
+ "eval_loss": 0.16741037368774414,
1751
+ "eval_runtime": 1682.2541,
1752
+ "eval_samples_per_second": 0.586,
1753
+ "eval_steps_per_second": 0.147,
1754
+ "step": 100680
1755
+ },
1756
+ {
1757
+ "epoch": 60.0,
1758
+ "step": 100680,
1759
+ "total_flos": 4.904790433726464e+17,
1760
+ "train_loss": 0.004280355875287834,
1761
+ "train_runtime": 148560.3618,
1762
+ "train_samples_per_second": 2.712,
1763
+ "train_steps_per_second": 0.678
1764
+ }
1765
+ ],
1766
+ "max_steps": 100680,
1767
+ "num_train_epochs": 60,
1768
+ "total_flos": 4.904790433726464e+17,
1769
+ "trial_name": null,
1770
+ "trial_params": null
1771
+ }
qa_sp_codet5p-220m_s2_latex_bs_lr_47/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007e3036f07603b9a502394d794913f87be2edaed83eed9aa3cc900a0bd8473f
3
+ size 3768
qa_sp_codet5p-220m_s2_latex_bs_lr_47/vocab.json ADDED
The diff for this file is too large to render. See raw diff