DuongTrongChi committed on
Commit
dcc6328
·
verified ·
1 Parent(s): 61be092

Training in progress, step 161, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b65aace9da077ee07a204876e8098d96dad47b8b4be8f51e9b9cbec2ff2393
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcab26ada2c2637d79ec11d98df950fe228da88d062fd59cebde5c23cc576bd9
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:527fada47e774dd945cf8bd8fa9cb84eca9c449c19271b31a9ecbc61d6166143
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3777d03e8d2baa5c405d941ac93d97f6e70f91bdf36b70dc8613461422b413d
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67c15c8690a2da73762f6bc4ec11beb5e387acb500a6b50782f5eba470e3275a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae12e87061029d0002bd195ec24682167e09466b871fd07c9121b60003cd8e79
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.19432015341064743,
5
  "eval_steps": 500,
6
- "global_step": 133,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -938,6 +938,202 @@
938
  "learning_rate": 1.8869863013698633e-05,
939
  "loss": 1.2982,
940
  "step": 133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
941
  }
942
  ],
943
  "logging_steps": 1,
@@ -957,7 +1153,7 @@
957
  "attributes": {}
958
  }
959
  },
960
- "total_flos": 1.4946336109032653e+17,
961
  "train_batch_size": 4,
962
  "trial_name": null,
963
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23522965939183635,
5
  "eval_steps": 500,
6
+ "global_step": 161,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
938
  "learning_rate": 1.8869863013698633e-05,
939
  "loss": 1.2982,
940
  "step": 133
941
+ },
942
+ {
943
+ "epoch": 0.1957812071956899,
944
+ "grad_norm": 0.1279006004333496,
945
+ "learning_rate": 1.8835616438356166e-05,
946
+ "loss": 1.3291,
947
+ "step": 134
948
+ },
949
+ {
950
+ "epoch": 0.19724226098073236,
951
+ "grad_norm": 0.13129518926143646,
952
+ "learning_rate": 1.8801369863013702e-05,
953
+ "loss": 1.2805,
954
+ "step": 135
955
+ },
956
+ {
957
+ "epoch": 0.19870331476577482,
958
+ "grad_norm": 0.1193300113081932,
959
+ "learning_rate": 1.8767123287671235e-05,
960
+ "loss": 1.3173,
961
+ "step": 136
962
+ },
963
+ {
964
+ "epoch": 0.2001643685508173,
965
+ "grad_norm": 0.12838037312030792,
966
+ "learning_rate": 1.8732876712328768e-05,
967
+ "loss": 1.3416,
968
+ "step": 137
969
+ },
970
+ {
971
+ "epoch": 0.20162542233585973,
972
+ "grad_norm": 0.1269877851009369,
973
+ "learning_rate": 1.86986301369863e-05,
974
+ "loss": 1.295,
975
+ "step": 138
976
+ },
977
+ {
978
+ "epoch": 0.2030864761209022,
979
+ "grad_norm": 0.13122180104255676,
980
+ "learning_rate": 1.8664383561643838e-05,
981
+ "loss": 1.293,
982
+ "step": 139
983
+ },
984
+ {
985
+ "epoch": 0.20454752990594466,
986
+ "grad_norm": 0.1385333091020584,
987
+ "learning_rate": 1.863013698630137e-05,
988
+ "loss": 1.2237,
989
+ "step": 140
990
+ },
991
+ {
992
+ "epoch": 0.20600858369098712,
993
+ "grad_norm": 0.1442401111125946,
994
+ "learning_rate": 1.8595890410958907e-05,
995
+ "loss": 1.2833,
996
+ "step": 141
997
+ },
998
+ {
999
+ "epoch": 0.2074696374760296,
1000
+ "grad_norm": 0.1236981600522995,
1001
+ "learning_rate": 1.856164383561644e-05,
1002
+ "loss": 1.3252,
1003
+ "step": 142
1004
+ },
1005
+ {
1006
+ "epoch": 0.20893069126107205,
1007
+ "grad_norm": 0.13414394855499268,
1008
+ "learning_rate": 1.8527397260273973e-05,
1009
+ "loss": 1.4099,
1010
+ "step": 143
1011
+ },
1012
+ {
1013
+ "epoch": 0.21039174504611452,
1014
+ "grad_norm": 0.1376720666885376,
1015
+ "learning_rate": 1.849315068493151e-05,
1016
+ "loss": 1.3735,
1017
+ "step": 144
1018
+ },
1019
+ {
1020
+ "epoch": 0.21185279883115699,
1021
+ "grad_norm": 0.13452522456645966,
1022
+ "learning_rate": 1.8458904109589043e-05,
1023
+ "loss": 1.2442,
1024
+ "step": 145
1025
+ },
1026
+ {
1027
+ "epoch": 0.21331385261619942,
1028
+ "grad_norm": 0.12783923745155334,
1029
+ "learning_rate": 1.8424657534246576e-05,
1030
+ "loss": 1.3028,
1031
+ "step": 146
1032
+ },
1033
+ {
1034
+ "epoch": 0.2147749064012419,
1035
+ "grad_norm": 0.1320268213748932,
1036
+ "learning_rate": 1.839041095890411e-05,
1037
+ "loss": 1.2906,
1038
+ "step": 147
1039
+ },
1040
+ {
1041
+ "epoch": 0.21623596018628435,
1042
+ "grad_norm": 0.14116773009300232,
1043
+ "learning_rate": 1.8356164383561645e-05,
1044
+ "loss": 1.3724,
1045
+ "step": 148
1046
+ },
1047
+ {
1048
+ "epoch": 0.21769701397132682,
1049
+ "grad_norm": 0.13176654279232025,
1050
+ "learning_rate": 1.8321917808219182e-05,
1051
+ "loss": 1.2719,
1052
+ "step": 149
1053
+ },
1054
+ {
1055
+ "epoch": 0.21915806775636928,
1056
+ "grad_norm": 0.13379769027233124,
1057
+ "learning_rate": 1.8287671232876715e-05,
1058
+ "loss": 1.3286,
1059
+ "step": 150
1060
+ },
1061
+ {
1062
+ "epoch": 0.22061912154141175,
1063
+ "grad_norm": 0.13172045350074768,
1064
+ "learning_rate": 1.8253424657534248e-05,
1065
+ "loss": 1.3104,
1066
+ "step": 151
1067
+ },
1068
+ {
1069
+ "epoch": 0.22208017532645422,
1070
+ "grad_norm": 0.14471982419490814,
1071
+ "learning_rate": 1.821917808219178e-05,
1072
+ "loss": 1.3111,
1073
+ "step": 152
1074
+ },
1075
+ {
1076
+ "epoch": 0.22354122911149665,
1077
+ "grad_norm": 0.14093175530433655,
1078
+ "learning_rate": 1.8184931506849317e-05,
1079
+ "loss": 1.2696,
1080
+ "step": 153
1081
+ },
1082
+ {
1083
+ "epoch": 0.22500228289653912,
1084
+ "grad_norm": 0.14340919256210327,
1085
+ "learning_rate": 1.815068493150685e-05,
1086
+ "loss": 1.3105,
1087
+ "step": 154
1088
+ },
1089
+ {
1090
+ "epoch": 0.22646333668158158,
1091
+ "grad_norm": 0.15136584639549255,
1092
+ "learning_rate": 1.8116438356164387e-05,
1093
+ "loss": 1.1514,
1094
+ "step": 155
1095
+ },
1096
+ {
1097
+ "epoch": 0.22792439046662405,
1098
+ "grad_norm": 0.16463331878185272,
1099
+ "learning_rate": 1.808219178082192e-05,
1100
+ "loss": 1.2292,
1101
+ "step": 156
1102
+ },
1103
+ {
1104
+ "epoch": 0.22938544425166651,
1105
+ "grad_norm": 0.14684943854808807,
1106
+ "learning_rate": 1.8047945205479453e-05,
1107
+ "loss": 1.1744,
1108
+ "step": 157
1109
+ },
1110
+ {
1111
+ "epoch": 0.23084649803670898,
1112
+ "grad_norm": 0.17171254754066467,
1113
+ "learning_rate": 1.801369863013699e-05,
1114
+ "loss": 1.2522,
1115
+ "step": 158
1116
+ },
1117
+ {
1118
+ "epoch": 0.23230755182175145,
1119
+ "grad_norm": 0.1535484492778778,
1120
+ "learning_rate": 1.7979452054794522e-05,
1121
+ "loss": 1.3086,
1122
+ "step": 159
1123
+ },
1124
+ {
1125
+ "epoch": 0.2337686056067939,
1126
+ "grad_norm": 0.15115784108638763,
1127
+ "learning_rate": 1.7945205479452055e-05,
1128
+ "loss": 1.3126,
1129
+ "step": 160
1130
+ },
1131
+ {
1132
+ "epoch": 0.23522965939183635,
1133
+ "grad_norm": 0.15851254761219025,
1134
+ "learning_rate": 1.791095890410959e-05,
1135
+ "loss": 1.3022,
1136
+ "step": 161
1137
  }
1138
  ],
1139
  "logging_steps": 1,
 
1153
  "attributes": {}
1154
  }
1155
  },
1156
+ "total_flos": 1.8126428980702003e+17,
1157
  "train_batch_size": 4,
1158
  "trial_name": null,
1159
  "trial_params": null