Shresthadev403 committed on
Commit
b3dd9a9
·
1 Parent(s): 8a49809

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.1248
17
- - eval_runtime: 18.0737
18
- - eval_samples_per_second: 110.658
19
- - eval_steps_per_second: 1.771
20
  - epoch: 0.01
21
- - step: 25
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.1253
17
+ - eval_runtime: 17.9015
18
+ - eval_samples_per_second: 111.722
19
+ - eval_steps_per_second: 1.788
20
  - epoch: 0.01
21
+ - step: 30
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1702892983.c87f45a5aed4.42.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ea702a24314946f92467438aff883338d086d47b57a079511642c601f29ddf5
3
- size 5071
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60b19c41deb5a295a4c31755061d6efbaef785573dee8fe083782fc58461a86
3
+ size 5491
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eedddfce999d785930cda46ba5039fb11f40804527336adc043a90eb5babe4c1
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a9d2f50b1ec70a7f9f965d6c23fb17f132f0f94e902901696715e5e56a02d6d
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.142299771308899,
3
- "best_model_checkpoint": "food-recipe-generation/checkpoint-5",
4
- "epoch": 0.008161932745674175,
5
  "eval_steps": 5,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -77,6 +77,20 @@
77
  "eval_samples_per_second": 110.658,
78
  "eval_steps_per_second": 1.771,
79
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }
81
  ],
82
  "logging_steps": 5,
@@ -84,7 +98,7 @@
84
  "num_input_tokens_seen": 0,
85
  "num_train_epochs": 1,
86
  "save_steps": 5,
87
- "total_flos": 52258406400000.0,
88
  "train_batch_size": 32,
89
  "trial_name": null,
90
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1247614622116089,
3
+ "best_model_checkpoint": "food-recipe-generation/checkpoint-25",
4
+ "epoch": 0.009794319294809012,
5
  "eval_steps": 5,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
77
  "eval_samples_per_second": 110.658,
78
  "eval_steps_per_second": 1.771,
79
  "step": 25
80
+ },
81
+ {
82
+ "epoch": 0.01,
83
+ "learning_rate": 4.8371335504885994e-05,
84
+ "loss": 1.1589,
85
+ "step": 30
86
+ },
87
+ {
88
+ "epoch": 0.01,
89
+ "eval_loss": 1.1252552270889282,
90
+ "eval_runtime": 17.9015,
91
+ "eval_samples_per_second": 111.722,
92
+ "eval_steps_per_second": 1.788,
93
+ "step": 30
94
  }
95
  ],
96
  "logging_steps": 5,
 
98
  "num_input_tokens_seen": 0,
99
  "num_train_epochs": 1,
100
  "save_steps": 5,
101
+ "total_flos": 62710087680000.0,
102
  "train_batch_size": 32,
103
  "trial_name": null,
104
  "trial_params": null