Shresthadev403 committed on
Commit
d877bcd
·
1 Parent(s): 3fbd989

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.1423
17
- - eval_runtime: 18.2229
18
- - eval_samples_per_second: 109.752
19
  - eval_steps_per_second: 1.756
20
  - epoch: 0.0
21
- - step: 5
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.1526
17
+ - eval_runtime: 18.2243
18
+ - eval_samples_per_second: 109.744
19
  - eval_steps_per_second: 1.756
20
  - epoch: 0.0
21
+ - step: 15
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1702892360.82d5b6822809.42.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47b731f489539729ca489d9f14794e43f6410b766b6914000b9675dc117820e8
3
- size 5071
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665bd13cf70bd896026d07b64a9a9163b7760281767a496ec63619223b9ef07d
3
+ size 5491
logs/events.out.tfevents.1702892648.82d5b6822809.42.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ad33be7d0485b15516b8569d1a5beb0cb8ff4e4b63a6739ebc3c0e37d8f4b2
3
+ size 5071
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:876ea86b9abcb1b2de8746622b8968d50f0dee582e9aa4b133c809b475d5c880
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:786a50d8d4ce0218d11987ec66734818f9045dfe55fad3d78997f5a0a8bc34fb
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.001632386549134835,
5
  "eval_steps": 5,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,6 +21,34 @@
21
  "eval_samples_per_second": 109.752,
22
  "eval_steps_per_second": 1.756,
23
  "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 5,
@@ -28,7 +56,7 @@
28
  "num_input_tokens_seen": 0,
29
  "num_train_epochs": 1,
30
  "save_steps": 5,
31
- "total_flos": 10451681280000.0,
32
  "train_batch_size": 32,
33
  "trial_name": null,
34
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.142299771308899,
3
+ "best_model_checkpoint": "food-recipe-generation/checkpoint-5",
4
+ "epoch": 0.004897159647404506,
5
  "eval_steps": 5,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "eval_samples_per_second": 109.752,
22
  "eval_steps_per_second": 1.756,
23
  "step": 5
24
+ },
25
+ {
26
+ "epoch": 0.0,
27
+ "learning_rate": 4.8371335504885994e-05,
28
+ "loss": 1.2427,
29
+ "step": 10
30
+ },
31
+ {
32
+ "epoch": 0.0,
33
+ "eval_loss": 1.1404472589492798,
34
+ "eval_runtime": 17.9113,
35
+ "eval_samples_per_second": 111.662,
36
+ "eval_steps_per_second": 1.787,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.0,
41
+ "learning_rate": 4.9185667752443e-05,
42
+ "loss": 1.1104,
43
+ "step": 15
44
+ },
45
+ {
46
+ "epoch": 0.0,
47
+ "eval_loss": 1.1525651216506958,
48
+ "eval_runtime": 18.2243,
49
+ "eval_samples_per_second": 109.744,
50
+ "eval_steps_per_second": 1.756,
51
+ "step": 15
52
  }
53
  ],
54
  "logging_steps": 5,
 
56
  "num_input_tokens_seen": 0,
57
  "num_train_epochs": 1,
58
  "save_steps": 5,
59
+ "total_flos": 31355043840000.0,
60
  "train_batch_size": 32,
61
  "trial_name": null,
62
  "trial_params": null