superbigtree committed on
Commit
2d7d1b0
·
verified ·
1 Parent(s): 5e0bcf2

Model save

Browse files
README.md CHANGED
@@ -2,7 +2,7 @@
2
  tags:
3
  - generated_from_trainer
4
  datasets:
5
- - ydshieh/coco_dataset_script
6
  model-index:
7
  - name: clip-roberta-finetuned
8
  results: []
@@ -13,9 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # clip-roberta-finetuned
15
 
16
- This model was trained from scratch on the ydshieh/coco_dataset_script 2017 dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 1.5877
19
 
20
  ## Model description
21
 
@@ -44,7 +42,7 @@ The following hyperparameters were used during training:
44
  - total_eval_batch_size: 512
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
- - num_epochs: 5
48
 
49
  ### Training results
50
 
@@ -54,5 +52,5 @@ The following hyperparameters were used during training:
54
 
55
  - Transformers 4.38.0.dev0
56
  - Pytorch 2.0.1+gita61a294
57
- - Datasets 2.17.0
58
  - Tokenizers 0.15.2
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
5
+ - coco_dataset_script
6
  model-index:
7
  - name: clip-roberta-finetuned
8
  results: []
 
13
 
14
  # clip-roberta-finetuned
15
 
16
+ This model was trained from scratch on the coco_dataset_script dataset.
 
 
17
 
18
  ## Model description
19
 
 
42
  - total_eval_batch_size: 512
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
+ - num_epochs: 1
46
 
47
  ### Training results
48
 
 
52
 
53
  - Transformers 4.38.0.dev0
54
  - Pytorch 2.0.1+gita61a294
55
+ - Datasets 2.17.1
56
  - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.587681531906128,
4
- "eval_runtime": 22.6452,
5
- "eval_samples_per_second": 1104.605,
6
- "eval_steps_per_second": 2.164,
7
- "train_loss": 0.24804650540995352,
8
- "train_runtime": 4511.4291,
9
- "train_samples_per_second": 655.838,
10
- "train_steps_per_second": 1.281
11
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 4.245348930358887,
4
+ "eval_runtime": 23.2323,
5
+ "eval_samples_per_second": 1076.693,
6
+ "eval_steps_per_second": 2.109,
7
+ "train_loss": 4.222018241882324,
8
+ "train_runtime": 2.6838,
9
+ "train_samples_per_second": 37.261,
10
+ "train_steps_per_second": 0.373
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.587681531906128,
4
- "eval_runtime": 22.6452,
5
- "eval_samples_per_second": 1104.605,
6
- "eval_steps_per_second": 2.164
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 4.245348930358887,
4
+ "eval_runtime": 23.2323,
5
+ "eval_samples_per_second": 1076.693,
6
+ "eval_steps_per_second": 2.109
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:647327dd1e7ab36b8309aef5689b6c55d0ee312364ebb486faaae029e6b7ce77
3
  size 851603588
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585e472774472feb8b26fc756ac99bf8c6adabec9f1333f3026e66d6752ad93b
3
  size 851603588
runs/Feb20_01-55-59_b2d203f0f1d0/events.out.tfevents.1708394230.b2d203f0f1d0.4134.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25954e7e6ea72fcdfc2f0557426e440f5160b0a8ef2fa801aef0660fbc4f49ab
3
+ size 8516
runs/Feb20_02-04-16_b2d203f0f1d0/events.out.tfevents.1708394669.b2d203f0f1d0.4412.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:551cb63bf724c3ed85e9d85e50183fb3927fc067b964db71e2be5040e035c56e
3
+ size 8516
runs/Feb20_04-04-22_b2d203f0f1d0/events.out.tfevents.1708401867.b2d203f0f1d0.5056.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54be8f1ccbc2917726e0a85ae071329e5387733b6f70438dcd825b831007365
3
+ size 8503
runs/Feb20_04-35-08_b2d203f0f1d0/events.out.tfevents.1708403713.b2d203f0f1d0.5278.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e28a02bbd4caf67604a600a69b236189c30b6aae7f49c310a4427b92f362eb
3
+ size 8502
runs/Feb20_04-38-45_b2d203f0f1d0/events.out.tfevents.1708403930.b2d203f0f1d0.5381.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc8d14de0e9dda35f028c2bddb8acfe3776a57f3967b6cc2e17134aec81f5fe
3
+ size 8503
runs/Feb20_04-39-27_b2d203f0f1d0/events.out.tfevents.1708403972.b2d203f0f1d0.5474.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07b50d5ac0977ed794507ef8bbc0369a8576084ce51e359584614f8026423d5
3
+ size 8503
runs/Feb20_04-40-22_b2d203f0f1d0/events.out.tfevents.1708404027.b2d203f0f1d0.5563.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5b6780a44417e226f0aa2eb23c5221c4c242e6e41793aaa4db888da197ff3a
3
+ size 9558
runs/Feb20_05-23-50_35c5cc03ccf3/events.out.tfevents.1708406724.35c5cc03ccf3.1893.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e16abe3f452600e3b53c349c985e7b2d95bd958790fd18fa8b731d64e467c3
3
+ size 8727
runs/Feb20_05-37-26_35c5cc03ccf3/events.out.tfevents.1708407454.35c5cc03ccf3.10573.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88947bc353eb7b0bc168961de051d21eec229ca8afa2e0b92425eb788ff00564
3
+ size 8864
runs/Feb20_05-37-26_35c5cc03ccf3/events.out.tfevents.1708407490.35c5cc03ccf3.10573.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52eacb8dc598230538a39fe07f94ad80ddbe4d3f5cd4f05a5ca21fa8cbd62310
3
+ size 354
runs/Feb20_05-48-07_35c5cc03ccf3/events.out.tfevents.1708408093.35c5cc03ccf3.12717.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca6249e9d415ab8e4f265d60ec3a3e6002c3e89ddd9687044d552761c8f2775
3
+ size 8863
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.24804650540995352,
4
- "train_runtime": 4511.4291,
5
- "train_samples_per_second": 655.838,
6
- "train_steps_per_second": 1.281
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 4.222018241882324,
4
+ "train_runtime": 2.6838,
5
+ "train_samples_per_second": 37.261,
6
+ "train_steps_per_second": 0.373
7
  }
trainer_state.json CHANGED
@@ -1,95 +1,29 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 5780,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.43,
13
- "learning_rate": 4.567474048442907e-05,
14
- "loss": 0.7182,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.87,
19
- "learning_rate": 4.134948096885813e-05,
20
- "loss": 0.393,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 1.3,
25
- "learning_rate": 3.70242214532872e-05,
26
- "loss": 0.3001,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 1.73,
31
- "learning_rate": 3.269896193771627e-05,
32
- "loss": 0.2633,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 2.16,
37
- "learning_rate": 2.8373702422145332e-05,
38
- "loss": 0.2275,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 2.6,
43
- "learning_rate": 2.4048442906574396e-05,
44
- "loss": 0.1934,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 3.03,
49
- "learning_rate": 1.972318339100346e-05,
50
- "loss": 0.1841,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 3.46,
55
- "learning_rate": 1.5397923875432525e-05,
56
- "loss": 0.1484,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 3.89,
61
- "learning_rate": 1.1072664359861593e-05,
62
- "loss": 0.1445,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 4.33,
67
- "learning_rate": 6.747404844290659e-06,
68
- "loss": 0.1217,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 4.76,
73
- "learning_rate": 2.4221453287197232e-06,
74
- "loss": 0.113,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 5.0,
79
- "step": 5780,
80
- "total_flos": 3.951256954680115e+17,
81
- "train_loss": 0.24804650540995352,
82
- "train_runtime": 4511.4291,
83
- "train_samples_per_second": 655.838,
84
- "train_steps_per_second": 1.281
85
  }
86
  ],
87
  "logging_steps": 500,
88
- "max_steps": 5780,
89
  "num_input_tokens_seen": 0,
90
- "num_train_epochs": 5,
91
  "save_steps": 500,
92
- "total_flos": 3.951256954680115e+17,
93
  "train_batch_size": 64,
94
  "trial_name": null,
95
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "step": 1,
14
+ "total_flos": 68360846966784.0,
15
+ "train_loss": 4.222018241882324,
16
+ "train_runtime": 2.6838,
17
+ "train_samples_per_second": 37.261,
18
+ "train_steps_per_second": 0.373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 1,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 1,
25
  "save_steps": 500,
26
+ "total_flos": 68360846966784.0,
27
  "train_batch_size": 64,
28
  "trial_name": null,
29
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6623b37b1d356473764e5afe1b5235846f11dc42692c18f5476bc94b805ebe3
3
  size 4475
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6cc5992e14fa461b474d0c2e6d76ae8e94ba01f52f2d524fae1e8e45c488fa7
3
  size 4475