schnapp committed on
Commit
304ad1e
1 Parent(s): 0170e96

End of training

Browse files
README.md CHANGED
@@ -40,112 +40,44 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 100
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
  | No log | 1.0 | 42 | 0.0129 |
50
- | No log | 2.0 | 84 | 0.0016 |
51
  | No log | 3.0 | 126 | 0.0007 |
52
  | No log | 4.0 | 168 | 0.0005 |
53
  | No log | 5.0 | 210 | 0.0003 |
54
  | No log | 6.0 | 252 | 0.0003 |
55
  | No log | 7.0 | 294 | 0.0002 |
56
  | No log | 8.0 | 336 | 0.0002 |
57
- | No log | 9.0 | 378 | 0.0002 |
58
- | No log | 10.0 | 420 | 0.0002 |
59
  | No log | 11.0 | 462 | 0.0001 |
60
- | 0.0134 | 12.0 | 504 | 0.0001 |
61
- | 0.0134 | 13.0 | 546 | 0.0001 |
62
- | 0.0134 | 14.0 | 588 | 0.0001 |
63
- | 0.0134 | 15.0 | 630 | 0.0001 |
64
- | 0.0134 | 16.0 | 672 | 0.0001 |
65
- | 0.0134 | 17.0 | 714 | 0.0001 |
66
- | 0.0134 | 18.0 | 756 | 0.0001 |
67
- | 0.0134 | 19.0 | 798 | 0.0001 |
68
- | 0.0134 | 20.0 | 840 | 0.0001 |
69
- | 0.0134 | 21.0 | 882 | 0.0001 |
70
- | 0.0134 | 22.0 | 924 | 0.0001 |
71
- | 0.0134 | 23.0 | 966 | 0.0001 |
72
- | 0.0 | 24.0 | 1008 | 0.0001 |
73
- | 0.0 | 25.0 | 1050 | 0.0001 |
74
- | 0.0 | 26.0 | 1092 | 0.0001 |
75
- | 0.0 | 27.0 | 1134 | 0.0001 |
76
- | 0.0 | 28.0 | 1176 | 0.0001 |
77
- | 0.0 | 29.0 | 1218 | 0.0001 |
78
- | 0.0 | 30.0 | 1260 | 0.0001 |
79
- | 0.0 | 31.0 | 1302 | 0.0001 |
80
  | 0.0 | 32.0 | 1344 | 0.0000 |
81
- | 0.0 | 33.0 | 1386 | 0.0000 |
82
- | 0.0 | 34.0 | 1428 | 0.0000 |
83
- | 0.0 | 35.0 | 1470 | 0.0000 |
84
- | 0.0 | 36.0 | 1512 | 0.0000 |
85
- | 0.0 | 37.0 | 1554 | 0.0000 |
86
- | 0.0 | 38.0 | 1596 | 0.0000 |
87
- | 0.0 | 39.0 | 1638 | 0.0000 |
88
- | 0.0 | 40.0 | 1680 | 0.0000 |
89
- | 0.0 | 41.0 | 1722 | 0.0000 |
90
- | 0.0 | 42.0 | 1764 | 0.0000 |
91
- | 0.0 | 43.0 | 1806 | 0.0000 |
92
- | 0.0 | 44.0 | 1848 | 0.0000 |
93
- | 0.0 | 45.0 | 1890 | 0.0000 |
94
- | 0.0 | 46.0 | 1932 | 0.0000 |
95
- | 0.0 | 47.0 | 1974 | 0.0000 |
96
- | 0.0 | 48.0 | 2016 | 0.0000 |
97
- | 0.0 | 49.0 | 2058 | 0.0000 |
98
- | 0.0 | 50.0 | 2100 | 0.0000 |
99
- | 0.0 | 51.0 | 2142 | 0.0000 |
100
- | 0.0 | 52.0 | 2184 | 0.0000 |
101
- | 0.0 | 53.0 | 2226 | 0.0000 |
102
- | 0.0 | 54.0 | 2268 | 0.0000 |
103
- | 0.0 | 55.0 | 2310 | 0.0000 |
104
- | 0.0 | 56.0 | 2352 | 0.0000 |
105
- | 0.0 | 57.0 | 2394 | 0.0000 |
106
- | 0.0 | 58.0 | 2436 | 0.0000 |
107
- | 0.0 | 59.0 | 2478 | 0.0000 |
108
- | 0.0 | 60.0 | 2520 | 0.0000 |
109
- | 0.0 | 61.0 | 2562 | 0.0000 |
110
- | 0.0 | 62.0 | 2604 | 0.0000 |
111
- | 0.0 | 63.0 | 2646 | 0.0000 |
112
- | 0.0 | 64.0 | 2688 | 0.0000 |
113
- | 0.0 | 65.0 | 2730 | 0.0000 |
114
- | 0.0 | 66.0 | 2772 | 0.0000 |
115
- | 0.0 | 67.0 | 2814 | 0.0000 |
116
- | 0.0 | 68.0 | 2856 | 0.0000 |
117
- | 0.0 | 69.0 | 2898 | 0.0000 |
118
- | 0.0 | 70.0 | 2940 | 0.0000 |
119
- | 0.0 | 71.0 | 2982 | 0.0000 |
120
- | 0.0 | 72.0 | 3024 | 0.0000 |
121
- | 0.0 | 73.0 | 3066 | 0.0000 |
122
- | 0.0 | 74.0 | 3108 | 0.0000 |
123
- | 0.0 | 75.0 | 3150 | 0.0000 |
124
- | 0.0 | 76.0 | 3192 | 0.0000 |
125
- | 0.0 | 77.0 | 3234 | 0.0000 |
126
- | 0.0 | 78.0 | 3276 | 0.0000 |
127
- | 0.0 | 79.0 | 3318 | 0.0000 |
128
- | 0.0 | 80.0 | 3360 | 0.0000 |
129
- | 0.0 | 81.0 | 3402 | 0.0000 |
130
- | 0.0 | 82.0 | 3444 | 0.0000 |
131
- | 0.0 | 83.0 | 3486 | 0.0000 |
132
- | 0.0 | 84.0 | 3528 | 0.0000 |
133
- | 0.0 | 85.0 | 3570 | 0.0000 |
134
- | 0.0 | 86.0 | 3612 | 0.0000 |
135
- | 0.0 | 87.0 | 3654 | 0.0000 |
136
- | 0.0 | 88.0 | 3696 | 0.0000 |
137
- | 0.0 | 89.0 | 3738 | 0.0000 |
138
- | 0.0 | 90.0 | 3780 | 0.0000 |
139
- | 0.0 | 91.0 | 3822 | 0.0000 |
140
- | 0.0 | 92.0 | 3864 | 0.0000 |
141
- | 0.0 | 93.0 | 3906 | 0.0000 |
142
- | 0.0 | 94.0 | 3948 | 0.0000 |
143
- | 0.0 | 95.0 | 3990 | 0.0000 |
144
- | 0.0 | 96.0 | 4032 | 0.0000 |
145
- | 0.0 | 97.0 | 4074 | 0.0000 |
146
- | 0.0 | 98.0 | 4116 | 0.0000 |
147
- | 0.0 | 99.0 | 4158 | 0.0000 |
148
- | 0.0 | 100.0 | 4200 | 0.0000 |
149
 
150
 
151
  ### Framework versions
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 32
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
  | No log | 1.0 | 42 | 0.0129 |
50
+ | No log | 2.0 | 84 | 0.0014 |
51
  | No log | 3.0 | 126 | 0.0007 |
52
  | No log | 4.0 | 168 | 0.0005 |
53
  | No log | 5.0 | 210 | 0.0003 |
54
  | No log | 6.0 | 252 | 0.0003 |
55
  | No log | 7.0 | 294 | 0.0002 |
56
  | No log | 8.0 | 336 | 0.0002 |
57
+ | No log | 9.0 | 378 | 0.0001 |
58
+ | No log | 10.0 | 420 | 0.0001 |
59
  | No log | 11.0 | 462 | 0.0001 |
60
+ | 0.0161 | 12.0 | 504 | 0.0001 |
61
+ | 0.0161 | 13.0 | 546 | 0.0001 |
62
+ | 0.0161 | 14.0 | 588 | 0.0001 |
63
+ | 0.0161 | 15.0 | 630 | 0.0001 |
64
+ | 0.0161 | 16.0 | 672 | 0.0001 |
65
+ | 0.0161 | 17.0 | 714 | 0.0001 |
66
+ | 0.0161 | 18.0 | 756 | 0.0001 |
67
+ | 0.0161 | 19.0 | 798 | 0.0000 |
68
+ | 0.0161 | 20.0 | 840 | 0.0000 |
69
+ | 0.0161 | 21.0 | 882 | 0.0000 |
70
+ | 0.0161 | 22.0 | 924 | 0.0000 |
71
+ | 0.0161 | 23.0 | 966 | 0.0000 |
72
+ | 0.0 | 24.0 | 1008 | 0.0000 |
73
+ | 0.0 | 25.0 | 1050 | 0.0000 |
74
+ | 0.0 | 26.0 | 1092 | 0.0000 |
75
+ | 0.0 | 27.0 | 1134 | 0.0000 |
76
+ | 0.0 | 28.0 | 1176 | 0.0000 |
77
+ | 0.0 | 29.0 | 1218 | 0.0000 |
78
+ | 0.0 | 30.0 | 1260 | 0.0000 |
79
+ | 0.0 | 31.0 | 1302 | 0.0000 |
80
  | 0.0 | 32.0 | 1344 | 0.0000 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c17ea4467390a86c0fded2ce724f7cf38982d5d706f30f240b56f0e5f1817038
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:481b649a7c35e013243c07a42f6fc614a934057148a010733ebfa8c12c31fad9
3
  size 3555504
logs/events.out.tfevents.1717282943.llms-a100.324432.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d586bc9674c75bb7d363a93db17644a022555f62d5eb9dd3019785b9f66c786
3
- size 13738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3270044c0306304edde66c827dec35d48ca031227fa7176f5b320c5e023f67a9
3
+ size 14363
logs/events.out.tfevents.1717285975.llms-a100.324432.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bf21050ec4e233d9876082ecd0f86989c6a7ab81943f8aa2402ed7154c25d19
3
+ size 359