hanyoonsang committed on
Commit
372518a
·
1 Parent(s): f3e5486

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "_commit_hash": "a959cf33c20e09215873e338299c900f57047c61",
3
- "_name_or_path": "naver-clova-ix/donut-base",
4
  "architectures": [
5
  "VisionEncoderDecoderModel"
6
  ],
@@ -9,7 +9,6 @@
9
  "activation_dropout": 0.0,
10
  "activation_function": "gelu",
11
  "add_cross_attention": true,
12
- "add_final_layer_norm": true,
13
  "architectures": null,
14
  "attention_dropout": 0.0,
15
  "bad_words_ids": null,
@@ -22,8 +21,8 @@
22
  "decoder_attention_heads": 16,
23
  "decoder_ffn_dim": 4096,
24
  "decoder_layerdrop": 0.0,
25
- "decoder_layers": 4,
26
- "decoder_start_token_id": null,
27
  "diversity_penalty": 0.0,
28
  "do_sample": false,
29
  "dropout": 0.1,
@@ -40,20 +39,22 @@
40
  "forced_eos_token_id": 2,
41
  "id2label": {
42
  "0": "LABEL_0",
43
- "1": "LABEL_1"
 
44
  },
45
  "init_std": 0.02,
46
  "is_decoder": true,
47
- "is_encoder_decoder": false,
48
  "label2id": {
49
  "LABEL_0": 0,
50
- "LABEL_1": 1
 
51
  },
52
  "length_penalty": 1.0,
53
  "max_length": 512,
54
- "max_position_embeddings": 1536,
55
  "min_length": 0,
56
- "model_type": "mbart",
57
  "no_repeat_ngram_size": 0,
58
  "num_beam_groups": 1,
59
  "num_beams": 1,
@@ -70,7 +71,7 @@
70
  "repetition_penalty": 1.0,
71
  "return_dict": true,
72
  "return_dict_in_generate": false,
73
- "scale_embedding": true,
74
  "sep_token_id": null,
75
  "suppress_tokens": null,
76
  "task_specific_params": null,
@@ -87,7 +88,7 @@
87
  "typical_p": 1.0,
88
  "use_bfloat16": false,
89
  "use_cache": true,
90
- "vocab_size": 57537
91
  },
92
  "decoder_start_token_id": 0,
93
  "encoder": {
@@ -104,15 +105,16 @@
104
  "depths": [
105
  2,
106
  2,
107
- 14,
108
  2
109
  ],
110
  "diversity_penalty": 0.0,
111
  "do_sample": false,
112
  "drop_path_rate": 0.1,
113
  "early_stopping": false,
114
- "embed_dim": 128,
115
  "encoder_no_repeat_ngram_size": 0,
 
116
  "eos_token_id": null,
117
  "exponential_decay_length_penalty": null,
118
  "finetuning_task": null,
@@ -120,7 +122,7 @@
120
  "forced_eos_token_id": null,
121
  "hidden_act": "gelu",
122
  "hidden_dropout_prob": 0.0,
123
- "hidden_size": 1024,
124
  "id2label": {
125
  "0": "LABEL_0",
126
  "1": "LABEL_1"
@@ -141,19 +143,20 @@
141
  "max_length": 20,
142
  "min_length": 0,
143
  "mlp_ratio": 4.0,
144
- "model_type": "donut-swin",
145
  "no_repeat_ngram_size": 0,
146
  "num_beam_groups": 1,
147
  "num_beams": 1,
148
  "num_channels": 3,
149
  "num_heads": [
150
- 4,
151
- 8,
152
- 16,
153
- 32
154
  ],
155
  "num_layers": 4,
156
  "num_return_sequences": 1,
 
157
  "output_attentions": false,
158
  "output_hidden_states": false,
159
  "output_scores": false,
@@ -169,6 +172,13 @@
169
  "return_dict": true,
170
  "return_dict_in_generate": false,
171
  "sep_token_id": null,
 
 
 
 
 
 
 
172
  "suppress_tokens": null,
173
  "task_specific_params": null,
174
  "temperature": 1.0,
@@ -184,7 +194,7 @@
184
  "typical_p": 1.0,
185
  "use_absolute_embeddings": false,
186
  "use_bfloat16": false,
187
- "window_size": 10
188
  },
189
  "is_encoder_decoder": true,
190
  "model_type": "vision-encoder-decoder",
 
1
  {
2
+ "_commit_hash": null,
3
+ "_name_or_path": "my-model",
4
  "architectures": [
5
  "VisionEncoderDecoderModel"
6
  ],
 
9
  "activation_dropout": 0.0,
10
  "activation_function": "gelu",
11
  "add_cross_attention": true,
 
12
  "architectures": null,
13
  "attention_dropout": 0.0,
14
  "bad_words_ids": null,
 
21
  "decoder_attention_heads": 16,
22
  "decoder_ffn_dim": 4096,
23
  "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 12,
25
+ "decoder_start_token_id": 2,
26
  "diversity_penalty": 0.0,
27
  "do_sample": false,
28
  "dropout": 0.1,
 
39
  "forced_eos_token_id": 2,
40
  "id2label": {
41
  "0": "LABEL_0",
42
+ "1": "LABEL_1",
43
+ "2": "LABEL_2"
44
  },
45
  "init_std": 0.02,
46
  "is_decoder": true,
47
+ "is_encoder_decoder": true,
48
  "label2id": {
49
  "LABEL_0": 0,
50
+ "LABEL_1": 1,
51
+ "LABEL_2": 2
52
  },
53
  "length_penalty": 1.0,
54
  "max_length": 512,
55
+ "max_position_embeddings": 1024,
56
  "min_length": 0,
57
+ "model_type": "bart",
58
  "no_repeat_ngram_size": 0,
59
  "num_beam_groups": 1,
60
  "num_beams": 1,
 
71
  "repetition_penalty": 1.0,
72
  "return_dict": true,
73
  "return_dict_in_generate": false,
74
+ "scale_embedding": false,
75
  "sep_token_id": null,
76
  "suppress_tokens": null,
77
  "task_specific_params": null,
 
88
  "typical_p": 1.0,
89
  "use_bfloat16": false,
90
  "use_cache": true,
91
+ "vocab_size": 57531
92
  },
93
  "decoder_start_token_id": 0,
94
  "encoder": {
 
105
  "depths": [
106
  2,
107
  2,
108
+ 6,
109
  2
110
  ],
111
  "diversity_penalty": 0.0,
112
  "do_sample": false,
113
  "drop_path_rate": 0.1,
114
  "early_stopping": false,
115
+ "embed_dim": 96,
116
  "encoder_no_repeat_ngram_size": 0,
117
+ "encoder_stride": 32,
118
  "eos_token_id": null,
119
  "exponential_decay_length_penalty": null,
120
  "finetuning_task": null,
 
122
  "forced_eos_token_id": null,
123
  "hidden_act": "gelu",
124
  "hidden_dropout_prob": 0.0,
125
+ "hidden_size": 768,
126
  "id2label": {
127
  "0": "LABEL_0",
128
  "1": "LABEL_1"
 
143
  "max_length": 20,
144
  "min_length": 0,
145
  "mlp_ratio": 4.0,
146
+ "model_type": "swin",
147
  "no_repeat_ngram_size": 0,
148
  "num_beam_groups": 1,
149
  "num_beams": 1,
150
  "num_channels": 3,
151
  "num_heads": [
152
+ 3,
153
+ 6,
154
+ 12,
155
+ 24
156
  ],
157
  "num_layers": 4,
158
  "num_return_sequences": 1,
159
+ "out_features": null,
160
  "output_attentions": false,
161
  "output_hidden_states": false,
162
  "output_scores": false,
 
172
  "return_dict": true,
173
  "return_dict_in_generate": false,
174
  "sep_token_id": null,
175
+ "stage_names": [
176
+ "stem",
177
+ "stage1",
178
+ "stage2",
179
+ "stage3",
180
+ "stage4"
181
+ ],
182
  "suppress_tokens": null,
183
  "task_specific_params": null,
184
  "temperature": 1.0,
 
194
  "typical_p": 1.0,
195
  "use_absolute_embeddings": false,
196
  "use_bfloat16": false,
197
+ "window_size": 7
198
  },
199
  "is_encoder_decoder": true,
200
  "model_type": "vision-encoder-decoder",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28371c96c2cab1325beccd7fc9399470744a69401e48132934c43ba0754fe5d5
3
- size 809224571
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d177e779ce583dced643e828989e74f48543e43cbf7ef0478e38d07e688e50
3
+ size 1159744119
runs/Oct13_01-25-31_DESKTOP-P4C2GQ0/1697128089.3916984/events.out.tfevents.1697128089.DESKTOP-P4C2GQ0.10744.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e0ba9712b34e7de9660002d192c76a05c112d8759b3879d6d58417858355a86
3
+ size 5916
runs/Oct13_01-25-31_DESKTOP-P4C2GQ0/events.out.tfevents.1697128089.DESKTOP-P4C2GQ0.10744.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29599b1e66798d81d4d97f7972bfdc8d178aff073812c60d77319814e0554e60
3
+ size 8427
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bef1682a67c4ab58f0c94196b1753c267bf7c2d2b973ccf18662f2461d27953
3
  size 3695
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07f593c5ebd0f2acbe50f60ed97f0ddac8f32157f049871c3a5f78ac37daa151
3
  size 3695