Skyler215 commited on
Commit
a27bd19
·
verified ·
1 Parent(s): 7a7c055

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -4,117 +4,126 @@
4
  ],
5
  "decoder": {
6
  "_attn_implementation_autoset": true,
7
- "_name_or_path": "vinai/bartpho-syllable",
8
- "activation_dropout": 0.0,
9
- "activation_function": "gelu",
10
  "add_cross_attention": true,
11
  "architectures": [
12
- "MBartModel"
13
  ],
14
- "attention_dropout": 0.0,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
- "bos_token_id": 0,
18
  "chunk_size_feed_forward": 0,
19
- "classifier_dropout": 0.0,
20
  "cross_attention_hidden_size": null,
21
- "d_model": 1024,
22
- "decoder_attention_heads": 16,
23
- "decoder_ffn_dim": 4096,
24
- "decoder_layerdrop": 0.0,
25
- "decoder_layers": 12,
26
- "decoder_start_token_id": 0,
27
  "diversity_penalty": 0.0,
28
  "do_sample": false,
29
- "dropout": 0.1,
30
  "early_stopping": true,
31
- "encoder_attention_heads": 16,
32
- "encoder_ffn_dim": 4096,
33
- "encoder_layerdrop": 0.0,
34
- "encoder_layers": 12,
35
  "encoder_no_repeat_ngram_size": 0,
36
- "eos_token_id": 2,
37
  "exponential_decay_length_penalty": null,
38
  "finetuning_task": null,
39
  "forced_bos_token_id": null,
40
- "forced_eos_token_id": 2,
41
- "gradient_checkpointing": false,
42
  "id2label": {
43
  "0": "LABEL_0",
44
  "1": "LABEL_1"
45
  },
46
- "init_std": 0.02,
47
  "is_decoder": true,
48
  "is_encoder_decoder": false,
49
  "label2id": {
50
  "LABEL_0": 0,
51
  "LABEL_1": 1
52
  },
 
53
  "length_penalty": 2.0,
54
  "max_length": 29,
55
- "max_position_embeddings": 1024,
56
  "min_length": 0,
57
- "model_type": "mbart",
 
 
 
 
 
 
58
  "no_repeat_ngram_size": 3,
59
  "num_beam_groups": 1,
60
  "num_beams": 4,
61
- "num_hidden_layers": 12,
62
  "num_return_sequences": 1,
63
  "output_attentions": false,
64
  "output_hidden_states": false,
65
  "output_scores": false,
66
- "pad_token_id": 1,
67
  "prefix": null,
68
  "problem_type": null,
69
  "pruned_heads": {},
70
  "remove_invalid_values": false,
 
71
  "repetition_penalty": 1.0,
 
72
  "return_dict": true,
73
  "return_dict_in_generate": false,
74
- "scale_embedding": false,
 
75
  "sep_token_id": null,
 
 
 
 
 
76
  "suppress_tokens": null,
77
- "task_specific_params": null,
 
 
 
 
 
78
  "temperature": 1.0,
79
  "tf_legacy_loss": false,
80
  "tie_encoder_decoder": false,
81
  "tie_word_embeddings": true,
82
- "tokenizer_class": "BartphoTokenizer",
83
  "top_k": 50,
84
  "top_p": 1.0,
85
- "torch_dtype": "float32",
86
  "torchscript": false,
87
  "typical_p": 1.0,
88
  "use_bfloat16": false,
89
  "use_cache": true,
90
- "vocab_size": 40030
91
  },
92
- "decoder_start_token_id": 0,
93
  "early_stopping": null,
94
  "encoder": {
95
  "_attn_implementation_autoset": true,
96
- "_name_or_path": "microsoft/beit-large-patch16-224-pt22k-ft22k",
97
  "add_cross_attention": false,
98
- "add_fpn": false,
99
  "architectures": [
100
- "BeitForImageClassification"
101
  ],
102
  "attention_probs_dropout_prob": 0.0,
103
- "auxiliary_channels": 256,
104
- "auxiliary_concat_input": false,
105
- "auxiliary_loss_weight": 0.4,
106
- "auxiliary_num_convs": 1,
107
  "bad_words_ids": null,
108
  "begin_suppress_tokens": null,
109
  "bos_token_id": null,
110
  "chunk_size_feed_forward": 0,
111
  "cross_attention_hidden_size": null,
112
  "decoder_start_token_id": null,
 
 
 
 
 
 
113
  "diversity_penalty": 0.0,
114
  "do_sample": false,
115
  "drop_path_rate": 0.1,
116
  "early_stopping": false,
 
117
  "encoder_no_repeat_ngram_size": 0,
 
118
  "eos_token_id": null,
119
  "exponential_decay_length_penalty": null,
120
  "finetuning_task": null,
@@ -122,7 +131,7 @@
122
  "forced_eos_token_id": null,
123
  "hidden_act": "gelu",
124
  "hidden_dropout_prob": 0.0,
125
- "hidden_size": 1024,
126
  "id2label": {
127
  "0": "organism, being",
128
  "1": "benthos",
@@ -21966,9 +21975,8 @@
21966
  "21841": "chipboard, hardboard",
21967
  "21842": "knothole"
21968
  },
21969
- "image_size": 224,
21970
  "initializer_range": 0.02,
21971
- "intermediate_size": 4096,
21972
  "is_decoder": false,
21973
  "is_encoder_decoder": false,
21974
  "label2id": {
@@ -43211,72 +43219,51 @@
43211
  "zwieback, rusk, Brussels_biscuit, twice-baked_bread": 12729,
43212
  "zygospore": 21630
43213
  },
43214
- "layer_norm_eps": 1e-12,
43215
- "layer_scale_init_value": 0.1,
43216
  "length_penalty": 1.0,
43217
  "max_length": 20,
43218
  "min_length": 0,
43219
- "model_type": "beit",
 
43220
  "no_repeat_ngram_size": 0,
43221
- "num_attention_heads": 16,
43222
  "num_beam_groups": 1,
43223
  "num_beams": 1,
43224
  "num_channels": 3,
43225
- "num_hidden_layers": 24,
 
 
 
 
 
 
43226
  "num_return_sequences": 1,
43227
  "out_features": [
43228
- "stage24"
43229
  ],
43230
  "out_indices": [
43231
- 24
43232
  ],
43233
  "output_attentions": false,
43234
  "output_hidden_states": false,
43235
  "output_scores": false,
43236
  "pad_token_id": null,
43237
- "patch_size": 16,
43238
- "pool_scales": [
43239
- 1,
43240
- 2,
43241
- 3,
43242
- 6
43243
- ],
43244
  "prefix": null,
43245
  "problem_type": null,
43246
  "pruned_heads": {},
 
43247
  "remove_invalid_values": false,
43248
  "repetition_penalty": 1.0,
43249
- "reshape_hidden_states": true,
43250
  "return_dict": true,
43251
  "return_dict_in_generate": false,
43252
- "semantic_loss_ignore_index": 255,
43253
  "sep_token_id": null,
43254
  "stage_names": [
43255
  "stem",
43256
  "stage1",
43257
  "stage2",
43258
  "stage3",
43259
- "stage4",
43260
- "stage5",
43261
- "stage6",
43262
- "stage7",
43263
- "stage8",
43264
- "stage9",
43265
- "stage10",
43266
- "stage11",
43267
- "stage12",
43268
- "stage13",
43269
- "stage14",
43270
- "stage15",
43271
- "stage16",
43272
- "stage17",
43273
- "stage18",
43274
- "stage19",
43275
- "stage20",
43276
- "stage21",
43277
- "stage22",
43278
- "stage23",
43279
- "stage24"
43280
  ],
43281
  "suppress_tokens": null,
43282
  "task_specific_params": null,
@@ -43290,26 +43277,21 @@
43290
  "torch_dtype": "float32",
43291
  "torchscript": false,
43292
  "typical_p": 1.0,
43293
- "use_absolute_position_embeddings": false,
43294
- "use_auxiliary_head": true,
43295
  "use_bfloat16": false,
43296
- "use_mask_token": false,
43297
- "use_mean_pooling": true,
43298
- "use_relative_position_bias": true,
43299
- "use_shared_relative_position_bias": false,
43300
- "vocab_size": 8192
43301
  },
43302
- "eos_token_id": 2,
43303
  "is_encoder_decoder": true,
43304
  "length_penalty": null,
43305
  "max_length": null,
43306
  "model_type": "vision-encoder-decoder",
43307
  "no_repeat_ngram_size": null,
43308
  "num_beams": null,
43309
- "pad_token_id": 1,
43310
  "tie_word_embeddings": false,
43311
  "torch_dtype": "float32",
43312
  "transformers_version": "4.46.2",
43313
  "use_cache": false,
43314
- "vocab_size": 40030
43315
  }
 
4
  ],
5
  "decoder": {
6
  "_attn_implementation_autoset": true,
7
+ "_name_or_path": "NlpHUST/gpt2-vietnamese",
8
+ "activation_function": "gelu_new",
 
9
  "add_cross_attention": true,
10
  "architectures": [
11
+ "GPT2LMHeadModel"
12
  ],
13
+ "attn_pdrop": 0.0,
14
  "bad_words_ids": null,
15
  "begin_suppress_tokens": null,
16
+ "bos_token_id": 50256,
17
  "chunk_size_feed_forward": 0,
 
18
  "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": 50257,
 
 
 
 
 
20
  "diversity_penalty": 0.0,
21
  "do_sample": false,
 
22
  "early_stopping": true,
23
+ "embd_pdrop": 0.0,
 
 
 
24
  "encoder_no_repeat_ngram_size": 0,
25
+ "eos_token_id": 50257,
26
  "exponential_decay_length_penalty": null,
27
  "finetuning_task": null,
28
  "forced_bos_token_id": null,
29
+ "forced_eos_token_id": null,
 
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1"
33
  },
34
+ "initializer_range": 0.02,
35
  "is_decoder": true,
36
  "is_encoder_decoder": false,
37
  "label2id": {
38
  "LABEL_0": 0,
39
  "LABEL_1": 1
40
  },
41
+ "layer_norm_epsilon": 1e-05,
42
  "length_penalty": 2.0,
43
  "max_length": 29,
 
44
  "min_length": 0,
45
+ "model_type": "gpt2",
46
+ "n_ctx": 1024,
47
+ "n_embd": 768,
48
+ "n_head": 12,
49
+ "n_inner": null,
50
+ "n_layer": 12,
51
+ "n_positions": 1024,
52
  "no_repeat_ngram_size": 3,
53
  "num_beam_groups": 1,
54
  "num_beams": 4,
 
55
  "num_return_sequences": 1,
56
  "output_attentions": false,
57
  "output_hidden_states": false,
58
  "output_scores": false,
59
+ "pad_token_id": 50258,
60
  "prefix": null,
61
  "problem_type": null,
62
  "pruned_heads": {},
63
  "remove_invalid_values": false,
64
+ "reorder_and_upcast_attn": false,
65
  "repetition_penalty": 1.0,
66
+ "resid_pdrop": 0.0,
67
  "return_dict": true,
68
  "return_dict_in_generate": false,
69
+ "scale_attn_by_inverse_layer_idx": false,
70
+ "scale_attn_weights": true,
71
  "sep_token_id": null,
72
+ "summary_activation": null,
73
+ "summary_first_dropout": 0.1,
74
+ "summary_proj_to_labels": true,
75
+ "summary_type": "cls_index",
76
+ "summary_use_proj": true,
77
  "suppress_tokens": null,
78
+ "task_specific_params": {
79
+ "text-generation": {
80
+ "do_sample": true,
81
+ "max_length": 50
82
+ }
83
+ },
84
  "temperature": 1.0,
85
  "tf_legacy_loss": false,
86
  "tie_encoder_decoder": false,
87
  "tie_word_embeddings": true,
88
+ "tokenizer_class": null,
89
  "top_k": 50,
90
  "top_p": 1.0,
91
+ "torch_dtype": null,
92
  "torchscript": false,
93
  "typical_p": 1.0,
94
  "use_bfloat16": false,
95
  "use_cache": true,
96
+ "vocab_size": 50259
97
  },
98
+ "decoder_start_token_id": 50257,
99
  "early_stopping": null,
100
  "encoder": {
101
  "_attn_implementation_autoset": true,
102
+ "_name_or_path": "microsoft/swin-large-patch4-window12-384-in22k",
103
  "add_cross_attention": false,
 
104
  "architectures": [
105
+ "SwinForImageClassification"
106
  ],
107
  "attention_probs_dropout_prob": 0.0,
 
 
 
 
108
  "bad_words_ids": null,
109
  "begin_suppress_tokens": null,
110
  "bos_token_id": null,
111
  "chunk_size_feed_forward": 0,
112
  "cross_attention_hidden_size": null,
113
  "decoder_start_token_id": null,
114
+ "depths": [
115
+ 2,
116
+ 2,
117
+ 18,
118
+ 2
119
+ ],
120
  "diversity_penalty": 0.0,
121
  "do_sample": false,
122
  "drop_path_rate": 0.1,
123
  "early_stopping": false,
124
+ "embed_dim": 192,
125
  "encoder_no_repeat_ngram_size": 0,
126
+ "encoder_stride": 32,
127
  "eos_token_id": null,
128
  "exponential_decay_length_penalty": null,
129
  "finetuning_task": null,
 
131
  "forced_eos_token_id": null,
132
  "hidden_act": "gelu",
133
  "hidden_dropout_prob": 0.0,
134
+ "hidden_size": 1536,
135
  "id2label": {
136
  "0": "organism, being",
137
  "1": "benthos",
 
21975
  "21841": "chipboard, hardboard",
21976
  "21842": "knothole"
21977
  },
21978
+ "image_size": 384,
21979
  "initializer_range": 0.02,
 
21980
  "is_decoder": false,
21981
  "is_encoder_decoder": false,
21982
  "label2id": {
 
43219
  "zwieback, rusk, Brussels_biscuit, twice-baked_bread": 12729,
43220
  "zygospore": 21630
43221
  },
43222
+ "layer_norm_eps": 1e-05,
 
43223
  "length_penalty": 1.0,
43224
  "max_length": 20,
43225
  "min_length": 0,
43226
+ "mlp_ratio": 4.0,
43227
+ "model_type": "swin",
43228
  "no_repeat_ngram_size": 0,
 
43229
  "num_beam_groups": 1,
43230
  "num_beams": 1,
43231
  "num_channels": 3,
43232
+ "num_heads": [
43233
+ 6,
43234
+ 12,
43235
+ 24,
43236
+ 48
43237
+ ],
43238
+ "num_layers": 4,
43239
  "num_return_sequences": 1,
43240
  "out_features": [
43241
+ "stage4"
43242
  ],
43243
  "out_indices": [
43244
+ 4
43245
  ],
43246
  "output_attentions": false,
43247
  "output_hidden_states": false,
43248
  "output_scores": false,
43249
  "pad_token_id": null,
43250
+ "patch_size": 4,
43251
+ "path_norm": true,
 
 
 
 
 
43252
  "prefix": null,
43253
  "problem_type": null,
43254
  "pruned_heads": {},
43255
+ "qkv_bias": true,
43256
  "remove_invalid_values": false,
43257
  "repetition_penalty": 1.0,
 
43258
  "return_dict": true,
43259
  "return_dict_in_generate": false,
 
43260
  "sep_token_id": null,
43261
  "stage_names": [
43262
  "stem",
43263
  "stage1",
43264
  "stage2",
43265
  "stage3",
43266
+ "stage4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43267
  ],
43268
  "suppress_tokens": null,
43269
  "task_specific_params": null,
 
43277
  "torch_dtype": "float32",
43278
  "torchscript": false,
43279
  "typical_p": 1.0,
43280
+ "use_absolute_embeddings": false,
 
43281
  "use_bfloat16": false,
43282
+ "window_size": 12
 
 
 
 
43283
  },
43284
+ "eos_token_id": 50257,
43285
  "is_encoder_decoder": true,
43286
  "length_penalty": null,
43287
  "max_length": null,
43288
  "model_type": "vision-encoder-decoder",
43289
  "no_repeat_ngram_size": null,
43290
  "num_beams": null,
43291
+ "pad_token_id": 50258,
43292
  "tie_word_embeddings": false,
43293
  "torch_dtype": "float32",
43294
  "transformers_version": "4.46.2",
43295
  "use_cache": false,
43296
+ "vocab_size": 50257
43297
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f067af818fdaf6489b2d41b308e363ce3c01d12cc18654b5a70aab1ea4ec0e15
3
- size 2188139048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca33e1051c71c8ea69c5164c539dd6e008f63e75c9bd2621b784e310bcfce3aa
3
+ size 1400816568
preprocessor_config.json CHANGED
@@ -1,24 +1,22 @@
1
  {
2
- "crop_size": 224,
3
- "do_center_crop": false,
4
  "do_normalize": true,
5
  "do_rescale": true,
6
  "do_resize": true,
7
  "image_mean": [
8
- 0.5,
9
- 0.5,
10
- 0.5
11
  ],
12
  "image_processor_type": "ViTImageProcessor",
13
  "image_std": [
14
- 0.5,
15
- 0.5,
16
- 0.5
17
  ],
18
- "resample": 2,
19
  "rescale_factor": 0.00392156862745098,
20
  "size": {
21
- "height": 224,
22
- "width": 224
23
  }
24
  }
 
1
  {
 
 
2
  "do_normalize": true,
3
  "do_rescale": true,
4
  "do_resize": true,
5
  "image_mean": [
6
+ 0.485,
7
+ 0.456,
8
+ 0.406
9
  ],
10
  "image_processor_type": "ViTImageProcessor",
11
  "image_std": [
12
+ 0.229,
13
+ 0.224,
14
+ 0.225
15
  ],
16
+ "resample": 3,
17
  "rescale_factor": 0.00392156862745098,
18
  "size": {
19
+ "height": 384,
20
+ "width": 384
21
  }
22
  }
runs/Nov30_13-17-05_3a55e4dea1ac/events.out.tfevents.1732972630.3a55e4dea1ac.847.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ecbcadf998c153e97334047eae0ceb3d4a5162b8b24b9d1e244e7f543dd6d9f
3
+ size 1761368
runs/Nov30_13-18-52_3a55e4dea1ac/events.out.tfevents.1732972733.3a55e4dea1ac.4166.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe16f6d7ed2097b0ee9fbb458e3312e11e89661f1d967f708dc2d15df38631d
3
+ size 1762242
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:debf02df871fc5dfa6dae15f563b696c84d8ee9b3e8b3212d735c908c3aa8cbc
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa015fb227493245f67922f6a49ea97cc1ef194da74222fb821d691980f64bd
3
  size 5432