Training in progress, epoch 1
Browse files
config.json
CHANGED
@@ -4,117 +4,126 @@
|
|
4 |
],
|
5 |
"decoder": {
|
6 |
"_attn_implementation_autoset": true,
|
7 |
-
"_name_or_path": "
|
8 |
-
"
|
9 |
-
"activation_function": "gelu",
|
10 |
"add_cross_attention": true,
|
11 |
"architectures": [
|
12 |
-
"
|
13 |
],
|
14 |
-
"
|
15 |
"bad_words_ids": null,
|
16 |
"begin_suppress_tokens": null,
|
17 |
-
"bos_token_id":
|
18 |
"chunk_size_feed_forward": 0,
|
19 |
-
"classifier_dropout": 0.0,
|
20 |
"cross_attention_hidden_size": null,
|
21 |
-
"
|
22 |
-
"decoder_attention_heads": 16,
|
23 |
-
"decoder_ffn_dim": 4096,
|
24 |
-
"decoder_layerdrop": 0.0,
|
25 |
-
"decoder_layers": 12,
|
26 |
-
"decoder_start_token_id": 0,
|
27 |
"diversity_penalty": 0.0,
|
28 |
"do_sample": false,
|
29 |
-
"dropout": 0.1,
|
30 |
"early_stopping": true,
|
31 |
-
"
|
32 |
-
"encoder_ffn_dim": 4096,
|
33 |
-
"encoder_layerdrop": 0.0,
|
34 |
-
"encoder_layers": 12,
|
35 |
"encoder_no_repeat_ngram_size": 0,
|
36 |
-
"eos_token_id":
|
37 |
"exponential_decay_length_penalty": null,
|
38 |
"finetuning_task": null,
|
39 |
"forced_bos_token_id": null,
|
40 |
-
"forced_eos_token_id":
|
41 |
-
"gradient_checkpointing": false,
|
42 |
"id2label": {
|
43 |
"0": "LABEL_0",
|
44 |
"1": "LABEL_1"
|
45 |
},
|
46 |
-
"
|
47 |
"is_decoder": true,
|
48 |
"is_encoder_decoder": false,
|
49 |
"label2id": {
|
50 |
"LABEL_0": 0,
|
51 |
"LABEL_1": 1
|
52 |
},
|
|
|
53 |
"length_penalty": 2.0,
|
54 |
"max_length": 29,
|
55 |
-
"max_position_embeddings": 1024,
|
56 |
"min_length": 0,
|
57 |
-
"model_type": "
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
"no_repeat_ngram_size": 3,
|
59 |
"num_beam_groups": 1,
|
60 |
"num_beams": 4,
|
61 |
-
"num_hidden_layers": 12,
|
62 |
"num_return_sequences": 1,
|
63 |
"output_attentions": false,
|
64 |
"output_hidden_states": false,
|
65 |
"output_scores": false,
|
66 |
-
"pad_token_id":
|
67 |
"prefix": null,
|
68 |
"problem_type": null,
|
69 |
"pruned_heads": {},
|
70 |
"remove_invalid_values": false,
|
|
|
71 |
"repetition_penalty": 1.0,
|
|
|
72 |
"return_dict": true,
|
73 |
"return_dict_in_generate": false,
|
74 |
-
"
|
|
|
75 |
"sep_token_id": null,
|
|
|
|
|
|
|
|
|
|
|
76 |
"suppress_tokens": null,
|
77 |
-
"task_specific_params":
|
|
|
|
|
|
|
|
|
|
|
78 |
"temperature": 1.0,
|
79 |
"tf_legacy_loss": false,
|
80 |
"tie_encoder_decoder": false,
|
81 |
"tie_word_embeddings": true,
|
82 |
-
"tokenizer_class":
|
83 |
"top_k": 50,
|
84 |
"top_p": 1.0,
|
85 |
-
"torch_dtype":
|
86 |
"torchscript": false,
|
87 |
"typical_p": 1.0,
|
88 |
"use_bfloat16": false,
|
89 |
"use_cache": true,
|
90 |
-
"vocab_size":
|
91 |
},
|
92 |
-
"decoder_start_token_id":
|
93 |
"early_stopping": null,
|
94 |
"encoder": {
|
95 |
"_attn_implementation_autoset": true,
|
96 |
-
"_name_or_path": "microsoft/
|
97 |
"add_cross_attention": false,
|
98 |
-
"add_fpn": false,
|
99 |
"architectures": [
|
100 |
-
"
|
101 |
],
|
102 |
"attention_probs_dropout_prob": 0.0,
|
103 |
-
"auxiliary_channels": 256,
|
104 |
-
"auxiliary_concat_input": false,
|
105 |
-
"auxiliary_loss_weight": 0.4,
|
106 |
-
"auxiliary_num_convs": 1,
|
107 |
"bad_words_ids": null,
|
108 |
"begin_suppress_tokens": null,
|
109 |
"bos_token_id": null,
|
110 |
"chunk_size_feed_forward": 0,
|
111 |
"cross_attention_hidden_size": null,
|
112 |
"decoder_start_token_id": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
"diversity_penalty": 0.0,
|
114 |
"do_sample": false,
|
115 |
"drop_path_rate": 0.1,
|
116 |
"early_stopping": false,
|
|
|
117 |
"encoder_no_repeat_ngram_size": 0,
|
|
|
118 |
"eos_token_id": null,
|
119 |
"exponential_decay_length_penalty": null,
|
120 |
"finetuning_task": null,
|
@@ -122,7 +131,7 @@
|
|
122 |
"forced_eos_token_id": null,
|
123 |
"hidden_act": "gelu",
|
124 |
"hidden_dropout_prob": 0.0,
|
125 |
-
"hidden_size":
|
126 |
"id2label": {
|
127 |
"0": "organism, being",
|
128 |
"1": "benthos",
|
@@ -21966,9 +21975,8 @@
|
|
21966 |
"21841": "chipboard, hardboard",
|
21967 |
"21842": "knothole"
|
21968 |
},
|
21969 |
-
"image_size":
|
21970 |
"initializer_range": 0.02,
|
21971 |
-
"intermediate_size": 4096,
|
21972 |
"is_decoder": false,
|
21973 |
"is_encoder_decoder": false,
|
21974 |
"label2id": {
|
@@ -43211,72 +43219,51 @@
|
|
43211 |
"zwieback, rusk, Brussels_biscuit, twice-baked_bread": 12729,
|
43212 |
"zygospore": 21630
|
43213 |
},
|
43214 |
-
"layer_norm_eps": 1e-
|
43215 |
-
"layer_scale_init_value": 0.1,
|
43216 |
"length_penalty": 1.0,
|
43217 |
"max_length": 20,
|
43218 |
"min_length": 0,
|
43219 |
-
"
|
|
|
43220 |
"no_repeat_ngram_size": 0,
|
43221 |
-
"num_attention_heads": 16,
|
43222 |
"num_beam_groups": 1,
|
43223 |
"num_beams": 1,
|
43224 |
"num_channels": 3,
|
43225 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
43226 |
"num_return_sequences": 1,
|
43227 |
"out_features": [
|
43228 |
-
"
|
43229 |
],
|
43230 |
"out_indices": [
|
43231 |
-
|
43232 |
],
|
43233 |
"output_attentions": false,
|
43234 |
"output_hidden_states": false,
|
43235 |
"output_scores": false,
|
43236 |
"pad_token_id": null,
|
43237 |
-
"patch_size":
|
43238 |
-
"
|
43239 |
-
1,
|
43240 |
-
2,
|
43241 |
-
3,
|
43242 |
-
6
|
43243 |
-
],
|
43244 |
"prefix": null,
|
43245 |
"problem_type": null,
|
43246 |
"pruned_heads": {},
|
|
|
43247 |
"remove_invalid_values": false,
|
43248 |
"repetition_penalty": 1.0,
|
43249 |
-
"reshape_hidden_states": true,
|
43250 |
"return_dict": true,
|
43251 |
"return_dict_in_generate": false,
|
43252 |
-
"semantic_loss_ignore_index": 255,
|
43253 |
"sep_token_id": null,
|
43254 |
"stage_names": [
|
43255 |
"stem",
|
43256 |
"stage1",
|
43257 |
"stage2",
|
43258 |
"stage3",
|
43259 |
-
"stage4"
|
43260 |
-
"stage5",
|
43261 |
-
"stage6",
|
43262 |
-
"stage7",
|
43263 |
-
"stage8",
|
43264 |
-
"stage9",
|
43265 |
-
"stage10",
|
43266 |
-
"stage11",
|
43267 |
-
"stage12",
|
43268 |
-
"stage13",
|
43269 |
-
"stage14",
|
43270 |
-
"stage15",
|
43271 |
-
"stage16",
|
43272 |
-
"stage17",
|
43273 |
-
"stage18",
|
43274 |
-
"stage19",
|
43275 |
-
"stage20",
|
43276 |
-
"stage21",
|
43277 |
-
"stage22",
|
43278 |
-
"stage23",
|
43279 |
-
"stage24"
|
43280 |
],
|
43281 |
"suppress_tokens": null,
|
43282 |
"task_specific_params": null,
|
@@ -43290,26 +43277,21 @@
|
|
43290 |
"torch_dtype": "float32",
|
43291 |
"torchscript": false,
|
43292 |
"typical_p": 1.0,
|
43293 |
-
"
|
43294 |
-
"use_auxiliary_head": true,
|
43295 |
"use_bfloat16": false,
|
43296 |
-
"
|
43297 |
-
"use_mean_pooling": true,
|
43298 |
-
"use_relative_position_bias": true,
|
43299 |
-
"use_shared_relative_position_bias": false,
|
43300 |
-
"vocab_size": 8192
|
43301 |
},
|
43302 |
-
"eos_token_id":
|
43303 |
"is_encoder_decoder": true,
|
43304 |
"length_penalty": null,
|
43305 |
"max_length": null,
|
43306 |
"model_type": "vision-encoder-decoder",
|
43307 |
"no_repeat_ngram_size": null,
|
43308 |
"num_beams": null,
|
43309 |
-
"pad_token_id":
|
43310 |
"tie_word_embeddings": false,
|
43311 |
"torch_dtype": "float32",
|
43312 |
"transformers_version": "4.46.2",
|
43313 |
"use_cache": false,
|
43314 |
-
"vocab_size":
|
43315 |
}
|
|
|
4 |
],
|
5 |
"decoder": {
|
6 |
"_attn_implementation_autoset": true,
|
7 |
+
"_name_or_path": "NlpHUST/gpt2-vietnamese",
|
8 |
+
"activation_function": "gelu_new",
|
|
|
9 |
"add_cross_attention": true,
|
10 |
"architectures": [
|
11 |
+
"GPT2LMHeadModel"
|
12 |
],
|
13 |
+
"attn_pdrop": 0.0,
|
14 |
"bad_words_ids": null,
|
15 |
"begin_suppress_tokens": null,
|
16 |
+
"bos_token_id": 50256,
|
17 |
"chunk_size_feed_forward": 0,
|
|
|
18 |
"cross_attention_hidden_size": null,
|
19 |
+
"decoder_start_token_id": 50257,
|
|
|
|
|
|
|
|
|
|
|
20 |
"diversity_penalty": 0.0,
|
21 |
"do_sample": false,
|
|
|
22 |
"early_stopping": true,
|
23 |
+
"embd_pdrop": 0.0,
|
|
|
|
|
|
|
24 |
"encoder_no_repeat_ngram_size": 0,
|
25 |
+
"eos_token_id": 50257,
|
26 |
"exponential_decay_length_penalty": null,
|
27 |
"finetuning_task": null,
|
28 |
"forced_bos_token_id": null,
|
29 |
+
"forced_eos_token_id": null,
|
|
|
30 |
"id2label": {
|
31 |
"0": "LABEL_0",
|
32 |
"1": "LABEL_1"
|
33 |
},
|
34 |
+
"initializer_range": 0.02,
|
35 |
"is_decoder": true,
|
36 |
"is_encoder_decoder": false,
|
37 |
"label2id": {
|
38 |
"LABEL_0": 0,
|
39 |
"LABEL_1": 1
|
40 |
},
|
41 |
+
"layer_norm_epsilon": 1e-05,
|
42 |
"length_penalty": 2.0,
|
43 |
"max_length": 29,
|
|
|
44 |
"min_length": 0,
|
45 |
+
"model_type": "gpt2",
|
46 |
+
"n_ctx": 1024,
|
47 |
+
"n_embd": 768,
|
48 |
+
"n_head": 12,
|
49 |
+
"n_inner": null,
|
50 |
+
"n_layer": 12,
|
51 |
+
"n_positions": 1024,
|
52 |
"no_repeat_ngram_size": 3,
|
53 |
"num_beam_groups": 1,
|
54 |
"num_beams": 4,
|
|
|
55 |
"num_return_sequences": 1,
|
56 |
"output_attentions": false,
|
57 |
"output_hidden_states": false,
|
58 |
"output_scores": false,
|
59 |
+
"pad_token_id": 50258,
|
60 |
"prefix": null,
|
61 |
"problem_type": null,
|
62 |
"pruned_heads": {},
|
63 |
"remove_invalid_values": false,
|
64 |
+
"reorder_and_upcast_attn": false,
|
65 |
"repetition_penalty": 1.0,
|
66 |
+
"resid_pdrop": 0.0,
|
67 |
"return_dict": true,
|
68 |
"return_dict_in_generate": false,
|
69 |
+
"scale_attn_by_inverse_layer_idx": false,
|
70 |
+
"scale_attn_weights": true,
|
71 |
"sep_token_id": null,
|
72 |
+
"summary_activation": null,
|
73 |
+
"summary_first_dropout": 0.1,
|
74 |
+
"summary_proj_to_labels": true,
|
75 |
+
"summary_type": "cls_index",
|
76 |
+
"summary_use_proj": true,
|
77 |
"suppress_tokens": null,
|
78 |
+
"task_specific_params": {
|
79 |
+
"text-generation": {
|
80 |
+
"do_sample": true,
|
81 |
+
"max_length": 50
|
82 |
+
}
|
83 |
+
},
|
84 |
"temperature": 1.0,
|
85 |
"tf_legacy_loss": false,
|
86 |
"tie_encoder_decoder": false,
|
87 |
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
"top_k": 50,
|
90 |
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
"torchscript": false,
|
93 |
"typical_p": 1.0,
|
94 |
"use_bfloat16": false,
|
95 |
"use_cache": true,
|
96 |
+
"vocab_size": 50259
|
97 |
},
|
98 |
+
"decoder_start_token_id": 50257,
|
99 |
"early_stopping": null,
|
100 |
"encoder": {
|
101 |
"_attn_implementation_autoset": true,
|
102 |
+
"_name_or_path": "microsoft/swin-large-patch4-window12-384-in22k",
|
103 |
"add_cross_attention": false,
|
|
|
104 |
"architectures": [
|
105 |
+
"SwinForImageClassification"
|
106 |
],
|
107 |
"attention_probs_dropout_prob": 0.0,
|
|
|
|
|
|
|
|
|
108 |
"bad_words_ids": null,
|
109 |
"begin_suppress_tokens": null,
|
110 |
"bos_token_id": null,
|
111 |
"chunk_size_feed_forward": 0,
|
112 |
"cross_attention_hidden_size": null,
|
113 |
"decoder_start_token_id": null,
|
114 |
+
"depths": [
|
115 |
+
2,
|
116 |
+
2,
|
117 |
+
18,
|
118 |
+
2
|
119 |
+
],
|
120 |
"diversity_penalty": 0.0,
|
121 |
"do_sample": false,
|
122 |
"drop_path_rate": 0.1,
|
123 |
"early_stopping": false,
|
124 |
+
"embed_dim": 192,
|
125 |
"encoder_no_repeat_ngram_size": 0,
|
126 |
+
"encoder_stride": 32,
|
127 |
"eos_token_id": null,
|
128 |
"exponential_decay_length_penalty": null,
|
129 |
"finetuning_task": null,
|
|
|
131 |
"forced_eos_token_id": null,
|
132 |
"hidden_act": "gelu",
|
133 |
"hidden_dropout_prob": 0.0,
|
134 |
+
"hidden_size": 1536,
|
135 |
"id2label": {
|
136 |
"0": "organism, being",
|
137 |
"1": "benthos",
|
|
|
21975 |
"21841": "chipboard, hardboard",
|
21976 |
"21842": "knothole"
|
21977 |
},
|
21978 |
+
"image_size": 384,
|
21979 |
"initializer_range": 0.02,
|
|
|
21980 |
"is_decoder": false,
|
21981 |
"is_encoder_decoder": false,
|
21982 |
"label2id": {
|
|
|
43219 |
"zwieback, rusk, Brussels_biscuit, twice-baked_bread": 12729,
|
43220 |
"zygospore": 21630
|
43221 |
},
|
43222 |
+
"layer_norm_eps": 1e-05,
|
|
|
43223 |
"length_penalty": 1.0,
|
43224 |
"max_length": 20,
|
43225 |
"min_length": 0,
|
43226 |
+
"mlp_ratio": 4.0,
|
43227 |
+
"model_type": "swin",
|
43228 |
"no_repeat_ngram_size": 0,
|
|
|
43229 |
"num_beam_groups": 1,
|
43230 |
"num_beams": 1,
|
43231 |
"num_channels": 3,
|
43232 |
+
"num_heads": [
|
43233 |
+
6,
|
43234 |
+
12,
|
43235 |
+
24,
|
43236 |
+
48
|
43237 |
+
],
|
43238 |
+
"num_layers": 4,
|
43239 |
"num_return_sequences": 1,
|
43240 |
"out_features": [
|
43241 |
+
"stage4"
|
43242 |
],
|
43243 |
"out_indices": [
|
43244 |
+
4
|
43245 |
],
|
43246 |
"output_attentions": false,
|
43247 |
"output_hidden_states": false,
|
43248 |
"output_scores": false,
|
43249 |
"pad_token_id": null,
|
43250 |
+
"patch_size": 4,
|
43251 |
+
"path_norm": true,
|
|
|
|
|
|
|
|
|
|
|
43252 |
"prefix": null,
|
43253 |
"problem_type": null,
|
43254 |
"pruned_heads": {},
|
43255 |
+
"qkv_bias": true,
|
43256 |
"remove_invalid_values": false,
|
43257 |
"repetition_penalty": 1.0,
|
|
|
43258 |
"return_dict": true,
|
43259 |
"return_dict_in_generate": false,
|
|
|
43260 |
"sep_token_id": null,
|
43261 |
"stage_names": [
|
43262 |
"stem",
|
43263 |
"stage1",
|
43264 |
"stage2",
|
43265 |
"stage3",
|
43266 |
+
"stage4"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43267 |
],
|
43268 |
"suppress_tokens": null,
|
43269 |
"task_specific_params": null,
|
|
|
43277 |
"torch_dtype": "float32",
|
43278 |
"torchscript": false,
|
43279 |
"typical_p": 1.0,
|
43280 |
+
"use_absolute_embeddings": false,
|
|
|
43281 |
"use_bfloat16": false,
|
43282 |
+
"window_size": 12
|
|
|
|
|
|
|
|
|
43283 |
},
|
43284 |
+
"eos_token_id": 50257,
|
43285 |
"is_encoder_decoder": true,
|
43286 |
"length_penalty": null,
|
43287 |
"max_length": null,
|
43288 |
"model_type": "vision-encoder-decoder",
|
43289 |
"no_repeat_ngram_size": null,
|
43290 |
"num_beams": null,
|
43291 |
+
"pad_token_id": 50258,
|
43292 |
"tie_word_embeddings": false,
|
43293 |
"torch_dtype": "float32",
|
43294 |
"transformers_version": "4.46.2",
|
43295 |
"use_cache": false,
|
43296 |
+
"vocab_size": 50257
|
43297 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca33e1051c71c8ea69c5164c539dd6e008f63e75c9bd2621b784e310bcfce3aa
|
3 |
+
size 1400816568
|
preprocessor_config.json
CHANGED
@@ -1,24 +1,22 @@
|
|
1 |
{
|
2 |
-
"crop_size": 224,
|
3 |
-
"do_center_crop": false,
|
4 |
"do_normalize": true,
|
5 |
"do_rescale": true,
|
6 |
"do_resize": true,
|
7 |
"image_mean": [
|
8 |
-
0.
|
9 |
-
0.
|
10 |
-
0.
|
11 |
],
|
12 |
"image_processor_type": "ViTImageProcessor",
|
13 |
"image_std": [
|
14 |
-
0.
|
15 |
-
0.
|
16 |
-
0.
|
17 |
],
|
18 |
-
"resample":
|
19 |
"rescale_factor": 0.00392156862745098,
|
20 |
"size": {
|
21 |
-
"height":
|
22 |
-
"width":
|
23 |
}
|
24 |
}
|
|
|
1 |
{
|
|
|
|
|
2 |
"do_normalize": true,
|
3 |
"do_rescale": true,
|
4 |
"do_resize": true,
|
5 |
"image_mean": [
|
6 |
+
0.485,
|
7 |
+
0.456,
|
8 |
+
0.406
|
9 |
],
|
10 |
"image_processor_type": "ViTImageProcessor",
|
11 |
"image_std": [
|
12 |
+
0.229,
|
13 |
+
0.224,
|
14 |
+
0.225
|
15 |
],
|
16 |
+
"resample": 3,
|
17 |
"rescale_factor": 0.00392156862745098,
|
18 |
"size": {
|
19 |
+
"height": 384,
|
20 |
+
"width": 384
|
21 |
}
|
22 |
}
|
runs/Nov30_13-17-05_3a55e4dea1ac/events.out.tfevents.1732972630.3a55e4dea1ac.847.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ecbcadf998c153e97334047eae0ceb3d4a5162b8b24b9d1e244e7f543dd6d9f
|
3 |
+
size 1761368
|
runs/Nov30_13-18-52_3a55e4dea1ac/events.out.tfevents.1732972733.3a55e4dea1ac.4166.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbe16f6d7ed2097b0ee9fbb458e3312e11e89661f1d967f708dc2d15df38631d
|
3 |
+
size 1762242
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fa015fb227493245f67922f6a49ea97cc1ef194da74222fb821d691980f64bd
|
3 |
size 5432
|