Training in progress, step 10000

Files changed (4) hide show

config.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "EncoderDecoderModel"
   ],
   "decoder": {
-    "_name_or_path": "gpt2-medium",
     "activation_function": "gelu_new",
     "add_cross_attention": true,
     "architectures": [
@@ -43,12 +43,11 @@
     "min_length": 0,
     "model_type": "gpt2",
     "n_ctx": 1024,
-    "n_embd": 1024,
-    "n_head": 16,
     "n_inner": null,
-    "n_layer": 24,
     "n_positions": 1024,
-    "n_special": 0,
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
@@ -57,7 +56,6 @@
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
-    "predict_special_tokens": true,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
@@ -99,7 +97,7 @@
   "decoder_start_token_id": 50256,
   "early_stopping": true,
   "encoder": {
-    "_name_or_path": "gpt2-medium",
     "activation_function": "gelu_new",
     "add_cross_attention": false,
     "architectures": [
@@ -139,12 +137,11 @@
     "min_length": 0,
     "model_type": "gpt2",
     "n_ctx": 1024,
-    "n_embd": 1024,
-    "n_head": 16,
     "n_inner": null,
-    "n_layer": 24,
     "n_positions": 1024,
-    "n_special": 0,
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
@@ -153,7 +150,6 @@
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
-    "predict_special_tokens": true,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},

     "EncoderDecoderModel"
   ],
   "decoder": {
+    "_name_or_path": "gpt2",
     "activation_function": "gelu_new",
     "add_cross_attention": true,
     "architectures": [
     "min_length": 0,
     "model_type": "gpt2",
     "n_ctx": 1024,
+    "n_embd": 768,
+    "n_head": 12,
     "n_inner": null,
+    "n_layer": 12,
     "n_positions": 1024,
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
   "decoder_start_token_id": 50256,
   "early_stopping": true,
   "encoder": {
+    "_name_or_path": "gpt2",
     "activation_function": "gelu_new",
     "add_cross_attention": false,
     "architectures": [
     "min_length": 0,
     "model_type": "gpt2",
     "n_ctx": 1024,
+    "n_embd": 768,
+    "n_head": 12,
     "n_inner": null,
+    "n_layer": 12,
     "n_positions": 1024,
     "no_repeat_ngram_size": 0,
     "num_beam_groups": 1,
     "num_beams": 1,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:288b077c28664ad5927da68b425eb9443783dd3cfcce26158bba6c8250a019ce
-size 3241914192

 version https://git-lfs.github.com/spec/v1
+oid sha256:156baf145c6c3138bb884a0697f63f3ed8eaf7c80e9aac30595fccf2f0835c85
+size 1109028072

runs/Mar12_19-24-38_jn020/events.out.tfevents.1741807628.jn020.398992.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80517c41d1357f5739a2d8930bf5f45eed02cc213e2d7a48b85117681a820ef8
-size 17830

 version https://git-lfs.github.com/spec/v1
+oid sha256:d765591789b9f7b6e25f78c782cdc7a40931653af3e449479d1299a1b753e63b
+size 18726

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4ee502e259362ba17c64127370d3ac7405bbaafc9a77a5bc47fea3fcee1a99a
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:04ec21e70451935fe7ccf4f25e474b7a4ce1a008a140f12928eefecbc9a40e33
 size 5432