Upload model

Browse files

Files changed (5) hide show

config.json +35 -0
model-00001-of-00003.safetensors +3 -0
model-00002-of-00003.safetensors +3 -0
model-00003-of-00003.safetensors +3 -0
model.safetensors.index.json +71 -0

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "hf-internal-testing/tiny-random-GPT2Model",
+  "activation_function": "gelu",
+  "architectures": [
+    "GPT2Model"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 0,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 0,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_embd": 32,
+  "n_head": 4,
+  "n_inner": 37,
+  "n_layer": 5,
+  "n_positions": 512,
+  "pad_token_id": 1023,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.0.dev0",
+  "type_vocab_size": 16,
+  "use_cache": true,
+  "vocab_size": 1024
+}

model-00001-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04b8be9038e017518157c113665b09f1e4e15fe0e07d9b8e39a300071c6e3f11
+size 131192

model-00002-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88e8987451ee2cf7a17dee190516fbdce905a37f84c8a4850200327fa2904c2b
+size 150404

model-00003-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d21159c7de8e2feae780536f040209133b55136aa892ee9ce991cb5a83ce46f
+size 56256

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "metadata": {
+    "total_size": 332644
+  },
+  "weight_map": {
+    "h.0.attn.c_attn.bias": "model-00002-of-00003.safetensors",
+    "h.0.attn.c_attn.weight": "model-00002-of-00003.safetensors",
+    "h.0.attn.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.0.attn.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.0.ln_1.bias": "model-00002-of-00003.safetensors",
+    "h.0.ln_1.weight": "model-00002-of-00003.safetensors",
+    "h.0.ln_2.bias": "model-00002-of-00003.safetensors",
+    "h.0.ln_2.weight": "model-00002-of-00003.safetensors",
+    "h.0.mlp.c_fc.bias": "model-00002-of-00003.safetensors",
+    "h.0.mlp.c_fc.weight": "model-00002-of-00003.safetensors",
+    "h.0.mlp.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.0.mlp.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.1.attn.c_attn.bias": "model-00002-of-00003.safetensors",
+    "h.1.attn.c_attn.weight": "model-00002-of-00003.safetensors",
+    "h.1.attn.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.1.attn.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.1.ln_1.bias": "model-00002-of-00003.safetensors",
+    "h.1.ln_1.weight": "model-00002-of-00003.safetensors",
+    "h.1.ln_2.bias": "model-00002-of-00003.safetensors",
+    "h.1.ln_2.weight": "model-00002-of-00003.safetensors",
+    "h.1.mlp.c_fc.bias": "model-00002-of-00003.safetensors",
+    "h.1.mlp.c_fc.weight": "model-00002-of-00003.safetensors",
+    "h.1.mlp.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.1.mlp.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.2.attn.c_attn.bias": "model-00002-of-00003.safetensors",
+    "h.2.attn.c_attn.weight": "model-00002-of-00003.safetensors",
+    "h.2.attn.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.2.attn.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.2.ln_1.bias": "model-00002-of-00003.safetensors",
+    "h.2.ln_1.weight": "model-00002-of-00003.safetensors",
+    "h.2.ln_2.bias": "model-00002-of-00003.safetensors",
+    "h.2.ln_2.weight": "model-00002-of-00003.safetensors",
+    "h.2.mlp.c_fc.bias": "model-00002-of-00003.safetensors",
+    "h.2.mlp.c_fc.weight": "model-00002-of-00003.safetensors",
+    "h.2.mlp.c_proj.bias": "model-00002-of-00003.safetensors",
+    "h.2.mlp.c_proj.weight": "model-00002-of-00003.safetensors",
+    "h.3.attn.c_attn.bias": "model-00003-of-00003.safetensors",
+    "h.3.attn.c_attn.weight": "model-00003-of-00003.safetensors",
+    "h.3.attn.c_proj.bias": "model-00003-of-00003.safetensors",
+    "h.3.attn.c_proj.weight": "model-00003-of-00003.safetensors",
+    "h.3.ln_1.bias": "model-00002-of-00003.safetensors",
+    "h.3.ln_1.weight": "model-00002-of-00003.safetensors",
+    "h.3.ln_2.bias": "model-00003-of-00003.safetensors",
+    "h.3.ln_2.weight": "model-00003-of-00003.safetensors",
+    "h.3.mlp.c_fc.bias": "model-00003-of-00003.safetensors",
+    "h.3.mlp.c_fc.weight": "model-00003-of-00003.safetensors",
+    "h.3.mlp.c_proj.bias": "model-00003-of-00003.safetensors",
+    "h.3.mlp.c_proj.weight": "model-00003-of-00003.safetensors",
+    "h.4.attn.c_attn.bias": "model-00003-of-00003.safetensors",
+    "h.4.attn.c_attn.weight": "model-00003-of-00003.safetensors",
+    "h.4.attn.c_proj.bias": "model-00003-of-00003.safetensors",
+    "h.4.attn.c_proj.weight": "model-00003-of-00003.safetensors",
+    "h.4.ln_1.bias": "model-00003-of-00003.safetensors",
+    "h.4.ln_1.weight": "model-00003-of-00003.safetensors",
+    "h.4.ln_2.bias": "model-00003-of-00003.safetensors",
+    "h.4.ln_2.weight": "model-00003-of-00003.safetensors",
+    "h.4.mlp.c_fc.bias": "model-00003-of-00003.safetensors",
+    "h.4.mlp.c_fc.weight": "model-00003-of-00003.safetensors",
+    "h.4.mlp.c_proj.bias": "model-00003-of-00003.safetensors",
+    "h.4.mlp.c_proj.weight": "model-00003-of-00003.safetensors",
+    "ln_f.bias": "model-00003-of-00003.safetensors",
+    "ln_f.weight": "model-00003-of-00003.safetensors",
+    "wpe.weight": "model-00002-of-00003.safetensors",
+    "wte.weight": "model-00001-of-00003.safetensors"
+  }
+}