bryanmildort committed on
Commit
a07eb96
1 Parent(s): d311cdf

Upload 9 files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "EleutherAI/gpt-neo-2.7B",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTNeoForCausalLM"
@@ -75,7 +75,7 @@
75
  },
76
  "tokenizer_class": "GPT2Tokenizer",
77
  "torch_dtype": "float32",
78
- "transformers_version": "4.26.1",
79
  "use_cache": true,
80
  "vocab_size": 50258,
81
  "window_size": 256
 
1
  {
2
+ "_name_or_path": "./notes-nlp/gpt_neo_notes_pre",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTNeoForCausalLM"
 
75
  },
76
  "tokenizer_class": "GPT2Tokenizer",
77
  "torch_dtype": "float32",
78
+ "transformers_version": "4.27.2",
79
  "use_cache": true,
80
  "vocab_size": 50258,
81
  "window_size": 256
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 50256,
4
  "eos_token_id": 50256,
5
- "transformers_version": "4.26.1"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 50256,
4
  "eos_token_id": 50256,
5
+ "transformers_version": "4.27.2"
6
  }
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62a663d237f36617806f9476302edb76be432a7c8bfb6e8d984211a09fa4bf73
3
- size 9996984169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8998bd59cf93fe14a866d72a521e63f67ed1b6727bc70496bb4e76fde1ceb84
3
+ size 10106068835
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aae9a578276b8a929c15e85357749068bbe10173f97fc8ddd79d86d78ccb682d
3
- size 742638515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cd5c78a022746235382eef2800dd18a0eb542f230d7abb6d17bead95c937a2
3
+ size 633553991
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 10739458176
4
  },
5
  "weight_map": {
6
  "transformer.h.0.attn.attention.bias": "pytorch_model-00001-of-00002.bin",
@@ -346,8 +346,8 @@
346
  "transformer.h.29.ln_2.weight": "pytorch_model-00001-of-00002.bin",
347
  "transformer.h.29.mlp.c_fc.bias": "pytorch_model-00001-of-00002.bin",
348
  "transformer.h.29.mlp.c_fc.weight": "pytorch_model-00001-of-00002.bin",
349
- "transformer.h.29.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
350
- "transformer.h.29.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
351
  "transformer.h.3.attn.attention.bias": "pytorch_model-00001-of-00002.bin",
352
  "transformer.h.3.attn.attention.k_proj.weight": "pytorch_model-00001-of-00002.bin",
353
  "transformer.h.3.attn.attention.masked_bias": "pytorch_model-00001-of-00002.bin",
@@ -363,15 +363,15 @@
363
  "transformer.h.3.mlp.c_fc.weight": "pytorch_model-00001-of-00002.bin",
364
  "transformer.h.3.mlp.c_proj.bias": "pytorch_model-00001-of-00002.bin",
365
  "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
366
- "transformer.h.30.attn.attention.bias": "pytorch_model-00002-of-00002.bin",
367
  "transformer.h.30.attn.attention.k_proj.weight": "pytorch_model-00002-of-00002.bin",
368
- "transformer.h.30.attn.attention.masked_bias": "pytorch_model-00002-of-00002.bin",
369
  "transformer.h.30.attn.attention.out_proj.bias": "pytorch_model-00002-of-00002.bin",
370
  "transformer.h.30.attn.attention.out_proj.weight": "pytorch_model-00002-of-00002.bin",
371
  "transformer.h.30.attn.attention.q_proj.weight": "pytorch_model-00002-of-00002.bin",
372
  "transformer.h.30.attn.attention.v_proj.weight": "pytorch_model-00002-of-00002.bin",
373
- "transformer.h.30.ln_1.bias": "pytorch_model-00002-of-00002.bin",
374
- "transformer.h.30.ln_1.weight": "pytorch_model-00002-of-00002.bin",
375
  "transformer.h.30.ln_2.bias": "pytorch_model-00002-of-00002.bin",
376
  "transformer.h.30.ln_2.weight": "pytorch_model-00002-of-00002.bin",
377
  "transformer.h.30.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
 
1
  {
2
  "metadata": {
3
+ "total_size": 10622017664.0
4
  },
5
  "weight_map": {
6
  "transformer.h.0.attn.attention.bias": "pytorch_model-00001-of-00002.bin",
 
346
  "transformer.h.29.ln_2.weight": "pytorch_model-00001-of-00002.bin",
347
  "transformer.h.29.mlp.c_fc.bias": "pytorch_model-00001-of-00002.bin",
348
  "transformer.h.29.mlp.c_fc.weight": "pytorch_model-00001-of-00002.bin",
349
+ "transformer.h.29.mlp.c_proj.bias": "pytorch_model-00001-of-00002.bin",
350
+ "transformer.h.29.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
351
  "transformer.h.3.attn.attention.bias": "pytorch_model-00001-of-00002.bin",
352
  "transformer.h.3.attn.attention.k_proj.weight": "pytorch_model-00001-of-00002.bin",
353
  "transformer.h.3.attn.attention.masked_bias": "pytorch_model-00001-of-00002.bin",
 
363
  "transformer.h.3.mlp.c_fc.weight": "pytorch_model-00001-of-00002.bin",
364
  "transformer.h.3.mlp.c_proj.bias": "pytorch_model-00001-of-00002.bin",
365
  "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
366
+ "transformer.h.30.attn.attention.bias": "pytorch_model-00001-of-00002.bin",
367
  "transformer.h.30.attn.attention.k_proj.weight": "pytorch_model-00002-of-00002.bin",
368
+ "transformer.h.30.attn.attention.masked_bias": "pytorch_model-00001-of-00002.bin",
369
  "transformer.h.30.attn.attention.out_proj.bias": "pytorch_model-00002-of-00002.bin",
370
  "transformer.h.30.attn.attention.out_proj.weight": "pytorch_model-00002-of-00002.bin",
371
  "transformer.h.30.attn.attention.q_proj.weight": "pytorch_model-00002-of-00002.bin",
372
  "transformer.h.30.attn.attention.v_proj.weight": "pytorch_model-00002-of-00002.bin",
373
+ "transformer.h.30.ln_1.bias": "pytorch_model-00001-of-00002.bin",
374
+ "transformer.h.30.ln_1.weight": "pytorch_model-00001-of-00002.bin",
375
  "transformer.h.30.ln_2.bias": "pytorch_model-00002-of-00002.bin",
376
  "transformer.h.30.ln_2.weight": "pytorch_model-00002-of-00002.bin",
377
  "transformer.h.30.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e09b8b7058c768585bdbaa8b0f19a51a6261bdd64f78231fb559ff08715ea527
3
- size 3451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1366c71c60e969557fe221b2103dac00c9b4c2153c6081f264bda847cd37c95
3
+ size 3515