|
--- |
|
language: |
|
- en |
|
library_name: transformers |
|
--- |
|
This is a second strip test. The goal is to strip GPT-2-XL down to the same number of transformer blocks as GPT-2-Small (12, i.e. `h.0` through `h.11`) to see what happens. |
|
|
|
These are the only tensors left, listed by name (I'm unsure of the exact terminology for these): |
|
``` |
|
wte.weight |
|
wpe.weight |
|
h.0.ln_1.weight |
|
h.0.ln_1.bias |
|
h.0.attn.bias |
|
h.0.attn.c_attn.weight |
|
h.0.attn.c_attn.bias |
|
h.0.attn.c_proj.weight |
|
h.0.attn.c_proj.bias |
|
h.0.ln_2.weight |
|
h.0.ln_2.bias |
|
h.0.mlp.c_fc.weight |
|
h.0.mlp.c_fc.bias |
|
h.0.mlp.c_proj.weight |
|
h.0.mlp.c_proj.bias |
|
h.1.ln_1.weight |
|
h.1.ln_1.bias |
|
h.1.attn.bias |
|
h.1.attn.c_attn.weight |
|
h.1.attn.c_attn.bias |
|
h.1.attn.c_proj.weight |
|
h.1.attn.c_proj.bias |
|
h.1.ln_2.weight |
|
h.1.ln_2.bias |
|
h.1.mlp.c_fc.weight |
|
h.1.mlp.c_fc.bias |
|
h.1.mlp.c_proj.weight |
|
h.1.mlp.c_proj.bias |
|
h.2.ln_1.weight |
|
h.2.ln_1.bias |
|
h.2.attn.bias |
|
h.2.attn.c_attn.weight |
|
h.2.attn.c_attn.bias |
|
h.2.attn.c_proj.weight |
|
h.2.attn.c_proj.bias |
|
h.2.ln_2.weight |
|
h.2.ln_2.bias |
|
h.2.mlp.c_fc.weight |
|
h.2.mlp.c_fc.bias |
|
h.2.mlp.c_proj.weight |
|
h.2.mlp.c_proj.bias |
|
h.3.ln_1.weight |
|
h.3.ln_1.bias |
|
h.3.attn.bias |
|
h.3.attn.c_attn.weight |
|
h.3.attn.c_attn.bias |
|
h.3.attn.c_proj.weight |
|
h.3.attn.c_proj.bias |
|
h.3.ln_2.weight |
|
h.3.ln_2.bias |
|
h.3.mlp.c_fc.weight |
|
h.3.mlp.c_fc.bias |
|
h.3.mlp.c_proj.weight |
|
h.3.mlp.c_proj.bias |
|
h.4.ln_1.weight |
|
h.4.ln_1.bias |
|
h.4.attn.bias |
|
h.4.attn.c_attn.weight |
|
h.4.attn.c_attn.bias |
|
h.4.attn.c_proj.weight |
|
h.4.attn.c_proj.bias |
|
h.4.ln_2.weight |
|
h.4.ln_2.bias |
|
h.4.mlp.c_fc.weight |
|
h.4.mlp.c_fc.bias |
|
h.4.mlp.c_proj.weight |
|
h.4.mlp.c_proj.bias |
|
h.5.ln_1.weight |
|
h.5.ln_1.bias |
|
h.5.attn.bias |
|
h.5.attn.c_attn.weight |
|
h.5.attn.c_attn.bias |
|
h.5.attn.c_proj.weight |
|
h.5.attn.c_proj.bias |
|
h.5.ln_2.weight |
|
h.5.ln_2.bias |
|
h.5.mlp.c_fc.weight |
|
h.5.mlp.c_fc.bias |
|
h.5.mlp.c_proj.weight |
|
h.5.mlp.c_proj.bias |
|
h.6.ln_1.weight |
|
h.6.ln_1.bias |
|
h.6.attn.bias |
|
h.6.attn.c_attn.weight |
|
h.6.attn.c_attn.bias |
|
h.6.attn.c_proj.weight |
|
h.6.attn.c_proj.bias |
|
h.6.ln_2.weight |
|
h.6.ln_2.bias |
|
h.6.mlp.c_fc.weight |
|
h.6.mlp.c_fc.bias |
|
h.6.mlp.c_proj.weight |
|
h.6.mlp.c_proj.bias |
|
h.7.ln_1.weight |
|
h.7.ln_1.bias |
|
h.7.attn.bias |
|
h.7.attn.c_attn.weight |
|
h.7.attn.c_attn.bias |
|
h.7.attn.c_proj.weight |
|
h.7.attn.c_proj.bias |
|
h.7.ln_2.weight |
|
h.7.ln_2.bias |
|
h.7.mlp.c_fc.weight |
|
h.7.mlp.c_fc.bias |
|
h.7.mlp.c_proj.weight |
|
h.7.mlp.c_proj.bias |
|
h.8.ln_1.weight |
|
h.8.ln_1.bias |
|
h.8.attn.bias |
|
h.8.attn.c_attn.weight |
|
h.8.attn.c_attn.bias |
|
h.8.attn.c_proj.weight |
|
h.8.attn.c_proj.bias |
|
h.8.ln_2.weight |
|
h.8.ln_2.bias |
|
h.8.mlp.c_fc.weight |
|
h.8.mlp.c_fc.bias |
|
h.8.mlp.c_proj.weight |
|
h.8.mlp.c_proj.bias |
|
h.9.ln_1.weight |
|
h.9.ln_1.bias |
|
h.9.attn.bias |
|
h.9.attn.c_attn.weight |
|
h.9.attn.c_attn.bias |
|
h.9.attn.c_proj.weight |
|
h.9.attn.c_proj.bias |
|
h.9.ln_2.weight |
|
h.9.ln_2.bias |
|
h.9.mlp.c_fc.weight |
|
h.9.mlp.c_fc.bias |
|
h.9.mlp.c_proj.weight |
|
h.9.mlp.c_proj.bias |
|
h.10.ln_1.weight |
|
h.10.ln_1.bias |
|
h.10.attn.bias |
|
h.10.attn.c_attn.weight |
|
h.10.attn.c_attn.bias |
|
h.10.attn.c_proj.weight |
|
h.10.attn.c_proj.bias |
|
h.10.ln_2.weight |
|
h.10.ln_2.bias |
|
h.10.mlp.c_fc.weight |
|
h.10.mlp.c_fc.bias |
|
h.10.mlp.c_proj.weight |
|
h.10.mlp.c_proj.bias |
|
h.11.ln_1.weight |
|
h.11.ln_1.bias |
|
h.11.attn.bias |
|
h.11.attn.c_attn.weight |
|
h.11.attn.c_attn.bias |
|
h.11.attn.c_proj.weight |
|
h.11.attn.c_proj.bias |
|
h.11.ln_2.weight |
|
h.11.ln_2.bias |
|
h.11.mlp.c_fc.weight |
|
h.11.mlp.c_fc.bias |
|
h.11.mlp.c_proj.weight |
|
h.11.mlp.c_proj.bias |
|
ln_f.weight |
|
ln_f.bias |
|
``` |