Spaces:
Build error
Build error
Delete cs772_proj
Browse files- cs772_proj/bert_base/checkpoint-3848/config.json +0 -37
- cs772_proj/bert_base/checkpoint-3848/model.safetensors +0 -3
- cs772_proj/bert_base/checkpoint-3848/optimizer.pt +0 -3
- cs772_proj/bert_base/checkpoint-3848/rng_state.pth +0 -3
- cs772_proj/bert_base/checkpoint-3848/scheduler.pt +0 -3
- cs772_proj/bert_base/checkpoint-3848/special_tokens_map.json +0 -7
- cs772_proj/bert_base/checkpoint-3848/tokenizer.json +0 -0
- cs772_proj/bert_base/checkpoint-3848/tokenizer_config.json +0 -55
- cs772_proj/bert_base/checkpoint-3848/trainer_state.json +0 -85
- cs772_proj/bert_base/checkpoint-3848/training_args.bin +0 -3
- cs772_proj/bert_base/checkpoint-3848/vocab.txt +0 -0
- cs772_proj/cs772_bert/.gitattributes +0 -35
- cs772_proj/cs772_bert/README.md +0 -13
- cs772_proj/cs772_project.ipynb +0 -0
- cs772_proj/demo_gradio_distilbert.py +0 -372
- cs772_proj/requirements.txt +0 -123
- cs772_proj/tutorial_data/pyvene_rome_attention_output.csv +0 -73
- cs772_proj/tutorial_data/pyvene_rome_attention_output.pdf +0 -0
- cs772_proj/tutorial_data/pyvene_rome_attention_output.png +0 -0
- cs772_proj/tutorial_data/pyvene_rome_block_output.csv +0 -73
- cs772_proj/tutorial_data/pyvene_rome_block_output.pdf +0 -0
- cs772_proj/tutorial_data/pyvene_rome_block_output.png +0 -0
- cs772_proj/tutorial_data/pyvene_rome_mlp_activation.csv +0 -73
- cs772_proj/tutorial_data/pyvene_rome_mlp_activation.pdf +0 -0
- cs772_proj/tutorial_data/pyvene_rome_mlp_activation.png +0 -0
cs772_proj/bert_base/checkpoint-3848/config.json
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "bert-base-uncased",
|
3 |
-
"architectures": [
|
4 |
-
"BertForSequenceClassification"
|
5 |
-
],
|
6 |
-
"attention_probs_dropout_prob": 0.1,
|
7 |
-
"classifier_dropout": null,
|
8 |
-
"gradient_checkpointing": false,
|
9 |
-
"hidden_act": "gelu",
|
10 |
-
"hidden_dropout_prob": 0.1,
|
11 |
-
"hidden_size": 768,
|
12 |
-
"id2label": {
|
13 |
-
"0": "hate",
|
14 |
-
"1": "normal",
|
15 |
-
"2": "offense"
|
16 |
-
},
|
17 |
-
"initializer_range": 0.02,
|
18 |
-
"intermediate_size": 3072,
|
19 |
-
"label2id": {
|
20 |
-
"hate": 0,
|
21 |
-
"normal": 1,
|
22 |
-
"offense": 2
|
23 |
-
},
|
24 |
-
"layer_norm_eps": 1e-12,
|
25 |
-
"max_position_embeddings": 512,
|
26 |
-
"model_type": "bert",
|
27 |
-
"num_attention_heads": 12,
|
28 |
-
"num_hidden_layers": 12,
|
29 |
-
"pad_token_id": 0,
|
30 |
-
"position_embedding_type": "absolute",
|
31 |
-
"problem_type": "single_label_classification",
|
32 |
-
"torch_dtype": "float32",
|
33 |
-
"transformers_version": "4.39.3",
|
34 |
-
"type_vocab_size": 2,
|
35 |
-
"use_cache": true,
|
36 |
-
"vocab_size": 30522
|
37 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/model.safetensors
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f92169bcbaeee93e4c65d5f6b7af90505d8a754096d1b7d1ea70cf290cc79690
|
3 |
-
size 437961724
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:306504e0a7e6e3e27b15a81346a5f70e8941e6ec7085d33d70693b13cbba1e8b
|
3 |
-
size 876044538
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/rng_state.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5f8c99e888295714e206a5143fed689d23c7ae28a194ff83078714c2d99f94ab
|
3 |
-
size 14244
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9258b30f99447d96f005979b906a97fe44e711e5ecb53f5be292707492c5ef45
|
3 |
-
size 1064
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/special_tokens_map.json
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cls_token": "[CLS]",
|
3 |
-
"mask_token": "[MASK]",
|
4 |
-
"pad_token": "[PAD]",
|
5 |
-
"sep_token": "[SEP]",
|
6 |
-
"unk_token": "[UNK]"
|
7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
cs772_proj/bert_base/checkpoint-3848/tokenizer_config.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"added_tokens_decoder": {
|
3 |
-
"0": {
|
4 |
-
"content": "[PAD]",
|
5 |
-
"lstrip": false,
|
6 |
-
"normalized": false,
|
7 |
-
"rstrip": false,
|
8 |
-
"single_word": false,
|
9 |
-
"special": true
|
10 |
-
},
|
11 |
-
"100": {
|
12 |
-
"content": "[UNK]",
|
13 |
-
"lstrip": false,
|
14 |
-
"normalized": false,
|
15 |
-
"rstrip": false,
|
16 |
-
"single_word": false,
|
17 |
-
"special": true
|
18 |
-
},
|
19 |
-
"101": {
|
20 |
-
"content": "[CLS]",
|
21 |
-
"lstrip": false,
|
22 |
-
"normalized": false,
|
23 |
-
"rstrip": false,
|
24 |
-
"single_word": false,
|
25 |
-
"special": true
|
26 |
-
},
|
27 |
-
"102": {
|
28 |
-
"content": "[SEP]",
|
29 |
-
"lstrip": false,
|
30 |
-
"normalized": false,
|
31 |
-
"rstrip": false,
|
32 |
-
"single_word": false,
|
33 |
-
"special": true
|
34 |
-
},
|
35 |
-
"103": {
|
36 |
-
"content": "[MASK]",
|
37 |
-
"lstrip": false,
|
38 |
-
"normalized": false,
|
39 |
-
"rstrip": false,
|
40 |
-
"single_word": false,
|
41 |
-
"special": true
|
42 |
-
}
|
43 |
-
},
|
44 |
-
"clean_up_tokenization_spaces": true,
|
45 |
-
"cls_token": "[CLS]",
|
46 |
-
"do_lower_case": true,
|
47 |
-
"mask_token": "[MASK]",
|
48 |
-
"model_max_length": 512,
|
49 |
-
"pad_token": "[PAD]",
|
50 |
-
"sep_token": "[SEP]",
|
51 |
-
"strip_accents": null,
|
52 |
-
"tokenize_chinese_chars": true,
|
53 |
-
"tokenizer_class": "BertTokenizer",
|
54 |
-
"unk_token": "[UNK]"
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/trainer_state.json
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"best_metric": 0.7247874736785889,
|
3 |
-
"best_model_checkpoint": "bert_base/checkpoint-1924",
|
4 |
-
"epoch": 4.0,
|
5 |
-
"eval_steps": 500,
|
6 |
-
"global_step": 3848,
|
7 |
-
"is_hyper_param_search": false,
|
8 |
-
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero": true,
|
10 |
-
"log_history": [
|
11 |
-
{
|
12 |
-
"epoch": 1.0,
|
13 |
-
"grad_norm": 13.14333724975586,
|
14 |
-
"learning_rate": 8.000000000000001e-06,
|
15 |
-
"loss": 0.8204,
|
16 |
-
"step": 962
|
17 |
-
},
|
18 |
-
{
|
19 |
-
"epoch": 1.0,
|
20 |
-
"eval_f1": 0.6669116037121897,
|
21 |
-
"eval_loss": 0.7362112402915955,
|
22 |
-
"eval_runtime": 2.6989,
|
23 |
-
"eval_samples_per_second": 712.147,
|
24 |
-
"eval_steps_per_second": 44.833,
|
25 |
-
"step": 962
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"epoch": 2.0,
|
29 |
-
"grad_norm": 15.062124252319336,
|
30 |
-
"learning_rate": 6e-06,
|
31 |
-
"loss": 0.664,
|
32 |
-
"step": 1924
|
33 |
-
},
|
34 |
-
{
|
35 |
-
"epoch": 2.0,
|
36 |
-
"eval_f1": 0.6714560068474462,
|
37 |
-
"eval_loss": 0.7247874736785889,
|
38 |
-
"eval_runtime": 1.9229,
|
39 |
-
"eval_samples_per_second": 999.531,
|
40 |
-
"eval_steps_per_second": 62.926,
|
41 |
-
"step": 1924
|
42 |
-
},
|
43 |
-
{
|
44 |
-
"epoch": 3.0,
|
45 |
-
"grad_norm": 30.878219604492188,
|
46 |
-
"learning_rate": 4.000000000000001e-06,
|
47 |
-
"loss": 0.5662,
|
48 |
-
"step": 2886
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"epoch": 3.0,
|
52 |
-
"eval_f1": 0.6630607481681304,
|
53 |
-
"eval_loss": 0.7806704044342041,
|
54 |
-
"eval_runtime": 2.4376,
|
55 |
-
"eval_samples_per_second": 788.47,
|
56 |
-
"eval_steps_per_second": 49.638,
|
57 |
-
"step": 2886
|
58 |
-
},
|
59 |
-
{
|
60 |
-
"epoch": 4.0,
|
61 |
-
"grad_norm": 21.18539810180664,
|
62 |
-
"learning_rate": 2.0000000000000003e-06,
|
63 |
-
"loss": 0.4919,
|
64 |
-
"step": 3848
|
65 |
-
},
|
66 |
-
{
|
67 |
-
"epoch": 4.0,
|
68 |
-
"eval_f1": 0.6878731135692044,
|
69 |
-
"eval_loss": 0.7753087282180786,
|
70 |
-
"eval_runtime": 2.3502,
|
71 |
-
"eval_samples_per_second": 817.816,
|
72 |
-
"eval_steps_per_second": 51.486,
|
73 |
-
"step": 3848
|
74 |
-
}
|
75 |
-
],
|
76 |
-
"logging_steps": 500,
|
77 |
-
"max_steps": 4810,
|
78 |
-
"num_input_tokens_seen": 0,
|
79 |
-
"num_train_epochs": 5,
|
80 |
-
"save_steps": 500,
|
81 |
-
"total_flos": 1892113337449692.0,
|
82 |
-
"train_batch_size": 16,
|
83 |
-
"trial_name": null,
|
84 |
-
"trial_params": null
|
85 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f562e068a01b97d232a8b2fbb9f51b80ec2d1eedd0dd5a99be9c3f9af0bcbeb1
|
3 |
-
size 4856
|
|
|
|
|
|
|
|
cs772_proj/bert_base/checkpoint-3848/vocab.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|
cs772_proj/cs772_bert/.gitattributes
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/cs772_bert/README.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Cs772 Bert
|
3 |
-
emoji: 🌖
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: gray
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.29.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/cs772_project.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
cs772_proj/demo_gradio_distilbert.py
DELETED
@@ -1,372 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
3 |
-
import transformers
|
4 |
-
import torch
|
5 |
-
#import neptune
|
6 |
-
#from knockknock import slack_sender
|
7 |
-
from transformers import *
|
8 |
-
#import glob
|
9 |
-
from transformers import BertTokenizer
|
10 |
-
from transformers import BertForSequenceClassification, AdamW, BertConfig
|
11 |
-
import random
|
12 |
-
import pandas as pd
|
13 |
-
from transformers import BertTokenizer
|
14 |
-
#from Models.utils import masked_cross_entropy,fix_the_random,format_time,save_normal_model,save_bert_model
|
15 |
-
from sklearn.metrics import accuracy_score,f1_score
|
16 |
-
from tqdm import tqdm
|
17 |
-
'''from TensorDataset.datsetSplitter import createDatasetSplit
|
18 |
-
from TensorDataset.dataLoader import combine_features
|
19 |
-
from Preprocess.dataCollect import collect_data,set_name'''
|
20 |
-
from sklearn.metrics import accuracy_score,f1_score,roc_auc_score,recall_score,precision_score
|
21 |
-
import matplotlib.pyplot as plt
|
22 |
-
import time
|
23 |
-
import os
|
24 |
-
from transformers import BertTokenizer
|
25 |
-
#import GPUtil
|
26 |
-
from sklearn.utils import class_weight
|
27 |
-
#import json
|
28 |
-
#from Models.bertModels import *
|
29 |
-
#from Models.otherModels import *
|
30 |
-
import sys
|
31 |
-
#import time
|
32 |
-
#from waiting import wait
|
33 |
-
from sklearn.preprocessing import LabelEncoder
|
34 |
-
import numpy as np
|
35 |
-
#import threading
|
36 |
-
#import argparse
|
37 |
-
#import ast
|
38 |
-
|
39 |
-
#from manual_training_inference import select_model
|
40 |
-
#from Models.utils import save_normal_model,save_bert_model,load_model
|
41 |
-
#from Models.utils import return_params
|
42 |
-
from transformers import DistilBertTokenizer
|
43 |
-
|
44 |
-
|
45 |
-
#from TensorDataset.dataLoader import custom_att_masks
|
46 |
-
#from keras.preprocessing.sequence import pad_sequences
|
47 |
-
|
48 |
-
#import seaborn as sns
|
49 |
-
import matplotlib.pyplot as plt
|
50 |
-
import numpy as np
|
51 |
-
import PIL.Image as Image
|
52 |
-
from torch import nn
|
53 |
-
|
54 |
-
from pyvene import embed_to_distrib, top_vals, format_token
|
55 |
-
from pyvene import (
|
56 |
-
IntervenableModel,
|
57 |
-
VanillaIntervention, Intervention,
|
58 |
-
RepresentationConfig,
|
59 |
-
IntervenableConfig,
|
60 |
-
ConstantSourceIntervention,
|
61 |
-
LocalistRepresentationIntervention
|
62 |
-
)
|
63 |
-
from pyvene import create_gpt2
|
64 |
-
#%config InlineBackend.figure_formats = ['svg']
|
65 |
-
from plotnine import (
|
66 |
-
ggplot,
|
67 |
-
geom_tile,
|
68 |
-
aes,
|
69 |
-
facet_wrap,
|
70 |
-
theme,
|
71 |
-
element_text,
|
72 |
-
geom_bar,
|
73 |
-
geom_hline,
|
74 |
-
scale_y_log10,
|
75 |
-
xlab, ylab, ylim,
|
76 |
-
scale_y_discrete, scale_y_continuous, ggsave
|
77 |
-
)
|
78 |
-
from plotnine.scales import scale_y_reverse, scale_fill_cmap
|
79 |
-
from tqdm import tqdm
|
80 |
-
# Device shared by module-level helpers (NoiseIntervention moves its noise
# tensor onto it). The former `global device` statement was dropped: `global`
# has no effect at module scope, where every assignment is already global.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
82 |
-
def create_bert(cache_dir=None):
    """Load the fine-tuned BERT classifier with its config and tokenizer.

    NOTE: a second, identical ``create_bert`` is defined later in this module
    and rebinds the name, so that later definition is the one callers get.

    Args:
        cache_dir: Optional cache directory forwarded to ``from_pretrained``
            for both the tokenizer and the model. ``None`` keeps the library
            default.

    Returns:
        A ``(config, tokenizer, model)`` tuple: the ``BertConfig`` read from
        ``./bert_base/checkpoint-3848/config.json``, the ``bert-base-uncased``
        tokenizer, and the sequence-classification model loaded from
        ``./bert_base/checkpoint-3848``. Both paths are relative to the
        current working directory.
    """
    from transformers import BertConfig

    config = BertConfig.from_pretrained("./bert_base/checkpoint-3848/config.json")
    # Fix: cache_dir used to be passed to the model only; the tokenizer is
    # fetched by hub name, so it should honour the same cache directory.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", cache_dir=cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        "./bert_base/checkpoint-3848", config=config, cache_dir=cache_dir
    )
    print("loaded model")
    return config, tokenizer, model
|
91 |
-
def interpret(text,label):
    """Run causal tracing on the BERT classifier for ``text`` and plot the results.

    For each stream ("block_output", "mlp_activation", "attention_output"),
    every layer and every token position, a run is made in which noise is
    applied to the layer-0 block input at token positions 0-3 and the stream
    is restored at that position from a run of the same, unmodified input
    (window of 1 layer for "block_output", 5 layers otherwise). The softmax
    probability of the class selected by ``label`` is recorded.

    Per-stream results are written to ``./tutorial_data/pyvene_rome_<stream>.csv``
    and rendered as heat maps ``./tutorial_data/pyvene_rome_<stream>.png``.

    NOTE(review): positions 0-3 are hard-coded, so inputs that tokenize to
    fewer than four tokens are presumably unsupported -- confirm.

    Args:
        text: Sentence to analyse.
        label: Class whose probability is tracked: "hate" -> 0, "normal" -> 1,
            anything else -> 2.

    Returns:
        Tuple of PNG paths in the order
        (mlp_activation, block_output, attention_output).
    """
    titles={
        "block_output": "single restored layer in BERT",
        "mlp_activation": "center of interval of 5 patched mlp layer",
        "attention_output": "center of interval of 5 patched attn layer"
    }

    colors={
        "block_output": "Purples",
        "mlp_activation": "Greens",
        "attention_output": "Reds"
    }

    streams = ["block_output", "mlp_activation", "attention_output"]

    # Fix: to_csv does not create missing parent directories, so make sure the
    # output folder exists before the first write.
    os.makedirs("./tutorial_data", exist_ok=True)

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    config, tokenizer, gpt = create_bert()
    gpt.to(device)

    base = tokenizer(text, return_tensors="pt").to(device)
    base_token = tokenizer.convert_ids_to_tokens(base['input_ids'][0])

    # Index of the class whose probability is tracked; unknown labels fall
    # back to class 2, exactly as before.
    if label=="hate":
        l = 0
    elif label=="normal":
        l = 1
    else:
        l = 2

    # The plain forward pass and the corruption-only run that used to sit here
    # were removed: their outputs were never read.

    num_layers = gpt.config.num_hidden_layers
    for stream in streams:
        data = []
        window = 1 if stream == "block_output" else 5
        for layer_i in tqdm(range(num_layers)):
            for pos_i in range(len(base_token)):
                # Fix: forward the model's real layer count instead of relying
                # on the helper's default of 12.
                config = restore_corrupted_with_interval_config(
                    layer_i, stream,
                    window=window,
                    num_layers=num_layers,
                )

                # Representation 0 is the noise site; the rest are restores.
                n_restores = len(config.representations) - 1
                intervenable = IntervenableModel(config, gpt)
                _, counterfactual_outputs = intervenable(
                    base,
                    [None] + [base]*n_restores,
                    {
                        "sources->base": (
                            [None] + [[[pos_i]]]*n_restores,
                            [[[0,1,2,3]]] + [[[pos_i]]]*n_restores,
                        )
                    },
                )
                logits = counterfactual_outputs[0]
                probabilities = nn.functional.softmax(logits, dim=-1)
                prob = probabilities[0][l].item()
                data.append({"layer": layer_i, "pos": pos_i, "prob": prob})
        df = pd.DataFrame(data)
        df.to_csv(f"./tutorial_data/pyvene_rome_{stream}.csv")

    image_paths = {}
    for stream in streams:
        df = pd.read_csv(f"./tutorial_data/pyvene_rome_{stream}.csv")
        df["layer"] = df["layer"].astype(int)
        df["pos"] = df["pos"].astype(int)
        prob_type = "p"+"("+label+")"
        df[prob_type] = df["prob"].astype(float)
        # Y axis shows the actual tokens of the input, one tick per position.
        custom_labels = base_token
        breaks = list(range(len(custom_labels)))

        plot = (
            ggplot(df, aes(x="layer", y="pos"))
            + geom_tile(aes(fill=prob_type))
            + scale_fill_cmap(colors[stream]) + xlab(titles[stream])
            + scale_y_reverse(
                limits = (-0.5, len(custom_labels)),
                breaks=breaks, labels=custom_labels)
            + theme(figure_size=(6,9)) + ylab("")
            + theme(axis_text_y = element_text(angle = 90, hjust = 1))
        )
        image_paths[stream] = f"./tutorial_data/pyvene_rome_{stream}.png"
        ggsave(
            plot, filename=image_paths[stream], dpi=200
        )

    return (
        image_paths["mlp_activation"],
        image_paths["block_output"],
        image_paths["attention_output"],
    )
|
200 |
-
|
201 |
-
def restore_corrupted_with_interval_config(
    layer, stream="mlp_activation", window=5, num_layers=12):
    """Build the intervention config for one "corrupt, then restore" run.

    The first representation is the layer-0 ``block_input``, paired with
    ``NoiseIntervention``. It is followed by one ``stream`` representation for
    each layer in a window of ``window`` layers around ``layer`` (clipped to
    ``[0, num_layers)``), each paired with ``VanillaIntervention``.

    Args:
        layer: Layer index at the centre of the restore window.
        stream: Name of the representation to restore.
        window: Number of consecutive layers to restore.
        num_layers: Exclusive upper bound for restored layer indices.

    Returns:
        The assembled ``IntervenableConfig``.
    """
    # floor(window / 2) layers below `layer`, ceil(window / 2) - 1 above it.
    first = max(0, layer - window // 2)
    stop = min(num_layers, layer - (-window // 2))

    noise_site = RepresentationConfig(
        0,              # layer
        "block_input",  # intervention type
    )
    restore_sites = []
    for restored_layer in range(first, stop):
        restore_sites.append(RepresentationConfig(restored_layer, stream))

    interventions = [NoiseIntervention]
    interventions.extend([VanillaIntervention] * (stop - first))

    return IntervenableConfig(
        representations=[noise_site] + restore_sites,
        intervention_types=interventions,
    )
|
220 |
-
|
221 |
-
class NoiseIntervention(ConstantSourceIntervention, LocalistRepresentationIntervention):
    """Intervention that adds fixed, seeded Gaussian noise to the base representation.

    The noise tensor has shape ``(1, 4, embed_dim)``; it is drawn once per
    instance from ``np.random.RandomState(1)`` and scaled by ``noise_level``
    when applied.
    """

    def __init__(self, embed_dim, **kwargs):
        super().__init__()
        self.interchange_dim = embed_dim
        # Fixed seed so every instance adds the same noise.
        rs = np.random.RandomState(1)
        # `device` is the module-level torch device.
        self.noise = torch.from_numpy(rs.randn(1, 4, embed_dim)).to(device)
        self.noise_level = 0.7462981581687927 #0.3462981581687927

    def forward(self, base, source=None, subspaces=None):
        """Add the scaled noise in place to the first ``interchange_dim`` features of ``base``.

        ``source`` and ``subspaces`` are accepted but not used.
        Returns the (mutated) ``base`` tensor.
        """
        base[..., : self.interchange_dim] += self.noise * self.noise_level
        return base

    def __str__(self):
        # Fix: the original read `self.embed_dim`, which this class never
        # assigns; the embedding size is stored as `interchange_dim`.
        return f"NoiseIntervention(embed_dim={self.interchange_dim})"
|
237 |
-
|
238 |
-
|
239 |
-
def corrupted_config(model_type):
    """Build an ``IntervenableConfig`` that only corrupts the input.

    The single representation is the layer-0 ``block_input``, handled by
    ``NoiseIntervention``.

    Args:
        model_type: Passed through as the config's ``model_type``.

    Returns:
        The assembled ``IntervenableConfig``.
    """
    noise_site = RepresentationConfig(
        0,              # layer
        "block_input",  # intervention type
    )
    return IntervenableConfig(
        model_type=model_type,
        representations=[noise_site],
        intervention_types=NoiseIntervention,
    )
|
251 |
-
def create_bert(cache_dir=None):
    """Load the fine-tuned BERT classifier with its config and tokenizer.

    NOTE: this repeats an identical ``create_bert`` defined earlier in the
    module; being the later definition, it is the one bound after import.

    Args:
        cache_dir: Optional cache directory forwarded to ``from_pretrained``
            for both the tokenizer and the model. ``None`` keeps the library
            default.

    Returns:
        A ``(config, tokenizer, model)`` tuple: the ``BertConfig`` read from
        ``./bert_base/checkpoint-3848/config.json``, the ``bert-base-uncased``
        tokenizer, and the sequence-classification model loaded from
        ``./bert_base/checkpoint-3848``. Both paths are relative to the
        current working directory.
    """
    from transformers import BertConfig

    config = BertConfig.from_pretrained("./bert_base/checkpoint-3848/config.json")
    # Fix: cache_dir used to be passed to the model only; the tokenizer is
    # fetched by hub name, so it should honour the same cache directory.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", cache_dir=cache_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        "./bert_base/checkpoint-3848", config=config, cache_dir=cache_dir
    )
    print("loaded model")
    return config, tokenizer, model
|
260 |
-
|
261 |
-
# params = return_params('best_model_json/distilbert.json', 0.001 )
|
262 |
-
#params = return_params('best_model_json/distilbert.json', 1 )
|
263 |
-
|
264 |
-
|
265 |
-
'''embeddings=None
|
266 |
-
if(params['bert_tokens']):
|
267 |
-
train,val,test=createDatasetSplit(params) #update
|
268 |
-
else:
|
269 |
-
train,val,test,vocab_own=createDatasetSplit(params)
|
270 |
-
params['embed_size']=vocab_own.embeddings.shape[1]
|
271 |
-
params['vocab_size']=vocab_own.embeddings.shape[0]
|
272 |
-
embeddings=vocab_own.embeddings
|
273 |
-
if(params['auto_weights']):
|
274 |
-
y_test = [ele[2] for ele in test]
|
275 |
-
# print(y_test)
|
276 |
-
encoder = LabelEncoder()
|
277 |
-
encoder.classes_ = np.load(params['class_names'],allow_pickle=True)
|
278 |
-
params['weights']=class_weight.compute_class_weight('balanced',np.unique(y_test),y_test).astype('float32')
|
279 |
-
#params['weights']=np.array([len(y_test)/y_test.count(encoder.classes_[0]),len(y_test)/y_test.count(encoder.classes_[1]),len(y_test)/y_test.count(encoder.classes_[2])]).astype('float32')
|
280 |
-
|
281 |
-
model=select_model(params,embeddings)
|
282 |
-
model = model.eval()
|
283 |
-
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
|
284 |
-
|
285 |
-
|
286 |
-
classes_ = np.load('Data/classes.npy')
|
287 |
-
'''
|
288 |
-
def main_function(text,label):
    """Gradio callback: run ``interpret`` and yield the three heat maps.

    The block of disabled legacy code that used to open this function was a
    bare string literal in docstring position, so Python exposed it as
    ``main_function.__doc__``; it has been replaced by this docstring.

    Args:
        text: Sentence to analyse.
        label: Class name whose probability is traced; passed through to
            ``interpret`` unchanged.

    Yields:
        One ``(mlp_image, block_output_image, attention_image)`` tuple of PIL
        images opened from the PNG paths returned by ``interpret``.
    """
    ml_img_path, bo_img_path, atten_img_path = interpret(text, label)
    ml_im = Image.open(ml_img_path)
    bo_im = Image.open(bo_img_path)
    atten_im = Image.open(atten_img_path)

    # Kept as a generator so the callback's interface is unchanged.
    yield ml_im, bo_im, atten_im
|
325 |
-
|
326 |
-
'''
|
327 |
-
sns.set_theme(rc={'figure.figsize':(30,1)})
|
328 |
-
|
329 |
-
# creating subplot
|
330 |
-
fig, ax = plt.subplots()
|
331 |
-
|
332 |
-
# drawing heatmap on current axes
|
333 |
-
ax = sns.heatmap(np.expand_dims(avg_attn_np,0), annot= np.expand_dims(np.array(text_tokens_),0), \
|
334 |
-
fmt="", annot_kws={'size': 10}, cmap="magma")
|
335 |
-
|
336 |
-
fig = ax.get_figure()
|
337 |
-
fig.savefig("out.png" ,bbox_inches='tight')
|
338 |
-
|
339 |
-
im = Image.open("out.png")
|
340 |
-
|
341 |
-
yield im
|
342 |
-
|
343 |
-
'''
|
344 |
-
|
345 |
-
#return list(zip(text_tokens_ , avg_attn_np)), pred_label
|
346 |
-
# return list(zip(text_tokens_[1:-1] , avg_attn_np[1:-1]))
|
347 |
-
|
348 |
-
|
349 |
-
# Gradio UI: the first tab collects the sentence and the target label, the
# second shows the three heat maps produced by main_function.
#
# The former `demo = gr.Interface(main_function, inputs="textbox",
# outputs="image", theme='compact')` was removed. It was rebound by the
# Blocks below before it was ever used, and it declared one input and one
# output for a callback that takes two inputs and yields three images.
with gr.Blocks() as demo:
    with gr.Tab("Text Input"):
        text_input = gr.Textbox(label="Text")
        label_input = gr.Textbox(label="Label (hate / normal / offense)")
        text_button = gr.Button("Show")

    with gr.Tab("Interpretability"):
        with gr.Row():
            # Order matches main_function's output: MLP, block output, attention.
            image_output1 = gr.Image(label="MLP activation")
            image_output2 = gr.Image(label="Block output")
            image_output3 = gr.Image(label="Attention output")

    text_button.click(
        main_function,
        inputs=[text_input, label_input],
        outputs=[image_output1, image_output2, image_output3],
    )


if __name__ == "__main__":
    demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/requirements.txt
DELETED
@@ -1,123 +0,0 @@
|
|
1 |
-
accelerate==0.29.2
|
2 |
-
aiohttp==3.9.4
|
3 |
-
aiosignal==1.3.1
|
4 |
-
asttokens
|
5 |
-
async-timeout==4.0.3
|
6 |
-
attrs==23.2.0
|
7 |
-
backcall==0.2.0
|
8 |
-
beautifulsoup4==4.12.3
|
9 |
-
bleach==6.1.0
|
10 |
-
certifi==2024.2.2
|
11 |
-
charset-normalizer==3.3.2
|
12 |
-
comm
|
13 |
-
contourpy==1.2.1
|
14 |
-
cycler==0.12.1
|
15 |
-
datasets==2.18.0
|
16 |
-
debugpy
|
17 |
-
decorator
|
18 |
-
defusedxml==0.7.1
|
19 |
-
dill==0.3.8
|
20 |
-
docopt==0.6.2
|
21 |
-
exceptiongroup
|
22 |
-
executing
|
23 |
-
fastjsonschema==2.19.1
|
24 |
-
filelock==3.13.4
|
25 |
-
fonttools==4.51.0
|
26 |
-
frozenlist==1.4.1
|
27 |
-
fsspec==2023.6.0
|
28 |
-
huggingface-hub==0.20.3
|
29 |
-
idna==3.7
|
30 |
-
importlib_metadata
|
31 |
-
ipykernel
|
32 |
-
ipython==8.12.3
|
33 |
-
ipywidgets==8.1.2
|
34 |
-
jedi
|
35 |
-
Jinja2==3.1.3
|
36 |
-
jsonschema==4.21.1
|
37 |
-
jsonschema-specifications==2023.12.1
|
38 |
-
jupyter_client
|
39 |
-
jupyter_core
|
40 |
-
jupyterlab_pygments==0.3.0
|
41 |
-
jupyterlab_widgets==3.0.10
|
42 |
-
kiwisolver==1.4.5
|
43 |
-
MarkupSafe==2.1.5
|
44 |
-
matplotlib==3.8.4
|
45 |
-
matplotlib-inline
|
46 |
-
mistune==3.0.2
|
47 |
-
mizani==0.11.1
|
48 |
-
mpmath==1.3.0
|
49 |
-
multidict==6.0.5
|
50 |
-
multiprocess==0.70.16
|
51 |
-
nbclient==0.10.0
|
52 |
-
nbconvert==7.16.3
|
53 |
-
nbformat==5.10.4
|
54 |
-
nest_asyncio
|
55 |
-
networkx==3.3
|
56 |
-
numpy==1.26.4
|
57 |
-
nvidia-cublas-cu12==12.1.3.1
|
58 |
-
nvidia-cuda-cupti-cu12==12.1.105
|
59 |
-
nvidia-cuda-nvrtc-cu12==12.1.105
|
60 |
-
nvidia-cuda-runtime-cu12==12.1.105
|
61 |
-
nvidia-cudnn-cu12==8.9.2.26
|
62 |
-
nvidia-cufft-cu12==11.0.2.54
|
63 |
-
nvidia-curand-cu12==10.3.2.106
|
64 |
-
nvidia-cusolver-cu12==11.4.5.107
|
65 |
-
nvidia-cusparse-cu12==12.1.0.106
|
66 |
-
nvidia-nccl-cu12==2.19.3
|
67 |
-
nvidia-nvjitlink-cu12==12.4.127
|
68 |
-
nvidia-nvtx-cu12==12.1.105
|
69 |
-
packaging
|
70 |
-
pandas==2.2.2
|
71 |
-
pandocfilters==1.5.1
|
72 |
-
parso
|
73 |
-
patsy==0.5.6
|
74 |
-
pexpect
|
75 |
-
pickleshare
|
76 |
-
pillow==10.3.0
|
77 |
-
pipreqs==0.5.0
|
78 |
-
platformdirs
|
79 |
-
plotnine==0.13.4
|
80 |
-
prompt-toolkit
|
81 |
-
protobuf==5.26.1
|
82 |
-
psutil
|
83 |
-
ptyprocess
|
84 |
-
pure-eval
|
85 |
-
pyarrow==15.0.2
|
86 |
-
pyarrow-hotfix==0.6
|
87 |
-
Pygments
|
88 |
-
pyparsing==3.1.2
|
89 |
-
python-dateutil==2.8.2
|
90 |
-
pytz==2023.4
|
91 |
-
pyvene==0.1.1
|
92 |
-
PyYAML==6.0.1
|
93 |
-
pyzmq
|
94 |
-
referencing==0.34.0
|
95 |
-
regex==2023.12.25
|
96 |
-
requests==2.31.0
|
97 |
-
rpds-py==0.18.0
|
98 |
-
safetensors==0.4.3
|
99 |
-
scipy==1.13.0
|
100 |
-
sentencepiece==0.2.0
|
101 |
-
six
|
102 |
-
soupsieve==2.5
|
103 |
-
stack-data
|
104 |
-
statsmodels==0.14.1
|
105 |
-
sympy==1.12
|
106 |
-
tinycss2==1.2.1
|
107 |
-
tokenizers==0.15.2
|
108 |
-
torch==2.2.2
|
109 |
-
tornado
|
110 |
-
tqdm==4.66.2
|
111 |
-
traitlets
|
112 |
-
transformers==4.39.3
|
113 |
-
triton==2.2.0
|
114 |
-
typing_extensions
|
115 |
-
tzdata==2024.1
|
116 |
-
urllib3==2.0.7
|
117 |
-
wcwidth
|
118 |
-
webencodings==0.5.1
|
119 |
-
widgetsnbextension==4.0.10
|
120 |
-
xxhash==3.4.1
|
121 |
-
yarg==0.1.9
|
122 |
-
yarl==1.9.4
|
123 |
-
zipp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/tutorial_data/pyvene_rome_attention_output.csv
DELETED
@@ -1,73 +0,0 @@
|
|
1 |
-
,layer,pos,prob
|
2 |
-
0,0,0,0.008943566121160984
|
3 |
-
1,0,1,0.010685674846172333
|
4 |
-
2,0,2,0.015678975731134415
|
5 |
-
3,0,3,0.01495782658457756
|
6 |
-
4,0,4,0.01689751259982586
|
7 |
-
5,0,5,0.012341184541583061
|
8 |
-
6,1,0,0.00910158734768629
|
9 |
-
7,1,1,0.011121801100671291
|
10 |
-
8,1,2,0.015446535311639309
|
11 |
-
9,1,3,0.014828759245574474
|
12 |
-
10,1,4,0.01610460691154003
|
13 |
-
11,1,5,0.012241763062775135
|
14 |
-
12,2,0,0.009373819455504417
|
15 |
-
13,2,1,0.011316204443573952
|
16 |
-
14,2,2,0.01544259861111641
|
17 |
-
15,2,3,0.014399203471839428
|
18 |
-
16,2,4,0.015949850901961327
|
19 |
-
17,2,5,0.012191198766231537
|
20 |
-
18,3,0,0.008611239492893219
|
21 |
-
19,3,1,0.01138687040656805
|
22 |
-
20,3,2,0.015247474424540997
|
23 |
-
21,3,3,0.013744203373789787
|
24 |
-
22,3,4,0.014804143458604813
|
25 |
-
23,3,5,0.011855616234242916
|
26 |
-
24,4,0,0.009979105554521084
|
27 |
-
25,4,1,0.011923858895897865
|
28 |
-
26,4,2,0.015469703823328018
|
29 |
-
27,4,3,0.012778976932168007
|
30 |
-
28,4,4,0.015446675941348076
|
31 |
-
29,4,5,0.01213959138840437
|
32 |
-
30,5,0,0.010452548041939735
|
33 |
-
31,5,1,0.011575913988053799
|
34 |
-
32,5,2,0.014227043837308884
|
35 |
-
33,5,3,0.013159635476768017
|
36 |
-
34,5,4,0.016256239265203476
|
37 |
-
35,5,5,0.01196625828742981
|
38 |
-
36,6,0,0.009859082289040089
|
39 |
-
37,6,1,0.011729804798960686
|
40 |
-
38,6,2,0.013667005114257336
|
41 |
-
39,6,3,0.012512612156569958
|
42 |
-
40,6,4,0.015985535457730293
|
43 |
-
41,6,5,0.011508451774716377
|
44 |
-
42,7,0,0.00967455469071865
|
45 |
-
43,7,1,0.012198343873023987
|
46 |
-
44,7,2,0.013812437653541565
|
47 |
-
45,7,3,0.012038654647767544
|
48 |
-
46,7,4,0.014745757915079594
|
49 |
-
47,7,5,0.011055140756070614
|
50 |
-
48,8,0,0.01034906692802906
|
51 |
-
49,8,1,0.011351429857313633
|
52 |
-
50,8,2,0.013925875537097454
|
53 |
-
51,8,3,0.012646789662539959
|
54 |
-
52,8,4,0.01411098800599575
|
55 |
-
53,8,5,0.011073073372244835
|
56 |
-
54,9,0,0.013398675248026848
|
57 |
-
55,9,1,0.011368145234882832
|
58 |
-
56,9,2,0.013541489839553833
|
59 |
-
57,9,3,0.013448523357510567
|
60 |
-
58,9,4,0.013419842347502708
|
61 |
-
59,9,5,0.011098676361143589
|
62 |
-
60,10,0,0.013398675248026848
|
63 |
-
61,10,1,0.012150835245847702
|
64 |
-
62,10,2,0.014172807335853577
|
65 |
-
63,10,3,0.012981802225112915
|
66 |
-
64,10,4,0.013179052621126175
|
67 |
-
65,10,5,0.01129151601344347
|
68 |
-
66,11,0,0.013398675248026848
|
69 |
-
67,11,1,0.01180819422006607
|
70 |
-
68,11,2,0.013985361903905869
|
71 |
-
69,11,3,0.012903643772006035
|
72 |
-
70,11,4,0.012925814837217331
|
73 |
-
71,11,5,0.011390508152544498
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/tutorial_data/pyvene_rome_attention_output.pdf
DELETED
Binary file (26.3 kB)
|
|
cs772_proj/tutorial_data/pyvene_rome_attention_output.png
DELETED
Binary file (50.9 kB)
|
|
cs772_proj/tutorial_data/pyvene_rome_block_output.csv
DELETED
@@ -1,73 +0,0 @@
|
|
1 |
-
,layer,pos,prob
|
2 |
-
0,0,0,0.009189224801957607
|
3 |
-
1,0,1,0.011389641091227531
|
4 |
-
2,0,2,0.0162599328905344
|
5 |
-
3,0,3,0.015484759584069252
|
6 |
-
4,0,4,0.015411637723445892
|
7 |
-
5,0,5,0.012490469962358475
|
8 |
-
6,1,0,0.00770866172388196
|
9 |
-
7,1,1,0.011720607057213783
|
10 |
-
8,1,2,0.015047593042254448
|
11 |
-
9,1,3,0.014841136522591114
|
12 |
-
10,1,4,0.017443198710680008
|
13 |
-
11,1,5,0.011815374717116356
|
14 |
-
12,2,0,0.008566385135054588
|
15 |
-
13,2,1,0.01111418567597866
|
16 |
-
14,2,2,0.01541436929255724
|
17 |
-
15,2,3,0.014069304801523685
|
18 |
-
16,2,4,0.016460780054330826
|
19 |
-
17,2,5,0.0121275270357728
|
20 |
-
18,3,0,0.009172435849905014
|
21 |
-
19,3,1,0.011352349072694778
|
22 |
-
20,3,2,0.013832006603479385
|
23 |
-
21,3,3,0.014499133452773094
|
24 |
-
22,3,4,0.01608533412218094
|
25 |
-
23,3,5,0.011975396424531937
|
26 |
-
24,4,0,0.009531590156257153
|
27 |
-
25,4,1,0.011509168893098831
|
28 |
-
26,4,2,0.012929881922900677
|
29 |
-
27,4,3,0.013458534143865108
|
30 |
-
28,4,4,0.015189730562269688
|
31 |
-
29,4,5,0.011921005323529243
|
32 |
-
30,5,0,0.009805092588067055
|
33 |
-
31,5,1,0.011592468246817589
|
34 |
-
32,5,2,0.013322774320840836
|
35 |
-
33,5,3,0.01245818566530943
|
36 |
-
34,5,4,0.013958347029983997
|
37 |
-
35,5,5,0.012003983370959759
|
38 |
-
36,6,0,0.01007422897964716
|
39 |
-
37,6,1,0.010900546796619892
|
40 |
-
38,6,2,0.01368661504238844
|
41 |
-
39,6,3,0.01260523870587349
|
42 |
-
40,6,4,0.013009610585868359
|
43 |
-
41,6,5,0.012099610641598701
|
44 |
-
42,7,0,0.010249304585158825
|
45 |
-
43,7,1,0.010945979505777359
|
46 |
-
44,7,2,0.013585647568106651
|
47 |
-
45,7,3,0.013284442014992237
|
48 |
-
46,7,4,0.012696263380348682
|
49 |
-
47,7,5,0.012064820155501366
|
50 |
-
48,8,0,0.009416966699063778
|
51 |
-
49,8,1,0.011989694088697433
|
52 |
-
50,8,2,0.01403607614338398
|
53 |
-
51,8,3,0.012878036126494408
|
54 |
-
52,8,4,0.012870670296251774
|
55 |
-
53,8,5,0.011852720752358437
|
56 |
-
54,9,0,0.009302603080868721
|
57 |
-
55,9,1,0.011646227911114693
|
58 |
-
56,9,2,0.013754121959209442
|
59 |
-
57,9,3,0.01287330687046051
|
60 |
-
58,9,4,0.012776567600667477
|
61 |
-
59,9,5,0.011404040269553661
|
62 |
-
60,10,0,0.009880894795060158
|
63 |
-
61,10,1,0.011837868951261044
|
64 |
-
62,10,2,0.013910908252000809
|
65 |
-
63,10,3,0.012473315000534058
|
66 |
-
64,10,4,0.012750478461384773
|
67 |
-
65,10,5,0.011884817853569984
|
68 |
-
66,11,0,0.013398675248026848
|
69 |
-
67,11,1,0.012010819278657436
|
70 |
-
68,11,2,0.012010819278657436
|
71 |
-
69,11,3,0.012010819278657436
|
72 |
-
70,11,4,0.012010819278657436
|
73 |
-
71,11,5,0.012010819278657436
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/tutorial_data/pyvene_rome_block_output.pdf
DELETED
Binary file (26.8 kB)
|
|
cs772_proj/tutorial_data/pyvene_rome_block_output.png
DELETED
Binary file (47.8 kB)
|
|
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.csv
DELETED
@@ -1,73 +0,0 @@
|
|
1 |
-
,layer,pos,prob
|
2 |
-
0,0,0,0.0075546312145888805
|
3 |
-
1,0,1,0.011380046606063843
|
4 |
-
2,0,2,0.01438708696514368
|
5 |
-
3,0,3,0.015439963899552822
|
6 |
-
4,0,4,0.015718040987849236
|
7 |
-
5,0,5,0.012858170084655285
|
8 |
-
6,1,0,0.0077091907151043415
|
9 |
-
7,1,1,0.011459099128842354
|
10 |
-
8,1,2,0.014624425210058689
|
11 |
-
9,1,3,0.015179034322500229
|
12 |
-
10,1,4,0.015754742547869682
|
13 |
-
11,1,5,0.012920349836349487
|
14 |
-
12,2,0,0.007979463785886765
|
15 |
-
13,2,1,0.011575750075280666
|
16 |
-
14,2,2,0.014750510454177856
|
17 |
-
15,2,3,0.014939533546566963
|
18 |
-
16,2,4,0.01672947406768799
|
19 |
-
17,2,5,0.012872524559497833
|
20 |
-
18,3,0,0.008789473213255405
|
21 |
-
19,3,1,0.011063076555728912
|
22 |
-
20,3,2,0.01672506332397461
|
23 |
-
21,3,3,0.012915139086544514
|
24 |
-
22,3,4,0.01752210408449173
|
25 |
-
23,3,5,0.012578015215694904
|
26 |
-
24,4,0,0.009665396064519882
|
27 |
-
25,4,1,0.011315570212900639
|
28 |
-
26,4,2,0.016729004681110382
|
29 |
-
27,4,3,0.012932662852108479
|
30 |
-
28,4,4,0.017836520448327065
|
31 |
-
29,4,5,0.012803135439753532
|
32 |
-
30,5,0,0.010207359679043293
|
33 |
-
31,5,1,0.01099418569356203
|
34 |
-
32,5,2,0.01522758323699236
|
35 |
-
33,5,3,0.012608421966433525
|
36 |
-
34,5,4,0.01690223254263401
|
37 |
-
35,5,5,0.01230985764414072
|
38 |
-
36,6,0,0.009948461316525936
|
39 |
-
37,6,1,0.011443679220974445
|
40 |
-
38,6,2,0.013499817810952663
|
41 |
-
39,6,3,0.012555226683616638
|
42 |
-
40,6,4,0.01549310702830553
|
43 |
-
41,6,5,0.011905322782695293
|
44 |
-
42,7,0,0.009184295311570168
|
45 |
-
43,7,1,0.011352204717695713
|
46 |
-
44,7,2,0.01403868943452835
|
47 |
-
45,7,3,0.012666325084865093
|
48 |
-
46,7,4,0.013838390819728374
|
49 |
-
47,7,5,0.011248479597270489
|
50 |
-
48,8,0,0.010832141153514385
|
51 |
-
49,8,1,0.011385922320187092
|
52 |
-
50,8,2,0.01583883911371231
|
53 |
-
51,8,3,0.01382371224462986
|
54 |
-
52,8,4,0.014275728724896908
|
55 |
-
53,8,5,0.011227857321500778
|
56 |
-
54,9,0,0.013241364620625973
|
57 |
-
55,9,1,0.01146922167390585
|
58 |
-
56,9,2,0.015066420659422874
|
59 |
-
57,9,3,0.013642949052155018
|
60 |
-
58,9,4,0.013898820616304874
|
61 |
-
59,9,5,0.011261279694736004
|
62 |
-
60,10,0,0.013216082938015461
|
63 |
-
61,10,1,0.012054135091602802
|
64 |
-
62,10,2,0.014480901882052422
|
65 |
-
63,10,3,0.012983473017811775
|
66 |
-
64,10,4,0.012887177988886833
|
67 |
-
65,10,5,0.011302494443953037
|
68 |
-
66,11,0,0.013019828125834465
|
69 |
-
67,11,1,0.01216293778270483
|
70 |
-
68,11,2,0.01321493461728096
|
71 |
-
69,11,3,0.012598911300301552
|
72 |
-
70,11,4,0.013332013040781021
|
73 |
-
71,11,5,0.011366385966539383
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.pdf
DELETED
Binary file (26.7 kB)
|
|
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.png
DELETED
Binary file (52.2 kB)
|
|