shambhavi3 committed on
Commit
93d8271
·
verified ·
1 Parent(s): b5acc5f

Delete cs772_proj

Browse files
cs772_proj/bert_base/checkpoint-3848/config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "_name_or_path": "bert-base-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
- "id2label": {
13
- "0": "hate",
14
- "1": "normal",
15
- "2": "offense"
16
- },
17
- "initializer_range": 0.02,
18
- "intermediate_size": 3072,
19
- "label2id": {
20
- "hate": 0,
21
- "normal": 1,
22
- "offense": 2
23
- },
24
- "layer_norm_eps": 1e-12,
25
- "max_position_embeddings": 512,
26
- "model_type": "bert",
27
- "num_attention_heads": 12,
28
- "num_hidden_layers": 12,
29
- "pad_token_id": 0,
30
- "position_embedding_type": "absolute",
31
- "problem_type": "single_label_classification",
32
- "torch_dtype": "float32",
33
- "transformers_version": "4.39.3",
34
- "type_vocab_size": 2,
35
- "use_cache": true,
36
- "vocab_size": 30522
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f92169bcbaeee93e4c65d5f6b7af90505d8a754096d1b7d1ea70cf290cc79690
3
- size 437961724
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:306504e0a7e6e3e27b15a81346a5f70e8941e6ec7085d33d70693b13cbba1e8b
3
- size 876044538
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f8c99e888295714e206a5143fed689d23c7ae28a194ff83078714c2d99f94ab
3
- size 14244
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9258b30f99447d96f005979b906a97fe44e711e5ecb53f5be292707492c5ef45
3
- size 1064
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
cs772_proj/bert_base/checkpoint-3848/tokenizer_config.json DELETED
@@ -1,55 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": true,
45
- "cls_token": "[CLS]",
46
- "do_lower_case": true,
47
- "mask_token": "[MASK]",
48
- "model_max_length": 512,
49
- "pad_token": "[PAD]",
50
- "sep_token": "[SEP]",
51
- "strip_accents": null,
52
- "tokenize_chinese_chars": true,
53
- "tokenizer_class": "BertTokenizer",
54
- "unk_token": "[UNK]"
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/trainer_state.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "best_metric": 0.7247874736785889,
3
- "best_model_checkpoint": "bert_base/checkpoint-1924",
4
- "epoch": 4.0,
5
- "eval_steps": 500,
6
- "global_step": 3848,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "grad_norm": 13.14333724975586,
14
- "learning_rate": 8.000000000000001e-06,
15
- "loss": 0.8204,
16
- "step": 962
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_f1": 0.6669116037121897,
21
- "eval_loss": 0.7362112402915955,
22
- "eval_runtime": 2.6989,
23
- "eval_samples_per_second": 712.147,
24
- "eval_steps_per_second": 44.833,
25
- "step": 962
26
- },
27
- {
28
- "epoch": 2.0,
29
- "grad_norm": 15.062124252319336,
30
- "learning_rate": 6e-06,
31
- "loss": 0.664,
32
- "step": 1924
33
- },
34
- {
35
- "epoch": 2.0,
36
- "eval_f1": 0.6714560068474462,
37
- "eval_loss": 0.7247874736785889,
38
- "eval_runtime": 1.9229,
39
- "eval_samples_per_second": 999.531,
40
- "eval_steps_per_second": 62.926,
41
- "step": 1924
42
- },
43
- {
44
- "epoch": 3.0,
45
- "grad_norm": 30.878219604492188,
46
- "learning_rate": 4.000000000000001e-06,
47
- "loss": 0.5662,
48
- "step": 2886
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_f1": 0.6630607481681304,
53
- "eval_loss": 0.7806704044342041,
54
- "eval_runtime": 2.4376,
55
- "eval_samples_per_second": 788.47,
56
- "eval_steps_per_second": 49.638,
57
- "step": 2886
58
- },
59
- {
60
- "epoch": 4.0,
61
- "grad_norm": 21.18539810180664,
62
- "learning_rate": 2.0000000000000003e-06,
63
- "loss": 0.4919,
64
- "step": 3848
65
- },
66
- {
67
- "epoch": 4.0,
68
- "eval_f1": 0.6878731135692044,
69
- "eval_loss": 0.7753087282180786,
70
- "eval_runtime": 2.3502,
71
- "eval_samples_per_second": 817.816,
72
- "eval_steps_per_second": 51.486,
73
- "step": 3848
74
- }
75
- ],
76
- "logging_steps": 500,
77
- "max_steps": 4810,
78
- "num_input_tokens_seen": 0,
79
- "num_train_epochs": 5,
80
- "save_steps": 500,
81
- "total_flos": 1892113337449692.0,
82
- "train_batch_size": 16,
83
- "trial_name": null,
84
- "trial_params": null
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f562e068a01b97d232a8b2fbb9f51b80ec2d1eedd0dd5a99be9c3f9af0bcbeb1
3
- size 4856
 
 
 
 
cs772_proj/bert_base/checkpoint-3848/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
cs772_proj/cs772_bert/.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/cs772_bert/README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Cs772 Bert
3
- emoji: 🌖
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 4.29.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/cs772_project.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
cs772_proj/demo_gradio_distilbert.py DELETED
@@ -1,372 +0,0 @@
1
- import gradio as gr
2
-
3
- import transformers
4
- import torch
5
- #import neptune
6
- #from knockknock import slack_sender
7
- from transformers import *
8
- #import glob
9
- from transformers import BertTokenizer
10
- from transformers import BertForSequenceClassification, AdamW, BertConfig
11
- import random
12
- import pandas as pd
13
- from transformers import BertTokenizer
14
- #from Models.utils import masked_cross_entropy,fix_the_random,format_time,save_normal_model,save_bert_model
15
- from sklearn.metrics import accuracy_score,f1_score
16
- from tqdm import tqdm
17
- '''from TensorDataset.datsetSplitter import createDatasetSplit
18
- from TensorDataset.dataLoader import combine_features
19
- from Preprocess.dataCollect import collect_data,set_name'''
20
- from sklearn.metrics import accuracy_score,f1_score,roc_auc_score,recall_score,precision_score
21
- import matplotlib.pyplot as plt
22
- import time
23
- import os
24
- from transformers import BertTokenizer
25
- #import GPUtil
26
- from sklearn.utils import class_weight
27
- #import json
28
- #from Models.bertModels import *
29
- #from Models.otherModels import *
30
- import sys
31
- #import time
32
- #from waiting import wait
33
- from sklearn.preprocessing import LabelEncoder
34
- import numpy as np
35
- #import threading
36
- #import argparse
37
- #import ast
38
-
39
- #from manual_training_inference import select_model
40
- #from Models.utils import save_normal_model,save_bert_model,load_model
41
- #from Models.utils import return_params
42
- from transformers import DistilBertTokenizer
43
-
44
-
45
- #from TensorDataset.dataLoader import custom_att_masks
46
- #from keras.preprocessing.sequence import pad_sequences
47
-
48
- #import seaborn as sns
49
- import matplotlib.pyplot as plt
50
- import numpy as np
51
- import PIL.Image as Image
52
- from torch import nn
53
-
54
- from pyvene import embed_to_distrib, top_vals, format_token
55
- from pyvene import (
56
- IntervenableModel,
57
- VanillaIntervention, Intervention,
58
- RepresentationConfig,
59
- IntervenableConfig,
60
- ConstantSourceIntervention,
61
- LocalistRepresentationIntervention
62
- )
63
- from pyvene import create_gpt2
64
- #%config InlineBackend.figure_formats = ['svg']
65
- from plotnine import (
66
- ggplot,
67
- geom_tile,
68
- aes,
69
- facet_wrap,
70
- theme,
71
- element_text,
72
- geom_bar,
73
- geom_hline,
74
- scale_y_log10,
75
- xlab, ylab, ylim,
76
- scale_y_discrete, scale_y_continuous, ggsave
77
- )
78
- from plotnine.scales import scale_y_reverse, scale_fill_cmap
79
- from tqdm import tqdm
80
- global device
81
- device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
82
- def create_bert(cache_dir=None):
83
- """Creates a BERT sequence-classification model, config, and tokenizer from the local ./bert_base/checkpoint-3848 checkpoint"""
84
- from transformers import BertConfig
85
-
86
- config = BertConfig.from_pretrained("./bert_base/checkpoint-3848/config.json")
87
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
88
- gpt = AutoModelForSequenceClassification.from_pretrained("./bert_base/checkpoint-3848", config=config, cache_dir=cache_dir)
89
- print("loaded model")
90
- return config, tokenizer, gpt
91
- def interpret(text,label):
92
- titles={
93
- "block_output": "single restored layer in BERT",
94
- "mlp_activation": "center of interval of 5 patched mlp layer",
95
- "attention_output": "center of interval of 5 patched attn layer"
96
- }
97
-
98
- colors={
99
- "block_output": "Purples",
100
- "mlp_activation": "Greens",
101
- "attention_output": "Reds"
102
- }
103
-
104
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
105
- #config, tokenizer, gpt = pv.create_llama(name="sharpbai/alpaca-7b-merged")
106
- config, tokenizer, gpt = create_bert()
107
- #config, tokenizer, gpt = create_gpt2(name="gpt2-xl")
108
-
109
- gpt.to(device)
110
-
111
- base = text
112
- inputs = [
113
- tokenizer(base, return_tensors="pt").to(device),
114
- ]
115
- #print(base)
116
- base_token = tokenizer.convert_ids_to_tokens(inputs[0]['input_ids'][0])
117
- res = gpt(**inputs[0])
118
- probabilities = nn.functional.softmax(res[0], dim=-1)
119
- if label=="hate":
120
- l = 0
121
- elif label=="normal":
122
- l=1
123
- else:l=2
124
- #print(probabilities)
125
- #print(res[0][0][0].item())
126
- #print(res)
127
- #distrib = embed_to_distrib(gpt, res.last_hidden_state, logits=False)
128
- #top_vals(tokenizer, distrib[0][-1], n=20)
129
- base = tokenizer(text, return_tensors="pt").to(device)
130
- config = corrupted_config(type(gpt))
131
- intervenable = IntervenableModel(config, gpt)
132
- _, counterfactual_outputs = intervenable(
133
- base, unit_locations={"base": ([[[0,1,2,3]]])}
134
- )
135
- #probabilities = nn.functional.softmax(counterfactual_outputs[0], dim=-1)
136
- #print(probabilities)
137
- for stream in ["block_output", "mlp_activation", "attention_output"]:
138
- data = []
139
- for layer_i in tqdm(range(gpt.config.num_hidden_layers)):
140
- for pos_i in range(len(base_token)):
141
- config = restore_corrupted_with_interval_config(
142
- layer_i, stream,
143
- window=1 if stream == "block_output" else 5
144
- )
145
-
146
- n_restores = len(config.representations) - 1
147
- intervenable = IntervenableModel(config, gpt)
148
- _, counterfactual_outputs = intervenable(
149
- base,
150
- [None] + [base]*n_restores,
151
- {
152
- "sources->base": (
153
- [None] + [[[pos_i]]]*n_restores,
154
- [[[0,1,2,3]]] + [[[pos_i]]]*n_restores,
155
- )
156
- },
157
- )
158
- #distrib = embed_to_distrib(
159
- #gpt, counterfactual_outputs.last_hidden_state, logits=False
160
- #)
161
- #prob = distrib[0][-1][token].detach().cpu().item()
162
- logits = counterfactual_outputs[0]
163
- probabilities = nn.functional.softmax(logits, dim=-1)
164
- prob_offense = probabilities[0][l].item()
165
- data.append({"layer": layer_i, "pos": pos_i, "prob": prob_offense})
166
- df = pd.DataFrame(data)
167
- df.to_csv(f"./tutorial_data/pyvene_rome_{stream}.csv")
168
- for stream in ["block_output", "mlp_activation", "attention_output"]:
169
- df = pd.read_csv(f"./tutorial_data/pyvene_rome_{stream}.csv")
170
- df["layer"] = df["layer"].astype(int)
171
- df["pos"] = df["pos"].astype(int)
172
- prob_type = "p"+"("+label+")"
173
- df[prob_type] = df["prob"].astype(float)
174
- #custom_labels = ["imagine*","the*", "riots*", "if", "people", "actually", "got" ,"food" ,"boxes" ,"instead", "of" ,"ebt", "cards", "every", "ghetto", "in", "america", "would" ,"look", "like", "ferguson"]
175
- custom_labels = base_token #["what*", "sort*", "of*", "white*","man" ,"or", "woman", "would", "vote", "for", "this", "nigger"]
176
- #custom_labels = ["no*", "liberal*","congratulated*", "hindu*", "refugees", "post", "cab", "because", "they", "hate", "hindus"]
177
- breaks = list(range(len(custom_labels)))#[0, 1, 2, 3, 4, 5, 6,7,8,9,10,11]
178
-
179
-
180
- plot = (
181
- ggplot(df, aes(x="layer", y="pos"))
182
-
183
- + geom_tile(aes(fill=prob_type))
184
- + scale_fill_cmap(colors[stream]) + xlab(titles[stream])
185
- + scale_y_reverse(
186
- limits = (-0.5, len(custom_labels)),
187
- breaks=breaks, labels=custom_labels)
188
- + theme(figure_size=(6,9)) + ylab("")
189
- + theme(axis_text_y = element_text(angle = 90, hjust = 1))
190
- )
191
- ggsave(
192
- plot, filename=f"./tutorial_data/pyvene_rome_{stream}.png", dpi=200
193
- )
194
- if stream == "mlp_activation":
195
- mlp_img_path = f"./tutorial_data/pyvene_rome_{stream}.png"
196
- elif stream=="block_output":
197
- bo_path = f"./tutorial_data/pyvene_rome_{stream}.png"
198
- else:attention_path = f"./tutorial_data/pyvene_rome_{stream}.png"
199
- return mlp_img_path,bo_path,attention_path
200
-
201
- def restore_corrupted_with_interval_config(
202
- layer, stream="mlp_activation", window=5, num_layers=12):
203
- start = max(0, layer - window // 2)
204
- end = min(num_layers, layer - (-window // 2))
205
- config = IntervenableConfig(
206
- representations=[
207
- RepresentationConfig(
208
- 0, # layer
209
- "block_input", # intervention type
210
- ),
211
- ] + [
212
- RepresentationConfig(
213
- i, # layer
214
- stream, # intervention type
215
- ) for i in range(start, end)],
216
- intervention_types=\
217
- [NoiseIntervention]+[VanillaIntervention]*(end-start)
218
- )
219
- return config
220
-
221
- class NoiseIntervention(ConstantSourceIntervention, LocalistRepresentationIntervention):
222
- def __init__(self, embed_dim, **kwargs):
223
- super().__init__()
224
- self.interchange_dim = embed_dim
225
- rs = np.random.RandomState(1)
226
- prng = lambda *shape: rs.randn(*shape)
227
- self.noise = torch.from_numpy(
228
- prng(1, 4, embed_dim)).to(device)
229
- self.noise_level = 0.7462981581687927 #0.3462981581687927
230
-
231
- def forward(self, base, source=None, subspaces=None):
232
- base[..., : self.interchange_dim] += self.noise * self.noise_level
233
- return base
234
-
235
- def __str__(self):
236
- return f"NoiseIntervention(embed_dim={self.embed_dim})"
237
-
238
-
239
- def corrupted_config(model_type):
240
- config = IntervenableConfig(
241
- model_type=model_type,
242
- representations=[
243
- RepresentationConfig(
244
- 0, # layer
245
- "block_input", # intervention type
246
- ),
247
- ],
248
- intervention_types=NoiseIntervention,
249
- )
250
- return config
251
- def create_bert(cache_dir=None):
252
- """Creates a BERT sequence-classification model, config, and tokenizer from the local ./bert_base/checkpoint-3848 checkpoint"""
253
- from transformers import BertConfig
254
-
255
- config = BertConfig.from_pretrained("./bert_base/checkpoint-3848/config.json")
256
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
257
- gpt = AutoModelForSequenceClassification.from_pretrained("./bert_base/checkpoint-3848", config=config, cache_dir=cache_dir)
258
- print("loaded model")
259
- return config, tokenizer, gpt
260
-
261
- # params = return_params('best_model_json/distilbert.json', 0.001 )
262
- #params = return_params('best_model_json/distilbert.json', 1 )
263
-
264
-
265
- '''embeddings=None
266
- if(params['bert_tokens']):
267
- train,val,test=createDatasetSplit(params) #update
268
- else:
269
- train,val,test,vocab_own=createDatasetSplit(params)
270
- params['embed_size']=vocab_own.embeddings.shape[1]
271
- params['vocab_size']=vocab_own.embeddings.shape[0]
272
- embeddings=vocab_own.embeddings
273
- if(params['auto_weights']):
274
- y_test = [ele[2] for ele in test]
275
- # print(y_test)
276
- encoder = LabelEncoder()
277
- encoder.classes_ = np.load(params['class_names'],allow_pickle=True)
278
- params['weights']=class_weight.compute_class_weight('balanced',np.unique(y_test),y_test).astype('float32')
279
- #params['weights']=np.array([len(y_test)/y_test.count(encoder.classes_[0]),len(y_test)/y_test.count(encoder.classes_[1]),len(y_test)/y_test.count(encoder.classes_[2])]).astype('float32')
280
-
281
- model=select_model(params,embeddings)
282
- model = model.eval()
283
- tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
284
-
285
-
286
- classes_ = np.load('Data/classes.npy')
287
- '''
288
- def main_function(text,label):
289
- '''tokens = tokenizer.encode_plus(text)
290
- input_ids = pad_sequences(torch.tensor(tokens['input_ids']).unsqueeze(0),maxlen=int(params['max_length']),\
291
- dtype="long",
292
- value=0, truncating="post", padding="post")
293
- # att_vals = pad_sequences(att_vals,maxlen=int(params['max_length']), dtype="float",
294
- # value=0.0, truncating="post", padding="post")
295
- att_masks=custom_att_masks(input_ids)
296
-
297
- outs = model(torch.tensor(input_ids),
298
- attention_mask=torch.tensor(att_masks, dtype=bool),
299
- labels=None,
300
- device='cuda')
301
-
302
- text_tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze())
303
-
304
- text_tokens_ = text_tokens[:len(tokens['input_ids'])]
305
-
306
- print ('xyz')
307
- print (outs[1][5].shape)
308
- avg_attn = torch.mean(outs[1][5], dim=1)
309
- avg_attn_np = avg_attn[0,0,:len(tokens['input_ids'])].detach().squeeze().numpy()
310
-
311
- logits = outs[0]
312
- print (logits)
313
- print (np.sum(avg_attn_np))
314
- print (avg_attn_np)
315
-
316
- pred = torch.argmax(logits)
317
- pred_label = classes_[pred]
318
- '''
319
- ml_img_path,bo_img_path,atten_img_path = interpret(text,label)
320
- ml_im = Image.open(ml_img_path)
321
- bo_im = Image.open(bo_img_path)
322
- atten_im = Image.open(atten_img_path)
323
-
324
- yield ml_im, bo_im, atten_im
325
-
326
- '''
327
- sns.set_theme(rc={'figure.figsize':(30,1)})
328
-
329
- # creating subplot
330
- fig, ax = plt.subplots()
331
-
332
- # drawing heatmap on current axes
333
- ax = sns.heatmap(np.expand_dims(avg_attn_np,0), annot= np.expand_dims(np.array(text_tokens_),0), \
334
- fmt="", annot_kws={'size': 10}, cmap="magma")
335
-
336
- fig = ax.get_figure()
337
- fig.savefig("out.png" ,bbox_inches='tight')
338
-
339
- im = Image.open("out.png")
340
-
341
- yield im
342
-
343
- '''
344
-
345
- #return list(zip(text_tokens_ , avg_attn_np)), pred_label
346
- # return list(zip(text_tokens_[1:-1] , avg_attn_np[1:-1]))
347
-
348
-
349
- demo = gr.Interface(main_function,
350
- inputs="textbox",
351
- outputs="image",
352
- theme = 'compact')
353
-
354
- with gr.Blocks() as demo:
355
- with gr.Tab("Text Input"):
356
- text_input = gr.Textbox()
357
- label_input = gr.Textbox()
358
- text_button = gr.Button("Show")
359
-
360
- with gr.Tab("Interpretability"):
361
- with gr.Row():
362
- image_output1 = gr.Image()
363
- image_output2 = gr.Image()
364
- image_output3 = gr.Image()
365
-
366
- text_button.click(main_function, inputs=[text_input,label_input], outputs=[image_output1,image_output2,image_output3])
367
-
368
-
369
-
370
-
371
- if __name__ == "__main__":
372
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/requirements.txt DELETED
@@ -1,123 +0,0 @@
1
- accelerate==0.29.2
2
- aiohttp==3.9.4
3
- aiosignal==1.3.1
4
- asttokens
5
- async-timeout==4.0.3
6
- attrs==23.2.0
7
- backcall==0.2.0
8
- beautifulsoup4==4.12.3
9
- bleach==6.1.0
10
- certifi==2024.2.2
11
- charset-normalizer==3.3.2
12
- comm
13
- contourpy==1.2.1
14
- cycler==0.12.1
15
- datasets==2.18.0
16
- debugpy
17
- decorator
18
- defusedxml==0.7.1
19
- dill==0.3.8
20
- docopt==0.6.2
21
- exceptiongroup
22
- executing
23
- fastjsonschema==2.19.1
24
- filelock==3.13.4
25
- fonttools==4.51.0
26
- frozenlist==1.4.1
27
- fsspec==2023.6.0
28
- huggingface-hub==0.20.3
29
- idna==3.7
30
- importlib_metadata
31
- ipykernel
32
- ipython==8.12.3
33
- ipywidgets==8.1.2
34
- jedi
35
- Jinja2==3.1.3
36
- jsonschema==4.21.1
37
- jsonschema-specifications==2023.12.1
38
- jupyter_client
39
- jupyter_core
40
- jupyterlab_pygments==0.3.0
41
- jupyterlab_widgets==3.0.10
42
- kiwisolver==1.4.5
43
- MarkupSafe==2.1.5
44
- matplotlib==3.8.4
45
- matplotlib-inline
46
- mistune==3.0.2
47
- mizani==0.11.1
48
- mpmath==1.3.0
49
- multidict==6.0.5
50
- multiprocess==0.70.16
51
- nbclient==0.10.0
52
- nbconvert==7.16.3
53
- nbformat==5.10.4
54
- nest_asyncio
55
- networkx==3.3
56
- numpy==1.26.4
57
- nvidia-cublas-cu12==12.1.3.1
58
- nvidia-cuda-cupti-cu12==12.1.105
59
- nvidia-cuda-nvrtc-cu12==12.1.105
60
- nvidia-cuda-runtime-cu12==12.1.105
61
- nvidia-cudnn-cu12==8.9.2.26
62
- nvidia-cufft-cu12==11.0.2.54
63
- nvidia-curand-cu12==10.3.2.106
64
- nvidia-cusolver-cu12==11.4.5.107
65
- nvidia-cusparse-cu12==12.1.0.106
66
- nvidia-nccl-cu12==2.19.3
67
- nvidia-nvjitlink-cu12==12.4.127
68
- nvidia-nvtx-cu12==12.1.105
69
- packaging
70
- pandas==2.2.2
71
- pandocfilters==1.5.1
72
- parso
73
- patsy==0.5.6
74
- pexpect
75
- pickleshare
76
- pillow==10.3.0
77
- pipreqs==0.5.0
78
- platformdirs
79
- plotnine==0.13.4
80
- prompt-toolkit
81
- protobuf==5.26.1
82
- psutil
83
- ptyprocess
84
- pure-eval
85
- pyarrow==15.0.2
86
- pyarrow-hotfix==0.6
87
- Pygments
88
- pyparsing==3.1.2
89
- python-dateutil==2.8.2
90
- pytz==2023.4
91
- pyvene==0.1.1
92
- PyYAML==6.0.1
93
- pyzmq
94
- referencing==0.34.0
95
- regex==2023.12.25
96
- requests==2.31.0
97
- rpds-py==0.18.0
98
- safetensors==0.4.3
99
- scipy==1.13.0
100
- sentencepiece==0.2.0
101
- six
102
- soupsieve==2.5
103
- stack-data
104
- statsmodels==0.14.1
105
- sympy==1.12
106
- tinycss2==1.2.1
107
- tokenizers==0.15.2
108
- torch==2.2.2
109
- tornado
110
- tqdm==4.66.2
111
- traitlets
112
- transformers==4.39.3
113
- triton==2.2.0
114
- typing_extensions
115
- tzdata==2024.1
116
- urllib3==2.0.7
117
- wcwidth
118
- webencodings==0.5.1
119
- widgetsnbextension==4.0.10
120
- xxhash==3.4.1
121
- yarg==0.1.9
122
- yarl==1.9.4
123
- zipp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/tutorial_data/pyvene_rome_attention_output.csv DELETED
@@ -1,73 +0,0 @@
1
- ,layer,pos,prob
2
- 0,0,0,0.008943566121160984
3
- 1,0,1,0.010685674846172333
4
- 2,0,2,0.015678975731134415
5
- 3,0,3,0.01495782658457756
6
- 4,0,4,0.01689751259982586
7
- 5,0,5,0.012341184541583061
8
- 6,1,0,0.00910158734768629
9
- 7,1,1,0.011121801100671291
10
- 8,1,2,0.015446535311639309
11
- 9,1,3,0.014828759245574474
12
- 10,1,4,0.01610460691154003
13
- 11,1,5,0.012241763062775135
14
- 12,2,0,0.009373819455504417
15
- 13,2,1,0.011316204443573952
16
- 14,2,2,0.01544259861111641
17
- 15,2,3,0.014399203471839428
18
- 16,2,4,0.015949850901961327
19
- 17,2,5,0.012191198766231537
20
- 18,3,0,0.008611239492893219
21
- 19,3,1,0.01138687040656805
22
- 20,3,2,0.015247474424540997
23
- 21,3,3,0.013744203373789787
24
- 22,3,4,0.014804143458604813
25
- 23,3,5,0.011855616234242916
26
- 24,4,0,0.009979105554521084
27
- 25,4,1,0.011923858895897865
28
- 26,4,2,0.015469703823328018
29
- 27,4,3,0.012778976932168007
30
- 28,4,4,0.015446675941348076
31
- 29,4,5,0.01213959138840437
32
- 30,5,0,0.010452548041939735
33
- 31,5,1,0.011575913988053799
34
- 32,5,2,0.014227043837308884
35
- 33,5,3,0.013159635476768017
36
- 34,5,4,0.016256239265203476
37
- 35,5,5,0.01196625828742981
38
- 36,6,0,0.009859082289040089
39
- 37,6,1,0.011729804798960686
40
- 38,6,2,0.013667005114257336
41
- 39,6,3,0.012512612156569958
42
- 40,6,4,0.015985535457730293
43
- 41,6,5,0.011508451774716377
44
- 42,7,0,0.00967455469071865
45
- 43,7,1,0.012198343873023987
46
- 44,7,2,0.013812437653541565
47
- 45,7,3,0.012038654647767544
48
- 46,7,4,0.014745757915079594
49
- 47,7,5,0.011055140756070614
50
- 48,8,0,0.01034906692802906
51
- 49,8,1,0.011351429857313633
52
- 50,8,2,0.013925875537097454
53
- 51,8,3,0.012646789662539959
54
- 52,8,4,0.01411098800599575
55
- 53,8,5,0.011073073372244835
56
- 54,9,0,0.013398675248026848
57
- 55,9,1,0.011368145234882832
58
- 56,9,2,0.013541489839553833
59
- 57,9,3,0.013448523357510567
60
- 58,9,4,0.013419842347502708
61
- 59,9,5,0.011098676361143589
62
- 60,10,0,0.013398675248026848
63
- 61,10,1,0.012150835245847702
64
- 62,10,2,0.014172807335853577
65
- 63,10,3,0.012981802225112915
66
- 64,10,4,0.013179052621126175
67
- 65,10,5,0.01129151601344347
68
- 66,11,0,0.013398675248026848
69
- 67,11,1,0.01180819422006607
70
- 68,11,2,0.013985361903905869
71
- 69,11,3,0.012903643772006035
72
- 70,11,4,0.012925814837217331
73
- 71,11,5,0.011390508152544498
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/tutorial_data/pyvene_rome_attention_output.pdf DELETED
Binary file (26.3 kB)
 
cs772_proj/tutorial_data/pyvene_rome_attention_output.png DELETED
Binary file (50.9 kB)
 
cs772_proj/tutorial_data/pyvene_rome_block_output.csv DELETED
@@ -1,73 +0,0 @@
1
- ,layer,pos,prob
2
- 0,0,0,0.009189224801957607
3
- 1,0,1,0.011389641091227531
4
- 2,0,2,0.0162599328905344
5
- 3,0,3,0.015484759584069252
6
- 4,0,4,0.015411637723445892
7
- 5,0,5,0.012490469962358475
8
- 6,1,0,0.00770866172388196
9
- 7,1,1,0.011720607057213783
10
- 8,1,2,0.015047593042254448
11
- 9,1,3,0.014841136522591114
12
- 10,1,4,0.017443198710680008
13
- 11,1,5,0.011815374717116356
14
- 12,2,0,0.008566385135054588
15
- 13,2,1,0.01111418567597866
16
- 14,2,2,0.01541436929255724
17
- 15,2,3,0.014069304801523685
18
- 16,2,4,0.016460780054330826
19
- 17,2,5,0.0121275270357728
20
- 18,3,0,0.009172435849905014
21
- 19,3,1,0.011352349072694778
22
- 20,3,2,0.013832006603479385
23
- 21,3,3,0.014499133452773094
24
- 22,3,4,0.01608533412218094
25
- 23,3,5,0.011975396424531937
26
- 24,4,0,0.009531590156257153
27
- 25,4,1,0.011509168893098831
28
- 26,4,2,0.012929881922900677
29
- 27,4,3,0.013458534143865108
30
- 28,4,4,0.015189730562269688
31
- 29,4,5,0.011921005323529243
32
- 30,5,0,0.009805092588067055
33
- 31,5,1,0.011592468246817589
34
- 32,5,2,0.013322774320840836
35
- 33,5,3,0.01245818566530943
36
- 34,5,4,0.013958347029983997
37
- 35,5,5,0.012003983370959759
38
- 36,6,0,0.01007422897964716
39
- 37,6,1,0.010900546796619892
40
- 38,6,2,0.01368661504238844
41
- 39,6,3,0.01260523870587349
42
- 40,6,4,0.013009610585868359
43
- 41,6,5,0.012099610641598701
44
- 42,7,0,0.010249304585158825
45
- 43,7,1,0.010945979505777359
46
- 44,7,2,0.013585647568106651
47
- 45,7,3,0.013284442014992237
48
- 46,7,4,0.012696263380348682
49
- 47,7,5,0.012064820155501366
50
- 48,8,0,0.009416966699063778
51
- 49,8,1,0.011989694088697433
52
- 50,8,2,0.01403607614338398
53
- 51,8,3,0.012878036126494408
54
- 52,8,4,0.012870670296251774
55
- 53,8,5,0.011852720752358437
56
- 54,9,0,0.009302603080868721
57
- 55,9,1,0.011646227911114693
58
- 56,9,2,0.013754121959209442
59
- 57,9,3,0.01287330687046051
60
- 58,9,4,0.012776567600667477
61
- 59,9,5,0.011404040269553661
62
- 60,10,0,0.009880894795060158
63
- 61,10,1,0.011837868951261044
64
- 62,10,2,0.013910908252000809
65
- 63,10,3,0.012473315000534058
66
- 64,10,4,0.012750478461384773
67
- 65,10,5,0.011884817853569984
68
- 66,11,0,0.013398675248026848
69
- 67,11,1,0.012010819278657436
70
- 68,11,2,0.012010819278657436
71
- 69,11,3,0.012010819278657436
72
- 70,11,4,0.012010819278657436
73
- 71,11,5,0.012010819278657436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/tutorial_data/pyvene_rome_block_output.pdf DELETED
Binary file (26.8 kB)
 
cs772_proj/tutorial_data/pyvene_rome_block_output.png DELETED
Binary file (47.8 kB)
 
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.csv DELETED
@@ -1,73 +0,0 @@
1
- ,layer,pos,prob
2
- 0,0,0,0.0075546312145888805
3
- 1,0,1,0.011380046606063843
4
- 2,0,2,0.01438708696514368
5
- 3,0,3,0.015439963899552822
6
- 4,0,4,0.015718040987849236
7
- 5,0,5,0.012858170084655285
8
- 6,1,0,0.0077091907151043415
9
- 7,1,1,0.011459099128842354
10
- 8,1,2,0.014624425210058689
11
- 9,1,3,0.015179034322500229
12
- 10,1,4,0.015754742547869682
13
- 11,1,5,0.012920349836349487
14
- 12,2,0,0.007979463785886765
15
- 13,2,1,0.011575750075280666
16
- 14,2,2,0.014750510454177856
17
- 15,2,3,0.014939533546566963
18
- 16,2,4,0.01672947406768799
19
- 17,2,5,0.012872524559497833
20
- 18,3,0,0.008789473213255405
21
- 19,3,1,0.011063076555728912
22
- 20,3,2,0.01672506332397461
23
- 21,3,3,0.012915139086544514
24
- 22,3,4,0.01752210408449173
25
- 23,3,5,0.012578015215694904
26
- 24,4,0,0.009665396064519882
27
- 25,4,1,0.011315570212900639
28
- 26,4,2,0.016729004681110382
29
- 27,4,3,0.012932662852108479
30
- 28,4,4,0.017836520448327065
31
- 29,4,5,0.012803135439753532
32
- 30,5,0,0.010207359679043293
33
- 31,5,1,0.01099418569356203
34
- 32,5,2,0.01522758323699236
35
- 33,5,3,0.012608421966433525
36
- 34,5,4,0.01690223254263401
37
- 35,5,5,0.01230985764414072
38
- 36,6,0,0.009948461316525936
39
- 37,6,1,0.011443679220974445
40
- 38,6,2,0.013499817810952663
41
- 39,6,3,0.012555226683616638
42
- 40,6,4,0.01549310702830553
43
- 41,6,5,0.011905322782695293
44
- 42,7,0,0.009184295311570168
45
- 43,7,1,0.011352204717695713
46
- 44,7,2,0.01403868943452835
47
- 45,7,3,0.012666325084865093
48
- 46,7,4,0.013838390819728374
49
- 47,7,5,0.011248479597270489
50
- 48,8,0,0.010832141153514385
51
- 49,8,1,0.011385922320187092
52
- 50,8,2,0.01583883911371231
53
- 51,8,3,0.01382371224462986
54
- 52,8,4,0.014275728724896908
55
- 53,8,5,0.011227857321500778
56
- 54,9,0,0.013241364620625973
57
- 55,9,1,0.01146922167390585
58
- 56,9,2,0.015066420659422874
59
- 57,9,3,0.013642949052155018
60
- 58,9,4,0.013898820616304874
61
- 59,9,5,0.011261279694736004
62
- 60,10,0,0.013216082938015461
63
- 61,10,1,0.012054135091602802
64
- 62,10,2,0.014480901882052422
65
- 63,10,3,0.012983473017811775
66
- 64,10,4,0.012887177988886833
67
- 65,10,5,0.011302494443953037
68
- 66,11,0,0.013019828125834465
69
- 67,11,1,0.01216293778270483
70
- 68,11,2,0.01321493461728096
71
- 69,11,3,0.012598911300301552
72
- 70,11,4,0.013332013040781021
73
- 71,11,5,0.011366385966539383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.pdf DELETED
Binary file (26.7 kB)
 
cs772_proj/tutorial_data/pyvene_rome_mlp_activation.png DELETED
Binary file (52.2 kB)