import datasets
import gradio as gr
import transformers
from transformers import AutoConfig, AutoTokenizer
from transformers import glue_processors as processors

# Repo-local modules: the Transkimer model and the black-box character attack.
from test_module.modeling_transkimer import BertForSequenceClassification as TranskimerForSequenceClassification
from blackbox_utils.my_attack import CharacterAttack
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
    "imdb": ("text", None),
}
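# Sketch of how task_to_keys is typically consumed (an assumption based on the
# Hugging Face GLUE example scripts this app resembles, not code taken from this
# repo): a task whose second key is None is tokenized as a single sequence,
# paired tasks as sentence pairs.
#
#   def preprocess(example):
#       s1_key, s2_key = task_to_keys[task_name]
#       texts = (example[s1_key],) if s2_key is None else (example[s1_key], example[s2_key])
#       return tokenizer(*texts, padding=padding, max_length=128, truncation=True)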
# Local directory holding the fine-tuned Transkimer SST-2 checkpoint.
model_path_dict = {
    "transkimer_sst2_not_pad": './not_pad_0.5',
}
# Keep the demo's console output quiet.
datasets.utils.logging.set_verbosity_error()
transformers.utils.logging.set_verbosity_error()
task_name = 'sst2'
model_type = 'transkimer'

# Derive the label space from the GLUE processor for the task.
processor = processors[task_name]()
label_list = processor.get_labels()
num_labels = len(label_list)
label_to_id = {v: i for i, v in enumerate(label_list)}

# Load pretrained model and tokenizer
model_path_key = f'{model_type}_{task_name}_not_pad'
model_path = model_path_dict[model_path_key]
config = AutoConfig.from_pretrained(model_path, num_labels=num_labels, finetuning_task=task_name)
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)
model = TranskimerForSequenceClassification.from_pretrained(
    model_path,
    from_tf=bool(".ckpt" in model_path),
    config=config,
)

# Preprocessing the datasets
sentence1_key, sentence2_key = task_to_keys[task_name]
padding = False  # dynamic-length inputs; no padding to a fixed max_length

attack = CharacterAttack(
    f'{model_type}_{task_name}',
    model,
    tokenizer,
    device='cpu',
    max_per=10,
    padding=padding,
    max_length=128,
    label_to_id=label_to_id,
    sentence1_key=sentence1_key,
    sentence2_key=sentence2_key,
)
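# CharacterAttack is defined in this repo's blackbox_utils; judging from its use
# below (an inference from usage, not a documented API), get_prob runs the model
# on a batch of inputs and returns (outputs, elapsed_time), and output_analysis
# reduces the outputs to, among other things, the number of tokens the Transkimer
# layers actually kept.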
def greet(text):
    # The attack expects a list of (sentence1, sentence2) pairs; SST-2 is
    # single-sentence, so the second element is None.
    text_input = [(text, None)]
    outputs, elapsed = attack.get_prob(text_input)
    _, token_remained, _ = attack.output_analysis(outputs)
    return elapsed, token_remained.item()
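# Hypothetical example (actual numbers depend on the checkpoint and hardware):
#   greet("a gripping, well-acted thriller")  ->  (0.08, 37.0)
# i.e. seconds of inference time and the count of tokens kept after skimming.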
# greet takes one text box and returns two numbers (time, tokens remained).
iface = gr.Interface(fn=greet, inputs="text", outputs=["number", "number"])
iface.launch()
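# Once the app is up, it can also be queried over HTTP. A minimal sketch,
# assuming the default local address and Gradio's /api/predict JSON endpoint
# (the route and response layout vary across Gradio versions):
#
#   import requests
#   resp = requests.post(
#       "http://127.0.0.1:7860/api/predict",
#       json={"data": ["a gripping, well-acted thriller"]},
#   )
#   elapsed, tokens_kept = resp.json()["data"]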