""" Finetuning example.
"""
from __future__ import print_function

import json
import math
import os
import sys
from os.path import abspath, dirname

import numpy as np

# Put the repository root (the parent of this file's directory) first on the
# import path; this must run before the torchmoji imports below.
sys.path.insert(0, dirname(dirname(abspath(__file__))))

from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.finetuning import (
    load_benchmark,
    finetune)
from torchmoji.class_avg_finetuning import class_avg_finetune
def roundup(x):
    """Round ``x`` up to the nearest multiple of 10.

    Args:
        x: A real number.

    Returns:
        The smallest multiple of 10 that is greater than or equal to ``x``,
        as an ``int``.
    """
    # Divide by a float so the ceiling is taken on a true quotient even under
    # Python 2 integer division.
    return int(math.ceil(x / 10.0)) * 10
# Benchmark datasets to finetune on. Entries that are commented out are
# skipped; uncomment one to include it in the run.
# Format: (dataset_name,
#          path_to_dataset,
#          nb_classes,
#          use_f1_score)
DATASETS = [
    #('SE0714', '../data/SE0714/raw.pickle', 3, True),
    #('Olympic', '../data/Olympic/raw.pickle', 4, True),
    #('PsychExp', '../data/PsychExp/raw.pickle', 7, True),
    #('SS-Twitter', '../data/SS-Twitter/raw.pickle', 2, False),
    ('SS-Youtube', '../data/SS-Youtube/raw.pickle', 2, False),
    #('SE1604', '../data/SE1604/raw.pickle', 3, False), # Excluded due to Twitter's ToS
    #('SCv1', '../data/SCv1/raw.pickle', 2, True),
    #('SCv2-GEN', '../data/SCv2-GEN/raw.pickle', 2, True)
]
# Directory (relative to the working directory) that the results files are
# written into.
RESULTS_DIR = 'results'

# 'new' | 'last' | 'full' | 'chain-thaw'
FINETUNE_METHOD = 'last'

# Verbosity level forwarded to the finetuning functions.
VERBOSE = 1

# Expected number of entries in the loaded vocabulary.
nb_tokens = 50000

# NOTE(review): nb_epochs and epoch_size are not referenced anywhere in the
# visible part of this script -- confirm whether they are still needed.
nb_epochs = 1000
epoch_size = 1000
# Load the vocabulary from the JSON file at VOCAB_PATH.
with open(VOCAB_PATH, 'r') as f:
    vocab = json.load(f)
# Finetune on every configured dataset, five reruns each, and write one
# results file per (dataset, method, rerun).

# `extend_with` and `weight_path` depend only on FINETUNE_METHOD, so they are
# resolved once here; an unknown method fails before any data is loaded.
if FINETUNE_METHOD == 'last':
    extend_with = 0
elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']:
    extend_with = 10000
else:
    raise ValueError('Finetuning method not recognised!')

# The 'new' method passes no pretrained weight path to the model builder.
weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None

# Create the output directory up front so that a missing directory cannot
# make the run fail at the write step, after training has already finished.
if not os.path.isdir(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)

for rerun_iter in range(5):
    for dset, path, nb_classes, use_f1_score in DATASETS:
        # debugging
        # NOTE(review): presumably guards against load_benchmark growing
        # `vocab` in place between iterations -- confirm.
        assert len(vocab) == nb_tokens, (
            'Expected {} vocabulary entries, found {}'.format(
                nb_tokens, len(vocab)))

        # Load dataset.
        data = load_benchmark(path, vocab, extend_with=extend_with)

        # Per-split views (index 0, 1, 2 of texts/labels). They are not used
        # below: the finetuning functions receive the full lists.
        (X_train, y_train) = (data['texts'][0], data['labels'][0])
        (X_val, y_val) = (data['texts'][1], data['labels'][1])
        (X_test, y_test) = (data['texts'][2], data['labels'][2])

        # With F1 scoring the model is always built with 2 output classes.
        # NOTE(review): presumably class_avg_finetune trains binary
        # classifiers per class -- confirm.
        nb_model_classes = 2 if use_f1_score else nb_classes
        model = torchmoji_transfer(
            nb_model_classes,
            weight_path,
            extend_embedding=data['added'])
        print(model)

        # Training
        print('Training: {}'.format(path))
        if use_f1_score:
            model, result = class_avg_finetune(model, data['texts'],
                                               data['labels'],
                                               nb_classes, data['batch_size'],
                                               FINETUNE_METHOD,
                                               verbose=VERBOSE)
            print('Overall F1 score (dset = {}): {}'.format(dset, result))
            metric_label = 'F1'
        else:
            model, result = finetune(model, data['texts'], data['labels'],
                                     nb_classes, data['batch_size'],
                                     FINETUNE_METHOD, metric='acc',
                                     verbose=VERBOSE)
            print('Test accuracy (dset = {}): {}'.format(dset, result))
            metric_label = 'Acc'

        # Write results
        results_path = '{}/{}_{}_{}_results.txt'.format(
            RESULTS_DIR, dset, FINETUNE_METHOD, rerun_iter)
        with open(results_path, "w") as results_file:
            results_file.write("{}: {}\n".format(metric_label, result))