import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from fastai.text.all import *
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

# fastai checkpoints pickle fastcore's L class, which must be allowlisted for
# torch.load(weights_only=True). Pass the class itself, not the string 'L'.
torch.serialization.add_safe_globals([L])


class QuestionDataset(Dataset):
    def __init__(self, X, y, tokenizer):
        self.text = X
        self.targets = y
        self.tok = tokenizer

    def __len__(self):
        return len(self.text)

    def __getitem__(self, idx):
        text = self.text[idx]
        targ = self.targets[idx]
        # Return fixed-length (30) token ids and the label as tensors.
        return (
            self.tok(text, padding='max_length', truncation=True, max_length=30,
                     return_tensors="pt")["input_ids"][0],
            tensor(targ),
        )

    def new_empty(self):
        # fastai calls new_empty() on the dataset when it needs an empty copy
        # (e.g. when exporting the Learner), so this plain torch Dataset
        # implements it too.
        return QuestionDataset([], [], self.tok)


class BertClassifier(nn.Module):
    """Thin wrapper that exposes only the logits of the underlying model."""

    def __init__(self, bert):
        super().__init__()
        self.bert = bert

    def forward(self, x):
        return self.bert(x).logits


class ModelLoader:
    def __init__(self):
        self.path = "DeBERTaV3/input/"
        self.train_df = pd.read_csv(self.path + "train.csv")
        self.test_df = pd.read_csv(self.path + "test.csv")
        self.tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-base')
        self.df = self.train_df

        # Stratified train/validation split (1% held out for validation).
        self.X_train, self.X_valid, self.y_train, self.y_valid = train_test_split(
            self.df["question_text"].tolist(),
            self.df["target"].tolist(),
            stratify=self.df["target"],
            test_size=0.01,
        )

        self.train_ds = QuestionDataset(self.X_train, self.y_train, self.tokenizer)
        self.valid_ds = QuestionDataset(self.X_valid, self.y_valid, self.tokenizer)
        self.train_dl = DataLoader(self.train_ds, batch_size=256)
        self.valid_dl = DataLoader(self.valid_ds, batch_size=512)
        self.dls = DataLoaders(self.train_dl, self.valid_dl)

        # Replace the stock classification head with a wider two-layer head
        # (768 is the hidden size of deberta-v3-base).
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            'microsoft/deberta-v3-base').train()
        self.classifier = nn.Sequential(
            nn.Linear(768, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 2),
        )
        self.bert.classifier = self.classifier
        self.model = BertClassifier(self.bert)

        # Class weights: the minority (positive) class gets a slightly larger
        # weight, since n_1 < n_0 implies n / (n + n_1) > n / (n + n_0).
        # .float() keeps the tensor float32; numpy division would otherwise
        # produce a float64 tensor that CrossEntropyLoss rejects.
        n_0 = (self.train_df["target"] == 0).sum()
        n_1 = (self.train_df["target"] == 1).sum()
        n = n_0 + n_1
        self.class_weights = tensor([n / (n + n_0), n / (n + n_1)]).float()

        self.learn = Learner(
            self.dls, self.model,
            loss_func=nn.CrossEntropyLoss(weight=self.class_weights),
            metrics=[accuracy, F1Score()],
        ).to_fp16()

        try:
            # First attempt: load the checkpoint with weights_only=True.
            self.learn.load('fastai_QIQC-deberta-v3', strict=False, weights_only=True)
        except Exception as e:
            print(f"Warning: Could not load with weights_only=True. "
                  f"Falling back to default loading. Error: {e}")
            # Second attempt: fall back to regular loading.
            self.learn.load('fastai_QIQC-deberta-v3', strict=False)

    def get_learner(self):
        return self.learn
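

# --- Usage sketch (not part of the original module) ---
# A minimal, hedged example of how this loader might be driven, assuming the
# CSVs and the 'fastai_QIQC-deberta-v3' checkpoint exist at the paths the
# class hard-codes, and that class 1 is the positive ("insincere") label, as
# in the Quora Insincere Questions data this appears to target. The question
# string below is purely illustrative.
if __name__ == "__main__":
    loader = ModelLoader()
    learn = loader.get_learner()

    # Tokenize a single question exactly as QuestionDataset does, then take
    # the argmax over the two logits.
    ids = loader.tokenizer("Why is the sky blue?", padding='max_length',
                           truncation=True, max_length=30,
                           return_tensors="pt")["input_ids"]
    model = learn.model.eval()
    device = next(model.parameters()).device  # match whatever device load() used
    with torch.no_grad():
        logits = model(ids.to(device))
    print("predicted class:", logits.argmax(dim=-1).item())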