question about accuracy
I tried to reproduce your code, but I found that the test accuracy can only reach about 0.886. My settings are based on your training settings. I will attach my code below. Can you please take a look at the problem or provide the original code?
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import timm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import io
from PIL import Image
import pandas as pd
import torch
from torch.utils.data import Dataset
class CIFAR100ParquetDataset(Dataset):
def init(self, parquet_file, transform=None):
self.data = pd.read_parquet(parquet_file)
self.transform = transform
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
img_dict = self.data.iloc[idx]["img"]
img_bytes = img_dict["bytes"]
img = Image.open(io.BytesIO(img_bytes))
label = self.data.iloc[idx]["fine_label"]
if self.transform:
img = self.transform(img)
return img, label
from torch.utils.data import DataLoader
from torchvision import transforms
transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5071, 0.4865, 0.4409], std=[0.2673, 0.2564, 0.2762]),
])
train_parquet = "/data/yjzhang/desktop/try/ckpt/cifar100/2/cifar100/train-00000-of-00001.parquet"
test_parquet = "/data/yjzhang/desktop/try/ckpt/cifar100/2/cifar100/test-00000-of-00001.parquet"
train_dataset = CIFAR100ParquetDataset(train_parquet, transform=transform)
test_dataset = CIFAR100ParquetDataset(test_parquet, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
model = timm.create_model(
"vit_base_patch16_224.orig_in21k_ft_in1k",
pretrained=False,
num_classes=100
)
pth_path = "/data/yjzhang/desktop/try/ckpt/cifar100/4/model.pth"
state_dict = torch.load(pth_path, map_location=device)
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
print(f"Missing keys: {missing_keys}")
print(f"Unexpected keys: {unexpected_keys}")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
def train(model, loader, criterion, optimizer, device):
model.train()
total_loss = 0
correct = 0
total_samples = 0
pbar = tqdm(loader, desc="Training", leave=False)
for inputs, targets in pbar:
inputs, targets = inputs.to(device), targets.to(device)
outputs = model(inputs)
loss = criterion(outputs, targets)
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_loss += loss.item() * inputs.size(0)
_, predicted = outputs.max(1)
correct += predicted.eq(targets).sum().item()
total_samples += inputs.size(0)
avg_loss = total_loss / total_samples
accuracy = correct / total_samples
pbar.set_postfix({"Loss": f"{avg_loss:.4f}", "Acc": f"{accuracy:.4f}"})
return total_loss / total_samples, correct / total_samples
def test(model, loader, criterion, device):
model.eval()
total_loss = 0
correct = 0
total_samples = 0
pbar = tqdm(loader, desc="Testing", leave=False)
with torch.no_grad():
for inputs, targets in pbar:
inputs, targets = inputs.to(device), targets.to(device)
outputs = model(inputs)
loss = criterion(outputs, targets)
total_loss += loss.item() * inputs.size(0)
_, predicted = outputs.max(1)
correct += predicted.eq(targets).sum().item()
total_samples += inputs.size(0)
avg_loss = total_loss / total_samples
accuracy = correct / total_samples
pbar.set_postfix({"Loss": f"{avg_loss:.4f}", "Acc": f"{accuracy:.4f}"})
return total_loss / total_samples, correct / total_samples
num_epochs = 10
for epoch in range(num_epochs):
print(f"Epoch {epoch + 1}/{num_epochs}")
# train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
test_loss, test_acc = test(model, test_loader, criterion, device)
scheduler.step()
# print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
torch.save(model.state_dict(), "vit_cifar100_finetuned.pth")
print("Training complete. Model saved as vit_cifar100_finetuned.pth")
Hi!
The models were pre-trained with transforms.Normalize(mean=[0.5, 0.5 0.5], std=[0.5, 0.5, 0.5])
.
Hope this helps!
Thank you so much !!