OLD MODEL: DO NOT USE FOR LEADERBOARD

Model Card for ViT_Attempt_1

This is Google's Vision Transformer (ViT) base model, fine-tuned on the collected training data to regress GPS coordinates (latitude and longitude) from images.

Link: https://huggingface.co/google/vit-base-patch16-224-in21k

Normalization statistics (computed from the training set, used to denormalize model outputs):

lat_mean = 39.95164939753852
lat_std = 0.0007290994359226359
lon_mean = -75.191420541785
lon_std = 0.000733160718757529
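The statistics above imply standard z-score normalization of the targets. A minimal sketch of the transform and its inverse, using the constants from this card (the `normalize`/`denormalize` helper names are illustrative, not part of the released code):

```python
import torch

# Normalization statistics from this model card
lat_mean, lat_std = 39.95164939753852, 0.0007290994359226359
lon_mean, lon_std = -75.191420541785, 0.000733160718757529

mean = torch.tensor([lat_mean, lon_mean])
std = torch.tensor([lat_std, lon_std])

def normalize(coords):
    # Raw (lat, lon) in degrees -> normalized training targets
    return (coords - mean) / std

def denormalize(coords):
    # Model outputs -> raw (lat, lon) in degrees
    return coords * std + mean

# Round-tripping a sample coordinate recovers the original values
raw = torch.tensor([[39.9520, -75.1910]])
assert torch.allclose(denormalize(normalize(raw)), raw)
```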

import torch
from transformers import AutoConfig, AutoModelForImageClassification

model_name = "AppliedMLReedShreya/ViT_Attempt_1"
config = AutoConfig.from_pretrained(model_name)
config.num_labels = 2  # Two regression outputs: latitude and longitude

# Load the pre-trained ViT model
vit_model = AutoModelForImageClassification.from_pretrained(model_name, config=config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
vit_model = vit_model.to(device)

# Initialize lists to store predictions and actual values
all_preds = []
all_actuals = []

vit_model.eval()
with torch.no_grad():
    for images, gps_coords in val_dataloader:  # validation DataLoader, defined elsewhere
        images, gps_coords = images.to(device), gps_coords.to(device)

        outputs = vit_model(images).logits

        # Denormalize predictions and actual values
        preds = outputs.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()
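With `all_preds` and `all_actuals` as NumPy arrays of (lat, lon) pairs in degrees, validation error can be summarized per coordinate. A sketch with hypothetical values standing in for real predictions (the ~111 km-per-degree conversion is a standard rough approximation, with longitude scaled by the cosine of latitude):

```python
import numpy as np

# Hypothetical stand-ins for the all_preds / all_actuals arrays built above
all_preds = np.array([[39.9517, -75.1915], [39.9516, -75.1913]])
all_actuals = np.array([[39.9516, -75.1914], [39.9517, -75.1914]])

# Per-coordinate mean absolute error, in degrees
mae = np.abs(all_preds - all_actuals).mean(axis=0)
print(f"MAE latitude: {mae[0]:.6f} deg, longitude: {mae[1]:.6f} deg")

# Rough conversion to meters: ~111 km per degree of latitude;
# a degree of longitude shrinks by cos(latitude)
lat_m = mae[0] * 111_000
lon_m = mae[1] * 111_000 * np.cos(np.radians(all_actuals[:, 0].mean()))
print(f"~{lat_m:.1f} m latitude error, ~{lon_m:.1f} m longitude error")
```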
Model size: 85.8M parameters (F32, Safetensors)