Spaces:

Jfink09
/

DeepFundus

Running

App Files Files Community

Jfink09 committed on Feb 1, 2023

Commit

18e1e5b

1 Parent(s): 8fb6219

Delete resnet50_deepfundus.py

Browse files

Files changed (1) hide show

resnet50_deepfundus.py +0 -603

resnet50_deepfundus.py DELETED Viewed

@@ -1,603 +0,0 @@
-# -*- coding: utf-8 -*-
-"""ResNet50_DeepFundus.ipynb
-Automatically generated by Colaboratory.
-Original file is located at
-    https://colab.research.google.com/drive/1pd56CapAEjZ8AHAW5bi0uMm6ZzJlOpDZ
-"""
-######################################################### Use block of code if dataset is on GitHub #######################################################
-# import os
-# import requests
-# import zipfile
-# from pathlib import Path
-# # Setup path to data folder
-# data_path = Path("data/")
-# image_path = data_path / "deepfundus"
-# # If the image folder doesn't exist, download it and prepare it...
-# if image_path.is_dir():
-#     print(f"{image_path} directory exists.")
-# else:
-#     print(f"Did not find {image_path} directory, creating one...")
-#     image_path.mkdir(parents=True, exist_ok=True)
-# # Download fundus data
-# with open(data_path / "deepfundus.zip", "wb") as f:
-#     request = requests.get("https://github.com/jfink09/DeepFundus/raw/main/deepfundus.zip")
-#     print("Downloading fundus data...")
-#     f.write(request.content)
-# # Unzip fundus data
-# with zipfile.ZipFile(data_path / "deepfundus.zip", "r") as zip_ref:
-#     print("Unzipping fundus data...")
-#     zip_ref.extractall(image_path)
-# # Remove zip file
-# os.remove(data_path / "deepfundus.zip")
-######################################### Use commented out code if dataset was downloaded from GitHub ######################################################
-# # Setup train and testing paths
-# train_dir = image_path / "train"
-# test_dir = image_path / "test"
-# train_dir, test_dir
-from pathlib import Path
-# Set up train and test paths (relative paths that point into a Google Drive folder)
-# NOTE(review): drive.mount() only appears much later in this export - confirm the Drive is mounted before this runs
-train_dir = Path("drive/MyDrive/data/train")
-test_dir = Path("drive/MyDrive/data/test")
-train_dir, test_dir # notebook cell output; a bare expression has no effect when run as a script
-from torchvision import datasets, transforms
-# Create a simple transform: resize every image to 64x64 and convert it to a tensor
-data_transform = transforms.Compose([
-    transforms.Resize((64, 64)),
-    transforms.ToTensor(),
-])
-# Use ImageFolder to create dataset(s)
-# NOTE: in the visible code these two datasets are only printed below
-train_data = datasets.ImageFolder(root=train_dir, # target folder of images
-                                  transform=data_transform, # transforms to perform on data (images)
-                                  target_transform=None) # transforms to perform on labels (if necessary)
-test_data = datasets.ImageFolder(root=test_dir,
-                                 transform=data_transform)
-print(f"Train data:\n{train_data}\nTest data:\n{test_data}")
-# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
-try:
-    import torch
-    import torchvision
-    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
-    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
-    print(f"torch version: {torch.__version__}")
-    print(f"torchvision version: {torchvision.__version__}")
-except:
-    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
-    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
-    import torch
-    import torchvision
-    print(f"torch version: {torch.__version__}")
-    print(f"torchvision version: {torchvision.__version__}")
-# Continue with regular imports
-import matplotlib.pyplot as plt
-import torch
-import torchvision
-from torch import nn
-from torchvision import transforms
-# Try to get torchinfo, install it if it doesn't work
-try:
-    from torchinfo import summary
-except:
-    print("[INFO] Couldn't find torchinfo... installing it.")
-    !pip install -q torchinfo
-    from torchinfo import summary
-# Try to import the going_modular directory, download it from GitHub if it doesn't work
-try:
-    from going_modular.going_modular import data_setup, engine
-except:
-    # Get the going_modular scripts
-    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
-    !git clone https://github.com/jfink09/optical-funduscopic-convolutional-neural-network
-    !mv optical-funduscopic-convolutional-neural-network/going_modular .
-    !rm -rf optical-funduscopic-convolutional-neural-network
-    from going_modular.going_modular import data_setup, engine
-# Setup device agnostic code: use the GPU when one is available, otherwise the CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
-device
-# NOTE: `normalize` is not referenced again in the visible code; manual_transforms below builds its own Normalize
-normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                 std=[0.229, 0.224, 0.225])
-# Create a transforms pipeline manually (required for torchvision < 0.13)
-manual_transforms = transforms.Compose([
-    transforms.Resize((224, 224)), # 1. Reshape all images to 224x224 (though some models may require different sizes)
-    transforms.ToTensor(), # 2. Turn image values to between 0 & 1
-    transforms.Normalize(mean=[0.485, 0.456, 0.406], # 3. A mean of [0.485, 0.456, 0.406] (across each colour channel)
-                         std=[0.229, 0.224, 0.225]) # 4. A standard deviation of [0.229, 0.224, 0.225] (across each colour channel),
-])
-# Create training and testing DataLoaders as well as get a list of class names
-train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
-                                                                               test_dir=test_dir,
-                                                                               transform=manual_transforms, # resize, convert images to between 0 & 1 and normalize them
-                                                                               batch_size=32) # set mini-batch size to 32
-train_dataloader, test_dataloader, class_names
-# Get a set of pretrained model weights
-weights = torchvision.models.ResNet50_Weights.DEFAULT # .DEFAULT = best available weights from pretraining on ImageNet
-weights
-# Get the transforms used to create our pretrained weights
-# NOTE: auto_transforms is only used by the commented-out DataLoader setup below
-auto_transforms = weights.transforms()
-auto_transforms
-# # Create training and testing DataLoaders as well as get a list of class names
-# train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
-#                                                                                test_dir=test_dir,
-#                                                                                transform=auto_transforms, # perform same data transforms on our own data as the pretrained model
-#                                                                                batch_size=32) # set mini-batch size to 32
-# train_dataloader, test_dataloader, class_names
-# OLD: Setup the model with pretrained weights and send it to the target device (this was prior to torchvision v0.13)
-# model = torchvision.models.efficientnet_b0(pretrained=True).to(device) # OLD method (with pretrained=True)
-# NEW: Setup the model with pretrained weights and send it to the target device (torchvision v0.13+)
-weights = torchvision.models.ResNet50_Weights.DEFAULT # .DEFAULT = best available weights
-model = torchvision.models.resnet50(weights=weights).to(device)
-#model # uncomment to output (it's very long)
-# Print a summary of the model using torchinfo for a batch of 32 RGB 224x224 inputs
-summary(model=model,
-        input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape"
-        # col_names=["input_size"], # uncomment for smaller output
-        col_names=["input_size", "output_size", "num_params", "trainable"],
-        col_width=20,
-        row_settings=["var_names"]
-)
-# Set the manual seeds
-torch.manual_seed(42)
-torch.cuda.manual_seed(42)
-# Get the length of class_names (one output unit for each class)
-output_shape = len(class_names)
-# Recreate the classifier layer and seed it to the target device
-model.classifier = torch.nn.Sequential(
-    torch.nn.Dropout(p=0.2, inplace=True),
-    torch.nn.Linear(in_features=2048,
-                    out_features=output_shape, # same number of output units as our number of classes
-                    bias=True)).to(device)
-# Define loss and optimizer
-loss_fn = nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-# Set the random seeds
-torch.manual_seed(42)
-torch.cuda.manual_seed(42)
-# Start the timer
-from timeit import default_timer as timer
-start_time = timer()
-# Setup training and save the results
-results = engine.train(model=model,
-                        train_dataloader=train_dataloader,
-                        test_dataloader=test_dataloader,
-                        optimizer=optimizer,
-                        loss_fn=loss_fn,
-                        epochs=20,
-                        device=device)
-# End the timer and print out how long it took
-end_time = timer()
-print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")
-# Get the plot_loss_curves() function from helper_functions.py, download the file if we don't have it
-try:
-    from helper_functions import plot_loss_curves
-except:
-    print("[INFO] Couldn't find helper_functions.py, downloading...")
-    with open("helper_functions.py", "wb") as f:
-        import requests
-        request = requests.get("https://github.com/jfink09/optical-funduscopic-convolutional-neural-network/raw/main/helper_functions.py")
-        f.write(request.content)
-    from helper_functions import plot_loss_curves
-# Plot the loss curves of our model
-plot_loss_curves(results)
-from typing import List, Tuple
-from PIL import Image
-# 1. Take in a trained model, class names, image path, image size, a transform and target device
-def pred_and_plot_image(model: torch.nn.Module,
-                        image_path: str,
-                        class_names: List[str],
-                        image_size: Tuple[int, int] = (224, 224),
-                        transform: torchvision.transforms = None,
-                        device: torch.device=device):
-    # 2. Open image
-    img = Image.open(image_path)
-    # 3. Create transformation for image (if one doesn't exist)
-    if transform is not None:
-        image_transform = transform
-    else:
-        image_transform = transforms.Compose([
-            transforms.Resize(image_size),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                 std=[0.229, 0.224, 0.225]),
-        ])
-    ### Predict on image ###
-    # 4. Make sure the model is on the target device
-    model.to(device)
-    # 5. Turn on model evaluation mode and inference mode
-    model.eval()
-    with torch.inference_mode():
-      # 6. Transform and add an extra dimension to image (model requires samples in [batch_size, color_channels, height, width])
-      transformed_image = image_transform(img).unsqueeze(dim=0)
-      # 7. Make a prediction on image with an extra dimension and send it to the target device
-      target_image_pred = model(transformed_image.to(device))
-    # 8. Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
-    target_image_pred_probs = torch.softmax(target_image_pred, dim=1)
-    # 9. Convert prediction probabilities -> prediction labels
-    target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1)
-    # 10. Plot image with predicted label and probability
-    plt.figure()
-    plt.imshow(img)
-    plt.title(f"Pred: {class_names[target_image_pred_label]} | Prob: {target_image_pred_probs.max():.3f}")
-    plt.axis(False);
-# Get a random list of image paths from test set
-import random
-num_images_to_plot = 3
-test_image_path_list = list(Path(test_dir).glob("*/*.jpg")) # get list all image paths from test data
-test_image_path_sample = random.sample(population=test_image_path_list, # go through all of the test image paths
-                                       k=num_images_to_plot) # randomly select 'k' image paths to pred and plot
-# Make predictions on and plot the images
-for image_path in test_image_path_sample:
-    pred_and_plot_image(model=model,
-                        image_path=image_path,
-                        class_names=class_names,
-                        # transform=weights.transforms(), # optionally pass in a specified transform from our pretrained model weights
-                        image_size=(224, 224))
-data_path = Path("data/")
-image_path = data_path / "deepfundus"
-# If the image folder doesn't exist, download it and prepare it...
-if image_path.is_dir():
-    print(f"{image_path} directory exists.")
-else:
-    print(f"Did not find {image_path} directory, creating one...")
-    image_path.mkdir(parents=True, exist_ok=True)
-# Import/install Gradio
-try:
-    import gradio as gr
-except:
-    !pip -q install gradio
-    import gradio as gr
-print(f"Gradio version: {gr.__version__}")
-from google.colab import drive
-drive.mount('/content/drive')
-# Put ResNet50 on CPU
-model.to("cpu")
-# Check the device
-next(iter(model.parameters())).device
-# 1. Setup pretrained ResNet50 weights
-resnet50_weights = torchvision.models.ResNet50_Weights.DEFAULT
-# 2. Get ResNet50 transforms
-resnet50_transforms = resnet50_weights.transforms()
-# 3. Setup pretrained model
-resnet50 = torchvision.models.resnet50(weights=resnet50_weights) # could also use weights="DEFAULT"
-# 4. Freeze the base layers in the model (this will freeze all layers to begin with)
-for param in resnet50.parameters():
-    param.requires_grad = True # Set to False for model's other than ResNet
-# 5. Update the classifier head
-resnet50.classifier = nn.Sequential(
-    nn.Dropout(p=0.3, inplace=True), # keep dropout layer same
-    nn.Linear(in_features=2048, # keep in_features same
-              out_features=8)) # change out_features to suit our number of classes # 4
-def create_resnet50_model(num_classes:int=8, # 4
-                          seed:int=42):
-    """Creates an ResNet50 feature extractor model and transforms.
-    Args:
-        num_classes (int, optional): number of classes in the classifier head.
-            Defaults to 3.
-        seed (int, optional): random seed value. Defaults to 42.
-    Returns:
-        model (torch.nn.Module): ResNet50 feature extractor model.
-        transforms (torchvision.transforms): ResNet50 image transforms.
-    """
-    # 1, 2, 3. Create ResNet50 pretrained weights, transforms and model
-    weights = torchvision.models.ResNet50_Weights.DEFAULT
-    transforms = weights.transforms()
-    model = torchvision.models.resnet50(weights=weights)
-    # 4. Freeze all layers in base model
-    for param in model.parameters():
-        param.requires_grad = True # Set to False for model's other than ResNet
-    # 5. Change classifier head with random seed for reproducibility
-    torch.manual_seed(seed)
-    model.classifier = nn.Sequential(
-        nn.Dropout(p=0.3, inplace=True),
-        nn.Linear(in_features=2048
-                  , out_features=num_classes), # If using EffnetB2 in_features = 1408, EffnetB0 in_features = 1280, if ResNet50 in_features = 2048
-    )
-    return model, transforms
-# Build the ResNet50 model and its transforms (this rebinds any earlier `resnet50` / `resnet50_transforms`)
-resnet50, resnet50_transforms = create_resnet50_model(num_classes=8, # 4
-                                                      seed=42)
-from torchinfo import summary
-# Print ResNet50 model summary for a single RGB 224x224 input
-summary(resnet50,
-        input_size=(1, 3, 224, 224),
-        col_names=["input_size", "output_size", "num_params", "trainable"],
-        col_width=20,
-        row_settings=["var_names"])
-# Setup DataLoaders using the transforms that came with the pretrained weights (also rebinds class_names)
-from going_modular.going_modular import data_setup
-train_dataloader_resnet50, test_dataloader_resnet50, class_names = data_setup.create_dataloaders(train_dir=train_dir,
-                                                                                                 test_dir=test_dir,
-                                                                                                 transform=resnet50_transforms,
-                                                                                                 batch_size=32)
-from going_modular.going_modular import engine
-# Setup optimizer
-optimizer = torch.optim.Adam(params=resnet50.parameters(),
-                             lr=1e-3)
-# Setup loss function
-loss_fn = torch.nn.CrossEntropyLoss()
-# Train the model (NOTE: the set_seeds() call is commented out, so no seed is set at this point)
-#set_seeds()
-resnet50_results = engine.train(model=resnet50,
-                                train_dataloader=train_dataloader_resnet50,
-                                test_dataloader=test_dataloader_resnet50,
-                                epochs=10,
-                                optimizer=optimizer,
-                                loss_fn=loss_fn,
-                                device=device)
-# Plot the loss curves of the ResNet50 run
-from helper_functions import plot_loss_curves
-plot_loss_curves(resnet50_results)
-from going_modular.going_modular import utils
-# Save the model
-utils.save_model(model=resnet50,
-                 target_dir="models",
-                 model_name="pretrained_resnet50_feature_extractor_drappcompressed.pth")
-from pathlib import Path
-# Get the model size in bytes then convert to whole megabytes (integer division rounds down)
-pretrained_resnet50_model_size = Path("models/pretrained_resnet50_feature_extractor_drappcompressed.pth").stat().st_size // (1024*1024) # division converts bytes to megabytes (roughly)
-print(f"Pretrained ResNet50 feature extractor model size: {pretrained_resnet50_model_size} MB")
-# Count number of parameters in ResNet50
-resnet50_total_params = sum(torch.numel(param) for param in resnet50.parameters())
-resnet50_total_params
-# Create a dictionary with ResNet50 statistics (last-epoch test loss/accuracy, parameter count, file size)
-resnet50_stats = {"test_loss": resnet50_results["test_loss"][-1],
-                  "test_acc": resnet50_results["test_acc"][-1],
-                  "number_of_parameters": resnet50_total_params,
-                  "model_size (MB)": pretrained_resnet50_model_size}
-resnet50_stats
-from pathlib import Path
-# Get all test data paths (only '.jpg' files one directory level below test_dir are matched)
-print(f"[INFO] Finding all filepaths ending with '.jpg' in directory: {test_dir}")
-test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
-test_data_paths[:5]
-import pathlib
-import torch
-from PIL import Image
-from timeit import default_timer as timer
-from tqdm.auto import tqdm
-from typing import List, Dict
-# 1. Create a function to return a list of dictionaries with sample, truth label, prediction, prediction probability and prediction time
-def pred_and_store(paths: List[pathlib.Path],
-                   model: torch.nn.Module,
-                   transform: torchvision.transforms,
-                   class_names: List[str],
-                   device: str = "cuda" if torch.cuda.is_available() else "cpu") -> List[Dict]:
-    # 2. Create an empty list to store prediction dictionaires
-    pred_list = []
-    # 3. Loop through target paths
-    for path in tqdm(paths):
-        # 4. Create empty dictionary to store prediction information for each sample
-        pred_dict = {}
-        # 5. Get the sample path and ground truth class name
-        pred_dict["image_path"] = path
-        class_name = path.parent.stem
-        pred_dict["class_name"] = class_name
-        # 6. Start the prediction timer
-        start_time = timer()
-        # 7. Open image path
-        img = Image.open(path).convert('RGB')
-        # 8. Transform the image, add batch dimension and put image on target device
-        transformed_image = transform(img).unsqueeze(0).to(device)
-        # 9. Prepare model for inference by sending it to target device and turning on eval() mode
-        model.to(device)
-        model.eval()
-        # 10. Get prediction probability, predicition label and prediction class
-        with torch.inference_mode():
-            pred_logit = model(transformed_image) # perform inference on target sample
-            pred_prob = torch.softmax(pred_logit, dim=1) # turn logits into prediction probabilities
-            pred_label = torch.argmax(pred_prob, dim=1) # turn prediction probabilities into prediction label
-            pred_class = class_names[pred_label.cpu()] # hardcode prediction class to be on CPU
-            # 11. Make sure things in the dictionary are on CPU (required for inspecting predictions later on)
-            pred_dict["pred_prob"] = round(pred_prob.unsqueeze(0).max().cpu().item(), 4)
-            pred_dict["pred_class"] = pred_class
-            # 12. End the timer and calculate time per pred
-            end_time = timer()
-            pred_dict["time_for_pred"] = round(end_time-start_time, 4)
-        # 13. Does the pred match the true label?
-        pred_dict["correct"] = class_name == pred_class
-        # 14. Add the dictionary to the list of preds
-        pred_list.append(pred_dict)
-    # 15. Return list of prediction dictionaries
-    return pred_list
-# Make predictions across test dataset with ResNet50
-resnet50_test_pred_dicts = pred_and_store(paths=test_data_paths,
-                                          model=resnet50,
-                                          transform=resnet50_transforms,
-                                          class_names=class_names,
-                                          device="cpu") # make predictions on CPU
-# Inspect the first 2 prediction dictionaries
-resnet50_test_pred_dicts[:2]
-# Turn the test_pred_dicts into a DataFrame (one row per test image)
-import pandas as pd
-resnet50_test_pred_df = pd.DataFrame(resnet50_test_pred_dicts)
-resnet50_test_pred_df.head()
-# Check number of correct predictions
-resnet50_test_pred_df.correct.value_counts()
-# Find the average time per prediction, rounded to 4 decimal places
-resnet50_average_time_per_pred = round(resnet50_test_pred_df.time_for_pred.mean(), 4)
-print(f"ResNet50 average time per prediction: {resnet50_average_time_per_pred} seconds")
-# Add ResNet50 average prediction time to stats dictionary
-resnet50_stats["time_per_pred_cpu"] = resnet50_average_time_per_pred
-resnet50_stats
-# Turn the stats dictionary into a single-row DataFrame
-df = pd.DataFrame([resnet50_stats])
-# Add column for model names
-df["model"] = ["ResNet50"]
-# Convert accuracy to percentages
-# NOTE(review): assumes test_acc is a fraction in [0, 1] - confirm against engine.train
-df["test_acc"] = round(df["test_acc"] * 100, 2)
-df
-# Put ResNet50 on CPU
-resnet50.to("cpu")
-# Check the device
-next(iter(resnet50.parameters())).device
-from typing import Tuple, Dict
-def predict(img) -> Tuple[Dict, float]:
-    """Transforms and performs a prediction on img and returns prediction and time taken.
-    """
-    # Start the timer
-    start_time = timer()
-    # Transform the target image and add a batch dimension
-    img = resnet50_transforms(img).unsqueeze(0)
-    # Put model into evaluation mode and turn on inference mode
-    resnet50.eval()
-    with torch.inference_mode():
-        # Pass the transformed image through the model and turn the prediction logits into prediction probabilities
-        pred_probs = torch.softmax(resnet50(img), dim=1)
-    # Create a prediction label and prediction probability dictionary for each prediction class (this is the required format for Gradio's output parameter)
-    pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}
-    # Calculate the prediction time
-    pred_time = round(timer() - start_time, 5)
-    # Return the prediction dictionary and prediction time
-    return pred_labels_and_probs, pred_time
-# Create a list of example inputs to our Gradio demo (each example is a one-element list holding a file path)
-# NOTE: random.sample raises ValueError if test_data_paths holds fewer than 4 paths
-example_list = [[str(filepath)] for filepath in random.sample(test_data_paths, k=4)]
-example_list
-import gradio as gr
-# Create title, description and article strings
-title = "DeepFundus 👀"
-description = "A ResNet50 feature extractor computer vision model to classify retina pathology from optical funduscopic images."
-article = "Created for fun."
-# Create the Gradio demo
-demo = gr.Interface(fn=predict, # mapping function from input to output
-                    inputs=gr.Image(type="pil"), # what are the inputs?
-                    outputs=[gr.Label(num_top_classes=8, label="Predictions"), # what are the outputs?
-                             gr.Number(label="Prediction time (s)")], # our fn has two outputs, therefore we have two outputs
-                    examples=example_list,
-                    title=title,
-                    description=description,
-                    article=article)
-# Launch the demo!
-demo.launch(debug=False, # print errors locally?
-            share=True) # generate a publicly shareable URL?