mousie

Reputation: 21

I don't know why I'm receiving "Exception has occurred" in this situation

I'm trying to run the code from https://github.com/DDI-Dataset/DDI-Code with its dataset (already downloaded and placed in a folder, as instructed by the author), but when I run it under the Python debugger in VS Code I get the message "Exception has occurred: Exception Please visit https://stanfordaimi.azurewebsites.net/datasets/35866158-8196-48d8-87bf-50dca81df965 to download the DDI dataset."

Below is the code from the file ddi_dataset.py.

"""Code for loading DDI Dataset."""

from torch.utils.data import Subset
from torchvision.datasets import ImageFolder
from torchvision import transforms as T
import os
import pandas as pd
import numpy as np

means = [0.485, 0.456, 0.406]
stds  = [0.229, 0.224, 0.225]
test_transform = T.Compose([
    lambda x: x.convert('RGB'),
    T.Resize(299),
    T.CenterCrop(299),
    T.ToTensor(),
    T.Normalize(mean=means, std=stds)
])

class DDI_Dataset(ImageFolder):
    _DDI_download_link = "https://stanfordaimi.azurewebsites.net/datasets/35866158-8196-48d8-87bf-50dca81df965"
    """DDI Dataset.

    Args:
        root     (str): Root directory of dataset.
        csv_path (str): Path to the metadata CSV file. Defaults to `{root}/ddi_metadata.csv`
        transform     : Function to transform and collate image input. (can use test_transform from this file) 
    """ 
    # root = 'C:\\Users\\User\\OneDrive\\desktop\\CSproject\\datasets\\DDI\\images'
    def __init__(self, root, csv_path=None, download=True, transform=None, *args, **kwargs):
        if csv_path is None:
            csv_path = os.path.join(root, "ddi_metadata.csv")
        if not os.path.exists(csv_path) and download:
            raise Exception(f"Please visit <{DDI_Dataset._DDI_download_link}> to download the DDI dataset.")
        assert os.path.exists(csv_path), f"Path not found <{csv_path}>."
        super(DDI_Dataset, self).__init__(root, *args, transform=transform, **kwargs)
        self.annotations = pd.read_csv(csv_path)
        m_key = 'malignant'
        if m_key not in self.annotations:
            self.annotations[m_key] = self.annotations['malignancy(malig=1)'].apply(lambda x: x==1)

    def __getitem__(self, index):
        img, target = super(DDI_Dataset, self).__getitem__(index)
        path = self.imgs[index][0]        
        annotation = dict(self.annotations[self.annotations.DDI_file==path.split("/")[-1]])
        target = int(annotation['malignant'].item()) # 1 if malignant, 0 if benign
        skin_tone = annotation['skin_tone'].item() # Fitzpatrick- 12, 34, or 56
        return path, img, target, skin_tone

    """Return a subset of the DDI dataset based on skin tones and malignancy of lesion.

    Args:
        skin_tone    (list of int): Which skin tones to include in the subset. Options are {12, 34, 56}.
        diagnosis    (list of str): Include malignant and/or benign images. Options are {"benign", "malignant"}
    """
    def subset(self, skin_tone=None, diagnosis=None):
        skin_tone = [12, 34, 56] if skin_tone is None else skin_tone
        diagnosis = ["benign", "malignant"] if diagnosis is None else diagnosis
        for si in skin_tone: 
            assert si in [12,34,56], f"{si} is not a valid skin tone"
        for di in diagnosis: 
            assert di in ["benign", "malignant"], f"{di} is not a valid diagnosis"
        indices = np.where(self.annotations['skin_tone'].isin(skin_tone) & \
                           self.annotations['malignant'].isin([di=="malignant" for di in diagnosis]))[0]
        return Subset(self, indices)

I thought the problem was that the variable root did not point to an actual file path, so I tried assigning it an absolute path (the commented-out line above, root = 'C:\\Users\\User\\OneDrive\\desktop\\CSproject\\datasets\\DDI\\images'); however, the same error message came up.
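
If I'm reading the constructor correctly, the check that raises boils down to the following (root being the literal "DDI" string that eval_ddi.py passes in):

import os

root = "DDI"  # eval_ddi.py calls DDI_Dataset("DDI", transform=test_transform)
csv_path = os.path.join(root, "ddi_metadata.csv")  # "DDI\ddi_metadata.csv" on Windows
os.path.exists(csv_path)  # this comes back False for me, so the Exception is raised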

Below are the relevant file locations on my computer.

ddi_dataset.py: C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI\ddi_dataset.py

Images: C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI\images ('images' is the folder where I put the PNG files and ddi_metadata.csv)
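
So the layout on disk currently looks like this:

C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI
    ddi_dataset.py
    eval_ddi.py
    images
        (all the .png files)
        ddi_metadata.csv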

Additionally, below is the code from eval_ddi.py, where DDI_Dataset is called.

"""
Code to evaluate the models trained for our paper, 
    "Disparities in Dermatology AI Performance on a Diverse, 
     Curated Clinical Image Set",
on the DDI dataset. 

Note: assumes DDI data is organized as
    ./DDI
        /images
            /000001.png
            /000002.png
            ...
        /ddi_metadata.csv

(After downloading from the Stanford AIMI repository, this requires moving all .png files into a new subdirectory titled "images".)

------------------------------------------------------
Examples:

(1) w/command line interface
# evaluate DeepDerm on DDI and store results in `DDI-results`
>>>python3 eval_ddi.py --model=DeepDerm --data_dir=DDI --eval_dir=DDI-results 

(2) w/python functions
>>>import eval_ddi
>>>import ddi_model
>>>model = ddi_model.load_model("DeepDerm") # load DeepDerm model
>>>eval_results = eval_ddi.eval_model(model, "DDI") # evaluate images in DDI folder
"""

import argparse
from ddi_dataset import DDI_Dataset, test_transform
from ddi_model import load_model
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
from sklearn.metrics import (f1_score, balanced_accuracy_score, 
    classification_report, confusion_matrix, roc_curve, auc)
import torch
import tqdm


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default="DDI-models", 
        help="File path for where to save models.")
    parser.add_argument('--model', type=str, default="DeepDerm", 
        help="Name of the model to load (HAM10000, DeepDerm, GroupDRO, CORAL,"\
             " or CDANN).")
    parser.add_argument('--no_download', action='store_true', default=False,
        help="Set to disable downloading models.")
    parser.add_argument('--data_dir', type=str, default="DDI", 
        help="Folder containing dataset to load. Structure should be: (1) `[data_dir]/images` contains all images; (2) `[data_dir]/ddi_metadata.csv` contains the CSV metadata for the DDI dataset")
    parser.add_argument('--eval_dir', type=str, default="DDI-results", 
        help="Folder to store evaluation results.")
    parser.add_argument('--use_gpu', action='store_true', default=False,
        help="Set to use GPU for evaluation.")
    parser.add_argument('--plot', action='store_true', default=False,
        help="Set to show ROC plot.")
    args = parser.parse_args()
    return args

def eval_model(model, dataset, use_gpu=False, show_plot=False):
    """Evaluate loaded model on provided image dataset. Assumes supplied image 
    directory corresponds to `root` input for torchvision.datasets.ImageFolder
    class. Assumes the data is split into binary/malignant labels, as this is 
    what our models are trained+evaluated on."""

    use_gpu = (use_gpu and torch.cuda.is_available())
    device = torch.device("cuda") if use_gpu else torch.device("cpu")

    # load dataset
    dataloader = torch.utils.data.DataLoader(
                    dataset,
                    batch_size=32, shuffle=False,
                    num_workers=0, pin_memory=use_gpu)

    # prepare model for evaluation
    model.to(device).eval()

    # log output for all images in dataset
    hat, star, all_paths = [], [], []
    for batch in tqdm.tqdm(enumerate(dataloader)):
        i, (paths, images, target, skin_tone) = batch
        images = images.to(device)
        target = target.to(device)

        with torch.no_grad():
            output = model(images)

        hat.append(output[:,1].detach().cpu().numpy())
        star.append(target.cpu().numpy())
        all_paths.append(paths)

    hat = np.concatenate(hat)
    star = np.concatenate(star)
    all_paths = np.concatenate(all_paths)
    threshold = model._ddi_threshold
    m_name = model._ddi_name
    m_web_path = model._ddi_web_path

    report = classification_report(star, (hat>threshold).astype(int), 
        target_names=["benign","malignant"])
    fpr, tpr, _ = roc_curve(star, hat, pos_label=1,
                                sample_weight=None,
                                drop_intermediate=True)
    auc_est = auc(fpr, tpr)

    if show_plot:
        _=plt.plot(fpr, tpr, 
            color="blue", linestyle="-", linewidth=2, 
            marker="o", markersize=2, 
            label=f"AUC={auc_est:.3f}")[0]
        plt.show()
        plt.close()

    eval_results = {'predicted_labels':hat, # predicted labels by model
                    'true_labels':star,     # true labels
                    'images':all_paths,     # image paths
                    'report':report,        # sklearn classification report
                    'ROC_AUC':auc_est,      # ROC-AUC
                    'threshold':threshold,  # >= threshold ==> malignant
                    'model':m_name,         # model name
                    'web_path':m_web_path,  # web link to download model
                    }

    return eval_results




if __name__ == '__main__':
    # get arguments from command line
    args = get_args()
    # load model and download if necessary
    model = load_model(args.model, 
        save_dir=args.model_dir, download=not args.no_download)
    # load DDI dataset
    dataset = DDI_Dataset("DDI", transform=test_transform)
    # evaluate results on data
    eval_results = eval_model(model, dataset, 
        use_gpu=args.use_gpu, show_plot=args.plot)

    # save evaluation results in a .pkl file 
    if args.eval_dir:
        os.makedirs(args.eval_dir, exist_ok=True)
        eval_save_path = os.path.join(args.eval_dir, 
                                      f"{args.model}-evaluation.pkl")
        with open(eval_save_path, 'wb') as f:
            pickle.dump(eval_results, f)

        # load results with:
        #with open(eval_save_path, 'rb') as f:
        #    results = pickle.load(f)

And here is the full traceback:

Exception has occurred: Exception
Please visit <https://stanfordaimi.azurewebsites.net/datasets/35866158-8196-48d8-87bf-50dca81df965> to download the DDI dataset.
  File "C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI\ddi_dataset.py", line 37, in __init__
    raise Exception(f"Please visit <{DDI_Dataset._DDI_download_link}> to download the DDI dataset.")
  File "C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI\eval_ddi.py", line 140, in <module>
    dataset = DDI_Dataset("DDI", transform=test_transform)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Exception: Please visit <https://stanfordaimi.azurewebsites.net/datasets/35866158-8196-48d8-87bf-50dca81df965> to download the DDI dataset.

I would like to understand why this problem happens and how to solve it.

Upvotes: 0

Views: 91

Answers (1)

tripleee

Reputation: 189789

The file ddi_metadata.csv must be in the root folder you pass to DDI_Dataset, not inside images.
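
The constructor builds the CSV path as os.path.join(root, "ddi_metadata.csv") and raises as soon as that file is missing, and the docstring at the top of eval_ddi.py expects the data to be organized as

./DDI
    /images
        /000001.png
        ...
    /ddi_metadata.csv

so ddi_metadata.csv belongs next to the images folder, one level above where you put it. Two further things to check: your commented-out root points at ...\DDI\images rather than at ...\DDI itself (the root should be the folder that contains the images subdirectory), and eval_ddi.py as quoted hard-codes DDI_Dataset("DDI", ...) instead of using args.data_dir, so the relative path "DDI" is resolved against whatever directory you launch the script from. A minimal sketch of a call that should work once the CSV is moved, assuming the absolute path from your question:

from ddi_dataset import DDI_Dataset, test_transform

# root is the folder that contains both the images subfolder and ddi_metadata.csv
root = r"C:\Users\User\OneDrive\desktop\CSproject\dataset\DDI"
dataset = DDI_Dataset(root, transform=test_transform)
print(len(dataset))  # should print the number of images instead of raising

Alternatively, change the hard-coded "DDI" in eval_ddi.py to args.data_dir and pass --data_dir with the absolute path.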

Upvotes: 0
