Source code for segmentationmetrics.metrics

import numpy as np
import pandas as pd

from . import surface_distance as sd



[docs]
class SegmentationMetrics:
    """
    Overlap, surface-distance and volume metrics comparing a predicted
    segmentation mask against a ground truth mask.

    All metrics are computed once, when the instance is created, and stored
    as attributes. If neither mask contains a positive label, every metric
    is set to ``np.nan``.

    Attributes
    ----------
    dice : float
        Dice similarity score.
        The dice score is a measure of the overlap between the predicted and true
        masks. It is calculated as 2*TP / (2*TP + FP + FN).
        If there are multiple labels, the dice score is the mean of the dice
        scores for each label.
    jaccard : float
        Jaccard similarity score.
        The jaccard score is a measure of the overlap between the predicted and
        true masks. It is calculated as TP / (TP + FP + FN).
        If there are multiple labels, the jaccard score is the mean of the
        jaccard scores for each label.
    sensitivity : float
        Sensitivity/recall/true positive rate.
        The sensitivity is the proportion of true positives that are correctly
        identified. It is calculated as TP / (TP + FN).
        If there are multiple labels, the sensitivity is the mean of the
        sensitivities for each label.
    specificity : float
        Specificity/selectivity/true negative rate.
        The specificity is the proportion of true negatives that are correctly
        identified. It is calculated as TN / (TN + FP).
        If there are multiple labels, the specificity is the mean of the
        specificities for each label.
    precision : float
        Precision/positive predictive value.
        The precision is the proportion of predicted positives that are true
        positives. It is calculated as TP / (TP + FP).
        If there are multiple labels, the precision is the mean of the
        precisions for each label.
    accuracy : float
        Accuracy.
        The accuracy is the proportion of true results (both true positives and
        true negatives) among the total number of cases examined. It is
        calculated as (TP + TN) / (TP + TN + FP + FN).
        If there are multiple labels, the accuracy is the mean of the accuracies
        for each label.
    mean_surface_distance : float or array of two floats
        The mean surface distance, defaults to symmetric.
        The mean surface distance is the average distance between the surfaces
        of the predicted and true masks. If symmetric is True, the mean surface
        distance is the average of the mean surface distance from surface A to
        surface B and the mean surface distance from surface B to surface A. If
        symmetric is False, both mean surface distances are returned as a
        pair (a length-2 array).
        If there are multiple labels, the mean surface distance is the mean of
        the mean surface distances for each label. A label that is missing
        from either mask contributes NaN, which propagates to the mean.
    hausdorff_distance : float
        The robust Hausdorff distance, defaults to 95th percentile.
        The Hausdorff distance is the maximum distance of a set to the nearest
        point in the other set. The robust Hausdorff distance is the distance at
        a specified percentile of the distances from points on one surface to
        the other surface.
        If there are multiple labels, the Hausdorff distance is the mean of the
        Hausdorff distances for each label. A label that is missing from
        either mask contributes NaN, which propagates to the mean.
    true_volume : float
        The volume of the true mask (in milliliters).
        If there are multiple labels, the true volume is the sum of the true
        volumes for each label.
    predicted_volume : float
        The volume of the predicted mask (in milliliters).
        If there are multiple labels, the predicted volume is the sum of the
        predicted volumes for each label.
    volume_difference : float
        The difference between the true and predicted volumes (in
        milliliters). Positive values show the predicted volume is larger
        than the true volume, negative values show the true volume is larger
        than the predicted volume.
        If there are multiple labels, the volume difference is the sum of the
        absolute volume differences for each label, rather than the overall
        volume difference. This is to prevent positive and negative differences
        cancelling each other out.
    """

    # (attribute name, display name) pairs in reporting order. Keeping both
    # in one table stops get_dict() and get_df() drifting out of step.
    _METRICS = (
        ('dice', 'Dice'),
        ('jaccard', 'Jaccard'),
        ('sensitivity', 'Sensitivity'),
        ('specificity', 'Specificity'),
        ('precision', 'Precision'),
        ('accuracy', 'Accuracy'),
        ('mean_surface_distance', 'Mean Surface Distance'),
        ('hausdorff_distance', 'Hausdorff Distance'),
        ('volume_difference', 'Volume Difference'),
        ('true_volume', 'True Volume'),
        ('predicted_volume', 'Predicted Volume'),
    )

    def __init__(self, prediction, truth, zoom, percentile=95, symmetric=True,
                 many_labels=False):
        """
        Initialises the SegmentationMetrics class instance.

        Parameters
        ----------
        prediction : np.ndarray
            An array of bools or ints representing the predicted mask.
            Floating point arrays (of any precision) whose values do not
            exceed 1 are treated as probabilities and thresholded at 0.5.
        truth : np.ndarray
            An array of bools or ints representing the ground truth mask.
            Floating point arrays are handled as for ``prediction``.
        zoom : tuple
            The length of each voxel dimension in millimeters.
        percentile : int, default 95
            The percentile of surface distances to define as the Hausdorff
            distance.
        symmetric : bool, default True
            If true, the symmetric mean surface distance is calculated i.e.
            the returned mean surface distance is the average of the means
            surface distance from surface A to surface B and the mean
            surface distance from surface B to surface A. If false, both
            mean surface distances are returned as a pair.
        many_labels : bool, default False
            If false, an error is raised if there are more than 10 labels in
            either the prediction or true mask. This is to prevent accidentally
            running the metrics on a non-binary mask (e.g. the image that was
            segmented). If true, metrics are calculated and averaged across all
            labels.

        Raises
        ------
        ValueError
            If more than 10 distinct positive labels are found and
            ``many_labels`` is False.
        """
        # Comparing a dtype with the string 'float' only matches float64, so
        # float32/float16 probability maps would skip the threshold below and
        # every distinct probability would be treated as its own label.
        has_float = (np.issubdtype(prediction.dtype, np.floating)
                     or np.issubdtype(truth.dtype, np.floating))
        # .max() is undefined on zero-size arrays; leave those untouched so
        # they fall through to the "no labels" branch like empty int arrays.
        if has_float and prediction.size > 0 and truth.size > 0:
            if prediction.max() <= 1 and truth.max() <= 1:
                prediction = (prediction > 0.5).astype(int)
                truth = (truth > 0.5).astype(int)

        self.prediction = prediction
        self.truth = truth
        self.zoom = zoom
        # Every positive value present in either mask counts as a label.
        self.labels = np.unique(np.concatenate((prediction[prediction > 0],
                                                truth[truth > 0])))
        if not many_labels and self.labels.size > 10:
            raise ValueError('More than 10 labels found in prediction '
                             'and/or truth. If you want to calculate '
                             'metrics for more than 10 labels, set '
                             'many_labels=True.')

        if self.labels.size == 0:
            # Nothing is segmented in either mask, so no metric is defined.
            for attribute, _ in self._METRICS:
                setattr(self, attribute, np.nan)
            return

        labels = self.labels
        self.dice = np.mean([self._dice(label) for label in labels])
        self.jaccard = np.mean([self._jaccard(label) for label in labels])
        self.sensitivity = np.mean([self._sensitivity(label)
                                    for label in labels])
        self.specificity = np.mean([self._specificity(label)
                                    for label in labels])
        self.precision = np.mean([self._precision(label) for label in labels])
        self.accuracy = np.mean([self._accuracy(label) for label in labels])

        mean_surface_distance_vals = []
        hausdorff_distance_vals = []
        for label in labels:
            prediction_mask = self.prediction == label
            truth_mask = self.truth == label
            if not truth_mask.any() or not prediction_mask.any():
                # A label absent from one of the masks has no surface there,
                # so surface distances are not defined for it.
                undefined = np.nan if symmetric else (np.nan, np.nan)
                mean_surface_distance_vals.append(undefined)
                hausdorff_distance_vals.append(np.nan)
            else:
                # _av_dist and _hausdorff_dist read self._surface_dist.
                self._surface_dist = sd.compute_surface_distances(
                    prediction_mask, truth_mask, self.zoom)
                mean_surface_distance_vals.append(self._av_dist(symmetric))
                hausdorff_distance_vals.append(
                    self._hausdorff_dist(percentile))

        # axis=0 averages label-wise, which keeps the two directions separate
        # when symmetric is False. NaN entries propagate to the result.
        self.mean_surface_distance = np.mean(mean_surface_distance_vals,
                                             axis=0)
        self.hausdorff_distance = np.mean(hausdorff_distance_vals)

        self.true_volume = np.sum([self._true_volume(label)
                                   for label in labels])
        self.predicted_volume = np.sum([self._predicted_volume(label)
                                        for label in labels])
        if labels.size == 1:
            # A single label keeps its sign (predicted minus true).
            self.volume_difference = self._volume_difference(labels[0])
        else:
            # Absolute values stop over- and under-segmentation of different
            # labels cancelling each other out.
            self.volume_difference = np.sum(
                np.abs([self._volume_difference(label) for label in labels]))

    def get_dict(self):
        """
        Generate a dictionary of segmentation accuracy metrics.

        Returns
        -------
        metrics : dict
            Segmentation accuracy.
        """
        return {attribute: getattr(self, attribute)
                for attribute, _ in self._METRICS}

    def get_df(self):
        """
        Generate a Pandas DataFrame containing the segmentation accuracy
        metrics.

        Returns
        -------
        df : pd.DataFrame
            DataFrame with metric in one column and score in the next column.
        """
        df = pd.DataFrame.from_dict(self.get_dict(),
                                    orient='index',
                                    columns=['Score'])
        df['Metric'] = [display_name for _, display_name in self._METRICS]
        df = df[['Metric', 'Score']]
        return df

    def _dice(self, label=1):
        """Return 2*TP / (2*TP + FP + FN) for `label`, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp + fn == 0:
            return 0.0
        return (2 * tp) / (2 * tp + fp + fn)

    def _jaccard(self, label=1):
        """Return TP / (TP + FP + FN) for `label`, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp + fn == 0:
            return 0.0
        return tp / (tp + fp + fn)

    def _sensitivity(self, label=1):
        """Return TP / (TP + FN) for `label`, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fn == 0:
            return 0.0
        return tp / (tp + fn)

    def _specificity(self, label=1):
        """Return TN / (TN + FP) for `label`, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tn + fp == 0:
            return 0.0
        return tn / (tn + fp)

    def _precision(self, label=1):
        """Return TP / (TP + FP) for `label`, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp == 0:
            return 0.0
        return tp / (tp + fp)

    def _accuracy(self, label=1):
        """Return (TP + TN) / (TP + TN + FP + FN), or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        total = tp + fp + fn + tn
        if total == 0:
            return 0.0
        return (tp + tn) / total

    def _av_dist(self, symmetric=True):
        """
        Return the average surface distance for ``self._surface_dist``.

        The mean of the values returned by
        ``sd.compute_average_surface_distance`` is returned when `symmetric`
        is true; otherwise those values are returned unchanged.
        """
        av_surf_dist = sd.compute_average_surface_distance(self._surface_dist)
        if symmetric:
            return np.mean(av_surf_dist)
        return av_surf_dist

    def _hausdorff_dist(self, percentile=95):
        """Return the robust Hausdorff distance for ``self._surface_dist``."""
        return sd.compute_robust_hausdorff(self._surface_dist, percentile)

    def _true_volume(self, label=1):
        """Return the volume of `label` in the truth mask, in milliliters."""
        # Voxel count times voxel volume (mm^3); 1000 mm^3 is 1 ml.
        voxels = np.count_nonzero(self.truth == label)
        return voxels * np.prod(self.zoom) / 1000

    def _predicted_volume(self, label=1):
        """Return the volume of `label` in the prediction, in milliliters."""
        voxels = np.count_nonzero(self.prediction == label)
        return voxels * np.prod(self.zoom) / 1000

    def _volume_difference(self, label=1):
        """Return predicted volume minus true volume for `label`."""
        return self._predicted_volume(label) - self._true_volume(label)

    def _get_confusion_counts(self, label):
        """
        Count true/false positives and negatives for a single label.

        Parameters
        ----------
        label : int or bool
            The label value treated as the positive class.

        Returns
        -------
        tp, fp, fn, tn : int
            Voxel counts of true positives, false positives, false negatives
            and true negatives.
        """
        # Build each mask once; ~mask is equivalent to comparing with != and
        # avoids rescanning the full arrays for every count.
        predicted = self.prediction == label
        actual = self.truth == label
        tp = np.count_nonzero(predicted & actual)
        fp = np.count_nonzero(predicted & ~actual)
        fn = np.count_nonzero(~predicted & actual)
        tn = np.count_nonzero(~predicted & ~actual)
        return tp, fp, fn, tn