Source code for segmentationmetrics.metrics

import numpy as np
import pandas as pd

from . import surface_distance as sd


class SegmentationMetrics:
    """
    Compare a predicted segmentation mask against a ground truth mask.

    All metrics are calculated once, when the instance is created, and
    stored as attributes. Labels are the distinct values greater than zero
    found in either mask; zero (and anything below it) is background.

    Attributes
    ----------
    dice : float
        Dice similarity score. The dice score is a measure of the overlap
        between the predicted and true masks. It is calculated as
        2*TP / (2*TP + FP + FN). If there are multiple labels, the dice
        score is the mean of the dice scores for each label.
    jaccard : float
        Jaccard similarity score. The jaccard score is a measure of the
        overlap between the predicted and true masks. It is calculated as
        TP / (TP + FP + FN). If there are multiple labels, the jaccard
        score is the mean of the jaccard scores for each label.
    sensitivity : float
        Sensitivity/recall/true positive rate. The sensitivity is the
        proportion of true positives that are correctly identified. It is
        calculated as TP / (TP + FN). If there are multiple labels, the
        sensitivity is the mean of the sensitivities for each label.
    specificity : float
        Specificity/selectivity/true negative rate. The specificity is the
        proportion of true negatives that are correctly identified. It is
        calculated as TN / (TN + FP). If there are multiple labels, the
        specificity is the mean of the specificities for each label.
    precision : float
        Precision/positive predictive value. The precision is the
        proportion of predicted positives that are true positives. It is
        calculated as TP / (TP + FP). If there are multiple labels, the
        precision is the mean of the precisions for each label.
    accuracy : float
        Accuracy. The accuracy is the proportion of true results (both
        true positives and true negatives) among the total number of cases
        examined. It is calculated as (TP + TN) / (TP + TN + FP + FN). If
        there are multiple labels, the accuracy is the mean of the
        accuracies for each label.
    mean_surface_distance : float or tuple
        The mean surface distance, defaults to symmetric. The mean surface
        distance is the average distance between the surfaces of the
        predicted and true masks. If symmetric is True, the mean surface
        distance is the average of the mean surface distance from surface
        A to surface B and the mean surface distance from surface B to
        surface A. If symmetric is False, a pair is returned with both
        mean surface distances. If there are multiple labels, the mean
        surface distance is the mean of the mean surface distances for
        each label. It is nan if any label is absent from one of the
        masks.
    hausdorff_distance : float
        The robust Hausdorff distance, defaults to 95th percentile. The
        Hausdorff distance is the maximum distance of a set to the nearest
        point in the other set. The robust Hausdorff distance is the
        distance at a specified percentile of the distances from points on
        one surface to the other surface. If there are multiple labels,
        the Hausdorff distance is the mean of the Hausdorff distances for
        each label. It is nan if any label is absent from one of the
        masks.
    true_volume : float
        The volume of the true mask (in milliliters). If there are
        multiple labels, the true volume is the sum of the true volumes
        for each label.
    predicted_volume : float
        The volume of the predicted mask (in milliliters). If there are
        multiple labels, the predicted volume is the sum of the predicted
        volumes for each label.
    volume_difference : float
        The difference between the true and predicted volumes (in
        milliliters). Positive values show the predicted volume is larger
        than the true volume, negative values show the true volume is
        larger than the predicted volume. If there are multiple labels,
        the volume difference is the sum of the absolute volume
        differences for each label, rather than the overall volume
        difference. This is to prevent positive and negative differences
        cancelling each other out.
    """

    def __init__(self, prediction, truth, zoom, percentile=95,
                 symmetric=True, many_labels=False):
        """
        Initialises the SegmentationMetrics class instance.

        Parameters
        ----------
        prediction : np.ndarray
            An array of bools or ints representing the predicted mask.
            Floating point arrays (of any precision) whose values do not
            exceed 1 are treated as probabilities and thresholded at 0.5.
        truth : np.ndarray
            An array of bools or ints representing the ground truth mask.
        zoom : tuple
            The length of each voxel dimension in millimeters.
        percentile : int, default 95
            The percentile of surface distances to define as the Hausdorff
            distance.
        symmetric : bool, default True
            If true, the symmetric mean surface distance is calculated i.e.
            the returned mean surface distance is the average of the mean
            surface distance from surface A to surface B and the mean
            surface distance from surface B to surface A. If false, a pair
            is returned with both mean surface distances.
        many_labels : bool, default False
            If false, an error is raised if there are more than 10 labels
            in either the prediction or true mask. This is to prevent
            accidentally running the metrics on a non-binary mask (e.g.
            the image that was segmented). If true, metrics are calculated
            and averaged across all labels.

        Raises
        ------
        ValueError
            If more than 10 labels are found and many_labels is False.
        """
        # Use issubdtype rather than comparing against 'float': the string
        # comparison only matches float64, so float32/float16 probability
        # maps would otherwise skip thresholding and every distinct
        # probability value would be treated as a separate label.
        either_is_float = (np.issubdtype(prediction.dtype, np.floating)
                           or np.issubdtype(truth.dtype, np.floating))
        if either_is_float:
            # Only masks bounded by 1 are taken to be probabilities; float
            # arrays holding larger values are kept as label maps.
            if prediction.max() <= 1 and truth.max() <= 1:
                prediction = (prediction > 0.5).astype(int)
                truth = (truth > 0.5).astype(int)

        self.prediction = prediction
        self.truth = truth
        self.zoom = zoom
        # Every value greater than zero present in either mask is a label.
        self.labels = np.unique(np.concatenate((prediction[prediction > 0],
                                                truth[truth > 0])))

        if not many_labels:
            if self.labels.size > 10:
                raise ValueError('More than 10 labels found in prediction '
                                 'and/or truth. If you want to calculate '
                                 'metrics for more than 10 labels, set '
                                 'many_labels=True.')

        if self.labels.size == 0:
            # Both masks are empty so no metric is defined.
            self.dice = np.nan
            self.jaccard = np.nan
            self.sensitivity = np.nan
            self.specificity = np.nan
            self.precision = np.nan
            self.accuracy = np.nan
            self.mean_surface_distance = np.nan
            self.hausdorff_distance = np.nan
            self.true_volume = np.nan
            self.predicted_volume = np.nan
            self.volume_difference = np.nan
        else:
            dice_vals = [self._dice(label) for label in self.labels]
            jaccard_vals = [self._jaccard(label) for label in self.labels]
            sensitivity_vals = [self._sensitivity(label)
                                for label in self.labels]
            specificity_vals = [self._specificity(label)
                                for label in self.labels]
            precision_vals = [self._precision(label)
                              for label in self.labels]
            accuracy_vals = [self._accuracy(label) for label in self.labels]

            mean_surface_distance_vals = []
            hausdorff_distance_vals = []
            for label in self.labels:
                if ((np.sum(self.truth == label) == 0)
                        or (np.sum(self.prediction == label) == 0)):
                    # If there are no voxels of this label in either the
                    # truth or prediction, set surface distances to nan as
                    # they are not defined
                    if symmetric:
                        mean_surface_distance_vals.append(np.nan)
                    else:
                        mean_surface_distance_vals.append((np.nan, np.nan))
                    hausdorff_distance_vals.append(np.nan)
                else:
                    # _av_dist and _hausdorff_dist read this attribute, so
                    # it must be refreshed for each label before they run.
                    self._surface_dist = sd.compute_surface_distances(
                        self.prediction == label,
                        self.truth == label,
                        self.zoom)
                    mean_surface_distance_vals.append(
                        self._av_dist(symmetric))
                    hausdorff_distance_vals.append(
                        self._hausdorff_dist(percentile))

            self.dice = np.mean(dice_vals)
            self.jaccard = np.mean(jaccard_vals)
            self.sensitivity = np.mean(sensitivity_vals)
            self.specificity = np.mean(specificity_vals)
            self.precision = np.mean(precision_vals)
            self.accuracy = np.mean(accuracy_vals)
            # axis=0 averages over labels while keeping the two directed
            # distances separate when symmetric is False.
            self.mean_surface_distance = np.mean(mean_surface_distance_vals,
                                                 axis=0)
            self.hausdorff_distance = np.mean(hausdorff_distance_vals)
            self.true_volume = np.sum([self._true_volume(label)
                                       for label in self.labels])
            self.predicted_volume = np.sum([self._predicted_volume(label)
                                            for label in self.labels])
            if self.labels.size == 1:
                # A single label keeps the sign of the difference.
                self.volume_difference = self._volume_difference(
                    self.labels[0])
            else:
                # Absolute values stop over- and under-segmentation of
                # different labels cancelling each other out.
                self.volume_difference = np.sum(
                    np.abs([self._volume_difference(label)
                            for label in self.labels]))

    def get_dict(self):
        """
        Generate a dictionary of segmentation accuracy metrics.

        Returns
        -------
        metrics : dict
            Segmentation accuracy, keyed by metric name.
        """
        return {'dice': self.dice,
                'jaccard': self.jaccard,
                'sensitivity': self.sensitivity,
                'specificity': self.specificity,
                'precision': self.precision,
                'accuracy': self.accuracy,
                'mean_surface_distance': self.mean_surface_distance,
                'hausdorff_distance': self.hausdorff_distance,
                'volume_difference': self.volume_difference,
                'true_volume': self.true_volume,
                'predicted_volume': self.predicted_volume}

    def get_df(self):
        """
        Generate a Pandas DataFrame containing the segmentation accuracy
        metrics.

        Returns
        -------
        df : pd.DataFrame
            DataFrame with metric in one column and score in the next
            column. The index holds the keys used by get_dict.
        """
        df = pd.DataFrame.from_dict(self.get_dict(), orient='index',
                                    columns=['Score'])
        # These display names must stay in the same order as the keys
        # returned by get_dict.
        df['Metric'] = ['Dice', 'Jaccard', 'Sensitivity', 'Specificity',
                        'Precision', 'Accuracy', 'Mean Surface Distance',
                        'Hausdorff Distance', 'Volume Difference',
                        'True Volume', 'Predicted Volume']
        df = df[['Metric', 'Score']]
        return df

    def _dice(self, label=1):
        """Return 2*TP / (2*TP + FP + FN) for label, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp + fn == 0:
            dice = 0.0
        else:
            dice = (2 * tp) / (2 * tp + fp + fn)
        return dice

    def _jaccard(self, label=1):
        """Return TP / (TP + FP + FN) for label, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp + fn == 0:
            jaccard = 0.0
        else:
            jaccard = tp / (tp + fp + fn)
        return jaccard

    def _sensitivity(self, label=1):
        """Return TP / (TP + FN) for label, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fn == 0:
            sensitivity = 0.0
        else:
            sensitivity = tp / (tp + fn)
        return sensitivity

    def _specificity(self, label=1):
        """Return TN / (TN + FP) for label, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tn + fp == 0:
            specificity = 0.0
        else:
            specificity = tn / (tn + fp)
        return specificity

    def _precision(self, label=1):
        """Return TP / (TP + FP) for label, or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp == 0:
            precision = 0.0
        else:
            precision = tp / (tp + fp)
        return precision

    def _accuracy(self, label=1):
        """Return (TP + TN) / (TP + FP + FN + TN), or 0.0 if undefined."""
        tp, fp, fn, tn = self._get_confusion_counts(label)
        if tp + fp + fn + tn == 0:
            accuracy = 0.0
        else:
            accuracy = (tp + tn) / (tp + fp + fn + tn)
        return accuracy

    def _av_dist(self, symmetric=True):
        """
        Return the average surface distance for the surface distances
        currently stored in self._surface_dist.

        If symmetric is true the values returned by
        sd.compute_average_surface_distance are averaged into one number,
        otherwise they are returned unchanged.
        """
        av_surf_dist = sd.compute_average_surface_distance(
            self._surface_dist)
        if symmetric:
            msd = np.mean(av_surf_dist)
        else:
            msd = av_surf_dist
        return msd

    def _hausdorff_dist(self, percentile=95):
        """
        Return sd.compute_robust_hausdorff at the given percentile for the
        surface distances currently stored in self._surface_dist.
        """
        return sd.compute_robust_hausdorff(self._surface_dist, percentile)

    def _true_volume(self, label=1):
        """Return the volume of label in the truth mask in milliliters."""
        # Voxel count times voxel volume (mm^3), divided by 1000 for mL.
        return np.sum(self.truth == label) * np.prod(self.zoom) / 1000

    def _predicted_volume(self, label=1):
        """Return the volume of label in the prediction in milliliters."""
        # Voxel count times voxel volume (mm^3), divided by 1000 for mL.
        return np.sum(self.prediction == label) * np.prod(self.zoom) / 1000

    def _volume_difference(self, label=1):
        """Return predicted volume minus true volume for label."""
        return self._predicted_volume(label) - self._true_volume(label)

    def _get_confusion_counts(self, label):
        """
        Count voxels in each confusion matrix cell, treating label as the
        positive class and every other value as negative.

        Returns
        -------
        tp, fp, fn, tn : int
            True positive, false positive, false negative and true
            negative voxel counts.
        """
        tp = np.sum((self.prediction == label) & (self.truth == label))
        fp = np.sum((self.prediction == label) & (self.truth != label))
        fn = np.sum((self.prediction != label) & (self.truth == label))
        tn = np.sum((self.prediction != label) & (self.truth != label))
        return tp, fp, fn, tn