Source code for qsprpred.models.assessment.metrics.classification

import numpy as np
import sklearn
from rdkit.ML.Scoring.Scoring import CalcBEDROC, CalcEnrichment, CalcRIE

from qsprpred.models.assessment.metrics.base import Metric


# ------------------------------------------
#   Classification Metrics (Probabilistic)
# ------------------------------------------

class CalibrationError(Metric):
    """Compute the calibration error of a classifier.

    The samples are sorted by the probability of their predicted class and
    split into `n_bins` (nearly) equally sized bins. For every bin the gap
    between the mean predicted probability (confidence) and the observed
    accuracy is computed, and the gaps are aggregated according to `norm`.
    The lower the error, the better calibrated the classifier is.

    Reference:
        Guo et al. (2017) On Calibration of Modern Neural Networks.
        https://arxiv.org/abs/1706.04599

    Attributes:
        n_bins (int): Number of bins used for calibration.
        norm (str): Aggregation norm, one of 'L1', 'L2' or 'infinity'.
    """

    def __init__(self, n_bins: int = 10, norm: str = "L1"):
        """Initialize the calibration error scorer.

        If `norm` is 'L1', the expected calibration error is returned (ECE).
        If `norm` is 'L2', the root-mean-square calibration error is returned
        (RMSCE).
        If `norm` is 'infinity', the maximum calibration error is returned
        (MCE).

        Args:
            n_bins (int): Number of bins to use for calibration. A bigger bin
                number requires more data. Defaults to 10.
            norm (str): The norm to use for the calibration error. Can be
                'L1' or 'L2' or 'infinity'. Defaults to 'L1'.
        """
        self.n_bins = n_bins
        self.norm = norm

    def __call__(
        self,
        y_true: np.array,
        y_pred: list[np.ndarray],
    ) -> float:
        """Compute the calibration error of a classifier.

        Reference:
            Guo et al. (2017) On Calibration of Modern Neural Networks.
            https://arxiv.org/abs/1706.04599

        Args:
            y_true (np.array): True class labels. 1d array (a single-column
                2d array is flattened).
            y_pred (list[np.array]): Predicted class probabilities.
                List of arrays of shape (n_samples, n_classes) of length
                n_tasks. Note. Multi-task predictions are not supported.

        Returns:
            float: The calibration error.

        Raises:
            ValueError: If `y_pred` is not a list, contains more than one
                task, or if `norm` is not one of the supported values.
            AssertionError: If there are fewer samples than bins.
        """
        # Check if y_pred is a list of arrays of length 1
        if not isinstance(y_pred, list):
            raise ValueError("y_pred must be a list of 2D arrays.")
        if len(y_pred) > 1:
            raise ValueError("Multi-task predictions are not supported.")
        # TODO: support multi-task predictions

        # Convert y_pred from list to a 2D array
        y_pred = y_pred[0]
        assert len(y_true) >= self.n_bins, "Number of samples must be at least n_bins."

        # Fail before doing any work if the norm is not supported
        if self.norm not in ("L1", "L2", "infinity"):
            raise ValueError(f"Unknown norm {self.norm}")

        # Positional fancy indexing below needs a plain 1d array; a column
        # vector would otherwise broadcast against the predicted classes.
        y_true = np.asarray(y_true)
        if y_true.ndim == 2 and y_true.shape[1] == 1:
            y_true = y_true[:, 0]
        n_samples = len(y_true)

        # Get the highest probability and the predicted class
        y_pred_max = np.max(y_pred, axis=1)
        y_pred_class = np.argmax(y_pred, axis=1)

        # Sort data based on the highest probability
        sorted_indices = np.argsort(y_pred_max)
        sorted_y_true = y_true[sorted_indices]
        sorted_y_pred_max = y_pred_max[sorted_indices]
        sorted_y_pred_class = y_pred_class[sorted_indices]

        # Bin sorted data
        binned_y_true = np.array_split(sorted_y_true, self.n_bins)
        binned_y_pred_max = np.array_split(sorted_y_pred_max, self.n_bins)
        binned_y_pred_class = np.array_split(sorted_y_pred_class, self.n_bins)

        # Compute the calibration error by iterating over the bins
        calibration_error = 0.0
        for bin_y_true, bin_y_pred_max, bin_y_pred_class in zip(
            binned_y_true, binned_y_pred_max, binned_y_pred_class
        ):
            # Gap between mean confidence and accuracy within the bin
            mean_prob = np.mean(bin_y_pred_max)
            accuracy = np.mean(bin_y_true == bin_y_pred_class)
            gap = np.abs(mean_prob - accuracy)
            weight = len(bin_y_true) / n_samples

            if self.norm == "L1":
                calibration_error += gap * weight
            elif self.norm == "L2":
                # Weighted *squared* gap; the square root is taken after
                # the loop to obtain the root-mean-square error.
                calibration_error += np.square(gap) * weight
            else:  # "infinity"
                calibration_error = max(calibration_error, gap)

        if self.norm == "L2":
            calibration_error = np.sqrt(calibration_error)

        return calibration_error

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "calibration_error"
class BEDROC(Metric):
    """Calculate the Boltzmann-enhanced discrimination of ROC (BEDROC).

    Reference:
        Truchon and Bayly, J. Chem. Inf. Model. 2007 47 (2), 488-508.
        DOI: 10.1021/ci600426e

    Attributes:
        alpha (float): Weighting parameter passed to `CalcBEDROC`.
    """

    def __init__(self, alpha: float = 20):
        """Initialize the BEDROC scorer.

        Args:
            alpha (float): Weighting parameter (default: 20)
        """
        self.alpha = alpha

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the BEDROC score.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length
                n_tasks. Note. Multi-task predictions are not supported;
                only the first task is used.

        Returns:
            float: The BEDROC score.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank by the probability of the positive class, i.e. column 1 of
        # the (n_samples, n_classes) array. Indexing with `[1]` would pick
        # the second *sample's* row instead of the per-sample scores.
        positive_scores = np.asarray(y_pred)[:, 1]
        ranked_labels = [
            [label]
            for _, label in sorted(zip(positive_scores, y_true), reverse=True)
        ]
        return CalcBEDROC(ranked_labels, col=0, alpha=self.alpha)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "bedroc"
class EnrichmentFactor(Metric):
    """Calculate the enrichment factor.

    Attributes:
        chi (float): Fraction of the ranked list at which the enrichment
            is evaluated.
    """

    def __init__(self, chi: float = 0.05):
        """Initialize the enrichment factor scorer.

        Args:
            chi (float): Weighting parameter (default: 5%)
        """
        self.chi = chi

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the enrichment factor.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length
                n_tasks. Note. Multi-task predictions are not supported;
                only the first task is used.

        Returns:
            float: The enrichment factor.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank by the probability of the positive class, i.e. column 1 of
        # the (n_samples, n_classes) array. Indexing with `[1]` would pick
        # the second *sample's* row instead of the per-sample scores.
        positive_scores = np.asarray(y_pred)[:, 1]
        ranked_labels = [
            [label]
            for _, label in sorted(zip(positive_scores, y_true), reverse=True)
        ]
        # CalcEnrichment returns one value per requested fraction
        return CalcEnrichment(ranked_labels, col=0, fractions=[self.chi])[0]

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "enrichment_factor"
class RobustInitialEnhancement(Metric):
    """Calculate the robust initial enhancement.

    Reference:
        Sheridan et al., J. Chem. Inf. Model. 2001 41 (5), 1395-1406.
        DOI: 10.1021/ci0100144

    Attributes:
        alpha (float): Weighting parameter passed to `CalcRIE`.
    """

    def __init__(self, alpha: float = 100):
        """Initialize the robust initial enhancement scorer.

        Args:
            alpha (float): Weighting parameter (default: 100)
        """
        self.alpha = alpha

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the robust initial enhancement.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length
                n_tasks. Note. Multi-task predictions are not supported;
                only the first task is used.

        Returns:
            float: The robust initial enhancement.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank by the probability of the positive class, i.e. column 1 of
        # the (n_samples, n_classes) array. Indexing with `[1]` would pick
        # the second *sample's* row instead of the per-sample scores.
        positive_scores = np.asarray(y_pred)[:, 1]
        ranked_labels = [
            [label]
            for _, label in sorted(zip(positive_scores, y_true), reverse=True)
        ]
        return CalcRIE(ranked_labels, col=0, alpha=self.alpha)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "robust_initial_enhancement"
# ------------------------------------------
#   Classification Metrics (Discrete)
# ------------------------------------------
class Prevalence(Metric):
    """Fraction of positive samples in the ground truth.

    The score depends only on `y_true`; predictions are accepted to keep
    the call signature uniform with the other metrics but are not used.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the prevalence.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1).
                Unused.

        Returns:
            float: Sum of the labels divided by the number of labels.
        """
        label_total = sum(y_true)
        n_samples = len(y_true)
        return label_total / n_samples

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "prevalence"
class Sensitivity(Metric):
    """Sensitivity (true positive rate, recall) of a binary classifier."""

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the sensitivity (recall).

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: True positives divided by all actual positives.
        """
        # Flattened confusion matrix is unpacked as (tn, fp, fn, tp)
        counts = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        _, _, false_neg, true_pos = counts
        actual_positives = true_pos + false_neg
        return true_pos / actual_positives

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "sensitivity"
class Specificity(Metric):
    """Specificity (true negative rate, selectivity) of a binary classifier."""

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the specificity (selectivity).

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: True negatives divided by all actual negatives.
        """
        # Flattened confusion matrix is unpacked as (tn, fp, fn, tp)
        counts = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        true_neg, false_pos, _, _ = counts
        actual_negatives = true_neg + false_pos
        return true_neg / actual_negatives

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "specificity"
class PositivePredictivity(Metric):
    """Positive predictivity (precision) of a binary classifier."""

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the positive predictivity.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: True positives divided by all predicted positives.
        """
        # Flattened confusion matrix is unpacked as (tn, fp, fn, tp)
        counts = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        _, false_pos, _, true_pos = counts
        predicted_positives = true_pos + false_pos
        return true_pos / predicted_positives

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "Positive_predictivity"
class NegativePredictivity(Metric):
    """Negative predictivity of a binary classifier."""

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the negative predictivity.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: True negatives divided by all predicted negatives.
        """
        # Flattened confusion matrix is unpacked as (tn, fp, fn, tp)
        counts = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        true_neg, _, false_neg, _ = counts
        predicted_negatives = true_neg + false_neg
        return true_neg / predicted_negatives

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "negative_predictivity"
class CohenKappa(Metric):
    """Cohen's kappa coefficient of a binary classifier."""

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the Cohen kappa coefficient.

        Uses the closed form for a 2x2 confusion matrix:
        2 (tp*tn - fp*fn) / ((tp+fp)(tn+fp) + (tp+fn)(tn+fn)).

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The Cohen kappa coefficient.
        """
        matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
        tn, fp, fn, tp = matrix.ravel()
        numerator = 2 * (tp * tn - fp * fn)
        denominator = (tp + fp) * (tn + fp) + (tp + fn) * (tn + fn)
        return numerator / denominator

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "cohen_kappa"
class BalancedPositivePredictivity(Metric):
    """Positive predictivity rescaled to a balanced (50%) prevalence.

    Reference:
        Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
        J Cheminform 16, 43 (2024).
        https://doi.org/10.1186/s13321-024-00837-w
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the balanced positive predictivity.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: sensitivity / (1 + sensitivity - specificity).
        """
        # Prevalence is returned by the helper but not needed here
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        denominator = 1 + sensitivity - specificity
        return sensitivity / denominator

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "balanced_positive_predictivity"
class BalancedNegativePredictivity(Metric):
    """Calculate the balanced negative predictivity.

    This is the negative predictivity the classifier would have at a
    prevalence of 50%, expressed through sensitivity and specificity only.

    Reference:
        Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
        J Cheminform 16, 43 (2024).
        https://doi.org/10.1186/s13321-024-00837-w
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the balanced negative predictivity.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: specificity / (1 - sensitivity + specificity).
        """
        _, sen, spe = derived_confusion_matrix(y_true, y_pred)
        # NPV = spe*(1-p) / (spe*(1-p) + (1-sen)*p); at p = 0.5 this reduces
        # to spe / (spe + 1 - sen). Note the *minus* sign on sensitivity,
        # mirroring the balanced positive predictivity sen / (1 + sen - spe).
        return spe / (1 - sen + spe)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "balanced_negative_predictivity"
class BalancedMatthewsCorrcoeff(Metric):
    """Matthews correlation coefficient rescaled to a balanced prevalence.

    Reference:
        Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
        J Cheminform 16, 43 (2024).
        https://doi.org/10.1186/s13321-024-00837-w
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the balanced Matthews correlation coefficient.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: (sen + spe - 1) / sqrt(1 - (sen - spe)^2).
        """
        # Prevalence is returned by the helper but not needed here
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        numerator = sensitivity + specificity - 1
        denominator = np.sqrt(1 - (sensitivity - specificity) ** 2)
        return numerator / denominator

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "balanced_matthews_corrcoeff"
class BalancedCohenKappa(Metric):
    """Cohen's kappa coefficient rescaled to a balanced prevalence.

    Reference:
        Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
        J Cheminform 16, 43 (2024).
        https://doi.org/10.1186/s13321-024-00837-w
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the balanced Cohen kappa coefficient.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: sensitivity + specificity - 1.
        """
        # Prevalence is returned by the helper but not needed here
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        balanced_kappa = sensitivity + specificity - 1
        return balanced_kappa

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "balanced_cohen_kappa"
def derived_confusion_matrix(
    y_true: np.array, y_pred: np.array
) -> tuple[float, float, float]:
    """Calculate the derived confusion matrix.

    The "derived" matrix summarises a binary confusion matrix as three
    ratios rather than four counts.

    Args:
        y_true (np.array): Ground truth (correct) labels. 1d array.
        y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

    Returns:
        tuple[float, float, float]: Prevalence, sensitivity and specificity,
            in that order.
    """
    # Flattened confusion matrix is unpacked as (tn, fp, fn, tp)
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
    pre = sum(y_true) / len(y_true)
    sen = tp / (tp + fn)
    spe = tn / (tn + fp)
    return pre, sen, spe