Source code for qsprpred.models.assessment.metrics.classification

import numpy as np
import sklearn
from rdkit.ML.Scoring.Scoring import CalcBEDROC, CalcEnrichment, CalcRIE

from qsprpred.models.assessment.metrics.base import Metric


# ------------------------------------------
#   Classification Metrics (Probabilistic)
# ------------------------------------------

class CalibrationError(Metric):
    """Compute the calibration error of a classifier.

    Samples are sorted by the probability of their predicted class and split
    into ``n_bins`` bins of (nearly) equal size. In every bin the mean predicted
    probability is compared with the observed accuracy, and the per-bin gaps
    are aggregated according to ``norm``. The lower the error, the better
    calibrated the classifier is.

    Reference: Guo et al. (2017) On Calibration of Modern Neural Networks.
    https://arxiv.org/abs/1706.04599

    Attributes:
        n_bins (int): Number of bins used for calibration.
        norm (str): Norm used to aggregate the per-bin gaps
            ('L1', 'L2' or 'infinity').
    """

    def __init__(self, n_bins: int = 10, norm: str = "L1"):
        """Initialize the calibration error scorer.

        If `norm` is 'L1', the expected calibration error is returned (ECE).
        If `norm` is 'L2', the root-mean-square calibration error is returned (RMSCE).
        If `norm` is 'infinity', the maximum calibration error is returned (MCE).

        Args:
            n_bins (int): Number of bins to use for calibration.
                A bigger bin number requires more data. Defaults to 10.
            norm (str): The norm to use for the calibration error.
                Can be 'L1' or 'L2' or 'infinity'. Defaults to 'L1'.
        """
        self.n_bins = n_bins
        self.norm = norm

    def __call__(
            self,
            y_true: np.array,
            y_pred: list[np.ndarray],
    ) -> float:
        """Compute the calibration error of a classifier.

        Reference: Guo et al. (2017) On Calibration of Modern Neural Networks.
        https://arxiv.org/abs/1706.04599

        Args:
            y_true (np.array): True class labels. 1d array.
            y_pred (list[np.array]): Predicted class probabilities.
                List of arrays of shape (n_samples, n_classes) of length n_tasks.
                Note. Multi-task predictions are not supported.

        Returns:
            float: The calibration error.

        Raises:
            ValueError: If `y_pred` is not a list, holds more than one task,
                or if `norm` is not one of 'L1', 'L2' or 'infinity'.
            AssertionError: If there are fewer samples than bins.
        """
        # Check if y_pred is a list of arrays of length 1
        if not isinstance(y_pred, list):
            raise ValueError("y_pred must be a list of 2D arrays.")
        if len(y_pred) > 1:
            raise ValueError("Multi-task predictions are not supported.")

        # TODO: support multi-task predictions
        # Convert y_pred from list to a 2D array
        y_pred = y_pred[0]

        n_samples = len(y_true)
        assert n_samples >= self.n_bins, "Number of samples must be at least n_bins."

        # Get the highest probability and the predicted class
        y_pred_max = np.max(y_pred, axis=1)
        y_pred_class = np.argmax(y_pred, axis=1)
        # Sort data based on the highest probability
        sorted_indices = np.argsort(y_pred_max)
        sorted_y_true = y_true[sorted_indices]
        sorted_y_pred_max = y_pred_max[sorted_indices]
        sorted_y_pred_class = y_pred_class[sorted_indices]
        # Bin sorted data into n_bins chunks of (nearly) equal size
        binned_y_true = np.array_split(sorted_y_true, self.n_bins)
        binned_y_pred_max = np.array_split(sorted_y_pred_max, self.n_bins)
        binned_y_pred_class = np.array_split(sorted_y_pred_class, self.n_bins)
        # Compute the calibration error by iterating over the bins
        calibration_error = 0.0
        for bin_y_true, bin_y_pred_max, bin_y_pred_class in zip(
                binned_y_true, binned_y_pred_max, binned_y_pred_class
        ):
            # Gap between the mean probability and the accuracy of the bin
            mean_prob = np.mean(bin_y_pred_max)
            accuracy = np.mean(bin_y_true == bin_y_pred_class)
            gap = np.abs(mean_prob - accuracy)
            # Aggregate the gap of the bin based on the norm
            if self.norm == "L1":
                # Weighted mean of the absolute gaps (ECE)
                calibration_error += gap * len(bin_y_true) / n_samples
            elif self.norm == "L2":
                # Weighted mean of the *squared* gaps; the square root is
                # taken once all bins are accumulated (RMSCE)
                calibration_error += np.square(gap) * len(bin_y_true) / n_samples
            elif self.norm == "infinity":
                # Largest gap over all bins (MCE)
                calibration_error = max(calibration_error, gap)
            else:
                raise ValueError(f"Unknown norm {self.norm}")
        if self.norm == "L2":
            calibration_error = np.sqrt(calibration_error)

        return calibration_error

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "calibration_error"


class BEDROC(Metric):
    """Calculate the Boltzmann-enhanced discrimination of ROC (BEDROC).

    Reference: Truchon and Bayly, J. Chem. Inf. Model. 2007 47 (2), 488-508. DOI: 10.1021/ci600426e

    Attributes:
        alpha (float): Weighting parameter passed on to `CalcBEDROC`.
    """

    def __init__(self, alpha: float = 20):
        """Initialize the BEDROC scorer.

        Args:
            alpha (float): Weighting parameter (default: 20)
        """
        self.alpha = alpha

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the BEDROC score.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length n_tasks.
                Note. Multi-task predictions are not supported.

        Returns:
            float: The BEDROC score.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank on the second *column* (index 1) of the (n_samples, n_classes)
        # array, assumed to be the probability of the positive class.
        # Indexing with `y_pred[1]` would select the second sample's row.
        positive_scores = np.asarray(y_pred)[:, 1]
        # Highest score first; tuples compare element-wise, so tied scores
        # are ordered by label.
        ranked = sorted(zip(positive_scores, y_true), reverse=True)
        return CalcBEDROC(
            [[label] for _, label in ranked],
            col=0,
            alpha=self.alpha)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "bedroc"


class EnrichmentFactor(Metric):
    """Calculate the enrichment factor.

    Attributes:
        chi (float): Fraction handed to `CalcEnrichment` via its `fractions`
            argument.
    """

    def __init__(self, chi: float = 0.05):
        """Initialize the enrichment factor scorer.

        Args:
            chi (float): Fraction of the ranked list at which the enrichment
                is evaluated (default: 5%)
        """
        self.chi = chi

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the enrichment factor.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length n_tasks.
                Note. Multi-task predictions are not supported.

        Returns:
            float: The enrichment factor.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank on the second *column* (index 1) of the (n_samples, n_classes)
        # array, assumed to be the probability of the positive class.
        # Indexing with `y_pred[1]` would select the second sample's row.
        positive_scores = np.asarray(y_pred)[:, 1]
        # Highest score first; tuples compare element-wise, so tied scores
        # are ordered by label.
        ranked = sorted(zip(positive_scores, y_true), reverse=True)
        # A single fraction is requested, so only the first result is returned
        return CalcEnrichment(
            [[label] for _, label in ranked],
            col=0,
            fractions=[self.chi])[0]

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "enrichment_factor"


class RobustInitialEnhancement(Metric):
    """Calculate the robust initial enhancement.

    Reference: Sheridan et al., J. Chem. Inf. Model. 2001 41 (5), 1395-1406. DOI: 10.1021/ci0100144

    Attributes:
        alpha (float): Weighting parameter passed on to `CalcRIE`.
    """

    def __init__(self, alpha: float = 100):
        """Initialize the robust initial enhancement scorer.

        Args:
            alpha (float): Weighting parameter (default: 100)
        """
        self.alpha = alpha

    def __call__(self, y_true: np.array, y_pred: list[np.array]) -> float:
        """Calculate the robust initial enhancement.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (list[np.array]): Target probability scores.
                List of arrays of shape (n_samples, n_classes) of length n_tasks.
                Note. Multi-task predictions are not supported.

        Returns:
            float: The robust initial enhancement.
        """
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        # Rank on the second *column* (index 1) of the (n_samples, n_classes)
        # array, assumed to be the probability of the positive class.
        # Indexing with `y_pred[1]` would select the second sample's row.
        positive_scores = np.asarray(y_pred)[:, 1]
        # Highest score first; tuples compare element-wise, so tied scores
        # are ordered by label.
        ranked = sorted(zip(positive_scores, y_true), reverse=True)
        return CalcRIE([[label] for _, label in ranked],
                       col=0,
                       alpha=self.alpha)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "robust_initial_enhancement"


# ------------------------------------------
#   Classification Metrics (Discrete)
# ------------------------------------------

class Prevalence(Metric):
    """Scorer returning the prevalence of the true labels.

    The string representation of the scorer is ``prevalence``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Return the sum of the true labels divided by their count.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1).
                Not used by this scorer.

        Returns:
            float: The prevalence.
        """
        label_total = sum(y_true)
        n_samples = len(y_true)
        return label_total / n_samples

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "prevalence"


class Sensitivity(Metric):
    """Scorer for the sensitivity (true positive rate, recall).

    The string representation of the scorer is ``sensitivity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Return TP / (TP + FN) taken from the confusion matrix.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The sensitivity.
        """
        # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
        cells = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        _tn, _fp, false_neg, true_pos = cells
        actual_positives = true_pos + false_neg
        return true_pos / actual_positives

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "sensitivity"


class Specificity(Metric):
    """Scorer for the specificity (true negative rate, selectivity).

    The string representation of the scorer is ``specificity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Return TN / (TN + FP) taken from the confusion matrix.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The specificity.
        """
        # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
        cells = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        true_neg, false_pos, _fn, _tp = cells
        actual_negatives = true_neg + false_pos
        return true_neg / actual_negatives

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "specificity"


class PositivePredictivity(Metric):
    """Scorer for the positive predictivity (precision).

    The string representation of the scorer is ``Positive_predictivity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Return TP / (TP + FP) taken from the confusion matrix.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The positive predictivity.
        """
        # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
        cells = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        _tn, false_pos, _fn, true_pos = cells
        predicted_positives = true_pos + false_pos
        return true_pos / predicted_positives

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "Positive_predictivity"


class NegativePredictivity(Metric):
    """Scorer for the negative predictivity.

    The string representation of the scorer is ``negative_predictivity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Return TN / (TN + FN) taken from the confusion matrix.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The negative predictivity.
        """
        # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
        cells = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        true_neg, _fp, false_neg, _tp = cells
        predicted_negatives = true_neg + false_neg
        return true_neg / predicted_negatives

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "negative_predictivity"


class CohenKappa(Metric):
    """Scorer for Cohen's kappa coefficient of a binary classifier.

    The string representation of the scorer is ``cohen_kappa``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Compute Cohen's kappa from the four confusion-matrix cells.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The Cohen kappa coefficient.
        """
        # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
        cells = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
        tn, fp, fn, tp = cells
        # kappa = 2 (TP*TN - FP*FN) / ((TP+FP)(TN+FP) + (TP+FN)(TN+FN))
        numerator = 2 * (tp * tn - fp * fn)
        denominator = (tp + fp) * (tn + fp) + (tp + fn) * (tn + fn)
        return numerator / denominator

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "cohen_kappa"


class BalancedPositivePredictivity(Metric):
    """Scorer for the balanced positive predictivity.

    Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
    J Cheminform 16, 43 (2024). https://doi.org/10.1186/s13321-024-00837-w

    The string representation of the scorer is
    ``balanced_positive_predictivity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Compute sensitivity / (1 + sensitivity - specificity).

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The balanced positive predictivity.
        """
        # The prevalence (first item) is not needed for the balanced metric
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        denominator = 1 + sensitivity - specificity
        return sensitivity / denominator

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "balanced_positive_predictivity"


class BalancedNegativePredictivity(Metric):
    """Calculate the balanced negative predictivity.

    This is the negative predictivity evaluated at a prevalence of 0.5,
    which reduces to ``spe / (1 - sen + spe)``.

    Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
    J Cheminform 16, 43 (2024). https://doi.org/10.1186/s13321-024-00837-w

    The string representation of the scorer is
    ``balanced_negative_predictivity``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the balanced negative predictivity.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The balanced negative predictivity.

        """
        _, sen, spe = derived_confusion_matrix(y_true, y_pred)
        # With equal class weights: TN / (TN + FN) = spe / (spe + (1 - sen)).
        # The miss rate (1 - sen) enters the denominator, hence the minus sign.
        return spe / (1 - sen + spe)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "balanced_negative_predictivity"


class BalancedMatthewsCorrcoeff(Metric):
    """Scorer for the balanced Matthews correlation coefficient.

    Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
    J Cheminform 16, 43 (2024). https://doi.org/10.1186/s13321-024-00837-w

    The string representation of the scorer is
    ``balanced_matthews_corrcoeff``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Compute (sen + spe - 1) / sqrt(1 - (sen - spe)^2).

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The correlation coefficient.
        """
        # The prevalence (first item) is not needed for the balanced metric
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        numerator = sensitivity + specificity - 1
        denominator = np.sqrt(1 - (sensitivity - specificity) ** 2)
        return numerator / denominator

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "balanced_matthews_corrcoeff"


class BalancedCohenKappa(Metric):
    """Scorer for the balanced Cohen kappa coefficient.

    Guesné, S.J.J., Hanser, T., Werner, S. et al. Mind your prevalence!.
    J Cheminform 16, 43 (2024). https://doi.org/10.1186/s13321-024-00837-w

    The string representation of the scorer is ``balanced_cohen_kappa``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Compute sensitivity + specificity - 1.

        Args:
            y_true (np.array): Ground truth (correct) labels. 1d array.
            y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

        Returns:
            float: The balanced Cohen kappa coefficient.
        """
        # The prevalence (first item) is not needed for the balanced metric
        _, sensitivity, specificity = derived_confusion_matrix(y_true, y_pred)
        balanced_kappa = sensitivity + specificity - 1
        return balanced_kappa

    def __str__(self) -> str:
        """Name under which the scorer is reported."""
        return "balanced_cohen_kappa"


def derived_confusion_matrix(y_true: np.array,
                             y_pred: np.array) -> tuple[float, float, float]:
    """Calculate the derived confusion matrix.

    The three returned values are ratios computed from the labels and the
    confusion matrix, not raw counts.

    Args:
        y_true (np.array): Ground truth (correct) labels. 1d array.
        y_pred (np.array): Predicted labels. 2D array (n_samples, 1)

    Returns:
        tuple[float, float, float]: The derived confusion matrix.
                                    Prevalence, sensitivity and specificity.
    """
    # The flattened matrix must hold exactly four cells: tn, fp, fn, tp
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(y_true, y_pred).ravel()
    # Prevalence: sum of the true labels divided by the number of samples
    pre = sum(y_true) / len(y_true)
    # Sensitivity: fraction of actual positives that were predicted positive
    sen = tp / (tp + fn)
    # Specificity: fraction of actual negatives that were predicted negative
    spe = tn / (tn + fp)
    return pre, sen, spe