import numpy as np
import scipy.stats
from qsprpred.models.assessment.metrics.base import Metric
class KSlope(Metric):
    """Calculate the slope of the regression line through the origin
    between the predicted and observed values.

    The slope is ``k = sum(y_true * y_pred) / sum(y_true ** 2)``, i.e. the
    least-squares fit of ``y_pred ~ k * y_true`` with the intercept fixed at zero.

    Reference: Tropsha, A., & Golbraikh, A. (2010). In J.-L. Faulon & A. Bender (Eds.),
    Handbook of Chemoinformatics Algorithms.
    https://www.taylorfrancis.com/books/9781420082999

    The string form of the scorer is ``k_slope``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the slope of the regression line through the origin
        between the predicted and observed values.

        Both inputs are flattened before use, so a 1d vector and an
        ``(n_samples, 1)`` column are paired element by element.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The slope ``k``, or 0 when fewer than two predictions are
                given. The value is non-finite when ``sum(y_true ** 2)`` is zero.
        """
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        if y_pred.size < 2:
            return 0.0
        # Closed-form least-squares slope for a zero-intercept fit.
        return float(np.dot(y_true, y_pred) / np.dot(y_true, y_true))

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "k_slope"
class RPrime20(KSlope):
    """Calculate the coefficient of determination for regression line
    through the origin between the predicted and observed values.

    With ``k`` the zero-intercept slope returned by ``KSlope``, the score is
    ``1 - sum((y_pred - k * y_true) ** 2) / sum((y_pred - mean(y_pred)) ** 2)``.

    Reference: Tropsha, A., & Golbraikh, A. (2010). In J.-L. Faulon & A. Bender (Eds.),
    Handbook of Chemoinformatics Algorithms.
    https://www.taylorfrancis.com/books/9781420082999

    The string form of the scorer is ``r_prime_2_0``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the coefficient of determination for regression line
        through the origin between the predicted and observed values.

        Both inputs are flattened before use, so a 1d vector and an
        ``(n_samples, 1)`` column are paired element by element.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The coefficient of determination, or 0 when fewer than two
                predictions are given. The value is non-finite when ``y_pred``
                is constant (zero total sum of squares).
        """
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        if y_pred.size < 2:
            return 0.0
        # get the slope of the regression line through the origin
        k = super().__call__(y_true, y_pred)
        # Residuals must be squared: a signed sum lets positive and negative
        # errors cancel and does not yield a coefficient of determination.
        residual_ss = np.sum((y_pred - k * y_true) ** 2)
        total_ss = np.sum((y_pred - y_pred.mean()) ** 2)
        return float(1 - residual_ss / total_ss)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "r_prime_2_0"
class KPrimeSlope(Metric):
    """Calculate the slope of the regression line through the origin
    between the observed and predicted values.

    The slope is ``k' = sum(y_true * y_pred) / sum(y_pred ** 2)``, i.e. the
    least-squares fit of ``y_true ~ k' * y_pred`` with the intercept fixed at zero.

    Reference: Tropsha, A., & Golbraikh, A. (2010). In J.-L. Faulon & A. Bender (Eds.),
    Handbook of Chemoinformatics Algorithms.
    https://www.taylorfrancis.com/books/9781420082999

    The string form of the scorer is ``k_prime_slope``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the slope of the regression line through the origin
        between the observed and predicted values.

        Both inputs are flattened before use, so a 1d vector and an
        ``(n_samples, 1)`` column are paired element by element.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The slope ``k'``, or 0 when fewer than two predictions are
                given. The value is non-finite when ``sum(y_pred ** 2)`` is zero.
        """
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        if y_pred.size < 2:
            return 0.0
        # Closed-form least-squares slope for a zero-intercept fit.
        return float(np.dot(y_true, y_pred) / np.dot(y_pred, y_pred))

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "k_prime_slope"
class R20(KPrimeSlope):
    """Calculate the coefficient of determination for regression line
    through the origin between the observed and predicted values.

    With ``k'`` the zero-intercept slope returned by ``KPrimeSlope``, the score
    is ``1 - sum((y_true - k' * y_pred) ** 2) / sum((y_true - mean(y_true)) ** 2)``.

    Reference: Tropsha, A., & Golbraikh, A. (2010). In J.-L. Faulon & A. Bender (Eds.),
    Handbook of Chemoinformatics Algorithms.
    https://www.taylorfrancis.com/books/9781420082999

    The string form of the scorer is ``r_2_0``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the coefficient of determination for regression line
        through the origin between the observed and predicted values.

        Both inputs are flattened before use, so a 1d vector and an
        ``(n_samples, 1)`` column are paired element by element.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The coefficient of determination, or 0 when fewer than two
                predictions are given. The value is non-finite when ``y_true``
                is constant (zero total sum of squares).
        """
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        if y_pred.size < 2:
            return 0.0
        # get the slope of the regression line through the origin
        k_prime = super().__call__(y_true, y_pred)
        # Residuals must be squared: a signed sum lets positive and negative
        # errors cancel and does not yield a coefficient of determination.
        residual_ss = np.sum((y_true - k_prime * y_pred) ** 2)
        total_ss = np.sum((y_true - y_true.mean()) ** 2)
        return float(1 - residual_ss / total_ss)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "r_2_0"
class Pearson(Metric):
    """Calculate the Pearson correlation coefficient.

    The string form of the scorer is ``pearson``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the Pearson correlation coefficient.

        Both inputs are converted to arrays and flattened, so a 1d vector, an
        ``(n_samples, 1)`` column or a plain sequence are all accepted and
        paired element by element.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The Pearson correlation coefficient, or 0 when fewer than
                two predictions are given.
        """
        # Flatten y_true as well as y_pred: mixing a column vector with a flat
        # vector would otherwise reach pearsonr with mismatched shapes.
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        if y_pred.size < 2:
            return 0.0
        return float(scipy.stats.pearsonr(y_true, y_pred)[0])

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "pearson"
class Spearman(Metric):
    """Calculate the Spearman rank correlation coefficient.

    The string form of the scorer is ``spearman``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the Spearman rank correlation coefficient.

        Args:
            y_true (np.array): Ground truth (correct) target values. 1d array.
            y_pred (np.array): 2D array (n_samples, n_tasks)

        Returns:
            float: The Spearman rank correlation coefficient as reported by
                ``scipy.stats.spearmanr``, or 0 when fewer than two
                predictions are given.
        """
        # Too few samples: skip the scipy call and report 0.
        if len(y_pred) < 2:
            return 0
        correlation, _p_value = scipy.stats.spearmanr(y_true, y_pred)
        return correlation

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "spearman"
class Kendall(Metric):
    """Calculate the Kendall rank correlation coefficient.

    The string form of the scorer is ``kendall``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the Kendall rank correlation coefficient.

        Args:
            y_true (np.array): Ground truth (correct) target values. 1d array.
            y_pred (np.array): 2D array (n_samples, n_tasks)

        Returns:
            float: The Kendall rank correlation coefficient as reported by
                ``scipy.stats.kendalltau``, or 0 when fewer than two
                predictions are given.
        """
        # Too few samples: skip the scipy call and report 0.
        if len(y_pred) < 2:
            return 0
        tau, _p_value = scipy.stats.kendalltau(y_true, y_pred)
        return tau

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "kendall"
class AverageFoldError(Metric):
    """Calculate the average fold error (AFE).

    The AFE is ``10 ** mean(log10(y_pred / y_true))``.

    The string form of the scorer is ``average_fold_error``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the average fold error.

        Both inputs are flattened before use so that each prediction is
        divided by its own observation.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The average fold error. The value is non-finite when a
                ratio ``y_pred / y_true`` is not strictly positive.
        """
        # Without flattening, a 1d y_true against an (n_samples, 1) y_pred
        # broadcasts to an (n_samples, n_samples) matrix of all-pairs ratios.
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        return float(10 ** np.mean(np.log10(y_pred / y_true)))

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "average_fold_error"
class AbsoluteAverageFoldError(Metric):
    """Calculate the absolute average fold error (AAFE).

    The AAFE is also known as the geometric mean fold error (GMFE) and is
    computed as ``10 ** mean(abs(log10(y_pred / y_true)))``.

    The string form of the scorer is ``absolute_average_fold_error``.
    """

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the absolute average fold error.

        Both inputs are flattened before use so that each prediction is
        divided by its own observation.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The absolute average fold error. The value is non-finite
                when a ratio ``y_pred / y_true`` is not strictly positive.
        """
        # Without flattening, a 1d y_true against an (n_samples, 1) y_pred
        # broadcasts to all (i, j) pairs, and the absolute value then
        # averages errors between unrelated samples.
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        return float(10 ** np.mean(np.abs(np.log10(y_pred / y_true))))

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return "absolute_average_fold_error"
class PercentageWithinFoldError(Metric):
    """Calculate the percentage of predictions within a certain fold error.

    A prediction counts as "within" when ``abs(log10(y_pred / y_true))`` is
    strictly smaller than ``log10(fold_error)``.

    The string form of the scorer is ``percentage_within_{x}_fold_error``,
    where ``{x}`` is the configured fold error.

    Attributes:
        fold_error (float): The fold error threshold.
    """

    def __init__(self, fold_error: float = 2):
        """Initialize the percentage within fold error scorer.

        Args:
            fold_error (float): The fold error threshold. Defaults to 2.
        """
        self.fold_error = fold_error

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """Calculate the percentage of predictions within a specified fold error.

        Both inputs are flattened before use so that each prediction is
        compared with its own observation.

        Args:
            y_true (np.array): Ground truth (correct) target values.
            y_pred (np.array): Predicted values, same number of elements
                as ``y_true``.

        Returns:
            float: The percentage (0-100) of predictions within the fold
                error. Pairs whose ratio is not strictly positive produce a
                NaN fold error and are counted as outside the threshold.
        """
        # Without flattening, a 1d y_true against an (n_samples, 1) y_pred
        # broadcasts to all (i, j) pairs and the percentage is taken over
        # unrelated samples.
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        fold_errors = np.abs(np.log10(y_pred / y_true))
        return float(np.mean(fold_errors < np.log10(self.fold_error)) * 100)

    def __str__(self) -> str:
        """Return the name of the scorer."""
        return f"percentage_within_{self.fold_error}_fold_error"