"""This module holds assessment methods for QSPRModels"""
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Callable, Iterable
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from .metrics.scikit_learn import SklearnMetrics
from ...data import QSPRDataset
from ...data.sampling.splits import DataSplit
from ...logs import logger
from ...models.early_stopping import EarlyStoppingMode
from ...models.model import QSPRModel
from ...models.monitors import AssessorMonitor, BaseMonitor
class ModelAssessor(ABC):
"""Base class for assessment methods.
Attributes:
        scoreFunc (Metric): scoring function to use; it should match the output of
            the evaluation method (e.g. if the evaluation method returns class
            probabilities, the scoring function should support class probabilities)
        monitor (AssessorMonitor): monitor to use for assessment; if None, a
            BaseMonitor is used
useProba (bool): use probabilities for classification models
mode (EarlyStoppingMode): early stopping mode for fitting
splitMultitaskScores (bool): whether to split the scores per task for multitask models
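    Example:
        ``scoring`` accepts a scikit-learn scorer name or a custom callable taking
        true and predicted values; a minimal, illustrative sketch of such a callable
        (not part of the library)::

            def neg_mae(y_true, y_pred):
                # negated mean absolute error so that higher values are better
                return -float(np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred))))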
"""
def __init__(
self,
scoring: str | Callable[[Iterable, Iterable], float],
monitor: AssessorMonitor | None = None,
use_proba: bool = True,
mode: EarlyStoppingMode | None = None,
split_multitask_scores: bool = False,
):
"""Initialize the evaluation method class.
Args:
            scoring (str | Callable[[Iterable, Iterable], float]): scoring function
                to use, either a scikit-learn scorer name or a callable taking true
                and predicted values and returning a float
monitor (AssessorMonitor): monitor to track the evaluation
use_proba (bool): use probabilities for classification models
mode (EarlyStoppingMode): early stopping mode for fitting
split_multitask_scores (bool): whether to split the scores per task for multitask models
"""
self.monitor = monitor
self.useProba = use_proba
self.mode = mode
self.scoreFunc = (
SklearnMetrics(scoring) if isinstance(scoring, str) else scoring
)
self.splitMultitaskScores = split_multitask_scores
@abstractmethod
def __call__(
self,
model: QSPRModel,
ds: QSPRDataset,
save: bool = True,
parameters: dict | None = None,
monitor: AssessorMonitor | None = None,
**kwargs,
) -> np.ndarray:
"""Evaluate the model.
Args:
model (QSPRModel): model to evaluate
ds (QSPRDataset): dataset to evaluate on
save (bool): save predictions to file
parameters (dict): parameters to use for the evaluation
monitor (AssessorMonitor): monitor to track the evaluation, overrides
the monitor set in the constructor
kwargs: additional arguments for fit function of the model
Returns:
np.ndarray: scores for the model. If splitMultitaskScores is True, each
column represents a task and each row a fold. Otherwise, a 1D array is
returned with the scores for each fold.
"""
    def predictionsToDataFrame(
self,
model: QSPRModel,
        y: pd.DataFrame,
predictions: np.ndarray | list[np.ndarray],
index: pd.Series,
extra_columns: dict[str, np.ndarray] | None = None,
) -> pd.DataFrame:
"""Create a dataframe with true values and predictions.
Args:
model (QSPRModel): model to evaluate
            y (pd.DataFrame): target values
            predictions (np.ndarray | list[np.ndarray]): predictions
            index (pd.Series): index of the data set
            extra_columns (dict[str, np.ndarray]): extra columns to add to the output
        Returns:
            pd.DataFrame: dataframe with a label column and a prediction column per
                target property (and probability columns for classification tasks)
        """
# Create dataframe with true values
df_out = pd.DataFrame(
y.values, columns=y.add_suffix("_Label").columns, index=index
)
# Add predictions to dataframe
for idx, prop in enumerate(model.targetProperties):
if prop.task.isClassification() and self.useProba:
# convert one-hot encoded predictions to class labels
# and add to train and test
df_out[f"{prop.name}_Prediction"] = np.argmax(predictions[idx], axis=1)
# add probability columns to train and test set
df_out = pd.concat(
[
df_out,
pd.DataFrame(predictions[idx], index=index).add_prefix(
f"{prop.name}_ProbabilityClass_"
),
],
axis=1,
)
else:
df_out[f"{prop.name}_Prediction"] = predictions[:, idx]
# Add extra columns to dataframe if given (such as fold indexes)
if extra_columns is not None:
for col_name, col_values in extra_columns.items():
df_out[col_name] = col_values
return df_out
class CrossValAssessor(ModelAssessor):
"""Perform cross validation on a model.
Attributes:
useProba (bool): use predictProba instead of predict for classification
        monitor (AssessorMonitor): monitor to use for assessment; if None, a
            BaseMonitor is used
mode (EarlyStoppingMode): mode to use for early stopping
split (DataSplit): split to use for cross validation (default: KFold, n_splits=5)
round (int): number of decimal places to round predictions to (default: 5)
splitMultitaskScores (bool): whether to split the scores per task for multitask models
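    Example:
        A minimal usage sketch; ``model`` and ``dataset`` are placeholders for an
        existing ``QSPRModel`` and a prepared ``QSPRDataset``::

            assessor = CrossValAssessor(scoring="roc_auc")
            fold_scores = assessor(model, dataset)  # one score per fold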
"""
def __init__(
self,
scoring: str | Callable[[Iterable, Iterable], float],
split: DataSplit | None = None,
monitor: AssessorMonitor | None = None,
use_proba: bool = True,
mode: EarlyStoppingMode | None = None,
round: int = 5,
split_multitask_scores: bool = False,
):
super().__init__(scoring, monitor, use_proba, mode, split_multitask_scores)
self.split = split
if monitor is None:
self.monitor = BaseMonitor()
self.round = round
def __call__(
self,
model: QSPRModel,
ds: QSPRDataset,
save: bool = True,
parameters: dict | None = None,
monitor: AssessorMonitor | None = None,
**kwargs,
) -> np.ndarray:
"""Perform cross validation on the model with the given parameters.
        Args:
            model (QSPRModel): model to assess
            ds (QSPRDataset): dataset to assess on
            save (bool): whether to save predictions to file
            parameters (dict): optional model parameters to use in assessment
            monitor (AssessorMonitor): optional, overrides monitor set in constructor
            **kwargs: additional keyword arguments for the fit function
Returns:
np.ndarray: scores for the validation sets. If splitMultitaskScores is True, each
column represents a task and each row a fold. Otherwise, a 1D array is
returned with the scores for each fold.
"""
model.initFromDataset(ds)
monitor = monitor or self.monitor
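        # fall back to a shuffled 5-fold split seeded with the model's random state
        # when no split was set in the constructor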
split = self.split or KFold(
n_splits=5, shuffle=True, random_state=model.randomState
)
evalparams = model.parameters if parameters is None else parameters
X, _ = ds.getFeatures()
y, _ = ds.getTargetPropertiesValues()
monitor.onAssessmentStart(model, ds, self.__class__.__name__)
# cross validation
fold_counter = np.zeros(y.shape[0])
predictions = []
scores = []
for i, (X_train, X_test, y_train, y_test, idx_train, idx_test) in enumerate(
ds.iterFolds(split=split)
):
logger.debug(
"cross validation fold %s started: %s"
% (i, datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
)
monitor.onFoldStart(
fold=i, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
)
# fit model
crossval_estimator = model.loadEstimator(evalparams)
model_fit = model.fit(
X_train,
y_train,
crossval_estimator,
self.mode,
monitor=monitor,
**kwargs,
)
# make predictions
if model.task.isRegression() or not self.useProba:
fold_predictions = model.predict(X_test, crossval_estimator)
else:
fold_predictions = model.predictProba(X_test, crossval_estimator)
# score
if model.isMultiTask and self.splitMultitaskScores:
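                # score each target property separately; for classification with
                # probabilities, the property's probability array is wrapped in a
                # single-element list before being passed to the scoring function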
scores_tasks = []
for idx, prop in enumerate(model.targetProperties):
if self.useProba and prop.task.isClassification():
prop_predictions = [fold_predictions[idx]]
scores_tasks.append(
self.scoreFunc(y.iloc[idx_test, idx], prop_predictions)
)
else:
scores_tasks.append(
self.scoreFunc(
y.iloc[idx_test, idx], fold_predictions[:, idx]
)
)
scores.append(scores_tasks)
else:
score = self.scoreFunc(y.iloc[idx_test], fold_predictions)
scores.append(score)
# save molecule ids and fold number
fold_counter[idx_test] = i
logger.debug(
"cross validation fold %s ended: %s"
% (i, datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
)
fold_predictions_df = self.predictionsToDataFrame(
model,
y.iloc[idx_test],
fold_predictions,
pd.Series(y.index).iloc[idx_test],
extra_columns={"Fold": fold_counter[idx_test]},
)
monitor.onFoldEnd(model_fit, fold_predictions_df)
predictions.append(fold_predictions_df)
# save results
if save:
pd.concat(predictions).round(self.round).to_csv(
f"{model.outPrefix}.cv.tsv", sep="\t"
)
monitor.onAssessmentEnd(pd.concat(predictions))
return np.array(scores)
class TestSetAssessor(ModelAssessor):
"""Assess a model on a test set.
    Attributes:
        useProba (bool): use predictProba instead of predict for classification
        monitor (AssessorMonitor): monitor to use for assessment; if None, a
            BaseMonitor is used
        mode (EarlyStoppingMode): mode to use for early stopping
        round (int): number of decimal places to round predictions to (default: 5)
splitMultitaskScores (bool): whether to split the scores per task for multitask models
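    Example:
        A minimal usage sketch; ``model`` and ``dataset`` are placeholders for an
        existing ``QSPRModel`` and a prepared ``QSPRDataset``::

            assessor = TestSetAssessor(scoring="r2")
            test_score = assessor(model, dataset)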
"""
def __init__(
self,
scoring: str | Callable[[Iterable, Iterable], float],
monitor: AssessorMonitor | None = None,
use_proba: bool = True,
mode: EarlyStoppingMode | None = None,
round: int = 5,
split_multitask_scores: bool = False,
):
super().__init__(scoring, monitor, use_proba, mode, split_multitask_scores)
if monitor is None:
self.monitor = BaseMonitor()
self.round = round
def __call__(
self,
model: QSPRModel,
ds: QSPRDataset,
save: bool = True,
parameters: dict | None = None,
monitor: AssessorMonitor | None = None,
**kwargs,
) -> np.ndarray:
"""Make predictions for independent test set.
        Args:
            model (QSPRModel): model to assess
            ds (QSPRDataset): dataset to assess on
            save (bool): whether to save predictions to file
            parameters (dict): optional model parameters to use in assessment
            monitor (AssessorMonitor): optional, overrides monitor set in constructor
            **kwargs: additional keyword arguments for the fit function
Returns:
np.ndarray: scores for the test set. If splitMultitaskScores is True, each
column represents a task. Otherwise, a 1D array is returned with the score
for the test set.
"""
model.initFromDataset(ds)
monitor = monitor or self.monitor
evalparams = model.parameters if parameters is None else parameters
X, X_ind = ds.getFeatures()
y, y_ind = ds.getTargetPropertiesValues()
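        # X/y hold the training data, X_ind/y_ind the held-out independent test set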
monitor.onAssessmentStart(model, ds, self.__class__.__name__)
monitor.onFoldStart(fold=0, X_train=X, y_train=y, X_test=X_ind, y_test=y_ind)
# fit model
ind_estimator = model.loadEstimator(evalparams)
ind_estimator = model.fit(
X, y, ind_estimator, self.mode, monitor=monitor, **kwargs
)
# predict values for independent test set
if model.task.isRegression() or not self.useProba:
predictions = model.predict(X_ind, ind_estimator)
else:
predictions = model.predictProba(X_ind, ind_estimator)
# score
if model.isMultiTask and self.splitMultitaskScores:
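            # score each target property separately; for classification with
            # probabilities, the property's probability array is wrapped in a
            # single-element list before being passed to the scoring function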
scores_tasks = []
for idx, prop in enumerate(model.targetProperties):
if self.useProba and prop.task.isClassification():
prop_predictions = [predictions[idx]]
scores_tasks.append(
self.scoreFunc(y_ind.iloc[:, idx], prop_predictions)
)
else:
scores_tasks.append(
self.scoreFunc(y_ind.iloc[:, idx], predictions[:, idx])
)
score = scores_tasks
else:
score = [self.scoreFunc(y_ind, predictions)]
predictions_df = self.predictionsToDataFrame(
model, y_ind, predictions, y_ind.index
)
monitor.onFoldEnd(ind_estimator, predictions_df)
        # save results for the independent test set
if save:
predictions_df.round(self.round).to_csv(
f"{model.outPrefix}.ind.tsv", sep="\t"
)
monitor.onAssessmentEnd(predictions_df)
return np.array(score)