"""This module holds assessment methods for QSPRModels"""
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Callable, Iterable
import numpy as np
import pandas as pd
from .metrics.scikit_learn import SklearnMetrics
from ...data.processing.pipeline import DatasetPipeline
from ...data.sampling.splits import DataSplit
from ...data.tables.interfaces.qspr_data_set import QSPRDataSet
from ...logs import logger
from ...models.early_stopping import EarlyStoppingMode
from ...models.model import QSPRModel
from ...models.monitors import AssessorMonitor, BaseMonitor
[docs]
class ModelAssessor(ABC):
    """Base class for assessment methods.

    Attributes:
        name (str): name of the assessment method
        scoreFunc (Metric): scoring function to use, should match the output of the
            evaluation method (e.g. if the evaluation method returns class
            probabilities, the scoring function should support class probabilities)
        monitor (AssessorMonitor): monitor to use for assessment, stored exactly as
            passed to the constructor (so it is None when no monitor was given)
        useProba (bool): whether to use probabilities for classification models
        mode (EarlyStoppingMode): early stopping mode for fitting
        splitMultitaskScores (bool): whether to split the scores per task for
            multitask models
        scores: scores returned by the scoring function for each fold, None until
            an assessment has filled it in
        predictions: predictions returned by the model for each fold, None until
            an assessment has filled it in
    """

    def __init__(
        self,
        name: str,
        scoring: str | Callable[[Iterable, Iterable], float],
        monitor: AssessorMonitor | None = None,
        use_proba: bool = True,
        mode: EarlyStoppingMode | None = None,
        split_multitask_scores: bool = False,
    ):
        """Initialize the evaluation method class.

        Args:
            name (str): name of the evaluation method
            scoring (str | Callable): a string is wrapped in `SklearnMetrics`,
                anything else is used directly as the scoring function and is
                called as `scoring(y_true, y_pred)`
            monitor (AssessorMonitor): monitor to track the evaluation
            use_proba (bool): use probabilities for classification models
            mode (EarlyStoppingMode): early stopping mode for fitting
            split_multitask_scores (bool): whether to split the scores per task
                for multitask models
        """
        self.name = name
        self.scoreFunc = (
            SklearnMetrics(scoring) if isinstance(scoring, str) else scoring
        )
        self.monitor = monitor
        self.useProba = use_proba
        self.mode = mode
        self.splitMultitaskScores = split_multitask_scores
        # results are only available after the assessor has been called
        self.scores = None
        self.predictions = None

    @abstractmethod
    def __call__(
        self,
        model: QSPRModel,
        ds: QSPRDataSet,
        save: bool = True,
        parameters: dict | None = None,
        monitor: AssessorMonitor | None = None,
        **kwargs,
    ) -> np.ndarray:
        """Evaluate the model.

        Args:
            model (QSPRModel): model to evaluate
            ds (QSPRDataSet): dataset to evaluate on
            save (bool): save predictions to file
            parameters (dict): parameters to use for the evaluation
            monitor (AssessorMonitor): monitor to track the evaluation, overrides
                the monitor set in the constructor
            kwargs: additional arguments for fit function of the model

        Returns:
            np.ndarray: scores for the model. If splitMultitaskScores is True, each
                column represents a task and each row a fold. Otherwise, a 1D
                array is returned with the scores for each fold.
        """

    def predictionsToDataFrame(
        self,
        model: QSPRModel,
        y_train: pd.DataFrame,
        y_test: pd.DataFrame,
        train_preds: np.ndarray | list[np.ndarray],
        test_preds: np.ndarray | list[np.ndarray],
        fold: int,
    ) -> pd.DataFrame:
        """Create a dataframe with true values and predictions.

        Train rows always come first, followed by the test rows.

        Args:
            model (QSPRModel): model to evaluate, only its `targetProperties`
                are read here.
            y_train (pd.DataFrame): training target values, one column per
                target property.
            y_test (pd.DataFrame): testing target values, same columns as
                `y_train`.
            train_preds (np.ndarray | list[np.ndarray]): training predictions.
                A list holds one array per target property (used for class
                probabilities), an array holds one column per target property.
            test_preds (np.ndarray | list[np.ndarray]): testing predictions, in
                the same layout as `train_preds`.
            fold (int): current fold number.

        Returns:
            pd.DataFrame: dataframe with the `<property>_Label` and
                `<property>_Prediction` columns, the
                `<property>_ProbabilityClass_<n>` columns for classification
                properties when probabilities are used, and the `Set`
                ("Train"/"Test") and `Fold` columns.
        """
        # Combine predictions (per target property when given as lists)
        if isinstance(train_preds, list):
            predictions = [
                np.concatenate((task_train_preds, test_preds[idx]))
                for idx, task_train_preds in enumerate(train_preds)
            ]
        else:
            predictions = np.vstack([train_preds, test_preds])
        # Combine target values into dataframe
        y = pd.concat([y_train, y_test])
        df_out = y.add_suffix("_Label")
        # Add predictions to dataframe
        for idx, prop in enumerate(model.targetProperties):
            if prop.task.isClassification() and self.useProba:
                # the predicted class is the one with the highest probability
                df_out[f"{prop.name}_Prediction"] = np.argmax(
                    predictions[idx], axis=1
                )
                probabilities = pd.DataFrame(
                    predictions[idx], index=y.index
                ).add_prefix(f"{prop.name}_ProbabilityClass_")
                df_out = pd.concat([df_out, probabilities], axis=1)
            else:
                df_out[f"{prop.name}_Prediction"] = predictions[:, idx]
        # Add set labels
        df_out["Set"] = ["Train"] * len(y_train) + ["Test"] * len(y_test)
        # Add fold number
        df_out["Fold"] = fold
        return df_out
[docs]
class Assessor(ModelAssessor):
    """Perform cross validation on a model.

    Attributes:
        useProba (bool): use predictProba instead of predict for classification
        monitor (AssessorMonitor): monitor to use for assessment, a BaseMonitor is
            created when None is passed to the constructor
        mode (EarlyStoppingMode): mode to use for early stopping
        split (DataSplit): split that defines the folds to assess on
        round (int): number of decimal places the saved predictions are rounded
            to (default: 5)
        splitMultitaskScores (bool): whether to split the scores per task for
            multitask models
        scores (list): scores of the most recent assessment, one entry per fold
        predictions (list[pd.DataFrame]): prediction tables of the most recent
            assessment, one per fold
    """

    def __init__(
        self,
        name: str,
        scoring: str | Callable[[Iterable, Iterable], float],
        split: DataSplit,
        monitor: AssessorMonitor | None = None,
        use_proba: bool = True,
        mode: EarlyStoppingMode | None = None,
        round: int = 5,
        split_multitask_scores: bool = False,
    ):
        """Initialize the assessor.

        Args:
            name (str): name of the assessment, also used in the report file name
            scoring (str | Callable): scoring function or its name, handled by the
                base class constructor
            split (DataSplit): split that defines the folds to assess on
            monitor (AssessorMonitor): monitor to track the assessment, a
                BaseMonitor is created if None
            use_proba (bool): use probabilities for classification models
            mode (EarlyStoppingMode): early stopping mode for fitting
            round (int): number of decimal places the saved predictions are
                rounded to
            split_multitask_scores (bool): whether to split the scores per task
                for multitask models
        """
        super().__init__(
            name, scoring, monitor, use_proba, mode, split_multitask_scores
        )
        self.split = split
        if monitor is None:
            self.monitor = BaseMonitor()
        self.round = round

    def __call__(
        self,
        model: QSPRModel,
        ds: QSPRDataSet,
        pipeline: DatasetPipeline | None = None,
        parameters: dict | None = None,
        monitor: AssessorMonitor | None = None,
        save: bool = True,
        **kwargs,
    ) -> np.ndarray:
        """Perform cross validation on the model with the given parameters.

        For every fold produced by applying the pipeline and the split to the
        data set, a fresh estimator is loaded and fitted on the training part,
        predictions are made for both parts and the test predictions are scored.

        Args:
            model (QSPRModel): model to assess
            ds (QSPRDataSet): dataset to assess on
            pipeline (DatasetPipeline): optional pipeline to apply to the dataset,
                a default `DatasetPipeline()` is used if None
            parameters (dict): optional model parameters to use in assessment,
                `model.parameters` is used if None
            monitor (AssessorMonitor): optional, overrides monitor set in
                constructor
            save (bool): whether to write the rounded predictions of all folds to
                the tab-separated file `{model.outPrefix}_{self.name}.tsv`
            **kwargs: additional keyword arguments for the fit function

        Returns:
            np.ndarray: scores for the validation sets. If splitMultitaskScores
                is True, each column represents a task and each row a fold.
                Otherwise, a 1D array is returned with the scores for each fold.
        """
        # explicit None checks, so that a passed object is never discarded
        # because it happens to evaluate as falsy
        monitor = monitor if monitor is not None else self.monitor
        evalparams = model.parameters if parameters is None else parameters
        pipeline = pipeline if pipeline is not None else DatasetPipeline()
        monitor.onAssessmentStart(
            model, ds, pipeline, self.name, evalparams, self.split
        )
        # Assess model on each fold in the split
        self.scores = []
        self.predictions = []
        for i, (X_train, y_train, X_test, y_test) in enumerate(
            pipeline.applyOnDataSet(ds, self.split)
        ):
            started = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            logger.debug(f"Model Assessment fold {i} started: {started}")
            monitor.onFoldStart(
                fold=i, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
            )
            logger.debug(f"Monitoring started for fold {i}.")
            # fit model
            logger.debug(f"Loading model for fold {i} with parameters: {evalparams}.")
            model.initFromData(ds, pipeline)
            estimator = model.loadEstimator(evalparams)
            logger.debug(f"Fitting model for fold {i}: {estimator}.")
            model_fit = model.fit(
                X_train,
                y_train,
                estimator,
                self.mode,
                monitor=monitor,
                **kwargs,
            )
            # make predictions
            logger.debug(f"Making predictions for fold {i}, model: {evalparams}")
            train_preds, test_preds = self._predictFold(
                model, estimator, X_train, X_test
            )
            # score
            logger.debug(f"Scoring predictions for fold {i}.")
            self.scores.append(self._scoreFold(model, y_test, test_preds))
            # Combine predictions and log fold results
            ended = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            logger.debug(f"Evaluation of fold {i} ended: {ended}")
            logger.debug(f"Converting predictions to data frame for fold {i}. ")
            preds_df = self.predictionsToDataFrame(
                model, y_train, y_test, train_preds, test_preds, fold=i
            )
            monitor.onFoldEnd(model_fit, preds_df, self.scores[i])
            self.predictions.append(preds_df)
        monitor.onAssessmentEnd(pd.concat(self.predictions))
        if save:
            logger.debug(f"Saving assessment report ({len(self.predictions)}).")
            # concatenated again rather than reusing the frame handed to the
            # monitor, so the saved report does not depend on what the monitor
            # does with its frame
            report = pd.concat(self.predictions).round(self.round)
            report.to_csv(f"{model.outPrefix}_{self.name}.tsv", sep="\t")
        return np.array(self.scores)

    def _predictFold(self, model, estimator, X_train, X_test):
        """Return `(train_preds, test_preds)` of the fitted estimator for one fold.

        `model.predict` is used for regression models or when probabilities are
        disabled, `model.predictProba` otherwise.
        """
        if model.task.isRegression() or not self.useProba:
            predict = model.predict
        else:
            predict = model.predictProba
        test_preds = predict(X_test, estimator)
        train_preds = predict(X_train, estimator)
        return train_preds, test_preds

    def _scoreFold(self, model, y_test, test_preds):
        """Score the test predictions of one fold.

        Returns a single score, or a list with one score per target property
        when the model is multitask and `splitMultitaskScores` is set.
        """
        if not (model.isMultiTask and self.splitMultitaskScores):
            return self.scoreFunc(y_test, test_preds)
        scores_tasks = []
        for idx, prop in enumerate(model.targetProperties):
            if self.useProba and prop.task.isClassification():
                # probabilities are indexed per task and handed to the scoring
                # function wrapped in a list
                task_preds = [test_preds[idx]]
            else:
                task_preds = test_preds[:, idx]
            scores_tasks.append(self.scoreFunc(y_test[prop.name], task_preds))
        return scores_tasks