Source code for qsprpred.benchmarks.settings.data_prep

from dataclasses import dataclass
from typing import Callable

from ...data.processing.data_filters import RepeatsFilter
from ...data.processing.feature_standardizers import SKLearnStandardizer
from ...data.sampling.splits import DataSplit


[docs]@dataclass class DataPrepSettings: """Class that determines settings for data preparation. These are arguments passed to `QSPRDataset.prepareDataset`. Attributes: data_filters (list): Data filters to use. split (DataSplit): Data split to use. smiles_standardizer (str or callable): Standardizer to use for SMILES strings. feature_filters (list): Feature filters to use. feature_standardizer (SKLearnStandardizer): Standardizer to use for features. feature_fill_value (float): Fill value to use for features. shuffle (bool): Whether to shuffle the data. """ data_filters: list | None = (RepeatsFilter(keep=True),) split: DataSplit = None smiles_standardizer: str | Callable = "chembl" feature_filters: list = None feature_standardizer: SKLearnStandardizer = None feature_fill_value: float = 0.0 shuffle: bool = True