Source code for qsprpred.data.processing.feature_transformers
"""This module is used for feature standardization and transformation in a pipeline."""
import pandas as pd
from sklearn.base import BaseEstimator
from .step import Step
[docs]
class FeatureTransformer(Step):
    """Common parent for pipeline steps that transform feature sets.

    Steps deriving from this class standardize or otherwise transform the
    features flowing through a pipeline. The class carries no behavior of
    its own; subclass it to provide a specific transformation.

    At present the only implementation is ``SklearnStep``, which wraps a
    scikit-learn transformer.
    """
[docs]
class SklearnStep(FeatureTransformer):
    """Step that wraps a scikit-learn transformer.

    For example, this can be used to wrap a scikit-learn ``StandardScaler``.

    The wrapped transformer is always handed the features as a plain NumPy
    array (``X.to_numpy()``), both when fitting and when transforming. The
    transformed values are then placed back into a data frame that reuses
    the column labels and index of the input, so the transformer is expected
    to return an array with the same number of columns as it was given.

    Attributes:
        transformer (BaseEstimator): scikit-learn transformer to wrap, should
            have implementations of the `fit` and `transform` methods.
    """

    def __init__(self, transformer: BaseEstimator):
        """Initialize the SklearnStep.

        Args:
            transformer (BaseEstimator): scikit-learn transformer to wrap,
                should have implementations of the `fit` and `transform`
                methods.
        """
        # Set to True by `fit`; nothing in this class reads it back.
        self._fitted = False
        self.transformer = transformer

    def fit(self, X: pd.DataFrame, y: None | pd.DataFrame = None):
        """Fit the wrapped transformer to the data.

        Args:
            X (pd.DataFrame): training data
            y (pd.DataFrame | None): training targets, forwarded unchanged
                to the transformer's `fit` method
        """
        # Fit on the bare array, exactly what `transform` passes later.
        # Fitting on the data frame would make scikit-learn record feature
        # names and then warn on every `transform` call, because the array
        # given there carries no names.
        self.transformer.fit(X.to_numpy(), y)
        self._fitted = True

    def transform(
        self, X: pd.DataFrame, y: None | pd.DataFrame = None
    ) -> tuple[pd.DataFrame, pd.DataFrame | None]:
        """Transform the data using the wrapped transformer.

        Args:
            X (pd.DataFrame): data to be transformed
            y (pd.DataFrame | None): target data; returned as given

        Returns:
            pd.DataFrame: transformed data, with the columns and index of `X`
            pd.DataFrame | None: the target data, passed through untouched
        """
        X_transformed = self.transformer.transform(X.to_numpy())
        # Re-attach the input's labels so downstream steps keep working with
        # a data frame aligned to the original rows and feature names.
        return pd.DataFrame(
            X_transformed,
            columns=X.columns,
            index=X.index,
        ), y