"""Source code for qsprpred.data.tables.base."""

from abc import ABC, abstractmethod
from typing import Callable, Generator

import pandas as pd

from qsprpred.data.descriptors.sets import DescriptorSet


class StoredTable(ABC):
    """Interface for tables whose contents are kept in a file.

    Every method declared here is abstract, so a concrete subclass has to
    implement all of them before it can be instantiated.
    """

    @abstractmethod
    def save(self):
        """Write the table to a file."""

    @abstractmethod
    def reload(self):
        """Read the table back in from a file."""

    @abstractmethod
    def clearFiles(self):
        """Remove the files that belong to the table."""

    # `staticmethod` has to stay the outermost decorator so that the
    # abstract marker set by `abstractmethod` remains visible to `ABC`.
    @staticmethod
    @abstractmethod
    def fromFile(filename: str) -> "StoredTable":
        """Create a `StoredTable` object from a file.

        Args:
            filename (str): Name of the file the object is loaded from.

        Returns:
            StoredTable: The loaded `StoredTable` object.
        """
class DataTable(StoredTable):
    """Abstract interface of a stored table that exposes named properties.

    On top of the persistence methods inherited from `StoredTable`, a
    concrete table has to support `len()`, property access and manipulation,
    subsetting, function application and filtering.
    """

    @abstractmethod
    def __len__(self):
        """Get the length of the table as reported by `len()`."""

    @abstractmethod
    def getProperty(self, name: str):
        """Get values of a given property.

        Args:
            name (str): The name of the property.
        """

    @abstractmethod
    def getProperties(self):
        """Get the property names contained in the dataset."""

    @abstractmethod
    def addProperty(self, name: str, data: list):
        """Add a property to the dataset.

        Args:
            name (str): The name of the property.
            data (list): The data of the property.
        """

    @abstractmethod
    def removeProperty(self, name: str):
        """Remove a property from the dataset.

        Args:
            name (str): The name of the property.
        """

    @abstractmethod
    def transformProperties(self, names, transformers):
        """Transform property values using a transformer function.

        Args:
            names: Names of the properties (columns) to transform.
            transformers: Transformer function(s) that convert the data in
                the named properties to a new representation.
                NOTE(review): this interface does not fix whether a single
                callable or one callable per name is expected -- confirm
                against the concrete implementations.
        """

    @abstractmethod
    def getSubset(self, prefix: str):
        """Get a subset of the dataset.

        Args:
            prefix (str): The prefix of the subset.
        """

    @abstractmethod
    def apply(
        self,
        # `Callable` (from `typing`) is the type of callables; the lowercase
        # builtin `callable` is a predicate function, not a type.
        func: Callable,
        on_props: list[str] | None = None,
        func_args: list | None = None,
        func_kwargs: dict | None = None,
    ):
        """Apply a function on all or selected properties.

        The properties are supplied as the first positional argument to the
        function.

        Args:
            func (Callable): The function to apply.
            on_props (list[str], optional): The properties to include.
            func_args (list, optional): The positional arguments of the
                function.
            func_kwargs (dict, optional): The keyword arguments of the
                function.
        """

    @abstractmethod
    def filter(self, table_filters: list[Callable]):
        """Filter the dataset.

        Args:
            table_filters (list[Callable]): The filters to apply.
        """
class MoleculeDataTable(DataTable):
    """Abstract interface of a data table that holds molecules.

    Extends `DataTable` with descriptor management and access to the SMILES
    strings of the molecules.
    """

    @abstractmethod
    def addDescriptors(self, descriptors: list[DescriptorSet], *args, **kwargs):
        """Add descriptors to the dataset.

        Args:
            descriptors (list[DescriptorSet]): The descriptors to add.
            args: Additional positional arguments to be passed to each
                descriptor set.
            kwargs: Additional keyword arguments to be passed to each
                descriptor set.
        """

    @abstractmethod
    def getDescriptors(self) -> pd.DataFrame:
        """Get the table of descriptors that are currently in the dataset.

        Returns:
            pd.DataFrame: A data frame with the descriptors.
        """

    @abstractmethod
    def getDescriptorNames(self) -> list[str]:
        """Get the names of the descriptors that are currently in the dataset.

        Returns:
            list[str]: The descriptor names.
        """

    @abstractmethod
    def hasDescriptors(self):
        """Indicates if the dataset has descriptors."""

    @property
    @abstractmethod
    def smiles(self) -> Generator[str, None, None]:
        """Get the SMILES strings of the molecules in the dataset.

        Returns:
            Generator[str, None, None]: A generator over the SMILES strings
                of the molecules in the dataset.
        """
class DataSetDependant:
    """Classes that need a data set to operate have to implement this."""

    def __init__(self, dataset: MoleculeDataTable | None = None) -> None:
        """Create the object, optionally with a data set already attached.

        Args:
            dataset (MoleculeDataTable, optional): The data set to attach.
                Defaults to `None`, i.e. no data set.
        """
        self.dataSet = dataset

    def setDataSet(self, dataset: MoleculeDataTable):
        """Attach a data set to this object, replacing the current one.

        Args:
            dataset (MoleculeDataTable): The data set to attach.
        """
        self.dataSet = dataset

    @property
    def hasDataSet(self) -> bool:
        """Tell whether a data set is attached to this object."""
        return not (self.dataSet is None)

    def getDataSet(self):
        """Return the data set attached to this object.

        Raises:
            ValueError: If no data set is attached to this object.
        """
        # Guard clause: fail early when nothing is attached.
        if not self.hasDataSet:
            raise ValueError("Data set not set.")
        return self.dataSet