Source code for qsprpred.data.chem.identifiers

from abc import ABC, abstractmethod

from rdkit import Chem


class ChemIdentifier(ABC):
    """Abstract callable that turns a SMILES string into a molecule identifier.

    Implementations are expected to be simple callables: they receive a
    SMILES string and return a unique identifier for that molecule.
    """

    @abstractmethod
    def __call__(self, smiles: str) -> str:
        """Compute the identifier of the molecule described by a SMILES string.

        Args:
            smiles (str): SMILES of the molecule to identify

        Returns:
            str: identifier computed for the molecule
        """
class Identifiable(ABC):
    """Abstract base for objects that rely on a `ChemIdentifier` to identify
    duplicate molecules.
    """

    @property
    @abstractmethod
    def identifier(self) -> ChemIdentifier:
        """The identifier this instance uses.

        Returns:
            ChemIdentifier: identifier in use by this instance
        """

    @abstractmethod
    def applyIdentifier(self, identifier: ChemIdentifier):
        """Apply the given identifier to the SMILES held by this instance
        (i.e. remove duplicates).

        Args:
            identifier (ChemIdentifier): identifier to apply
        """
class InchiIdentifier(ChemIdentifier):
    """Class for InChI identifiers of molecules."""

    def __call__(self, smiles: str) -> str:
        """Get the InChIKey of the molecule represented by the given SMILES.

        Args:
            smiles (str): input SMILES

        Returns:
            str: calculated InChIKey

        Raises:
            ValueError: if RDKit cannot parse `smiles` into a molecule
        """
        mol = Chem.MolFromSmiles(smiles)
        # MolFromSmiles reports a parse failure by returning None rather than
        # raising; passing that None on to MolToInchiKey fails with an opaque
        # argument-type error, so name the offending SMILES explicitly.
        if mol is None:
            raise ValueError(f"Could not parse SMILES: {smiles!r}")
        return Chem.MolToInchiKey(mol)
class IndexIdentifier(ChemIdentifier):
    """`ChemIdentifier` that hands out a running index as the identifier.

    The index is advanced on every call; the SMILES argument itself is not
    used to compute the result.

    Attributes:
        index (int): index most recently handed out (0 before the first call)
        zfill (int): minimum number of digits; shorter indices are
            left-padded with zeros
    """

    def __init__(self, zfill: int = 5):
        """Set up the counter and the zero-padding width.

        Args:
            zfill (int): number of digits to zero-fill the index to
        """
        self.zfill = zfill
        self.index = 0

    def __call__(self, smiles: str) -> str:
        """Advance the index and return it, zero-padded, as the identifier.

        Args:
            smiles (str): input SMILES

        Returns:
            str: the new index as a zero-filled string
        """
        self.index = self.index + 1
        index_text = str(self.index)
        return index_text.zfill(self.zfill)