Source code for qsprpred.data.chem.matching

from typing import Literal

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Mol

from qsprpred.data.processing.mol_processor import MolProcessorWithID
from qsprpred.data.storage.interfaces.stored_mol import StoredMol


def match_mol_to_smarts(
    mol: Chem.Mol | str,
    smarts: list[str],
    operator: Literal["or", "and"] = "or",
    use_chirality: bool = False,
) -> bool:
    """Check if a molecule matches a set of SMARTS patterns.

    Patterns are tested in the given order and evaluation stops as soon as the
    outcome is decided: at the first hit for "or", at the first miss for "and".
    An empty pattern list yields False for both operators.

    Args:
        mol (Chem.Mol or str): Molecule to check. A string is parsed as SMILES.
        smarts (list[str]): List of SMARTS patterns to check.
        operator (Literal["or", "and"], optional): Whether to use an "or" or
            "and" operator on patterns. Defaults to "or".
        use_chirality (bool, optional): Whether to use chirality in the search.
            Defaults to False.

    Returns:
        (bool): True if the molecule matches the patterns under the chosen
            operator, False otherwise.

    Raises:
        ValueError: If `operator` is neither "or" nor "and", if `mol` is a
            string that cannot be parsed as SMILES, or if a SMARTS pattern
            that needs to be evaluated cannot be parsed.
    """
    # Reject unknown operators up front; previously they silently returned the
    # match result of the last pattern.
    if operator not in ("or", "and"):
        raise ValueError(f"operator must be 'or' or 'and', got {operator!r}")

    if isinstance(mol, str):
        smiles = mol
        mol = Chem.MolFromSmiles(smiles)
        # RDKit signals a parse failure by returning None rather than raising.
        if mol is None:
            raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    # "or" is decided by the first match (True), "and" by the first miss (False).
    deciding_result = operator == "or"
    matched = False
    for smart in smarts:
        pattern = Chem.MolFromSmarts(smart)
        if pattern is None:
            raise ValueError(f"Could not parse SMARTS pattern: {smart!r}")
        matched = mol.HasSubstructMatch(pattern, useChirality=use_chirality)
        if matched == deciding_result:
            return matched
    # Reached only when no pattern decided the outcome early: every pattern
    # missed ("or"), every pattern matched ("and"), or there were no patterns.
    return matched
class SMARTSMatchProcessor(MolProcessorWithID):
    """Processor that reports, per molecule, whether it matches SMARTS patterns."""

    def __call__(
        self,
        mols: list[str | Mol | StoredMol],
        *args,
        props: dict[str, list] | None = None,
        **kwargs
    ) -> pd.DataFrame:
        """Run the SMARTS match on every supplied molecule.

        Args:
            mols (list[str or Mol or StoredMol]): Molecules to check. The type
                of the first element decides how the whole list is handled:
                `StoredMol` entries supply their own IDs and RDKit molecules,
                otherwise entries that are not `Mol` instances are parsed
                as SMILES.
            props (dict[str, list], optional): Dictionary of properties. For
                non-`StoredMol` input, `props[self.idProp]` provides the IDs;
                if `props` is None, positional indices are used instead.
            args: Positional arguments forwarded to `match_mol_to_smarts`
                after the molecule (the SMARTS pattern list comes first).
            kwargs: Additional keyword arguments to pass to
                `match_mol_to_smarts`.

        Returns:
            pd.DataFrame: One boolean "match" column indexed by molecule ID
                (index named `self.idProp`). For empty input, an empty frame
                with only that named index and no columns.
        """
        if len(mols) == 0:
            empty_index = pd.Index([], name=self.idProp)
            return pd.DataFrame(index=empty_index)

        if isinstance(mols[0], StoredMol):
            ids = [stored.id for stored in mols]
            rd_mols = [stored.as_rd_mol() for stored in mols]
        else:
            rd_mols = [
                entry if isinstance(entry, Mol) else Chem.MolFromSmiles(entry)
                for entry in mols
            ]
            if props is not None:
                ids = props[self.idProp]
            else:
                ids = list(range(len(rd_mols)))

        matches = [
            match_mol_to_smarts(rd_mol, *args, **kwargs) for rd_mol in rd_mols
        ]
        result_index = pd.Index(ids, name=self.idProp)
        return pd.DataFrame({"match": matches}, index=result_index)

    @property
    def supportsParallel(self) -> bool:
        """Whether the processor supports parallel processing (always True)."""
        return True