Source code for qsprpred.data.tables.tests

import os

import numpy as np
import pandas as pd
from parameterized import parameterized
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold, ShuffleSplit

from ..descriptors.fingerprints import MorganFP
from ... import TargetProperty, TargetTasks
from ...data import QSPRDataset
from ...utils.stopwatch import StopWatch
from ...utils.testing.base import QSPRTestCase
from ...utils.testing.check_mixins import DataPrepCheckMixIn
from ...utils.testing.path_mixins import DataSetsPathMixIn, PathMixIn


class TestDataSetCreationAndSerialization(DataSetsPathMixIn, QSPRTestCase):
    """Simple tests for dataset creation and serialization under different
    conditions and error states."""

    def setUp(self):
        super().setUp()
        self.setUpPaths()

    def checkConsistency(self, ds: QSPRDataset):
        self.assertNotIn("Notes", ds.getProperties())
        self.assertNotIn("HBD", ds.getProperties())
        self.assertTrue(len(self.getSmallDF()) - 1 == len(ds))
        self.assertEqual(ds.targetProperties[0].task, TargetTasks.REGRESSION)
        self.assertTrue(ds.hasProperty("CL"))
        self.assertEqual(ds.targetProperties[0].name, "CL")
        self.assertEqual(len(ds.X), len(ds))
        self.assertEqual(len(ds.X_ind), 0)
        self.assertEqual(len(ds.y), len(ds))
        self.assertEqual(len(ds.y_ind), 0)

    def checkConsistencyMulticlass(self, ds):
        self.assertTrue(ds.isMultiTask)
        self.assertEqual(ds.nTargetProperties, 2)
        self.assertEqual(len(ds.targetProperties), 2)
        self.assertEqual(ds.targetProperties[0].name, "CL")
        self.assertEqual(ds.targetProperties[1].name, "fu")
        self.assertEqual(ds.targetProperties[0].task, TargetTasks.REGRESSION)
        self.assertEqual(ds.targetProperties[1].task, TargetTasks.REGRESSION)
        self.assertEqual(len(ds.X), len(ds))
        self.assertEqual(len(ds.y), len(ds))
        self.assertEqual(len(ds.y.columns), 2)
        self.assertEqual(ds.y.columns[0], "CL")
        self.assertEqual(ds.y.columns[1], "fu")

    def checkConsistencySingleclass(self, ds):
        self.assertFalse(ds.isMultiTask)
        self.assertEqual(ds.nTargetProperties, 1)
        self.assertEqual(len(ds.targetProperties), 1)
        self.assertEqual(ds.targetProperties[0].name, "CL")
        self.assertEqual(ds.targetProperties[0].task, TargetTasks.REGRESSION)
        self.assertEqual(len(ds.X), len(ds))
        self.assertEqual(len(ds.y), len(ds))
        self.assertEqual(len(ds.y.columns), 1)
        self.assertEqual(ds.y.columns[0], "CL")

    def checkBadInit(self, ds):
        with self.assertRaises(AssertionError):
            ds.makeClassification("CL", [])
        with self.assertRaises(AssertionError):
            ds.makeClassification("CL", th=6.5)
        with self.assertRaises(AssertionError):
            ds.makeClassification("CL", th=[0, 2, 3])
        with self.assertRaises(AssertionError):
            ds.makeClassification("CL", th=[0, 2, 3])

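    # Threshold semantics assumed by the checker below: a single-item list such
    # as [6.5] defines a binary (SINGLECLASS) split, while a list of n
    # thresholds is treated as n - 1 class bin edges, hence the
    # `len(ths[idx]) - 1` expected class count in the multi-class branch.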
    def checkClassification(self, ds, target_names, ths):
        # Test that the dataset properties are correctly initialized
        self.assertTrue(len(ds.targetProperties) == len(target_names) == len(ths))
        for idx, target_prop in enumerate(ds.targetProperties):
            if len(ths[idx]) == 1:
                self.assertEqual(target_prop.task, TargetTasks.SINGLECLASS)
            else:
                self.assertEqual(target_prop.task, TargetTasks.MULTICLASS)
            self.assertEqual(target_prop.name, target_names[idx])
            y = ds.getTargetPropertiesValues(concat=True)
            self.assertTrue(y.columns[idx] == target_prop.name)
            if target_prop.task == TargetTasks.SINGLECLASS:
                self.assertEqual(y[target_prop.name].unique().shape[0], 2)
            elif ths[idx] != "precomputed":
                self.assertEqual(
                    y[target_prop.name].unique().shape[0], (len(ths[idx]) - 1)
                )
            self.assertEqual(target_prop.th, ths[idx])

    def checkRegression(self, ds, target_names):
        self.assertTrue(len(ds.targetProperties) == len(target_names))
        for idx, target_prop in enumerate(ds.targetProperties):
            self.assertEqual(target_prop.task, TargetTasks.REGRESSION)
            self.assertTrue(ds.hasProperty(target_names[idx]))
            self.assertEqual(target_prop.name, target_names[idx])
        ds.getTargetPropertiesValues(concat=True)

    def testDefaults(self):
        """Test basic dataset creation and serialization with mostly default options."""
        # create a basic regression data set
        dataset = QSPRDataset(
            "test_defaults",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        self.assertIn("HBD", dataset.getProperties())
        dataset.removeProperty("HBD")
        self.assertNotIn("HBD", dataset.getProperties())
        stopwatch = StopWatch()
        dataset.save()
        stopwatch.stop("Saving took: ")
        self.assertTrue(os.path.exists(dataset.storePath))
        # load the data set again and check if everything is consistent after loading
        # creation from file
        stopwatch.reset()
        dataset_new = QSPRDataset.fromFile(dataset.metaFile)
        stopwatch.stop("Loading from file took: ")
        self.checkConsistency(dataset_new)
        # creation by reinitialization
        stopwatch.reset()
        dataset_new = QSPRDataset(
            "test_defaults",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        stopwatch.stop("Reinitialization took: ")
        self.checkConsistency(dataset_new)
        # creation from a table file
        stopwatch.reset()
        dataset_new = QSPRDataset.fromTableFile(
            "test_defaults",
            f"{self.inputDataPath}/test_data.tsv",
            target_props=[{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        stopwatch.stop("Loading from table file took: ")
        self.assertTrue(isinstance(dataset_new, QSPRDataset))
        self.checkConsistency(dataset_new)
        # creation from a table file with a new name
        dataset_new = QSPRDataset.fromTableFile(
            "test_defaults_new",  # new name implies HBD below should exist again
            f"{self.inputDataPath}/test_data.tsv",
            target_props=[{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        self.assertTrue(isinstance(dataset_new, QSPRDataset))
        self.assertIn("HBD", dataset_new.getProperties())
        dataset_new.removeProperty("HBD")
        self.checkConsistency(dataset_new)

    def testMultitask(self):
        """Test multi-task dataset creation and functionality."""
        dataset = QSPRDataset(
            "testMultitask",
            [
                {
                    "name": "CL",
                    "task": TargetTasks.REGRESSION
                },
                {
                    "name": "fu",
                    "task": TargetTasks.REGRESSION
                },
            ],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        # Check that the dataset is correctly initialized
        self.checkConsistencyMulticlass(dataset)
        # Check the dataset after dropping a task
        dataset.unsetTargetProperty("fu")
        self.checkConsistencySingleclass(dataset)
        with self.assertRaises(AssertionError):
            dataset.unsetTargetProperty("fu")
        with self.assertRaises(AssertionError):
            dataset.unsetTargetProperty("CL")
        # Check the dataset after adding a task
        dataset.setTargetProperty({"name": "fu", "task": TargetTasks.REGRESSION})
        self.checkConsistencyMulticlass(dataset)

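    # The test below assumes that makeClassification converts a target in place
    # while keeping the pre-conversion values in a "<name>_original" column,
    # which is why "CL_original" is dropped before testing the precomputed case.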
    def testTargetProperty(self):
        """Test target property creation and serialization in the context of a
        dataset.
        """
        dataset = QSPRDataset(
            "testTargetProperty",
            [
                {
                    "name": "CL",
                    "task": TargetTasks.REGRESSION
                },
                {
                    "name": "fu",
                    "task": TargetTasks.REGRESSION
                },
            ],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        # Check that the makeClassification method works as expected
        self.checkBadInit(dataset)
        dataset.makeClassification("CL", th=[6.5])
        dataset.makeClassification("fu", th=[0.3])
        self.checkClassification(dataset, ["CL", "fu"], [[6.5], [0.3]])
        dataset.makeClassification("CL", th=[0, 15, 30, 60])
        self.checkClassification(dataset, ["CL", "fu"], [[0, 15, 30, 60], [0.3]])
        dataset.save()
        # check precomputed threshold setting
        df_new = dataset.df.copy()
        del df_new["CL_original"]
        dataset = QSPRDataset(
            "testTargetProperty-precomputed",
            [{
                "name": "CL",
                "task": TargetTasks.MULTICLASS,
                "th": "precomputed"
            }],
            df=df_new,
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        self.assertEqual(len(dataset.targetProperties), 1)
        self.assertEqual(dataset.targetProperties[0].task, TargetTasks.MULTICLASS)
        self.assertEqual(dataset.targetProperties[0].name, "CL")
        self.assertEqual(dataset.targetProperties[0].nClasses, 3)
        self.assertEqual(dataset.targetProperties[0].th, "precomputed")
        # Check that the dataset is correctly loaded from file for classification
        dataset.save()
        dataset_new = QSPRDataset.fromFile(dataset.metaFile)
        self.checkBadInit(dataset_new)
        self.checkClassification(dataset_new, ["CL"], ["precomputed"])
        # Check that the makeRegression method works as expected
        dataset_new.makeRegression(target_property="CL")
        # Check that the dataset is correctly loaded from file for regression
        self.checkRegression(dataset_new, ["CL"])
        dataset_new.save()
        dataset_new = QSPRDataset.fromFile(dataset.metaFile)
        self.checkRegression(dataset_new, ["CL"])

    def testIndexing(self):
        # default index
        QSPRDataset(
            "testTargetProperty",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        # set index to SMILES column
        QSPRDataset(
            "testTargetProperty",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
            index_cols=["SMILES"],
        )
        # multi-index
        QSPRDataset(
            "testTargetProperty",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            df=self.getSmallDF(),
            store_dir=self.generatedDataPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
            index_cols=["SMILES", "Name"],
        )
        # an index with duplicates should raise an error
        self.assertRaises(
            ValueError,
            lambda: QSPRDataset(
                "testTargetProperty",
                [{
                    "name": "CL",
                    "task": TargetTasks.REGRESSION
                }],
                df=self.getSmallDF(),
                store_dir=self.generatedDataPath,
                n_jobs=self.nCPU,
                chunk_size=self.chunkSize,
                index_cols=["moka_ionState7.4"],
            ),
        )
        # an index containing NaNs should raise an error
        self.assertRaises(
            ValueError,
            lambda: QSPRDataset(
                "testTargetProperty",
                [{
                    "name": "CL",
                    "task": TargetTasks.REGRESSION
                }],
                df=self.getSmallDF(),
                store_dir=self.generatedDataPath,
                n_jobs=self.nCPU,
                chunk_size=self.chunkSize,
                index_cols=["fu"],
            ),
        )

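    # The test below assumes checkMols(throw=False) returns a boolean validity
    # mask over all molecules (True = valid), so `sum(~invalids)` counts the
    # invalid entries.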
    @parameterized.expand([(1, ), (2, )])  # use one or two CPUs
    def testInvalidsDetection(self, n_cpu):
        df = self.getBigDF()
        all_mols = len(df)
        dataset = QSPRDataset(
            "testInvalidsDetection",
            [{
                "name": "CL",
                "task": TargetTasks.REGRESSION
            }],
            df=df,
            store_dir=self.generatedDataPath,
            drop_invalids=False,
            drop_empty=False,
            n_jobs=n_cpu,
        )
        self.assertEqual(dataset.df.shape[0], df.shape[0])
        self.assertRaises(ValueError, lambda: dataset.checkMols())
        self.assertRaises(
            ValueError,
            lambda: dataset.addDescriptors([MorganFP(radius=2, nBits=128)]),
        )
        invalids = dataset.checkMols(throw=False)
        self.assertEqual(sum(~invalids), 1)
        dataset.dropInvalids()
        self.assertEqual(dataset.df.shape[0], all_mols - 1)

    def testRandomStateShuffle(self):
        dataset = self.createLargeTestDataSet()
        seed = dataset.randomState
        dataset.shuffle()
        order = dataset.getDF().index.tolist()
        dataset.save()
        dataset.shuffle()
        order_next = dataset.getDF().index.tolist()
        # reload and check if seed and order are the same
        dataset = QSPRDataset.fromFile(dataset.metaFile)
        self.assertEqual(dataset.randomState, seed)
        self.assertListEqual(dataset.getDF().index.tolist(), order)
        # shuffle again and check if order is the same as before
        dataset.shuffle()
        self.assertListEqual(dataset.getDF().index.tolist(), order_next)

    def testRandomStateFeaturization(self):
        # create and save the data set
        dataset = self.createLargeTestDataSet()
        dataset.addDescriptors(
            [MorganFP(radius=2, nBits=128)],
            featurize=False,
        )
        dataset.save()
        # split and featurize with shuffling
        split = ShuffleSplit(1, test_size=0.5, random_state=dataset.randomState)
        dataset.split(split, featurize=False)
        dataset.featurizeSplits(shuffle=True)
        train, test = dataset.getFeatures()
        train_order = train.index.tolist()
        test_order = test.index.tolist()
        # reload and check if orders are the same if we redo the split
        # and featurization with the same random state
        dataset = QSPRDataset.fromFile(dataset.metaFile)
        split = ShuffleSplit(1, test_size=0.5, random_state=dataset.randomState)
        dataset.split(split, featurize=False)
        dataset.featurizeSplits(shuffle=True)
        train, test = dataset.getFeatures()
        self.assertListEqual(train.index.tolist(), train_order)
        self.assertListEqual(test.index.tolist(), test_order)

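    # The loops below assume iterFolds yields 6-tuples of
    # (X_train, X_test, y_train, y_test, train_index, test_index), with the
    # integer indices positional into the feature matrix from getFeatures.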
    def testRandomStateFolds(self):
        # create and save the data set (fixes the seed)
        dataset = self.createLargeTestDataSet()
        dataset.save()
        # calculate descriptors and iterate over folds
        dataset.prepareDataset(feature_calculators=[MorganFP(radius=2, nBits=128)])
        train, _ = dataset.getFeatures()
        order_train = train.index.tolist()
        order_folds = []
        split = KFold(5, shuffle=True, random_state=dataset.randomState)
        for _, _, _, _, train_index, test_index in dataset.iterFolds(split):
            order_folds.append(train.iloc[train_index].index.tolist())
        # reload and check if orders are the same if we redo the folds from saved data
        dataset = QSPRDataset.fromFile(dataset.metaFile)
        dataset.prepareDataset(feature_calculators=[MorganFP(radius=2, nBits=128)])
        train, _ = dataset.getFeatures()
        self.assertListEqual(train.index.tolist(), order_train)
        split = KFold(5, shuffle=True, random_state=dataset.randomState)
        for i, (_, _, _, _, train_index, test_index) in enumerate(
            dataset.iterFolds(split)
        ):
            self.assertListEqual(
                train.iloc[train_index].index.tolist(), order_folds[i]
            )


class TestSearchFeatures(DataSetsPathMixIn, QSPRTestCase):
    def setUp(self):
        super().setUp()
        self.setUpPaths()

    def validateSearch(self, dataset: QSPRDataset, result: QSPRDataset, name: str):
        """Validate the results of a search."""
        self.assertTrue(len(result) < len(dataset))
        self.assertTrue(isinstance(result, type(dataset)))
        self.assertEqual(result.name, name)
        self.assertListEqual(dataset.getProperties(), result.getProperties())
        self.assertListEqual(dataset.getFeatureNames(), result.getFeatureNames())
        self.assertListEqual(dataset.targetPropertyNames, result.targetPropertyNames)
        self.assertEqual(len(dataset.descriptors), len(result.descriptors))
        self.assertEqual(len(dataset.descriptorSets), len(result.descriptorSets))
        self.assertEqual(len(dataset.targetProperties), len(result.targetProperties))
        self.assertEqual(dataset.nTargetProperties, result.nTargetProperties)

    def testSMARTS(self):
        dataset = self.createLargeTestDataSet(
            preparation_settings=self.getDefaultPrep()
        )
        search_name = "search_name"
        results_and = dataset.searchWithSMARTS(
            ["c1ccccc1", "S(=O)(=O)"],
            operator="and",
            name=search_name,
        )
        self.assertTrue(all("S" in x for x in results_and.smiles))
        self.validateSearch(dataset, results_and, search_name)
        results_or = dataset.searchWithSMARTS(
            ["c1ccccc1", "S"],
            operator="or",
            name=search_name,
        )
        self.validateSearch(dataset, results_or, search_name)
        self.assertFalse(all("S" in x for x in results_or.smiles))
        self.assertTrue(any("S" in x for x in results_or.smiles))
        self.assertTrue(len(results_and) < len(results_or))

    def testPropSearch(self):
        dataset = self.createLargeTestDataSet(
            preparation_settings=self.getDefaultPrep()
        )
        search_name = "search_name"
        results = dataset.searchOnProperty(
            "moka_ionState7.4",
            ["cationic"],
            name=search_name,
            exact=True,
        )
        self.validateSearch(dataset, results, search_name)
        self.assertTrue(
            all(x == "cationic" for x in results.getProperty("moka_ionState7.4"))
        )
        results = dataset.searchOnProperty(
            "Reference",
            ["Cook"],
            name=search_name,
            exact=False,
        )
        self.validateSearch(dataset, results, search_name)
        self.assertTrue(all("Cook" in x for x in results.getProperty("Reference")))
        results = dataset.searchOnProperty(
            "Reference",
            ["Cook"],
            name=search_name,
            exact=True,
        )
        self.assertTrue(len(results) == 0)

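
# Module-level transformer shared by TestTargetProperty.testSerialization and
# TestTargetTransformation.testTransformation below (kept at module level,
# presumably so it survives serialization round-trips).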
def prop_transform(x):
    return np.log10(x)


class TestTargetProperty(QSPRTestCase):
    """Test the TargetProperty class."""

    def checkTargetProperty(self, target_prop, name, task, th):
        # Check the target property creation consistency
        self.assertEqual(target_prop.name, name)
        self.assertEqual(target_prop.task, task)
        if task.isClassification():
            self.assertTrue(target_prop.task.isClassification())
            self.assertEqual(target_prop.th, th)

    def testInit(self):
        """Check the TargetProperty class on target property creation."""
        # Check the different task types
        targetprop = TargetProperty("CL", TargetTasks.REGRESSION)
        self.checkTargetProperty(targetprop, "CL", TargetTasks.REGRESSION, None)
        targetprop = TargetProperty("CL", TargetTasks.MULTICLASS, th=[0, 1, 10, 1200])
        self.checkTargetProperty(
            targetprop, "CL", TargetTasks.MULTICLASS, [0, 1, 10, 1200]
        )
        targetprop = TargetProperty("CL", TargetTasks.SINGLECLASS, th=[5])
        self.checkTargetProperty(targetprop, "CL", TargetTasks.SINGLECLASS, [5])
        # check with precomputed values
        targetprop = TargetProperty(
            "CL", TargetTasks.SINGLECLASS, th="precomputed", n_classes=2
        )
        self.checkTargetProperty(
            targetprop, "CL", TargetTasks.SINGLECLASS, "precomputed"
        )
        # Check from dictionary creation
        targetprop = TargetProperty.fromDict({
            "name": "CL",
            "task": TargetTasks.REGRESSION
        })
        self.checkTargetProperty(targetprop, "CL", TargetTasks.REGRESSION, None)
        targetprop = TargetProperty.fromDict({
            "name": "CL",
            "task": TargetTasks.MULTICLASS,
            "th": [0, 1, 10, 1200]
        })
        self.checkTargetProperty(
            targetprop, "CL", TargetTasks.MULTICLASS, [0, 1, 10, 1200]
        )
        # Check from list creation, selection and serialization support functions
        targetprops = TargetProperty.fromList(
            [
                {
                    "name": "CL",
                    "task": TargetTasks.REGRESSION
                },
                {
                    "name": "fu",
                    "task": TargetTasks.REGRESSION
                },
            ]
        )
        self.checkTargetProperty(targetprops[0], "CL", TargetTasks.REGRESSION, None)
        self.checkTargetProperty(targetprops[1], "fu", TargetTasks.REGRESSION, None)
        self.assertListEqual(TargetProperty.getNames(targetprops), ["CL", "fu"])
        targetprops = TargetProperty.toList(targetprops)
        self.assertIsInstance(targetprops, list)
        self.assertIsInstance(targetprops[0], dict)
        self.assertEqual(targetprops[0]["name"], "CL")
        self.assertEqual(targetprops[0]["task"], TargetTasks.REGRESSION)

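    # The round-trip below assumes TargetProperty.toJSON can serialize the
    # attached transformer callable (both named functions and lambdas); the
    # commented-out np.log case suggests plain numpy ufuncs are not supported.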
    @parameterized.expand(
        [
            (TargetTasks.REGRESSION, "CL", None, prop_transform),
            (TargetTasks.MULTICLASS, "CL", [0, 1, 10, 1200], lambda x: x + 1),
            # (TargetTasks.SINGLECLASS, "CL", [5], np.log),  # FIXME: np.log does not save
        ]
    )
    def testSerialization(self, task, name, th, transformer):
        prop = TargetProperty(name, task, transformer=transformer, th=th)
        json_form = prop.toJSON()
        prop2 = TargetProperty.fromJSON(json_form)
        self.assertEqual(prop2.name, prop.name)
        self.assertEqual(prop2.task, prop.task)
        rnd_number = np.random.rand(10)
        self.assertTrue(
            all(prop2.transformer(rnd_number) == prop.transformer(rnd_number))
        )


class TestDataSetPreparation(DataSetsPathMixIn, DataPrepCheckMixIn, QSPRTestCase):
    """Test as many combinations of data sets and their preparation settings as
    possible.

    These can potentially run for a long time, so use the ``skip`` decorator if
    you want to skip all these tests to speed things up during development."""

    def setUp(self):
        super().setUp()
        self.setUpPaths()

    @parameterized.expand(DataSetsPathMixIn.getPrepCombos())
    def testPrepCombos(
        self,
        _,
        name,
        feature_calculators,
        split,
        feature_standardizer,
        feature_filter,
        data_filter,
        applicability_domain,
    ):
        """Tests one combination of a data set and its preparation settings.

        This generates a large number of parameterized tests. Use the ``skip``
        decorator if you want to skip all these tests. Note that the combinations
        are not exhaustive, but defined by `DataSetsPathMixIn.getPrepCombos()`.
        """
        np.random.seed(42)
        dataset = self.createLargeTestDataSet(name=name)
        self.checkPrep(
            dataset,
            feature_calculators,
            split,
            feature_standardizer,
            feature_filter,
            data_filter,
            applicability_domain,
            ["CL"],
        )


class TestTargetImputation(PathMixIn, QSPRTestCase):
    """Small tests to only check if the target imputation works on its own."""

    def setUp(self):
        """Set up the test Dataframe."""
        super().setUp()
        self.setUpPaths()
        self.descriptors = [
            "Descriptor_F1",
            "Descriptor_F2",
            "Descriptor_F3",
            "Descriptor_F4",
            "Descriptor_F5",
        ]
        self.df = pd.DataFrame(
            data=np.array(
                [
                    ["C", 1, 4, 2, 6, 2, 1, 2],
                    ["C", 1, 8, 4, 2, 4, 1, 2],
                    ["C", 1, 4, 3, 2, 5, 1, np.nan],
                    ["C", 1, 8, 4, 9, 8, 2, 2],
                    ["C", 1, 4, 2, 3, 9, 2, 2],
                    ["C", 1, 8, 4, 7, 12, 2, 2],
                ]
            ),
            columns=["SMILES", *self.descriptors, "y", "z"],
        )

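    # The assertions below rely on the convention that, when an imputer is
    # attached to a target property, the original (unimputed) values are kept
    # in a "<name>_before_impute" column, mirroring the "<name>_before_transform"
    # convention used for transformers.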
    def testImputation(self):
        """Test the imputation of missing values in the target properties."""
        self.dataset = QSPRDataset(
            "TestImputation",
            target_props=[
                {
                    "name": "y",
                    "task": TargetTasks.REGRESSION,
                    "imputer": SimpleImputer(strategy="mean"),
                },
                {
                    "name": "z",
                    "task": TargetTasks.REGRESSION,
                    "imputer": SimpleImputer(strategy="mean"),
                },
            ],
            df=self.df,
            store_dir=self.generatedPath,
            n_jobs=self.nCPU,
            chunk_size=self.chunkSize,
        )
        self.assertEqual(self.dataset.targetProperties[0].name, "y")
        self.assertEqual(self.dataset.targetProperties[1].name, "z")
        self.assertTrue("y_before_impute" in self.dataset.df.columns)
        self.assertTrue("z_before_impute" in self.dataset.df.columns)
        self.assertEqual(self.dataset.df["y"].isna().sum(), 0)
        self.assertEqual(self.dataset.df["z"].isna().sum(), 0)


class TestTargetTransformation(DataSetsPathMixIn, QSPRTestCase):
    """Tests the transformation of target properties."""

    def setUp(self):
        super().setUp()
        self.setUpPaths()

    def prop_transform(self, x):
        return np.log10(x)

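    # Note: the bare `prop_transform` passed below resolves to the module-level
    # function, not the method of the same name above; the method appears unused.
    # Originals are assumed to be kept in a "CL_before_transform" column.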
    def testTransformation(self):
        dataset = self.createLargeTestDataSet(
            target_props=[
                {
                    "name": "CL",
                    "task": TargetTasks.REGRESSION,
                    "transformer": prop_transform,
                },
            ]
        )
        self.assertTrue(
            all(dataset.df["CL"] == np.log10(dataset.df["CL_before_transform"]))
        )


class TestApply(DataSetsPathMixIn, QSPRTestCase):
    """Tests the apply method of the data set."""

    def setUp(self):
        super().setUp()
        self.setUpPaths()

    @staticmethod
    def regularFunc(props, *args, **kwargs):
        df = pd.DataFrame(props)
        for idx, arg in enumerate(args):
            df[f"arg_{idx}"] = arg
        for key, value in kwargs.items():
            df[key] = value
        return df

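    # The loop below assumes apply returns an iterable of per-chunk results
    # (one DataFrame per processed chunk, governed by chunkSize and nJobs)
    # rather than a single concatenated DataFrame, so each item is checked
    # separately.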
    @parameterized.expand([(None, None), (2, None), (None, 50), (2, 50)])
    def testRegular(self, n_jobs, chunk_size):
        dataset = self.createLargeTestDataSet()
        dataset.nJobs = n_jobs
        dataset.chunkSize = chunk_size
        result = dataset.apply(
            self.regularFunc,
            on_props=["CL", "fu"],
            func_args=[1, 2, 3],
            func_kwargs={"A_col": "A", "B_col": "B"},
        )
        for item in result:
            self.assertIsInstance(item, pd.DataFrame)
            self.assertTrue("CL" in item.columns)
            self.assertTrue("fu" in item.columns)
            self.assertTrue("A_col" in item.columns)
            self.assertTrue("B_col" in item.columns)
            self.assertTrue("arg_0" in item.columns)
            self.assertTrue("arg_1" in item.columns)
            self.assertTrue("arg_2" in item.columns)
            self.assertTrue(all(item["arg_0"] == 1))
            self.assertTrue(all(item["arg_1"] == 2))
            self.assertTrue(all(item["arg_2"] == 3))
            self.assertTrue(all(item["A_col"] == "A"))
            self.assertTrue(all(item["B_col"] == "B"))