diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 8ab1d2e..d93acdd 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,11 +1,64 @@ from pCRscore import pipeline import pandas import math +import os +import numpy + + +def mock_data(shap=False): + columns = [ + 'Unnamed: 0', 'Trial', 'Mixture', 'B.cells.Memory', + 'B.cells.Naive', 'CAFs.MSC.iCAF.like', 'CAFs.myCAF.like', 'DCs', + 'Endothelial.ACKR1', 'Endothelial.CXCL12', 'Endothelial.LYVE1', + 'Endothelial.RGS5', 'GenMod1', 'GenMod2', 'GenMod3', 'GenMod4', + 'GenMod5', 'GenMod6', 'GenMod7', 'Luminal.Progenitors', + 'Macrophage', 'Mature.Luminal', 'Monocyte', 'Myoepithelial', + 'NK.cells', 'NKT.cells', 'Plasmablasts', 'PVL.Differentiated', + 'PVL.Immature', 'T.cells.CD4.', 'T.cells.CD8.', 'Cancer.Cells', + 'Normal.Epi', 'TCells', 'Myeloids', 'BCells', 'CAFs', 'PVLs', + 'Endothelials', 'ER', 'Response', 'Cohort', 'PAM50_Basal', + 'PAM50_Her2', 'PAM50_LumA', 'PAM50_LumB', 'PAM50_Normal' + ] + num_rows = 1009 + if shap: + n_floats = 44 + else: + n_floats = 36 + df = pandas.DataFrame(columns=columns, index=numpy.arange(num_rows)) + df['Unnamed: 0'] = 0 + df['Trial'] = 'GSE22093' + df['Mixture'] = 'GSM549230' + for i in range(n_floats): + df[columns[i + 3]] = numpy.random.uniform(0, 1, num_rows) + if not shap: + df['ER'] = numpy.random.choice([0, 1], num_rows) + df['Response'] = numpy.random.choice([0, 1], num_rows) + df['Cohort'] = 'Discovery' + if not shap: + df['PAM50_Basal'] = numpy.random.choice([True, False], num_rows) + df['PAM50_Her2'] = numpy.random.choice([True, False], num_rows) + df['PAM50_LumA'] = numpy.random.choice([True, False], num_rows) + df['PAM50_LumB'] = numpy.random.choice([True, False], num_rows) + df['PAM50_Normal'] = numpy.random.choice([True, False], num_rows) + # Dropping columns not present in SHAP data + if shap: + df = df.drop(columns=['Trial', 'Mixture', 'Response', 'Cohort']) + return df + # Import DiscoveryData.csv and drop invalid columns -# FIXME: replace local data with mock data (after finishing the pipeline) -data_raw = pandas.read_csv(".meta/DiscoveryData.csv") -shap_raw = pandas.read_csv(".meta/DiscoverySHAP.csv") +if os.path.exists(".meta"): + # Running locally + local = True + data_raw = pandas.read_csv(".meta/DiscoveryData.csv") + shap_raw = pandas.read_csv(".meta/DiscoverySHAP.csv") +else: + # On GitHub Actions + local = False + data_raw = mock_data() + shap_raw = mock_data(shap=True) + + data = pipeline.drop_non_float(data_raw) shap = pipeline.drop_non_float(shap_raw, extra_cols=range(36, 42)) @@ -22,12 +75,17 @@ def test_drop_non_float_and_unnamed(): # Test def test_normalize_data(): assert data_norm.shape == data.shape - tol = 1e-3 - assert math.isclose(data['B.cells.Memory'].iloc[0], 0.01789, rel_tol=tol) - assert math.isclose( - data_norm['B.cells.Memory'].iloc[0], 0.34186, rel_tol=tol - ) - assert math.isclose(shap['Endothelials'].iloc[0], 0.037792, rel_tol=tol) + if local: + tol = 1e-3 + assert math.isclose( + data['B.cells.Memory'].iloc[0], 0.01789, rel_tol=tol + ) + assert math.isclose( + data_norm['B.cells.Memory'].iloc[0], 0.34186, rel_tol=tol + ) + assert math.isclose( + shap['Endothelials'].iloc[0], 0.037792, rel_tol=tol + ) # Combine data and SHAP values