Skip to content

Commit

Permalink
Merge branch 'issue-1' into develop
Browse files Browse the repository at this point in the history
* issue-1:
  Fixed flakes
  Generate mock data for GH actions
  • Loading branch information
wleoncio committed Oct 2, 2024
2 parents 6f14deb + 158cc70 commit 80a848d
Showing 1 changed file with 67 additions and 9 deletions.
76 changes: 67 additions & 9 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,64 @@
from pCRscore import pipeline
import pandas
import math
import os
import numpy


def mock_data(shap=False, *, num_rows=1009, seed=None):
    """Build a randomly filled stand-in for the Discovery data or SHAP table.

    The frame mimics the column layout of the CSV files read from ``.meta``
    so the tests can run where those files are not available.

    Args:
        shap: When False (default), return the "data" layout: all 47
            columns, with 36 uniform-float columns, 0/1 integers in ``ER``
            and ``Response`` and booleans in the ``PAM50_*`` columns.
            When True, return the "SHAP" layout: 44 uniform-float columns
            (``ER`` and ``PAM50_*`` included) with ``Trial``, ``Mixture``,
            ``Response`` and ``Cohort`` removed.
        num_rows: Number of rows to generate. Defaults to 1009.
        seed: Optional seed. ``None`` (default) draws from NumPy's global
            random state, exactly as before; any other value uses a private
            ``numpy.random.RandomState(seed)`` so the output is reproducible.

    Returns:
        pandas.DataFrame: ``num_rows`` rows indexed ``0..num_rows-1``.
    """
    columns = [
        'Unnamed: 0', 'Trial', 'Mixture', 'B.cells.Memory',
        'B.cells.Naive', 'CAFs.MSC.iCAF.like', 'CAFs.myCAF.like', 'DCs',
        'Endothelial.ACKR1', 'Endothelial.CXCL12', 'Endothelial.LYVE1',
        'Endothelial.RGS5', 'GenMod1', 'GenMod2', 'GenMod3', 'GenMod4',
        'GenMod5', 'GenMod6', 'GenMod7', 'Luminal.Progenitors',
        'Macrophage', 'Mature.Luminal', 'Monocyte', 'Myoepithelial',
        'NK.cells', 'NKT.cells', 'Plasmablasts', 'PVL.Differentiated',
        'PVL.Immature', 'T.cells.CD4.', 'T.cells.CD8.', 'Cancer.Cells',
        'Normal.Epi', 'TCells', 'Myeloids', 'BCells', 'CAFs', 'PVLs',
        'Endothelials', 'ER', 'Response', 'Cohort', 'PAM50_Basal',
        'PAM50_Her2', 'PAM50_LumA', 'PAM50_LumB', 'PAM50_Normal'
    ]
    pam50_columns = (
        'PAM50_Basal', 'PAM50_Her2', 'PAM50_LumA', 'PAM50_LumB',
        'PAM50_Normal',
    )
    # Float columns start right after the three leading identifier columns.
    # 36 of them reach 'Endothelials'; 44 reach the last column.
    n_floats = 44 if shap else 36
    # Without a seed keep using the global generator so existing callers
    # (including ones that call numpy.random.seed themselves) see no change.
    rng = numpy.random if seed is None else numpy.random.RandomState(seed)

    df = pandas.DataFrame(columns=columns, index=numpy.arange(num_rows))
    df['Unnamed: 0'] = 0
    df['Trial'] = 'GSE22093'
    df['Mixture'] = 'GSM549230'
    for name in columns[3:3 + n_floats]:
        df[name] = rng.uniform(0, 1, num_rows)
    if not shap:
        df['ER'] = rng.choice([0, 1], num_rows)
    # In the SHAP layout these two columns are dropped again below.
    df['Response'] = rng.choice([0, 1], num_rows)
    df['Cohort'] = 'Discovery'
    if not shap:
        for name in pam50_columns:
            df[name] = rng.choice([True, False], num_rows)
    # Dropping columns not present in SHAP data
    if shap:
        df = df.drop(columns=['Trial', 'Mixture', 'Response', 'Cohort'])
    return df


# Import DiscoveryData.csv / DiscoverySHAP.csv and drop invalid columns.
# FIXME: replace local data with mock data (after finishing the pipeline)
# The CSV files are only read when the ".meta" directory exists; reading them
# unconditionally would raise FileNotFoundError before the mock-data fallback
# below could ever be reached.
local = os.path.exists(".meta")
if local:
    # Running locally
    data_raw = pandas.read_csv(".meta/DiscoveryData.csv")
    shap_raw = pandas.read_csv(".meta/DiscoverySHAP.csv")
else:
    # On GitHub Actions
    data_raw = mock_data()
    shap_raw = mock_data(shap=True)


# Both tables go through the same column filter; the SHAP table additionally
# passes positions 36..41 as extra_cols.
data = pipeline.drop_non_float(data_raw)
shap = pipeline.drop_non_float(shap_raw, extra_cols=range(36, 42))

Expand All @@ -22,12 +75,17 @@ def test_drop_non_float_and_unnamed():
# Test
def test_normalize_data():
    """Check the normalized data's shape and, locally, known reference values.

    The shape comparison holds for any input. The numeric reference values
    are only compared when ``local`` is true, i.e. when the frames were read
    from the ``.meta`` CSV files; randomly generated mock data cannot
    reproduce them.
    """
    assert data_norm.shape == data.shape
    if local:
        tol = 1e-3
        assert math.isclose(
            data['B.cells.Memory'].iloc[0], 0.01789, rel_tol=tol
        )
        assert math.isclose(
            data_norm['B.cells.Memory'].iloc[0], 0.34186, rel_tol=tol
        )
        assert math.isclose(
            shap['Endothelials'].iloc[0], 0.037792, rel_tol=tol
        )


# Combine data and SHAP values
Expand Down

0 comments on commit 80a848d

Please sign in to comment.