diff --git a/src/pCRscore/pipeline.py b/src/pCRscore/pipeline.py
index 6781cb8..d873bfd 100644
--- a/src/pCRscore/pipeline.py
+++ b/src/pCRscore/pipeline.py
@@ -30,7 +30,7 @@ def combine_fractions_shap(data_norm, shap):
 
 
 # Function to fit a line to SHAP vs Fraction for each cell type
-def fit_line(data):
+def fit_line(data, split_ci=False):
     result = []
     grouped = data.groupby('Feature')
     for name, group in grouped:
@@ -39,7 +39,14 @@ def fit_line(data):
         model = statsmodels.api.OLS(y, X).fit()
         coef = model.params['Fraction']
         ci = model.conf_int(alpha=0.001).loc['Fraction']
-        result.append({'Feature': name, 'Coef': coef, 'CI': ci[0] * ci[1]})
+        if split_ci:
+            result.append(
+                {'Feature': name, 'Coef': coef, 'LI': ci[0], 'HI': ci[1]}
+            )
+        else:
+            result.append(
+                {'Feature': name, 'Coef': coef, 'CI': ci[0] * ci[1]}
+            )
     return pandas.DataFrame(result)
 
 
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 86ca169..e6cd4f1 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -133,3 +133,26 @@ def test_combine_discovery_validation():
     assert 'SHAP value' in all_pat.columns
     if local:
         all_pat.shape == (15888, 3)
+
+
+# A new fitting is performed on discovery and validation data
+fit = pipeline.fit_line(all_pat, split_ci=True)
+
+
+def test_fit():
+    assert fit.shape[0] == len(all_pat['Feature'].unique())
+    assert 'Feature' in fit.columns
+    assert 'Coef' in fit.columns
+    assert 'LI' in fit.columns
+    assert 'HI' in fit.columns
+    if local:
+        line = fit['Feature'] == 'CAFs.myCAF.like'
+        assert math.isclose(
+            fit.loc[line, 'Coef'].values[0], -0.028020, rel_tol=1e-2
+        )
+        assert math.isclose(
+            fit.loc[line, 'LI'].values[0], -0.041878, rel_tol=1e-2
+        )
+        assert math.isclose(
+            fit.loc[line, 'HI'].values[0], -0.014161, rel_tol=1e-2
+        )