From 73d752080c345392768a4eb54a41dc3d72de4f2f Mon Sep 17 00:00:00 2001
From: Brian J Arnold <brianjohnarnold@gmail.com>
Date: Tue, 24 May 2022 15:10:06 -0400
Subject: [PATCH] fixed error in CI calculation, bumped version

---
 README.md               |  2 +-
 setup.py                |  2 +-
 src/decifer/__init__.py |  2 +-
 src/decifer/__main__.py | 35 +++++++++++++++++++----------------
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index ccd0bc4..909e81b 100644
--- a/README.md
+++ b/README.md
@@ -185,7 +185,7 @@ DeCiFer's main output file (ending with `_output.tsv`) corresponds to a single T
 
 For the column containing the `true_cluster_DCF`, the CIs correspond to the 95% credible interval of the posterior distribution of the DCF cluster center (Eqn 8 in manuscript and S23 in supplement) . These CIs have been corrected for multiple tests. Specifically, for each cluster, we find the lower CI by finding the X=[0.025/(number of hypothesis tests)] quantile, where the number of tests corresponds to (number of clusters)\*(number of samples for patient). The same procedure is used for the upper CI, by finding the quantile that corresponds to 1-X.
 
-These cluster CIs may also be found in the output file ending in `_cluster.CIs.tsv`. This file contains this information in a more condensed format, reporting only the upper and lower CIs for each cluster for each sample (in column `f_lb` and `f_ub` respectively).
+These cluster CIs may also be found in the output file ending in `_cluster.CIs.tsv`. This file contains this information in a more condensed format, reporting only the upper and lower CIs for each cluster for each sample (in column `f_lb` and `f_ub` respectively). These numbers may contain "NaN" if no mutations were assigned to that particular cluster.
 
 The file ending in `_model_selection.tsv` shows how decifer selected the best value of K clusters. 
 
diff --git a/setup.py b/setup.py
index e30491d..d673759 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 
 setuptools.setup(
     name='decifer',
-    version='v2.1.1',
+    version='v2.1.2',
     python_requires='>=3.7.*',
     packages=['decifer'],
     package_dir={'': 'src'},
diff --git a/src/decifer/__init__.py b/src/decifer/__init__.py
index 73f37a5..472bc37 100644
--- a/src/decifer/__init__.py
+++ b/src/decifer/__init__.py
@@ -1 +1 @@
-__version__ = 'v2.1.1'
+__version__ = 'v2.1.2'
diff --git a/src/decifer/__main__.py b/src/decifer/__main__.py
index 31c1be6..09dde5c 100644
--- a/src/decifer/__main__.py
+++ b/src/decifer/__main__.py
@@ -280,22 +280,25 @@ def CI(job):
     mut = list(filter(lambda m : m.assigned_cluster == c, muts))
     
     num_pts = 10000
-    grid = [objective(j, mut, s, bb) for j in np.linspace(0, PURITY[s], num_pts)]
-    min_log = min(grid)
-    delta = (-1*min_log)-2      # constant to make -log(pdf) values less negative
-    prob = (lambda x: math.exp(-1*(x+delta)))           # convert -log(pdf) to unnormalized probability
-    total = sum([prob(x) for x in grid])                # unnormalized probabilities across support
-    pdf = [prob(x)/total for x in grid]                 # normalized probabilities across support
-    cdf = np.cumsum(pdf)
-
-    low_ci = 0.025/num_tests                            # divide the desired CI quantile by the number of tests, bonferonni correction
-    high_ci = 1 - low_ci
-
-    low_index = take_closest(cdf, low_ci)
-    high_index = take_closest(cdf, high_ci)
-
-    l = float(low_index)/num_pts
-    u = float(high_index)/num_pts
+    if len(mut) == 0:
+        l, u, pdf = "NaN", "Nan", ["Nan"]*num_pts
+    else:
+        grid = [objective(j, mut, s, bb) for j in np.linspace(0, PURITY[s], num_pts)]
+        min_log = min(grid)
+        delta = (-1*min_log)-2      # constant to make -log(pdf) values less negative
+        prob = (lambda x: math.exp(-1*(x+delta)))           # convert -log(pdf) to unnormalized probability
+        total = sum([prob(x) for x in grid])                # unnormalized probabilities across support
+        pdf = [prob(x)/total for x in grid]                 # normalized probabilities across support
+        cdf = np.cumsum(pdf)
+
+        low_ci = 0.025/num_tests                            # divide the desired CI quantile by the number of tests, bonferonni correction
+        high_ci = 1 - low_ci
+
+        low_index = take_closest(cdf, low_ci)
+        high_index = take_closest(cdf, high_ci)
+
+        l = float(low_index)/num_pts
+        u = float(high_index)/num_pts
 
     return (c, s, l, u, pdf)