Merge pull request #281 from surajiyer/patch-1

Update filters.py
uber · Jan 20, 2021 · bf187ab · bf187ab
2 parents 2971e77 + 7c836c4
commit bf187ab
Showing 1 changed file with 14 additions and 2 deletions.
diff --git a/causalml/feature_selection/filters.py b/causalml/feature_selection/filters.py
@@ -157,14 +157,21 @@ def filter_LR(self, data, treatment_indicator, features, y_name, disp=True):
     @staticmethod
     def _GetNodeSummary(data,
                         experiment_group_column='treatment_group_key', 
-                        y_name='conversion'):
+                        y_name='conversion', smooth=True):
         """
         To count the conversions and get the probabilities by treatment groups. This function comes from the uplift tree algorithm, that is used for tree node split evaluation.
 
         Parameters
         ----------
         data : DataFrame
             The DataFrame that contains all the data (in the current "node").  
+        experiment_group_column : str
+            Treatment indicator column name.
+        y_name : str
+            Label indicator column name.
+        smooth : bool
+            If True, use a count of 1 for any (treatment, label) pair that does
+            not occur in the data. Prevents zero divisions.
 
         Returns
         -------
@@ -191,7 +198,12 @@ def _GetNodeSummary(data,
         for ti in treatment_group_keys: 
             results.update({ti: {}}) 
             for ci in y_name_keys:
-                results[ti].update({ci: results_series[ti, ci]}) 
+                if smooth:
+                    results[ti].update({ci: results_series[ti, ci]
+                                        if results_series.index.isin([(ti, ci)]).any()
+                                        else 1})
+                else:
+                    results[ti].update({ci: results_series[ti, ci]})
 
         # Probability of conversion and group size by treatment group
         nodeSummary = {}