From 2562abf1116725ede3d0c00234d7f0fcd81d0a95 Mon Sep 17 00:00:00 2001
From: Keith Battocchi
Date: Thu, 23 Jan 2020 18:47:16 -0500
Subject: [PATCH] Add propensity clipping to DRLearner

---
 econml/drlearner.py            | 22 +++++++++++++++++++++-
 econml/tests/test_drlearner.py | 11 +++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/econml/drlearner.py b/econml/drlearner.py
index e30a0f416..0abba2890 100644
--- a/econml/drlearner.py
+++ b/econml/drlearner.py
@@ -123,6 +123,9 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
         It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
         If featurizer=None, then CATE is trained on X.
 
+    min_propensity : float, optional, default ``1e-6``
+        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
+
     n_splits: int, cross-validation generator or an iterable, optional (default is 2)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -246,6 +249,7 @@ def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', m
                  model_final=StatsModelsLinearRegression(),
                  multitask_model_final=False,
                  featurizer=None,
+                 min_propensity=1e-6,
                  n_splits=2,
                  random_state=None):
         class ModelNuisance:
@@ -273,7 +277,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
 
             def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
                 XW = self._combine(X, W)
-                propensities = self._model_propensity.predict_proba(XW)
+                propensities = np.maximum(self._model_propensity.predict_proba(XW), min_propensity)
                 n = T.shape[0]
                 Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
                 T_counter = np.zeros(T.shape)
@@ -556,6 +560,10 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
     fit_cate_intercept : bool, optional, default True
         Whether the linear CATE model should have a constant term.
+
+    min_propensity : float, optional, default ``1e-6``
+        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
+
     n_splits: int, cross-validation generator or an iterable, optional (default is 2)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -628,12 +636,14 @@ def __init__(self,
                  model_regression=WeightedLassoCVWrapper(cv=3),
                  featurizer=None,
                  fit_cate_intercept=True,
+                 min_propensity=1e-6,
                  n_splits=2,
                  random_state=None):
         super().__init__(model_propensity=model_propensity,
                          model_regression=model_regression,
                          model_final=StatsModelsLinearRegression(fit_intercept=fit_cate_intercept),
                          featurizer=featurizer,
                          multitask_model_final=False,
+                         min_propensity=min_propensity,
                          n_splits=n_splits,
                          random_state=random_state)
@@ -746,6 +756,9 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
         dual gap for optimality and continues until it is smaller
         than ``tol``.
 
+    min_propensity : float, optional, default ``1e-6``
+        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
+
     n_splits: int, cross-validation generator or an iterable, optional, default 2
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -822,6 +835,7 @@ def __init__(self,
                  alpha='auto',
                  max_iter=1000,
                  tol=1e-4,
+                 min_propensity=1e-6,
                  n_splits=2,
                  random_state=None):
         model_final = DebiasedLasso(
             alpha=alpha,
@@ -833,6 +847,7 @@ def __init__(self,
                          model_final=model_final,
                          featurizer=featurizer,
                          multitask_model_final=False,
+                         min_propensity=min_propensity,
                          n_splits=n_splits,
                          random_state=random_state)
@@ -906,6 +921,9 @@ class ForestDRLearner(DRLearner):
         `predict` methods. If different models per treatment arm are desired, see the
         :class:`~econml.utilities.MultiModelWrapper` helper class.
 
+    min_propensity : float, optional, default ``1e-6``
+        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
+
     n_crossfit_splits: int, cross-validation generator or an iterable, optional (Default=2)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -1039,6 +1057,7 @@ class ForestDRLearner(DRLearner):
     def __init__(self,
                  model_regression,
                  model_propensity,
+                 min_propensity=1e-6,
                  n_crossfit_splits=2,
                  n_estimators=1000,
                  criterion="mse",
@@ -1071,6 +1090,7 @@ def __init__(self,
         super().__init__(model_regression=model_regression, model_propensity=model_propensity,
                          model_final=model_final, featurizer=None,
                          multitask_model_final=False,
+                         min_propensity=min_propensity,
                          n_splits=n_crossfit_splits, random_state=random_state)
 
     def _get_inference_options(self):
diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py
index 97fe1f2e5..72fd973cf 100644
--- a/econml/tests/test_drlearner.py
+++ b/econml/tests/test_drlearner.py
@@ -564,6 +564,17 @@ def test_sparse(self):
         # Check that a majority of true effects lie in the 5-95% CI
         self.assertTrue(in_CI.mean() > 0.8)
 
+    def test_drlearner_clipping(self):
+        X = np.linspace(0, 1, 200).reshape(-1, 1)
+        T = np.random.binomial(1, X)
+        Y = np.random.normal(size=T.shape)
+        X[0] = -1000  # one split will have only X values between 0 and 1,
+        # so the predicted propensity for this point will be extremely low
+        learner = DRLearner()
+        learner.fit(Y, T, X)
+        effect = learner.const_marginal_effect(np.array([[0.5]]))
+        assert not np.any(np.isnan(effect))
+
     def _test_te(self, learner_instance, tol, te_type="const"):
         if te_type not in ["const", "heterogeneous"]:
             raise ValueError("Type of treatment effect must be 'const' or 'heterogeneous'.")
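
Note (not part of the patch): a minimal standalone sketch of why the clipping
matters. The doubly robust step divides a regression residual by the estimated
propensity, so a propensity that underflows to zero on an outlying point
produces inf/nan values that propagate into the final CATE model. The helper
below is illustrative only -- dr_pseudo_outcome is not an econml API -- and
uses a simplified binary-treatment form of the doubly robust pseudo-outcome
rather than DRLearner's actual multi-arm computation.

    import numpy as np

    def dr_pseudo_outcome(y, t, y_hat, p_hat, min_propensity=1e-6):
        # Simplified doubly robust correction for the treated arm of a
        # binary treatment: regression prediction plus an inverse-propensity
        # weighted residual. Clipping bounds the weight at 1 / min_propensity.
        p = np.maximum(p_hat, min_propensity)  # the clipping this patch adds
        return y_hat + (y - y_hat) * t / p

    y = np.array([1.0])      # observed outcome
    t = np.array([1.0])      # treated unit
    y_hat = np.array([0.3])  # regression model's prediction
    p_hat = np.array([0.0])  # propensity underflowed to 0 on an outlier

    print(dr_pseudo_outcome(y, t, y_hat, p_hat))  # [700000.3], large but finite
    with np.errstate(divide="ignore"):
        print(y_hat + (y - y_hat) * t / p_hat)    # [inf] without clipping

A large-but-finite pseudo-outcome still lets the final-stage model fit (and the
test above pass), whereas the unclipped inf/nan would make every downstream
estimate nan.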