Add propensity clipping to DRLearner
kbattocchi committed Feb 7, 2020
1 parent 515ba08 commit 2562abf
Showing 2 changed files with 32 additions and 1 deletion.
22 changes: 21 additions & 1 deletion econml/drlearner.py
@@ -123,6 +123,9 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
If featurizer=None, then CATE is trained on X.
+ min_propensity : float, optional, default ``1e-6``
+     The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
n_splits: int, cross-validation generator or an iterable, optional (default is 2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
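For context on the hunk above (this explanation is not part of the diff): the `min_propensity` parameter exists because the doubly robust estimator divides first-stage regression residuals by estimated propensities, so a (near-)zero propensity produces a huge or non-finite pseudo-outcome. A minimal sketch of the idea, with hypothetical names (`dr_pseudo_outcome` is not an econml function); the library's actual computation appears in `ModelNuisance.predict` in the hunks below:

```python
import numpy as np

def dr_pseudo_outcome(y, t_indicator, y_pred, propensity, min_propensity=1e-6):
    """Doubly robust pseudo-outcome for a single treatment arm (illustrative only).

    y            : (n,) observed outcomes
    t_indicator  : (n,) 0/1 indicator that the unit received this arm
    y_pred       : (n,) first-stage estimate of E[Y | X, W, T=arm]
    propensity   : (n,) first-stage estimate of Pr[T=arm | X, W]
    """
    # Clip the propensity from below: the residual is divided by it, so an
    # estimate of (near-)zero would make the pseudo-outcome blow up or go NaN.
    p = np.maximum(propensity, min_propensity)
    return y_pred + (y - y_pred) * t_indicator / p
```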
@@ -246,6 +249,7 @@ def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', m
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
+ min_propensity=1e-6,
n_splits=2,
random_state=None):
class ModelNuisance:
@@ -273,7 +277,7 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None):

def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
XW = self._combine(X, W)
- propensities = self._model_propensity.predict_proba(XW)
+ propensities = np.maximum(self._model_propensity.predict_proba(XW), min_propensity)
n = T.shape[0]
Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
T_counter = np.zeros(T.shape)
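A note on the one-line change above (not part of the diff): `predict_proba` returns an `(n, n_treatments + 1)` matrix of probabilities, and `np.maximum` with a scalar broadcasts elementwise, so this is equivalent to `np.clip(probs, min_propensity, None)`. A quick illustration:

```python
import numpy as np

probs = np.array([[1e-12, 1.0 - 1e-12],
                  [0.30,  0.70]])
clipped = np.maximum(probs, 1e-6)  # elementwise lower bound via broadcasting
# clipped -> [[1.0e-06, ~1.0], [0.30, 0.70]]
```

After clipping, a row may sum to slightly more than 1; this is harmless here because the values are used only as divisors in the doubly robust moment, not as a normalized distribution.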
@@ -556,6 +560,10 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
fit_cate_intercept : bool, optional, default True
Whether the linear CATE model should have a constant term.
+ min_propensity : float, optional, default ``1e-6``
+     The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
n_splits: int, cross-validation generator or an iterable, optional (default is 2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
@@ -628,12 +636,14 @@ def __init__(self,
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
+ min_propensity=1e-6,
n_splits=2, random_state=None):
super().__init__(model_propensity=model_propensity,
model_regression=model_regression,
model_final=StatsModelsLinearRegression(fit_intercept=fit_cate_intercept),
featurizer=featurizer,
multitask_model_final=False,
+ min_propensity=min_propensity,
n_splits=n_splits,
random_state=random_state)

@@ -746,6 +756,9 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
dual gap for optimality and continues until it is smaller
than ``tol``.
+ min_propensity : float, optional, default ``1e-6``
+     The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
n_splits: int, cross-validation generator or an iterable, optional, default 2
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
@@ -822,6 +835,7 @@ def __init__(self,
alpha='auto',
max_iter=1000,
tol=1e-4,
+ min_propensity=1e-6,
n_splits=2, random_state=None):
model_final = DebiasedLasso(
alpha=alpha,
@@ -833,6 +847,7 @@
model_final=model_final,
featurizer=featurizer,
multitask_model_final=False,
+ min_propensity=min_propensity,
n_splits=n_splits,
random_state=random_state)

@@ -906,6 +921,9 @@ class ForestDRLearner(DRLearner):
`predict` methods. If different models per treatment arm are desired, see the
:class:`~econml.utilities.MultiModelWrapper` helper class.
+ min_propensity : float, optional, default ``1e-6``
+     The minimum propensity at which to clip propensity estimates to avoid dividing by zero.
n_crossfit_splits: int, cross-validation generator or an iterable, optional (Default=2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
@@ -1039,6 +1057,7 @@ class ForestDRLearner(DRLearner):

def __init__(self,
model_regression, model_propensity,
+ min_propensity=1e-6,
n_crossfit_splits=2,
n_estimators=1000,
criterion="mse",
@@ -1071,6 +1090,7 @@ def __init__(self,
super().__init__(model_regression=model_regression, model_propensity=model_propensity,
model_final=model_final, featurizer=None,
multitask_model_final=False,
+ min_propensity=min_propensity,
n_splits=n_crossfit_splits, random_state=random_state)

def _get_inference_options(self):
11 changes: 11 additions & 0 deletions econml/tests/test_drlearner.py
@@ -564,6 +564,17 @@ def test_sparse(self):
# Check that a majority of true effects lie in the 5-95% CI
self.assertTrue(in_CI.mean() > 0.8)

+ def test_drlearner_clipping(self):
+     X = np.linspace(0, 1, 200).reshape(-1, 1)
+     T = np.random.binomial(1, X)
+     Y = np.random.normal(size=T.shape)
+     X[0] = -1000  # one split will have only X values between 0 and 1,
+     # so the predicted propensity for this point will be extremely low
+     learner = DRLearner()
+     learner.fit(Y, T, X)
+     effect = learner.const_marginal_effect(np.array([[0.5]]))
+     assert not(np.any(np.isnan(effect)))

def _test_te(self, learner_instance, tol, te_type="const"):
if te_type not in ["const", "heterogeneous"]:
raise ValueError("Type of treatment effect must be 'const' or 'heterogeneous'.")
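From the user's side, the new parameter is just a constructor argument on `DRLearner` and its subclasses. A hypothetical usage sketch (not part of the diff; the data-generating process and the `1e-3` threshold are arbitrary choices for illustration):

```python
import numpy as np
from econml.drlearner import LinearDRLearner

X = np.random.uniform(size=(500, 1))
T = np.random.binomial(1, 0.2 + 0.6 * X.ravel())  # treatment probability depends on X
Y = T + np.random.normal(size=500)

# Clip propensity estimates at 1e-3 instead of the 1e-6 default,
# e.g. when the propensity model is noisy near the edges of the support.
est = LinearDRLearner(min_propensity=1e-3)
est.fit(Y, T, X=X)
print(est.effect(np.array([[0.5]])))
```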
