Skip to content

Commit

Permalink
Add support for uplifting in the inspector.
Browse files Browse the repository at this point in the history
This allows exporting uplift models from YDF to TF

PiperOrigin-RevId: 579181978
  • Loading branch information
rstz authored and copybara-github committed Nov 3, 2023
1 parent c6fee4a commit 244576e
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 6 deletions.
42 changes: 38 additions & 4 deletions tensorflow_decision_forests/component/builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ def _import_dataspec(self, src_dataspec: data_spec_pb2.DataSpecification):
if dst_col_idx == self._header.ranking_group_col_idx:
continue

if isinstance(self._objective, py_tree.objective.AbstractUpliftObjective):
if dst_col_idx == self._header.uplift_treatment_col_idx:
continue

if not created:
raise ValueError(
"import_dataspec was called after some of the model was build. "
Expand Down Expand Up @@ -511,7 +515,11 @@ def _initialize_header_column_idx(self):
label_column.name = self._objective.label
self._dataspec_column_index[label_column.name] = self._header.label_col_idx

if isinstance(self._objective, py_tree.objective.ClassificationObjective):
if isinstance(
self._objective, py_tree.objective.ClassificationObjective
) or isinstance(
self._objective, py_tree.objective.CategoricalUpliftObjective
):
label_column.type = ColumnType.CATEGORICAL

# One value is reserved for the non-used OOV item.
Expand All @@ -537,6 +545,7 @@ def _initialize_header_column_idx(self):
(
py_tree.objective.RegressionObjective,
py_tree.objective.RankingObjective,
py_tree.objective.NumericalUpliftObjective,
),
):
label_column.type = ColumnType.NUMERICAL
Expand All @@ -556,6 +565,18 @@ def _initialize_header_column_idx(self):
self._header.ranking_group_col_idx
)

if isinstance(self._objective, py_tree.objective.AbstractUpliftObjective):
assert len(self._dataspec.columns) == 1

# Create the "treatment" column for Uplifting.
self._header.uplift_treatment_col_idx = 1
treatment_column = self._dataspec.columns.add()
treatment_column.type = ColumnType.CATEGORICAL
treatment_column.name = self._objective.treatment
self._dataspec_column_index[treatment_column.name] = (
self._header.uplift_treatment_col_idx
)


@six.add_metaclass(abc.ABCMeta)
class AbstractDecisionForestBuilder(AbstractBuilder):
Expand Down Expand Up @@ -850,8 +871,11 @@ def check_leaf(self, node: py_tree.node.LeafNode):
"A regression objective requires leaf nodes with regressive values."
)

elif isinstance(self.objective, py_tree.objective.RankingObjective):
raise ValueError("Ranking objective not supported by this model")
elif isinstance(self.objective, py_tree.objective.AbstractUpliftObjective):
if not isinstance(node.value, py_tree.value.UpliftValue):
raise ValueError(
"An uplift objective requires leaf nodes with uplift values."
)

else:
raise NotImplementedError()
Expand Down Expand Up @@ -920,6 +944,11 @@ def __init__(
loss = gradient_boosted_trees_pb2.Loss.LAMBDA_MART_NDCG5
bias = [bias]

elif isinstance(objective, py_tree.objective.AbstractUpliftObjective):
raise ValueError(
"Uplift objective not supported by Gradient Boosted Tree models."
)

else:
raise NotImplementedError()

Expand Down Expand Up @@ -972,7 +1001,12 @@ def specialized_header_filename(self) -> str:
return self._file_prefix + inspector_lib.BASE_FILENAME_GBT_HEADER

def check_leaf(self, node: py_tree.node.LeafNode):
if not isinstance(node.value, py_tree.value.RegressionValue):
if isinstance(self.objective, py_tree.objective.AbstractUpliftObjective):
raise ValueError(
"Uplift objective not supported by Gradient Boosted Tree models."
)

elif not isinstance(node.value, py_tree.value.RegressionValue):
raise ValueError(
"A GBT model should only have leaf with regressive "
f"value. Got {node.value} instead."
Expand Down
16 changes: 16 additions & 0 deletions tensorflow_decision_forests/component/inspector/inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,22 @@ def objective(self) -> py_tree.objective.AbstractObjective:
return py_tree.objective.RankingObjective(
label=label.name, group=group_column.name)

elif self.task == Task.CATEGORICAL_UPLIFT:
uplift_treatment = self._dataspec.columns[
self._header.uplift_treatment_col_idx
]
return py_tree.objective.CategoricalUpliftObjective(
label=label.name, treatment=uplift_treatment.name
)

elif self.task == Task.NUMERICAL_UPLIFT:
uplift_treatment = self._dataspec.columns[
self._header.uplift_treatment_col_idx
]
return py_tree.objective.NumericalUpliftObjective(
label=label.name, treatment=uplift_treatment.name
)

else:
raise NotImplementedError()

Expand Down
43 changes: 43 additions & 0 deletions tensorflow_decision_forests/component/py_tree/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,46 @@ def __eq__(self, other):
if not isinstance(other, RankingObjective):
return False
return self.label == other.label and self._group == other._group


class AbstractUpliftObjective(AbstractObjective):
  """Base class of the objectives for uplift models.

  On top of the label handled by `AbstractObjective`, an uplift objective
  carries the name of the treatment. Concrete subclasses provide `task`.

  Two uplift objectives compare equal when their label, treatment and task are
  all equal. The hash is derived from the same three values so that equal
  objectives also hash equally.
  """

  def __init__(self, label: str, treatment: str):
    """Initializes the objective.

    Args:
      label: Name of the label; forwarded to the `AbstractObjective`
        initializer.
      treatment: Name of the treatment, exposed through `treatment`.
    """
    super().__init__(label)
    self._treatment = treatment

  @property
  def treatment(self) -> str:
    """Name of the treatment given at construction."""
    return self._treatment

  def __eq__(self, other):
    if not isinstance(other, AbstractUpliftObjective):
      return False
    return (
        self.label == other.label
        and self._treatment == other._treatment
        and self.task == other.task
    )

  def __hash__(self):
    # A class that defines __eq__ without __hash__ gets __hash__ = None and
    # becomes unhashable. Hash exactly the fields compared in __eq__ so the
    # objects stay usable in sets and as dict keys.
    return hash((self.label, self._treatment, self.task))


class CategoricalUpliftObjective(AbstractUpliftObjective):
  """Uplift objective whose task is `Task.CATEGORICAL_UPLIFT`."""

  def __repr__(self) -> str:
    return f"CategoricalUplift(label={self.label}, treatment={self._treatment})"

  @property
  def task(self) -> Task:
    """Task identifier of this objective."""
    return Task.CATEGORICAL_UPLIFT


class NumericalUpliftObjective(AbstractUpliftObjective):
  """Uplift objective whose task is `Task.NUMERICAL_UPLIFT`."""

  def __repr__(self) -> str:
    return f"NumericalUplift(label={self.label}, treatment={self._treatment})"

  @property
  def task(self) -> Task:
    """Task identifier of this objective."""
    return Task.NUMERICAL_UPLIFT
21 changes: 21 additions & 0 deletions tensorflow_decision_forests/component/py_tree/objective_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,27 @@ def test_ranking(self):
objective = objective_lib.RankingObjective(label="label", group="group")
logging.info("objective: %s", objective)

def test_numerical_uplift(self):
  """Builds a numerical uplift objective and checks its accessors."""
  objective = objective_lib.NumericalUpliftObjective(
      label="label", treatment="treatment"
  )
  logging.info("objective: %s", objective)
  # Logging alone verifies nothing; pin the values given at construction.
  self.assertEqual(objective.label, "label")
  self.assertEqual(objective.treatment, "treatment")

def test_categorical_uplift(self):
  """Builds a categorical uplift objective and checks its accessors."""
  objective = objective_lib.CategoricalUpliftObjective(
      label="label", treatment="treatment"
  )
  logging.info("objective: %s", objective)
  # Logging alone verifies nothing; pin the values given at construction.
  self.assertEqual(objective.label, "label")
  self.assertEqual(objective.treatment, "treatment")

def test_uplift_objects_are_not_equal(self):
  """Objectives with the same label and treatment but different tasks differ."""
  shared_kwargs = dict(label="label", treatment="treatment")
  numerical = objective_lib.NumericalUpliftObjective(**shared_kwargs)
  categorical = objective_lib.CategoricalUpliftObjective(**shared_kwargs)
  self.assertNotEqual(numerical, categorical)


if __name__ == "__main__":
tf.test.main()
8 changes: 7 additions & 1 deletion tensorflow_decision_forests/keras/core_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,6 @@ def yggdrasil_model_to_keras_model(
"file containing among other things, a data_spec.pb file."
)

temp_directory = None
if src_container == "zip":
# Unzip the model in a temporary directory
temp_directory = tempfile.TemporaryDirectory()
Expand All @@ -1255,6 +1254,13 @@ def yggdrasil_model_to_keras_model(
ranking_group=objective.group
if objective.task == inspector_lib.Task.RANKING
else None,
uplift_treatment=objective.treatment
if objective.task
in (
inspector_lib.Task.CATEGORICAL_UPLIFT,
inspector_lib.Task.NUMERICAL_UPLIFT,
)
else None,
verbose=verbose,
advanced_arguments=AdvancedArguments(
disable_categorical_integer_offset_correction=disable_categorical_integer_offset_correction,
Expand Down
26 changes: 25 additions & 1 deletion tensorflow_decision_forests/keras/keras_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2293,7 +2293,9 @@ def test_golden_model_gbt(self):
("adult_binary_class_gbdt", 0.012131),
("prefixed_adult_binary_class_gbdt", 0.012131),
)
def test_ydf_to_keras_model(self, ydf_model_directory, expected_prediction):
def test_ydf_to_keras_model_adult(
self, ydf_model_directory, expected_prediction
):
ygg_model_path = os.path.join(
ydf_test_data_path(), "model", ydf_model_directory
)
Expand Down Expand Up @@ -2328,6 +2330,28 @@ def custom_model_input_signature(
)
self.assertNear(prediction[0, 0], expected_prediction, 0.00001)

def test_ydf_to_keras_model_uplift(self):
  """Converts the categorical uplift test model and checks one prediction.

  The `sim_pte_categorical_uplift_rf` model is converted with
  `yggdrasil_model_to_keras_model`, reloaded, and applied to the
  `sim_pte_test.csv` dataset with the treatment column removed.
  """
  model_name = "sim_pte_categorical_uplift_rf"
  label_key = "y"
  treatment_key = "treat"

  source_model_dir = os.path.join(ydf_test_data_path(), "model", model_name)
  converted_model_dir = os.path.join(tmp_path(), model_name)

  csv_path = os.path.join(
      os.path.join(ydf_test_data_path(), "dataset"), "sim_pte_test.csv"
  )
  # The treatment column is dropped from the examples before prediction.
  examples = pd.read_csv(csv_path).drop(columns=treatment_key)

  core.yggdrasil_model_to_keras_model(source_model_dir, converted_model_dir)
  reloaded = models.load_model(converted_model_dir)
  tf_dataset = keras.pd_dataframe_to_tf_dataset(examples, label=label_key)
  predictions = reloaded.predict(tf_dataset)
  self.assertNear(predictions[0, 0], -0.7580058, 0.00001)

@parameterized.parameters(
"directory",
"zip",
Expand Down

0 comments on commit 244576e

Please sign in to comment.