From 225171aa441bc90874f634d6fff6f8f771f42016 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Tue, 14 May 2024 01:28:29 -0700 Subject: [PATCH 01/30] Fix error when plotting oblique trees in colab. PiperOrigin-RevId: 633479472 --- yggdrasil_decision_forests/port/python/CHANGELOG.md | 6 ++++++ .../port/python/ydf/learner/learner_test.py | 13 +++++++++++++ .../port/python/ydf/model/tree/condition.py | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 3bb912f3..98ad761a 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## HEAD + +### Fix + +- Fix error when plotting oblique trees (`model.plot_tree`) in colab. + ## 0.4.3- 2024-05-07 ### Feature diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py index fd25a94e..5ab0e156 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py @@ -653,6 +653,8 @@ def test_adult_sparse_oblique(self): sparse_oblique_weights="CONTINUOUS", ) model = learner.train(self.adult.train) + assert isinstance(model, decision_forest_model.DecisionForestModel) + model.plot_tree().html() logging.info("Trained model: %s", model) def test_adult_mhld_oblique(self): @@ -799,6 +801,17 @@ def test_ranking_path(self): self.assertGreaterEqual(evaluation.ndcg, 0.70) self.assertLessEqual(evaluation.ndcg, 0.74) + def test_adult_sparse_oblique(self): + learner = specialized_learners.GradientBoostedTreesLearner( + label="income", + num_trees=5, + split_axis="SPARSE_OBLIQUE", + ) + model = learner.train(self.adult.train) + assert isinstance(model, decision_forest_model.DecisionForestModel) + model.plot_tree().html() + logging.info("Trained 
model: %s", model) + def test_adult_num_threads(self): learner = specialized_learners.GradientBoostedTreesLearner( label="income", num_threads=12, num_trees=50 diff --git a/yggdrasil_decision_forests/port/python/ydf/model/tree/condition.py b/yggdrasil_decision_forests/port/python/ydf/model/tree/condition.py index 709530be..7d3d1e3b 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/tree/condition.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/tree/condition.py @@ -377,7 +377,7 @@ def _to_json_numerical_sparse_oblique( return { "type": "NUMERICAL_SPARSE_OBLIQUE", "attributes": [dataspec.columns[f].name for f in condition.attributes], - "weights": condition.weights, + "weights": list(condition.weights), "threshold": condition.threshold, } From 005177a2798893628b89069151dd5d40737ca761 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Tue, 14 May 2024 02:35:58 -0700 Subject: [PATCH 02/30] Add `max_depth` argument to `model.print_tree`. PiperOrigin-RevId: 633499732 --- yggdrasil_decision_forests/port/python/CHANGELOG.md | 4 ++++ .../model/decision_forest_model/decision_forest_model.py | 9 +++++++-- .../decision_forest_model/decision_forest_model_test.py | 8 ++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 98ad761a..c81bd9b8 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -2,6 +2,10 @@ ## HEAD +### Feature + +- Add `max_depth` argument to `model.print_tree`. + ### Fix - Fix error when plotting oblique trees (`model.plot_tree`) in colab. 
diff --git a/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model.py b/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model.py index 0d08e37d..294c2c24 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model.py @@ -57,7 +57,9 @@ def iter_trees(self) -> Iterator[tree_lib.Tree]: return (self.get_tree(tree_idx) for tree_idx in range(self.num_trees())) - def print_tree(self, tree_idx: int = 0, file=sys.stdout) -> None: + def print_tree( + self, tree_idx: int = 0, max_depth: Optional[int] = 6, file=sys.stdout + ) -> None: """Prints a tree in the terminal. Usage example: @@ -78,11 +80,14 @@ def print_tree(self, tree_idx: int = 0, file=sys.stdout) -> None: Args: tree_idx: Index of the tree. Should be in [0, self.num_trees()). + max_depth: Maximum tree depth of the plot. Set to None for full depth. file: Where to print the tree. By default, prints on the terminal standard output. 
""" - file.write(self.get_tree(tree_idx).pretty(self.data_spec())) + file.write( + self.get_tree(tree_idx).pretty(self.data_spec(), max_depth=max_depth) + ) def plot_tree( self, diff --git a/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model_test.py index 6d747570..ed51ef22 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/decision_forest_model/decision_forest_model_test.py @@ -69,6 +69,14 @@ def test_predict_leaves(self): ) self.assertTrue(np.all(leaves >= 0)) + def test_print_api(self): + self.adult_binary_class_gbdt.print_tree() + self.adult_binary_class_gbdt.print_tree(tree_idx=0, max_depth=None) + + def test_plot_api(self): + self.adult_binary_class_gbdt.plot_tree().html() + self.adult_binary_class_gbdt.plot_tree(tree_idx=0, max_depth=None).html() + @parameterized.parameters(x for x in generic_model.NodeFormat) def test_node_format(self, node_format: generic_model.NodeFormat): """Test that the node format is saved correctly.""" From 7981aacb22a113e6d82a42a217b8c9e535a7d888 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Tue, 21 May 2024 08:41:37 -0700 Subject: [PATCH 03/30] Documentation of the Distribute tool. 
PiperOrigin-RevId: 635819063 --- .../utils/distribute/README.md | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 yggdrasil_decision_forests/utils/distribute/README.md diff --git a/yggdrasil_decision_forests/utils/distribute/README.md b/yggdrasil_decision_forests/utils/distribute/README.md new file mode 100644 index 00000000..05ab6502 --- /dev/null +++ b/yggdrasil_decision_forests/utils/distribute/README.md @@ -0,0 +1,175 @@ +# Distribute + +**Distribute** is an open-source C++ library that allows for the implementation +of distributed algorithms in systems such as Borg and Cloud. Distribute provides +a small number of low-level primitives, granting developers maximum flexibility. + +Distribute is used for all the distributed computation of YDF, including +hyper-parameter tuning, benchmarks, and distributed training. + +## Features + +- Logic control in case of worker preemption or failure. +- Extensive testing on pipelines with hundreds of workers over multiple days. +- Cross-worker communication. +- Generic workers can be used in multiple different pipelines at the same + time. +- Multiple available backends (Borg workers, TensorFlow Distribute, + Multi-threads). Multi-threads backend is particularly useful during + development. + +## Computation model + +**Initialization** + +- Multiple machines execute a manager and N worker processes. Each worker is + assigned to an integer id in [0, N). Each worker knows its id. +- The pipeline is initialized by the manager with a "welcome" data blob (e.g. + a proto). The welcome blob cannot be modified. +- The workers are initialized with this welcome blob of data. After a worker + is preempted and restarted, it is re-initialized with this same welcome blob + of data. + +**Computation** + +- Computation is triggered by "queries". +- A query is created by the manager or a worker and contains a blob of data + called the query data (typically a proto). 
+- The query is executed by a worker who replies with a blob of data (called + the answer data) and an absl::Status. +- Queries can be issued synchronously (blocking) or asynchronously + (non-blocking). +- Queries can be sent globally (i.e., any available worker can execute them) + or to specific workers (identified by the worker id). +- A worker can emit queries while executing a query. This is useful for + cascade execution. +- The number of queries processed by each worker in parallel is configurable + in the manager and can be adjusted during pipeline execution. +- If a query computation fails, the absl::Status is returned to the caller. + +**Failure scenario** + +- After being preempted and restarted, a worker is re-initialized with a + welcome blob of data. The welcome blob of data can be used, for example, to + specify a CNS path to a checkpoint location. +- If the manager is restarted (e.g., preempted, failure), all the workers are + restarted. +- Each query emitter (manager or workers) is responsible for the queries they + emit. +- If a worker is interrupted while executing a global query (i.e. a query that + any worker can execute), the next available worker will execute this query + automatically. +- If a worker is interrupted while executing a worker targeted query (i.e. a + query that can only be executed by a given worker), the query emitter waits + for the worker to be back online and re-sends the query automatically. +- If a query emitter (manager or worker) is interrupted while a worker is + executing one of its queries, the query answer is discarded. + +**Shutdown** + +- When the user code on the manager stops a pipeline, the shutdown method is + called on all the workers. +- When the manager stops a pipeline, the worker processes can be interrupted + or kept running. +- If the worker processes are not interrupted, a new manager can be created to + start a new pipeline. 
+ +## Minimal example + +**The worker : `worker.cc`** + +```c++ +class ToyWorker final : public AbstractWorker { + +public: + + virtual ~ToyWorker() = default; + + // Initialize the worker with the welcome blob. + // Note: "Blob" is an alias for "std::string". + absl::Status Setup(Blob welcome_blob) override { + YDF_LOG(INFO) << "I am worker #" << WorkerIdx(); + return absl::OkStatus(); + } + + // Stop the worker. + absl::Status Done() override { + YDF_LOG(INFO) << "Bye"; + return absl::OkStatus(); + } + + // Execute a request. + absl::StatusOr RunRequest(Blob blob) override { + if(blob == "ping") return "pong"; + return absl::InvalidArgumentError("Unknown task"); + } +}; + +constexpr char kToyWorkerKey[] = "ToyWorker"; +REGISTER_Distribution_Worker(ToyWorker, kToyWorkerKey); +``` + +**The manager : `manager.cc`** + +```c++ +// Initialize +proto::Config config; +config.set_implementation_key("MULTI_THREAD"); // For debugging +config.MutableExtension(proto::multi_thread)->set_num_workers(5); +auto manager = CreateManager(config, /*worker_name=*/kToyWorkerKey, /*welcome_blob=*/"hello"); + +// Process + +// Blocking request to any worker. +auto result = manager->BlockingRequest("ping").value(); + +// Blocking request to a specific worker. +auto result = manager->BlockingRequest("ping", /*worker_idx=*/ 2).value(); + +// Async request to any worker. +for(int i=0; i<100; i++){ + manager->AsynchronousRequest("ping"); +} +for(int i=0; i<100; i++){ + auto result = manager->NextAsynchronousAnswer().value(); +} + +// Async request to a specific worker. +for(int i=0; i<100; i++){ + manager->AsynchronousRequest("ping", /*worker_idx=*/ i % manager->NumWorkers()); +} +for(int i=0; i<100; i++){ + auto result = manager->NextAsynchronousAnswer().value(); +} + +// Note: Workers can also execute "AsynchronousRequest". + +// Shutdown. This calls "Done" on all the workers and wait until it finishes. 
+manager->Done(); +``` + +## Examples + +### Beginner + +- [unit tests](https://source.corp.google.com/piper///depot/google3/third_party/yggdrasil_decision_forests/utils/distribute/distribute_test.cc): + Distribute unit tests. Shows all features. + +- [distribute cli](https://source.corp.google.com/piper///depot/google3/third_party/yggdrasil_decision_forests/utils/distribute_cli/): + Distribute the execution of CLI commands. + +### Intermediate + +- [hyperparameter_sweep](https://source.corp.google.com/piper///depot/google3/third_party/yggdrasil_decision_forests/examples/hyperparameter_sweep/README.md): + Trains and saves many models with various input features. + +- [benchmark v2](https://source.corp.google.com/piper///depot/google3/learning/lib/ami/simple_ml/benchmark_v2/README.md): + An ML benchmark training and evaluating millions of models. + +- [hyperparameters optimizer](https://source.corp.google.com/piper///depot/google3/third_party/yggdrasil_decision_forests/learner/hyperparameters_optimizer/BUILD): + YDF hyper-parameter tuner. + +### Advanced + +- [Distributed GBT](https://source.corp.google.com/piper///depot/google3/third_party/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD): + Distributed GBT training. From 2300559f86f77edadbab4383f202779efac19f18 Mon Sep 17 00:00:00 2001 From: TensorFlow Decision Forests Team Date: Thu, 23 May 2024 06:44:14 -0700 Subject: [PATCH 04/30] Update example value of `sparse_oblique_num_projections_exponent` in docs PiperOrigin-RevId: 636533987 --- documentation/public/docs/guide_how_to_improve_model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/public/docs/guide_how_to_improve_model.md b/documentation/public/docs/guide_how_to_improve_model.md index 4750955a..8c728f0d 100644 --- a/documentation/public/docs/guide_how_to_improve_model.md +++ b/documentation/public/docs/guide_how_to_improve_model.md @@ -102,7 +102,7 @@ for more details. 
learner = ydf.RandomForestLearner( split_axis="SPARSE_OBLIQUE", sparse_oblique_normalization="MIN_MAX", - sparse_oblique_num_projections_exponent=1, + sparse_oblique_num_projections_exponent=1.0, ...) ``` From 15f7c06cbfd780d3d8984fe5e073d64311bc4af3 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 23 May 2024 06:58:05 -0700 Subject: [PATCH 05/30] Add tools for unit testing debugging. - Plotting of ellipse predictions. - mnist dataset PiperOrigin-RevId: 636536865 --- .../port/python/dev_requirements.txt | 3 +- .../port/python/ydf/model/BUILD | 1 + .../port/python/ydf/model/jax_model_test.py | 149 ++++++++++++++++-- 3 files changed, 139 insertions(+), 14 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/dev_requirements.txt b/yggdrasil_decision_forests/port/python/dev_requirements.txt index 86a58190..5642d727 100644 --- a/yggdrasil_decision_forests/port/python/dev_requirements.txt +++ b/yggdrasil_decision_forests/port/python/dev_requirements.txt @@ -6,4 +6,5 @@ matplotlib jax; platform_machine != 'aarch64' and platform_system != 'Windows' jaxlib; platform_machine != 'aarch64' and platform_system != 'Windows' optax; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.9' -flatbuffers; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.12' \ No newline at end of file +flatbuffers; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.12' +tensorflow-datasets; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.9' \ No newline at end of file diff --git a/yggdrasil_decision_forests/port/python/ydf/model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/BUILD index 49412faa..4330a671 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/BUILD @@ -341,6 +341,7 @@ py_test( # numpy dep, # optax dep, # tensorflow dep, + # 
tensorflow_datasets dep, "@ydf_cc//yggdrasil_decision_forests/dataset:data_spec_py_proto", "//ydf/dataset:dataspec", "//ydf/learner:generic_learner", diff --git a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py index d3e03c04..11726d64 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py @@ -14,9 +14,10 @@ import array import logging +import os import sys import tempfile -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Callable, Dict, List, Optional, Sequence from absl.testing import absltest from absl.testing import parameterized @@ -66,23 +67,67 @@ def create_dataset(columns: List[str], n: int = 1000) -> Dict[str, Any]: return {k: data[k] for k in columns} +def create_dataset_mnist( + num_examples: int, begin_example_idx: int = 0 +) -> Dict[str, Any]: + """Creates a binary classification dataset based on MNIST. + + This function cannot be executed in a sandbox test as it downloads a dataset. + + Args: + num_examples: Number of examples to extract from the training dataset. + begin_example_idx: Index of the first example in the training dataset. + + Returns: + The binary mnist dataset. 
+ """ + + import tensorflow_datasets as tfds # pylint: disable=import-error,g-import-not-at-top + + tf_ds = tfds.load("mnist", split="train") + raw_ds = ( + tf_ds.skip(begin_example_idx) + .batch(num_examples) + .take(1) + .as_numpy_iterator() + .next() + ) + raw_image_shape = raw_ds["image"].shape + return { + "label": raw_ds["label"] >= 5, + "image": raw_ds["image"].reshape([raw_image_shape[0], -1]), + } + + def create_dataset_ellipse( num_examples: int = 1000, num_features: int = 3, plot_path: Optional[str] = None, -): - """Create a binary classification dataset classifying ellipses.""" +) -> Dict[str, np.ndarray]: + """Creates a binary classification dataset classifying ellipses. + + Args: + num_examples: Number of generated examples. + num_features: Number of features. + plot_path: If set, saves a plot of the examples to this file. + + Returns: + An ellipse dataset. + """ + features = np.random.uniform(-1, 1, size=[num_examples, num_features]) - scales = np.array([1.0 + i * 0.1 for i in range(num_features)]) + scales = np.array([1.0 + i * 0.4 for i in range(num_features)]) labels = ( np.sqrt(np.sum(np.multiply(np.square(features), scales), axis=1)) <= 0.80 ) if plot_path: colors = ["blue" if l else "red" for l in labels] - plt.scatter(features[:, 0], features[:, 1], color=colors, s=1.5) - plt.axis("off") - plt.savefig(plot_path) + fig, ax = plt.subplots(1, 1) + ax.scatter(features[:, 0], features[:, 1], color=colors, s=1.5) + ax.set_axis_off() + ax.set_aspect("equal") + fig.savefig(plot_path) data = {"label": labels} for i in range(num_features): @@ -90,6 +135,53 @@ def create_dataset_ellipse( return data +def plot_ellipse_predictions( + prediction_fn: Callable[[Dict[str, jax.Array]], jax.Array], + path: str, + resolution: int = 200, + num_features: int = 3, +) -> None: + """Plots the predictions of a model on the ellipse dataset. + + Args: + prediction_fn: A function taking a batch of examples, and returning + predictions. + path: Save to save the plot. 
+ resolution: Plotting resolution, for each axis. + num_features: Number of features. + """ + + # Compute feature values + vs = np.linspace(-1, 1, resolution) + tuple_features = np.meshgrid(*((vs,) * num_features)) + raw_features = np.stack([np.reshape(f, [-1]) for f in tuple_features], axis=1) + dict_features = { + f"f_{i}": jnp.asarray(raw_features[:, i]) for i in range(num_features) + } + + # Generate predictions + predictions = prediction_fn(dict_features) + + disp_predictions = np.reshape(predictions, [resolution] * num_features) + if len(disp_predictions.shape) > 2: + disp_predictions = np.mean( + disp_predictions, + axis=[i for i in range(2, len(disp_predictions.shape))], + ) + + # Plot predictions + fig, ax = plt.subplots(1, 1) + ax.imshow( + disp_predictions, + interpolation="none", + resample=False, + cmap="coolwarm", + ) + ax.set_axis_off() + ax.set_aspect("equal") + fig.savefig(path) + + class JaxModelTest(parameterized.TestCase): @parameterized.parameters( @@ -798,16 +890,41 @@ def test_to_jax_function( atol=1e-5, ) - def test_fine_tune_model(self): + def test_dataset_ellipse(self): + with tempfile.TemporaryDirectory() as tempdir: + _ = create_dataset_ellipse( + 1000, plot_path=os.path.join(tempdir, "ds.png") + ) + + def prediction_fn(features: Dict[str, jax.Array]) -> jax.Array: + return features["f_0"] >= 0.1 + + plot_ellipse_predictions( + prediction_fn, os.path.join(tempdir, "predictions.png") + ) + + @parameterized.named_parameters( + ("leaves_ellipse", "ellipse", True), + # ("leaves_mnist", "mnist", True), # Not compatible with test sandbox + ) + def test_fine_tune_model(self, dataset: str, leaves_as_params: bool): # Note: Optax cannot be imported in python 3.8. 
- import optax + import optax # pylint: disable=import-error,g-import-not-at-top # Make datasets label = "label" - train_ds = create_dataset_ellipse(1000) - test_ds = create_dataset_ellipse(10000) - finetune_ds = create_dataset_ellipse(10000) + + if dataset == "ellipse": + train_ds = create_dataset_ellipse(1000) + test_ds = create_dataset_ellipse(10000) + finetune_ds = create_dataset_ellipse(10000) + elif dataset == "mnist": + train_ds = create_dataset_mnist(1000) + test_ds = create_dataset_mnist(10000, 1000) + finetune_ds = create_dataset_mnist(10000, 1000 + 10000) + else: + assert False # Train a model with YDF model = specialized_learners.GradientBoostedTreesLearner(label=label).train( @@ -822,9 +939,15 @@ def test_fine_tune_model(self): jax_model = to_jax.to_jax_function( model, apply_activation=False, - leaves_as_params=True, + leaves_as_params=leaves_as_params, ) + # Check parameter values + if leaves_as_params: + self.assertContainsSubset( + ["leaf_values", "initial_predictions"], jax_model.params + ) + def to_jax_array(d): """Converts a numpy array into a jax array.""" return {k: jnp.asarray(v) for k, v in d.items()} From 827748773b44f75f3db8a118d584bc9ddcf73abb Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Fri, 24 May 2024 02:41:47 -0700 Subject: [PATCH 06/30] Support for oblique splits in JAX converter. 
PiperOrigin-RevId: 636847004 --- .../port/python/ydf/model/BUILD | 1 + .../port/python/ydf/model/export_jax.py | 89 +++++++++++++++++-- .../port/python/ydf/model/jax_model_test.py | 70 +++++++++++---- 3 files changed, 139 insertions(+), 21 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/BUILD index 4330a671..37ff4baa 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/BUILD @@ -346,6 +346,7 @@ py_test( "//ydf/dataset:dataspec", "//ydf/learner:generic_learner", "//ydf/learner:specialized_learners", + "//ydf/model/gradient_boosted_trees_model", "//ydf/model/tree:all", ], ) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py b/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py index dfc1f3c1..cfb1822d 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py @@ -18,7 +18,7 @@ import dataclasses import enum import functools -from typing import Any, Sequence, Dict, Optional, List, Set, Tuple, Callable, Union +from typing import Any, Sequence, Dict, Optional, List, Set, Tuple, Callable, Union, MutableSequence from yggdrasil_decision_forests.dataset import data_spec_pb2 as ds_pb from ydf.dataset import dataspec as dataspec_lib @@ -39,9 +39,9 @@ # pytype: enable=import-error # Typehint for arrays -ArrayFloat = array.array -ArrayInt = array.array -ArrayBool = array.array +ArrayFloat = MutableSequence[float] +ArrayInt = MutableSequence[int] +ArrayBool = MutableSequence[int] # Names of the learnable parameters of the model. _PARAM_LEAF_VALUES = "leaf_values" @@ -52,6 +52,7 @@ class ConditionType(enum.IntEnum): GREATER_THAN = 0 IS_IN = 1 + SPARSE_OBLIQUE = 2 def compact_dtype(values: Sequence[int]) -> Any: @@ -425,13 +426,15 @@ class InternalForest: dataspec: Dataspec. 
leaf_outputs: Prediction values for each leaf node. split_features: Internal idx of the feature being tested for each non-leaf - node. + node. For oblique splits, "split_features" contains the number of weights. split_parameters: Parameter of the condition for each non-leaf nodes. (1) For "greather than" condition (i.e., feature >= threshold), "split_parameter" is the threshold. (2) For "is in" condition (i.e., feature in mask), "split_parameter" is an uint32 offset in the mask "catgorical_mask" wheret the condition is evaluated as - "catgorical_mask[split_parameter + attribute_value]". + "catgorical_mask[split_parameter + attribute_value]". (3) for oblique + splits, "split_parameter" an uint32 offset for the first weight and + attribute in "oblique_weights" and "oblique_attributes" respectively. negative_children: Node offset of the negative children for each non-leaf node in the forest. positive_children: Node offset of the positive children for each non-leaf @@ -442,6 +445,9 @@ class InternalForest: trees. begin_leaf_nodes: Index of the first leaf node for each of the trees. catgorical_mask: Boolean mask used in "is in" conditions. + oblique_weights: Buffer of weights for the oblique splits. + oblique_attributes: Buffer of attributes for the oblique splits. Has the + same size as "oblique_weights". initial_predictions: Initial predictions of the forest (before any tree is applied). max_depth: Maximum depth of the trees. 
@@ -482,6 +488,12 @@ class InternalForest: catgorical_mask: ArrayBool = dataclasses.field( default_factory=lambda: array.array("b", []) ) + oblique_weights: ArrayFloat = dataclasses.field( + default_factory=lambda: array.array("f", []) + ) + oblique_attributes: ArrayInt = dataclasses.field( + default_factory=lambda: array.array("l", []) + ) initial_predictions: ArrayFloat = dataclasses.field( default_factory=lambda: array.array("f", []) ) @@ -500,6 +512,8 @@ def clear_array_data(self) -> None: self.begin_non_leaf_nodes = array.array("l", []) self.begin_leaf_nodes = array.array("l", []) self.catgorical_mask = array.array("l", []) + self.oblique_weights = array.array("f", []) + self.oblique_attributes = array.array("l", []) # Note: We don't release "initial_predictions". def __post_init__(self, model: generic_model.GenericModel): @@ -614,6 +628,32 @@ def _add_node( self.split_parameters.append(float_offset) self.condition_types.append(ConditionType.IS_IN) self.catgorical_mask.extend(bitmap) + + elif isinstance(node.condition, tree_lib.NumericalSparseObliqueCondition): + offset = len(self.oblique_weights) + num_weights = len(node.condition.weights) + + # Add the weights + self.oblique_weights.extend(node.condition.weights) + self.oblique_attributes.extend([ + self.feature_spec.inv_numerical[attribute] + for attribute in node.condition.attributes + ]) + # Add the bias + self.oblique_weights.append(node.condition.threshold) + self.oblique_attributes.append(0) + + # Encode the offset as a float32 + float_offset = float( + jax.lax.bitcast_convert_type( + jnp.array(offset, dtype=jnp.int32), jnp.float32 + ) + ) + + self.split_features.append(num_weights) + self.split_parameters.append(float_offset) + self.condition_types.append(ConditionType.SPARSE_OBLIQUE) + else: # TODO: Add support for other types of conditions. 
raise ValueError( @@ -674,6 +714,8 @@ class InternalForestJaxArrays: begin_non_leaf_nodes: jax.Array = dataclasses.field(init=False) begin_leaf_nodes: jax.Array = dataclasses.field(init=False) catgorical_mask: Optional[jax.Array] = dataclasses.field(init=False) + oblique_weights: Optional[jax.Array] = dataclasses.field(init=False) + oblique_attributes: Optional[jax.Array] = dataclasses.field(init=False) initial_predictions: Optional[jax.Array] = dataclasses.field(init=False) def __post_init__(self, forest: InternalForest): @@ -704,6 +746,16 @@ def __post_init__(self, forest: InternalForest): else: self.catgorical_mask = None + if forest.oblique_weights: + self.oblique_weights = asarray(forest.oblique_weights, dtype=jnp.float32) + else: + self.oblique_weights = None + + if forest.oblique_attributes: + self.oblique_attributes = to_compact_jax_array(forest.oblique_attributes) + else: + self.oblique_attributes = None + self.initial_predictions = asarray( forest.initial_predictions, dtype=jnp.float32 ) @@ -937,6 +989,27 @@ def condition_is_in(node_idx): ) return jax_arrays.catgorical_mask[categorical_mask_offset] + def condition_sparse_oblique(node_idx): + """Evaluates a sparse oblique condition.""" + num_weights = jax_arrays.split_features[node_idx] + offset = jax.lax.bitcast_convert_type( + jax_arrays.split_parameters[node_idx], jnp.int32 + ) + bias = jax_arrays.oblique_weights[offset + num_weights] + numerical_features = intern_feature_values["numerical"] + + def sum_iter(i, a): + return ( + a + + numerical_features[jax_arrays.oblique_attributes[i]] + * jax_arrays.oblique_weights[i] + ) + + weighted_sum = jax.lax.fori_loop( + offset, num_weights + offset, sum_iter, -bias + ) + return weighted_sum >= 0 + # Assemble the condition map. 
condition_fns = [None] * len(jax_arrays.dense_condition_mapping) if ConditionType.GREATER_THAN in jax_arrays.dense_condition_mapping: @@ -947,6 +1020,10 @@ def condition_is_in(node_idx): condition_fns[jax_arrays.dense_condition_mapping[ConditionType.IS_IN]] = ( condition_is_in ) + if ConditionType.SPARSE_OBLIQUE in jax_arrays.dense_condition_mapping: + condition_fns[ + jax_arrays.dense_condition_mapping[ConditionType.SPARSE_OBLIQUE] + ] = condition_sparse_oblique if len(condition_fns) == 1: # Since there is only one type of conditions, there is not need for a diff --git a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py index 11726d64..9939e326 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py @@ -35,17 +35,23 @@ from ydf.model import export_jax as to_jax from ydf.model import generic_model from ydf.model import tree as tree_lib - +from ydf.model.gradient_boosted_trees_model import gradient_boosted_trees_model InternalFeatureItem = to_jax.InternalFeatureItem -def create_dataset(columns: List[str], n: int = 1000) -> Dict[str, Any]: +def create_dataset( + columns: List[str], n: int = 1000, seed: Optional[int] = None +) -> Dict[str, Any]: """Creates a dataset with random values.""" + if seed is not None: + np.random.seed(seed) data = { # Single-dim features "f1": np.random.random(size=n), "f2": np.random.random(size=n), + "f3": np.random.random(size=n), + "f4": np.random.random(size=n), "i1": np.random.randint(100, size=n), "i2": np.random.randint(100, size=n), "c1": np.random.choice(["x", "y", "z"], size=n, p=[0.6, 0.3, 0.1]), @@ -810,6 +816,19 @@ def test_densify_conditions( False, specialized_learners.GradientBoostedTreesLearner, ), + ( + "gbt_regression_num_oblique", + ["f1", "f2", "f3", "f4"], + "label_regress", + generic_learner.Task.REGRESSION, + False, + 
specialized_learners.GradientBoostedTreesLearner, + { + "split_axis": "SPARSE_OBLIQUE", + "sparse_oblique_normalization": "STANDARD_DEVIATION", + }, + False, # TODO: Check conversion when bug solved. + ), ) def test_to_jax_function( self, @@ -818,12 +837,17 @@ def test_to_jax_function( task: generic_learner.Task, has_encoding: bool, learner, + learner_kwargs=None, + test_tf_conversion: bool = True, ): - if learner == specialized_learners.GradientBoostedTreesLearner: - learner_kwargs = {"validation_ratio": 0.0} - else: + if learner_kwargs is None: learner_kwargs = {} + else: + learner_kwargs = learner_kwargs.copy() + + if learner == specialized_learners.GradientBoostedTreesLearner: + learner_kwargs["validation_ratio"] = 0.0 # Create YDF model columns = features + [label] @@ -831,10 +855,13 @@ def test_to_jax_function( label=label, task=task, **learner_kwargs, - ).train(create_dataset(columns, 1000)) + ).train(create_dataset(columns, 1000, seed=1)) + self.assertIsInstance( + model, gradient_boosted_trees_model.GradientBoostedTreesModel + ) # Golden predictions - test_ds = create_dataset(columns, 1000) + test_ds = create_dataset(columns, 1, seed=2) ydf_predictions = model.predict(test_ds) # Convert model to tf function @@ -859,6 +886,9 @@ def test_to_jax_function( atol=1e-5, ) + if not test_tf_conversion: + return + # Convert to a TensorFlow function tf_model = tf.Module() tf_model.my_call = tf.function( @@ -885,7 +915,7 @@ def test_to_jax_function( restored_tf_predictions = restored_tf_model.my_call(input_values) np.testing.assert_allclose( restored_tf_predictions, - ydf_predictions, + jax_predictions, rtol=1e-5, atol=1e-5, ) @@ -904,10 +934,13 @@ def prediction_fn(features: Dict[str, jax.Array]) -> jax.Array: ) @parameterized.named_parameters( - ("leaves_ellipse", "ellipse", True), - # ("leaves_mnist", "mnist", True), # Not compatible with test sandbox + ("leaves_ellipse", "ellipse", True, False), + # ("leaves_mnist", "mnist", True , False), # Skip in sandboxed test 
+ ("leaves_ellipse_oblique", "ellipse", True, True), ) - def test_fine_tune_model(self, dataset: str, leaves_as_params: bool): + def test_fine_tune_model( + self, dataset: str, leaves_as_params: bool, oblique: bool + ): # Note: Optax cannot be imported in python 3.8. import optax # pylint: disable=import-error,g-import-not-at-top @@ -926,10 +959,15 @@ def test_fine_tune_model(self, dataset: str, leaves_as_params: bool): else: assert False + kwargs = {} + if oblique: + kwargs["split_axis"] = "SPARSE_OBLIQUE" + kwargs["sparse_oblique_normalization"] = "STANDARD_DEVIATION" + # Train a model with YDF - model = specialized_learners.GradientBoostedTreesLearner(label=label).train( - train_ds - ) + model = specialized_learners.GradientBoostedTreesLearner( + label=label, **kwargs + ).train(train_ds) # Evaluate the YDF model pre_tuned_ydf_accuracy = model.evaluate(test_ds).accuracy @@ -976,7 +1014,9 @@ def compute_loss(state, batch): compute_accuracy(jax_model.params, jax_test_ds) ) logging.info("pre_tuned_jax_test_accuracy: %s", pre_tuned_jax_test_accuracy) - self.assertAlmostEqual(pre_tuned_jax_test_accuracy, pre_tuned_ydf_accuracy) + self.assertAlmostEqual( + pre_tuned_jax_test_accuracy, pre_tuned_ydf_accuracy, delta=1.0e-5 + ) # Finetune the JAX model assert jax_model.params is not None From 3aec76ea19dff886dc2e9a6656959bb09a02cd5d Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Fri, 24 May 2024 04:31:58 -0700 Subject: [PATCH 07/30] Sort the dictionary of integer labels casted to string following the numerical order. 
PiperOrigin-RevId: 636870447 --- .../port/python/ydf/dataset/dataset.py | 50 +++++++++++++-- .../port/python/ydf/dataset/dataset_test.py | 61 ++++++++++++++----- .../port/python/ydf/dataset/dataspec.py | 2 +- .../python/ydf/learner/generic_learner.py | 1 + .../port/python/ydf/model/generic_model.py | 2 +- 5 files changed, 93 insertions(+), 23 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py index 1be34d97..8960f095 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py @@ -56,6 +56,7 @@ def _add_column( column_data: Any, inference_args: Optional[dataspec.DataSpecInferenceArgs], column_idx: Optional[int], + is_label: bool, ): """Adds a column to the dataset and computes the column statistics.""" assert (column_idx is None) != (inference_args is None) @@ -112,7 +113,7 @@ def _add_column( elif column.semantic == dataspec.Semantic.CATEGORICAL: - from_boolean = False + force_dictionary = None if not isinstance(column_data, np.ndarray): column_data = np.array(column_data, dtype=np.bytes_) ydf_dtype = dataspec.np_dtype_to_ydf_dtype(column_data.dtype) @@ -121,7 +122,16 @@ def _add_column( bool_column_data = column_data column_data = np.full_like(bool_column_data, b"false", "|S5") column_data[bool_column_data] = b"true" - from_boolean = True + force_dictionary = [dataspec.YDF_OOD_BYTES, b"false", b"true"] + elif ( + is_label + and column_data.dtype.type in dataspec.NP_SUPPORTED_INT_DTYPE + and (dictionary_size := dense_integer_dictionary_size(column_data)) + ): + column_data = column_data.astype(np.bytes_) + force_dictionary = [dataspec.YDF_OOD_BYTES] + [ + str(i).encode("utf-8") for i in range(dictionary_size) + ] elif ( column_data.dtype.type in [ @@ -145,10 +155,8 @@ def _add_column( if column_data.dtype.type == np.bytes_: if inference_args is not None: guide = 
dataspec.categorical_column_guide(column, inference_args) - if from_boolean: - guide["dictionary"] = np.array( - [b"", b"false", b"true"], dtype=np.bytes_ - ) + if force_dictionary: + guide["dictionary"] = np.array(force_dictionary, dtype=np.bytes_) self._dataset.PopulateColumnCategoricalNPBytes( column.name, column_data, **guide, ydf_dtype=ydf_dtype ) @@ -259,6 +267,7 @@ def create_vertical_dataset( data_spec: Optional[data_spec_pb2.DataSpecification] = None, required_columns: Optional[Sequence[str]] = None, dont_unroll_columns: Optional[Sequence[str]] = None, + label: Optional[str] = None, ) -> VerticalDataset: """Creates a VerticalDataset from various sources of data. @@ -342,6 +351,7 @@ def create_vertical_dataset( mentioned in the data spec or `columns` are required. dont_unroll_columns: List of columns that cannot be unrolled. If one such column needs to be unrolled, raise an error. + label: Name of the label column, if any. Returns: Dataset to be ingested by the learner algorithms. 
@@ -364,6 +374,7 @@ def create_vertical_dataset( data_spec=data_spec, inference_args=None, dont_unroll_columns=dont_unroll_columns, + label=label, ) else: inference_args = dataspec.DataSpecInferenceArgs( @@ -382,6 +393,7 @@ def create_vertical_dataset( inference_args=inference_args, data_spec=None, dont_unroll_columns=dont_unroll_columns, + label=label, ) @@ -391,6 +403,7 @@ def create_vertical_dataset_with_spec_or_args( inference_args: Optional[dataspec.DataSpecInferenceArgs], data_spec: Optional[data_spec_pb2.DataSpecification], dont_unroll_columns: Optional[Sequence[str]] = None, + label: Optional[str] = None, ) -> VerticalDataset: """Returns a vertical dataset from inference args or data spec (not both!).""" assert (data_spec is None) != (inference_args is None) @@ -416,6 +429,7 @@ def create_vertical_dataset_with_spec_or_args( required_columns, inference_args=inference_args, data_spec=data_spec, + label=label, ) @@ -447,6 +461,7 @@ def create_vertical_dataset_from_dict_of_values( required_columns: Optional[Sequence[str]], inference_args: Optional[dataspec.DataSpecInferenceArgs], data_spec: Optional[data_spec_pb2.DataSpecification], + label: Optional[str] = None, ) -> VerticalDataset: """Specialization of create_vertical_dataset to dictionary of values. @@ -461,6 +476,7 @@ def create_vertical_dataset_from_dict_of_values( is set. data_spec: Data spec of the given data. Must be None if inference_args is set. + label: Name of the label column, if any. Returns: A Vertical dataset with the given properties. 
@@ -543,6 +559,7 @@ def dataspec_to_normalized_columns( column_data, inference_args=inference_args, # Might be None column_idx=column_idx if data_spec is not None else None, + is_label=label == column.name, ) if data_spec is None: @@ -667,3 +684,24 @@ def _type(value: Any) -> str: return f"numpy's array of '{value.dtype.name}'" else: return str(type(value)) + + +def dense_integer_dictionary_size(values: np.ndarray) -> Optional[int]: + """Gets the number of items in a dense and zero-indexed array of integers. + + If the array is not dense or not zero-indexed, returns None. + + Args: + values: Numpy array of integer values. + + Returns: + Number of unique dense values, or None. + """ + unique_values = np.unique(values).tolist() + if ( + unique_values + and unique_values[0] == 0 + and unique_values[-1] + 1 == len(unique_values) + ): + return len(unique_values) + return None diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py index 69a455bd..a5f9aa3f 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py @@ -571,7 +571,7 @@ def test_order_boolean(self, values, expected_counts, count_nas): count_nas=count_nas, categorical=ds_pb.CategoricalSpec( items={ - "": VocabValue(index=0, count=expected_counts[0]), + "": VocabValue(index=0, count=expected_counts[0]), "false": VocabValue(index=1, count=expected_counts[1]), "true": VocabValue(index=2, count=expected_counts[2]), }, @@ -582,31 +582,38 @@ def test_order_boolean(self, values, expected_counts, count_nas): ) test_utils.assertProto2Equal(self, ds.data_spec(), expected_data_spec) - @parameterized.parameters( - ([True, True, True], (0, 0, 3), 0), - ([False, False, False], (0, 3, 0), 0), - ([True, False, False], (0, 2, 1), 0), - ) - def test_order_boolean(self, values, expected_counts, count_nas): + def test_order_integers(self): ds = 
dataset.create_vertical_dataset( - {"col": np.array(values)}, + {"col": np.array([0, 1, 4, 3, 1, 2, 3, 4, 12, 11, 10, 9, 8, 7, 6, 5])}, columns=[Column("col", dataspec.Semantic.CATEGORICAL)], + label="col", ) expected_data_spec = ds_pb.DataSpecification( - created_num_rows=3, + created_num_rows=16, columns=( ds_pb.Column( name="col", type=ds_pb.ColumnType.CATEGORICAL, - dtype=ds_pb.DType.DTYPE_BOOL, - count_nas=count_nas, + dtype=ds_pb.DType.DTYPE_INT64, + count_nas=0, categorical=ds_pb.CategoricalSpec( items={ - "": VocabValue(index=0, count=expected_counts[0]), - "false": VocabValue(index=1, count=expected_counts[1]), - "true": VocabValue(index=2, count=expected_counts[2]), + "": VocabValue(index=0, count=0), + "0": VocabValue(index=1, count=1), + "1": VocabValue(index=2, count=2), + "2": VocabValue(index=3, count=1), + "3": VocabValue(index=4, count=2), + "4": VocabValue(index=5, count=2), + "5": VocabValue(index=6, count=1), + "6": VocabValue(index=7, count=1), + "7": VocabValue(index=8, count=1), + "8": VocabValue(index=9, count=1), + "9": VocabValue(index=10, count=1), + "10": VocabValue(index=11, count=1), + "11": VocabValue(index=12, count=1), + "12": VocabValue(index=13, count=1), }, - number_of_unique_values=3, + number_of_unique_values=14, ), ), ), @@ -1885,5 +1892,29 @@ def test_required_columns_file_inference_args_explicit_success( self.assertEqual(ds._dataset.DebugString(), "f1\n1\n2\n3\n") +class DenseDictionaryTest(parameterized.TestCase): + + @parameterized.parameters( + ([0], 1), + ([0, 1], 2), + ([0, 1, 1, 0], 2), + ([1, 0], 2), + ([4, 3, 4, 1, 2, 0, 1, 2, 4], 5), + ) + def test_dense_integer_dictionary_size(self, values, expected): + self.assertEqual( + dataset.dense_integer_dictionary_size(np.array(values)), expected + ) + + @parameterized.parameters( + ([],), + ([-1, 0, 1],), + ([1, 2, 3, 4],), + ([0, 1, 3, 4],), + ) + def test_dense_integer_dictionary_size_is_none(self, values): + 
self.assertIsNone(dataset.dense_integer_dictionary_size(np.array(values))) + + if __name__ == "__main__": absltest.main() diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py index 0522f5c3..9a0d8b29 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py @@ -16,7 +16,6 @@ import dataclasses import enum -import logging from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union import numpy as np @@ -35,6 +34,7 @@ # Must match kOutOfDictionaryItemKey in # yggdrasil_decision_forests/dataset/data_spec.h YDF_OOD = "" +YDF_OOD_BYTES = b"" # Mapping between Numpy dtypes and YDF dtypes. _NP_DTYPE_TO_YDF_DTYPE = { diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py index 2a946788..f624a480 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py @@ -339,6 +339,7 @@ def _get_vertical_dataset( inference_args=effective_data_spec_args, required_columns=None, # All columns in the dataspec are required. 
dont_unroll_columns=dont_unroll_columns, + label=self._label, ) def cross_validation( diff --git a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py index e96df37c..7eee2933 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py @@ -944,7 +944,7 @@ def label(self) -> str: return self.data_spec().columns[self.label_col_idx()].name def label_classes(self) -> List[str]: - """Returns the label classes for classification tasks, None otherwise.""" + """Returns the label classes for a classification model; fails otherwise.""" if self.task() != Task.CLASSIFICATION: raise ValueError( "Label classes are only available for classification models. This" From 2b3e21cbdf2892d3c3aa531ee3caa6376f0e00b1 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 27 May 2024 03:04:06 -0700 Subject: [PATCH 08/30] Add `verbose` argument to `train` method which is equivalent but sometime more convenient than`ydf.verbose`. PiperOrigin-RevId: 637584007 --- .../port/python/CHANGELOG.md | 2 + .../python/ydf/learner/generic_learner.py | 16 +- .../port/python/ydf/learner/learner_test.py | 10 +- .../specialized_learners_pre_generated.py | 493 +++++++----------- .../ydf/learner/wrapper/wrapper_generator.cc | 7 +- .../ydf/learner/wrapper/wrapper_test.cc | 7 +- .../port/python/ydf/utils/log.py | 14 +- 7 files changed, 242 insertions(+), 307 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index c81bd9b8..42b6785b 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -5,6 +5,8 @@ ### Feature - Add `max_depth` argument to `model.print_tree`. +- Add `verbose` argument to `train` method which is equivalent but sometimes + more convenient than `ydf.verbose`.
### Fix diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py index f624a480..37da48e0 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py @@ -37,6 +37,7 @@ from ydf.model import generic_model from ydf.model import model_lib from ydf.utils import log +from ydf.utils import log from yggdrasil_decision_forests.utils import fold_generator_pb2 from yggdrasil_decision_forests.utils.distribute.implementations.grpc import grpc_pb2 @@ -116,6 +117,7 @@ def train( self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> generic_model.ModelType: """Trains a model on the given dataset. @@ -162,6 +164,10 @@ def train( do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 or False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. @@ -187,7 +193,15 @@ def train( "The validation dataset may only be a path if the training dataset is" " a path."
) - return self._train_from_dataset(ds, valid) + + saved_verbose = log.verbose(verbose) if verbose is not None else None + try: + model = self._train_from_dataset(ds, valid) + finally: + if saved_verbose is not None: + log.verbose(saved_verbose) + + return model def __str__(self) -> str: return f"""\ diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py index 5ab0e156..8f7816c0 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py @@ -994,14 +994,20 @@ def test_model_with_na_conditions_numerical(self): class LoggingTest(parameterized.TestCase): - @parameterized.parameters(0, 1, 2) - def test_logging(self, verbose): + @parameterized.parameters(0, 1, 2, False, True) + def test_logging_function(self, verbose): save_verbose = log.verbose(verbose) learner = specialized_learners.RandomForestLearner(label="label") ds = pd.DataFrame({"feature": [0, 1], "label": [0, 1]}) _ = learner.train(ds) log.verbose(save_verbose) + @parameterized.parameters(0, 1, 2, False, True) + def test_logging_arg(self, verbose): + learner = specialized_learners.RandomForestLearner(label="label") + ds = pd.DataFrame({"feature": [0, 1], "label": [0, 1]}) + _ = learner.train(ds, verbose=verbose) + class UtilityTest(LearnerTest): diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py b/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py index c3f0964a..0b19e100 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py @@ -239,6 +239,18 @@ class RandomForestLearner(generic_learner.GenericLearner): expressed in seconds. Each learning algorithm is free to use this parameter at it sees fit. 
Enabling maximum training duration makes the model training non-deterministic. Default: -1.0. + mhld_oblique_max_num_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection. + Increasing this value increases the training time. Decreasing this value + acts as a regularization. The value should be in [2, + num_numerical_features]. If the value is above the total number of + numerical features, the value is capped automatically. The value 1 is + allowed but results in ordinary (non-oblique) splits. Default: None. + mhld_oblique_sample_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling + controlled by the "num_candidate_attributes" or + "num_candidate_attributes_ratio" parameters. If false, all the attributes + are tested. Default: None. min_examples: Minimum number of examples in a node. Default: 5. missing_value_policy: Method used to handle missing attribute values. - `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean @@ -293,6 +305,16 @@ class RandomForestLearner(generic_learner.GenericLearner): IN_NODE. - IN_NODE: The features are sorted just before being used in the node. This solution is slow but consumes little amount of memory. . Default: "PRESORT". + sparse_oblique_max_num_projections: For sparse oblique splits i.e. + `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after + the num_projections_exponent). Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Increasing "max_num_projections" increases the training time but not the + inference time. In late stage model development, if every bit of accuracy + is important, increase this value. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) does not define this hyperparameter. + Default: None.
sparse_oblique_normalization: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Normalization applied on the features, before applying the sparse oblique projections. - `NONE`: No normalization. - @@ -302,12 +324,28 @@ class RandomForestLearner(generic_learner.GenericLearner): max-min) estimated on the entire train dataset. Default: None. sparse_oblique_num_projections_exponent: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. - sparse_oblique_projection_density_factor: For sparse oblique splits i.e. - `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. + to test at each node. Increasing this value very likely improves the + quality of the model, drastically increases the training time, and does not + impact the inference time. Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Therefore, increasing this `num_projections_exponent` and possibly + `max_num_projections` may improve model quality, but will also + significantly increase training time. Note that the complexity of + (classic) Random Forests is roughly proportional to + `num_projections_exponent=0.5`, since it considers sqrt(num_features) for + a split. The complexity of (classic) GBDT is roughly proportional to + `num_projections_exponent=1`, since it considers all features for a split. + The paper "Sparse Projection Oblique Random Forests" (Tomita et al, 2020) + recommends values in [1/4, 2]. Default: None. + sparse_oblique_projection_density_factor: Density of the projections as an + exponent of the number of features.
Independently for each projection, + each feature has a probability "projection_density_factor / num_features" + to be considered in the projection. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) calls this parameter `lambda` and + recommends values in [1, 5]. Increasing this value increases training and + inference time (on average). This value is best tuned for each dataset. + Default: None. sparse_oblique_weights: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Possible values: - `BINARY`: The oblique weights are sampled in {-1,1} (default). - `CONTINUOUS`: The oblique @@ -315,9 +353,11 @@ class RandomForestLearner(generic_learner.GenericLearner): split_axis: What structure of split to consider for numerical features. - `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`: - Sparse oblique splits (i.e. splits one a small number of features) from - "Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default: - "AXIS_ALIGNED". + Sparse oblique splits (i.e. random splits on a small number of features) + from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. - + `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from + "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes + et al., 2019. Default: "AXIS_ALIGNED". uplift_min_examples_in_treatment: For uplift models only. Minimum number of examples per treatment in a node. Default: 5. uplift_split_score: For uplift models only. Splitter score i.e.
score @@ -401,6 +441,8 @@ def __init__( max_num_nodes: Optional[int] = None, maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0, maximum_training_duration_seconds: Optional[float] = -1.0, + mhld_oblique_max_num_attributes: Optional[int] = None, + mhld_oblique_sample_attributes: Optional[bool] = None, min_examples: Optional[int] = 5, missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION", num_candidate_attributes: Optional[int] = 0, @@ -411,6 +453,7 @@ def __init__( random_seed: Optional[int] = 123456, sampling_with_replacement: Optional[bool] = True, sorting_strategy: Optional[str] = "PRESORT", + sparse_oblique_max_num_projections: Optional[int] = None, sparse_oblique_normalization: Optional[str] = None, sparse_oblique_num_projections_exponent: Optional[float] = None, sparse_oblique_projection_density_factor: Optional[float] = None, @@ -458,6 +501,8 @@ def __init__( maximum_model_size_in_memory_in_bytes ), "maximum_training_duration_seconds": maximum_training_duration_seconds, + "mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes, + "mhld_oblique_sample_attributes": mhld_oblique_sample_attributes, "min_examples": min_examples, "missing_value_policy": missing_value_policy, "num_candidate_attributes": num_candidate_attributes, @@ -470,6 +515,9 @@ def __init__( "random_seed": random_seed, "sampling_with_replacement": sampling_with_replacement, "sorting_strategy": sorting_strategy, + "sparse_oblique_max_num_projections": ( + sparse_oblique_max_num_projections + ), "sparse_oblique_normalization": sparse_oblique_normalization, "sparse_oblique_num_projections_exponent": ( sparse_oblique_num_projections_exponent @@ -521,6 +569,7 @@ def train( self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> random_forest_model.RandomForestModel: """Trains a model on the given dataset. @@ -551,11 +600,15 @@ def train( do not need validation dataset. 
Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 or False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. """ - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) @classmethod def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: @@ -619,271 +672,6 @@ def hyperparameter_templates( } -class HyperparameterOptimizerLearner(generic_learner.GenericLearner): - r"""Hyperparameter Optimizer learning algorithm. - - Usage example: - - ```python - import ydf - import pandas as pd - - dataset = pd.read_csv("project/dataset.csv") - - model = ydf.HyperparameterOptimizerLearner().train(dataset) - - print(model.summary()) - ``` - - Hyperparameters are configured to give reasonable results for typical - datasets. Hyperparameters can also be modified manually (see descriptions) - below or by applying the hyperparameter templates available with - `HyperparameterOptimizerLearner.hyperparameter_templates()` (see this - function's documentation for - details). - - Attributes: - label: Label of the dataset. The label column should not be identified as a - feature in the `features` parameter. - task: Task to solve (e.g. Task.CLASSIFICATION, Task.REGRESSION, - Task.RANKING, Task.CATEGORICAL_UPLIFT, Task.NUMERICAL_UPLIFT). - weights: Name of a feature that identifies the weight of each example. If - weights are not specified, unit weights are assumed. The weight column - should not be identified as a feature in the `features` parameter. - ranking_group: Only for `task=Task.RANKING`. Name of a feature that - identifies queries in a query/document ranking task.
The ranking group - should not be identified as a feature in the `features` parameter. - uplift_treatment: Only for `task=Task.CATEGORICAL_UPLIFT` and `task=Task`. - NUMERICAL_UPLIFT. Name of a numerical feature that identifies the - treatment in an uplift problem. The value 0 is reserved for the control - treatment. Currently, only 0/1 binary treatments are supported. - features: If None, all columns are used as features. The semantic of the - features is determined automatically. Otherwise, if - include_all_columns=False (default) only the column listed in `features` - are imported. If include_all_columns=True, all the columns are imported as - features and only the semantic of the columns NOT in `columns` is - determined automatically. If specified, defines the order of the features - - any non-listed features are appended in-order after the specified - features (if include_all_columns=True). The label, weights, uplift - treatment and ranking_group columns should not be specified as features. - include_all_columns: See `features`. - max_vocab_count: Maximum size of the vocabulary of CATEGORICAL and - CATEGORICAL_SET columns stored as strings. If more unique values exist, - only the most frequent values are kept, and the remaining values are - considered as out-of-vocabulary. - min_vocab_frequency: Minimum number of occurrence of a value for CATEGORICAL - and CATEGORICAL_SET columns. Value observed less than - `min_vocab_frequency` are considered as out-of-vocabulary. - discretize_numerical_columns: If true, discretize all the numerical columns - before training. Discretized numerical columns are faster to train with, - but they can have a negative impact on the model quality. Using - `discretize_numerical_columns=True` is equivalent as setting the column - semantic DISCRETIZED_NUMERICAL in the `column` argument. See the - definition of DISCRETIZED_NUMERICAL for more details. 
- num_discretized_numerical_bins: Number of bins used when disretizing - numerical columns. - max_num_scanned_rows_to_infer_semantic: Number of rows to scan when - inferring the column's semantic if it is not explicitly specified. Only - used when reading from file, in-memory datasets are always read in full. - Setting this to a lower number will speed up dataset reading, but might - result in incorrect column semantics. Set to -1 to scan the entire - dataset. - max_num_scanned_rows_to_compute_statistics: Number of rows to scan when - computing a column's statistics. Only used when reading from file, - in-memory datasets are always read in full. A column's statistics include - the dictionary for categorical features and the mean / min / max for - numerical features. Setting this to a lower number will speed up dataset - reading, but skew statistics in the dataspec, which can hurt model quality - (e.g. if an important category of a categorical feature is considered - OOV). Set to -1 to scan the entire dataset. - data_spec: Dataspec to be used (advanced). If a data spec is given, - `columns`, `include_all_columns`, `max_vocab_count`, - `min_vocab_frequency`, `discretize_numerical_columns` and - `num_discretized_numerical_bins` will be ignored. - maximum_model_size_in_memory_in_bytes: Limit the size of the model when - stored in ram. Different algorithms can enforce this limit differently. - Note that when models are compiled into an inference, the size of the - inference engine is generally much smaller than the original model. - Default: -1.0. - maximum_training_duration_seconds: Maximum training duration of the model - expressed in seconds. Each learning algorithm is free to use this - parameter at it sees fit. Enabling maximum training duration makes the - model training non-deterministic. Default: -1.0. - pure_serving_model: Clear the model from any information that is not - required for model serving. 
This includes debugging, model interpretation - and other meta-data. The size of the serialized model can be reduced - significatively (50% model size reduction is common). This parameter has - no impact on the quality, serving speed or RAM usage of model serving. - Default: False. - random_seed: Random seed for the training of the model. Learners are - expected to be deterministic by the random seed. Default: 123456. - num_threads: Number of threads used to train the model. Different learning - algorithms use multi-threading differently and with different degree of - efficiency. If `None`, `num_threads` will be automatically set to the - number of processors (up to a maximum of 32; or set to 6 if the number of - processors is not available). Making `num_threads` significantly larger - than the number of processors can slow-down the training speed. The - default value logic might change in the future. - resume_training: If true, the model training resumes from the checkpoint - stored in the `working_dir` directory. If `working_dir` does not contain - any model checkpoint, the training starts from the beginning. Resuming - training is useful in the following situations: (1) The training was - interrupted by the user (e.g. ctrl+c or "stop" button in a notebook) or - rescheduled, or (2) the hyper-parameter of the learner was changed e.g. - increasing the number of trees. - working_dir: Path to a directory available for the learning algorithm to - store intermediate computation results. Depending on the learning - algorithm and parameters, the working_dir might be optional, required, or - ignored. For instance, distributed training algorithm always need a - "working_dir", and the gradient boosted tree and hyper-parameter tuners - will export artefacts to the "working_dir" if provided. - resume_training_snapshot_interval_seconds: Indicative number of seconds in - between snapshots when `resume_training=True`. Might be ignored by some - learners. 
- tuner: If set, automatically select the best hyperparameters using the - provided tuner. When using distributed training, the tuning is - distributed. - workers: If set, enable distributed training. "workers" is the list of IP - addresses of the workers. A worker is a process running - `ydf.start_worker(port)`. - """ - - def __init__( - self, - label: str, - task: generic_learner.Task = generic_learner.Task.CLASSIFICATION, - weights: Optional[str] = None, - ranking_group: Optional[str] = None, - uplift_treatment: Optional[str] = None, - features: dataspec.ColumnDefs = None, - include_all_columns: bool = False, - max_vocab_count: int = 2000, - min_vocab_frequency: int = 5, - discretize_numerical_columns: bool = False, - num_discretized_numerical_bins: int = 255, - max_num_scanned_rows_to_infer_semantic: int = 10000, - max_num_scanned_rows_to_compute_statistics: int = 10000, - data_spec: Optional[data_spec_pb2.DataSpecification] = None, - maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0, - maximum_training_duration_seconds: Optional[float] = -1.0, - pure_serving_model: Optional[bool] = False, - random_seed: Optional[int] = 123456, - num_threads: Optional[int] = None, - working_dir: Optional[str] = None, - resume_training: bool = False, - resume_training_snapshot_interval_seconds: int = 1800, - tuner: Optional[tuner_lib.AbstractTuner] = None, - workers: Optional[Sequence[str]] = None, - ): - - hyper_parameters = { - "maximum_model_size_in_memory_in_bytes": ( - maximum_model_size_in_memory_in_bytes - ), - "maximum_training_duration_seconds": maximum_training_duration_seconds, - "pure_serving_model": pure_serving_model, - "random_seed": random_seed, - } - - data_spec_args = dataspec.DataSpecInferenceArgs( - columns=dataspec.normalize_column_defs(features), - include_all_columns=include_all_columns, - max_vocab_count=max_vocab_count, - min_vocab_frequency=min_vocab_frequency, - discretize_numerical_columns=discretize_numerical_columns, - 
num_discretized_numerical_bins=num_discretized_numerical_bins, - max_num_scanned_rows_to_infer_semantic=max_num_scanned_rows_to_infer_semantic, - max_num_scanned_rows_to_compute_statistics=max_num_scanned_rows_to_compute_statistics, - ) - - deployment_config = self._build_deployment_config( - num_threads=num_threads, - resume_training=resume_training, - resume_training_snapshot_interval_seconds=resume_training_snapshot_interval_seconds, - working_dir=working_dir, - workers=workers, - ) - - super().__init__( - learner_name="HYPERPARAMETER_OPTIMIZER", - task=task, - label=label, - weights=weights, - ranking_group=ranking_group, - uplift_treatment=uplift_treatment, - data_spec_args=data_spec_args, - data_spec=data_spec, - hyper_parameters=hyper_parameters, - deployment_config=deployment_config, - tuner=tuner, - ) - - def train( - self, - ds: dataset.InputDataset, - valid: Optional[dataset.InputDataset] = None, - ) -> generic_model.GenericModel: - """Trains a model on the given dataset. - - Options for dataset reading are given on the learner. Consult the - documentation of the learner or ydf.create_vertical_dataset() for additional - information on dataset reading in YDF. - - Usage example: - - ``` - import ydf - import pandas as pd - - train_ds = pd.read_csv(...) - - learner = ydf.HyperparameterOptimizerLearner(label="label") - model = learner.train(train_ds) - print(model.summary()) - ``` - - If training is interrupted (for example, by interrupting the cell execution - in Colab), the model will be returned to the state it was in at the moment - of interruption. - - Args: - ds: Training dataset. - valid: Optional validation dataset. Some learners, such as Random Forest, - do not need validation dataset. Some learners, such as - GradientBoostedTrees, automatically extract a validation dataset from - the training dataset if the validation dataset is not provided. - - Returns: - A trained model. 
- """ - return super().train(ds, valid) - - @classmethod - def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: - return abstract_learner_pb2.LearnerCapabilities( - support_max_training_duration=True, - resume_training=False, - support_validation_dataset=False, - support_partial_cache_dataset_format=False, - support_max_model_size_in_memory=False, - support_monotonic_constraints=False, - ) - - @classmethod - def hyperparameter_templates( - cls, - ) -> Dict[str, hyperparameters.HyperparameterTemplate]: - r"""Hyperparameter templates for this Learner. - - This learner currently does not provide any hyperparameter templates, this - method is provided for consistency with other learners. - - Returns: - Empty dictionary. - """ - return {} - - class GradientBoostedTreesLearner(generic_learner.GenericLearner): r"""Gradient Boosted Trees learning algorithm. @@ -1135,6 +923,18 @@ class GradientBoostedTreesLearner(generic_learner.GenericLearner): expressed in seconds. Each learning algorithm is free to use this parameter at it sees fit. Enabling maximum training duration makes the model training non-deterministic. Default: -1.0. + mhld_oblique_max_num_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection. + Increasing this value increases the training time. Decreasing this value + acts as a regularization. The value should be in [2, + num_numerical_features]. If the value is above the total number of + numerical features, the value is capped automatically. The value 1 is + allowed but results in ordinary (non-oblique) splits. Default: None. + mhld_oblique_sample_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling + controlled by the "num_candidate_attributes" or + "num_candidate_attributes_ratio" parameters. If false, all the attributes + are tested. Default: None. min_examples: Minimum number of examples in a node. Default: 5. 
missing_value_policy: Method used to handle missing attribute values. - `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean @@ -1195,6 +995,16 @@ class GradientBoostedTreesLearner(generic_learner.GenericLearner): IN_NODE. - IN_NODE: The features are sorted just before being used in the node. This solution is slow but consumes little amount of memory. . Default: "PRESORT". + sparse_oblique_max_num_projections: For sparse oblique splits i.e. + `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after + the num_projections_exponent). Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Increasing "max_num_projections" increases the training time but not the + inference time. In late stage model development, if every bit of accuracy + is important, increase this value. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) does not define this hyperparameter. + Default: None. sparse_oblique_normalization: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Normalization applied on the features, before applying the sparse oblique projections. - `NONE`: No normalization. - @@ -1204,12 +1014,28 @@ class GradientBoostedTreesLearner(generic_learner.GenericLearner): max-min) estimated on the entire train dataset. Default: None. sparse_oblique_num_projections_exponent: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. - sparse_oblique_projection_density_factor: For sparse oblique splits i.e. - `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. + to test at each node. 
Increasing this value very likely improves the + quality of the model, drastically increases the training time, and does not + impact the inference time. Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Therefore, increasing this `num_projections_exponent` and possibly + `max_num_projections` may improve model quality, but will also + significantly increase training time. Note that the complexity of + (classic) Random Forests is roughly proportional to + `num_projections_exponent=0.5`, since it considers sqrt(num_features) for + a split. The complexity of (classic) GBDT is roughly proportional to + `num_projections_exponent=1`, since it considers all features for a split. + The paper "Sparse Projection Oblique Random Forests" (Tomita et al, 2020) + recommends values in [1/4, 2]. Default: None. + sparse_oblique_projection_density_factor: Density of the projections as an + exponent of the number of features. Independently for each projection, + each feature has a probability "projection_density_factor / num_features" + to be considered in the projection. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) calls this parameter `lambda` and + recommends values in [1, 5]. Increasing this value increases training and + inference time (on average). This value is best tuned for each dataset. + Default: None. sparse_oblique_weights: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Possible values: - `BINARY`: The oblique weights are sampled in {-1,1} (default). - `CONTINUOUS`: The oblique @@ -1217,9 +1043,11 @@ class GradientBoostedTreesLearner(generic_learner.GenericLearner): split_axis: What structure of split to consider for numerical features. - `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`: - Sparse oblique splits (i.e. 
splits one a small number of features) from - "Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default: - "AXIS_ALIGNED". + Sparse oblique splits (i.e. random splits on a small number of features) + from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. - + `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from + "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes + et al., 2019. Default: "AXIS_ALIGNED". subsample: Ratio of the dataset (sampling without replacement) used to train individual trees for the random sampling method. If \\"subsample\\" is set and if \\"sampling_method\\" is NOT set or set to \\"NONE\\", then @@ -1330,6 +1158,8 @@ def __init__( max_num_nodes: Optional[int] = None, maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0, maximum_training_duration_seconds: Optional[float] = -1.0, + mhld_oblique_max_num_attributes: Optional[int] = None, + mhld_oblique_sample_attributes: Optional[bool] = None, min_examples: Optional[int] = 5, missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION", num_candidate_attributes: Optional[int] = -1, @@ -1341,6 +1171,7 @@ def __init__( selective_gradient_boosting_ratio: Optional[float] = 0.01, shrinkage: Optional[float] = 0.1, sorting_strategy: Optional[str] = "PRESORT", + sparse_oblique_max_num_projections: Optional[int] = None, sparse_oblique_normalization: Optional[str] = None, sparse_oblique_num_projections_exponent: Optional[float] = None, sparse_oblique_projection_density_factor: Optional[float] = None, @@ -1407,6 +1238,8 @@ def __init__( maximum_model_size_in_memory_in_bytes ), "maximum_training_duration_seconds": maximum_training_duration_seconds, + "mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes, + "mhld_oblique_sample_attributes": mhld_oblique_sample_attributes, "min_examples": min_examples, "missing_value_policy": missing_value_policy, "num_candidate_attributes": num_candidate_attributes, @@ -1418,6 +1251,9 @@ 
def __init__( "selective_gradient_boosting_ratio": selective_gradient_boosting_ratio, "shrinkage": shrinkage, "sorting_strategy": sorting_strategy, + "sparse_oblique_max_num_projections": ( + sparse_oblique_max_num_projections + ), "sparse_oblique_normalization": sparse_oblique_normalization, "sparse_oblique_num_projections_exponent": ( sparse_oblique_num_projections_exponent @@ -1472,6 +1308,7 @@ def train( self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> gradient_boosted_trees_model.GradientBoostedTreesModel: """Trains a model on the given dataset. @@ -1502,11 +1339,15 @@ def train( do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 of False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. """ - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) @classmethod def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: @@ -1845,6 +1686,7 @@ def train( self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> gradient_boosted_trees_model.GradientBoostedTreesModel: """Trains a model on the given dataset. @@ -1875,11 +1717,15 @@ def train( do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. 
Levels are: 0 of False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. """ - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) @classmethod def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: @@ -2074,6 +1920,18 @@ class CartLearner(generic_learner.GenericLearner): expressed in seconds. Each learning algorithm is free to use this parameter at it sees fit. Enabling maximum training duration makes the model training non-deterministic. Default: -1.0. + mhld_oblique_max_num_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. Maximum number of attributes in the projection. + Increasing this value increases the training time. Decreasing this value + acts as a regularization. The value should be in [2, + num_numerical_features]. If the value is above the total number of + numerical features, the value is capped automatically. The value 1 is + allowed but results in ordinary (non-oblique) splits. Default: None. + mhld_oblique_sample_attributes: For MHLD oblique splits i.e. + `split_axis=MHLD_OBLIQUE`. If true, applies the attribute sampling + controlled by the "num_candidate_attributes" or + "num_candidate_attributes_ratio" parameters. If false, all the attributes + are tested. Default: None. min_examples: Minimum number of examples in a node. Default: 5. missing_value_policy: Method used to handle missing attribute values. - `GLOBAL_IMPUTATION`: Missing attribute values are imputed, with the mean @@ -2114,6 +1972,16 @@ class CartLearner(generic_learner.GenericLearner): IN_NODE. - IN_NODE: The features are sorted just before being used in the node. This solution is slow but consumes little amount of memory. . Default: "PRESORT". + sparse_oblique_max_num_projections: For sparse oblique splits i.e. + `split_axis=SPARSE_OBLIQUE`. 
Maximum number of projections (applied after + the num_projections_exponent). Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Increasing "max_num_projections" increases the training time but not the + inference time. In late stage model development, if every bit of accuracy + is important, increase this value. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) does not define this hyperparameter. + Default: None. sparse_oblique_normalization: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Normalization applied on the features, before applying the sparse oblique projections. - `NONE`: No normalization. - @@ -2123,12 +1991,28 @@ class CartLearner(generic_learner.GenericLearner): max-min) estimated on the entire train dataset. Default: None. sparse_oblique_num_projections_exponent: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. - sparse_oblique_projection_density_factor: For sparse oblique splits i.e. - `split_axis=SPARSE_OBLIQUE`. Controls of the number of random projections - to test at each node as `num_features^num_projections_exponent`. Default: - None. + to test at each node. Increasing this value very likely improves the + quality of the model, drastically increases the training time, and does not + impact the inference time. Oblique splits try out + max(p^num_projections_exponent, max_num_projections) random projections + for choosing a split, where p is the number of numerical features. + Therefore, increasing this `num_projections_exponent` and possibly + `max_num_projections` may improve model quality, but will also + significantly increase training time. 
Note that the complexity of + (classic) Random Forests is roughly proportional to + `num_projections_exponent=0.5`, since it considers sqrt(num_features) for + a split. The complexity of (classic) GBDT is roughly proportional to + `num_projections_exponent=1`, since it considers all features for a split. + The paper "Sparse Projection Oblique Random Forests" (Tomita et al, 2020) + recommends values in [1/4, 2]. Default: None. + sparse_oblique_projection_density_factor: Density of the projections as an + exponent of the number of features. Independently for each projection, + each feature has a probability "projection_density_factor / num_features" + to be considered in the projection. The paper "Sparse Projection Oblique + Random Forests" (Tomita et al, 2020) calls this parameter `lambda` and + recommends values in [1, 5]. Increasing this value increases training and + inference time (on average). This value is best tuned for each dataset. + Default: None. sparse_oblique_weights: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Possible values: - `BINARY`: The oblique weights are sampled in {-1,1} (default). - `CONTINUOUS`: The oblique @@ -2136,9 +2020,11 @@ class CartLearner(generic_learner.GenericLearner): split_axis: What structure of split to consider for numerical features. - `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`: - Sparse oblique splits (i.e. splits one a small number of features) from - "Sparse Projection Oblique Random Forests", Tomita et al., 2020. Default: - "AXIS_ALIGNED". + Sparse oblique splits (i.e. random splits on a small number of features) + from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. - + `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from + "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes + et al., 2019. Default: "AXIS_ALIGNED". 
uplift_min_examples_in_treatment: For uplift models only. Minimum number of examples per treatment in a node. Default: 5. uplift_split_score: For uplift models only. Splitter score i.e. score @@ -2214,6 +2100,8 @@ def __init__( max_num_nodes: Optional[int] = None, maximum_model_size_in_memory_in_bytes: Optional[float] = -1.0, maximum_training_duration_seconds: Optional[float] = -1.0, + mhld_oblique_max_num_attributes: Optional[int] = None, + mhld_oblique_sample_attributes: Optional[bool] = None, min_examples: Optional[int] = 5, missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION", num_candidate_attributes: Optional[int] = 0, @@ -2221,6 +2109,7 @@ def __init__( pure_serving_model: Optional[bool] = False, random_seed: Optional[int] = 123456, sorting_strategy: Optional[str] = "PRESORT", + sparse_oblique_max_num_projections: Optional[int] = None, sparse_oblique_normalization: Optional[str] = None, sparse_oblique_num_projections_exponent: Optional[float] = None, sparse_oblique_projection_density_factor: Optional[float] = None, @@ -2261,6 +2150,8 @@ def __init__( maximum_model_size_in_memory_in_bytes ), "maximum_training_duration_seconds": maximum_training_duration_seconds, + "mhld_oblique_max_num_attributes": mhld_oblique_max_num_attributes, + "mhld_oblique_sample_attributes": mhld_oblique_sample_attributes, "min_examples": min_examples, "missing_value_policy": missing_value_policy, "num_candidate_attributes": num_candidate_attributes, @@ -2268,6 +2159,9 @@ def __init__( "pure_serving_model": pure_serving_model, "random_seed": random_seed, "sorting_strategy": sorting_strategy, + "sparse_oblique_max_num_projections": ( + sparse_oblique_max_num_projections + ), "sparse_oblique_normalization": sparse_oblique_normalization, "sparse_oblique_num_projections_exponent": ( sparse_oblique_num_projections_exponent @@ -2319,6 +2213,7 @@ def train( self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> 
random_forest_model.RandomForestModel: """Trains a model on the given dataset. @@ -2349,18 +2244,22 @@ def train( do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 of False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. """ - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) @classmethod def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: return abstract_learner_pb2.LearnerCapabilities( support_max_training_duration=True, resume_training=False, - support_validation_dataset=False, + support_validation_dataset=True, support_partial_cache_dataset_format=False, support_max_model_size_in_memory=False, support_monotonic_constraints=False, diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc index e748af20..1d4078c4 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc @@ -618,6 +618,7 @@ class $0(generic_learner.GenericLearner): self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> $7: """Trains a model on the given dataset. @@ -648,11 +649,15 @@ class $0(generic_learner.GenericLearner): do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. 
If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 of False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. """ - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) )", /*$0*/ class_name, /*$1*/ learner_key, /*$2*/ fields_documentation, diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc index cc58d8f4..687883d6 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc @@ -271,6 +271,7 @@ class FakeAlgorithmLearner(generic_learner.GenericLearner): self, ds: dataset.InputDataset, valid: Optional[dataset.InputDataset] = None, + verbose: Optional[Union[int, bool]] = None, ) -> generic_model.GenericModel: """Trains a model on the given dataset. @@ -301,11 +302,15 @@ class FakeAlgorithmLearner(generic_learner.GenericLearner): do not need validation dataset. Some learners, such as GradientBoostedTrees, automatically extract a validation dataset from the training dataset if the validation dataset is not provided. + verbose: Verbose level during training. If None, uses the global verbose + level of `ydf.verbose`. Levels are: 0 of False: No logs, 1 or True: + Print a few logs in a notebook; prints all the logs in a terminal. 2: + Prints all the logs on all surfaces. Returns: A trained model. 
""" - return super().train(ds, valid) + return super().train(ds=ds, valid=valid, verbose=verbose) @classmethod def capabilities(cls) -> abstract_learner_pb2.LearnerCapabilities: diff --git a/yggdrasil_decision_forests/port/python/ydf/utils/log.py b/yggdrasil_decision_forests/port/python/ydf/utils/log.py index 1b007953..a30f3c93 100644 --- a/yggdrasil_decision_forests/port/python/ydf/utils/log.py +++ b/yggdrasil_decision_forests/port/python/ydf/utils/log.py @@ -32,7 +32,7 @@ import enum import io import sys -from typing import Any, Optional, Set +from typing import Any, Optional, Set, Union from ydf.cc import ydf @@ -59,13 +59,13 @@ class WarningMessage(enum.Enum): _ALREADY_DISPLAYED_WARNING_IDS: Set[WarningMessage] = set() -def verbose(level: int = 2) -> int: +def verbose(level: Union[int, bool] = 2) -> int: """Sets the verbose level of YDF. The verbose levels are: - 0: Print no logs. - 1: Print a few logs in a colab or notebook cell. Print all the logs in the - console. This is the default verbose level. + 0 or False: Print no logs. + 1 or True: Print a few logs in a colab or notebook cell. Print all the logs + in the console. This is the default verbose level. 2: Prints all the logs on all surfaces. Usage example: @@ -85,6 +85,10 @@ def verbose(level: int = 2) -> int: Returns: The previous verbose level. """ + + if isinstance(level, bool): + level = 1 if level else 0 + global _VERBOSE_LEVEL old = _VERBOSE_LEVEL _VERBOSE_LEVEL = level From f7499ec0190b9534a304d57837d5030e41ae78bf Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 30 May 2024 06:13:49 -0700 Subject: [PATCH 09/30] Add ability to update YDF model with JAX parameters. 
PiperOrigin-RevId: 638627996 --- .../port/python/ydf/cc/ydf.pyi | 1 + .../port/python/ydf/model/export_jax.py | 59 ++- .../port/python/ydf/model/generic_model.py | 48 ++- .../model/gradient_boosted_trees_model/BUILD | 1 + .../gradient_boosted_trees_model.py | 9 +- .../gradient_boosted_trees_model_test.py | 31 +- .../gradient_boosted_trees_wrapper.cc | 10 + .../gradient_boosted_trees_wrapper.h | 2 + .../port/python/ydf/model/jax_model_test.py | 335 ++++++++++++------ .../port/python/ydf/model/model.cc | 2 + 10 files changed, 376 insertions(+), 122 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi b/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi index dcf16fbb..b2a2658c 100644 --- a/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi +++ b/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi @@ -200,6 +200,7 @@ class GradientBoostedTreesCCModel(DecisionForestCCModel): def validation_loss(self) -> float: ... def validation_evaluation(self) -> metric_pb2.EvaluationResults: ... def initial_predictions(self) -> npt.NDArray[float]: ... + def set_initial_predictions(self, values: npt.NDArray[float]): ... def num_trees_per_iter(self) -> int: ... def loss(self) -> gradient_boosted_trees_pb2.Loss: ... 
diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py b/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py index cfb1822d..05a31211 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_jax.py @@ -351,9 +351,9 @@ def stack(features: List[InternalFeatureItem], dtype): ) -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass class BeginNodeIdx: - """Index of the first leaf and non leaf node in a tree.""" + """Index of leaf and non leaf node in a tree.""" leaf_node: int non_leaf_node: int @@ -1052,3 +1052,58 @@ def sum_iter(i, a): new_node_offset_if_non_leaf, # Non-leaf node_offset, # Leaf ) + + +def update_with_jax_params( + model: generic_model.GenericModel, + params: Dict[str, Any], +): + """Updates the model with JAX params as created by `to_jax_function`. + + Args: + model: A YDF model. + params: See "update_with_jax_params" in generic_model.py. + """ + + if not isinstance(model, decision_forest_model.DecisionForestModel): + raise ValueError("The model is not a decision forest") + + if isinstance( + model, gradient_boosted_trees_model.GradientBoostedTreesModel + ) and (initial_predictions := params.get(_PARAM_INITIAL_PREDICTIONS)): + model.set_initial_predictions(initial_predictions) + + leaf_values = params.get(_PARAM_LEAF_VALUES) + + # Only scan the trees if the user updates node parameters. + # Note: Add other node parameters here. 
+ if leaf_values is not None: + cur_node = BeginNodeIdx(leaf_node=0, non_leaf_node=0) + + for tree_idx, tree in enumerate(model.iter_trees()): + _update_node_with_jax_param(tree.root, cur_node, leaf_values) + model.set_tree(tree_idx, tree) + + +def _update_node_with_jax_param( + node: tree_lib.AbstractNode, + cur_node: BeginNodeIdx, + leaf_values: Optional[jax.Array], +): + """Updates recursively the node values.""" + + if node.is_leaf: + assert isinstance(node, tree_lib.Leaf) + # TODO: Add support for other types of leaf nodes. + if not isinstance(node.value, tree_lib.RegressionValue): + raise ValueError( + "The YDF Jax exporter does not support this leaf value:" + f" {node.value!r}" + ) + node.value.value = leaf_values[cur_node.leaf_node] + cur_node.leaf_node += 1 + else: + cur_node.non_leaf_node += 1 + assert isinstance(node, tree_lib.NonLeaf) + _update_node_with_jax_param(node.neg_child, cur_node, leaf_values) + _update_node_with_jax_param(node.pos_child, cur_node, leaf_values) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py index 7eee2933..e6bad13f 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py @@ -836,16 +836,16 @@ def to_jax_function( # pytype: disable=name-error import jax.numpy as jnp # Train a model. - model = ydf.RandomForestLearner(label="l").train({ + model = ydf.GradientBoostedTreesLearner(label="l").train({ "f1": np.random.random(size=100), "f2": np.random.random(size=100), "l": np.random.randint(2, size=100), }) # Convert model to a JAX function. - jax_model = model.o_jax_function() + jax_model = model.to_jax_function() - # Make predictions with the TF module. + # Make predictions with the JAX function. 
jax_predictions = jax_model.predict({ "f1": jnp.array([0, 0.5, 1]), "f2": jnp.array([1, 0, 0.5]), @@ -875,6 +875,48 @@ def to_jax_function( # pytype: disable=name-error leaves_as_params=leaves_as_params, ) + def update_with_jax_params(self, params: Dict[str, Any]): + """Updates the model with JAX params as created by `to_jax_function`. + + Usage example: + + ```python + import ydf + import numpy as np + import jax.numpy as jnp + + # Train a model with YDF + dataset = { + "f1": np.random.random(size=100), + "f2": np.random.random(size=100), + "l": np.random.randint(2, size=100), + } + model = ydf.GradientBoostedTreesLearner(label="l").train(dataset) + + # Convert model to a JAX function with leaf values as parameters. + jax_model = model.to_jax_function( + leaves_as_params=True, + apply_activation=True) + # Note: The learnable model parameters are in `jax_model.params`. + + # Finetune the model parameters with your own logic. + jax_model.params = fine_tune_model(jax_model.params, ...) + + # Update the YDF model with the finetuned parameters + model.update_with_jax_params(jax_model.params) + + # Make predictions with the finetuned YDF model + predictions = model.predict(dataset) + + # Save the YDF model + model.save("/tmp/my_ydf_model") + ``` + + Args: + params: Learnable parameters of the model generated with `to_jax_function`. 
+ """ + _get_export_jax().update_with_jax_params(model=self, params=params) + def hyperparameter_optimizer_logs( self, ) -> Optional[optimizer_logs.OptimizerLogs]: diff --git a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/BUILD index 9eacedb4..3d07e6f6 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/BUILD @@ -53,6 +53,7 @@ py_test( ":gradient_boosted_trees_model", # absl/logging dep, # absl/testing:absltest dep, + # absl/testing:parameterized dep, # numpy dep, # pandas dep, "//ydf/dataset:dataspec", diff --git a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model.py b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model.py index 8812f8a1..f375806b 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model.py @@ -15,8 +15,9 @@ """Definitions for Gradient Boosted Trees models.""" import math -from typing import Optional +from typing import Optional, Sequence +import numpy as np import numpy.typing as npt from yggdrasil_decision_forests.metric import metric_pb2 @@ -41,6 +42,12 @@ def initial_predictions(self) -> npt.NDArray[float]: """Returns the model's initial predictions (i.e. the model bias).""" return self._model.initial_predictions() + def set_initial_predictions(self, initial_predictions: Sequence[float]): + """Sets the model's initial predictions (i.e. 
the model bias).""" + return self._model.set_initial_predictions( + np.asarray(initial_predictions, np.float32) + ) + def validation_evaluation(self) -> Optional[metric.Evaluation]: """Returns the validation evaluation of the model, if available. diff --git a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model_test.py index 2c75fdad..c87eeefc 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_model_test.py @@ -19,6 +19,7 @@ from absl import logging from absl.testing import absltest +from absl.testing import parameterized import numpy as np import numpy.testing as npt import pandas as pd @@ -42,18 +43,19 @@ Tree = tree_lib.Tree -class GradientBoostedTreesTest(absltest.TestCase): +def load_model( + name: str, + directory: str = "model", +) -> gradient_boosted_trees_model.GradientBoostedTreesModel: + path = os.path.join(test_utils.ydf_test_data_path(), directory, name) + return model_lib.load_model(path) + + +class GradientBoostedTreesTest(parameterized.TestCase): def setUp(self): super().setUp() - def load_model( - name: str, - directory: str = "model", - ) -> gradient_boosted_trees_model.GradientBoostedTreesModel: - path = os.path.join(test_utils.ydf_test_data_path(), directory, name) - return model_lib.load_model(path) - # This model is a classification model for pure serving. 
self.adult_binary_class_gbdt = load_model("adult_binary_class_gbdt") @@ -140,6 +142,19 @@ def test_initial_predictions(self): initial_predictions = self.adult_binary_class_gbdt.initial_predictions() np.testing.assert_allclose(initial_predictions, [-1.1630996]) + @parameterized.parameters( + "adult_binary_class_gbdt", + "iris_multi_class_gbdt", + "abalone_regression_gbdt", + ) + def test_set_initial_predictions(self, model_name): + model = load_model(model_name) + initial_predictions = model.initial_predictions() + model.set_initial_predictions(initial_predictions * 2.0) + np.testing.assert_allclose( + initial_predictions * 2, model.initial_predictions() + ) + def test_validation_evaluation_empty(self): dataset = { "x1": np.array([0, 0, 0, 1, 1, 1]), diff --git a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.cc b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.cc index c10a4e23..d5091f00 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.cc +++ b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -56,4 +57,13 @@ py::array_t GradientBoostedTreesCCModel::initial_predictions() const { return initial_predictions; } +void GradientBoostedTreesCCModel::set_initial_predictions( + const py::array_t& values) { + std::vector std_values(values.size(), 0.0f); + for (int i = 0; i < values.size(); i++) { + std_values[i] = values.at(i); + } + gbt_model_->set_initial_predictions(std::move(std_values)); +} + } // namespace yggdrasil_decision_forests::port::python diff --git a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.h 
b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.h index 5edc4fb4..e7defd48 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.h +++ b/yggdrasil_decision_forests/port/python/ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.h @@ -63,6 +63,8 @@ class GradientBoostedTreesCCModel : public DecisionForestCCModel { py::array_t initial_predictions() const; + void set_initial_predictions(const py::array_t& values); + ::yggdrasil_decision_forests::model::gradient_boosted_trees::proto::Loss loss() const { return gbt_model_->loss(); diff --git a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py index 9939e326..b42c5b0e 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/jax_model_test.py @@ -188,6 +188,116 @@ def plot_ellipse_predictions( fig.savefig(path) +def check_toy_model(test_self, model): + test_self.assertEqual( + model.get_tree(0).pretty(model.data_spec()), + """\ +'f1' >= 2 [score=0 missing=False] + ├─(pos)─ 'c1' in ['x', 'y'] [score=0 missing=False] + │ ├─(pos)─ 'c1' in ['x'] [score=0 missing=False] + │ │ ├─(pos)─ value=1 + │ │ └─(neg)─ value=2 + │ └─(neg)─ value=3 + └─(neg)─ 'f1' >= 1 [score=0 missing=False] + ├─(pos)─ value=4 + └─(neg)─ value=5 +""", + ) + + test_self.assertEqual( + model.get_tree(1).pretty(model.data_spec()), + """\ +'f2' >= 1.5 [score=0 missing=False] + ├─(pos)─ value=6 + └─(neg)─ value=7 +""", + ) + + +def create_toy_model(test_self): + columns = ["f1", "c1", "f2", "label_regress"] + model = specialized_learners.GradientBoostedTreesLearner( + label="label_regress", + task=generic_learner.Task.REGRESSION, + num_trees=1, + ).train(create_dataset(columns)) + model.set_initial_predictions([0]) + model.remove_tree(0) + + # pylint: 
disable=invalid-name + RegressionValue = tree_lib.RegressionValue + Leaf = tree_lib.Leaf + NonLeaf = tree_lib.NonLeaf + NumericalHigherThanCondition = tree_lib.NumericalHigherThanCondition + CategoricalIsInCondition = tree_lib.CategoricalIsInCondition + Tree = tree_lib.Tree + # pylint: enable=invalid-name + + model.add_tree( + Tree( + root=NonLeaf( + condition=NumericalHigherThanCondition( + missing=False, score=0.0, attribute=1, threshold=2.0 + ), + pos_child=NonLeaf( + condition=CategoricalIsInCondition( + missing=False, + score=0.0, + attribute=2, + mask=[1, 2], + ), + pos_child=NonLeaf( + condition=CategoricalIsInCondition( + missing=False, + score=0.0, + attribute=2, + mask=[1], + ), + pos_child=Leaf( + value=RegressionValue(num_examples=0.0, value=1.0) + ), + neg_child=Leaf( + value=RegressionValue(num_examples=0.0, value=2.0) + ), + ), + neg_child=Leaf( + value=RegressionValue(num_examples=0.0, value=3.0) + ), + ), + neg_child=NonLeaf( + condition=NumericalHigherThanCondition( + missing=False, score=0.0, attribute=1, threshold=1.0 + ), + pos_child=Leaf( + value=RegressionValue(num_examples=0.0, value=4.0) + ), + neg_child=Leaf( + value=RegressionValue(num_examples=0.0, value=5.0) + ), + ), + ) + ) + ) + + model.add_tree( + Tree( + root=NonLeaf( + condition=NumericalHigherThanCondition( + missing=False, score=0.0, attribute=3, threshold=1.5 + ), + pos_child=Leaf( + value=RegressionValue(num_examples=0.0, value=6.0) + ), + neg_child=Leaf( + value=RegressionValue(num_examples=0.0, value=7.0) + ), + ) + ) + ) + check_toy_model(test_self, model) + return model + + class JaxModelTest(parameterized.TestCase): @parameterized.parameters( @@ -561,109 +671,7 @@ def test_categorical_list_to_bitmap_invalid(self): ) def test_internal_forest_on_manual(self): - columns = ["f1", "c1", "f2", "label_regress"] - model = specialized_learners.RandomForestLearner( - label="label_regress", - task=generic_learner.Task.REGRESSION, - num_trees=1, - ).train(create_dataset(columns)) - 
model.remove_tree(0) - - # pylint: disable=invalid-name - RegressionValue = tree_lib.RegressionValue - Leaf = tree_lib.Leaf - NonLeaf = tree_lib.NonLeaf - NumericalHigherThanCondition = tree_lib.NumericalHigherThanCondition - CategoricalIsInCondition = tree_lib.CategoricalIsInCondition - Tree = tree_lib.Tree - # pylint: enable=invalid-name - - model.add_tree( - Tree( - root=NonLeaf( - condition=NumericalHigherThanCondition( - missing=False, score=0.0, attribute=1, threshold=2.0 - ), - pos_child=NonLeaf( - condition=CategoricalIsInCondition( - missing=False, - score=0.0, - attribute=2, - mask=[1, 2], - ), - pos_child=NonLeaf( - condition=CategoricalIsInCondition( - missing=False, - score=0.0, - attribute=2, - mask=[1], - ), - pos_child=Leaf( - value=RegressionValue(num_examples=0.0, value=1.0) - ), - neg_child=Leaf( - value=RegressionValue(num_examples=0.0, value=2.0) - ), - ), - neg_child=Leaf( - value=RegressionValue(num_examples=0.0, value=3.0) - ), - ), - neg_child=NonLeaf( - condition=NumericalHigherThanCondition( - missing=False, score=0.0, attribute=1, threshold=1.0 - ), - pos_child=Leaf( - value=RegressionValue(num_examples=0.0, value=4.0) - ), - neg_child=Leaf( - value=RegressionValue(num_examples=0.0, value=5.0) - ), - ), - ) - ) - ) - - model.add_tree( - Tree( - root=NonLeaf( - condition=NumericalHigherThanCondition( - missing=False, score=0.0, attribute=3, threshold=1.5 - ), - pos_child=Leaf( - value=RegressionValue(num_examples=0.0, value=6.0) - ), - neg_child=Leaf( - value=RegressionValue(num_examples=0.0, value=7.0) - ), - ) - ) - ) - - self.assertEqual( - model.get_tree(0).pretty(model.data_spec()), - """\ -'f1' >= 2 [score=0 missing=False] - ├─(pos)─ 'c1' in ['x', 'y'] [score=0 missing=False] - │ ├─(pos)─ 'c1' in ['x'] [score=0 missing=False] - │ │ ├─(pos)─ value=1 - │ │ └─(neg)─ value=2 - │ └─(neg)─ value=3 - └─(neg)─ 'f1' >= 1 [score=0 missing=False] - ├─(pos)─ value=4 - └─(neg)─ value=5 -""", - ) - - self.assertEqual( - 
model.get_tree(1).pretty(model.data_spec()), - """\ -'f2' >= 1.5 [score=0 missing=False] - ├─(pos)─ value=6 - └─(neg)─ value=7 -""", - ) - + model = create_toy_model(self) internal_forest = to_jax.InternalForest(model) self.assertEqual( @@ -934,7 +942,7 @@ def prediction_fn(features: Dict[str, jax.Array]) -> jax.Array: ) @parameterized.named_parameters( - ("leaves_ellipse", "ellipse", True, False), + ("leaves_ellipse_axis_aligned", "ellipse", True, False), # ("leaves_mnist", "mnist", True , False), # Skip in sandboxed test ("leaves_ellipse_oblique", "ellipse", True, True), ) @@ -993,20 +1001,24 @@ def to_jax_array(d): jax_test_ds = to_jax_array(test_ds) jax_finetune_ds = to_jax_array(finetune_ds) + @jax.jit + def compute_predictions(state, batch): + batch = batch.copy() + batch.pop(label) + return jax_model.predict(batch, state) + @jax.jit def compute_accuracy(state, batch): batch = batch.copy() labels = batch.pop(label) - features = batch - predictions = jax_model.predict(features, state) + predictions = jax_model.predict(batch, state) return jnp.mean((predictions >= 0.0) == labels) @jax.jit def compute_loss(state, batch): batch = batch.copy() labels = batch.pop(label) - features = batch - logits = jax_model.predict(features, state) + logits = jax_model.predict(batch, state) loss = optax.sigmoid_binary_cross_entropy(logits, labels).mean() return loss @@ -1068,6 +1080,113 @@ def train_step(opt_state, mdl_state, batch): post_tuned_jax_test_accuracy, pre_tuned_jax_test_accuracy + 0.01 ) + # Update the YDF model with the finetuned parameters + model.update_with_jax_params(mdl_state) + + # Check the weights have been updated + new_jax_model = to_jax.to_jax_function( + model, leaves_as_params=leaves_as_params + ) + np.testing.assert_allclose( + mdl_state["initial_predictions"], + new_jax_model.params["initial_predictions"], + rtol=1e-5, + atol=1e-5, + ) + np.testing.assert_allclose( + mdl_state["leaf_values"], + new_jax_model.params["leaf_values"], + rtol=1e-5, + 
atol=1e-5, + ) + + # Check the predictions of the updated YDF model + finetuned_ydf_predictions = model.predict( + test_ds, + ) + finetuned_jax_predictions = compute_predictions(mdl_state, jax_test_ds) + np.testing.assert_allclose( + jax.nn.sigmoid(finetuned_jax_predictions), + finetuned_ydf_predictions, + rtol=1e-5, + atol=1e-5, + ) + + def test_update_with_jax_params_manual(self): + model = create_toy_model(self) + check_toy_model(self, model) + + jax_model = to_jax.to_jax_function(model, leaves_as_params=True) + to_jax.update_with_jax_params(model, jax_model.params) + + # Nothing have changed yet + check_toy_model(self, model) + + np.testing.assert_allclose( + jax_model.params["leaf_values"], + [5.0, 4.0, 3.0, 2.0, 1.0, 7.0, 6.0], + rtol=1e-5, + atol=1e-5, + ) + + np.testing.assert_allclose( + jax_model.params["initial_predictions"], + [0.0], + rtol=1e-5, + atol=1e-5, + ) + + jax_model.params["leaf_values"] = jnp.asarray( + [1.0, 2.0, 3.0, 4.0, 5, 6.0, 7.0], jnp.float32 + ) + jax_model.params["initial_predictions"] = jnp.asarray([1.0], jnp.float32) + to_jax.update_with_jax_params(model, jax_model.params) + + np.testing.assert_allclose( + model.initial_predictions(), + jax_model.params["initial_predictions"], + rtol=1e-5, + atol=1e-5, + ) + + new_jax_model = to_jax.to_jax_function(model, leaves_as_params=True) + np.testing.assert_allclose( + jax_model.params["initial_predictions"], + new_jax_model.params["initial_predictions"], + rtol=1e-5, + atol=1e-5, + ) + np.testing.assert_allclose( + jax_model.params["leaf_values"], + new_jax_model.params["leaf_values"], + rtol=1e-5, + atol=1e-5, + ) + + self.assertEqual( + model.get_tree(0).pretty(model.data_spec()), + """\ +'f1' >= 2 [score=0 missing=False] + ├─(pos)─ 'c1' in ['x', 'y'] [score=0 missing=False] + │ ├─(pos)─ 'c1' in ['x'] [score=0 missing=False] + │ │ ├─(pos)─ value=5 + │ │ └─(neg)─ value=4 + │ └─(neg)─ value=3 + └─(neg)─ 'f1' >= 1 [score=0 missing=False] + ├─(pos)─ value=2 + └─(neg)─ value=1 +""", + ) + + 
self.assertEqual( + model.get_tree(1).pretty(model.data_spec()), + """\ +'f2' >= 1.5 [score=0 missing=False] + ├─(pos)─ value=7 + └─(neg)─ value=6 +""", + ) + if __name__ == "__main__": if sys.version_info < (3, 9): diff --git a/yggdrasil_decision_forests/port/python/ydf/model/model.cc b/yggdrasil_decision_forests/port/python/ydf/model/model.cc index 2ddf4334..20406838 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/model.cc +++ b/yggdrasil_decision_forests/port/python/ydf/model/model.cc @@ -209,6 +209,8 @@ void init_model(py::module_& m) { .def("validation_loss", &GradientBoostedTreesCCModel::validation_loss) .def("initial_predictions", &GradientBoostedTreesCCModel::initial_predictions) + .def("set_initial_predictions", + &GradientBoostedTreesCCModel::set_initial_predictions) .def("validation_evaluation", &GradientBoostedTreesCCModel::validation_evaluation) .def("loss", &GradientBoostedTreesCCModel::loss) From 83bead872281fd328e4355498419b9e67263bf94 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 30 May 2024 07:25:46 -0700 Subject: [PATCH 10/30] No public description PiperOrigin-RevId: 638644757 --- yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py index 8960f095..138ab6ff 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py @@ -491,7 +491,7 @@ def dataspec_to_normalized_columns( for column_spec in columns: if column_spec.name not in data and column_spec.name in required_columns: raise ValueError( - f"The data spec expects columns {column_spec.name} which was not" + f"The data spec expects columns {column_spec.name!r} which was not" f" found in the data. Available columns: {list(data)}. 
Required" f" columns: {required_columns}" ) From c92df409d516f25720eec92d700819a06986fea9 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 30 May 2024 07:44:42 -0700 Subject: [PATCH 11/30] Divide "TrainWithStatus" into "TrainWithStatus" and "TrainWithStatusImpl". PiperOrigin-RevId: 638649524 --- yggdrasil_decision_forests/learner/BUILD | 3 + .../learner/abstract_learner.cc | 73 ++++++++++++++++++- .../learner/abstract_learner.h | 43 +++++++++-- .../learner/abstract_learner_test.cc | 4 +- yggdrasil_decision_forests/learner/cart/BUILD | 1 - .../learner/cart/cart.cc | 14 +--- .../learner/cart/cart.h | 4 +- .../distributed_gradient_boosted_trees/BUILD | 1 - .../distributed_gradient_boosted_trees.cc | 19 +---- .../distributed_gradient_boosted_trees.h | 6 +- .../learner/export_doc_test.cc | 10 ++- .../learner/gradient_boosted_trees/BUILD | 1 - .../gradient_boosted_trees.cc | 34 +-------- .../gradient_boosted_trees.h | 8 +- .../learner/hyperparameters_optimizer/BUILD | 1 - .../hyperparameters_optimizer.cc | 23 +----- .../hyperparameters_optimizer.h | 8 +- .../learner/multitasker/BUILD | 1 - .../learner/multitasker/multitasker.cc | 3 +- .../learner/multitasker/multitasker.h | 4 +- .../learner/random_forest/BUILD | 1 - .../learner/random_forest/random_forest.cc | 15 +--- .../learner/random_forest/random_forest.h | 4 +- .../ydf/learner/wrapper/wrapper_test.cc | 2 +- yggdrasil_decision_forests/utils/usage.h | 2 - .../utils/usage_default.cc | 2 - 26 files changed, 150 insertions(+), 137 deletions(-) diff --git a/yggdrasil_decision_forests/learner/BUILD b/yggdrasil_decision_forests/learner/BUILD index 2a788220..1d25f795 100644 --- a/yggdrasil_decision_forests/learner/BUILD +++ b/yggdrasil_decision_forests/learner/BUILD @@ -75,6 +75,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:synchronization_primitives", "//yggdrasil_decision_forests/utils:uid", + "//yggdrasil_decision_forests/utils:usage", 
"@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -180,8 +181,10 @@ cc_test( ":abstract_learner", ":abstract_learner_cc_proto", ":export_doc", + "//yggdrasil_decision_forests/model:abstract_model", "//yggdrasil_decision_forests/utils:filesystem", "//yggdrasil_decision_forests/utils:test", + "@com_google_absl//absl/status:statusor", "@com_google_googletest//:gtest_main", ], ) diff --git a/yggdrasil_decision_forests/learner/abstract_learner.cc b/yggdrasil_decision_forests/learner/abstract_learner.cc index bbc9ad12..e33e8ebb 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,8 @@ #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" #include "absl/strings/substitute.h" -#include "absl/time/time.h" +#include "absl/time/clock.h" +#include "absl/types/optional.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/dataset/types.h" @@ -53,6 +55,7 @@ #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/synchronization_primitives.h" #include "yggdrasil_decision_forests/utils/uid.h" +#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -271,21 +274,85 @@ absl::Status AbstractLearner::LinkTrainingConfig( return absl::OkStatus(); } +// Non status; dataset in memory. std::unique_ptr AbstractLearner::Train( const dataset::VerticalDataset& train_dataset) const { return TrainWithStatus(train_dataset).value(); } +// Non status; dataset on disk. 
std::unique_ptr AbstractLearner::Train( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec) const { return TrainWithStatus(typed_path, data_spec).value(); } +// API; dataset in memory. absl::StatusOr> AbstractLearner::TrainWithStatus( - const absl::string_view typed_path, + const dataset::VerticalDataset& train_dataset, + absl::optional> + valid_dataset) const { + utils::usage::OnTrainingStart(train_dataset.data_spec(), training_config(), + train_dataset.nrow()); + const auto begin_training = absl::Now(); + + ASSIGN_OR_RETURN(auto model, + TrainWithStatusImpl(train_dataset, valid_dataset)); + + utils::usage::OnTrainingEnd(train_dataset.data_spec(), training_config(), + train_dataset.nrow(), *model, + absl::Now() - begin_training); + + if (training_config().pure_serving_model()) { + RETURN_IF_ERROR(model->MakePureServing()); + } + return model; +} + +// Impl; dataset in memory. +absl::StatusOr> +AbstractLearner::TrainWithStatusImpl( + const dataset::VerticalDataset& train_dataset, + absl::optional> + valid_dataset) const { + // This method should always be implemented by learners. + return absl::UnimplementedError( + "The learner does not implement TrainWithStatusImpl (recommended) " + "TrainWithStatus and " + "TrainWithStatusImpl (deprecated)."); +} + +// API; dataset on disk. 
+absl::StatusOr> AbstractLearner::TrainWithStatus( + absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const { + utils::usage::OnTrainingStart(data_spec, training_config(), + /*num_examples=*/-1); + const auto begin_training = absl::Now(); + + ASSIGN_OR_RETURN( + auto model, TrainWithStatusImpl(typed_path, data_spec, typed_valid_path)); + + utils::usage::OnTrainingEnd(data_spec, training_config(), + /*num_examples=*/-1, *model, + absl::Now() - begin_training); + + if (training_config().pure_serving_model()) { + RETURN_IF_ERROR(model->MakePureServing()); + } + return model; +} + +// Impl; dataset on disk. +absl::StatusOr> +AbstractLearner::TrainWithStatusImpl( + absl::string_view typed_path, + const dataset::proto::DataSpecification& data_spec, + const absl::optional& typed_valid_path) const { + // If training on disk is not implemented, we load the dataset and use + // training from memory. + // List the columns used for the training. // Only these columns will be loaded. proto::TrainingConfigLinking link_config; @@ -310,7 +377,7 @@ absl::StatusOr> AbstractLearner::TrainWithStatus( /*required_columns=*/{}, dataset_loading_config)); valid_dataset = valid_dataset_data; } - return TrainWithStatus(train_dataset, valid_dataset); + return TrainWithStatusImpl(train_dataset, valid_dataset); } absl::Status CheckGenericHyperParameterSpecification( diff --git a/yggdrasil_decision_forests/learner/abstract_learner.h b/yggdrasil_decision_forests/learner/abstract_learner.h index 15e907f4..e33a515f 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.h +++ b/yggdrasil_decision_forests/learner/abstract_learner.h @@ -18,6 +18,7 @@ #ifndef YGGDRASIL_DECISION_FORESTS_LEARNER_ABSTRACT_LEARNER_H_ #define YGGDRASIL_DECISION_FORESTS_LEARNER_ABSTRACT_LEARNER_H_ +#include #include #include @@ -46,6 +47,9 @@ class AbstractLearner { // Trains a model using the dataset stored on disk at the path "typed_path". 
// + // A learner might use distributed training, or load the dataset in memory and + // fallback to in-memory training. + // // A typed path is a dataset with a format prefix. prefix format. For example, // "csv:/tmp/dataset.csv". The path supports sharding, globbing and comma // separation. See the "Dataset path and format" section of the user manual @@ -56,8 +60,11 @@ class AbstractLearner { // for validation. If "typed_valid_path" is not provided, a validation dataset // will be extracted from the training dataset. If the algorithm does not have // the "use_validation_dataset" capability, "typed_valid_path" is ignored. + // + // This method is virtual for historical reasons with external codebase. + // Internally or in any new code, this method should not be overridden. virtual absl::StatusOr> TrainWithStatus( - const absl::string_view typed_path, + absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path = {}) const; @@ -68,19 +75,22 @@ class AbstractLearner { // for validation. If "valid_dataset" is not provided, a validation dataset // will be extracted from the training dataset. If the algorithm does not have // the "use_validation_dataset" capability, "valid_dataset" is ignored. + // + // This method is virtual for historical reasons with external codebase. + // Internally or in any new code, this method should not be overridden. virtual absl::StatusOr> TrainWithStatus( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const = 0; + valid_dataset = {}) const; - // Similar as TrainWithStatus, but crash in case of error. - // [Deprecated] + // [Deprecated] Similar as TrainWithStatus, but fails (CHECK) in case of + // error. virtual std::unique_ptr Train( - const absl::string_view typed_path, + absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec) const; - // Trains and returns a model from a training dataset stored on drive. 
- // [Deprecated] + // [Deprecated] Similar as TrainWithStatus, but fails (CHECK) in case of + // error. virtual std::unique_ptr Train( const dataset::VerticalDataset& train_dataset) const; @@ -164,6 +174,25 @@ class AbstractLearner { stop_training_trigger_ = trigger; } + // Implementation of the "TrainWithStatus" function. Callers should call + // "TrainWithStatus". Learners should implement "TrainWithStatusImpl" (either + // both versions, to support both distributed and in-memory training) or only + // the in-memory version below. + virtual absl::StatusOr> TrainWithStatusImpl( + absl::string_view typed_path, + const dataset::proto::DataSpecification& data_spec, + const absl::optional& typed_valid_path) const; + + // Implementation of the "TrainWithStatus" function. Callers should call + // "TrainWithStatus". Learners should implement this "TrainWithStatusImpl" + // function. + // + // This method is not a pure virtual function for historical reasons. + virtual absl::StatusOr> TrainWithStatusImpl( + const dataset::VerticalDataset& train_dataset, + absl::optional> + valid_dataset) const; + protected: // Training configuration. Contains the hyper parameters of the learner. 
proto::TrainingConfig training_config_; diff --git a/yggdrasil_decision_forests/learner/abstract_learner_test.cc b/yggdrasil_decision_forests/learner/abstract_learner_test.cc index a73b4a46..a01fc566 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner_test.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner_test.cc @@ -212,7 +212,7 @@ TEST(AbstractLearner, EvaluateLearner) { explicit FakeLearner(const proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset = {}) const override { @@ -274,7 +274,7 @@ TEST(AbstractLearner, MaximumModelSizeInMemoryInBytes) { explicit FakeLearner(const proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset = {}) const override { diff --git a/yggdrasil_decision_forests/learner/cart/BUILD b/yggdrasil_decision_forests/learner/cart/BUILD index a02ab53e..90d89b53 100644 --- a/yggdrasil_decision_forests/learner/cart/BUILD +++ b/yggdrasil_decision_forests/learner/cart/BUILD @@ -34,7 +34,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:random", "//yggdrasil_decision_forests/utils:status_macros", - "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", diff --git a/yggdrasil_decision_forests/learner/cart/cart.cc b/yggdrasil_decision_forests/learner/cart/cart.cc index 31af1801..8bf17378 100644 --- a/yggdrasil_decision_forests/learner/cart/cart.cc +++ b/yggdrasil_decision_forests/learner/cart/cart.cc @@ -47,7 +47,6 @@ #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/random.h" 
#include "yggdrasil_decision_forests/utils/status_macros.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -136,7 +135,7 @@ CartLearner::GetGenericHyperParameterSpecification() const { return hparam_def; } -absl::StatusOr> CartLearner::TrainWithStatus( +absl::StatusOr> CartLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { @@ -177,8 +176,6 @@ absl::StatusOr> CartLearner::TrainWithStatus( YDF_LOG(INFO) << "Training CART on " << train_dataset.nrow() << " example(s) and " << config_link.features().size() << " feature(s)."; - utils::usage::OnTrainingStart(train_dataset.data_spec(), config, config_link, - train_dataset.nrow()); std::vector weights; RETURN_IF_ERROR(dataset::GetWeights(train_dataset, config_link, &weights)); @@ -271,20 +268,11 @@ absl::StatusOr> CartLearner::TrainWithStatus( mdl->mutable_out_of_bag_evaluations()->push_back(oob_evaluation); } - utils::usage::OnTrainingEnd(train_dataset.data_spec(), config, config_link, - train_dataset.nrow(), *mdl, - absl::Now() - begin_training); - // Cache the structural variable importance in the model data. RETURN_IF_ERROR(mdl->PrecomputeVariableImportances( mdl->AvailableStructuralVariableImportances())); decision_tree::SetLeafIndices(mdl->mutable_decision_trees()); - - if (config.pure_serving_model()) { - RETURN_IF_ERROR(mdl->MakePureServing()); - } - return std::move(mdl); } diff --git a/yggdrasil_decision_forests/learner/cart/cart.h b/yggdrasil_decision_forests/learner/cart/cart.h index bd27ff4d..d6da448b 100644 --- a/yggdrasil_decision_forests/learner/cart/cart.h +++ b/yggdrasil_decision_forests/learner/cart/cart.h @@ -49,10 +49,10 @@ class CartLearner : public AbstractLearner { // Generic hyper parameter names. 
static constexpr char kHParamValidationRatio[] = "validation_ratio"; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; // Sets the hyper-parameters of the learning algorithm from "generic hparams". absl::Status SetHyperParametersImpl( diff --git a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD index 30f373c4..86812a46 100644 --- a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD +++ b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD @@ -37,7 +37,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:snapshot", "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:uid", - "//yggdrasil_decision_forests/utils:usage", "//yggdrasil_decision_forests/utils/distribute:core", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", diff --git a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc index 8e651fb9..2794aa4f 100644 --- a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc +++ b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc @@ -32,7 +32,6 @@ #include "yggdrasil_decision_forests/utils/snapshot.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/uid.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -54,7 +53,7 @@ DistributedGradientBoostedTreesLearner::Capabilities() const { } absl::StatusOr> 
-DistributedGradientBoostedTreesLearner::TrainWithStatus( +DistributedGradientBoostedTreesLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { @@ -199,12 +198,10 @@ DistributedGradientBoostedTreesLearner::GetGenericHyperParameterSpecification() } absl::StatusOr> -DistributedGradientBoostedTreesLearner::TrainWithStatus( +DistributedGradientBoostedTreesLearner::TrainWithStatusImpl( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const { - const auto begin_training = absl::Now(); - internal::Monitoring monitoring; // Extract and check the configuration. @@ -218,9 +215,6 @@ DistributedGradientBoostedTreesLearner::TrainWithStatus( data_spec, &spe_config)); RETURN_IF_ERROR(internal::CheckConfiguration(deployment_)); - utils::usage::OnTrainingStart(data_spec, config, config_link, - /*num_examples=*/-1); - // Working directory. auto work_directory = deployment().cache_path(); if (!deployment().try_resume_training()) { @@ -269,15 +263,6 @@ DistributedGradientBoostedTreesLearner::TrainWithStatus( updated_deployment, config, config_link, spe_config, dataset_cache_path, typed_valid_path, work_directory, data_spec, log_directory(), &monitoring)); - - if (config.pure_serving_model()) { - RETURN_IF_ERROR(model->MakePureServing()); - } - - utils::usage::OnTrainingEnd(data_spec, config, config_link, - /*num_examples=*/-1, *model, - absl::Now() - begin_training); - return std::move(model); } diff --git a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.h b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.h index 0aa808e9..d3b263e6 100644 --- a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.h +++ 
b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.h @@ -77,12 +77,12 @@ class DistributedGradientBoostedTreesLearner : public AbstractLearner { static constexpr char kHParamForceNumericalDiscretization[] = "force_numerical_discretization"; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const override; diff --git a/yggdrasil_decision_forests/learner/export_doc_test.cc b/yggdrasil_decision_forests/learner/export_doc_test.cc index b7f030b5..61e9a395 100644 --- a/yggdrasil_decision_forests/learner/export_doc_test.cc +++ b/yggdrasil_decision_forests/learner/export_doc_test.cc @@ -15,10 +15,14 @@ #include "yggdrasil_decision_forests/learner/export_doc.h" +#include + #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/status/statusor.h" #include "yggdrasil_decision_forests/learner/abstract_learner.h" #include "yggdrasil_decision_forests/learner/abstract_learner.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/test.h" @@ -31,7 +35,7 @@ class FakeLearner1 : public AbstractLearner { explicit FakeLearner1(const proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset = {}) const override { @@ -58,7 +62,7 @@ class FakeLearner2 : public AbstractLearner { explicit FakeLearner2(const proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - 
absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset = {}) const override { @@ -73,7 +77,7 @@ class FakeLearner3 : public AbstractLearner { explicit FakeLearner3(const proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset = {}) const override { diff --git a/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD b/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD index 15ea2d29..180b4fad 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD +++ b/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD @@ -56,7 +56,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:random", "//yggdrasil_decision_forests/utils:snapshot", "//yggdrasil_decision_forests/utils:status_macros", - "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", diff --git a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc index e5ada9e6..f4533521 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc +++ b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc @@ -69,7 +69,6 @@ #include "yggdrasil_decision_forests/utils/random.h" #include "yggdrasil_decision_forests/utils/snapshot.h" #include "yggdrasil_decision_forests/utils/status_macros.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -608,7 +607,7 @@ GradientBoostedTreesLearner::InitializeModel( } absl::StatusOr> 
-GradientBoostedTreesLearner::TrainWithStatus( +GradientBoostedTreesLearner::TrainWithStatusImpl( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const { @@ -616,8 +615,8 @@ GradientBoostedTreesLearner::TrainWithStatus( gradient_boosted_trees::proto::gradient_boosted_trees_config); if (!gbt_config.has_sample_with_shards()) { // Regular training. - return AbstractLearner::TrainWithStatus(typed_path, data_spec, - typed_valid_path); + return AbstractLearner::TrainWithStatusImpl(typed_path, data_spec, + typed_valid_path); } return ShardedSamplingTrain(typed_path, data_spec, typed_valid_path); @@ -644,10 +643,6 @@ GradientBoostedTreesLearner::ShardedSamplingTrain( internal::AllTrainingConfiguration config; RETURN_IF_ERROR(BuildAllTrainingConfiguration(data_spec, &config)); - utils::usage::OnTrainingStart(data_spec, config.train_config, - config.train_config_link, - /*num_examples=*/-1); - // Initialize the model. 
auto mdl = InitializeModel(config, data_spec); @@ -1124,22 +1119,12 @@ GradientBoostedTreesLearner::ShardedSamplingTrain( config, early_stopping, validation->dataset, deployment().num_threads(), mdl.get())); } - RETURN_IF_ERROR(FinalizeModel(log_directory_, mdl.get())); - - if (config.train_config.pure_serving_model()) { - RETURN_IF_ERROR(mdl->MakePureServing()); - } - - utils::usage::OnTrainingEnd( - data_spec, config.train_config, config.train_config_link, - /*num_examples=*/-1, *mdl, absl::Now() - begin_training); - return mdl; } absl::StatusOr> -GradientBoostedTreesLearner::TrainWithStatus( +GradientBoostedTreesLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { @@ -1172,9 +1157,6 @@ GradientBoostedTreesLearner::TrainWithStatus( << " example(s) and " << config.train_config_link.features().size() << " feature(s)."; - utils::usage::OnTrainingStart(train_dataset.data_spec(), config.train_config, - config.train_config_link, train_dataset.nrow()); - if (config.gbt_config->has_sample_with_shards()) { return absl::InvalidArgumentError( "\"sample_with_shards\" is not compatible with training " @@ -1658,15 +1640,7 @@ GradientBoostedTreesLearner::TrainWithStatus( RETURN_IF_ERROR(FinalizeModel(log_directory_, mdl.get())); - utils::usage::OnTrainingEnd(train_dataset.data_spec(), training_config(), - config.train_config_link, train_dataset.nrow(), - *mdl, absl::Now() - begin_training); - decision_tree::SetLeafIndices(mdl->mutable_decision_trees()); - - if (config.train_config.pure_serving_model()) { - RETURN_IF_ERROR(mdl->MakePureServing()); - } return std::move(mdl); } diff --git a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.h b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.h index fc84142d..38b72263 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.h +++ 
b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.h @@ -132,15 +132,15 @@ class GradientBoostedTreesLearner : public AbstractLearner { static constexpr char kHParamFocalLossGamma[] = "focal_loss_gamma"; static constexpr char kHParamFocalLossAlpha[] = "focal_loss_alpha"; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, - const absl::optional& typed_valid_path = {}) const override; + const absl::optional& typed_valid_path) const override; // Detects configuration errors and warnings. static absl::Status CheckConfiguration( diff --git a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/BUILD b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/BUILD index 0808d662..d2df9c63 100644 --- a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/BUILD +++ b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/BUILD @@ -38,7 +38,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:concurrency", "//yggdrasil_decision_forests/utils:filesystem", "//yggdrasil_decision_forests/utils:hyper_parameters", - "//yggdrasil_decision_forests/utils:usage", "//yggdrasil_decision_forests/utils/distribute:distribute_without_implementations", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", diff --git a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.cc b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.cc index 5def4c84..e20ec858 100644 --- a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.cc +++ 
b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.cc @@ -35,7 +35,6 @@ #include "yggdrasil_decision_forests/utils/concurrency_streamprocessor.h" #include "yggdrasil_decision_forests/utils/distribute/distribute.h" #include "yggdrasil_decision_forests/utils/filesystem.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -165,7 +164,7 @@ HyperParameterOptimizerLearner::TrainFromFileOnMemoryDataset( } absl::StatusOr> -HyperParameterOptimizerLearner::TrainWithStatus( +HyperParameterOptimizerLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { @@ -184,8 +183,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( "deployment configs."); } - const auto begin_training = absl::Now(); - // The effective configuration is the user configuration + the default value + // the automatic configuration (if enabled) + the copy of the non-specified // training configuration field from the learner to the sub-learner (e.g. copy @@ -197,9 +194,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( const proto::HyperParametersOptimizerLearnerTrainingConfig& spe_config = effective_config.GetExtension(proto::hyperparameters_optimizer_config); - utils::usage::OnTrainingStart(train_dataset.data_spec(), effective_config, - config_link, train_dataset.nrow()); - // Initialize the learner with the base hyperparameters. 
ASSIGN_OR_RETURN(auto base_learner, BuildBaseLearner(spe_config, /*for_tuning=*/true)); @@ -231,9 +225,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( RETURN_IF_ERROR(base_learner->SetHyperParameters(best_params)); ASSIGN_OR_RETURN( auto mdl, base_learner->TrainWithStatus(train_dataset, valid_dataset)); - utils::usage::OnTrainingEnd(train_dataset.data_spec(), training_config(), - config_link, train_dataset.nrow(), *mdl, - absl::Now() - begin_training); *mdl->mutable_hyperparameter_optimizer_logs() = logs; return mdl; } else { @@ -265,7 +256,7 @@ absl::Status HyperParameterOptimizerLearner::GetEffectiveConfiguration( } absl::StatusOr> -HyperParameterOptimizerLearner::TrainWithStatus( +HyperParameterOptimizerLearner::TrainWithStatusImpl( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const { @@ -274,8 +265,8 @@ HyperParameterOptimizerLearner::TrainWithStatus( deployment().execution_case() == model::proto::DeploymentConfig::ExecutionCase::kLocal) { // Load the dataset in memory and run the in-memory training. - return AbstractLearner::TrainWithStatus(typed_path, data_spec, - typed_valid_path); + return AbstractLearner::TrainWithStatusImpl(typed_path, data_spec, + typed_valid_path); } if (!deployment().has_distribute()) { @@ -284,8 +275,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( "deployment configs."); } - const auto begin_training = absl::Now(); - // The effective configuration is the user configuration + the default value + // the automatic configuration (if enabled) + the copy of the non-specified // training configuration field from the learner to the sub-learner (e.g. copy @@ -300,8 +289,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( // Initialize the remote workers. 
ASSIGN_OR_RETURN(auto manager, CreateDistributeManager(spe_config)); - utils::usage::OnTrainingStart(data_spec, effective_config, config_link, -1); - // Initialize the learner with the base hyperparameters. ASSIGN_OR_RETURN(auto base_learner, BuildBaseLearner(spe_config, /*for_tuning=*/true)); @@ -337,8 +324,6 @@ HyperParameterOptimizerLearner::TrainWithStatus( best_params, typed_path, data_spec, typed_valid_path, manager.get())); - utils::usage::OnTrainingEnd(data_spec, training_config(), config_link, -1, - *model, absl::Now() - begin_training); *model->mutable_hyperparameter_optimizer_logs() = logs; RETURN_IF_ERROR(manager->Done()); diff --git a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.h b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.h index 74c8d822..7919eab6 100644 --- a/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.h +++ b/yggdrasil_decision_forests/learner/hyperparameters_optimizer/hyperparameters_optimizer.h @@ -52,15 +52,15 @@ class HyperParameterOptimizerLearner : public AbstractLearner { // Unique identifier of the learning algorithm. static constexpr char kRegisteredName[] = "HYPERPARAMETER_OPTIMIZER"; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, - const absl::optional& typed_valid_path = {}) const override; + const absl::optional& typed_valid_path) const override; // Sets the hyper-parameters of the learning algorithm from "generic hparams". 
absl::Status SetHyperParametersImpl( diff --git a/yggdrasil_decision_forests/learner/multitasker/BUILD b/yggdrasil_decision_forests/learner/multitasker/BUILD index e2f21770..5b86efd9 100644 --- a/yggdrasil_decision_forests/learner/multitasker/BUILD +++ b/yggdrasil_decision_forests/learner/multitasker/BUILD @@ -27,7 +27,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:regex", "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:synchronization_primitives", - "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/yggdrasil_decision_forests/learner/multitasker/multitasker.cc b/yggdrasil_decision_forests/learner/multitasker/multitasker.cc index 50093e90..e9b04937 100644 --- a/yggdrasil_decision_forests/learner/multitasker/multitasker.cc +++ b/yggdrasil_decision_forests/learner/multitasker/multitasker.cc @@ -30,7 +30,6 @@ #include "yggdrasil_decision_forests/utils/regex.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/synchronization_primitives.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -112,7 +111,7 @@ MultitaskerLearner::MultitaskerLearner( : AbstractLearner(training_config) {} absl::StatusOr> -MultitaskerLearner::TrainWithStatus( +MultitaskerLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { diff --git a/yggdrasil_decision_forests/learner/multitasker/multitasker.h b/yggdrasil_decision_forests/learner/multitasker/multitasker.h index b917969c..7f84d93e 100644 --- a/yggdrasil_decision_forests/learner/multitasker/multitasker.h +++ b/yggdrasil_decision_forests/learner/multitasker/multitasker.h @@ -42,10 +42,10 @@ class MultitaskerLearner : public AbstractLearner { static constexpr char kRegisteredName[] = "MULTITASKER"; - 
absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; absl::Status SetHyperParameters( const proto::GenericHyperParameters& generic_hyper_params) override; diff --git a/yggdrasil_decision_forests/learner/random_forest/BUILD b/yggdrasil_decision_forests/learner/random_forest/BUILD index 833b9daf..0b5ba20d 100644 --- a/yggdrasil_decision_forests/learner/random_forest/BUILD +++ b/yggdrasil_decision_forests/learner/random_forest/BUILD @@ -49,7 +49,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:random", "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:synchronization_primitives", - "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", diff --git a/yggdrasil_decision_forests/learner/random_forest/random_forest.cc b/yggdrasil_decision_forests/learner/random_forest/random_forest.cc index ba6e6150..1596f16f 100644 --- a/yggdrasil_decision_forests/learner/random_forest/random_forest.cc +++ b/yggdrasil_decision_forests/learner/random_forest/random_forest.cc @@ -34,6 +34,7 @@ #include "absl/time/time.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/dataset/example_writer.h" +#include "yggdrasil_decision_forests/dataset/types.h" #include "yggdrasil_decision_forests/dataset/vertical_dataset.h" #include "yggdrasil_decision_forests/dataset/weight.h" #include "yggdrasil_decision_forests/dataset/weight.pb.h" @@ -43,7 +44,6 @@ #include "yggdrasil_decision_forests/learner/decision_tree/generic_parameters.h" #include "yggdrasil_decision_forests/learner/decision_tree/training.h" #include "yggdrasil_decision_forests/learner/random_forest/random_forest.pb.h" -#include "yggdrasil_decision_forests/dataset/types.h" #include 
"yggdrasil_decision_forests/metric/metric.h" #include "yggdrasil_decision_forests/metric/metric.pb.h" #include "yggdrasil_decision_forests/model/abstract_model.h" @@ -61,7 +61,6 @@ #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/synchronization_primitives.h" -#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -362,7 +361,7 @@ absl::Status RandomForestLearner::CheckConfiguration( } absl::StatusOr> -RandomForestLearner::TrainWithStatus( +RandomForestLearner::TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> valid_dataset) const { @@ -419,8 +418,6 @@ RandomForestLearner::TrainWithStatus( RETURN_IF_ERROR(CheckConfiguration(train_dataset.data_spec(), config_with_default, config_link, rf_config, deployment())); - utils::usage::OnTrainingStart(train_dataset.data_spec(), config_with_default, - config_link, train_dataset.nrow()); std::vector weights; @@ -891,10 +888,6 @@ RandomForestLearner::TrainWithStatus( deployment().num_threads(), mdl.get())); } - utils::usage::OnTrainingEnd(train_dataset.data_spec(), config_with_default, - config_link, train_dataset.nrow(), *mdl, - absl::Now() - begin_training); - if (!rf_config.export_oob_prediction_path().empty()) { RETURN_IF_ERROR(ExportOOBPredictions( config_with_default, config_link, train_dataset.data_spec(), @@ -907,10 +900,6 @@ RandomForestLearner::TrainWithStatus( decision_tree::SetLeafIndices(mdl->mutable_decision_trees()); - if (config_with_default.pure_serving_model()) { - RETURN_IF_ERROR(mdl->MakePureServing()); - } - return std::move(mdl); } diff --git a/yggdrasil_decision_forests/learner/random_forest/random_forest.h b/yggdrasil_decision_forests/learner/random_forest/random_forest.h index 36555194..4897b85f 100644 --- a/yggdrasil_decision_forests/learner/random_forest/random_forest.h +++ 
b/yggdrasil_decision_forests/learner/random_forest/random_forest.h @@ -74,10 +74,10 @@ class RandomForestLearner : public AbstractLearner { static constexpr char kHParamSamplingWithReplacement[] = "sampling_with_replacement"; - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, absl::optional> - valid_dataset = {}) const override; + valid_dataset) const override; // Detects configuration errors and warnings. static absl::Status CheckConfiguration( diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc index 687883d6..a5d9ae7f 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_test.cc @@ -41,7 +41,7 @@ class FakeLearner1 : public model::AbstractLearner { explicit FakeLearner1(const model::proto::TrainingConfig& training_config) : AbstractLearner(training_config) {} - absl::StatusOr> TrainWithStatus( + absl::StatusOr> TrainWithStatusImpl( const dataset::VerticalDataset& train_dataset, std::optional> valid_dataset = {}) const override { diff --git a/yggdrasil_decision_forests/utils/usage.h b/yggdrasil_decision_forests/utils/usage.h index 12fc2e53..565146b4 100644 --- a/yggdrasil_decision_forests/utils/usage.h +++ b/yggdrasil_decision_forests/utils/usage.h @@ -37,14 +37,12 @@ namespace usage { void OnTrainingStart( const dataset::proto::DataSpecification& data_spec, const model::proto::TrainingConfig& train_config, - const model::proto::TrainingConfigLinking& train_config_link, int64_t num_examples); // Complete a model training. // Should be called at the end of the "Train" methods of learners. 
void OnTrainingEnd(const dataset::proto::DataSpecification& data_spec, const model::proto::TrainingConfig& train_config, - const model::proto::TrainingConfigLinking& train_config_link, int64_t num_examples, const model::AbstractModel& model, absl::Duration training_duration); diff --git a/yggdrasil_decision_forests/utils/usage_default.cc b/yggdrasil_decision_forests/utils/usage_default.cc index 5d037adf..55b72bab 100644 --- a/yggdrasil_decision_forests/utils/usage_default.cc +++ b/yggdrasil_decision_forests/utils/usage_default.cc @@ -22,14 +22,12 @@ namespace usage { void OnTrainingStart( const dataset::proto::DataSpecification& data_spec, const model::proto::TrainingConfig& train_config, - const model::proto::TrainingConfigLinking& train_config_link, int64_t num_examples) { // Add usage tracking here. } void OnTrainingEnd(const dataset::proto::DataSpecification& data_spec, const model::proto::TrainingConfig& train_config, - const model::proto::TrainingConfigLinking& train_config_link, int64_t num_examples, const model::AbstractModel& model, absl::Duration training_duration) { // Add usage tracking here. From 0f0cd9714d6294c670136a031e34157220c97930 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 30 May 2024 08:57:03 -0700 Subject: [PATCH 12/30] - Add placeholder for dataset usage statistics. - Break the dependency from AbstractModel to VerticalDatasetIO. 
PiperOrigin-RevId: 638670083 --- yggdrasil_decision_forests/cli/BUILD | 1 + yggdrasil_decision_forests/cli/evaluate.cc | 5 +- yggdrasil_decision_forests/dataset/BUILD | 2 + .../dataset/example_writer.cc | 3 + .../dataset/vertical_dataset_io.cc | 4 + yggdrasil_decision_forests/learner/BUILD | 1 + .../learner/abstract_learner.cc | 7 + yggdrasil_decision_forests/model/BUILD | 29 +++- .../model/abstract_model.cc | 123 +------------- .../model/abstract_model.h | 27 +-- .../model/abstract_model_test.cc | 12 +- .../model/evaluate_on_disk.cc | 155 ++++++++++++++++++ .../model/evaluate_on_disk.h | 43 +++++ .../model/model_library.cc | 5 + yggdrasil_decision_forests/utils/BUILD | 2 + yggdrasil_decision_forests/utils/usage.h | 22 ++- .../utils/usage_default.cc | 18 +- 17 files changed, 299 insertions(+), 160 deletions(-) create mode 100644 yggdrasil_decision_forests/model/evaluate_on_disk.cc create mode 100644 yggdrasil_decision_forests/model/evaluate_on_disk.h diff --git a/yggdrasil_decision_forests/cli/BUILD b/yggdrasil_decision_forests/cli/BUILD index 83caca09..b197b7e9 100644 --- a/yggdrasil_decision_forests/cli/BUILD +++ b/yggdrasil_decision_forests/cli/BUILD @@ -149,6 +149,7 @@ cc_binary_ydf( "//yggdrasil_decision_forests/metric:report", "//yggdrasil_decision_forests/model:abstract_model", "//yggdrasil_decision_forests/model:all_models", + "//yggdrasil_decision_forests/model:evaluate_on_disk", "//yggdrasil_decision_forests/model:model_library", "//yggdrasil_decision_forests/model:prediction_cc_proto", "//yggdrasil_decision_forests/serving/decision_forest:register_engines", diff --git a/yggdrasil_decision_forests/cli/evaluate.cc b/yggdrasil_decision_forests/cli/evaluate.cc index 2a6a65f7..a998e6f9 100644 --- a/yggdrasil_decision_forests/cli/evaluate.cc +++ b/yggdrasil_decision_forests/cli/evaluate.cc @@ -31,6 +31,7 @@ #include "yggdrasil_decision_forests/metric/metric.pb.h" #include "yggdrasil_decision_forests/metric/report.h" #include 
"yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/evaluate_on_disk.h" #include "yggdrasil_decision_forests/model/model_library.h" #include "yggdrasil_decision_forests/model/prediction.pb.h" #include "yggdrasil_decision_forests/utils/filesystem.h" @@ -83,7 +84,9 @@ void Evaluate() { options.set_task(model->task()); } // evaluate model. - evaluation = model->Evaluate(absl::GetFlag(FLAGS_dataset), options, &rnd); + evaluation = + model::EvaluateOnDisk(*model, absl::GetFlag(FLAGS_dataset), options, &rnd) + .value(); const auto format = absl::GetFlag(FLAGS_format); if (format == "text") { diff --git a/yggdrasil_decision_forests/dataset/BUILD b/yggdrasil_decision_forests/dataset/BUILD index c607823f..12613711 100644 --- a/yggdrasil_decision_forests/dataset/BUILD +++ b/yggdrasil_decision_forests/dataset/BUILD @@ -188,6 +188,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -212,6 +213,7 @@ cc_library_ydf( ":formats_cc_proto", "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/yggdrasil_decision_forests/dataset/example_writer.cc b/yggdrasil_decision_forests/dataset/example_writer.cc index 40ee973e..3fe0237d 100644 --- a/yggdrasil_decision_forests/dataset/example_writer.cc +++ b/yggdrasil_decision_forests/dataset/example_writer.cc @@ -33,6 +33,7 @@ #include "yggdrasil_decision_forests/dataset/formats.pb.h" #include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/status_macros.h" +#include 
"yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace dataset { @@ -45,6 +46,8 @@ absl::StatusOr> CreateExampleWriter( ASSIGN_OR_RETURN(std::tie(sharded_path, format), GetDatasetPathAndTypeOrStatus(typed_path)); + utils::usage::OnSaveDataset(sharded_path); + const std::string& format_name = proto::DatasetFormat_Name(format); ASSIGN_OR_RETURN( auto writer, diff --git a/yggdrasil_decision_forests/dataset/vertical_dataset_io.cc b/yggdrasil_decision_forests/dataset/vertical_dataset_io.cc index cd0bae9c..f7c1f196 100644 --- a/yggdrasil_decision_forests/dataset/vertical_dataset_io.cc +++ b/yggdrasil_decision_forests/dataset/vertical_dataset_io.cc @@ -35,6 +35,7 @@ #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/status_macros.h" +#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace dataset { @@ -125,6 +126,8 @@ absl::Status LoadVerticalDataset( std::vector shards; RETURN_IF_ERROR(utils::ExpandInputShards(path, &shards)); + utils::usage::OnLoadDataset(path); + if (shards.size() <= 1 || config.num_threads <= 1) { // Loading in a single thread. 
return LoadVerticalDatasetSingleThread(typed_path, data_spec, dataset, @@ -240,6 +243,7 @@ absl::Status SaveVerticalDataset(const VerticalDataset& dataset, ASSIGN_OR_RETURN(auto writer, CreateExampleWriter(typed_path, dataset.data_spec(), num_records_by_shard)); + proto::Example example; for (VerticalDataset::row_t row = 0; row < dataset.nrow(); row++) { dataset.ExtractExample(row, &example); diff --git a/yggdrasil_decision_forests/learner/BUILD b/yggdrasil_decision_forests/learner/BUILD index 1d25f795..b4749b67 100644 --- a/yggdrasil_decision_forests/learner/BUILD +++ b/yggdrasil_decision_forests/learner/BUILD @@ -57,6 +57,7 @@ cc_library_ydf( ":abstract_learner_cc_proto", "//yggdrasil_decision_forests/dataset:data_spec", "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", + "//yggdrasil_decision_forests/dataset:formats", "//yggdrasil_decision_forests/dataset:types", "//yggdrasil_decision_forests/dataset:vertical_dataset", "//yggdrasil_decision_forests/dataset:vertical_dataset_io", diff --git a/yggdrasil_decision_forests/learner/abstract_learner.cc b/yggdrasil_decision_forests/learner/abstract_learner.cc index e33e8ebb..8a4f7b50 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,7 @@ #include "absl/types/optional.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" +#include "yggdrasil_decision_forests/dataset/formats.h" #include "yggdrasil_decision_forests/dataset/types.h" #include "yggdrasil_decision_forests/dataset/vertical_dataset.h" #include "yggdrasil_decision_forests/dataset/vertical_dataset_io.h" @@ -327,6 +329,11 @@ absl::StatusOr> AbstractLearner::TrainWithStatus( absl::string_view typed_path, const dataset::proto::DataSpecification& data_spec, const absl::optional& typed_valid_path) const { + std::string path; + 
ASSIGN_OR_RETURN(std::tie(std::ignore, path), + dataset::SplitTypeAndPath(typed_path)); + utils::usage::OnLoadDataset(path); + utils::usage::OnTrainingStart(data_spec, training_config(), /*num_examples=*/-1); const auto begin_training = absl::Now(); diff --git a/yggdrasil_decision_forests/model/BUILD b/yggdrasil_decision_forests/model/BUILD index 1374941a..97b10344 100644 --- a/yggdrasil_decision_forests/model/BUILD +++ b/yggdrasil_decision_forests/model/BUILD @@ -40,9 +40,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/dataset:data_spec", "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", "//yggdrasil_decision_forests/dataset:example_cc_proto", - "//yggdrasil_decision_forests/dataset:formats", "//yggdrasil_decision_forests/dataset:vertical_dataset", - "//yggdrasil_decision_forests/dataset:vertical_dataset_io", "//yggdrasil_decision_forests/dataset:weight", "//yggdrasil_decision_forests/dataset:weight_cc_proto", "//yggdrasil_decision_forests/metric", @@ -50,7 +48,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/metric:report", "//yggdrasil_decision_forests/serving:example_set", "//yggdrasil_decision_forests/serving:fast_engine", - "//yggdrasil_decision_forests/utils:concurrency", "//yggdrasil_decision_forests/utils:distribution", "//yggdrasil_decision_forests/utils:distribution_cc_proto", "//yggdrasil_decision_forests/utils:logging", @@ -58,7 +55,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:protobuf", "//yggdrasil_decision_forests/utils:random", "//yggdrasil_decision_forests/utils:registration", - "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:status_macros", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", @@ -70,6 +66,29 @@ cc_library_ydf( ], ) +cc_library_ydf( + name = "evaluate_on_disk", + srcs = ["evaluate_on_disk.cc"], + hdrs = ["evaluate_on_disk.h"], + deps = [ + ":abstract_model", + "//yggdrasil_decision_forests/dataset:formats", + 
"//yggdrasil_decision_forests/dataset:vertical_dataset", + "//yggdrasil_decision_forests/dataset:vertical_dataset_io", + "//yggdrasil_decision_forests/dataset:weight", + "//yggdrasil_decision_forests/metric", + "//yggdrasil_decision_forests/utils:concurrency", + "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:random", + "//yggdrasil_decision_forests/utils:sharded_io", + "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:synchronization_primitives", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + ], +) + # Note: The hyper parameter optimizer and the model library are in the same cc_library_ydf because they # co-depend on each others. cc_library_ydf( @@ -88,6 +107,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:filesystem", "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:usage", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -196,6 +216,7 @@ cc_test( "//yggdrasil_decision_forests/dataset:vertical_dataset", "//yggdrasil_decision_forests/dataset:vertical_dataset_io", "//yggdrasil_decision_forests/metric", + "//yggdrasil_decision_forests/model:evaluate_on_disk", "//yggdrasil_decision_forests/model/gradient_boosted_trees", "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/serving:example_set", diff --git a/yggdrasil_decision_forests/model/abstract_model.cc b/yggdrasil_decision_forests/model/abstract_model.cc index b6c72ac1..42e8295b 100644 --- a/yggdrasil_decision_forests/model/abstract_model.cc +++ b/yggdrasil_decision_forests/model/abstract_model.cc @@ -18,9 +18,9 @@ #include #include -#include +#include +#include #include -#include #include #include #include @@ -34,12 +34,11 @@ #include "absl/strings/str_join.h" #include 
"absl/strings/string_view.h" #include "absl/strings/substitute.h" +#include "absl/types/optional.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/dataset/example.pb.h" -#include "yggdrasil_decision_forests/dataset/formats.h" #include "yggdrasil_decision_forests/dataset/vertical_dataset.h" -#include "yggdrasil_decision_forests/dataset/vertical_dataset_io.h" #include "yggdrasil_decision_forests/dataset/weight.h" #include "yggdrasil_decision_forests/dataset/weight.pb.h" #include "yggdrasil_decision_forests/metric/metric.h" @@ -51,13 +50,11 @@ #include "yggdrasil_decision_forests/model/prediction.pb.h" #include "yggdrasil_decision_forests/serving/example_set.h" #include "yggdrasil_decision_forests/serving/fast_engine.h" -#include "yggdrasil_decision_forests/utils/concurrency.h" #include "yggdrasil_decision_forests/utils/distribution.h" #include "yggdrasil_decision_forests/utils/distribution.pb.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/protobuf.h" #include "yggdrasil_decision_forests/utils/random.h" -#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/status_macros.h" namespace yggdrasil_decision_forests { @@ -126,14 +123,6 @@ metric::proto::EvaluationResults AbstractModel::Evaluate( return EvaluateWithStatus(dataset, option, rnd, predictions).value(); } -metric::proto::EvaluationResults AbstractModel::Evaluate( - const absl::string_view typed_path, - const metric::proto::EvaluationOptions& option, - utils::RandomEngine* rnd) const { - // TODO: Fix. 
- return EvaluateWithStatus(typed_path, option, rnd).value(); -} - absl::StatusOr AbstractModel::EvaluateWithStatus( const dataset::VerticalDataset& dataset, @@ -150,22 +139,6 @@ AbstractModel::EvaluateWithStatus( return eval; } -absl::StatusOr -AbstractModel::EvaluateWithStatus( - const absl::string_view typed_path, - const metric::proto::EvaluationOptions& option, - utils::RandomEngine* rnd) const { - if (option.task() != task()) { - STATUS_FATAL("The evaluation and the model tasks differ."); - } - metric::proto::EvaluationResults eval; - RETURN_IF_ERROR( - metric::InitializeEvaluation(option, LabelColumnSpec(), &eval)); - RETURN_IF_ERROR(AppendEvaluation(typed_path, option, rnd, &eval)); - RETURN_IF_ERROR(metric::FinalizeEvaluation(option, LabelColumnSpec(), &eval)); - return eval; -} - absl::StatusOr AbstractModel::EvaluateWithEngine( const serving::FastEngine& engine, const dataset::VerticalDataset& dataset, @@ -444,96 +417,6 @@ absl::Status AbstractModel::AppendEvaluation( return absl::OkStatus(); } -absl::Status AbstractModel::AppendEvaluation( - const absl::string_view typed_path, - const metric::proto::EvaluationOptions& option, utils::RandomEngine* rnd, - metric::proto::EvaluationResults* eval) const { - dataset::proto::LinkedWeightDefinition weight_links; - if (option.has_weights()) { - RETURN_IF_ERROR(dataset::GetLinkedWeightDefinition( - option.weights(), data_spec_, &weight_links)); - } - - auto engine_or_status = BuildFastEngine(); - if (engine_or_status.ok()) { - const auto engine = std::move(engine_or_status.value()); - // Extract the shards from the dataset path. - std::string path, prefix; - std::tie(prefix, path) = dataset::SplitTypeAndPath(typed_path).value(); - std::vector shards; - RETURN_IF_ERROR(utils::ExpandInputShards(path, &shards)); - - // Evaluate each shard in a separate thread. - utils::concurrency::Mutex - mutex; // Guards "num_evaluated_shards" and "eval". 
- int num_evaluated_shards = 0; - absl::Status worker_status; - - const auto process_shard = [&option, eval, &mutex, &prefix, &engine, - &weight_links, &num_evaluated_shards, &shards, - this](absl::string_view shard, - int sub_rnd_seed) -> absl::Status { - utils::RandomEngine sub_rnd(sub_rnd_seed); - - dataset::VerticalDataset dataset; - RETURN_IF_ERROR(dataset::LoadVerticalDataset( - absl::StrCat(prefix, ":", shard), data_spec_, &dataset)); - - metric::proto::EvaluationResults sub_evaluation; - RETURN_IF_ERROR(metric::InitializeEvaluation(option, LabelColumnSpec(), - &sub_evaluation)); - - RETURN_IF_ERROR(AppendEvaluationWithEngine(dataset, option, weight_links, - *engine, &sub_rnd, nullptr, - &sub_evaluation)); - - utils::concurrency::MutexLock lock(&mutex); - RETURN_IF_ERROR(metric::MergeEvaluation(option, sub_evaluation, eval)); - num_evaluated_shards++; - LOG_INFO_EVERY_N_SEC(30, _ << num_evaluated_shards << "/" << shards.size() - << " shards evaluated"); - return absl::OkStatus(); - }; - - { - const int num_threads = std::min(shards.size(), 20); - utils::concurrency::ThreadPool thread_pool("evaluation", num_threads); - thread_pool.StartWorkers(); - for (const auto& shard : shards) { - thread_pool.Schedule([&shard, &mutex, &process_shard, &worker_status, - sub_rnd_seed = (*rnd)()]() -> void { - { - utils::concurrency::MutexLock lock(&mutex); - if (!worker_status.ok()) { - return; - } - } - auto sub_status = process_shard(shard, sub_rnd_seed); - { - utils::concurrency::MutexLock lock(&mutex); - worker_status.Update(sub_status); - } - }); - } - } - - RETURN_IF_ERROR(worker_status); - - } else { - // Evaluate using the (slow) generic inference. 
- YDF_LOG(WARNING) - << "Evaluation with the slow generic engine without distribution"; - dataset::VerticalDataset dataset; - RETURN_IF_ERROR( - dataset::LoadVerticalDataset(typed_path, data_spec_, &dataset)); - RETURN_IF_ERROR(AppendEvaluation(dataset, option, rnd, eval)); - return absl::OkStatus(); - } - - eval->set_num_folds(eval->num_folds() + 1); - return absl::OkStatus(); -} - absl::Status AbstractModel::AppendEvaluationOverrideType( const dataset::VerticalDataset& dataset, const metric::proto::EvaluationOptions& option, diff --git a/yggdrasil_decision_forests/model/abstract_model.h b/yggdrasil_decision_forests/model/abstract_model.h index 5334e78a..1fbaa731 100644 --- a/yggdrasil_decision_forests/model/abstract_model.h +++ b/yggdrasil_decision_forests/model/abstract_model.h @@ -21,8 +21,8 @@ #ifndef YGGDRASIL_DECISION_FORESTS_MODEL_ABSTRACT_MODEL_H_ #define YGGDRASIL_DECISION_FORESTS_MODEL_ABSTRACT_MODEL_H_ +#include #include -#include #include #include @@ -222,25 +222,6 @@ class AbstractModel { const metric::proto::EvaluationOptions& option, utils::RandomEngine* rnd, std::vector* predictions = nullptr) const; - // Evaluates the model on a dataset stored in disk. `typed_path` defines - // the type and the path pattern of the files, as described in - // `yggdrasil_decision_forests/datasets/format.h` file. - // This method is preferable when the number of examples is large since they - // do not have to be all first loaded into memory. - // Returns a finalized EvaluationResults. - // Evaluates the model on a dataset. Returns a finalized EvaluationResults. - // The random generator "rnd" is used bootstrapping of confidence intervals - // and sub-sampling evaluation (if configured in "option"). 
- absl::StatusOr EvaluateWithStatus( - const absl::string_view typed_path, - const metric::proto::EvaluationOptions& option, - utils::RandomEngine* rnd) const; - - metric::proto::EvaluationResults Evaluate( - const absl::string_view typed_path, - const metric::proto::EvaluationOptions& option, - utils::RandomEngine* rnd) const; - // Similar to "Evaluate", but allow to override the evaluation objective. absl::StatusOr EvaluateOverrideType( const dataset::VerticalDataset& dataset, @@ -472,9 +453,6 @@ class AbstractModel { "SaveModel/LoadModel instead."); } - protected: - explicit AbstractModel(const absl::string_view name) : name_(name) {} - absl::Status AppendEvaluationWithEngine( const dataset::VerticalDataset& dataset, const metric::proto::EvaluationOptions& option, @@ -483,6 +461,9 @@ class AbstractModel { std::vector* predictions, metric::proto::EvaluationResults* eval) const; + protected: + explicit AbstractModel(const absl::string_view name) : name_(name) {} + // Prints information about the hyper-parameter optimizer logs. 
void AppendHyperparameterOptimizerLogs(std::string* description) const; diff --git a/yggdrasil_decision_forests/model/abstract_model_test.cc b/yggdrasil_decision_forests/model/abstract_model_test.cc index 4f14c2ea..d9620bf0 100644 --- a/yggdrasil_decision_forests/model/abstract_model_test.cc +++ b/yggdrasil_decision_forests/model/abstract_model_test.cc @@ -29,6 +29,7 @@ #include "yggdrasil_decision_forests/dataset/vertical_dataset_io.h" #include "yggdrasil_decision_forests/metric/metric.h" #include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/evaluate_on_disk.h" #include "yggdrasil_decision_forests/model/fast_engine_factory.h" #include "yggdrasil_decision_forests/model/model_library.h" #include "yggdrasil_decision_forests/model/model_testing.h" @@ -420,10 +421,13 @@ TEST(Evaluate, FromDisk) { &model)); utils::RandomEngine rnd; - const auto evaluation = model->Evaluate( - absl::StrCat("csv:", - file::JoinPath(TestDataDir(), "dataset", "adult_test.csv")), - {}, &rnd); + const auto evaluation = + EvaluateOnDisk( + *model, + absl::StrCat("csv:", file::JoinPath(TestDataDir(), "dataset", + "adult_test.csv")), + {}, &rnd) + .value(); EXPECT_NEAR(metric::Accuracy(evaluation), 0.8723513, 0.000001); } diff --git a/yggdrasil_decision_forests/model/evaluate_on_disk.cc b/yggdrasil_decision_forests/model/evaluate_on_disk.cc new file mode 100644 index 00000000..fa8bab1f --- /dev/null +++ b/yggdrasil_decision_forests/model/evaluate_on_disk.cc @@ -0,0 +1,155 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "yggdrasil_decision_forests/model/evaluate_on_disk.h" + +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "yggdrasil_decision_forests/dataset/formats.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset_io.h" +#include "yggdrasil_decision_forests/dataset/weight.h" +#include "yggdrasil_decision_forests/metric/metric.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/utils/concurrency.h" +#include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/sharded_io.h" +#include "yggdrasil_decision_forests/utils/status_macros.h" +#include "yggdrasil_decision_forests/utils/synchronization_primitives.h" + +namespace yggdrasil_decision_forests::model { + +namespace { + +// Evaluates a model and add the evaluation to an already initialized evaluation +// proto. 
+absl::Status AppendEvaluation(const AbstractModel& model, + const absl::string_view typed_path, + const metric::proto::EvaluationOptions& option, + utils::RandomEngine* rnd, + metric::proto::EvaluationResults* eval) { + dataset::proto::LinkedWeightDefinition weight_links; + if (option.has_weights()) { + RETURN_IF_ERROR(dataset::GetLinkedWeightDefinition( + option.weights(), model.data_spec(), &weight_links)); + } + + auto engine_or_status = model.BuildFastEngine(); + if (engine_or_status.ok()) { + const auto engine = std::move(engine_or_status.value()); + // Extract the shards from the dataset path. + std::string path, prefix; + std::tie(prefix, path) = dataset::SplitTypeAndPath(typed_path).value(); + std::vector shards; + RETURN_IF_ERROR(utils::ExpandInputShards(path, &shards)); + + // Evaluate each shard in a separate thread. + utils::concurrency::Mutex + mutex; // Guards "num_evaluated_shards" and "eval". + int num_evaluated_shards = 0; + absl::Status worker_status; + + const auto process_shard = [&option, eval, &mutex, &prefix, &engine, + &weight_links, &num_evaluated_shards, &shards, + &model](absl::string_view shard, + int sub_rnd_seed) -> absl::Status { + utils::RandomEngine sub_rnd(sub_rnd_seed); + + dataset::VerticalDataset dataset; + RETURN_IF_ERROR(dataset::LoadVerticalDataset( + absl::StrCat(prefix, ":", shard), model.data_spec(), &dataset)); + + metric::proto::EvaluationResults sub_evaluation; + RETURN_IF_ERROR(metric::InitializeEvaluation( + option, model.LabelColumnSpec(), &sub_evaluation)); + + RETURN_IF_ERROR(model.AppendEvaluationWithEngine( + dataset, option, weight_links, *engine, &sub_rnd, nullptr, + &sub_evaluation)); + + utils::concurrency::MutexLock lock(&mutex); + RETURN_IF_ERROR(metric::MergeEvaluation(option, sub_evaluation, eval)); + num_evaluated_shards++; + LOG_INFO_EVERY_N_SEC(30, _ << num_evaluated_shards << "/" << shards.size() + << " shards evaluated"); + return absl::OkStatus(); + }; + + { + const int num_threads = 
std::min(shards.size(), 20); + utils::concurrency::ThreadPool thread_pool("evaluation", num_threads); + thread_pool.StartWorkers(); + for (const auto& shard : shards) { + thread_pool.Schedule([&shard, &mutex, &process_shard, &worker_status, + sub_rnd_seed = (*rnd)()]() -> void { + { + utils::concurrency::MutexLock lock(&mutex); + if (!worker_status.ok()) { + return; + } + } + auto sub_status = process_shard(shard, sub_rnd_seed); + { + utils::concurrency::MutexLock lock(&mutex); + worker_status.Update(sub_status); + } + }); + } + } + + RETURN_IF_ERROR(worker_status); + + } else { + // Evaluate using the (slow) generic inference. + YDF_LOG(WARNING) + << "Evaluation with the slow generic engine without distribution"; + dataset::VerticalDataset dataset; + RETURN_IF_ERROR( + dataset::LoadVerticalDataset(typed_path, model.data_spec(), &dataset)); + RETURN_IF_ERROR(model.AppendEvaluation(dataset, option, rnd, eval)); + return absl::OkStatus(); + } + + eval->set_num_folds(eval->num_folds() + 1); + return absl::OkStatus(); +} + +} // namespace + +absl::StatusOr EvaluateOnDisk( + const AbstractModel& model, const absl::string_view typed_path, + const metric::proto::EvaluationOptions& option, utils::RandomEngine* rnd) { + if (option.task() != model.task()) { + STATUS_FATAL("The evaluation and the model tasks differ."); + } + metric::proto::EvaluationResults eval; + RETURN_IF_ERROR( + metric::InitializeEvaluation(option, model.LabelColumnSpec(), &eval)); + RETURN_IF_ERROR(AppendEvaluation(model, typed_path, option, rnd, &eval)); + RETURN_IF_ERROR( + metric::FinalizeEvaluation(option, model.LabelColumnSpec(), &eval)); + return eval; +} + +} // namespace yggdrasil_decision_forests::model diff --git a/yggdrasil_decision_forests/model/evaluate_on_disk.h b/yggdrasil_decision_forests/model/evaluate_on_disk.h new file mode 100644 index 00000000..05fec549 --- /dev/null +++ b/yggdrasil_decision_forests/model/evaluate_on_disk.h @@ -0,0 +1,43 @@ +/* + * Copyright 2022 Google LLC. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Evaluation of a model on a dataset stored on disk. + +#ifndef YGGDRASIL_DECISION_FORESTS_MODEL_EVALUATE_ON_DISK_H_ +#define YGGDRASIL_DECISION_FORESTS_MODEL_EVALUATE_ON_DISK_H_ + +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/utils/random.h" + +namespace yggdrasil_decision_forests::model { + +// Evaluates the model on a dataset stored on disk. `typed_path` defines +// the type and the path pattern of the files, as described in +// `yggdrasil_decision_forests/dataset/formats.h` file. +// This method is preferable when the number of examples is large since they +// do not have to be all first loaded into memory. +// Returns a finalized EvaluationResults. +// Evaluates the model on a dataset. Returns a finalized EvaluationResults. +// The random generator "rnd" is used for bootstrapping of confidence intervals +// and sub-sampling evaluation (if configured in "option"). 
+absl::StatusOr EvaluateOnDisk( + const AbstractModel& model, const absl::string_view typed_path, + const metric::proto::EvaluationOptions& option, utils::RandomEngine* rnd); + +} // namespace yggdrasil_decision_forests::model + +#endif // YGGDRASIL_DECISION_FORESTS_MODEL_EVALUATE_ON_DISK_H_ diff --git a/yggdrasil_decision_forests/model/model_library.cc b/yggdrasil_decision_forests/model/model_library.cc index 79820d35..349d0b5f 100644 --- a/yggdrasil_decision_forests/model/model_library.cc +++ b/yggdrasil_decision_forests/model/model_library.cc @@ -32,6 +32,7 @@ #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/status_macros.h" +#include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace model { @@ -83,6 +84,8 @@ absl::Status SaveModel(absl::string_view directory, const AbstractModel& mdl, absl::Status SaveModel(absl::string_view directory, const AbstractModel* const mdl, ModelIOOptions io_options) { + utils::usage::OnSaveModel(directory); + RETURN_IF_ERROR(mdl->Validate()); RETURN_IF_ERROR(file::RecursivelyCreateDir(directory, file::Defaults())); proto::AbstractModel header; @@ -115,6 +118,8 @@ absl::StatusOr> LoadModel( absl::Status LoadModel(absl::string_view directory, std::unique_ptr* model, ModelIOOptions io_options) { + utils::usage::OnLoadModel(directory); + proto::AbstractModel header; std::string effective_directory = ImproveModelReadingPath(directory); diff --git a/yggdrasil_decision_forests/utils/BUILD b/yggdrasil_decision_forests/utils/BUILD index 7eec3406..da5dd41b 100644 --- a/yggdrasil_decision_forests/utils/BUILD +++ b/yggdrasil_decision_forests/utils/BUILD @@ -494,6 +494,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", "//yggdrasil_decision_forests/learner:abstract_learner_cc_proto", "//yggdrasil_decision_forests/model:abstract_model", + "@com_google_absl//absl/strings", 
"@com_google_absl//absl/time", ], ) @@ -507,6 +508,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", "//yggdrasil_decision_forests/learner:abstract_learner_cc_proto", "//yggdrasil_decision_forests/model:abstract_model", + "@com_google_absl//absl/strings", "@com_google_absl//absl/time", ], ) diff --git a/yggdrasil_decision_forests/utils/usage.h b/yggdrasil_decision_forests/utils/usage.h index 565146b4..a09138de 100644 --- a/yggdrasil_decision_forests/utils/usage.h +++ b/yggdrasil_decision_forests/utils/usage.h @@ -22,6 +22,9 @@ #ifndef YGGDRASIL_DECISION_FORESTS_TOOL_USAGE_H_ #define YGGDRASIL_DECISION_FORESTS_TOOL_USAGE_H_ +#include + +#include "absl/strings/string_view.h" #include "absl/time/time.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/learner/abstract_learner.pb.h" @@ -34,10 +37,9 @@ namespace usage { // Start a new model training. // Should be called at the start of the "Train" methods of learners. -void OnTrainingStart( - const dataset::proto::DataSpecification& data_spec, - const model::proto::TrainingConfig& train_config, - int64_t num_examples); +void OnTrainingStart(const dataset::proto::DataSpecification& data_spec, + const model::proto::TrainingConfig& train_config, + int64_t num_examples); // Complete a model training. // Should be called at the end of the "Train" methods of learners. @@ -56,6 +58,18 @@ void OnTrainingEnd(const dataset::proto::DataSpecification& data_spec, void OnInference(int64_t num_examples, const model::proto::Metadata& metadata); void OnInference(int64_t num_examples, const model::MetaData& metadata); +// When a dataset is loaded for training, inference, or other operations. +void OnLoadDataset(absl::string_view path); + +// When a dataset is saved. +void OnSaveDataset(absl::string_view path); + +// When a model is loaded for training, inference, or other operations. +void OnLoadModel(absl::string_view path); + +// When a model is saved. 
+void OnSaveModel(absl::string_view path); + // Enables / disable usage tracking. void EnableUsage(bool usage); diff --git a/yggdrasil_decision_forests/utils/usage_default.cc b/yggdrasil_decision_forests/utils/usage_default.cc index 55b72bab..9fef44d6 100644 --- a/yggdrasil_decision_forests/utils/usage_default.cc +++ b/yggdrasil_decision_forests/utils/usage_default.cc @@ -13,16 +13,18 @@ * limitations under the License. */ +#include + +#include "absl/strings/string_view.h" #include "yggdrasil_decision_forests/utils/usage.h" namespace yggdrasil_decision_forests { namespace utils { namespace usage { -void OnTrainingStart( - const dataset::proto::DataSpecification& data_spec, - const model::proto::TrainingConfig& train_config, - int64_t num_examples) { +void OnTrainingStart(const dataset::proto::DataSpecification& data_spec, + const model::proto::TrainingConfig& train_config, + int64_t num_examples) { // Add usage tracking here. } @@ -42,6 +44,14 @@ void OnInference(int64_t num_examples, const model::MetaData& metadata) { // Add usage tracking here. } +void OnLoadDataset(absl::string_view path) {} + +void OnSaveDataset(absl::string_view path) {} + +void OnLoadModel(absl::string_view path) {} + +void OnSaveModel(absl::string_view path) {} + void EnableUsage(bool usage) {} } // namespace usage From 5d56fb865ffc092f86eda16f718a39f20ebcce49 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Fri, 31 May 2024 06:25:38 -0700 Subject: [PATCH 13/30] Create a npm package for the js port of ydf. 
PiperOrigin-RevId: 639014915 --- documentation/public/mkdocs.yml | 4 +- .../port/javascript/README.md | 5 +- .../port/javascript/npm/README.md | 164 + .../port/javascript/npm/dist/readme.txt | 3 + .../port/javascript/npm/package-lock.json | 3823 +++++++++++++++++ .../port/javascript/npm/package.json | 36 + .../javascript/npm/test/inference.test.js | 96 + .../port/javascript/npm/test/model_1.zip | Bin 0 -> 7229 bytes .../port/javascript/npm/test/model_2.zip | Bin 0 -> 1476 bytes 9 files changed, 4126 insertions(+), 5 deletions(-) create mode 100644 yggdrasil_decision_forests/port/javascript/npm/README.md create mode 100644 yggdrasil_decision_forests/port/javascript/npm/dist/readme.txt create mode 100644 yggdrasil_decision_forests/port/javascript/npm/package-lock.json create mode 100644 yggdrasil_decision_forests/port/javascript/npm/package.json create mode 100644 yggdrasil_decision_forests/port/javascript/npm/test/inference.test.js create mode 100644 yggdrasil_decision_forests/port/javascript/npm/test/model_1.zip create mode 100644 yggdrasil_decision_forests/port/javascript/npm/test/model_2.zip diff --git a/documentation/public/mkdocs.yml b/documentation/public/mkdocs.yml index c6edbdc2..0b246796 100644 --- a/documentation/public/mkdocs.yml +++ b/documentation/public/mkdocs.yml @@ -91,14 +91,14 @@ nav: # - example distance: tutorial/example_distance.ipynb # TODO: model inspection, manual tree creation, custom loss. 
- Other APIs: - - TensorFlow Decision Forests: https://www.tensorflow.org/decision_forests - CLI quickstart: cli_quickstart.md + - JavaScript: https://www.npmjs.com/package/yggdrasil-decision-forests - CLI & C++ user manual: cli_user_manual.md - CLI commands: cli_commands.md - CLI examples: https://github.com/google/yggdrasil-decision-forests/tree/main/examples - C++ examples: https://github.com/google/yggdrasil-decision-forests/tree/main/examples/standalone + - TensorFlow Decision Forests: https://www.tensorflow.org/decision_forests - Go: https://github.com/google/yggdrasil-decision-forests/tree/main/yggdrasil_decision_forests/port/go - - JavaScript: https://github.com/google/yggdrasil-decision-forests/tree/main/yggdrasil_decision_forests/port/javascript - Changelog: changelog.md - Long-time support: lts.md - Contact: contact.md diff --git a/yggdrasil_decision_forests/port/javascript/README.md b/yggdrasil_decision_forests/port/javascript/README.md index e6419fde..5b33b0b8 100644 --- a/yggdrasil_decision_forests/port/javascript/README.md +++ b/yggdrasil_decision_forests/port/javascript/README.md @@ -3,7 +3,6 @@ The JavaScript API makes it possible to run an Yggdrasil Decision Forests model or a TensorFlow Decision Forests model in a webpage. -## Documentation - -Check the [documentation](https://ydf.readthedocs.io/en/latest/js_serving.html) +Check the +[yggdrasil-decision-forests npm package](https://www.npmjs.com/package/yggdrasil-decision-forests) for details. diff --git a/yggdrasil_decision_forests/port/javascript/npm/README.md b/yggdrasil_decision_forests/port/javascript/npm/README.md new file mode 100644 index 00000000..858c4980 --- /dev/null +++ b/yggdrasil_decision_forests/port/javascript/npm/README.md @@ -0,0 +1,164 @@ +# YDF in JS + +With this package, you can generate predictions of machine learning models +trained with [YDF](https://ydf.readthedocs.io) in the browser and with NodeJS. 
+ +## Usage example + +First, let's train a machine learning model in python. For more details, read +[YDF's documentation](https://ydf.readthedocs.io). + +In Python in a Colab or in a Jupyter Notebook, run: + +```python +# Install YDF +!pip install ydf pandas + +import ydf +import pandas as pd + +# Download a training dataset +ds_path = "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/main/yggdrasil_decision_forests/test_data/dataset/" +train_ds = pd.read_csv(ds_path + "adult_train.csv") + +# Train a Gradient Boosted Trees model +learner = ydf.GradientBoostedTreesLearner(label="income", pure_serving_model=True) +model = learner.train(train_ds) + +# Save the model +model.save("/tmp/my_model") + +# Zip the model +# Important: Use -j to not include the directory structure. +!zip -rj /tmp/my_model.zip /tmp/my_model +``` + +Then: + +### Run the model with NodeJS and CommonJS + +```js +(async function (){ + // Load the YDF library + const ydf = await require("yggdrasil-decision-forests")(); + + // Load the model + const fs = require("node:fs"); + let model = await ydf.loadModelFromZipBlob(fs.readFileSync("./model.zip")); + + // Create a batch of examples. 
+ let examples = { + "age": [39, 40, 40, 35], + "workclass": ["State-gov", "Private", "Private", "Federal-gov"], + "fnlwgt": [77516, 121772, 193524, 76845], + "education": ["Bachelors", "Assoc-voc", "Doctorate", "9th"], + "education_num": ["13", "11", "16", "5"], + "marital_status": ["Never-married", "Married-civ-spouse", "Married-civ-spouse", "Married-civ-spouse"], + "occupation": ["Adm-clerical", "Craft-repair", "Prof-specialty", "Farming-fishing"], + "relationship": ["Not-in-family", "Husband", "Husband", "Husband"], + "race": ["White", "Asian-Pac-Islander", "White", "Black"], + "sex": ["Male", "Male", "Male", "Male"], + "capital_gain": [2174, 0, 0, 0], + "capital_loss": [0, 0, 0, 0], + "hours_per_week": [40, 40, 60, 40], + "native_country": ["United-States", null, "United-States", "United-States"] + }; + + // Make predictions + let predictions = model.predict(examples); + console.log("predictions:", predictions); + + // Release model + model.unload(); +}()) +``` + +### Run the model with NodeJS and ES6 + +```js +import * as fs from "node:fs"; +import YggdrasilDecisionForests from 'yggdrasil-decision-forests'; + +// Load the YDF library +let ydf = await YggdrasilDecisionForests(); + +// Load the model +let model = await ydf.loadModelFromZipBlob(fs.readFileSync("./model.zip")); + +// Create a batch of examples. 
+let examples = { + "age": [39, 40, 40, 35], + "workclass": ["State-gov", "Private", "Private", "Federal-gov"], + "fnlwgt": [77516, 121772, 193524, 76845], + "education": ["Bachelors", "Assoc-voc", "Doctorate", "9th"], + "education_num": ["13", "11", "16", "5"], + "marital_status": ["Never-married", "Married-civ-spouse", "Married-civ-spouse", "Married-civ-spouse"], + "occupation": ["Adm-clerical", "Craft-repair", "Prof-specialty", "Farming-fishing"], + "relationship": ["Not-in-family", "Husband", "Husband", "Husband"], + "race": ["White", "Asian-Pac-Islander", "White", "Black"], + "sex": ["Male", "Male", "Male", "Male"], + "capital_gain": [2174, 0, 0, 0], + "capital_loss": [0, 0, 0, 0], + "hours_per_week": [40, 40, 60, 40], + "native_country": ["United-States", null, "United-States", "United-States"] +}; + +// Make predictions +let predictions = model.predict(examples); +console.log("predictions:", predictions); + +// Release model +model.unload(); +``` + +### Run the model in the browser + +```html + + + +``` + +## For developers + +### Run unit tests + +```sh +npm test +``` + +### Update the binary bundle + +```sh +# Assume the shell is located in a clone of: +# https://github.com/google/yggdrasil-decision-forests.git + +# Compile the YDF with WebAssembly +yggdrasil_decision_forests/port/javascript/tools/build_zipped_library.sh + +# Extract the content of `dist` in `yggdrasil_decision_forests/port/javascript/npm/dist`. 
+unzip dist/ydf.zip -d yggdrasil_decision_forests/port/javascript/npm/dist +``` diff --git a/yggdrasil_decision_forests/port/javascript/npm/dist/readme.txt b/yggdrasil_decision_forests/port/javascript/npm/dist/readme.txt new file mode 100644 index 00000000..3e9ae665 --- /dev/null +++ b/yggdrasil_decision_forests/port/javascript/npm/dist/readme.txt @@ -0,0 +1,3 @@ +Before submitting the npm package, populate this directory with inference.js and +inference.wasm generated by +yggdrasil_decision_forests/port/javascript/tools/build_zipped_library.sh. \ No newline at end of file diff --git a/yggdrasil_decision_forests/port/javascript/npm/package-lock.json b/yggdrasil_decision_forests/port/javascript/npm/package-lock.json new file mode 100644 index 00000000..465db7ec --- /dev/null +++ b/yggdrasil_decision_forests/port/javascript/npm/package-lock.json @@ -0,0 +1,3823 @@ +{ + "name": "yggdrasil-decision-forests", + "version": "0.0.2", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "yggdrasil-decision-forests", + "version": "0.0.2", + "license": "Apache-2.0", + "dependencies": { + "jszip": "^3.10.1" + }, + "devDependencies": { + "jest": "^29.7.0" + } + }, + "node_modules/@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "dev": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.24.2", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.2.tgz", + "integrity": "sha512-y5+tLQyV8pg3fsiln67BVLD1P13Eg4lh5RW9mF0zUuvLrv9uIQ4MCL+CRT+FTsBlBjcIan6PGsLcBN0m3ClUyQ==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.24.2", + "picocolors": "^1.0.0" + }, + "engines": { 
+ "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.4.tgz", + "integrity": "sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.24.4.tgz", + "integrity": "sha512-MBVlMXP+kkl5394RBLSxxk/iLTeVGuXTV3cIDXavPpMMqnSnt6apKgan/U8O3USWZCWZT/TbgfEpKa4uMgN4Dg==", + "dev": true, + "dependencies": { + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.24.2", + "@babel/generator": "^7.24.4", + "@babel/helper-compilation-targets": "^7.23.6", + "@babel/helper-module-transforms": "^7.23.3", + "@babel/helpers": "^7.24.4", + "@babel/parser": "^7.24.4", + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.1", + "@babel/types": "^7.24.0", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.24.4.tgz", + "integrity": "sha512-Xd6+v6SnjWVx/nus+y0l1sxMOTOMBkyL4+BIdbALyatQnAe/SRVjANeDPSCYaX+i1iJmuGSKf3Z+E+V/va1Hvw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.24.0", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", + "jsesc": "^2.5.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.23.6", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.23.6.tgz", + "integrity": 
"sha512-9JB548GZoQVmzrFgp8o7KxdgkTGm6xs9DW0o/Pim72UDjzr5ObUQ6ZzYPqA+g9OTS2bBQoctLJrky0RDCAWRgQ==", + "dev": true, + "dependencies": { + "@babel/compat-data": "^7.23.5", + "@babel/helper-validator-option": "^7.23.5", + "browserslist": "^4.22.2", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-environment-visitor": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", + "integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-function-name": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", + "integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", + "dev": true, + "dependencies": { + "@babel/template": "^7.22.15", + "@babel/types": "^7.23.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-hoist-variables": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", + "integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.24.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.24.3.tgz", + "integrity": "sha512-viKb0F9f2s0BCS22QSF308z/+1YWKV/76mwt61NBzS5izMzDPwdq1pTrzf+Li3npBWX9KdQbkeCt1jSAM7lZqg==", + "dev": true, + "dependencies": { + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + 
"node_modules/@babel/helper-module-transforms": { + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.3.tgz", + "integrity": "sha512-7bBs4ED9OmswdfDzpz4MpWgSrV7FXlc3zIagvLFjS5H+Mk7Snr21vQ6QwrsoCGMfNC4e4LQPdoULEt4ykz0SRQ==", + "dev": true, + "dependencies": { + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-module-imports": "^7.22.15", + "@babel/helper-simple-access": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/helper-validator-identifier": "^7.22.20" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.0.tgz", + "integrity": "sha512-9cUznXMG0+FxRuJfvL82QlTqIzhVW9sL0KjMPHhAOOvpQGL8QtdxnBKILjBqxlHyliz0yCa1G903ZXI/FuHy2w==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-simple-access": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.22.5.tgz", + "integrity": "sha512-n0H99E/K+Bika3++WNL17POvo4rKWZ7lZEp1Q+fStVbUi8nxPQEBOlTmCOxW/0JsS56SKKQ+ojAe2pHKJHN35w==", + "dev": true, + "dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-split-export-declaration": { + "version": "7.22.6", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", + "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", + "dev": true, + "dependencies": { + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.24.1", + "resolved": 
"https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz", + "integrity": "sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.23.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.23.5.tgz", + "integrity": "sha512-85ttAOMLsr53VgXkTbkx8oA6YTfT4q7/HzXSLEYmjcSTJPMPQtvq1BD79Byep5xMUYbGRzEpDsjUf3dyp54IKw==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.24.4.tgz", + "integrity": "sha512-FewdlZbSiwaVGlgT1DPANDuCHaDMiOo+D/IDYRFYjHOuv66xMSJ7fQwwODwRNAPkADIO/z1EoF/l2BCWlWABDw==", + "dev": true, + "dependencies": { + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.1", + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/highlight": { + "version": "7.24.2", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.2.tgz", + "integrity": "sha512-Yac1ao4flkTxTteCDZLEvdxg2fZfz1v8M4QpaGypq/WPDqg3ijHYbDfs+LG5hvzSoqaSZ9/Z9lKSP3CjZjv+pA==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.22.20", + "chalk": "^2.4.2", + "js-tokens": "^4.0.0", + "picocolors": "^1.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/highlight/node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/@babel/highlight/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true + }, + "node_modules/@babel/highlight/node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@babel/highlight/node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", 
+ "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/highlight/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/parser": { + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.4.tgz", + "integrity": "sha512-zTvEBcghmeBma9QIGunWevvBAp4/Qu9Bdq+2k0Ot4fVMD6v3dsC9WOcRSKk7tRRyBM/53yKMJko9xOatGQAwSg==", + "dev": true, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-syntax-async-generators": { + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz", + "integrity": "sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-bigint": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz", + "integrity": "sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-class-properties": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz", + "integrity": 
"sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.12.13" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-import-meta": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz", + "integrity": "sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-json-strings": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz", + "integrity": "sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-jsx": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.1.tgz", + "integrity": "sha512-2eCtxZXf+kbkMIsXS4poTvT4Yu5rXiRa+9xGVT56raghjmBTKMpFNc9R4IDiB4emao9eO22Ox7CxuJG7BgExqA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-logical-assignment-operators": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz", + "integrity": "sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==", + "dev": true, + "dependencies": { + 
"@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-nullish-coalescing-operator": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz", + "integrity": "sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-numeric-separator": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz", + "integrity": "sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.10.4" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-object-rest-spread": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz", + "integrity": "sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-catch-binding": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz", + "integrity": "sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": 
"^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-optional-chaining": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz", + "integrity": "sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.8.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-top-level-await": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz", + "integrity": "sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.14.5" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-typescript": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.24.1.tgz", + "integrity": "sha512-Yhnmvy5HZEnHUty6i++gcfH1/l68AHnItFHnaCv6hn9dNh0hQvvQJsxpi4BMBFN5DLeHBuucT/0DgzXif/OyRw==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/template": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.24.0.tgz", + "integrity": "sha512-Bkf2q8lMB0AFpX0NFEqSbx1OkTHf0f+0j82mkw+ZpzBnkk7e9Ql0891vlfgi+kHwOk8tQjiQHpqh4LaSa0fKEA==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.23.5", + "@babel/parser": "^7.24.0", + "@babel/types": "^7.24.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.24.1", + "resolved": 
"https://registry.npmjs.org/@babel/traverse/-/traverse-7.24.1.tgz", + "integrity": "sha512-xuU6o9m68KeqZbQuDt2TcKSxUw/mrsvavlEqQ1leZ/B+C9tk6E4sRWy97WaXgvq5E+nU3cXMxv3WKOCanVMCmQ==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.24.1", + "@babel/generator": "^7.24.1", + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-function-name": "^7.23.0", + "@babel/helper-hoist-variables": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/parser": "^7.24.1", + "@babel/types": "^7.24.0", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.0.tgz", + "integrity": "sha512-+j7a5c253RfKh8iABBhywc8NSfP5LURe7Uh4qpsh6jc+aLJguvmIUBdjSdEMQv2bENrCR5MfRdjGo7vzS/ob7w==", + "dev": true, + "dependencies": { + "@babel/helper-string-parser": "^7.23.4", + "@babel/helper-validator-identifier": "^7.22.20", + "to-fast-properties": "^2.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@bcoe/v8-coverage": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", + "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", + "dev": true + }, + "node_modules/@istanbuljs/load-nyc-config": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", + "integrity": "sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==", + "dev": true, + "dependencies": { + "camelcase": "^5.3.1", + "find-up": "^4.1.0", + "get-package-type": "^0.1.0", + "js-yaml": "^3.13.1", + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/argparse": { + "version": "1.0.10", + "resolved": 
"https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "dependencies": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/locate-path": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "dependencies": { + "p-locate": "^4.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": 
"sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "dependencies": { + "p-try": "^2.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@istanbuljs/load-nyc-config/node_modules/p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "dependencies": { + "p-limit": "^2.2.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@istanbuljs/schema": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz", + "integrity": "sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/@jest/console": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/console/-/console-29.7.0.tgz", + "integrity": "sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/core": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/core/-/core-29.7.0.tgz", + "integrity": "sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/reporters": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "ci-info": 
"^3.2.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-changed-files": "^29.7.0", + "jest-config": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-resolve-dependencies": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "jest-watcher": "^29.7.0", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/environment": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz", + "integrity": "sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==", + "dev": true, + "dependencies": { + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==", + "dev": true, + "dependencies": { + "expect": "^29.7.0", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/expect-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.7.0.tgz", + "integrity": "sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==", + "dev": true, + "dependencies": { + "jest-get-type": 
"^29.6.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/fake-timers": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-29.7.0.tgz", + "integrity": "sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@sinonjs/fake-timers": "^10.0.2", + "@types/node": "*", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/globals": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-29.7.0.tgz", + "integrity": "sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/types": "^29.6.3", + "jest-mock": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/reporters": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-29.7.0.tgz", + "integrity": "sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==", + "dev": true, + "dependencies": { + "@bcoe/v8-coverage": "^0.2.3", + "@jest/console": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "@types/node": "*", + "chalk": "^4.0.0", + "collect-v8-coverage": "^1.0.0", + "exit": "^0.1.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "istanbul-lib-coverage": "^3.0.0", + "istanbul-lib-instrument": "^6.0.0", + "istanbul-lib-report": "^3.0.0", + "istanbul-lib-source-maps": "^4.0.0", + "istanbul-reports": "^3.1.3", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "jest-worker": 
"^29.7.0", + "slash": "^3.0.0", + "string-length": "^4.0.1", + "strip-ansi": "^6.0.0", + "v8-to-istanbul": "^9.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/@jest/reporters/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/@jest/reporters/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@jest/reporters/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "dependencies": { + "@sinclair/typebox": "^0.27.8" + }, + "engines": { + 
"node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/source-map": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/source-map/-/source-map-29.6.3.tgz", + "integrity": "sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==", + "dev": true, + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.18", + "callsites": "^3.0.0", + "graceful-fs": "^4.2.9" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-result": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-result/-/test-result-29.7.0.tgz", + "integrity": "sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "collect-v8-coverage": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/test-sequencer": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-29.7.0.tgz", + "integrity": "sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==", + "dev": true, + "dependencies": { + "@jest/test-result": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/transform": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/transform/-/transform-29.7.0.tgz", + "integrity": "sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==", + "dev": true, + "dependencies": { + "@babel/core": "^7.11.6", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "babel-plugin-istanbul": "^6.1.1", + "chalk": "^4.0.0", + "convert-source-map": "^2.0.0", + 
"fast-json-stable-stringify": "^2.1.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "micromatch": "^4.0.4", + "pirates": "^4.0.4", + "slash": "^3.0.0", + "write-file-atomic": "^4.0.2" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dev": true, + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + 
}, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==", + "dev": true + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true + }, + "node_modules/@sinonjs/commons": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.1.tgz", + "integrity": "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==", + "dev": true, + "dependencies": { + "type-detect": "4.0.8" + } + }, + "node_modules/@sinonjs/fake-timers": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.3.0.tgz", + "integrity": "sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==", + "dev": true, + "dependencies": { + "@sinonjs/commons": "^3.0.0" + } + }, + "node_modules/@types/babel__core": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.20.7", + 
"@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "node_modules/@types/babel__generator": { + "version": "7.6.8", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.8.tgz", + "integrity": "sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__template": { + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "node_modules/@types/babel__traverse": { + "version": "7.20.5", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.5.tgz", + "integrity": "sha512-WXCyOcRtH37HAUkpXhUduaxdm82b4GSlyTqajXviN4EfiuPgNYR109xMCKvpl6zPIpua0DGlMEDCq+g8EdoheQ==", + "dev": true, + "dependencies": { + "@babel/types": "^7.20.7" + } + }, + "node_modules/@types/graceful-fs": { + "version": "4.1.9", + "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", + "integrity": "sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/@types/istanbul-lib-coverage": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", + "dev": true + }, + "node_modules/@types/istanbul-lib-report": { + "version": "3.0.3", + "resolved": 
"https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", + "dev": true, + "dependencies": { + "@types/istanbul-lib-coverage": "*" + } + }, + "node_modules/@types/istanbul-reports": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", + "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", + "dev": true, + "dependencies": { + "@types/istanbul-lib-report": "*" + } + }, + "node_modules/@types/node": { + "version": "20.12.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", + "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/stack-utils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", + "dev": true + }, + "node_modules/@types/yargs": { + "version": "17.0.32", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", + "integrity": "sha512-xQ67Yc/laOG5uMfX/093MRlGGCIBzZMarVa+gfNKJxWAIgykYpVGkBdbqEzGDDfCrVUj6Hiff4mTZ5BA6TmAog==", + "dev": true, + "dependencies": { + "@types/yargs-parser": "*" + } + }, + "node_modules/@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "dev": true + }, + "node_modules/ansi-escapes": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", + "integrity": 
"sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", + "dev": true, + "dependencies": { + "type-fest": "^0.21.3" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/babel-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", + "integrity": "sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==", + "dev": true, + "dependencies": { + "@jest/transform": "^29.7.0", + "@types/babel__core": "^7.1.14", + "babel-plugin-istanbul": "^6.1.1", + "babel-preset-jest": "^29.6.3", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@babel/core": "^7.8.0" + } + 
}, + "node_modules/babel-plugin-istanbul": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-6.1.1.tgz", + "integrity": "sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.0.0", + "@istanbuljs/load-nyc-config": "^1.0.0", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-instrument": "^5.0.4", + "test-exclude": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-istanbul/node_modules/istanbul-lib-instrument": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-5.2.1.tgz", + "integrity": "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==", + "dev": true, + "dependencies": { + "@babel/core": "^7.12.3", + "@babel/parser": "^7.14.7", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^6.3.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/babel-plugin-jest-hoist": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.6.3.tgz", + "integrity": "sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==", + "dev": true, + "dependencies": { + "@babel/template": "^7.3.3", + "@babel/types": "^7.3.3", + "@types/babel__core": "^7.1.14", + "@types/babel__traverse": "^7.0.6" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/babel-preset-current-node-syntax": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.0.1.tgz", + "integrity": "sha512-M7LQ0bxarkxQoN+vz5aJPsLBn77n8QgTFmo8WK0/44auK2xlCXrYcUxHFxgU7qW5Yzw/CjmLRK2uJzaCd7LvqQ==", + "dev": true, + "dependencies": { + 
"@babel/plugin-syntax-async-generators": "^7.8.4", + "@babel/plugin-syntax-bigint": "^7.8.3", + "@babel/plugin-syntax-class-properties": "^7.8.3", + "@babel/plugin-syntax-import-meta": "^7.8.3", + "@babel/plugin-syntax-json-strings": "^7.8.3", + "@babel/plugin-syntax-logical-assignment-operators": "^7.8.3", + "@babel/plugin-syntax-nullish-coalescing-operator": "^7.8.3", + "@babel/plugin-syntax-numeric-separator": "^7.8.3", + "@babel/plugin-syntax-object-rest-spread": "^7.8.3", + "@babel/plugin-syntax-optional-catch-binding": "^7.8.3", + "@babel/plugin-syntax-optional-chaining": "^7.8.3", + "@babel/plugin-syntax-top-level-await": "^7.8.3" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/babel-preset-jest": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-29.6.3.tgz", + "integrity": "sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==", + "dev": true, + "dependencies": { + "babel-plugin-jest-hoist": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "dependencies": { + "fill-range": "^7.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.0.tgz", + "integrity": 
"sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "caniuse-lite": "^1.0.30001587", + "electron-to-chromium": "^1.4.668", + "node-releases": "^2.0.14", + "update-browserslist-db": "^1.0.13" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/bser": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", + "integrity": "sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==", + "dev": true, + "dependencies": { + "node-int64": "^0.4.0" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001613", + "resolved": 
"https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001613.tgz", + "integrity": "sha512-BNjJULJfOONQERivfxte7alLfeLW4QnwHvNW4wEcLEbXfV6VSCYvr+REbf2Sojv8tC1THpjPXBxWgDbq4NtLWg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ] + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/chalk/node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/char-regex": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", + "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/ci-info": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", + "integrity": "sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], + "engines": { + "node": ">=8" + } + }, + 
"node_modules/cjs-module-lexer": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.3.1.tgz", + "integrity": "sha512-a3KdPAANPbNE4ZUv9h6LckSl9zLsYOP4MBmhIPkRaeyybt+r4UghLvq+xw/YwUcC1gqylCkL4rdVs3Lwupjm4Q==", + "dev": true + }, + "node_modules/co": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", + "integrity": "sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==", + "dev": true, + "engines": { + "iojs": ">= 1.0.0", + "node": ">= 0.12.0" + } + }, + "node_modules/collect-v8-coverage": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.2.tgz", + "integrity": "sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==", + "dev": true + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": 
"sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true + }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" + }, + "node_modules/create-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", + "integrity": "sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "prompts": "^2.0.1" + }, + "bin": { + "create-jest": "bin/create-jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dev": true, + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/debug/node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": 
"sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "node_modules/dedent": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", + "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", + "dev": true, + "peerDependencies": { + "babel-plugin-macros": "^3.1.0" + }, + "peerDependenciesMeta": { + "babel-plugin-macros": { + "optional": true + } + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/detect-newline": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", + "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/diff-sequences": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", + "integrity": "sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.4.750", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.750.tgz", + "integrity": "sha512-9ItEpeu15hW5m8jKdriL+BQrgwDTXEL9pn4SkillWFu73ZNNNQ2BKKLS+ZHv2vC9UkNhosAeyfxOf/5OSeTCPA==", + "dev": true + }, + "node_modules/emittery": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/emittery/-/emittery-0.13.1.tgz", + "integrity": 
"sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sindresorhus/emittery?sponsor=1" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/escalade": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "dev": true, + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + }, + 
"engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + }, + "node_modules/exit": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", + "integrity": "sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==", + "dev": true, + "dependencies": { + "@jest/expect-utils": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fb-watchman": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", + "integrity": "sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==", + "dev": true, + "dependencies": { + "bser": "2.1.1" + } + }, + "node_modules/fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-package-type": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", + "integrity": "sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", + "dev": true, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/get-stream": { + "version": "6.0.1", + "resolved": 
"https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/html-escaper": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", + "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", + "dev": true + }, + "node_modules/human-signals": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": 
"sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "dev": true, + "engines": { + "node": ">=10.17.0" + } + }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, + "node_modules/import-local": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", + "integrity": "sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==", + "dev": true, + "dependencies": { + "pkg-dir": "^4.2.0", + "resolve-cwd": "^3.0.0" + }, + "bin": { + "import-local-fixture": "fixtures/cli.js" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dev": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": 
"sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true + }, + "node_modules/is-core-module": { + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.1.tgz", + "integrity": "sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==", + "dev": true, + "dependencies": { + "hasown": "^2.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-generator-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-2.1.0.tgz", + "integrity": "sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": 
"sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "node_modules/istanbul-lib-coverage": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/istanbul-lib-instrument": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-6.0.2.tgz", + "integrity": "sha512-1WUsZ9R1lA0HtBSohTkm39WTPlNKSJ5iFk7UwqXkBLoHQT+hfqPsfsTDVuZdKGaBwn7din9bS7SsnoAr943hvw==", + "dev": true, + "dependencies": { + "@babel/core": "^7.23.9", + "@babel/parser": "^7.23.9", + "@istanbuljs/schema": "^0.1.3", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-instrument/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + 
}, + "node_modules/istanbul-lib-instrument/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/istanbul-lib-report": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", + "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", + "dev": true, + "dependencies": { + "istanbul-lib-coverage": "^3.0.0", + "make-dir": "^4.0.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-lib-report/node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/istanbul-lib-source-maps": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.1.tgz", + "integrity": "sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==", + "dev": true, + "dependencies": { + "debug": "^4.1.1", + "istanbul-lib-coverage": "^3.0.0", + "source-map": "^0.6.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-reports": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.1.7.tgz", + "integrity": "sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==", + "dev": true, + "dependencies": { + "html-escaper": "^2.0.0", + "istanbul-lib-report": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + 
"node_modules/jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", + "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", + "dev": true, + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/types": "^29.6.3", + "import-local": "^3.0.2", + "jest-cli": "^29.7.0" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-changed-files": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-29.7.0.tgz", + "integrity": "sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==", + "dev": true, + "dependencies": { + "execa": "^5.0.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-circus": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-circus/-/jest-circus-29.7.0.tgz", + "integrity": "sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "co": "^4.6.0", + "dedent": "^1.0.0", + "is-generator-fn": "^2.0.0", + "jest-each": "^29.7.0", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0", + "pretty-format": "^29.7.0", + "pure-rand": "^6.0.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + 
"node_modules/jest-cli": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-29.7.0.tgz", + "integrity": "sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==", + "dev": true, + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "create-jest": "^29.7.0", + "exit": "^0.1.2", + "import-local": "^3.0.2", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "yargs": "^17.3.1" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "node_modules/jest-cli/node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/jest-cli/node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/jest-cli/node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": 
"sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true, + "engines": { + "node": ">=12" + } + }, + "node_modules/jest-config": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-29.7.0.tgz", + "integrity": "sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==", + "dev": true, + "dependencies": { + "@babel/core": "^7.11.6", + "@jest/test-sequencer": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-jest": "^29.7.0", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "deepmerge": "^4.2.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-circus": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "micromatch": "^4.0.4", + "parse-json": "^5.2.0", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "@types/node": "*", + "ts-node": ">=9.0.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "ts-node": { + "optional": true + } + } + }, + "node_modules/jest-config/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/jest-config/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + 
"fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/jest-config/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/jest-diff": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.7.0.tgz", + "integrity": "sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "diff-sequences": "^29.6.3", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-docblock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-29.7.0.tgz", + "integrity": "sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==", + "dev": true, + "dependencies": { + "detect-newline": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-each": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-29.7.0.tgz", + "integrity": "sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "jest-util": "^29.7.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + 
"node_modules/jest-environment-node": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-29.7.0.tgz", + "integrity": "sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-get-type": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.6.3.tgz", + "integrity": "sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-haste-map": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-29.7.0.tgz", + "integrity": "sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/graceful-fs": "^4.1.3", + "@types/node": "*", + "anymatch": "^3.0.3", + "fb-watchman": "^2.0.0", + "graceful-fs": "^4.2.9", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "micromatch": "^4.0.4", + "walker": "^1.0.8" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "optionalDependencies": { + "fsevents": "^2.3.2" + } + }, + "node_modules/jest-leak-detector": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-29.7.0.tgz", + "integrity": "sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==", + "dev": true, + "dependencies": { + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { 
+ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-matcher-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.7.0.tgz", + "integrity": "sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-message-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.7.0.tgz", + "integrity": "sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.6.3", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-mock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-29.7.0.tgz", + "integrity": "sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-util": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-pnp-resolver": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz", + "integrity": "sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==", + "dev": true, + "engines": { + "node": ">=6" + }, + "peerDependencies": { + "jest-resolve": "*" + }, + "peerDependenciesMeta": { + 
"jest-resolve": { + "optional": true + } + } + }, + "node_modules/jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve/-/jest-resolve-29.7.0.tgz", + "integrity": "sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==", + "dev": true, + "dependencies": { + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-pnp-resolver": "^1.2.2", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "resolve": "^1.20.0", + "resolve.exports": "^2.0.0", + "slash": "^3.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-resolve-dependencies": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-29.7.0.tgz", + "integrity": "sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==", + "dev": true, + "dependencies": { + "jest-regex-util": "^29.6.3", + "jest-snapshot": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runner": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-29.7.0.tgz", + "integrity": "sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==", + "dev": true, + "dependencies": { + "@jest/console": "^29.7.0", + "@jest/environment": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "graceful-fs": 
"^4.2.9", + "jest-docblock": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-leak-detector": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-resolve": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-util": "^29.7.0", + "jest-watcher": "^29.7.0", + "jest-worker": "^29.7.0", + "p-limit": "^3.1.0", + "source-map-support": "0.5.13" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runtime/-/jest-runtime-29.7.0.tgz", + "integrity": "sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==", + "dev": true, + "dependencies": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/globals": "^29.7.0", + "@jest/source-map": "^29.6.3", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "cjs-module-lexer": "^1.0.0", + "collect-v8-coverage": "^1.0.0", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0", + "strip-bom": "^4.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-runtime/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/jest-runtime/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": 
"sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/jest-runtime/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/jest-snapshot": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.7.0.tgz", + "integrity": "sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==", + "dev": true, + "dependencies": { + "@babel/core": "^7.11.6", + "@babel/generator": "^7.7.2", + "@babel/plugin-syntax-jsx": "^7.7.2", + "@babel/plugin-syntax-typescript": "^7.7.2", + "@babel/types": "^7.3.3", + "@jest/expect-utils": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0", + "chalk": "^4.0.0", + "expect": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "natural-compare": "^1.4.0", + "pretty-format": "^29.7.0", + "semver": "^7.5.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-snapshot/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": 
"sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/jest-snapshot/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/jest-snapshot/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/jest-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.7.0.tgz", + "integrity": "sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-validate": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-29.7.0.tgz", + "integrity": "sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==", + "dev": true, + "dependencies": { + "@jest/types": "^29.6.3", + "camelcase": "^6.2.0", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "leven": "^3.1.0", + "pretty-format": "^29.7.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-watcher": { + "version": "29.7.0", + "resolved": 
"https://registry.npmjs.org/jest-watcher/-/jest-watcher-29.7.0.tgz", + "integrity": "sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==", + "dev": true, + "dependencies": { + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "jest-util": "^29.7.0", + "string-length": "^4.0.1" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/jest-worker": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-29.7.0.tgz", + "integrity": "sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==", + "dev": true, + "dependencies": { + "@types/node": "*", + "jest-util": "^29.7.0", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "node_modules/jsesc": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", + "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", + "dev": true, + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + 
"integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "dependencies": { + "yallist": "^3.0.2" + } 
+ }, + "node_modules/make-dir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz", + "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==", + "dev": true, + "dependencies": { + "semver": "^7.5.3" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", + "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/make-dir/node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + }, + "node_modules/makeerror": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz", + "integrity": "sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==", + "dev": true, + "dependencies": { + "tmpl": "1.0.5" + } + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": 
"sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true + }, + "node_modules/micromatch": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "dev": true, + "dependencies": { + "braces": "^3.0.2", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, + "node_modules/node-int64": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", + "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==", + "dev": true + }, + "node_modules/node-releases": { + "version": "2.0.14", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", + "integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==", + "dev": true + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/npm-run-path": { + "version": "4.0.1", + "resolved": 
"https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "dev": true, + "dependencies": { + "path-key": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "dev": true, + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-try": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": 
"https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true + }, + "node_modules/picocolors": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", + "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", + "dev": true + }, + "node_modules/picomatch": { + "version": "2.3.1", + "resolved": 
"https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pirates": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", + "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, + "node_modules/pkg-dir": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", + "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", + "dev": true, + "dependencies": { + "find-up": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pkg-dir/node_modules/find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "dependencies": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pkg-dir/node_modules/locate-path": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "dependencies": { + "p-locate": "^4.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pkg-dir/node_modules/p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "dependencies": { + 
"p-try": "^2.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pkg-dir/node_modules/p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "dependencies": { + "p-limit": "^2.2.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/pretty-format": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", + "integrity": "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==", + "dev": true, + "dependencies": { + "@jest/schemas": "^29.6.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + } + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, + "node_modules/prompts": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "dev": true, + "dependencies": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 6" + } + }, + 
"node_modules/pure-rand": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", + "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/dubzzz" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fast-check" + } + ] + }, + "node_modules/react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "dev": true + }, + "node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve": { + "version": "1.22.8", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", + "integrity": "sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==", + "dev": true, + "dependencies": { + "is-core-module": "^2.13.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" 
+ } + }, + "node_modules/resolve-cwd": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", + "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", + "dev": true, + "dependencies": { + "resolve-from": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve.exports": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/resolve.exports/-/resolve.exports-2.0.2.tgz", + "integrity": "sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": 
"sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "dev": true + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "dev": true + }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } 
+ }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "dev": true + }, + "node_modules/stack-utils": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", + "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", + "dev": true, + "dependencies": { + "escape-string-regexp": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/stack-utils/node_modules/escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/string-length": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", + "integrity": "sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==", + "dev": true, + "dependencies": { + "char-regex": "^1.0.2", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", 
+ "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", + "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-final-newline": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + 
"resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/test-exclude": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", + "integrity": "sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==", + "dev": true, + "dependencies": { + "@istanbuljs/schema": "^0.1.2", + "glob": "^7.1.4", + "minimatch": "^3.0.4" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/test-exclude/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/test-exclude/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/test-exclude/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + 
"brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/tmpl": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", + "dev": true + }, + "node_modules/to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/type-detect": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", + "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/type-fest": { + "version": "0.21.3", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + }, + "node_modules/update-browserslist-db": { + 
"version": "1.0.13", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", + "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "escalade": "^3.1.1", + "picocolors": "^1.0.0" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" + }, + "node_modules/v8-to-istanbul": { + "version": "9.2.0", + "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", + "integrity": "sha512-/EH/sDgxU2eGxajKdwLCDmQ4FWq+kpi3uCmBGpw1xJtnAxEjlD8j8PEiGWpCIMIs3ciNAgH0d3TTJiUkYzyZjA==", + "dev": true, + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.12", + "@types/istanbul-lib-coverage": "^2.0.1", + "convert-source-map": "^2.0.0" + }, + "engines": { + "node": ">=10.12.0" + } + }, + "node_modules/walker": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", + "integrity": "sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==", + "dev": true, + "dependencies": { + "makeerror": "1.0.12" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + 
"dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "node_modules/write-file-atomic": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-4.0.2.tgz", + "integrity": "sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==", + "dev": true, + "dependencies": { + "imurmurhash": "^0.1.4", + "signal-exit": "^3.0.7" + }, + "engines": { + "node": "^12.13.0 || ^14.15.0 || >=16.0.0" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": 
"sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + } + } +} diff --git a/yggdrasil_decision_forests/port/javascript/npm/package.json b/yggdrasil_decision_forests/port/javascript/npm/package.json new file mode 100644 index 00000000..21b40d47 --- /dev/null +++ b/yggdrasil_decision_forests/port/javascript/npm/package.json @@ -0,0 +1,36 @@ +{ + "name": "yggdrasil-decision-forests", + "version": "0.0.2", + "description": "With this package, you can generate predictions of machine learning models trained with YDF in browser and with NodeJS.", + "main": "dist/inference.js", + "scripts": { + "test": "jest" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/google/yggdrasil-decision-forests.git" + }, + "keywords": [ + "ydf", + "machine-learning", + "random-forest", + "gradient-boosting", + "tabular-data", + "interpretable", + "decision-forest", + "decision-tree", + "tensorflow-decision-forest" + ], + "author": "Mathieu Guillame-Bert", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/google/yggdrasil-decision-forests/issues" + }, + "homepage": "https://ydf.readthedocs.io", + "dependencies": { + "jszip": "^3.10.1" + }, + "devDependencies": { + "jest": "^29.7.0" + } +} diff --git a/yggdrasil_decision_forests/port/javascript/npm/test/inference.test.js b/yggdrasil_decision_forests/port/javascript/npm/test/inference.test.js new file mode 100644 index 00000000..a931c61b --- /dev/null +++ b/yggdrasil_decision_forests/port/javascript/npm/test/inference.test.js @@ -0,0 +1,96 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const fs = require('node:fs'); + +describe('YDF Inference', () => { + let ydf = null; + let model1 = null; + let model2 = null; + + beforeAll(async () => { + ydf = await require('yggdrasil-decision-forests')(); + + model1 = + await ydf.loadModelFromZipBlob(fs.readFileSync('./test/model_1.zip')); + model2 = + await ydf.loadModelFromZipBlob(fs.readFileSync('./test/model_2.zip')); + }); + + it('loadModelFromZipBlob', () => { + expect(model1).not.toBeNull(); + expect(model2).not.toBeNull(); + }); + + it('predict model1', async () => { + let predictions = model1.predict({ + 'age': [39, 40, 40, 35], + 'workclass': ['State-gov', 'Private', 'Private', 'Federal-gov'], + 'fnlwgt': [77516, 121772, 193524, 76845], + 'education': ['Bachelors', 'Assoc-voc', 'Doctorate', '9th'], + 'education_num': [13, 11, 16, 5], + 'marital_status': [ + 'Never-married', 'Married-civ-spouse', 'Married-civ-spouse', + 'Married-civ-spouse' + ], + 'occupation': + ['Adm-clerical', 'Craft-repair', 'Prof-specialty', 'Farming-fishing'], + 'relationship': ['Not-in-family', 'Husband', 'Husband', 'Husband'], + 'race': ['White', 'Asian-Pac-Islander', 'White', 'Black'], + 'sex': ['Male', 'Male', 'Male', 'Male'], + 'capital_gain': [2174, 0, 0, 0], + 'capital_loss': [0, 0, 0, 0], + 'hours_per_week': [40, 40, 60, 40], + 'native_country': + ['United-States', null, 'United-States', 'United-States'] + }); + console.log('Predictions:', predictions); + + expect(predictions).toEqual([ + 0.13323983550071716, + 0.47678571939468384, + 0.818461537361145, + 0.4974619150161743, + ]); + }); + + 
it('predict model2', async () => { + let predictions = model2.predict({ + 'f1': [0, 0, 0, 0, 0, 0, 0, 0], + 'f2': [ + ['RED', 'BLUE'], ['GREEN'], [], ['RED', 'BLUE', 'GREEN'], ['BLUE'], [], + ['RED'], ['BLUE', 'RED'] + ], + 'f3': + [['X'], ['Y'], [], ['X', 'Y', 'Z'], ['X'], ['Z', 'Y'], ['Y'], ['Z']], + }); + console.log('Predictions:', predictions); + expect(predictions).toEqual([ + 0.4690462052822113, + 0.4563983976840973, + 0.4563983976840973, + 0.5488502383232117, + 0.4563983976840973, + 0.5943315029144287, + 0.4690462052822113, + 0.5488502383232117, + ]); + }); + + afterAll(async () => { + model1.unload(); + model2.unload(); + }); +}); diff --git a/yggdrasil_decision_forests/port/javascript/npm/test/model_1.zip b/yggdrasil_decision_forests/port/javascript/npm/test/model_1.zip new file mode 100644 index 0000000000000000000000000000000000000000..875c79f073f00f19a381c8b7bb6a8b8e691fcd63 GIT binary patch literal 7229 zcma)BWmH^Cx@~AIIKiQDXe7A11PB&9xJ%;>9h^o&kdWXIJh%jR3xtb11VTe_2oM?q zp@TN|xHE69x%aI#^QO+KbLz*bed^S=tM;n>>1Y5k$N&HU4glR;%%H}Oim3|=007|v z0Qdk9z~08s#>&?VV#n)cYw+R`07x3jWA|r;2H*kEFrxIa02umKXaEaCS2uGN^0l+0 z)!{|UM6rfcFqBrz9Z^D)&5gN&*BmCt#s0GCrCwOgP*YP^@k+3x)WsUqkdZ`}iJ3_W zL-|xim)Iq}3PGeA*!-9&ni&O*ljU%EamQ38TalC`ACAtmH*r=r-)rc*yYl;T%D>0N zM}^5^>Q|YxkFUV*!Awio=D^{+Di<^?h&vs1GI1Hz!p;Ik)^Z{rdC?Mg*DxP_%{Ir} zuzGYXb(7b|_W{Nw^3vmMaPXv8m>yN%B#TO?K+5@58IFBpRPT(b z8iI=qCCsRb8oF{$>!??)3!R*QM$i_nkFQC006K*#Nj zn)sA7bhUy&tc2=##+SJ*#J`o4=UbL8h>%WQ&@17yA6ey(=%Wo^h+K>zxoJA?FK6hv zehKR9xV9PCD;r+Vl14VYNwhB?!3arm#aJ^GLZ6N)xV;_w%`GRBg1WxNvx}a-w-3B= z`5C9Nc?Q!?F5WP4*W>A4d7-A5*l?9M?Cf=d$|FEJ=9~2jm5o- zd+$>ENE=xQev|TzCplUm(?P8ifH67~Qb|JQxti`6D8o?;)(Z>KFWU-qjfr!QX?B$- z4!(ukB)kJSH;%u?_M$1pK7`J!oU{8atg4Lpelv=AEN#sWky<-V3-3Y;A`0sSYvPyR z5<5|HD{yO;EUB>xQ~COlXY)+>!1Uqd*yA;RIF!}`SQQ~hh0gEk`}p=`40kLYyK2!b zyfj$b%G&U#BgO!yY_-go!!oCaw8p^=z0N@WAWG8(Gjn&C4_+yi?W`&%@l~kb>9()N z_6gcrYfa|9jXx2!sTkJ=)KadwRxFS-SX>pGIJH#V$!89;pDt}9#(-}5Cqqer5>QFs9~RJJ2XdpyAN2B{bxH@{^;G 
z8*iWym-3_gXq+~*hh~^uitD7(Lo5dUaq@0(LsT-h!(m%fntWf{Pq>|g!-Uwk2rjP(bxrM){i~cl&i0P@o05@X|9V`p{ngaU}%I z;!PC-b|Rd$nghq@fFM7?^0eFl4H;2TaBtpuaVeKyvrM6sGa8;msXdt&1Af;J42OL8 zKH?4IB618Hv=L|a03k2@sRMKOrL3Q2*zCPIOfcmuuxV_2-j9o%iuKYp^uvB-GGaZ! zRR=DdW#&fH8$R^Qw8u*x8innY(4SNHeq`i5PywrlJ@Fr1!hXP0S8mf3v-5{`58S{OqZv6%wwmn)ZHcg5dr*=c0%=R*MOpdBo$(Q9fm&W;q1kPq1dW=mvPT(}h zC%op4vU>X~=(x7X^ZWCr%daQj-w%_xKH6E#@OtrFa%G^YifVQ5vOSo zJ^n_1H0)p`tdk`IWsm4!FfmHEj8f~JL|@LWzq%J~Po0^ecr15?msVzV{djCK@>TL` z?E7ME$#XWP(xq0&{#ov=|J-hpq!uMzIf61r&k)zon8UQed4gfrQ=VvO8JS7)F!)G^u~m-_p%MN>=@cO@EwA-QG109LBcWJQe|=49{4(01 zkKBjx+Zzk>C?;M3Evo+X`kZs<}_^~0VM$cH{(FrcJ=&=9(RiH7`?v4TPT5Uf zR<0&eVJJVzcxmP__iSbR{8e=TDiyqF_+79>Hqf=Xu8#G|Lg6>%Ex|0whVY8(PJ&#X zw3?9u$yR2h6{pMEIl#o5H*Q;9Uu6Ai7<|b(4p9wlY953umt0{)y{`xq(qC7fH=`H* zbzOaWZ8e$A0!wZ$_-)yAiru9KvQxefb13k`MT`zN_GJ zA@O`%tt@Wko`4pipT;XBPkrLbVY<%yW^4R7V}`wn)l=9(J1#)EQL;=F0__RxND5@O zR?n1{sH}h=rn@g#Vpz>P+-~3jr4C$Vq&(w>O}KcPp)_A7b~iLv zT;!%YFaRX;WZC7c9@PsDZyXJi#O;cuVS-L3F3ZS7R*ZW;5gJg=t|K`kP$JO4mrfNh zO7X8{zOt2--w~bm5$$HhA{!&o#j6cmfCuW{>2HjLplv;6K5|u!n?Nc|iQPCVqXp+r zSh`%Y#}yB%#96MFlM;hA^lWCg_ubb`8t%G;V;Qi6+9{53H4`loy$SJ5^r-h&p;_tvT$f}l_vFYz2FJ^`BJ09_Wzw);CFJYv$!`wm z3US4|FhmpmsCD7_=DYhce!_SR{5bK$g%<)v??}%!-9_R0$4x}q3t2V~)37rp2j6dN zrv$&gw64Q6_Z_=<+Y@mDe)<`Q zf^AfHHL<15=EdU{8xaZ5WRYDl9ISZF$B)Z~F%8aa2Ue(yJYyX6zx0DeL(q z{*$#&uQUUU+FiIJ+D4gUD*_#>LS&NXdqx>Wwdb$F-I%;g-sjZ1@~gN6TdQBvnf=wgLqrccB_!6qC-ZeefG0&$G)9Tl3|h+ z;VvX?AP4T1zew+a{Al}Y$}5w}K=zJBo1xU2@F$kSEZ1Z;>Sr!VL8D=z(q@`OJ?At; zRWr;zuiViXap{Kd9_SM-FEO%t716}`0_$sZyjbKA={<~wvBPtrj<;Hmuih`6~mz5iU#`1;3A_+3BTuIL7PW85@A3D0f! 
zeMF;#R(3Pux%CMrSnw`H%ea8Z?m;%^gM&l!XbB9(96U_d7zwo-mxm2K-t{?Ul8~F+ zS%o1$KFx>ER=Tkao|i^w$hKK#$|<009oYn(eAF|uI}tt)?dO`ih2{gAi6q~uR8lFa zhDsxw>u5(V>Ln$!^D-p+W~-%TIkMduU0%)q(g0bBrSy4<#Kx)Bxr^ww`7mATRHVlD zkz{i)f2eh`+}h{O&O5VB^|p^qNsi%>)&A%??3T(j^$o-;An~0MhN$Kd1Eypg6j40V z6**Xx)%Ik9f?kwA(LX2WJ%|M6`ti(u-}S_DiaM>-e7?i6_%xfbXbt62Sl9Mj+AU5& zXTe_9`HzOaW*XxsL^Zp{-u=ZUCCBhrb0w5fKc$JxZ0Ap*ko=z1XXSYKwm!{njGB>` z{4yc$>;x)aQtKw({4y&9=<@J2(2CdOVU99F?HV;ctvWJ5jKq!X)ZcB%nsTFCu}nae`iR!C;WqhKSG^zwqQXE~+I8U(p>b`Yi1+V5( zk##0ieA(A{$`lqWMZ|SAjlKDuf72K5q&p^i$vd5qy^+%pbdfUPNpKc)Qu%h~hhWj& zb^3uK_6OZH3h*u--yuxD4N?X?;{_r%pJ#qwY0g`$+jZx7pm1es1B{(!1A_ptwHcOcIe0Q}0h@X#DwB zR5?9mIJ}m4O27^*|TEobm;ebuF!9cHkl(=zFDr#u1OJeDW$RDv+G^0W}jZ?Xr z#{l2g5)7KOjQSw0Vq)9T75IAL=B-Y|`|gSxiLYchmAyUQQ3VJCx#{N`t1q$r5f{4|HZij)pSHUmVybntee7*NjqLE6Br*2M}V z4Uh9v8KqAnj9|VLp#O}5ZNJ1bX<2ClKL_v1=+lc4@21&!_Dg1TN}4Jf7u#PS#$&mH z5TV+9CEuzCKS%9<#7ayQ*gbO{IDMnAboX6Fe1yI#-p8hJJba8nNBVStCK(hV!XZoWsgdu*GD80LcZse%(IxU0bJs)l*$5%3scY|4jj#x4H8V^2sF*P%)nR ztNCFIu^<#qW&SPQ1A9RE?j74J)$9~8)hQuAnzQ)I_<$5|Tt64yN|)+S3l(4FBT-_`MDjF{WA!;Vj-~L%OJ3btJR~rT>DpW z&P#5_#+}K!G6+|zEqBZ7w^m(YG$Er9rH2z7eHC5rDYdKf(#lOIXT?#<;D?n6-ukVt ziNBf(UYf}mw~*ckc+li-h{^qY_I1S!@epHpP=mQtGLKvo)3D&m(D1SFuVQ=w44O{9 zwZ`mna#uyBdi2g8NG!CL&7Tju7d;|6$7p-aaWqF!|A)1kt?A+D^A2y*4Z$KL(>T1P zS)vJ_JE>+8&QGdW!*kG=XdTH8YI+L%wzl(D=RmNi=kg%0hnH2RieSEsoe-;1b2IiT zooePDprCYnZu8R>WkoZZyh`j(og37FIzE0eC*?{kkP7TORkrwbL~mrmAxS1$cq`X+ zm1X3!9L!skLTIr)KK3zUj6LR2W3ayUa$^RNmOduW2ZuBI0RjN{ E9~mYYlK=n! 
literal 0 HcmV?d00001 diff --git a/yggdrasil_decision_forests/port/javascript/npm/test/model_2.zip b/yggdrasil_decision_forests/port/javascript/npm/test/model_2.zip new file mode 100644 index 0000000000000000000000000000000000000000..7a5f435ebcf78ac4b1e775ee7d7f5e0d977df0e6 GIT binary patch literal 1476 zcmWIWW@Zs#W?$x^!(AZe0pgUzlEnDpg4ATaf~3bm4-y2E`uo2Z zmK1$86cS6AmJslRpSee>hp#6_OQy%kC$lZeXbw++0+&*XV~dhc$LaQF(G{mooM>$h zY;bJ~e8{w)tyxrGe0A^s{oQi~xY?T-nnlgUizlq+p1>TEboB7iM^A)baXm3)taO9l-UQw3P7A*l$es4npYB^l%HQ*lA01{vvad;K%eK8-`A8T#4tsiJ9gv823w!L$Sd5D?+$*NEdM58) zQ&7wunKMm10uLCT7&uzWzj5ShZscm7`?onpkW2K9)Bf9sMbrwqs%}PS78N%8l<@TN z1C3_Xc9meh)NXLO!TZEzeVu#PuU^!@cm1;dy`!cIN0?V7f7KGGV7-#Oft@=1+zmR*!?56Upz@$PZW;8!4b^m-M1oWRD69aWuiddK=K3cXp&FfaAfwj{Q83*BoW&RdlO7-s?k^j9=t!`ucER9DC4seB5 ze(UL%by{j0@|3&UbH~gx0aL&2%Y9}rwcO``R=LZOKO5aY@%ud8=d|NgvRlGbVFxY4 z+B#cPp>>}oGDbXQo#EATex`z5y3@Cv-@CZQ#B>7Yb1V4h?7x`9Yr1i59_J00=Cj53 z4*7^UaC?jS&e)OJlia450r&sdPG#Ud%^dtDzwz*=14gaSBb-hgKE@?A zjd$hq_cxkDrRpp)^*PNa3$FUK$n$gF3T^(QI*hX;KluMLJv5a|2N;HoO!mx(l!BU( zRe<3L0tyWa8bLI2w&nuo0tN;~1__3SJ+47}HE%L9taS(l(J(&5bX>U%+4O@T(}8&o zo2f9hI`KHT-~#}TkoRo> literal 0 HcmV?d00001 From b94e0255c57e2d8bdef201db8518126a673c08de Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 3 Jun 2024 08:27:41 -0700 Subject: [PATCH 14/30] SKLearn to YDF model converter | Unit tests (part 1) PiperOrigin-RevId: 639792213 --- .../port/python/dev_requirements.txt | 1 + .../port/python/ydf/model/BUILD | 21 +++ .../port/python/ydf/model/export_sklearn.py | 21 +++ .../python/ydf/model/sklearn_model_test.py | 153 ++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py diff --git a/yggdrasil_decision_forests/port/python/dev_requirements.txt b/yggdrasil_decision_forests/port/python/dev_requirements.txt index 5642d727..d9d73891 100644 --- 
a/yggdrasil_decision_forests/port/python/dev_requirements.txt +++ b/yggdrasil_decision_forests/port/python/dev_requirements.txt @@ -3,6 +3,7 @@ tensorflow_decision_forests; platform_machine != 'aarch64' and python_version >= tensorflow; platform_machine != 'aarch64' portpicker matplotlib +scikit-learn jax; platform_machine != 'aarch64' and platform_system != 'Windows' jaxlib; platform_machine != 'aarch64' and platform_system != 'Windows' optax; platform_machine != 'aarch64' and platform_system != 'Windows' and python_version >= '3.9' diff --git a/yggdrasil_decision_forests/port/python/ydf/model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/BUILD index 37ff4baa..b5d0804c 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/BUILD @@ -136,6 +136,14 @@ py_library( ], ) +# Note: This build rule does not depends on SKLearn. To use its functionalities, SKLearn needs to be +# imported manually by the call i.e. //third_party/py/sklearn. +py_library( + name = "export_sklearn", + srcs = ["export_sklearn.py"], + deps = [":generic_model"], +) + py_library( name = "model_lib", srcs = ["model_lib.py"], @@ -351,6 +359,19 @@ py_test( ], ) +py_test( + name = "sklearn_model_test", + srcs = ["sklearn_model_test.py"], + python_version = "PY3", + deps = [ + ":export_sklearn", + # absl/testing:absltest dep, + # absl/testing:parameterized dep, + # numpy dep, + # sklearn dep, + ], +) + py_test( name = "benchmark_test", srcs = ["benchmark_test.py"], diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py new file mode 100644 index 00000000..2bf82a7f --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py @@ -0,0 +1,21 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Import and export sklearn models.""" + +from ydf.model import generic_model + + +def from_sklearn(sklearn_model) -> generic_model.GenericModel: + raise NotImplementedError("from_sklearn not implemented") diff --git a/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py new file mode 100644 index 00000000..9678092e --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py @@ -0,0 +1,153 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from absl.testing import absltest +from absl.testing import parameterized +import numpy as np +from sklearn import datasets +from sklearn import ensemble +from sklearn import linear_model +from sklearn import tree +from ydf.model import export_sklearn + + +class ScikitLearnModelConverterTest(parameterized.TestCase): + + @parameterized.parameters( + (tree.DecisionTreeRegressor(random_state=42),), + (tree.ExtraTreeRegressor(random_state=42),), + (ensemble.RandomForestRegressor(random_state=42),), + (ensemble.ExtraTreesRegressor(random_state=42),), + ( + ensemble.GradientBoostingRegressor( + random_state=42, + ), + ), + (ensemble.GradientBoostingRegressor(random_state=42, init="zero"),), + ( + ensemble.GradientBoostingRegressor( + random_state=42, + init=tree.DecisionTreeRegressor(random_state=42), + ), + ), + ) + def DISABLED_test_import_regression_model( + self, + sklearn_model, + ): + features, labels = datasets.make_regression( + n_samples=100, + n_features=10, + random_state=42, + ) + sklearn_model.fit(features, labels) + sklearn_predictions = sklearn_model.predict(features).astype(np.float32) + + ydf_model = export_sklearn.from_sklearn(sklearn_model) + ydf_predictions = ydf_model.predict(features) + + np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-4) + + @parameterized.parameters( + (tree.DecisionTreeClassifier(random_state=42),), + (tree.ExtraTreeClassifier(random_state=42),), + (ensemble.RandomForestClassifier(random_state=42),), + (ensemble.ExtraTreesClassifier(random_state=42),), + ) + def DISABLED_test_import_classification_model( + self, + sklearn_model, + ): + features, labels = datasets.make_classification( + n_samples=100, + n_features=10, + n_classes=4, + n_clusters_per_class=1, + random_state=42, + ) + sklearn_model.fit(features, labels) + sklearn_predictions = sklearn_model.predict_proba(features).astype( + np.float32 + ) + + ydf_model = export_sklearn.from_sklearn(sklearn_model) + ydf_predictions = 
ydf_model.predict(features) + np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-5) + + def DISABLED_test_import_raises_when_unrecognised_model_provided(self): + features, labels = datasets.make_regression( + n_samples=100, + n_features=10, + random_state=42, + ) + sklearn_model = linear_model.LinearRegression().fit(features, labels) + with self.assertRaises(NotImplementedError): + export_sklearn.from_sklearn(sklearn_model) + + def DISABLED_test_import_raises_when_sklearn_model_is_not_fit(self): + with self.assertRaises( + ValueError, + msg="Scikit-learn model must be fit to data before converting to TF.", + ): + _ = export_sklearn.from_sklearn(tree.DecisionTreeRegressor()) + + def DISABLED_test_import_raises_when_regression_target_is_multivariate(self): + features, labels = datasets.make_regression( + n_samples=100, + n_features=10, + # This produces a two-dimensional target variable. + n_targets=2, + random_state=42, + ) + sklearn_model = tree.DecisionTreeRegressor().fit(features, labels) + with self.assertRaisesRegex( + ValueError, + "Only scalar regression and single-label classification are supported.", + ): + _ = export_sklearn.from_sklearn(sklearn_model) + + def DISABLED_test_import_raises_when_classification_target_is_multilabel( + self, + ): + features, labels = datasets.make_multilabel_classification( + n_samples=100, + n_features=10, + # This assigns two class labels per example. 
+ n_labels=2, + random_state=42, + ) + sklearn_model = tree.DecisionTreeClassifier().fit(features, labels) + with self.assertRaisesRegex( + ValueError, + "Only scalar regression and single-label classification are supported.", + ): + _ = export_sklearn.from_sklearn(sklearn_model) + + def DISABLED_test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant( + self, + ): + features, labels = datasets.make_regression( + n_samples=100, + n_features=10, + random_state=42, + ) + init_estimator = linear_model.LinearRegression() + sklearn_model = ensemble.GradientBoostingRegressor(init=init_estimator) + sklearn_model.fit(features, labels) + with self.assertRaises(ValueError): + _ = export_sklearn.from_sklearn(sklearn_model) + + +if __name__ == "__main__": + absltest.main() From 9ecb005a7d61bc44b28a1023e795747d318f756d Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 3 Jun 2024 08:32:50 -0700 Subject: [PATCH 15/30] SKLearn to YDF model converter | Converter (part 2) PiperOrigin-RevId: 639794115 --- .../port/python/ydf/model/BUILD | 11 +- .../port/python/ydf/model/export_sklearn.py | 379 +++++++++++++++++- .../python/ydf/model/sklearn_model_test.py | 41 +- 3 files changed, 416 insertions(+), 15 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/BUILD index b5d0804c..0881c414 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/BUILD @@ -141,7 +141,15 @@ py_library( py_library( name = "export_sklearn", srcs = ["export_sklearn.py"], - deps = [":generic_model"], + deps = [ + ":generic_model", + # numpy dep, + "//ydf/learner:generic_learner", + "//ydf/learner:specialized_learners", + "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/random_forest_model", + "//ydf/model/tree:all", + ], ) py_library( @@ -369,6 +377,7 @@ py_test( # absl/testing:parameterized dep, # numpy dep, # sklearn dep, + 
"//ydf/model/decision_forest_model", ], ) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py index 2bf82a7f..a1055592 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py @@ -12,10 +12,383 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Import and export sklearn models.""" +"""Import and export Scikit-Learn models from/to YDF.""" +import enum +import functools +from typing import Any, Dict, List, Optional, TypeVar, Union + +import numpy as np + +from ydf.learner import generic_learner +from ydf.learner import specialized_learners from ydf.model import generic_model +from ydf.model import tree as tree_lib +from ydf.model.gradient_boosted_trees_model import gradient_boosted_trees_model +from ydf.model.random_forest_model import random_forest_model + +# pytype: disable=import-error +# pylint: disable=g-import-not-at-top +try: + from sklearn import base + from sklearn import dummy + from sklearn import ensemble + from sklearn import tree +except ImportError as exc: + raise ImportError("Cannot import sklearn") from exc +# pylint: enable=g-import-not-at-top +# pytype: enable=import-error + + +# The column idx=0 is reserved for the label in YDF models. 
+_LABEL_COLUMN_OFFSET = 1 + +# Name of the label/feature columns +_LABEL_KEY = "label" +_FEATURES_KEY = "features" + + +class TaskType(enum.Enum): + """The type of task that a scikit-learn model performs.""" + + UNKNOWN = 1 + SCALAR_REGRESSION = 2 + SINGLE_LABEL_CLASSIFICATION = 3 + + +ScikitLearnModel = TypeVar("ScikitLearnModel", bound=base.BaseEstimator) +ScikitLearnTree = TypeVar("ScikitLearnTree", bound=tree.BaseDecisionTree) + + +def from_sklearn(sklearn_model: ScikitLearnModel) -> generic_model.GenericModel: + """Converts a tree-based scikit-learn model to a YDF model. + + Usage example: + + ```python + import ydf + from sklearn import datasets + from sklearn import tree + + # Train a SKLearn model + X, y = datasets.make_classification() + skl_model = tree.DecisionTreeClassifier().fit(X, y) + + # Convert the SKLearn model to a YDF model + ydf_model = ydf.from_sklearn(skl_model) + + # Make predictions with the YDF model + ydf_predictions = ydf_model.predict(X) + + # Analyse the YDF model + ydf_model.analyze(X) + ``` + + Currently supported models are: + * sklearn.tree.DecisionTreeClassifier + * sklearn.tree.DecisionTreeRegressor + * sklearn.tree.ExtraTreeClassifier + * sklearn.tree.ExtraTreeRegressor + * sklearn.ensemble.RandomForestClassifier + * sklearn.ensemble.RandomForestRegressor + * sklearn.ensemble.ExtraTreesClassifier + * sklearn.ensemble.ExtraTreesRegressor + * sklearn.ensemble.GradientBoostingRegressor + + Additionally, only single-label classification and scalar regression are + supported (e.g. multivariate regression models will not convert). + + Args: + sklearn_model: the scikit-learn tree based model to be converted. + + Returns: + a YDF Model that emulates the provided scikit-learn model. + """ + + if not hasattr(sklearn_model, "n_features_in_"): + raise ValueError( + "Scikit-Learn model must be fit to data before converting." 
+ ) + return _sklearn_to_ydf_model(sklearn_model) + + +def _gen_fake_features(num_features: int, num_examples: int = 2): + return np.zeros(shape=[num_examples, num_features]) + + +@functools.singledispatch +def _sklearn_to_ydf_model( + sklearn_model: ScikitLearnModel, +) -> generic_model.GenericModel: + """Builds a YDF model from the given scikit-learn model.""" + raise NotImplementedError( + f"Can't build a YDF model for {type(sklearn_model)}" + ) + + +@_sklearn_to_ydf_model.register(tree.DecisionTreeRegressor) +@_sklearn_to_ydf_model.register(tree.ExtraTreeRegressor) +def _(sklearn_model: ScikitLearnTree) -> generic_model.GenericModel: + """Converts a single scikit-learn regression tree to a YDF model.""" + ydf_model = specialized_learners.RandomForestLearner( + label=_LABEL_KEY, + task=generic_learner.Task.REGRESSION, + num_trees=0, + ).train( + { + _LABEL_KEY: [0.0, 1.0], + _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + }, + verbose=0, + ) + assert isinstance(ydf_model, random_forest_model.RandomForestModel) + ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_model) + ydf_model.add_tree(ydf_tree) + return ydf_model + + +@_sklearn_to_ydf_model.register(tree.DecisionTreeClassifier) +@_sklearn_to_ydf_model.register(tree.ExtraTreeClassifier) +def _(sklearn_model: ScikitLearnTree) -> generic_model.GenericModel: + """Converts a single scikit-learn classification tree to a YDF model.""" + ydf_model = specialized_learners.RandomForestLearner( + label=_LABEL_KEY, + task=generic_learner.Task.CLASSIFICATION, + num_trees=0, + ).train( + { + _LABEL_KEY: [str(c) for c in sklearn_model.classes_], + _FEATURES_KEY: _gen_fake_features( + sklearn_model.n_features_in_, len(sklearn_model.classes_) + ), + }, + verbose=0, + ) + assert isinstance(ydf_model, random_forest_model.RandomForestModel) + ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_model) + ydf_model.add_tree(ydf_tree) + return ydf_model + + 
+@_sklearn_to_ydf_model.register(ensemble.ExtraTreesRegressor) +@_sklearn_to_ydf_model.register(ensemble.RandomForestRegressor) +def _( + sklearn_model: Union[ + ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor + ], +) -> generic_model.GenericModel: + """Converts a forest regression model into a YDF model.""" + + ydf_model = specialized_learners.RandomForestLearner( + label=_LABEL_KEY, + task=generic_learner.Task.REGRESSION, + num_trees=0, + ).train( + { + _LABEL_KEY: [0.0, 1.0], + _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + }, + verbose=0, + ) + assert isinstance(ydf_model, random_forest_model.RandomForestModel) + for sklearn_tree in sklearn_model.estimators_: + ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_tree) + ydf_model.add_tree(ydf_tree) + return ydf_model + + +@_sklearn_to_ydf_model.register(ensemble.ExtraTreesClassifier) +@_sklearn_to_ydf_model.register(ensemble.RandomForestClassifier) +def _( + sklearn_model: Union[ + ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier + ], +) -> generic_model.GenericModel: + """Converts a forest classification model into a YDF model.""" + + ydf_model = specialized_learners.RandomForestLearner( + label=_LABEL_KEY, + task=generic_learner.Task.CLASSIFICATION, + num_trees=0, + ).train( + { + _LABEL_KEY: [str(c) for c in sklearn_model.classes_], + _FEATURES_KEY: _gen_fake_features( + sklearn_model.n_features_in_, len(sklearn_model.classes_) + ), + }, + verbose=0, + ) + assert isinstance(ydf_model, random_forest_model.RandomForestModel) + for sklearn_tree in sklearn_model.estimators_: + ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_tree) + ydf_model.add_tree(ydf_tree) + return ydf_model + + +@_sklearn_to_ydf_model.register(ensemble.GradientBoostingRegressor) +def _( + sklearn_model: ensemble.GradientBoostingRegressor, +) -> generic_model.GenericModel: + """Converts a gradient boosting regression model into a YDF model.""" + + if isinstance(sklearn_model.init_, 
dummy.DummyRegressor): + # If the initial estimator is a DummyRegressor, then it predicts a constant + # which can be passed to GradientBoostedTreeBuilder as a bias. + init_pytree = None + bias = sklearn_model.init_.constant_[0][0] + elif isinstance(sklearn_model.init_, tree.DecisionTreeRegressor): + # If the initial estimator is a DecisionTreeRegressor, we add it as the + # first tree in the ensemble and set the bias to zero. We could also support + # other tree-based initial estimators (e.g. RandomForest), but this seems + # like a niche enough use case that we don't for the moment. + init_pytree = convert_sklearn_tree_to_ydf_tree(sklearn_model.init_) + bias = 0.0 + elif sklearn_model.init_ == "zero": + init_pytree = None + bias = 0.0 + else: + raise ValueError( + "The initial estimator must be either a DummyRegressor" + "or a DecisionTreeRegressor, but got" + f"{type(sklearn_model.init_)}." + ) + + ydf_model = specialized_learners.GradientBoostedTreesLearner( + label=_LABEL_KEY, + task=generic_learner.Task.REGRESSION, + num_trees=0, + ).train( + { + _LABEL_KEY: [0.0, 1.0], + _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + }, + verbose=0, + ) + assert isinstance( + ydf_model, gradient_boosted_trees_model.GradientBoostedTreesModel + ) + + ydf_model.set_initial_predictions([bias]) + + if init_pytree: + ydf_model.add_tree(init_pytree) + + for weak_learner in sklearn_model.estimators_.ravel(): + ydf_tree = convert_sklearn_tree_to_ydf_tree( + weak_learner, weight=sklearn_model.learning_rate + ) + ydf_model.add_tree(ydf_tree) + return ydf_model + + +def convert_sklearn_tree_to_ydf_tree( + sklearn_tree: ScikitLearnTree, + weight: Optional[float] = None, +) -> tree_lib.Tree: + """Converts a scikit-learn decision tree into a YDF tree. + + Args: + sklearn_tree: a scikit-learn decision tree. + weight: an optional weight to apply to the values of the leaves in the tree. + This is intended for use when converting gradient boosted tree models. 
+ + Returns: + a YDF tree that has the same structure as the scikit-learn tree. + """ + try: + sklearn_tree_data = sklearn_tree.tree_.__getstate__() + except AttributeError as e: + raise ValueError( + "Scikit-Learn model must be fit to data before converting." + ) from e + + field_names = sklearn_tree_data["nodes"].dtype.names + task_type = _get_sklearn_tree_task_type(sklearn_tree) + if weight and task_type is TaskType.SINGLE_LABEL_CLASSIFICATION: + raise ValueError("weight should not be passed for classification trees.") + + nodes = [] + # For each node + for node_properties, node_output in zip( + sklearn_tree_data["nodes"], + sklearn_tree_data["values"], + ): + # Dictionary of node properties (e.g. "left_child", "threshold") except for + # the node output value. + node = { + field_name: field_value + for field_name, field_value in zip(field_names, node_properties) + } + + # Add the node output value to the dictionary of properties. + if task_type is TaskType.SCALAR_REGRESSION: + scaling_factor = weight if weight else 1.0 + node["value"] = tree_lib.RegressionValue( + value=node_output[0][0] * scaling_factor, + num_examples=node["weighted_n_node_samples"], + ) + elif task_type is TaskType.SINGLE_LABEL_CLASSIFICATION: + # Normalise to probabilities if we have a classification tree. + probabilities = list(node_output[0] / node_output[0].sum()) + node["value"] = tree_lib.ProbabilityValue( + probability=probabilities, + num_examples=node["weighted_n_node_samples"], + ) + else: + raise ValueError( + "Only scalar regression and single-label classification are " + "supported." + ) + nodes.append(node) + + root_node = _convert_sklearn_node_to_ydf_node( + # The root node has index zero. 
+ node_index=0, + nodes=nodes, + ) + return tree_lib.Tree(root_node) + + +def _get_sklearn_tree_task_type(sklearn_tree: ScikitLearnTree) -> TaskType: + """Finds the task type of a scikit learn tree.""" + if hasattr(sklearn_tree, "n_classes_") and sklearn_tree.n_outputs_ == 1: + return TaskType.SINGLE_LABEL_CLASSIFICATION + elif sklearn_tree.n_outputs_ == 1: + return TaskType.SCALAR_REGRESSION + else: + return TaskType.UNKNOWN + + +def _convert_sklearn_node_to_ydf_node( + node_index: int, + nodes: List[Dict[str, Any]], +) -> tree_lib.AbstractNode: + """Converts a node within a scikit-learn tree into a YDF node.""" + if node_index == -1: + raise ValueError("Bad node idx") + + node = nodes[node_index] + is_leaf = node["left_child"] == -1 + if is_leaf: + return tree_lib.Leaf(value=node["value"]) -def from_sklearn(sklearn_model) -> generic_model.GenericModel: - raise NotImplementedError("from_sklearn not implemented") + neg_child = _convert_sklearn_node_to_ydf_node( + node_index=node["left_child"], + nodes=nodes, + ) + pos_child = _convert_sklearn_node_to_ydf_node( + node_index=node["right_child"], + nodes=nodes, + ) + return tree_lib.NonLeaf( + condition=tree_lib.NumericalHigherThanCondition( + attribute=node["feature"] + _LABEL_COLUMN_OFFSET, + threshold=node["threshold"], + missing=False, + score=0.0, + ), + pos_child=pos_child, + neg_child=neg_child, + ) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py index 9678092e..d0a7941f 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py @@ -20,6 +20,7 @@ from sklearn import linear_model from sklearn import tree from ydf.model import export_sklearn +from ydf.model.decision_forest_model import decision_forest_model class ScikitLearnModelConverterTest(parameterized.TestCase): @@ -42,7 +43,7 @@ class 
ScikitLearnModelConverterTest(parameterized.TestCase): ), ), ) - def DISABLED_test_import_regression_model( + def test_import_regression_model( self, sklearn_model, ): @@ -55,8 +56,25 @@ def DISABLED_test_import_regression_model( sklearn_predictions = sklearn_model.predict(features).astype(np.float32) ydf_model = export_sklearn.from_sklearn(sklearn_model) - ydf_predictions = ydf_model.predict(features) + assert isinstance(ydf_model, decision_forest_model.DecisionForestModel) + self.assertSequenceEqual( + ydf_model.input_feature_names(), + [ + "features.00_of_10", + "features.01_of_10", + "features.02_of_10", + "features.03_of_10", + "features.04_of_10", + "features.05_of_10", + "features.06_of_10", + "features.07_of_10", + "features.08_of_10", + "features.09_of_10", + ], + ) + self.assertEqual(ydf_model.label(), "label") + ydf_predictions = ydf_model.predict({"features": features}) np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-4) @parameterized.parameters( @@ -65,7 +83,7 @@ def DISABLED_test_import_regression_model( (ensemble.RandomForestClassifier(random_state=42),), (ensemble.ExtraTreesClassifier(random_state=42),), ) - def DISABLED_test_import_classification_model( + def test_import_classification_model( self, sklearn_model, ): @@ -82,10 +100,11 @@ def DISABLED_test_import_classification_model( ) ydf_model = export_sklearn.from_sklearn(sklearn_model) - ydf_predictions = ydf_model.predict(features) + ydf_features = {"features": features} + ydf_predictions = ydf_model.predict(ydf_features) np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-5) - def DISABLED_test_import_raises_when_unrecognised_model_provided(self): + def test_import_raises_when_unrecognised_model_provided(self): features, labels = datasets.make_regression( n_samples=100, n_features=10, @@ -95,14 +114,14 @@ def DISABLED_test_import_raises_when_unrecognised_model_provided(self): with self.assertRaises(NotImplementedError): 
export_sklearn.from_sklearn(sklearn_model) - def DISABLED_test_import_raises_when_sklearn_model_is_not_fit(self): - with self.assertRaises( + def test_import_raises_when_sklearn_model_is_not_fit(self): + with self.assertRaisesRegex( ValueError, - msg="Scikit-learn model must be fit to data before converting to TF.", + "Scikit-Learn model must be fit to data before converting", ): _ = export_sklearn.from_sklearn(tree.DecisionTreeRegressor()) - def DISABLED_test_import_raises_when_regression_target_is_multivariate(self): + def test_import_raises_when_regression_target_is_multivariate(self): features, labels = datasets.make_regression( n_samples=100, n_features=10, @@ -117,7 +136,7 @@ def DISABLED_test_import_raises_when_regression_target_is_multivariate(self): ): _ = export_sklearn.from_sklearn(sklearn_model) - def DISABLED_test_import_raises_when_classification_target_is_multilabel( + def test_import_raises_when_classification_target_is_multilabel( self, ): features, labels = datasets.make_multilabel_classification( @@ -134,7 +153,7 @@ def DISABLED_test_import_raises_when_classification_target_is_multilabel( ): _ = export_sklearn.from_sklearn(sklearn_model) - def DISABLED_test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant( + def test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant( self, ): features, labels = datasets.make_regression( From 394c82b901573ef4681aa1966c4fbf6c8f44db1f Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Fri, 7 Jun 2024 02:00:33 -0700 Subject: [PATCH 16/30] Surface the SKLearn converter in the API PiperOrigin-RevId: 641176849 --- documentation/public/docs/py_api/index.md | 2 + documentation/public/docs/py_api/utilities.md | 2 + .../port/python/CHANGELOG.md | 1 + .../port/python/ydf/BUILD | 2 + .../port/python/ydf/__init__.py | 2 +- .../port/python/ydf/api_test.py | 8 +++ .../port/python/ydf/model/export_sklearn.py | 45 +------------- .../port/python/ydf/model/generic_model.py | 60 
+++++++++++++++++++ 8 files changed, 77 insertions(+), 45 deletions(-) diff --git a/documentation/public/docs/py_api/index.md b/documentation/public/docs/py_api/index.md index 9a5282dd..f81bf83c 100644 --- a/documentation/public/docs/py_api/index.md +++ b/documentation/public/docs/py_api/index.md @@ -65,6 +65,8 @@ and evaluation. e.g. training date, uid. - [from_tensorflow_decision_forests](utilities.md#ydf.from_tensorflow_decision_forests): Load a TensorFlow Decision Forests model from disk. +- [from_sklearn](utilities.md#ydf.from_sklearn): Convert a scikit-learn model + into a YDF model. ## Custom Loss diff --git a/documentation/public/docs/py_api/utilities.md b/documentation/public/docs/py_api/utilities.md index ece421e7..6657d28c 100644 --- a/documentation/public/docs/py_api/utilities.md +++ b/documentation/public/docs/py_api/utilities.md @@ -26,6 +26,8 @@ ::: ydf.from_tensorflow_decision_forests +::: ydf.from_sklearn + ::: ydf.RegressionLoss ::: ydf.BinaryClassificationLoss diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 42b6785b..5339e1e0 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -7,6 +7,7 @@ - Add `max_depth` argument to `model.print_tree`. - Add `verbose` argument to `train` method which is equivalent but sometime more convenient than`ydf.verbose`. +- Add SKLearn to YDF model converter: `ydf.from_sklearn`. 
### Fix diff --git a/yggdrasil_decision_forests/port/python/ydf/BUILD b/yggdrasil_decision_forests/port/python/ydf/BUILD index aea54afe..1c910007 100644 --- a/yggdrasil_decision_forests/port/python/ydf/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/BUILD @@ -22,6 +22,7 @@ py_library( "//ydf/learner:tuner", "//ydf/learner:worker", "//ydf/model:export_jax", # buildcleaner: keep + "//ydf/model:export_sklearn", # buildcleaner: keep "//ydf/model:export_tf", # buildcleaner: keep "//ydf/model:generic_model", "//ydf/model:model_lib", @@ -98,6 +99,7 @@ py_test( # absl/testing:absltest dep, # jax dep, # buildcleaner: keep # pandas dep, + # sklearn dep, # buildcleaner: keep # tensorflow dep, # buildcleaner: keep # tensorflow_decision_forests dep, # buildcleaner: keep "//ydf/utils:test_utils", diff --git a/yggdrasil_decision_forests/port/python/ydf/__init__.py b/yggdrasil_decision_forests/port/python/ydf/__init__.py index 412c5c74..d87a56b3 100644 --- a/yggdrasil_decision_forests/port/python/ydf/__init__.py +++ b/yggdrasil_decision_forests/port/python/ydf/__init__.py @@ -74,7 +74,7 @@ def _check_install(): from ydf.dataset.dataset import create_vertical_dataset from ydf.model.model_metadata import ModelMetadata from ydf.model.model_lib import from_tensorflow_decision_forests - +from ydf.model.generic_model import from_sklearn # Custom Loss from ydf.learner.custom_loss import RegressionLoss diff --git a/yggdrasil_decision_forests/port/python/ydf/api_test.py b/yggdrasil_decision_forests/port/python/ydf/api_test.py index 30deab15..44024e53 100644 --- a/yggdrasil_decision_forests/port/python/ydf/api_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/api_test.py @@ -21,6 +21,8 @@ from absl import logging from absl.testing import absltest import pandas as pd +from sklearn import ensemble as skl_ensemble +import sklearn.datasets import ydf # In the world, use "import ydf" from ydf.utils import test_utils @@ -224,6 +226,12 @@ def test_export_jax_function(self): model = 
ydf.load_model(model_path) _ = model.to_jax_function() + def test_import_sklearn_model(self): + X, y = sklearn.datasets.make_classification() + skl_model = skl_ensemble.RandomForestClassifier().fit(X, y) + ydf_model = ydf.from_sklearn(skl_model) + _ = ydf_model.predict({"features": X}) + if __name__ == "__main__": absltest.main() diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py index a1055592..3d6b6f39 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py @@ -61,50 +61,7 @@ class TaskType(enum.Enum): def from_sklearn(sklearn_model: ScikitLearnModel) -> generic_model.GenericModel: - """Converts a tree-based scikit-learn model to a YDF model. - - Usage example: - - ```python - import ydf - from sklearn import datasets - from sklearn import tree - - # Train a SKLearn model - X, y = datasets.make_classification() - skl_model = tree.DecisionTreeClassifier().fit(X, y) - - # Convert the SKLearn model to a YDF model - ydf_model = ydf.from_sklearn(skl_model) - - # Make predictions with the YDF model - ydf_predictions = ydf_model.predict(X) - - # Analyse the YDF model - ydf_model.analyze(X) - ``` - - Currently supported models are: - * sklearn.tree.DecisionTreeClassifier - * sklearn.tree.DecisionTreeRegressor - * sklearn.tree.ExtraTreeClassifier - * sklearn.tree.ExtraTreeRegressor - * sklearn.ensemble.RandomForestClassifier - * sklearn.ensemble.RandomForestRegressor - * sklearn.ensemble.ExtraTreesClassifier - * sklearn.ensemble.ExtraTreesRegressor - * sklearn.ensemble.GradientBoostingRegressor - - Additionally, only single-label classification and scalar regression are - supported (e.g. multivariate regression models will not convert). - - Args: - sklearn_model: the scikit-learn tree based model to be converted. 
- - Returns: - a YDF Model that emulates the provided scikit-learn model. - """ - + """Converts a tree-based scikit-learn model to a YDF model.""" if not hasattr(sklearn_model, "n_features_in_"): raise ValueError( "Scikit-Learn model must be fit to data before converting." diff --git a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py index e6bad13f..6baf0fb1 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py @@ -1089,6 +1089,53 @@ def force_engine(self, engine_name: Optional[str]) -> None: self._model.ForceEngine(engine_name) +def from_sklearn(sklearn_model: Any) -> GenericModel: + """Converts a tree-based scikit-learn model to a YDF model. + + Usage example: + + ```python + import ydf + from sklearn import datasets + from sklearn import tree + + # Train a SKLearn model + X, y = datasets.make_classification() + skl_model = tree.DecisionTreeClassifier().fit(X, y) + + # Convert the SKLearn model to a YDF model + ydf_model = ydf.from_sklearn(skl_model) + + # Make predictions with the YDF model + ydf_predictions = ydf_model.predict({"features": X}) + + # Analyse the YDF model + ydf_model.analyze({"features": X}) + ``` + + Currently supported models are: + * sklearn.tree.DecisionTreeClassifier + * sklearn.tree.DecisionTreeRegressor + * sklearn.tree.ExtraTreeClassifier + * sklearn.tree.ExtraTreeRegressor + * sklearn.ensemble.RandomForestClassifier + * sklearn.ensemble.RandomForestRegressor + * sklearn.ensemble.ExtraTreesClassifier + * sklearn.ensemble.ExtraTreesRegressor + * sklearn.ensemble.GradientBoostingRegressor + + Additionally, only single-label classification and scalar regression are + supported (e.g. multivariate regression models will not convert). + + Args: + sklearn_model: the scikit-learn tree based model to be converted. 
+ + Returns: + a YDF Model that emulates the provided scikit-learn model. + """ + return _get_export_sklearn().from_sklearn(sklearn_model) + + def _get_export_jax(): try: from ydf.model import export_jax # pylint: disable=g-import-not-at-top,import-outside-toplevel # pytype: disable=import-error @@ -1114,4 +1161,17 @@ def _get_export_tf(): ) from exc +def _get_export_sklearn(): + try: + from ydf.model import export_sklearn # pylint: disable=g-import-not-at-top,import-outside-toplevel # pytype: disable=import-error + + return export_sklearn + except ImportError as exc: + raise ValueError( + '"scikit-learn" is needed by this function. Make sure ' + "it installed and try again. If using pip, run `pip install" + " scikit-learn`." + ) from exc + + ModelType = TypeVar("ModelType", bound=GenericModel) From 33c15add96dfe66b8f845b1c71f29ae922e1ac3d Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 13 Jun 2024 03:19:13 -0700 Subject: [PATCH 17/30] Anomaly detection; Create the ANOMALY_DETECTION task (part 1) PiperOrigin-RevId: 642918719 --- yggdrasil_decision_forests/learner/BUILD | 1 + .../learner/abstract_learner.cc | 50 ++++++++++----- .../learner/abstract_learner_test.cc | 64 +++++++++++++++---- yggdrasil_decision_forests/metric/metric.cc | 30 +++++++++ .../metric/metric.proto | 13 +++- .../metric/metric_test.cc | 50 +++++++++++++++ yggdrasil_decision_forests/metric/report.cc | 9 +++ yggdrasil_decision_forests/metric/report.h | 2 + .../metric/report_test.cc | 44 +++++++++++++ .../model/abstract_model.cc | 49 +++++++++++++- .../model/abstract_model.proto | 6 ++ .../model/abstract_model_test.cc | 29 +++++++++ .../model/decision_tree/decision_tree.cc | 27 ++++---- .../model/prediction.proto | 8 ++- .../python/ydf/learner/generic_learner.py | 13 +++- .../specialized_learners_pre_generated.py | 3 +- .../port/python/ydf/model/generic_model.py | 6 ++ .../utils/model_analysis.cc | 9 ++- .../utils/test_utils.cc | 14 ++++ 19 files changed, 374 insertions(+), 53 
deletions(-) diff --git a/yggdrasil_decision_forests/learner/BUILD b/yggdrasil_decision_forests/learner/BUILD index b4749b67..15d94263 100644 --- a/yggdrasil_decision_forests/learner/BUILD +++ b/yggdrasil_decision_forests/learner/BUILD @@ -154,6 +154,7 @@ cc_test( "//yggdrasil_decision_forests/model:model_testing", "//yggdrasil_decision_forests/model:prediction_cc_proto", "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:test", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", diff --git a/yggdrasil_decision_forests/learner/abstract_learner.cc b/yggdrasil_decision_forests/learner/abstract_learner.cc index 8a4f7b50..cee772f1 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner.cc @@ -67,23 +67,27 @@ absl::Status AbstractLearner::LinkTrainingConfig( const dataset::proto::DataSpecification& data_spec, proto::TrainingConfigLinking* config_link) { // Label. - int32_t label; - if (!training_config.has_label()) { - STATUS_FATAL("No label specified in the training config. Aborting."); + int32_t label = -1; + // Anomaly detection is the only task that can have or not have labels. + if (training_config.task() != proto::ANOMALY_DETECTION && + !training_config.has_label()) { + STATUS_FATAL("No label specified in the training config."); + } + if (training_config.has_label()) { + RETURN_IF_ERROR(dataset::GetSingleColumnIdxFromName( + training_config.label(), data_spec, &label, + "Retrieving label column failed.")); + config_link->set_num_label_classes( + data_spec.columns(label).categorical().number_of_unique_values()); } - RETURN_IF_ERROR(dataset::GetSingleColumnIdxFromName( - training_config.label(), data_spec, &label, - "Retrieving label column failed. 
")); config_link->set_label(label); - config_link->set_num_label_classes( - data_spec.columns(label).categorical().number_of_unique_values()); // CV group. int32_t cv_group = -1; if (training_config.has_cv_group()) { RETURN_IF_ERROR(dataset::GetSingleColumnIdxFromName( training_config.cv_group(), data_spec, &cv_group, - "Retrieving cross-validation group column failed. ")); + "Retrieving cross-validation group column failed.")); } config_link->set_cv_group(cv_group); @@ -475,6 +479,15 @@ absl::Status AbstractLearner::CheckConfiguration( const model::proto::TrainingConfig& config, const model::proto::TrainingConfigLinking& config_link, const model::proto::DeploymentConfig& deployment) { + if (deployment.num_threads() < 0) { + return absl::InvalidArgumentError("The number of threads should be >= 0"); + } + + if (config.task() == model::proto::Task::ANOMALY_DETECTION) { + // Note: ANOMALY_DETECTION is the only task that does not need a label. + return absl::OkStatus(); + } + const auto& label_col_spec = data_spec.columns(config_link.label()); // Check the type of the label column. switch (config.task()) { @@ -487,7 +500,8 @@ absl::Status AbstractLearner::CheckConfiguration( return absl::InvalidArgumentError(absl::StrCat( "The label column \"", config.label(), "\" should be CATEGORICAL for a CLASSIFICATION " - "Task. Note: BOOLEAN columns should be set as CATEGORICAL using a " + "Task. Note: BOOLEAN columns should be set as CATEGORICAL using " + "a " "dataspec guide, even for a binary classification task.")); } break; @@ -535,7 +549,8 @@ absl::Status AbstractLearner::CheckConfiguration( return absl::InvalidArgumentError( "The \"ranking_group\" column must have a " "\"max_number_of_unique_values\" " - "of -1 in the dataspec guide. This ensures that rare groups are " + "of -1 in the dataspec guide. 
This ensures that rare groups " + "are " "not pruned."); } } @@ -543,7 +558,8 @@ absl::Status AbstractLearner::CheckConfiguration( case model::proto::Task::CATEGORICAL_UPLIFT: { if (label_col_spec.type() != dataset::proto::ColumnType::CATEGORICAL) { return absl::InvalidArgumentError( - "The label column should be CATEGORICAL for an CATEGORICAL_UPLIFT " + "The label column should be CATEGORICAL for an " + "CATEGORICAL_UPLIFT " "task."); } if (!config_link.has_uplift_treatment() || @@ -592,6 +608,8 @@ absl::Status AbstractLearner::CheckConfiguration( "Uplift only supports binary treatments."); } } break; + case model::proto::Task::ANOMALY_DETECTION: + return absl::InternalError("ANOMALY_DETECTION has no labels"); } // Check the label don't contains NaN. if (label_col_spec.count_nas() != 0) { @@ -600,9 +618,7 @@ absl::Status AbstractLearner::CheckConfiguration( "missing values. $1 missing values are found.", config.label(), label_col_spec.count_nas())); } - if (deployment.num_threads() < 0) { - return absl::InvalidArgumentError("The number of threads should be >= 0"); - } + return absl::OkStatus(); } @@ -818,7 +834,9 @@ void InitializeModelWithAbstractTrainingConfig( const proto::TrainingConfig& training_config, const proto::TrainingConfigLinking& training_config_linking, AbstractModel* model) { - model->set_label_col_idx(training_config_linking.label()); + if (training_config.task() != proto::Task::ANOMALY_DETECTION) { + model->set_label_col_idx(training_config_linking.label()); + } if (training_config.task() == proto::Task::RANKING) { model->set_ranking_group_col(training_config_linking.ranking_group()); diff --git a/yggdrasil_decision_forests/learner/abstract_learner_test.cc b/yggdrasil_decision_forests/learner/abstract_learner_test.cc index a01fc566..b7a20faa 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner_test.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner_test.cc @@ -32,6 +32,7 @@ #include 
"yggdrasil_decision_forests/model/model_testing.h" #include "yggdrasil_decision_forests/model/prediction.pb.h" #include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/test.h" #include "yggdrasil_decision_forests/learner/abstract_learner.h" @@ -57,8 +58,8 @@ TEST(AbstractModel, LinkTrainingConfig) { data_spec.add_columns()->set_name("D"); proto::TrainingConfigLinking config_link; - CHECK_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, - &config_link)); + ASSERT_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, + &config_link)); EXPECT_EQ(config_link.label(), 0); EXPECT_THAT(config_link.features(), ElementsAre(1, 3)); @@ -76,8 +77,8 @@ TEST(AbstractModel, LinkTrainingConfigNoInputFeatures) { data_spec.add_columns()->set_name("D"); proto::TrainingConfigLinking config_link; - CHECK_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, - &config_link)); + ASSERT_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, + &config_link)); EXPECT_EQ(config_link.label(), 0); EXPECT_THAT(config_link.features(), ElementsAre(1, 2, 3)); @@ -100,8 +101,8 @@ TEST(AbstractModel, LinkTrainingConfigFullyMissingFeatures) { std::numeric_limits::quiet_NaN()); proto::TrainingConfigLinking config_link; - CHECK_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, - &config_link)); + ASSERT_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, + &config_link)); EXPECT_EQ(config_link.label(), 0); EXPECT_THAT(config_link.features(), ElementsAre(3)); @@ -120,7 +121,44 @@ TEST(AbstractModel, LinkTrainingConfigMissingLabel) { EXPECT_THAT(AbstractLearner::LinkTrainingConfig(training_config, data_spec, &config_link), StatusIs(absl::StatusCode::kInvalidArgument, - "No label specified in the training config. 
Aborting.")); + "No label specified in the training config.")); +} + +TEST(AbstractModel, LinkTrainingConfigAnomalyDetection) { + proto::TrainingConfig training_config; + training_config.set_task(proto::Task::ANOMALY_DETECTION); + training_config.add_features(".*"); + + dataset::proto::DataSpecification data_spec; + data_spec.add_columns()->set_name("A"); + data_spec.add_columns()->set_name("B"); + data_spec.add_columns()->set_name("C"); + + proto::TrainingConfigLinking config_link; + ASSERT_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, + &config_link)); + + EXPECT_EQ(config_link.label(), -1); + EXPECT_THAT(config_link.features(), ElementsAre(0, 1, 2)); +} + +TEST(AbstractModel, LinkTrainingConfigAnomalyDetectionWithLabel) { + proto::TrainingConfig training_config; + training_config.set_task(proto::Task::ANOMALY_DETECTION); + training_config.set_label("A"); + training_config.add_features(".*"); + + dataset::proto::DataSpecification data_spec; + data_spec.add_columns()->set_name("A"); + data_spec.add_columns()->set_name("B"); + data_spec.add_columns()->set_name("C"); + + proto::TrainingConfigLinking config_link; + ASSERT_OK(AbstractLearner::LinkTrainingConfig(training_config, data_spec, + &config_link)); + + EXPECT_EQ(config_link.label(), 0); + EXPECT_THAT(config_link.features(), ElementsAre(1, 2)); } TEST(AbstractLearner, GenericHyperParameters) { @@ -218,7 +256,7 @@ TEST(AbstractLearner, EvaluateLearner) { valid_dataset = {}) const override { auto model = absl::make_unique(); model::proto::TrainingConfigLinking config_link; - CHECK_OK(AbstractLearner::LinkTrainingConfig( + RETURN_IF_ERROR(AbstractLearner::LinkTrainingConfig( training_config(), train_dataset.data_spec(), &config_link)); InitializeModelWithAbstractTrainingConfig(training_config(), config_link, model.get()); @@ -246,12 +284,12 @@ TEST(AbstractLearner, EvaluateLearner) { dataset::VerticalDataset dataset; dataset.set_data_spec(data_spec); - 
CHECK_OK(dataset.CreateColumnsFromDataspec()); + ASSERT_OK(dataset.CreateColumnsFromDataspec()); for (int i = 0; i < 1000; i++) { - CHECK_OK(dataset.AppendExampleWithStatus({{"a", "1"}})); - CHECK_OK(dataset.AppendExampleWithStatus({{"a", "2"}})); - CHECK_OK(dataset.AppendExampleWithStatus({{"a", "1"}})); - CHECK_OK(dataset.AppendExampleWithStatus({{"a", "2"}})); + ASSERT_OK(dataset.AppendExampleWithStatus({{"a", "1"}})); + ASSERT_OK(dataset.AppendExampleWithStatus({{"a", "2"}})); + ASSERT_OK(dataset.AppendExampleWithStatus({{"a", "1"}})); + ASSERT_OK(dataset.AppendExampleWithStatus({{"a", "2"}})); } const metric::proto::EvaluationOptions evaluation_options = diff --git a/yggdrasil_decision_forests/metric/metric.cc b/yggdrasil_decision_forests/metric/metric.cc index bb0b2f25..879106f4 100644 --- a/yggdrasil_decision_forests/metric/metric.cc +++ b/yggdrasil_decision_forests/metric/metric.cc @@ -395,6 +395,12 @@ void MergeEvaluationUplift(const proto::EvaluationResults::Uplift& src, std::max(dst->num_treatments(), src.num_treatments())); } +void MergeEvaluationAnomalyDetection( + const proto::EvaluationResults::AnomalyDetection& src, + proto::EvaluationResults::AnomalyDetection* dst) { + // No merging to be done. 
+} + } // namespace float PValueMeanIsGreaterThanZero(const std::vector& sample) { @@ -835,6 +841,9 @@ absl::Status InitializeEvaluation(const proto::EvaluationOptions& option, RETURN_IF_ERROR(uplift::InitializeNumericalUpliftEvaluation( option, label_column, eval)); break; + case model::proto::Task::ANOMALY_DETECTION: + eval->mutable_anomaly_detection(); + break; default: STATUS_FATALS("Non supported task type: ", model::proto::Task_Name(option.task())); @@ -917,6 +926,9 @@ absl::Status AddPrediction(const proto::EvaluationOptions& option, need_prediction_sampling = true; break; + case model::proto::Task::ANOMALY_DETECTION: + break; + default: break; } @@ -970,6 +982,10 @@ absl::Status FinalizeEvaluation(const proto::EvaluationOptions& option, RETURN_IF_ERROR(uplift::FinalizeUpliftMetricsFromSampledPredictions( option, label_column, eval)); break; + + case model::proto::Task::ANOMALY_DETECTION: + break; + default: break; } @@ -1296,6 +1312,10 @@ absl::Status MergeEvaluation(const proto::EvaluationOptions& option, case proto::EvaluationResults::kUplift: MergeEvaluationUplift(src.uplift(), dst->mutable_uplift()); break; + case proto::EvaluationResults::kAnomalyDetection: + MergeEvaluationAnomalyDetection(src.anomaly_detection(), + dst->mutable_anomaly_detection()); + break; case proto::EvaluationResults::TYPE_NOT_SET: return absl::InvalidArgumentError("Non initialized evaluation"); break; @@ -1658,6 +1678,11 @@ absl::StatusOr GetMetricUplift( return absl::InvalidArgumentError("Not implemented"); } } +absl::StatusOr GetMetricAnomalyDetection( + const proto::EvaluationResults& evaluation, + const proto::MetricAccessor::AnomalyDetection& metric) { + return absl::InvalidArgumentError("No AnomalyDetection metric"); +} absl::StatusOr GetUserCustomizedMetrics( const proto::EvaluationResults& evaluation, @@ -1714,6 +1739,11 @@ absl::StatusOr GetMetric(const proto::EvaluationResults& evaluation, return GetMetricFatalMissing("uplift", evaluation, metric); } return 
GetMetricUplift(evaluation, metric.uplift()); + case proto::MetricAccessor::kAnomalyDetection: + if (!evaluation.has_anomaly_detection()) { + return GetMetricFatalMissing("anomaly_detection", evaluation, metric); + } + return GetMetricAnomalyDetection(evaluation, metric.anomaly_detection()); case proto::MetricAccessor::kUserMetric: return GetUserCustomizedMetrics(evaluation, metric.user_metric()); case proto::MetricAccessor::TASK_NOT_SET: diff --git a/yggdrasil_decision_forests/metric/metric.proto b/yggdrasil_decision_forests/metric/metric.proto index 14c98e3c..b78d4d73 100644 --- a/yggdrasil_decision_forests/metric/metric.proto +++ b/yggdrasil_decision_forests/metric/metric.proto @@ -68,6 +68,8 @@ message EvaluationOptions { message Uplift {} + message AnomalyDetection {} + // Task of the model. optional model.proto.Task task = 1 [default = CLASSIFICATION]; // Evaluation configuration depending on the type of problem. @@ -76,6 +78,7 @@ message EvaluationOptions { Regression regression = 3; Ranking ranking = 7; Uplift uplift = 8; + AnomalyDetection anomaly_detection = 9; } // Percentage of sampled predictions. If no predictions need to be sampled // (i.e. no part of the configuration needs it), this parameter is ignored and @@ -97,7 +100,7 @@ message EvaluationOptions { // however. optional dataset.proto.WeightDefinition weights = 6; - // Next ID: 8 + // Next ID: 10 } // Evaluation results of a model. @@ -204,6 +207,8 @@ message EvaluationResults { optional double cate_calibration = 4; } + message AnomalyDetection {} + // Number of predictions (weighted by example weight). optional double count_predictions = 1 [default = 0]; // Number of predictions (without weights). @@ -220,6 +225,7 @@ message EvaluationResults { Regression regression = 7; Ranking ranking = 12; Uplift uplift = 14; + AnomalyDetection anomaly_detection = 16; } // The dataspec of the label column. 
This field can contain information such // as: The possible label values, the distribution of the label values, the @@ -245,7 +251,7 @@ message EvaluationResults { // User can use this field to store value for any customized metrics. map user_metrics = 15; - // Next ID: 16 + // Next ID: 17 } // Reference a metric."MetricAccessor" is used as a parameter of the function @@ -262,6 +268,7 @@ message MetricAccessor { Loss loss = 3; Ranking ranking = 4; Uplift uplift = 5; + AnomalyDetection anomaly_detection = 7; UserMetric user_metric = 6; } @@ -335,6 +342,8 @@ message MetricAccessor { message CateCalibration {} } + message AnomalyDetection {} + message UserMetric { optional string metrics_name = 1; } diff --git a/yggdrasil_decision_forests/metric/metric_test.cc b/yggdrasil_decision_forests/metric/metric_test.cc index 8797b32c..aad1db67 100644 --- a/yggdrasil_decision_forests/metric/metric_test.cc +++ b/yggdrasil_decision_forests/metric/metric_test.cc @@ -809,6 +809,20 @@ TEST(Metric, GetMetricRegression) { EXPECT_NEAR(mae, MAE(results_regression), 0.0001); } +TEST(Metric, GetMetricAnomalyDetection) { + const proto::EvaluationResults results = PARSE_TEST_PROTO(R"pb( + task: ANOMALY_DETECTION + label_column { type: CATEGORICAL } + anomaly_detection {} + count_predictions: 10 + )pb"); + + EXPECT_THAT( + GetMetric(results, PARSE_TEST_PROTO(R"pb(anomaly_detection {})pb")) + .status(), + StatusIs(absl::StatusCode::kInvalidArgument)); +} + TEST(Metric, GetMetricClassification) { const proto::EvaluationResults results_classification = PARSE_TEST_PROTO(R"pb( task: CLASSIFICATION @@ -1700,6 +1714,42 @@ TEST(Metric, MergeEvaluationUplifting) { EXPECT_THAT(dst, EqualsProto(expected_dst)); } +TEST(Metric, MergeEvaluationAnomalyDetection) { + const proto::EvaluationResults src = PARSE_TEST_PROTO( + R"pb( + count_predictions: 1 + count_predictions_no_weight: 2 + sampled_predictions { example_key: "a" } + count_sampled_predictions: 3 + training_duration_in_seconds: 4 + num_folds: 5 + 
anomaly_detection {} + )pb"); + proto::EvaluationResults dst = PARSE_TEST_PROTO( + R"pb( + count_predictions: 10 + count_predictions_no_weight: 20 + sampled_predictions { example_key: "b" } + count_sampled_predictions: 30 + training_duration_in_seconds: 40 + num_folds: 50 + anomaly_detection {} + )pb"); + EXPECT_OK(MergeEvaluation({}, src, &dst)); + proto::EvaluationResults expected_dst = PARSE_TEST_PROTO( + R"pb( + count_predictions: 11 + count_predictions_no_weight: 22 + sampled_predictions { example_key: "b" } + sampled_predictions { example_key: "a" } + count_sampled_predictions: 33 + training_duration_in_seconds: 44 + num_folds: 55 + anomaly_detection {} + )pb"); + EXPECT_THAT(dst, EqualsProto(expected_dst)); +} + TEST(Metric, HigherIsBetter) { { const proto::MetricAccessor accessor = PARSE_TEST_PROTO( diff --git a/yggdrasil_decision_forests/metric/report.cc b/yggdrasil_decision_forests/metric/report.cc index d0159775..22f4c764 100644 --- a/yggdrasil_decision_forests/metric/report.cc +++ b/yggdrasil_decision_forests/metric/report.cc @@ -382,6 +382,9 @@ absl::Status AppendTextReportWithStatus(const proto::EvaluationResults& eval, case proto::EvaluationResults::TypeCase::kUplift: RETURN_IF_ERROR(AppendTextReportUplift(eval, report)); break; + case proto::EvaluationResults::TypeCase::kAnomalyDetection: + RETURN_IF_ERROR(AppendTextReportAnomalyDetection(eval, report)); + break; default: STATUS_FATAL("This model does not support evaluation reports."); } @@ -576,6 +579,12 @@ absl::Status AppendTextReportUplift(const proto::EvaluationResults& eval, return absl::OkStatus(); } +absl::Status AppendTextReportAnomalyDetection( + const proto::EvaluationResults& eval, std::string* report) { + absl::StrAppend(report, "No report for anomaly detection\n"); + return absl::OkStatus(); +} + absl::Status AppendHtmlReport(const proto::EvaluationResults& eval, std::string* html_report, const HtmlReportOptions& options) { diff --git a/yggdrasil_decision_forests/metric/report.h 
b/yggdrasil_decision_forests/metric/report.h index 25f398d7..3040e903 100644 --- a/yggdrasil_decision_forests/metric/report.h +++ b/yggdrasil_decision_forests/metric/report.h @@ -43,6 +43,8 @@ absl::Status AppendTextReportRanking(const proto::EvaluationResults& eval, std::string* report); absl::Status AppendTextReportUplift(const proto::EvaluationResults& eval, std::string* report); +absl::Status AppendTextReportAnomalyDetection( + const proto::EvaluationResults& eval, std::string* report); // Add the report in a html format. struct HtmlReportOptions { diff --git a/yggdrasil_decision_forests/metric/report_test.cc b/yggdrasil_decision_forests/metric/report_test.cc index 11d79d0c..3e3e784a 100644 --- a/yggdrasil_decision_forests/metric/report_test.cc +++ b/yggdrasil_decision_forests/metric/report_test.cc @@ -140,6 +140,50 @@ TEST(Report, HtmlReportRegression) { CHECK_OK(file::SetContent(path, html_report)); } +TEST(Report, HtmlReportAnomalyDetection) { + // Create a fake column specification. + dataset::proto::Column label_column; + label_column.set_type(dataset::proto::ColumnType::CATEGORICAL); + label_column.set_name("label"); + label_column.mutable_categorical()->set_number_of_unique_values(3); + label_column.mutable_categorical()->set_most_frequent_value(1); + label_column.mutable_categorical()->set_is_already_integerized(false); + auto& vocab = *label_column.mutable_categorical()->mutable_items(); + vocab["a"].set_index(0); + vocab["b"].set_index(1); + vocab["c"].set_index(2); + + // Configure the evaluation. + utils::RandomEngine rnd; + proto::EvaluationOptions option; + option.set_task(model::proto::Task::ANOMALY_DETECTION); + + // Initialize. + proto::EvaluationResults eval; + ASSERT_OK(InitializeEvaluation(option, label_column, &eval)); + model::proto::Prediction pred; + auto* pred_proba = pred.mutable_classification()->mutable_distribution(); + pred_proba->mutable_counts()->Resize(3, 0); + pred_proba->set_sum(1); + + // Add some predictions. 
+ pred.mutable_anomaly_detection()->set_value(0.5); + ASSERT_OK(AddPrediction(option, pred, &rnd, &eval)); + + pred.mutable_anomaly_detection()->set_value(0.1); + ASSERT_OK(AddPrediction(option, pred, &rnd, &eval)); + + // Finalize. + ASSERT_OK(FinalizeEvaluation(option, label_column, &eval)); + + std::string html_report; + ASSERT_OK(AppendHtmlReport(eval, &html_report)); + + const auto path = + file::JoinPath(test::TmpDirectory(), "report_anomaly_detection.html"); + YDF_LOG(INFO) << "path: " << path; + ASSERT_OK(file::SetContent(path, html_report)); +} } // namespace } // namespace metric } // namespace yggdrasil_decision_forests diff --git a/yggdrasil_decision_forests/model/abstract_model.cc b/yggdrasil_decision_forests/model/abstract_model.cc index 42e8295b..919c5bb6 100644 --- a/yggdrasil_decision_forests/model/abstract_model.cc +++ b/yggdrasil_decision_forests/model/abstract_model.cc @@ -131,6 +131,14 @@ AbstractModel::EvaluateWithStatus( if (option.task() != task()) { STATUS_FATAL("The evaluation and the model tasks differ."); } + if (label_col_idx_ == -1) { + if (task() == proto::Task::ANOMALY_DETECTION) { + STATUS_FATAL( + "Cannot evaluate an anomaly detection model without a label."); + } else { + STATUS_FATAL("A model cannot be evaluated without a label."); + } + } metric::proto::EvaluationResults eval; RETURN_IF_ERROR( metric::InitializeEvaluation(option, LabelColumnSpec(), &eval)); @@ -147,6 +155,14 @@ AbstractModel::EvaluateWithEngine( if (option.task() != task()) { STATUS_FATAL("The evaluation and the model tasks differ."); } + if (label_col_idx_ == -1) { + if (task() == proto::Task::ANOMALY_DETECTION) { + STATUS_FATAL( + "Cannot evaluate an anomaly detection model without a label."); + } else { + STATUS_FATAL("A model cannot be evaluated without a label."); + } + } metric::proto::EvaluationResults eval; RETURN_IF_ERROR( metric::InitializeEvaluation(option, LabelColumnSpec(), &eval)); @@ -313,6 +329,12 @@ void FloatToProtoPrediction(const std::vector& 
src_prediction, src_prediction.begin() + (example_idx + 1) * num_prediction_dimensions}; break; + + case proto::ANOMALY_DETECTION: + DCHECK_EQ(num_prediction_dimensions, 1); + dst_prediction->mutable_anomaly_detection()->set_value( + src_prediction[example_idx]); + break; } } @@ -660,6 +682,9 @@ absl::Status SetGroundTruth(const dataset::VerticalDataset& dataset, ->values(); prediction->mutable_uplift()->set_treatment(treatments[row_idx]); } break; + case proto::Task::ANOMALY_DETECTION: + // No ground truth to set. + break; default: STATUS_FATAL("Non supported task."); @@ -707,6 +732,10 @@ absl::Status SetGroundTruth(const dataset::proto::Example& example, break; } } break; + case proto::Task::ANOMALY_DETECTION: + // No ground truth to set. + break; + default: STATUS_FATAL("Non supported task."); break; @@ -725,8 +754,10 @@ void AbstractModel::AppendDescriptionAndStatistics( const bool full_definition, std::string* description) const { absl::StrAppendFormat(description, "Type: \"%s\"\n", name()); absl::StrAppendFormat(description, "Task: %s\n", proto::Task_Name(task())); - absl::StrAppendFormat(description, "Label: \"%s\"\n", - data_spec().columns(label_col_idx_).name()); + if (label_col_idx_ != -1) { + absl::StrAppendFormat(description, "Label: \"%s\"\n", + data_spec().columns(label_col_idx_).name()); + } if (ranking_group_col_idx_ != -1) { absl::StrAppendFormat(description, "Rank group: \"%s\"\n", data_spec().columns(ranking_group_col_idx_).name()); @@ -1041,6 +1072,11 @@ void PredictionMerger::Add(const proto::Prediction& src, dst_->mutable_ranking()->set_relevance( dst_->ranking().relevance() + src_factor * src.ranking().relevance()); break; + case proto::Prediction::kAnomalyDetection: + dst_->mutable_anomaly_detection()->set_value( + dst_->anomaly_detection().value() + + src_factor * src.anomaly_detection().value()); + break; default: CHECK(false); } @@ -1066,6 +1102,10 @@ void PredictionMerger::ScalePrediction(const float scale, case 
proto::Prediction::kRanking: dst->mutable_ranking()->set_relevance(dst->ranking().relevance() * scale); break; + case proto::Prediction::kAnomalyDetection: + dst->mutable_anomaly_detection()->set_value( + dst->anomaly_detection().value() * scale); + break; default: break; } @@ -1092,7 +1132,7 @@ void AbstractModel::CopyAbstractModelMetaData(AbstractModel* dst) const { } absl::Status AbstractModel::Validate() const { - if (label_col_idx_ < 0 || label_col_idx_ >= data_spec().columns_size()) { + if (label_col_idx_ < -1 || label_col_idx_ >= data_spec().columns_size()) { return absl::InvalidArgumentError("Invalid label column"); } @@ -1147,6 +1187,9 @@ absl::Status AbstractModel::Validate() const { dataset::proto::ColumnType_Name(label_col_spec().type()))); } break; + case model::proto::Task::ANOMALY_DETECTION: + // Nothing to check + break; default: return absl::InvalidArgumentError("Unknown task"); } diff --git a/yggdrasil_decision_forests/model/abstract_model.proto b/yggdrasil_decision_forests/model/abstract_model.proto index b50c47ef..f4a660f4 100644 --- a/yggdrasil_decision_forests/model/abstract_model.proto +++ b/yggdrasil_decision_forests/model/abstract_model.proto @@ -39,6 +39,12 @@ enum Task { // Predicts the incremental impact of a treatment on a numerical outcome. // See https://en.wikipedia.org/wiki/Uplift_modelling. NUMERICAL_UPLIFT = 5; + + // Predicts if an instance is similar to the majority of the training data or + // anomalous (a.k.a. an outlier). An anomaly detection prediction is a value + // between 0 and 1, where 0 indicates the possible most normal instance and 1 + // indicates the most possible anomalous instance. 
+ ANOMALY_DETECTION = 6; } // Contains the same information as a model::AbstractModel (without the diff --git a/yggdrasil_decision_forests/model/abstract_model_test.cc b/yggdrasil_decision_forests/model/abstract_model_test.cc index d9620bf0..8b534145 100644 --- a/yggdrasil_decision_forests/model/abstract_model_test.cc +++ b/yggdrasil_decision_forests/model/abstract_model_test.cc @@ -254,6 +254,28 @@ TEST(AbstractLearner, MergeAddPredictionsClassification) { .value())); } +TEST(AbstractLearner, MergeAddPredictionsAnomalyDetection) { + proto::Prediction src = + PARSE_TEST_PROTO(R"pb(anomaly_detection { value: 1 })pb"); + proto::Prediction dst; + PredictionMerger merger(&dst); + + merger.Add(src, 0.25f); + EXPECT_THAT(dst, EqualsProto(utils::ParseTextProto( + "anomaly_detection {value:0.25 }") + .value())); + + merger.Add(src, 0.25f); + EXPECT_THAT(dst, EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.5 }") + .value())); + + merger.Add(src, 0.50f); + EXPECT_THAT(dst, EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 1.0 }") + .value())); +} + TEST(AbstractModel, BuildFastEngine) { FakeModelWithoutEngine model_without_engine; EXPECT_THAT(model_without_engine.BuildFastEngine().status(), @@ -395,6 +417,13 @@ TEST(FloatToProtoPrediction, Base) { EXPECT_THAT(prediction, EqualsProto(utils::ParseTextProto( R"(uplift { treatment_effect: 0.4 })") .value())); + + FloatToProtoPrediction({0.2, 0.4}, /*example_idx=*/0, + proto::Task::ANOMALY_DETECTION, + /*num_prediction_dimensions=*/1, &prediction); + EXPECT_THAT(prediction, EqualsProto(utils::ParseTextProto( + R"(anomaly_detection { value: 0.2 })") + .value())); } TEST(Evaluate, FromVerticalDataset) { diff --git a/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc b/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc index 78d908af..ec7d87dc 100644 --- a/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc +++ 
b/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc @@ -1200,19 +1200,20 @@ void AppendModelStructureHeader( const DecisionForest& trees, const dataset::proto::DataSpecification& data_spec, const int label_col_idx, std::string* description) { - const auto& label_col_spec = data_spec.columns(label_col_idx); - - // Print the label values. - if (label_col_spec.type() == dataset::proto::CATEGORICAL && - !label_col_spec.categorical().is_already_integerized()) { - absl::StrAppend(description, "Label values:\n"); - for (int value = 1; - value < label_col_spec.categorical().number_of_unique_values(); - value++) { - absl::StrAppend( - description, "\t", - dataset::CategoricalIdxToRepresentation(label_col_spec, value, true), - "\n"); + if (label_col_idx != -1) { + const auto& label_col_spec = data_spec.columns(label_col_idx); + // Print the label values. + if (label_col_spec.type() == dataset::proto::CATEGORICAL && + !label_col_spec.categorical().is_already_integerized()) { + absl::StrAppend(description, "Label values:\n"); + for (int value = 1; + value < label_col_spec.categorical().number_of_unique_values(); + value++) { + absl::StrAppend(description, "\t", + dataset::CategoricalIdxToRepresentation(label_col_spec, + value, true), + "\n"); + } } } diff --git a/yggdrasil_decision_forests/model/prediction.proto b/yggdrasil_decision_forests/model/prediction.proto index dce365d6..9cfa5b3f 100644 --- a/yggdrasil_decision_forests/model/prediction.proto +++ b/yggdrasil_decision_forests/model/prediction.proto @@ -71,16 +71,22 @@ message Prediction { } } + message AnomalyDetection { + // Value between 0 (normal) and 1 (anomaly). + optional float value = 1; + } + oneof type { Classification classification = 1; Regression regression = 2; Ranking ranking = 5; Uplift uplift = 6; + AnomalyDetection anomaly_detection = 7; } optional float weight = 3 [default = 1]; // Identifier about the example. 
optional string example_key = 4; - // Next ID: 6 + // Next ID: 8 } diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py index 37da48e0..4687390a 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py @@ -77,7 +77,7 @@ def __init__( self._deployment_config = deployment_config self._tuner = tuner - if not self._label: + if task != Task.ANOMALY_DETECTION and not self._label: raise ValueError("Constructing the learner requires a non-empty label.") if self._ranking_group is not None and task != Task.RANKING: raise ValueError( @@ -276,6 +276,7 @@ def _get_training_config(self) -> abstract_learner_pb2.TrainingConfig: # Apply monotonic constraints. if self._data_spec_args.columns: for feature in self._data_spec_args.columns: + assert feature is not None if not feature.normalized_monotonic: continue @@ -457,7 +458,7 @@ def _build_data_spec_args(self) -> dataspec.DataSpecInferenceArgs: column are specified as features. """ - def create_label_column(name: str, task: Task) -> dataspec.Column: + def create_label_column(name: str, task: Task) -> Optional[dataspec.Column]: if task in [Task.CLASSIFICATION, Task.CATEGORICAL_UPLIFT]: return dataspec.Column( name=name, @@ -467,6 +468,9 @@ def create_label_column(name: str, task: Task) -> dataspec.Column: ) elif task in [Task.REGRESSION, Task.RANKING, Task.NUMERICAL_UPLIFT]: return dataspec.Column(name=name, semantic=dataspec.Semantic.NUMERICAL) + elif task in [Task.ANOMALY_DETECTION]: + # No label column + return None else: raise ValueError(f"Unsupported task {task.name} for label column") @@ -485,7 +489,10 @@ def create_label_column(name: str, task: Task) -> dataspec.Column: f"Label column {self._label} is also an input feature. A column" " cannot be both a label and input feature." 
) - column_defs.append(create_label_column(self._label, self._task)) + if ( + label_column := create_label_column(self._label, self._task) + ) is not None: + column_defs.append(label_column) if self._weights is not None: if dataspec.column_defs_contains_column(self._weights, column_defs): raise ValueError( diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py b/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py index 0b19e100..d4708a2c 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/specialized_learners_pre_generated.py @@ -83,7 +83,8 @@ class RandomForestLearner(generic_learner.GenericLearner): label: Label of the dataset. The label column should not be identified as a feature in the `features` parameter. task: Task to solve (e.g. Task.CLASSIFICATION, Task.REGRESSION, - Task.RANKING, Task.CATEGORICAL_UPLIFT, Task.NUMERICAL_UPLIFT). + Task.RANKING, Task.CATEGORICAL_UPLIFT, Task.NUMERICAL_UPLIFT, + Task.ANOMALY_DETECTION). weights: Name of a feature that identifies the weight of each example. If weights are not specified, unit weights are assumed. The weight column should not be identified as a feature in the `features` parameter. diff --git a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py index 6baf0fb1..a231aca1 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py @@ -64,6 +64,10 @@ class Task(enum.Enum): categorical outcome. NUMERICAL_UPLIFT: Predicts the incremental impact of a treatment on a numerical outcome. + ANOMALY_DETECTION: Predicts if an instance is similar to the majority of the + training data or anomalous (a.k.a. an outlier). 
An anomaly detection + prediction is a value between 0 and 1, where 0 indicates the possible most + normal instance and 1 indicates the most possible anomalous instance. """ CLASSIFICATION = "CLASSIFICATION" @@ -71,6 +75,7 @@ class Task(enum.Enum): RANKING = "RANKING" CATEGORICAL_UPLIFT = "CATEGORICAL_UPLIFT" NUMERICAL_UPLIFT = "NUMERICAL_UPLIFT" + ANOMALY_DETECTION = "ANOMALY_DETECTION" def _to_proto_type(self) -> abstract_model_pb2.Task: if self in TASK_TO_PROTO: @@ -93,6 +98,7 @@ def _from_proto_type(cls, task: abstract_model_pb2.Task): Task.RANKING: abstract_model_pb2.RANKING, Task.CATEGORICAL_UPLIFT: abstract_model_pb2.CATEGORICAL_UPLIFT, Task.NUMERICAL_UPLIFT: abstract_model_pb2.NUMERICAL_UPLIFT, + Task.ANOMALY_DETECTION: abstract_model_pb2.ANOMALY_DETECTION, } PROTO_TO_TASK = {v: k for k, v in TASK_TO_PROTO.items()} diff --git a/yggdrasil_decision_forests/utils/model_analysis.cc b/yggdrasil_decision_forests/utils/model_analysis.cc index 92928dfa..b111e3f1 100644 --- a/yggdrasil_decision_forests/utils/model_analysis.cc +++ b/yggdrasil_decision_forests/utils/model_analysis.cc @@ -953,7 +953,6 @@ absl::StatusOr FeatureVariationNumerical( return item; } - absl::StatusOr FeatureVariationBoolean( const model::AbstractModel& model, const int column_idx, const dataset::proto::Example& example, @@ -1085,6 +1084,14 @@ absl::StatusOr> ListOutputs( return prediction.uplift().treatment_effect(0); }}); break; + case model::proto::Task::ANOMALY_DETECTION: + outputs.push_back( + {.label = "output", + .compute = [](const model::proto::Prediction& prediction) -> float { + return prediction.anomaly_detection().value(); + }}); + break; + default: return absl::InvalidArgumentError( "Non supported model task for feature variation"); diff --git a/yggdrasil_decision_forests/utils/test_utils.cc b/yggdrasil_decision_forests/utils/test_utils.cc index 2d20399a..241ba7cb 100644 --- a/yggdrasil_decision_forests/utils/test_utils.cc +++ b/yggdrasil_decision_forests/utils/test_utils.cc @@ 
-308,6 +308,9 @@ void TrainAndTestTester::TrainAndEvaluateModel( EXPECT_NEAR(metric::AUUC(e1), metric::AUUC(e2), 0.001); EXPECT_NEAR(metric::Qini(e1), metric::Qini(e2), 0.001); break; + case model::proto::Task::ANOMALY_DETECTION: + // No metrics + break; default: YDF_LOG(FATAL) << "Not implemented"; } @@ -644,6 +647,11 @@ void ExpectEqualPredictions(const model::proto::Task task, } } break; + case model::proto::Task::ANOMALY_DETECTION: + EXPECT_NEAR(a.anomaly_detection().value(), b.anomaly_detection().value(), + epsilon); + break; + default: YDF_LOG(FATAL) << "Not supported task"; } @@ -743,6 +751,12 @@ void ExpectEqualPredictions( } } break; + case model::proto::Task::ANOMALY_DETECTION: + EXPECT_NEAR(generic_prediction.anomaly_detection().value(), + predictions[prediction_idx], epsilon) + << "Predictions don't match."; + break; + default: YDF_LOG(FATAL) << "Not supported task"; } From f3195892994f22d495fe52a968fe51ecf7c6af3b Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Thu, 13 Jun 2024 03:22:53 -0700 Subject: [PATCH 18/30] Anomaly detection; ccleaner + decision tree utilities (part 2) PiperOrigin-RevId: 642919627 --- .../distributed_gradient_boosted_trees/BUILD | 1 + .../distributed_gradient_boosted_trees.cc | 1 + .../learner/gradient_boosted_trees/BUILD | 6 ++++ .../gradient_boosted_trees.cc | 11 ++++++ .../gradient_boosted_trees_test.cc | 1 + yggdrasil_decision_forests/model/BUILD | 3 +- .../model/abstract_model.cc | 14 ++++++++ .../model/abstract_model_test.cc | 36 ++++++++++++++----- .../model/decision_tree/BUILD | 3 ++ .../model/decision_tree/builder.cc | 9 +++++ .../model/decision_tree/builder.h | 3 ++ .../model/decision_tree/builder_test.cc | 18 ++++++++++ .../model/decision_tree/decision_tree.cc | 20 ++++++++--- .../model/decision_tree/decision_tree.h | 14 +++++++- .../model/decision_tree/decision_tree.proto | 16 +++++++-- .../model/decision_tree/decision_tree_test.cc | 21 +++++++++++ .../build_decision_tree_anomaly.txt.expected | 3 ++ 
yggdrasil_decision_forests/utils/BUILD | 11 +++++- .../utils/feature_importance.cc | 3 +- .../utils/test_utils.cc | 14 ++++++-- 20 files changed, 185 insertions(+), 23 deletions(-) create mode 100644 yggdrasil_decision_forests/test_data/golden/build_decision_tree_anomaly.txt.expected diff --git a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD index 86812a46..a173a6b0 100644 --- a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD +++ b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/BUILD @@ -34,6 +34,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/model/gradient_boosted_trees", "//yggdrasil_decision_forests/utils:filesystem", "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:snapshot", "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:uid", diff --git a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc index 2794aa4f..2bfb37e4 100644 --- a/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc +++ b/yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees/distributed_gradient_boosted_trees.cc @@ -29,6 +29,7 @@ #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/snapshot.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/uid.h" diff --git 
a/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD b/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD index 180b4fad..3b2d8b94 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD +++ b/yggdrasil_decision_forests/learner/gradient_boosted_trees/BUILD @@ -45,17 +45,22 @@ cc_library_ydf( "//yggdrasil_decision_forests/model/decision_tree:decision_tree_cc_proto", "//yggdrasil_decision_forests/model/gradient_boosted_trees", "//yggdrasil_decision_forests/model/gradient_boosted_trees:gradient_boosted_trees_cc_proto", + "//yggdrasil_decision_forests/serving:example_set", + "//yggdrasil_decision_forests/serving:fast_engine", "//yggdrasil_decision_forests/serving/decision_forest:register_engines", "//yggdrasil_decision_forests/utils:adaptive_work", "//yggdrasil_decision_forests/utils:compatibility", + "//yggdrasil_decision_forests/utils:concurrency", "//yggdrasil_decision_forests/utils:csv", "//yggdrasil_decision_forests/utils:feature_importance", "//yggdrasil_decision_forests/utils:filesystem", "//yggdrasil_decision_forests/utils:hyper_parameters", "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:random", + "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:snapshot", "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:synchronization_primitives", "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", @@ -134,6 +139,7 @@ cc_test( "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:model_analysis", "//yggdrasil_decision_forests/utils:random", + "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:snapshot", "//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:test", diff --git 
a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc index f4533521..43212498 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc +++ b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees.cc @@ -18,12 +18,15 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include @@ -34,9 +37,12 @@ #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" #include "absl/strings/string_view.h" +#include "absl/strings/substitute.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "absl/types/optional.h" #include "absl/types/span.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" @@ -60,15 +66,20 @@ #include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.pb.h" +#include "yggdrasil_decision_forests/serving/example_set.h" +#include "yggdrasil_decision_forests/serving/fast_engine.h" #include "yggdrasil_decision_forests/utils/adaptive_work.h" +#include "yggdrasil_decision_forests/utils/concurrency.h" #include "yggdrasil_decision_forests/utils/csv.h" #include "yggdrasil_decision_forests/utils/feature_importance.h" #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/hyper_parameters.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/snapshot.h" #include 
"yggdrasil_decision_forests/utils/status_macros.h" +#include "yggdrasil_decision_forests/utils/synchronization_primitives.h" namespace yggdrasil_decision_forests { namespace model { diff --git a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees_test.cc b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees_test.cc index f0a8d548..206364b7 100644 --- a/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees_test.cc +++ b/yggdrasil_decision_forests/learner/gradient_boosted_trees/gradient_boosted_trees_test.cc @@ -69,6 +69,7 @@ #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/model_analysis.h" #include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/snapshot.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/test.h" diff --git a/yggdrasil_decision_forests/model/BUILD b/yggdrasil_decision_forests/model/BUILD index 97b10344..a0e70fe9 100644 --- a/yggdrasil_decision_forests/model/BUILD +++ b/yggdrasil_decision_forests/model/BUILD @@ -151,7 +151,6 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:html_content", "//yggdrasil_decision_forests/utils:plot", "//yggdrasil_decision_forests/utils:protobuf", - "//yggdrasil_decision_forests/utils:uid", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -208,6 +207,7 @@ cc_test( deps = [ ":abstract_model", ":abstract_model_cc_proto", + ":evaluate_on_disk", ":model_library", ":model_testing", ":prediction_cc_proto", @@ -216,7 +216,6 @@ cc_test( "//yggdrasil_decision_forests/dataset:vertical_dataset", "//yggdrasil_decision_forests/dataset:vertical_dataset_io", "//yggdrasil_decision_forests/metric", - "//yggdrasil_decision_forests/model:evaluate_on_disk", 
"//yggdrasil_decision_forests/model/gradient_boosted_trees", "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/serving:example_set", diff --git a/yggdrasil_decision_forests/model/abstract_model.cc b/yggdrasil_decision_forests/model/abstract_model.cc index 919c5bb6..c21d99ab 100644 --- a/yggdrasil_decision_forests/model/abstract_model.cc +++ b/yggdrasil_decision_forests/model/abstract_model.cc @@ -484,15 +484,18 @@ absl::Status AbstractModel::AppendEvaluationOverrideType( sub_example_idx++) { FloatToProtoPrediction(batch_of_predictions, sub_example_idx, task(), num_prediction_dimensions, &original_prediction); + RETURN_IF_ERROR(ChangePredictionType(task(), override_task, original_prediction, &overridden_prediction)); + RETURN_IF_ERROR(model::SetGroundTruth( dataset, begin_example_idx + sub_example_idx, model::GroundTruthColumnIndices(override_label_col_idx, override_group_col_idx, uplift_treatment_col_idx_), override_task, &overridden_prediction)); + if (option.has_weights()) { ASSIGN_OR_RETURN( const float weight, @@ -558,6 +561,17 @@ absl::Status ChangePredictionType(proto::Task src_task, proto::Task dst_task, } else if (src_task == proto::Task::RANKING && dst_task == proto::Task::REGRESSION) { dst_pred->mutable_regression()->set_value(src_pred.ranking().relevance()); + } else if (src_task == proto::Task::ANOMALY_DETECTION && + dst_task == proto::Task::CLASSIFICATION) { + const float value = src_pred.anomaly_detection().value(); + auto* dst_clas = dst_pred->mutable_classification(); + // Assume the positive class is the abnormal one. + dst_clas->set_value(value >= 0.5f ? 
2 : 1); + dst_clas->mutable_distribution()->clear_counts(); + dst_clas->mutable_distribution()->set_sum(1.f); + dst_clas->mutable_distribution()->add_counts(0.f); + dst_clas->mutable_distribution()->add_counts(1.f - value); + dst_clas->mutable_distribution()->add_counts(value); } else { STATUS_FATALS("Non supported override of task from ", proto::Task_Name(src_task), " to ", diff --git a/yggdrasil_decision_forests/model/abstract_model_test.cc b/yggdrasil_decision_forests/model/abstract_model_test.cc index 8b534145..230c9049 100644 --- a/yggdrasil_decision_forests/model/abstract_model_test.cc +++ b/yggdrasil_decision_forests/model/abstract_model_test.cc @@ -45,6 +45,7 @@ namespace yggdrasil_decision_forests { namespace model { namespace { +using test::ApproximatelyEqualsProto; using test::EqualsProto; using test::StatusIs; @@ -336,20 +337,39 @@ TEST(ChangePredictionType, ClassificationToRanking) { } } +TEST(ChangePredictionType, AnomalyDetectionToClassification) { + const proto::Prediction src_pred = + PARSE_TEST_PROTO(R"pb(anomaly_detection { value: 0.8 })pb"); + proto::Prediction dst_pred; + ASSERT_OK(ChangePredictionType(proto::Task::ANOMALY_DETECTION, + proto::Task::CLASSIFICATION, src_pred, + &dst_pred)); + EXPECT_THAT(dst_pred, + ApproximatelyEqualsProto(PARSE_TEST_PROTO_WITH_TYPE( + proto::Prediction, + R"pb( + classification { + value: 2 + distribution { counts: 0 counts: 0.2 counts: 0.8 sum: 1 } + } + )pb"))); +} + TEST(FloatToProtoPrediction, Base) { proto::Prediction prediction; FloatToProtoPrediction({0, 0.5, 1}, /*example_idx=*/0, proto::Task::CLASSIFICATION, /*num_prediction_dimensions=*/1, &prediction); - EXPECT_THAT(prediction, - EqualsProto(utils::ParseTextProto(R"( - classification { - value: 1 - distribution { counts: 0 counts: 1 counts: 0 sum: 1 } - } - )") - .value())); + EXPECT_THAT( + prediction, + EqualsProto(utils::ParseTextProto(R"pb( + classification { + value: 1 + distribution { counts: 0 counts: 1 counts: 0 sum: 1 } + } + )pb") + 
.value())); FloatToProtoPrediction({0, 0.5, 1}, /*example_idx=*/1, proto::Task::CLASSIFICATION, diff --git a/yggdrasil_decision_forests/model/decision_tree/BUILD b/yggdrasil_decision_forests/model/decision_tree/BUILD index f602e057..febad499 100644 --- a/yggdrasil_decision_forests/model/decision_tree/BUILD +++ b/yggdrasil_decision_forests/model/decision_tree/BUILD @@ -40,8 +40,10 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:protobuf", "//yggdrasil_decision_forests/utils:sharded_io", "//yggdrasil_decision_forests/utils:status_macros", + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -116,6 +118,7 @@ cc_library_ydf( hdrs = ["builder.h"], deps = [ ":decision_tree", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/types:span", ], ) diff --git a/yggdrasil_decision_forests/model/decision_tree/builder.cc b/yggdrasil_decision_forests/model/decision_tree/builder.cc index 1cb89106..b8f69b68 100644 --- a/yggdrasil_decision_forests/model/decision_tree/builder.cc +++ b/yggdrasil_decision_forests/model/decision_tree/builder.cc @@ -18,6 +18,7 @@ #include #include +#include "absl/log/check.h" #include "absl/types/span.h" namespace yggdrasil_decision_forests::model::decision_tree { @@ -72,4 +73,12 @@ void TreeBuilder::LeafRegression(const float value) { node_->mutable_node()->mutable_regressor()->set_top_value(value); } +void TreeBuilder::LeafAnomalyDetection(const int num_examples_without_weight) { + node_->mutable_node() + ->mutable_anomaly_detection() + ->set_num_examples_without_weight(num_examples_without_weight); + node_->mutable_node()->set_num_pos_training_examples_without_weight( + num_examples_without_weight); +} + } // namespace yggdrasil_decision_forests::model::decision_tree diff --git 
a/yggdrasil_decision_forests/model/decision_tree/builder.h b/yggdrasil_decision_forests/model/decision_tree/builder.h index fa1f001d..550b9880 100644 --- a/yggdrasil_decision_forests/model/decision_tree/builder.h +++ b/yggdrasil_decision_forests/model/decision_tree/builder.h @@ -53,6 +53,9 @@ class TreeBuilder { // Creates a regression leaf. void LeafRegression(float value); + // Creates an anomaly detection leaf. + void LeafAnomalyDetection(int num_examples_without_weight); + private: NodeWithChildren* node_; }; diff --git a/yggdrasil_decision_forests/model/decision_tree/builder_test.cc b/yggdrasil_decision_forests/model/decision_tree/builder_test.cc index fe03ee07..078a4c0b 100644 --- a/yggdrasil_decision_forests/model/decision_tree/builder_test.cc +++ b/yggdrasil_decision_forests/model/decision_tree/builder_test.cc @@ -53,6 +53,24 @@ TEST(TreeBuilder, Base) { "golden/build_decision_tree.txt.expected"); } +TEST(TreeBuilder, AnomalyDetection) { + DecisionTree tree; + TreeBuilder builder(&tree); + + dataset::proto::DataSpecification dataspec; + dataset::AddColumn("f1", dataset::proto::ColumnType::NUMERICAL, &dataspec); + + auto [l1, l2] = builder.ConditionIsGreater(/*attribute=*/0, /*threshold=*/1); + l1.LeafAnomalyDetection(2); + l2.LeafAnomalyDetection(3); + + std::string description; + tree.AppendModelStructure(dataspec, 0, &description); + test::ExpectEqualGolden(description, + "yggdrasil_decision_forests/test_data/" + "golden/build_decision_tree_anomaly.txt.expected"); +} + } // namespace } // namespace decision_tree } // namespace model diff --git a/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc b/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc index ec7d87dc..66cdd8aa 100644 --- a/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc +++ b/yggdrasil_decision_forests/model/decision_tree/decision_tree.cc @@ -21,13 +21,13 @@ #include #include #include -#include #include #include #include #include #include +#include 
"absl/base/optimization.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" @@ -48,7 +48,6 @@ #include "yggdrasil_decision_forests/utils/distribution.pb.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/protobuf.h" -#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/status_macros.h" namespace yggdrasil_decision_forests { @@ -150,6 +149,11 @@ void AppendValueDescription(const dataset::proto::DataSpecification& data_spec, "] examples_per_treatment_and_outcome:[", sum_weights_per_treatment_and_outcome_str, "]"); } break; + + case proto::Node::OutputCase::kAnomalyDetection: + absl::StrAppend(description, "count:", + node.anomaly_detection().num_examples_without_weight()); + break; } } @@ -543,6 +547,8 @@ void NodeWithChildren::ClearLabelDistributionDetails() { break; case proto::Node::OutputCase::kUplift: break; + case proto::Node::OutputCase::kAnomalyDetection: + break; } } @@ -895,7 +901,7 @@ const proto::Node& DecisionTree::GetLeafWithSwappedAttribute( return current_node->node(); } -const proto::Node& DecisionTree::GetLeaf( +const NodeWithChildren& DecisionTree::GetLeafAlt( const dataset::proto::Example& example) const { // Go down the tree according to an observation attribute values. CHECK(root_ != nullptr); @@ -906,7 +912,12 @@ const proto::Node& DecisionTree::GetLeaf( current_node = condition_result ? 
current_node->pos_child() : current_node->neg_child(); } - return current_node->node(); + return *current_node; +} + +const proto::Node& DecisionTree::GetLeaf( + const dataset::proto::Example& example) const { + return GetLeafAlt(example).node(); } void DecisionTree::GetPath(const dataset::VerticalDataset& dataset, @@ -1247,6 +1258,7 @@ void DecisionTree::SetLeafIndices() { if (node->IsLeaf()) { node->set_leaf_idx(next_leaf_idx++); } + node->set_depth(depth); }, /*neg_before_pos_child=*/true); } diff --git a/yggdrasil_decision_forests/model/decision_tree/decision_tree.h b/yggdrasil_decision_forests/model/decision_tree/decision_tree.h index 6d1ec72e..432cb5ef 100644 --- a/yggdrasil_decision_forests/model/decision_tree/decision_tree.h +++ b/yggdrasil_decision_forests/model/decision_tree/decision_tree.h @@ -23,12 +23,14 @@ #include +#include #include #include #include #include #include +#include "absl/log/check.h" #include "absl/status/status.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -37,7 +39,7 @@ #include "yggdrasil_decision_forests/dataset/vertical_dataset.h" #include "yggdrasil_decision_forests/model/abstract_model.pb.h" #include "yggdrasil_decision_forests/model/decision_tree/decision_tree.pb.h" -#include "yggdrasil_decision_forests/utils/sharded_io.h" +#include "yggdrasil_decision_forests/utils/protobuf.h" namespace yggdrasil_decision_forests { namespace model { @@ -203,6 +205,9 @@ class NodeWithChildren { int32_t leaf_idx() const { return leaf_idx_; } void set_leaf_idx(const int32_t v) { leaf_idx_ = v; } + int32_t depth() const { return depth_; } + void set_depth(const int32_t v) { depth_ = v; } + // Compare a tree to another tree. If they are equal, return an empty string. // If they are different, returns an explanation of the differences. 
std::string DebugCompare(const dataset::proto::DataSpecification& dataspec, @@ -219,6 +224,10 @@ class NodeWithChildren { // Index of the leaf (if the node is a leaf) in the tree in a depth first // exploration. It is set by calling "SetLeafIndices()". int32_t leaf_idx_ = -1; + + // Depth of the node. Assuming that the root node has depth 0. It is set by + // calling "SetLeafIndices()". + int32_t depth_ = -1; }; // A generic decision tree. This class is designed for cheap modification (by @@ -270,6 +279,9 @@ class DecisionTree { const dataset::VerticalDataset& dataset, dataset::VerticalDataset::row_t row_idx) const; + const NodeWithChildren& GetLeafAlt( + const dataset::proto::Example& example) const; + // Apply the decision tree on an example and returns the path. void GetPath(const dataset::VerticalDataset& dataset, dataset::VerticalDataset::row_t row_idx, diff --git a/yggdrasil_decision_forests/model/decision_tree/decision_tree.proto b/yggdrasil_decision_forests/model/decision_tree/decision_tree.proto index 86765cf1..03065494 100644 --- a/yggdrasil_decision_forests/model/decision_tree/decision_tree.proto +++ b/yggdrasil_decision_forests/model/decision_tree/decision_tree.proto @@ -73,6 +73,14 @@ message NodeUpliftOutput { repeated int64 num_examples_per_treatment = 5 [packed = true]; } +// Output of a node in an anomaly detection tree. +message NodeAnomalyDetectionOutput { + // Next ID: 2 + + // Number of examples that reached this node. + optional int64 num_examples_without_weight = 1; +} + // The sub-messages of "ConditionParams" are the different types of condition // that can be attached to a node. message Condition { @@ -161,17 +169,19 @@ message NodeCondition { // Node in a decision tree (without the information about the children). message Node { - // Next ID: 6 + // Next ID: 7 // Label value. Might be unspecified for non-leaf nodes.
oneof output { NodeClassifierOutput classifier = 1; NodeRegressorOutput regressor = 2; NodeUpliftOutput uplift = 5; + NodeAnomalyDetectionOutput anomaly_detection = 6; } // Branching condition to the children. If not specified, this node is a leaf. optional NodeCondition condition = 3; - // Number of positive examples (non-weighted) that reached this node during - // training. + // Number of examples (non-weighted) that reached this node during + // training. Warning: Contrary to what the name suggest, this is not the count + // of examples branched to the positive child. optional int64 num_pos_training_examples_without_weight = 4; } diff --git a/yggdrasil_decision_forests/model/decision_tree/decision_tree_test.cc b/yggdrasil_decision_forests/model/decision_tree/decision_tree_test.cc index 47c51187..0b5c1d86 100644 --- a/yggdrasil_decision_forests/model/decision_tree/decision_tree_test.cc +++ b/yggdrasil_decision_forests/model/decision_tree/decision_tree_test.cc @@ -811,6 +811,27 @@ TEST(DecisionTree, DebugCompare) { EXPECT_TRUE(tree1.DebugCompare(dataspec, 0, tree2).empty()); } +TEST(DecisionTree, Depth) { + dataset::proto::DataSpecification dataspec; + dataset::AddColumn("f", dataset::proto::ColumnType::NUMERICAL, &dataspec); + + DecisionTree tree; + TreeBuilder builder(&tree); + + auto [pos, l1] = builder.ConditionIsGreater(1, 1); + auto [l2, l3] = pos.ConditionIsGreater(1, 2); + l1.LeafRegression(1); + l2.LeafRegression(2); + l3.LeafRegression(3); + tree.SetLeafIndices(); + + EXPECT_EQ(tree.root().depth(), 0); + EXPECT_EQ(tree.root().pos_child()->depth(), 1); + EXPECT_EQ(tree.root().neg_child()->depth(), 1); + EXPECT_EQ(tree.root().pos_child()->pos_child()->depth(), 2); + EXPECT_EQ(tree.root().pos_child()->neg_child()->depth(), 2); +} + } // namespace } // namespace decision_tree } // namespace model diff --git a/yggdrasil_decision_forests/test_data/golden/build_decision_tree_anomaly.txt.expected 
b/yggdrasil_decision_forests/test_data/golden/build_decision_tree_anomaly.txt.expected new file mode 100644 index 00000000..d16dbb03 --- /dev/null +++ b/yggdrasil_decision_forests/test_data/golden/build_decision_tree_anomaly.txt.expected @@ -0,0 +1,3 @@ + "f1">=1 [s:0 n:0 np:0 miss:0] + ├─(pos)─ count:2 + └─(neg)─ count:3 diff --git a/yggdrasil_decision_forests/utils/BUILD b/yggdrasil_decision_forests/utils/BUILD index da5dd41b..5f42c7b5 100644 --- a/yggdrasil_decision_forests/utils/BUILD +++ b/yggdrasil_decision_forests/utils/BUILD @@ -513,6 +513,13 @@ cc_library_ydf( ], ) +cc_library( + name = "documentation_cc", + srcs = ["documentation.cc"], + hdrs = ["documentation.h"], + deps = ["@com_google_absl//absl/strings"], +) + cc_library_ydf( name = "feature_importance", srcs = ["feature_importance.cc"], @@ -550,6 +557,7 @@ cc_library_ydf( ":filesystem", ":logging", ":random", + ":sharded_io", ":test", ":testing_macros", ":uid", @@ -579,12 +587,13 @@ cc_library_ydf( "//yggdrasil_decision_forests/serving:example_set", "//yggdrasil_decision_forests/serving:fast_engine", "//yggdrasil_decision_forests/serving/decision_forest", + "@com_google_absl//absl/memory", "@com_google_absl//absl/random", - "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", "@com_google_googletest//:gtest", ], ) diff --git a/yggdrasil_decision_forests/utils/feature_importance.cc b/yggdrasil_decision_forests/utils/feature_importance.cc index 408fbed7..b8a2e8de 100644 --- a/yggdrasil_decision_forests/utils/feature_importance.cc +++ b/yggdrasil_decision_forests/utils/feature_importance.cc @@ -235,7 +235,8 @@ absl::Status ComputePermutationFeatureImportance( utils::RandomEngine rng; utils::concurrency::Mutex rng_mutex; - const auto base_evaluation = model->Evaluate(dataset, eval_options, &rng); + ASSIGN_OR_RETURN(const auto 
base_evaluation, + model->EvaluateWithStatus(dataset, eval_options, &rng)); const auto permutation_evaluation = [&dataset, &eval_options, &rng, &rng_mutex, model](const int feature_idx) diff --git a/yggdrasil_decision_forests/utils/test_utils.cc b/yggdrasil_decision_forests/utils/test_utils.cc index 241ba7cb..d05a0052 100644 --- a/yggdrasil_decision_forests/utils/test_utils.cc +++ b/yggdrasil_decision_forests/utils/test_utils.cc @@ -15,19 +15,25 @@ #include "yggdrasil_decision_forests/utils/test_utils.h" -#include - #include +#include +#include +#include +#include #include +#include +#include #include +#include #include #include +#include #include #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/random/distributions.h" +#include "absl/memory/memory.h" #include "absl/random/random.h" #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -35,6 +41,7 @@ #include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "absl/types/optional.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/dataset/data_spec_inference.h" @@ -63,6 +70,7 @@ #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/sharded_io.h" #include "yggdrasil_decision_forests/utils/test.h" #include "yggdrasil_decision_forests/utils/testing_macros.h" #include "yggdrasil_decision_forests/utils/uid.h" From f63eecc9bdf480afe94a5ff13366d4b579746a4c Mon Sep 17 00:00:00 2001 From: TensorFlow Decision Forests Team Date: Thu, 13 Jun 2024 05:13:17 -0700 Subject: [PATCH 19/30] Automated Code Change PiperOrigin-RevId: 642948182 --- yggdrasil_decision_forests/learner/BUILD | 1 + yggdrasil_decision_forests/learner/export_doc_main.cc | 1 + 2 files changed, 2 insertions(+) diff --git 
a/yggdrasil_decision_forests/learner/BUILD b/yggdrasil_decision_forests/learner/BUILD index 15d94263..40907240 100644 --- a/yggdrasil_decision_forests/learner/BUILD +++ b/yggdrasil_decision_forests/learner/BUILD @@ -16,6 +16,7 @@ cc_binary_ydf( ":export_doc", ":learner_library", "//yggdrasil_decision_forests/utils:logging", + "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/strings", ], ) diff --git a/yggdrasil_decision_forests/learner/export_doc_main.cc b/yggdrasil_decision_forests/learner/export_doc_main.cc index 51d36a2c..8a474dd6 100644 --- a/yggdrasil_decision_forests/learner/export_doc_main.cc +++ b/yggdrasil_decision_forests/learner/export_doc_main.cc @@ -18,6 +18,7 @@ // #include +#include "absl/flags/flag.h" #include "absl/strings/substitute.h" #include "yggdrasil_decision_forests/learner/export_doc.h" #include "yggdrasil_decision_forests/learner/learner_library.h" From 225fd58505e92665eeedf3e7581d4f0faa75398f Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Fri, 14 Jun 2024 05:57:12 -0700 Subject: [PATCH 20/30] Add YDF + JAX tutorial PiperOrigin-RevId: 643324787 --- .../docs/tutorial/compose_with_jax.ipynb | 4658 +++++++++++++++++ documentation/public/mkdocs.yml | 1 + 2 files changed, 4659 insertions(+) create mode 100644 documentation/public/docs/tutorial/compose_with_jax.ipynb diff --git a/documentation/public/docs/tutorial/compose_with_jax.ipynb b/documentation/public/docs/tutorial/compose_with_jax.ipynb new file mode 100644 index 00000000..83037021 --- /dev/null +++ b/documentation/public/docs/tutorial/compose_with_jax.ipynb @@ -0,0 +1,4658 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "OM2KIwYEd0pZ" + }, + "source": [ + "# With JAX\n", + "\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google/yggdrasil-decision-forests/blob/main/documentation/public/docs/tutorial/compose_with_jax.ipynb)\n", + "\n", + "## About this 
tutorial\n", + "\n", + "JAX is a machine learning library to train neural network models. While decision forests trained by YDF are different from neural networks, YDF and JAX can be combined to create powerful hybrid models.\n", + "\n", + "This tutorial is divided into two parts. First, we show how to convert a YDF model into a JAX model, and how to save the resulting model as a SavedModel using `jax2tf`.\n", + "\n", + "Second, we show how YDF and JAX can be combined to solve the distribution shift problem: We train a YDF model, convert it to a JAX model, finetune it using JAX, and convert it back to a YDF model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ySnymYVDiS7d" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qcIsaJiTiSk2" + }, + "outputs": [], + "source": [ + "# Install dependencies\n", + "!pip install ydf -U -q\n", + "!pip install tensorflow -U -q\n", + "!pip install optax pandas numpy -U -q\n", + "\n", + "!pip install jax[cpu] -U\n", + "# OR\n", + "# !pip install jax[cuda12] -U -q\n", + "# See https://jax.readthedocs.io/en/latest/installation.html for JAX variations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "8Pm9OxVztyCU" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "import jax\n", + "from jax.experimental import jax2tf # To export JAX model to SavedModel\n", + "import optax # To finetune YDF+JAX models\n", + "import pandas as pd # We use Pandas to load small datasets\n", + "import tensorflow as tf # To create SavedModels\n", + "import ydf # Yggdrasil Decision Forests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OLZoiM72Ek5Q" + }, + "source": [ + "## Convert YDF model into a JAX function\n", + "\n", + "In this section, we train a YDF model on the Adult dataset, convert it into a JAX function, and demonstrate various operations.\n", + "\n", + "First let's download a binary classification dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "jKvcOllNx8_u" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation_nummarital_statusoccupationrelationshipracesexcapital_gaincapital_losshours_per_weeknative_countryincome
044Private2280577th-8th4Married-civ-spouseMachine-op-inspctWifeWhiteFemale0040Dominican-Republic<=50K
120Private299047Some-college10Never-marriedOther-serviceNot-in-familyWhiteFemale0020United-States<=50K
240Private342164HS-grad9SeparatedAdm-clericalUnmarriedWhiteFemale0037United-States<=50K
330Private361742Some-college10Married-civ-spouseExec-managerialHusbandWhiteMale0050United-States<=50K
467Self-emp-inc171564HS-grad9Married-civ-spouseProf-specialtyWifeWhiteFemale20051030England>50K
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education_num marital_status \\\n", + "0 44 Private 228057 7th-8th 4 Married-civ-spouse \n", + "1 20 Private 299047 Some-college 10 Never-married \n", + "2 40 Private 342164 HS-grad 9 Separated \n", + "3 30 Private 361742 Some-college 10 Married-civ-spouse \n", + "4 67 Self-emp-inc 171564 HS-grad 9 Married-civ-spouse \n", + "\n", + " occupation relationship race sex capital_gain \\\n", + "0 Machine-op-inspct Wife White Female 0 \n", + "1 Other-service Not-in-family White Female 0 \n", + "2 Adm-clerical Unmarried White Female 0 \n", + "3 Exec-managerial Husband White Male 0 \n", + "4 Prof-specialty Wife White Female 20051 \n", + "\n", + " capital_loss hours_per_week native_country income \n", + "0 0 40 Dominican-Republic <=50K \n", + "1 0 20 United-States <=50K \n", + "2 0 37 United-States <=50K \n", + "3 0 50 United-States <=50K \n", + "4 0 30 England >50K " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_path = \"https://raw.githubusercontent.com/google/yggdrasil-decision-forests/main/yggdrasil_decision_forests/test_data/dataset\"\n", + "\n", + "# Download and load the dataset as Pandas DataFrames\n", + "train_ds = pd.read_csv(f\"{ds_path}/adult_train.csv\")\n", + "test_ds = pd.read_csv(f\"{ds_path}/adult_test.csv\")\n", + "label = \"income\"\n", + "\n", + "# Print the first 5 training examples\n", + "train_ds.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BTLbq0PxjdLT" + }, + "source": [ + "First, we train a YDF model on the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "xmEVpzNpzRlF" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train model on 22792 examples\n", + "Model trained in 0:00:02.277830\n" + ] + } + ], + "source": [ + "learner = ydf.GradientBoostedTreesLearner(label=label)\n", + "model = learner.train(train_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_toMixu3jhhY" + }, + "source": [ + "We convert the YDF model into a JAX function." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "AiUkvYK7zdXs" + }, + "outputs": [], + "source": [ + "jax_model = model.to_jax_function()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g__NNgkZjvjc" + }, + "source": [ + "The `jax_model` object contains three fields.\n", + "\n", + "- `predict`: A JAX function making predictions.\n", + "- `encoder`: An optional callable class to prepare examples for `predict`. Since JAX does not support string values, categorical string input features have to be prepared before calling `predict`.\n", + "- `params`: An optional dictionary of JAX Arrays defining the differentiable parameters of the model. By default, `params` is None and `predict` does not expect any parameters. We show how to use `params` in the second section.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NWXE9CEE2Js8" + }, + "source": [ + "We generate predictions for the first 5 examples in the test set.\n", + "\n", + "First, we select some examples and encode them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "OChSApgzA52B" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'age': Array([39, 40, 40, 35, 23], dtype=int32),\n", + " 'workclass': Array([4, 1, 1, 6, 3], dtype=int32),\n", + " 'fnlwgt': Array([ 77516, 121772, 193524, 76845, 190709], dtype=int32),\n", + " 'education': Array([ 3, 5, 13, 11, 7], dtype=int32),\n", + " 'education_num': Array([13, 11, 16, 5, 12], dtype=int32),\n", + " 'marital_status': Array([2, 1, 1, 1, 2], dtype=int32),\n", + " 'occupation': Array([ 4, 3, 1, 10, 12], dtype=int32),\n", + " 'relationship': Array([2, 1, 1, 1, 2], dtype=int32),\n", + " 'race': Array([1, 3, 1, 2, 1], dtype=int32),\n", + " 'sex': Array([1, 1, 1, 1, 1], dtype=int32),\n", + " 'capital_gain': Array([2174, 0, 0, 0, 0], dtype=int32),\n", + " 'capital_loss': Array([0, 0, 0, 0, 0], dtype=int32),\n", + " 'hours_per_week': Array([40, 40, 60, 40, 52], dtype=int32),\n", + " 'native_country': Array([1, 0, 1, 1, 1], dtype=int32)}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Select the first 5 examples from the Pandas Dataframe and remove the labels.\n", + "selected_examples = test_ds[:5].drop(model.label(), axis=1)\n", + "\n", + "# Encode the examples into a dictionary of JAX arrays.\n", + "jax_selected_examples = jax_model.encoder(selected_examples)\n", + "jax_selected_examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NWmi2IRD2ml4" + }, + "source": [ + "Then, we generate the predictions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "_B1YSaLNCQNH" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Array([0.01860434, 0.36130956, 0.83858865, 0.04385566, 0.02917648], dtype=float32)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jax_predictions = jax_model.predict(jax_selected_examples)\n", + "jax_predictions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mEFdGjyO2qGF" + }, + "source": [ + "Note that the predictions of the JAX function are equal to the predictions of the YDF model (modulo float rounding errors)." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "hBHw49HYCfKR" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.01860435, 0.36130956, 0.83858865, 0.04385567, 0.02917649],\n", + " dtype=float32)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(selected_examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wtJAEFtu2yHx" + }, + "source": [ + "JAX does not define a model serialization format, i.e., a way to save a model on disk. Instead, to save a JAX model for serving, it is common to export it as a SavedModel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "xt3lvsvPC0oi" + }, + "outputs": [], + "source": [ + "# Create a TF module with the model.\n", + "tf_model = tf.Module()\n", + "tf_model.predict = tf.function(\n", + " jax2tf.convert(jax_model.predict, with_gradient=False),\n", + " jit_compile=True,\n", + " autograph=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "kwPIhMp5DgCO" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the predictions of the TF module.\n", + "tf_selected_examples = {\n", + " k: tf.constant(v) for k, v in jax_selected_examples.items()\n", + "}\n", + "tf_predictions = tf_model.predict(tf_selected_examples)\n", + "tf_predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "oOw_7ZD5EHbK" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp90flesgr/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp90flesgr/assets\n" + ] + } + ], + "source": [ + "# Save the TF module to file.\n", + "with tempfile.TemporaryDirectory() as tempdir:\n", + " tf.saved_model.save(tf_model, tempdir)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1qZj2RfgNdfJ" + }, + "source": [ + "**INFO:** YDF's `to_tensorflow_saved_model` function allows to directly create a SavedModel model. This approach results in faster models, but it requires the installation of TensorFlow Decision Forests." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "0_P6KZnoEYHD" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[INFO 24-06-14 14:31:56.6553 CEST kernel.cc:1233] Loading model from path /tmp/tmp71lnhoy9/tmp83xu8mjt/ with prefix e57777e0_\n", + "[INFO 24-06-14 14:31:56.6795 CEST quick_scorer_extended.cc:911] The binary was compiled without AVX2 support, but your CPU supports it. Enable it for faster model inference.\n", + "[INFO 24-06-14 14:31:56.6803 CEST abstract_model.cc:1362] Engine \"GradientBoostedTreesQuickScorerExtended\" built\n", + "[INFO 24-06-14 14:31:56.6803 CEST kernel.cc:1061] Use fast generic engine\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpi0fp69xz/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpi0fp69xz/assets\n" + ] + } + ], + "source": [ + "try:\n", + " with tempfile.TemporaryDirectory() as tempdir:\n", + " # Save the YDF model to a SavedModel directly.\n", + " model.to_tensorflow_saved_model(tempdir, mode=\"tf\")\n", + "except Exception as e:\n", + " print(\"Could not save YDF model to SavedModel with to_tensorflow_saved_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QU1mZh5EEjn4" + }, + "source": [ + "## Fine tune a YDF model with JAX\n", + "\n", + "A distribution shift problem occurs when the examples of interest (serving examples) follow a different distribution than the training dataset. As an example, distribution shift occurs in hospitals when training a model on data acquired by different devices. Although datasets from different devices should be compatible, subtle differences between them cause a model trained on one dataset to perform poorly on another. 
For instance, a machine learning model trained to detect tumors on images captured by a device might not work effectively on images captured by a device from another brand. Distribution shifts are also common in dynamic systems that change over time (e.g., user behaviors).\n", + "\n", + "In this section, we solve a distribution shift issue using finetuning. For that, we use the Adult dataset with a twist. We assume that only people with \"relationship=Wife\" are of interest. However, only 5% of the people are in this category so we have few training examples.\n", + "\n", + "We will first observe that training only on `relationship=Wife` examples or training on all available examples does not produce the best model. Instead, we will train a YDF model on all examples, finetune it with JAX on the `relationship=Wife` examples, and observe that this finetuned model performs better. Finally, the finetuned JAX model will be converted back into a YDF model and analyzed using YDF tools.\n", + "\n", + "**INFO:** This section assumes you are familiar with [JAX](https://jax.readthedocs.io/) and [Orbax](https://orbax.readthedocs.io/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SfBKEcCH-6Xk" + }, + "source": [ + "First, let's print the distribution of `relationship` in the test examples. Our objective is to optimize the quality of the model on the 483 `relationship == Wife` examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "FyNuf5G2EmWl" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "relationship\n", + "Husband 4002\n", + "Not-in-family 2505\n", + "Own-child 1521\n", + "Unmarried 948\n", + "Wife 483\n", + "Other-relative 310\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ds[\"relationship\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ba8nUY5Y_KAv" + }, + "source": [ + "We divide the dataset in two groups: Group A contains the `relationship != Wife` examples and group B contains the `relationship == Wife` examples." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "D0wQv8wOFR7f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of examples per group\n", + "\tTrain Group A: 21707\n", + "\tTest Group A: 9286\n", + "\tTrain Group B: 1085\n", + "\tTest Group B: 483\n" + ] + } + ], + "source": [ + "def is_group_B(ds):\n", + " return ds[\"relationship\"] == \"Wife\"\n", + "\n", + "\n", + "train_ds_group_A = train_ds[~is_group_B(train_ds)]\n", + "test_ds_group_A = test_ds[~is_group_B(test_ds)]\n", + "\n", + "train_ds_group_B = train_ds[is_group_B(train_ds)]\n", + "test_ds_group_B = test_ds[is_group_B(test_ds)]\n", + "\n", + "print(\"Number of examples per group\")\n", + "print(\"\\tTrain Group A:\", len(train_ds_group_A))\n", + "print(\"\\tTest Group A:\", len(test_ds_group_A))\n", + "print(\"\\tTrain Group B:\", len(train_ds_group_B))\n", + "print(\"\\tTest Group B:\", len(test_ds_group_B))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XWDLNTKT_gkM" + }, + "source": [ + "Note that group A contains more examples than group B, but what we care are the test examples in group B.\n", + "\n", + "Let's train and evaluate three models on different combinations of group 
A and B. Those will be our baselines." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "ZlHNFJApGNq0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on B, model trained on A: 0.7204968944099379\n", + "Accuracy on B, model trained on B: 0.7329192546583851\n", + "Accuracy on B, model trained on A+B: 0.7556935817805382\n" + ] + } + ], + "source": [ + "# Train model on group A\n", + "model_group_A = ydf.GradientBoostedTreesLearner(label=label).train(\n", + " train_ds_group_A, verbose=0\n", + ")\n", + "# Train model on group B\n", + "model_group_B = ydf.GradientBoostedTreesLearner(label=label).train(\n", + " train_ds_group_B, verbose=0\n", + ")\n", + "\n", + "# Train model on group A + B\n", + "model_group_AB = ydf.GradientBoostedTreesLearner(label=label).train(\n", + " train_ds, verbose=0\n", + ")\n", + "\n", + "# Evaluate the models on group B\n", + "accuracy_test_B_model_A = model_group_A.evaluate(test_ds_group_B).accuracy\n", + "accuracy_test_B_model_B = model_group_B.evaluate(test_ds_group_B).accuracy\n", + "accuracy_test_B_model_AB = model_group_AB.evaluate(test_ds_group_B).accuracy\n", + "\n", + "print(\"Accuracy on B, model trained on A:\", accuracy_test_B_model_A)\n", + "print(\"Accuracy on B, model trained on B:\", accuracy_test_B_model_B)\n", + "print(\"Accuracy on B, model trained on A+B:\", accuracy_test_B_model_AB)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JqJGrlvl_-Rk" + }, + "source": [ + "The model trained on both group A and B is the one performing best on group B. Can we do better?\n", + "\n", + "Let's convert the model trained on A+B into a JAX function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "oJZjdugAH3G9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'leaf_values': Array([-0.1233467 , -0.0927111 , 0.2927755 , ..., 0.05464426,\n", + " 0.12556875, -0.11374608], dtype=float32),\n", + " 'initial_predictions': Array([-1.1630996], dtype=float32)}" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jax_model_group_AB = model_group_AB.to_jax_function(\n", + " apply_activation=False,\n", + " leaves_as_params=True,\n", + ")\n", + "\n", + "jax_model_group_AB.params" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PCJqBODMA5rK" + }, + "source": [ + "Note that:\n", + "\n", + "- `apply_activation=False` removes the activation function from the model. This allows for the model loss to be computed on logits rather than probabilities, which makes finetuning more stable numerically.\n", + "- `leaves_as_params=True` specifies that the leaf values are exported as model parameters in `params`. This is necessary to finetune the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "03Aeofv3BKrZ" + }, + "source": [ + "To finetune the model, we need to generate batches of examples. The following block generates such batches." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "id": "5zam6V5dL0lU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'age': Array([44, 67, 26, 30], dtype=int32), 'workclass': Array([1, 5, 0, 1], dtype=int32), 'fnlwgt': Array([228057, 171564, 167835, 118551], dtype=int32), 'education': Array([9, 1, 3, 3], dtype=int32), 'education_num': Array([ 4, 9, 13, 13], dtype=int32), 'marital_status': Array([1, 1, 1, 1], dtype=int32), 'occupation': Array([ 7, 1, 0, 11], dtype=int32), 'relationship': Array([5, 5, 5, 5], dtype=int32), 'race': Array([1, 1, 1, 1], dtype=int32), 'sex': Array([2, 2, 2, 2], dtype=int32), 'capital_gain': Array([ 0, 20051, 0, 0], dtype=int32), 'capital_loss': Array([0, 0, 0, 0], dtype=int32), 'hours_per_week': Array([40, 30, 20, 16], dtype=int32), 'native_country': Array([12, 10, 1, 1], dtype=int32), 'income': Array([False, True, False, True], dtype=bool)}\n" + ] + } + ], + "source": [ + "def get_num_examples(ds):\n", + " return len(next(iter(ds.values())))\n", + "\n", + "\n", + "def prepare_dataset(ds, jax_model, batch=100):\n", + " ds = ds.copy()\n", + "\n", + " # Make the label boolean\n", + " ds[label] = ds[label] == \">50K\"\n", + "\n", + " # Encode the input features\n", + " encoded_ds = jax_model.encoder(ds)\n", + "\n", + " # Yield batches of examples\n", + " n = get_num_examples(encoded_ds)\n", + " i = 0\n", + " while i < n:\n", + " begin_idx = i\n", + " end_idx = min(i + batch, n)\n", + " yield {k: v[begin_idx:end_idx] for k, v in encoded_ds.items()}\n", + " i += batch\n", + "\n", + "\n", + "# Example of utilisation of \"prepare_dataset\".\n", + "for examples in prepare_dataset(train_ds_group_B, jax_model_group_AB, batch=4):\n", + " print(examples)\n", + " break # We only print the first batch" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iGEt9d5bBxAf" + }, + "source": [ + "Let's define utilities to compute and print the loss and accuracy of the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "id": "Ps697bccJU2K" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stage:initial test-accuracy:0.75569 test-loss:0.47798 train-accuracy:0.83963 train-loss:0.37099\n" + ] + } + ], + "source": [ + "@jax.jit\n", + "def compute_accuracy(params, examples, logit=True):\n", + " examples = examples.copy()\n", + " labels = examples.pop(model.label())\n", + " predictions = jax_model_group_AB.predict(examples, params)\n", + " return ((predictions >= 0.0) == labels).mean()\n", + "\n", + "\n", + "@jax.jit\n", + "def compute_loss(params, examples):\n", + " examples = examples.copy()\n", + " labels = examples.pop(model.label())\n", + " logits = jax_model_group_AB.predict(examples, params)\n", + " return optax.sigmoid_binary_cross_entropy(logits, labels).mean()\n", + "\n", + "\n", + "def compute_metric(metric_fn, ds):\n", + " sum_metrics = 0\n", + " num_examples = 0\n", + " for examples in prepare_dataset(ds, jax_model_group_AB):\n", + " n = get_num_examples(examples)\n", + " sum_metrics += n * metric_fn(jax_model_group_AB.params, examples)\n", + " num_examples += n\n", + " return float(sum_metrics / num_examples)\n", + "\n", + "\n", + "def print_logs(stage):\n", + " train_accuracy = compute_metric(compute_accuracy, train_ds_group_B)\n", + " train_loss = compute_metric(compute_loss, train_ds_group_B)\n", + " test_accuracy = compute_metric(compute_accuracy, test_ds_group_B)\n", + " test_loss = compute_metric(compute_loss, test_ds_group_B)\n", + " print(\n", + " f\"stage:{stage:10} \"\n", + " f\"test-accuracy:{test_accuracy:.5f} test-loss:{test_loss:.5f} \"\n", + " f\"train-accuracy:{train_accuracy:.5f} train-loss:{train_loss:.5f}\"\n", + " )\n", + "\n", + "\n", + "# Metrics of the model before training.\n", + "print_logs(\"initial\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tba0RsT_CC0o" + }, + "source": [ + "Following is the train training 
loop." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "id": "_trp941IZQu2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "stage:epoch_0 test-accuracy:0.75569 test-loss:0.47798 train-accuracy:0.83963 train-loss:0.37099\n", + "stage:epoch_1 test-accuracy:0.75155 test-loss:0.48035 train-accuracy:0.84424 train-loss:0.36520\n", + "stage:epoch_2 test-accuracy:0.75776 test-loss:0.47823 train-accuracy:0.84240 train-loss:0.35878\n", + "stage:epoch_3 test-accuracy:0.75983 test-loss:0.48016 train-accuracy:0.84608 train-loss:0.35352\n", + "stage:epoch_4 test-accuracy:0.75776 test-loss:0.48063 train-accuracy:0.84793 train-loss:0.34862\n", + "stage:epoch_5 test-accuracy:0.75569 test-loss:0.48173 train-accuracy:0.85069 train-loss:0.34419\n", + "stage:epoch_6 test-accuracy:0.75776 test-loss:0.48283 train-accuracy:0.85346 train-loss:0.34008\n", + "stage:epoch_7 test-accuracy:0.75776 test-loss:0.48381 train-accuracy:0.85806 train-loss:0.33622\n", + "stage:epoch_8 test-accuracy:0.75983 test-loss:0.48495 train-accuracy:0.86175 train-loss:0.33260\n", + "stage:epoch_9 test-accuracy:0.75983 test-loss:0.48595 train-accuracy:0.86267 train-loss:0.32917\n", + "stage:final test-accuracy:0.75983 test-loss:0.48703 train-accuracy:0.86359 train-loss:0.32592\n" + ] + } + ], + "source": [ + "optimizer = optax.adam(0.001)\n", + "\n", + "\n", + "@jax.jit\n", + "def train_step(opt_state, mdl_state, examples):\n", + " loss, grads = jax.value_and_grad(compute_loss)(mdl_state, examples)\n", + " updates, opt_state = optimizer.update(grads, opt_state)\n", + " mdl_state = optax.apply_updates(mdl_state, updates)\n", + " return opt_state, mdl_state, loss\n", + "\n", + "\n", + "opt_state = optimizer.init(jax_model_group_AB.params)\n", + "for epoch_idx in range(10):\n", + " print_logs(f\"epoch_{epoch_idx}\")\n", + " for examples in prepare_dataset(train_ds_group_B, jax_model_group_AB):\n", + " opt_state, jax_model_group_AB.params, _ = 
train_step(\n", + " opt_state, jax_model_group_AB.params, examples\n", + " )\n", + "\n", + "print_logs(\"final\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "huoeeQAzDj4n" + }, + "source": [ + "Notice that both the test and training accuracy improve during training.\n", + "\n", + "We can now update the YDF model with the finetuned weights." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "id": "G3LZFKM6Dvgl" + }, + "outputs": [], + "source": [ + "model_group_AB.update_with_jax_params(jax_model_group_AB.params)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qiZVYQDuD3q7" + }, + "source": [ + "`model_group_AB` is the finetuned model. Let's evaluate and compare it to the other models:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "id": "GTEZwCGFbfjQ" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on B, model trained on A: 0.7204968944099379\n", + "Accuracy on B, model trained on B: 0.7329192546583851\n", + "Accuracy on B, model trained on A+B: 0.7556935817805382\n", + "==================================\n", + "Accuracy on B, model trained on A+B, finetuned on B: 0.7598343685300207\n" + ] + } + ], + "source": [ + "accuracy_test_B_model_AB_finetuned_B = model_group_AB.evaluate(\n", + " test_ds_group_B\n", + ").accuracy\n", + "\n", + "print(\"Accuracy on B, model trained on A:\", accuracy_test_B_model_A)\n", + "print(\"Accuracy on B, model trained on B:\", accuracy_test_B_model_B)\n", + "print(\"Accuracy on B, model trained on A+B:\", accuracy_test_B_model_AB)\n", + "print(\"==================================\")\n", + "print(\n", + " \"Accuracy on B, model trained on A+B, finetuned on B:\",\n", + " accuracy_test_B_model_AB_finetuned_B,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-YwFSyW7EE0N" + }, + "source": [ + "Notice that the new model \"Accuracy on B, model trained on A+B, finetuned on B\" shows the 
best test accuracy.\n", + "\n", + "`model_group_AB` is a YDF model like any other. For instance, you can save it and analyse it.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "id": "-42P_PvwOjHZ" + }, + "outputs": [], + "source": [ + "# Save the model\n", + "with tempfile.TemporaryDirectory() as tempdir:\n", + " model_group_AB.save(tempdir)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "id": "GSFq5OP1ELAw" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "

Variable importances measure the importance of an input feature for a model.

    1.   "capital_gain"  0.049689 ################\n",
+       "    2.     "occupation"  0.045549 ##############\n",
+       "    3.      "education"  0.026915 ########\n",
+       "    4.  "education_num"  0.026915 ########\n",
+       "    5.            "age"  0.018634 ######\n",
+       "    6.   "capital_loss"  0.018634 ######\n",
+       "    7.      "workclass"  0.014493 #####\n",
+       "    8.         "fnlwgt"  0.002070 #\n",
+       "    9. "native_country"  0.002070 #\n",
+       "   10.   "relationship"  0.000000 \n",
+       "   11.           "race"  0.000000 \n",
+       "   12.            "sex"  0.000000 \n",
+       "   13. "hours_per_week"  0.000000 \n",
+       "   14. "marital_status" -0.002070 \n",
+       "
    1.   "capital_gain"  0.164288 ################\n",
+       "    2.   "capital_loss"  0.048263 #####\n",
+       "    3.     "occupation"  0.033196 ###\n",
+       "    4.      "education"  0.023903 ##\n",
+       "    5.  "education_num"  0.015137 ##\n",
+       "    6.            "age"  0.013872 #\n",
+       "    7.      "workclass"  0.006274 #\n",
+       "    8.           "race"  0.002477 \n",
+       "    9.            "sex"  0.001453 \n",
+       "   10.         "fnlwgt"  0.000984 \n",
+       "   11. "marital_status"  0.000722 \n",
+       "   12.   "relationship"  0.000000 \n",
+       "   13. "native_country" -0.000019 \n",
+       "   14. "hours_per_week" -0.007143 \n",
+       "
    1.   "capital_gain"  0.083385 ################\n",
+       "    2.     "occupation"  0.040765 ########\n",
+       "    3.   "capital_loss"  0.030647 ######\n",
+       "    4.      "education"  0.026051 #####\n",
+       "    5.            "age"  0.024419 #####\n",
+       "    6.  "education_num"  0.016887 ####\n",
+       "    7.      "workclass"  0.010427 ##\n",
+       "    8.           "race"  0.003161 #\n",
+       "    9. "marital_status"  0.000790 #\n",
+       "   10.            "sex"  0.000704 #\n",
+       "   11.   "relationship"  0.000000 #\n",
+       "   12. "native_country" -0.000361 #\n",
+       "   13.         "fnlwgt" -0.001022 \n",
+       "   14. "hours_per_week" -0.006107 \n",
+       "
    1.   "capital_gain"  0.162868 ################\n",
+       "    2.   "capital_loss"  0.048043 #####\n",
+       "    3.     "occupation"  0.033135 ###\n",
+       "    4.      "education"  0.023881 ##\n",
+       "    5.  "education_num"  0.015116 ##\n",
+       "    6.            "age"  0.013875 #\n",
+       "    7.      "workclass"  0.006275 #\n",
+       "    8.           "race"  0.002472 \n",
+       "    9.            "sex"  0.001448 \n",
+       "   10.         "fnlwgt"  0.000990 \n",
+       "   11. "marital_status"  0.000721 \n",
+       "   12.   "relationship"  0.000000 \n",
+       "   13. "native_country" -0.000014 \n",
+       "   14. "hours_per_week" -0.007106 \n",
+       "
    1.            "age"  0.226642 ################\n",
+       "    2.     "occupation"  0.219727 #############\n",
+       "    3.   "capital_gain"  0.214876 ############\n",
+       "    4.      "education"  0.213746 ###########\n",
+       "    5. "marital_status"  0.212739 ###########\n",
+       "    6.   "relationship"  0.206040 #########\n",
+       "    7.         "fnlwgt"  0.203843 ########\n",
+       "    8. "hours_per_week"  0.203735 ########\n",
+       "    9.   "capital_loss"  0.196549 ######\n",
+       "   10. "native_country"  0.190548 ####\n",
+       "   11.      "workclass"  0.187795 ###\n",
+       "   12.  "education_num"  0.184215 ##\n",
+       "   13.           "race"  0.180495 \n",
+       "   14.            "sex"  0.177647 \n",
+       "
    1.            "age" 26.000000 ################\n",
+       "    2.   "capital_gain" 26.000000 ################\n",
+       "    3. "marital_status" 20.000000 ############\n",
+       "    4.   "relationship" 17.000000 ##########\n",
+       "    5.   "capital_loss" 14.000000 ########\n",
+       "    6. "hours_per_week" 14.000000 ########\n",
+       "    7.      "education" 12.000000 #######\n",
+       "    8.         "fnlwgt" 10.000000 #####\n",
+       "    9.           "race"  9.000000 #####\n",
+       "   10.  "education_num"  7.000000 ###\n",
+       "   11.            "sex"  4.000000 #\n",
+       "   12.     "occupation"  2.000000 \n",
+       "   13.      "workclass"  1.000000 \n",
+       "   14. "native_country"  1.000000 \n",
+       "
    1.     "occupation" 724.000000 ################\n",
+       "    2.         "fnlwgt" 513.000000 ###########\n",
+       "    3.            "age" 483.000000 ##########\n",
+       "    4.      "education" 464.000000 ##########\n",
+       "    5. "hours_per_week" 339.000000 #######\n",
+       "    6.   "capital_gain" 326.000000 ######\n",
+       "    7. "native_country" 306.000000 ######\n",
+       "    8.   "capital_loss" 297.000000 ######\n",
+       "    9.   "relationship" 262.000000 #####\n",
+       "   10.      "workclass" 244.000000 #####\n",
+       "   11. "marital_status" 210.000000 ####\n",
+       "   12.  "education_num" 82.000000 #\n",
+       "   13.            "sex" 42.000000 \n",
+       "   14.           "race" 21.000000 \n",
+       "
    1.   "relationship" 3014.690076 ################\n",
+       "    2.   "capital_gain" 2065.521668 ##########\n",
+       "    3.      "education" 1144.490954 ######\n",
+       "    4. "marital_status" 1111.389695 #####\n",
+       "    5.     "occupation" 1094.619502 #####\n",
+       "    6.  "education_num" 796.666823 ####\n",
+       "    7.   "capital_loss" 584.055066 ###\n",
+       "    8.            "age" 582.288569 ###\n",
+       "    9. "hours_per_week" 366.856509 #\n",
+       "   10. "native_country" 263.872689 #\n",
+       "   11.         "fnlwgt" 216.537764 #\n",
+       "   12.      "workclass" 196.085503 #\n",
+       "   13.            "sex" 47.217730 \n",
+       "   14.           "race"  5.428727 \n",
+       "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Analyse the model\n", + "model_group_AB.analyze(test_ds_group_B)" + ] + } + ], + "metadata": { + "colab": { + "last_runtime": { + "build_target": "", + "kind": "local" + }, + "private_outputs": true, + "provenance": [ + { + "file_id": "1LjF5dfXxeLAzb2epxADkd34MHoUN4aHt", + "timestamp": 1716900422680 + } + ] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/documentation/public/mkdocs.yml b/documentation/public/mkdocs.yml index 0b246796..18128006 100644 --- a/documentation/public/mkdocs.yml +++ b/documentation/public/mkdocs.yml @@ -66,6 +66,7 @@ nav: # TODO: boolean, and text - Deep learning: - with TensorFlow: tutorial/compose_with_tf.ipynb + - with Jax: tutorial/compose_with_jax.ipynb - Dataset: - Pandas Dataframe: tutorial/pandas.ipynb - TensorFlow Dataset: tutorial/tf_dataset.ipynb From 933e38def952688ce4ecc6f25966cc33ae0b30fc Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 06:20:43 -0700 Subject: [PATCH 21/30] Anomaly detection; Isolation forest c++ model (part 3) PiperOrigin-RevId: 643984185 --- yggdrasil_decision_forests/model/BUILD | 1 + .../model/abstract_model.h | 5 +- yggdrasil_decision_forests/model/describe.cc | 4 +- .../model/isolation_forest/BUILD | 77 +++++ .../isolation_forest/isolation_forest.cc | 319 ++++++++++++++++++ .../model/isolation_forest/isolation_forest.h | 163 +++++++++ .../isolation_forest/isolation_forest.proto | 54 +++ .../isolation_forest/isolation_forest_test.cc | 301 +++++++++++++++++ 
.../test_data/dataset/README.md | 26 +- .../test_data/dataset/gaussians_test.csv | 281 +++++++++++++++ .../test_data/dataset/gaussians_train.csv | 281 +++++++++++++++ .../model/gaussians_anomaly_if/data_spec.pb | Bin 0 -> 134 bytes .../test_data/model/gaussians_anomaly_if/done | 0 .../model/gaussians_anomaly_if/header.pb | Bin 0 -> 67 bytes .../isolation_forest_header.pb | Bin 0 -> 23 bytes .../gaussians_anomaly_if/nodes-00000-of-00001 | Bin 0 -> 103016 bytes .../prediction/gaussians_anomaly_if_skl.csv | 280 +++++++++++++++ 17 files changed, 1788 insertions(+), 4 deletions(-) create mode 100644 yggdrasil_decision_forests/model/isolation_forest/BUILD create mode 100644 yggdrasil_decision_forests/model/isolation_forest/isolation_forest.cc create mode 100644 yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h create mode 100644 yggdrasil_decision_forests/model/isolation_forest/isolation_forest.proto create mode 100644 yggdrasil_decision_forests/model/isolation_forest/isolation_forest_test.cc create mode 100644 yggdrasil_decision_forests/test_data/dataset/gaussians_test.csv create mode 100644 yggdrasil_decision_forests/test_data/dataset/gaussians_train.csv create mode 100644 yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/data_spec.pb create mode 100644 yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/done create mode 100644 yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/header.pb create mode 100644 yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/isolation_forest_header.pb create mode 100644 yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/nodes-00000-of-00001 create mode 100644 yggdrasil_decision_forests/test_data/prediction/gaussians_anomaly_if_skl.csv diff --git a/yggdrasil_decision_forests/model/BUILD b/yggdrasil_decision_forests/model/BUILD index a0e70fe9..f48020fb 100644 --- a/yggdrasil_decision_forests/model/BUILD +++ b/yggdrasil_decision_forests/model/BUILD @@ -13,6 
+13,7 @@ cc_library_ydf( name = "all_models", deps = [ "//yggdrasil_decision_forests/model/gradient_boosted_trees", + "//yggdrasil_decision_forests/model/isolation_forest", "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/serving/decision_forest:register_engines", ], diff --git a/yggdrasil_decision_forests/model/abstract_model.h b/yggdrasil_decision_forests/model/abstract_model.h index 1fbaa731..6f88dc42 100644 --- a/yggdrasil_decision_forests/model/abstract_model.h +++ b/yggdrasil_decision_forests/model/abstract_model.h @@ -150,7 +150,10 @@ class AbstractModel { // Get the model target column. int label_col_idx() const { return label_col_idx_; } - // Name of the label column. + // Tests if the model has a label. + bool has_label() const { return label_col_idx_ != -1; } + + // Name of the label column. Should only be called if "has_label()" is true. std::string label() const { DCHECK_GE(label_col_idx_, 0); DCHECK_LT(label_col_idx_, data_spec_.columns_size()); diff --git a/yggdrasil_decision_forests/model/describe.cc b/yggdrasil_decision_forests/model/describe.cc index c6a192d3..9a47caa7 100644 --- a/yggdrasil_decision_forests/model/describe.cc +++ b/yggdrasil_decision_forests/model/describe.cc @@ -68,7 +68,9 @@ utils::html::Html Model(const model::AbstractModel& model) { h::Html content; AddKeyValue(&content, "Name", model.name()); AddKeyValue(&content, "Task", proto::Task_Name(model.task())); - AddKeyValue(&content, "Label", model.label()); + if (model.has_label()) { + AddKeyValue(&content, "Label", model.label()); + } if (model.ranking_group_col_idx() != -1) { AddKeyValue( diff --git a/yggdrasil_decision_forests/model/isolation_forest/BUILD b/yggdrasil_decision_forests/model/isolation_forest/BUILD new file mode 100644 index 00000000..686f7545 --- /dev/null +++ b/yggdrasil_decision_forests/model/isolation_forest/BUILD @@ -0,0 +1,77 @@ +load("//yggdrasil_decision_forests/utils:compile.bzl", "all_proto_library", "cc_library_ydf") + 
+package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +# Library +# ======= + +cc_library_ydf( + name = "isolation_forest", + srcs = ["isolation_forest.cc"], + hdrs = ["isolation_forest.h"], + deps = [ + ":isolation_forest_cc_proto", + "//yggdrasil_decision_forests/dataset:example_cc_proto", + "//yggdrasil_decision_forests/dataset:types", + "//yggdrasil_decision_forests/dataset:vertical_dataset", + "//yggdrasil_decision_forests/metric:metric_cc_proto", + "//yggdrasil_decision_forests/model:abstract_model", + "//yggdrasil_decision_forests/model:abstract_model_cc_proto", + "//yggdrasil_decision_forests/model:prediction_cc_proto", + "//yggdrasil_decision_forests/model/decision_tree", + "//yggdrasil_decision_forests/model/decision_tree:decision_forest_interface", + "//yggdrasil_decision_forests/model/decision_tree:decision_tree_cc_proto", + "//yggdrasil_decision_forests/utils:filesystem", + "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:status_macros", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], + alwayslink = 1, +) + +# Proto +# ======== + +all_proto_library( + name = "isolation_forest_proto", + srcs = ["isolation_forest.proto"], + deps = [ + "//yggdrasil_decision_forests/metric:metric_proto", + "//yggdrasil_decision_forests/model:abstract_model_proto", + ], +) + +# Test +# ======== + +cc_test( + name = "isolation_forest_test", + srcs = ["isolation_forest_test.cc"], + data = ["//yggdrasil_decision_forests/test_data"], + deps = [ + ":isolation_forest", + "//yggdrasil_decision_forests/dataset:csv_example_reader", + "//yggdrasil_decision_forests/dataset:data_spec", + "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", + "//yggdrasil_decision_forests/dataset:vertical_dataset", + "//yggdrasil_decision_forests/dataset:vertical_dataset_io", + 
"//yggdrasil_decision_forests/model:model_library", + "//yggdrasil_decision_forests/model:prediction_cc_proto", + "//yggdrasil_decision_forests/model/decision_tree", + "//yggdrasil_decision_forests/model/decision_tree:builder", + "//yggdrasil_decision_forests/utils:filesystem", + "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:protobuf", + "//yggdrasil_decision_forests/utils:test", + "//yggdrasil_decision_forests/utils:testing_macros", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.cc b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.cc new file mode 100644 index 00000000..d8415d78 --- /dev/null +++ b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.cc @@ -0,0 +1,319 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "yggdrasil_decision_forests/dataset/example.pb.h" +#include "yggdrasil_decision_forests/dataset/types.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/metric/metric.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree_io.h" +#include "yggdrasil_decision_forests/model/decision_tree/structure_analysis.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.pb.h" +#include "yggdrasil_decision_forests/model/prediction.pb.h" +#include "yggdrasil_decision_forests/utils/filesystem.h" +#include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/status_macros.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { + +namespace { +// Basename for the shards containing the nodes. +constexpr char kNodeBaseFilename[] = "nodes"; +// Filename containing the isolation forest header. +constexpr char kHeaderBaseFilename[] = "isolation_forest_header.pb"; + +} // namespace + +float PreissAveragePathLength(UnsignedExampleIdx num_examples) { + DCHECK_GT(num_examples, 0); + const float num_examples_float = static_cast(num_examples); + + // Harmonic number + // This is the approximation proposed in "Isolation Forest" by Liu et al. 
+ const auto H = [](const float x) { + constexpr float euler_constant = 0.5772156649f; + return std::log(x) + euler_constant; + }; + + if (num_examples > 2) { + return 2.f * H(num_examples_float - 1.f) - + 2.f * (num_examples_float - 1.f) / num_examples_float; + } else if (num_examples == 2) { + return 1.f; + } else { + return 0.f; // To be safe. + } +} + +float IsolationForestPredictionFromDenominator(const float average_h, + const float denominator) { + if (denominator == 0.f) { + return 0.f; + } + const float term = -average_h / denominator; + return std::pow(2.f, term); +} + +float IsolationForestPrediction(const float average_h, + const UnsignedExampleIdx num_examples) { + return IsolationForestPredictionFromDenominator( + average_h, PreissAveragePathLength(num_examples)); +} + +proto::Header IsolationForestModel::BuildHeaderProto() const { + proto::Header header; + header.set_num_trees(decision_trees_.size()); + header.mutable_isolation_forest(); + header.set_num_examples_per_trees(num_examples_per_trees_); + return header; +} + +void IsolationForestModel::ApplyHeaderProto(const proto::Header& header) { + num_examples_per_trees_ = header.num_examples_per_trees(); +} + +absl::Status IsolationForestModel::Save( + absl::string_view directory, const ModelIOOptions& io_options) const { + RETURN_IF_ERROR(file::RecursivelyCreateDir(directory, file::Defaults())); + RETURN_IF_ERROR(ValidateModelIOOptions(io_options)); + + // Format used to store the nodes. 
+ std::string format; + if (node_format_.has_value()) { + format = node_format_.value(); + } else { + ASSIGN_OR_RETURN(format, decision_tree::RecommendedSerializationFormat()); + } + + int num_shards; + const auto node_base_filename = + absl::StrCat(io_options.file_prefix.value(), kNodeBaseFilename); + RETURN_IF_ERROR(decision_tree::SaveTreesToDisk( + directory, node_base_filename, decision_trees_, format, &num_shards)); + + auto header = BuildHeaderProto(); + header.set_node_format(format); + header.set_num_node_shards(num_shards); + + const auto header_filename = + absl::StrCat(io_options.file_prefix.value(), kHeaderBaseFilename); + RETURN_IF_ERROR(file::SetBinaryProto( + file::JoinPath(directory, header_filename), header, file::Defaults())); + return absl::OkStatus(); +} + +absl::Status IsolationForestModel::Load(absl::string_view directory, + const ModelIOOptions& io_options) { + RETURN_IF_ERROR(ValidateModelIOOptions(io_options)); + + proto::Header header; + decision_trees_.clear(); + const auto header_filename = + absl::StrCat(io_options.file_prefix.value(), kHeaderBaseFilename); + RETURN_IF_ERROR(file::GetBinaryProto( + file::JoinPath(directory, header_filename), &header, file::Defaults())); + const auto node_base_filename = + absl::StrCat(io_options.file_prefix.value(), kNodeBaseFilename); + RETURN_IF_ERROR(decision_tree::LoadTreesFromDisk( + directory, node_base_filename, header.num_node_shards(), + header.num_trees(), header.node_format(), &decision_trees_)); + node_format_ = header.node_format(); + ApplyHeaderProto(header); + return absl::OkStatus(); +} + +absl::Status IsolationForestModel::SerializeModelImpl( + model::proto::SerializedModel* dst_proto, std::string* dst_raw) const { + const auto& specialized_proto = dst_proto->MutableExtension( + isolation_forest::proto::isolation_forest_serialized_model); + *specialized_proto->mutable_header() = BuildHeaderProto(); + if (node_format_.has_value()) { + 
specialized_proto->mutable_header()->set_node_format(node_format_.value()); + } + ASSIGN_OR_RETURN(*dst_raw, decision_tree::SerializeTrees(decision_trees_)); + return absl::OkStatus(); +} + +absl::Status IsolationForestModel::DeserializeModelImpl( + const model::proto::SerializedModel& src_proto, absl::string_view src_raw) { + const auto& specialized_proto = src_proto.GetExtension( + isolation_forest::proto::isolation_forest_serialized_model); + ApplyHeaderProto(specialized_proto.header()); + if (specialized_proto.header().has_node_format()) { + node_format_ = specialized_proto.header().node_format(); + } + return decision_tree::DeserializeTrees( + src_raw, specialized_proto.header().num_trees(), &decision_trees_); +} + +absl::Status IsolationForestModel::Validate() const { + RETURN_IF_ERROR(AbstractModel::Validate()); + if (decision_trees_.empty()) { + return absl::InvalidArgumentError("Empty isolation forest"); + } + if (task_ != model::proto::Task::ANOMALY_DETECTION) { + return absl::InvalidArgumentError("Wrong task"); + } + return absl::OkStatus(); +} + +absl::optional IsolationForestModel::ModelSizeInBytes() const { + return AbstractAttributesSizeInBytes() + + decision_tree::EstimateSizeInByte(decision_trees_); +} + +void IsolationForestModel::PredictLambda( + std::function + get_leaf, + model::proto::Prediction* prediction) const { + float sum_h = 0.0; + for (const auto& tree : decision_trees_) { + const auto& leaf = get_leaf(*tree); + const auto num_examples = + leaf.node().anomaly_detection().num_examples_without_weight(); + sum_h += leaf.depth() + PreissAveragePathLength(num_examples); + } + + if (!decision_trees_.empty()) { + sum_h /= decision_trees_.size(); + } + DCHECK_GT(num_examples_per_trees_, 0); + const float p = IsolationForestPrediction( + /*average_h=*/sum_h, + /*num_examples=*/num_examples_per_trees_); + prediction->mutable_anomaly_detection()->set_value(p); +} + +void IsolationForestModel::Predict(const dataset::VerticalDataset& dataset, + 
dataset::VerticalDataset::row_t row_idx, + model::proto::Prediction* prediction) const { + PredictLambda( + [&](const decision_tree::DecisionTree& tree) + -> const decision_tree::NodeWithChildren& { + return tree.GetLeafAlt(dataset, row_idx); + }, + prediction); +} + +void IsolationForestModel::Predict(const dataset::proto::Example& example, + model::proto::Prediction* prediction) const { + PredictLambda( + [&](const decision_tree::DecisionTree& tree) + -> const decision_tree::NodeWithChildren& { + return tree.GetLeafAlt(example); + }, + prediction); +} + +absl::Status IsolationForestModel::PredictGetLeaves( + const dataset::VerticalDataset& dataset, + dataset::VerticalDataset::row_t row_idx, absl::Span leaves) const { + if (leaves.size() != num_trees()) { + return absl::InvalidArgumentError("Wrong number of trees"); + } + for (int tree_idx = 0; tree_idx < decision_trees_.size(); tree_idx++) { + auto& leaf = decision_trees_[tree_idx]->GetLeafAlt(dataset, row_idx); + if (leaf.leaf_idx() < 0) { + return absl::InvalidArgumentError("Leaf idx not set"); + } + leaves[tree_idx] = leaf.leaf_idx(); + } + return absl::OkStatus(); +} + +bool IsolationForestModel::CheckStructure( + const decision_tree::CheckStructureOptions& options) const { + return decision_tree::CheckStructure(options, data_spec(), decision_trees_); +} + +void IsolationForestModel::AppendDescriptionAndStatistics( + bool full_definition, std::string* description) const { + AbstractModel::AppendDescriptionAndStatistics(full_definition, description); + absl::StrAppend(description, "\n"); + StrAppendForestStructureStatistics(data_spec(), decision_trees(), + description); + absl::StrAppend(description, + "Node format: ", node_format_.value_or("NOT_SET"), "\n"); + + absl::StrAppend(description, + "Number of examples per tree: ", num_examples_per_trees_, + "\n"); + + if (full_definition) { + absl::StrAppend(description, "\nModel Structure:\n"); + decision_tree::AppendModelStructure(decision_trees_, data_spec(), + 
label_col_idx_, description); + } +} + +absl::Status IsolationForestModel::MakePureServing() { + for (auto& tree : decision_trees_) { + tree->IterateOnMutableNodes( + [](decision_tree::NodeWithChildren* node, const int depth) { + if (!node->IsLeaf()) { + // Remove the label information from the non-leaf nodes. + node->mutable_node()->clear_output(); + } + }); + } + return AbstractModel::MakePureServing(); +} + +absl::Status IsolationForestModel::Distance( + const dataset::VerticalDataset& dataset1, + const dataset::VerticalDataset& dataset2, + absl::Span distances) const { + return decision_tree::Distance(decision_trees(), dataset1, dataset2, + distances); +} + +std::string IsolationForestModel::DebugCompare( + const AbstractModel& other) const { + if (const auto parent_compare = AbstractModel::DebugCompare(other); + !parent_compare.empty()) { + return parent_compare; + } + const auto* other_cast = dynamic_cast(&other); + if (!other_cast) { + return "Non matching types"; + } + return decision_tree::DebugCompare( + data_spec_, label_col_idx_, decision_trees_, other_cast->decision_trees_); +} + +REGISTER_AbstractModel(IsolationForestModel, + IsolationForestModel::kRegisteredName); + +} // namespace yggdrasil_decision_forests::model::isolation_forest diff --git a/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h new file mode 100644 index 00000000..002a0b1d --- /dev/null +++ b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h @@ -0,0 +1,163 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef YGGDRASIL_DECISION_FORESTS_MODEL_ISOLATION_FOREST_ISOLATION_FOREST_H_ +#define YGGDRASIL_DECISION_FORESTS_MODEL_ISOLATION_FOREST_ISOLATION_FOREST_H_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "yggdrasil_decision_forests/dataset/example.pb.h" +#include "yggdrasil_decision_forests/dataset/types.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/metric/metric.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_forest_interface.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.pb.h" +#include "yggdrasil_decision_forests/model/prediction.pb.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { + +class IsolationForestModel : public AbstractModel, + public DecisionForestInterface { + public: + inline static constexpr char kRegisteredName[] = "ISOLATION_FOREST"; + + IsolationForestModel() : AbstractModel(kRegisteredName) {} + + void Predict(const dataset::VerticalDataset& dataset, + dataset::VerticalDataset::row_t row_idx, + model::proto::Prediction* prediction) const override; + + void Predict(const dataset::proto::Example& example, + 
model::proto::Prediction* prediction) const override; + + absl::Status PredictGetLeaves(const dataset::VerticalDataset& dataset, + dataset::VerticalDataset::row_t row_idx, + absl::Span leaves) const override; + + bool CheckStructure( + const decision_tree::CheckStructureOptions& options) const override; + + absl::optional ModelSizeInBytes() const override; + + void AppendDescriptionAndStatistics(bool full_definition, + std::string* description) const override; + + absl::Status MakePureServing() override; + + absl::Status Distance(const dataset::VerticalDataset& dataset1, + const dataset::VerticalDataset& dataset2, + absl::Span distances) const override; + + int num_trees() const override { return decision_trees_.size(); } + + // For the serving engines. + // TODO: Move in DecisionForestInterface. + size_t NumTrees() const { return num_trees(); } + int64_t NumNodes() const { + return decision_tree::NumberOfNodes(decision_trees_); + } + void CountFeatureUsage( + std::unordered_map* feature_usage) const { + for (const auto& tree : decision_trees_) { + tree->CountFeatureUsage(feature_usage); + } + } + + const std::vector>& + decision_trees() const override { + return decision_trees_; + } + + std::vector>* + mutable_decision_trees() override { + return &decision_trees_; + } + + void set_node_format(const absl::optional& format) override { + node_format_ = format; + } + + void set_num_examples_per_trees(int64_t value) { + num_examples_per_trees_ = value; + } + + int64_t num_examples_per_trees() const { return num_examples_per_trees_; } + + std::string DebugCompare(const AbstractModel& other) const override; + + absl::Status Save(absl::string_view directory, + const ModelIOOptions& io_options) const override; + + absl::Status Load(absl::string_view directory, + const ModelIOOptions& io_options) override; + + absl::Status Validate() const override; + + private: + void PredictLambda(std::function + get_leaf, + model::proto::Prediction* prediction) const; + + // The decision 
trees. + std::vector> decision_trees_; + + // Node storage format. + absl::optional node_format_; + + absl::Status SerializeModelImpl(model::proto::SerializedModel* dst_proto, + std::string* dst_raw) const override; + + absl::Status DeserializeModelImpl( + const model::proto::SerializedModel& src_proto, + absl::string_view src_raw) override; + + proto::Header BuildHeaderProto() const; + void ApplyHeaderProto(const proto::Header& header); + + // Number of examples used to grow each tree. + int64_t num_examples_per_trees_ = -1; +}; + +// Analytical expected number of examples in a binary tree trained with +// "num_examples" examples. Called "c" in "Isolation-Based Anomaly Detection" by +// Liu et al. +float PreissAveragePathLength(UnsignedExampleIdx num_examples); + +// Isolation forest prediction. +float IsolationForestPrediction(float average_h, + UnsignedExampleIdx num_examples); + +// Isolation forest prediction, from the pre-computed denominator. +float IsolationForestPredictionFromDenominator(float average_h, + float denominator); + +} // namespace yggdrasil_decision_forests::model::isolation_forest + +#endif // YGGDRASIL_DECISION_FORESTS_MODEL_ISOLATION_FOREST_ISOLATION_FOREST_H_ diff --git a/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.proto b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.proto new file mode 100644 index 00000000..f3cb8d26 --- /dev/null +++ b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest.proto @@ -0,0 +1,54 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto2"; + +package yggdrasil_decision_forests.model.isolation_forest.proto; + +import "yggdrasil_decision_forests/model/abstract_model.proto"; + +// An isolation forest as defined in "Isolation-Based Anomaly Detection" by Liu +// et al. (2012). In this case, the prediction value is the node depth. +message IsolationForestAnomalityScore {} + +// Header for the isolation forest model. +message Header { + // Next ID: 6 + + // Number of shards used to store the nodes. + optional int32 num_node_shards = 1; + + // Number of trees. + optional int64 num_trees = 2; + + // Container used to store the trees' nodes. + optional string node_format = 3 [default = "TFE_RECORDIO"]; + + // Number of examples used to grow each tree. + optional int64 num_examples_per_trees = 4; + + oneof anomality_score { + IsolationForestAnomalityScore isolation_forest = 5; + } +} + +message IsolationForestSerializedModel { + optional Header header = 1; +} + +extend model.proto.SerializedModel { + optional IsolationForestSerializedModel isolation_forest_serialized_model = + 1003; +} diff --git a/yggdrasil_decision_forests/model/isolation_forest/isolation_forest_test.cc b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest_test.cc new file mode 100644 index 00000000..26839376 --- /dev/null +++ b/yggdrasil_decision_forests/model/isolation_forest/isolation_forest_test.cc @@ -0,0 +1,301 @@ +/* + * Copyright 2022 Google LLC. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/types/span.h" +#include "yggdrasil_decision_forests/dataset/data_spec.h" +#include "yggdrasil_decision_forests/dataset/data_spec.pb.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset_io.h" +#include "yggdrasil_decision_forests/model/decision_tree/builder.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/model/model_library.h" +#include "yggdrasil_decision_forests/model/prediction.pb.h" +#include "yggdrasil_decision_forests/utils/filesystem.h" +#include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/protobuf.h" +#include "yggdrasil_decision_forests/utils/test.h" +#include "yggdrasil_decision_forests/utils/testing_macros.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { +namespace { + +const float kEpsilon = 0.00001f; + +using ::testing::ElementsAre; +using ::yggdrasil_decision_forests::test::EqualsProto; + +std::string TestDataDir() { + return file::JoinPath(test::DataRootDirectory(), + "yggdrasil_decision_forests/test_data"); +} + +std::unique_ptr CreateToyModel() { 
+ auto model = std::make_unique(); + model->set_task(model::proto::Task::ANOMALY_DETECTION); + model->set_num_examples_per_trees(10); + + dataset::AddNumericalColumn("a", model->mutable_data_spec()); + dataset::AddNumericalColumn("b", model->mutable_data_spec()); + dataset::AddNumericalColumn("c", model->mutable_data_spec()); + + model->mutable_input_features()->push_back(0); + model->mutable_input_features()->push_back(1); + model->mutable_input_features()->push_back(2); + + // Tree 1 + { + auto tree = std::make_unique(); + decision_tree::TreeBuilder root(tree.get()); + auto [nl1, l1] = root.ConditionIsGreater(0, 0); + auto [l2, nl2] = nl1.ConditionIsGreater(1, 1); + auto [l3, l4] = nl2.ConditionIsGreater(0, 1); + l1.LeafAnomalyDetection(2); + l2.LeafAnomalyDetection(4); + l3.LeafAnomalyDetection(2); + l4.LeafAnomalyDetection(2); + tree->SetLeafIndices(); + model->mutable_decision_trees()->push_back(std::move(tree)); + } + + // Tree 2 + { + auto tree = std::make_unique(); + decision_tree::TreeBuilder root(tree.get()); + auto [l1, l2] = root.ConditionIsGreater(1, 1); + l1.LeafAnomalyDetection(5); + l2.LeafAnomalyDetection(5); + tree->SetLeafIndices(); + model->mutable_decision_trees()->push_back(std::move(tree)); + } + + return model; +} + +dataset::VerticalDataset CreateToyDataset( + const dataset::proto::DataSpecification& dataspec) { + dataset::VerticalDataset dataset; + dataset.set_data_spec(dataspec); + EXPECT_OK(dataset.CreateColumnsFromDataspec()); + dataset.AppendExample({{"a", "-1"}, {"b", "0"}, {"c", "0"}}); + dataset.AppendExample({{"a", "0.5"}, {"b", "0"}, {"c", "0"}}); + dataset.AppendExample({{"a", "1.5"}, {"b", "0"}, {"c", "0"}}); + dataset.AppendExample({{"a", "1.5"}, {"b", "2"}, {"c", "1"}}); + return dataset; +} + +TEST(IsolationForest, PreissAveragePathLength) { + EXPECT_EQ(PreissAveragePathLength(1), 0.f); + EXPECT_EQ(PreissAveragePathLength(2), 1.f); + /* + H = lambda x: math.log(x) + 0.5772156649 + 2 * H(3 - 1) - 2 * (3 - 1) / 3 + >> 
1.207392357586557 + */ + EXPECT_NEAR(PreissAveragePathLength(3), 1.20739233f, kEpsilon); +} + +TEST(IsolationForest, IsolationForestPrediction) { + EXPECT_NEAR(IsolationForestPrediction(0, 4), 1.f, kEpsilon); + + /* + 2**( - 1 / (2 * H(4 - 1) - 2 * (4 - 1) / 4)) + >> 0.6877436677784063 + 2**( - 2 / (2 * H(4 - 1) - 2 * (4 - 1) / 4)) + >> 0.472991352569295 + 2**( - 2 / (2 * H(8 - 1) - 2 * (8 - 1) / 8)) + >> 0.6566744390877336 + */ + EXPECT_NEAR(IsolationForestPrediction(1, 4), 0.687743664f, kEpsilon); + EXPECT_NEAR(IsolationForestPrediction(2, 4), 0.472991377f, kEpsilon); + EXPECT_NEAR(IsolationForestPrediction(2, 8), 0.656674445f, kEpsilon); +} + +TEST(IsolationForest, Description) { + const auto model = CreateToyModel(); + const std::string description = model->DescriptionAndStatistics(true); + EXPECT_THAT(description, testing::HasSubstr(R"( +Tree #0: + "a">=0 [s:0 n:0 np:0 miss:0] + ├─(pos)─ "b">=1 [s:0 n:0 np:0 miss:0] + | ├─(pos)─ count:4 + | └─(neg)─ "a">=1 [s:0 n:0 np:0 miss:0] + | ├─(pos)─ count:2 + | └─(neg)─ count:2 + └─(neg)─ count:2 + +Tree #1: + "b">=1 [s:0 n:0 np:0 miss:0] + ├─(pos)─ count:5 + └─(neg)─ count:5 +)")); +} + +TEST(IsolationForest, Serialize) { + const auto original_model = CreateToyModel(); + ASSERT_OK_AND_ASSIGN(std::string serialized_model, + SerializeModel(*original_model)); + ASSERT_OK_AND_ASSIGN(const auto loaded_model, + DeserializeModel(serialized_model)); + EXPECT_EQ(original_model->DebugCompare(*loaded_model), ""); +} + +TEST(IsolationForest, PredictGetLeaves) { + const auto model = CreateToyModel(); + const auto dataset = CreateToyDataset(model->data_spec()); + std::vector leaves(model->num_trees()); + EXPECT_OK(model->PredictGetLeaves(dataset, 0, absl::MakeSpan(leaves))); + EXPECT_THAT(leaves, ElementsAre(0, 0)); + + EXPECT_OK(model->PredictGetLeaves(dataset, 1, absl::MakeSpan(leaves))); + EXPECT_THAT(leaves, ElementsAre(1, 0)); + + EXPECT_OK(model->PredictGetLeaves(dataset, 2, absl::MakeSpan(leaves))); + EXPECT_THAT(leaves, 
ElementsAre(2, 0)); + + EXPECT_OK(model->PredictGetLeaves(dataset, 3, absl::MakeSpan(leaves))); + EXPECT_THAT(leaves, ElementsAre(3, 1)); +} + +TEST(IsolationForest, PredictVerticalDataset) { + const auto model = CreateToyModel(); + const auto dataset = CreateToyDataset(model->data_spec()); + model::proto::Prediction prediction; + + model->Predict(dataset, 0, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.6111162 }") + .value())); + + model->Predict(dataset, 1, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.5079549 }") + .value())); + + model->Predict(dataset, 2, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.5079549 }") + .value())); + + model->Predict(dataset, 3, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.51496893 }") + .value())); +} + +TEST(IsolationForest, PredictExampleProto) { + const auto model = CreateToyModel(); + const auto dataset = CreateToyDataset(model->data_spec()); + + dataset::proto::Example example; + model::proto::Prediction prediction; + + dataset.ExtractExample(0, &example); + model->Predict(example, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.6111162 }") + .value())); + + dataset.ExtractExample(1, &example); + model->Predict(example, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.5079549 }") + .value())); + + dataset.ExtractExample(2, &example); + model->Predict(example, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { value: 0.5079549 }") + .value())); + + dataset.ExtractExample(3, &example); + model->Predict(example, &prediction); + EXPECT_THAT(prediction, + EqualsProto(utils::ParseTextProto( + "anomaly_detection { 
value: 0.51496893 }") + .value())); +} + +TEST(IsolationForest, Distance) { + const auto model = CreateToyModel(); + const auto dataset = CreateToyDataset(model->data_spec()); + ASSERT_OK_AND_ASSIGN(const auto dataset_extract, + dataset.Extract(std::vector{0, 1})); + + std::vector distances(4); + ASSERT_OK(model->Distance(dataset_extract, dataset_extract, + absl::MakeSpan(distances))); + EXPECT_THAT(distances, ElementsAre(0, 0.5, 0.5, 0)); +} + +TEST(IsolationForest, PredictGolden) { + // The model, dataset, and golden predictions have been generated in the test + // "test_import_anomaly_detection_model" in + // ydf/model/sklearn_model_test.py + ASSERT_OK_AND_ASSIGN(const auto model, + model::LoadModel(file::JoinPath( + TestDataDir(), "model", "gaussians_anomaly_if"))); + dataset::VerticalDataset dataset; + ASSERT_OK(dataset::LoadVerticalDataset( + absl::StrCat("csv:", file::JoinPath(TestDataDir(), "dataset", + "gaussians_test.csv")), + model->data_spec(), &dataset)); + + YDF_LOG(INFO) << "Model:\n" << model->DescriptionAndStatistics(true); + + // Those predictions have been checked with sklearn implementation. 
+ model::proto::Prediction prediction; + + model->Predict(dataset, 0, &prediction); + EXPECT_NEAR(prediction.anomaly_detection().value(), 4.192874686491115943e-01, + kEpsilon); + + model->Predict(dataset, 1, &prediction); + EXPECT_NEAR(prediction.anomaly_detection().value(), 4.414360433426349206e-01, + kEpsilon); + + model->Predict(dataset, 2, &prediction); + EXPECT_NEAR(prediction.anomaly_detection().value(), 5.071637878193088200e-01, + kEpsilon); + + model->Predict(dataset, 3, &prediction); + EXPECT_NEAR(prediction.anomaly_detection().value(), 4.252762996248650729e-01, + kEpsilon); + + model->Predict(dataset, 4, &prediction); + EXPECT_NEAR(prediction.anomaly_detection().value(), 3.864382268322048009e-01, + kEpsilon); +} + +} // namespace +} // namespace yggdrasil_decision_forests::model::isolation_forest diff --git a/yggdrasil_decision_forests/test_data/dataset/README.md b/yggdrasil_decision_forests/test_data/dataset/README.md index efd50686..0561f158 100644 --- a/yggdrasil_decision_forests/test_data/dataset/README.md +++ b/yggdrasil_decision_forests/test_data/dataset/README.md @@ -12,7 +12,8 @@ Donors: Ronny Kohavi and Barry Becker Full name: Molecular Biology (Splice-junction Gene Sequences) Data Set -Url: https://archive.ics.uci.edu/ml/datasets/Molecular+Biology+(Splice-junction+Gene+Sequences) +Url: +https://archive.ics.uci.edu/ml/datasets/Molecular+Biology+(Splice-junction+Gene+Sequences) Donors: G. Towell, M. Noordewier, and J. 
Shavlik @@ -74,7 +75,6 @@ bazel run -c opt --copt=-mavx2 //third_party/yggdrasil_decision_forests/cli/util --ratio_test=0.2 ``` - ## Sim PTE Full name: Simulations for Personalized Treatment Effects @@ -106,3 +106,25 @@ test$ts = NULL write.csv(train,"yggdrasil_decision_forests/test_data/dataset/sim_pte_train.csv", row.names=F, quote=F) write.csv(test,"yggdrasil_decision_forests/test_data/dataset/sim_pte_test.csv", row.names=F, quote=F) ``` + +## Gaussians + +Generate two gaussians for anomaly detection similarly as: +https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html + +```python +def gen_ds(n_samples: int = 120, n_outliers: int = 40, seed: int = 0): + np.random.seed(seed) + covariance = np.array([[0.5, -0.1], [0.7, 0.4]]) + cluster_1 = 0.4 * np.random.randn(n_samples, 2) @ covariance + np.array( + [2, 2] + ) + cluster_2 = 0.3 * np.random.randn(n_samples, 2) + np.array([-2, -2]) + outliers = np.random.uniform(low=-4, high=4, size=(n_outliers, 2)) + features = np.concatenate([cluster_1, cluster_2, outliers]) + labels = np.concatenate([ + np.zeros((2 * n_samples), dtype=bool), + np.ones((n_outliers), dtype=bool), + ]) + return features, labels +``` diff --git a/yggdrasil_decision_forests/test_data/dataset/gaussians_test.csv b/yggdrasil_decision_forests/test_data/dataset/gaussians_test.csv new file mode 100644 index 00000000..81ffc35f --- /dev/null +++ b/yggdrasil_decision_forests/test_data/dataset/gaussians_test.csv @@ -0,0 +1,281 @@ +features.0_of_2,features.1_of_2,label +2.153577276910627,1.8371451592694583,False +1.5939344353435811,1.8494518905455508,False +1.5286506907384565,1.5971375033261677,False +2.1358244205926673,1.8084144252881242,False +1.9939841140777448,1.9473391760813303,False +1.7155821887496516,1.6118931689985763,False +1.827981339890142,1.9514479914135938,False +1.9187843336191588,1.7786666195363376,False +1.7197140014719288,1.8664397814345979,False +2.1716310091835487,2.091561884325908,False 
+2.100398803312508,2.227180560662855,False +2.321016559011082,2.044335465400587,False +1.9887273892840691,1.8545693045615301,False +1.713406513303731,1.855192499539295,False +2.094921914761489,2.09557239786315,False +1.7505768621352644,1.964185865772056,False +1.6259078803564386,1.8922540053649888,False +1.8622046861353438,2.0248235094064486,False +1.8421743256617302,2.082198925596146,False +2.539732800383628,2.0523349786079788,False +1.713096779583923,1.8656527878408908,False +2.3244556295376015,2.300679067914473,False +1.8318027699157868,1.8960483863578623,False +2.6262545351475692,2.3284042024499163,False +2.1968486612144034,2.093946139453922,False +1.9614041069729802,1.931633211762804,False +1.6736803981199664,1.9898058923348252,False +2.1224756468560595,2.102215479924143,False +2.428505265539897,2.115416996453589,False +2.3049569910466556,2.1301990932731463,False +2.19992349526599,2.2306348224771924,False +2.0191199702548412,1.9317879535668452,False +2.076543549661604,1.9683678000551312,False +2.651874586088501,2.1979055151294946,False +2.0460961073699537,1.6891375700605715,False +1.5699267972890758,1.9770500141457514,False +2.277334711802109,2.133785544600683,False +1.4969106490176003,1.6638224075784953,False +2.1705920974043544,2.1447241033222944,False +2.2593820775602436,2.1127179994353487,False +1.8993221121775288,1.9767718346756147,False +2.1521267394159684,2.0581458079137898,False +2.0729823648514425,2.0111093945241225,False +1.9715854029120514,2.087236697262908,False +2.3406197484136246,2.17584457442625,False +2.291627372875806,1.9816683116013707,False +1.746098495894795,1.912814532810929,False +2.106354089950419,1.995434636773205,False +1.943436384799374,2.0207296441163227,False +2.0714488007505953,2.1364851592094802,False +2.25343644438834,2.2138063753603197,False +2.2469003148645386,2.078832898046299,False +1.8284447121082188,2.0708976631190286,False +1.8810751217130333,1.8177856456668795,False +1.955888371873416,2.015867067711082,False 
+1.8136211657224905,2.1053501955366367,False +1.9285676660840505,1.8286310235634808,False +1.7026698405863405,1.7760128150554466,False +1.5398446388298672,1.743024243618115,False +2.3387756922694445,2.020567377740466,False +1.7780313554574796,1.8769588191679656,False +2.805539675823081,2.2637860426744068,False +1.9745295556374391,2.2721055194503212,False +2.4201734256706344,1.98897584132596,False +2.0018830824452887,2.1861059721004508,False +1.7947181638022578,1.910609511636205,False +1.908138871739948,2.137288324782873,False +1.9839727688735278,1.8685228086144594,False +1.7836996783201942,1.7960823241773622,False +2.17341104993303,1.9753763264730095,False +1.9341972013103697,1.9911834512716307,False +2.383892490563376,2.085310416098167,False +2.14444940366981,2.0008535806254732,False +2.188710698942128,2.095827996803256,False +2.2376133057578036,2.0999084427311683,False +1.2562220713463892,1.62283062868358,False +2.820119221524675,2.3083637594415047,False +2.0602294233943366,1.9663205849318968,False +1.9393758786438466,1.9864091197118718,False +1.6893166376108721,1.8197806270684977,False +1.6174135966738807,1.8611594858927671,False +1.9667003096137126,1.942585449165151,False +2.050125380067692,1.9522390414820814,False +2.257056529938172,1.994712016700821,False +1.9071588873844407,1.6089542631786358,False +2.1230329920106574,2.17011457019399,False +2.4360474036009507,1.8590854072932705,False +1.9453658107318683,1.9620429614706876,False +2.185845220744233,1.9007724573403333,False +2.046469520283993,1.9216289505810884,False +1.833281610680562,2.101069985483989,False +2.4627402097960984,2.186788748667749,False +1.8050492168770758,1.9056399508627149,False +2.1799586051503703,2.016007490893276,False +2.03566145837529,1.9770753643155299,False +2.3355256745839563,2.012705291820365,False +2.6920276172651376,2.1015827234360414,False +1.67612722384749,1.7132478736573904,False +2.2381428730803026,2.0431523813717045,False +2.4546341210787435,2.134672668241019,False 
+2.1505459348083287,2.1478760266236225,False +2.434904774798712,2.335272709242209,False +1.240616869784891,1.7715678969968844,False +1.4205001832695263,1.9235399571668763,False +2.323879013325074,2.3577372405349717,False +1.558750879358681,1.7982318536227375,False +2.0586498924859127,1.8615808507173535,False +1.9978642244335243,2.1896083634855437,False +2.0028845640881427,1.9054724436640422,False +2.317656442367249,1.964354652744067,False +2.4641573667314334,2.0157605571656925,False +2.3423938631230916,1.955152209803274,False +2.441874342341385,2.440688759728985,False +1.8351705647945953,1.9900169086570791,False +1.8029698533056726,1.995472828362521,False +2.1050463383010802,2.0225860509765687,False +2.6340083560328975,2.260304501932143,False +1.630633279717299,1.807519265200112,False +1.5771251096023178,1.940772002148525,False +1.8961691893191295,2.198972604214995,False +-2.1494640690893236,-2.093295493490855,False +-2.0005674448514013,-2.4189861273786297,False +-2.2583949082328125,-1.7975865422936084,False +-1.814438260764112,-2.132951579210191,False +-1.456839525762363,-2.3917180767673214,False +-2.103496163046494,-2.0692519229406408,False +-2.837925500043962,-1.4187413559151762,False +-1.8901003956379825,-2.3133768145723375,False +-1.3846479671427667,-1.8243013999482853,False +-1.8711421579934107,-2.1820995194600137,False +-1.9681331827894346,-2.457704094868807,False +-1.7614921716725467,-2.1123314956529664,False +-1.9597855410336131,-1.6393835413400883,False +-1.9145755667452826,-1.9212597663610194,False +-1.917050208553345,-2.219981481168594,False +-1.7491985841697193,-1.5369922667586549,False +-1.7723583019706206,-1.734527355660535,False +-2.2631844556754563,-2.2603361668618778,False +-2.432262807287552,-1.6303240787514692,False +-2.0762539602822105,-1.5800468172557043,False +-2.2345735048060402,-2.131252694848574,False +-1.9713724738426226,-1.7235649794021466,False +-1.981774941260148,-1.936662573497685,False +-1.9950417298083154,-1.9468436839172119,False 
+-2.3349410053654234,-1.9757218697080163,False +-2.0559736980534398,-2.0170473442657544,False +-1.8522990332190052,-2.2042034423026657,False +-2.025352408221389,-2.089208564832051,False +-1.8748093985075411,-1.7645688046953232,False +-2.286627578712107,-1.8242268706692153,False +-1.3802650039343498,-2.4413470777497874,False +-2.2490515685945343,-2.2641732799532512,False +-2.083729316462987,-1.5131452742137999,False +-1.995994197095847,-2.2084080785861677,False +-1.8134589487083284,-2.179941359321254,False +-1.6629763513934195,-1.9084198879267968,False +-1.5833661810889195,-2.1984032729459058,False +-1.090742866288391,-1.7526246124899627,False +-1.8036259542239899,-2.0153565342822994,False +-2.2176791357403283,-2.260330603287077,False +-2.040793197830177,-2.239180935647939,False +-1.915197286325474,-2.247829229554196,False +-1.8136751897482974,-1.713163488762591,False +-2.211752152220685,-1.642194179673592,False +-2.071382580725655,-1.6534136341735324,False +-1.8685500958126289,-1.6633015035028724,False +-2.299105938658905,-2.0320381960337675,False +-1.5645712218227195,-2.1854110543044736,False +-2.6111603677042385,-2.5827767544429365,False +-2.7519321958028184,-2.6342491765750475,False +-2.123491748956545,-1.6164415751474834,False +-2.1326687838539518,-1.9029417939195703,False +-2.032997447049082,-1.9974353163691927,False +-2.050459651923415,-2.0522541032923973,False +-1.8616507700068947,-2.3527948014323945,False +-1.6969618467995826,-1.723994620025671,False +-2.0585172022627702,-1.7583819727303456,False +-2.210403327877153,-2.1611669071626016,False +-1.953120844918975,-2.0570663075254583,False +-2.1346214098014866,-2.2017344116359787,False +-2.167248416535813,-1.7182493767410536,False +-2.582997021700506,-1.8942516906892002,False +-2.070931085543896,-1.7816559500001055,False +-1.8454779159081902,-2.834760340295877,False +-1.8246060168567722,-1.9027177269654736,False +-1.9934411490120343,-2.1406021448833674,False +-1.7440156334133134,-2.12390879291331,False 
+-1.449584712005094,-1.830685143351706,False +-1.3586515797681553,-2.2356601990760705,False +-2.5267776920698557,-1.7855631207542553,False +-1.7441887814824142,-1.9893919708835728,False +-2.461637973723393,-2.1343685554148357,False +-1.8146043398238996,-2.0552528976961235,False +-2.0347955556417188,-2.0526376905985257,False +-2.2801743966879506,-2.15990609782508,False +-2.4279666261561594,-1.469612015506692,False +-2.142611862541395,-1.8567169454547328,False +-2.3065657833923927,-1.7616415281196753,False +-2.5619482932905906,-1.723815464583513,False +-2.0106103774636135,-1.3668184839197872,False +-2.3919602218532057,-1.9770858559521225,False +-1.8898304558348407,-1.6301302422871289,False +-2.1268570884172324,-1.9740606780427137,False +-2.6427400187232104,-2.2490506592068122,False +-1.8645152148334274,-1.668747702109036,False +-2.0845208807196838,-1.3830933430405121,False +-1.471925232065272,-2.018195747532443,False +-2.724050900352136,-2.533269912741796,False +-2.233357647988224,-1.6652476676227495,False +-1.9069183136648697,-2.6282743448668437,False +-2.0686297486610488,-1.5159915876310655,False +-2.1124414061907957,-2.2249908851827,False +-1.3836127692445652,-1.9839771389604957,False +-2.143747129635824,-1.8949498523515163,False +-1.9948505820877733,-2.128742683470528,False +-1.6374631014344112,-1.665289459164654,False +-1.7477415325576688,-2.0308661652720614,False +-1.655929887080155,-2.014910773747603,False +-1.8600070198313479,-1.6898939396181496,False +-1.7573466919202954,-1.4630735950381186,False +-1.8646147951879488,-2.505217995760471,False +-2.3480510314946827,-1.5949679543954822,False +-2.0993849509797884,-1.8840382564600726,False +-2.255436696959248,-1.6997355728959098,False +-2.1154496746498372,-1.562567528417144,False +-2.1596702062694533,-1.6645599809846914,False +-1.7976811685537393,-2.2167175716242453,False +-1.6703011001758463,-2.2704903471427995,False +-2.2467401566712755,-1.7834866123661923,False +-2.1876026004397966,-2.1781529201763528,False 
+-2.1031702127631178,-2.3000507569617548,False +-1.6865016770978307,-1.817445590454545,False +-2.020798609007145,-2.032517620152062,False +-1.8649533461698464,-1.4703994698442786,False +-1.7387090592403274,-2.1525371402826203,False +-1.766774238425341,-2.0356313516309266,False +-2.0596994551411174,-1.4400585874543625,False +-2.125681369303437,-2.1437554745382217,False +-2.5856315861735695,-2.420698743635941,False +-1.8646631183796205,-2.208476270355572,False +-2.4900394649825124,1.1511651544933024,True +2.0344476046103894,-2.314140863333434,True +0.8076339848611198,1.9914270019666915,True +1.1057496849626673,0.7770184235731925,True +-1.6361417147560235,1.8528517699320703,True +3.562467519999598,-0.5955088779076245,True +2.2574545346512425,-3.5508716827046776,True +2.6821728199197272,-2.4619998654228183,True +-0.8392250488802144,-1.5993516325971484,True +-3.3591708495516057,3.237048023489943,True +-1.038766579570094,0.24557950780911586,True +-0.04706987282939945,-2.9427108644564948,True +-2.348367528884536,-3.3904895277414306,True +0.06337359956229971,-1.907603585336103,True +-1.1435071289376646,-3.1354773704221053,True +2.3004147172200575,-3.147328985689591,True +3.8856705897055024,-2.5827106844926613,True +0.5792408994771945,-3.641237323287295,True +2.296930317246673,-2.4831524174536277,True +0.22323182726501312,1.9206203452725727,True +-2.8005481206619525,0.40869739349577383,True +-2.267062333369765,2.0735683945371575,True +1.7833215484497096,-2.587607725783336,True +2.8957324613365873,-3.841799203869246,True +2.8818959793849217,0.47123048928750677,True +-0.7742362231532063,2.0699754481786927,True +1.735432013539759,3.8986093984674124,True +-1.7753196110961378,-3.9696506207252593,True +3.4712208665191024,2.8631768363031505,True +1.8308070201540474,0.13351026337230731,True +1.655649961990088,2.244236455013942,True +-1.000992451705125,2.1625802026396608,True +2.004994553401403,0.9056896923540778,True +-0.785072608257301,1.5784641541147275,True 
+-3.975097137398463,2.1991731737967823,True +3.1713328196547756,-2.085474340499963,True +-3.033862525526162,-2.237728097650179,True +-1.5832261496792137,3.0642280685106735,True +0.3453314419047562,-1.7063068031485553,True +-2.893162481656428,-1.6788442951636666,True diff --git a/yggdrasil_decision_forests/test_data/dataset/gaussians_train.csv b/yggdrasil_decision_forests/test_data/dataset/gaussians_train.csv new file mode 100644 index 00000000..2d54282d --- /dev/null +++ b/yggdrasil_decision_forests/test_data/dataset/gaussians_train.csv @@ -0,0 +1,281 @@ +features.0_of_2,features.1_of_2,label +2.464854487536355,1.993463059500049,False +2.823197692597556,2.3193933925080037,False +2.0998737916645984,1.7689332196137755,False +2.1476376651817626,1.9377793099713447,False +2.0943238101840325,2.069824514381882,False +2.4360052961818086,2.226922018267641,False +2.1862765496473906,1.989026493632973,False +2.18220145821388,2.035633363070066,False +2.241371500777097,1.9074115148711677,False +1.8234667333256973,1.850821973645688,False +1.6724152435564852,2.2066985679038207,False +1.9650810340580973,1.8466761487805892,False +2.0467285359098675,1.6765113071046933,False +1.9567402254530557,1.9682202433038087,False +2.717976298443771,2.1737862346097074,False +2.136874990627992,2.054308106108471,False +1.2678198393712778,1.7185839949893762,False +1.974195281483884,2.038932321029683,False +2.582724493805179,2.143169148576397,False +1.8378898663573155,1.9671246326042644,False +1.3926843845764685,1.8147392486540477,False +2.2049630725398988,2.380374870862087,False +1.775408759198537,1.9502941990122764,False +1.9671382276229497,2.1745102713351026,False +1.6176531520284985,2.030517469068083,False +1.9292393871618583,2.0977230621052287,False +1.56726196093195,1.831531056043169,False +2.114296478080786,2.069660388418413,False +2.0979955758437723,2.0457348147430383,False +1.7715680547874086,1.9673342971892964,False +1.7648330252134583,1.9693699120645514,False 
+1.3540116149382397,1.75632063490871,False +1.9229865663124373,1.928618004516528,False +1.803539362154008,2.1392530947627657,False +1.8330850379462704,2.044603197902712,False +2.1819333274475823,1.9914736432340843,False +1.8821289072096374,1.7568518413616836,False +1.8887215027722222,1.8743367198024479,False +1.6637626640295873,1.9422159396049687,False +1.9534157893988542,2.0214485560418542,False +2.019201448190501,2.190738231544004,False +1.6629842557884693,1.7355745126064257,False +2.828499408047683,2.2438121804130695,False +2.1853769602044717,1.924060843423638,False +2.08109595923861,2.2115423811694037,False +2.2616492303124436,2.2117182891401157,False +2.3151139258308118,2.1479312467143195,False +2.269113766528626,2.0987970510237357,False +2.5021437424377977,2.2853192781961007,False +2.1379394403052405,2.0592418144430074,False +1.999257602291366,1.7090325223349585,False +2.0173340785870963,2.2059228732447114,False +2.30958925095897,2.357904325908453,False +1.7079888566446384,1.8969519893998688,False +2.7991325468977157,2.1599646855702925,False +2.6272042964024216,2.070264786906999,False +2.3625730498567887,2.3400594198980333,False +2.1710871166327146,2.1391131581654785,False +2.1460475674893162,1.9373083063945167,False +2.3810337421078436,2.122989892636801,False +1.7674528848675515,1.8090388522603036,False +2.4310356859135798,2.2002922165016825,False +1.819188756761979,2.0038411879368274,False +2.4307631336298883,2.31328833862556,False +2.248548265549998,2.03830210351808,False +1.9970065586726455,2.1170765135845033,False +1.8740460241853432,2.032066195750191,False +2.0622321069092435,2.1336631703265954,False +2.0569945117611494,1.9436085664429232,False +1.7731441200877875,1.8092698900966533,False +1.8247781578129492,2.12995297591055,False +2.2111435014042957,2.094938090087836,False +2.7410832113301042,2.0557909269239087,False +2.130200115578007,2.215235495113103,False +1.6075748285495772,1.9787827596501073,False +2.4660876409968964,2.276864499676884,False 
+1.619646244765788,1.8975600266964954,False +1.7945355749335234,1.89778157515908,False +1.9229463620795428,1.782145521777592,False +1.647916656989338,1.9758475389373347,False +2.440662484930295,2.32864642663841,False +2.2143985089730056,1.9760313663446,False +1.9913345296263996,2.184115496977454,False +1.3674210234243558,1.7928452384111646,False +2.326349889809418,2.0031896262138837,False +2.27341550758021,2.0141620715196504,False +1.9890789562564972,1.861562680595869,False +1.9839978967619627,2.150424836596439,False +1.6462441293951757,1.9218084221269216,False +1.9137876638235471,2.021017965584703,False +1.544214855592258,1.7941675495032563,False +1.2487234368105824,1.6700002317574074,False +1.6764701466355483,1.718661517061833,False +1.7937295543073828,2.052521746258894,False +2.2841314874356233,2.2764648551207305,False +1.8162028614730814,2.0944424154848074,False +1.6650772569869656,1.8146763530905083,False +2.056622359364121,1.9516215205831702,False +2.3849392733525785,2.10088904258565,False +2.8068750156983278,2.1273150339385953,False +1.859137462690252,1.9764666051055495,False +2.40340576377975,2.0608558130801313,False +1.6752786128124848,1.7156817718663628,False +1.7884861205426201,1.8828880995030473,False +2.0285028107086194,1.9730989536953971,False +2.270856881872083,2.0143477581069047,False +2.026668245335731,1.89391982603789,False +1.6962968347196363,1.9721698993243222,False +2.102758403183135,2.0613450942432707,False +2.4400297876731907,1.9028664977066632,False +1.7119361027056164,1.9828807159078092,False +2.0420956177273504,2.0955808747895976,False +1.709553955487168,2.071753799649838,False +2.096311997729152,2.030888704273429,False +1.8139187018808456,1.9858251660008535,False +1.5770582508620794,1.9780312550213501,False +2.0079217177498556,2.0882824664424966,False +1.9874989821144156,2.1712389935464755,False +1.7193011018944846,1.60902301421818,False +2.274785995964089,2.091254936373715,False +-2.1912311076656685,-2.1191815442986393,False 
+-2.0398641732760865,-2.0893372638205183,False +-2.092703890714137,-2.502801141898993,False +-1.654300530565064,-1.6761144223889537,False +-2.244009277761261,-2.4399272983407543,False +-1.8436805370641725,-2.17273639094392,False +-1.957414051003766,-2.0957985251435285,False +-1.792538374678944,-1.7915752569031982,False +-2.2176792135390753,-2.4150091866185166,False +-2.4748815192005247,-1.8168861862678385,False +-2.356657777335209,-2.152044906289606,False +-2.1788942115351526,-2.0157701888808637,False +-2.580883941753952,-1.9433664209618515,False +-1.8428326928497383,-1.9734733738866015,False +-2.0932658515095417,-1.970779950119365,False +-1.880286096307961,-2.831777826927995,False +-1.4132263075247917,-1.8829720031936221,False +-2.195722574716106,-2.1172860125562805,False +-1.8518774667952436,-2.03483118171031,False +-2.6092053403344484,-1.3806521415922042,False +-2.033162197169742,-1.6939481864852601,False +-2.2076149543353174,-1.5390868837262608,False +-1.9140968933323161,-1.8173468496573648,False +-2.3135760098440863,-1.6366564130951897,False +-1.7930545506395634,-1.6094461311305004,False +-2.1884262678924737,-2.1443081355382363,False +-1.3088249906948175,-2.318004746816464,False +-2.040784910203496,-1.6589325912191915,False +-1.9706825096855434,-1.8251138960740119,False +-2.1198347087788627,-1.8889832336457444,False +-2.391958055520595,-1.5025607961145435,False +-2.035449213538571,-2.204053461199055,False +-1.8000850753904256,-2.138215936216566,False +-2.400277541420826,-2.4040152517392666,False +-1.7918680541929604,-2.04787203144388,False +-2.040110467900532,-1.6766768582071212,False +-2.338047742627023,-2.2192033258594472,False +-2.1154639427543827,-1.9716945232048777,False +-2.012651435387174,-2.086066157716972,False +-2.018487920628694,-2.0321915828873522,False +-2.2158813165655378,-2.243897896566223,False +-1.9176450926828181,-2.2672745248986583,False +-2.347206577757256,-2.093687675337708,False +-2.047300104849145,-1.322982950810537,False 
+-2.21141008275687,-1.7170217825091516,False +-1.7758434997386106,-2.3566834865611206,False +-1.7680241067792202,-2.3551641920579955,False +-2.7977516713990225,-1.8181041426921858,False +-2.5267671750313156,-1.8647196614582255,False +-2.205203269321165,-1.5021347611430385,False +-1.6794471802051973,-2.136015741155416,False +-2.2063512833086047,-2.364223220928236,False +-2.1322767896877775,-2.0841066485553528,False +-2.1094080633175056,-1.9529888434182907,False +-1.8264435506813366,-1.895103662902048,False +-2.229243177171933,-2.4313374421404736,False +-1.5906404455692587,-2.2068347553649814,False +-2.195688079980506,-2.156356793690333,False +-2.5529208650469943,-2.143392201212146,False +-2.1438967442023844,-1.8138925104969463,False +-1.7904628552677992,-1.9988687332741193,False +-1.720445487765709,-1.8980105048596214,False +-2.0047046334807663,-1.9517215495105331,False +-2.05719604807442,-2.118454854210035,False +-2.08032006106819,-2.338403399441002,False +-1.9158674884051112,-2.297937083278874,False +-1.7475106207779092,-2.0748375740482845,False +-1.9851515055049729,-1.851848967115713,False +-1.8070056604811215,-2.4711870225900356,False +-2.0620711028491914,-1.7359463263757653,False +-2.5094317458296764,-1.883815857381481,False +-2.676669268820657,-2.306752053090681,False +-1.9884108344479436,-2.4970145306965863,False +-2.295653221305245,-2.441550502239076,False +-1.505559520337732,-1.9507316733537998,False +-1.8298129166441992,-2.0668025301545465,False +-2.10602952462716,-2.4849422565953097,False +-2.0875512088243586,-2.228447663543487,False +-1.742622822712299,-1.6576694400027279,False +-1.5600263853277467,-1.7442344181616305,False +-2.179596181076896,-2.3347690957881184,False +-1.7700010455064743,-1.8931121547583134,False +-2.530561535203109,-1.8933554621768693,False +-1.7556440532536401,-1.982322323245511,False +-2.0555161013028025,-2.242294546284907,False +-2.4339604098690164,-1.7599106151979917,False +-2.0927343334315127,-2.0700399984631077,False 
+-1.48018364392426,-1.7946496679422428,False +-1.8887524996156695,-1.9573814584438294,False +-1.5440015417702682,-1.4841232077751416,False +-1.7211484665561416,-1.8253326225806228,False +-2.6283809213618436,-1.9628834257299481,False +-2.039032086258111,-1.9718140311843293,False +-1.7170861738032448,-2.8219031501568668,False +-2.1707936160410557,-1.9190286935177716,False +-2.140053663815829,-2.425071833937878,False +-1.7393109539309615,-1.9169384282461617,False +-2.2913313711333454,-1.9055548386452528,False +-1.7535242863850613,-1.9984122061101917,False +-1.759830558970701,-1.9765219474515017,False +-2.1185686947963065,-2.3478261549199737,False +-2.0257792300914836,-1.9417121185862685,False +-1.7372501715238007,-2.034532240546168,False +-1.8627753181337028,-2.2893836041201183,False +-2.2347887467482574,-2.033116789708066,False +-2.316388539195504,-1.7539256488025956,False +-1.861060901204418,-1.916271270682264,False +-1.8983287624352165,-1.3936869315545608,False +-2.140659256390039,-2.6604323856501675,False +-1.9402099409310605,-2.0151810622885,False +-2.155255712753121,-2.293648957807631,False +-2.1317568565406444,-1.9455984712346537,False +-2.1508450101927616,-1.2762638961368755,False +-2.2881513144899444,-2.2379352088123015,False +-2.6865860120043585,-1.924554675493539,False +-2.604921988339928,-2.1618363900123505,False +-2.082701160368167,-2.2129183897540665,False +-1.4783381967636466,-1.7016816826053502,False +-1.6042589371095273,-2.2647256455649756,False +-1.6614217806456295,-1.8511997160968114,False +3.80980530763056,3.1183492515643216,True +2.1164957948616685,1.5859878225463246,True +-1.316014642592803,-2.818515374346341,True +-3.498911975521522,-2.0647863663881214,True +-0.5417481505496111,0.17597018903985973,True +2.184668432438973,3.669927384452744,True +-3.061436156921512,-3.1439668784496675,True +0.717557784108406,1.9631845915783437,True +2.785203042775879,3.4866566417343083,True +3.867409938085136,-0.8015864622037929,True 
+-0.9573185317794151,-2.817530586421821,True +1.4794755094684753,1.2540956675266965,True +2.896500766809658,-3.221936041698875,True +-0.017784737397265715,0.6486554373765046,True +-2.067543679680653,-2.6477967509667097,True +2.8766466913569717,-3.5317206221155297,True +-0.235032768655417,-3.0733279895929178,True +-0.34352990934906114,3.8396986107384743,True +-0.6103491723562176,2.8569993400365385,True +-3.061475486534449,-1.829983385905087,True +-0.769658074661324,-0.8015028799253541,True +1.371067829361225,-1.2422549809959387,True +1.7101349472801308,1.11349519380314,True +-0.8067108379618153,-0.5459189787654459,True +0.9162215984825659,-3.439662478842843,True +2.5792539068455227,1.2273692889090952,True +1.8107397153426819,0.2953840086591235,True +-3.116183112066042,-0.7597150936244006,True +-0.7570113372115514,-1.4316560796542648,True +-3.760397400762005,1.8980339407718185,True +-3.1217243354999944,0.850465064360681,True +1.625739971737726,1.078290583469558,True +3.6731380158238,-3.173614759318891,True +2.937337272841593,-3.766478121208694,True +0.2793348394166717,-0.7660510564859297,True +0.19347088315006555,-1.0792009835199217,True +-2.4754646804794556,-3.8470168204104818,True +0.14519851032939446,2.7422149014787385,True +-1.0142723540416467,-2.217089455880158,True +-3.3557439722252473,-3.317512615050373,True diff --git a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/data_spec.pb b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/data_spec.pb new file mode 100644 index 0000000000000000000000000000000000000000..7fd0b4a15918ef8a73e078d195ea4bf7b04c9f73 GIT binary patch literal 134 zcmd-w=U^1#PfJZKDJ@DZ)-#CDPm4FwlHp{40#Ohx3nCOj1PE9#L@*|BbD?T9BwC{Y Rlav4#2h2tR1_>qwMgT`e6A%CZ literal 0 HcmV?d00001 diff --git a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/done b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/done new file mode 100644 index 00000000..e69de29b diff --git 
a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/header.pb b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/header.pb new file mode 100644 index 0000000000000000000000000000000000000000..f33587675c6e6ba1db768ba8071888abce2eeceb GIT binary patch literal 67 zcmd-Q@C^3%aSZYF_ltM)4{{9-5nz+}4+D$_FxG!Y2SyJBCy0ZKL4ZMmL5U%O0RVG5 BA>04} literal 0 HcmV?d00001 diff --git a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/isolation_forest_header.pb b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/isolation_forest_header.pb new file mode 100644 index 0000000000000000000000000000000000000000..e306b7d72590a9f91126e647ebf1d0b0ac74b34d GIT binary patch literal 23 ecmd;J6iAWcb@K6diVt=T40ZK$c2!8xVgLX+Mg=MW literal 0 HcmV?d00001 diff --git a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/nodes-00000-of-00001 b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/nodes-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..ccce3bf29b64965a3c072b8a6eecabc02d22e52a GIT binary patch literal 103016 zcmb5Xb$nOX(mhO&Ai*WLySo%iaCdi#6?bTXQc8j1?ykk%izT?bySux?`+K`)_PH}> zACl+(ardXQXV$ElEoa*|6t5BHzyHPkfB&B-cBC*deG)~C9U)rS5ygV@{{Iz9e)m%? 
z$ix4?x?Eu+`I!1J|JfBS{x;x0JFe$a>D{Sy5_(1szrzYHD=hs2*MD@=Ae+3$&R?za zT+z>2_D((r=bph+D(aKO(uaTfZu$}O>5r5bvh>EOXP|VeiHVIE&TY9UL?a8dJ$7c z50vcyd7>NZgdfe%Z{9t{54`K8C~Ebq?Q6{_X7zjV#<-$3K2uw#g`DEx`N+Wq?^ogf z3HZ;R^Fn1R{%;lkzCF1W55$rBPI#+)#C#vf8IMba`|YlM9vR;xo)tG$+o++!&EDJJ zuE#vMRD6zJIcUx6_Qv^xZTY^QzZ2vhcuV_N`eFUUQKe+P2hNl8{#O5ex+J&C8~J7k z74E^Zi9^ssJ4c_W`m4zs>WhrqrM_qAXU7vx*>20*kcZ{OxJmC3_k*~9ivG;w)9KIf z|F~VzKwE!y^uVWarTyslioahpT8}9C!u3n6qkUei zie}Yw+6Q_?n?zP|!vFFK36-4mClfF3KU^yRRWtgA;2(ZUrpaaXQ}SyZVbA$=_XFU3 zT-8wFKz@GhKGj~3HyRbJ#t-6~P^5-+J{pv~o3)=EoJ`+i75}~m*@HtfKEe*z09?vr zcFw0R4L=t~OChJ}J;F0V0DQg@xXNR%2_l`+N?YJKJtB0Ml;y2!K*^Xo40Kaijsh=wOobrC6 z|0W>+&Nzkr`0lBN9?u8PIE7xsfF456YaC3zy)8)2|7efLSDFWl{;2pyYLE=|Mh?4+ z&D8fw)u-P55&k^#8-8|P>DNPV*~%cr8*z8|?T%eOErG-}ykPfiZyZt2^G8yp*Z9DG zD}KkJv`;0z;hDEC72c|t$JG3RJdWQK!J6MKmua2hi3|Iuz4xj43UMwO9IlA1z3;D_ zE!)KtA2!-mKyt!0*g*#@2 zAFk(8>Gi#x!pf&_`P$a}Mw*{4Nb*O#&o7r$`jD4ud)TVKco>am!w33#&euf$X1-i1 zz1SPl3O!!Gq)>W=UsbpIpEl%Q$QhqX9^dYH5&WKf%8EGIPWijjxqJwD2hOUhQ3QWI zPoE#~8h#8u`bmnu7p(G;6_;7%IsTd>`Fijkc>Au<<9zxm|MRQdQGO6l@Qa7S-Yb6v zm&m7gC zPfxrXYW+_2YD?=;j8;U{m~A&M{Jcpd+ZuxCG}H2#27@<~^P19`gaoBmHYVJ_0X(cptm z#YRg5#P`(H!_VY^Wv-bz3-oucHF;=9`L&qKOZW+lrf?N%KisGhx?Vn zdcuWI{2JN!ows6rwep^odyz3teYxS$o4+bBieTCrz-pi-b{K#>!A1;kP z_@68Bi}2?j55QeIqP?}<%ARQx{G)!nAlWKq2fv*<#(e((7WsVP3Up|k8JRe$IP3LEB~+`wYR-ESK#%8Ugr$W_)v^*w;u3v4VL3k;~cmc zDEztm7jXWnaVx;qF1s7s=aJ=&c0k{wo#*DHab)rWKe215eZQ)|_3f&}^}w&yRG!nB zxIFZi@1yf`;}7jrb>L@f+BCQCOP4jU4gq-1A1-dGHB4K;Bon&n1BOZ*P14 zquF+aKa6 za7ymyxc@uzKlIMNPOAJve)}q&uX5Z}I_nkajlSx!o@6_d2k6g!kO=dXks}YYw-tok z!){T(N>;n!J+=gi`P*d={Pc0`^JcNp*%U5#c356O@^RrpFY$1>U-W8^)B8(@fCrvc zV&1}hqw>>o$YR84@TI8oZyw)%7Cap+A=F;Dt4eo+6@@qE^J zF5I?{+ffOsNj&1mW8Iyg?Oc4 zent;|?^n|6Fyy0qQ<@*ij@J`TJHkG55s&vNWapKK{9B8o-3%Z2jd!etlEY7jm(i{H z_QLmVGEkit0{_oB(G@=Ai&oJ&r{M#?0n@LmegXaRpHl;e<5YI5I{V6c*5gv?cl$u+ zxke5@bBEa1lWXJ2^JDIJTzchYNDaNegrCIGFInTf|92#7zLq46EaKyN!s+kG-@nbi zh<1xb@kcjtLT`Az?pFWdM$tNy>|F7=@`-rTx2_*x8)rK^^cQ@*^7Qk{=T`gpF=%~a 
z_`_eJ;%lt>s}>!AoOyC->=A#_Pq&4hiBsu?ZJs937FXnobl;@%H+n=Lq1Ng2o(Lx=~u8@8^hj@*eAfjze$mqrhMHXi=Fh#VKN4-+%L8h?<#&2c)& zroX0H7>rAWzkb|nr4PUP9;f#h$EfdjJ)EeIPW<(}66TeC{ktdM1^6X9vk&yhL*``k z-B$cJ$15Uu==o+`pQpl)*L)Tx>{pcsHTV`G^>+0P(hu#6As^FJL3ZUUTKJxK&C*4;n`Rt>WtbFU&)#v5Fsg-0yknESp%kav2 zzJUIpS@k;==vO(W-rIQa9@zQ~{Bb>%=KIWY6BqE4Tw1Akf=|2GdOr#MQpxGNrf}9L zQ}^@a&*{g&jdei2L-!glk8_<;yvToo=ORx2u10gk2Y+FT`zRd9XaC$u&a*0>d z-z6l96C3jz=Z8w?ya#@Mjq*X@xxYiudp}RVAAo$*m-NCRrvcE!py`=pd`}x?mKO8T^bHv6vL-FZ4cc$_K zp7k16@xTL*{tM{6zm9|dTlu*aaYzX52fv4N>vJE-@8nnK;szgl*7glnaX>zH^#`lG z>2O*v6JMU^oPGztiTlM8c`)O^BhEK1k6Qh;`N{r{qt%SZR{cLV$a__u7aV?wZ&vuY zR(_5i;>hMl-!Bi|nY!Kt531+&lJk84PV`pvy8_1HQnkm~vvt(^4t$?Zt|a_8>oBU% zT-KV87CDri%P;uVj2WoxAwOD$-pjI`E)9R^x4hgSK#o7icSap*jjQvZknRr=-@xqe zfE$AU_iJeV#d?MxaQ;e@GtkB__MnVb`QeK6{UbcSC+G8d-3}2?RsatB=Th<5R=B0g z6Zlme=x3d87GKrpe9({GfX27Nue-RG=--;>xZf+wdJmoj@9Ezs!EenYX+mY6u)zXr zzY05y)?4H^A(b&y9K+gCzh-_^n)$@&qn~B27t*~Q^b#ka@077eT#FCtbu#oPE}bF! zpRq&zyPy2f?n>TfA)RX(Is9D6v{K?xT+TZ*=yg}`pn1NHr|?$Jkmt!>-`!RB!+|H~ zQ{ha#VLuOc`VZp0J0OQO-jP-9?+(5#jb+sz{xiv4eBfu$pQ_$N?{Z4IcjJ04O?<#R zctW4YBhRHe{`g%tbV>XAeHt>K9UZfqk@^`o0bPFAw_hyQBqU zTL%9(zp+0qO&);1IPwYW{PT573afm>&Iv(s-ULqGuJ(Oj`lRs$5B?78{(U_0LEsWk zK6laI;pc1C3q76h@q&KKCbFLcJAC1< z#r(9^cHdMkqLM>@eeZ{gALO^|oPnJAP#MMWf?mg;69bp(W3s-{L%db)*w^(*ZxS!I zqta4VqDfe)?npR&q%nEC2{b~c+lC-$13f(Oz(rt-b*v;DoE zSAR=&>YZ#)gNJx-6}T(wJ=*!(zJa6d=e3br()^&}8Jk1S8}4=nuFpiZ zA9KYCzOmlR`A+lm>8SUuz`qnm&c8f=>GieWRQZ08`8s;QdzW6fANUFEseE%HF zu}(1hke7})U+DwCdn0{r1pV*5)jE`U8eG`#{mWm(!}~{m$EN%Lr6E;?zOHXvse-I` z`31jpsrCIJ__T!;^Q00a-{`&iFL(YDvpSrCemXB#eB3+`*Z0Z)1jv2{`GvuiA=fyD7x+#%tM{?6pI!Exv^RFBKl-Ifp!5s8 z4L#(2BIhv)^DsEjpZraq=K_Cga#{}>eaPSZ`BCz6;X*I?Jk0}C&)<2czrla9j=2#B z<8Z0`wp{QN`lXS>zR*qDA1nEqrv>DCopHHT`Wt?IU~R_@M`i`dJR`n2XCEjz_&n6~A{PoYBgbEfMj(^~EIJ|Ny4dB4uz zd4~`DjM}cot;%P&UG{zaz~qTT;DLCnUA&6?Qk=XW(mWR2{qcEo@vQyPuADc;GuMqn zs5f%N_shcP!j9X;>A%oxw_dJ~SkHOqgm^N1(C5Ig?_KsPk&1(aXk9Ns2&4VUcqDdgiAqk9)SFOix}w7s(l8PX$85<4|q4< 
zcG8NMV2oZ5K>vDmdWXt*TpGXNGeP}+!Yd!KHXnw6#^F-o9Y`9%8sFl2_WfY5F!~-E z{QIP!d5Qe+cZts#oOVSVosUP6`E&6DZnK~4>yU(x?ECGvooU`?JT6V#z@73zp35^| z&GURc`+DQRM%u3vKCjE1eJ=c5St0e3BF8{=?k@&&w{rDIxoOlmn@$)O)D?qca*;f?7zSk{wY&X2oa12=ZXwt=>B zy5z9^J^GsTqXPbS9&_3k@lPD5_lxjT<}SUzQoLKFq*@$P^wSO`UVgF zUyP=GxvL&L-hY$pEO%aj^Ew9I4|w3*EKTb$<^Op9$||40o9q(>a>k?5yM17HeSet{ z#x>$faG|}_Q@k#Y*roPaz)x4S`48>--=Xs|g9m?~^Cy6vkps_nt=wO^^9%bqfr$cT zzk_|zq`4&@%2QO%oAC#nqVcj>d7Rx$_a$t{rLzB{_;)qlz~kZ56=FQGe`h@f|D}V2 zg&x1Jbowv!s>ITNuM2(-6lpH^oDi53Qbh5Tb zqyyRycpvVf))5LXFh)Ayu|F=A-rC5Sm0!e>dv`Kn?;0l_c=>O~MtdkfrCR3=mi-NW zuBQ)a|BiT%G##aIA%D>`n!<&AX%hQ7=kua;N*_FYo_+#8$Hk?JV?m|mLXY2ZI`c5_ zN=7|k<-0tU`p&T*E{#9LF(TJBg#-McVuzF*`g`}&`iJm%9}>sl62BRA?!kV!Gz}C>H$T>_mSh` zQu&SJ`a4g+sj#@N>L-xbJg?Tt9=LmYdGu?SKH8(tt?eov$n&?J2RYkuY2tw1fq&Ad zJVD-mqCVe59LdtY6uA7pF}}ftUfX)Ko?yRJ>iM<$HQM(m`{WNpcTQX}JoO9Z$LVk2 z^QlQq#ObMrxF2r_7bN==?9Y`?vCSr5n3wvo;SZd_ahF+fBG0j(Thxt6e}~NA!0(na z^mkJ|c|-5(wm9f_q{rjQ`R+llcz^o78b9Fs`)og9&-)d9?vgdyS;R$t{HgTX7w6q{ zPvrsrvcH$_fpvS;i1Cl`W99|VJOKB@=Wf>i{98`@xqQdCVFP5l0;g^Z{q7(6%U#^- zy#efYCz%F3&6EFI<-MryTIC;p8m^(=l_~vf8A2=^c()!il5PtY~0*BjMzat#G zs<0S0+^=+dHA|k|n)l${*{t%e^OFl))4ql$@;Pjn$GIuzlS^ecvhyI^PZ~MmE>P9p zuDy`H&xzKj=ree-ru&SNDhf=j8bV_b+!lgKw^GIqbOX z$MAsuixc`B4f3mXo2YoeJIxdQt`hQBg@3a5Ll3`2$K46AwR@xdU#;@Q|7?Ps@wim+ z3>o|5d+U5T>AsomximP4Cs7*vd&bjQDu#;3%FFbQ$KWEbMMlW)KJQPQ{sg@V;pKNb z2Jwt-@W6B9KKXvut6!9CHdy5qem?$g-xus(n^lZ2r(eQvn_KpM%e5_;|4;WQeVVpL zzcF!v*U7Z{9S!6e^U%G7($DIjL-O@#*R?@qrJllBbwjS-dEcwoWxk8%Sm(vyhi!uX z>(?IsbAJ9P>q(F2AFuZ7RwAx7Pg|cnQSFNM+1q*Z!ABA4CsP=n`ze>sX z{(R4&$7hGK@;uJ$&s7{X*W25z{W*P)i+tRgA#r(~p!wGwPU{+qkNc12c`|k^)o8g3Wwz7>72vp1Alk?kme=m6`3o~IZVG%c=gNabsYSkiqiqSxIgGp z;WSv3QpEut>0|#9Wcwb4d$`joe^e<|u+#^i-PI;3eaKtv@cMUx;J?(R%)kqQf93@t zorl8Tszr~jJf^g0BmJ2C!0ysOIsfr_p8h*k1=7*|H1kzB`+FUSf2!woi_$}uzxR5KWDpq*TM6Q^LO~cC;fVw*O`a%|9emQ4vp6x&b$JgwuNb4Q1-`j z(EAR?=hEoGzTdKZ7++k^rO`uN*9ON_d53+A=AV%drQbBO{XImu`ESuqMjtp4O3^); 
zk;89{i1v38rhgfI;0&6RUGW6YyPW#pVS)bLV{}f+aeI}<5AZ6*PGgND6e{0{$aUynqNMfV(R$ED&`sI&cj zN$rGkJ>u1WW)4XjLjOTLXMQWD+7bC0k*|f_-o+O<+q2O5mx%-NM5FaNA><=FR0#gj zeEcBxbDVD*Jn(9~i_S5ahc1=AAHNqca`^uiMW2TN|AimD^I%+M_xG--;BDlvOI|FA zjEC*HRQh3~q_^T6I+sV{7(4Jz9-Yo<8JA0=2R=P-Qr@|qOQlzOsy_FHpS6AL?`?#n{E+aWSEn)UgH0Te z*PJNNuXx|-ykCUg=5F$wjn|v{dpqXDE6LZD7sUHIZLy9``@J|D1hM-rqR+1&%vsRe$kI zPKjsg5y!c?DMY>75A@n5K7@QK`)E_b2g^Kw*Q|~{N)DW(v*`ZE;6Oh?w1`&y^~LF4 zSLs)}mKORPHJ{RaG_EelBxc!hsy7$oyT_Wn9f=x2?1 z$EqK%XKcZX=WS;^0QYrub&jI);`QCz9JQL&Z_G7ct@4Ziez(eZTqJoY{eEn}K;a+{ ze&y}&3C7K!dvwC*zr)V+wpV=3AKXyi8TcvNsuKJ&U(>&k&$sn+$$Hk~(&z(kWWS{W zw)W27pmd;3p5fwo=rf)!75>`M*{wJcE6}{6^jj~r-_vEdxkdU_c5hpr74cehMZAn0_JcC~EXJ`D5A2e4p!+Rj2YI4mou!?J-+HH) zSo`_auC3Mj1$=ItXl;GZQ=vG$10X-4{r9p9HD2Y>UWlXQfE%b+@l}jyzrS+kaq#{0 z;1uHH_^EW(rO-?FyS|@+JX86n9{f<>c0aAJ4L``|?4#eo89Czlurr=@JigreUiwjd zopuL)`9ZYr2thAp+wjUCcue}N_ff#T)lQBNo*&J=R>jk7V;n2~o?&z@rugx^pyL_& z&3-OcF$LY*DI9H|tsJd?3_rvZ6hq&KLEhFUhT;u*tG@C+j^lTg%1_en_I>o4@N^ES z`SX7p9JEW%j47@Cy~gWNLALKf%;?2~|C77$5YNh8U9EN{H#}2v_&+@9q>=+C?=*VH zOMbW=^}ml4VctI2GaoKhod1k2;K3U_#_q}>>j}sGcYL9LBhzW?d^Mt}pA|>~cbIoDK(;UB^ViR_%9m7_4mtDX(&QPuzPw!! 
zD!pRq;>tWJoNO`b2HN-~czw;v@6{mn9VULx-`@qlY~e3i`SIT`Gvm(iL0+Q9(C1~4 z=YN|PaVY&?_}@8D^0tYqdc+O91Uc#cRoQ(UKyuDImqriqP5v#l#CPezPwe@bL!~z- zrQRRG{^1t;`F7McsY1X7&(hCs1liiR;OS_vH*vw=ip>cGF3$(fdpzjnSuf9Hd0(#k z!B6Y;c`kTW-F_atIB#4UKF}NKx67)h*Ppiwzp}=8>f~Fiy`Cq|m6Y-5`kQUvhYF`r zRlWa39Ib|AwdQ+oc)i|*{-NsXoZcfpYfp9%?d2X0r}ahA=$(KoODUxpZg7$CLh2%f2DD#^rFYhiM%QM zEnDdQr;$^C@F^1f-}rO+rFKZI_koaa2&`uH^Yq3N^b6w${Hvu=@65E}35AXIs&yv6?ak9s$*sQ7|MllBK>ztueWzZIW3CEr=^;pfLm7b>1t`;~^C z!tEVE=N~S=$nWsnx}6}Oe4Ey-3hziadWTDRJbv|e;Cw!*$Mc21!t42*-fu8JE{z`e zeArVxgghYsZ(Gs0F!qqwyVp3_-d}+qz1?Efo{-n$( z9z`2<06(thQpJ&=Vtw%SsQ;qr~e|CvL=X zAahogXV}l0F-hW+pJa+(#@woZk)E^efuG{NYFhnt`MIq*!oK8RGpI&amu zmm)~EKlo;urPlk3&(BeYDm(D>PrMp(;+Kg^XC4Ip%JUxUALVCh93O#a^6L@5|8Mot z-UbK$GVI(OV2d;MOkb53=pX8p$)0ZyzwLfGr0|iClmm8HAoIIp~%!{dq-JyrKtag7UsA82DinYxuueAHI zRqo%4&cBE^zaw<^#mLvh_{4+s_&cl537Y;%=aU|JsO%%hE#sJesN(nAM(1`OcHvYjrKpLt_rtNwueH|e1JA@Fxyy<=Uk_r9;!Wx#1SaE&$p^UA)o@^r>G z?Ca*Ibp^%2?;Z5K7{z~@6;JC8TVwzGdGi-N39yap6SwJo9{G2*8|PQ$wMQoVdDNjU zkKu>yxK#F+`rGff_D$IQ!|!nSyz(4}|J@+x{FeH4IjQIFhtPYyKCJ2=kcVqe_nL}# zjdS!qisRCy(jWgMyTV7iCoP#$$>I0TD0xn1c(Wga5B*W9aVpQjeUq!`2Tzq__^bir4Xjyc{i`InZW*CEXe|?@%6Z z&9km2=09tr=3nTKEl^j^3!Z!scdx?szx(iFHm&2eJ^#1KH*hj{p?920-tt#EclWSc zn$>ZLWQv5^+T~)y|Z^aw&kX^0zTUN9kA9kC>X%YS(+6$N7q@9l-a^j$zig3s<7^ z6yq2A%kRXn`c1Yrq2x>cobNf}Ez(2oUwIt+89&e~yhHz97xE^_Z-Ou3$DxvOoc;s- z64#rm_z=gy4D(fcKwfxm9f9xN-oTG@H>$#i+wABhvGXn zq4oowhfC1AEVk37!GZqoCtIRFu%4+0?-!-oA}>Y`yXBwhzRJj<-(hGP;3@gMhpj#Q z0PoH)uXhx{y&uhEe{1{z=fg|=-T?MVdn83ZmHw>zbWTR`^SfSdAEzGxr}Gk;R}@a4 zYxJ&yaCjY{=l{ofV~IR)KT4?Z*5sl4B3HX0&SCY{{gkplF@w&Lj2!$=9BC;0n|a6R z0YBlxPJyETdGII{{U2-ps5oyFTPMa|FuvLmX4)Bvf+v z&FL%88#LeW{a%T<`0xGcb!X#4r-Yuy*}Ew<{8L%J*bm{Uv`?xxeIj+DreQZ zS^51E_#Se~Ujizfaf0@rI{Tr}WB+>o2(!DD@&o^I=QI-brd>VSZOHh7q1r#5`*%iw ztskGsUlMZ0=Tfypw}VZr{k?6OhSoT`S5f;jg}1L_Nbd-ck1p+KUN`p0bHuo6UFX4j z=QLj{?^!8UTjSRE?8_U=^B;bv$n&2bm&I$=vHJTfPXNY?i39O98Fkja?^b%RXV-?k zD^BENNzp>q_6>Ya=cEd6L;WaLK2f{U-&Y|&ypGX-x3p}VBWQQhvF@Ly-wTT##Ze0}b7^Md{F-E=HU=LafI 
z@A(CMho+=+M#Bg4-c{=&Z%Q6^8m)tk9Q?P15Ax8yU8`44*U6DP6iIlQP}r_=YHu#Y=K_8*Q*`#Co? zBt7_vQ^{B55%SEf=v|(PAMzS!GFbU*|7nVB5#xmWpA#qIi8R&zd(FG!t_gd7m!kdb zEWJF0P>*Slda?Q;hcr|zNq-K7$>t6gthuPU5~yqYChBZ z#F$IzoW;`*;+b|Yjy1nEtF)2%Rr!g#POWFzkJ9@+Y;kKGF%HnbkKtjLqcz?87(C=} zYRvcw5Ax)rV?wU-qWf>ZNhIR zPk%&L`r!4YV)S6y|A6yo7Oe*j4)g~_&uZ0ga3Q4k6|gT}NZl7xeERQiWKT)&f{cIg z`n}E^F<+Q*sq}^)miO0gJ@Dz3iPqoB{?liD9|Zr+SJU@~c=Ni-nXjOiq>Nm5^ZAXl z?uOpc>thj*@ef|l((Dj%vb`Rc;qPV?dL~ZpH(V*xG&hNVz{Vc*Y*Qf18mG z{a(p)oiC>P8RhL$TRw>dst;ReE{+Iz>SwbE^xVCmkMupKl|T@*^pP?&w}6f$)(iz zgnZ7rPLQ)dtLsnbv_G{ujQG=SG~zWUSAn;RS!irViO5 z=R?SAPW%P!uJmWN?jiew3m5SQehsqTTMz0-{n_9`zhLwDR(~Ul$iIK0^Hnp_CG;!e zVe;z1t6#%O&^P`O?}NIjWL&B~K?~YfFfNrTm0r%2Z>;$HyXKYnWUtp%t+rMa@q{!E zoLIz-LtveLe)9&yQWR#`)VMzo7r}5(4gG z^>=-B{>uys?e`LVjt}>TUlS+#!L>SztmC^zcsg%T`XiHj><5)z<)_b-Kk%rUkKW@b z{Sh~5UCVhf^{{Jp-r399Q{){h>VaJp<3vy8ZKXI%fCdk-wM)i>mQ}d`;>@_vI?yttmH( z@2=w?cA1{hzE{}|&Z@tY(BD^jV}PySJHHQy-u3VDeE^@2IC{v_W#4d!$0OdmW99zc zs~^-Zo=4>c_@`Zerw#mkHtBUZ^ph5&a|GhU{ljT@=tZyTBY1QBI`P3z_oyNKJrR$% zhrAr1`W5hMEzcJ$=VQn}-6&_3=l@h6a`vN3)$V0C()pi~KkP;C4V64lpSPKM{dSgGIbl#BzH+p9} zA7tKCI{gKD%eEi3`stiio+ENUa>gm*x;6I;>=~c^nRuW#ekGj?D0$X|G>^ON!0+x+ z^>+Z+kHLXnav%HpZQAs+f}eYwf?w<+dxaj~&pY{o-ivnn-8THcYu_ip*8h)H&4KTq zM|>;hw}2hjbE)FX|Llaq2cK%4XGwcwhx)duOj-s92{(#%gSNj8xcw4gt%W(qz7V#cf{~p|d-s*iwNcsOq z?+f6!MDc1t(l6vq+x>Xob@xKB)voRMg^&}U^vuh&Bk&96lLDbo#$Qm_=SFivb4Wf{>zTC-!JT6P(t$Z$kXHN z?+`E5^Zh=*4|Dn>csxEp>r};K(!ae_-jL7h{g+$i%_?jceoea?T=+|o%6|W9^j&em zF3R|%fwp<$-G?0kHhI2FepdO$@gwDa$K?-kMocmS_O5#HIW_6LtS3FaPi-{fqv8Sk zXg$5w3-Euu_EUk+=lS|O-sx#^YhK0{mis2|kG!AO{h*+4Ti|fKT&ni_CvHRK5Apo+ zZ=qn>4v-(O(HU~a<5JT$^-<^okpYa2`cZuk`$2_=H zdj1zTTm6pD_y}^ci$|riuLJML8R)#9@u}49ckx02Yd#Mz$!hhVx$sEg-*wK({!BdJ zv$#*e0GUU~XP=J-Iook*?2)%8RrU$Ly8jftK>K-P2mJeC_gV21DZLdBvLFxSpZlr# z4jFy$dH?4E6%XW(=9fnM8-4JoUM+%0zd(J}l9v@8)tH7m)WU ztjDF|*?a~4zR6V&ywcgOseD6zCk*W~3?Ag&7jCuc|J8H_@~_{OAE@yGc~E-J zct_s1lx`yYnst@Q6ZOM#Ui&T7Cz%jl;wbxZ-RK>Gk;8w!@Z+rQRd?Vf!JqTt@Q2?z 
zk2Y`ruivORy}koKAITPI>u29T#tINPh9~?CUK9Ak`r|S6_W*Xg<7|hW!9&~)X5Nu_ ziocFCi%&Ym8#o;&%5@u$_t=c5{Fcow@5vlLz)QR=2K=x+mj(xM{o>m$Nahjt--2rX zupfMxLhWksV3#}EK`Y+aNVHE<`bX!7bbbq-1^&FKa3QZWR`yrEPl#sxKrh0KQ?Tbe zQ0a_I=9G6$A_@8)6`w}G&%KF&Kzf0yQ7*|Fg{&&u`pKt2@{LhKs z3ZA?#clLk4D;u0ej$h?3V{Lg}#NRjVZ*hSif2W5oi0a`7`EOr&k>G3kq2U9)Ik($y~V{h!2nt7m#K3y zz`hPZ+yftu`C)ymg!e=}pXWK}|G=4-Gm7E`|LIoP|K3KMV{*U2?XSm0!RTp$OZnh; zb$T9JdpVZ!4_;e}6EDsumCoM<0?$|j>EF9hdhY&0alN8*chx>EO40eFlK=9wV6bhT zkMKmDx3Yg_zjlNDymoYS`#DgtS@fMZILJquQgWTZ@wxL4{a7*e`X2HsooK#ezFZoA z$W!5la$ezcIQ{$EVK43boRN#dsk{L<{w;Z)!RJ7F{;2mG*)u;Xb^AZv6c&Dzp6-w3 zciMjk8h+JI%rh$f6o=nf<#Q^_dDQWP{N4<2ZzrwS_|!{<5Bmy#`HOhCef2y0!8zo4 z9`^@Rs(3o(k@wSXJ@{Y0jn;3*9`Y~G8(Q1**N7q9izu90r*nz;oV=ia)UTC1P>wgq zKfRu-)(4RH`>n_pn||un^Y76h zf7hDHfB%&8ulxPplGRoGuuphZ-jD0N#oj~vvJm3^UcWo=6<>{)_<){=kcW{0A637F zymo~O9_@w{5kr-owOsnX73sV{#;D0D`&WiJHz5RZ# z-UDhc&L5Ws7yLFalK0HKUU2h({+{^yyc6>FDZ>ZY#$(e;a};mrA6Y}+1H%V#PQ1JX z`plP0rT606_7L>or}uJytN+o#^v=GrQ9IP1%1l~}Kf!LwzW zh{BH-euY}$3|5QA*lfglLFHPAPB=Z8k1IFg`n6FU(+k9H* zDZIeC&jdcl zqxVH`GI;&o5Pp6wl1BLPnnyZ(q! 
zz+c5(lT`ac-uZgmAlV-v-^=SHsQ@kR^l<&w)9$YwxZ`MHjdC-gEVGxJGhyJHt_V?#mt3{G=yZj-}oMq)XJ+IH4 zc@}z)s>pS(iNo-L-ttYfPgU~j#Zp@NuS+ZE8Ixya->kX)+*JR5x=#T*$2a!K)65;~ zM0@i6k&bgq*6H?n#o!^%@%|H_r{o25(YXxgTbD*3yox8a&lj6Z+viFDG4E7A2cNyQ z?eD%;+}Wt~DIYiPL$3Mpe;b~_Z=CIx6{qLQ*8+#XXL_7R$p6x+r}OVSXnP=6zR8=2& zp#7f3YrLiCpx=!k-WA0&s(e9S@vO%>h~qVUpx-y4ece4QiGI%v{VWUU`(bd9uZbJK zi1z1k?(Q$tu9;KGJTgBn75>eaKfYf$KV`JFU$u@)`$2<;IP#yY8vI{>^YFK^TYA(R zJMgYHEVHz8*#kex-6KJ=KSN%=Ih|Xu9hK>npPDNk!k+3mpH4po-qbenRovj&y+l@F z@7STfLz)5hd60P-KJe3PODUBP$S-BD9jbgp$X69SUHIVB_D|YJD!-M>(6}~o=-m*R5t;3w5&TAwLD^B&7}8=t%B{ylEPQG5SZcn>qs?+RV{1>aMnvLg>% zuSl2Qe=y_4j$>47onw@11V<1F^Eob~FB{JxXOz{TMvnMx(!rrS#98r}YN& zQ}v(jeG~cN-`{EeF>>3_t(u&d-&;N}_L_(0&MXi9E>e^8aF&h&RdYTYe(Zz>1PWlsm3|trJbzr+vZI-lol zeO@;%4(%rxpGy7iVC938DlYhmwJxUO4|&R*aRh(%@AO0HjcI1zU$H*-UuWJSzdq}% z?K1076~zZ_KN>nP5HevcS0h1#bnKaZx-J6mcm&V#$%5clwa z`RI=Z2l8^)==Uf}zCUX*kMV{$I((Ju7$;8PemwT$eMsIVYJZ^o&mSb`HI7TqKj*j5 z`rp`tU*(%LFBv&_#m;d)NRBt?7Yxkg5ijfx){^fa_}@En&g+2_{jNQq6|U3oIe2_> zY52kZuUye350@VJe9LA(2dml5t6hL!I9BN1ef+!by2=y$P8oa`a^l73;ZD0iFWRPS zLXZ2o)*Eupeh>4oy!?ABPX9t2^~%xrjPt{#%ID_hK`IW|Kgdk{j6Qhx4%{txbH8K$ zn#Y1}O%*7c#?;Ms5atN__xp`Y#Flu94+QT2~P-^0(BhID?# z^<1hv%$`oallQ2<``P0>SK;&<+r)zp;>&%*{$09}=(siRsg>xQka_7+`7a!c&asRf zeh0<-D0uO`n*KY|K?!mp&*Uejr$6BOt!Sj?S;(_BeH~?axD~fr*@J9Q7 z2wMtrPae>lx`xhWSkK^qPmhQCoeAVEGy4gAJ_mQcPtZG6!T#>S*$;q6xwQ84!Nzgh z;P)Jc7xd@!vcF^aw_j?(i^rw*mw#wq$YYBBG_R`odA@e$G3Y06e7UIXpWw5w z-%RO8;W;?ak5x|p{T0XyN8J)&!!LX*Kje&S;(%SipuRt>Pcm|(tasrczGqqNzoWah zV}rCe{$aQ0`$NBSc97+!TnsMy@n=IDz8uWX- z@!(~6fPdK`bg#ns;?n2=FU)wIH`u?Z;{(2A=kCw2-(Ew$=iqa0E}i~E{#wyHJH?}L zQ~U4ArdOtO1*N~MES=YLT#BdmJ0%sJTd=;u^N*NJ#LMlh*E63F=TYs5e7$X7P1u|M z;qnLljaBBU-{&Ad!8s}f+4|4Yk?Df}lY8LJ&O_&SDy~ECz1HdAk=x^4M1>{tzA_E@ z)$8H=z2$l;q_~4J=zCo72-wq9#^;I?`5AxCzFz9Fs6OU5w$r7`%Wog%AcQu6 zTsogp@(Q<~TIbO^_3PRF8+-6?Tg%VdfBvbtM$JRef4MFb3Ez_TarYR#p`!)FZwn6Fv4X%HQ7`Ygyy_`K|uG!0(MYxusv?$?=$W@M!0QS0@C>aRuDInU<)2 zgE&@2_79f&kWb5%7IK##;H@1_`wJCs+D>$S%XVBEJ>XZ}WB*<3`Q`GQ*E?SDXglVu 
z>VJ?2jLl$;t3^<72;%_uDYM)6RW};Qa}(Z2n)Xp~R%#o@%A?B#oj>Gdz^zy!?_Rh| zKFRYRo}YC;Jvf@a+lCk7yEOB*$OGH!^Vcf*76?6V5B8(7Rc!AsWyPh{853@Xmv{(6fJ5Aqr!7kZI{ry5amG4P%557J%Y5zp^ zUf<34lOF}h`~d&2L3EzN_>7}?cg*(|_EgXJ!_L18fjD+wPl$G4Jc9#&U+&Yq%Jp0- zy>$_bs`iBa-xnf#;KP5)683qG`;*g-z`OnZtjY^;{0h_krr`zohpG#dK5*8(OBW>b z3;m9(O9V?f^e;zK^Oy01`nzZ89D(!0rP4bxsJGQ`-eu~1n(VWB;54cpSAN%BdWa`X z*BywT>vd`PK|gV}=7^v5Og-Y9GF#t^!0rbByKP1v^5F$9sr-}t+mH9<(ZZ{9PU53~ zzxa3W%Q8OYKl3Ab-|qgdBd%W~eNa3h-pk>Z^wjq} z;9oz3$9moPp*ZKzc`)Jg-;3k-^iKbP{=o*c&r>)V{L`rQ8R9sx-~h(I(r^7;e!uzs zg}YzE@4E%#fzNs1QsIVKo>H|N?DOCKY8^Mbezl)dIsYyU;&|VH?g7{@l?nO36_0bt znt}({bE)+5OjYk1J@m3vI;i-ASF|>nRlJZFE2qCd$V0h(Q&l@bp6ARr)qjEWt@uGD zhyKqA%`yTKqZnk&k75LkhJfwR|;7vXlr1Av&hL^HJ z&iVB!Rr~z$jD9zy^_5=RQMBJOa_|Y-YJZnossPRRMjt#kWYy=^z=^TtxizlGQEB}} z{LFX5lP~Z-m)GaNkRRmVWf@+OulsgS+}c2c_ll2PmCYPhX&Gl zatQnfPmLYI_yX?(!S=t)+U}CvM?3srw=lQ-j=8@d;G6!U+DEH+YOmDaQ{ex0CYzPN zvoC>O^|CadseEX?CC5uy`$wa>HPrk7-U&`-xB546vOg8i%wr?eI0oLwDRj?g^uhn_ z2s#&MJT8qM?5BQOA0)q%u#Zu%Lhyfb54#*+-zhtaFHH>C89%`L`jYl}j7Ozjug%&` z^B&dnK3VsZT(9-M5&lnQrQd;*J&zY>9R$6i*Xi7j^vw8Gyt_A}b8aJt{hOmJ5I@;_ zw;$pU9`s7xcLJx{I2tF6qx|gI+ri#nj2`eS9@O_oz`s*Lug4+Zkci&98XU;`CZ=__ zl6PMs&&f=@Mh`riA3u!oK=JVR$+;JR-h-L;^+KkS--SK5zw`Gh!E;Y6HU2&F*SaXp zSB59-SB2N}FyzlVM78FdrGabY$V0bAbe_j{x-|OWH?m2f z6=!U|@oIhmexHli)HsFQXJ@`(+qjSOoc6b#JYb(Evp)ZWyxette*^j5Np!BJaNX-p z=vS)xOz}qi7Y@jKZ(cV$;~Vx{d>_MJ`P086o$-Y3$yJ^U_oR7D**onIJ~8{~^MA+7Mp{4ib&KJ-%z>W1~E2VUK=v0=ycTq>NVb@o{C zn|7IRmG3%PHdMIn%g}um`{h#k-@l*6qmmD*LgzVT$A8z+`FEhv4iB$H6yurqSI#;Y z`TFx3?TcMJfYW0!y+cquQbjH(@Llcb!h^q{_;fF2@ZhIOhD||oTtYr`P((FeAy4(r zV;{qQxm14so^RjRbdQu)`2pWVPckYw{O>su6LR*$rSV7hIcc53^;{Y~;KgX2QO#qJ zw;x3JP(~lP@6+jX1jx@-r29JZlZi_Ad;|O_3)FeF@q_wmqw}L(JaOQ+=*eMfT)_U9 zgz@a-!j%W$EE=Qtk&xG4K<9OaALJ8`F17ki;&<-<^(X z;^*`~;O1LH=LRMZke}OLQ@zuI{PxDu9_^3%Rn5Lw`*Y3AXO$fK%eN&0p7D=%`!z;% z>4)3R=)t~wZuxsIKBsrqOTa5QEUfYm`Kt6gA!mFl8As=9OSgv#2Rz#S{=zDcv;B?0 zH}in2J;0}Zvcy)LS7GeuhCDB5p4IBx$IW5kVA55) 
zzS27rNAKSe|Gnj3gZ|^kew1D0WDgZj*qs}7C{%vm=A!#9!sqXxKA-QDT#pO*8Pihl zyCE-GR-Q}oyzh)p>L+<=eL((s9^%qzN8oO1Q&XLNXB-WFhCtaGnLBCFhMshr< z`hNrUxCP%@u{(I!p}y|IAI}H(YwF+Iz;1eSde@o}Tvezx{J*Jwkr?JEQw$*GO{SG;w(F+cd9&b^p_N$QaQMjvuts zjo&KBdJp^oO-lya=Cu`biwB7LgzPh`_RXKu{*GCX-&9TP=i>EN&^rl*TkdW-w4;)j zf7?*W(O%E%d+fW7J@n3R-ENJ`$shF6=cMnCvLAg>{tjE)2 zaL#GrH-?}6d~E#Fz2HOkydHG&i@aQjC(rSC{&D&n{B)RvVL-JF0b4tbjL;Em)Avcm0nu)@B8xi zTAz2Otw#G_=Fg?V>)tAbH9t=(>GL?m9qFun-LBiS#=1DbWxre+|G-PyK)nld=^;ON zC$>X=l>PCYpOJ4Rk8-_-N56*O+Yxf0UfDfohs*>OT}mOQu+IBo<~BOC$6k$9%!2E&|~7oWTM8$ICGt&kA@h#&V`(@sVoxbfE0zKHXs zOQR3`Z2wmGXcyo;4Xe*BVD~u*-Qy^{^j$LpPsxkimG5($@eMy2yFL%Ht+O0G_>Gi1 zod;jk|1me5@(+Cf5p*x7+Bv4;d1T{z!Grf<&i(;-(e7`Cp0aOpVU^l%K|k#nnm@Tc zl-;Gphpc|}{czE}bneACTq?X1Hwy;X#%JoX6NDd~-}K%6z@Krc)boztsvw1r{I4Fg z3Gxu^8?AXM^mzZ}_yJz(K_hU^$ap4B_wPFXB%272{c&mh zf>)NP4-`J+?UH%?-bdMQ?=Ii7d(AibXVZP2vTuF+mGY0ccNUcAeC~MyJRj{*d2q!E zyqrU5yi`u`!aMeR`(OT+_tEZrA|I7E%JT$nPiNf^y|pDZFYx^{sywgceS=<~ z1iq2?1MJ7iC;VK=lEZp_aOz!4l^6K$uxmKv+z+TU@v_|H4R(Y5cFFm|)PqO!2BodI z4qxDu&S-xZ)-djUYy0NNq0R}7U*ILpR6z7Qcb=ekGn(GlfcN)@dVE7(fAZac|LxCS zxPucN4M7ih^M>Alp5otTD2-1ehkmjXa^KG5&Wp#FUHUx};!fR5-hVQV^PPjwx2J|en`_-k<2ai~7YOD5# zzxwy9SmoDZ*w<&h;=d93H}M;Pz|D}?er^=jH?`=m?)C%UFnu#w^;qA_Z@{_$6%X(> zjhtz1=lby~W1Xt_x1SpvB-bIpDRV1Yu#_X-uo16V^`HHf5BjQoc;0sAVc-m{XTL8k z_dL14;XI^fT*II8H$R-3pCLcK+E3s({!!m`Z%FGD=tbXD26S5>epK&jRDI8m zIRa%~$xl^UuPXgYz3t}}(~s=7_D|0oJ9x0$ zK0KAogDXDh<;|=8BJUG>%W>w#yQ9BcFPL{5CXeuw+kcR1cgQbo`0@EfEdG0e#Glt0 zrrnGm@E%%c1N^w+MSZ|l`@6Yek>+AN89eYy*JCu~EL1d+8+zq-(R$m316~tS zsr3ZuIp;9I+4?kVplzP%`HOy^2>n%s=>CcQa%tj(|8G;$0+;GjQt57Y;5Hp6zi;mL zfZpUVjjVZnwukPs3=ilJKi43Heha>3%Fua|v4=eKRrS46^{>9$`+Jd__W7jOYuZ0> z-ndk_!y3zfXO{OV`tKM1TvGd|xLduJ{no?Jfq->FF9GXusd!ax*;eM$)Wh$9f%Lvn z$+y+k>pSFkfWM#Wf52(tYdXXwg1@=>XYzR!ex_`xJ+J1V)qxW^If<_w|l znrSD<%l}LF@e0q08+`U;x4)Y>((fwzlfq4Xh1NSp4xFC5lc6_FD8zYCGHhZ!Jgr>dj z=9A|&4qxDpOP3|^e{uNx@94qb{wI3e!oE(B{e8sen|eNhez-otA@GL3c`?EZUZ#Cb z9)Nddle`bmyh~J~a|db%UVrHG(GT78z@NhRIZnTa;CAHF=z;IStz^&jTq?abX|tpM 
z7&+otzLL(NJnWvlDhxZW=hEPSN9{GVo-=ab|2@op-<)tHt&f#{^EJMq^0zPttuu{1 zc>LUk*1aA){oB)if$Di4;Qfhi$KwapdffO!e0Qtw31K{eSDWc`)xH$=BcJ3|?GJtb zulju(^xI|7>j3C~I7agZ#l_zReXe#s%L~Dm@9A~FOn7I%(9?Md^KTbX&;Go|%ldTJ zt>+Cj%VeGtPFNYW@j0%sR!Sh6X|yqN}eH#{(Ba{ zyPSv4f!H6F&N(k|R~Gl+Y5V~1Ox5QBV*D_!(p#RrHpYRH|GY`>2jG8h@OR)EeaNFN z86^Fgdhkiv>!U|K?4NFz_gcJuaOOMUG;V2s&oZWsJ>SKv(LBNNb7}a&f69C@#C*Wx z$ys;8|MqD1dFR2%=)#`+ziz+I3*|W)>#;xnZ#@rnp3o=gzxGtTA0OzT+5APneBswq5B?HW_#|-^&g2Vff3NEo>hCE0 zR|q;T{Tn;@4H_WN1NiskI)BR#+Rwv&zb((FxSibVL+}a6Nd6R$zTc;*{s8^){q?#R zywk;xAlqHx&F=UCc${}r58S2ELbs3Z;Co#0g8y5y?pWoU52|r!{K775jZcCX|2xEb z9&Xpv2k{wu@E+Jco{ZZozRDkkA09`}yaBwHvv&zS-R^ZtEKqU6?rHM%(vEp>X?P>< z;o0=Q4e}m&?EBRuxx-ui4vw{1$$?XQNDeC>j@yX~`d{~C_ZV-eA6SdlPZSTIlkndQ zbMTOl-y(ahKY=&sY&Fa$oF6Vto`6#|ZWib%dEcV?oD2H*GTYAyJM%j+g99FU-`jr& zo^!F>$D8k^v4_7J8Ks_kzJ;G=G3~rsO_ul8+&)}7c?Hg`p7eKu3=hZ?)z|otZ%Zxt zdEs^{ZGVsN8XxaSPx;SvJh{C+UF`~9;lIYT>nq&8=UaR9OXUB2`ZA%ysoq@QXTjcg zg4enQJYQC_pF74bN9P9~@jWb_EBMFjVc_Sun+$p??j%i9TjhDvrd4vpk?>DBkD7ky z!7t@jb#CLKx8O$T_Gt&^*9w&D1Mn)FV~ko)K)zr)&9f$M$lDjy^E~oYcBekifc*J@ zhhm=N{ifIVwsQ2FYQBWu7E!(k9NrJ;e`j-Y(=$TPJ)VFUJM~oM2Ym0&)8|>x51771 z;BxyJd{_Ix?#%k!0ivI&e12(NT(t{$l)03#=#T6oC;cpNc^#CRd2ng`0e4NaFR(Lm z@NC~v%b}n6`ZlExxzCz}N)DWclj!_O;V-$GALGJ7DI zUWo_MJ4(XmeyG0(H=D|Rw9Z46va}BO;CZg!`w;SkcJKNvo3$SoZ9>1>ci|&nXDaJ= zdcgY{+Xr|qJJer^B;S?ueB#6p{MCNyoWTRHeayebRXlU1mFp+n|01VPB5^tITpB#&vt{3M*8G2(UqHzb|G1>8A2BYM3cp&`c6L5W zukWJ@R)33&+ULvt4GUm?GC1J(M{IqL0(sdFxr62R8u8bxPv298^KT)#2j;lARC=G= z(K!g$b7}P8r^Yo}KN&fAWv(FC8$9luc7@)e?DltkjvoB%{YdX9l%Fe?+F9rM%74EJ z65mDU&!xiIxTFsBmHekK^qnWWR8*RAp!A~`q;n!A@AzvQv=jU1Qt3^pS=eJdBM&ED zs`q`$zU8%%(%!?*q84pbzXX20c3WlM3_j|=PKqeLpSqo*wx1&X8+){OndS0ahQHtZ z@5eiM&_CT+;kn`guMLUp>(GcHww*sr;6DR{=PldceKCetHOc;QORRA&fImJm^iTF#Lz(YV3lw0k(O5-K{i` zGroxjJpBf#b(M!+|9G3xKCXK3+juePhknBN$iLr_k#L-K25<@%e~Y{so{&%ZO~2=Z zJom0L7+1`TOJ(0>54~4a@@$#y`|K{WX#c15du~o=_s4#@GOjj2WT#+u*h1P!#LDwC$4pywzzh*l+wNvOOOvj`s)c=c$wP%6&b* 
ztK)glnU~;iklsDBKQ2|C8oyr&d#>lw=>30#xX7C=JW`q9>HAK2vK)3Ja0d8`&b0(UU)kl!~GC@|J7}^ zMGb$5dCPv_=s&^T4N+H=a2L)05b+&nHlm3`?Av&h&^bL2{WddhLG(l24a;Zf?I+du a*ykIrE2FNXfQ5rRMmRw13okFjo{j)wn{G=0 literal 0 HcmV?d00001 diff --git a/yggdrasil_decision_forests/test_data/prediction/gaussians_anomaly_if_skl.csv b/yggdrasil_decision_forests/test_data/prediction/gaussians_anomaly_if_skl.csv new file mode 100644 index 00000000..14436e18 --- /dev/null +++ b/yggdrasil_decision_forests/test_data/prediction/gaussians_anomaly_if_skl.csv @@ -0,0 +1,280 @@ +4.192874686491115943e-01 +4.414360433426349206e-01 +5.071637878193088200e-01 +4.252762996248650729e-01 +3.864382268322048009e-01 +4.572857985150768356e-01 +3.828214071190141898e-01 +4.229257548829531421e-01 +4.062915153623989362e-01 +3.924027382415538612e-01 +4.204809538715528761e-01 +4.120245158211452985e-01 +4.021714702533157881e-01 +4.116859482405123560e-01 +3.856297713263709404e-01 +3.879418151101039491e-01 +4.254604004573970255e-01 +3.829543100175958337e-01 +3.894370012297819206e-01 +4.767564669226527219e-01 +4.105111972875190030e-01 +4.531210549381821107e-01 +3.874123385442900802e-01 +5.070949713343407828e-01 +4.004866507650391427e-01 +3.887316633629200879e-01 +4.045394382558925028e-01 +3.907274096454862455e-01 +4.443613684156251242e-01 +4.225904764508747746e-01 +4.323633888377847456e-01 +3.872707488263947839e-01 +3.817428594100403805e-01 +4.999718182628443697e-01 +4.541545263864004700e-01 +4.411337054894758913e-01 +4.173526637361775204e-01 +5.025084696934063455e-01 +4.006944572465925836e-01 +4.094431791281276656e-01 +3.820838149099484715e-01 +3.893680505002969361e-01 +3.810040914235358067e-01 +3.855961731361299272e-01 +4.232722352430269286e-01 +4.110354525550297122e-01 +3.918776416664728202e-01 +3.834264091236884164e-01 +3.837920468095222248e-01 +3.933842928691425600e-01 +4.326440460645166741e-01 +4.056551811690080056e-01 +3.895330258052301375e-01 +4.060813389953127350e-01 +3.851454738878740125e-01 +3.950579767455482116e-01 
+4.068190790541990620e-01 +4.313255306243298781e-01 +4.776874716748540362e-01 +4.156832554965157467e-01 +3.950591826845297749e-01 +5.301922036262658455e-01 +4.308485980804026561e-01 +4.393885347009985343e-01 +4.116120753344015837e-01 +3.869103658296884629e-01 +3.979117800684048301e-01 +4.020911435926875499e-01 +4.141215529982648635e-01 +3.942074750986535592e-01 +3.818074385912471058e-01 +4.219578719165194558e-01 +3.868267522953542370e-01 +3.987328486697296337e-01 +4.055211053693148493e-01 +5.500677248550085441e-01 +5.420195167261064872e-01 +3.828485420051598864e-01 +3.817567080948217062e-01 +4.183393305586847188e-01 +4.323695489158039251e-01 +3.842818759027541442e-01 +3.844818053615791098e-01 +4.068890989165111494e-01 +4.618555038832681259e-01 +4.018989074713745224e-01 +4.648227216919613713e-01 +3.819814560184485730e-01 +4.103791772911288271e-01 +3.904823673762904956e-01 +3.899555383140163034e-01 +4.574802701165338603e-01 +3.865763499090780209e-01 +3.951694611570931159e-01 +3.855885909007265577e-01 +4.156832554965157467e-01 +4.985406549658428221e-01 +4.402427532823418033e-01 +4.036571176114390203e-01 +4.544473624936938294e-01 +3.975366726865089406e-01 +4.716395580368268337e-01 +5.287268511798952630e-01 +4.842557205309823143e-01 +4.679571513883666323e-01 +4.559195029152853240e-01 +4.034531926450437567e-01 +4.120864592485077860e-01 +3.904682127823561610e-01 +4.172182514027760192e-01 +4.519915549851231118e-01 +4.205610367050501286e-01 +4.871252088471250685e-01 +3.791240419514353421e-01 +3.832566735533416424e-01 +3.839636789324888944e-01 +5.022452297590203063e-01 +4.359862728110341923e-01 +4.420147071002538675e-01 +4.148646563422070388e-01 +3.826592321605687963e-01 +4.266047739387540672e-01 +4.097065722991519165e-01 +3.905689585798612362e-01 +4.875926043809953869e-01 +3.789379435786642270e-01 +5.500418969423366278e-01 +4.096627265516010197e-01 +4.618205485619392703e-01 +3.938384755246451818e-01 +4.345132144143419306e-01 +3.974581163061812195e-01 
+4.302496481849614196e-01 +3.882766841015732329e-01 +3.997871035161660314e-01 +4.586997505551287935e-01 +4.118826061235560942e-01 +4.065925103426547005e-01 +4.656338978518183569e-01 +4.423437630145455879e-01 +3.895478346750532728e-01 +4.103814948857281708e-01 +3.817683297876992565e-01 +3.793772698872787119e-01 +3.993730701909981029e-01 +3.773695170984343594e-01 +3.984209767946685909e-01 +3.821866507638494448e-01 +4.008893660492762745e-01 +4.097956829221550690e-01 +5.046304127250391680e-01 +4.048730213871686301e-01 +4.512826335180712412e-01 +3.920315658275624338e-01 +3.940099922559582857e-01 +4.098016210252675706e-01 +4.423806966764076143e-01 +4.946810305106644212e-01 +3.880441037630216750e-01 +3.988136213344566916e-01 +3.928119252560551833e-01 +4.047339195637830178e-01 +4.135616388584226755e-01 +4.297218879483765686e-01 +4.203077591790538858e-01 +4.265634462984130848e-01 +3.948208310202510063e-01 +4.459889025894496206e-01 +5.206044029282200780e-01 +5.579405715391486664e-01 +4.358864550595425991e-01 +3.833844043711691629e-01 +3.786230316520938777e-01 +3.788119550597274232e-01 +4.236605785765598098e-01 +4.226165349745940047e-01 +3.950578738595934003e-01 +3.882292203069177550e-01 +3.817579965315230917e-01 +3.871879442211887401e-01 +4.141477749441478950e-01 +4.429423350433662754e-01 +3.960791336888356828e-01 +4.984501366941139766e-01 +3.883437150295604434e-01 +3.872289451323157605e-01 +3.997820439178182972e-01 +4.487556562092934742e-01 +4.810655203676491021e-01 +4.466108918716661380e-01 +3.966013053412562606e-01 +4.249603405034386716e-01 +3.861806487610026650e-01 +3.788789309952160789e-01 +4.002886743478978326e-01 +4.877364558262433647e-01 +3.885950090060952822e-01 +4.173414050232285843e-01 +4.660352010052796201e-01 +4.719319980905274936e-01 +4.069173143546231941e-01 +4.345190394359730313e-01 +3.780582739823815097e-01 +4.740919913608709901e-01 +4.226080513284103390e-01 +4.677747720413846233e-01 +4.512101286667756228e-01 +5.286989180949901446e-01 
+4.275707741626420533e-01 +4.762117730989295916e-01 +4.455416064365435580e-01 +3.889253486766672263e-01 +4.578419212637818148e-01 +3.834804343711281649e-01 +3.858922224640186083e-01 +4.499084362588428587e-01 +3.972063543467886926e-01 +4.129127707066883035e-01 +4.160268102467659657e-01 +4.668341997467896909e-01 +4.473061848655025541e-01 +4.577191352620764708e-01 +3.844943281007760505e-01 +4.252057879779328475e-01 +4.494716604068641486e-01 +4.245835883611878137e-01 +4.023278468500455785e-01 +4.354858123839681072e-01 +4.103481467812403749e-01 +3.866402448320220286e-01 +3.987291363628146512e-01 +4.118935925884376625e-01 +3.779766179680454918e-01 +4.612377027605725210e-01 +4.039052165777324288e-01 +3.946717258624949376e-01 +4.569480237849761206e-01 +3.829179280218348302e-01 +4.894804677721761865e-01 +3.972207981766076035e-01 +6.593193717487798589e-01 +6.192209945328789322e-01 +5.477797224059469672e-01 +6.173453627703995306e-01 +6.275236972956231840e-01 +6.912391238952994010e-01 +6.763233649866716712e-01 +6.404186030149033870e-01 +5.392811212791098763e-01 +7.286598308044932581e-01 +6.388780842505601409e-01 +6.215996766626598058e-01 +5.696388297677966728e-01 +5.491739082858497767e-01 +5.992520118257051998e-01 +6.619900211379498023e-01 +7.164889333567598939e-01 +6.609778150579318501e-01 +6.278282329056331657e-01 +5.797573451130116906e-01 +6.775016449975370669e-01 +6.334292246222487099e-01 +6.332737346764194530e-01 +6.987914858039537824e-01 +6.331578114099671861e-01 +6.204798967766358420e-01 +5.881668787726149761e-01 +6.115757410551400097e-01 +6.433056106218002501e-01 +5.907053136999610432e-01 +4.613760056477577143e-01 +6.222050253446579360e-01 +5.384364226794347008e-01 +6.394062132296666201e-01 +7.105713663527076784e-01 +6.656745655947218232e-01 +5.287150025616496052e-01 +6.780186910018997093e-01 +5.716047211136439099e-01 +5.259055469274873662e-01 From 887b122a3ba0c5cf5acc818cbb3da6ea156b780b Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 
06:24:16 -0700 Subject: [PATCH 22/30] Anomaly detection; Isolation forest c++ learner (part 4) PiperOrigin-RevId: 643984861 --- yggdrasil_decision_forests/learner/BUILD | 2 + .../learner/abstract_learner.cc | 10 +- .../learner/abstract_learner.proto | 4 + .../learner/isolation_forest/BUILD | 91 ++ .../isolation_forest/isolation_forest.cc | 459 +++++++++ .../isolation_forest/isolation_forest.h | 115 +++ .../isolation_forest/isolation_forest.proto | 48 + .../isolation_forest/isolation_forest_test.cc | 245 +++++ .../test_data/dataset/mammographic_masses.csv | 962 ++++++++++++++++++ 9 files changed, 1932 insertions(+), 4 deletions(-) create mode 100644 yggdrasil_decision_forests/learner/isolation_forest/BUILD create mode 100644 yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.cc create mode 100644 yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h create mode 100644 yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.proto create mode 100644 yggdrasil_decision_forests/learner/isolation_forest/isolation_forest_test.cc create mode 100644 yggdrasil_decision_forests/test_data/dataset/mammographic_masses.csv diff --git a/yggdrasil_decision_forests/learner/BUILD b/yggdrasil_decision_forests/learner/BUILD index 40907240..6b8daa6f 100644 --- a/yggdrasil_decision_forests/learner/BUILD +++ b/yggdrasil_decision_forests/learner/BUILD @@ -31,6 +31,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/learner/cart", "//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:dgbt", "//yggdrasil_decision_forests/learner/gradient_boosted_trees", + "//yggdrasil_decision_forests/learner/isolation_forest", "//yggdrasil_decision_forests/learner/random_forest", ], ) @@ -74,6 +75,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/utils:fold_generator_cc_proto", "//yggdrasil_decision_forests/utils:hyper_parameters", "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:random", 
"//yggdrasil_decision_forests/utils:status_macros", "//yggdrasil_decision_forests/utils:synchronization_primitives", "//yggdrasil_decision_forests/utils:uid", diff --git a/yggdrasil_decision_forests/learner/abstract_learner.cc b/yggdrasil_decision_forests/learner/abstract_learner.cc index cee772f1..41f89b5f 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.cc +++ b/yggdrasil_decision_forests/learner/abstract_learner.cc @@ -35,6 +35,7 @@ #include "absl/strings/string_view.h" #include "absl/strings/substitute.h" #include "absl/time/clock.h" +#include "absl/time/time.h" #include "absl/types/optional.h" #include "yggdrasil_decision_forests/dataset/data_spec.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" @@ -54,6 +55,7 @@ #include "yggdrasil_decision_forests/utils/fold_generator.h" #include "yggdrasil_decision_forests/utils/hyper_parameters.h" #include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/random.h" #include "yggdrasil_decision_forests/utils/status_macros.h" #include "yggdrasil_decision_forests/utils/synchronization_primitives.h" #include "yggdrasil_decision_forests/utils/uid.h" @@ -834,7 +836,7 @@ void InitializeModelWithAbstractTrainingConfig( const proto::TrainingConfig& training_config, const proto::TrainingConfigLinking& training_config_linking, AbstractModel* model) { - if (training_config.task() != proto::Task::ANOMALY_DETECTION) { + if (training_config_linking.has_label()) { model->set_label_col_idx(training_config_linking.label()); } @@ -888,11 +890,11 @@ void InitializeModelMetadataWithAbstractTrainingConfig( } absl::Status AbstractLearner::CheckCapabilities() const { - // All the learners require a label. 
- if (training_config().label().empty()) { + const auto capabilities = Capabilities(); + + if (capabilities.require_label() && training_config().label().empty()) { return absl::InvalidArgumentError("\"label\" field required."); } - const auto capabilities = Capabilities(); // Maximum training duration. if (!capabilities.support_max_training_duration() && diff --git a/yggdrasil_decision_forests/learner/abstract_learner.proto b/yggdrasil_decision_forests/learner/abstract_learner.proto index 44b758ee..a51d008b 100644 --- a/yggdrasil_decision_forests/learner/abstract_learner.proto +++ b/yggdrasil_decision_forests/learner/abstract_learner.proto @@ -263,6 +263,10 @@ message LearnerCapabilities { // If true, the algorithm supports monotonic constraints over numerical // features. optional bool support_monotonic_constraints = 6 [default = false]; + + // If true, the learner requires a label. If false, the learner does not + // require a label. + optional bool require_label = 7 [default = true]; } // Monotonic constraints between model's output and numerical input features. 
diff --git a/yggdrasil_decision_forests/learner/isolation_forest/BUILD b/yggdrasil_decision_forests/learner/isolation_forest/BUILD new file mode 100644 index 00000000..a100656f --- /dev/null +++ b/yggdrasil_decision_forests/learner/isolation_forest/BUILD @@ -0,0 +1,91 @@ +load("//yggdrasil_decision_forests/utils:compile.bzl", "all_proto_library", "cc_library_ydf") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +# Library +# ======= + +cc_library_ydf( + name = "isolation_forest", + srcs = ["isolation_forest.cc"], + hdrs = ["isolation_forest.h"], + deps = [ + ":isolation_forest_cc_proto", + "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", + "//yggdrasil_decision_forests/dataset:types", + "//yggdrasil_decision_forests/dataset:vertical_dataset", + "//yggdrasil_decision_forests/learner:abstract_learner", + "//yggdrasil_decision_forests/learner:abstract_learner_cc_proto", + "//yggdrasil_decision_forests/learner/decision_tree:generic_parameters", + "//yggdrasil_decision_forests/learner/decision_tree:training", + "//yggdrasil_decision_forests/metric:metric_cc_proto", + "//yggdrasil_decision_forests/model:abstract_model", + "//yggdrasil_decision_forests/model:abstract_model_cc_proto", + "//yggdrasil_decision_forests/model/decision_tree", + "//yggdrasil_decision_forests/model/decision_tree:decision_tree_cc_proto", + "//yggdrasil_decision_forests/model/isolation_forest", + "//yggdrasil_decision_forests/serving/decision_forest:register_engines", + "//yggdrasil_decision_forests/utils:concurrency", + "//yggdrasil_decision_forests/utils:hyper_parameters", + "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:random", + "//yggdrasil_decision_forests/utils:status_macros", + "//yggdrasil_decision_forests/utils:synchronization_primitives", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + 
"@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], + alwayslink = 1, +) + +# Proto +# ======== + +all_proto_library( + name = "isolation_forest_proto", + srcs = ["isolation_forest.proto"], + deps = [ + "//yggdrasil_decision_forests/learner:abstract_learner_proto", + "//yggdrasil_decision_forests/learner/decision_tree:decision_tree_proto", + ], +) + +# Test +# ======== + +cc_test( + name = "isolation_forest_test", + srcs = ["isolation_forest_test.cc"], + data = ["//yggdrasil_decision_forests/test_data"], + deps = [ + ":isolation_forest", + ":isolation_forest_cc_proto", + "//yggdrasil_decision_forests/dataset:data_spec", + "//yggdrasil_decision_forests/dataset:data_spec_inference", + "//yggdrasil_decision_forests/dataset:vertical_dataset", + "//yggdrasil_decision_forests/learner:abstract_learner_cc_proto", + "//yggdrasil_decision_forests/learner:learner_library", + "//yggdrasil_decision_forests/learner/decision_tree:training", + "//yggdrasil_decision_forests/learner/hyperparameters_optimizer", + "//yggdrasil_decision_forests/metric:metric_cc_proto", + "//yggdrasil_decision_forests/metric:report", + "//yggdrasil_decision_forests/model:abstract_model_cc_proto", + "//yggdrasil_decision_forests/model/decision_tree", + "//yggdrasil_decision_forests/model/isolation_forest", + "//yggdrasil_decision_forests/utils:filesystem", + "//yggdrasil_decision_forests/utils:logging", + "//yggdrasil_decision_forests/utils:random", + "//yggdrasil_decision_forests/utils:test", + "//yggdrasil_decision_forests/utils:test_utils", + "//yggdrasil_decision_forests/utils:testing_macros", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.cc b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.cc new file mode 100644 index 00000000..9302dd0e --- /dev/null 
+++ b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.cc @@ -0,0 +1,459 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/types/optional.h" +#include "yggdrasil_decision_forests/dataset/data_spec.pb.h" +#include "yggdrasil_decision_forests/dataset/types.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/learner/abstract_learner.h" +#include "yggdrasil_decision_forests/learner/abstract_learner.pb.h" +#include "yggdrasil_decision_forests/learner/decision_tree/generic_parameters.h" +#include "yggdrasil_decision_forests/learner/decision_tree/training.h" +#include "yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.pb.h" +#include "yggdrasil_decision_forests/metric/metric.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.pb.h" 
+#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" +#include "yggdrasil_decision_forests/utils/concurrency.h" +#include "yggdrasil_decision_forests/utils/hyper_parameters.h" +#include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/status_macros.h" +#include "yggdrasil_decision_forests/utils/synchronization_primitives.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { + +namespace { + +using ::yggdrasil_decision_forests::model::isolation_forest::internal:: + Configuration; +using ::yggdrasil_decision_forests::model::isolation_forest::internal:: + DefaultMaximumDepth; +using ::yggdrasil_decision_forests::model::isolation_forest::internal:: + GetNumExamplesPerTrees; + +// Assembles and checks the configuration. +absl::StatusOr BuildConfig( + const IsolationForestLearner& learner, + const dataset::proto::DataSpecification& data_spec, + const UnsignedExampleIdx num_training_examples) { + internal::Configuration config; + + config.training_config = learner.training_config(); + config.if_config = config.training_config.MutableExtension( + isolation_forest::proto::isolation_forest_config); + + RETURN_IF_ERROR(AbstractLearner::LinkTrainingConfig( + config.training_config, data_spec, &config.config_link)); + + if (config.training_config.task() != model::proto::Task::ANOMALY_DETECTION) { + return absl::InvalidArgumentError(absl::StrCat( + "The ISOLATION_FOREST learner does not support the task ", + model::proto::Task_Name(config.training_config.task()), ".")); + } + + decision_tree::SetDefaultHyperParameters( + config.if_config->mutable_decision_tree()); + + if (!config.if_config->decision_tree().has_max_depth()) { + const auto num_examples_per_trees = + GetNumExamplesPerTrees(*config.if_config, num_training_examples); + config.if_config->mutable_decision_tree()->set_max_depth( + DefaultMaximumDepth(num_examples_per_trees)); + } + + 
if (!config.if_config->decision_tree().has_min_examples()) { + config.if_config->mutable_decision_tree()->set_min_examples(1); + } + + RETURN_IF_ERROR(learner.CheckConfiguration(data_spec, config.training_config, + config.config_link, + learner.deployment())); + + if (config.config_link.has_weight_definition()) { + return absl::InvalidArgumentError( + "Isolation forest does not support weights"); + } + return config; +} + +} // namespace + +namespace internal { + +absl::StatusOr FindSplit( + const Configuration& config, const dataset::VerticalDataset& train_dataset, + const std::vector& selected_examples, + decision_tree::NodeWithChildren* node, utils::RandomEngine* rnd) { + DCHECK_GT(selected_examples.size(), 0); + + // Sample the order in which features are tested. + // TODO: Use cache. + std::vector feature_order = {config.config_link.features().begin(), + config.config_link.features().end()}; + std::shuffle(feature_order.begin(), feature_order.end(), *rnd); + + // Test features one after another. + for (const auto& attribute_idx : feature_order) { + const auto& col_spec = train_dataset.data_spec().columns(attribute_idx); + if (col_spec.type() != dataset::proto::ColumnType::NUMERICAL) { + // TODO: Add support for other types of features. + continue; + } + + const auto na_replacement = col_spec.numerical().mean(); + ASSIGN_OR_RETURN( + const dataset::VerticalDataset::NumericalColumn* value_container, + train_dataset.ColumnWithCastWithStatus< + dataset::VerticalDataset::NumericalColumn>(attribute_idx)); + const auto& values = value_container->values(); + + // Find minimum and maximum value. 
+ float min_value; + float max_value; + UnsignedExampleIdx num_valid_examples = 0; + for (const auto example_idx : selected_examples) { + const auto value = values[example_idx]; + if (std::isnan(value)) { + continue; + } + if (num_valid_examples == 0 || value < min_value) { + min_value = value; + } + if (num_valid_examples == 0 || value > max_value) { + max_value = value; + } + num_valid_examples++; + } + + if (num_valid_examples == 0 || max_value == min_value) { + // Cannot split. + continue; + } + + // Randomly select a threshold in (min_value, max_value). + const float threshold = std::uniform_real_distribution( + std::nextafter(min_value, std::numeric_limits::max()), + max_value)(*rnd); + DCHECK_GT(threshold, min_value); + DCHECK_LE(threshold, max_value); + + // Count the number of positive examples. + UnsignedExampleIdx num_pos_examples = 0; + for (const auto example_idx : selected_examples) { + auto value = values[example_idx]; + if (std::isnan(value)) { + value = na_replacement; + } + if (value >= threshold) { + num_pos_examples++; + } + } + + DCHECK_GT(num_pos_examples, 0); + DCHECK_LT(num_pos_examples, selected_examples.size()); + + // Set split. + auto* condition = node->mutable_node()->mutable_condition(); + condition->set_attribute(attribute_idx); + condition->mutable_condition()->mutable_higher_condition()->set_threshold( + threshold); + condition->set_na_value(na_replacement >= threshold); + condition->set_num_training_examples_without_weight( + selected_examples.size()); + condition->set_num_pos_training_examples_without_weight(num_pos_examples); + + return true; + } + + return false; // No split found +} + +// Grows recursively a node. 
+absl::Status GrowNode(const Configuration& config, + const dataset::VerticalDataset& train_dataset, + const std::vector& selected_examples, + const int depth, decision_tree::NodeWithChildren* node, + utils::RandomEngine* rnd) { + if (selected_examples.empty()) { + return absl::InternalError("No examples fed to the node trainer"); + } + + const auto& dt_config = config.if_config->decision_tree(); + + // Set node value + node->mutable_node()->set_num_pos_training_examples_without_weight( + selected_examples.size()); + node->mutable_node() + ->mutable_anomaly_detection() + ->set_num_examples_without_weight(selected_examples.size()); + + // Stop growth + if (selected_examples.size() < dt_config.min_examples() || + (dt_config.max_depth() >= 0 && depth >= dt_config.max_depth())) { + node->FinalizeAsLeaf(dt_config.store_detailed_label_distribution()); + return absl::OkStatus(); + } + + // Look for a split + ASSIGN_OR_RETURN( + const bool found_condition, + FindSplit(config, train_dataset, selected_examples, node, rnd)); + + if (!found_condition) { + // No split found + node->FinalizeAsLeaf(dt_config.store_detailed_label_distribution()); + return absl::OkStatus(); + } + + // Turn the node into a non-leaf node + STATUS_CHECK_EQ( + selected_examples.size(), + node->node().condition().num_training_examples_without_weight()); + node->CreateChildren(); + node->FinalizeAsNonLeaf(dt_config.keep_non_leaf_label_distribution(), + dt_config.store_detailed_label_distribution()); + + // Branch examples to children + // TODO: Use cache to avoid re-allocating selected example + // buffers. 
+ std::vector positive_examples; + std::vector negative_examples; + RETURN_IF_ERROR(decision_tree::internal::SplitExamples( + train_dataset, selected_examples, node->node().condition(), false, + dt_config.internal_error_on_wrong_splitter_statistics(), + &positive_examples, &negative_examples)); + + // Split children + RETURN_IF_ERROR(GrowNode(config, train_dataset, positive_examples, depth + 1, + node->mutable_pos_child(), rnd)); + positive_examples = {}; // Release memory of "positive_examples". + RETURN_IF_ERROR(GrowNode(config, train_dataset, negative_examples, depth + 1, + node->mutable_neg_child(), rnd)); + return absl::OkStatus(); +} + +// Grows and return a tree. +absl::StatusOr> GrowTree( + const Configuration& config, const dataset::VerticalDataset& train_dataset, + const std::vector& selected_examples, + utils::RandomEngine* rnd) { + auto tree = std::make_unique(); + tree->CreateRoot(); + RETURN_IF_ERROR(GrowNode(config, train_dataset, selected_examples, + /*depth=*/0, tree->mutable_root(), rnd)); + return std::move(tree); +} + +int DefaultMaximumDepth(UnsignedExampleIdx num_examples_per_trees) { + return std::ceil(std::log2(num_examples_per_trees)); +} + +// Samples, without replacement, "num_examples_to_sample" example indices in +// [0, num_examples). The returned indices are sorted. If more examples are +// requested than available, all the examples are returned. +std::vector SampleExamples( + const UnsignedExampleIdx num_examples, + const UnsignedExampleIdx num_examples_to_sample, utils::RandomEngine* rnd) { + std::vector examples(num_examples); + std::iota(examples.begin(), examples.end(), 0); + std::shuffle(examples.begin(), examples.end(), *rnd); + // Clamp the sample size: growing the vector with "resize" would pad it with + // zeros, i.e. duplicate the example "0". + examples.resize(std::min(num_examples, num_examples_to_sample)); + examples.shrink_to_fit(); + std::sort(examples.begin(), examples.end()); + return examples; +} + +SignedExampleIdx GetNumExamplesPerTrees( + const proto::IsolationForestTrainingConfig& if_config, + const SignedExampleIdx num_training_examples) { + switch (if_config.sampling_method_case()) { + case proto::IsolationForestTrainingConfig::kSubsampleRatio: + return static_cast( + std::ceil(static_cast(if_config.subsample_ratio()) * + num_training_examples)); + default: + return
if_config.subsample_count(); + } +} + +} // namespace internal + +IsolationForestLearner::IsolationForestLearner( + const model::proto::TrainingConfig& training_config) + : AbstractLearner(training_config) {} + +absl::Status IsolationForestLearner::SetHyperParametersImpl( + utils::GenericHyperParameterConsumer* generic_hyper_params) { + RETURN_IF_ERROR( + AbstractLearner::SetHyperParametersImpl(generic_hyper_params)); + const auto& if_config = training_config_.MutableExtension( + isolation_forest::proto::isolation_forest_config); + + // Decision tree specific hyper-parameters. + absl::flat_hash_set consumed_hparams; + RETURN_IF_ERROR(decision_tree::SetHyperParameters( + &consumed_hparams, if_config->mutable_decision_tree(), + generic_hyper_params)); + + { + const auto hparam = generic_hyper_params->Get(kHParamNumTrees); + if (hparam.has_value()) { + if_config->set_num_trees(hparam.value().value().integer()); + } + } + + { + const auto hparam = generic_hyper_params->Get(kHParamSubsampleRatio); + if (hparam.has_value()) { + if_config->set_subsample_ratio(hparam.value().value().real()); + } + } + + { + const auto hparam = generic_hyper_params->Get(kHParamSubsampleCount); + if (hparam.has_value()) { + if_config->set_subsample_count(hparam.value().value().integer()); + } + } + + return absl::OkStatus(); +} + +absl::StatusOr +IsolationForestLearner::GetGenericHyperParameterSpecification() const { + ASSIGN_OR_RETURN(auto hparam_def, + AbstractLearner::GetGenericHyperParameterSpecification()); + model::proto::TrainingConfig config; + const auto proto_path = "learner/isolation_forest/isolation_forest.proto"; + + hparam_def.mutable_documentation()->set_description( + R"(An Isolation Forest (https://ieeexplore.ieee.org/abstract/document/4781136) is a collection of decision trees trained without labels and independently to partition the feature space. 
The Isolation Forest prediction is an anomaly score that indicates whether an example originates from a same distribution to the training examples. We refer to Isolation Forest as both the original algorithm by Liu et al. and its extensions.)"); + + // "config" is default-constructed: reading it yields the proto defaults. + const auto& if_config = + config.GetExtension(isolation_forest::proto::isolation_forest_config); + + { + auto& param = hparam_def.mutable_fields()->operator[](kHParamNumTrees); + param.mutable_integer()->set_minimum(0); + param.mutable_integer()->set_default_value(if_config.num_trees()); + param.mutable_documentation()->set_proto_path(proto_path); + param.mutable_documentation()->set_description( + R"(Number of individual decision trees. Increasing the number of trees can increase the quality of the model at the expense of size, training speed, and inference latency.)"); + } + + { + // The default comes from "subsample_count", not from "num_trees". + auto& param = + hparam_def.mutable_fields()->operator[](kHParamSubsampleCount); + param.mutable_integer()->set_minimum(0); + param.mutable_integer()->set_default_value(if_config.subsample_count()); + param.mutable_documentation()->set_proto_path(proto_path); + param.mutable_documentation()->set_description( + R"(Number of examples used to grow each tree. Only one of "subsample_ratio" and "subsample_count" can be set. If neither is set, "subsample_count" is assumed to be equal to 256. This is the default value recommended in the isolation forest paper.)"); + } + + { + // "subsample_ratio" is real-valued: "SetHyperParametersImpl" reads it with + // ".real()", so it is declared as a real hyper-parameter here. + auto& param = + hparam_def.mutable_fields()->operator[](kHParamSubsampleRatio); + param.mutable_real()->set_minimum(0); + param.mutable_real()->set_default_value(if_config.subsample_ratio()); + param.mutable_documentation()->set_proto_path(proto_path); + param.mutable_documentation()->set_description( + R"(Ratio of number of training examples used to grow each tree. Only one of "subsample_ratio" and "subsample_count" can be set. If neither is set, "subsample_count" is assumed to be equal to 256.
This is the default value recommended in the isolation forest paper.)"); + } + + RETURN_IF_ERROR(decision_tree::GetGenericHyperParameterSpecification( + if_config.decision_tree(), &hparam_def)); + return hparam_def; +} + +absl::StatusOr> +IsolationForestLearner::TrainWithStatusImpl( + const dataset::VerticalDataset& train_dataset, + absl::optional> + valid_dataset) const { + RETURN_IF_ERROR(dataset::CheckNumExamples(train_dataset.nrow())); + + ASSIGN_OR_RETURN( + const internal::Configuration config, + BuildConfig(*this, train_dataset.data_spec(), train_dataset.nrow())); + + auto model = absl::make_unique(); + InitializeModelWithAbstractTrainingConfig(config.training_config, + config.config_link, model.get()); + model->set_data_spec(train_dataset.data_spec()); + model->set_num_examples_per_trees( + GetNumExamplesPerTrees(*config.if_config, train_dataset.nrow())); + + YDF_LOG(INFO) << "Training isolation forest on " << train_dataset.nrow() + << " example(s) and " << config.config_link.features_size() + << " feature(s)."; + + utils::RandomEngine global_random(config.training_config.random_seed()); + + absl::Status global_status; + utils::concurrency::Mutex global_mutex; + { + yggdrasil_decision_forests::utils::concurrency::ThreadPool pool( + "TrainIF", deployment().num_threads()); + pool.StartWorkers(); + const auto num_trees = config.if_config->num_trees(); + model->mutable_decision_trees()->resize(num_trees); + for (int tree_idx = 0; tree_idx < num_trees; tree_idx++) { + pool.Schedule([&train_dataset, &model, &config, tree_idx, &global_status, + &global_mutex, seed = global_random()]() { + { + utils::concurrency::MutexLock lock(&global_mutex); + if (!global_status.ok()) { + return; + } + } + utils::RandomEngine local_random(seed); + const auto selected_examples = internal::SampleExamples( + train_dataset.nrow(), model->num_examples_per_trees(), + &local_random); + auto tree_or = + GrowTree(config, train_dataset, selected_examples, &local_random); + if 
(!tree_or.ok()) { + utils::concurrency::MutexLock lock(&global_mutex); + global_status.Update(tree_or.status()); + return; + } + (*model->mutable_decision_trees())[tree_idx] = std::move(*tree_or); + }); + } + } + // Propagate any error recorded by a worker. Without this check, the error is + // dropped and the trees that failed to train are left unset in the model. + RETURN_IF_ERROR(global_status); + decision_tree::SetLeafIndices(model->mutable_decision_trees()); + return std::move(model); +} + +} // namespace yggdrasil_decision_forests::model::isolation_forest diff --git a/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h new file mode 100644 index 00000000..7d1b3450 --- /dev/null +++ b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h @@ -0,0 +1,115 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Isolation Forest learner.
+#ifndef YGGDRASIL_DECISION_FORESTS_LEARNER_ISOLATION_FOREST_H_ +#define YGGDRASIL_DECISION_FORESTS_LEARNER_ISOLATION_FOREST_H_ + +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/types/optional.h" +#include "yggdrasil_decision_forests/dataset/types.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/learner/abstract_learner.h" +#include "yggdrasil_decision_forests/learner/abstract_learner.pb.h" +#include "yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/utils/hyper_parameters.h" +#include "yggdrasil_decision_forests/utils/random.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { + +class IsolationForestLearner : public AbstractLearner { + public: + explicit IsolationForestLearner( + const model::proto::TrainingConfig& training_config); + + inline static constexpr char kRegisteredName[] = "ISOLATION_FOREST"; + inline static constexpr char kHParamNumTrees[] = "num_trees"; + inline static constexpr char kHParamSubsampleRatio[] = "subsample_ratio"; + inline static constexpr char kHParamSubsampleCount[] = "subsample_count"; + // TODO: Add all hyper-parameters. 
+ + absl::StatusOr> TrainWithStatusImpl( + const dataset::VerticalDataset& train_dataset, + absl::optional> + valid_dataset) const override; + + absl::Status SetHyperParametersImpl( + utils::GenericHyperParameterConsumer* generic_hyper_params) override; + + absl::StatusOr + GetGenericHyperParameterSpecification() const override; + + // This learner does not require a label. + model::proto::LearnerCapabilities Capabilities() const override { + model::proto::LearnerCapabilities capabilities; + capabilities.set_require_label(false); + return capabilities; + } +}; + +REGISTER_AbstractLearner(IsolationForestLearner, + IsolationForestLearner::kRegisteredName); + +namespace internal { + +struct Configuration { + model::proto::TrainingConfig training_config; + model::proto::TrainingConfigLinking config_link; + // "if_config" is a non-owning pointer to a sub-component of + // "training_config". + // NOTE(review): a copy of this struct keeps "if_config" pointing into the + // source object's "training_config" -- confirm instances are only moved. + proto::IsolationForestTrainingConfig* if_config = nullptr; +}; + +// Gets the number of examples used to grow each tree. +SignedExampleIdx GetNumExamplesPerTrees( + const proto::IsolationForestTrainingConfig& if_config, + SignedExampleIdx num_training_examples); + +// Samples the examples used to grow a tree. +std::vector SampleExamples( + UnsignedExampleIdx num_examples, UnsignedExampleIdx num_examples_to_sample, + utils::RandomEngine* rnd); + +// Default maximum depth hyper-parameter according to the number of examples +// used to grow each tree. +int DefaultMaximumDepth(UnsignedExampleIdx num_examples_per_trees); + +// Finds a split (i.e. condition) for a node. +// +// A split is randomly sampled and returned. +// A valid split always sends at least one training example to each branch. If +// no valid split can be generated, "FindSplit" returns false and no split is +// set. If a valid split is sampled, the condition of "node" is set and the +// function returns true. +// +// This function currently only implements the original isolation forest +// algorithm: Only splits of the form "X >= threshold" are generated.
The +// threshold is uniformly sampled between the minimum and maximum values +// observed in the training examples reaching this node +absl::StatusOr FindSplit( + const Configuration& config, const dataset::VerticalDataset& train_dataset, + const std::vector& selected_examples, + decision_tree::NodeWithChildren* node, utils::RandomEngine* rnd); + +} // namespace internal + +} // namespace yggdrasil_decision_forests::model::isolation_forest +#endif // YGGDRASIL_DECISION_FORESTS_LEARNER_ISOLATION_FOREST_H_ diff --git a/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.proto b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.proto new file mode 100644 index 00000000..3cfafc78 --- /dev/null +++ b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.proto @@ -0,0 +1,48 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto2"; + +package yggdrasil_decision_forests.model.isolation_forest.proto; + +import "yggdrasil_decision_forests/learner/abstract_learner.proto"; +import "yggdrasil_decision_forests/learner/decision_tree/decision_tree.proto"; + +option java_outer_classname = "IsolationForestLearner"; + +// Training configuration for the Isolation Forest algorithm. +message IsolationForestTrainingConfig { + // Next ID: 5 + + // Decision tree specific parameters. 
+ optional decision_tree.proto.DecisionTreeTrainingConfig decision_tree = 1; + + // Number of trees in the forest. + optional int32 num_trees = 2 [default = 300]; + + // Number of examples used to grow each tree. Only one of "subsample_ratio" + // and "subsample_count" can be set. If neither is set, "subsample_count" is + // assumed to be equal to 256. This is the default value recommended in the + // isolation forest paper + // (https://ieeexplore.ieee.org/abstract/document/4781136). + oneof sampling_method { + float subsample_ratio = 3; + int32 subsample_count = 4 [default = 256]; + } +} + +extend model.proto.TrainingConfig { + optional IsolationForestTrainingConfig isolation_forest_config = 1007; +} diff --git a/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest_test.cc b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest_test.cc new file mode 100644 index 00000000..db5380ed --- /dev/null +++ b/yggdrasil_decision_forests/learner/isolation_forest/isolation_forest_test.cc @@ -0,0 +1,245 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "yggdrasil_decision_forests/dataset/data_spec.h" +#include "yggdrasil_decision_forests/dataset/data_spec_inference.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" +#include "yggdrasil_decision_forests/learner/abstract_learner.pb.h" +#include "yggdrasil_decision_forests/learner/isolation_forest/isolation_forest.pb.h" +#include "yggdrasil_decision_forests/learner/learner_library.h" +#include "yggdrasil_decision_forests/metric/metric.pb.h" +#include "yggdrasil_decision_forests/metric/report.h" +#include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" +#include "yggdrasil_decision_forests/utils/filesystem.h" +#include "yggdrasil_decision_forests/utils/logging.h" +#include "yggdrasil_decision_forests/utils/random.h" +#include "yggdrasil_decision_forests/utils/test.h" +#include "yggdrasil_decision_forests/utils/test_utils.h" +#include "yggdrasil_decision_forests/utils/testing_macros.h" + +namespace yggdrasil_decision_forests::model::isolation_forest { +namespace { + +using test::StatusIs; +using ::testing::ElementsAre; + +class IsolationForestOnGaussians : public utils::TrainAndTestTester { + proto::IsolationForestTrainingConfig* if_config() { + return train_config_.MutableExtension( + isolation_forest::proto::isolation_forest_config); + } + + void SetUp() override { + train_config_.set_learner(IsolationForestLearner::kRegisteredName); + train_config_.set_task(model::proto::Task::ANOMALY_DETECTION); + train_config_.add_features("f.*"); + train_config_.set_label("label"); + dataset_filename_ = 
"gaussians_train.csv"; + eval_options_.set_task(model::proto::Task::ANOMALY_DETECTION); + + if_config()->set_subsample_count(100); + } +}; + +TEST_F(IsolationForestOnGaussians, DefaultHyperParameters) { + TrainAndEvaluateModel(); + YDF_LOG(INFO) << "Model:\n" << model_->DescriptionAndStatistics(true); + + utils::RandomEngine rnd; + metric::proto::EvaluationOptions options; + options.set_task(model::proto::Task::CLASSIFICATION); + ASSERT_OK_AND_ASSIGN( + const auto evaluation, + model_->EvaluateOverrideType(test_dataset_, options, + model::proto::Task::CLASSIFICATION, + model_->label_col_idx(), -1, &rnd)); + + YDF_LOG(INFO) << "Evaluation:\n" << metric::TextReport(evaluation).value(); + EXPECT_NEAR(evaluation.classification().rocs(1).auc(), 0.99, 0.005f); + + EXPECT_EQ(model_->task(), model::proto::Task::ANOMALY_DETECTION); + EXPECT_EQ(model_->label_col_idx(), 2); + EXPECT_THAT(model_->input_features(), ElementsAre(0, 1)); + + auto if_model = dynamic_cast(model_.get()); + EXPECT_EQ(if_model->num_trees(), 300); + EXPECT_GT(if_model->NumNodes(), if_model->num_trees() * 32); +} + +class IsolationForestOnAdult : public utils::TrainAndTestTester { + proto::IsolationForestTrainingConfig* if_config() { + return train_config_.MutableExtension( + isolation_forest::proto::isolation_forest_config); + } + + void SetUp() override { + train_config_.set_learner(IsolationForestLearner::kRegisteredName); + train_config_.set_task(model::proto::Task::ANOMALY_DETECTION); + train_config_.set_label("income"); + dataset_filename_ = "adult_train.csv"; + eval_options_.set_task(model::proto::Task::ANOMALY_DETECTION); + + if_config()->set_subsample_count(100); + } +}; + +TEST_F(IsolationForestOnAdult, DefaultHyperParameters) { + TrainAndEvaluateModel(); + YDF_LOG(INFO) << "Model:\n" << model_->DescriptionAndStatistics(true); +} + +class IsolationForestOnMammographicMasses : public utils::TrainAndTestTester { + proto::IsolationForestTrainingConfig* if_config() { + return 
train_config_.MutableExtension( + isolation_forest::proto::isolation_forest_config); + } + + void SetUp() override { + train_config_.set_learner(IsolationForestLearner::kRegisteredName); + train_config_.set_task(model::proto::Task::ANOMALY_DETECTION); + train_config_.set_label("Severity"); + dataset_filename_ = "mammographic_masses.csv"; + eval_options_.set_task(model::proto::Task::ANOMALY_DETECTION); + + if_config()->set_subsample_count(100); + } +}; + +TEST_F(IsolationForestOnMammographicMasses, DefaultHyperParameters) { + TrainAndEvaluateModel(); + YDF_LOG(INFO) << "Model:\n" << model_->DescriptionAndStatistics(true); +} + +TEST(IsolationForest, BadTask) { + std::string dataset_path = absl::StrCat( + "csv:", file::JoinPath(test::DataRootDirectory(), + "yggdrasil_decision_forests/" + "test_data/dataset/gaussians_train.csv")); + + ASSERT_OK_AND_ASSIGN(auto dataspec, dataset::CreateDataSpec(dataset_path)); + + model::proto::TrainingConfig train_config; + train_config.set_learner(IsolationForestLearner::kRegisteredName); + train_config.set_task(model::proto::Task::CLASSIFICATION); + train_config.add_features("f.*"); + train_config.set_label("label"); + ASSERT_OK_AND_ASSIGN(auto learner, model::GetLearner(train_config)); + + EXPECT_THAT(learner->TrainWithStatus(dataset_path, dataspec).status(), + StatusIs(absl::StatusCode::kInvalidArgument)); +} + +TEST(DefaultMaximumDepth, Base) { + EXPECT_EQ(internal::DefaultMaximumDepth(254), 8); + EXPECT_EQ(internal::DefaultMaximumDepth(255), 8); + EXPECT_EQ(internal::DefaultMaximumDepth(256), 8); + EXPECT_EQ(internal::DefaultMaximumDepth(257), 9); +} + +TEST(SampleExamples, Base) { + utils::RandomEngine rnd; + const auto samples = internal::SampleExamples(100, 10, &rnd); + EXPECT_EQ(samples.size(), 10); + EXPECT_TRUE(std::is_sorted(samples.begin(), samples.end())); + + // Look for duplicates + for (int i = 1; i < samples.size(); i++) { + EXPECT_LT(samples[i - 1], samples[i]); + } +} + +TEST(GetNumExamplesPerTrees, Default) { + 
proto::IsolationForestTrainingConfig if_config; + EXPECT_EQ(internal::GetNumExamplesPerTrees(if_config, 1000), 256); +} + +TEST(GetNumExamplesPerTrees, Count) { + proto::IsolationForestTrainingConfig if_config; + if_config.set_subsample_count(5); + EXPECT_EQ(internal::GetNumExamplesPerTrees(if_config, 100), 5); +} + +TEST(GetNumExamplesPerTrees, Rate) { + proto::IsolationForestTrainingConfig if_config; + if_config.set_subsample_ratio(0.5f); + EXPECT_EQ(internal::GetNumExamplesPerTrees(if_config, 100), 50); +} + +TEST(FindSplit, Numerical) { + for (int seed = 0; seed < 10; seed++) { + // This is a stochastic test. + utils::RandomEngine rnd(seed); + + internal::Configuration config; + config.config_link.add_features(0); // Only select "f1". + + decision_tree::NodeWithChildren node; + + dataset::VerticalDataset dataset; + dataset::AddNumericalColumn("f1", dataset.mutable_data_spec()); + dataset::AddNumericalColumn("f2", dataset.mutable_data_spec()); + ASSERT_OK(dataset.CreateColumnsFromDataspec()); + ASSERT_OK_AND_ASSIGN(auto* column, + dataset.MutableColumnWithCastWithStatus< + dataset::VerticalDataset::NumericalColumn>(0)); + *column->mutable_values() = {1, 2, 4, 100}; + + ASSERT_OK_AND_ASSIGN( + const bool found_condition, + FindSplit(config, dataset, + {0, 1, 2}, // Don't select the example with value "100". + &node, &rnd)); + EXPECT_TRUE(found_condition); + EXPECT_EQ(node.node().condition().attribute(), 0); // Always "f1". + EXPECT_TRUE(node.node().condition().condition().has_higher_condition()); + const float threshold = + node.node().condition().condition().higher_condition().threshold(); + EXPECT_GE(threshold, 1.0f); + EXPECT_LE(threshold, 4.0f); // The value 100 is clearly ignored. 
+ } +} + +TEST(GetGenericHyperParameterSpecification, Base) { + model::proto::TrainingConfig train_config; + train_config.set_learner(IsolationForestLearner::kRegisteredName); + train_config.set_task(model::proto::Task::ANOMALY_DETECTION); + ASSERT_OK_AND_ASSIGN(auto learner, model::GetLearner(train_config)); + + ASSERT_OK_AND_ASSIGN(const auto hp_specs, + learner->GetGenericHyperParameterSpecification()); + + for (absl::string_view field : { + IsolationForestLearner::kHParamNumTrees, + IsolationForestLearner::kHParamSubsampleRatio, + IsolationForestLearner::kHParamSubsampleCount, + }) { + EXPECT_TRUE(hp_specs.fields().contains(field)); + } +} + +} // namespace +} // namespace yggdrasil_decision_forests::model::isolation_forest diff --git a/yggdrasil_decision_forests/test_data/dataset/mammographic_masses.csv b/yggdrasil_decision_forests/test_data/dataset/mammographic_masses.csv new file mode 100644 index 00000000..3a09f575 --- /dev/null +++ b/yggdrasil_decision_forests/test_data/dataset/mammographic_masses.csv @@ -0,0 +1,962 @@ +BI-RADS,Age,Shape,Margin,Density,Severity +5,67,3,5,3,1 +4,43,1,1,,1 +5,58,4,5,3,1 +4,28,1,1,3,0 +5,74,1,5,,1 +4,65,1,,3,0 +4,70,,,3,0 +5,42,1,,3,0 +5,57,1,5,3,1 +5,60,,5,1,1 +5,76,1,4,3,1 +3,42,2,1,3,1 +4,64,1,,3,0 +4,36,3,1,2,0 +4,60,2,1,2,0 +4,54,1,1,3,0 +3,52,3,4,3,0 +4,59,2,1,3,1 +4,54,1,1,3,1 +4,40,1,,,0 +,66,,,1,1 +5,56,4,3,1,1 +4,43,1,,,0 +5,42,4,4,3,1 +4,59,2,4,3,1 +5,75,4,5,3,1 +2,66,1,1,,0 +5,63,3,,3,0 +5,45,4,5,3,1 +5,55,4,4,3,0 +4,46,1,5,2,0 +5,54,4,4,3,1 +5,57,4,4,3,1 +4,39,1,1,2,0 +4,81,1,1,3,0 +4,77,3,,,0 +4,60,2,1,3,0 +5,67,3,4,2,1 +4,48,4,5,,1 +4,55,3,4,2,0 +4,59,2,1,,0 +4,78,1,1,1,0 +4,50,1,1,3,0 +4,61,2,1,,0 +5,62,3,5,2,1 +5,44,2,4,,1 +5,64,4,5,3,1 +4,23,1,1,,0 +2,42,,,4,0 +5,67,4,5,3,1 +4,74,2,1,2,0 +5,80,3,5,3,1 +4,23,1,1,,0 +4,63,2,1,,0 +4,53,,5,3,1 +4,43,3,4,,0 +4,49,2,1,1,0 +5,51,2,4,,0 +4,45,2,1,,0 +5,59,2,,,1 +5,52,4,3,3,1 +5,60,4,3,3,1 +4,57,2,5,3,0 +3,57,2,1,,0 +5,74,4,4,3,1 +4,25,2,1,,0 +4,49,1,1,3,0 
+5,72,4,3,,1 +4,45,2,1,3,0 +4,64,2,1,3,0 +4,73,2,1,2,0 +5,68,4,3,3,1 +5,52,4,5,3,0 +5,66,4,4,3,1 +5,70,,4,,1 +4,25,1,1,3,0 +5,74,1,1,2,1 +4,64,1,1,3,0 +5,60,4,3,2,1 +5,67,2,4,1,0 +4,67,4,5,3,0 +5,44,4,4,2,1 +3,68,1,1,3,1 +4,57,,4,1,0 +5,51,4,,,1 +4,33,1,,,0 +5,58,4,4,3,1 +5,36,1,,,0 +4,63,1,1,,0 +5,62,1,5,3,1 +4,73,3,4,3,1 +4,80,4,4,3,1 +4,67,1,1,,0 +5,59,2,1,3,1 +5,60,1,,3,0 +5,54,4,4,3,1 +4,40,1,1,,0 +4,47,2,1,,0 +5,62,4,4,3,0 +4,33,2,1,3,0 +5,59,2,,,0 +4,65,2,,,0 +4,58,4,4,,0 +4,29,2,,,0 +4,58,1,1,,0 +4,54,1,1,,0 +4,44,1,1,,1 +3,34,2,1,,0 +4,57,1,1,3,0 +5,33,4,4,,1 +4,45,4,4,3,0 +5,71,4,4,3,1 +5,59,4,4,2,0 +4,56,2,1,,0 +4,40,3,4,,0 +4,56,1,1,3,0 +4,45,2,1,,0 +4,57,2,1,2,0 +5,55,3,4,3,1 +5,84,4,5,3,0 +5,51,4,4,3,1 +4,43,1,1,,0 +4,24,2,1,2,0 +4,66,1,1,3,0 +5,33,4,4,3,0 +4,59,4,3,2,0 +4,76,2,3,,0 +4,40,1,1,,0 +4,52,,4,,0 +5,40,4,5,3,1 +5,67,4,4,3,1 +5,75,4,3,3,1 +5,86,4,4,3,0 +4,60,2,,,0 +5,66,4,4,3,1 +5,46,4,5,3,1 +4,59,4,4,3,1 +5,65,4,4,3,1 +4,53,1,1,3,0 +5,67,3,5,3,1 +5,80,4,5,3,1 +4,55,2,1,3,0 +4,48,1,1,,0 +4,47,1,1,2,0 +4,50,2,1,,0 +5,62,4,5,3,1 +5,63,4,4,3,1 +4,63,4,,3,1 +4,71,4,4,3,1 +4,41,1,1,3,0 +5,57,4,4,4,1 +5,71,4,4,4,1 +4,66,1,1,3,0 +4,47,2,4,2,0 +3,34,4,4,3,0 +4,59,3,4,3,0 +5,55,2,,,1 +4,51,,,3,0 +4,62,2,1,,0 +4,58,4,,3,1 +5,67,4,4,3,1 +4,41,2,1,3,0 +4,23,3,1,3,0 +4,53,,4,3,0 +4,42,2,1,3,0 +5,87,4,5,3,1 +4,68,1,1,3,1 +4,64,1,1,3,0 +5,54,3,5,3,1 +5,86,4,5,3,1 +4,21,2,1,3,0 +4,39,1,1,,0 +4,53,4,4,3,0 +4,44,4,4,3,0 +4,54,1,1,3,0 +5,63,4,5,3,1 +4,62,2,1,,0 +4,45,2,1,2,0 +5,71,4,5,3,0 +5,49,4,4,3,1 +4,49,4,4,3,0 +5,66,4,4,4,0 +4,19,1,1,3,0 +4,35,1,1,2,0 +4,71,3,3,,1 +5,74,4,5,3,1 +5,37,4,4,3,1 +4,67,1,,3,0 +5,81,3,4,3,1 +5,59,4,4,3,1 +4,34,1,1,3,0 +5,79,4,3,3,1 +5,60,3,1,3,0 +4,41,1,1,3,1 +4,50,1,1,3,0 +5,85,4,4,3,1 +4,46,1,1,3,0 +5,66,4,4,3,1 +4,73,3,1,2,0 +4,55,1,1,3,0 +4,49,2,1,3,0 +3,49,4,4,3,0 +4,51,4,5,3,1 +2,48,4,4,3,0 +4,58,4,5,3,0 +5,72,4,5,3,1 +4,46,2,3,3,0 +4,43,4,3,3,1 +,52,4,4,3,0 +4,66,2,1,,0 +4,46,1,1,1,0 +4,69,3,1,3,0 +2,59,1,1,,1 
+5,43,2,1,3,1 +5,76,4,5,3,1 +4,46,1,1,3,0 +4,59,2,4,3,0 +4,57,1,1,3,0 +5,43,4,5,,0 +3,45,2,1,3,0 +3,43,2,1,3,0 +4,45,2,1,3,0 +5,57,4,5,3,1 +5,79,4,4,3,1 +5,54,2,1,3,1 +4,40,3,4,3,0 +5,63,4,4,3,1 +2,55,1,,1,0 +4,52,2,1,3,0 +4,38,1,1,3,0 +3,72,4,3,3,0 +5,80,4,3,3,1 +5,76,4,3,3,1 +4,62,3,1,3,0 +5,64,4,5,3,1 +5,42,4,5,3,0 +3,60,,3,1,0 +4,64,4,5,3,0 +4,63,4,4,3,1 +4,24,2,1,2,0 +5,72,4,4,3,1 +4,63,2,1,3,0 +4,46,1,1,3,0 +3,33,1,1,3,0 +5,76,4,4,3,1 +4,36,2,3,3,0 +4,40,2,1,3,0 +5,58,1,5,3,1 +4,43,2,1,3,0 +3,42,1,1,3,0 +4,32,1,1,3,0 +5,57,4,4,2,1 +4,37,1,1,3,0 +4,70,4,4,3,1 +5,56,4,2,3,1 +3,76,,3,2,0 +5,73,4,4,3,1 +5,77,4,5,3,1 +5,67,4,4,1,1 +5,71,4,3,3,1 +5,65,4,4,3,1 +4,43,1,1,3,0 +4,40,2,1,,0 +4,49,2,1,3,0 +5,76,4,2,3,1 +4,55,4,4,3,0 +5,72,4,5,3,1 +3,53,4,3,3,0 +5,75,4,4,3,1 +5,61,4,5,3,1 +5,67,4,4,3,1 +5,55,4,2,3,1 +5,66,4,4,3,1 +2,76,1,1,2,0 +4,57,4,4,3,1 +5,71,3,1,3,0 +5,70,4,5,3,1 +4,35,4,2,,0 +5,79,1,,3,1 +4,63,2,1,3,0 +5,40,1,4,3,1 +4,41,1,1,3,0 +4,47,2,1,2,0 +4,68,1,1,3,1 +4,64,4,3,3,1 +4,65,4,4,,1 +4,73,4,3,3,0 +4,39,4,3,3,0 +5,55,4,5,4,1 +5,53,3,4,4,0 +5,66,4,4,3,1 +4,43,3,1,2,0 +5,44,4,5,3,1 +4,77,4,4,3,1 +4,62,2,4,3,0 +5,80,4,4,3,1 +4,33,4,4,3,0 +4,50,4,5,3,1 +4,71,1,,3,0 +5,46,4,4,3,1 +5,49,4,5,3,1 +4,53,1,1,3,0 +3,46,2,1,2,0 +4,57,1,1,3,0 +4,54,3,1,3,0 +4,54,1,,,0 +2,49,2,1,2,0 +4,47,3,1,3,0 +4,40,1,1,3,0 +4,45,1,1,3,0 +4,50,4,5,3,1 +5,54,4,4,3,1 +4,67,4,1,3,1 +4,77,4,4,3,1 +4,66,4,3,3,0 +4,71,2,,3,1 +4,36,2,3,3,0 +4,69,4,4,3,0 +4,48,1,1,3,0 +4,64,4,4,3,1 +4,71,4,2,3,1 +5,60,4,3,3,1 +4,24,1,1,3,0 +5,34,4,5,2,1 +4,79,1,1,2,0 +4,45,1,1,3,0 +4,37,2,1,2,0 +4,42,1,1,2,0 +4,72,4,4,3,1 +5,60,4,5,3,1 +5,85,3,5,3,1 +4,51,1,1,3,0 +5,54,4,5,3,1 +5,55,4,3,3,1 +4,64,4,4,3,0 +5,67,4,5,3,1 +5,75,4,3,3,1 +5,87,4,4,3,1 +4,46,4,4,3,1 +4,59,2,1,,0 +55,46,4,3,3,1 +5,61,1,1,3,1 +4,44,1,4,3,0 +4,32,1,1,3,0 +4,62,1,1,3,0 +5,59,4,5,3,1 +4,61,4,1,3,0 +5,78,4,4,3,1 +5,42,4,5,3,0 +4,45,1,2,3,0 +5,34,2,1,3,1 +5,39,4,3,,1 +4,27,3,1,3,0 +4,43,1,1,3,0 +5,83,4,4,3,1 +4,36,2,1,3,0 
+4,37,2,1,3,0 +4,56,3,1,3,1 +5,55,4,4,3,1 +5,46,3,,3,0 +4,88,4,4,3,1 +5,71,4,4,3,1 +4,41,2,1,3,0 +5,49,4,4,3,1 +3,51,1,1,4,0 +4,39,1,3,3,0 +4,46,2,1,3,0 +5,52,4,4,3,1 +5,58,4,4,3,1 +4,67,4,5,3,1 +5,80,4,4,3,1 +3,46,1,,,0 +3,43,1,,,0 +4,45,1,1,3,0 +5,68,4,4,3,1 +4,54,4,4,,1 +4,44,2,3,3,0 +5,74,4,3,3,1 +5,55,4,5,3,0 +4,49,4,4,3,1 +4,49,1,1,3,0 +5,50,4,3,3,1 +5,52,3,5,3,1 +4,45,1,1,3,0 +4,66,1,1,3,0 +4,68,4,4,3,1 +4,72,2,1,3,0 +5,64,,,3,0 +2,49,,3,3,0 +3,44,,4,3,0 +5,74,4,4,3,1 +5,58,4,4,3,1 +4,77,2,3,3,0 +4,49,3,1,3,0 +4,34,,,4,0 +5,60,4,3,3,1 +5,69,4,3,3,1 +4,53,2,1,3,0 +3,46,3,4,3,0 +5,74,4,4,3,1 +4,58,1,1,3,0 +5,68,4,4,3,1 +5,46,4,3,3,0 +5,61,2,4,3,1 +5,70,4,3,3,1 +5,37,4,4,3,1 +3,65,4,5,3,1 +4,67,4,4,3,0 +5,69,3,4,3,0 +5,76,4,4,3,1 +4,65,4,3,3,0 +5,72,4,2,3,1 +4,62,4,2,3,0 +5,42,4,4,3,1 +5,66,4,3,3,1 +5,48,4,4,3,1 +4,35,1,1,3,0 +5,60,4,4,3,1 +5,67,4,2,3,1 +5,78,4,4,3,1 +4,66,1,1,3,1 +4,26,1,1,,0 +4,48,1,1,3,0 +4,31,1,1,3,0 +5,43,4,3,3,1 +5,72,2,4,3,0 +5,66,1,1,3,1 +4,56,4,4,3,0 +5,58,4,5,3,1 +5,33,2,4,3,1 +4,37,1,1,3,0 +5,36,4,3,3,1 +4,39,2,3,3,0 +4,39,4,4,3,1 +5,83,4,4,3,1 +4,68,4,5,3,1 +5,63,3,4,3,1 +5,78,4,4,3,1 +4,38,2,3,3,0 +5,46,4,3,3,1 +5,60,4,4,3,1 +5,56,2,3,3,1 +4,33,1,1,3,0 +4,,4,5,3,1 +4,69,1,5,3,1 +5,66,1,4,3,1 +4,72,1,3,3,0 +4,29,1,1,3,0 +5,54,4,5,3,1 +5,80,4,4,3,1 +5,68,4,3,3,1 +4,35,2,1,3,0 +4,57,3,,3,0 +5,,4,4,3,1 +4,50,1,1,3,0 +4,32,4,3,3,0 +0,69,4,5,3,1 +4,71,4,5,3,1 +5,87,4,5,3,1 +3,40,2,,3,0 +4,31,1,1,,0 +4,64,1,1,3,0 +5,55,4,5,3,1 +4,18,1,1,3,0 +3,50,2,1,,0 +4,53,1,1,3,0 +5,84,4,5,3,1 +5,80,4,3,3,1 +4,32,1,1,3,0 +5,77,3,4,3,1 +4,38,1,1,3,0 +5,54,4,5,3,1 +4,63,1,1,3,0 +4,61,1,1,3,0 +4,52,1,1,3,0 +4,36,1,1,3,0 +4,41,,,3,0 +4,59,1,1,3,0 +5,51,4,4,2,1 +4,36,1,1,3,0 +5,40,4,3,3,1 +4,49,1,1,3,0 +4,37,2,3,3,0 +4,46,1,1,3,0 +4,63,1,1,3,0 +4,28,2,1,3,0 +4,47,2,1,3,0 +4,42,2,1,3,1 +5,44,4,5,3,1 +4,49,4,4,3,0 +5,47,4,5,3,1 +5,52,4,5,3,1 +4,53,1,1,3,1 +5,83,3,3,3,1 +4,50,4,4,,1 +5,63,4,4,3,1 +4,82,,5,3,1 +4,54,1,1,3,0 +4,50,4,4,3,0 +5,80,4,5,3,1 
+5,45,2,4,3,0 +5,59,4,4,,1 +4,28,2,1,3,0 +4,31,1,1,3,0 +4,41,2,1,3,0 +4,21,3,1,3,0 +5,44,3,4,3,1 +5,49,4,4,3,1 +5,71,4,5,3,1 +5,75,4,5,3,1 +4,38,2,1,3,0 +4,60,1,3,3,0 +5,87,4,5,3,1 +4,70,4,4,3,1 +5,55,4,5,3,1 +3,21,1,1,3,0 +4,50,1,1,3,0 +5,76,4,5,3,1 +4,23,1,1,3,0 +3,68,,,3,0 +4,62,4,,3,1 +5,65,1,,3,1 +5,73,4,5,3,1 +4,38,2,3,3,0 +2,57,1,1,3,0 +5,65,4,5,3,1 +5,67,2,4,3,1 +5,61,2,4,3,1 +5,56,4,4,3,0 +5,71,2,4,3,1 +4,49,2,2,3,0 +4,55,,,3,0 +4,44,2,1,3,0 +0,58,4,4,3,0 +4,27,2,1,3,0 +5,73,4,5,3,1 +4,34,2,1,3,0 +5,63,,4,3,1 +4,50,2,1,3,1 +4,62,2,1,3,0 +3,21,3,1,3,0 +4,49,2,,3,0 +4,36,3,1,3,0 +4,45,2,1,3,1 +5,67,4,5,3,1 +4,21,1,1,3,0 +4,57,2,1,3,0 +5,66,4,5,3,1 +4,71,4,4,3,1 +5,69,3,4,3,1 +6,80,4,5,3,1 +3,27,2,1,3,0 +4,38,2,1,3,0 +4,23,2,1,3,0 +5,70,,5,3,1 +4,46,4,3,3,0 +4,61,2,3,3,0 +5,65,4,5,3,1 +4,60,4,3,3,0 +5,83,4,5,3,1 +5,40,4,4,3,1 +2,59,,4,3,0 +4,53,3,4,3,0 +4,76,4,4,3,0 +5,79,1,4,3,1 +5,38,2,4,3,1 +4,61,3,4,3,0 +4,56,2,1,3,0 +4,44,2,1,3,0 +4,64,3,4,,1 +4,66,3,3,3,0 +4,50,3,3,3,0 +4,46,1,1,3,0 +4,39,1,1,3,0 +4,60,3,,,0 +5,55,4,5,3,1 +4,40,2,1,3,0 +4,26,1,1,3,0 +5,84,3,2,3,1 +4,41,2,2,3,0 +4,63,1,1,3,0 +2,65,,1,2,0 +4,49,1,1,3,0 +4,56,2,2,3,1 +5,65,4,4,3,0 +4,54,1,1,3,0 +4,36,1,1,3,0 +5,49,4,4,3,0 +4,59,4,4,3,1 +5,75,4,4,3,1 +5,59,4,2,3,0 +5,59,4,4,3,1 +4,28,4,4,3,1 +5,53,4,5,3,0 +5,57,4,4,3,0 +5,77,4,3,4,0 +5,85,4,3,3,1 +4,59,4,4,3,0 +5,59,1,5,3,1 +4,65,3,3,3,1 +4,54,2,1,3,0 +5,46,4,5,3,1 +4,63,4,4,3,1 +4,53,1,1,3,1 +4,56,1,1,3,0 +5,66,4,4,3,1 +5,66,4,5,3,1 +4,55,1,1,3,0 +4,44,1,1,3,0 +5,86,3,4,3,1 +5,47,4,5,3,1 +5,59,4,5,3,1 +5,66,4,5,3,0 +5,61,4,3,3,1 +3,46,,5,,1 +4,69,1,1,3,0 +5,93,1,5,3,1 +4,39,1,3,3,0 +5,44,4,5,3,1 +4,45,2,2,3,0 +4,51,3,4,3,0 +4,56,2,4,3,0 +4,66,4,4,3,0 +5,61,4,5,3,1 +4,64,3,3,3,1 +5,57,2,4,3,0 +5,79,4,4,3,1 +4,57,2,1,,0 +4,44,4,1,1,0 +4,31,2,1,3,0 +4,63,4,4,3,0 +4,64,1,1,3,0 +5,47,4,5,3,0 +5,68,4,5,3,1 +4,30,1,1,3,0 +5,43,4,5,3,1 +4,56,1,1,3,0 +4,46,2,1,3,0 +4,67,2,1,3,0 +5,52,4,5,3,1 +4,67,4,4,3,1 +4,47,2,1,3,0 +5,58,4,5,3,1 +4,28,2,1,3,0 
+4,43,1,1,3,0 +4,57,2,4,3,0 +5,68,4,5,3,1 +4,64,2,4,3,0 +4,64,2,4,3,0 +5,62,4,4,3,1 +4,38,4,1,3,0 +5,68,4,4,3,1 +4,41,2,1,3,0 +4,35,2,1,3,1 +4,68,2,1,3,0 +5,55,4,4,3,1 +5,67,4,4,3,1 +4,51,4,3,3,0 +2,40,1,1,3,0 +5,73,4,4,3,1 +4,58,,4,3,1 +4,51,,4,3,0 +3,50,,,3,1 +5,59,4,3,3,1 +6,60,3,5,3,1 +4,27,2,1,,0 +5,54,4,3,3,0 +4,56,1,1,3,0 +5,53,4,5,3,1 +4,54,2,4,3,0 +5,79,1,4,3,1 +5,67,4,3,3,1 +5,64,3,3,3,1 +4,70,1,2,3,1 +5,55,4,3,3,1 +5,65,3,3,3,1 +5,45,4,2,3,1 +4,57,4,4,,1 +5,49,1,1,3,1 +4,24,2,1,3,0 +4,52,1,1,3,0 +4,50,2,1,3,0 +4,35,1,1,3,0 +5,,3,3,3,1 +5,64,4,3,3,1 +5,40,4,1,1,1 +5,66,4,4,3,1 +4,64,4,4,3,1 +5,52,4,3,3,1 +5,43,1,4,3,1 +4,56,4,4,3,0 +4,72,3,,3,0 +6,51,4,4,3,1 +4,79,4,4,3,1 +4,22,2,1,3,0 +4,73,2,1,3,0 +4,53,3,4,3,0 +4,59,2,1,3,1 +4,46,4,4,2,0 +5,66,4,4,3,1 +4,50,4,3,3,1 +4,58,1,1,3,1 +4,55,1,1,3,0 +4,62,2,4,3,1 +4,60,1,1,3,0 +5,57,4,3,3,1 +4,57,1,1,3,0 +6,41,2,1,3,0 +4,71,2,1,3,1 +4,32,2,1,3,0 +4,57,2,1,3,0 +4,19,1,1,3,0 +4,62,2,4,3,1 +5,67,4,5,3,1 +4,50,4,5,3,0 +4,65,2,3,2,0 +4,40,2,4,2,0 +6,71,4,4,3,1 +6,68,4,3,3,1 +4,68,1,1,3,0 +4,29,1,1,3,0 +4,53,2,1,3,0 +5,66,4,4,3,1 +4,60,3,,4,0 +5,76,4,4,3,1 +4,58,2,1,2,0 +5,96,3,4,3,1 +5,70,4,4,3,1 +4,34,2,1,3,0 +4,59,2,1,3,0 +4,45,3,1,3,1 +5,65,4,4,3,1 +4,59,1,1,3,0 +4,21,2,1,3,0 +3,43,2,1,3,0 +4,53,1,1,3,0 +4,65,2,1,3,0 +4,64,2,4,3,1 +4,53,4,4,3,0 +4,51,1,1,3,0 +4,59,2,4,3,0 +4,56,2,1,3,0 +4,60,2,1,3,0 +4,22,1,1,3,0 +4,25,2,1,3,0 +6,76,3,,3,0 +5,69,4,4,3,1 +4,58,2,1,3,0 +5,62,4,3,3,1 +4,56,4,4,3,0 +4,64,1,1,3,0 +4,32,2,1,3,0 +5,48,,4,,1 +5,59,4,4,2,1 +4,52,1,1,3,0 +4,63,4,4,3,0 +5,67,4,4,3,1 +5,61,4,4,3,1 +5,59,4,5,3,1 +5,52,4,3,3,1 +4,35,4,4,3,0 +5,77,3,3,3,1 +5,71,4,3,3,1 +5,63,4,3,3,1 +4,38,2,1,2,0 +5,72,4,3,3,1 +4,76,4,3,3,1 +4,53,3,3,3,0 +4,67,4,5,3,0 +5,69,2,4,3,1 +4,54,1,1,3,0 +2,35,2,1,2,0 +5,68,4,3,3,1 +4,68,4,4,3,0 +4,67,2,4,3,1 +3,39,1,1,3,0 +4,44,2,1,3,0 +4,33,1,1,3,0 +4,60,,4,3,0 +4,58,1,1,3,0 +4,31,1,1,3,0 +3,23,1,1,3,0 +5,56,4,5,3,1 +4,69,2,1,3,1 +6,63,1,1,3,0 +4,65,1,1,3,1 +4,44,2,1,2,0 
+4,62,3,3,3,1 +4,67,4,4,3,1 +4,56,2,1,3,0 +4,52,3,4,3,0 +4,43,1,1,3,1 +4,41,4,3,2,1 +4,42,3,4,2,0 +3,46,1,1,3,0 +5,55,4,4,3,1 +5,58,4,4,2,1 +5,87,4,4,3,1 +4,66,2,1,3,0 +0,72,4,3,3,1 +5,60,4,3,3,1 +5,83,4,4,2,1 +4,31,2,1,3,0 +4,53,2,1,3,0 +4,64,2,3,3,0 +5,31,4,4,2,1 +5,62,4,4,2,1 +4,56,2,1,3,0 +5,58,4,4,3,1 +4,67,1,4,3,0 +5,75,4,5,3,1 +5,65,3,4,3,1 +5,74,3,2,3,1 +4,59,2,1,3,0 +4,57,4,4,4,1 +4,76,3,2,3,0 +4,63,1,4,3,0 +4,44,1,1,3,0 +4,42,3,1,2,0 +4,35,3,,2,0 +5,65,4,3,3,1 +4,70,2,1,3,0 +4,48,1,1,3,0 +4,74,1,1,1,1 +6,40,,3,4,1 +4,63,1,1,3,0 +5,60,4,4,3,1 +5,86,4,3,3,1 +4,27,1,1,3,0 +4,71,4,5,2,1 +5,85,4,4,3,1 +4,51,3,3,3,0 +6,72,4,3,3,1 +5,52,4,4,3,1 +4,66,2,1,3,0 +5,71,4,5,3,1 +4,42,2,1,3,0 +4,64,4,4,2,1 +4,41,2,2,3,0 +4,50,2,1,3,0 +4,30,1,1,3,0 +4,67,1,1,3,0 +5,62,4,4,3,1 +4,46,2,1,2,0 +4,35,1,1,3,0 +4,53,1,1,2,0 +4,59,2,1,3,0 +4,19,3,1,3,0 +5,86,2,1,3,1 +4,72,2,1,3,0 +4,37,2,1,2,0 +4,46,3,1,3,1 +4,45,1,1,3,0 +4,48,4,5,3,0 +4,58,4,4,3,1 +4,42,1,1,3,0 +4,56,2,4,3,1 +4,47,2,1,3,0 +4,49,4,4,3,1 +5,76,2,5,3,1 +5,62,4,5,3,1 +5,64,4,4,3,1 +5,53,4,3,3,1 +4,70,4,2,2,1 +5,55,4,4,3,1 +4,34,4,4,3,0 +5,76,4,4,3,1 +4,39,1,1,3,0 +2,23,1,1,3,0 +4,19,1,1,3,0 +5,65,4,5,3,1 +4,57,2,1,3,0 +5,41,4,4,3,1 +4,36,4,5,3,1 +4,62,3,3,3,0 +4,69,2,1,3,0 +4,41,3,1,3,0 +3,51,2,4,3,0 +5,50,3,2,3,1 +4,47,4,4,3,0 +4,54,4,5,3,1 +5,52,4,4,3,1 +4,30,1,1,3,0 +3,48,4,4,3,1 +5,,4,4,3,1 +4,65,2,4,3,1 +4,50,1,1,3,0 +5,65,4,5,3,1 +5,66,4,3,3,1 +6,41,3,3,2,1 +5,72,3,2,3,1 +4,42,1,1,1,1 +4,80,4,4,3,1 +0,45,2,4,3,0 +4,41,1,1,3,0 +4,72,3,3,3,1 +4,60,4,5,3,0 +5,67,4,3,3,1 +4,55,2,1,3,0 +4,61,3,4,3,1 +4,55,3,4,3,1 +4,52,4,4,3,1 +4,42,1,1,3,0 +5,63,4,4,3,1 +4,62,4,5,3,1 +4,46,1,1,3,0 +4,65,2,1,3,0 +4,57,3,3,3,1 +4,66,4,5,3,1 +4,45,1,1,3,0 +4,77,4,5,3,1 +4,35,1,1,3,0 +4,50,4,5,3,1 +4,57,4,4,3,0 +4,74,3,1,3,1 +4,59,4,5,3,0 +4,51,1,1,3,0 +4,42,3,4,3,1 +4,35,2,4,3,0 +4,42,1,1,3,0 +4,43,2,1,3,0 +4,62,4,4,3,1 +4,27,2,1,3,0 +5,,4,3,3,1 +4,57,4,4,3,1 +4,59,2,1,3,0 +5,40,3,2,3,1 +4,20,1,1,3,0 +5,74,4,3,3,1 +4,22,1,1,3,0 
+4,57,4,3,3,0 +4,57,4,3,3,1 +4,55,2,1,2,0 +4,62,2,1,3,0 +4,54,1,1,3,0 +4,71,1,1,3,1 +4,65,3,3,3,0 +4,68,4,4,3,0 +4,64,1,1,3,0 +4,54,2,4,3,0 +4,48,4,4,3,1 +4,58,4,3,3,0 +5,58,3,4,3,1 +4,70,1,1,1,0 +5,70,1,4,3,1 +4,59,2,1,3,0 +4,57,2,4,3,0 +4,53,4,5,3,0 +4,54,4,4,3,1 +4,53,2,1,3,0 +0,71,4,4,3,1 +5,67,4,5,3,1 +4,68,4,4,3,1 +4,56,2,4,3,0 +4,35,2,1,3,0 +4,52,4,4,3,1 +4,47,2,1,3,0 +4,56,4,5,3,1 +4,64,4,5,3,0 +5,66,4,5,3,1 +4,62,3,3,3,0 From 7bc4d1609eac82af823228f7a010ce302fcc259a Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 06:27:52 -0700 Subject: [PATCH 23/30] Anomaly detection; Fast c++ inference of Isolation forest models (part 5) PiperOrigin-RevId: 643985602 --- .../serving/decision_forest/BUILD | 20 ++++- .../decision_forest/decision_forest.cc | 47 +++++++++++ .../serving/decision_forest/decision_forest.h | 8 +- .../decision_forest_serving.cc | 34 ++++++++ .../decision_forest/decision_forest_serving.h | 12 +++ .../decision_forest/decision_forest_test.cc | 21 +++++ .../decision_forest/register_engines.cc | 80 ++++++++++++++++++- .../decision_forest/register_engines.h | 6 +- .../serving/decision_forest/utils.cc | 16 +++- 9 files changed, 237 insertions(+), 7 deletions(-) diff --git a/yggdrasil_decision_forests/serving/decision_forest/BUILD b/yggdrasil_decision_forests/serving/decision_forest/BUILD index 6770a078..0743c5cb 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/BUILD +++ b/yggdrasil_decision_forests/serving/decision_forest/BUILD @@ -62,14 +62,22 @@ cc_library_ydf( ], deps = [ ":decision_forest", + ":decision_forest_serving", ":quick_scorer_extended", "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", "//yggdrasil_decision_forests/model:abstract_model", + "//yggdrasil_decision_forests/model:abstract_model_cc_proto", + "//yggdrasil_decision_forests/model/decision_tree", "//yggdrasil_decision_forests/model/gradient_boosted_trees", 
"//yggdrasil_decision_forests/model/gradient_boosted_trees:gradient_boosted_trees_cc_proto", + "//yggdrasil_decision_forests/model/isolation_forest", + "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/serving:example_set_model_wrapper", "//yggdrasil_decision_forests/serving:fast_engine", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", ], alwayslink = 1, ) @@ -91,6 +99,7 @@ cc_library_ydf( "//yggdrasil_decision_forests/model/decision_tree", "//yggdrasil_decision_forests/model/decision_tree:decision_tree_cc_proto", "//yggdrasil_decision_forests/model/gradient_boosted_trees", + "//yggdrasil_decision_forests/model/isolation_forest", "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/serving:example_set", "//yggdrasil_decision_forests/utils:bitmap", @@ -103,6 +112,7 @@ cc_library_ydf( "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], + alwayslink = 1, ) cc_library_ydf( @@ -115,10 +125,13 @@ cc_library_ydf( ], deps = [ "//yggdrasil_decision_forests/model:abstract_model_cc_proto", + "//yggdrasil_decision_forests/model/isolation_forest", "//yggdrasil_decision_forests/serving:example_set", + "//yggdrasil_decision_forests/utils:compatibility", "//yggdrasil_decision_forests/utils:logging", "//yggdrasil_decision_forests/utils:usage", ], + alwayslink = 1, ) cc_library_ydf( @@ -157,10 +170,12 @@ cc_library_ydf( "//yggdrasil_decision_forests/dataset:data_spec_cc_proto", "//yggdrasil_decision_forests/model:abstract_model", "//yggdrasil_decision_forests/model/gradient_boosted_trees", + "//yggdrasil_decision_forests/model/isolation_forest", "//yggdrasil_decision_forests/model/random_forest", + "//yggdrasil_decision_forests/serving:example_set", + "//yggdrasil_decision_forests/utils:logging", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", - 
"@com_google_absl//absl/strings:str_format", ], ) @@ -209,6 +224,9 @@ cc_test( "//yggdrasil_decision_forests/model/decision_tree:decision_tree_cc_proto", "//yggdrasil_decision_forests/model/gradient_boosted_trees", "//yggdrasil_decision_forests/model/gradient_boosted_trees:gradient_boosted_trees_cc_proto", + "//yggdrasil_decision_forests/model/isolation_forest", + "//yggdrasil_decision_forests/model/random_forest", + "//yggdrasil_decision_forests/serving:example_set", "//yggdrasil_decision_forests/utils:concurrency", "//yggdrasil_decision_forests/utils:csv", "//yggdrasil_decision_forests/utils:filesystem", diff --git a/yggdrasil_decision_forests/serving/decision_forest/decision_forest.cc b/yggdrasil_decision_forests/serving/decision_forest/decision_forest.cc index 76a318fb..7218b77b 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/decision_forest.cc +++ b/yggdrasil_decision_forests/serving/decision_forest/decision_forest.cc @@ -37,8 +37,10 @@ #include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" #include "yggdrasil_decision_forests/model/decision_tree/decision_tree.pb.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" #include "yggdrasil_decision_forests/model/random_forest/random_forest.h" #include "yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h" +#include "yggdrasil_decision_forests/serving/decision_forest/utils.h" #include "yggdrasil_decision_forests/serving/example_set.h" #include "yggdrasil_decision_forests/utils/bitmap.h" #include "yggdrasil_decision_forests/utils/compatibility.h" @@ -54,6 +56,7 @@ using dataset::proto::ColumnType; using model::decision_tree::NodeWithChildren; using model::gradient_boosted_trees::GradientBoostedTreesModel; using model::gradient_boosted_trees::proto::Loss; +using model::isolation_forest::IsolationForestModel; using 
model::random_forest::RandomForestModel; using ConditionType = model::decision_tree::proto::Condition::TypeCase; typedef absl::flat_hash_map FeatureDefMap; @@ -634,6 +637,30 @@ absl::Status SetLeafNodeRandomForestNumericalUplift( return absl::OkStatus(); } +template +absl::Status SetLeafNodeIsolationForest( + const IsolationForestModel& src_model, const NodeWithChildren& src_node, + SpecializedModel* dst_model, + typename SpecializedModel::NodeType* dst_node) { + using Node = typename SpecializedModel::NodeType; + static_assert(std::is_same>::value || + std::is_same>::value, + "Non supported node type."); + + const float value = + (src_node.depth() + + model::isolation_forest::PreissAveragePathLength( + src_node.node().anomaly_detection().num_examples_without_weight())) / + src_model.NumTrees(); + + *dst_node = Node::Leaf( + /*.right_idx =*/0, + /*.feature_idx =*/0, + /*.label =*/ + value); + return absl::OkStatus(); +} + // Set the leaf of a binary classification Gradient Boosted Trees. 
template absl::Status SetLeafGradientBoostedTreesClassification( @@ -1051,6 +1078,26 @@ absl::Status GenericToSpecializedModel( SetLeafNodeRandomForestNumericalUplift, src, dst); } +template <> +absl::Status GenericToSpecializedModel(const IsolationForestModel& src, + IsolationForest* dst) { + using DstType = std::remove_pointer::type; + dst->denominator = model::isolation_forest::PreissAveragePathLength( + src.num_examples_per_trees()); + return GenericToSpecializedGenericModelHelper( + SetLeafNodeIsolationForest, src, dst); +} + +template <> +absl::Status GenericToSpecializedModel(const IsolationForestModel& src, + GenericIsolationForest* dst) { + using DstType = std::remove_pointer::type; + dst->denominator = model::isolation_forest::PreissAveragePathLength( + src.num_examples_per_trees()); + return GenericToSpecializedGenericModelHelper( + SetLeafNodeIsolationForest, src, dst); +} + template <> absl::Status GenericToSpecializedModel( const GradientBoostedTreesModel& src, diff --git a/yggdrasil_decision_forests/serving/decision_forest/decision_forest.h b/yggdrasil_decision_forests/serving/decision_forest/decision_forest.h index b03ed309..4167463c 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/decision_forest.h +++ b/yggdrasil_decision_forests/serving/decision_forest/decision_forest.h @@ -47,11 +47,17 @@ #ifndef YGGDRASIL_DECISION_FORESTS_SERVING_DECISION_FOREST_H_ #define YGGDRASIL_DECISION_FORESTS_SERVING_DECISION_FOREST_H_ +#include +#include +#include + #include "absl/status/status.h" +#include "absl/types/optional.h" +#include "yggdrasil_decision_forests/dataset/vertical_dataset.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" #include "yggdrasil_decision_forests/model/random_forest/random_forest.h" #include "yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h" -#include "yggdrasil_decision_forests/serving/decision_forest/utils.h" +#include 
"yggdrasil_decision_forests/serving/example_set.h" namespace yggdrasil_decision_forests { namespace serving { diff --git a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.cc b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.cc index 8b65a1f4..12f2e33c 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.cc +++ b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.cc @@ -15,6 +15,15 @@ #include "yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h" +#include +#include +#include +#include +#include + +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" +#include "yggdrasil_decision_forests/serving/example_set.h" +#include "yggdrasil_decision_forests/utils/compatibility.h" #include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/usage.h" @@ -61,6 +70,12 @@ void ActivationGradientBoostedTreesMultinomialLogLikelihood( } } +template +float IsolationForestActivation(const Model& model, const float value) { + return model::isolation_forest::IsolationForestPredictionFromDenominator( + value, model.denominator); +} + // Identity transformation for the output of a decision forest model. // Default value for the "FinalTransform" argument in "PredictHelper". 
// @@ -647,6 +662,25 @@ void Predict( model, examples, num_examples, predictions); } +template <> +void Predict( + const GenericIsolationForest& model, + const typename GenericIsolationForest::ExampleSet& examples, + int num_examples, std::vector* predictions) { + PredictHelper::type, + IsolationForestActivation>(model, examples, num_examples, + predictions); +} + +template <> +void Predict(const IsolationForest& model, + const typename IsolationForest::ExampleSet& examples, + int num_examples, std::vector* predictions) { + PredictHelper::type, + IsolationForestActivation>(model, examples, num_examples, + predictions); +} + template <> void Predict( const GradientBoostedTreesBinaryClassification& model, diff --git a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h index b73aceaa..2b197c76 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h +++ b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h @@ -16,6 +16,7 @@ #ifndef YGGDRASIL_DECISION_FORESTS_SERVING_DECISION_FOREST_DECISION_FOREST_SERVING_H_ #define YGGDRASIL_DECISION_FORESTS_SERVING_DECISION_FOREST_DECISION_FOREST_SERVING_H_ +#include #include #include "yggdrasil_decision_forests/model/abstract_model.pb.h" @@ -386,6 +387,17 @@ struct GenericRandomForestNumericalUplift : ExampleSetModel { }; using RandomForestNumericalUplift = GenericRandomForestNumericalUplift<>; +// Isolation Forest model. +template +struct GenericIsolationForest : ExampleSetModel { + static constexpr model::proto::Task kTask = + model::proto::Task::ANOMALY_DETECTION; + + // Denominator / normalizer of the prediction output. + float denominator; +}; +using IsolationForest = GenericIsolationForest<>; + // GBDT model for binary classification. 
template struct GenericGradientBoostedTreesBinaryClassification diff --git a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_test.cc b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_test.cc index 02de472f..db3fb9b8 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/decision_forest_test.cc +++ b/yggdrasil_decision_forests/serving/decision_forest/decision_forest_test.cc @@ -15,8 +15,11 @@ #include "yggdrasil_decision_forests/serving/decision_forest/decision_forest.h" +#include +#include #include #include +#include #include #include @@ -39,11 +42,15 @@ #include "yggdrasil_decision_forests/model/fast_engine_factory.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.pb.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" #include "yggdrasil_decision_forests/model/model_library.h" +#include "yggdrasil_decision_forests/model/random_forest/random_forest.h" #include "yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h" #include "yggdrasil_decision_forests/serving/decision_forest/quick_scorer_extended.h" #include "yggdrasil_decision_forests/serving/decision_forest/register_engines.h" +#include "yggdrasil_decision_forests/serving/example_set.h" #include "yggdrasil_decision_forests/utils/concurrency.h" // IWYU pragma: keep +#include "yggdrasil_decision_forests/utils/concurrency_streamprocessor.h" #include "yggdrasil_decision_forests/utils/csv.h" #include "yggdrasil_decision_forests/utils/filesystem.h" #include "yggdrasil_decision_forests/utils/logging.h" @@ -57,6 +64,7 @@ namespace decision_forest { namespace { using model::gradient_boosted_trees::GradientBoostedTreesModel; +using model::isolation_forest::IsolationForestModel; using model::random_forest::RandomForestModel; using testing::ElementsAre; @@ -488,6 +496,19 @@ 
TEST(SimPTECategoricalupliftRF, ManualGeneric) { dataset, *model, engine); } +TEST(GaussiansIF, ManualGeneric) { + const auto model = LoadModel("gaussians_anomaly_if"); + const auto dataset = + LoadDataset(model->data_spec(), "gaussians_test.csv", "csv"); + + auto* if_model = dynamic_cast(model.get()); + IsolationForest engine; + CHECK_OK(GenericToSpecializedModel(*if_model, &engine)); + + utils::ExpectEqualPredictionsTemplate( + dataset, *model, engine); +} + void BuildFullTree(const int d, model::decision_tree::NodeWithChildren* node) { if (d <= 0) { node->mutable_node()->mutable_classifier()->set_top_value(1.f); diff --git a/yggdrasil_decision_forests/serving/decision_forest/register_engines.cc b/yggdrasil_decision_forests/serving/decision_forest/register_engines.cc index aab9da8e..29f71339 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/register_engines.cc +++ b/yggdrasil_decision_forests/serving/decision_forest/register_engines.cc @@ -17,15 +17,33 @@ // #include "yggdrasil_decision_forests/serving/decision_forest/register_engines.h" +#include +#include +#include +#include +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" #include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/abstract_model.pb.h" +#include "yggdrasil_decision_forests/model/decision_tree/decision_tree.h" #include "yggdrasil_decision_forests/model/fast_engine_factory.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.pb.h" -#include "yggdrasil_decision_forests/serving/decision_forest/decision_forest.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" 
+#include "yggdrasil_decision_forests/model/random_forest/random_forest.h" +#include "yggdrasil_decision_forests/serving/decision_forest/decision_forest.h" // IWYU pragma: keep +#include "yggdrasil_decision_forests/serving/decision_forest/decision_forest_serving.h" #include "yggdrasil_decision_forests/serving/decision_forest/quick_scorer_extended.h" #include "yggdrasil_decision_forests/serving/example_set_model_wrapper.h" +#include "yggdrasil_decision_forests/serving/fast_engine.h" namespace yggdrasil_decision_forests { namespace model { @@ -609,6 +627,66 @@ class RandomForestGenericFastEngineFactory : public model::FastEngineFactory { REGISTER_FastEngineFactory(RandomForestGenericFastEngineFactory, serving::random_forest::kGeneric); +class IsolationForestGenericFastEngineFactory + : public model::FastEngineFactory { + public: + using SourceModel = isolation_forest::IsolationForestModel; + + std::string name() const override { + return serving::isolation_forest::kGeneric; + } + + bool IsCompatible(const AbstractModel* const model) const override { + auto* if_model = dynamic_cast(model); + // This implementation is the most generic and least efficient engine. 
+ if (if_model == nullptr) { + return false; + } + return if_model->CheckStructure({/*.global_imputation_is_higher =*/false}); + } + + std::vector IsBetterThan() const override { return {}; } + + absl::StatusOr> CreateEngine( + const AbstractModel* const model) const override { + auto* if_model = dynamic_cast(model); + if (!if_model) { + return absl::InvalidArgumentError("The model is not an IF."); + } + + if (!if_model->CheckStructure({/*.global_imputation_is_higher =*/false})) { + return NoGlobalImputationError("IsolationForestGenericFastEngineFactory"); + } + + const bool need_uint32_node_index = + MaxNumberOfNodesPerTree(if_model->decision_trees()) >= + std::numeric_limits::max(); + + switch (if_model->task()) { + case model::proto::ANOMALY_DETECTION: { + if (need_uint32_node_index) { + auto engine = absl::make_unique, + serving::decision_forest::Predict>>(); + RETURN_IF_ERROR(engine->LoadModel(*if_model)); + return engine; + } else { + auto engine = absl::make_unique, + serving::decision_forest::Predict>>(); + RETURN_IF_ERROR(engine->LoadModel(*if_model)); + return engine; + } + } + default: + return absl::InvalidArgumentError("Non supported RF model"); + } + } +}; + +REGISTER_FastEngineFactory(IsolationForestGenericFastEngineFactory, + serving::isolation_forest::kGeneric); + class RandomForestOptPredFastEngineFactory : public model::FastEngineFactory { public: using SourceModel = random_forest::RandomForestModel; diff --git a/yggdrasil_decision_forests/serving/decision_forest/register_engines.h b/yggdrasil_decision_forests/serving/decision_forest/register_engines.h index 50edccc9..b7dc89d5 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/register_engines.h +++ b/yggdrasil_decision_forests/serving/decision_forest/register_engines.h @@ -18,8 +18,6 @@ #ifndef YGGDRASIL_DECISION_FORESTS_SERVING_REGISTER_ENGINE_DECISION_FOREST_H_ #define YGGDRASIL_DECISION_FORESTS_SERVING_REGISTER_ENGINE_DECISION_FOREST_H_ -#include 
"yggdrasil_decision_forests/model/abstract_model.h" -#include "yggdrasil_decision_forests/serving/fast_engine.h" namespace yggdrasil_decision_forests { namespace serving { @@ -36,6 +34,10 @@ constexpr char kGeneric[] = "RandomForestGeneric"; constexpr char kOptPred[] = "RandomForestOptPred"; } // namespace random_forest +namespace isolation_forest { +constexpr char kGeneric[] = "IsolationForestGeneric"; +} // namespace isolation_forest + } // namespace serving } // namespace yggdrasil_decision_forests #endif // YGGDRASIL_DECISION_FORESTS_SERVING_REGISTER_ENGINE_DECISION_FOREST_H_ diff --git a/yggdrasil_decision_forests/serving/decision_forest/utils.cc b/yggdrasil_decision_forests/serving/decision_forest/utils.cc index a459f54b..710989ec 100644 --- a/yggdrasil_decision_forests/serving/decision_forest/utils.cc +++ b/yggdrasil_decision_forests/serving/decision_forest/utils.cc @@ -15,17 +15,24 @@ #include "yggdrasil_decision_forests/serving/decision_forest/utils.h" +#include +#include +#include +#include + #include "absl/status/status.h" -#include "absl/strings/str_format.h" #include "yggdrasil_decision_forests/dataset/data_spec.pb.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" #include "yggdrasil_decision_forests/model/random_forest/random_forest.h" +#include "yggdrasil_decision_forests/serving/example_set.h" +#include "yggdrasil_decision_forests/utils/logging.h" namespace yggdrasil_decision_forests { namespace serving { namespace decision_forest { - // Get the list of input features used by the model. // // The order of the input feature is deterministic. 
@@ -46,10 +53,15 @@ absl::Status GetInputFeatures( const auto* gbt_model = dynamic_cast< const model::gradient_boosted_trees::GradientBoostedTreesModel*>( &src_model); + const auto* if_model = + dynamic_cast( + &src_model); if (rf_model) { rf_model->CountFeatureUsage(&feature_usage); } else if (gbt_model) { gbt_model->CountFeatureUsage(&feature_usage); + } else if (if_model) { + if_model->CountFeatureUsage(&feature_usage); } else { return absl::InvalidArgumentError("Unsupported decision forest model type"); } From 86f7edf8e7224d08c76794340da342c3cdae6a33 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 09:46:57 -0700 Subject: [PATCH 24/30] Anomaly detection; Enable model and prediction analysis of isolation forest models (part 6) PiperOrigin-RevId: 644041112 --- .../golden/analyze_model_if.html.expected | 676 ++++++++++++++++++ .../analyze_prediction_if.html.expected | 231 ++++++ .../model/gaussians_anomaly_if/header.pb | Bin 67 -> 61 bytes yggdrasil_decision_forests/utils/BUILD | 1 + .../utils/model_analysis.cc | 31 +- .../utils/model_analysis.proto | 3 + .../utils/model_analysis_test.cc | 59 +- .../utils/partial_dependence_plot.cc | 14 + .../utils/partial_dependence_plot.proto | 6 +- 9 files changed, 1012 insertions(+), 9 deletions(-) create mode 100644 yggdrasil_decision_forests/test_data/golden/analyze_model_if.html.expected create mode 100644 yggdrasil_decision_forests/test_data/golden/analyze_prediction_if.html.expected diff --git a/yggdrasil_decision_forests/test_data/golden/analyze_model_if.html.expected b/yggdrasil_decision_forests/test_data/golden/analyze_model_if.html.expected new file mode 100644 index 00000000..eba95d49 --- /dev/null +++ b/yggdrasil_decision_forests/test_data/golden/analyze_model_if.html.expected @@ -0,0 +1,676 @@ + + + + +

Analyse dataset: MODEL_PATH

Model: DATASET_PATH

Number of records: 2
+Number of columns: 2
+
+Number of columns by type:
+	NUMERICAL: 2 (100%)
+
+Columns:
+
+NUMERICAL: 2 (100%)
+	0: "features.0_of_2" NUMERICAL mean:0 min:0 max:0 sd:0 dtype:DTYPE_FLOAT64
+	1: "features.1_of_2" NUMERICAL mean:0 min:0 max:0 sd:0 dtype:DTYPE_FLOAT64
+
+Terminology:
+	nas: Number of non-available (i.e. missing) values.
+	ood: Out of dictionary.
+	manually-defined: Attribute whose type is manually defined by the user, i.e., the type was not automatically inferred.
+	tokenized: The attribute value is obtained through tokenization.
+	has-dict: The attribute is attached to a string dictionary e.g. a categorical attribute stored as a string.
+	vocab-size: Number of unique values.
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
Type: "ISOLATION_FOREST"
+Task: ANOMALY_DETECTION
+
+Input Features (2):
+	features.0_of_2
+	features.1_of_2
+
+No weights
+
+Variable Importance disabled i.e. compute_oob_variable_importances=false.
+Cannot compute model self evaluation:This model does not support evaluation reports.
+
+Number of trees: 100
+Total number of nodes: 6488
+
+Number of nodes by tree:
+Count: 100 Average: 64.88 StdDev: 15.1758
+Min: 33 Max: 101 Ignored: 0
+----------------------------------------------
+[  33,  36)  1   1.00%   1.00% #
+[  36,  39)  1   1.00%   2.00% #
+[  39,  43)  4   4.00%   6.00% ###
+[  43,  46)  6   6.00%  12.00% ####
+[  46,  50)  4   4.00%  16.00% ###
+[  50,  53)  2   2.00%  18.00% #
+[  53,  57) 11  11.00%  29.00% ########
+[  57,  60)  9   9.00%  38.00% ######
+[  60,  64) 14  14.00%  52.00% ##########
+[  64,  67)  8   8.00%  60.00% ######
+[  67,  70)  5   5.00%  65.00% ####
+[  70,  74) 11  11.00%  76.00% ########
+[  74,  77)  4   4.00%  80.00% ###
+[  77,  81)  3   3.00%  83.00% ##
+[  81,  84)  4   4.00%  87.00% ###
+[  84,  88)  4   4.00%  91.00% ###
+[  88,  91)  2   2.00%  93.00% #
+[  91,  95)  3   3.00%  96.00% ##
+[  95,  98)  0   0.00%  96.00%
+[  98, 101]  4   4.00% 100.00% ###
+
+Depth by leafs:
+Count: 3294 Average: 5.92077 StdDev: 1.37529
+Min: 1 Max: 7 Ignored: 0
+----------------------------------------------
+[ 1, 2)   13   0.39%   0.39%
+[ 2, 3)   56   1.70%   2.09%
+[ 3, 4)  189   5.74%   7.83% #
+[ 4, 5)  308   9.35%  17.18% ##
+[ 5, 6)  443  13.45%  30.63% ###
+[ 6, 7)  631  19.16%  49.79% ####
+[ 7, 7] 1654  50.21% 100.00% ##########
+
+Number of training obs by leaf:
+Count: 3294 Average: 0 StdDev: 0
+Min: 0 Max: 0 Ignored: 0
+----------------------------------------------
+[ 0, 0] 3294 100.00% 100.00% ##########
+
+Attribute in nodes:
+	1617 : features.0_of_2 [NUMERICAL]
+	1577 : features.1_of_2 [NUMERICAL]
+
+Attribute in nodes with depth <= 0:
+	61 : features.0_of_2 [NUMERICAL]
+	39 : features.1_of_2 [NUMERICAL]
+
+Attribute in nodes with depth <= 1:
+	148 : features.0_of_2 [NUMERICAL]
+	139 : features.1_of_2 [NUMERICAL]
+
+Attribute in nodes with depth <= 2:
+	311 : features.0_of_2 [NUMERICAL]
+	294 : features.1_of_2 [NUMERICAL]
+
+Attribute in nodes with depth <= 3:
+	534 : features.0_of_2 [NUMERICAL]
+	518 : features.1_of_2 [NUMERICAL]
+
+Attribute in nodes with depth <= 5:
+	1206 : features.0_of_2 [NUMERICAL]
+	1161 : features.1_of_2 [NUMERICAL]
+
+Condition type in nodes:
+	3194 : HigherCondition
+Condition type in nodes with depth <= 0:
+	100 : HigherCondition
+Condition type in nodes with depth <= 1:
+	287 : HigherCondition
+Condition type in nodes with depth <= 2:
+	605 : HigherCondition
+Condition type in nodes with depth <= 3:
+	1052 : HigherCondition
+Condition type in nodes with depth <= 5:
+	2367 : HigherCondition
+Node format: BLOB_SEQUENCE
+Number of examples per tree: 100
+
\ No newline at end of file diff --git a/yggdrasil_decision_forests/test_data/golden/analyze_prediction_if.html.expected b/yggdrasil_decision_forests/test_data/golden/analyze_prediction_if.html.expected new file mode 100644 index 00000000..367dbae2 --- /dev/null +++ b/yggdrasil_decision_forests/test_data/golden/analyze_prediction_if.html.expected @@ -0,0 +1,231 @@ + + + + +
+
+ + +
+ +
\ No newline at end of file diff --git a/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/header.pb b/yggdrasil_decision_forests/test_data/model/gaussians_anomaly_if/header.pb index f33587675c6e6ba1db768ba8071888abce2eeceb..7904759d1ef5bf2117fd4044a85730adfe3a1011 100644 GIT binary patch delta 14 VcmZ>^ogmMw!JsiwPLe5s0RR@N0<8c5 delta 41 jcmcC@o*=Jc@E-;k9T+`e?Ej2G99#?n3=#}V3<(SX1}qf| diff --git a/yggdrasil_decision_forests/utils/BUILD b/yggdrasil_decision_forests/utils/BUILD index 5f42c7b5..68dcb435 100644 --- a/yggdrasil_decision_forests/utils/BUILD +++ b/yggdrasil_decision_forests/utils/BUILD @@ -1219,6 +1219,7 @@ cc_test( "//yggdrasil_decision_forests/model/decision_tree", "//yggdrasil_decision_forests/model/decision_tree:builder", "//yggdrasil_decision_forests/model/gradient_boosted_trees", + "//yggdrasil_decision_forests/model/isolation_forest", "//yggdrasil_decision_forests/model/random_forest", "//yggdrasil_decision_forests/utils:testing_macros", "@com_google_absl//absl/memory", diff --git a/yggdrasil_decision_forests/utils/model_analysis.cc b/yggdrasil_decision_forests/utils/model_analysis.cc index b111e3f1..1f347b80 100644 --- a/yggdrasil_decision_forests/utils/model_analysis.cc +++ b/yggdrasil_decision_forests/utils/model_analysis.cc @@ -188,6 +188,19 @@ absl::Status Set1DCurveData( } break; + case model::proto::Task::ANOMALY_DETECTION: + STATUS_CHECK_EQ(label_value_idx, -1); + switch (target_type) { + case CurveTargetType::kPrediction: + target_dst->push_back( + bin.prediction().sum_of_anomaly_detection_predictions() / + pdp.num_observations()); + break; + default: + return absl::InvalidArgumentError("Not implemented."); + } + break; + default: return absl::InvalidArgumentError("Not implemented."); } @@ -207,7 +220,6 @@ absl::Status PlotPartialDependencePlot1DNumerical( pdp.pdp_bins_size()); const auto& attr_spec = data_spec.columns(attribute_idx); - const auto& label_spec = data_spec.columns(label_col_idx); 
STATUS_CHECK_EQ(attr_spec.type(), dataset::proto::ColumnType::NUMERICAL); @@ -226,6 +238,7 @@ absl::Status PlotPartialDependencePlot1DNumerical( // PDP switch (task) { case model::proto::Task::CLASSIFICATION: { + const auto& label_spec = data_spec.columns(label_col_idx); for (int label_value_idx = FirstCategoricalLabelValueForPdpPlot(label_spec); label_value_idx < label_spec.categorical().number_of_unique_values(); @@ -308,6 +321,15 @@ absl::Status PlotPartialDependencePlot1DNumerical( -1, dataset::proto::ColumnType::NUMERICAL, prediction_curve)); } break; + case model::proto::Task::ANOMALY_DETECTION: { + auto* prediction_curve = AddCurve(pdp_plot); + prediction_curve->style = plot::LineStyle::SOLID; + RETURN_IF_ERROR(Set1DCurveData(pdp, CurveTargetType::kPrediction, false, + model::proto::Task::ANOMALY_DETECTION, -1, + dataset::proto::ColumnType::NUMERICAL, + prediction_curve)); + } break; + default: return absl::InvalidArgumentError("Not implemented"); } @@ -735,7 +757,8 @@ absl::StatusOr Analyse( options.cep().example_sampling())); } - if (options.permuted_variable_importance().enabled()) { + if (options.permuted_variable_importance().enabled() && + model.label_col_idx() != -1) { RETURN_IF_ERROR(ComputePermutationFeatureImportance( dataset, &model, analysis.mutable_variable_importances(), {options.num_threads(), @@ -1041,10 +1064,10 @@ absl::StatusOr> ListOutputs( const proto::PredictionAnalysisResult& analysis, const proto::PredictionAnalysisOptions& options) { std::vector outputs; - const auto& label_column = - analysis.data_spec().columns(analysis.label_col_idx()); switch (analysis.task()) { case model::proto::Task::CLASSIFICATION: { + const auto& label_column = + analysis.data_spec().columns(analysis.label_col_idx()); const int first_class_idx = (label_column.categorical().number_of_unique_values() == 3) ? 
2 : 1; for (int class_idx = first_class_idx; diff --git a/yggdrasil_decision_forests/utils/model_analysis.proto b/yggdrasil_decision_forests/utils/model_analysis.proto index e5ea294d..422e63ad 100644 --- a/yggdrasil_decision_forests/utils/model_analysis.proto +++ b/yggdrasil_decision_forests/utils/model_analysis.proto @@ -48,6 +48,9 @@ message Options { optional PermutedVariableImportance permuted_variable_importance = 7; message PermutedVariableImportance { + // If the model does not have labels (e.g., anomaly detection without + // labels), permutation variable importances are not computed, even if + // enabled=True. optional bool enabled = 1 [default = true]; // Number of repetitions of the estimation. More repetitions increase the diff --git a/yggdrasil_decision_forests/utils/model_analysis_test.cc b/yggdrasil_decision_forests/utils/model_analysis_test.cc index e124f299..db9e6f6c 100644 --- a/yggdrasil_decision_forests/utils/model_analysis_test.cc +++ b/yggdrasil_decision_forests/utils/model_analysis_test.cc @@ -56,7 +56,7 @@ std::string ModelDir() { "yggdrasil_decision_forests/test_data/model"); } -TEST(ModelAnalysis, Basic) { +TEST(ModelAnalysis, Classification) { const std::string dataset_path = absl::StrCat("csv:", file::JoinPath(DatasetDir(), "adult_test.csv")); const std::string model_path = @@ -70,7 +70,7 @@ TEST(ModelAnalysis, Basic) { dataset::VerticalDataset dataset; CHECK_OK(dataset::LoadVerticalDataset( dataset_path, model->data_spec(), &dataset, - /*ensure_non_missing=*/model->input_features())); + /*required_columns=*/model->input_features())); proto::Options options; options.mutable_pdp()->set_example_sampling(0.01f); @@ -87,6 +87,35 @@ TEST(ModelAnalysis, Basic) { "DATASET_PATH", analysis, options)); } +TEST(ModelAnalysis, AnomalyDetection) { + const std::string dataset_path = + absl::StrCat("csv:", file::JoinPath(DatasetDir(), "gaussians_test.csv")); + const std::string model_path = + file::JoinPath(ModelDir(), "gaussians_anomaly_if"); + + 
ASSERT_OK_AND_ASSIGN(const auto model, model::LoadModel(model_path)); + + dataset::VerticalDataset dataset; + ASSERT_OK(dataset::LoadVerticalDataset( + dataset_path, model->data_spec(), &dataset, + /*required_columns=*/model->input_features())); + + proto::Options options; + options.mutable_pdp()->set_example_sampling(0.01f); + options.mutable_cep()->set_example_sampling(0.1f); + options.set_num_threads(1); + options.set_html_id_prefix("my_report"); + options.mutable_report_header()->set_enabled(false); + const auto report_path = file::JoinPath(test::TmpDirectory(), "analysis"); + + ASSERT_OK_AND_ASSIGN(const auto analysis, + Analyse(*model.get(), dataset, options)); + + ASSERT_OK_AND_ASSIGN(const auto report, + CreateHtmlReport(*model.get(), dataset, "MODEL_PATH", + "DATASET_PATH", analysis, options)); +} + TEST(ModelAnalysis, FailsWithEmptyDataset) { const std::string dataset_path = absl::StrCat("csv:", file::JoinPath(DatasetDir(), "adult_test.csv")); @@ -202,7 +231,7 @@ TEST(ModelAnalysis, PDPPlot) { // TODO: Add a more extensive unit test with a golden report. 
} -TEST(PredictionAnalysis, Basic) { +TEST(PredictionAnalysis, Classification) { const std::string dataset_path = absl::StrCat("csv:", file::JoinPath(DatasetDir(), "adult_test.csv")); const std::string model_path = @@ -216,7 +245,29 @@ TEST(PredictionAnalysis, Basic) { dataset::VerticalDataset dataset; ASSERT_OK(dataset::LoadVerticalDataset( dataset_path, model->data_spec(), &dataset, - /*ensure_non_missing=*/model->input_features())); + /*required_columns=*/model->input_features())); + + proto::PredictionAnalysisOptions options; + options.set_html_id_prefix("my_prefix"); + dataset::proto::Example example; + dataset.ExtractExample(0, &example); + ASSERT_OK_AND_ASSIGN(const auto analysis, + AnalyzePrediction(*model, example, options)); + ASSERT_OK_AND_ASSIGN(const auto report, CreateHtmlReport(analysis, options)); +} + +TEST(PredictionAnalysis, AnomalyDetection) { + const std::string dataset_path = + absl::StrCat("csv:", file::JoinPath(DatasetDir(), "gaussians_test.csv")); + const std::string model_path = + file::JoinPath(ModelDir(), "gaussians_anomaly_if"); + + ASSERT_OK_AND_ASSIGN(const auto model, model::LoadModel(model_path)); + + dataset::VerticalDataset dataset; + ASSERT_OK(dataset::LoadVerticalDataset( + dataset_path, model->data_spec(), &dataset, + /*required_columns=*/model->input_features())); proto::PredictionAnalysisOptions options; options.set_html_id_prefix("my_prefix"); diff --git a/yggdrasil_decision_forests/utils/partial_dependence_plot.cc b/yggdrasil_decision_forests/utils/partial_dependence_plot.cc index 8bf3c922..7cbf972c 100644 --- a/yggdrasil_decision_forests/utils/partial_dependence_plot.cc +++ b/yggdrasil_decision_forests/utils/partial_dependence_plot.cc @@ -216,6 +216,15 @@ absl::Status UpdateBin( // truth does not have the same scale/range as the predictions. } break; + case model::proto::Task::ANOMALY_DETECTION: { + STATUS_CHECK( + bin->prediction().has_sum_of_anomaly_detection_predictions()); + // Prediction. 
+ bin->mutable_prediction()->set_sum_of_anomaly_detection_predictions( + bin->prediction().sum_of_anomaly_detection_predictions() + + prediction.anomaly_detection().value() * prediction.weight()); + } break; + default: return absl::InvalidArgumentError("Invalid model task"); } @@ -322,6 +331,11 @@ absl::Status InitializePartialDependence( bin->mutable_prediction()->set_sum_of_ranking_predictions(0.0); break; + case model::proto::Task::ANOMALY_DETECTION: + bin->mutable_prediction()->set_sum_of_anomaly_detection_predictions( + 0.0); + break; + default: return absl::InvalidArgumentError("Invalid task"); } diff --git a/yggdrasil_decision_forests/utils/partial_dependence_plot.proto b/yggdrasil_decision_forests/utils/partial_dependence_plot.proto index 752f3657..a85e8575 100644 --- a/yggdrasil_decision_forests/utils/partial_dependence_plot.proto +++ b/yggdrasil_decision_forests/utils/partial_dependence_plot.proto @@ -38,9 +38,13 @@ message PartialDependencePlotSet { // num_observations to obtain the mean prediction. double sum_of_regression_predictions = 2 [default = 0]; - // sum_of_regression_predictions should be normalized with + // sum_of_ranking_predictions should be normalized with // num_observations to obtain the mean prediction. double sum_of_ranking_predictions = 3 [default = 0]; + + // sum_of_anomaly_detection_predictions should be normalized with + // num_observations to obtain the mean prediction. + double sum_of_anomaly_detection_predictions = 4 [default = 0]; } } // Represent the accumulation of evaluation metrics. 
From fe99922fad740f3eac9a5e41aa63219e7ddc6905 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 09:50:32 -0700 Subject: [PATCH 25/30] Anomaly detection; Isolation forest learner and model is available in python (part 7) PiperOrigin-RevId: 644042237 --- .../port/python/ydf/cc/ydf.pyi | 10 +- .../port/python/ydf/learner/BUILD | 6 +- .../python/ydf/learner/generic_learner.py | 2 +- .../port/python/ydf/learner/learner_test.py | 38 ++++++++ .../port/python/ydf/learner/wrapper/BUILD | 3 + .../ydf/learner/wrapper/wrapper_generator.bzl | 5 +- .../ydf/learner/wrapper/wrapper_generator.cc | 91 ++++++++++++++----- .../port/python/ydf/model/BUILD | 7 ++ .../ydf/model/isolation_forest_model/BUILD | 53 +++++++++++ .../model/isolation_forest_model/__init__.py | 14 +++ .../isolation_forest_model.py | 24 +++++ .../isolation_forest_model_test.py | 76 ++++++++++++++++ .../isolation_forest_wrapper.cc | 43 +++++++++ .../isolation_forest_wrapper.h | 57 ++++++++++++ .../port/python/ydf/model/model.cc | 21 +++++ .../port/python/ydf/model/model_lib.py | 3 + .../port/python/ydf/model/tree/__init__.py | 1 + .../port/python/ydf/model/tree/value.py | 40 +++++++- 18 files changed, 462 insertions(+), 32 deletions(-) create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/BUILD create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/__init__.py create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model.py create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model_test.py create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.cc create mode 100644 yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.h diff --git a/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi 
b/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi index b2a2658c..40fe3061 100644 --- a/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi +++ b/yggdrasil_decision_forests/port/python/ydf/cc/ydf.pyi @@ -10,6 +10,7 @@ from google3.third_party.yggdrasil_decision_forests.model import abstract_model_ from google3.third_party.yggdrasil_decision_forests.model import hyperparameter_pb2 from google3.third_party.yggdrasil_decision_forests.model.decision_tree import decision_tree_pb2 from google3.third_party.yggdrasil_decision_forests.model.gradient_boosted_trees import gradient_boosted_trees_pb2 +from google3.third_party.yggdrasil_decision_forests.model.isolation_forest import isolation_forest_pb2 from google3.third_party.yggdrasil_decision_forests.model.random_forest import random_forest_pb2 from google3.third_party.yggdrasil_decision_forests.utils import fold_generator_pb2 from google3.third_party.yggdrasil_decision_forests.utils import fold_generator_pb2 @@ -84,8 +85,9 @@ class VerticalDataset: required_columns: Optional[Sequence[str]] = None, ) -> None: ... def SetMultiDimDataspec( - self, unrolling: Dict[str, List[str]], - ) -> None: ... + self, + unrolling: Dict[str, List[str]], + ) -> None: ... # Model bindings @@ -194,6 +196,10 @@ class RandomForestCCModel(DecisionForestCCModel): ) -> List[random_forest_pb2.OutOfBagTrainingEvaluations]: ... def winner_takes_all(self) -> bool: ... +class IsolationForestCCModel(DecisionForestCCModel): + @property + def kRegisteredName(self): ... + class GradientBoostedTreesCCModel(DecisionForestCCModel): @property def kRegisteredName(self): ... 
diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/BUILD b/yggdrasil_decision_forests/port/python/ydf/learner/BUILD index d1957c8b..eca6bedb 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/learner/BUILD @@ -33,6 +33,7 @@ cc_library_ydf( "@ydf_cc//yggdrasil_decision_forests/learner/cart", "@ydf_cc//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:dgbt", "@ydf_cc//yggdrasil_decision_forests/learner/gradient_boosted_trees", + "@ydf_cc//yggdrasil_decision_forests/learner/isolation_forest", "@ydf_cc//yggdrasil_decision_forests/learner/random_forest", ], alwayslink = 1, @@ -225,26 +226,23 @@ py_test( tags = [ ], deps = [ - ":custom_loss_py", ":generic_learner", ":specialized_learners", ":tuner", # absl/logging dep, # absl/testing:absltest dep, # absl/testing:parameterized dep, - # jax dep, # numpy dep, # pandas dep, + # sklearn dep, "@ydf_cc//yggdrasil_decision_forests/dataset:data_spec_py_proto", "@ydf_cc//yggdrasil_decision_forests/learner:abstract_learner_py_proto", "@ydf_cc//yggdrasil_decision_forests/model:abstract_model_py_proto", - "//ydf/dataset", "//ydf/dataset:dataspec", "//ydf/metric", "//ydf/model:generic_model", "//ydf/model:model_lib", "//ydf/model/decision_forest_model", - "//ydf/model/gradient_boosted_trees_model", "//ydf/utils:log", "//ydf/utils:test_utils", ], diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py index 4687390a..11c8e3be 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py @@ -53,7 +53,7 @@ def __init__( self, learner_name: str, task: Task, - label: str, + label: Optional[str], weights: Optional[str], ranking_group: Optional[str], uplift_treatment: Optional[str], diff --git 
a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py index 8f7816c0..5c0b64fc 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py @@ -24,6 +24,7 @@ import numpy as np import numpy.testing as npt import pandas as pd +from sklearn import metrics from yggdrasil_decision_forests.dataset import data_spec_pb2 as ds_pb from yggdrasil_decision_forests.learner import abstract_learner_pb2 @@ -66,6 +67,7 @@ def setUp(self): Column("treat", semantic=dataspec.Semantic.CATEGORICAL), ], ) + self.gaussians = test_utils.load_datasets("gaussians") def _check_adult_model( self, @@ -1009,6 +1011,42 @@ def test_logging_arg(self, verbose): _ = learner.train(ds, verbose=verbose) +class IsolationForestLearnerTest(LearnerTest): + + @parameterized.parameters(False, True) + def test_gaussians(self, with_labels: bool): + if with_labels: + learner = specialized_learners.IsolationForestLearner(label="label") + else: + learner = specialized_learners.IsolationForestLearner( + features=["f1", "f2"] + ) + model = learner.train(self.gaussians.train) + predictions = model.predict(self.gaussians.test) + + auc = metrics.roc_auc_score(self.gaussians.test_pd["label"], predictions) + logging.info("auc:%s", auc) + self.assertGreaterEqual(auc, 0.99) + + _ = model.describe("text") + _ = model.describe("html") + _ = model.analyze_prediction(self.gaussians.test_pd.iloc[:1]) + _ = model.analyze(self.gaussians.test) + + if with_labels: + evaluation = model.evaluate(self.gaussians.test) + self.assertDictEqual( + evaluation.to_dict(), + {"num_examples": 280, "num_examples_weighted": 280.0}, + ) + else: + with self.assertRaisesRegex( + ValueError, + "Cannot evaluate an anomaly detection model without a label", + ): + _ = model.evaluate(self.gaussians.test) + + class UtilityTest(LearnerTest): def test_feature_name_to_regex(self): 
diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/BUILD b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/BUILD index 1c754532..08334d00 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/BUILD @@ -27,6 +27,7 @@ cc_library_ydf( srcs = ["wrapper_generator.cc"], hdrs = ["wrapper_generator.h"], deps = [ + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -36,6 +37,7 @@ cc_library_ydf( "@ydf_cc//yggdrasil_decision_forests/learner:learner_library", "@ydf_cc//yggdrasil_decision_forests/model:hyperparameter_cc_proto", "@ydf_cc//yggdrasil_decision_forests/utils:hyper_parameters", + "@ydf_cc//yggdrasil_decision_forests/utils:logging", "@ydf_cc//yggdrasil_decision_forests/utils:status_macros", ], ) @@ -55,6 +57,7 @@ cc_test( "@ydf_cc//yggdrasil_decision_forests/learner:abstract_learner", "@ydf_cc//yggdrasil_decision_forests/learner:abstract_learner_cc_proto", "@ydf_cc//yggdrasil_decision_forests/learner/gradient_boosted_trees", + "@ydf_cc//yggdrasil_decision_forests/learner/isolation_forest", "@ydf_cc//yggdrasil_decision_forests/learner/random_forest", "@ydf_cc//yggdrasil_decision_forests/model:abstract_model", "@ydf_cc//yggdrasil_decision_forests/model:hyperparameter_cc_proto", diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.bzl b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.bzl index a473d4d3..43db5a8c 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.bzl +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.bzl @@ -73,8 +73,9 @@ def py_wrap_yggdrasil_learners( "//ydf/dataset:dataset", "//ydf/dataset:dataspec", "//ydf/model:generic_model", - 
"//ydf/model/gradient_boosted_trees_model:gradient_boosted_trees_model", - "//ydf/model/random_forest_model:random_forest_model", + "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/random_forest_model", + "//ydf/model/isolation_forest_model", "//ydf/learner:generic_learner", "//ydf/learner:hyperparameters", "//ydf/learner:custom_loss_py", diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc index 1d4078c4..28b71127 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc +++ b/yggdrasil_decision_forests/port/python/ydf/learner/wrapper/wrapper_generator.cc @@ -24,6 +24,7 @@ #include #include +#include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/ascii.h" @@ -40,10 +41,53 @@ #include "yggdrasil_decision_forests/learner/learner_library.h" #include "yggdrasil_decision_forests/model/hyperparameter.pb.h" #include "yggdrasil_decision_forests/utils/hyper_parameters.h" +#include "yggdrasil_decision_forests/utils/logging.h" #include "yggdrasil_decision_forests/utils/status_macros.h" namespace yggdrasil_decision_forests { +// Configuration data for each individual learner. +struct LearnerConfig { + // Name of the python class of the model. + std::string model_class_name = "generic_model.GenericModel"; + + // Default value of the "task" learner constructor argument. + std::string default_task = "CLASSIFICATION"; + + // If true, the learner requires a "label" constructor argument. 
+ bool require_label = true; +}; + +absl::flat_hash_map LearnerConfigs() { + absl::flat_hash_map configs; + + configs["RANDOM_FOREST"] = { + .model_class_name = "random_forest_model.RandomForestModel", + }; + + configs["CART"] = { + .model_class_name = "random_forest_model.RandomForestModel", + }; + + configs["GRADIENT_BOOSTED_TREES"] = { + .model_class_name = + "gradient_boosted_trees_model.GradientBoostedTreesModel", + }; + + configs["DISTRIBUTED_GRADIENT_BOOSTED_TREES"] = { + .model_class_name = + "gradient_boosted_trees_model.GradientBoostedTreesModel", + }; + + configs["ISOLATION_FOREST"] = { + .model_class_name = "isolation_forest_model.IsolationForestModel", + .default_task = "ANOMALY_DETECTION", + .require_label = false, + }; + + return configs; +} + // Gets the number of prefix spaces. int NumLeadingSpaces(const absl::string_view text) { auto char_it = text.begin(); @@ -67,18 +111,6 @@ std::string LearnerKeyToClassName(const absl::string_view key) { return absl::StrCat(absl::StrReplaceAll(value, {{"_", ""}}), "Learner"); } -// Converts a learner name into the model class associated with it. -std::string LearnerKeyToModelClassName(const absl::string_view key) { - if (key == "RANDOM_FOREST" || key == "CART") { - return "random_forest_model.RandomForestModel"; - } else if (key == "GRADIENT_BOOSTED_TREES" || - key == "DISTRIBUTED_GRADIENT_BOOSTED_TREES") { - return "gradient_boosted_trees_model.GradientBoostedTreesModel"; - } else { - return "generic_model.GenericModel"; - } -} - // Converts a learner name into a nice name. // e.g. 
"RANDOM_FOREST" -> "Random Forest" std::string LearnerKeyToNiceLearnerName(absl::string_view key) { @@ -264,6 +296,7 @@ from $1learner import tuner as tuner_lib from $1model import generic_model from $1model.gradient_boosted_trees_model import gradient_boosted_trees_model from $1model.random_forest_model import random_forest_model +from $1model.isolation_forest_model import isolation_forest_model )", prefix, pydf_prefix); @@ -292,9 +325,20 @@ from typing import Dict, Optional, Sequence, Union )", imports); + const auto learner_configs = LearnerConfigs(); + for (const auto& learner_key : model::AllRegisteredLearners()) { + // Get the learner configuration. + LearnerConfig learner_config; + const auto learner_config_it = learner_configs.find(learner_key); + if (learner_config_it != learner_configs.end()) { + learner_config = learner_config_it->second; + } else { + YDF_LOG(INFO) << "No learner config for " << learner_key + << ". Using default config."; + } + const auto class_name = LearnerKeyToClassName(learner_key); - const auto model_class_name = LearnerKeyToModelClassName(learner_key); // Get a learner instance. std::unique_ptr learner; @@ -439,7 +483,8 @@ from typing import Dict, Optional, Sequence, Union FixGBTDefinition(&fields_documentation, &fields_constructor); } // TODO: Add support for hyperparameter templates. - absl::SubstituteAndAppend(&wrapper, R"( + absl::SubstituteAndAppend( + &wrapper, R"( class $0(generic_learner.GenericLearner): r"""$6 learning algorithm. 
@@ -555,8 +600,8 @@ class $0(generic_learner.GenericLearner): """ def __init__(self, - label: str, - task: generic_learner.Task = generic_learner.Task.CLASSIFICATION, + label: $9, + task: generic_learner.Task = generic_learner.Task.$8, weights: Optional[str] = None, ranking_group: Optional[str] = None, uplift_treatment: Optional[str] = None, @@ -659,12 +704,14 @@ class $0(generic_learner.GenericLearner): """ return super().train(ds=ds, valid=valid, verbose=verbose) )", - /*$0*/ class_name, /*$1*/ learner_key, - /*$2*/ fields_documentation, - /*$3*/ fields_constructor, /*$4*/ fields_dict, - /*$5*/ free_text_documentation, - /*$6*/ nice_learner_name, - /*$7*/ model_class_name); + /*$0*/ class_name, /*$1*/ learner_key, + /*$2*/ fields_documentation, + /*$3*/ fields_constructor, /*$4*/ fields_dict, + /*$5*/ free_text_documentation, + /*$6*/ nice_learner_name, + /*$7*/ learner_config.model_class_name, + /*$8*/ learner_config.default_task, + /*$9*/ learner_config.require_label ? "str" : "Optional[str] = None"); const auto bool_rep = [](const bool value) -> std::string { return value ? 
"True" : "False"; diff --git a/yggdrasil_decision_forests/port/python/ydf/model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/BUILD index 0881c414..99a8b8d7 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/model/BUILD @@ -17,6 +17,7 @@ cc_library_ydf( deps = [ "@ydf_cc//yggdrasil_decision_forests/model/decision_tree:decision_forest_interface", "@ydf_cc//yggdrasil_decision_forests/model/gradient_boosted_trees", + "@ydf_cc//yggdrasil_decision_forests/model/isolation_forest", "@ydf_cc//yggdrasil_decision_forests/model/random_forest", ], ) @@ -65,6 +66,7 @@ pybind_library( ":model_wrapper", "//ydf/model/decision_forest_model:decision_forest_wrapper", "//ydf/model/gradient_boosted_trees_model:gradient_boosted_trees_wrapper", + "//ydf/model/isolation_forest_model:isolation_forest_wrapper", "//ydf/model/random_forest_model:random_forest_wrapper", "//ydf/utils:custom_casters", "//ydf/utils:status_casters", @@ -74,6 +76,7 @@ pybind_library( "@ydf_cc//yggdrasil_decision_forests/model:abstract_model", "@ydf_cc//yggdrasil_decision_forests/model:model_library", "@ydf_cc//yggdrasil_decision_forests/model/gradient_boosted_trees", + "@ydf_cc//yggdrasil_decision_forests/model/isolation_forest", "@ydf_cc//yggdrasil_decision_forests/model/random_forest", "@ydf_cc//yggdrasil_decision_forests/utils:logging", "@ydf_cc//yggdrasil_decision_forests/utils:model_analysis", @@ -144,9 +147,11 @@ py_library( deps = [ ":generic_model", # numpy dep, + # sklearn dep, "//ydf/learner:generic_learner", "//ydf/learner:specialized_learners", "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/isolation_forest_model", "//ydf/model/random_forest_model", "//ydf/model/tree:all", ], @@ -162,6 +167,7 @@ py_library( "//ydf/cc:ydf", "//ydf/dataset:dataspec", "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/isolation_forest_model", "//ydf/model/random_forest_model", "//ydf/utils:log", ], @@ -246,6 +252,7 @@ py_test( 
# pandas dep, "//ydf/dataset", "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/isolation_forest_model", "//ydf/model/random_forest_model", "//ydf/utils:test_utils", ], diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/BUILD b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/BUILD new file mode 100644 index 00000000..d2bb104d --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/BUILD @@ -0,0 +1,53 @@ +# pytype test and library +load("@pybind11_bazel//:build_defs.bzl", "pybind_library") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +# Libraries +# ========= + +pybind_library( + name = "isolation_forest_wrapper", + srcs = ["isolation_forest_wrapper.cc"], + hdrs = ["isolation_forest_wrapper.h"], + deps = [ + "//ydf/model/decision_forest_model:decision_forest_wrapper", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@ydf_cc//yggdrasil_decision_forests/model:abstract_model", + "@ydf_cc//yggdrasil_decision_forests/model/isolation_forest", + "@ydf_cc//yggdrasil_decision_forests/utils:logging", + ], +) + +py_library( + name = "isolation_forest_model", + srcs = ["isolation_forest_model.py"], + deps = [ + "//ydf/cc:ydf", + "//ydf/model/decision_forest_model", + ], +) + +# Tests +# ===== + +py_test( + name = "isolation_forest_model_test", + srcs = ["isolation_forest_model_test.py"], + data = [ + "//test_data", + "@ydf_cc//yggdrasil_decision_forests/test_data", + ], + python_version = "PY3", + deps = [ + # absl/testing:absltest dep, + # numpy dep, + # pandas dep, + "//ydf/model:model_lib", + "//ydf/utils:test_utils", + ], +) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/__init__.py b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/__init__.py new file mode 100644 index 00000000..4446e915 --- /dev/null +++ 
b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model.py b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model.py new file mode 100644 index 00000000..ba5100db --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model.py @@ -0,0 +1,24 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Definitions for Isolation Forest models.""" + +from ydf.cc import ydf +from ydf.model.decision_forest_model import decision_forest_model + + +class IsolationForestModel(decision_forest_model.DecisionForestModel): + """An Isolation Forest model for prediction and inspection.""" + + _model: ydf.IsolationForestCCModel diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model_test.py new file mode 100644 index 00000000..a8968c01 --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_model_test.py @@ -0,0 +1,76 @@ +# Copyright 2022 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the isolation forest models.""" + +import logging +import os + +from absl.testing import absltest +import numpy as np +import pandas as pd + +from ydf.model import model_lib +from ydf.utils import test_utils + + +class IsolationForestModelTest(absltest.TestCase): + + def setUp(self): + super().setUp() + + def build_path(*args): + return os.path.join(test_utils.ydf_test_data_path(), *args) + + self.model_gaussians = model_lib.load_model( + build_path("model", "gaussians_anomaly_if") + ) + self.dataset_gaussians_train = pd.read_csv( + build_path("dataset", "gaussians_train.csv") + ) + self.dataset_gaussians_test = pd.read_csv( + build_path("dataset", "gaussians_test.csv") + ) + + def test_predict(self): + predictions = self.model_gaussians.predict(self.dataset_gaussians_test) + np.testing.assert_allclose( + predictions[:5], + [0.419287, 0.441436, 0.507164, 0.425276, 0.386438], + atol=0.0001, + ) + + def test_distance(self): + distances = self.model_gaussians.distance(self.dataset_gaussians_test) + logging.info("distances:\n%s", distances) + self.assertEqual( + distances.shape, + ( + self.dataset_gaussians_test.shape[0], + self.dataset_gaussians_test.shape[0], + ), + ) + + # Find the example most similar to "self.dataset_gaussians_test[0]". 
+ most_similar_example_idx = np.argmin(distances[0, :]) + logging.info("most_similar_example_idx: %s", most_similar_example_idx) + logging.info("Seed example:\n%s", self.dataset_gaussians_test.iloc[0]) + logging.info( + "Most similar example:\n%s", + self.dataset_gaussians_test.iloc[most_similar_example_idx], + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.cc b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.cc new file mode 100644 index 00000000..5d0e9559 --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.cc @@ -0,0 +1,43 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ydf/model/isolation_forest_model/isolation_forest_wrapper.h" + +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" + +namespace yggdrasil_decision_forests::port::python { + +absl::StatusOr> +IsolationForestCCModel::Create( + std::unique_ptr& model_ptr) { + auto* if_model = dynamic_cast(model_ptr.get()); + if (if_model == nullptr) { + return absl::InvalidArgumentError( + "This model is not an isolation forest model."); + } + // Both release and the unique_ptr constructor are noexcept. 
+ model_ptr.release(); + std::unique_ptr new_model_ptr(if_model); + + return std::make_unique(std::move(new_model_ptr), + if_model); +} + +} // namespace yggdrasil_decision_forests::port::python diff --git a/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.h b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.h new file mode 100644 index 00000000..868b7854 --- /dev/null +++ b/yggdrasil_decision_forests/port/python/ydf/model/isolation_forest_model/isolation_forest_wrapper.h @@ -0,0 +1,57 @@ +/* + * Copyright 2022 Google LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef YGGDRASIL_DECISION_FORESTS_PORT_PYTHON_YDF_MODEL_ISOLATION_FOREST_MODEL_RANDOM_FOREST_WRAPPER_H_ +#define YGGDRASIL_DECISION_FORESTS_PORT_PYTHON_YDF_MODEL_ISOLATION_FOREST_MODEL_RANDOM_FOREST_WRAPPER_H_ + +#include +#include + +#include "absl/status/statusor.h" +#include "yggdrasil_decision_forests/model/abstract_model.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" +#include "ydf/model/decision_forest_model/decision_forest_wrapper.h" +#include "yggdrasil_decision_forests/utils/logging.h" + +namespace yggdrasil_decision_forests::port::python { + +class IsolationForestCCModel : public DecisionForestCCModel { + using YDFModel = ::yggdrasil_decision_forests::model::isolation_forest:: + IsolationForestModel; + + public: + // Creates an IsolationForestCCModel if `model_ptr` refers to an + // IsolationForestModel. + // + // If this method returns an invalid status, "model_ptr" is not modified. + // If this method returns an ok status, the content of "model_ptr" is moved + // (and "model_ptr" becomes empty). + static absl::StatusOr> Create( + std::unique_ptr& model_ptr); + + // `model` and `if_model` must point to the same object. Prefer using + // IsolationForestCCModel::Create for construction. + IsolationForestCCModel(std::unique_ptr model, YDFModel* if_model) + : DecisionForestCCModel(std::move(model), if_model), if_model_(if_model) { + DCHECK_EQ(model_.get(), if_model_); + } + + private: + // This is a non-owning pointer to the model held by `model_`. 
+ YDFModel* if_model_; +}; + +} // namespace yggdrasil_decision_forests::port::python +#endif // YGGDRASIL_DECISION_FORESTS_PORT_PYTHON_YDF_MODEL_ISOLATION_FOREST_MODEL_RANDOM_FOREST_WRAPPER_H_ diff --git a/yggdrasil_decision_forests/port/python/ydf/model/model.cc b/yggdrasil_decision_forests/port/python/ydf/model/model.cc index 20406838..fb61c621 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/model.cc +++ b/yggdrasil_decision_forests/port/python/ydf/model/model.cc @@ -30,10 +30,12 @@ #include "pybind11_protobuf/native_proto_caster.h" #include "yggdrasil_decision_forests/model/abstract_model.h" #include "yggdrasil_decision_forests/model/gradient_boosted_trees/gradient_boosted_trees.h" +#include "yggdrasil_decision_forests/model/isolation_forest/isolation_forest.h" #include "yggdrasil_decision_forests/model/model_library.h" #include "yggdrasil_decision_forests/model/random_forest/random_forest.h" #include "ydf/model/decision_forest_model/decision_forest_wrapper.h" #include "ydf/model/gradient_boosted_trees_model/gradient_boosted_trees_wrapper.h" +#include "ydf/model/isolation_forest_model/isolation_forest_wrapper.h" #include "ydf/model/model_wrapper.h" #include "ydf/model/random_forest_model/random_forest_wrapper.h" #include "ydf/utils/custom_casters.h" @@ -84,6 +86,11 @@ std::unique_ptr CreateCCModel( // `model_ptr` is now invalid. return std::move(gbt_model.value()); } + auto if_model = IsolationForestCCModel::Create(model_ptr); + if (if_model.ok()) { + // `model_ptr` is now invalid. + return std::move(if_model.value()); + } // `model_ptr` is still valid. 
return std::make_unique(std::move(model_ptr)); } @@ -198,6 +205,20 @@ void init_model(py::module_& m) { return model::random_forest::RandomForestModel::kRegisteredName; }); + py::class_(m, + "IsolationForestCCModel") + .def("__repr__", + [](const GenericCCModel& a) { + return absl::Substitute( + "( m, "GradientBoostedTreesCCModel") diff --git a/yggdrasil_decision_forests/port/python/ydf/model/model_lib.py b/yggdrasil_decision_forests/port/python/ydf/model/model_lib.py index 8f6672a7..4cd1763d 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/model_lib.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/model_lib.py @@ -23,6 +23,7 @@ from ydf.dataset import dataspec from ydf.model import generic_model from ydf.model.gradient_boosted_trees_model import gradient_boosted_trees_model +from ydf.model.isolation_forest_model import isolation_forest_model from ydf.model.random_forest_model import random_forest_model from ydf.utils import log @@ -156,6 +157,8 @@ def load_cc_model(cc_model: ydf.GenericCCModel) -> generic_model.ModelType: return random_forest_model.RandomForestModel(cc_model) if model_name == ydf.GradientBoostedTreesCCModel.kRegisteredName: return gradient_boosted_trees_model.GradientBoostedTreesModel(cc_model) + if model_name == ydf.IsolationForestCCModel.kRegisteredName: + return isolation_forest_model.IsolationForestModel(cc_model) logging.info( "This model has type %s, which is not fully supported. Only generic model" " tasks (e.g. 
inference) are possible", diff --git a/yggdrasil_decision_forests/port/python/ydf/model/tree/__init__.py b/yggdrasil_decision_forests/port/python/ydf/model/tree/__init__.py index a17c4468..4392f572 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/tree/__init__.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/tree/__init__.py @@ -39,6 +39,7 @@ from ydf.model.tree.value import RegressionValue from ydf.model.tree.value import ProbabilityValue from ydf.model.tree.value import UpliftValue +from ydf.model.tree.value import AnomalyDetectionValue # Plotting from ydf.model.tree.plot import PlotOptions diff --git a/yggdrasil_decision_forests/port/python/ydf/model/tree/value.py b/yggdrasil_decision_forests/port/python/ydf/model/tree/value.py index 9f57b649..b16b3605 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/tree/value.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/tree/value.py @@ -29,7 +29,7 @@ class AbstractValue(metaclass=abc.ABCMeta): """A generic value/prediction/output. Attrs: - num_examples: Number of example in the node. + num_examples: Number of examples in the node with weight. """ num_examples: float @@ -100,6 +100,20 @@ def pretty(self) -> str: return f"value={self.treatment_effect}" +@dataclasses.dataclass +class AnomalyDetectionValue(AbstractValue): + """The value of an anomaly detection tree. + + Attrs: + num_examples_without_weight: Number of examples reaching this node. 
+ """ + + num_examples_without_weight: int + + def pretty(self) -> str: + return f"count={self.num_examples_without_weight}" + + def to_value(proto_node: decision_tree_pb2.Node) -> AbstractValue: """Extracts the "value" part of a proto node.""" @@ -130,6 +144,12 @@ def to_value(proto_node: decision_tree_pb2.Node) -> AbstractValue: num_examples=proto_node.uplift.sum_weights, ) + if proto_node.HasField("anomaly_detection"): + return AnomalyDetectionValue( + num_examples_without_weight=proto_node.anomaly_detection.num_examples_without_weight, + num_examples=-1.0, # The number of weighted examples is not tracked. + ) + raise ValueError("Unsupported value") @@ -179,6 +199,15 @@ def _to_json_uplift(value: UpliftValue) -> Dict[str, Any]: } +@to_json.register +def _to_json_uplift(value: AnomalyDetectionValue) -> Dict[str, Any]: + return { + "type": "ANOMALY_DETECTION", + "num_examples_without_weight": value.num_examples_without_weight, + "num_examples": value.num_examples, + } + + @functools.singledispatch def set_proto_node(value: AbstractValue, proto_node: decision_tree_pb2.Node): """Sets the "value" part in a proto node. 
@@ -227,3 +256,12 @@ def _set_proto_node_from_uplift( ): proto_node.uplift.treatment_effect[:] = value.treatment_effect proto_node.uplift.sum_weights = value.num_examples + + +@set_proto_node.register +def _set_proto_node_from_anomaly_detection( + value: AnomalyDetectionValue, proto_node: decision_tree_pb2.Node +): + proto_node.anomaly_detection.num_examples_without_weight = ( + value.num_examples_without_weight + ) From 8d691607f5a300a337341ed0d280fc27a45d577f Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 12:08:18 -0700 Subject: [PATCH 26/30] Anomaly detection; Add support for sklearn isolation forests in the model importer (part 8) PiperOrigin-RevId: 644091208 --- .../port/python/ydf/model/export_sklearn.py | 213 +++++++++++++----- .../port/python/ydf/model/generic_model.py | 19 +- .../python/ydf/model/sklearn_model_test.py | 77 ++++++- 3 files changed, 250 insertions(+), 59 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py index 3d6b6f39..902d2519 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/export_sklearn.py @@ -14,10 +14,10 @@ """Import and export Scikit-Learn models from/to YDF.""" +import dataclasses import enum import functools -from typing import Any, Dict, List, Optional, TypeVar, Union - +from typing import Any, Dict, List, Optional, TypeVar, Union, Sequence import numpy as np from ydf.learner import generic_learner @@ -25,6 +25,7 @@ from ydf.model import generic_model from ydf.model import tree as tree_lib from ydf.model.gradient_boosted_trees_model import gradient_boosted_trees_model +from ydf.model.isolation_forest_model import isolation_forest_model from ydf.model.random_forest_model import random_forest_model # pytype: disable=import-error @@ -43,9 +44,18 @@ # The column idx=0 is reserved for the label in YDF models. 
_LABEL_COLUMN_OFFSET = 1 -# Name of the label/feature columns -_LABEL_KEY = "label" -_FEATURES_KEY = "features" + +@dataclasses.dataclass(frozen=True) +class InternalOptions: + """Internal options for the conversion. + + Attributes: + label_name: Column name of the created label. + feature_name: Column name of the created feature. + """ + + label_name: str + feature_name: str class TaskType(enum.Enum): @@ -54,19 +64,27 @@ class TaskType(enum.Enum): UNKNOWN = 1 SCALAR_REGRESSION = 2 SINGLE_LABEL_CLASSIFICATION = 3 + ANOMALY_DETECTION = 4 ScikitLearnModel = TypeVar("ScikitLearnModel", bound=base.BaseEstimator) ScikitLearnTree = TypeVar("ScikitLearnTree", bound=tree.BaseDecisionTree) -def from_sklearn(sklearn_model: ScikitLearnModel) -> generic_model.GenericModel: +def from_sklearn( + sklearn_model: ScikitLearnModel, + label_name: str = "label", + feature_name: str = "features", +) -> generic_model.GenericModel: """Converts a tree-based scikit-learn model to a YDF model.""" if not hasattr(sklearn_model, "n_features_in_"): raise ValueError( "Scikit-Learn model must be fit to data before converting." 
) - return _sklearn_to_ydf_model(sklearn_model) + return _sklearn_to_ydf_model( + sklearn_model, + InternalOptions(label_name=label_name, feature_name=feature_name), + ) def _gen_fake_features(num_features: int, num_examples: int = 2): @@ -75,9 +93,10 @@ def _gen_fake_features(num_features: int, num_examples: int = 2): @functools.singledispatch def _sklearn_to_ydf_model( - sklearn_model: ScikitLearnModel, + sklearn_model: ScikitLearnModel, options: InternalOptions ) -> generic_model.GenericModel: """Builds a YDF model from the given scikit-learn model.""" + del options raise NotImplementedError( f"Can't build a YDF model for {type(sklearn_model)}" ) @@ -85,44 +104,54 @@ def _sklearn_to_ydf_model( @_sklearn_to_ydf_model.register(tree.DecisionTreeRegressor) @_sklearn_to_ydf_model.register(tree.ExtraTreeRegressor) -def _(sklearn_model: ScikitLearnTree) -> generic_model.GenericModel: +def _( + sklearn_model: ScikitLearnTree, options: InternalOptions +) -> generic_model.GenericModel: """Converts a single scikit-learn regression tree to a YDF model.""" ydf_model = specialized_learners.RandomForestLearner( - label=_LABEL_KEY, + label=options.label_name, task=generic_learner.Task.REGRESSION, num_trees=0, ).train( { - _LABEL_KEY: [0.0, 1.0], - _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + options.label_name: [0.0, 1.0], + options.feature_name: _gen_fake_features( + sklearn_model.n_features_in_ + ), }, verbose=0, ) assert isinstance(ydf_model, random_forest_model.RandomForestModel) - ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_model) + ydf_tree = convert_sklearn_tree_to_ydf_tree( + sklearn_model, task=generic_learner.Task.REGRESSION + ) ydf_model.add_tree(ydf_tree) return ydf_model @_sklearn_to_ydf_model.register(tree.DecisionTreeClassifier) @_sklearn_to_ydf_model.register(tree.ExtraTreeClassifier) -def _(sklearn_model: ScikitLearnTree) -> generic_model.GenericModel: +def _( + sklearn_model: ScikitLearnTree, options: InternalOptions +) -> 
generic_model.GenericModel: """Converts a single scikit-learn classification tree to a YDF model.""" ydf_model = specialized_learners.RandomForestLearner( - label=_LABEL_KEY, + label=options.label_name, task=generic_learner.Task.CLASSIFICATION, num_trees=0, ).train( { - _LABEL_KEY: [str(c) for c in sklearn_model.classes_], - _FEATURES_KEY: _gen_fake_features( + options.label_name: [str(c) for c in sklearn_model.classes_], + options.feature_name: _gen_fake_features( sklearn_model.n_features_in_, len(sklearn_model.classes_) ), }, verbose=0, ) assert isinstance(ydf_model, random_forest_model.RandomForestModel) - ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_model) + ydf_tree = convert_sklearn_tree_to_ydf_tree( + sklearn_model, task=generic_learner.Task.CLASSIFICATION + ) ydf_model.add_tree(ydf_tree) return ydf_model @@ -131,25 +160,31 @@ def _(sklearn_model: ScikitLearnTree) -> generic_model.GenericModel: @_sklearn_to_ydf_model.register(ensemble.RandomForestRegressor) def _( sklearn_model: Union[ - ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor + ensemble.ExtraTreesRegressor, + ensemble.RandomForestRegressor, ], + options: InternalOptions, ) -> generic_model.GenericModel: """Converts a forest regression model into a YDF model.""" ydf_model = specialized_learners.RandomForestLearner( - label=_LABEL_KEY, + label=options.label_name, task=generic_learner.Task.REGRESSION, num_trees=0, ).train( { - _LABEL_KEY: [0.0, 1.0], - _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + options.label_name: [0.0, 1.0], + options.feature_name: _gen_fake_features( + sklearn_model.n_features_in_ + ), }, verbose=0, ) assert isinstance(ydf_model, random_forest_model.RandomForestModel) for sklearn_tree in sklearn_model.estimators_: - ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_tree) + ydf_tree = convert_sklearn_tree_to_ydf_tree( + sklearn_tree, task=generic_learner.Task.REGRESSION + ) ydf_model.add_tree(ydf_tree) return ydf_model @@ -158,34 +193,72 
@@ def _( @_sklearn_to_ydf_model.register(ensemble.RandomForestClassifier) def _( sklearn_model: Union[ - ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier + ensemble.ExtraTreesClassifier, + ensemble.RandomForestClassifier, ], + options: InternalOptions, ) -> generic_model.GenericModel: """Converts a forest classification model into a YDF model.""" ydf_model = specialized_learners.RandomForestLearner( - label=_LABEL_KEY, + label=options.label_name, task=generic_learner.Task.CLASSIFICATION, num_trees=0, ).train( { - _LABEL_KEY: [str(c) for c in sklearn_model.classes_], - _FEATURES_KEY: _gen_fake_features( + options.label_name: [str(c) for c in sklearn_model.classes_], + options.feature_name: _gen_fake_features( sklearn_model.n_features_in_, len(sklearn_model.classes_) ), }, verbose=0, ) - assert isinstance(ydf_model, random_forest_model.RandomForestModel) + assert isinstance( + ydf_model, + random_forest_model.RandomForestModel, + ) for sklearn_tree in sklearn_model.estimators_: - ydf_tree = convert_sklearn_tree_to_ydf_tree(sklearn_tree) + ydf_tree = convert_sklearn_tree_to_ydf_tree( + sklearn_tree, task=generic_learner.Task.CLASSIFICATION + ) + ydf_model.add_tree(ydf_tree) + return ydf_model + + +@_sklearn_to_ydf_model.register(ensemble.IsolationForest) +def _( + sklearn_model: ensemble.IsolationForest, options: InternalOptions +) -> generic_model.GenericModel: + """Converts a single scikit-learn iso-forest to a YDF model.""" + ydf_model = specialized_learners.IsolationForestLearner( + task=generic_learner.Task.ANOMALY_DETECTION, + num_trees=0, + subsample_count=sklearn_model._max_samples, # pylint: disable=protected-access + ).train( + { + options.feature_name: _gen_fake_features( + sklearn_model.n_features_in_ + ), + }, + verbose=0, + ) + assert isinstance(ydf_model, isolation_forest_model.IsolationForestModel) + + for sklearn_tree, attribute_mapping in zip( + sklearn_model.estimators_, sklearn_model.estimators_features_ + ): + ydf_tree = 
convert_sklearn_tree_to_ydf_tree( + sklearn_tree, + attribute_mapping=attribute_mapping.tolist(), + task=generic_learner.Task.ANOMALY_DETECTION, + ) ydf_model.add_tree(ydf_tree) return ydf_model @_sklearn_to_ydf_model.register(ensemble.GradientBoostingRegressor) def _( - sklearn_model: ensemble.GradientBoostingRegressor, + sklearn_model: ensemble.GradientBoostingRegressor, options: InternalOptions ) -> generic_model.GenericModel: """Converts a gradient boosting regression model into a YDF model.""" @@ -199,7 +272,9 @@ def _( # first tree in the ensemble and set the bias to zero. We could also support # other tree-based initial estimators (e.g. RandomForest), but this seems # like a niche enough use case that we don't for the moment. - init_pytree = convert_sklearn_tree_to_ydf_tree(sklearn_model.init_) + init_pytree = convert_sklearn_tree_to_ydf_tree( + sklearn_model.init_, task=generic_learner.Task.REGRESSION + ) bias = 0.0 elif sklearn_model.init_ == "zero": init_pytree = None @@ -212,13 +287,15 @@ def _( ) ydf_model = specialized_learners.GradientBoostedTreesLearner( - label=_LABEL_KEY, + label=options.label_name, task=generic_learner.Task.REGRESSION, num_trees=0, ).train( { - _LABEL_KEY: [0.0, 1.0], - _FEATURES_KEY: _gen_fake_features(sklearn_model.n_features_in_), + options.label_name: [0.0, 1.0], + options.feature_name: _gen_fake_features( + sklearn_model.n_features_in_ + ), }, verbose=0, ) @@ -233,7 +310,9 @@ def _( for weak_learner in sklearn_model.estimators_.ravel(): ydf_tree = convert_sklearn_tree_to_ydf_tree( - weak_learner, weight=sklearn_model.learning_rate + weak_learner, + weight=sklearn_model.learning_rate, + task=generic_learner.Task.REGRESSION, ) ydf_model.add_tree(ydf_tree) return ydf_model @@ -241,14 +320,19 @@ def _( def convert_sklearn_tree_to_ydf_tree( sklearn_tree: ScikitLearnTree, + task: generic_learner.Task, weight: Optional[float] = None, + attribute_mapping: Optional[Sequence[int]] = None, ) -> tree_lib.Tree: """Converts a scikit-learn 
decision tree into a YDF tree. Args: sklearn_tree: a scikit-learn decision tree. + task: The task of the model. weight: an optional weight to apply to the values of the leaves in the tree. This is intended for use when converting gradient boosted tree models. + attribute_mapping: Index of the attributes used as input features for this + sktree. Returns: a YDF tree that has the same structure as the scikit-learn tree. @@ -260,10 +344,17 @@ def convert_sklearn_tree_to_ydf_tree( "Scikit-Learn model must be fit to data before converting." ) from e + if hasattr(sklearn_tree, "n_classes_") and sklearn_tree.n_outputs_ == 1: + pass # A classification model + elif sklearn_tree.n_outputs_ == 1: + pass # A regression model + else: + raise ValueError( + "This model type if not supported. `ydf.from_sklearn` only support" + " scalar regression, single-label classification and isolation forests." + ) + field_names = sklearn_tree_data["nodes"].dtype.names - task_type = _get_sklearn_tree_task_type(sklearn_tree) - if weight and task_type is TaskType.SINGLE_LABEL_CLASSIFICATION: - raise ValueError("weight should not be passed for classification trees.") nodes = [] # For each node @@ -278,19 +369,26 @@ def convert_sklearn_tree_to_ydf_tree( for field_name, field_value in zip(field_names, node_properties) } + common_kwargs = {"num_examples": node["weighted_n_node_samples"]} + # Add the node output value to the dictionary of properties. - if task_type is TaskType.SCALAR_REGRESSION: + if task == generic_learner.Task.REGRESSION: scaling_factor = weight if weight else 1.0 node["value"] = tree_lib.RegressionValue( - value=node_output[0][0] * scaling_factor, - num_examples=node["weighted_n_node_samples"], + value=node_output[0][0] * scaling_factor, **common_kwargs ) - elif task_type is TaskType.SINGLE_LABEL_CLASSIFICATION: + elif task == generic_learner.Task.CLASSIFICATION: # Normalise to probabilities if we have a classification tree. 
+ assert weight is None probabilities = list(node_output[0] / node_output[0].sum()) node["value"] = tree_lib.ProbabilityValue( - probability=probabilities, - num_examples=node["weighted_n_node_samples"], + probability=probabilities, **common_kwargs + ) + + elif task == generic_learner.Task.ANOMALY_DETECTION: + assert weight is None + node["value"] = tree_lib.AnomalyDetectionValue( + num_examples_without_weight=node["n_node_samples"], **common_kwargs ) else: raise ValueError( @@ -303,23 +401,17 @@ def convert_sklearn_tree_to_ydf_tree( # The root node has index zero. node_index=0, nodes=nodes, + attribute_mapping=attribute_mapping, + task=task, ) return tree_lib.Tree(root_node) -def _get_sklearn_tree_task_type(sklearn_tree: ScikitLearnTree) -> TaskType: - """Finds the task type of a scikit learn tree.""" - if hasattr(sklearn_tree, "n_classes_") and sklearn_tree.n_outputs_ == 1: - return TaskType.SINGLE_LABEL_CLASSIFICATION - elif sklearn_tree.n_outputs_ == 1: - return TaskType.SCALAR_REGRESSION - else: - return TaskType.UNKNOWN - - def _convert_sklearn_node_to_ydf_node( node_index: int, + task: generic_learner.Task, nodes: List[Dict[str, Any]], + attribute_mapping: Optional[Sequence[int]], ) -> tree_lib.AbstractNode: """Converts a node within a scikit-learn tree into a YDF node.""" if node_index == -1: @@ -333,15 +425,26 @@ def _convert_sklearn_node_to_ydf_node( neg_child = _convert_sklearn_node_to_ydf_node( node_index=node["left_child"], + task=task, nodes=nodes, + attribute_mapping=attribute_mapping, ) pos_child = _convert_sklearn_node_to_ydf_node( node_index=node["right_child"], + task=task, nodes=nodes, + attribute_mapping=attribute_mapping, ) + + attribute = node["feature"] + if attribute_mapping: + attribute = attribute_mapping[attribute] + else: + attribute += _LABEL_COLUMN_OFFSET + return tree_lib.NonLeaf( condition=tree_lib.NumericalHigherThanCondition( - attribute=node["feature"] + _LABEL_COLUMN_OFFSET, + attribute=attribute, threshold=node["threshold"], 
missing=False, score=0.0, diff --git a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py index a231aca1..90e86166 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/generic_model.py @@ -1095,7 +1095,11 @@ def force_engine(self, engine_name: Optional[str]) -> None: self._model.ForceEngine(engine_name) -def from_sklearn(sklearn_model: Any) -> GenericModel: +def from_sklearn( + sklearn_model: Any, + label_name: str = "label", + feature_name: str = "features", +) -> GenericModel: """Converts a tree-based scikit-learn model to a YDF model. Usage example: @@ -1129,17 +1133,28 @@ def from_sklearn(sklearn_model: Any) -> GenericModel: * sklearn.ensemble.ExtraTreesClassifier * sklearn.ensemble.ExtraTreesRegressor * sklearn.ensemble.GradientBoostingRegressor + * sklearn.ensemble.IsolationForest + + Unlike YDF, Scikit-learn does not name features and labels. Use the fields + `label_name` and `feature_name` to specify the name of the columns in the YDF + model. Additionally, only single-label classification and scalar regression are supported (e.g. multivariate regression models will not convert). Args: sklearn_model: the scikit-learn tree based model to be converted. + label_name: Name of the label in the output YDF model. + feature_name: Name of the multi-dimensional feature in the output YDF model. Returns: a YDF Model that emulates the provided scikit-learn model.
""" - return _get_export_sklearn().from_sklearn(sklearn_model) + return _get_export_sklearn().from_sklearn( + sklearn_model=sklearn_model, + label_name=label_name, + feature_name=feature_name, + ) def _get_export_jax(): diff --git a/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py index d0a7941f..47416b6b 100644 --- a/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/model/sklearn_model_test.py @@ -12,17 +12,52 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Tuple from absl.testing import absltest from absl.testing import parameterized import numpy as np from sklearn import datasets from sklearn import ensemble from sklearn import linear_model +from sklearn import metrics from sklearn import tree from ydf.model import export_sklearn from ydf.model.decision_forest_model import decision_forest_model +def gen_anomaly_detection_dataset( + n_samples: int = 120, + n_outliers: int = 40, + seed: int = 0, +) -> Tuple[np.ndarray, np.ndarray]: + """Generates a two-gaussians anomaly detection dataset. + + This function is similar to the example in: + https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html + + Args: + n_samples: Number of samples to generate in each gaussian. + n_outliers: Number of outliers to generate. + seed: Seed to use for random number generation. + + Returns: + The features and labels for the dataset. 
+ """ + np.random.seed(seed) + covariance = np.array([[0.5, -0.1], [0.7, 0.4]]) + cluster_1 = 0.4 * np.random.randn(n_samples, 2) @ covariance + np.array( + [2, 2] + ) + cluster_2 = 0.3 * np.random.randn(n_samples, 2) + np.array([-2, -2]) + outliers = np.random.uniform(low=-4, high=4, size=(n_outliers, 2)) + features = np.concatenate([cluster_1, cluster_2, outliers]) + labels = np.concatenate([ + np.zeros((2 * n_samples), dtype=bool), + np.ones((n_outliers), dtype=bool), + ]) + return features, labels + + class ScikitLearnModelConverterTest(parameterized.TestCase): @parameterized.parameters( @@ -104,6 +139,44 @@ def test_import_classification_model( ydf_predictions = ydf_model.predict(ydf_features) np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-5) + def test_import_anomaly_detection_model( + self, + ): + train_features, _ = gen_anomaly_detection_dataset(seed=0) + test_features, test_labels = gen_anomaly_detection_dataset(seed=1) + + # Train isolation forest + sklearn_model = ensemble.IsolationForest(max_samples=100, random_state=0) + sklearn_model.fit(train_features) + + # Generate golden predictions + sklearn_predictions = -sklearn_model.score_samples(test_features) + # Note: This is different from "sklearn_model.predict" and + # "sklearn_model.decision_function". 
+ + # Test quality of model + auc = metrics.roc_auc_score(test_labels, sklearn_predictions) + self.assertAlmostEqual(auc, 0.99333, delta=0.0001) + + ydf_model = export_sklearn.from_sklearn(sklearn_model) + self.assertSequenceEqual( + ydf_model.input_feature_names(), + [ + "features.0_of_2", + "features.1_of_2", + ], + ) + ydf_features = {"features": test_features} + ydf_predictions = ydf_model.predict(ydf_features) + + _ = ydf_model.describe("text") + _ = ydf_model.describe("html") + _ = ydf_model.analyze_prediction({"features": test_features[:1]}) + _ = ydf_model.analyze(ydf_features) + + # YDF Predictions match SKLearn predictions + np.testing.assert_allclose(sklearn_predictions, ydf_predictions, rtol=1e-5) + def test_import_raises_when_unrecognised_model_provided(self): features, labels = datasets.make_regression( n_samples=100, @@ -132,7 +205,7 @@ def test_import_raises_when_regression_target_is_multivariate(self): sklearn_model = tree.DecisionTreeRegressor().fit(features, labels) with self.assertRaisesRegex( ValueError, - "Only scalar regression and single-label classification are supported.", + "This model type if not supported", ): _ = export_sklearn.from_sklearn(sklearn_model) @@ -149,7 +222,7 @@ def test_import_raises_when_classification_target_is_multilabel( sklearn_model = tree.DecisionTreeClassifier().fit(features, labels) with self.assertRaisesRegex( ValueError, - "Only scalar regression and single-label classification are supported.", + "This model type if not supported", ): _ = export_sklearn.from_sklearn(sklearn_model) From aa9fcdd7084ff77ad98132218a94ee6234471760 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 12:12:11 -0700 Subject: [PATCH 27/30] Improve error messages when feeding the wrong type of datasets. 
PiperOrigin-RevId: 644092333 --- .../port/python/CHANGELOG.md | 1 + .../port/python/ydf/dataset/dataset_test.py | 12 +++++++++ .../port/python/ydf/dataset/io/dataset_io.py | 10 ++++++-- .../python/ydf/dataset/io/dataset_io_types.py | 25 ++++++++++++++----- .../port/python/ydf/dataset/io/pandas_io.py | 4 +++ .../python/ydf/learner/generic_learner.py | 3 +++ .../port/python/ydf/learner/learner_test.py | 4 +++ 7 files changed, 51 insertions(+), 8 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 5339e1e0..b41d1211 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -8,6 +8,7 @@ - Add `verbose` argument to `train` method which is equivalent but sometime more convenient than`ydf.verbose`. - Add SKLearn to YDF model converter: `ydf.from_sklearn`. +- Improve error messages when calling the model with non supported data. ### Fix diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py index a5f9aa3f..b7f8e131 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py @@ -1172,6 +1172,18 @@ def test_look_numerical(self, value: str): def test_does_not_look_numerical(self, value: str): self.assertFalse(dataset.look_numerical(value)) + def test_from_numpy(self): + with self.assertRaisesRegex( + ValueError, "Numpy arrays cannot be fed directly" + ): + dataset.create_vertical_dataset(np.array([1, 2, 3])) + + def test_from_column_less_pandas(self): + with self.assertRaisesRegex( + ValueError, "The pandas DataFrame must have string column names" + ): + dataset.create_vertical_dataset(pd.DataFrame([[1, 2, 3], [4, 5, 6]])) + class CategoricalSetTest(absltest.TestCase): diff --git 
a/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io.py b/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io.py index 92e3babf..76f6994b 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io.py @@ -166,7 +166,13 @@ def cast_input_dataset_to_dict( return _unroll_dict(data, **unroll_dict_kwargs) # TODO: Maybe this error should be raised at a layer above this one? + raise ValueError( - "Cannot import dataset from" - f" {type(data)}.\n{dataset_io_types.SUPPORTED_INPUT_DATA_DESCRIPTION}" + "Non supported dataset type: " + f"{type(data)}\n\n{dataset_io_types.SUPPORTED_INPUT_DATA_DESCRIPTION}" + + ( + dataset_io_types.HOW_TO_FEED_NUMPY + if isinstance(data, np.ndarray) + else "" + ) ) diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io_types.py b/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io_types.py index 35a6ed6e..7234be50 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io_types.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/io/dataset_io_types.py @@ -37,13 +37,26 @@ # Supported types of datasets. IODataset = Union[Dict[str, InputValues], "pd.DataFrame", str, Sequence[str]] +HOW_TO_FEED_NUMPY = """ +Numpy arrays cannot be fed directly. Instead, feed them in a dictionary i.e. +Instead of: + model.predict(np.array([[1,2],[3,4]])) +Do: + model.predict({"features":np.array([[1,2],[3,4]])}) +""" + SUPPORTED_INPUT_DATA_DESCRIPTION = """\ A dataset can be one of the following: -- A Pandas DataFrame. -- A dictionary of column names (str) to values. Values can be lists of int, float, bool, str or bytes. Values can also be Numpy arrays. -- A YDF VerticalDataset -- A TensorFlow Batched Dataset. -- A typed (possibly sharded) path to a CSV file (e.g. csv:mydata). -- A list of typed paths (e.g. ["csv:mydata1", "csv:mydata2"]). + 1. 
A dictionary of string (column names) to column values. The values of a column can be a list of int, float, bool, str, bytes, or a numpy array. A 2D numpy array is treated as a multi-dimensional column. + 2. A Pandas DataFrame. + 3. A YDF VerticalDataset created with `ydf.create_vertical_dataset`. This option is the most efficient when the same dataset is used multiple times. + 4. A batched TensorFlow Dataset. + 5. A typed path to a csv file e.g. "csv:/tmp/dataset.csv". See supported types below. The path can be sharded (e.g. "csv:/tmp/dataset@10") or globbed ("csv:/tmp/dataset*"). + 6. A list of typed paths e.g. ["csv:/tmp/data1.csv", "csv:/tmp/data2.csv"]. See supported types below. + +The supported file formats and corresponding prefixes are: + - CSV file. prefix 'csv:' + - Non-compressed TFRecord of Tensorflow Examples. prefix 'tfrecordv2+tfe:' + - Compressed TFRecord of Tensorflow Examples. prefix 'tfrecord+tfe:'; not available in default public build. """ diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/io/pandas_io.py b/yggdrasil_decision_forests/port/python/ydf/dataset/io/pandas_io.py index 80f43143..7e6ea031 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/io/pandas_io.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/io/pandas_io.py @@ -53,6 +53,10 @@ def to_dict( raise ValueError("The pandas DataFrame must be two-dimensional.") data_dict = data.to_dict("series") + for k in data_dict: + if not isinstance(k, str): + raise ValueError("The pandas DataFrame must have string column names.") + def clean(values): if values.dtype == "object": return values.to_numpy(copy=False, na_value="") diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py index 11c8e3be..b6cff606 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/generic_learner.py 
@@ -77,8 +77,11 @@ def __init__( self._deployment_config = deployment_config self._tuner = tuner + if self._label is not None and not isinstance(label, str): + raise ValueError("The 'label' should be a string") if task != Task.ANOMALY_DETECTION and not self._label: raise ValueError("Constructing the learner requires a non-empty label.") + if self._ranking_group is not None and task != Task.RANKING: raise ValueError( "The ranking group should only be specified for ranking tasks." diff --git a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py index 5c0b64fc..87678d1f 100644 --- a/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/learner/learner_test.py @@ -676,6 +676,10 @@ def test_warning_categorical_numerical(self): ) _ = learner.train(ds) + def test_label_is_dataset(self): + with self.assertRaisesRegex(ValueError, "should be a string"): + _ = specialized_learners.RandomForestLearner(label=np.array([1, 0])) # pytype: disable=wrong-arg-types + class CARTLearnerTest(LearnerTest): From 78190b2681e760743d266f70a8edb4aa0fb205e0 Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 12:15:27 -0700 Subject: [PATCH 28/30] Anomaly detection; Surface the isolation forest learner and model in the python API (part 9) PiperOrigin-RevId: 644093348 --- documentation/public/docs/py_api/IsolationForestLearner | 3 +++ documentation/public/docs/py_api/IsolationForestModel.md | 3 +++ documentation/public/docs/py_api/index.md | 2 ++ yggdrasil_decision_forests/port/python/CHANGELOG.md | 1 + yggdrasil_decision_forests/port/python/ydf/BUILD | 3 +++ yggdrasil_decision_forests/port/python/ydf/__init__.py | 2 ++ 6 files changed, 14 insertions(+) create mode 100644 documentation/public/docs/py_api/IsolationForestLearner create mode 100644 documentation/public/docs/py_api/IsolationForestModel.md diff --git 
a/documentation/public/docs/py_api/IsolationForestLearner b/documentation/public/docs/py_api/IsolationForestLearner new file mode 100644 index 00000000..86d7359f --- /dev/null +++ b/documentation/public/docs/py_api/IsolationForestLearner @@ -0,0 +1,3 @@ +[TOC] + +::: ydf.IsolationForestLearner diff --git a/documentation/public/docs/py_api/IsolationForestModel.md b/documentation/public/docs/py_api/IsolationForestModel.md new file mode 100644 index 00000000..817c5097 --- /dev/null +++ b/documentation/public/docs/py_api/IsolationForestModel.md @@ -0,0 +1,3 @@ +[TOC] + +::: ydf.IsolationForestModel diff --git a/documentation/public/docs/py_api/index.md b/documentation/public/docs/py_api/index.md index f81bf83c..9366ea66 100644 --- a/documentation/public/docs/py_api/index.md +++ b/documentation/public/docs/py_api/index.md @@ -13,6 +13,7 @@ A **Learner** trains models and can be cross-validated. - [DecisionTreeLearner](CartLearner.md): Alias to [CartLearner](CartLearner.md). - [DistributedGradientBoostedTreesLearner](DistributedGradientBoostedTreesLearner.md) +- [IsolationForestLearner](IsolationForestLearner.md) All learners derive from [GenericLearner](GenericLearner.md). @@ -29,6 +30,7 @@ arguments of learner classes. - [RandomForestModel](RandomForestModel.md) - [CARTModel](RandomForestModel.md): Alias to [RandomForestModel](RandomForestModel.md). +- [IsolationForestModel](IsolationForestModel.md) All models derive from [GenericModel](GenericModel.md). diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index b41d1211..77889aeb 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -4,6 +4,7 @@ ### Feature +- Add support for Isolation Forests model. - Add `max_depth` argument to `model.print_tree`. - Add `verbose` argument to `train` method which is equivalent but sometime more convenient than`ydf.verbose`. 
diff --git a/yggdrasil_decision_forests/port/python/ydf/BUILD b/yggdrasil_decision_forests/port/python/ydf/BUILD index 1c910007..f53318e5 100644 --- a/yggdrasil_decision_forests/port/python/ydf/BUILD +++ b/yggdrasil_decision_forests/port/python/ydf/BUILD @@ -28,6 +28,7 @@ py_library( "//ydf/model:model_lib", "//ydf/model:model_metadata", "//ydf/model/gradient_boosted_trees_model", + "//ydf/model/isolation_forest_model", "//ydf/model/random_forest_model", "//ydf/model/tree:all", "//ydf/utils:log", @@ -56,6 +57,7 @@ py_library( "@ydf_cc//yggdrasil_decision_forests/learner/gradient_boosted_trees/early_stopping:early_stopping_snapshot_py_proto", "@ydf_cc//yggdrasil_decision_forests/learner/hyperparameters_optimizer:hyperparameters_optimizer_py_proto", "@ydf_cc//yggdrasil_decision_forests/learner/hyperparameters_optimizer/optimizers:random_py_proto", + "@ydf_cc//yggdrasil_decision_forests/learner/isolation_forest:isolation_forest_py_proto", "@ydf_cc//yggdrasil_decision_forests/learner/multitasker:multitasker_py_proto", "@ydf_cc//yggdrasil_decision_forests/learner/random_forest:random_forest_py_proto", "@ydf_cc//yggdrasil_decision_forests/metric:metric_py_proto", @@ -64,6 +66,7 @@ py_library( "@ydf_cc//yggdrasil_decision_forests/model:prediction_py_proto", "@ydf_cc//yggdrasil_decision_forests/model/decision_tree:decision_tree_py_proto", "@ydf_cc//yggdrasil_decision_forests/model/gradient_boosted_trees:gradient_boosted_trees_py_proto", + "@ydf_cc//yggdrasil_decision_forests/model/isolation_forest:isolation_forest_py_proto", "@ydf_cc//yggdrasil_decision_forests/model/multitasker:multitasker_py_proto", "@ydf_cc//yggdrasil_decision_forests/model/random_forest:random_forest_py_proto", "@ydf_cc//yggdrasil_decision_forests/serving:serving_py_proto", diff --git a/yggdrasil_decision_forests/port/python/ydf/__init__.py b/yggdrasil_decision_forests/port/python/ydf/__init__.py index d87a56b3..1ef30a05 100644 --- a/yggdrasil_decision_forests/port/python/ydf/__init__.py +++ 
b/yggdrasil_decision_forests/port/python/ydf/__init__.py @@ -43,6 +43,7 @@ def _check_install(): from ydf.learner.specialized_learners import RandomForestLearner from ydf.learner.specialized_learners import GradientBoostedTreesLearner from ydf.learner.specialized_learners import DistributedGradientBoostedTreesLearner +from ydf.learner.specialized_learners import IsolationForestLearner DecisionTreeLearner = CartLearner @@ -50,6 +51,7 @@ def _check_install(): from ydf.model.generic_model import GenericModel from ydf.model.random_forest_model.random_forest_model import RandomForestModel from ydf.model.gradient_boosted_trees_model.gradient_boosted_trees_model import GradientBoostedTreesModel +from ydf.model.isolation_forest_model.isolation_forest_model import IsolationForestModel # A CART model is a Random Forest with a single tree CARTModel = RandomForestModel From a1f70810215f33f09ceaff05585f0896d63b7def Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Mon, 17 Jun 2024 12:19:16 -0700 Subject: [PATCH 29/30] Anomaly detection; Anomaly detection tutorial (part 10) PiperOrigin-RevId: 644094544 --- .../docs/tutorial/anomaly_detection.ipynb | 3858 +++++++++++++++++ documentation/public/mkdocs.yml | 1 + 2 files changed, 3859 insertions(+) create mode 100644 documentation/public/docs/tutorial/anomaly_detection.ipynb diff --git a/documentation/public/docs/tutorial/anomaly_detection.ipynb b/documentation/public/docs/tutorial/anomaly_detection.ipynb new file mode 100644 index 00000000..337ccfb4 --- /dev/null +++ b/documentation/public/docs/tutorial/anomaly_detection.ipynb @@ -0,0 +1,3858 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Anomaly detection\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/google/yggdrasil-decision-forests/blob/main/documentation/public/docs/tutorial/anomaly_detection.ipynb)\n" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pip install ydf ucimlrepo scikit-learn umap-learn plotly -U -q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is anomaly detection?\n", + "\n", + "**Anomaly detection** techniques are non-supervised learning algorithms for identifying rare and unusual patterns in data that deviate significantly from the norm.\n", + "For example, anomaly detection can be used for fraud detection, network intrusion detection, and fault diagnosis, without the need for defining of abnormal instances.\n", + "\n", + "Anomaly detection with decision forests is a straightforward but effective technique for tabular data. The model assigns an anomaly score to each data point, ranging from 0 (normal) to 1 (abnormal). Decision forests also offer interpretability tools and properties, making it easier to understand and characterize detected anomalies.\n", + "\n", + "In anomaly detection, labeled examples are used not for training but for evaluating the model. These labels ensure that the model can detect known anomalies.\n", + "\n", + "\n", + "We train and evaluate two anomaly detection models on the UCI Covertype dataset, which describes forest cover types and other geographic attributes of land cells. The first model is trained on pine and willow data. Given that willow is rarer than pine, the model differentiates between them without labels. This first model will then be interpreted and characterize what constitute a pine cover type." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/gbm/my_venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2024-06-17 13:06:13.648825: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-06-17 13:06:14.292005: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + } + ], + "source": [ + "# Load libraries\n", + "import ydf # For learning the anomaly detection model\n", + "import pandas as pd # We use Pandas to load small datasets\n", + "from sklearn import metrics # Use sklearn to compute AUC\n", + "from ucimlrepo import fetch_ucirepo # To download the dataset\n", + "import matplotlib.pyplot as plt # For plotting\n", + "import seaborn as sns # For plotting\n", + "import umap # For projecting distances in 2d\n", + "\n", + "# For interactive plots\n", + "import plotly.graph_objs as go\n", + "from plotly.offline import iplot\n", + "import plotly.io as pio\n", + "pio.renderers.default=\"colab\"\n", + "\n", + "# Disable Pandas warnings\n", + "pd.options.mode.chained_assignment = None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We download the Covertype dataset from UCI." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# https://archive.ics.uci.edu/dataset/31/covertype\n", + "covertype_repo = fetch_ucirepo(id=31)\n", + "raw_dataset = pd.concat([covertype_repo.data.features, covertype_repo.data.targets], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select the columns of interest and clean the labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ElevationAspectSlopeHorizontal_Distance_To_HydrologyVertical_Distance_To_HydrologyHorizontal_Distance_To_RoadwaysHillshade_9amHillshade_NoonHillshade_3pmHorizontal_Distance_To_Fire_PointsCover_Type
0259651325805102212321486279Aspen
12590562212-63902202351516225Aspen
2280413992686531802342381356121Lodgepole Pine
327851551824211830902382381226211Lodgepole Pine
42595452153-13912202341506172Aspen
\n", + "
" + ], + "text/plain": [ + " Elevation Aspect Slope Horizontal_Distance_To_Hydrology \\\n", + "0 2596 51 3 258 \n", + "1 2590 56 2 212 \n", + "2 2804 139 9 268 \n", + "3 2785 155 18 242 \n", + "4 2595 45 2 153 \n", + "\n", + " Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways \\\n", + "0 0 510 \n", + "1 -6 390 \n", + "2 65 3180 \n", + "3 118 3090 \n", + "4 -1 391 \n", + "\n", + " Hillshade_9am Hillshade_Noon Hillshade_3pm \\\n", + "0 221 232 148 \n", + "1 220 235 151 \n", + "2 234 238 135 \n", + "3 238 238 122 \n", + "4 220 234 150 \n", + "\n", + " Horizontal_Distance_To_Fire_Points Cover_Type \n", + "0 6279 Aspen \n", + "1 6225 Aspen \n", + "2 6121 Lodgepole Pine \n", + "3 6211 Lodgepole Pine \n", + "4 6172 Aspen " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = raw_dataset.copy()\n", + "\n", + "# Features of interest\n", + "features = [\"Elevation\", \"Aspect\", \"Slope\", \"Horizontal_Distance_To_Hydrology\",\n", + " \"Vertical_Distance_To_Hydrology\", \"Horizontal_Distance_To_Roadways\",\n", + " \"Hillshade_9am\", \"Hillshade_Noon\", \"Hillshade_3pm\",\n", + " \"Horizontal_Distance_To_Fire_Points\"]\n", + "dataset = dataset[features + [\"Cover_Type\"]]\n", + "\n", + "# Covert type as text\n", + "dataset[\"Cover_Type\"] = dataset[\"Cover_Type\"].map({\n", + " 1: \"Spruce/Fir\",\n", + " 2: \"Lodgepole Pine\",\n", + " 3: \"Ponderosa Pine\",\n", + " 4: \"Cottonwood/Willow\",\n", + " 5: \"Aspen\",\n", + " 6: \"Douglas-fir\",\n", + " 7: \"Krummholz\"\n", + "})\n", + "\n", + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first model is trained on the \"filtered dataset\" than only contain spruce/fir and cottonwood/willow examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_dataset = dataset[dataset[\"Cover_Type\"].isin([\"Spruce/Fir\", \"Cottonwood/Willow\"])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, the spruce/fir cover is much more common than the cottonwood/willow cover:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Cover_Type\n", + "Spruce/Fir 211840\n", + "Cottonwood/Willow 2747\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_dataset[\"Cover_Type\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We train a popular anomaly detection decision forest algorithm called isolation forest." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Anomaly detection model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train model on 214587 examples\n", + "Model trained in 0:00:00.074241\n" + ] + } + ], + "source": [ + "model = ydf.IsolationForestLearner(features=features).train(filtered_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then generate \"predictions\" i.e. anomaly scores." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.57844853, 0.609949 , 0.5433627 , 0.6099571 , 0.48067462],\n", + " dtype=float32)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = model.predict(filtered_dataset)\n", + "predictions[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we plot the distribution of the model's anomaly scores for spruce/fir and cottonwood/willow cover. We see that both distributions are \"separated\", indicating the model's ability to differentiate between the two covers.\n", + "\n", + "**Note:** Since cottonwood/willow cover is less frequent, the two distributions are normalized separately. Otherwise, the cottonwood/willow distribution would appear flat." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjIAAAGyCAYAAAD+lC4cAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACGhklEQVR4nO3dd3iT9frH8XeS7t1CF1DassveyAZBGbLEAYoKLjyKR5xHPf5wH8GBoCi4wY2KgqiIAymyd9lQRqEttAW698r398fTBiqrI+2TpPfrunLlaebnIYXcfKdBKaUQQgghhLBDRr0DCCGEEEJUlxQyQgghhLBbUsgIIYQQwm5JISOEEEIIuyWFjBBCCCHslhQyQgghhLBbUsgIIYQQwm5JISOEEEIIuyWFjBBCCCHslpPeAWqb2Wzm1KlTeHt7YzAY9I4jhBBCiEpQSpGdnU2jRo0wGi/T7qJ0tGbNGjVq1CgVGhqqALV06VLLfUVFReo///mPat++vfLw8FChoaHq9ttvVydPnqzSeyQkJChALnKRi1zkIhe52OElISHhst/zurbI5Obm0qlTJ+666y7Gjx9f4b68vDx27NjBjBkz6NSpE+np6UyfPp0xY8awbdu2Sr+Ht7c3AAkJCfj4+Fg1vxBCCCFqR1ZWFmFhYZbv8UsxKGUbm0YaDAaWLl3KuHHjLvmYrVu30rNnT06cOEHTpk0r9bpZWVn4+vqSmZkphYwQQghhJyr7/W1XY2QyMzMxGAz4+fld8jGFhYUUFhZafs7KyqqDZEIIIYTQg93MWiooKODJJ5/klltuuWxlNnPmTHx9fS2XsLCwOkwphBBCiLpkF4VMcXExN998M0opFixYcNnHPv3002RmZlouCQkJdZRSCCGEEHXN5ruWyouYEydO8Ndff11xnIurqyuurq51lE4IIeqO2WymqKhI7xhCWIWzszMmk6nGr2PThUx5EXP48GFWr15NgwYN9I4khBC6KCoqIi4uDrPZrHcUIazGz8+PkJCQGq3zpmshk5OTw5EjRyw/x8XFERMTQ0BAAKGhodx4443s2LGDn3/+mdLSUpKTkwEICAjAxcVFr9hCCFGnlFIkJSVhMpkICwu7/OJgQtgBpRR5eXmcPn0agNDQ0Gq/lq7Tr6Ojoxk8ePAFt0+ePJnnn3+eyMjIiz5v9erVDBo0qFLvIdOvhRD2rri4mCNHjtCoUSN8fX31jiOE1aSmpnL69GlatWp1QTeTXUy/HjRoEJero2xkiRshhNBVaWkpgLREC4fj4eEBaMV6dcfLSPukEELYCdkvTjgaa/xOSyEjhBBCCLslhYwQQghhBREREcydO1fvGPWOFDJCCCFqzZkzZ7j//vtp2rQprq6uhISEMGzYMNavX693tEr79NNP6devH6CN7TQYDBdcSkpK2Lp1K1OnTtU5bf1j0+vICCGEsG833HADRUVFfPrppzRr1oyUlBRWrVpFampqtV+zuLgYZ2dnK6a8vB9//JExY8ZYfr733nt58cUXKzzGycmJwMDAy75OXeeuL6RFRghRf2Unw7E1kJ+hdxKHlJGRwdq1a3n11VcZPHgw4eHh9OzZk6efftpSGBgMBhYsWMCIESNwd3enWbNmLFmyxPIax48fx2Aw8M033zBw4EDc3Nz48ssvef755+ncuXOF95s7dy4REREVbvvkk09o164drq6uhIaG8uCDD1bId8899xAYGIiPjw9XX301u3btqvD8goICfv/99wqFjIeHByEhIRUucGHXUvm5jRkzBk9PT/73v//V5I9TXIIUMkKI+ic/Az4dA7Nbw2djYE57WPUSmEv1TlYpSinyikp0uVRlWQwvLy+8vLxYtmwZhYWFl3zcjBkzuOGGG9i1axeTJk1i4sSJHDhwoMJjnnrqKaZPn86BAwcYNmxYpd5/wYIFTJs2jalTp7Jnzx6WL19OixYtLPffdNNNnD59ml9//ZXt27fTtWtXhgwZQlpamuUxq1atonHjxrR
p06bS532+559/nuuvv549e/Zw1113Ves1xOVJ15IQon7JS4PPx0HSLsAAXkGQkwJr34D8NLjuTbDxac75xaW0ffY3Xd57/4vD8HCp3FeHk5MTixYt4t577+W9996ja9euDBw4kIkTJ9KxY0fL42666SbuueceAF566SX++OMP5s2bx/z58y2Pefjhhxk/fnyVsr788ss89thjTJ8+3XJbjx49AFi3bh1btmzh9OnTlv353njjDZYtW8aSJUssY13+2a0EMH/+fD766CPLz/fddx+zZ8++aIZbb72VO++8s0q5RdVIi4wQon5Z+bRWxHg0hH+thccOwbgFgAG2fQIb5umd0KHccMMNnDp1iuXLlzN8+HCio6Pp2rUrixYtsjymd+/eFZ7Tu3fvC1pkunfvXqX3PX36NKdOnWLIkCEXvX/Xrl3k5OTQoEEDS8uRl5cXcXFxHD16FNBavn766acLCplJkyYRExNjuTz99NOXzFHV3KLqpEVGCFF/JG6D3Yu141u/gZAO2nHnW6EwB359Alb/D9qOBf9w/XJegbuzif0vVq57pTbeu6rc3Ny45ppruOaaa5gxYwb33HMPzz33HFOmTKn0a3h6elb42Wg0XtDNVVxcfC6nu/tlXy8nJ4fQ0FCio6MvuM/Pzw+ALVu2UFJSQp8+fSrc7+vrW6GLqiq5hfVJi4wQon5QClY+pR13uhWa/ON/yj3vhcgBUFIAv/237vNVgcFgwMPFSZeLNVZibdu2Lbm5uZafN23aVOH+TZs2ERUVddnXCAwMJDk5uUIxExMTYzn29vYmIiKCVatWXfT5Xbt2JTk5GScnJ1q0aFHh0rBhQ0DrVrruuuuqvXS+qBtSyAgh6oeEzZC4FZzcYcizF95vMMCI18BggoM/w3H7WefEVqWmpnL11VfzxRdfsHv3buLi4vjuu+947bXXGDt2rOVx3333HZ988gmxsbE899xzbNmypcLsoosZNGgQZ86c4bXXXuPo0aO8++67/PrrrxUe8/zzzzN79mzefvttDh8+zI4dO5g3T+s6HDp0KL1792bcuHH8/vvvHD9+nA0bNvDMM8+wbds2AJYvX35Bt5KwPVLICCHqh+2fatftx4NP6MUfExQFXW/Xjte/VTe5HJiXlxe9evVizpw5DBgwgPbt2zNjxgzuvfde3nnnHcvjXnjhBRYvXkzHjh357LPP+Prrr2nbtu1lXzsqKor58+fz7rvv0qlTJ7Zs2cLjjz9e4TGTJ09m7ty5zJ8/n3bt2jFq1CgOHz4MaK1aK1asYMCAAdx55520atWKiRMncuLECYKDgzl69ChHjhyp9AwpoR+DcvAtpiu7DbgQwoHlZ8DsNlCSD3f/AWE9L/3Y1KMwrxug4IHNEFS9abfWVFBQQFxcHJGRkbi5uekdx6oMBgNLly5l3Lhxekep4M033+TPP/9kxYoVekdxaJf73a7s97e0yAghHN/eJVoRExgFTXpc/rENmkPUKO1YZjDVW02aNLnsbCRhO6SQEUI4vn3LtOsukyq3Rkzvf2vXe7+HgsxaiyVs180330z//v31jiEqQaZfCyEcW34GxG/UjttcV7nnhPWEwDZw5iDs/QG6y4JmtcXBRzeIOiAtMkIIx3bkTzCXQMPWENCscs8xGKDLbdrxzs9rL5sQosakkBFCOLbYldp16+FVe17HiWB0gpPbIWW/9XMJIaxCChkhhOMqLYHDf2jHrUZU7blegdCybOrt3iWXf6wQQjdSyAghHFdSDBRkgJvf5adcX0r7sk0K9/6grQwshLA5UsgIIRzX8XXadXhfMFZjmfnWI8DZA9LjtKJICGFzpJARQjiuExu06/A+l3/cpbh4Qqvy7qUfrJNJCGFVUsgIIRyTufTctOuIvtV/nXZl3Uv7l0n3khA2SAoZIYRjStkLhVng4g3BHar/Oi2GgpMbZMTDaZm9JOpOREQEc+fOrfXXNRgMLFu2DIDjx49jMBgq7CRu66SQEUI4pvLdq5t
eBaYarP3p4gHNBmvHh2TfnepITk7m3//+N82aNcPV1ZWwsDBGjx7NqlWrKvX8559/ns6dO19w+/lfwPVJZGQkP//8M87OzixevLjCfRMnTsRgMHD8+PEKt0dERDBjxgwAtm7dytSpU+sqbq2TQkYI4ZjKu5WqOz7mfK3Lpm4f+rXmr1XPHD9+nG7duvHXX3/x+uuvs2fPHlauXMngwYOZNm2a3vHszu7du0lPT2fYsGF0796d6OjoCvdHR0cTFhZW4fa4uDhOnDjB1VdfDUBgYCAeHh51mLp2SSEjhHBMp3Zq10261/y1WpUtpndyO2Qn1/z1akopKMrV51LFcUIPPPAABoOBLVu2cMMNN9CqVSvatWvHo48+yqZNmwCIj49n7NixeHl54ePjw80330xKSgoAixYt4oUXXmDXrl0YDAYMBgOLFi0iIiICgOuvvx6DwWD5GWDBggU0b94cFxcXWrduzeefV1yd2WAw8NFHH3H99dfj4eFBy5YtWb58ueX+7t2788Ybb1h+HjduHM7OzuTk5ACQmJiIwWDgyJEjAKSnp3PHHXfg7++Ph4cHI0aM4PDhwxXe8/vvv6ddu3a4uroSERHB7NmzK9x/+vRpRo8ejbu7O5GRkXz55ZcX/fP88ccfGT58OM7OzgwePLhCwXLgwAEKCgq4//77K9weHR2Nq6srvXv3BqreZbVmzRp69uyJq6sroaGhPPXUU5SUlADw888/4+fnR2lpKQAxMTEYDAaeeuopy/Pvuecebrvttkq/X1XJXktCCMeTcwYyEwADhHau+et5B0Pj7nBym7ZScLcpNX/NmijOg1ca6fPe/z2lzeaqhLS0NFauXMn//vc/PD0vfI6fnx9ms9lSxKxZs4aSkhKmTZvGhAkTiI6OZsKECezdu5eVK1fy559/AuDr68t1111HUFAQCxcuZPjw4ZhM2vT6pUuXMn36dObOncvQoUP5+eefufPOO2nSpAmDBw+2vPcLL7zAa6+9xuuvv868efOYNGkSJ06cICAggIEDBxIdHc3jjz+OUoq1a9fi5+fHunXrGD58OGvWrKFx48a0aNECgClTpnD48GGWL1+Oj48PTz75JCNHjmT//v04Ozuzfft2br75Zp5//nkmTJjAhg0beOCBB2jQoAFTpkyxvMapU6dYvXo1zs7OPPTQQ5w+ffqCP7Ply5fz6KOPAjB48GBmzpxJUlISoaGhrF69mn79+nH11Vfz/vvvW56zevVqevfujZubW6U+t/OdPHmSkSNHMmXKFD777DMOHjzIvffei5ubG88//zz9+/cnOzubnTt30r17d9asWUPDhg0rFFJr1qzhySefrPJ7V5a0yAghHM+pHdp1w5bg5mOd15TupSo7cuQISinatGlzycesWrWKPXv28NVXX9GtWzd69erFZ599xpo1a9i6dSvu7u54eXnh5ORESEgIISEhuLu7ExgYCGjFUEhIiOXnN954gylTpvDAAw/QqlUrHn30UcaPH1+hhQW0wuGWW26hRYsWvPLKK+Tk5LBlyxYABg0axLp16ygtLWX37t24uLgwadIky5dzdHQ0AwcOBLAUMB999BH9+/enU6dOfPnll5w8edIyfufNN99kyJAhzJgxg1atWjFlyhQefPBBXn/9dQBiY2P59ddf+fDDD7nqqqvo1q0bH3/8Mfn5+RUynzx5kt27dzNihPa72LdvX1xcXC7I1a1bN86ePUtcXBygFRLnF3FVMX/+fMLCwnjnnXdo06YN48aN44UXXmD27NmYzWZ8fX3p3LlzhQyPPPIIO3fuJCcnh5MnT3LkyBHLn1dtkBYZIYTjOVlWyDTqar3XbD0S/noJjkVrXSyVbJWoFc4eWsuIXu9dSZXZ2frAgQOEhYURFhZmua1t27b4+flx4MABevToUaV4Bw4cuGAga9++fXnrrbcq3NaxY0fLsaenJz4+PpYWkPNbGTZs2MDAgQMZNGgQs2bNArTC4IknnrC8n5OTE7169bK
8XoMGDWjdujUHDhywPGbs2LEXZJo7dy6lpaWW1+jWrZvl/jZt2uDn51fhOcuXL6dfv36W2z08POjRowfR0dHccsstllxOTk706dOH6OholFLEx8dXu5A5cOAAvXv3xmAwVMiek5NDYmIiTZs2tbRgPfbYY6xdu5aZM2fy7bffsm7dOtLS0mjUqBEtW7as1vtXhhQyQgjHU94i09iKhUxQFPiFQ8YJOLoaokZZ77WrymDQt5CqpJYtW2IwGDh48KDeUS7g7Oxc4WeDwYDZbAa0Vp5OnToRHR3Nxo0bueaaaxgwYAATJkwgNjaWw4cP12oLw6UsX76cMWPGVLht8ODBfPPNN+zbt4/8/Hy6dtV+5wcOHMjq1asxm814eHhUKLSsbdCgQXzyySfs2rULZ2dn2rRpw6BBg4iOjiY9Pb3W/6yka0kI4ViUOjfQ15otMgaD1ioD0r1USQEBAQwbNox3332X3NzcC+7PyMggKiqKhIQEEhISLLfv37+fjIwM2rZtC4CLi4tlMOn5nJ2dL7g9KiqK9evXV7ht/fr1lteqrPJC4O+//2bQoEEEBAQQFRXF//73P0JDQ2nVqpXl/UpKSti8ebPluampqRw6dMjynpfK1KpVK0wmE23atKGkpITt27db7j906BAZGRmWn3Nycli9evUFLTuDBw/m8OHDfPXVV/Tr188yVmjAgAGsWbOG6OhoSxdUdURFRbFx48YKrWvr16/H29ubJk2aAOdasObMmWMpWsoLmejoaAYNGlSt96405eAyMzMVoDIzM/WOIoSoC+nxSj3no9QLAUoV5Vv3tY+t0V771WZKlZZa97UvIz8/X+3fv1/l51v5fOrA0aNHVUhIiGrbtq1asmSJio2NVfv371dvvfWWatOmjTKbzapz586qf//+avv27Wrz5s2qW7duauDAgZbX+PLLL5Wnp6fauXOnOnPmjCooKFBKKdWyZUt1//33q6SkJJWWlqaUUmrp0qXK2dlZzZ8/X8XGxqrZs2crk8mkVq9ebXk9QC1durRCTl9fX7Vw4ULLz8uWLVMmk0mFhIRYbps+fboymUxq4sSJFZ47duxY1bZtW7V27VoVExOjhg8frlq0aKGKioqUUkpt375dGY1G9eKLL6pDhw6pRYsWKXd39wrvN3z4cNWlSxe1adMmtW3bNtWvXz/l7u6u5syZo5RS6rvvvlMdOnS44M83Pz9fubq6Km9vbzVr1izL7QUFBcrNzU15e3urV155pcJzwsPDLa/7zz+PuLg4BaidO3cqpZRKTExUHh4eatq0aerAgQNq2bJlqmHDhuq5556r8JqdO3dWJpNJLViwQCmlVGpqqnJ2dlaAOnjw4AW5z89/qd/tyn5/SyEjhHAsB1doxcb8PtZ/7ZIipV5por1+4jbrv/4l2HMho5RSp06dUtOmTVPh4eHKxcVFNW7cWI0ZM8ZSXJw4cUKNGTNGeXp6Km9vb3XTTTep5ORky/MLCgrUDTfcoPz8/BRgKQCWL1+uWrRooZycnFR4eLjl8fPnz1fNmjVTzs7OqlWrVuqzzz6rkKcyhUxqaqoyGAxqwoQJltuWLl2qAPXee+9VeG5aWpq6/fbbla+vr3J3d1fDhg1TsbGxFR6zZMkS1bZtW+Xs7KyaNm2qXn/99Qr3JyUlqeuuu065urqqpk2bqs8++6xCwXHbbbepZ5555qJ/vgMHDlSA2rRpU4XbBw0apAC1cePGCrdXpZBRSqno6GjVo0cP5eLiokJCQtSTTz6piouLK7zm9OnTFaAOHDhgua1Tp04VCsGLsUYhYyg7CYeVlZWFr68vmZmZ+PhYafaCEMJ2/f06/PUydJwI49+/8uOr6ts7YP+PMOhpGPTUlR9vBQUFBcTFxREZGVmtKbTCvpWUlBAcHMyvv/5Kz5499Y5jVZf73a7s97eMkRFCOJbkvdp1cLvaef2W12rXsb/VzusL8Q9paWk88sgjVZ7BVV9IISOEcCwp+7Tr2ipkWgzVrk/
tgJwLFywTwtqCgoL4v//7vwpToMU5UsgIIRxHUR6kHdWOg9vXznt4h0BoJ+34SOU2PRRC1B4pZIQQjuPMQVBm8AzUthWoLeXdS4d/r733uAgHH9Io6iFr/E5LISOEcBy13a1UrryQOboKSktq973AsjZIUVFRrb+XEHUpLy8PuHCBwqqQlX2FEI7DUsjUUrdSucbdwN0f8tMhcSuE967Vt3NycsLDw4MzZ87g7OyM0Sj/BxX2TSlFXl4ep0+fxs/Pz1KsV4cUMkIIx3G6rJAJqtoqrlVmNGmDfvd8p3Uv1XIhYzAYCA0NJS4ujhMnTtTqewlRl8o3/awJKWSEEI7jzCHtOvDSuy1bTcth5wqZoc/V+tu5uLjQsmVL6V4SDsPZ2blGLTHlpJARQjiG/AzISdGOG9beTrsWLYYABkjZC5knwbdxrb+l0WiUBfGE+AfpaBVCOIazsdq1T2Nwq4NVvD0CoEnZAmVH/qj99xNCXJQUMkIIx1DerdSwVd29p2UathQyQuhFChkhhGM4Wz4+pnXdvWfLa7TrY9FQUlh37yuEsJBCRgjhGPRokQnpCF7BUJQD8Rvr7n2FEBZSyAghHMMZHVpkjEZoUdYqI91LQuhC10Lm77//ZvTo0TRq1AiDwcCyZcsq3K+U4tlnnyU0NBR3d3eGDh3K4cOH9QkrhLBdxfmQEa8dN6zDQgbOdS/JbthC6ELXQiY3N5dOnTrx7rvvXvT+1157jbfffpv33nuPzZs34+npybBhwygoKKjjpEIIm3b2MKC01XY9G9btezcfDEYnSD0Macfq9r2FEPquIzNixAhGjBhx0fuUUsydO5f/+7//Y+zYsQB89tlnBAcHs2zZMiZOnFiXUYUQtiy1rKW2YSswGOr2vd18oWlvOL4WDv8JvabW7fsLUc/Z7BiZuLg4kpOTGTp0qOU2X19fevXqxcaNlx5UV1hYSFZWVoWLEMLBpZa1hDRooc/7l3cv1fFu2EIIGy5kkpOTAQgODq5we3BwsOW+i5k5cya+vr6WS1hYWK3mFELYgNQj2nVAM33ev3w9meNroShPnwxC1FM2W8hU19NPP01mZqblkpCQoHckIURtSzuqXevVIhPYBnzDoKQAjq/TJ4MQ9ZTNFjLlu2GmpKRUuD0lJeWyO2W6urri4+NT4SKEcHDlLTINmuvz/gaDdC8JoRObLWQiIyMJCQlh1apVltuysrLYvHkzvXv31jGZEMKm5KVBfrp2rFfXEpy3XcFvoJR+OYSoZ3SdtZSTk8ORI0csP8fFxRETE0NAQABNmzbl4Ycf5uWXX6Zly5ZERkYyY8YMGjVqxLhx4/QLLYSwLall3UrejcDFU78ckQPA5KqtZ3M2tm4X5hOiHtO1kNm2bRuDBw+2/Pzoo48CMHnyZBYtWsR//vMfcnNzmTp1KhkZGfTr14+VK1fKNvZCiHMs42N06lYq5+IJEf3g6Cqte0kKGSHqhK6FzKBBg1CXaYI1GAy8+OKLvPjii3WYSghhV/QeH3O+lteeK2T6/FvvNELUCzY7RkYIISqlvGspwBYKmbIBvyc2QoGsYSVEXZBCRghh32yla6k8Q0BzMBdD3Bq90whRL0ghI4Swb+nHtWv/SF1jWFhmL8k0bCHqghQyQgj7lZ8OBZnasV9TfbOUs6wn84dMwxaiDkghI4SwX+kntGvPQHD10jdLufC+4OwB2UmQvEfvNEI4PClkhBD2y9KtFKFnioqc3SByoHYs3UtC1DopZIQQ9iujrEXGL1zfHP/UqnyczB/65hCiHpBCRghhv2yxRQagRdk4mcQt2hYKQohaI4WMEMJ+lY+RsbVCxi8MgtqCMsPRv/ROI4RDk0JGCGG/LC0yNta1BOdmL8Wu1DeHEA5OChkhhH0yl0JmgnZsay0yAK1Hatexv0FJob5ZhHBguu61JERdyswv5s/9KRSUlGIyGLiqWQMiGuq4W7KomewkKC0CoxP4NNY7zYWa9ASvEMhJhmPR0GqY3om
EcEhSyAiHV1BcyqINx1kQfZTM/GLL7SajgVt6hjF9SCsCvV11TCiqpXx8jG8YGE36ZrkYoxHajoEtH8D+5VLICFFLpGtJOLS03CImfLCJWb8eJDO/mGaBnlzbNpgeEf6UmhVfbIpnxFtrOZScrXdUUVW2OmPpfG3HatcHf4bS4ss/VghRLdIiIxzWyYx87vh4M0fP5OLn4cyM69oyrktjTEYDAJuOpfLsj3uJTclh4gcb+fzuXrRv7KtzalFptjzQt1zT3tqqw7lnIO5vaDFE70RCOBxpkREO6WxOIbd8sImjZ3IJ9XVjyb96c0O3JpYiBuCqZg347r4+dArzIz2vmFs/3MSxMzk6phZVkmGjU6/PZzRB1GjteP+P+mYRwkFJISMcTl5RCXcv2kp8Wh5hAe4sub8PLYK8L/pYXw9nvri7J12a+pFVUMLUz7eTXSBdAHbBHrqW4B/dSyX6ZhHCAUkhIxxKqVnx0Nc72ZWYib+HM5/e2ZPGfu6XfY63mzPv396NEB83jpzO4bFvd2E2y67FNi/dRrcn+KfwfuAeAHmpcGK93mmEcDhSyAiHMvfPWP48cBpXJyMfTe5Os8DK7Ygc5O3Ggtu64mIy8vv+FN5dfaSWk4oaKc7XpjWD7bfImJwgapR2vH+ZrlGEcERSyAiH8du+ZOb9pRUgs27oQLfwgCo9v0tTf14a1w6AN/+M5a+DKVbPKKwkI167dvUBd399s1RG23Ha9f4foaRI1yhCOBopZIRDiDuby2Pf7gLgrr6RXN+lSbVeZ0KPptx2VVOUgulfx8jgX1t1/owlg+GyD7UJkQPBK1jrXjryp95phHAoUsgIu1dUYmb64p3kFJbQMzKAp0e2qdHrPTuqHd3D/ckuLOGez7aRJYN/bY+9DPQtZ3KCDjdpx7sX65tFCAcjhYywe2/+EcvuxEx83Z15a2JnnE01+7V2cTIy/7auhPq6cexMLtO/3kmpDP61LfYy0Pd8HSdo14dWQn66vlmEcCBSyAi7tulYKu//fRSAV2/oQKjv5WcoVVaQtxsf3N4dVycjqw+d4bXfDlrldYWV2FuLDEBIBwhqB6WFsG+Z3mmEcBhSyAi7VVhSyn+X7kEpmNA9jOHtQ636+h2a+PL6TZ0AeH/NMZbuTLTq64sasIfF8P7JYIBOZa0yu7/RN4sQDkQKGWG33l9zjGNncmno5cp/r4uqlfcY06kR0wY3B+DJ7/ewKyGjVt5HVFFGgnbt11TfHFXV4WYwGCF+I6TF6Z1GCIcghYywS3Fnc3mnbK2XGaOi8HV3rrX3euya1gyNCqKoxMw9n20jIS2v1t5LVEJhNhRmasc+jfXNUlU+odoMJoDd3+qbRQgHIYWMsEszVxygqMRM/5YNGdOpUa2+l9FoYM6EzrQJ8eZMdiFTFm4hI0/WAtFN5knt2s0XXCu34KFN6TRRu475Esyl+mYRwgFIISPsTkxCBr/vT8FogOdGt8VQB+uIeLs5s/DOHoT6unH0TC73fraNgmL5EtJFVtlYJZ/qrRWku6gx4OanjfORNWWEqDEpZITdeeO3QwCM79rkkptB1oZQX3cW3dkTbzcnth5P59FvY2RPJj2Ut8j42lm3UjkXD+hym3a85UN9swjhAKSQEXZlw5GzrDtyFmeTgelDWtb5+7cO8eaD27vjYjKyYk8yL/9yoM4z1HtZZYWMvY2POV/3u7TrI39C2jF9swhh56SQEXZl7p+HAbi1Z1PCAjx0ydC7eQNev6kjAJ+sj+OrzfG65Ki37L1FBqBBc2gxFFCw9WO90whh16SQEXZjd2IGW46n4WwycP+gFrpmGdu5MU8Maw3Ac8v3sv2ErNRaZ+x9jEy5Hvdq1zu/gCKZCSdEdUkhI+zGx+u0dTdGdWxEiK+bzmnggUHNGdE+hOJSxf1fbOd0doHekeqHzLJCxp5bZABaXqOtg1OQAXu/1zuNEHZLChlhF5IzC/hldxIAd/eL1DmNxmAw8PpNnWgZ5MXp7EK
eXLIbpWTwb61S6lzXkj2PkQEwmqD73drxlve1cxNCVJkUMsIufLrxOCVmRc/IANo39tU7joWXqxPvTuqKS9meTF9vSdA7kmPLT4eSfO3Y3gsZgK53gLMHJO+BY6v1TiOEXZJCRti8wpJSvt6iDai1ldaY87UK9uY/ZeNlXv5lPydSc3VO5MDKu5U8A8FZ/+7FGvMIgK6TteO1b+qbRQg7JYWMsHl/7j9NRl4xob5uDI0K1jvORd3VN5JekQHkFZVvZCndBLXCEaZe/1PvaWB0guNrIWGr3mmEsDtSyAibt2S71l0zvmtjTMbaX8W3OoxGA6/d2BEXJyPrj6SyYk+y3pEck2Wgr53PWDqfXxh0LNu2IHqmvlmEsENSyAiblpJVwJrYMwDc0NW2v7zCG3hy/0Btp+yXf9lPbmGJzokckCO2yAAMeFxrlTm6Ck5s1DuNEHZFChlh05buPIlZQfdwf5oF2v4GgfcPak5YgDtJmQWW3bmFFTnCYngXExB5btuCv16SGUxCVIEUMsJmKaX4bpvWrXRTd9tujSnn5mzi2VHtAG3dm1MZ+ToncjCO2iIDMOAJMLnCifVw4Ce90whhN6SQETZr36ksjp7Jxc3ZyMgOoXrHqbShUUH0jAygqMTM3D9j9Y7jWBxxjEw53ybQ59/a8e/PQLEUwUJUhhQywmb9tk8bMDu4dRDebs46p6k8g8HAUyPaALBkeyJHTmfrnMhBmM2QdUo7dsQWGYD+j4J3I8iIhw3v6J1GCLsghYywWb/u1QqZ4e1DdE5SdV2b+nNt22DMCl7/7ZDecRxD7mkwF4PBCN7200JXJS6ecO1L2vG6N8+NCRJCXJIUMsImHTmdzZHTOTibDAxuE6R3nGp5YlhrjAb4bV8KO+JlU8kaK/9S9woBk5O+WWpT+xugaW8ozoM/ntU7jRA2TwoZYZN+25cCQN8WDfGxo26l87UM9rZMGX/114OySF5NZTnIZpFXYjDAiFcBA+xdAkdW6Z1ICJsmhYywSSvLu5Xa2V+30vkevqYVLk5GNselEV22Ho6oJsvUawcc6PtPoZ2g133a8U8PQ2GOrnGEsGVSyAibk5iex56TmRgNMLStbW5JUFmN/dy546pwAF5beQizWVplqs2Rp15fzNUzwLcpZMbDXy/rnUYImyWFjLA5qw+eBqB7eAANvVx1TlNz0wa3wNvViQNJWfyyJ0nvOPbLkadeX4yrF4yeox1vfg8StuibRwgbZdOFTGlpKTNmzCAyMhJ3d3eaN2/OSy+9JGMNHNzaw2cBGNg6UOck1uHv6cI9/ZsBMOfPWEpKzTonslP1rUUGoMVQ6HQLoGD5v6GkUO9EQtgcmy5kXn31VRYsWMA777zDgQMHePXVV3nttdeYN2+e3tFELSkpNbPxaCoA/Vo01DmN9dzVLwI/D2eOncnlx5hTesexT466PcGVDHsFPAPhzEFYO1vvNELYHJsuZDZs2MDYsWO57rrriIiI4MYbb+Taa69lyxZpYnVUuxIzyC4swc/DmfaNffWOYzXebs7cN0DbUHLuqliKpVWmakqLIadsR3GfetK1VM4jAEa8ph2vfRNS9umbRwgbY9OFTJ8+fVi1ahWxsdoy77t27WLdunWMGDHiks8pLCwkKyurwkXYj79jtW6lvs0bYjIadE5jXZP7hNPQy4WEtHy+25aodxz7kp0EygxGZ611or5pdz20vk5bEHD5v8FcqnciIWyGTRcyTz31FBMnTqRNmzY4OzvTpUsXHn74YSZNmnTJ58ycORNfX1/LJSwsrA4Ti5pad0QrZPq3dJxupXIeLk7cP6gFAPP+OkxBsXwZVVp5t5JPIzDa9D9btcNggOtmg6svnNyuDf4VQgA2Xsh8++23fPnll3z11Vfs2LGDTz/9lDfeeINPP/30ks95+umnyczMtFwSEhLqMLGoiayCYmISMgDo54CFDMCkXk0J8XEjKbOAxVvi9Y5jP7Lq0Royl+ITCte+qB2vegnS4vT
NI4SNsOlC5oknnrC0ynTo0IHbb7+dRx55hJkzZ17yOa6urvj4+FS4CPuw8WgqpWZFs4aeNPH30DtOrXBzNvHg1VqrzDurj5JfJK0ylVI+9bo+zVi6mK6TIaI/lOTDT9NBZnAKYduFTF5eHsZ/NCObTCbMZhko6YjKZyv1daDZShdzc/cwmvi7czankM83Hdc7jn3Iqqczlv7JYIDRb4GTG8StgZ1f6J1ICN3ZdCEzevRo/ve///HLL79w/Phxli5dyptvvsn111+vdzRRC7af0DZW7BEZoHOS2uXiZOShIS0BWBB9lJzCEp0T2YH6tD3BlTRoDoP/qx2vegEKs/XNI4TObLqQmTdvHjfeeCMPPPAAUVFRPP7449x333289NJLekcTVpZbWML+JG2GWfdwf53T1L7xXRoT2dCT9LxiFq6TsQ5XVL5hZH2ben0pve6HgOaQewbWv6V3GiF0ZdOFjLe3N3PnzuXEiRPk5+dz9OhRXn75ZVxcXPSOJqxsV0IGpWZFI183Gvm56x2n1jmZjDw8VGuV+WDtMTLzinVOZOPq62J4l+LkAte8oB1veAeyZOsLUX/ZdCEj6o9tZd1K3SIcu1vpfKM7NqJVsBfZBSV8tO6Y3nFsV3EB5GnT8uv9YN/ztRkFYVdpA3/XzdE7jRC6kUJG2ITyQqY+dCuVMxoNPDK0FQAL1x+XVplLKR/o6+wB7vXn9+OKDAYY/LR2vH0RZMnWF6J+kkJG6K7UrNhZ3iJTjwoZgGHtQmgd7E1OYQkLN8hYmYs6f7NIg2Ot9lxjkQOhaW8oLYR1c/VOI4QupJARujuUnE12YQlerk60CfHWO06dMhoNlnVlPlkXR3aBtMpcoHwNGRkfcyGDAQY+qR3v+Azy0vTNI4QOpJARutt+QvvHt0tTP5xM9e9XcmSHUJoHepJVUMJnG0/oHcf2WLYnkBlLF9VsEIR01MbKbP1Y7zRC1Ln6960hbM72etqtVM50XqvMx+viZLXff8qSFpnLMhigz7+14y0faIOjhahHpJARutt9MhOAzmF++gbR0eiOjWji705abhFLdsjO2BVknjdGRlxcu+u1P5/c07DvB73TCFGnpJARusouKObYmVwAOjT21TmNfpxMRu7uFwnAR2uPUWqWPXQsZHuCKzM5Q/e7tGPpXhL1jBQyQlf7Tmmr+Tb2c6eBl6vOafR1c/cwfN2dOZGax+/7kvWOYzssi+GF6ZvD1nW9A4zOcHIbJO3SO40QdUYKGaGrPYlat1J9bo0p5+nqxO1XhQPw/t/HULKzsbaPUKH2OyJdS1fgFQRRo7VjaZUR9YgUMkJXe8rGx3RoIoUMwOQ+Ebg4GYlJyCAmIUPvOPorb41x8wVXL32z2IMed2vXe7+Hojx9swhRR6SQEbqyFDLSIgNAoLcrozqGAshUbJDNIquqaR/wC4eiHDj4i95phKgTUsgI3WQVFBN3Vgb6/tPk3hEA/LI7ibM5hfqG0ZtsFlk1RiN0nKAd716sbxYh6ogUMkI3e8taY5r4u+PvKTual+sU5kenJr4UlZr5ZmuC3nH0Vb6qr4yPqbxOE7Xro39Bdoq+WYSoA1LICN3IQN9Lu72sVearzfH1eyq2TL2uugbNoUkPUGbYu0TvNELUuhoVMkVFRSQmJhIfH1/hIkRlyEDfSxvVMRR/D2dOZuQTfei03nH0kyljZKqlvHtpl3QvCcdXrULm8OHD9O/fH3d3d8LDw4mMjCQyMpKIiAgiIyOtnVE4qL0y0PeS3JxNjO+qfXl/t60er/QrLTLV0/4GbU2Z5N2Qsl/vNELUKqfqPGnKlCk4OTnx888/ExoaisFgsHYu4eByC0s4kaZND20b6qNzGtt0c/cwPl4Xx58HUkjNKax/CwYqJdsTVJdHALQaBgd/1gb9XvOi3omEqDXVKmRiYmLYvn07bdq0sXYeUU8cTM5GKQjydq1/X9CV1DrEm45NfNmdmMmymFOWLQzqjfx0bUdnkEKmOjpO0Aq
ZPUtgyPPajCYhHFC1frPbtm3L2bNnrZ1F1CMHk7WtCdpIa8xl3dStvHspof6t9Fs+PsYzEJzd9M1ij1peCy7eWvfcye16pxGi1lSrkHn11Vf5z3/+Q3R0NKmpqWRlZVW4CHElB5K035OoUG+dk9i2MZ0a4+Jk5GByNntP1rO/W1nSrVQjzm5a9xLA/mW6RhGiNlWrkBk6dCibNm1iyJAhBAUF4e/vj7+/P35+fvj7+1s7o3BAB5KyARkfcyW+Hs5c2zYYgB921rNBv+UtMr4yY6na2o7Vrg8s18YcCeGAqjVGZvXq1dbOIeoRs1lxKFkrZKKkkLmisZ0b8/PuJH7ZncT/XdcWk7GeDK6XFpmaazEUnD0gIx6SYqBRF70TCWF11SpkBg4caO0coh5JTM8np7AEF5ORyIaeesexeQNaNcTHzYnT2YVsjkulT/OGekeqG5YWGSlkqs3FA1peA/t/hP3LpZARDqnaw9gzMjKYPXs299xzD/fccw9z5swhMzPTmtmEg9pfNj6mZbAXziaZSXElrk4mRrTXNpL8adcpndPUIZl6bR3l3Uv7f5TuJeGQqvUtsm3bNpo3b86cOXNIS0sjLS2NN998k+bNm7Njxw5rZxQOpnzGknQrVd6Yzo0AWLEnmaISs85p6kiWjJGxipbXgskV0o7CaVkcTzieahUyjzzyCGPGjOH48eP88MMP/PDDD8TFxTFq1CgefvhhK0cUjqZ8xlKbEJmxVFlXNWtAoLcrmfnFrD18Ru84tc9shqwk7VhaZGrG1VsbKwNaq4wQDqbaLTJPPvkkTk7nhtg4OTnxn//8h23btlktnHBMMmOp6kxGA9d1qEfdS7mnwVwMBiN4h+qdxv61HaNdSyEjHFC1ChkfH5+Lbg6ZkJCAt7f8L1tcWm5hCfFlWxPIYnhVM7qT9oW+6sBpCktKdU5Ty8rHx3iFgKlacxLE+VoNB6MTnDkIqUf1TiOEVVWrkJkwYQJ3330333zzDQkJCSQkJLB48WLuuecebrnlFmtnFA7k8OkcAAK9XQnwdNE5jX3pEuZPsI8r2YUlbDiSqnec2pUlM5asyt0PIvppx4d+1TWKENZWrf/qvPHGGxgMBu644w5KSkoAcHZ25v7772fWrFlWDSgcS2zZ+jGtgr10TmJ/jEYDw9uF8OnGE6zYk8TgNkF6R6o95S0yMtDXelqPhGPRcGgF9HlQ7zRCWE21WmRcXFx46623SE9PJyYmhpiYGNLS0pgzZw6urrIBoLi02JTyQka6IKtjRNk4md/3p1Bc6sCzl2QxPOtrNVy7jt8IeWn6ZhHCimq0iIeHhwcdOnSgQ4cOeHh4WCuTcGCHpJCpkR4RATT0ciEzv5iNRx24e0m2J7A+/3AIbg/KDId/1zuNEFZT6a6l8ePHs2jRInx8fBg/fvxlH/vDDz/UOJhwTIdTtDEyUshUj8loYFi7EL7cHM+ve5MY0CpQ70i1o7yQkRYZ62o9ElL2at1LnSbqnUYIq6h0i4yvry8Gg7bHi4+PD76+vpe8CHExmfnFJGcVANqqvqJ6ylf5/X1fCiWO2r1U3rUkg32tq/UI7frIKigp1DeLEFZS6RaZhQsXWo4XLVpUG1mEgztc1q3UyNcNHzdnndPYr17NAvD3cCY1t4gtx9Mcb++l0mLITtaOfaRryapCO2vr8mQnQdxaaDlU70RC1Fi1xshcffXVZGRkXHB7VlYWV199dU0zCQcVW9at1FK6lWrE2WTk2rYhAPy6J1nnNLUgOwlQYHQGTwftOtOL0Xhu0O+hFfpmEcJKqlXIREdHU1RUdMHtBQUFrF27tsahhGMqn7HUWrYmqLERHbRCZuW+ZMxmB9sI0LJZZCPti1dYV5vrtOtDv8omksIhVGkdmd27d1uO9+/fT3Lyuf8NlpaWsnLlSho3lj5tcXHlhUzLIBkfU1N9mjfE282JM9mFbI9Pp0dEgN6RrCdL1pCpVRH9wdkTsk9BUgw06qJ3IiFqpEqFTOf
OnTEYDBgMhot2Ibm7uzNv3jyrhROORVpkrMfFycg1bYP5YcdJVuxJcqxCRmYs1S5nN2hxNRz4SWuVkUJG2LkqtdvGxcVx9OhRlFJs2bKFuLg4y+XkyZNkZWVx11131VZWYcdScwo5m6N1R7aQFhmrGFk2e2nlXgfrXpIZS7Wv9UjtWsbJCAdQpRaZ8PBwAMxmB53yKWpN+UDfsAB3PFxkE0Br6NeyIV6uTiRlFhCTmEHXpv56R7IO2Z6g9rUcpu0snrwHMhLAL0zvREJUW7W+UT777LPL3n/HHXdUK4xwXIdPl3UryYwlq3FzNnF1myCW7zrFr3uSHKeQKd8wUqZe1x7PBhB2FcRv0LqXek3VO5EQ1VatQmb69OkVfi4uLiYvLw8XFxc8PDykkBEXOFS2WaRMvbaukR1CtEJmbzL/HRllWbTSrmXKztd1ovWIskJmhRQywq5Va25jenp6hUtOTg6HDh2iX79+fP3119bOKBxA+dYE0iJjXQNbBeHubCIxPZ+9J7P0jlNzxfmQV7aHlAz2rV3l42SOr4OCTH2zCFEDVlukoWXLlsyaNeuC1hohlFLEni5vkZGBvtbk7qJ1LwGs2JukcxoryDqlXTt7gLuDdJXZqoYtoGErMBfDkT/1TiNEtVl1tSknJydOnTplzZcUDuBMdiEZecUYDdA8UAoZaxvevnyV3ySUvS9wdv7Ua0foJrN15XsvHfpV3xxC1EC1xsgsX768ws9KKZKSknjnnXfo27evVYIJx1E+YymigSduziad0ziewW2CcHUycjw1jwNJ2bRt5KN3pOqTqdd1q/VIWP8WHP5d2+PKJHugCftTrUJm3LhxFX42GAwEBgZy9dVXM3v2bGvkEg7kUNlCeK1kfEyt8HJ1YmCrQH7fn8LKvUn2XchYtieQGUt1okkP8GgIeWfhxAZoNlDvREJUWbW6lsxmc4VLaWkpycnJfPXVV4SGhlo7o7Bzhy2FjHQr1ZaRHbS/dyv22vkmklkyY6lOGU3nbSIp3UvCPtV4jIxSqlb75U+ePMltt91GgwYNcHd3p0OHDmzbtq3W3k9Yn6VFRrYmqDVXRwXhYjJy5HSOpXC0S5YWGSlk6oxlnMwvsomksEvVLmQ+/vhj2rdvj5ubG25ubrRv356PPvrImtlIT0+nb9++ODs78+uvv7J//35mz56Nv7/MZrAXSinL1GvpWqo9Pm7O9GvZEIAVe+y4VUbGyNS95oPByQ0y4uH0fr3TCFFl1Roj8+yzz/Lmm2/y73//m969ewOwceNGHnnkEeLj43nxxRetEu7VV18lLCyMhQsXWm6LjIy0ymuLunEqs4CcwhKcjAYiGnjqHcehjWgfwl8HT/Pr3iSmD22pd5zqsWxPIEvm1xkXT2g2CGJXwsEVENxO70RCVEm1WmQWLFjAhx9+yMyZMxkzZgxjxoxh5syZfPDBB8yfP99q4ZYvX0737t256aabCAoKokuXLnz44YeXfU5hYSFZWVkVLkI/5TteNwv0xMXJqrP9xT9c0zYYJ6OBg8nZHDuTo3ecqivIgsKyhdmka6luySaSwo5V65uluLiY7t27X3B7t27dKCkpqXGocseOHWPBggW0bNmS3377jfvvv5+HHnqITz/99JLPmTlzJr6+vpZLWJj8z05P5eM1ZGuC2ufn4ULfFlr30rIYO1zPqbxbyc0XXGVgeJ1qNRwwwKkdkOUACyuKeqVahcztt9/OggULLrj9gw8+YNKkSTUOVc5sNtO1a1deeeUVunTpwtSpU7n33nt57733Lvmcp59+mszMTMslISHBanlE1R1Klq0J6tL4rlpLxvfbEzGb7Wzgpky91o93MDQp+89prMxeEval0mNkHn30UcuxwWDgo48+4vfff+eqq64CYPPmzcTHx1t1w8jQ0FDatm1b4baoqCi+//77Sz7H1dUVV1dXq2UQNVO+67VMva4bw9qF4O3mxMmMfDYeS7W00NgFmXqtr9YjIHGrNg27+116pxG
i0ipdyOzcubPCz926dQPg6NGjADRs2JCGDRuyb98+q4Xr27cvhw4dqnBbbGws4eHhVnsPUXvMZpmxVNfcnE2M6dSILzfH8922BPsqZGTqtb5aXwerXoRja6AwR7r3hN2odCGzevXq2sxxUY888gh9+vThlVde4eabb2bLli188MEHfPDBB3WeRVRdYno++cWluDgZCZcZS3Xmxm5N+HJzPL/uTebFgmJ83Oxk2XmZeq2vwNbgHwnpcXD0L2g7Ru9EQlSKTU8j6dGjB0uXLuXrr7+mffv2vPTSS8ydO9eq43BE7SlfCK9FoBcmo2wAWFc6h/nRIsiLwhIzP++yo4GbmWXj2Xyb6pujvjIYoM112rHMXhJ2pNItMuPHj2fRokX4+Pgwfvz4yz72hx9+qHGwcqNGjWLUqFFWez1Rd2JlawJdGAwGburWhJm/HuS77Qnc2stOCoPyna99ZbCvblqPgI3vQOxvUFoCpmotNSZEnap0i4yvry8Gg8FyfLmLEHBeISNbE9S567s2xmQ0sDM+gyOn7WDLArP5vMXwpJDRTdhV4O4P+WmQsFnvNEJUSqXL7fLVdZVSvPDCCwQGBuLu7l5rwYT9iy0f6BskhUxdC/J2Y1CrQFYdPM132xN5ekSU3pEuL+8slBYCBvBppHea+svkBC2Hwe7FWvdSRF+9EwlxRVUeI6OUokWLFiQmJtZGHuEgSkrNHD0jM5b0dFN3rWXjhx0nKSk165zmCsrHx3iHgslOBic7qjbnrfIrm0gKO1DlQsZoNNKyZUtSU1NrI49wECfS8igqMePubKKJv7Tc6eHqNsEEeLpwJruQtYfP6h3n8mR8jO1ofjWYXCDtGJyN1TuNEFdUrVlLs2bN4oknnmDv3r3WziMcxLmtCbwwyowlXbg4GRnbWeum+Warja9wLYWM7XD1hsiB2vHBX/TNIkQlVKuQueOOO9iyZQudOnXC3d2dgICAChchyrcmkG4lfU3ooe019seBFFKyCnROcxlSyNiW1iO060OyXYGwfdWaWzdnzhzLDCYhLkamXtuGNiE+dA/3Z9uJdL7dmsC/h7TUO9LFWdaQkU1ebULrEfDLo9qWBTmnwStI70RCXFK1CpkpU6ZYOYZwNAeSswDti1Toa9JVTdl2Ip2vt8TzwOAWtrk4obTI2BafRtCoC5zaCbEroav19tATwtqq1bVkMpk4ffr0BbenpqZiMplqHErYt4LiUo6fzQWgTah0LeltRPtQ/DycOZVZQPShC//e2gQpZGxP67LZSwdllV9h26pVyKhLTMkrLCzExcWlRoGE/TuckoNZQYCnC4FeshO53tycTdzUTSsQvtwcr3OaiyjOh9wz2rEUMrajvJA5thqK8vTNIsRlVKlr6e233wa0JdA/+ugjvLzOjX8oLS3l77//pk2bNtZNKOzOuW4lbxlLZSNu6dmUD9fGsfrQaRLT82ji76F3pHOyTmnXzp7aqrLCNgS30/a9yozXipnyfZiEsDFVKmTmzJkDaC0y7733XoVuJBcXFyIiInjvvfesm1DYnUPJ2kDf1rI1gc1oFuhF3xYNWH8klcVbEnh8WGu9I51jGejbRNu4UNgGg0FbHG/ze9rieFLICBtVpUImLi4OgMGDB/PDDz/g7y//exIXOljWIhMlA31tyqRe4VohszWB6UNb4myqVs+y9cn4GNvVekRZIbMSzKVglDGQwvZU61+y1atXVyhiSktLiYmJIT093WrBhP2SFhnbdE3bYAK9XTmbU8gf+1P0jnOOFDK2K7wvuPpqe2ElbtM7jRAXVa1C5uGHH+bjjz8GtCJmwIABdO3albCwMKKjo62ZT9iZM9mFnM0pwmCQxfBsjbPJyITu2jotX24+oXOa85zftSRsi8kZWl6jHR+SVX6FbapWIfPdd9/RqVMnAH766SeOHz/OwYMHeeSRR3jmmWesGlDYl/LWmMgGnri7SDO0rZnYMwyDAdYfSeVY2aaeupMWGdtm2URSVvkVtql
ahUxqaiohISEArFixgptuuolWrVpx1113sWfPHqsGFPalfHyMdCvZpib+HlzdWlul9estNjIVWwoZ29ZiKBidtQ0kzx7RO40QF6hWIRMcHMz+/fspLS1l5cqVXHON1vSYl5cnC+LVcwfLWmRkRV/bNemqpgB8tz2RguJSfcMoJYWMrXPzhYh+2vHBn/XNIsRFVKuQufPOO7n55ptp3749BoOBoUOHArB582ZZR6aekxYZ2zewVRCN/dzJyCvm171J+obJS4WSss0sfRrrm0VcWtRo7frAT/rmEOIiqlXIPP/883z00UdMnTqV9evX4+qqrd5qMpl46qmnrBpQ2I+SUjOHU7RxF1GyNYHNMhkN3NKzbNDvJp27l8oH+noFg5OsAm2z2lwHGODktnMLGAphI6q1aSTAjTfeeMFtkydPrlEYYd+Op+ZRWGLGw8VEmC2tHCsucHP3MOb+eZhtJ9I5kJRFVKhOXYHSrWQfvEMgrCckbIaDv0DPe/VOJIRFpQuZt99+m6lTp+Lm5mbZquBSHnrooRoHE/anfMZSq2BvjLa4w7KwCPJxY1j7EH7ZncSi9cd59caO+gSRQsZ+RI3WCpkDy6WQETal0oXMnDlzmDRpEm5ubpatCi7GYDBIIVNPHTxvjyVh++7sE8Evu5NYFnOSJ0e0IcBThw1fLYVMWN2/t6iaNqPg9/+D4+shLw08AvROJARQhUKmfHuCfx4LUe5AUvmMJSlk7EG3cH86NPZlz8lMvt4Sz7TBLeo+REbZGB1pkbF9AZEQ3AFS9mhrynSZpHciIYAqFDKPPvpopR5nMBiYPXt2tQMJ+3UopXzGkky9tgcGg4E7+0bw6Le7+HzjCaYOaFb3+y+VFzJ+Tev2fUX1RI3WCpkDP0khI2xGpQuZnTt3Vvh5x44dlJSU0Lq1totubGwsJpOJbt26WTehsAs5hSUkpOUD0iJjT67rGMorKw6SnFXAr3uTGdOpUd0GyCjbKsEvvG7fV1RP1CiIfgWO/gWF2eAqf9eF/ir936/Vq1dbLqNHj2bgwIEkJiayY8cOduzYQUJCAoMHD+a662Sr9/qofKBvsI8r/nqMtRDV4upk4rayBfIWrq/jLuOCLMgv22hWWmTsQ1BbCGgGpYVw5E+90wgBVHMdmdmzZzNz5swKO2D7+/vz8ssvS7dSPXVuoK90K9mbSb3CcTEZ2RmfQUxCRt29cXm3krs/uMnvjV0wGGRxPGFzqlXIZGVlcebMmQtuP3PmDNnZ2TUOJexPeYtMG1kIz+4EersyqlMoUMetMpbxMdKtZFfalBUysb9DSaG+WYSgmoXM9ddfz5133skPP/xAYmIiiYmJfP/999x9992MHz/e2hmFHTgoM5bs2l19IwH4ZXcSKVkFdfOmlvEx0q1kVxp3A+9QKMqGY2v0TiNE9QqZ9957jxEjRnDrrbcSHh5OeHg4t956K8OHD2f+/PnWzihsnFLq3B5LwdJFYI/aN/alR4Q/JWbF5xtP1M2bppe9j7+0yNgVo1FbUwa0xfGE0Fm1ChkPDw/mz59PamoqO3fuZOfOnaSlpTF//nw8PT2tnVHYuMT0fLIKSnAxGWkR5KV3HFFNd/fTWmU+33SC3MKS2n9D6VqyX1FlhczBX6C0Dn5XhLiMGi0a4enpSceOHenYsaMUMPXYvlOZALQK8cLFqY7XIRFWc03bECIaeJCZX8y32xJq/w1l6rX9Cu8HHg0gPw2O/613GlHPybeOqLG9J7VupXahvjonETVhMhq4p38zAD5eF0dJqbn23kypcy0y0rVkf0xOEDVGO963VN8sot6TQkbUWHmLTPvGMj7G3t3YrQkNPF1ITM9nxd7k2nuj/HQo1Apg2WfJTrW7Xrs+8BOUFuubRdRrUsiIGtt7SvtCattIWmTsnZuzicl9IgB4f81RlFK180bl3UqeQeDiUTvvIWpXeF/wDNSKUpm9JHQkhYyokdNZBZzJLtTWyZI1ZBzC7VeF4+5sYt+pLDY
cTa2dN5E9luyfdC8JGyGFjKiRfWWtMc0DvfBwqfTWXcKG+Xu6cHN3bTfq9/8+VjtvIlOvHUP7snXDDv4EJUX6ZhH1lhQyokbKx8e0ayTjYxzJPf2bYTTA37Fn2F9WrFqVTL12DE17g1cwFGTCsWi904h6SgoZUSPlLTLtZXyMQwkL8GBkB23bgg/X1kKrjKzq6xiMJmg7Vjve94O+WUS9JYWMqJG90iLjsO4b0ByA5btOkZCWZ90Xl64lx1E+e+ngL1BcR9tbCHEeKWREtWXmF5OQlg9AWylkHE6HJr70b9mQUrPivTVHrffC568hI11L9i/sKvBpok2nj12pdxpRD0khI6ptT6LWGhMW4I6fh4vOaURteHBwCwC+25Zovc0kc89AST5gAN8m1nlNoR+jETrcqB3v/lbfLKJekkJGVNuuxAwAOjXx0zWHqD29mjWgZ0QARaVmPrDWDKby1hifRuDkap3XFPrqNFG7Pvw75KXpm0XUO1LIiGrblZABSCHj6KZdrbXKfLU5ntScwpq/YPpx7VoG+jqOoCgI6QDmYhn0K+qcFDKi2naXdS11CvPTN4ioVQNaNqRjE1/yi0v5ZH1czV9QNot0TB3LWmWke0nUMSlkRLUkZxaQnFWA0SB7LDk6g8HAtLKxMp9tOEFmfg331ZHNIh1T+xvAYISEzZBmhYJXiEqSQkZUS/n4mFbB3rKibz1wTVQwrYO9yS4s4bMNx2v2YumyhoxD8gmFyIHasbTKiDokhYyolt0y0LdeMRoNPDBYW1fm4/Vx5BaWVP/F0soGDftHWiGZsCnlg353f6NNsxeiDkghI6plV4I2PqZjmKzoW1+M6tiIyIaeZOQV8+nG49V7kZIiyEzQjhs0t1o2YSPajAJnD0g7qnUxCVEH7KqQmTVrFgaDgYcffljvKPWa2aykRaYeMhkN/LtsBtP7a46RVVCNsTLpx0GZwdlT26NHOBZXL2hXtpHk9k/1zSLqDbspZLZu3cr7779Px44d9Y5S7x1PzSWroARXJyOtQ7z1jiPq0NjOjWke6ElmfjGfrKvGgM7ybqWAZmAwWDecsA3dJmvX+5ZCfoauUUT9YBeFTE5ODpMmTeLDDz/E399f7zj1Xvm063aNfHA22cWvkLASk9HAo9e0BuDjtXGk5xZV7QXSyrY6aNDMysmEzWjSAwKjtNWb9y7RO42oB+ziW2jatGlcd911DB069IqPLSwsJCsrq8JFWFdM2UJ4HaVbqV4a0T6EqFAfsgtL+KCqO2OnlhUyATI+xmEZDND1Du1YupdEHbD5Qmbx4sXs2LGDmTNnVurxM2fOxNfX13IJCwur5YT1T/nU686yEF69ZDQaeOyaVgAsWn+cM9lVWO23vGtJBvo6tk4TweQCybvh1E690wgHZ9OFTEJCAtOnT+fLL7/Ezc2tUs95+umnyczMtFwSEhJqOWX9UlxqZt8prZVLVvStv4ZEBdEpzI/84lIWRFdhZ+zyrqUA6VpyaB4BEDVGO5ZWGVHLbLqQ2b59O6dPn6Zr1644OTnh5OTEmjVrePvtt3FycqK0tPSC57i6uuLj41PhIqznUHI2RSVmfNyciGjgoXccoRODwcDj12qtMl9sPkFSZv6Vn1RSCJmJ2rF0LTm+8u6lPUugMEffLMKh2XQhM2TIEPbs2UNMTIzl0r17dyZNmkRMTAwmk0nviPWOZcfrMD8MMuukXuvXoiG9IgMoKjHz9qrDV35C+glt6rWLF3gF1X5Aoa+I/lrBWpQNu77WO41wYDZdyHh7e9O+ffsKF09PTxo0aED79u31jlcv7bIM9JWF8Oo7g8HAE8O0GUzfbE3gQNIVBtZbupUiZep1fWA0Qs+p2vHm98Fs1jePcFg2XcgI21O+oq8shCcAukcEcF2HUMwKXv5lP+pyy9Jb1pCRbqV6o8skcPWB1MNw9C+90wgHZXeFTHR0NHPnztU7Rr2UW1jC4dPZgAz0Fec8NaINLk5G1h9J5c8Dpy/9wPK
p1zJjqf5w9YYut2nHmxfom0U4LLsrZIR+9p7MxKwgxMeNYJ/KzSITji8swIN7+mkbQP7vl/0UlVyiC0FmLNVPPacCBjjyJ5yJ1TuNcEBSyIhKK1/RV8bHiH96YHALGnq5cjw1j88utaGkdC3VTwGR0HqEdrz5PX2zCIckhYyotB3x6QB0buqnbxBhc7xcnfhP2cDft1YdJjXnH4vknT/1WrqW6p9e/9Kud30NeWn6ZhEORwoZUWk74zMA6NpU9rsSF7qhWxPaNfIhu6CEOX/+owuhfNdrFy/wDNQln9BR5AAIbg/FebDlQ73TCAcjhYyolFMZ+SRnFWAyGqRrSVyUyWhgxqi2AHy1OZ69JzPP3Sm7XtdvBgP0f1Q73jQfCrP1zSMcihQyolLKu5XahHjj4eKkcxphq65q1oBRHbXp2P9duodSc9l0bJmxJNqOgwYtoCADtn6sdxrhQKSQEZWy40QGIN1K4sqeHdUWb1cndidm8sWmE9qNabLrdb1nNEH/x7Tjje9AUZ6+eYTDkEJGVMrOBK1Fpmu4n75BhM0L8nHjP8O1gb+v/3aIlKyCil1Lov7qcBP4NYXcM7DjM73TCAchhYy4osKSUvad1JaflxYZURm39gqnU5gfOYUlvPjTfjh7RLtDupbqN5Mz9HtEO17/ljabTYgakkJGXNHek1kUlZoJ8HShaYDseC2uzGQ08Mr17TEZDazeEwdZZVOvG7bSN5jQX+dJ4N0Isk/Btk/0TiMcgBQy4op2lg307dpUdrwWldeukS939omgueEUAMojEDwCdE4ldOfkCgP/ox2veQ0KMi//eCGuQAoZcUXl68d0kW4lUUWPXNOKHp7a/ksJpjCd0wib0eV2rXUuPw3WzdU7jbBzUsiIKyqfet1FVvQVVeTp6sRtLbRxEGszAjiULOuHCMDkBEOf1443zYfMk7rGEfZNChlxWUmZ+SRlFmA0QKcmfnrHEXaoGdqX1BFzKM8s3YO5fG0ZUb+1HglNe0NJAUS/oncaYcekkBGXVd6t1CbEB09XWQhPVMNZbbuCRFMY206k8822BJ0DCZtgMMA1L2nHMV9B8l598wi7JYWMuKwdJ2T9GFEDpcWWNWSu7t8fgFdWHNDWlhEirIe24q8ywy+PgdmsdyJhh6SQEZe1MyEDgC5hMtBXVEPaMTCXgLMnN199FZ3C/MguKOGZpXtRSrqYBDDsf+DsCQmbIOYLvdMIOySFjLikohIze8o2/usaLoWMqIYzh7Trhi0xmYy8dkNHnE0G/jyQws+7k/TNJmyDbxMY/F/t+I9nIfesvnmE3ZFCRlzSvlOZFJWY8fdwJqKBLIQnquHMQe06KAqA1iHeTBvcAoDnl+8jLbdIr2TClvT6FwR3gPx0rZgRogqkkBGXdP76MbIQnqiW0/u167JCBuCBQS1oE+JNam4RL/y0T6dgwqaYnGDUHMAAMV9C3N96JxJ2RAoZcUk7zlvRV4hqOX1Auw5qa7nJxcnIqzd0xGiAH2NOsepAik7hhE0J6wHd79SOlz0gK/6KSpNCRlxSeYuMbBQpqqWkEFLLNos8r0UGoFOYH/f013bC/u/SPWTkSReTAK55EfzCITMBfn1K7zTCTkghIy4qJauAkxn5GA3QMcxP7zjCHqUe0WYsufqAT+ML7n70mlY0a+hJSlYh/7dMZjEJwNUbrn8fMMCur+DAT3onEnZAChlxUeUbRbYK9sZLFsIT1WHpVorSFj/7BzdnE3MmdMbJaODn3Un8GHOqjgMKmxTeG/pO145/mg7Z0vUoLk8KGXFRO8q7lWTataguy0Dftpd8SKcwPx4a0hKAGT/u5WRGfl0kE7Zu8H8huD3kpcLSqWAu1TuRsGFSyIiLsqzoK+NjRHVdZKDvxTwwqDldmmoL5T32bYzsxSTAyRVu+BicPeBYNPz9ut6JhA2TQkZc4PyF8GTHa1FtKWVTq4PaXPZhTiYjcyd0xsPFxKZjaXy07lgdhBM2L6hN2ZRsIHo
WHP1L3zzCZkkhIy5wICmLwhIzfh7ONGvoqXccYY8KMiHjhHYc3P6KDw9v4Mmzo7SWmzd+i2XvSZl6K4BOE6HrZEDB9/dCloyjEheSQkZcoHz9mC5hfrIQnqie8tYYnybgEVCpp0zoEcY1bYMpKjXz4Fc7yC4orsWAwm6MeBVCOkDeWVhyF5SW6J1I2BgpZMQFZP0YUWPJe7XrkA6VforBYOD1GzvS2M+d46l5PPX9HpmSLcDZHW76FFy8IX4j/PWi3omEjZFCRlzA0iIjhYyoruTd2nUVChkAPw8X5t3aBSejgV/2JPHxurhaCCfsToPmMO5d7Xj9W3Bwhb55hE2RQkZUcDqrgMT0fAwG6BTmq3ccYa+S92jXIVceH/NPXZv689+R2krAr6w4wOpDp62ZTNirtmOh1/3a8bJ/QfpxXeMI2yGFjKhgW9m06zYhPni7OeucRtil0pJzU6+r2CJT7s6+EdzcvQlmBf/+aiexKdlWDCjs1jUvQpMe2mDybydDcYHeiYQNkEJGVLD1eBoAPSKkW0lUU+phKC3UxjT4RVTrJQwGAy+P60DPyAByCku4+9OtpOYUWjensD9OLnDjQnAPgKQY+O2/eicSNkAKGVHB9rIWmW6yoq+orvO7lYzV/yfGxcnIe7d1o2mABwlp+fzri+0UlsgKr/WeXxiM/xAwwLaPYfd3eicSOpNCRljkFpaw71QWAD0iKjdlVogLnIrRrkM61vilAjxd+GRKd7zdnNh6PJ2nf5CZTAJoORQGPKEd//QQnD6obx6hKylkhMWuhAxKzYpGvm408nPXO46wV6d2ateNuljl5VoEefPurV0xGQ38sOMk764+YpXXFXZu0FMQORCK82DJnVAs+3TVV1LICIutx7Vupe7SGiOqy1wKSbu0YysVMgADWgXy/Jh2ALzxeyw/7ZIVXus9owlu+Ag8A7UNSv94Tu9EQidSyAiLbSe0gb7dZaCvqK6zh6E4F5w9oWFLq7707VeFc3e/SAAe+24X28t+X0U95hUE4xZox1veh9jf9c0jdCGFjACg1KwsK/p2D5cWGVFN5d1KoZ20/zFb2X9HRjE0KpiiEjP3frad+NQ8q7+HsDMtr4Fe/9KOf3wAcmTdofpGChkBwMHkLHIKS/B2daJ1iLfecYS9svL4mH8yGQ28fUtn2jf2IS23iDsXbSFL9mQSQ1+AoHaQewZ+nAYyILxekUJGAOemXXcJ98dklI0iRTWVFzKNu9baW3i4OPHx5B6E+rpx9Ewuj36zC7NZvrjqNWc3bbyMyRUO/w5bPtA7kahDUsgI4LyBvrJ+jKiukqJzeyyFdq7Vtwr2ceP927vh4mTkzwMpLFhztFbfT9iB4LZw7cva8e8zIGW/vnlEnZFCRgCw/bgM9BU1lLIXSgrA3V/b5K+WdWzix0tjy2cyHeLv2DO1/p7CxvW8F1oO01aW/mEqlMhq0PWBFDKCkxn5nMoswGQ00DnMT+84wl4lbtWum/QAQ910T07o0ZSJPcJQCqYv3kliugz+rdcMBhgzDzwaQMoeWP2K3olEHZBCRrCtrDWmfSMfPFycdE4j7FbCFu26SY86fdvnx7SjQ2Nf0vOKeeDLHRQUyzYG9Zp3MIx+Szte/xac2KBvHlHrpJARbDtevr+STLsWNXB+i0wdcnM2seC2rvh7OLM7MZPnl++r0/cXNihqNHS+DVCw9D4oyNI7kahFUsgItpXNWJIdr0W15ZyGjBOAARp3q/O3b+Lvwdu3dMFggMVbE1i8Jb7OMwgbM3wm+DWFjHhY+bTeaUQtkkKmnssqKOZgsva/lW5SyIjqKu9WCooCNx9dIvRvGcjj17YG4Nnl+9idmKFLDmEj3Hzg+vcBA8R8AQd+0juRqCVSyNRzO+MzUArCG3gQ5O2mdxxhrxI2a9d13K30T/cPbH7eyr/bOJUhGwnWa+F9oO907fin6ZCdom8eUStsupCZOXMmPXr0wNvbm6CgIMaNG8ehQ4f0juVQNh9
LBWRbAlFD8Ru16/A+usYwGg28OaETLYO8SMkqZMrCLWTmy8q/9drgZyC4A+SlwvJ/y6q/DsimC5k1a9Ywbdo0Nm3axB9//EFxcTHXXnstubm5ekdzGJvKCpmrmkkhI6qpKPfcir46FzIAPm7OLLyzB0HersSm5DD1s23kF8lMpnrLyQXGf1C26u9vsH2R3omEldl0IbNy5UqmTJlCu3bt6NSpE4sWLSI+Pp7t27frHc0h5BaWsDsxE4CrmjXQOY2wW4lbwVwCvmHa4Eob0MTfg4V39sDL1YnNcWnctWgreUUlescSegluC0Of045/+y+kykrQjsSmC5l/yszUvnQDAi7delBYWEhWVlaFi7i47SfSKTErGvu5ExbgoXccYa/K1+lo2lvfHP/QrpEvn96lFTMbj6Vy58KtssFkfdbrfojoD8V52pTsUilsHYXdFDJms5mHH36Yvn370r59+0s+bubMmfj6+louYWFhdZjSvmws61bq3VxaY0QNlBcyNtCt9E/dwgP49K6elpaZGxdsICFNVv+tl4xGGLcAXH21VsR1c/ROJKzEbgqZadOmsXfvXhYvXnzZxz399NNkZmZaLgkJCXWU0P6cGx8jhYyoppLCcwvhhffVN8sldAv3Z/HUqwj20cbMXD9/PRuOntU7ltCDXxhc94Z2vGYWnJRhCo7ALgqZBx98kJ9//pnVq1fTpEmTyz7W1dUVHx+fChdxofPHx/SKlIG+opoStmgbRXoGQcOWeqe5pPaNfVk2rS9RoT6czSli0kebeeO3QxSXmvWOJupah5ug3fXauK5vp0Bemt6JRA3ZdCGjlOLBBx9k6dKl/PXXX0RGRuodyWFsO5FOqVnRxF/Gx4gaiFujXTcbWGcbRVZXqK8739/fmwndtU0m31l9hDHvrGdXQobe0URdMhhg1Fzwj4TMePj+HjDLrDZ7ZtOFzLRp0/jiiy/46quv8Pb2Jjk5meTkZPLzZZGrmtp4VLqVhBUcKytkIgfqm6OSPFycePXGjsy7pQu+7s4cSMri+vnrefbHvWTkFekdT9QVdz+Y8AU4ucPRVbDmVb0TiRqw6UJmwYIFZGZmMmjQIEJDQy2Xb775Ru9odm/dkTMA9JGBvqK6CrLOjTFoZh+FTLnRnRqx6rGBjOvcCLOCzzaeYNAb0Xy+8Tgl0t1UP4S0P7dL9ppX4eAKffOIarPpQkYpddHLlClT9I5m19Jyi9h3SpuW3q9FQ53TCLt1YgOoUq2J3kbWj6mKhl6uzJ3Yha/u6UXrYG8y8oqZ8eM+Rs1bJ4OB64tOE6DHvdrx93fDyR365hHVYtOFjKgd64+cRSloE+JNkI/srySq6dhq7drOWmP+qU+LhvzyUD9eGtsOPw9nDiZnc+uHm7n/i+0yVbs+GD4Tml+trS/z1QRIP6F3IlFFUsjUQ2sPa91K/VtKa4yogcN/aNfNh+ibwwqcTEZu7x1B9OODmNw7HJPRwK97kxn65ho+XheH2Sz78zgskzPc9Km2H1PuafjyRshP1zuVqAIpZOoZpRTrDmvN5v1aBuqcRtittGOQdhSMTtBskN5prMbPw4UXxrZnxUP96d2sAYUlZl76eT+3fLhJdtJ2ZG4+MOlb8GkMZ2Ph8/FSzNgRKWTqmWNnczmVWYCLyUjPCFk/RlTT4T+166a9tS8BB9M6xJuv7u3F/65vj4eLic1xaYx8ey2rD57WO5qoLT6NYNIScA+AUzvgs7GyxoydkEKmnlkbq3Ur9Yj0x93FpHMaYbeOlHUrtRiqb45aZDAYmNQrnF+n96dDY18y8oq5c9FWZv16UBbSc1TBbWHKz+DREJJ2wadjIFcGfts6KWTqmXVHyrqVWki3kqim4nyIW6sdt7xG3yx1ILyBJ0vu783k3uEAvLfmKLd8sImkTOlqckjB7WDKL+AVDCl74KMhkLJP71TiMqSQqUcKiksthcyAVjLQV1TTsWgoyQefJhDUVu80dcLVycQLY9szf1JXvF2d2HY
inZFvrWX1IelqckhBbWDKCm1ZgfTj8NE1sG+Z3qnEJUghU49sPJpKQbGZRr5utA11vHENoo4c/EW7bjPS5rclsLaRHUL5+aF+tG/sQ3peMXcu3MrMXw9QWCJL3Duchi1g6hpt1eriXPhuMvz2DBQX6J1M/IMUMvXInwdSALg6KghDPfsCElZiLoXYldpx65H6ZtFJeANPlvyrD7dfpXU1vb/mGNe9vY4d8TLLxeF4BMBtP0DvB7WfN74D7/WDI6v0zSUqkEKmnlBK8VfZjIshbYJ1TiPsVuJWyD0Drr4Q0U/vNLpxczbx0rj2vHdbNxp6uXLkdA43LNjA49/tIiVL/sfuUExOMOx/MPFrbdxM6mH4Yrw2qynub1CyxpDepJCpJ/YnZZGUWYC7s4nesr+SqK6DP2vXra7VFhKr54a3D+GPRwZwQ9cmKAVLticy6PVo3vrzMPlF0t3kUNqMhGmb4aoHtPWTjkXDp6PhnR6w+hU4vh5KZONRPRiUcuxyMisrC19fXzIzM/Hxqb/jQuatOszsP2IZGhXMR5O76x1H2COl4K2OkBGvrYTabpzeiWzKzvh0Xvp5PzviMwAI9XXjqRFtGNOpkXTlOpr047BhHsR8pW1tUM7ZA0I7a+NrGrSAgObgH6FdXL30yWrHKvv9LYVMPTH23fXsSshg1vgOTOxpfxv8CRuQuB0+ulr7x/qJo+DioXcim6OU4ufdScz69SAny1YC7trUj+dGt6NTmJ++4YT1FWbD/h+1MTNxf0PeZdac8QqB0E4Q0RdaXgtBUXWX005JIVNGChk4lZFPn1l/YTDA5qeHyEaRonp+e0Yb7NhuPNy0UO80Nq2guJSP18Xx7uoj5JV1Md3QtQlPDm8tf/8clVJw+oC25kzqYTh7GNLjtNabi2130KgLXDUN2t8ARhnlcTFSyJSRQgY+WnuMl385QM+IAL79V2+94wh7pBTMaQ9ZiXDz59B2jN6J7EJKVgGvrjzIDztOAuDl6sR/hrdmUi9tY0pRT+RnwJlDcHI7HP1LG19jLtbuC+4AY96Gxl31TGiTKvv9LWVgPfDz7iQARnUK1TmJsFsJm7UixsWrXqzmay3BPm68eXNnlk3rS6cwP3IKS3j2x32MX7CB/aey9I4n6oq7HzTtBb0fgNuWwKMHYPAz4OpTtnrwUFg7W2ZAVZMUMg4uIS2PmIQMDAZthoUQ1bJrsXYdNRqc3fXNYoc6h/nxw/19eGlce7xdndiVkMHod9Yxc8UB8opK9I4n6ppXIAz8DzwUA+2uB1UKq16Epf+SmU/VIIWMg1uxR2uN6RUZQJC39M2LaigphH1LteNOE/XNYsdMRgO3XxXOn48N5LoOoZSaFe//fYxr3vxbdtWurzwbwE2LYNQcMJhg92JYcieUFuudzK5IIePgfikrZEZ1bKRzEmG3Yn+DggzwbgQR/fVOY/eCfdx4d1JXPpnSncZ+7pzMyOfORVuZ9uUOWUyvvup+F9z6DZhctLWalt4HZtlhvbKkkHFgcWdz2Z2YiVG6lURN7Ppau+54MxhN+mZxIFe3CeaPRwcwdUAzTEYDv+xJYujsNXy+8TilZhkrUe+0vAYmfAlGZ9j7PUTP1DuR3ZBCxoF9uy0BgAGtAmno5apzGmGXspK0FhmAzrfqm8UBebg48d+RUSx/UBsMnF1Ywowf93HDgg3sPZmpdzxR11pdC6Pnasd/v3auS1dclhQyDqqk1MyS7YkATOgepnMaYbdivtAGIjbtDYGt9U7jsNo18uWH+/vw4th2eLk6EVM2GPixb3eRlJmvdzxRl7rcBn3+rR3/+G9IPapvHjsghYyDWn3oDGeyC2ng6cKQKNkkUlSD2Qw7PtOOu07WN0s9YDIauKN3BH8+OpDRnRqhFHy/Q9u76bWVB8kqkAGg9caQ57X/PBRly+DfSpBCxkF9s1XrVhrftTEuTvIxi2o4ukrbV8nVF9qO1TtNvRHi68a8W7qwbFpfekYGUFhiZn70UQa
9Hs3C9XEUlshmlA7P5AQ3fgLu/pC0C9bN0TuRTZNvOAd0OquA1Ye06ZwTeki3kqimze9r110myb5KOugc5sc3U6/iwzu60zzQk7TcIl74aT9D31zDsp0nMcuAYMfm0whGvK4dr3kNkvfqm8eGSSHjgL7YHE+pWdE93J8WQd56xxH26OwROPIHYICe9+qdpt4yGAxc0zaY3x4ewMzxHQjydiUhLZ+Hv4lh1Lx1rIk9g4PvMlO/dbgRWl+nbWfw4wPSxXQJUsg4mPyiUj7feByAO/tG6htG2K8tH2jXrYZDQDN9swicTEZu6dmUNU8M5olhrfF2dWJ/UhaTP9nCpI82syshQ++IojYYDDDqTXDz07qY1s/VO5FNkkLGwSzZkUh6XjFhAe4MayeDfEU15KXBzs+141736ZtFVODuYmLa4Bb8/Z/B3NMvEheTkQ1HUxn77nqmfbmD42dz9Y4orM07BEa8ph1HvwpnYvXNY4OkkHEgpWbFx2uPAXBX30icTPLximrY8gEU50FIR2g2SO804iL8PV34v1Ft+evxgYzv2hiDQVvF+5o5a5j160FyCmX/JofS8WZoea3WxfTTdFn19x/km86B/LE/heOpefi4OXGzrB0jqqMo99wg334Pa03bwmY18ffgzZs7s+Kh/gxoFUhxqeK9NUe5+o1olu5MlPEzjsJggJFvgLMHxG8412IqAClkHIbZrJj7p9bkeNtV4Xi6OumcSNilbZ9Afhr4R0CUTLm2F1GhPnx6Zw8+ntyd8AYenM4u5JFvdnHDgg3sSZQVgh2CfzgMfkY7/mMG5MhGo+WkkHEQy3ed4mByNt5uTkwdIIMzRTUU5pxbr6L/49paFsJuGAwGhkQF8/sjA/jP8NZ4uJjYEZ/BmHfX8dT3uzktG1Lav17/gtBOUJAJK5/WO43NkELGARSXmnnzD6015r4BzfDzcNE5kbBLWz6AvFRtllKnW/ROI6rJ1cnEA4Na8NdjgxjXWVshePHWBPq/tpqXft4vO2zbM5MTjH4LDEbYuwQO/6l3IpsghYwD+GZrAvFpeTT0cpEp16J68tLOTe0c+JS0xjiAEF835k7swpJ/9aZbuD+FJWY+XhdH31l/8eBXO1h3+CzFpTJo1O406gK97teOf3lEG9dWz0khY+cy8oqY/fshAB4c3ELGxojqWfOq1lwd3F5bhEs4jO4RASz5V28+vasnPSL8KTErft6dxG0fb6bH//7kie928dfBFNn6wJ4M/i/4hmlbiETP0juN7gzKwYe1Z2Vl4evrS2ZmJj4+PnrHsbr/Lt3DV5vjaR3szc8P9cNZplyLqjp7BOb3AnMJ3L4Mmg/WO5GoRftOZfLl5nh+25tMam6R5XYvVycGtg7k2rbBDGodhK+7s44pxRUdWglfTwCDCaZGQ2hHvRNZXWW/v6WQsWO7EjIYN389SsE3U6+iV7MGekcS9kYp+HwcHIvW1qmY9J3eiUQdKTUrtsSlsXJvEiv3JZOSVWi5z8lo4KpmDRjRIYTh7UJo4OWqY1JxSd/eAft/hEZd4Z4/wWjSO5FVSSFTxlELmeJSM9fPX8/ek1mM79KYNyd01juSsEd7lsD3d4PJFaZtku0I6imzWbH7ZCa/70vmj/0pHD6dY7nPZDTQp3kDrusQyrB2Ifh7ymQCm5GdDO/0hMJMGP4qXPUvvRNZlRQyZRy1kJnzRyxvrTqMr7szfzw6gCBvN70jCXuTexbmXwW5Z2Dw/8HAJ/ROJGzEsTM5/LYvhRV7kthz8tw6NCajgb4tGjKqQyjXtguWGZK2YOvH8Muj4OIF0zaDbxO9E1mNFDJlHLGQ2ZOYybj56yk1K96+pQtjOjXSO5KwN0rBN7fBwZ8hqK3Wx+4k3QfiQsfP5vLLniR+2Z3E/qQsy+1ORgN9WjRkcOtABrYKJLKhJwZZCbrumc2wcDgkbIbmQ2DSEjA6xlhJKWTKOFohk1dUwph31nPkdA7XdQzl3Vu76h1J2KOdX8CP08D
oDPf+5ZADBYX1HTuTw4o9Sfy8O4mDydkV7gsLcGdgq0AGtAykT4uGeMkMyrpz+iB8MAhK8mHIs9D/Mb0TWYUUMmUcqZBRSvHot7tYuvMkgd6u/PbwAAKkv1pUVfIe+GgolBQ41D96om4dOZ3DqgMp/H34DFvi0iguPfdV4mwy0DnMj16RDejVLIBu4f54uEhhU6t2fA7LH9QWy5vyC4T30TtRjUkhU8aRCpkvN5/gmaV7MRkNfHVPL5mlJKouNxU+GgLpcdDiGrj1W4dphhb6yS0sYdOxVNbEnmFN7BlOpOZVuN/JaKBDE196RgbQKzKAjk38aCgzoaxLKVj6L9i9GLxD4V/rwLOh3qlqRAqZMo5SyGw7nsatH26mqNTM0yPacN/A5npHEvamOB8+G6v1pfs1halrwCNA71TCAZ1IzWXTsVQ2H0tjc1waJzPyL3hMI183OjTxpUNjXzo08aNDY19pYa6pwhz4cDCcjYWI/nDbD+Bkv3+mUsiUcYRC5uiZHG5YsIGMvGKGtQvmvdu6yaA6UTUlhfDN7XD4N3Dzhbv/gMDWeqcS9URiel5ZUZPKthPpxJ3N5WLfPI393OkU5kunJn50bOJHhya+MtamqlL2w8fXQFEOdJwI178Hdvp9IYVMGXsvZE5nFXDDextISMunU5gfi++9CncXx1r0SNSy4nxYchccWgFObtqshsj+eqcS9Vh2QTH7TmWx92QmuxMz2XMyk7izF+4ZZDBA80AvOjXxo3NTP7qE+dE6xFtWML+SI3/ClzeDKoUe98DIN+yymJFCpow9FzKnMvKZ9NFm4s7m0jTAgx8e6CP9yqJqclNh8a2QsElb9O7WxdD8ar1TCXGBrIJi9iZmsisxk10JGexOzOBU5oU7dbs5G2nfyJf2jX1pHeJNq2BvWod4S8vNP8V8DcvuBxR0mwIjZ9vdZrBSyJSx10Lm+Nlcbvt4M4np+TT2c+fre6+iaQMPvWMJe5KwFb6bAlmJWnfSxK8gop/eqYSotDPZhexOzGBXQgY7EzKIScggu6Dkoo9t5OtGRENP7dLAg4gG2nHTAA/cnOtpK/bOL7VlFlDQchjc8KH2b4GdkEKmjD0WMn/sT+HRb2PILighooEHX957FY393PWOJexFcT6seQ3Wv6U1LQc004qYoCi9kwlRI2az4tjZXHYlZHAwOYuDydnEpmRX2CfqnwwGCPXRipzGfu6E+LoR5ONGiI8bwT6uhPi40cDLFZPR/rpeKmX/cvjhXm25Bb9wuOEjCOupd6pKkUKmjD0VMpn5xcz5I5ZFG44D0LWpH+/d1o0gH9l+QFRCaTHs/hZWv6K1wgC0vwFGzQU32/7dF6Im0nOLOHY2h7izeZxIzSXubC4nUvM4fjaX7MKLt+Ccz2Q0EOjlSrCPK8E+bmUX7TjU153G/u408nPD1clOW3ZObtdaZzPiAQN0mwyD/gvewXonuyyHKmTeffddXn/9dZKTk+nUqRPz5s2jZ8/KVZT2UMjkFpbww45E3lp1mLM5RQDc2TeCp0dE4eIkg9rEFaTFwZ7vYPsiyDqp3ebTGEa8ClGjdY0mhJ6UUqTlFnG8rKhJyswnJauQ5KwCTmcVkJxVwJnsQsyV/BYM9HalsZ9W2DQpu2583rW3m3PtnlBN5GfAyqdg19faz05u0OU2bfxMcHubHAzsMIXMN998wx133MF7771Hr169mDt3Lt999x2HDh0iKCjois+31UImp7CETUdTWX3oNMtjTln+19As0JPnR7djQKtAnRMKm1RSBGnH4PR+OL4Ojq3Wfi7nGQR9HoSeU8FZuiOFuJJSs+JsTiEpWQUVi5xMrdBJyizgZHo++cWlV3wtX3fnCoVNqK8b3m7OeLs5lV2c8Sm7dncx4epkxNXJWLfLaRxfD38+B4lbz93m11Tbp6n5YAjpqP1s1L/1yWEKmV69etGjRw/eeecdAMxmM2FhYfz73//mqaeeuuLza6uQ2Xsyk4S0PMwKzEphLvt
jNCuF2axdKwWFJaVk5heTkVdMZn4xablFHDmTQ3xaXoV1FCIaeDClTwS39gqXVpj6oiALjvyhdQmVFmlrvZQflxZDUTbkpZVdUiH3tNY0bP5HU7nBpA3i7XwrtLteNn8UwsqUUqTnFXMyPZ+TGXkkpueTmJ7PyYz8stvyycwvrvbru5i0gsbV2Yirk1bguDgZcXU+V+y4OpnK7j/3mPMfX34xGg0YDQaMBjAYzh0bDQYMZddGoOHZzTSNW0zwyVUY1T+ym1yhQXPwDgHPQPBoCC6e4OymteSUX4wmbUsEgxEad9XG41lRZb+/bXouVlFREdu3b+fpp5+23GY0Ghk6dCgbN2686HMKCwspLDw38CszU9uCPisr66KPr65PVu9nyfbEGr1GY383+jZvyJCoYHo3a4DRaKAgL4cLJxwKh5R2DL68s+rPc/aEBi2gSTdt9c6wXufGwOQVApce+CiEqB4nINzHQLiPJzT1vOD+nMISkjLyOZWZT1JGPicztG6rnMJicgpKyS4sIaewmNyCEnIKSyk5rz+roOxS927EnVFMDDnJYy2S4cRGSD0KhQWQtw/YV/mXGjYTut5u1XTl39tXbG9RNuzkyZMKUBs2bKhw+xNPPKF69ux50ec899xzCpCLXOQiF7nIRS4OcElISLhsrWDTLTLV8fTTT/Poo49afjabzaSlpdGgQQObWdY/KyuLsLAwEhISbGrcTm2qj+cM9fO86+M5Q/087/p4zlA/z1uPc1ZKkZ2dTaNGjS77OJsuZBo2bIjJZCIlJaXC7SkpKYSEhFz0Oa6urri6Vhwj4OfnV1sRa8THx6fe/CUoVx/PGernedfHc4b6ed718Zyhfp53XZ+zr6/vFR9j06NKXVxc6NatG6tWrbLcZjabWbVqFb1799YxmRBCCCFsgU23yAA8+uijTJ48me7du9OzZ0/mzp1Lbm4ud95ZjUGSQgghhHAoNl/ITJgwgTNnzvDss8+SnJxM586dWblyJcHBtr0i4eW4urry3HPPXdAF5sjq4zlD/Tzv+njOUD/Puz6eM9TP87blc7b5dWSEEEIIIS7FpsfICCGEEEJcjhQyQgghhLBbUsgIIYQQwm5JISOEEEIIuyWFjBW8++67RERE4ObmRq9evdiyZcslH/vDDz/QvXt3/Pz88PT0pHPnznz++ecVHjNlyhQMBkOFy/Dhw2v7NKqsKud9vsWLF2MwGBg3blyF25VSPPvss4SGhuLu7s7QoUM5fPhwLSSvPmufsyN+1osWLbrgnNzc3Co8xtE+68qcsyN+1gAZGRlMmzaN0NBQXF1dadWqFStWrKjRa9Y1a5/z888/f8Fn3aZNm9o+jSqrynkPGjTognMyGAxcd911lsfo9vfaClsi1WuLFy9WLi4u6pNPPlH79u1T9957r/Lz81MpKSkXffzq1avVDz/8oPbv36+OHDmi5s6dq0wmk1q5cqXlMZMnT1bDhw9XSUlJlktaWlpdnVKlVPW8y8XFxanGjRur/v37q7Fjx1a4b9asWcrX11ctW7ZM7dq1S40ZM0ZFRkaq/Pz8WjyTyquNc3bEz3rhwoXKx8enwjklJydXeIyjfdaVOWdH/KwLCwtV9+7d1ciRI9W6detUXFycio6OVjExMdV+zbpWG+f83HPPqXbt2lX4rM+cOVNXp1QpVT3v1NTUCuezd+9eZTKZ1MKFCy2P0evvtRQyNdSzZ081bdo0y8+lpaWqUaNGaubMmZV+jS5duqj/+7//s/w8efLkC77wbE11zrukpET16dNHffTRRxeco9lsViEhIer111+33JaRkaFcXV3V119/XSvnUFXWPmelHPOzXrhwofL19b3k6zniZ32lc1bKMT/rBQsWqGbNmqmioiKrvWZdq41zfu6551SnTp2sHdWqavq5zJkzR3l7e6ucnByllL5/r6VrqQaKiorYvn07Q4cOtdxmNBoZOnQoGzduvOLzlVKsWrWKQ4cOMWDAgAr3RUdHExQUROv
Wrbn//vtJTU21ev7qqu55v/jiiwQFBXH33XdfcF9cXBzJyckVXtPX15devXpV6s+yttXGOZdzxM86JyeH8PBwwsLCGDt2LPv27bPc56if9eXOuZyjfdbLly+nd+/eTJs2jeDgYNq3b88rr7xCaWlptV+zLtXGOZc7fPgwjRo1olmzZkyaNIn4+PhaPZeqsMbn8vHHHzNx4kQ8PT0Bff9eSyFTA2fPnqW0tPSCVYaDg4NJTk6+5PMyMzPx8vLCxcWF6667jnnz5nHNNddY7h8+fDifffYZq1at4tVXX2XNmjWMGDHigr8oeqnOea9bt46PP/6YDz/88KL3lz+vqn+WdaU2zhkc87Nu3bo1n3zyCT/++CNffPEFZrOZPn36kJiYCDjmZ32lcwbH/KyPHTvGkiVLKC0tZcWKFcyYMYPZs2fz8ssvV/s161JtnDNAr169WLRoEStXrmTBggXExcXRv39/srOza/V8Kqumn8uWLVvYu3cv99xzj+U2Pf9e2/wWBY7I29ubmJgYcnJyWLVqFY8++ijNmjVj0KBBAEycONHy2A4dOtCxY0eaN29OdHQ0Q4YM0Sl19WVnZ3P77bfz4Ycf0rBhQ73j1InKnrOjfdYAvXv3rrCpa58+fYiKiuL999/npZde0jFZ7anMOTviZ202mwkKCuKDDz7AZDLRrVs3Tp48yeuvv85zzz2nd7xaUZlzHjFihOXxHTt2pFevXoSHh/Ptt99etnXWXnz88cd06NCBnj176h0FkEKmRho2bIjJZCIlJaXC7SkpKYSEhFzyeUajkRYtWgDQuXNnDhw4wMyZMy2FzD81a9aMhg0bcuTIEZv4B6+q53306FGOHz/O6NGjLbeZzWYAnJycOHTokOV5KSkphIaGVnjNzp0718JZVE1tnHPz5s0veJ69f9YX4+zsTJcuXThy5AiAw33WF/PPc74YR/isQ0NDcXZ2xmQyWW6LiooiOTmZoqIiq/xZ1qbaOGcXF5cLnuPn50erVq0u+/tQl2ryueTm5rJ48WJefPHFCrfr+fdaupZqwMXFhW7durFq1SrLbWazmVWrVlX439mVmM1mCgsLL3l/YmIiqampFX459FTV827Tpg179uwhJibGchkzZgyDBw8mJiaGsLAwIiMjCQkJqfCaWVlZbN68uUp/lrWlNs75Yuz9s76Y0tJS9uzZYzknR/usL+af53wxjvBZ9+3blyNHjliKdIDY2FhCQ0NxcXGx2r+RtaU2zvlicnJyOHr0qF1/1uW+++47CgsLue222yrcruvf61odSlwPLF68WLm6uqpFixap/fv3q6lTpyo/Pz/L1Mvbb79dPfXUU5bHv/LKK+r3339XR48eVfv371dvvPGGcnJyUh9++KFSSqns7Gz1+OOPq40bN6q4uDj1559/qq5du6qWLVuqgoICXc7xYqp63v90sRkcs2bNUn5+furHH39Uu3fvVmPHjrW5KbnWPGdH/axfeOEF9dtvv6mjR4+q7du3q4kTJyo3Nze1b98+y2Mc7bO+0jk76mcdHx+vvL291YMPPqgOHTqkfv75ZxUUFKRefvnlSr+m3mrjnB977DEVHR2t4uLi1Pr169XQoUNVw4YN1enTp+v8/C6luv+e9evXT02YMOGir6nX32spZKxg3rx5qmnTpsrFxUX17NlTbdq0yXLfwIED1eTJky0/P/PMM6pFixbKzc1N+fv7q969e6vFixdb7s/Ly1PXXnutCgwMVM7Ozio8PFzde++9NvOX/nxVOe9/ulghYzab1YwZM1RwcLBydXVVQ4YMUYcOHaql9NVjzXN21M/64Ycftjw2ODhYjRw5Uu3YsaPC6znaZ32lc3bUz1oppTZs2KB69eqlXF1dVbNmzdT//vc/VVJSUunXtAXWPucJEyao0NBQ5eLioho3bqwmTJigjhw5UlenU2lVPe+DBw8qQP3+++8XfT29/l4blFKqdtt8hBBCCCFqh4yREUIIIYTdkkJGCCGEEHZLChk
hhBBC2C0pZIQQQghht6SQEUIIIYTdkkJGCCGEEHZLChkhhBBC2C0pZIQQQghht6SQEcKBRUREMHfuXMvPBoOBZcuW1XmO559/3iY2hKwt0dHRGAwGMjIy9I4iRL0jhYwQ9UhSUhIjRoyo1GMdvfgQQjgGKWSEsHFFRUVWe62QkBBcXV2t9nrCPlnzd0oIvUkhI0QdGjRoEA8++CAPPvggvr6+NGzYkBkzZnD+lmcRERG89NJL3HHHHfj4+DB16lQA1q1bR//+/XF3dycsLIyHHnqI3Nxcy/NOnz7N6NGjcXd3JzIyki+//PKC9/9n11JiYiK33HILAQEBeHp60r17dzZv3syiRYt44YUX2LVrFwaDAYPBwKJFiwDIyMjgnnvuITAwEB8fH66++mp27dpV4X1mzZpFcHAw3t7e3H333RQUFFz2z6W0tJS7776byMhI3N3dad26NW+99VaFx0yZMoVx48bxxhtvEBoaSoMGDZg2bRrFxcWWx6Snp3PHHXfg7++Ph4cHI0aM4PDhw5b7Fy1ahJ+fHz///DOtW7fGw8ODG2+8kby8PD799FMiIiLw9/fnoYceorS01PK8zz//nO7du+Pt7U1ISAi33norp0+fvui55Obm4uPjw5IlSyrcvmzZMjw9PcnOzr7o85YsWUKHDh1wd3enQYMGDB06tMLn+8knn9CuXTtcXV0JDQ3lwQcftNwXHx/P2LFj8fLywsfHh5tvvpmUlBTL/eWtax999BGRkZG4ubkBlfsshbB5tb4tpRDCYuDAgcrLy0tNnz5dHTx4UH3xxRfKw8NDffDBB5bHhIeHKx8fH/XGG2+oI0eOWC6enp5qzpw5KjY2Vq1fv1516dJFTZkyxfK8ESNGqE6dOqmNGzeqbdu2qT59+ih3d3c1Z84cy2MAtXTpUqWUUtnZ2apZs2aqf//+au3aterw4cPqm2++URs2bFB5eXnqscceU+3atVNJSUkqKSlJ5eXlKaWUGjp0qBo9erTaunWrio2NVY899phq0KCBSk1NVUop9c033yhXV1f10UcfqYMHD6pnnnlGeXt7q06dOl3yz6WoqEg9++yzauvWrerYsWOWP5dvvvnG8pjJkycrHx8f9a9//UsdOHBA/fTTTxf82Y0ZM0ZFRUWpv//+W8XExKhhw4apFi1aqKKiIqWUUgsXLlTOzs7qmmuuUTt27FBr1qxRDRo0UNdee626+eab1b59+9RPP/2kXFxcKuxK//HHH6sVK1aoo0ePqo0bN6revXurESNGWO5fvXq1AlR6erpSSql7771XjRw5ssI5jhkzRt1xxx0XPf9Tp04pJycn9eabb6q4uDi1e/du9e6776rs7GyllFLz589Xbm5uau7cuerQoUNqy5Ytls+1tLRUde7cWfXr109t27ZNbdq0SXXr1k0NHDjQ8vrPPfec8vT0VMOHD1c7duxQu3btqtRnKYQ9kEJGiDo0cOBAFRUVpcxms+W2J598UkVFRVl+Dg8PV+PGjavwvLvvvltNnTq1wm1r165VRqNR5efnq0OHDilAbdmyxXL/gQMHFHDJQub9999X3t7el/zSeu655y4oPtauXat8fHxUQUFBhdubN2+u3n//faWUUr1791YPPPBAhft79ep12ULmYqZNm6ZuuOEGy8+TJ09W4eHhqqSkxHLbTTfdpCZMmKCUUio2NlYBav369Zb7z549q9zd3dW3336rlNIKGUAdOXLE8pj77rtPeXh4WIoGpZQaNmyYuu+++y6ZbevWrQqwPOefhczmzZuVyWRSp06dUkoplZKSopycnFR0dPRFX2/79u0KUMePH7/o/Y0aNVLPPPPMRe/7/ffflclkUvHx8Zbb9u3bV+H34bnnnlPOzs7q9OnTlsdU5rMUwh5I15IQdeyqq67CYDBYfu7duzeHDx+u0JXRvXv3Cs/ZtWsXixYtwsvLy3IZNmwYZrOZuLg4Dhw4gJOTE926dbM8p02bNvj5+V0yR0xMDF26dCE
gIKDS2Xft2kVOTg4NGjSokCUuLo6jR48CcODAAXr16lXheb17977ia7/77rt069aNwMBAvLy8+OCDD4iPj6/wmHbt2mEymSw/h4aGWrp4yv8Mzn/vBg0a0Lp1aw4cOGC5zcPDg+bNm1t+Dg4OJiIiAi8vrwq3nd91tH37dkaPHk3Tpk3x9vZm4MCBABfkK9ezZ0/atWvHp59+CsAXX3xBeHg4AwYMuOjjO3XqxJAhQ+jQoQM33XQTH374Ienp6YDWZXjq1CmGDBly0eceOHCAsLAwwsLCLLe1bdsWPz+/CucdHh5OYGCg5efKfJZC2AMnvQMIIS7k6elZ4eecnBzuu+8+HnrooQse27RpU2JjY6v8Hu7u7lV+Tk5ODqGhoURHR19w3+WKpitZvHgxjz/+OLNnz6Z37954e3vz+uuvs3nz5gqPc3Z2rvCzwWDAbDZX6b0u9hqXe93c3FyGDRvGsGHD+PLLLwkMDCQ+Pp5hw4ZddtDsPffcw7vvvstTTz3FwoULufPOOysUsOczmUz88ccfbNiwgd9//5158+bxzDPPsHnzZho2bFil87uUi/1O1cZnKURdkxYZIerYP7+cN23aRMuWLSu0NPxT165d2b9/Py1atLjg4uLiQps2bSgpKWH79u2W5xw6dOiy65p07NiRmJgY0tLSLnq/i4tLhVai8hzJyck4OTldkKP8CzcqKuqi53g569evp0+fPjzwwAN06dKFFi1aVLlVICoqipKSkgrvnZqayqFDh2jbtm2VXut8Bw8eJDU1lVmzZtG/f3/atGlzyYG+57vttts4ceIEb7/9Nvv372fy5MmXfbzBYKBv37688MIL7Ny5ExcXF5YuXYq3tzcRERGsWrXqos+LiooiISGBhIQEy2379+8nIyPjsuddmc9SCHsghYwQdSw+Pp5HH32UQ4cO8fXXXzNv3jymT59+2ec8+eSTbNiwgQcffJCYmBgOHz7Mjz/+aJm50rp1a4YPH859993H5s2b2b59O/fcc89lW11uueUWQkJCGDduHOvXr+fYsWN8//33bNy4EdBmT8XFxRETE8PZs2cpLCxk6NCh9O7dm3HjxvH7779z/PhxNmzYwDPPPMO2bdsAmD59Op988gkLFy4kNjaW5557jn379l32/Fq2bMm2bdv47bffiI2NZcaMGWzdurUqf6y0bNmSsWPHcu+997Ju3Tp27drFbbfdRuPGjRk7dmyVXut8TZs2xcXFhXnz5nHs2DGWL1/OSy+9dMXn+fv7M378eJ544gmuvfZamjRpcsnHbt68mVdeeYVt27YRHx/PDz/8wJkzZ4iKigK0WUezZ8/m7bff5vDhw+zYsYN58+YBMHToUDp06MCkSZPYsWMHW7Zs4Y477mDgwIEXdFGerzKfpRD2QAoZIerYHXfcQX5+Pj179mTatGlMnz7dMsX6Ujp27MiaNWuIjY2lf//+dOnShWeffZZGjRpZHrNw4UIaNWrEwIEDGT9+PFOnTiUoKOiSr+ni4sLvv/9OUFAQI0eOpEOHDsyaNcvSMnTDDTcwfPhwBg8eTGBgIF9//TUGg4EVK1YwYMAA7rzzTlq1asXEiRM5ceIEwcHBAEyYMIEZM2bwn//8h27dunHixAnuv//+y57ffffdx/jx45kwYQK9evUiNTWVBx54oLJ/pBX+DLp168aoUaPo3bs3SilWrFhxQddRVQQGBrJo0SK+++472rZty6xZs3jjjTcq9dy7776boqIi7rrrrss+zsfHh7///puRI0fSqlUr/u///o/Zs2dbFi+cPHkyc+fOZf78+bRr145Ro0ZZppUbDAZ+/PFH/P39GTBgAEOHDqVZs2Z88803l33PynyWQtgDg1LnLWAhhKhVgwYNonPnzhW2DRCO6/PPP+eRRx7h1KlTuLi46B1HCIckg32FEMLK8vLySEpKYtasWdx3331SxAhRi6RrSQghrOy1116jTZs2hISE8PTTT+sdRwiHJl1LQgghhLBb0iI
jhBBCCLslhYwQQggh7JYUMkIIIYSwW1LICCGEEMJuSSEjhBBCCLslhYwQQggh7JYUMkIIIYSwW1LICCGEEMJu/T+FnNCGBBJoDwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.kdeplot(predictions[filtered_dataset[\"Cover_Type\"] == \"Spruce/Fir\"], label=\"Spruce/Fir\")\n", + "sns.kdeplot(predictions[filtered_dataset[\"Cover_Type\"] == \"Cottonwood/Willow\"], label=\"Cottonwood/Willow\")\n", + "plt.xlabel(\"predicted anomaly score\")\n", + "plt.ylabel(\"distribution\")\n", + "plt.legend()\n", + "None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The AUC is a metric used to evaluate classification models. It can also be used to quantify the discriminative power of any signal in separating two distinct classes. In the context of anomaly detection, we can use the AUC to quantify how much our anomaly detection model is able to isolate the minority class.\n", + "\n", + "The cover type information are not used to train the model and the dataset is considered static (i.e., the type of coverage does not change overtime). Therefore, we do not need to split the dataset between training and testing, and use all the data both for training the model and evaluate it with the AUC." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9427246186652949" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.roc_auc_score(filtered_dataset[\"Cover_Type\"] == \"Cottonwood/Willow\", predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This high AUC confirms that the model is well able to separate the two cover types.\n", + "\n", + "We can also analyse the model to understand it: For instance, we see on the partial dependency plot of the elevation that the \"normal\" coverage is around 2900 and 3300 meters of altitude. Other similar conclusions can be taken by looking at other attributes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.analyze(filtered_dataset, sampling=0.001) # Use larger sampling for better results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also interpret individual model predictions. For example, let's select the first Cottonwood/Willow example and generate a prediction:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ElevationAspectSlopeHorizontal_Distance_To_HydrologyVertical_Distance_To_HydrologyHorizontal_Distance_To_RoadwaysHillshade_9amHillshade_NoonHillshade_3pmHorizontal_Distance_To_Fire_PointsCover_Type
198820003187304108201234172268Cottonwood/Willow
\n", + "
" + ], + "text/plain": [ + " Elevation Aspect Slope Horizontal_Distance_To_Hydrology \\\n", + "1988 2000 318 7 30 \n", + "\n", + " Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways \\\n", + "1988 4 108 \n", + "\n", + " Hillshade_9am Hillshade_Noon Hillshade_3pm \\\n", + "1988 201 234 172 \n", + "\n", + " Horizontal_Distance_To_Fire_Points Cover_Type \n", + "1988 268 Cottonwood/Willow " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_willow_example = filtered_dataset[filtered_dataset[\"Cover_Type\"] == \"Cottonwood/Willow\"][:1]\n", + "first_willow_example" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.5474113], dtype=float32)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(first_willow_example)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's see how the model prediction would change with the feature values of this example:\n", + "\n", + "We see that the example's elevation of 2000 is uncommon and explains some of the high prediction value. On the other hand, the example's \"aspect\" and \"slope\" are relatively normal." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.analyze_prediction(first_willow_example)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like all the decision forest algorithms, our isolation forest model defines an implicit distance between examples. This distance can be used to cluster the examples or to build interpretable mappings.\n", + "\n", + "Let's compute the distance between each pair of examples. To make the code run fast, we only select the first 10000 examples." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0. , 0.86 , 0.6766667, 0.85 ],\n", + " [0.86 , 0. , 0.9066667, 0.31 ],\n", + " [0.6766667, 0.9066667, 0. , 0.8833333],\n", + " [0.85 , 0.31 , 0.8833333, 0. ]], dtype=float32)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "distances = model.distance(filtered_dataset[:10000]) # Use more examples for better results\n", + "distances[:4, :4]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then use UMAP (or any other manifold learning algorithm such as T-SNE) to project the examples in a 2D plot.\n", + "\n", + "Note that the cover types are well separated despite the model having never seen them." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/gbm/my_venv/lib/python3.11/site-packages/umap/umap_.py:1858: UserWarning:\n", + "\n", + "using precomputed metric; inverse_transform will be unavailable\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "manifold = umap.UMAP(n_components=2, n_neighbors=10, metric=\"precomputed\").fit_transform(distances)\n", + "sns.scatterplot(x=manifold[:, 0],\n", + " y=manifold[:, 1],\n", + " hue=filtered_dataset[\"Cover_Type\"][:manifold.shape[0]])\n", + "plt.legend()" + ] + } + ], + "metadata": { + "colab": { + "private_outputs": true + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/documentation/public/mkdocs.yml b/documentation/public/mkdocs.yml index 18128006..9c1b2144 100644 --- a/documentation/public/mkdocs.yml +++ b/documentation/public/mkdocs.yml @@ -56,6 +56,7 @@ nav: - Regression: tutorial/regression.ipynb - Ranking: tutorial/ranking.ipynb - Uplifting: tutorial/uplifting.ipynb + - Anomaly detection: tutorial/anomaly_detection.ipynb - Input feature: - numerical: tutorial/numerical_feature.ipynb - categorical: tutorial/categorical_feature.ipynb From 74026c8759c2e49bdb0e81d3de2cc8925cec08ab Mon Sep 17 00:00:00 2001 From: Mathieu Guillame-Bert Date: Tue, 18 Jun 2024 00:03:04 -0700 Subject: [PATCH 30/30] Release YDF 0.5.0 PiperOrigin-RevId: 644269898 --- yggdrasil_decision_forests/port/python/CHANGELOG.md | 8 +++++++- yggdrasil_decision_forests/port/python/config/setup.py | 2 +- .../port/python/tools/build_windows_release.bat | 2 +- .../port/python/ydf/dataset/dataset.py | 6 +++--- .../port/python/ydf/dataset/dataset_test.py | 1 - .../port/python/ydf/dataset/dataspec.py | 1 - .../port/python/ydf/utils/test_utils.py | 2 +- yggdrasil_decision_forests/port/python/ydf/version.py | 2 +- 8 files changed, 14 insertions(+), 
10 deletions(-) diff --git a/yggdrasil_decision_forests/port/python/CHANGELOG.md b/yggdrasil_decision_forests/port/python/CHANGELOG.md index 77889aeb..195f4008 100644 --- a/yggdrasil_decision_forests/port/python/CHANGELOG.md +++ b/yggdrasil_decision_forests/port/python/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## HEAD +## 0.5.0 - 2024-06-17 ### Feature @@ -10,6 +10,12 @@ more convenient than`ydf.verbose`. - Add SKLearn to YDF model converter: `ydf.from_sklearn`. - Improve error messages when calling the model with non supported data. +- Add support for numpy 2.0. + +### Tutorials + +- Add anomaly detection tutorial. +- Add YDF and JAX model composition tutorial. ### Fix diff --git a/yggdrasil_decision_forests/port/python/config/setup.py b/yggdrasil_decision_forests/port/python/config/setup.py index 8727d908..3a78340f 100644 --- a/yggdrasil_decision_forests/port/python/config/setup.py +++ b/yggdrasil_decision_forests/port/python/config/setup.py @@ -21,7 +21,7 @@ from setuptools.command.install import install from setuptools.dist import Distribution -_VERSION = "0.4.3" +_VERSION = "0.5.0" with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() diff --git a/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat b/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat index 816f382c..6256c9bb 100644 --- a/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat +++ b/yggdrasil_decision_forests/port/python/tools/build_windows_release.bat @@ -34,7 +34,7 @@ cls setlocal -set YDF_VERSION=0.4.3 +set YDF_VERSION=0.5.0 set BAZEL=bazel.exe set BAZEL_SH=C:\msys64\usr\bin\bash.exe set BAZEL_FLAGS=--config=windows_cpp20 --config=windows_avx2 diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py index 138ab6ff..e7ad9aa4 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py +++ 
b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset.py @@ -136,7 +136,7 @@ def _add_column( column_data.dtype.type in [ np.object_, - np.string_, + np.bytes_, np.str_, ] or column_data.dtype.type in dataspec.NP_SUPPORTED_INT_DTYPE @@ -215,7 +215,7 @@ def _add_column( if column_data.dtype.type in [ np.object_, - np.string_, + np.bytes_, np.bool_, ] or np.issubdtype(column_data.dtype, np.integer): column_data = column_data.astype(np.bytes_) @@ -648,7 +648,7 @@ def infer_semantic(name: str, data: Any) -> dataspec.Semantic: ): return dataspec.Semantic.NUMERICAL - if data.dtype.type in [np.string_, np.bytes_, np.str_]: + if data.dtype.type in [np.bytes_, np.str_]: return dataspec.Semantic.CATEGORICAL if data.dtype.type in [np.object_]: diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py index b7f8e131..6928432a 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataset_test.py @@ -48,7 +48,6 @@ class GenericDatasetTest(parameterized.TestCase): (np.array([1], np.float64), Semantic.NUMERICAL), (np.array([1], np.bool_), Semantic.BOOLEAN), (np.array(["a"], np.bytes_), Semantic.CATEGORICAL), - (np.array(["a"], np.string_), Semantic.CATEGORICAL), (np.array(["a", np.nan], np.object_), Semantic.CATEGORICAL), ) def test_infer_semantic(self, value, expected_semantic): diff --git a/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py b/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py index 9a0d8b29..d33cfa80 100644 --- a/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py +++ b/yggdrasil_decision_forests/port/python/ydf/dataset/dataspec.py @@ -50,7 +50,6 @@ np.float32: ds_pb.DType.DTYPE_FLOAT32, np.float64: ds_pb.DType.DTYPE_FLOAT64, np.bool_: ds_pb.DType.DTYPE_BOOL, - np.string_: ds_pb.DType.DTYPE_BYTES, np.str_: ds_pb.DType.DTYPE_BYTES, np.bytes_: 
ds_pb.DType.DTYPE_BYTES, np.object_: ds_pb.DType.DTYPE_BYTES, diff --git a/yggdrasil_decision_forests/port/python/ydf/utils/test_utils.py b/yggdrasil_decision_forests/port/python/ydf/utils/test_utils.py index 802ba242..e61f79ef 100644 --- a/yggdrasil_decision_forests/port/python/ydf/utils/test_utils.py +++ b/yggdrasil_decision_forests/port/python/ydf/utils/test_utils.py @@ -175,7 +175,7 @@ def test_almost_equal(a, b) -> Optional[str]: if a.dtype != b.dtype: return f"numpy array type mismatch: {a} != {b}" - if a.dtype.type in [np.string_, np.bytes_, np.str_]: + if a.dtype.type in [np.bytes_, np.str_]: if not np.equal(a, b).all(): return f"numpy array mismatch: {a} != {b}" else: diff --git a/yggdrasil_decision_forests/port/python/ydf/version.py b/yggdrasil_decision_forests/port/python/ydf/version.py index d066b205..e782e81c 100644 --- a/yggdrasil_decision_forests/port/python/ydf/version.py +++ b/yggdrasil_decision_forests/port/python/ydf/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -version = "0.4.3" +version = "0.5.0"