From 516ff75ac9701ab6fd78be603c17ecf4d3f3dba8 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Fri, 9 Feb 2024 14:41:47 +0000 Subject: [PATCH 1/8] Fix masking Signed-off-by: Dmitry Sorokin --- kedro-telemetry/kedro_telemetry/masking.py | 24 ++++++++++++++-------- kedro-telemetry/kedro_telemetry/plugin.py | 15 +++++++------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index fe5f0a3f6..cfe637e3e 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -72,22 +72,30 @@ def _get_cli_structure( return output +def _mask_unrecognized_arg(arg: str, vocabulary: Set[str]) -> str: + return arg if arg in vocabulary else MASK + + def _mask_kedro_cli(cli_struct: Dict[str, Any], command_args: List[str]) -> List[str]: """Takes a dynamic vocabulary (based on `KedroCLI`) and returns a masked CLI input""" output = [] vocabulary = _get_vocabulary(cli_struct) + mask_next = False for arg in command_args: if arg.startswith("-"): - for arg_part in arg.split("="): - if arg_part in vocabulary: - output.append(arg_part) - elif arg_part: - output.append(MASK) - elif arg in vocabulary: - output.append(arg) - elif arg: + if "=" in arg: + arg_left = arg.split("=")[0] + output.append(_mask_unrecognized_arg(arg_left, vocabulary)) + output.append(MASK) + else: + mask_next = True + output.append(_mask_unrecognized_arg(arg, vocabulary)) + elif mask_next: output.append(MASK) + mask_next = False + else: + output.append(_mask_unrecognized_arg(arg, vocabulary)) return output diff --git a/kedro-telemetry/kedro_telemetry/plugin.py b/kedro-telemetry/kedro_telemetry/plugin.py index 0e654734f..b76f9163f 100644 --- a/kedro-telemetry/kedro_telemetry/plugin.py +++ b/kedro-telemetry/kedro_telemetry/plugin.py @@ -60,13 +60,6 @@ def before_command_run( ): """Hook implementation to send command run data to Heap""" try: - # get KedroCLI and its structure from actual project root - cli = KedroCLI(project_path=Path.cwd()) - cli_struct = _get_cli_structure(cli_obj=cli, get_help=False) - masked_command_args = _mask_kedro_cli( - cli_struct=cli_struct, command_args=command_args - ) - main_command = masked_command_args[0] if masked_command_args else "kedro" if not project_metadata: # in package mode return @@ -78,6 +71,14 @@ def before_command_run( ) return + # get KedroCLI and its structure from actual project root + cli = KedroCLI(project_path=Path.cwd()) + cli_struct = _get_cli_structure(cli_obj=cli, get_help=False) + masked_command_args = _mask_kedro_cli( + cli_struct=cli_struct, command_args=command_args + ) + main_command = masked_command_args[0] if masked_command_args else "kedro" + logger.debug("You have opted into product usage analytics.") hashed_username = _get_hashed_username() project_properties = _get_project_properties( From bf1d3531a10c95f5453d63be5de5c1d60a418d5a Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Mon, 12 Feb 2024 16:32:16 +0000 Subject: [PATCH 2/8] Change masking approach Signed-off-by: Dmitry Sorokin --- kedro-telemetry/kedro_telemetry/masking.py | 44 +++++++++++----------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index cfe637e3e..7b3b2fc14 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -72,39 +72,39 @@ def _get_cli_structure( return output -def _mask_unrecognized_arg(arg: str, vocabulary: Set[str]) -> str: - return arg if arg in vocabulary else MASK - - def _mask_kedro_cli(cli_struct: Dict[str, Any], command_args: List[str]) -> List[str]: """Takes a dynamic vocabulary (based on `KedroCLI`) and returns a masked CLI input""" output = [] - vocabulary = _get_vocabulary(cli_struct) - mask_next = False - for arg in command_args: + + # Preserve the initial part of the command until parameters sections begin + arg_index = 0 + current_CLI = cli_struct.get("kedro", {}) + while ( + arg_index < len(command_args) + and not command_args[arg_index].startswith("-") + and command_args[arg_index] in current_CLI + ): + output.append(command_args[arg_index]) + current_CLI = current_CLI[command_args[arg_index]] + arg_index += 1 + + # Mask everything except recognized parameter keywords + for arg in command_args[arg_index:]: if arg.startswith("-"): if "=" in arg: arg_left = arg.split("=")[0] - output.append(_mask_unrecognized_arg(arg_left, vocabulary)) + if arg_left in current_CLI: + output.append(arg_left) output.append(MASK) + elif arg in current_CLI: + output.append(arg) else: - mask_next = True - output.append(_mask_unrecognized_arg(arg, vocabulary)) - elif mask_next: - output.append(MASK) - mask_next = False + output.append(MASK) else: - output.append(_mask_unrecognized_arg(arg, vocabulary)) - return output - + output.append(MASK) -def _get_vocabulary(cli_struct: Dict[str, Any]) -> Set[str]: - """Builds a unique whitelist of terms - a vocabulary""" - vocabulary = {"-h", "--version"} # -h help and version args are not in by default - cli_dynamic_vocabulary = set(_recursive_items(cli_struct)) - {None} - vocabulary.update(cli_dynamic_vocabulary) - return vocabulary + return output def _recursive_items(dictionary: Dict[Any, Any]) -> Iterator[Any]: From 98ffa6580acfa1a3b2992318dd379d979537d2b1 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Mon, 12 Feb 2024 16:49:57 +0000 Subject: [PATCH 3/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/tests/test_masking.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index 8d4698591..782de29ec 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -11,7 +11,6 @@ from kedro_telemetry.masking import ( MASK, _get_cli_structure, - _get_vocabulary, _mask_kedro_cli, _recursive_items, ) @@ -175,9 +174,6 @@ def test_recursive_items(self, input_dict, expected_output_count): def test_recursive_items_empty(self): assert len(list(_recursive_items({}))) == 0 - def test_get_vocabulary_empty(self): - assert _get_vocabulary({}) == {"-h", "--version"} - @pytest.mark.parametrize( "input_cli_structure, input_command_args, expected_masked_args", [ From d883a98ce8d515b9db6d246a8640e8f7cb2f108f Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Mon, 12 Feb 2024 19:33:42 +0000 Subject: [PATCH 4/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/kedro_telemetry/masking.py | 6 +++++- kedro-telemetry/tests/test_masking.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index 7b3b2fc14..7123ccd6b 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -44,6 +44,10 @@ def _recurse_cli( io_dict[element_name], get_help, ) + nested_parameter_list = [option.opts for option in cli_element.get_params(ctx)] + for item in (item for sublist in nested_parameter_list for item in sublist): + if item not in io_dict[element_name]: + io_dict[element_name][item] = None elif isinstance(cli_element, click.Command): if get_help: # gets formatted CLI help incl params for printing @@ -89,7 +93,7 @@ def _mask_kedro_cli(cli_struct: Dict[str, Any], command_args: List[str]) -> List current_CLI = current_CLI[command_args[arg_index]] arg_index += 1 - # Mask everything except recognized parameter keywords + # Mask everything except parameter keywords for arg in command_args[arg_index:]: if arg.startswith("-"): if "=" in arg: diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index 782de29ec..13dbbabcb 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -230,8 +230,8 @@ def test_recursive_items_empty(self): "command_b": None, } }, - ["none", "of", "this", "should", "be", "seen", "except", "command_a"], - [MASK, MASK, MASK, MASK, MASK, MASK, MASK, "command_a"], + ["command_a", "should", "be", "seen", "only"], + ["command_a", MASK, MASK, MASK, MASK], ), ], ) From 1b1c804c5753406c6bae881765ac07cdbb2f7163 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Mon, 12 Feb 2024 19:48:38 +0000 Subject: [PATCH 5/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/kedro_telemetry/masking.py | 11 +++++++---- kedro-telemetry/tests/test_masking.py | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index 7123ccd6b..134e97e9d 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -44,10 +44,13 @@ def _recurse_cli( io_dict[element_name], get_help, ) - nested_parameter_list = [option.opts for option in cli_element.get_params(ctx)] - for item in (item for sublist in nested_parameter_list for item in sublist): - if item not in io_dict[element_name]: - io_dict[element_name][item] = None + if not get_help: + nested_parameter_list = [ + option.opts for option in cli_element.get_params(ctx) + ] + for item in (item for sublist in nested_parameter_list for item in sublist): + if item not in io_dict[element_name]: + io_dict[element_name][item] = None elif isinstance(cli_element, click.Command): if get_help: # gets formatted CLI help incl params for printing diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index 13dbbabcb..cb0df343f 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -28,6 +28,9 @@ "registry", "run", "starter", + "--version", + "-V", + "--help", ] @@ -74,7 +77,6 @@ def test_get_cli_structure_raw(self, mocker, fake_metadata): for k, v in raw_cli_structure["kedro"].items(): assert isinstance(k, str) - assert isinstance(v, dict) assert sorted(list(raw_cli_structure["kedro"])) == sorted( DEFAULT_KEDRO_COMMANDS From f7bc4c7c624aa2770c310a576c91e6aa079cac49 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Mon, 12 Feb 2024 19:51:34 +0000 Subject: [PATCH 6/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/tests/test_masking.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index cb0df343f..14f628059 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -28,9 +28,6 @@ "registry", "run", "starter", - "--version", - "-V", - "--help", ] From 431220f66bf46b0a6bcbb3e09fa463cae751a253 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Tue, 13 Feb 2024 10:42:05 +0000 Subject: [PATCH 7/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/tests/test_masking.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index 14f628059..8837146e4 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -28,6 +28,9 @@ "registry", "run", "starter", + "--version", + "-V", + "--help", ] @@ -144,10 +147,6 @@ def test_get_cli_structure_help(self, mocker, fake_metadata): elif isinstance(v, str): assert v.startswith("Usage: [OPTIONS]") - assert sorted(list(help_cli_structure["kedro"])) == sorted( - DEFAULT_KEDRO_COMMANDS - ) - @pytest.mark.parametrize( "input_dict, expected_output_count", [ From c92895e7213a90c020bfd884f8c5efafbfe347ea Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin Date: Tue, 13 Feb 2024 10:48:12 +0000 Subject: [PATCH 8/8] Fix tests Signed-off-by: Dmitry Sorokin --- kedro-telemetry/kedro_telemetry/masking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index 134e97e9d..81b181e73 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -1,5 +1,5 @@ """Module containing command masking functionality.""" -from typing import Any, Dict, Iterator, List, Set, Union +from typing import Any, Dict, Iterator, List, Union import click