diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index fe5f0a3f6..81b181e73 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -1,5 +1,5 @@ """Module containing command masking functionality.""" -from typing import Any, Dict, Iterator, List, Set, Union +from typing import Any, Dict, Iterator, List, Union import click @@ -44,6 +44,13 @@ def _recurse_cli( io_dict[element_name], get_help, ) + if not get_help: + nested_parameter_list = [ + option.opts for option in cli_element.get_params(ctx) + ] + for item in (item for sublist in nested_parameter_list for item in sublist): + if item not in io_dict[element_name]: + io_dict[element_name][item] = None elif isinstance(cli_element, click.Command): if get_help: # gets formatted CLI help incl params for printing @@ -76,27 +83,35 @@ def _mask_kedro_cli(cli_struct: Dict[str, Any], command_args: List[str]) -> List """Takes a dynamic vocabulary (based on `KedroCLI`) and returns a masked CLI input""" output = [] - vocabulary = _get_vocabulary(cli_struct) - for arg in command_args: + + # Preserve the initial part of the command until parameters sections begin + arg_index = 0 + current_CLI = cli_struct.get("kedro", {}) + while ( + arg_index < len(command_args) + and not command_args[arg_index].startswith("-") + and command_args[arg_index] in current_CLI + ): + output.append(command_args[arg_index]) + current_CLI = current_CLI[command_args[arg_index]] + arg_index += 1 + + # Mask everything except parameter keywords + for arg in command_args[arg_index:]: if arg.startswith("-"): - for arg_part in arg.split("="): - if arg_part in vocabulary: - output.append(arg_part) - elif arg_part: - output.append(MASK) - elif arg in vocabulary: - output.append(arg) - elif arg: + if "=" in arg: + arg_left = arg.split("=")[0] + if arg_left in current_CLI: + output.append(arg_left) + output.append(MASK) + elif arg in current_CLI: + output.append(arg) + else: + output.append(MASK) + else: output.append(MASK) - return output - -def _get_vocabulary(cli_struct: Dict[str, Any]) -> Set[str]: - """Builds a unique whitelist of terms - a vocabulary""" - vocabulary = {"-h", "--version"} # -h help and version args are not in by default - cli_dynamic_vocabulary = set(_recursive_items(cli_struct)) - {None} - vocabulary.update(cli_dynamic_vocabulary) - return vocabulary + return output def _recursive_items(dictionary: Dict[Any, Any]) -> Iterator[Any]: diff --git a/kedro-telemetry/kedro_telemetry/plugin.py b/kedro-telemetry/kedro_telemetry/plugin.py index 0e654734f..b76f9163f 100644 --- a/kedro-telemetry/kedro_telemetry/plugin.py +++ b/kedro-telemetry/kedro_telemetry/plugin.py @@ -60,13 +60,6 @@ def before_command_run( ): """Hook implementation to send command run data to Heap""" try: - # get KedroCLI and its structure from actual project root - cli = KedroCLI(project_path=Path.cwd()) - cli_struct = _get_cli_structure(cli_obj=cli, get_help=False) - masked_command_args = _mask_kedro_cli( - cli_struct=cli_struct, command_args=command_args - ) - main_command = masked_command_args[0] if masked_command_args else "kedro" if not project_metadata: # in package mode return @@ -78,6 +71,14 @@ def before_command_run( ) return + # get KedroCLI and its structure from actual project root + cli = KedroCLI(project_path=Path.cwd()) + cli_struct = _get_cli_structure(cli_obj=cli, get_help=False) + masked_command_args = _mask_kedro_cli( + cli_struct=cli_struct, command_args=command_args + ) + main_command = masked_command_args[0] if masked_command_args else "kedro" + logger.debug("You have opted into product usage analytics.") hashed_username = _get_hashed_username() project_properties = _get_project_properties( diff --git a/kedro-telemetry/tests/test_masking.py b/kedro-telemetry/tests/test_masking.py index 8d4698591..8837146e4 100644 --- a/kedro-telemetry/tests/test_masking.py +++ b/kedro-telemetry/tests/test_masking.py @@ -11,7 +11,6 @@ from kedro_telemetry.masking import ( MASK, _get_cli_structure, - _get_vocabulary, _mask_kedro_cli, _recursive_items, ) @@ -29,6 +28,9 @@ "registry", "run", "starter", + "--version", + "-V", + "--help", ] @@ -75,7 +77,6 @@ def test_get_cli_structure_raw(self, mocker, fake_metadata): for k, v in raw_cli_structure["kedro"].items(): assert isinstance(k, str) - assert isinstance(v, dict) assert sorted(list(raw_cli_structure["kedro"])) == sorted( DEFAULT_KEDRO_COMMANDS @@ -146,10 +147,6 @@ def test_get_cli_structure_help(self, mocker, fake_metadata): elif isinstance(v, str): assert v.startswith("Usage: [OPTIONS]") - assert sorted(list(help_cli_structure["kedro"])) == sorted( - DEFAULT_KEDRO_COMMANDS - ) - @pytest.mark.parametrize( "input_dict, expected_output_count", [ @@ -175,9 +172,6 @@ def test_recursive_items(self, input_dict, expected_output_count): def test_recursive_items_empty(self): assert len(list(_recursive_items({}))) == 0 - def test_get_vocabulary_empty(self): - assert _get_vocabulary({}) == {"-h", "--version"} - @pytest.mark.parametrize( "input_cli_structure, input_command_args, expected_masked_args", [ @@ -234,8 +228,8 @@ def test_get_vocabulary_empty(self): "command_b": None, } }, - ["none", "of", "this", "should", "be", "seen", "except", "command_a"], - [MASK, MASK, MASK, MASK, MASK, MASK, MASK, "command_a"], + ["command_a", "should", "be", "seen", "only"], + ["command_a", MASK, MASK, MASK, MASK], ), ], )