Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: kedro-telemetry masking #552

Merged
merged 9 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 34 additions & 19 deletions kedro-telemetry/kedro_telemetry/masking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Module containing command masking functionality."""
from typing import Any, Dict, Iterator, List, Set, Union
from typing import Any, Dict, Iterator, List, Union

import click

Expand Down Expand Up @@ -44,6 +44,13 @@ def _recurse_cli(
io_dict[element_name],
get_help,
)
if not get_help:
nested_parameter_list = [
option.opts for option in cli_element.get_params(ctx)
]
for item in (item for sublist in nested_parameter_list for item in sublist):
if item not in io_dict[element_name]:
io_dict[element_name][item] = None

elif isinstance(cli_element, click.Command):
if get_help: # gets formatted CLI help incl params for printing
Expand Down Expand Up @@ -76,27 +83,35 @@ def _mask_kedro_cli(cli_struct: Dict[str, Any], command_args: List[str]) -> List
"""Takes a dynamic vocabulary (based on `KedroCLI`) and returns
a masked CLI input"""
output = []
vocabulary = _get_vocabulary(cli_struct)
for arg in command_args:

# Preserve the initial part of the command until parameters sections begin
arg_index = 0
current_CLI = cli_struct.get("kedro", {})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My IDE is saying the variable name should be lowercase, maybe rename this to current_cli? 😆

while (
arg_index < len(command_args)
and not command_args[arg_index].startswith("-")
and command_args[arg_index] in current_CLI
):
output.append(command_args[arg_index])
current_CLI = current_CLI[command_args[arg_index]]
arg_index += 1

# Mask everything except parameter keywords
for arg in command_args[arg_index:]:
if arg.startswith("-"):
for arg_part in arg.split("="):
if arg_part in vocabulary:
output.append(arg_part)
elif arg_part:
output.append(MASK)
elif arg in vocabulary:
output.append(arg)
elif arg:
if "=" in arg:
arg_left = arg.split("=")[0]
if arg_left in current_CLI:
output.append(arg_left)
output.append(MASK)
elif arg in current_CLI:
output.append(arg)
else:
output.append(MASK)
else:
output.append(MASK)
return output


def _get_vocabulary(cli_struct: Dict[str, Any]) -> Set[str]:
"""Builds a unique whitelist of terms - a vocabulary"""
vocabulary = {"-h", "--version"} # -h help and version args are not in by default
cli_dynamic_vocabulary = set(_recursive_items(cli_struct)) - {None}
vocabulary.update(cli_dynamic_vocabulary)
return vocabulary
return output


def _recursive_items(dictionary: Dict[Any, Any]) -> Iterator[Any]:
Expand Down
15 changes: 8 additions & 7 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,6 @@ def before_command_run(
):
"""Hook implementation to send command run data to Heap"""
try:
# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=Path.cwd())
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
main_command = masked_command_args[0] if masked_command_args else "kedro"
if not project_metadata: # in package mode
return

Expand All @@ -78,6 +71,14 @@ def before_command_run(
)
return

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=Path.cwd())
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
main_command = masked_command_args[0] if masked_command_args else "kedro"

astrojuanlu marked this conversation as resolved.
Show resolved Hide resolved
logger.debug("You have opted into product usage analytics.")
hashed_username = _get_hashed_username()
project_properties = _get_project_properties(
Expand Down
16 changes: 5 additions & 11 deletions kedro-telemetry/tests/test_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from kedro_telemetry.masking import (
MASK,
_get_cli_structure,
_get_vocabulary,
_mask_kedro_cli,
_recursive_items,
)
Expand All @@ -29,6 +28,9 @@
"registry",
"run",
"starter",
"--version",
"-V",
"--help",
]


Expand Down Expand Up @@ -75,7 +77,6 @@ def test_get_cli_structure_raw(self, mocker, fake_metadata):

for k, v in raw_cli_structure["kedro"].items():
assert isinstance(k, str)
assert isinstance(v, dict)

assert sorted(list(raw_cli_structure["kedro"])) == sorted(
DEFAULT_KEDRO_COMMANDS
Expand Down Expand Up @@ -146,10 +147,6 @@ def test_get_cli_structure_help(self, mocker, fake_metadata):
elif isinstance(v, str):
assert v.startswith("Usage: [OPTIONS]")

assert sorted(list(help_cli_structure["kedro"])) == sorted(
DEFAULT_KEDRO_COMMANDS
)

@pytest.mark.parametrize(
"input_dict, expected_output_count",
[
Expand All @@ -175,9 +172,6 @@ def test_recursive_items(self, input_dict, expected_output_count):
def test_recursive_items_empty(self):
assert len(list(_recursive_items({}))) == 0

def test_get_vocabulary_empty(self):
assert _get_vocabulary({}) == {"-h", "--version"}

@pytest.mark.parametrize(
"input_cli_structure, input_command_args, expected_masked_args",
[
Expand Down Expand Up @@ -234,8 +228,8 @@ def test_get_vocabulary_empty(self):
"command_b": None,
}
},
["none", "of", "this", "should", "be", "seen", "except", "command_a"],
[MASK, MASK, MASK, MASK, MASK, MASK, MASK, "command_a"],
["command_a", "should", "be", "seen", "only"],
["command_a", MASK, MASK, MASK, MASK],
),
],
)
Expand Down