Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(telemetry): Avoid eager loading the whole KedroCLI for masking #824

Merged
merged 6 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 10 additions & 17 deletions kedro-telemetry/kedro_telemetry/masking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Module containing command masking functionality."""
from __future__ import annotations

from typing import Any, Iterator
from typing import Any

import click

Expand Down Expand Up @@ -81,16 +81,19 @@ def _get_cli_structure(
return output


def _mask_kedro_cli(
cli_struct: dict[str | None, Any], command_args: list[str]
) -> list[str]:
def _mask_kedro_cli(cli: click.CommandCollection, command_args: list[str]) -> list[str]:
"""Takes a dynamic vocabulary (based on `KedroCLI`) and returns
a masked CLI input"""
output = []

# Preserve the initial part of the command until parameters sections begin
arg_index = 0
current_CLI = cli_struct.get("kedro", {})
cmd = command_args[0] if command_args else ""
if cmd in {"--help", "--version", "-h", "-v", ""}:
return command_args
ankatiyar marked this conversation as resolved.
Show resolved Hide resolved
click_cmd = cli.get_command(ctx=None, cmd_name=cmd) # type: ignore
if click_cmd is None:
return [MASK]

current_CLI = _get_cli_structure(click_cmd)
while (
arg_index < len(command_args)
and not command_args[arg_index].startswith("-")
Expand All @@ -116,13 +119,3 @@ def _mask_kedro_cli(
output.append(MASK)

return output


def _recursive_items(dictionary: dict[Any, Any]) -> Iterator[Any]:
for key, value in dictionary.items():
if isinstance(value, dict):
yield key
yield from _recursive_items(value)
else:
yield key
yield value
7 changes: 2 additions & 5 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from kedro.pipeline import Pipeline

from kedro_telemetry import __version__ as TELEMETRY_VERSION
from kedro_telemetry.masking import _get_cli_structure, _mask_kedro_cli
from kedro_telemetry.masking import _mask_kedro_cli

HEAP_APPID_PROD = "2388822444"
HEAP_ENDPOINT = "https://heapanalytics.com/api/track"
Expand Down Expand Up @@ -176,10 +176,7 @@ def before_command_run(

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_path if project_path else Path.cwd())
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
masked_command_args = _mask_kedro_cli(cli, command_args=command_args)

self._user_uuid = _get_or_create_uuid()

Expand Down
99 changes: 26 additions & 73 deletions kedro-telemetry/tests/test_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
MASK,
_get_cli_structure,
_mask_kedro_cli,
_recursive_items,
)

REPO_NAME = "cli_tools_dummy_project"
Expand Down Expand Up @@ -152,94 +151,48 @@ def test_get_cli_structure_help(self, mocker, fake_metadata):
assert v.startswith("Usage: [OPTIONS]")

@pytest.mark.parametrize(
"input_dict, expected_output_count",
"input_command_args, expected_masked_args",
[
({}, 0),
({"a": "foo"}, 2),
({"a": {"b": "bar"}, "c": {"baz"}}, 5),
([], []),
(
{
"a": {"b": "bar"},
"c": None,
"d": {"e": "fizz"},
"f": {"g": {"h": "buzz"}},
},
12,
["info"],
["info"],
),
],
)
def test_recursive_items(self, input_dict, expected_output_count):
assert expected_output_count == len(
list(_recursive_items(dictionary=input_dict))
)

def test_recursive_items_empty(self):
assert len(list(_recursive_items({}))) == 0

@pytest.mark.parametrize(
"input_cli_structure, input_command_args, expected_masked_args",
[
({}, [], []),
(
{"kedro": {"command_a": None, "command_b": None}},
["command_a"],
["command_a"],
["run", "--pipeline=data_science"],
["run", "--pipeline", MASK],
),
(
{
"kedro": {
"command_a": {"--param1": None, "--param2": None},
"command_b": None,
}
},
["command_a", "--param1=foo"],
["command_a", "--param1", MASK],
["catalog", "list"],
["catalog", "list"],
),
(
{
"kedro": {
"command_a": {"--param1": None, "--param2": None},
"command_b": None,
}
},
["command_a", "--param1= foo"],
["command_a", "--param1", MASK],
["pipeline", "create", "mypipeline"],
["pipeline", "create", MASK],
),
(
{
"kedro": {
"command_a": {"--param": None, "-p": None},
"command_b": None,
}
},
["command_a", "-p", "bar"],
["command_a", "-p", MASK],
["run", "-p", "bar"],
["run", "-p", MASK],
),
(
{
"kedro": {
"command_a": {"--param": None, "-p": None},
"command_b": None,
}
},
["command_a", "-xyz", "bar"],
["command_a", MASK, MASK],
),
(
{
"kedro": {
"command_a": {"--param": None, "-p": None},
"command_b": None,
}
},
["command_a", "should", "be", "seen", "only"],
["command_a", MASK, MASK, MASK, MASK],
["run", "--params=hello=4", "--pipeline=my_pipeline"],
["run", "--params", MASK, "--pipeline", MASK],
),
],
)
def test_mask_kedro_cli(
self, input_cli_structure, input_command_args, expected_masked_args
self, input_command_args, expected_masked_args, fake_metadata, mocker
):
Module = namedtuple("Module", ["cli"])
mocker.patch("kedro.framework.cli.cli._is_project", return_value=True)
mocker.patch(
"kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata
)
mocker.patch(
"kedro.framework.cli.cli.importlib.import_module",
return_value=Module(cli=cli),
)
kedro_cli = KedroCLI(fake_metadata.project_path)
assert expected_masked_args == _mask_kedro_cli(
cli_struct=input_cli_structure, command_args=input_command_args
cli=kedro_cli, command_args=input_command_args
)