Skip to content

Commit

Permalink
Deprecate requesting codemods by name (#699)
Browse files Browse the repository at this point in the history
* do not include codemod name in requested rules

* registry should only request codemods by id

* create internal property

* fix int tests
  • Loading branch information
clavedeluna authored Jul 9, 2024
1 parent d67e809 commit 804da0d
Show file tree
Hide file tree
Showing 10 changed files with 80 additions and 106 deletions.
8 changes: 4 additions & 4 deletions integration_tests/test_dependency_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_add_to_pyproject_toml(self, tmp_repo):
tmp_repo,
"--output",
self.output_path,
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--verbose",
]
completed_process = subprocess.run(
Expand All @@ -90,7 +90,7 @@ def test_add_to_requirements_txt(self, tmp_repo):
tmp_repo,
"--output",
self.output_path,
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--verbose",
]
completed_process = subprocess.run(
Expand All @@ -115,7 +115,7 @@ def test_add_to_setup(self, tmp_repo):
tmp_repo,
"--output",
self.output_path,
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--verbose",
]
completed_process = subprocess.run(
Expand All @@ -140,7 +140,7 @@ def test_fail_to_add(self, tmp_repo):
tmp_repo,
"--output",
self.output_path,
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--verbose",
]
completed_process = subprocess.run(
Expand Down
9 changes: 6 additions & 3 deletions integration_tests/test_multiple_codemods.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ class TestMultipleCodemods:
@pytest.mark.parametrize(
"codemods",
# In this particular case the order should not affect the result
["secure-random,fix-mutable-params", "fix-mutable-params,secure-random"],
[
"pixee:python/secure-random,pixee:python/fix-mutable-params",
"pixee:python/fix-mutable-params,pixee:python/secure-random",
],
)
def test_two_codemods(self, codemods, tmpdir):
source_file_name = "multiple_codemods.py"
Expand Down Expand Up @@ -59,5 +62,5 @@ def func(foo=None):
ids = codemods.split(",")
assert len(results["results"]) == 2
# Order matters
assert results["results"][0]["codemod"] == f"pixee:python/{ids[0]}"
assert results["results"][1]["codemod"] == f"pixee:python/{ids[1]}"
assert results["results"][0]["codemod"] == f"{ids[0]}"
assert results["results"][1]["codemod"] == f"{ids[1]}"
8 changes: 3 additions & 5 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def find_semgrep_results(
yaml_files := list(
itertools.chain.from_iterable(
[
codemod.detector.get_yaml_files(codemod.name)
codemod.detector.get_yaml_files(codemod._internal_name)
for codemod in codemods
if codemod.detector
and isinstance(codemod.detector, SemgrepRuleDetector)
Expand Down Expand Up @@ -101,16 +101,14 @@ def apply_codemods(
logger.info("running codemod %s", codemod.id)

if isinstance(codemod.detector, SemgrepRuleDetector):
# Unfortunately the IDs from semgrep are not fully specified
# TODO: eventually we need to be able to use fully specified IDs here
if codemod.name not in semgrep_finding_ids:
if codemod._internal_name not in semgrep_finding_ids:
logger.debug(
"no results from semgrep for %s, skipping analysis",
codemod.id,
)
continue

files_to_analyze = semgrep_results.files_for_rule(codemod.name)
files_to_analyze = semgrep_results.files_for_rule(codemod._internal_name)

# Non-semgrep codemods ignore the semgrep results
codemod.apply(context, files_to_analyze)
Expand Down
14 changes: 11 additions & 3 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def docs_module_path(self) -> str: ...

@property
def name(self) -> str:
"""Non-unique property for codemods. Multiple codemods can have the same name."""
return self._metadata.name

@property
Expand Down Expand Up @@ -156,6 +157,13 @@ def describe(self):
"references": [ref.model_dump() for ref in self.references],
}

@property
def _internal_name(self) -> str:
"""Used only for internal semgrep runs."""
# Unfortunately the IDs from semgrep are not fully specified
# TODO: eventually we need to be able to use fully specified IDs here
return self.name

def _apply(
self,
context: CodemodExecutionContext,
Expand All @@ -172,8 +180,8 @@ def _apply(
return

results = (
# It seems like semgrep doesn't like our fully-specified id format
self.detector.apply(self.name, context, files_to_analyze)
# It seems like semgrep doesn't like our fully-specified id format so pass in short name instead.
self.detector.apply(self._internal_name, context, files_to_analyze)
if self.detector
else None
)
Expand Down Expand Up @@ -224,7 +232,7 @@ def apply(
:param context: The codemod execution context
:param files_to_analyze: The list of files to analyze
"""
self._apply(context, files_to_analyze, [self.name])
self._apply(context, files_to_analyze, [self._internal_name])

def _process_file(
self,
Expand Down
28 changes: 4 additions & 24 deletions src/codemodder/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import re
from dataclasses import dataclass
from functools import cached_property
from importlib.metadata import EntryPoint, entry_points
from itertools import chain
from typing import TYPE_CHECKING, Callable, Optional
Expand Down Expand Up @@ -32,43 +31,28 @@ class CodemodCollection:


class CodemodRegistry:
_codemods_by_name: dict[str, BaseCodemod]
_codemods_by_id: dict[str, BaseCodemod]
_default_include_paths: set[str]

def __init__(self):
self._codemods_by_name = {}
self._codemods_by_id = {}
self._default_include_paths = set()

@property
def names(self):
return list(self._codemods_by_name.keys())

@property
def ids(self):
return list(self._codemods_by_id.keys())

@property
def codemods(self):
return list(self._codemods_by_name.values())
return list(self._codemods_by_id.values())

@property
def default_include_paths(self) -> list[str]:
return list(self._default_include_paths)

@cached_property
def pixee_codemods_by_name(self) -> dict:
return {
name: codemod
for name, codemod in self._codemods_by_name.items()
if codemod.origin == "pixee"
}

def add_codemod_collection(self, collection: CodemodCollection):
for codemod in collection.codemods:
wrapper = codemod() if isinstance(codemod, type) else codemod
self._codemods_by_name[wrapper.name] = wrapper
self._codemods_by_id[wrapper.id] = wrapper
self._default_include_paths.update(
chain(
Expand Down Expand Up @@ -98,10 +82,8 @@ def match_codemods(
names = set(name for name in codemod_exclude if "*" not in name)

for codemod in self.codemods:
if (
codemod.id in names
or (codemod.origin == "pixee" and codemod.name in names)
or any(pat.match(codemod.id) for pat in patterns)
if codemod.id in names or any(
pat.match(codemod.id) for pat in patterns
):
continue

Expand All @@ -124,9 +106,7 @@ def match_codemods(
continue

try:
matched_codemods.append(
self.pixee_codemods_by_name.get(name) or self._codemods_by_id[name]
)
matched_codemods.append(self._codemods_by_id[name])
except KeyError:
logger.warning(f"Requested codemod to include '{name}' does not exist.")
return matched_codemods
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/api/core_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(
transformer=transformer,
default_extensions=default_extensions,
)
self.requested_rules = [self.name]
self.requested_rules = []
if requested_rules:
self.requested_rules.extend(requested_rules)

Expand Down
52 changes: 32 additions & 20 deletions tests/codemods/test_include_exclude.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class TestMatchCodemods:
@classmethod
def setup_class(cls):
cls.registry = load_registered_codemods()
cls.codemod_map = cls.registry._codemods_by_name
cls.codemod_map = cls.registry._codemods_by_id
cls.all_ids = [
c().id if isinstance(c, type) else c.id for c in registry.codemods
]
Expand All @@ -31,63 +31,73 @@ def test_load_sast_codemods(self):

def test_include_non_sast_in_sast(self):
assert set(
self.registry.match_codemods(["secure-random"], None, sast_only=True)
) == {self.codemod_map["secure-random"]}
self.registry.match_codemods(
["pixee:python/secure-random"], None, sast_only=True
)
) == {self.codemod_map["pixee:python/secure-random"]}

@pytest.mark.parametrize(
"input_str",
["secure-random", "pixee:python/secure-random", "secure-random,url-sandbox"],
[
"pixee:python/secure-random",
"pixee:python/secure-random,pixee:python/url-sandbox",
],
)
def test_include(self, input_str):
includes = input_str.split(",")
assert self.registry.match_codemods(includes, None) == [
self.codemod_map[name.replace("pixee:python/", "")] for name in includes
self.codemod_map[name] for name in includes
]

@pytest.mark.parametrize(
"input_str", ["url-sandbox,secure-random", "secure-random,url-sandbox"]
"input_str",
[
"pixee:python/url-sandbox,pixee:python/secure-random",
"pixee:python/secure-random,pixee:python/url-sandbox",
],
)
def test_include_preserve_order(self, input_str):
includes = input_str.split(",")
assert [
codemod.name for codemod in self.registry.match_codemods(includes, None)
codemod.id for codemod in self.registry.match_codemods(includes, None)
] == includes

@pytest.mark.parametrize(
"input_str",
[
"secure-random",
"pixee:python/secure-random",
"secure-random,url-sandbox",
"pixee:python/secure-random,pixee:python/url-sandbox",
],
)
def test_exclude(self, input_str):
excludes = input_str.split(",")
res = [
c
for c in self.registry.codemods
if c.id in self.all_ids and c.name not in excludes and c.id not in excludes
if c.id in self.all_ids and c.id not in excludes
]
assert self.registry.match_codemods(None, excludes) == res

def test_bad_codemod_include_no_match(self):
assert self.registry.match_codemods(["doesntexist"], None) == []

def test_codemod_include_some_match(self):
assert self.registry.match_codemods(["doesntexist", "secure-random"], None) == [
self.codemod_map["secure-random"]
]
assert self.registry.match_codemods(
["doesntexist", "pixee:python/secure-random"], None
) == [self.codemod_map["pixee:python/secure-random"]]

def test_bad_codemod_exclude_all_match(self):
assert self.registry.match_codemods(None, ["doesntexist"]) == [
c for c in self.registry.codemods if c.id in self.all_ids
]

def test_exclude_some_match(self):
assert self.registry.match_codemods(None, ["doesntexist", "secure-random"]) == [
assert self.registry.match_codemods(
None, ["doesntexist", "pixee:python/secure-random"]
) == [
c
for c in self.registry.codemods
if c.name not in "secure-random" and c.id in self.all_ids
if c.id not in "pixee:python/secure-random" and c.id in self.all_ids
]

def test_include_with_pattern(self):
Expand All @@ -96,9 +106,11 @@ def test_include_with_pattern(self):
]

def test_include_with_pattern_and_another(self):
assert self.registry.match_codemods(["*django*", "use-defusedxml"], None) == [
c for c in self.registry.codemods if "django" in c.id
] + [self.codemod_map["use-defusedxml"]]
assert self.registry.match_codemods(
["*django*", "pixee:python/use-defusedxml"], None
) == [c for c in self.registry.codemods if "django" in c.id] + [
self.codemod_map["pixee:python/use-defusedxml"]
]

def test_include_sast_with_prefix(self):
assert self.registry.match_codemods(["sonar*"], None, sast_only=False) == [
Expand All @@ -121,13 +133,13 @@ def test_exclude_with_pattern(self):

def test_exclude_with_pattern_and_another(self):
assert self.registry.match_codemods(
None, ["*django*", "use-defusedxml"], sast_only=False
None, ["*django*", "pixee:python/use-defusedxml"], sast_only=False
) == [
c
for c in self.registry.codemods
if "django" not in c.id
and c.id in self.all_ids
and c.name != "use-defusedxml"
and c.id != "pixee:python/use-defusedxml"
]

def test_exclude_pixee_with_prefix(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_no_args(self, mocker, caplog):
"some/path",
"--output",
"here.txt",
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--help",
],
],
Expand All @@ -52,7 +52,7 @@ def test_help_is_printed(self, mock_print_help, cli_args):
"some/path",
"--output",
"here.txt",
"--codemod-include=url-sandbox",
"--codemod-include=pixee:python/url-sandbox",
"--version",
],
],
Expand Down
Loading

0 comments on commit 804da0d

Please sign in to comment.