From dcc28c1e538b7b69b6638985936000a0468943f9 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:52:00 +0200 Subject: [PATCH 1/6] Add tests to exhibit the issue with multiline options --- codespell_lib/tests/test_basic.py | 43 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 645bb49583..89a9b549d2 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -573,6 +573,8 @@ def test_ignore( (subdir / "bad.txt").write_text("abandonned") assert cs.main(tmp_path) == 2 assert cs.main("--skip=bad*", tmp_path) == 0 + assert cs.main("--skip=whatever.txt,bad*,whatelse.txt", tmp_path) == 0 + assert cs.main("--skip=whatever.txt,\n bad* ,", tmp_path) == 0 assert cs.main("--skip=*ignoredir*", tmp_path) == 1 assert cs.main("--skip=ignoredir", tmp_path) == 1 assert cs.main("--skip=*ignoredir/bad*", tmp_path) == 1 @@ -1213,7 +1215,7 @@ def test_ill_formed_ini_config_file( assert "ill-formed config file" in stderr -@pytest.mark.parametrize("kind", ["cfg", "toml", "toml_list"]) +@pytest.mark.parametrize("kind", ["cfg", "cfg_multiline", "toml", "toml_list"]) def test_config_toml( tmp_path: Path, capsys: pytest.CaptureFixture[str], @@ -1235,44 +1237,47 @@ def test_config_toml( assert "bad.txt" in stdout assert "abandonned.txt" in stdout - if kind == "cfg": + if kind.startswith("cfg"): conffile = tmp_path / "setup.cfg" args = ("--config", conffile) - conffile.write_text( - """\ + if kind == "cfg": + text = """\ [codespell] skip = bad.txt, whatever.txt count = """ - ) - elif kind == "toml": - assert kind == "toml" + else: + assert kind == "cfg_multiline" + text = """\ +[codespell] +skip = whatever.txt, + bad.txt , + , + +count = +""" + conffile.write_text(text) + else: if sys.version_info < (3, 11): pytest.importorskip("tomli") tomlfile = tmp_path / "pyproject.toml" args = ("--toml", tomlfile) - tomlfile.write_text( - """\ + if kind == "toml": + text = """\ [tool.codespell] skip = 'bad.txt,whatever.txt' check-filenames = false count = true """ - ) - else: - assert kind == "toml_list" - if sys.version_info < (3, 11): - pytest.importorskip("tomli") - tomlfile = tmp_path / "pyproject.toml" - args = ("--toml", tomlfile) - tomlfile.write_text( - """\ + else: + assert kind == "toml_list" + text = """\ [tool.codespell] skip = ['bad.txt', 'whatever.txt'] check-filenames = false count = true """ - ) + tomlfile.write_text(text) # Should pass when skipping bad.txt or abandonned.txt result = cs.main(d, *args, std=True) From 51f91602fef06594610a476276104286c2af8eac Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:52:00 +0200 Subject: [PATCH 2/6] Handle multiline options Clean up options expecting lists before using them, as they may contain newlines. Examples: * Enclosing command-line arguments in quotes may introduce newlines in option values: $ codespell -S "A, B, > C, D, E" * INI files may contain multiline values: [codespell] skip = A, B, C, D, E, In all the above cases, the option parsing mechanism keeps the newlines (and spaces). We need to clean up, by splitting on commas and stripping each resulting item from newlines (and spaces). --- codespell_lib/_codespell.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 2db9692242..b2b2c3e491 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -160,19 +160,15 @@ class QuietLevels: class GlobMatch: - def __init__(self, pattern: Optional[str]) -> None: - self.pattern_list: Optional[List[str]] - if pattern: - # Pattern might be a list of comma-delimited strings - self.pattern_list = ",".join(pattern).split(",") - else: - self.pattern_list = None + def __init__(self, pattern: Optional[List[str]]) -> None: + self.pattern_list: Optional[List[str]] = pattern def match(self, filename: str) -> bool: - if self.pattern_list is None: - return False - - return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list) + return ( + any(fnmatch.fnmatch(filename, p) for p in self.pattern_list) + if self.pattern_list + else False + ) class Misspelling: @@ -1109,6 +1105,22 @@ def parse_file( return bad_count +def flatten_clean_comma_separated_arguments( + arguments: Optional[List[str]], +) -> Optional[List[str]]: + """ + >>> flatten_clean_comma_separated_arguments(["a, b ,\n c, d,", "e"]) + ['a', 'b', 'c', 'd', 'e'] + >>> flatten_clean_comma_separated_arguments([]) + >>> flatten_clean_comma_separated_arguments(None) + """ + return ( + [item.strip() for argument in arguments for item in argument.split(",") if item] + if arguments + else None + ) + + def _script_main() -> int: """Wrap to main() for setuptools.""" return main(*sys.argv[1:]) @@ -1256,7 +1268,7 @@ def main(*args: str) -> int: file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level) - glob_match = GlobMatch(options.skip) + glob_match = GlobMatch(flatten_clean_comma_separated_arguments(options.skip)) try: glob_match.match("/random/path") # does not need a real path except re.error: From 95c83acc1e327978a1043f21c05f0687d78c3528 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Wed, 10 Apr 2024 08:58:39 -0400 Subject: [PATCH 3/6] FIX: Simplify --- codespell_lib/_codespell.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index b2b2c3e491..46e916aaf3 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -160,15 +160,11 @@ class QuietLevels: class GlobMatch: - def __init__(self, pattern: Optional[List[str]]) -> None: - self.pattern_list: Optional[List[str]] = pattern + def __init__(self, pattern: List[str]) -> None: + self.pattern_list: List[str] = pattern def match(self, filename: str) -> bool: - return ( - any(fnmatch.fnmatch(filename, p) for p in self.pattern_list) - if self.pattern_list - else False - ) + return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list) class Misspelling: @@ -493,6 +489,7 @@ def parse_options( "accepts globs as well. E.g.: if you want " "codespell to skip .eps and .txt files, " 'you\'d give "*.eps,*.txt" to this option.', + default=list(), ) parser.add_argument( @@ -1106,19 +1103,20 @@ def parse_file( def flatten_clean_comma_separated_arguments( - arguments: Optional[List[str]], -) -> Optional[List[str]]: + arguments: List[str], +) -> List[str]: """ >>> flatten_clean_comma_separated_arguments(["a, b ,\n c, d,", "e"]) ['a', 'b', 'c', 'd', 'e'] >>> flatten_clean_comma_separated_arguments([]) - >>> flatten_clean_comma_separated_arguments(None) + [] """ - return ( - [item.strip() for argument in arguments for item in argument.split(",") if item] - if arguments - else None - ) + return [ + item.strip() + for argument in arguments + for item in argument.split(",") + if item + ] def _script_main() -> int: From a49d77aba44231bb5de999169ca598af490ccd87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 12:59:18 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- codespell_lib/_codespell.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 46e916aaf3..2841244448 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -1112,10 +1112,7 @@ def flatten_clean_comma_separated_arguments( [] """ return [ - item.strip() - for argument in arguments - for item in argument.split(",") - if item + item.strip() for argument in arguments for item in argument.split(",") if item ] From f3ea6ce802fc22b01a2a069cf5eff5f880fdf69f Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Wed, 10 Apr 2024 09:05:28 -0400 Subject: [PATCH 5/6] FIX: Literal --- codespell_lib/_codespell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 2841244448..1d9e9cb645 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -489,7 +489,7 @@ def parse_options( "accepts globs as well. E.g.: if you want " "codespell to skip .eps and .txt files, " 'you\'d give "*.eps,*.txt" to this option.', - default=list(), + default=[], ) parser.add_argument( From 17da0fe6d700f06e3a0108089239ca5af75f403c Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:44:20 +0200 Subject: [PATCH 6/6] Default arguments should not be mutable https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil --- codespell_lib/_codespell.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 1d9e9cb645..1904f7c852 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -489,7 +489,6 @@ def parse_options( "accepts globs as well. E.g.: if you want " "codespell to skip .eps and .txt files, " 'you\'d give "*.eps,*.txt" to this option.', - default=[], ) parser.add_argument( @@ -1103,7 +1102,7 @@ def parse_file( def flatten_clean_comma_separated_arguments( - arguments: List[str], + arguments: Iterable[str], ) -> List[str]: """ >>> flatten_clean_comma_separated_arguments(["a, b ,\n c, d,", "e"]) @@ -1263,7 +1262,9 @@ def main(*args: str) -> int: file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level) - glob_match = GlobMatch(flatten_clean_comma_separated_arguments(options.skip)) + glob_match = GlobMatch( + flatten_clean_comma_separated_arguments(options.skip) if options.skip else [] + ) try: glob_match.match("/random/path") # does not need a real path except re.error: