diff --git a/CHANGES.rst b/CHANGES.rst index 790d1a1..068fea0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,25 @@ Changelog ========= +Insert-license-header 1.1.0 +================================================ +* Re-implement behaviour `--dynamic-years` + * The end year is now also determined by GIT, the current year is NOT automatically used. + * Effectively, the tool will not upgrade the year of files that have not been touched, + this is important when using the pre-commit hook of this tool. Prior to `1.1.0`, + `pre-commit run insert-license -a` would cause your CI to go red after New Year's Eve. +* Fix link to the actual pre-commit repository. + +Insert-license-header 1.0.2 +================================================ +* Make function that determines git start date more robust + * If, e.g. no GIT repository exists, the tool will no longer fail but use the current year. + * The path to the file is escaped, allowing for more crazy file names. + +Insert-license-header 1.0.1 +================================================ +* (Re-trigger publishing workflow) + Insert-license-header 1.0.0 (2023-11-07) ================================================ * Tool is a standalone pypi tool and no longer a pre-commit hook diff --git a/README.md b/README.md index b47222c..04771dc 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,15 @@ This python script automatically inserts your license header at the beginning of Forked from [Lucas-C/pre-commit-hooks](https://github.com/Lucas-C/pre-commit-hooks) and modified to realize the following behaviour: +> :warning: The behaviour of `--dynamic-years` changed in version `1.1.0`. +> Add argument `--dynamic-years` which determines the start year of the copyright time range automatically - based on when the file was first tracked with Git. If a start year is already present, it is not touched. If a file is not tracked by Git, the current year is used as start year. 
-The end year is automatically set to the current year -(`--use-current-year` is activated automatically when `--dynamic-years` is present). +The end year is automatically set to the year of the last commit that affected the file. +If an end year is already present that is in the future, don't touch it. It is, however, +incremented if it lies in the past. If the file is not tracked by Git, use the current year. + Include a `{year_start}` and `{year_end}` in your license header to use this feature. Add argument `--license-base64` to include a license not via a file but through @@ -16,4 +20,4 @@ Obtain your license `base64` encoded string with `cat LICENSE.txt | base64`. Including a license via `--license-base64 {base64string}` overrides the `--license-filepath` option. -> :warning: This is not a pre-commit hook anymore. Instead, this repository contains just the base script to insert licenses in text-based files. To check out the resulting pre-commit hook, visit: https://github.com/Quantco/pre-commit-insert-license)https://github.com/Quantco/pre-commit-insert-license. +> :warning: This is not a pre-commit hook anymore. Instead, this repository contains just the base script to insert licenses in text-based files. To check out the resulting pre-commit hook, visit: https://github.com/Quantco/pre-commit-insert-license diff --git a/insert_license_header/insert_license.py b/insert_license_header/insert_license.py index 53bee9e..407d160 100644 --- a/insert_license_header/insert_license.py +++ b/insert_license_header/insert_license.py @@ -2,15 +2,16 @@ import argparse import base64 -import collections import re import subprocess import sys from datetime import datetime -from typing import Any, Sequence +from typing import Any, Literal, NamedTuple, Sequence from rapidfuzz import fuzz +PLACEHOLDER_END_YEAR = 1000 + FUZZY_MATCH_TODO_COMMENT = ( " TODO: This license is not consistent with the license used in the project." 
) @@ -24,18 +25,15 @@ DEBUG_LEVENSHTEIN_DISTANCE_CALCULATION = False -LicenseInfo = collections.namedtuple( - "LicenseInfo", - [ - "prefixed_license", - "plain_license", - "eol", - "comment_start", - "comment_prefix", - "comment_end", - "num_extra_lines", - ], -) + +class LicenseInfo(NamedTuple): + prefixed_license: list[str] + plain_license: list[str] + eol: Literal["", "\n", "\r\n"] + comment_start: str + comment_prefix: str + comment_end: str + num_extra_lines: int class LicenseUpdateError(Exception): @@ -107,14 +105,15 @@ def main(argv=None): help=( "Determine years that appear in license automatically." "If no start date is present in file," - "use the date when the file was first introduced." - "Use current year automatically as end date, implies --use-current-years." + "use the date when the file was first introduced to GIT." + "Use the last commit date that affected the file as the end date." + "If no end date is present in file, use the current year." ), ) args = parser.parse_args(argv) if args.dynamic_years: - args.use_current_year = True + args.allow_past_years = True if args.use_current_year: args.allow_past_years = True @@ -220,11 +219,16 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): for src_filepath in args.filenames: license_info = plain_license_info + last_year = datetime.now().year if args.dynamic_years: - year_start = _get_git_file_creation_date(src_filepath).year - year_end = ( - datetime.now().year - ) # args.dynamic_years implies args.use_current_year + year_range = _get_git_file_year_range(src_filepath) + year_start, year_end = ( + (year_range[0].year, year_range[1].year) + if year_range is not None + else (datetime.now().year, PLACEHOLDER_END_YEAR) + ) + last_year = year_end + prefixed_license = [ line.format( year_start=year_start, @@ -276,12 +280,13 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): try: if license_found( remove_header=args.remove_header, - 
update_year_range=args.use_current_year, + update_year_range=args.use_current_year or args.dynamic_years, license_header_index=license_header_index, license_info=license_info, src_file_content=src_file_content, src_filepath=src_filepath, encoding=encoding, + last_year=last_year, ): changed_files.append(src_filepath) except LicenseUpdateError as error: @@ -300,6 +305,12 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): ): todo_files.append(src_filepath) else: + # If placeholder end year is still present, replace it with current year + if args.dynamic_years: + _replace_placeholder_in_license_with_current_year( + license_info=license_info, + ) + if license_not_found( remove_header=args.remove_header, license_info=license_info, @@ -312,6 +323,19 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): return changed_files or todo_files or license_update_failed +def _replace_placeholder_in_license_with_current_year( + license_info: LicenseInfo, +) -> LicenseInfo: + current_year = datetime.now().year + for i in range(len(license_info.prefixed_license)): + line = license_info.prefixed_license[i] + license_info.prefixed_license[i] = re.sub( + r"(\d+)-" + str(PLACEHOLDER_END_YEAR), + r"\1-" + str(current_year), + line, + ) + + def _read_file_content(src_filepath): last_error = None for encoding in ( @@ -436,6 +460,7 @@ def try_update_year_range( src_filepath: str, license_header_index: int, license_length: int, + last_year: int, ) -> tuple[Sequence[str], bool]: """ Updates the years in a copyright header in src_file_content by @@ -447,11 +472,10 @@ def try_update_year_range( :param license_header_index: line where the license starts :return: source file contents and a flag indicating update """ - current_year = datetime.now().year for i in range(license_header_index, license_header_index + license_length): updated = try_update_year( - src_file_content[i], src_filepath, current_year, introduce_range=True + 
src_file_content[i], src_filepath, last_year, introduce_range=True ) if updated: src_file_content[i] = updated @@ -467,10 +491,12 @@ def license_found( src_file_content, src_filepath, encoding, + last_year: int, ): # pylint: disable=too-many-arguments """ - Executed when license is found. It does nothing if remove_header is False, - removes the license if remove_header is True. + Executed when license is found. It tries to update the year range + if update_year_range is True, removes the license if remove_header is True. + :param remove_header: whether header should be removed if found :param update_year_range: whether to update license with the current year :param license_header_index: index where license found @@ -508,6 +534,7 @@ def license_found( src_filepath, license_header_index, len(license_info.prefixed_license), + last_year=last_year, ) if updated: @@ -730,10 +757,10 @@ def get_license_candidate_string(candidate_array, license_info): return license_string_candidate.strip(), found_license_offset -def _get_git_file_creation_date(filepath): - """Uses special git log formatting to extract the years from the commits. - Take the year of the first commit. If the file has not been tracked with Git, - return the current year. +def _get_git_file_year_range(filepath: str) -> tuple[datetime, datetime] | None: + """Uses git log formatting to extract start and end year from the commits. + Take the start year from the first commit and the end year from the last. + If the file has not been tracked with Git, return None. :param filepath: path to file :type filepath: str @@ -749,18 +776,26 @@ def _get_git_file_creation_date(filepath): except ( subprocess.CalledProcessError ): # Cover edge cases, e.g. if there has been no commit yet - return datetime.now() + return None # The result.stdout will contain all the commit dates, one per line. # The last line will be the date of the first commit. 
dates = result.stdout.strip().split("\n") - first_commit_date = dates[-1] + first_commit_date_str = dates[-1] + last_commit_date_str = dates[0] - if first_commit_date == "": # file has not been tracked with Git - return datetime.now() + if ( + first_commit_date_str == "" or last_commit_date_str == "" + ): # file has not been tracked with Git + return None - first_commit_date = datetime.fromisoformat(first_commit_date.replace("Z", "+00:00")) - return first_commit_date + first_commit_date = datetime.fromisoformat( + first_commit_date_str.replace("Z", "+00:00") + ) + last_commit_date = datetime.fromisoformat( + last_commit_date_str.replace("Z", "+00:00") + ) + return first_commit_date, last_commit_date if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 0b0016a..2c0ee68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "insert-license-header" description = "Tool to insert license headers at the beginning of text-based files." 
readme = "README.md" -version = "1.0.2" +version = "1.1.0" license = "MIT" authors = [ {name = "Thomas Marwitz", email = "thomasmarwitz3@gmail.com"}, diff --git a/tests/insert_license_test.py b/tests/insert_license_test.py index 936b123..6200a86 100644 --- a/tests/insert_license_test.py +++ b/tests/insert_license_test.py @@ -7,7 +7,7 @@ from insert_license_header.insert_license import ( LicenseInfo, - _get_git_file_creation_date, + _get_git_file_year_range, find_license_header_index, ) from insert_license_header.insert_license import ( @@ -683,66 +683,138 @@ def test_is_license_present(src_file_content, expected_index, match_years_strict def mock_get_git_file_creation_date(filepath): # Replace this with whatever behavior you want for the mock function - return datetime(2018, 1, 1) + return datetime(2018, 1, 1), datetime(2019, 1, 1) + + +def get_datetime_range(year_range: str): + if year_range == "": + return None + + start_year, end_year = year_range.split("-") + return ( + datetime(int(start_year), 2, 2), + datetime(int(end_year), 2, 2), + ) @pytest.mark.parametrize( - ("license_file_path", "src_file_path", "expected_start_year", "expect_change"), + ("year_range_in_file", "year_range_in_git", "current_year", "expected_year_range"), ( - # Start year is determined dynamically - ("DY_LICENSE.txt", "DY_module_wo_license.py", "2018", 1), - # End year is adjusted to current_year - ("DY_LICENSE.txt", "DY_module_outdated_license.py", "2018", 1), - # Older start year is left unchanged - ("DY_LICENSE.txt", "DY_module_old_license.py", "2000", 1), - # Newer start year is left unchanged (as start year is not touched if already existing) - ("DY_LICENSE.txt", "DY_module_unchanged_license.py", "2022", 0), + ################################################################################ + # GIT tracked: Test END_YEAR. 
+ # -> GIT tracking or year in file should always have precedence over current year + ################################################################################ + ("2020-2022", "2020-2023", "2024", "2020-2023"), + # --> Update 'end_year' if git is newer, prioritize git > current year + ("2020-2023", "2020-2022", "2024", "2020-2023"), + # --> Keep 'end_year' if git is older, prioritize 'year in file' > current year + ("2020-2022", "2020-2022", "2024", "2020-2022"), + # --> Keep 'end_year' if git and in file are same + # GIT tracked: Test START_YEAR + ("2020-2022", "2019-2022", "2024", "2020-2022"), + # --> Respect start year that is in the file + ("2020-2022", "2021-2022", "2024", "2020-2022"), + # --> Do not update start year if git says the file is newer + ("2020-2022", "2020-2022", "2024", "2020-2022"), + # --> Keep 'start_year' if git and in file are same + ################################################################################ + # Not GIT tracked: Test END_YEAR + # -> Year in file should always have precedence over current year + ################################################################################ + ("2020-2022", "", "2024", "2020-2022"), + # --> Keep 'end_year' in file although current year is newer + ("2020-2022", "", "2020", "2020-2022"), + # --> Keep 'end_year' in file although current year is older + ################################################################################ + # No license header present == no year in file + # -> Year in git should always have precedence over current year + # -> Only if git is not tracked, use current year + ################################################################################ + ("", "2020-2022", "2024", "2020-2022"), + # --> Use GIT year range although current year is newer + ("", "2020-2022", "2020", "2020-2022"), + # --> Use GIT year range although current year is older + ("", "", "2024", "2024-2024"), + # --> Use current year if not GIT tracked and no year in file ), ) -def 
test_dynamic_years( - license_file_path, - src_file_path, - expected_start_year, - expect_change, +def test_dynamic_years_with_existing_license_header( + year_range_in_file: str, + year_range_in_git: str, + current_year: str, + expected_year_range: str, tmpdir, - monkeypatch, + monkeypatch: pytest.MonkeyPatch, ): - # Specify the paths to your license and source files + LICENSE_FILE = "DY_LICENSE.txt" + CONTENT_TEMPLATE_LICENSE_HEADER = ( + "# Copyright (C) {year_range}, PearCorp, Inc.\n" + "# SPDX-License-Identifier: LicenseRef-PearCorp\n\n" + "import sys\n" + ) + CONTENT_TEMPLATE_NO_LICENSE_HEADER = "import sys\n" + with chdir_to_test_resources(): - current_year = datetime.now().year + temp_src_file_path = tmpdir.join("DY_module_template.py") + # Create file either with or without license header depending on whether + # year_range_in_file is given or not (empty string) + with open(temp_src_file_path.strpath, "w", encoding="utf-8") as f: + file_content = ( + CONTENT_TEMPLATE_NO_LICENSE_HEADER + if year_range_in_file == "" + else CONTENT_TEMPLATE_LICENSE_HEADER.format( + year_range=year_range_in_file + ) + ) + f.write(file_content) - expected_content = ( - f"# Copyright (C) {expected_start_year}-{current_year}, PearCorp, Inc.\n" - "# SPDX-License-Identifier: LicenseRef-PearCorp\n\n" - "import sys\n" + monkeypatch.setattr( + "insert_license_header.insert_license._get_git_file_year_range", + lambda _: get_datetime_range(year_range_in_git), ) - - temp_src_file_path = tmpdir.join("module_wo_license.py") - shutil.copy(src_file_path, temp_src_file_path.strpath) - - comment_style = "#" - argv = [ - "--license-filepath", - license_file_path, - "--comment-style", - comment_style, - "--dynamic-years", - temp_src_file_path.strpath, - ] - + # mock datetime.now() to return 'current_year' monkeypatch.setattr( - "insert_license_header.insert_license._get_git_file_creation_date", - mock_get_git_file_creation_date, + "insert_license_header.insert_license.datetime", + type("mock", 
(), {"now": lambda: datetime.strptime(current_year, "%Y")}), ) - assert insert_license(argv) == expect_change + file_modified = ( + insert_license( + [ + "--license-filepath", + LICENSE_FILE, + "--comment-style", + "#", + "--dynamic-years", + temp_src_file_path.strpath, + ] + ) + == 1 # 0 == no change, 1 == change + ) + + expect_modification = year_range_in_file != expected_year_range + assert file_modified == expect_modification with open(temp_src_file_path, encoding="utf-8") as updated_file: updated_content = updated_file.read() + expected_content = CONTENT_TEMPLATE_LICENSE_HEADER.format( + year_range=expected_year_range + ) + assert updated_content == expected_content +# TESTCASES: +# File's last_year is now 2024 (prev 2023) +# File's last_year is still 2023 (current year = 2024) +# File's start_year + +# 1. File has no license header: +# - Git tracked: take from git +# - Not Git tracked: take current-current + + def test_git_file_creation_date(monkeypatch): """Mock subprocess.run to throw an exception. Expect the returned datetime to have this year!""" @@ -750,8 +822,8 @@ def mock_git_log(*args, **kwargs): raise subprocess.CalledProcessError(128, "git log") monkeypatch.setattr(subprocess, "run", mock_git_log) - result = _get_git_file_creation_date("Not existing") - assert result.year == datetime.now().year + result = _get_git_file_year_range("Not existing") + assert result is None def test_base64_encoded_license(tmpdir): @@ -795,9 +867,9 @@ def mock_empty_git_log(*args, **kwargs): ) monkeypatch.setattr(subprocess, "run", mock_empty_git_log) - result = _get_git_file_creation_date("Test") + result = _get_git_file_year_range("Test") - assert result.year == datetime.now().year + assert result is None @pytest.mark.parametrize(