From b560d0142df564e3d5c6629f3917d00a085335be Mon Sep 17 00:00:00 2001 From: Gianluca Marraffa Date: Tue, 23 Jan 2024 17:46:55 +0100 Subject: [PATCH] feat(mark2confluence): Allow to specify default space and parents for files under a specific directory (#17) This PR introduces the possibility to automatically prepend `SPACE` and `PARENT` headers to `.md` files placed under a specified directory. It is particularly useful for a repository with many projects in it. For more information, refer to the README.md file. --- README.md | 33 +++++++ action.yml | 4 + mark2confluence/main.py | 75 ++++++++++++++- .../with_mark_parent_header.md | 5 + ...k_headers.md => with_mark_space_header.md} | 0 .../with_mark_title_header.md | 3 + .../test_inject_default_parents/0-input.md | 1 + .../test_inject_default_parents/0-output.md | 3 + tests/test_main.py | 91 ++++++++++++++++++- 9 files changed, 211 insertions(+), 4 deletions(-) create mode 100644 tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md rename tests/resources/markdown/test_has_mark_headers/{with_mark_headers.md => with_mark_space_header.md} (100%) create mode 100644 tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md create mode 100644 tests/resources/markdown/test_inject_default_parents/0-input.md create mode 100644 tests/resources/markdown/test_inject_default_parents/0-output.md diff --git a/README.md b/README.md index 1d61c98..d4c1bdc 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,41 @@ FILES: "" # space separated list of file to upload (relative to the repo root di # if FILES is defined; DOC_DIR, DOC_DIR_PATTERN and MODIFIED_INTERVAL are ignored HEADER_TEMPLATE: "---\n\n**WARNING**: This page is automatically generated from [this source code]({{source_link}})\n\n---\n\n\n" # This is a jinja template used as header, source_link is automatically resolved as github source url of the current file MERMAID_PROVIDER: "" # Defines the mermaid provider to use. 
Supported options are: cloudscript, mermaid-go +default_parents: "" # Automatically inject space and parents headers for the files under the specified directory, format: DIR=SPACE->PARENT1->PARENT2, each definition is separated by a newline ``` +### Automatically creating space and parent headers + +If you want to avoid copying and pasting the same space and parents into every MD file, you can use the `default_parents` input. +Based on the location of the file, it will automatically prepend headers before pushing the file to Confluence. +Only files that already contain `mark` headers will be modified. + +Let's take this example: + +```yaml +default_parents: | + tests/=FOO->Tests + mark2confluence/=FOO->Code +``` + +Every `markdown` file directly under the `tests` directory that already contains mark headers will have the following headers prepended: +```markdown +<!-- Space: FOO --> +<!-- Parent: Tests --> + + +``` + +The directive supports glob matching and prioritizes the longest directory first, for example: + +```yaml +default_parents: | + tests/**=FOO->Tests + tests/resources/**=FOO->Tests->Resources +``` + +Files under `tests/resources/` will have `FOO->Tests->Resources` as headers, while files under `tests/other-dir` will have `FOO->Tests`. + ## Example workflow diff --git a/action.yml b/action.yml index acb79af..8679531 100644 --- a/action.yml +++ b/action.yml @@ -37,6 +37,10 @@ inputs: description: "Defines the mermaid provider to use. 
Supported options are: cloudscript, mermaid-go" required: false default: "" + default_parents: + description: "Automatically inject space and parents headers for the files under the specified directory, format: DIR=SPACE->PARENT1->PARENT2, each definition is separated by a newline" + required: false + default: "" runs: using: "docker" image: Dockerfile diff --git a/mark2confluence/main.py b/mark2confluence/main.py index be9d917..ad67c19 100755 --- a/mark2confluence/main.py +++ b/mark2confluence/main.py @@ -4,11 +4,13 @@ import re import subprocess from datetime import datetime,timedelta +from fnmatch import fnmatch from typing import List, Tuple import jinja2 from loguru import logger from supermutes import dot from pprint import pformat +from dataclasses import dataclass ACTION_PUBLISH = "publish" ACTION_DRY_RUN = "dry-run" @@ -34,6 +36,7 @@ "CONFLUENCE_USERNAME": "", "CONFLUENCE_BASE_URL": "", "MERMAID_PROVIDER": "", + "DEFAULT_PARENTS": "", } DEFAULT_GITHUB = { @@ -98,7 +101,7 @@ def publish(path: str)-> tuple: def has_mark_headers(path: str) -> bool: - space_re = re.compile("", re.MULTILINE) + space_re = re.compile("", re.IGNORECASE) with open(path, 'r+') as f: data = f.read().split("\n") for line in data: @@ -181,6 +184,71 @@ def check_header_template(header_template: str): logger.error(f"Setup error, HEADER_TEMPLATE: {e}") exit(1) +@dataclass +class ParentCfg(): + directory: str + space: str + parents: List[str] + + def get_header(self) -> str: + header = f"\n" + for parent in self.parents: + header += f"\n" + return header + + def is_directory_included(self, directory: str) -> bool: + global cfg + sanitized_dir = directory.replace(f"{cfg.github.WORKSPACE}/", "") + if not sanitized_dir.endswith("/"): + sanitized_dir += "/" + return fnmatch(sanitized_dir, self.directory) + +def _parse_parent_string(parent_string: str) -> Tuple[str, str, List[str]]: + dir_separator = "=" + spaces_separator = "->" + try: + parent_string_regex = 
re.compile(rf".+=.+({spaces_separator}.+)*") + if not parent_string_regex.match(parent_string) or parent_string.endswith(spaces_separator): + raise ValueError + directory, space_and_parents = parent_string.split(dir_separator) + space_and_parents_splitted = space_and_parents.split(spaces_separator) + space = space_and_parents_splitted[0] + parents = space_and_parents_splitted[1::] + + if not directory or not space: + raise ValueError + + + return directory, space, parents + except ValueError: + msg = f"default_parents must follow the format DIR=SPACE[->PARENT1->PARENT2], provided: {parent_string}" + logger.error(msg) + raise ValueError(msg) + +def get_default_parents(parents_string: str) -> List[ParentCfg]: + if not parents_string: + return [] + default_parents = list() + parents_string_array = parents_string.split("\n") + parents_string_array = list(filter(lambda x: x, parents_string_array)) + for parent_string in parents_string_array: + directory, space, parents = _parse_parent_string(parent_string) + default_parents.append(ParentCfg(directory, space, parents)) + default_parents.sort(key=lambda cfg: len(cfg.directory), reverse=True) + return default_parents + +def inject_default_parents(path: str, default_parents_cfg: List[ParentCfg]): + file_dir = f"{os.path.dirname(os.path.abspath(path))}" + for parent_cfg in default_parents_cfg: + if parent_cfg.is_directory_included(file_dir): + header = parent_cfg.get_header() + with open(path, 'r') as f: + file_content = f.read() + file_content = f"{header}{file_content}" + with open(path, "w") as f: + f.write(file_content) + return + def main()->int: global cfg @@ -199,14 +267,15 @@ def main()->int: logger.info(f"Files to be processed: {', '.join(files)}") - + default_parents = get_default_parents(cfg.inputs.DEFAULT_PARENTS) status = {} for path in files: if path[-3:] == '.md' and has_mark_headers(path): logger.info(f"Processing file {path}") + inject_default_parents(path, default_parents) + source_link = f"{ 
cfg.github.SERVER_URL }/{ cfg.github.REPOSITORY }/blob/{ cfg.github.REF_NAME }/{ path.replace(cfg.github.WORKSPACE, '') }" header = tpl.render(source_link=source_link) - inject_header_before_first_line_of_content(path, header) status[path] = publish(path) diff --git a/tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md b/tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md new file mode 100644 index 0000000..9e38125 --- /dev/null +++ b/tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md @@ -0,0 +1,5 @@ + + + + +And you don't. diff --git a/tests/resources/markdown/test_has_mark_headers/with_mark_headers.md b/tests/resources/markdown/test_has_mark_headers/with_mark_space_header.md similarity index 100% rename from tests/resources/markdown/test_has_mark_headers/with_mark_headers.md rename to tests/resources/markdown/test_has_mark_headers/with_mark_space_header.md diff --git a/tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md b/tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md new file mode 100644 index 0000000..bc60a3f --- /dev/null +++ b/tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md @@ -0,0 +1,3 @@ + + +And you don't. 
diff --git a/tests/resources/markdown/test_inject_default_parents/0-input.md b/tests/resources/markdown/test_inject_default_parents/0-input.md new file mode 100644 index 0000000..b6ded31 --- /dev/null +++ b/tests/resources/markdown/test_inject_default_parents/0-input.md @@ -0,0 +1 @@ + diff --git a/tests/resources/markdown/test_inject_default_parents/0-output.md b/tests/resources/markdown/test_inject_default_parents/0-output.md new file mode 100644 index 0000000..cab0c27 --- /dev/null +++ b/tests/resources/markdown/test_inject_default_parents/0-output.md @@ -0,0 +1,3 @@ + + + diff --git a/tests/test_main.py b/tests/test_main.py index 0a705ae..ddfeff4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,10 +1,12 @@ import os import pytest import shutil +from supermutes import dot import mark2confluence.main as main RESOURCE_DIR = f"{os.path.dirname(os.path.abspath(__file__))}/resources" +WORKSPACE = os.path.realpath(f"{os.path.dirname(os.path.abspath(__file__))}/..") def clean_github_environment_variables(): if(os.getenv("CI", False)): @@ -32,7 +34,9 @@ def test_load_env_prefixes(): def test_has_mark_headers(): resource_directory = f"{RESOURCE_DIR}/markdown/test_has_mark_headers" - assert main.has_mark_headers(f"{resource_directory}/with_mark_headers.md") + assert main.has_mark_headers(f"{resource_directory}/with_mark_space_header.md") + assert main.has_mark_headers(f"{resource_directory}/with_mark_parent_header.md") + assert main.has_mark_headers(f"{resource_directory}/with_mark_title_header.md") assert not main.has_mark_headers(f"{resource_directory}/without_mark_headers.md") def test_check_header_template(): @@ -66,3 +70,88 @@ def test_inject_header(file, expected_index, raises): lines, injected_at_index = main.inject_header_before_first_line_of_content(temp_path, header) assert injected_at_index == expected_index assert lines[injected_at_index] == header +@pytest.mark.parametrize( + "string,expected_dir,expected_space,expected_parents,raises", + [ + 
("tools/=foo->bar->baz", "tools/", "foo", ["bar", "baz"], False), + ("tools/=foo->bar", "tools/", "foo", ["bar"], False), + ("tools/=foo", "tools/", "foo", [], False), + ("tools/=", "tools/", "", [], True), + ("tools/", "", "", [], True), + ("tools/=foo->", "tools/", "foo", [], True), + ("=foo", "tools/", "foo", [], True), + ] +) +def test__parse_parents_string(string, expected_dir, expected_space, expected_parents, raises): + if raises: + with pytest.raises(ValueError, match=r"^default_parents.+"): + main._parse_parent_string(string) + else: + directory, space, parents = main._parse_parent_string(string) + assert directory == expected_dir + assert space == expected_space + assert parents == expected_parents + +@pytest.mark.parametrize( + "string,expected_parents_count", + [ + ("tools/=foo", 1), + ("tools/=foo\n", 1), + ("tools/=foo\ntools/=foo", 2), + ("", 0), + (None, 0) + ] +) +def test_get_default_parents(string, expected_parents_count): + parents = main.get_default_parents(string) + assert len(parents) == expected_parents_count + +@pytest.mark.parametrize( + "cfg,expected_header", + [ + ( + main.ParentCfg(directory="test",space="FOO",parents=["BAR", "BAZ"]), + "\n\n\n", + ), + ( + main.ParentCfg(directory="test",space="BAR",parents=["FOO"]), + "\n\n", + ), + ( + main.ParentCfg(directory="test",space="FOO",parents=[]), + "\n", + ), + ] +) +def test_ParentCfg_get_header(cfg: main.ParentCfg, expected_header): + assert cfg.get_header() == expected_header + +def test_inject_default_parents(monkeypatch): + monkeypatch.setattr('mark2confluence.main.cfg', dot.dotify({"github": {"WORKSPACE": WORKSPACE}})) + + base_dir = f"{RESOURCE_DIR}/markdown/test_inject_default_parents" + source_file_path = f"{base_dir}/0-input.md" + expected_file_path = f"{base_dir}/0-output.md" + parsed_file_dir = f"{WORKSPACE}/tests/foo" + parsed_file_path = f"{parsed_file_dir}/parsed_file.md" + cfgs = [ + main.ParentCfg(directory="tests/foo/bar", space="FOO", parents=["BAZ"]), + 
main.ParentCfg(directory="tests/foo/*", space="FOO", parents=["BAR"]), + main.ParentCfg(directory="tests/*", space="FOO", parents=["BAZ"]), + main.ParentCfg(directory="mark2confluence/", space="BOZ", parents=["BIZ"]), + ] + + os.makedirs(parsed_file_dir, exist_ok=True) + shutil.copy(source_file_path, parsed_file_path) + + main.inject_default_parents(parsed_file_path, cfgs) + + with open(parsed_file_path, "r") as f: + parsed_file_content = f.read() + with open(expected_file_path, "r") as f: + expected_file_content = f.read() + + try: + assert parsed_file_content == expected_file_content + finally: + shutil.rmtree(parsed_file_dir)