feat(mark2confluence): Allow to specify default space and parents for…

… files under a specific directory (#17) This PR introduces the possibility to automatically prepend `SPACE` and `PARENT` headers to `.md` files placed under a specified directory. It is particularly useful for a repository with many projects in it. For more information refer to the README.md file.
draios · Jan 23, 2024 · b560d01 · b560d01
1 parent 26cd28c
commit b560d01
Show file tree

Hide file tree

Showing 9 changed files with 211 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -32,8 +32,41 @@ FILES: "" # space separated list of file to upload (relative to the repo root di
           # if FILES is defined; DOC_DIR, DOC_DIR_PATTERN and MODIFIED_INTERVAL are ignored
 HEADER_TEMPLATE: "---\n\n**WARNING**: This page is automatically generated from [this source code]({{source_link}})\n\n---\n<!-- Include: ac:toc -->\n\n" # This is a jinja template used as header, source_link is automatically resolved as github source url of the current file
 MERMAID_PROVIDER: "" # Defines the mermaid provider to use. Supported options are: cloudscript, mermaid-go
+default_parents: "" # Automatically inject space and parents headers for the files under the specified directory, format: DIR=SPACE->PARENT1->PARENT2, each definition is separated by a newline
 ```
 
+### Automatically creating space and parent headers
+
+If you want to avoid copying and pasting the same space and parent headers into every Markdown file, you can use the `default_parents` input.
+Based on the directory a file lives in, the matching space and parent headers are automatically prepended before the file is pushed to Confluence.
+Only files that already contain `mark` headers are modified.
+
+Let's take this example:
+
+```yaml
+default_parents: |
+  tests/=FOO->Tests
+  mark2confluence/=FOO->Code
+```
+
+Every Markdown file under the `tests` directory that already contains mark headers will have the following headers prepended:
+```markdown
+<!-- Space: FOO -->
+<!-- Parent: Tests -->
+
+<your-content>
+```
+
+The directive supports glob matching and prioritizes the longest directory pattern first, for example:
+
+```yaml
+default_parents: |
+  tests/**=FOO->Tests
+  tests/resources/**=FOO->Tests->Resources
+```
+
+Files under `tests/resources/` will have `FOO->Tests->Resources` as headers, while files under `tests/other-dir` will have `FOO->Tests`.
+
 ## Example workflow
 
 

diff --git a/action.yml b/action.yml
@@ -37,6 +37,10 @@ inputs:
     description: "Defines the mermaid provider to use. Supported options are: cloudscript, mermaid-go"
     required: false
     default: ""
+  default_parents:
+    description: "Automatically inject space and parents headers for the files under the specified directory, format: DIR=SPACE->PARENT1->PARENT2, each definition is separated by a newline"
+    required: false
+    default: ""
 runs:
   using: "docker"
   image: Dockerfile
diff --git a/mark2confluence/main.py b/mark2confluence/main.py
@@ -4,11 +4,13 @@
 import re
 import subprocess
 from datetime import datetime,timedelta
+from fnmatch import fnmatch
 from typing import List, Tuple
 import jinja2
 from loguru import logger
 from supermutes import dot
 from pprint import pformat
+from dataclasses import dataclass
 
 ACTION_PUBLISH = "publish"
 ACTION_DRY_RUN = "dry-run"
@@ -34,6 +36,7 @@
   "CONFLUENCE_USERNAME": "",
   "CONFLUENCE_BASE_URL": "",
   "MERMAID_PROVIDER": "",
+  "DEFAULT_PARENTS": "",
 }
 
 DEFAULT_GITHUB = {
@@ -98,7 +101,7 @@ def publish(path: str)-> tuple:
 
 
 def has_mark_headers(path: str) -> bool:
-  space_re = re.compile("<!--.?[Ss]pace:.*-->", re.MULTILINE)
+  space_re = re.compile("<!--.?(space|parent|title):.*-->", re.IGNORECASE)
   with open(path, 'r+') as f:
     data = f.read().split("\n")
     for line in data:
@@ -181,6 +184,71 @@ def check_header_template(header_template: str):
     logger.error(f"Setup error, HEADER_TEMPLATE: {e}")
     exit(1)
 
+@dataclass
+class ParentCfg():
+  """Default Confluence space and parent pages for files under a directory pattern."""
+  # fnmatch-style pattern, compared against the workspace-relative directory of a file
+  directory: str
+  # value written in the "Space" mark header
+  space: str
+  # values written as "Parent" mark headers, in the given order
+  parents: List[str]
+
+  def get_header(self) -> str:
+    """Return the mark headers: one Space line followed by one Parent line per parent.
+
+    Every line, including the last one, is terminated by a newline.
+    """
+    lines = [f"<!-- Space: {self.space} -->\n"]
+    lines.extend(f"<!-- Parent: {parent} -->\n" for parent in self.parents)
+    return "".join(lines)
+
+  def is_directory_included(self, directory: str) -> bool:
+    """Tell whether `directory` matches this configuration's pattern.
+
+    The leading `cfg.github.WORKSPACE/` prefix is removed from `directory`, a
+    trailing slash is appended when missing, and the result is matched against
+    `self.directory` with `fnmatch`.
+    """
+    workspace_prefix = f"{cfg.github.WORKSPACE}/"
+    sanitized_dir = directory
+    # Strip the workspace only where it is the leading prefix: str.replace would
+    # also delete any later occurrence of the same text inside the path.
+    if sanitized_dir.startswith(workspace_prefix):
+      sanitized_dir = sanitized_dir[len(workspace_prefix):]
+    if not sanitized_dir.endswith("/"):
+      sanitized_dir += "/"
+    return fnmatch(sanitized_dir, self.directory)
+
+def _parse_parent_string(parent_string: str) -> Tuple[str, str, List[str]]:
+  """Parse one `DIR=SPACE[->PARENT1->PARENT2]` definition.
+
+  Returns a `(directory, space, parents)` tuple; `parents` is empty when only
+  a space is given. Whitespace around each component is removed.
+
+  Raises `ValueError` (after logging the same message as an error) when the
+  string does not contain exactly one `=`, or when the directory, the space or
+  any parent is empty.
+  """
+  dir_separator = "="
+  spaces_separator = "->"
+  try:
+    # Unpacking raises ValueError unless there is exactly one "=" in the string
+    directory, space_and_parents = parent_string.split(dir_separator)
+    directory = directory.strip()
+    space, *parents = [part.strip() for part in space_and_parents.split(spaces_separator)]
+
+    # An empty parent (trailing "->" or a doubled "->->") would produce an
+    # empty Parent header, so it is rejected like an empty directory or space.
+    if not directory or not space or not all(parents):
+      raise ValueError
+
+    return directory, space, parents
+  except ValueError:
+    msg = f"default_parents must follow the format DIR=SPACE[->PARENT1->PARENT2], provided: {parent_string}"
+    logger.error(msg)
+    raise ValueError(msg) from None
+
+def get_default_parents(parents_string: str) -> List[ParentCfg]:
+  """Build the default-parent configurations from a newline-separated string.
+
+  Each non-blank line is parsed with `_parse_parent_string`. The result is
+  sorted by the length of the directory pattern, longest first. An empty or
+  `None` input yields an empty list.
+
+  Raises `ValueError` (from `_parse_parent_string`) when a line is malformed.
+  """
+  if not parents_string:
+    return []
+  # strip() drops the "\r" left by CRLF input and turns whitespace-only lines
+  # into empty strings, which are then skipped instead of failing the parse.
+  definitions = [line.strip() for line in parents_string.split("\n")]
+  default_parents = [
+    ParentCfg(*_parse_parent_string(definition))
+    for definition in definitions
+    if definition
+  ]
+  default_parents.sort(key=lambda parent_cfg: len(parent_cfg.directory), reverse=True)
+  return default_parents
+
+def inject_default_parents(path: str, default_parents_cfg: List[ParentCfg]):
+  """Prepend the headers of the first matching configuration to the file at `path`.
+
+  Configurations are tried in list order against the absolute directory that
+  contains `path`; the file is rewritten in place with the header of the first
+  one that matches. When none matches the file is left untouched.
+  """
+  containing_dir = os.path.dirname(os.path.abspath(path))
+  matching_cfg = next(
+    (candidate for candidate in default_parents_cfg if candidate.is_directory_included(containing_dir)),
+    None,
+  )
+  if matching_cfg is None:
+    return
+  header = matching_cfg.get_header()
+  with open(path, 'r') as source:
+    original_content = source.read()
+  with open(path, "w") as target:
+    target.write(f"{header}{original_content}")
+
 
 def main()->int:
   global cfg
@@ -199,14 +267,15 @@ def main()->int:
 
   logger.info(f"Files to be processed: {', '.join(files)}")
 
-
+  default_parents = get_default_parents(cfg.inputs.DEFAULT_PARENTS)
   status = {}
   for path in files:
     if path[-3:] == '.md' and has_mark_headers(path):
       logger.info(f"Processing file {path}")
+      inject_default_parents(path, default_parents)
+
       source_link = f"{ cfg.github.SERVER_URL }/{ cfg.github.REPOSITORY }/blob/{ cfg.github.REF_NAME }/{ path.replace(cfg.github.WORKSPACE, '') }"
       header = tpl.render(source_link=source_link)
-
       inject_header_before_first_line_of_content(path, header)
 
       status[path] = publish(path)

diff --git a/tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md b/tests/resources/markdown/test_has_mark_headers/with_mark_parent_header.md
@@ -0,0 +1,5 @@
+<!-- Parent: Football -->
+<!-- Parent: New Team -->
+<!-- Title: I have Mark Lenders -->
+
+And you don't.
diff --git a/...est_has_mark_headers/with_mark_headers.md → ...as_mark_headers/with_mark_space_header.md b/...est_has_mark_headers/with_mark_headers.md → ...as_mark_headers/with_mark_space_header.md
diff --git a/tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md b/tests/resources/markdown/test_has_mark_headers/with_mark_title_header.md
@@ -0,0 +1,3 @@
+<!-- Title: I have Mark Lenders -->
+
+And you don't.
diff --git a/tests/resources/markdown/test_inject_default_parents/0-input.md b/tests/resources/markdown/test_inject_default_parents/0-input.md
@@ -0,0 +1 @@
+<!-- Parent: BAZ -->
diff --git a/tests/resources/markdown/test_inject_default_parents/0-output.md b/tests/resources/markdown/test_inject_default_parents/0-output.md
@@ -0,0 +1,3 @@
+<!-- Space: FOO -->
+<!-- Parent: BAR -->
+<!-- Parent: BAZ -->
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -1,10 +1,12 @@
 import os
 import pytest
 import shutil
+from supermutes import dot
 
 import mark2confluence.main as main
 
 RESOURCE_DIR = f"{os.path.dirname(os.path.abspath(__file__))}/resources"
+WORKSPACE = os.path.realpath(f"{os.path.dirname(os.path.abspath(__file__))}/..")
 
 def clean_github_environment_variables():
     if(os.getenv("CI", False)):
@@ -32,7 +34,9 @@ def test_load_env_prefixes():
 
 def test_has_mark_headers():
     resource_directory = f"{RESOURCE_DIR}/markdown/test_has_mark_headers"
-    assert main.has_mark_headers(f"{resource_directory}/with_mark_headers.md")
+    assert main.has_mark_headers(f"{resource_directory}/with_mark_space_header.md")
+    assert main.has_mark_headers(f"{resource_directory}/with_mark_parent_header.md")
+    assert main.has_mark_headers(f"{resource_directory}/with_mark_title_header.md")
     assert not main.has_mark_headers(f"{resource_directory}/without_mark_headers.md")
 
 def test_check_header_template():
@@ -66,3 +70,88 @@ def test_inject_header(file, expected_index, raises):
         lines, injected_at_index = main.inject_header_before_first_line_of_content(temp_path, header)
         assert injected_at_index == expected_index
         assert lines[injected_at_index] == header
+@pytest.mark.parametrize(
+    "string,expected_dir,expected_space,expected_parents,raises",
+    [
+        ("tools/=foo->bar->baz", "tools/", "foo", ["bar", "baz"], False),
+        ("tools/=foo->bar", "tools/", "foo", ["bar"], False),
+        ("tools/=foo", "tools/", "foo", [], False),
+        ("tools/=", "tools/", "", [], True),
+        ("tools/", "", "", [], True),
+        ("tools/=foo->", "tools/", "foo", [], True),
+        ("=foo", "tools/", "foo", [], True),
+    ]
+)
+def test__parse_parents_string(string, expected_dir, expected_space, expected_parents, raises):
+    """Valid definitions parse to the expected triple; malformed ones raise ValueError."""
+    if not raises:
+        parsed = main._parse_parent_string(string)
+        assert parsed == (expected_dir, expected_space, expected_parents)
+        return
+    # The expected_* values are ignored for the failing cases
+    with pytest.raises(ValueError, match=r"^default_parents.+"):
+        main._parse_parent_string(string)
+
+@pytest.mark.parametrize(
+    "string,expected_parents_count",
+    [
+        ("tools/=foo", 1),
+        ("tools/=foo\n", 1),
+        ("tools/=foo\ntools/=foo", 2),
+        ("", 0),
+        (None, 0),
+    ]
+)
+def test_get_default_parents(string, expected_parents_count):
+    """The number of parsed configurations equals the number of non-empty lines."""
+    assert len(main.get_default_parents(string)) == expected_parents_count
+
+@pytest.mark.parametrize(
+    "cfg,expected_header",
+    [
+        (
+            main.ParentCfg(directory="test", space="FOO", parents=["BAR", "BAZ"]),
+            "<!-- Space: FOO -->\n<!-- Parent: BAR -->\n<!-- Parent: BAZ -->\n",
+        ),
+        (
+            main.ParentCfg(directory="test", space="BAR", parents=["FOO"]),
+            "<!-- Space: BAR -->\n<!-- Parent: FOO -->\n",
+        ),
+        (
+            main.ParentCfg(directory="test", space="FOO", parents=[]),
+            "<!-- Space: FOO -->\n",
+        ),
+    ]
+)
+def test_ParentCfg_get_header(cfg: main.ParentCfg, expected_header):
+    """get_header renders the Space line followed by one Parent line per parent."""
+    rendered_header = cfg.get_header()
+    assert rendered_header == expected_header
+
+def test_inject_default_parents(monkeypatch):
+    """The first matching configuration's headers are prepended to the file.
+
+    The input fixture is copied into a scratch directory under the workspace so
+    that its workspace-relative directory is `tests/foo`, which matches the
+    `tests/foo/*` entry before the broader `tests/*` one.
+    """
+    monkeypatch.setattr('mark2confluence.main.cfg', dot.dotify({"github": {"WORKSPACE": WORKSPACE}}))
+
+    base_dir = f"{RESOURCE_DIR}/markdown/test_inject_default_parents"
+    source_file_path = f"{base_dir}/0-input.md"
+    expected_file_path = f"{base_dir}/0-output.md"
+    parsed_file_dir = f"{WORKSPACE}/tests/foo"
+    parsed_file_path = f"{parsed_file_dir}/parsed_file.md"
+    cfgs = [
+        main.ParentCfg(directory="tests/foo/bar", space="FOO", parents=["BAZ"]),
+        main.ParentCfg(directory="tests/foo/*", space="FOO", parents=["BAR"]),
+        main.ParentCfg(directory="tests/*", space="FOO", parents=["BAZ"]),
+        main.ParentCfg(directory="mark2confluence/", space="BOZ", parents=["BIZ"]),
+    ]
+
+    os.makedirs(parsed_file_dir, exist_ok=True)
+    # Everything after the directory exists runs under `finally`, so a failure
+    # while copying, injecting or reading does not leave tests/foo behind.
+    try:
+        shutil.copy(source_file_path, parsed_file_path)
+
+        main.inject_default_parents(parsed_file_path, cfgs)
+
+        with open(parsed_file_path, "r") as f:
+            parsed_file_content = f.read()
+        with open(expected_file_path, "r") as f:
+            expected_file_content = f.read()
+
+        assert parsed_file_content == expected_file_content
+    finally:
+        shutil.rmtree(parsed_file_dir)