Skip to content

Commit

Permalink
RecipeReader::get_value() can now sub vars in strings (#126)
Browse files Browse the repository at this point in the history
* `RecipeReader::get_value()` can no sub vars in strings

- Adds support for the access operator (on strings) and the `upper` JINJA
  function
- Fixes several issues with JINJA variable subsitutions
- General code clean-up
- Updates tests with improvements

* Adds more unit tests for get_value()

* Adds some (arguably) missing unit test cases

* Fixes bug in string index check

* Adds new edge-case unit tests for get_value()

* Update conda_recipe_manager/parser/recipe_reader.py

Co-authored-by: Bianca Henderson <beeankha@gmail.com>

* Update conda_recipe_manager/parser/recipe_reader.py

Co-authored-by: Bianca Henderson <beeankha@gmail.com>

---------

Co-authored-by: Bianca Henderson <beeankha@gmail.com>
  • Loading branch information
schuylermartin45 and beeankha authored Sep 19, 2024
1 parent ee93c4a commit c7085dd
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 43 deletions.
5 changes: 3 additions & 2 deletions conda_recipe_manager/parser/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class Regex:
"""

# Pattern to detect Jinja variable names and functions
_JINJA_VAR_FUNCTION_PATTERN: Final[str] = r"[a-zA-Z_][a-zA-Z0-9_\|\'\"\(\)\, =\.\-]*"
_JINJA_VAR_FUNCTION_PATTERN: Final[str] = r"[a-zA-Z_][a-zA-Z0-9_\|\'\"\(\)\[\]\, =\.\-]*"

## Pre-process conversion tooling regular expressions ##
# Finds `environ[]` used by a some recipe files. Requires a whitespace character to prevent matches with
Expand Down Expand Up @@ -165,10 +165,11 @@ class Regex:
JINJA_V1_SUB: Final[re.Pattern[str]] = re.compile(r"\${{\s*" + _JINJA_VAR_FUNCTION_PATTERN + r"\s*}}")

# All recognized JINJA functions are kept in a set for the convenience of trying to match against all of them.
# Group 1 contains the function name, Group 2 contains the arguments, if any.
# Group 1 contains the function or variable name, Group 2 contains the arguments, if any.
JINJA_FUNCTION_LOWER: Final[re.Pattern[str]] = re.compile(r"\|\s*(lower)")
JINJA_FUNCTION_UPPER: Final[re.Pattern[str]] = re.compile(r"\|\s*(upper)")
JINJA_FUNCTION_REPLACE: Final[re.Pattern[str]] = re.compile(r"\|\s*(replace)\((.*)\)")
JINJA_FUNCTION_IDX_ACCESS: Final[re.Pattern[str]] = re.compile(r"(\w+)\[(\d+)\]")
# `match()` is a JINJA function available in the V1 recipe format
JINJA_FUNCTION_MATCH: Final[re.Pattern[str]] = re.compile(r"match\(.*,.*\)")
JINJA_FUNCTIONS_SET: Final[set[re.Pattern[str]]] = {
Expand Down
102 changes: 69 additions & 33 deletions conda_recipe_manager/parser/recipe_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,39 +92,36 @@ def _parse_yaml(s: str, parser: Optional[RecipeReader] = None) -> JsonType:
"""
output: JsonType = None

# V1 recipes use $-escaped JINJA substitutions that will not throw parse exceptions. If variable substitution
# is requested, we will need to handle that directly.
def _v1_sub_jinja() -> None:
if parser is not None and parser.get_schema_version() == SchemaVersion.V1:
output = RecipeReader._parse_yaml_recursive_sub(
output, parser._render_jinja_vars # pylint: disable=protected-access
)
# Convenience function to substitute variables. Given the re-try mechanism on YAML parsing, we have to attempt
# to perform substitutions a few times. Substitutions may occur as the entire strings or parts in a string.
def _sub_jinja(out: JsonType) -> JsonType:
if parser is None:
return out
return RecipeReader._parse_yaml_recursive_sub(
out, parser._render_jinja_vars # pylint: disable=protected-access
)

# Our first attempt handles special string cases that require quotes that the YAML parser drops. If that fails,
# then we fall back to performing JINJA substitutions.
try:
try:
output = yaml.load(s, Loader=SafeLoader)
_v1_sub_jinja()
output = _sub_jinja(cast(JsonType, yaml.load(s, Loader=SafeLoader)))
except yaml.scanner.ScannerError:
output = cast(JsonType, yaml.load(quote_special_strings(s), Loader=SafeLoader))
_v1_sub_jinja()
output = _sub_jinja(cast(JsonType, yaml.load(quote_special_strings(s), Loader=SafeLoader)))
except Exception: # pylint: disable=broad-exception-caught
# If a construction exception is thrown, attempt to re-parse by replacing Jinja macros (substrings in
# `{{}}`) with friendly string substitution markers, then re-inject the substitutions back in. We classify
# all Jinja substitutions as string values, so we don't have to worry about the type of the actual
# substitution.
sub_list: list[str] = Regex.JINJA_V0_SUB.findall(s)
s = Regex.JINJA_V0_SUB.sub(RECIPE_MANAGER_SUB_MARKER, s)
output = RecipeReader._parse_yaml_recursive_sub(
cast(JsonType, yaml.load(s, Loader=SafeLoader)), lambda d: substitute_markers(d, sub_list)
)
# Because we leverage PyYaml to parse the data structures, we need to perform a second pass to perform
# variable substitutions.
if parser is not None:
output = RecipeReader._parse_yaml_recursive_sub(
output, parser._render_jinja_vars # pylint: disable=protected-access
output = _sub_jinja(
RecipeReader._parse_yaml_recursive_sub(
cast(JsonType, yaml.load(s, Loader=SafeLoader)), lambda d: substitute_markers(d, sub_list)
)
)
return output

@staticmethod
Expand Down Expand Up @@ -225,6 +222,48 @@ def _generate_subtree(value: JsonType) -> list[Node]:
# Primitives can be safely stringified to generate a parse tree.
return RecipeReader(str(stringify_yaml(value)))._root.children # pylint: disable=protected-access

def _set_on_schema_version(self) -> tuple[int, re.Pattern[str]]:
"""
Helper function for `_render_jinja_vars()` that initializes `schema_version`-specific substitution details.
:returns: The starting index and the regex pattern used to substitute V0 or V1 JINJA variables.
"""
match self._schema_version:
case SchemaVersion.V0:
return 2, Regex.JINJA_V0_SUB
case SchemaVersion.V1:
return 3, Regex.JINJA_V1_SUB

@staticmethod
def _set_key_and_matches(
key: str,
) -> tuple[str, Optional[re.Match[str]], Optional[re.Match[str]], Optional[re.Match[str]]]:
"""
Helper function for `_render_jinja_vars()` that takes a JINJA statement (string inside the braces) and attempts
to match and apply any currently supported "JINJA functions" to the statement.
:param key: Sanitized key to perform JINJA functions on.
:returns: The modified key, if any JINJA functions apply.
"""
# TODO add support for REPLACE

# Example: {{ name | lower }}
lower_match = Regex.JINJA_FUNCTION_LOWER.search(key)
if lower_match:
key = key.replace(lower_match.group(), "").strip()

# Example: {{ name | upper }}
upper_match = Regex.JINJA_FUNCTION_UPPER.search(key)
if upper_match:
key = key.replace(upper_match.group(), "").strip()

# Example: {{ name[0] }}
idx_match = Regex.JINJA_FUNCTION_IDX_ACCESS.search(key)
if idx_match:
key = key.replace(f"[{cast(str, idx_match.group(2))}]", "").strip()

return key, lower_match, upper_match, idx_match

def _render_jinja_vars(self, s: str) -> JsonType:
"""
Helper function that replaces Jinja substitutions with their actual set values.
Expand All @@ -233,38 +272,35 @@ def _render_jinja_vars(self, s: str) -> JsonType:
:returns: The original value, augmented with Jinja substitutions. Types are re-rendered to account for multiline
strings that may have been "normalized" prior to this call.
"""

# Initialize `schema_version` specific details.
def _set_on_schema_version() -> tuple[int, re.Pattern[str]]:
match self._schema_version:
case SchemaVersion.V0:
return 2, Regex.JINJA_V0_SUB
case SchemaVersion.V1:
return 3, Regex.JINJA_V1_SUB

start_idx, sub_regex = _set_on_schema_version()
start_idx, sub_regex = self._set_on_schema_version()

# Search the string, replacing all substitutions we can recognize
for match in cast(list[str], sub_regex.findall(s)):
# The regex guarantees the string starts and ends with double braces
key = match[start_idx:-2].strip()
# Check for and interpret common JINJA functions
# TODO add support for UPPER and REPLACE
lower_match = Regex.JINJA_FUNCTION_LOWER.search(key)
if lower_match:
key = key.replace(lower_match.group(), "").strip()
key, lower_match, upper_match, idx_match = RecipeReader._set_key_and_matches(key)

if key in self._vars_tbl:
# Replace value as a string. Re-interpret the entire value before returning.
value = str(self._vars_tbl[key])
if lower_match:
value = value.lower()
if upper_match:
value = value.upper()
if idx_match:
idx = int(cast(str, idx_match.group(2)))
# From our research, it looks like string indexing on JINJA variables is almost exclusively used to get
# the first character in a string. If the index is out of bounds, we will default to the variable's
# value as a fall-back.
if 0 <= idx < len(value):
value = value[idx]
s = s.replace(match, value)
# $-Escaping the unresolved variable does a few things:
# - Clearly identifies the value as an unresolved variable
# - Normalizes the substitution syntax with V1
# - Ensures the returned value is YAML-parsable
elif self._schema_version == SchemaVersion.V0:
elif self._schema_version == SchemaVersion.V0 and s[:2] == "{{":
s = f"${s}"
return cast(JsonType, yaml.load(s, Loader=SafeLoader))

Expand Down Expand Up @@ -403,6 +439,7 @@ def __init__(self, content: str):

# Auto-detect and deserialize the version of the recipe schema. This will change how the class behaves.
self._schema_version = SchemaVersion.V0
# TODO bootstrap this better. `get_value()` has a circular dependency on `_vars_tbl` if `sub_vars` is used.
schema_version = cast(SchemaVersion | int, self.get_value("/schema_version", SchemaVersion.V0))
if isinstance(schema_version, int) and schema_version == 1:
self._schema_version = SchemaVersion.V1
Expand Down Expand Up @@ -766,7 +803,6 @@ def get_value(self, path: str, default: JsonType | SentinelType = _sentinel, sub
return_value: JsonType = None
# Handle unpacking of the last key-value set of nodes.
if node.is_single_key() and not node.is_root():
# As of writing, Jinja substitutions are not used
if node.children[0].multiline_variant != MultilineVariant.NONE:
multiline_str = cast(
str,
Expand Down
14 changes: 6 additions & 8 deletions tests/parser/test_recipe_parser_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,14 @@ def test_get_package_names_to_path(file: str, expected: dict[str, str]) -> None:
"cctools-and-ld64",
"/requirements/build/0",
DependencySection.BUILD,
# TODO Future: Add support for `sub_vars` to render these common compiler variables and/or parse
# the meaning.
MatchSpec("gcc_{{ native_compiler_subdir }}"),
MatchSpec("gcc_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency(
"cctools-and-ld64",
"/requirements/build/1",
DependencySection.BUILD,
MatchSpec("gxx_{{ native_compiler_subdir }}"),
MatchSpec("gxx_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency(
Expand Down Expand Up @@ -192,14 +190,14 @@ def test_get_package_names_to_path(file: str, expected: dict[str, str]) -> None:
"cctools",
"/requirements/build/0",
DependencySection.BUILD,
MatchSpec("gcc_{{ native_compiler_subdir }}"),
MatchSpec("gcc_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency(
"cctools",
"/requirements/build/1",
DependencySection.BUILD,
MatchSpec("gxx_{{ native_compiler_subdir }}"),
MatchSpec("gxx_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency(
Expand Down Expand Up @@ -245,14 +243,14 @@ def test_get_package_names_to_path(file: str, expected: dict[str, str]) -> None:
"ld64",
"/requirements/build/0",
DependencySection.BUILD,
MatchSpec("gcc_{{ native_compiler_subdir }}"),
MatchSpec("gcc_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency(
"ld64",
"/requirements/build/1",
DependencySection.BUILD,
MatchSpec("gxx_{{ native_compiler_subdir }}"),
MatchSpec("gxx_linux-64"),
SelectorParser("[linux]", SchemaVersion.V0),
),
Dependency("ld64", "/requirements/build/2", DependencySection.BUILD, MatchSpec("autoconf"), None),
Expand Down
84 changes: 84 additions & 0 deletions tests/parser/test_recipe_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,45 @@ def test_contains_value(file: str, path: str, expected: bool) -> None:
("simple-recipe_multiline_strings.yaml", "/about/description3", True, QUICK_FOX_SUB_CARROT),
("simple-recipe_multiline_strings.yaml", "/about/description4", True, QUICK_FOX_SUB_CARROT_PLUS),
("simple-recipe_multiline_strings.yaml", "/about/description5", True, QUICK_FOX_SUB_CARROT_MINUS),
## types-toml.yaml ##
# Regression: `{ name[0] }` could not be evaluated.
(
"types-toml.yaml",
"/source/url",
True,
"https://pypi.io/packages/source/t/types-toml/types-toml-0.10.8.6.tar.gz",
),
(
"types-toml.yaml",
"/source",
True,
{
"url": "https://pypi.io/packages/source/t/types-toml/types-toml-0.10.8.6.tar.gz",
"sha256": "6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2",
},
),
## sub_vars.yaml ##
(
"sub_vars.yaml",
"/package/name",
True,
"types-toml",
),
(
"sub_vars.yaml",
"/source/url",
True,
"https://pypi.io/packages/source/t/TYPES-TOML/types-toml-6.tar.gz",
),
(
"sub_vars.yaml",
"/source",
True,
{
"url": "https://pypi.io/packages/source/t/TYPES-TOML/types-toml-6.tar.gz",
"sha256": "6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2",
},
),
## v1_simple-recipe.yaml ##
("v1_format/v1_simple-recipe.yaml", "/build/number", False, 0),
("v1_format/v1_simple-recipe.yaml", "/build/number/", False, 0),
Expand Down Expand Up @@ -681,6 +720,45 @@ def test_contains_value(file: str, path: str, expected: bool) -> None:
"last",
],
),
## v1_types-toml.yaml ##
# Regression: `{ name[0] }` could not be evaluated.
(
"v1_format/v1_types-toml.yaml",
"/source/url",
True,
"https://pypi.io/packages/source/t/types-toml/types-toml-0.10.8.6.tar.gz",
),
(
"v1_format/v1_types-toml.yaml",
"/source",
True,
{
"url": "https://pypi.io/packages/source/t/types-toml/types-toml-0.10.8.6.tar.gz",
"sha256": "6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2",
},
),
## v1_sub_vars.yaml ##
(
"v1_format/v1_sub_vars.yaml",
"/package/name",
True,
"types-toml",
),
(
"v1_format/v1_sub_vars.yaml",
"/source/url",
True,
"https://pypi.io/packages/source/t/TYPES-TOML/types-toml-6.tar.gz",
),
(
"v1_format/v1_sub_vars.yaml",
"/source",
True,
{
"url": "https://pypi.io/packages/source/t/TYPES-TOML/types-toml-6.tar.gz",
"sha256": "6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2",
},
),
## multi-output.yaml ##
("multi-output.yaml", "/outputs/0/build/run_exports/0", False, "bar"),
("multi-output.yaml", "/outputs/0/build/run_exports", False, ["bar"]),
Expand All @@ -699,6 +777,12 @@ def test_contains_value(file: str, path: str, expected: bool) -> None:
# "test": {"commands": ["db_archive -m hello"]},
# },
# ),
## v1_multi-output.yaml ##
("v1_format/v1_multi-output.yaml", "/outputs/0/requirements/run_exports/0", False, "bar"),
("v1_format/v1_multi-output.yaml", "/outputs/0/requirements/run_exports", False, ["bar"]),
# TODO FIX: This case
# ("v1_format/v1_multi-output.yaml", "/outputs/0/build", False, None),
("v1_format/v1_multi-output.yaml", "/outputs/0/requirements", False, {"run_exports": ["bar"]}),
],
)
def test_get_value(file: str, path: str, sub_vars: bool, expected: JsonType) -> None:
Expand Down
48 changes: 48 additions & 0 deletions tests/test_aux_files/sub_vars.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{% set name = "TYPES-toml" %}
{% set version = "0.10.8.6" %}

package:
name: {{ name|lower }}
version: {{ version }}

source:
url: https://pypi.io/packages/source/{{ name[0]|lower }}/{{ name|upper }}/{{ name[42] | lower}}-{{ version[7] }}.tar.gz
sha256: 6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2

build:
number: 0
skip: true # [py<37]
script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation

requirements:
host:
- setuptools
- wheel
- pip
- python
run:
- python

test:
imports:
- types
requires:
- pip
commands:
- pip check
- test -f $SP_DIR/toml-stubs/__init__.pyi # [unix]

about:
home: https://github.com/python/typeshed
summary: Contains edge cases for `RecipeReader::get_value(sub_vars=True)`
description: Contains edge cases for `RecipeReader::get_value(sub_vars=True)`
license: Apache-2.0 AND MIT
license_file: LICENSE
license_family: OTHER
dev_url: https://github.com/python/typeshed
doc_url: https://pypi.org/project/types-toml/

extra:
recipe-maintainers:
- fhoehle
- conda-forge/mypy
Loading

0 comments on commit c7085dd

Please sign in to comment.