Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate the new Rust ArgSplitter. #21824

Merged
merged 1 commit into from
Jan 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -663,10 +663,14 @@ class PyOptionId:
) -> None: ...

class PySplitArgs:
pass
def goals(self) -> list[str]: ...
def unknown_goals(self) -> list[str]: ...
def specs(self) -> list[str]: ...
def passthru(self) -> list[str]: ...

class PyArgSplitter:
def __init__(self, build_root: str, known_goals: list[str]) -> None: ...
def split_args(self, args: list[str]) -> PySplitArgs: ...

class PyConfigSource:
def __init__(self, path: str, content: bytes) -> None: ...
Expand Down
176 changes: 27 additions & 149 deletions src/python/pants/option/arg_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@

from __future__ import annotations

import os.path
from abc import ABC
from dataclasses import dataclass
from typing import Iterable, Iterator, Sequence

from pants.base.deprecated import warn_or_error
from pants.engine.internals.native_engine import PyArgSplitter
from pants.option.scope import ScopeInfo
from pants.util.ordered_set import OrderedSet


class ArgSplitterError(Exception):
Expand Down Expand Up @@ -79,23 +78,8 @@ class ArgSplitter:
"""

def __init__(self, known_scope_infos: Iterable[ScopeInfo], buildroot: str) -> None:
self._buildroot = buildroot
self._known_scope_infos = known_scope_infos
self._known_goal_scopes = dict(self._get_known_goal_scopes(known_scope_infos))
self._known_scopes = {si.scope for si in known_scope_infos} | set(
self._known_goal_scopes.keys()
)

# Holds aliases like `-h` for `--help`. Used for disambiguation with ignore specs like
# `-dir::`.
self._single_dash_goal_aliases = {
scope
for scope in self._known_goal_scopes.keys()
if scope.startswith("-") and not scope.startswith("--")
}

# We store in reverse order, for efficient popping off the end.
self._unconsumed_args: list[str] = []
self._native_arg_splitter = PyArgSplitter(buildroot, list(self._known_goal_scopes.keys()))

@staticmethod
def _get_known_goal_scopes(
Expand All @@ -109,73 +93,15 @@ def _get_known_goal_scopes(
yield alias, si

def split_args(self, args: Sequence[str]) -> SplitArgs:
"""Split the specified arg list.
native_split_args = self._native_arg_splitter.split_args(list(args))

args[0] is ignored.

Returns a SplitArgs tuple.
"""
goals: OrderedSet[str] = OrderedSet()
specs: list[str] = []
passthru: list[str] = []
unknown_scopes: list[str] = []
builtin_or_auxiliary_goal: str | None = None
canonical_goals = []
for goal in native_split_args.goals():
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust returns a string-valued goal name, here we get a ScopeInfo instance for it.

si = self._known_goal_scopes.get(goal)
if not si or not si.scope:
continue # Should never happen.

def add_goal(scope: str) -> str:
"""Returns the scope name to assign flags to."""
scope_info = self._known_goal_scopes.get(scope)
if not scope_info:
unknown_scopes.append(scope)
return scope

nonlocal builtin_or_auxiliary_goal
if (scope_info.is_builtin or scope_info.is_auxiliary) and (
not builtin_or_auxiliary_goal or scope.startswith("-")
):
if builtin_or_auxiliary_goal:
goals.add(builtin_or_auxiliary_goal)

# Get scope from info in case we hit an aliased builtin/auxiliary goal.
builtin_or_auxiliary_goal = scope_info.scope
else:
goals.add(scope_info.scope)

# Use builtin/auxiliary goal as default scope for args.
return builtin_or_auxiliary_goal or scope_info.scope

self._unconsumed_args = list(reversed(args))
# The first token is the binary name, so skip it.
self._unconsumed_args.pop()

self._consume_flags()
scope, flags = self._consume_scope()
while scope:
add_goal(scope)
scope, flags = self._consume_scope()

while self._unconsumed_args and not self._at_standalone_double_dash():
if self._at_flag():
self._unconsumed_args.pop()
continue

arg = self._unconsumed_args.pop()
if self.likely_a_spec(arg):
specs.append(arg)
else:
add_goal(arg)

if not builtin_or_auxiliary_goal:
if unknown_scopes and UNKNOWN_GOAL_NAME in self._known_goal_scopes:
builtin_or_auxiliary_goal = UNKNOWN_GOAL_NAME
elif not goals and NO_GOAL_NAME in self._known_goal_scopes:
builtin_or_auxiliary_goal = NO_GOAL_NAME

if self._at_standalone_double_dash():
self._unconsumed_args.pop()
passthru = list(reversed(self._unconsumed_args))

for goal in goals:
si = self._known_goal_scopes[goal]
if (
si.deprecated_scope
and goal == si.deprecated_scope
Expand All @@ -188,73 +114,25 @@ def add_goal(scope: str) -> str:
f"The {si.deprecated_scope} goal was renamed to {si.subsystem_cls.options_scope}",
)

if (si.is_builtin or si.is_auxiliary) and (
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Rust side doesn't (yet) know about builtin or auxiliary goals, so we need this logic here. Note that it's copied directly from the deleted block above.

builtin_or_auxiliary_goal is None or goal.startswith("-")
):
if builtin_or_auxiliary_goal:
canonical_goals.append(builtin_or_auxiliary_goal)
builtin_or_auxiliary_goal = si.scope
else:
canonical_goals.append(si.scope)

if not builtin_or_auxiliary_goal:
if native_split_args.unknown_goals() and UNKNOWN_GOAL_NAME in self._known_goal_scopes:
builtin_or_auxiliary_goal = UNKNOWN_GOAL_NAME
elif not canonical_goals and NO_GOAL_NAME in self._known_goal_scopes:
builtin_or_auxiliary_goal = NO_GOAL_NAME

return SplitArgs(
builtin_or_auxiliary_goal=builtin_or_auxiliary_goal,
goals=list(goals),
unknown_goals=unknown_scopes,
specs=specs,
passthru=passthru,
goals=canonical_goals,
unknown_goals=native_split_args.unknown_goals(),
specs=native_split_args.specs(),
passthru=native_split_args.passthru(),
)

def likely_a_spec(self, arg: str) -> bool:
"""Return whether `arg` looks like a spec, rather than a goal name."""
# Check if it's an ignore spec.
if (
arg.startswith("-")
and arg not in self._single_dash_goal_aliases
and not arg.startswith("--")
):
return True
return any(c in arg for c in (os.path.sep, ".", ":", "*", "#")) or os.path.exists(
os.path.join(self._buildroot, arg)
)

def _consume_scope(self) -> tuple[str | None, list[str]]:
"""Returns a pair (scope, list of flags encountered in that scope).

Note that the flag may be explicitly scoped, and therefore not actually belong to this scope.

For example, in:

pants --check-some-opt=100 check <target>

--check-some-opt should be treated as if it were --check-some-opt=100 in the check scope.
"""
if not self._at_scope():
return None, []
scope = self._unconsumed_args.pop()
flags = self._consume_flags()
return scope, flags

def _consume_flags(self) -> list[str]:
"""Read flags until we encounter the first token that isn't a flag."""
flags = []
while self._at_flag():
flag = self._unconsumed_args.pop()
flags.append(flag)
return flags

def _at_flag(self) -> bool:
if not self._unconsumed_args:
return False
arg = self._unconsumed_args[-1]
if not arg.startswith("--") and not self.is_level_short_arg(arg):
return False
return not self._at_standalone_double_dash() and not self._at_scope()

def _at_scope(self) -> bool:
return bool(self._unconsumed_args) and self._unconsumed_args[-1] in self._known_scopes

def _at_standalone_double_dash(self) -> bool:
"""At the value `--`, used to start passthrough args."""
return bool(self._unconsumed_args) and self._unconsumed_args[-1] == "--"

def is_level_short_arg(self, arg: str) -> bool:
"""We special case the `--level` global option to also be recognized with `-l`.

It's important that this be classified as a global option.

Note that we also need to recognize `-h` and `-v` as builtin goals. That is handled already
via `likely_a_spec()`.
"""
return arg in {"-ltrace", "-ldebug", "-linfo", "-lwarn", "-lerror"}
44 changes: 0 additions & 44 deletions src/python/pants/option/arg_splitter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import os
import shlex
from pathlib import Path
from typing import Any

import pytest
Expand Down Expand Up @@ -65,49 +64,6 @@ def assert_unknown_goal(splitter: ArgSplitter, args_str: str, unknown_goals: lis
assert set(unknown_goals) == set(split_args.unknown_goals)


def test_is_spec(tmp_path: Path, splitter: ArgSplitter, known_scope_infos: list[ScopeInfo]) -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test was ported to Rust, and since we no longer need a Python likely_a_spec() method, we also don't need to test it separately.

unambiguous_specs = [
"a/b/c",
"a/b/c/",
"a/b:c",
"a/b/c.txt",
":c",
"::",
"a/",
"./a.txt",
".",
"*",
"a/b/*.txt",
"a/b/test*",
"a/**/*",
"a/b.txt:tgt",
"a/b.txt:../tgt",
"dir#gen",
"//:tgt#gen",
"cache.java",
"cache.tmp.java",
]

directories_vs_goals = ["foo", "a_b_c"]

# With no directories on disk to tiebreak.
for spec in directories_vs_goals:
assert splitter.likely_a_spec(spec) is False
assert splitter.likely_a_spec(f"-{spec}") is True
for s in unambiguous_specs:
assert splitter.likely_a_spec(s) is True
assert splitter.likely_a_spec(f"-{s}") is True

assert splitter.likely_a_spec("-") is True
assert splitter.likely_a_spec("--") is False

# With directories on disk to tiebreak.
splitter = ArgSplitter(known_scope_infos, tmp_path.as_posix())
for d in directories_vs_goals:
(tmp_path / d).mkdir()
assert splitter.likely_a_spec(d) is True


def goal_split_test(command_line: str, **expected):
return (
command_line,
Expand Down
Loading