Skip to content

Commit

Permalink
Add new ecosystem comparison modes for the formatter (#8416)
Browse files Browse the repository at this point in the history
Previously, the ecosystem checks formatted with the baseline then
formatted again with `--diff` to get the changed files.

Now, the ecosystem checks support a new mode where we:
- Format with the baseline
- Commit the changes
- Reset to the target ref
- Format again
- Check the diff from the baseline commit

This effectively tests Ruff changes on unformatted code rather than
changes in previously formatted code (unless, of course, the project is
already using Ruff).

While this mode is the new default, I've retained the old one for local
checks. The mode can be toggled with `--format-comparison <type>`.

Includes some more aggressive resetting of the GitHub repositories when
cached.

Here, I've also stubbed comparison modes in which `black` is used as the
baseline. While these do nothing here, #8419 adds support.

I tested this with the commit from #8216 and ecosystem changes appear
https://gist.github.com/zanieb/a982ec8c392939043613267474471a6e
  • Loading branch information
zanieb authored Nov 2, 2023
1 parent 4d23c1f commit 2f7e2a8
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 13 deletions.
6 changes: 6 additions & 0 deletions python/ruff-ecosystem/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ Run `ruff format` ecosystem checks comparing your debug build to your system Ruf
ruff-ecosystem format ruff "./target/debug/ruff"
```

Run `ruff format` ecosystem checks comparing with changes to code that is already formatted:

```shell
ruff-ecosystem format ruff "./target/debug/ruff" --format-comparison ruff-then-ruff
```

The default output format is markdown, which includes nice summaries of the changes. You can use `--output-format json` to display the raw data — this is
particularly useful when making changes to the ecosystem checks.

Expand Down
18 changes: 16 additions & 2 deletions python/ruff-ecosystem/ruff_ecosystem/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from ruff_ecosystem import logger
from ruff_ecosystem.defaults import DEFAULT_TARGETS
from ruff_ecosystem.format import FormatComparison
from ruff_ecosystem.main import OutputFormat, main
from ruff_ecosystem.projects import RuffCommand

Expand Down Expand Up @@ -77,6 +78,12 @@ def entrypoint():
if args.force_preview:
targets = [target.with_preview_enabled() for target in targets]

format_comparison = (
FormatComparison(args.format_comparison)
if args.ruff_command == RuffCommand.format.value
else None
)

with cache_context as cache:
loop = asyncio.get_event_loop()
main_task = asyncio.ensure_future(
Expand All @@ -88,6 +95,7 @@ def entrypoint():
format=OutputFormat(args.output_format),
project_dir=Path(cache),
raise_on_failure=args.pdb,
format_comparison=format_comparison,
)
)
# https://stackoverflow.com/a/58840987/3549270
Expand Down Expand Up @@ -120,7 +128,7 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--output-format",
choices=[option.name for option in OutputFormat],
choices=[option.value for option in OutputFormat],
default="markdown",
help="Location for caching cloned repositories",
)
Expand All @@ -140,9 +148,15 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Force preview mode to be enabled for all projects",
)
parser.add_argument(
"--format-comparison",
choices=[option.value for option in FormatComparison],
default=FormatComparison.ruff_and_ruff,
help="Type of comparison to make when checking formatting.",
)
parser.add_argument(
"ruff_command",
choices=[option.name for option in RuffCommand],
choices=[option.value for option in RuffCommand],
help="The Ruff command to test",
)
parser.add_argument(
Expand Down
88 changes: 86 additions & 2 deletions python/ruff-ecosystem/ruff_ecosystem/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import time
from asyncio import create_subprocess_exec
from enum import Enum
from pathlib import Path
from subprocess import PIPE
from typing import TYPE_CHECKING, Sequence
Expand Down Expand Up @@ -123,8 +124,33 @@ async def compare_format(
ruff_comparison_executable: Path,
options: FormatOptions,
cloned_repo: ClonedRepository,
format_comparison: FormatComparison,
):
# Run format without diff to get the baseline
args = (ruff_baseline_executable, ruff_comparison_executable, options, cloned_repo)
match format_comparison:
case FormatComparison.ruff_then_ruff:
coro = format_then_format(Formatter.ruff, *args)
case FormatComparison.ruff_and_ruff:
coro = format_and_format(Formatter.ruff, *args)
case FormatComparison.black_then_ruff:
coro = format_then_format(Formatter.black, *args)
case FormatComparison.black_and_ruff:
coro = format_and_format(Formatter.black, *args)
case _:
raise ValueError(f"Unknown format comparison type {format_comparison!r}.")

diff = await coro
return Comparison(diff=Diff(diff), repo=cloned_repo)


async def format_then_format(
baseline_formatter: Formatter,
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
options: FormatOptions,
cloned_repo: ClonedRepository,
) -> Sequence[str]:
# Run format to get the baseline
await ruff_format(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
Expand All @@ -139,8 +165,39 @@ async def compare_format(
options=options,
diff=True,
)
return diff

return Comparison(diff=Diff(diff), repo=cloned_repo)

async def format_and_format(
baseline_formatter: Formatter,
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
options: FormatOptions,
cloned_repo: ClonedRepository,
) -> Sequence[str]:
# Run format without diff to get the baseline
await ruff_format(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
)
# Commit the changes
commit = await cloned_repo.commit(
message=f"Formatted with baseline {ruff_baseline_executable}"
)
# Then reset
await cloned_repo.reset()
# Then run format again
await ruff_format(
executable=ruff_comparison_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
)
# Then get the diff from the commit
diff = await cloned_repo.diff(commit)
return diff


async def ruff_format(
Expand Down Expand Up @@ -177,3 +234,30 @@ async def ruff_format(

lines = result.decode("utf8").splitlines()
return lines


class FormatComparison(Enum):
ruff_then_ruff = "ruff-then-ruff"
"""
Run Ruff baseline then Ruff comparison; checks for changes in behavior when formatting previously "formatted" code
"""

ruff_and_ruff = "ruff-and-ruff"
"""
Run Ruff baseline then reset and run Ruff comparison; checks changes in behavior when formatting "unformatted" code
"""

black_then_ruff = "black-then-ruff"
"""
Run Black baseline then Ruff comparison; checks for changes in behavior when formatting previously "formatted" code
"""

black_and_ruff = "black-and-ruff"
""""
Run Black baseline then reset and run Ruff comparison; checks changes in behavior when formatting "unformatted" code
"""


class Formatter(Enum):
black = "black"
ruff = "ruff"
18 changes: 12 additions & 6 deletions python/ruff-ecosystem/ruff_ecosystem/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@

from ruff_ecosystem import logger
from ruff_ecosystem.check import compare_check, markdown_check_result
from ruff_ecosystem.format import compare_format, markdown_format_result
from ruff_ecosystem.format import (
FormatComparison,
compare_format,
markdown_format_result,
)
from ruff_ecosystem.projects import (
Project,
RuffCommand,
Expand All @@ -30,6 +34,7 @@ async def main(
targets: list[Project],
project_dir: Path,
format: OutputFormat,
format_comparison: FormatComparison | None,
max_parallelism: int = 50,
raise_on_failure: bool = False,
) -> None:
Expand All @@ -55,6 +60,7 @@ async def limited_parallelism(coroutine: Awaitable[T]) -> T:
ruff_comparison_executable,
target,
project_dir,
format_comparison,
)
)
for target in targets
Expand Down Expand Up @@ -96,21 +102,20 @@ async def clone_and_compare(
ruff_comparison_executable: Path,
target: Project,
project_dir: Path,
format_comparison: FormatComparison | None,
) -> Comparison:
"""Check a specific repository against two versions of ruff."""
assert ":" not in target.repo.owner
assert ":" not in target.repo.name

match command:
case RuffCommand.check:
compare, options = (
compare_check,
target.check_options,
)
compare, options, kwargs = (compare_check, target.check_options, {})
case RuffCommand.format:
compare, options = (
compare, options, kwargs = (
compare_format,
target.format_options,
{"format_comparison": format_comparison},
)
case _:
raise ValueError(f"Unknown target Ruff command {command}")
Expand All @@ -124,6 +129,7 @@ async def clone_and_compare(
ruff_comparison_executable,
options,
cloned_repo,
**kwargs,
)
except ExceptionGroup as e:
raise e.exceptions[0] from e
Expand Down
84 changes: 81 additions & 3 deletions python/ruff-ecosystem/ruff_ecosystem/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from subprocess import PIPE
from subprocess import DEVNULL, PIPE
from typing import Self

from ruff_ecosystem import logger
Expand Down Expand Up @@ -133,7 +133,8 @@ async def clone(self: Self, checkout_dir: Path) -> ClonedRepository:
logger.debug(f"Reusing {self.owner}:{self.name}")

if self.ref:
logger.debug(f"Checking out ref {self.ref}")
logger.debug(f"Checking out {self.fullname} @ {self.ref}")

process = await create_subprocess_exec(
*["git", "checkout", "-f", self.ref],
cwd=checkout_dir,
Expand All @@ -147,7 +148,9 @@ async def clone(self: Self, checkout_dir: Path) -> ClonedRepository:
f"Failed to checkout {self.ref}: {stderr.decode()}"
)

return await ClonedRepository.from_path(checkout_dir, self)
cloned_repo = await ClonedRepository.from_path(checkout_dir, self)
await cloned_repo.reset()
return cloned_repo

logger.debug(f"Cloning {self.owner}:{self.name} to {checkout_dir}")
command = [
Expand Down Expand Up @@ -179,6 +182,28 @@ async def clone(self: Self, checkout_dir: Path) -> ClonedRepository:
logger.debug(
f"Finished cloning {self.fullname} with status {status_code}",
)

# Configure git user — needed for `self.commit` to work
await (
await create_subprocess_exec(
*["git", "config", "user.email", "ecosystem@astral.sh"],
cwd=checkout_dir,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=DEVNULL,
stderr=DEVNULL,
)
).wait()

await (
await create_subprocess_exec(
*["git", "config", "user.name", "Ecosystem Bot"],
cwd=checkout_dir,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=DEVNULL,
stderr=DEVNULL,
)
).wait()

return await ClonedRepository.from_path(checkout_dir, self)


Expand Down Expand Up @@ -236,3 +261,56 @@ async def _get_head_commit(checkout_dir: Path) -> str:
raise ProjectSetupError(f"Failed to retrieve commit sha at {checkout_dir}")

return stdout.decode().strip()

async def reset(self: Self) -> None:
"""
Reset the cloned repository to the ref it started at.
"""
process = await create_subprocess_exec(
*["git", "reset", "--hard", "origin/" + self.ref] if self.ref else [],
cwd=self.path,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=PIPE,
stderr=PIPE,
)
_, stderr = await process.communicate()
if await process.wait() != 0:
raise RuntimeError(f"Failed to reset: {stderr.decode()}")

async def commit(self: Self, message: str) -> str:
"""
Commit all current changes.
Empty commits are allowed.
"""
process = await create_subprocess_exec(
*["git", "commit", "--allow-empty", "-a", "-m", message],
cwd=self.path,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=PIPE,
stderr=PIPE,
)
_, stderr = await process.communicate()
if await process.wait() != 0:
raise RuntimeError(f"Failed to commit: {stderr.decode()}")

return await self._get_head_commit(self.path)

async def diff(self: Self, *args: str) -> list[str]:
"""
Get the current diff from git.
Arguments are passed to `git diff ...`
"""
process = await create_subprocess_exec(
*["git", "diff", *args],
cwd=self.path,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=PIPE,
stderr=PIPE,
)
stdout, stderr = await process.communicate()
if await process.wait() != 0:
raise RuntimeError(f"Failed to commit: {stderr.decode()}")

return stdout.decode().splitlines()

0 comments on commit 2f7e2a8

Please sign in to comment.