# Continuous integration for qmds: install the package on every supported
# Python version, and publish to PyPI when a tag is pushed.
name: CI
on:
  push:
    branches: [ main ]
    tags: [ "**" ]
  pull_request:
    branches: [ "**" ]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12']
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - run: pip install -e .
  release:
    # Publishing is its own job so that it only starts after *every* matrix leg
    # of `test` has succeeded; as a step inside the 3.10 leg it could upload a
    # release while another Python version was still running or had failed.
    needs: test
    if: startsWith(github.ref, 'refs/tags/')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Release
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          # `python -m build` writes the sdist and wheel to dist/, replacing the
          # deprecated direct `python setup.py sdist bdist_wheel` invocation.
          pip install build twine
          python -m build
          twine upload dist/*
# `comm-x` entry point: run `comm` on two files, optionally piping each of them
# through the same sequence of commands first.
#
# Positional `args` are `[exec_cmd...] path1 path2`; commands given via
# `-x/--exec-cmd` are placed before the positional ones. The first command gets
# the file path appended; the remaining commands are further pipeline stages.
#
# NOTE: this is documented with `#` comments rather than a docstring on purpose,
# because click would surface a docstring as the command's `--help` text.
@command('comm-x', short_help='comm two files after running them through a pipeline of other commands', no_args_is_help=True)
@option('-1', '--exclude-1', is_flag=True, help='Exclude lines only found in the first pipeline')
@option('-2', '--exclude-2', is_flag=True, help='Exclude lines only found in the second pipeline')
@option('-3', '--exclude-3', is_flag=True, help='Exclude lines found in both pipelines')
@option('-i', '--case-insensitive', is_flag=True, help='Case insensitive comparison')
@shell_exec_opt
@no_shell_opt
@verbose_opt
@exec_cmd_opt
@args
def main(
    exclude_1: bool,
    exclude_2: bool,
    exclude_3: bool,
    case_insensitive: bool,
    shell_executable: str | None,
    no_shell: bool,
    verbose: bool,
    exec_cmds: Tuple[str, ...],
    args: Tuple[str, ...],
):
    # Imported locally: this module's top-level click import only brings in
    # `option` and `command`.
    from click import UsageError

    if len(args) < 2:
        # A usage error (clean message, non-zero exit) instead of a traceback,
        # matching how `git-diff-x` reports bad invocations.
        raise UsageError('Must provide at least two files to comm')

    *cmds, path1, path2 = args
    cmds = list(exec_cmds) + cmds

    # Build the `comm` invocation once so that the flags apply both with and
    # without a pipeline (previously the no-pipeline path dropped them).
    comm_cmd = [
        'comm',
        *(['-1'] if exclude_1 else []),
        *(['-2'] if exclude_2 else []),
        *(['-3'] if exclude_3 else []),
        *(['-i'] if case_insensitive else []),
    ]

    if not cmds:
        # Nothing to pipe through: compare the files directly.
        process.run([*comm_cmd, path1, path2])
        return

    first, *rest = cmds
    # NOTE(review): with `--no-shell` these command strings are passed on
    # unsplit, whereas `git-diff-x` shlex-splits its commands in that mode —
    # confirm which form `join_pipelines` expects.
    join_pipelines(
        base_cmd=comm_cmd,
        cmds1=[f'{first} {path1}', *rest],
        cmds2=[f'{first} {path2}', *rest],
        verbose=verbose,
        shell=not no_shell,
        shell_executable=shell_executable,
    )
# Options shared with `git-diff-x` (which imports them from this module); each
# one maps onto a flag of the underlying `diff` command.
color_opt = option('-c', '--color', is_flag=True, help='Colorize the output')
unified_opt = option('-U', '--unified', type=int, help='Number of lines of context to show (passes through to `diff`)')
ignore_whitespace_opt = option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")


# `diff-x` entry point: diff two files, optionally piping each of them through
# the same sequence of commands first.
#
# Positional `args` are `[exec_cmd...] path1 path2`; commands given via
# `-x/--exec-cmd` are placed before the positional ones. The first command gets
# the file path appended; the remaining commands are further pipeline stages.
#
# NOTE: this is documented with `#` comments rather than a docstring on purpose,
# because click would surface a docstring as the command's `--help` text.
@command('diff-x', short_help='Diff two files after running them through a pipeline of other commands', no_args_is_help=True)
@color_opt
@shell_exec_opt
@no_shell_opt
@unified_opt
@verbose_opt
@ignore_whitespace_opt
@exec_cmd_opt
@args
def main(
    color: bool,
    shell_executable: str | None,
    no_shell: bool,
    unified: int | None,
    verbose: bool,
    ignore_whitespace: bool,
    exec_cmds: Tuple[str, ...],
    args: Tuple[str, ...],
):
    # Imported locally: this module's top-level click import only brings in
    # `option` and `command`.
    from click import UsageError

    if len(args) < 2:
        # A usage error (clean message, non-zero exit) instead of a traceback,
        # matching how `git-diff-x` reports bad invocations.
        raise UsageError('Must provide at least two files to diff')

    *cmds, path1, path2 = args
    cmds = list(exec_cmds) + cmds

    # Flags forwarded to `diff`, identical for the piped and direct cases.
    diff_cmd = ['diff']
    if ignore_whitespace:
        diff_cmd.append('-w')
    if unified is not None:
        diff_cmd += ['-U', str(unified)]
    if color:
        diff_cmd.append('--color=always')

    if not cmds:
        # No pipeline: diff the files directly. The exit status is deliberately
        # not checked here (no `check=True`).
        subprocess.run([*diff_cmd, path1, path2])
        return

    first, *rest = cmds
    # NOTE(review): with `--no-shell` these command strings are passed on
    # unsplit, whereas `git-diff-x` shlex-splits its commands in that mode —
    # confirm which form `join_pipelines` expects.
    join_pipelines(
        base_cmd=diff_cmd,
        cmds1=[f'{first} {path1}', *rest],
        cmds2=[f'{first} {path2}', *rest],
        verbose=verbose,
        shell=not no_shell,
        shell_executable=shell_executable,
    )
@command('git-diff-x', short_help='Diff a Git-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
@color_opt
@option('-r', '--refspec', default='HEAD', help='`commit1..commit2` (compare two commits) or `commit` (compare to the worktree)')
@shell_exec_opt
@no_shell_opt
@unified_opt
@verbose_opt
@ignore_whitespace_opt
@exec_cmd_opt
@argument('args', metavar='[exec_cmd...] path', nargs=-1)
def main(
    color: bool,
    refspec: str | None,
    shell_executable: str | None,
    no_shell: bool,
    unified: int | None,
    verbose: bool,
    ignore_whitespace: bool,
    exec_cmds: Tuple[str, ...],
    args: Tuple[str, ...],
):
    """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first

    Examples:

    git-diff-x -r HEAD^..HEAD wc -l foo  # Compare the number of lines (`wc -l`) in `foo` at the previous vs. current commit (`HEAD^..HEAD`).

    git-diff-x md5sum foo  # Diff the `md5sum` of `foo` at HEAD (last committed value) vs. the current worktree content.
    """
    if not args:
        raise click.UsageError('Must specify [cmd...] path')

    shell = not no_shell
    # The last positional is the file; anything before it is a pipeline command.
    # Commands given via `-x/--exec-cmd` are placed first.
    *cmds, path = args
    cmds = list(exec_cmds) + cmds

    # "A..B" compares two commits; a bare "A" (or "A..") leaves `ref2` empty,
    # which means "compare commit A against the worktree".
    ref1, _, ref2 = refspec.partition('..')

    if not cmds:
        # No pipeline: defer entirely to `git diff`.
        process.run(['git', 'diff', refspec, '--', path])
        return

    # NOTE(review): `git show REF:path` resolves `path` from the repository
    # root unless it starts with `./` or `../`, while the `git diff` fallback
    # above is relative to the current directory — confirm the intended
    # behaviour when run from a subdirectory.
    cmds1 = [f'git show {ref1}:{path}', *cmds]
    if ref2:
        cmds2 = [f'git show {ref2}:{path}', *cmds]
    else:
        # Worktree side: feed the file on disk straight to the first command.
        cmd, *sub_cmds = cmds
        cmds2 = [f'{cmd} {path}', *sub_cmds]

    if not shell:
        # Without a shell each command must be an argv list, not one string.
        cmds1 = [shlex.split(c) for c in cmds1]
        cmds2 = [shlex.split(c) for c in cmds2]

    join_pipelines(
        base_cmd=[
            'diff',
            *(['-w'] if ignore_whitespace else []),
            *(['-U', str(unified)] if unified is not None else []),
            *(['--color=always'] if color else []),
        ],
        cmds1=cmds1,
        cmds2=cmds2,
        verbose=verbose,
        shell=shell,
        shell_executable=shell_executable,
    )
def _read_text(path):
    """Return the UTF-8 text of ``path``, closing the file afterwards.

    ``path`` is resolved against the current working directory, i.e. the
    directory ``setup.py`` is run from.
    """
    with open(path, encoding="utf-8") as f:
        return f.read()


setup(
    name='qmds',
    version="0.0.1",
    description="Qommands: execute shell pipelines against multiple inputs, diff/compare/join results",
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    packages=find_packages(),
    # One requirement per non-blank, non-comment line of requirements.txt.
    install_requires=[
        line.strip()
        for line in _read_text("requirements.txt").splitlines()
        if line.strip() and not line.strip().startswith("#")
    ],
    entry_points={
        'console_scripts': [
            # `comm-x` is advertised in the README and implemented in
            # qmds/comm_x, so it needs a console script like its siblings.
            'comm-x = qmds.comm_x:main',
            'diff-x = qmds.diff_x:main',
            'git-diff-x = qmds.git_diff_x:main',
        ],
    },
    license="MIT",
    author="Ryan Williams",
    author_email="ryan@runsascoded.com",
    # NOTE(review): `author_url` does not appear to be a recognised setuptools
    # keyword; consider moving it under `project_urls` — confirm.
    author_url="https://github.com/ryan-williams",
    url="https://github.com/runsascoded/qmds",
)