diff --git a/setup.py b/setup.py
index 5605d9a..e0b129d 100755
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,8 @@
         "skare3-merge-pr=skare3_tools.github.scripts.merge_pr:main",
         "skare3-release-merge-info=skare3_tools.github.scripts.release_merge_info:main",
         "skare3-milestone-issues=skare3_tools.github.scripts.milestone_issues:main",
+        "skare3-clone-git-repos=skare3_tools.scripts.clone_git_repos:main",
+        "skare3-fix-namespace-packages=skare3_tools.scripts.fix_namespace_packages:main",
     ]
 }
diff --git a/skare3_tools/scripts/clone_git_repos.py b/skare3_tools/scripts/clone_git_repos.py
new file mode 100644
index 0000000..9228c1c
--- /dev/null
+++ b/skare3_tools/scripts/clone_git_repos.py
@@ -0,0 +1,155 @@
+import argparse
+import os
+import subprocess
+from pathlib import Path
+
+import ska_file
+import yaml
+
+from skare3_tools import github
+
+USAGE = """
+skare3-clone-git-repos --help
+
+This script clones or updates git repos from GitHub. It is intended to be used to
+create or maintain a local copy of all repos in an org (e.g. sot). This can be useful
+for doing bulk updates or for doing specialized searches across all repos if the GitHub
+search functionality is not sufficient.
+
+This script is somewhat experts-only and does not have guard rails.
+"""
+
+
+def get_argparse():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "repo_names",
+        nargs="*",
+        help="Repo names to clone or update (default is all applicable repos in org)",
+    )
+    parser.add_argument(
+        "--repos-dir",
+        default="repos",
+        help="Output directory for repos (default=repos)",
+    )
+    parser.add_argument(
+        "--all-packages",
+        action="store_true",
+        help=(
+            "Update all packages in org (default is "
+            "only sot org ska3-flight and non-FSDS packages)"
+        ),
+    )
+    parser.add_argument(
+        "--org",
+        default="sot",
+        help="GitHub org (default=sot)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Dry run only",
+    )
+    parser.add_argument(
+        "--github-token-file",
+        type=Path,
+        help=(
+            "Path to GitHub token file for reading repos from source org "
+            "(if not supplied use GITHUB_TOKEN environment variable)"
+        ),
+    )
+    return parser
+
+
+def get_ska3_pkgs() -> list[str]:
+    """Return list of Ska packages in ska3-flight or non-FSDS environment"""
+    # TODO get this from GitHub instead
+    user_git = Path.home() / "git"
+    ska3_flight = yaml.safe_load(
+        open(user_git / "skare3" / "pkg_defs" / "ska3-flight" / "meta.yaml")
+    )
+    pkgs = [pkg.split()[0] for pkg in ska3_flight["requirements"]["run"]]
+    for pkg_remove in [
+        "ska3-core",
+        "ska3-template",
+        "acis_thermal_check",
+        "acispy",
+        "backstop_history",
+    ]:
+        pkgs.remove(pkg_remove)
+
+    ska3_non_fsds = yaml.safe_load(
+        open(user_git / "skare3" / "environment-non-fsds.yml")
+    )
+    pkgs.extend(ska3_non_fsds["dependencies"])
+    return sorted(pkgs)
+
+
+def update_repo(repos_dir, name, url):
+    path = Path(repos_dir) / name
+    if path.exists():
+        print("Updating", name)
+        with ska_file.chdir(path):
+            for branch in ["master", "main"]:
+                # fmt: off
+                # Non-zero return code means branch doesn't exist
+                if (
+                    subprocess.call(["git", "switch", branch]) == 0
+                    and subprocess.call(["git", "pull", "origin", branch]) == 0
+                ):
+                    # fmt: on
+                    break
+            else:
+                raise Exception("bad return code")
+            subprocess.run(["git", "clean", "-fdx"], check=True)
+    else:
+        with ska_file.chdir(repos_dir):
+            print("Cloning", name)
+            retcode = subprocess.call(["git", "clone", url])
+            if retcode:
+                raise Exception()
+
+
+def get_org_repos(org="sot", token=None):
+    github.init(token=token)
+    org = github.Organization(org)
+    org_repos = org.repositories()
+    return org_repos
+
+
+def get_fake_repo(repo_name, org):
+    out = {"name": repo_name, "clone_url": f"https://github.com/{org}/{repo_name}.git"}
+    return out
+
+
+def main(argv=None):
+    args = get_argparse().parse_args(argv)
+
+    repos_dir = Path(args.repos_dir)
+    repos_dir.mkdir(exist_ok=True, parents=True)
+
+    if args.repo_names:
+        repos = [get_fake_repo(name, org=args.org) for name in args.repo_names]
+    else:
+        if args.all_packages:
+            if args.github_token_file is None:
+                token = os.environ["GITHUB_TOKEN"]
+            else:
+                token = args.github_token_file.read_text().strip()
+            repos_org = get_org_repos(org=args.org, token=token)
+            repos = repos_org
+        else:
+            repos = [get_fake_repo(name, org=args.org) for name in get_ska3_pkgs()]
+
+    for repo in repos:
+        url = repo["clone_url"]
+        name = repo["name"]
+        print(f"********* {repos_dir}/{name} {url} *********")
+        if not args.dry_run:
+            update_repo(repos_dir, name, url)
+        print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skare3_tools/scripts/fix_namespace_packages.py b/skare3_tools/scripts/fix_namespace_packages.py
new file mode 100644
index 0000000..ac21da8
--- /dev/null
+++ b/skare3_tools/scripts/fix_namespace_packages.py
@@ -0,0 +1,266 @@
+import argparse
+import difflib
+import os
+import re
+import subprocess
+from pathlib import Path
+
+import ska_file
+
+
+def run_check(*args, **kwargs):
+    print("Running:", " ".join(args[0]))
+    subprocess.run(*args, check=True, **kwargs)
+
+
+PKGS_MAP = {
+    "Chandra.Maneuver": "chandra_maneuver",
+    "Chandra.Time": "chandra_time",
+    "Chandra.cmd_states": "chandra_cmd_states",
+    "Ska.DBI": "ska_dbi",
+    "Ska.File": "ska_file",
+    "Ska.Matplotlib": "ska_matplotlib",
+    "Ska.Numpy": "ska_numpy",
+    "Ska.ParseCM": "ska_parsecm",
+    "Ska.Shell": "ska_shell",
+    "Ska.Sun": "ska_sun",
+    "Ska.Table": "ska_table",
+    "Ska.TelemArchive": "ska_telemarchive",
+    "Ska.arc5gl": "ska_arc5gl",
+    "Ska.engarchive": "cheta",
+    "Ska.ftp": "ska_ftp",
+    "Ska.quatutil": "ska_quatutil",
+    "Ska.report_ranges": "ska_report_ranges",
+    "Ska.tdb": "ska_tdb",
+}
+
+USAGE = """
+
+``skare3-fix-namespace-packages`` is a utility script to flatten namespace package
+names like ``Chandra.Maneuver`` to the more standard ``chandra_maneuver``.
+
+Although the namespace versions will continue to be supported, the flattened names are
+preferred. This is especially true for developers, where the namespace packages can
+cause subtle problems with imports.
+
+This script has a number of modes of operation:
+- Show summary information on a number of packages with the intent of bulk updates.
+- Show diffs for a single directory and optionally write the changes.
+- Make a git branch and commit the changes for one or more repos.
+- Make a git branch, commit the changes, and make a GitHub PR for one or more repos.
+
+Print summary information on a number of repos
+----------------------------------------------
+This will print a summary of the number of fixes needed for each repo:
+
+    # Get all sot repos that are in Ska3 or Non-FSDS
+    python clone_git_repos.py
+
+    # Summary
+    skare3-fix-namespace-packages --summary-only repos
+
+Fix an arbitrary directory of code files
+----------------------------------------
+If the directory of code files is not a git repo then we need to be careful about
+inspecting the changes and having a way to back them out. Here we first look at the
+diffs, where the directory is named `acis_taco`:
+
+    skare3-fix-namespace-packages --diffs acis_taco
+
+If the diffs look good then we can write the changes:
+
+    skare3-fix-namespace-packages --write acis_taco
+    find acis_taco -name '*.bak' # Backups of original files
+
+Git repo with manual PR creation
+--------------------------------
+If the directory `acis_taco` is a git repo then we can make a branch and commit the
+changes.
+
+NOTE: You are responsible for ensuring the repo is up to date with the remote and
+checked out at the correct branch.
+
+    skare3-fix-namespace-packages --make-branch acis_taco
+
+The changes will be checked into a `flatten-namespace-package-names` branch:
+
+    git show # See all the changes
+
+If the changes look good then you can push the branch and make a PR in the usual way
+from the command line using `git` commands.
+
+Git repo with automatic PR creation
+----------------------------------
+If the directory `acis_taco` is a git repo then we can make a branch and commit the
+changes and then make a PR in one step.
+
+NOTE: You are responsible for ensuring the repo is up to date with the remote and
+checked out at the correct branch.
+
+    skare3-fix-namespace-packages --make-pr --github-token-file=OAUTH acis_taco
+"""
+
+
+def get_argparse():
+    parser = argparse.ArgumentParser(usage=USAGE)
+
+    parser.add_argument("dir_names", nargs="*")
+    parser.add_argument(
+        "--summary-only",
+        action="store_true",
+        help="Summary information only, no fixes applied",
+    )
+    parser.add_argument("--diffs", action="store_true", help="Show diffs")
+    parser.add_argument(
+        "--sort-imports",
+        action="store_true",
+        help="Sort imports (needed for packages with CI checks of sort order)",
+    )
+    parser.add_argument(
+        "-w",
+        "--write",
+        action="store_true",
+        help="Write back modified files",
+    )
+    parser.add_argument(
+        "-n",
+        "--no-backups",
+        action="store_true",
+        help="Don't write backups for modified files",
+    )
+    parser.add_argument(
+        "-b",
+        "--backup-suffix",
+        default=".bak",
+        help="Backup suffix (default=.bak)",
+    )
+    parser.add_argument(
+        "--make-branch",
+        action="store_true",
+        help="Make a git branch and commit changes (implies --write)",
+    )
+    parser.add_argument(
+        "--branch-name",
+        default="flatten-namespace-package-names",
+        help="Branch name to use if --make-branch is set",
+    )
+    parser.add_argument(
+        "--make-pr",
+        action="store_true",
+        help="Make a GitHub pull request for the changes (implies --make-branch)",
+    )
+    parser.add_argument(
+        "--github-token-file",
+        type=Path,
+        help=(
+            "Path to GitHub token file for making a PR "
+            "(if not supplied use GITHUB_TOKEN environment variable)"
+        ),
+    )
+
+    return parser
+
+
+def flatten_namespace_pkgs(file_or_dir, opt: argparse.Namespace):
+    # Find every *.py file in the "repo_name" directory using Path
+    file_or_dir = Path(file_or_dir)
+    files = file_or_dir.rglob("*.py") if file_or_dir.is_dir() else [file_or_dir]
+    fixes_needed = 0
+    for file in files:
+        fixes_needed += flatten_name_pkgs_for_file(file, opt)
+    return fixes_needed
+
+
+def flatten_name_pkgs_for_file(file: Path, opt: argparse.Namespace):
+    text_orig = file.read_text()
+    text = text_orig
+    # Not very fast but it should work
+    for pkg_old, pkg_new in PKGS_MAP.items():
+        text = re.sub(rf"\b{pkg_old}\b", pkg_new, text)
+        if pkg_old in text:
+            print(f"WARNING: {pkg_old} still found by grepping {file}")
+
+    fix_needed = text != text_orig
+    if fix_needed and not opt.summary_only:
+        fixing = " ... fixing" if opt.write else ""
+        print(f" - {file}{fixing}")
+        if opt.diffs:
+            for line in difflib.unified_diff(text_orig.splitlines(), text.splitlines()):
+                print(line)
+        elif opt.write:
+            if opt.no_backups:
+                file.write_text(text)
+            else:
+                file.rename(str(file) + opt.backup_suffix)
+                file.write_text(text)
+
+            # Fix import order and potential new unused imports. Note that both ruff and
+            # Pylance seem to fail in determining that a namespace package import is
+            # unused.
+            run_check(["ruff", "--select", "F401,I001", "--fix", str(file)])
+
+    return fix_needed
+
+
+def make_branch(dir_name: Path, opt: argparse.Namespace):
+    with ska_file.chdir(dir_name):
+        run_check(["git", "switch", "-c", opt.branch_name])
+
+
+def commit_changes(dir_name: Path, opt: argparse.Namespace):
+    with ska_file.chdir(dir_name):
+        run_check(["git", "commit", "-a", "-m", "Flatten namespace packages"])
+
+
+def make_pr(dir_name: Path, opt: argparse.Namespace):
+    with ska_file.chdir(dir_name):
+        run_check(["git", "push", "-u", "origin", opt.branch_name])
+        run_check(["gh", "pr", "create", "--fill"])
+
+
+def main():
+    opt = get_argparse().parse_args()
+    if opt.summary_only:
+        opt.diffs = False
+        opt.write = False
+        opt.make_branch = False
+        opt.make_pr = False
+
+    if opt.github_token_file:
+        # gh cli uses the GITHUB_TOKEN environment variable
+        os.environ["GITHUB_TOKEN"] = opt.github_token_file.read_text().strip()
+
+    if opt.make_pr:
+        opt.make_branch = True
+
+    if opt.make_branch:
+        opt.write = True
+
+    for dir_name in opt.dir_names:
+        if not opt.summary_only:
+            print(f"Processing {dir_name}")
+
+        dir_name = Path(dir_name)
+
+        if opt.make_branch or opt.make_pr:
+            make_branch(dir_name, opt)
+
+        fixes_needed = flatten_namespace_pkgs(dir_name, opt)
+
+        if opt.make_branch:
+            commit_changes(dir_name, opt)
+        if opt.make_pr:
+            make_pr(dir_name, opt)
+
+        if opt.summary_only and fixes_needed > 0:
+            # Any project with pyproject.toml or ruff.toml is using ruff or isort
+            imports_sorted = any(
+                (dir_name / config).exists()
+                for config in ["ruff.toml", "pyproject.toml"]
+            )
+            sort_imports_str = " (use --sort-imports)" if imports_sorted else ""
+            print(f"{dir_name}: {fixes_needed}{sort_imports_str}")
+
+
+if __name__ == "__main__":
+    main()