diff --git a/.github/workflows/check-alerts.yml b/.github/workflows/check-alerts.yml
index 5b69ddf225..773899bacc 100644
--- a/.github/workflows/check-alerts.yml
+++ b/.github/workflows/check-alerts.yml
@@ -19,13 +19,20 @@ jobs:
       - repo: pytorch/pytorch
         branch: master
         with_flaky_test_alerting: YES
+        job_filter_regex: ""
       - repo: pytorch/pytorch
         branch: nightly
         with_flaky_test_alerting: NO
+        job_filter_regex: ""
+      - repo: pytorch/builder
+        branch: main
+        with_flaky_test_alerting: NO
+        job_filter_regex: "nightly.pypi.binary.size.validation"
     env:
       REPO_TO_CHECK: ${{ matrix.repo }}
       BRANCH_TO_CHECK: ${{ matrix.branch }}
       WITH_FLAKY_TEST_ALERT: ${{ matrix.with_flaky_test_alerting }}
+      JOB_NAME_REGEX: ${{ matrix.job_filter_regex }}
       # Don't do actual work on pull request
       DRY_RUN: ${{ github.event_name == 'pull_request'}}
     runs-on: ubuntu-18.04
diff --git a/.github/workflows/test-binary-size-validation.yml b/.github/workflows/test-binary-size-validation.yml
new file mode 100644
index 0000000000..78536dd789
--- /dev/null
+++ b/.github/workflows/test-binary-size-validation.yml
@@ -0,0 +1,21 @@
+name: Test binary size validation script
+on:
+  pull_request:
+    paths:
+      - .github/workflows/binary-size-validation.yml
+      - tools/binary_size_validation/test_binary_size_validation.py
+      - tools/binary_size_validation/binary_size_validation.py
+  workflow_dispatch:
+
+jobs:
+  test-binary-size-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Install requirements
+        run: |
+          pip3 install -r tools/binary_size_validation/requirements.txt
+      - name: Run pytest
+        run: |
+          pytest tools/binary_size_validation/test_binary_size_validation.py
diff --git a/tools/binary_size_validation/README.md b/tools/binary_size_validation/README.md
new file mode 100644
index 0000000000..c8c0653902
--- /dev/null
+++ b/tools/binary_size_validation/README.md
@@ -0,0 +1,27 @@
+# PyTorch Wheel Binary Size Validation
+
+A script that fetches the PyTorch wheels of a given channel (test, nightly)
+and validates their binary sizes against a given threshold.
+
+
+### Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+### Usage
+
+```bash
+# print help
+python binary_size_validation.py --help
+
+# print the sizes of all items in the index
+python binary_size_validation.py --url https://download.pytorch.org/whl/nightly/torch/
+
+# fail if any of the torch-2.0 wheels is larger than 900 MB
+python binary_size_validation.py --url https://download.pytorch.org/whl/nightly/torch/ --include "torch-2\.0" --threshold 900
+
+# fail if any of the latest nightly pypi wheels is larger than 750 MB
+python binary_size_validation.py --include "pypi" --only-latest-version --threshold 750
+```
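For reference, a passing run of the script prints one `Validating ...` line and one size line per wheel (the output format is taken from the `print` calls in the script below; the wheel name and size in this transcript are made up for illustration):

```bash
$ python binary_size_validation.py --url https://download.pytorch.org/whl/nightly/torch/ --include "torch-2\.0" --threshold 900
Validating https://download.pytorch.org/whl/nightly/cpu/torch-2.0.0.dev20230207%2Bcpu-cp310-cp310-linux_x86_64.whl...
torch-2.0.0.dev20230207+cpu-cp310-cp310-linux_x86_64.whl: 195.31 MB
```

If any wheel exceeds the threshold, the script raises a `RuntimeError`, which fails the CI job.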
diff --git a/tools/binary_size_validation/binary_size_validation.py b/tools/binary_size_validation/binary_size_validation.py
new file mode 100644
index 0000000000..8cd1b3a60e
--- /dev/null
+++ b/tools/binary_size_validation/binary_size_validation.py
@@ -0,0 +1,91 @@
+# Script that parses a wheel index (e.g. https://download.pytorch.org/whl/test/torch/),
+# then fetches and validates the binary size of the files that match the given regex.
+
+import requests
+import re
+from collections import namedtuple
+import click
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+Wheel = namedtuple("Wheel", ["name", "url"])
+
+
+def parse_index(html: str,
+                base_url: str,
+                include_regex: str = "",
+                exclude_regex: str = "",
+                latest_version_only: bool = False) -> list[Wheel]:
+    """
+    Parse the HTML index page and return a list of wheels.
+    :param html: HTML page
+    :param base_url: base URL of the page
+    :param include_regex: regex to filter the wheel names. If empty, all wheels are included
+    :param exclude_regex: regex to exclude the matching wheel names. If empty, no wheels are excluded
+    :param latest_version_only: if True, return the wheels of the latest version only
+    :return: list of wheels
+    """
+    soup = BeautifulSoup(html, "html.parser")
+
+    wheels = []
+    for a in soup.find_all("a"):
+        wheel_name = a.text
+        wheel_url = urljoin(base_url, a.get("href"))
+        if (not include_regex or re.search(include_regex, wheel_name)) \
+                and (not exclude_regex or not re.search(exclude_regex, wheel_name)):
+            wheels.append(Wheel(name=wheel_name, url=wheel_url))
+
+    # filter out the wheels that are not the latest version
+    if len(wheels) > 0 and latest_version_only:
+        # get the version prefixes (up to the second '+'/'-' sign) of the wheels
+        prefixes = set()
+        for wheel in wheels:
+            match = re.search(r"^([^-+]+[-+][^-+]+)[-+]", wheel.name)
+            if not match:
+                raise RuntimeError(f"Failed to get version prefix of {wheel.name}. "
+                                   "Please check the include/exclude regex or don't use --only-latest-version")
+            prefixes.add(match.group(1))
+        latest_version = max(prefixes)
+        print(f"Latest version prefix: {latest_version}")
+
+        # keep only the wheels of the latest version
+        wheels = [wheel for wheel in wheels if wheel.name.startswith(latest_version)]
+
+    return wheels
+
+
+def get_binary_size(file_url: str) -> int:
+    """
+    Get the binary size of the given file.
+    :param file_url: URL of the file
+    :return: binary size in bytes
+    """
+    return int(requests.head(file_url).headers["Content-Length"])
+
+
+@click.command(
+    help="Validate the binary sizes of the given wheel index."
+)
+@click.option("--url", help="URL of the wheel index",
+              default="https://download.pytorch.org/whl/nightly/torch/")
+@click.option("--include", help="regex to filter the wheel names. Only the matching wheel names will be checked.",
+              default="")
+@click.option("--exclude", help="regex to exclude wheel names. "
+                                "Matching wheel names will NOT be checked.",
+              default="")
+@click.option("--threshold", help="size threshold in MB; 0 disables the size check", default=0)
+@click.option("--only-latest-version", help="only validate the latest version",
+              is_flag=True, show_default=True, default=False)
+def main(url, include, exclude, threshold, only_latest_version):
+    page = requests.get(url)
+    wheels = parse_index(page.text, url, include, exclude, only_latest_version)
+    for wheel in wheels:
+        print(f"Validating {wheel.url}...")
+        size = get_binary_size(wheel.url)
+        print(f"{wheel.name}: {size / 1024 / 1024:.2f} MB")
+        if threshold and size > threshold * 1024 * 1024:
+            raise RuntimeError(
+                f"Binary size of {wheel.name} {size / 1024 / 1024:.2f} MB exceeds the threshold of {threshold} MB")
+
+
+if __name__ == "__main__":
+    main()
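A note on the trickiest part of `parse_index` above: `--only-latest-version` extracts a version prefix ("up to the second '+'/'-' sign") and takes the lexicographic maximum. A minimal standalone sketch of what that regex captures, using wheel names from the test fixture further down:

```python
import re

# same pattern as in parse_index: package name, separator, version, then the next separator
PREFIX_RE = r"^([^-+]+[-+][^-+]+)[-+]"

print(re.search(PREFIX_RE, "torch-2.0.0.dev20230207+rocm5.3-cp39-cp39-linux_x86_64.whl").group(1))
# torch-2.0.0.dev20230207
print(re.search(PREFIX_RE, "torch-1.13.0.dev20220728+cpu-cp310-cp310-linux_x86_64.whl").group(1))
# torch-1.13.0.dev20220728
```

`max()` over these prefixes picks the newest nightly because the date is a fixed-width suffix; note that plain string comparison is not a general version ordering.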
diff --git a/tools/binary_size_validation/requirements.txt b/tools/binary_size_validation/requirements.txt
new file mode 100644
index 0000000000..d1b6380ec4
--- /dev/null
+++ b/tools/binary_size_validation/requirements.txt
@@ -0,0 +1,4 @@
+beautifulsoup4==4.11.2
+click==8.0.4
+pytest==7.1.1
+requests==2.27.1
diff --git a/tools/binary_size_validation/test_binary_size_validation.py b/tools/binary_size_validation/test_binary_size_validation.py
new file mode 100644
index 0000000000..3bca3d4234
--- /dev/null
+++ b/tools/binary_size_validation/test_binary_size_validation.py
@@ -0,0 +1,47 @@
+from binary_size_validation import parse_index
+
+# ignore long lines in this file
+# flake8: noqa: E501
+test_html = """
+<!DOCTYPE html>
+<html>
+  <body>
+    <h1>Links for torch</h1>
+    <a href="/whl/nightly/cpu/torch-1.13.0.dev20220728%2Bcpu-cp310-cp310-linux_x86_64.whl">torch-1.13.0.dev20220728+cpu-cp310-cp310-linux_x86_64.whl</a><br/>
+    <a href="/whl/nightly/cpu/torch-1.13.0.dev20220728%2Bcpu-cp310-cp310-win_amd64.whl">torch-1.13.0.dev20220728+cpu-cp310-cp310-win_amd64.whl</a><br/>
+    <a href="/whl/nightly/cpu/torch-1.13.0.dev20220728%2Bcpu-cp37-cp37m-linux_x86_64.whl">torch-1.13.0.dev20220728+cpu-cp37-cp37m-linux_x86_64.whl</a><br/>
+    <a href="/whl/nightly/cpu/torch-1.13.0.dev20220728%2Bcpu-cp37-cp37m-win_amd64.whl">torch-1.13.0.dev20220728+cpu-cp37-cp37m-win_amd64.whl</a><br/>
+    <a href="/whl/nightly/rocm5.3/torch-2.0.0.dev20230206%2Brocm5.3-cp39-cp39-linux_x86_64.whl">torch-2.0.0.dev20230206+rocm5.3-cp39-cp39-linux_x86_64.whl</a><br/>
+    <a href="/whl/nightly/rocm5.3/torch-2.0.0.dev20230207%2Brocm5.3-cp310-cp310-linux_x86_64.whl">torch-2.0.0.dev20230207+rocm5.3-cp310-cp310-linux_x86_64.whl</a><br/>
+    <a href="/whl/nightly/rocm5.3/torch-2.0.0.dev20230207%2Brocm5.3-cp38-cp38-linux_x86_64.whl">torch-2.0.0.dev20230207+rocm5.3-cp38-cp38-linux_x86_64.whl</a><br/>
+    <a href="/whl/nightly/rocm5.3/torch-2.0.0.dev20230207%2Brocm5.3-cp39-cp39-linux_x86_64.whl">torch-2.0.0.dev20230207+rocm5.3-cp39-cp39-linux_x86_64.whl</a><br/>
+ + + +""" + +base_url = "https://download.pytorch.org/whl/nightly/torch/" + + +def test_get_whl_links(): + wheels = parse_index(test_html, base_url) + assert len(wheels) == 8 + assert wheels[0].url == \ + "https://download.pytorch.org/whl/nightly/cpu/torch-1.13.0.dev20220728%2Bcpu-cp310-cp310-linux_x86_64.whl" + + +def test_include_exclude(): + wheels = parse_index(test_html, base_url, "amd6\\d") + assert len(wheels) == 2 + assert wheels[0].name == "torch-1.13.0.dev20220728+cpu-cp310-cp310-win_amd64.whl" + assert wheels[1].name == "torch-1.13.0.dev20220728+cpu-cp37-cp37m-win_amd64.whl" + + wheels = parse_index(test_html, base_url, "amd6\\d", "cp37") + assert len(wheels) == 1 + assert wheels[0].name == "torch-1.13.0.dev20220728+cpu-cp310-cp310-win_amd64.whl" + + +def test_latest_version_only(): + wheels = parse_index(test_html, base_url, latest_version_only=True) + assert len(wheels) == 3 + assert all(w.name.startswith("torch-2.0.0.dev20230207") for w in wheels) diff --git a/torchci/scripts/check_alerts.py b/torchci/scripts/check_alerts.py index d1ff4e3cac..9e68087831 100755 --- a/torchci/scripts/check_alerts.py +++ b/torchci/scripts/check_alerts.py @@ -4,11 +4,9 @@ import re import urllib.parse from collections import defaultdict -from curses.ascii import CAN from datetime import datetime, timedelta from difflib import SequenceMatcher -from email.policy import default -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Set, Tuple from setuptools import distutils # type: ignore[import] import requests @@ -400,11 +398,18 @@ def trunk_is_green(sha_grid: Any): return first_green_sha_ind < first_red_sha_ind -# Creates Job Statuses which has the logic for if need to alert or if there's flaky jobs def classify_jobs( - job_names: List[str], sha_grid: Any + all_job_names: List[str], sha_grid: Any, filtered_jobs_names: Set[str] ) -> Tuple[List[JobStatus], List[Any]]: - job_data = map_job_data(job_names, sha_grid) + """ + Creates Job Statuses which has the logic for if need to alert or if there's flaky jobs. + Classifies jobs into jobs to alert on and flaky jobs. 
+    :param all_job_names: list of all job names as returned by the HUD
+    :param sha_grid: list of all job data as returned by the HUD (parallel index to all_job_names)
+    :param filtered_job_names: set of job names to actually consider
+    :return: tuple of (job statuses to alert on, flaky jobs)
+    """
+    job_data = map_job_data(all_job_names, sha_grid)
     job_statuses: list[JobStatus] = []
     for job in job_data:
         job_statuses.append(JobStatus(job, job_data[job]))
@@ -413,10 +418,13 @@
     flaky_jobs = []
 
     for job_status in job_statuses:
+        if job_status.job_name not in filtered_job_names:
+            continue
         if job_status.should_alert():
             jobs_to_alert_on.append(job_status)
         flaky_jobs.extend(job_status.flaky_jobs)
-    return (jobs_to_alert_on, flaky_jobs)
+
+    return jobs_to_alert_on, flaky_jobs
 
 
 def handle_flaky_tests_alert(existing_alerts: List[Dict]) -> Dict:
@@ -453,8 +461,8 @@ def filter_job_names(job_names: List[str], job_name_regex: str) -> List[str]:
 
 def check_for_recurrently_failing_jobs_alert(repo: str, branch: str, job_name_regex: str, dry_run: bool):
     job_names, sha_grid = fetch_hud_data(repo=repo, branch=branch)
-    job_names = filter_job_names(job_names, job_name_regex)
-    (jobs_to_alert_on, flaky_jobs) = classify_jobs(job_names, sha_grid)
+    filtered_job_names = set(filter_job_names(job_names, job_name_regex))
+    (jobs_to_alert_on, flaky_jobs) = classify_jobs(job_names, sha_grid, filtered_job_names)
 
     # Fetch alerts
     existing_alerts = fetch_alerts(TEST_INFRA_REPO_NAME, PYTORCH_ALERT_LABEL)
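To make the new plumbing concrete: `job_filter_regex` from the workflow matrix arrives here as `job_name_regex`, and only jobs whose names match it are considered for alerting. A minimal sketch of the matching this enables (the job names below are hypothetical, and `filter_job_names` is assumed to apply a plain `re.search` to each name):

```python
import re

# the regex from the new pytorch/builder matrix entry; '.' matches any
# single character, so it tolerates '-', '_', ' ', etc. between the words
JOB_NAME_REGEX = "nightly.pypi.binary.size.validation"

# hypothetical HUD job names
job_names = ["nightly-pypi-binary-size-validation", "nightly-conda-build"]

filtered_job_names = {name for name in job_names if re.search(JOB_NAME_REGEX, name)}
print(filtered_job_names)  # {'nightly-pypi-binary-size-validation'}
```

Note that `classify_jobs` still builds a `JobStatus` for every job, since `sha_grid` is parallel-indexed to `all_job_names`, and applies the filter only when collecting jobs to alert on.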