Skip to content

Maintainer Notes

vfdev edited this page Feb 15, 2023 · 37 revisions

How to create a new release

Step one - branch cut

  • Cut a branch from master:
    • Cherry-pick to master the commit like "Change docs versionadded/changed 0.5.X -> 0.4.10"
    • Branch name should be "X.Y.Z" without prefix "v"
    • Optionally, revert commits that should not go to the release
  • Draft a new release note
  • Tag name: "vX.Y.Z"
  • Make sure to point to the branch (not master !)

The created release binaries will be uploaded to PyPI and conda.

See the corresponding GitHub Actions:

Step two - Release note

Get your contributors for this release
import requests


def get_datetime_of_latest_release():
    """Return the ``created_at`` value of the latest pytorch/ignite release.

    The value is read from the GitHub REST API. An ``AssertionError`` is raised
    when the response status is not 200 or the payload has no ``created_at`` key.
    """
    url = 'https://api.github.com/repos/pytorch/ignite/releases/latest'
    response = requests.get(url)
    assert response.status_code == 200
    payload = response.json()
    assert "created_at" in payload, "{}".format(payload)
    return payload["created_at"]


def get_contributors_since_latest_release():
    """Return the sorted, de-duplicated ``@login`` mentions of commit authors since the latest release.

    Pages through the GitHub commits API (100 commits per page), starting from the
    timestamp given by ``get_datetime_of_latest_release``, until an empty page is
    returned. Commits whose ``author`` entry is missing or ``None`` are ignored.
    """
    since = get_datetime_of_latest_release()
    url_template = 'https://api.github.com/repos/pytorch/ignite/commits?since={}&per_page=100&page={}'
    logins = set()
    page = 1
    while True:
        r = requests.get(url_template.format(since, page))
        assert r.status_code == 200, r
        commits = r.json()
        if not commits:
            # An empty page means that all commits have been consumed
            break
        for commit in commits:
            author = commit.get('author')
            if author is not None:
                logins.add(author['login'])
        page += 1

    return sorted("@{}".format(login) for login in logins)


# Print the contributors as a single comma-separated line of @-mentions
print(", ".join(get_contributors_since_latest_release()))
Generate Draft for the Release Note

Prerequisites

git clone https://github.com/pytorch/ignite.git /tmp/ignite
cd /tmp/ignite && git branch && git checkout <prev_release> && git checkout <next_release> && git checkout master
pip install GitPython PyGithub pandas

Python script

# GitHub "owner/name" identifier of the repository, passed to `Github().get_repo`
REPO = "pytorch/ignite"
# Path of the local clone created in the prerequisites step
LOCAL_REPO = "/tmp/ignite"
# Placeholders: replace with the refs of the previous and the upcoming release
# (presumably quoted branch names - they are passed to `Repo.iter_commits`; TODO confirm)
PREV = <prev_release>
NEXT = <next_release>


import time
from git import Repo
from github import Github
import pandas


def pprint(commit, idx=None, *args, **kw):
    """Print a one-line summary of a commit.

    The line is made of the optional ``idx`` prefix, the commit date formatted
    from ``commit.committed_date`` with ``time.gmtime``, the author name followed
    by a colon and the first line of the commit message. Any extra positional
    or keyword arguments are forwarded to ``print``.
    """
    prefix = "" if idx is None else str(idx) + " "
    timestamp = time.gmtime(commit.committed_date)
    date = time.strftime("%Y-%m-%d %H:%M:%S", timestamp)
    author = commit.author.name + ":"
    # Only the subject (first line) of the message is shown
    subject = commit.message.split("\n", 1)[0]
    print(f"{prefix}{date} {author} {subject}", *args, **kw)


def remove_reverted(commits_a: list, commits_b: list) -> list:
    """Remove reverted features from ``commits_a``: the revert commit and the commit it reverts.

    Only commits of ``commits_a`` whose hash is absent from ``commits_b`` are
    inspected. A commit counts as a revert when the first line of its message
    contains ``"Revert "``; the reverted hash is read from the
    ``"This reverts commit <hash>"`` sentence of the message. Every removed
    commit is printed with ``pprint``.

    Args:
        commits_a: commits to clean up (objects exposing ``hexsha`` and ``message``).
        commits_b: commits of the reference branch.

    Returns:
        The commits of ``commits_a``, in their original order, without the removed ones.
    """
    import re

    hashes_b = {commit.hexsha for commit in commits_b}
    # A set gives O(1) membership tests in the filtering loop below
    reverted = set()

    for commit in commits_a:
        if commit.hexsha in hashes_b:
            continue
        if "Revert " not in commit.message.split("\n", 1)[0]:
            continue
        reverted.add(commit.hexsha)
        # Extract only the hash itself, so that any text following the sentence
        # (e.g. trailers appended to the message) cannot corrupt it.
        # The last occurrence is used when the sentence appears several times.
        found = re.findall(r"This reverts commit ([0-9a-f]+)", commit.message)
        if found:
            reverted.add(found[-1])

    out = []
    c = 1
    for commit in commits_a:
        if commit.hexsha not in reverted:
            out.append(commit)
        else:
            pprint(commit, c)
            c += 1

    return out


git_repo = Repo(LOCAL_REPO)


# Get the list of commits reachable from each branch
commits_prev = list(git_repo.iter_commits(PREV))
commits_next = list(git_repo.iter_commits(NEXT))
commits_master = list(git_repo.iter_commits("master"))

# Remove reverted commits (including revert commits)
print(f"Removing these commits from {PREV}:")
commits_prev = remove_reverted(commits_prev, commits_master)

print(f"Removing these commits from {NEXT}:")
commits_next = remove_reverted(commits_next, commits_master)

# Commits that exist on a release branch but not on master
diff_prev_master = set(commits_prev) - set(commits_master)
diff_next_master = set(commits_next) - set(commits_master)

print()
print(f"master vs {PREV}:", len(diff_prev_master), "commits")
print(f"master vs {NEXT}:", len(diff_next_master), "commits")


# Commits of NEXT that are also in PREV under the very same hash
# (hash lookup in a set instead of comparing every pair of commits)
prev_hashes = {commit.hexsha for commit in commits_prev}
same_hash_commits = {commit for commit in commits_next if commit.hexsha in prev_hashes}

diff_next_prev = [commit for commit in commits_next if commit not in same_hash_commits]

# Commits of NEXT with the same message as a PREV-only commit but another hash
# (presumably changes re-applied on the PREV branch, e.g. cherry-picks).
# The hashes necessarily differ here: same-hash commits were filtered out above.
prev_only_messages = {commit.message for commit in diff_prev_master}
diff_hash_same_message_commits = {
    commit for commit in diff_next_prev if commit.message in prev_only_messages
}

diff_next_prev = [commit for commit in diff_next_prev if commit not in diff_hash_same_message_commits]

print("Same hash:", len(same_hash_commits), "Diff hash, same message:", len(diff_hash_same_message_commits))

print(f"{NEXT} vs {PREV}", len(diff_next_prev), "commits")

print("Number of Commits:", len(diff_next_prev))

# `diff_next_prev` keeps the order of `commits_next`, so it can be listed directly
for c, commit in enumerate(diff_next_prev, start=1):
    pprint(commit, c)

# Fetch all the PRs of the repository, whatever their state
g = Github()
repo = g.get_repo(REPO)
pulls = list(repo.get_pulls(state="all"))

print("Total Number of PRs:", len(pulls))

# Fields kept for each PR:
columns = (
    "title",
    "number",
    "user",
    "labels",
    "state",
    "draft",
    "created_at",
    "merged_at",
    "merge_commit_sha",
    "body",
)

# One record per PR, fields in the same order as `columns`
_pulls = [
    (
        pull.title,
        pull.number,
        pull.user.login,
        [label.name for label in pull.labels],
        pull.state,
        pull.draft,
        pull.created_at,
        pull.merged_at,
        pull.merge_commit_sha,
        pull.body,
    )
    for pull in pulls
]

pulls_df = pandas.DataFrame.from_records(_pulls, columns=columns)

# Keep the PRs whose merge commit is one of the commits listed in `diff_next_prev`
next_hashes = [commit.hexsha for commit in diff_next_prev]
is_in_next = pulls_df['merge_commit_sha'].isin(next_hashes)
pulls_df_next = pulls_df.loc[is_in_next]

# Group by labels: label name -> list of [title, number] entries.
# An entry gets a third item (True) when the same PR is already listed under another label.
labels = {"no label": []}
# Contributors
contributors = set()
# (title, number) of the PRs already listed under at least one label
duplicates = set()


# Iterate over the PRs of the release
for _, row in pulls_df_next.iterrows():

    key = (row["title"], row["number"])

    if not row["labels"]:
        labels["no label"].append(list(key))

    else:
        for label in row["labels"]:
            # A fresh entry per label: sharing one list between labels would flag
            # the first occurrence as well and would grow beyond 3 items for PRs
            # with many labels, which breaks the `len(pr) == 3` check below.
            entry = list(key)
            if key in duplicates:
                entry.append(True)
            else:
                duplicates.add(key)

            labels.setdefault(label, []).append(entry)

    contributors.add(row["user"])

for label, prs in labels.items():
    print(f"\n## {label}\n")
    for pr in prs:
        print(f"- {'DUPLICATED ' if len(pr) == 3 else ''}{pr[0]} (#{pr[1]})")


# Get list of commits w/o PR
# (merge commit hashes are collected once, instead of once per inspected commit)
merged_hashes = set(pulls_df_next["merge_commit_sha"])
no_pull = [commit for commit in diff_next_prev if commit.hexsha not in merged_hashes]

print()
print("Commits w/o PR:")
for i, commit in enumerate(no_pull, start=1):
    pprint(commit, i)

# Get list of contributors
print(f"\n## Contributors {NEXT}\n")
print(", ".join("@" + login for login in sorted(contributors)))

Step three - CI updates, docker images etc

...


Out-dated information

Automatic PyPI wheels+tar upload with Travis (out-dated)

PyPI wheels and tarballs can be built and uploaded using Travis as the deploy phase of the test stage:

# PyPI Deployment: https://docs.travis-ci.com/user/deployment/pypi/
deploy:
  provider: pypi
  user: vfdev-5
  # If password contains non alphanumeric characters
  # https://github.com/travis-ci/dpl/issues/377
  # pass it as secured variable
  password: $PYPI_TOKEN
  # otherwise, follow "How to encrypt the password": https://docs.travis-ci.com/user/encryption-keys/
  # `travis encrypt deploy.password="password"`
  #  secure: "secured_password"

  skip_cleanup: true
  distributions: "sdist bdist_wheel"
  on:
    tags: true
    python: "3.5"

Automatic Conda wheels+tar upload (out-dated)

This is done almost manually: we build for 4 Python versions (2.7, 3.5, 3.6, 3.7) on a single platform (linux-64) and convert the artifacts to osx-64 and win-64:

before_deploy:
  # Conda deploy if on tag
  # ANACONDA_TOKEN should be provided by Travis
  # How to generate ANACONDA_TOKEN: https://docs.anaconda.com/anaconda-cloud/user-guide/tasks/work-with-accounts#creating-access-tokens
  # We need a token with checked "Allow all API operations"

  # https://conda.io/docs/user-guide/tasks/build-packages/install-conda-build.html
  - conda install -y conda-build conda-verify anaconda-client
  - conda config --set anaconda_upload no
  - conda build --quiet --no-test --output-folder conda_build conda.recipe
  # Convert to other platforms: OSX, WIN
  - conda convert --platform win-64 conda_build/linux-64/*.tar.bz2 -o conda_build/
  - conda convert --platform osx-64 conda_build/linux-64/*.tar.bz2 -o conda_build/
  # Upload to Anaconda
  # We could use --all, but that would be too many platforms to upload
  - ls conda_build/*/*.tar.bz2 | xargs -I {} anaconda -v -t $ANACONDA_TOKEN upload -u pytorch {}

The recipe meta.yaml used to build the package is provided in the conda.recipe folder.

Documentation automatic generation

The documentation is automatically built for master and all tags, and deployed when a PR is merged to master. To build multiple versions we are using a fork of sphinxcontrib-versioning. The documentation is deployed at https://pytorch.org/ignite and contains a version selector. The build history is not preserved, so if you manually push some changes, they will be overwritten by the next doc deployment.

Automatic deployment is done in .travis.yml in the stage docs:

# GitHub Pages Deployment: https://docs.travis-ci.com/user/deployment/pages/
    - stage: Docs
      python: "3.5"
      if: branch != nightly
      # Use previously defined before_install
      before_install: *before_install

      install:
        - pip install -r docs/requirements.txt
        - pip install git+https://github.com/anmolsjoshi/sphinxcontrib-versioning.git

      script:
        - sphinx-versioning --use-master-conf build --whitelist-branches master docs/source docs/build/html
        # Create .nojekyll file to serve correctly _static and friends
        - touch docs/build/html/.nojekyll
      after_success: # Nothing to do

      # Deploy built docs when PR is merged to master
      deploy:
        provider: pages
        skip-cleanup: true
        github-token: $GITHUB_TOKEN  # Set in the settings page of your repository, as a secure variable
        keep-history: false
        local_dir: docs/build/html
        on:
          branch: master

How to manually create a release

How to create and upload pip/conda builds

First, we build universal wheels and tarballs:

git checkout vX.Y.Z
python setup.py sdist bdist_wheel

Upload to pypi

twine upload dist/*

or for testing purposes it is possible to upload to test.pypi:

twine upload --repository-url https://test.pypi.org/legacy/ dist/*

How to manually update documentation

All you have to do to update the site is to modify the gh-pages branch. For example, to regenerate the docs:

cd docs
pip install -r requirements.txt
pip install git+https://github.com/anmolsjoshi/sphinxcontrib-versioning.git
sphinx-versioning --use-master-conf build --whitelist-branches master docs/source docs/build/html
touch docs/build/html/.nojekyll
# copy build/html into gh-pages branch, commit, push

README

Side-by-side code compare

The image is created in PyCharm (Dracula theme) using the "Compare files" function and a screenshot. Line spacing ~1.1

Ignite (left side):

# Model, data, optimizer and loss
model = Net()
train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
criterion = torch.nn.NLLLoss()

# Number of epochs and periods (in iterations) of validation and checkpointing
max_epochs = 10
validate_every = 100
checkpoint_every = 100

trainer = create_supervised_trainer(model, optimizer, criterion)
evaluator = create_supervised_evaluator(model, metrics={'accuracy': Accuracy()})


@trainer.on(Events.ITERATION_COMPLETED(every=validate_every))
def validate(trainer):
    """Run the evaluator on the validation data and print the accuracy."""
    evaluator.run(val_loader)
    accuracy = evaluator.state.metrics['accuracy']
    report = "After {} iterations, binary accuracy = {:.2f}"
    print(report.format(trainer.state.iteration, accuracy))


# Register the checkpoint handler on every `checkpoint_every`-th completed iteration
checkpointer = ModelCheckpoint(checkpoint_dir, n_saved=3, create_dir=True)
checkpoint_event = Events.ITERATION_COMPLETED(every=checkpoint_every)
trainer.add_event_handler(checkpoint_event, checkpointer, {'mymodel': model})

trainer.run(train_loader, max_epochs=max_epochs)

and the bare PyTorch snippet (right side):

# Model, data, optimizer and loss (same setup as the Ignite snippet)
model = Net()
train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
criterion = torch.nn.NLLLoss()

# Number of epochs and periods (in iterations) of validation and checkpointing
max_epochs = 10
validate_every = 100
checkpoint_every = 100

def validate(model, val_loader):
    """Return the fraction of validation examples whose rounded output equals the target.

    Args:
        model: module called as ``model(inputs)``; it is switched to eval mode.
        val_loader: iterable of ``(inputs, target)`` batches.

    Returns:
        ``num_correct / num_examples`` over all batches.

    Raises:
        ZeroDivisionError: if ``val_loader`` yields no examples.
    """
    model = model.eval()
    num_correct = 0
    num_examples = 0
    # No gradient is needed for evaluation: do not record operations for autograd
    with torch.no_grad():
        for inputs, target in val_loader:
            output = model(inputs)
            # Round the outputs, cast them to the target's type and compare element-wise
            correct = torch.eq(torch.round(output).type(target.type()), target).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
    return num_correct / num_examples


def checkpoint(model, optimizer, checkpoint_dir):
    """Placeholder for the checkpoint saving logic; the body is intentionally omitted."""
    # ...
    pass

# Number of completed training iterations, counted across epochs
iteration = 0

for epoch in range(max_epochs):
    for batch in train_loader:
        # `validate` switches the model to eval mode, so train mode is restored on each iteration
        model = model.train()
        optimizer.zero_grad()
        inputs, target = batch
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Count the iteration as soon as it is completed: the periodic actions below
        # then run after every `validate_every` / `checkpoint_every` completed iterations
        # (instead of right after the first batch with "After 0 iterations"),
        # like the `ITERATION_COMPLETED(every=...)` handlers of the Ignite snippet.
        iteration += 1

        if iteration % validate_every == 0:
            binary_accuracy = validate(model, val_loader)
            print("After {} iterations, binary accuracy = {:.2f}"
                  .format(iteration, binary_accuracy))

        if iteration % checkpoint_every == 0:
            checkpoint(model, optimizer, checkpoint_dir)

GIF teaser