Skip to content

Commit

Permalink
Add ansible stage for building CUDA plugin (#6910)
Browse files Browse the repository at this point in the history
  • Loading branch information
will-cromar authored Apr 10, 2024
1 parent a816c42 commit a170ffe
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 11 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/_build_plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Reusable workflow: builds the PyTorch/XLA CUDA plugin wheel inside the
# `dev-image` container via the ansible `build_plugin` stage and uploads the
# resulting wheel(s) as the `cuda-plugin` artifact.
name: build-cuda-plugin
on:
  workflow_call:
    inputs:
      dev-image:
        required: true
        type: string
        description: Base image for builds
      runner:
        required: false
        type: string
        description: Runner type for the test
        default: linux.12xlarge
      # NOTE(review): `cuda` is not referenced by any step in this workflow.
      cuda:
        required: false
        type: string
        description: Whether to build XLA with CUDA
        # Quoted so the default is a string, matching `type: string`.
        default: "1"

    secrets:
      gcloud-service-key:
        required: true
        description: Secret to access Bazel build cache

    outputs:
      docker-image:
        # NOTE(review): the `build` job declares no `outputs`, so this
        # expression resolves to an empty value. Kept for compatibility.
        value: ${{ jobs.build.outputs.docker-image }}
        description: The docker image containing the built PyTorch.
jobs:
  build:
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.dev-image }}
    env:
      GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
      GOOGLE_APPLICATION_CREDENTIALS: /tmp/default_credentials.json
      # Environment values are strings; quote them so no YAML tool retypes them.
      BAZEL_JOBS: "16"
      BAZEL_REMOTE_CACHE: "1"
    steps:
      - name: Setup gcloud
        shell: bash
        # Write the service key to the credentials path declared in `env`.
        # The target path is quoted so the redirect never word-splits.
        run: |
          echo "${GCLOUD_SERVICE_KEY}" > "${GOOGLE_APPLICATION_CREDENTIALS}"
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          path: pytorch/xla
      - name: Build
        shell: bash
        # Runs only the `build_plugin` stage; roles tagged fetch_srcs and
        # install_deps are skipped (presumably the checkout and the dev image
        # already provide sources and dependencies - confirm).
        run: |
          cd pytorch/xla/infra/ansible
          ansible-playbook playbook.yaml -vvv -e "stage=build_plugin arch=amd64 accelerator=cuda src_root=${GITHUB_WORKSPACE}" --skip-tags=fetch_srcs,install_deps
      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: cuda-plugin
          path: /dist/*.whl
          # Fail the job instead of only warning when no wheel was produced.
          if-no-files-found: error
8 changes: 8 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ jobs:
secrets:
gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

# Builds the XLA CUDA plugin by calling the reusable _build_plugin workflow
# with the CUDA 12.1 development image and the Bazel cache credentials.
build-cuda-plugin:
  name: Build XLA CUDA plugin
  uses: ./.github/workflows/_build_plugin.yml
  secrets:
    gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
  with:
    dev-image: "us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1"

test-cpu:
name: "CPU tests"
uses: ./.github/workflows/_test.yml
Expand Down
15 changes: 14 additions & 1 deletion infra/ansible/playbook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"Pass the required variable with: --e \"{{ item.name }}=<value>\""
loop:
- name: stage
pattern: ^(build|release)$
pattern: ^(build|build_plugin|release)$
- name: arch
pattern: ^(aarch64|amd64)$
- name: accelerator
Expand Down Expand Up @@ -73,6 +73,7 @@
src_root: "/src"
tags: fetch_srcs

# TODO: better name now that there are two builds
- role: build_srcs
vars:
src_root: "/src"
Expand All @@ -81,8 +82,20 @@
combine(build_env[arch] | default({}, true)) |
combine(build_env[accelerator] | default({}, true))
}}"
when: stage == "build"
tags: build_srcs

# Runs the build_plugin role only when stage is "build_plugin". env_vars is
# build_env.common overlaid with the arch-specific and then the
# accelerator-specific entries, each falling back to {} when unset or falsy.
- role: build_plugin
  tags: build_plugin
  when: stage == "build_plugin"
  vars:
    src_root: /src
    env_vars: >-
      {{
      build_env.common | default({}, true) |
      combine(build_env[arch] | default({}, true)) |
      combine(build_env[accelerator] | default({}, true))
      }}

- role: configure_env
vars:
env_vars: "{{
Expand Down
23 changes: 23 additions & 0 deletions infra/ansible/roles/build_plugin/tasks/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Tasks for the build_plugin role: build the CUDA plugin wheel into /dist,
# then install every plugin wheel found there.

- name: Create /dist directory for exported wheels
  ansible.builtin.file:
    path: /dist
    state: directory
    mode: "0755"

# Only the CUDA accelerator has a plugin to build; the wheel is written
# straight to /dist.
- name: Build PyTorch/XLA CUDA Plugin
  ansible.builtin.command:
    cmd: pip wheel -w /dist plugins/cuda -v
    chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
  environment: "{{ env_vars }}"
  when: accelerator == "cuda"

# The search directory is /dist (where the build task writes), so the task
# name says /dist rather than pytorch/xla/dist.
- name: Find plugin *.whl files in /dist
  ansible.builtin.find:
    paths: /dist
    patterns: "torch_xla_cuda_plugin*.whl"
  register: plugin_wheels

# `map` yields a lazy generator; `| list` hands the pip module a real list.
- name: Install plugin wheels
  ansible.builtin.pip:
    name: "{{ plugin_wheels.files | map(attribute='path') | list }}"
    state: forcereinstall
7 changes: 0 additions & 7 deletions infra/ansible/roles/build_srcs/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@
chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
environment: "{{ env_vars }}"

# Builds the CUDA plugin wheel into the relative `dist` directory, run from
# <src_root>/pytorch/xla; skipped unless the accelerator is "cuda".
- name: Build PyTorch/XLA CUDA Plugin
  when: accelerator == "cuda"
  environment: "{{ env_vars }}"
  ansible.builtin.command:
    chdir: "{{ (src_root, 'pytorch/xla') | path_join }}"
    cmd: pip wheel -w dist plugins/cuda -v

- name: Find XLA *.whl files in pytorch/xla/dist
ansible.builtin.find:
path: "{{ (src_root, 'pytorch/xla/dist') | path_join }}"
Expand Down
4 changes: 2 additions & 2 deletions plugins/cuda/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
[build-system]
# Build-time requirements: setuptools plus numpy.
# `requires` is defined exactly once; TOML rejects a key repeated in a table.
requires = ["setuptools", "numpy"]
build-backend = "setuptools.build_meta"

[project]
name = "torch_xla_cuda_plugin"
# The version is supplied at build time (setup.py passes `version=` to
# setuptools.setup), so it is listed as dynamic and no static `version` key is
# set: a field may not be both static and dynamic.
dynamic = ["version"]
authors = [
    {name = "PyTorch/XLA Dev Team", email = "pytorch-xla@googlegroups.com"},
]
description = "PyTorch/XLA CUDA Plugin"
requires-python = ">=3.8"

# Package data for torch_xla_cuda_plugin: every .so file under lib/.
[tool.setuptools.package-data]
torch_xla_cuda_plugin = [
    "lib/*.so",
]
Expand Down
6 changes: 5 additions & 1 deletion plugins/cuda/setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import os
import sys

Expand All @@ -10,4 +11,7 @@
# Build the PJRT GPU plugin shared library with Bazel using the CUDA config.
# The second argument is presumably the output directory inside the package
# (it matches the lib/*.so package-data glob) - confirm against build_util.
build_util.bazel_build('@xla//xla/pjrt/c:pjrt_c_api_gpu_plugin.so',
                       'torch_xla_cuda_plugin/lib', ['--config=cuda'])

# setup() is called exactly once, with the version supplied here: a nightly
# style "2.4.0.devYYYYMMDD" string derived from today's date.
setuptools.setup(
    # TODO: Use a common version file
    version=f'2.4.0.dev{datetime.date.today().strftime("%Y%m%d")}')

0 comments on commit a170ffe

Please sign in to comment.