Skip to content

Commit

Permalink
[ci] build python wheels (#183)
Browse files Browse the repository at this point in the history
  • Loading branch information
guocuimi authored May 13, 2024
1 parent eaf6208 commit 6f1f1b6
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 32 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ name: build and test
on:
push:
paths-ignore:
- ".github/docker.yml"
- "docs/**"
- "tools/**"
- "gateway/**"
- "scripts/**"
- "python/**"
- "docker/**"
- "**/*.md"
- "**/*.txt"
- "**/*.sh"
Expand All @@ -19,12 +19,12 @@ on:

pull_request:
paths-ignore:
- ".github/docker.yml"
- "docs/**"
- "tools/**"
- "gateway/**"
- "scripts/**"
- "python/**"
- "docker/**"
- "**/*.md"
- "**/*.txt"
- "**/*.sh"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: Publish docker base image
name: Publish base docker image
on:
workflow_dispatch:

jobs:
publish_base:
runs-on: [self-hosted, linux, build]
runs-on: [self-hosted, linux, release]
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand Down
39 changes: 39 additions & 0 deletions .github/workflows/publish_devel_image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds the devel docker image from docker/Dockerfile.devel and pushes it to
# Docker Hub. The only trigger is a manual workflow_dispatch.
name: Publish devel docker image
on:
  workflow_dispatch:

jobs:
  # Job id names the image this workflow publishes.
  publish_devel:
    runs-on: [self-hosted, linux, release]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      - name: Build devel image for cuda 12.1
        uses: docker/build-push-action@v5
        with:
          # The build context is ./docker, so COPY paths in the Dockerfile are
          # resolved relative to that directory.
          context: ./docker
          file: ./docker/Dockerfile.devel
          push: true
          # One KEY=VALUE per line; the block scalar keeps every value a string.
          build-args: |
            UBUNTU_VERSION=22.04
            CUDA_VERSION=12.1
            GCC_VERSION=12
            CMAKE_VERSION=3.18.5
            NINJA_VERSION=1.9.0
          tags: |
            vectorchai/scalellm_devel:cuda12.1-ubuntu22.04
            vectorchai/scalellm_devel:latest
2 changes: 1 addition & 1 deletion .github/workflows/release_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:

jobs:
publish_scalellm:
runs-on: [self-hosted, linux, x64, release]
runs-on: [self-hosted, linux, release]
steps:
- uses: olegtarasov/get-tag@v2.1
id: tagName
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/release_wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
name: Release wheel
# Build the ScaleLLM python wheel inside the builder docker image.
# The tag-push trigger below is disabled; the workflow is run manually with
# the tag to build passed as an input.
# Push events to matching v*, i.e. v1.0.0, v1.0.0-rc1, v20.15.10-rc5, etc.
# on:
#   push:
#     tags:
#       - v[0-9]+.[0-9]+.[0-9]+*
on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Tag to build and push'
        required: true
        type: string
env:
  # Tells where to store caches.
  CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache

jobs:
  build_wheel:
    strategy:
      matrix:
        # Quoted so versions such as "3.10" are not parsed as floats.
        python: ["3.9"]
        cuda: ["12.1"]
        torch: ["2.3"]
    runs-on: [self-hosted, linux, release]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Build the requested tag instead of the ref the run was dispatched on.
          ref: ${{ inputs.tag }}
          submodules: recursive

      - name: Build wheel
        # The repository is mounted at /ScaleLLM and the shared cache directory
        # at /ci_cache; vcpkg and ccache are pointed at the cache mount.
        run: |
          docker run --rm -t \
            -v "$CI_CACHE_DIR":/ci_cache \
            -v "$GITHUB_WORKSPACE":/ScaleLLM \
            -e PYTHON_VERSION=${{ matrix.python }} \
            -e CUDA_VERSION=${{ matrix.cuda }} \
            -e TORCH_VERSION=${{ matrix.torch }} \
            -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
            -e CCACHE_DIR=/ci_cache/.ccache \
            vectorchai/scalellm_builder:cuda${{ matrix.cuda }}-ubuntu22.04 \
            bash /ScaleLLM/scripts/build_wheel.sh
        timeout-minutes: 60

      - name: show wheel size
        run: du -h python/dist/*

      # - uses: actions/upload-artifact@v4
      #   with:
      #     name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }}
      #     path: python/dist/*


1 change: 1 addition & 0 deletions docker/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ ARG UBUNTU_VERSION=22.04

FROM ubuntu:${UBUNTU_VERSION}

LABEL maintainer="mi@vectorch.com"
ENV DEBIAN_FRONTEND noninteractive

# Install common dependencies
Expand Down
68 changes: 41 additions & 27 deletions Dockerfile.devel → docker/Dockerfile.devel
Original file line number Diff line number Diff line change
@@ -1,32 +1,46 @@
# ---- Build ----
FROM nvcr.io/nvidia/cuda:12.1.0-devel-ubuntu22.04 as build
ARG UBUNTU_VERSION=22.04

ARG UID=1000
ARG GID=1000
FROM ubuntu:${UBUNTU_VERSION}

LABEL maintainer="mi@vectorch.com"
ENV DEBIAN_FRONTEND noninteractive

# Install common dependencies
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install multiple python versions
COPY ./common/install_python.sh install_python.sh
RUN bash ./install_python.sh "3.9.0"
RUN bash ./install_python.sh "3.10.1"
RUN bash ./install_python.sh "3.11.0"
RUN bash ./install_python.sh "3.12.0"
RUN rm install_python.sh

# Install cuda, cudnn and nccl
ARG CUDA_VERSION=12.1
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# install build tools
RUN apt-get update -q -y && \
apt-get install -q -y \
build-essential \
ninja-build \
cmake \
ccache \
python3-dev \
python3-pip \
zip \
pkg-config \
libssl-dev \
libboost-all-dev \
curl \
git \
wget \
gcc-12 \
g++-12
# Install gcc
ARG GCC_VERSION=12
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
software-properties-common gpg-agent
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# build with gcc-12
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
ARG CMAKE_VERSION=3.18.5
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

ARG NINJA_VERSION=1.9.0
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

# install jemalloc (optional)
RUN cd /tmp && \
Expand All @@ -42,6 +56,8 @@ ADD https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/20
RUN apt-get install -y ./nsight-systems-2024.2.1_2024.2.1.106-1_amd64.deb

# install rust
ARG UID=1000
ARG GID=1000
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
Expand All @@ -65,6 +81,4 @@ RUN echo "\
. \${HOME}/miniconda3/etc/profile.d/conda.sh\n\
conda activate base\n" >> ${HOME}/.bashrc

CMD ["/bin/bash"]


CMD ["bash"]
1 change: 1 addition & 0 deletions docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ install_ubuntu() {
${deploy_deps} \
build-essential \
ccache \
cmake \
zip \
pkg-config \
libssl-dev \
Expand Down
Empty file added python/examples/__init__.py
Empty file.
40 changes: 40 additions & 0 deletions scripts/build_wheel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
set -e

# Abort the script unless the named environment variable is set and non-empty.
# Arguments:
#   $1 - name of the environment variable to check
# Exits with status 1, after printing a message to stderr, when the variable
# is unset or empty.
ensure_env() {
  local var_name="$1"
  # Indirect expansion with an empty default, so an unset variable reaches the
  # error message below even when the caller runs with `set -u`.
  if [ -z "${!var_name:-}" ]; then
    echo "Error: Environment variable '$var_name' is not set." >&2
    exit 1
  fi
}

# The three versions below select the python interpreter directory, the torch
# release and the torch wheel index used for the build.
ensure_env PYTHON_VERSION
ensure_env TORCH_VERSION
ensure_env CUDA_VERSION
# ensure_env BUILD_VERSION

# Repository root: the parent of the directory that contains this script.
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# NOTE(review): HOME is redirected to /tmp/home, presumably so that per-user
# files created by pip end up in a writable location -- confirm.
export HOME=/tmp/home
mkdir -p "$HOME"
export PATH="$HOME/.local/bin:$PATH"

# Keep only the first two version components, so that "12.1" and "12.1.0"
# both produce the index suffix "cu121".
CUDA_MAJOR="${CUDA_VERSION%%.*}"
CUDA_REST="${CUDA_VERSION#*.}"
CUDA_MINOR="${CUDA_REST%%.*}"

# choose the right python version
# Only major and minor are used, so "3.9" and "3.9.0" both select cp39.
PYTHON_MAJOR="${PYTHON_VERSION%%.*}"
PYTHON_REST="${PYTHON_VERSION#*.}"
PYTHON_MINOR="${PYTHON_REST%%.*}"
PYVER="${PYTHON_MAJOR}${PYTHON_MINOR}"
export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"


echo "::group::Install PyTorch"
pip install "torch==${TORCH_VERSION}" --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
echo "::endgroup::"


echo "::group::Build wheel for ScaleLLM"
cd "$PROJECT_ROOT/python"
python setup.py bdist_wheel
echo "::endgroup::"

0 comments on commit 6f1f1b6

Please sign in to comment.