-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bab180e
commit db15847
Showing
3 changed files
with
156 additions
and
213 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,115 +1,176 @@ | ||
name: Build GPTQModel Wheels with CUDA for Linux | ||
name: Release | ||
|
||
on: workflow_dispatch | ||
defaults: | ||
run: | ||
shell: bash -le {0} | ||
on: | ||
schedule: | ||
- cron: '0 20 * * *' | ||
repository_dispatch: | ||
workflow_dispatch: | ||
|
||
env: | ||
CUDA_DEVICE_ORDER: PCI_BUS_ID | ||
AMD_SERVER: 10.0.13.31 | ||
INTEL_SERVER: 10.0.23.35 | ||
|
||
concurrency: | ||
group: ${{ github.ref }}-workflow | ||
cancel-in-progress: true | ||
|
||
jobs: | ||
build_wheels: | ||
if: ${{ github.repository_owner == 'ModelCloud' }} | ||
name: Build wheels for ${{ matrix.os }} and Python ${{ matrix.python }} and CUDA ${{ matrix.cuda }} | ||
runs-on: ${{ matrix.os }} | ||
# Probes the Intel and AMD hosts (addresses come from the workflow-level
# INTEL_SERVER / AMD_SERVER env vars) and publishes two job outputs:
#   tag - "intel" or "amd", derived from the runner-status response
#   ip  - address of the first host whose /gpu/status endpoint answers 200 OK
# The job fails when the status response is unexpected or when neither host
# answers the /gpu/status probe.
check-vm:
  runs-on: self-hosted
  container:
    image: modelcloud/gptqmodel:alpine-ci-v1
  outputs:
    ip: ${{ steps.get_ip.outputs.ip }}
    tag: ${{ steps.get_ip.outputs.tag }}
  steps:
    - name: Select server
      id: get_ip
      run: |
        # Step 1: determine the runner-status response.
        # A runner whose name contains "intel" short-circuits to 0; otherwise
        # query the Intel host first and fall back to the AMD host on failure.
        if [[ "${{ runner.name }}" == *"intel"* ]]; then
          echo "current ci is intel"
          response=0
        else
          echo "test intel vm status"
          response=$(curl --silent --fail --max-time 5 http://$INTEL_SERVER/gpu/runner/status/intel) || response=error
          if [ "$response" == "error" ]; then
            echo "test amd vm status"
            response=$(curl --silent --fail --max-time 5 http://${AMD_SERVER}/gpu/runner/status/intel) || response=error
          fi
        fi
        echo "response: $response"

        # Step 2: map the response to a runner tag (0 -> intel, -1 -> amd).
        # Anything else, including the "error" sentinel, aborts the job.
        if [ "$response" == "0" ]; then
          tag="intel"
        elif [ "$response" == "-1" ]; then
          tag="amd"
        else
          echo "Error: Unexpected result - $response"
          exit 1
        fi
        echo "Runner tag: $tag"

        # Step 3: pick the first host whose /gpu/status returns "200 OK".
        response=$(curl -s --head --fail --max-time 5 http://${INTEL_SERVER}/gpu/status) || response=error
        if echo "$response" | grep -q "200 OK"; then
          # Assign before logging so the message shows the selected address.
          ip=${INTEL_SERVER}
          echo "Intel server is online. set ip to $ip"
        else
          response=$(curl -s --head --fail --max-time 5 http://${AMD_SERVER}/gpu/status) || response=error
          if echo "$response" | grep -q "200 OK"; then
            ip=${AMD_SERVER}
            echo "AMD server is online. set ip to $ip"
          else
            # Reached only when the Intel probe above has also failed.
            echo "AMD server is offline."
            exit 1
          fi
        fi

        # Step 4: publish the results as step outputs.
        echo "ip=$ip" >> "$GITHUB_OUTPUT"
        echo "tag=$tag" >> "$GITHUB_OUTPUT"
        echo "tag: $tag, ip: $ip"
build: | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
os: [ubuntu-22.04] | ||
pyver: ["3.8", "3.9", "3.10", "3.11", "3.12"] | ||
cuda: ["11.8"] # wheels for 12.1 are built in build_wheels_pypi.yml | ||
defaults: | ||
run: | ||
shell: bash | ||
env: | ||
CUDA_VERSION: ${{ matrix.cuda }} | ||
|
||
cuda: [ "11.8", "12.1", "12.4" ] | ||
torch: [ "2.0", "2.1", "2.2", "2.3", "2.4" ] | ||
python: [ "3.9", "3.10", "3.11" ] # Python 3.12 is unsupported now. https://github.com/intel/intel-extension-for-pytorch/issues/525 | ||
exclude: | ||
- cuda: "12.4" | ||
torch: "2.1" | ||
- cuda: "12.4" | ||
torch: "2.2" | ||
- cuda: "12.4" | ||
torch: "2.3" | ||
- torch: "2.4" | ||
cuda: "11.8" | ||
- torch: "2.4" | ||
cuda: "12.1" | ||
- torch: "2.0" | ||
python: "3.12" | ||
- torch: "2.1" | ||
python: "3.12" | ||
- torch: "2.0" | ||
cuda: "12.1" | ||
- torch: "2.0" | ||
cuda: "12.4" | ||
max-parallel: 4 | ||
runs-on: [ self-hosted ] | ||
needs: check-vm | ||
container: | ||
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:compiler_cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }} | ||
steps: | ||
- uses: actions/checkout@v4 | ||
|
||
- name: Free disk space | ||
- name: Print Env | ||
run: | | ||
# Go from 19G to 54G free disk space in 3min | ||
df -h | ||
sudo apt-get update | ||
sudo apt-get purge -y '^apache.*' | ||
sudo apt-get purge -y '^imagemagick.*' | ||
sudo apt-get purge -y '^dotnet.*' | ||
sudo apt-get purge -y '^aspnetcore.*' | ||
sudo apt-get purge -y 'php.*' | ||
sudo apt-get purge -y '^temurin.*' | ||
sudo apt-get purge -y '^mysql.*' | ||
sudo apt-get purge -y '^java.*' | ||
sudo apt-get purge -y '^openjdk.*' | ||
sudo apt-get purge -y microsoft-edge-stable google-cloud-cli azure-cli google-chrome-stable firefox powershell mono-devel | ||
df -h | ||
sudo apt-get autoremove -y >/dev/null 2>&1 | ||
sudo apt-get clean | ||
df -h | ||
echo "https://github.com/actions/virtual-environments/issues/709" | ||
sudo rm -rf "$AGENT_TOOLSDIRECTORY" | ||
df -h | ||
echo "remove big /usr/local" | ||
sudo rm -rf "/usr/local/share/boost" | ||
sudo rm -rf /usr/local/lib/android >/dev/null 2>&1 | ||
df -h | ||
echo "remove /usr/share leftovers" | ||
sudo rm -rf /usr/share/dotnet/sdk > /dev/null 2>&1 | ||
sudo rm -rf /usr/share/dotnet/shared > /dev/null 2>&1 | ||
sudo rm -rf /usr/share/swift > /dev/null 2>&1 | ||
df -h | ||
echo "remove other leftovers" | ||
sudo rm -rf /var/lib/mysql > /dev/null 2>&1 | ||
sudo rm -rf /home/runner/.dotnet > /dev/null 2>&1 | ||
sudo rm -rf /home/runneradmin/.dotnet > /dev/null 2>&1 | ||
sudo rm -rf /etc/skel/.dotnet > /dev/null 2>&1 | ||
sudo rm -rf /usr/local/.ghcup > /dev/null 2>&1 | ||
sudo rm -rf /usr/local/aws-cli > /dev/null 2>&1 | ||
sudo rm -rf /usr/local/lib/node_modules > /dev/null 2>&1 | ||
sudo rm -rf /usr/lib/heroku > /dev/null 2>&1 | ||
sudo rm -rf /usr/local/share/chromium > /dev/null 2>&1 | ||
df -h | ||
- uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ matrix.pyver }} | ||
export PYENV_ROOT=/opt/pyenv && export PATH=$PYENV_ROOT/bin:$PATH && eval "$(pyenv init -)" && eval "$(pyenv init --path)" | ||
env_name="torch${{ matrix.torch }}_py${{ matrix.python }}" | ||
echo "env: $env_name" | ||
pyenv local ${{ matrix.python }} && pyenv activate $env_name && pyenv versions | ||
- name: Setup Miniconda | ||
uses: conda-incubator/setup-miniconda@v3.0.4 | ||
echo "=========" | ||
python --version | ||
echo "=========" | ||
nvcc --version | ||
echo "=========" | ||
pip show torch | ||
- name: Checkout Codes | ||
uses: actions/checkout@v4 | ||
with: | ||
activate-environment: "build" | ||
python-version: ${{ matrix.pyver }} | ||
mamba-version: "*" | ||
use-mamba: false | ||
channels: conda-forge,defaults | ||
channel-priority: true | ||
add-pip-as-python-dependency: true | ||
auto-activate-base: false | ||
|
||
- name: Install Dependencies | ||
repository: ${{ github.event.inputs.repo }} | ||
ref: ${{ github.event.inputs.ref }} | ||
|
||
- name: Install requirements | ||
run: | | ||
conda install cuda-toolkit -c "nvidia/label/cuda-${CUDA_VERSION}.0" | ||
export PYENV_ROOT=/opt/pyenv && export PATH=$PYENV_ROOT/bin:$PATH && eval "$(pyenv init -)" && eval "$(pyenv init --path)" | ||
# Refer to https://pytorch.org/get-started/locally/ | ||
python -m pip install torch --index-url https://download.pytorch.org/whl/cu118 | ||
python -m pip install --upgrade build setuptools wheel ninja numpy gekko pandas | ||
env_name="torch${{ matrix.torch }}_py${{ matrix.python }}" | ||
echo "env: $env_name" | ||
- name: Check install | ||
run: | | ||
python -c "import torch; print('torch version:', torch.__version__)" | ||
pyenv local ${{ matrix.python }} && pyenv activate $env_name && pyenv versions | ||
- name: Build Wheel | ||
pip install cmake | ||
pip install -r requirements.txt -i http://${{ needs.check-vm.outputs.ip }}/simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} | ||
- name: Compile | ||
run: | | ||
# For some reason $CONDA_PREFIX is empty. | ||
export CUDA_HOME=/usr/share/miniconda | ||
export CUDA_PATH=/usr/share/miniconda | ||
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib" | ||
export PYENV_ROOT=/opt/pyenv && export PATH=$PYENV_ROOT/bin:$PATH && eval "$(pyenv init -)" && eval "$(pyenv init --path)" | ||
env_name="torch${{ matrix.torch }}_py${{ matrix.python }}" | ||
echo "env: $env_name" | ||
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX" | ||
pyenv local ${{ matrix.python }} && pyenv activate $env_name && pyenv versions | ||
python setup.py bdist_wheel | ||
- name: List build dir | ||
run: | | ||
export PYENV_ROOT=/opt/pyenv && export PATH=$PYENV_ROOT/bin:$PATH && eval "$(pyenv init -)" && eval "$(pyenv init --path)" | ||
echo "CUDA_PATH:" | ||
echo $CUDA_PATH | ||
env_name="torch${{ matrix.torch }}_py${{ matrix.python }}" | ||
echo "env: $env_name" | ||
echo "PYPI_RELEASE:" | ||
echo $PYPI_RELEASE | ||
pyenv local ${{ matrix.python }} && pyenv activate $env_name && pyenv versions | ||
python setup.py sdist bdist_wheel | ||
cd dist | ||
ls -alh . | ||
whl=$(ls -t *.whl | head -n 1) | ||
twine check $whl | ||
echo "WHL_NAME=$whl" >> $GITHUB_ENV | ||
- uses: actions/upload-artifact@v4 | ||
- name: Upload artifact | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: 'linux-cuda-wheels' | ||
path: ./dist/*.whl | ||
name: ${{ env.WHL_NAME }} | ||
path: dist/${{ env.WHL_NAME }} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.