.github/workflows/ci.yml

name: ci

permissions:
  contents: read
  pull-requests: read
  actions: read

env:
  VERSION: "${{ github.ref_name }}"

on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:

  # cache-able building with ccache.
  darwin-metal:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64, arm64 ]
        version: [ '3.0' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    #     https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update && brew install ccache
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_EMBED_LIBRARY=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool -L ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}

  # cache-able building with ccache.
  linux-hip:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags.
        #     6.1 ==> 6.1.2
        #     5.7 ==> 5.7.1
        version: [ '6.1', '5.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        #     https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html,
        #     https://rocm.docs.amd.com/en/docs-5.7.1/release/gpu_os_support.html.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=/opt/rocm/llvm/bin/clang \
            --env CXX=/opt/rocm/llvm/bin/clang++ \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete
          
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}

  # cache-able building with ccache.
  linux-cuda:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        #     12.5 ==> 12.5.0
        #     11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "52;61;70;75;80"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04
          
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}

  # cache-able building with ccache.
  linux-oneapi:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        #     2024.2 ==> 2024.2.0
        #     2024.1 ==> 2024.1.1
        version: [ '2024.2', '2024.1' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=on -DGGML_SYCL_F16=on \
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} \
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh

          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icpx \
            --env CCACHE_DIR \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            intel/oneapi-basekit:${{ matrix.version == '2024.2' && '2024.2.0' || '2024.1.1' }}-devel-ubuntu22.04

          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi

          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}

  # cache-able building with ccache.
  windows-hip:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
        #     5.7 ==> 5.7.1
        #     5.5 ==> 5.5.1
        version: [ '5.7', '5.5' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'

          choco install ccache curl -y
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Setup HIP
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          
          Write-Host "I install AMD ROCm HIP SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-install'

          Write-Host "I verify AMD ROCm HIP SDK"
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          
          $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        #     https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/en/docs-5.7.1/release/windows_support.html,
        #     https://rocm.docs.amd.com/en/docs-5.5.1/release/windows_support.html.
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.version == '5.7' && 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' || 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' }}"
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DGGML_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
            -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}

          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}

  # uncache-able building,
  # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
  windows-cuda:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        #     12.5 ==> 12.5.0
        #     11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    #     https://forums.developer.nvidia.com/t/problems-with-latest-vs2022-update/294150.
    runs-on: ${{ matrix.version == '12.5' && 'windows-2022' || 'windows-2019' }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup CUDA
        # ensure MSBuildExtensions has been configured,
        # see https://github.com/NVlabs/tiny-cuda-nn/issues/164#issuecomment-1280749170.
        uses: Jimver/cuda-toolkit@v0.2.16
        with:
          cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
          use-github-cache: false
          use-local-cache: false
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        env:
          CUDA_ARCHITECTURES: "52;61;70;75;80"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          
          Write-Host "===== BUILD ====="
          Write-Host "CUDA_PATH=${env:CUDA_PATH}"
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
              -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
              ${{ matrix.arch == 'amd64' && '-DGGML_NATIVE=off' || '-DGGML_NATIVE=on' }} `
              -DGGML_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- /m:${env:NUMBER_OF_PROCESSORS}
  
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
          } else {
              exit 1
          }
          
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}

  # uncache-able building,
  # as the oneAPI need to configure the environment variables via setvars.bat.
  windows-oneapi:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=offline.
        #     2024.2 ==> 2024.2.0
        #     2024.1 ==> 2024.1.1
        version: [ '2024.2', '2024.1' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'

          choco install curl ninja -y
      - name: Setup oneAPI
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          
          Write-Host "I install Intel oneAPI SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/${{ matrix.version == '2024.2' && 'e83a8e64-04fc-45df-85c6-c2208d03bdb5/w_BaseKit_p_2024.2.0.635' || '7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595' }}.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          
          Write-Host "I verify Intel oneAPI SDK"
          & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
          
          $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
          "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
          Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
          & "${{ github.workspace }}\llama-box\scripts\build-windows-oneapi.bat" "${{ github.workspace }}" "${{ matrix.arch }}"
          
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}

  release:
    if: ${{ startsWith(github.ref, 'refs/tags/') }}
    permissions:
      contents: write
      actions: read
      id-token: write
    runs-on: ubuntu-22.04
    needs:
      - darwin-metal
      - linux-hip
      - linux-cuda
      - linux-oneapi
      - windows-hip
      - windows-cuda
      - windows-oneapi
    steps:
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.workspace }}/out
          merge-multiple: true
      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          tag_name: "${{ env.VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*