diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index c95608c73b8b..37e599d7f69d 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -75,6 +75,13 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ad13ce0599fc..8595f0fd1c72 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -122,6 +122,34 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-core' + ffmpeg: 'false' + image-type: 'core' + runs-on: 'ubuntu-latest' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f32-core' + ffmpeg: 'false' + image-type: 'core' + runs-on: 'ubuntu-latest' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f16-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'ubuntu-latest' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + tag-suffix: 'sycl-f32-ffmpeg-core' + ffmpeg: 'true' + image-type: 'core' + runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" diff --git a/Dockerfile b/Dockerfile index ab63d442b2b9..03e39a30cad4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye ARG IMAGE_TYPE=extras # extras or core - FROM golang:$GO_VERSION as requirements-core ARG BUILD_TYPE @@ -38,6 +37,11 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get update && \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} 
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi +# oneapi requirements +RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ + wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ + sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a --silent --eula accept && rm -f ./l_BaseKit_p_2024.0.1.46_offline.sh \ + ; fi ENV PATH /usr/local/cuda/bin:${PATH} # OpenBLAS requirements and stable diffusion diff --git a/Makefile b/Makefile index 89c54985ba41..8b734bebd31e 100644 --- a/Makefile +++ b/Makefile @@ -111,6 +111,14 @@ ifeq ($(BUILD_TYPE),hipblas) CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link endif +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +endif + ifeq ($(BUILD_TYPE),metal) CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders export LLAMA_METAL=1 diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile index a64ee1b4cfbc..e6a189436158 100644 --- a/backend/cpp/llama/Makefile +++ b/backend/cpp/llama/Makefile @@ -3,6 +3,7 @@ LLAMA_VERSION?= CMAKE_ARGS?= BUILD_TYPE?= +ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh # If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically ifeq ($(BUILD_TYPE),cublas) @@ -49,5 +50,10 @@ clean: rm -rf grpc-server grpc-server: llama.cpp llama.cpp/examples/grpc-server +ifneq (,$(findstring sycl,$(BUILD_TYPE))) + . $(ONEAPI_VARS) && \ + cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release +else cd llama.cpp && mkdir -p build && cd build && cmake .. 
$(CMAKE_ARGS) && cmake --build . --config Release +endif cp llama.cpp/build/bin/grpc-server . \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 05f671284ff1..08a2ede30b59 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -13,6 +13,10 @@ if [ -n "$EXTRA_BACKENDS" ]; then done fi +if [ -e "/opt/intel/oneapi/setvars.sh" ]; then + source /opt/intel/oneapi/setvars.sh +fi + if [ "$REBUILD" != "false" ]; then rm -rf ./local-ai make build -j${BUILD_PARALLELISM:-1}