From 62c4d995e3225e7386dc4eb82979235d0ac2423b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 00:23:30 +0100 Subject: [PATCH 01/12] cleanup backends --- Makefile | 10 +++++- backend/go/llm/transformers/gpt2.go | 42 ----------------------- backend/go/llm/transformers/starcoder.go | 43 ------------------------ 3 files changed, 9 insertions(+), 86 deletions(-) delete mode 100644 backend/go/llm/transformers/gpt2.go delete mode 100644 backend/go/llm/transformers/starcoder.go diff --git a/Makefile b/Makefile index a6890759264a..5c9cb459be02 100644 --- a/Makefile +++ b/Makefile @@ -145,7 +145,15 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts) OPTIONAL_GRPC+=backend-assets/grpc/piper endif -ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) +ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface +ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml +ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all +ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv backend-assets/grpc/whisper +ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) + GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) # If empty, then we build all diff --git a/backend/go/llm/transformers/gpt2.go b/backend/go/llm/transformers/gpt2.go deleted file mode 100644 index ab162a7615eb..000000000000 --- a/backend/go/llm/transformers/gpt2.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPT2 struct { - base.SingleThread - - gpt2 *transformers.GPT2 -} - -func (llm *GPT2) Load(opts *pb.ModelOptions) error { - model, err := transformers.New(opts.ModelFile) - llm.gpt2 = model - return err -} - -func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/starcoder.go b/backend/go/llm/transformers/starcoder.go deleted file mode 100644 index 25a758a08070..000000000000 --- a/backend/go/llm/transformers/starcoder.go +++ /dev/null @@ -1,43 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Starcoder struct { - base.SingleThread - - starcoder *transformers.Starcoder -} - -func (llm *Starcoder) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewStarcoder(opts.ModelFile) - llm.starcoder = model - return err -} - -func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) { - return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} From b870c7f5595d667b10367cb4eb878bfffa9f51f6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 00:23:04 +0100 Subject: [PATCH 02/12] switch image to ubuntu 22.04 --- Dockerfile | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index f81b5ee300f7..e46bdbcbf3bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,11 @@ -ARG GO_VERSION=1.21-bullseye +ARG GO_VERSION=1.21 ARG IMAGE_TYPE=extras # extras or core -FROM golang:$GO_VERSION as requirements-core +FROM ubuntu:22.04 as requirements-core +ARG GO_VERSION=1.21.7 +ARG GO_ARCH=amd64 ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 @@ -11,7 +13,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} - +ENV DEBIAN_FRONTEND=noninteractive ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh" ARG GO_TAGS="stablediffusion tinydream tts" @@ -19,6 +21,11 @@ ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ apt-get install -y ca-certificates curl patch pip cmake && apt-get clean +# Download Go 1.2.2 and install it to /usr/local/go +RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$GO_ARCH.tar.gz | tar -v -C /usr/local -xz + +# Let's people find our Go binaries +ENV PATH $PATH:/usr/local/go/bin COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -40,9 +47,19 @@ RUN if [ 
"${BUILD_TYPE}" = "cublas" ]; then \ # oneapi requirements RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ - wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \ - sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \ - rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \ + apt-get update && apt-get upgrade -y && \ + apt-get install -y --no-install-recommends \ + curl ca-certificates gpg-agent software-properties-common && \ + curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list && \ + apt-get update && apt-get upgrade -y && \ + apt-get install -y --no-install-recommends \ + curl ca-certificates gpg-agent software-properties-common && \ + curl -fsSL https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics-archive-keyring.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy unified" > /etc/apt/sources.list.d/intel-graphics.list && \ + apt-get update && apt-get upgrade -y && \ + apt-get install -y --no-install-recommends \ + ca-certificates build-essential pkg-config gnupg libarchive13 openssh-server openssh-client wget net-tools git intel-basekit intel-level-zero-gpu level-zero && apt-get clean \ ; fi ENV PATH /usr/local/cuda/bin:${PATH} From 1efd6a3f47d1ee184794aaebe00392644ed4764c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 01:04:42 +0100 Subject: [PATCH 03/12] adapt commands for ubuntu --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index e46bdbcbf3bc..9af12e3a5990 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ - apt-get install -y ca-certificates curl patch pip cmake && apt-get clean + apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean # Download Go 1.2.2 and install it to /usr/local/go RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$GO_ARCH.tar.gz | tar -v -C /usr/local -xz @@ -38,9 +38,9 @@ RUN echo "Target Variant: $TARGETVARIANT" RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get install -y software-properties-common && \ apt-add-repository contrib && \ - curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - rm -f cuda-keyring_1.0-1_all.deb && \ + curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + rm -f cuda-keyring_1.1-1_all.deb && \ apt-get update && \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi From 3288f050014229d7f4ffb23687518682a83a8e51 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 
01:09:28 +0100 Subject: [PATCH 04/12] transformers cleanup --- Makefile | 35 +------------------- backend/go/llm/transformers/dolly.go | 44 -------------------------- backend/go/llm/transformers/gptj.go | 42 ------------------------ backend/go/llm/transformers/gptneox.go | 42 ------------------------ backend/go/llm/transformers/mpt.go | 42 ------------------------ backend/go/llm/transformers/predict.go | 26 --------------- backend/go/llm/transformers/replit.go | 42 ------------------------ 7 files changed, 1 insertion(+), 272 deletions(-) delete mode 100644 backend/go/llm/transformers/dolly.go delete mode 100644 backend/go/llm/transformers/gptj.go delete mode 100644 backend/go/llm/transformers/gptneox.go delete mode 100644 backend/go/llm/transformers/mpt.go delete mode 100644 backend/go/llm/transformers/predict.go delete mode 100644 backend/go/llm/transformers/replit.go diff --git a/Makefile b/Makefile index 5c9cb459be02..1e265ab83d08 100644 --- a/Makefile +++ b/Makefile @@ -14,9 +14,6 @@ CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8 -# go-ggml-transformers version -GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a - # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f @@ -225,14 +222,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a -## CEREBRAS GPT -sources/go-ggml-transformers: - git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers - cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1 - -sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers - $(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a - sources/whisper.cpp: git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 @@ -260,12 +249,11 @@ sources/go-piper/libpiper_binding.a: sources/go-piper backend/cpp/llama/llama.cpp: LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp -get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream +get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream touch $@ replace: $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang - $(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(CURDIR)/sources/go-ggml-transformers $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go @@ -284,7 +272,6 @@ rebuild: ## Rebuilds the project $(MAKE) -C sources/go-llama 
clean $(MAKE) -C sources/go-llama-ggml clean $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean - $(MAKE) -C sources/go-ggml-transformers clean $(MAKE) -C sources/go-rwkv clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean @@ -513,26 +500,6 @@ backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/ CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ -backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/ - -backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/ - -backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/ - -backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/ - -backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/ - backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv diff --git a/backend/go/llm/transformers/dolly.go b/backend/go/llm/transformers/dolly.go deleted file mode 100644 index b3579b045ac7..000000000000 --- a/backend/go/llm/transformers/dolly.go +++ /dev/null @@ -1,44 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Dolly struct { - base.SingleThread - - dolly *transformers.Dolly -} - -func (llm *Dolly) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewDolly(opts.ModelFile) - llm.dolly = model - return err -} - -func (llm *Dolly) Predict(opts 
*pb.PredictOptions) (string, error) { - return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error { - - go func() { - res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - - return nil -} diff --git a/backend/go/llm/transformers/gptj.go b/backend/go/llm/transformers/gptj.go deleted file mode 100644 index f00f10449893..000000000000 --- a/backend/go/llm/transformers/gptj.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPTJ struct { - base.SingleThread - - gptj *transformers.GPTJ -} - -func (llm *GPTJ) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewGPTJ(opts.ModelFile) - llm.gptj = model - return err -} - -func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/gptneox.go b/backend/go/llm/transformers/gptneox.go deleted file mode 100644 index a06d910e05fe..000000000000 --- a/backend/go/llm/transformers/gptneox.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type GPTNeoX struct { - base.SingleThread - - gptneox *transformers.GPTNeoX -} - -func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewGPTNeoX(opts.ModelFile) - llm.gptneox = model - return err -} - -func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...) 
- - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/mpt.go b/backend/go/llm/transformers/mpt.go deleted file mode 100644 index f6e0a1432384..000000000000 --- a/backend/go/llm/transformers/mpt.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type MPT struct { - base.SingleThread - - mpt *transformers.MPT -} - -func (llm *MPT) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewMPT(opts.ModelFile) - llm.mpt = model - return err -} - -func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) { - return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -// fallback to Predict -func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} diff --git a/backend/go/llm/transformers/predict.go b/backend/go/llm/transformers/predict.go deleted file mode 100644 index 861d1196a2f4..000000000000 --- a/backend/go/llm/transformers/predict.go +++ /dev/null @@ -1,26 +0,0 @@ -package transformers - -import ( - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption { - predictOptions := []transformers.PredictOption{ - transformers.SetTemperature(float64(opts.Temperature)), - transformers.SetTopP(float64(opts.TopP)), - transformers.SetTopK(int(opts.TopK)), - transformers.SetTokens(int(opts.Tokens)), - transformers.SetThreads(int(opts.Threads)), - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch))) - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed))) - } - - return predictOptions -} diff --git a/backend/go/llm/transformers/replit.go b/backend/go/llm/transformers/replit.go deleted file mode 100644 index a979edcb8231..000000000000 --- a/backend/go/llm/transformers/replit.go +++ /dev/null @@ -1,42 +0,0 @@ -package transformers - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/LocalAI/pkg/grpc/base" - pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - - transformers "github.com/go-skynet/go-ggml-transformers.cpp" -) - -type Replit struct { - base.SingleThread - - replit *transformers.Replit -} - -func (llm *Replit) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewReplit(opts.ModelFile) - llm.replit = model - return err -} - -func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) { - return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...) 
-} - -// fallback to Predict -func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error { - go func() { - res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...) - - if err != nil { - fmt.Println("err: ", err) - } - results <- res - close(results) - }() - return nil -} From d736b02d9ea851ac43240562ed0e775fc0ce1f25 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 01:14:01 +0100 Subject: [PATCH 05/12] no contrib on ubuntu --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9af12e3a5990..47e1c7359b7a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,6 @@ RUN echo "Target Variant: $TARGETVARIANT" # CuBLAS requirements RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get install -y software-properties-common && \ - apt-add-repository contrib && \ curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm -f cuda-keyring_1.1-1_all.deb && \ From 839130007a34eea42a119cf8e3d38aa942069f45 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 08:50:34 +0100 Subject: [PATCH 06/12] Change test model to gguf --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1e265ab83d08..84d019da2734 100644 --- a/Makefile +++ b/Makefile @@ -316,7 +316,7 @@ run: prepare ## run local-ai test-models/testmodel: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel + wget -q https://huggingface.co/afrideva/open-llama-3b-v2-instruct-GGUF/resolve/main/open-llama-3b-v2-instruct.q4_k_m.gguf -O test-models/testmodel wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav From 0dd76e50ff94e7363baf47b0b36e202fa37c656e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 14:14:39 +0100 Subject: [PATCH 07/12] ci: disable bark tests (too cpu-intensive) Signed-off-by: Ettore Di Giacinto --- .github/workflows/test-extra.yml | 132 +++++++++++++++---------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index b1ecec257d37..68da2c56c987 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -164,74 +164,74 @@ jobs: - tests-bark: - runs-on: ubuntu-latest - steps: - - name: Release space from worker - run: | - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - df -h - echo - sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - sudo apt-get remove --auto-remove android-sdk-platform-tools || true - sudo apt-get purge --auto-remove android-sdk-platform-tools || true - sudo rm -rf /usr/local/lib/android - sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - sudo rm -rf /usr/share/dotnet - sudo apt-get remove -y '^mono-.*' || true - sudo apt-get remove -y '^ghc-.*' || true - sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - sudo apt-get remove -y 'php.*' || true - sudo apt-get remove -y hhvm powershell firefox monodoc-manual 
msbuild || true - sudo apt-get remove -y '^google-.*' || true - sudo apt-get remove -y azure-cli || true - sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - sudo apt-get remove -y '^gfortran-.*' || true - sudo apt-get remove -y microsoft-edge-stable || true - sudo apt-get remove -y firefox || true - sudo apt-get remove -y powershell || true - sudo apt-get remove -y r-base-core || true - sudo apt-get autoremove -y - sudo apt-get clean - echo - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - sudo rm -rfv build || true - sudo rm -rf /usr/share/dotnet || true - sudo rm -rf /opt/ghc || true - sudo rm -rf "/usr/local/share/boost" || true - sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true - df -h - - name: Clone - uses: actions/checkout@v4 - with: - submodules: true - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential ffmpeg - curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ - sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ - gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ - sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ - sudo apt-get update && \ - sudo apt-get install -y conda - sudo apt-get install -y ca-certificates cmake curl patch - sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 + # tests-bark: + # runs-on: ubuntu-latest + # steps: + # - name: Release space from worker + # run: | + # echo "Listing top largest packages" + # pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + # head -n 30 <<< "${pkgs}" + # echo + # df -h + # echo + # sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + # sudo apt-get remove --auto-remove android-sdk-platform-tools || true + # sudo apt-get purge --auto-remove android-sdk-platform-tools || true + # sudo rm -rf /usr/local/lib/android + # sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + # sudo rm -rf /usr/share/dotnet + # sudo apt-get remove -y '^mono-.*' || true + # sudo apt-get remove -y '^ghc-.*' || true + # sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + # sudo apt-get remove -y 'php.*' || true + # sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + # sudo apt-get remove -y '^google-.*' || true + # sudo apt-get remove -y azure-cli || true + # sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + # sudo apt-get remove -y '^gfortran-.*' || true + # sudo apt-get remove -y microsoft-edge-stable || true + # sudo apt-get remove -y firefox || true + # sudo apt-get remove -y powershell || true + # sudo apt-get remove -y r-base-core || true + # sudo apt-get autoremove -y + # sudo apt-get clean + # echo + # echo "Listing top largest packages" + # pkgs=$(dpkg-query -Wf 
'${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + # head -n 30 <<< "${pkgs}" + # echo + # sudo rm -rfv build || true + # sudo rm -rf /usr/share/dotnet || true + # sudo rm -rf /opt/ghc || true + # sudo rm -rf "/usr/local/share/boost" || true + # sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true + # df -h + # - name: Clone + # uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Dependencies + # run: | + # sudo apt-get update + # sudo apt-get install build-essential ffmpeg + # curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ + # sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ + # gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \ + # sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \ + # sudo apt-get update && \ + # sudo apt-get install -y conda + # sudo apt-get install -y ca-certificates cmake curl patch + # sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 - sudo rm -rfv /usr/bin/conda || true + # sudo rm -rfv /usr/bin/conda || true - - name: Test bark - run: | - export PATH=$PATH:/opt/conda/bin - make -C backend/python/bark - make -C backend/python/bark test + # - name: Test bark + # run: | + # export PATH=$PATH:/opt/conda/bin + # make -C backend/python/bark + # make -C backend/python/bark test # Below tests needs GPU. 
Commented out for now From a7a1038be752c2119d1e2ce8078de3b3eb591afa Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 14:30:34 +0100 Subject: [PATCH 08/12] cleanup --- pkg/model/initializers.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index df0aaf2f7a02..fce44fe15469 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -23,11 +23,6 @@ const ( GoLlamaBackend = "llama" LlamaGGML = "llama-ggml" LLamaCPP = "llama-cpp" - GPTJBackend = "gptj" - DollyBackend = "dolly" - MPTBackend = "mpt" - GPTNeoXBackend = "gptneox" - ReplitBackend = "replit" Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllJBackend = "gpt4all-j" @@ -50,12 +45,7 @@ var AutoLoadBackends []string = []string{ LlamaGGML, GoLlamaBackend, Gpt4All, - GPTNeoXBackend, BertEmbeddingsBackend, - GPTJBackend, - DollyBackend, - MPTBackend, - ReplitBackend, RwkvBackend, WhisperBackend, StableDiffusionBackend, From 9cfda81d584fa3d3c08d1bcfec852ef1f88e3ba4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Feb 2024 20:07:09 +0100 Subject: [PATCH 09/12] refinements --- Dockerfile | 7 ++----- Makefile | 5 +++-- api/api_test.go | 18 ++++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index 47e1c7359b7a..91bbe46093dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,6 @@ ARG IMAGE_TYPE=extras FROM ubuntu:22.04 as requirements-core ARG GO_VERSION=1.21.7 -ARG GO_ARCH=amd64 ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=11 ARG CUDA_MINOR_VERSION=7 @@ -21,10 +20,8 @@ ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean -# Download Go 1.2.2 and install it to /usr/local/go -RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$GO_ARCH.tar.gz | tar -v -C /usr/local -xz - -# Let's people find our Go binaries +# Install Go +RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ diff --git a/Makefile b/Makefile index 84d019da2734..e21ad2eb7793 100644 --- a/Makefile +++ b/Makefile @@ -148,7 +148,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all -ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv backend-assets/grpc/whisper +ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv +ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC) @@ -316,7 +317,7 @@ run: prepare ## run local-ai test-models/testmodel: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/afrideva/open-llama-3b-v2-instruct-GGUF/resolve/main/open-llama-3b-v2-instruct.q4_k_m.gguf -O test-models/testmodel + wget -q https://huggingface.co/TheBloke/stable-code-3b-GGUF/resolve/main/stable-code-3b.Q2_K.gguf -O test-models/testmodel wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav diff --git a/api/api_test.go b/api/api_test.go index 491a56b5a2ea..6e99609db0a0 100644 --- a/api/api_test.go 
+++ b/api/api_test.go @@ -29,6 +29,8 @@ import ( "github.com/sashabaranov/go-openai/jsonschema" ) +const testPrompt = "import torch\nimport torch.nn as nn" + type modelApplyRequest struct { ID string `json:"id"` URL string `json:"url"` @@ -460,7 +462,7 @@ var _ = Describe("API test", func() { return response["processed"].(bool) }, "960s", "10s").Should(Equal(true)) - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) @@ -629,28 +631,28 @@ var _ = Describe("API test", func() { Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8? }) It("can generate completions", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"}) + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) It("can generate chat completions ", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate completions from model configs", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"}) + resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Text).ToNot(BeEmpty()) }) It("can generate chat completions from model configs", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) @@ -658,7 +660,7 @@ var _ = Describe("API test", func() { It("returns errors", func() { backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface - _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) + _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", 
Prompt: testPrompt}) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends))) }) @@ -834,13 +836,13 @@ var _ = Describe("API test", func() { app.Shutdown() }) It("can generate chat completions from config file (list1)", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) It("can generate chat completions from config file (list2)", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) From c7b7fa3b9989498d62ecc314efa7473bb073fcb2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Feb 2024 00:42:21 +0100 Subject: [PATCH 10/12] use intel base image --- .github/workflows/image-pr.yml | 7 +++++++ .github/workflows/image.yml | 18 ++++++++++++++++++ .github/workflows/image_build.yml | 6 ++++++ Dockerfile | 22 +++------------------- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 8dd699f5fb55..ae8bd070a125 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -21,6 +21,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -39,6 +40,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -48,6 +50,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -60,6 +63,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -75,9 +79,11 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: 'sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -91,3 +97,4 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index c23cdabf6adb..ac61deeca6e8 100644 --- 
a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -25,6 +25,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -44,6 +45,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: '' platforms: 'linux/amd64' tag-latest: 'false' @@ -51,6 +53,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -60,6 +63,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -69,6 +73,7 @@ jobs: ffmpeg: '' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,6 +83,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -87,6 +93,7 @@ jobs: ffmpeg: 'true' image-type: 'extras' runs-on: 'arc-runner-set' + base-image: "ubuntu:22.04" - build-type: '' #platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64' @@ -94,6 +101,7 @@ jobs: tag-suffix: '' ffmpeg: '' image-type: 'extras' + base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml @@ -107,6 +115,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + base-image: ${{ matrix.base-image }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -121,10 +130,12 @@ jobs: tag-suffix: '-ffmpeg-core' ffmpeg: 'true' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f16-core' ffmpeg: 'false' image-type: 'core' @@ -132,6 +143,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f32-core' ffmpeg: 'false' image-type: 'core' @@ -139,6 +151,7 @@ jobs: - build-type: 'sycl_f16' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f16-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -146,6 +159,7 @@ jobs: - build-type: 'sycl_f32' platforms: 'linux/amd64' tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" tag-suffix: '-sycl-f32-ffmpeg-core' ffmpeg: 'true' image-type: 'core' @@ -158,6 +172,7 @@ jobs: tag-suffix: '-cublas-cuda11-core' ffmpeg: '' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "12" @@ -167,6 +182,7 @@ jobs: tag-suffix: '-cublas-cuda12-core' ffmpeg: '' image-type: 'core' + base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' - build-type: 'cublas' cuda-major-version: "11" @@ -177,6 +193,7 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -186,3 +203,4 
@@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' + base-image: "ubuntu:22.04" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index ff9b751f4634..a45473b44e15 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -4,6 +4,11 @@ name: 'build container images (reusable)' on: workflow_call: inputs: + base-image: + description: 'Base image' + required: false + default: '' + type: string build-type: description: 'Build type' default: '' @@ -154,6 +159,7 @@ jobs: CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} FFMPEG=${{ inputs.ffmpeg }} IMAGE_TYPE=${{ inputs.image-type }} + BASE_IMAGE=${{ inputs.base-image }} context: . file: ./Dockerfile platforms: ${{ inputs.platforms }} diff --git a/Dockerfile b/Dockerfile index 91bbe46093dc..5cee6a23543a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,9 @@ ARG GO_VERSION=1.21 ARG IMAGE_TYPE=extras -# extras or core +ARG BASE_IMAGE=ubuntu:22.04 -FROM ubuntu:22.04 as requirements-core +# extras or core +FROM ${BASE_IMAGE} as requirements-core ARG GO_VERSION=1.21.7 ARG BUILD_TYPE @@ -41,23 +42,6 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \ ; fi -# oneapi requirements -RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \ - apt-get update && apt-get upgrade -y && \ - apt-get install -y --no-install-recommends \ - curl ca-certificates gpg-agent software-properties-common && \ - curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list && \ - apt-get update && apt-get upgrade -y && \ - apt-get install -y --no-install-recommends \ - curl ca-certificates gpg-agent software-properties-common && \ - curl -fsSL https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics-archive-keyring.gpg && \ - echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy unified" > /etc/apt/sources.list.d/intel-graphics.list && \ - apt-get update && apt-get upgrade -y && \ - apt-get install -y --no-install-recommends \ - ca-certificates build-essential pkg-config gnupg libarchive13 openssh-server openssh-client wget net-tools git intel-basekit intel-level-zero-gpu level-zero && apt-get clean \ - ; fi - ENV PATH /usr/local/cuda/bin:${PATH} # OpenBLAS requirements and stable diffusion From e7f62c25f16968473bd516c6fbfd50b3a94a920b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Feb 2024 00:47:26 +0100 Subject: [PATCH 11/12] Makefile: Add docker targets --- Makefile | 19 +++++++++++++++++++ entrypoint.sh | 4 ---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e21ad2eb7793..b17dba247d8a 100644 --- a/Makefile +++ b/Makefile @@ -532,3 +532,22 @@ backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper. 
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ grpcs: prepare $(GRPC_BACKENDS) + +DOCKER_IMAGE?=local-ai +IMAGE_TYPE?=core +BASE_IMAGE?=ubuntu:22.04 + +docker: + docker build \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS=$(GO_TAGS) \ + --build-arg BUILD_TYPE=$(BUILD_TYPE) \ + -t $(DOCKER_IMAGE) . + +docker-image-intel: + docker build \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS="none" \ + --build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) . \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index ae1976afb6bf..05f671284ff1 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -13,10 +13,6 @@ if [ -n "$EXTRA_BACKENDS" ]; then done fi -if [ -e "/opt/intel/oneapi/setvars.sh" ]; then - source /opt/intel/oneapi/setvars.sh -fi - if [ "$REBUILD" != "false" ]; then rm -rf ./local-ai make build -j${BUILD_PARALLELISM:-1} From dcc02d15683aeb0ff1eb5e7312f3c9e4e53b668c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Feb 2024 08:43:56 +0100 Subject: [PATCH 12/12] Change test model --- Makefile | 2 +- api/api_test.go | 11 +++++++++-- tests/models_fixtures/config.yaml | 4 ++-- tests/models_fixtures/gpt4.yaml | 2 +- tests/models_fixtures/gpt4_2.yaml | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b17dba247d8a..51c941d4ac22 100644 --- a/Makefile +++ b/Makefile @@ -317,7 +317,7 @@ run: prepare ## run local-ai test-models/testmodel: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/TheBloke/stable-code-3b-GGUF/resolve/main/stable-code-3b.Q2_K.gguf -O test-models/testmodel + wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav diff --git a/api/api_test.go b/api/api_test.go index 6e99609db0a0..04d2d6fec02e 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -29,7 +29,14 @@ import ( "github.com/sashabaranov/go-openai/jsonschema" ) -const testPrompt = "import torch\nimport torch.nn as nn" +const testPrompt = `### System: +You are an AI assistant that follows instruction extremely well. Help as much as you can. + +### User: + +Can you help rephrasing sentences? 
+ +### Response:` type modelApplyRequest struct { ID string `json:"id"` @@ -462,7 +469,7 @@ var _ = Describe("API test", func() { return response["processed"].(bool) }, "960s", "10s").Should(Equal(true)) - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) + resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Choices)).To(Equal(1)) Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml index 3deabf9dca27..749d1699415f 100644 --- a/tests/models_fixtures/config.yaml +++ b/tests/models_fixtures/config.yaml @@ -4,7 +4,7 @@ top_p: 80 top_k: 0.9 temperature: 0.1 - context_size: 10 + context_size: 200 stopwords: - "HUMAN:" - "### Response:" @@ -20,7 +20,7 @@ top_k: 0.9 temperature: 0.1 model: testmodel - context_size: 10 + context_size: 200 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml index 77b72b30a293..652a407ca343 100644 --- a/tests/models_fixtures/gpt4.yaml +++ b/tests/models_fixtures/gpt4.yaml @@ -4,7 +4,7 @@ parameters: top_p: 80 top_k: 0.9 temperature: 0.1 -context_size: 10 +context_size: 200 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml index 62d9fdbcd03c..904693ca5ed5 100644 --- a/tests/models_fixtures/gpt4_2.yaml +++ b/tests/models_fixtures/gpt4_2.yaml @@ -4,7 +4,7 @@ parameters: top_p: 80 top_k: 0.9 temperature: 0.1 -context_size: 10 +context_size: 200 stopwords: - "HUMAN:" - "### Response:"
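
A quick way to exercise the new docker targets from PATCH 11 together with the BASE_IMAGE plumbing from PATCH 10. This is a sketch only, assuming a local BuildKit-enabled Docker daemon; the image tags (local-ai:test, local-ai:cuda-test, local-ai:sycl-test) are placeholder names, not anything defined by the series:

    # Core image on the default base (BASE_IMAGE?=ubuntu:22.04, IMAGE_TYPE?=core)
    make docker DOCKER_IMAGE=local-ai:test

    # CUDA variant: BUILD_TYPE is forwarded as a build-arg into the Dockerfile's
    # cublas branch, which now pulls the toolkit from the ubuntu2204 CUDA repo
    make docker BUILD_TYPE=cublas DOCKER_IMAGE=local-ai:cuda-test

    # SYCL variant: docker-image-intel pins BASE_IMAGE to
    # intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 with BUILD_TYPE=sycl_f16,
    # matching the sycl entries added to the CI matrix in PATCH 10
    make docker-image-intel DOCKER_IMAGE=local-ai:sycl-test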