
Commit

Merge branch 'master' into update/WHISPER_CPP_VERSION
mudler authored Sep 26, 2024
2 parents 416155e + fa5c985 commit 9528755
Showing 12 changed files with 169 additions and 45 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/test.yml
@@ -178,13 +178,22 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
2 changes: 1 addition & 1 deletion Makefile
@@ -468,7 +468,7 @@ run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

run-e2e-aio:
run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio

2 changes: 1 addition & 1 deletion docs/data/version.json
@@ -1,3 +1,3 @@
{
"version": "v2.21.0"
"version": "v2.21.1"
}
60 changes: 60 additions & 0 deletions gallery/index.yaml
@@ -1,4 +1,64 @@
---
## llama3.2
- &llama32
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
license: llama3.2
description: |
The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
Model Developer: Meta
Model Architecture: Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
tags:
- llm
- gguf
- gpu
- cpu
- llama3.2
name: "llama-3.2-1b-instruct:q4_k_m"
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
overrides:
parameters:
model: llama-3.2-1b-instruct-q4_k_m.gguf
files:
- filename: llama-3.2-1b-instruct-q4_k_m.gguf
sha256: 1d0e9419ec4e12aef73ccf4ffd122703e94c48344a96bc7c5f0f2772c2152ce3
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
- !!merge <<: *llama32
name: "llama-3.2-3b-instruct:q4_k_m"
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF
overrides:
parameters:
model: llama-3.2-3b-instruct-q4_k_m.gguf
files:
- filename: llama-3.2-3b-instruct-q4_k_m.gguf
sha256: c55a83bfb6396799337853ca69918a0b9bbb2917621078c34570bc17d20fd7a1
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF/llama-3.2-3b-instruct-q4_k_m.gguf
- !!merge <<: *llama32
name: "llama-3.2-3b-instruct:q8_0"
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF
overrides:
parameters:
model: llama-3.2-3b-instruct-q8_0.gguf
files:
- filename: llama-3.2-3b-instruct-q8_0.gguf
sha256: 51725f77f997a5080c3d8dd66e073da22ddf48ab5264f21f05ded9b202c3680e
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF/llama-3.2-3b-instruct-q8_0.gguf
- !!merge <<: *llama32
name: "llama-3.2-1b-instruct:q8_0"
urls:
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF
overrides:
parameters:
model: llama-3.2-1b-instruct-q8_0.gguf
files:
- filename: llama-3.2-1b-instruct-q8_0.gguf
sha256: ba345c83bf5cc679c653b853c46517eea5a34f03ed2205449db77184d9ae62a9
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF/llama-3.2-1b-instruct-q8_0.gguf
## Qwen2.5
- &qwen25
name: "qwen2.5-14b-instruct"
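The entries added above only describe where to fetch the quantized GGUF files and which checksums to expect; nothing is downloaded until a model is requested. Below is a hedged sketch of how one of the new llama3.2 entries could be installed through LocalAI's gallery API — it assumes an instance listening on localhost:8080 and that `/models/apply` accepts the bare gallery name; depending on the LocalAI version the id may need a gallery prefix.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Ask a running LocalAI instance to install one of the llama3.2 gallery entries.
	payload := bytes.NewBufferString(`{"id": "llama-3.2-1b-instruct:q4_k_m"}`)

	resp, err := http.Post("http://localhost:8080/models/apply", "application/json", payload)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The apply endpoint normally returns a job descriptor; the download runs asynchronously.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```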
15 changes: 10 additions & 5 deletions pkg/model/initializers.go
@@ -304,18 +304,19 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
}
// Make sure the process is executable
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
process, err := ml.startProcess(uri, o.model, serverAddress)
if err != nil {
log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
return nil, err
}

log.Debug().Msgf("GRPC Service Started")

client = NewModel(modelName, serverAddress)
client = NewModel(modelName, serverAddress, process)
} else {
log.Debug().Msg("external backend is uri")
// address
client = NewModel(modelName, uri)
client = NewModel(modelName, uri, nil)
}
} else {
grpcProcess := backendPath(o.assetDir, backend)
@@ -346,13 +347,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)

// Make sure the process is executable in any circumstance
if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
process, err := ml.startProcess(grpcProcess, o.model, serverAddress, args...)
if err != nil {
return nil, err
}

log.Debug().Msgf("GRPC Service Started")

client = NewModel(modelName, serverAddress)
client = NewModel(modelName, serverAddress, process)
}

log.Debug().Msgf("Wait for the service to start up")
@@ -374,6 +376,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

if !ready {
log.Debug().Msgf("GRPC Service NOT ready")
ml.deleteProcess(o.model)
return nil, fmt.Errorf("grpc service not ready")
}

@@ -385,9 +388,11 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
if err != nil {
ml.deleteProcess(o.model)
return nil, fmt.Errorf("could not load model: %w", err)
}
if !res.Success {
ml.deleteProcess(o.model)
return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
}

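The hunks above change startProcess to hand back the *process.Process it started, pass that handle into NewModel, and call ml.deleteProcess on every failure path (the service never becomes ready, LoadModel returns an error, or a non-success response), so a backend that fails to come up no longer leaks its process. The sketch below uses stand-in types, not the LocalAI ones, to illustrate that ownership pattern.

```go
package main

import (
	"errors"
	"fmt"
)

// backendProcess stands in for *process.Process from go-processmanager.
type backendProcess struct{ stopped bool }

func (p *backendProcess) Stop() error { p.stopped = true; return nil }

// backendModel stands in for model.Model: it owns the process that serves it.
type backendModel struct {
	id      string
	address string
	process *backendProcess
}

type loader struct{ models map[string]*backendModel }

func (l *loader) startProcess(binary, id, addr string) (*backendProcess, error) {
	// The real code chmods the backend binary and launches it via go-processmanager.
	return &backendProcess{}, nil
}

func (l *loader) deleteProcess(id string) {
	if m, ok := l.models[id]; ok && m.process != nil {
		_ = m.process.Stop() // stop the child before forgetting about it
	}
	delete(l.models, id)
}

func (l *loader) grpcModel(id, binary, addr string, becomesReady bool) (*backendModel, error) {
	p, err := l.startProcess(binary, id, addr)
	if err != nil {
		return nil, err
	}
	m := &backendModel{id: id, address: addr, process: p}
	l.models[id] = m

	if !becomesReady { // mirrors the "grpc service not ready" branch above
		l.deleteProcess(id)
		return nil, errors.New("grpc service not ready")
	}
	return m, nil
}

func main() {
	l := &loader{models: map[string]*backendModel{}}
	_, err := l.grpcModel("llama", "/backends/llama-cpp", "127.0.0.1:50051", false)
	fmt.Println(err, "- models still registered:", len(l.models))
}
```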
32 changes: 14 additions & 18 deletions pkg/model/loader.go
@@ -13,28 +13,25 @@ import (

"github.com/mudler/LocalAI/pkg/utils"

process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log"
)

// new idea: what if we declare a struct of these here, and use a loop to check?

// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl
type ModelLoader struct {
ModelPath string
mu sync.Mutex
models map[string]*Model
grpcProcesses map[string]*process.Process
templates *templates.TemplateCache
wd *WatchDog
ModelPath string
mu sync.Mutex
models map[string]*Model
templates *templates.TemplateCache
wd *WatchDog
}

func NewModelLoader(modelPath string) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
models: make(map[string]*Model),
templates: templates.NewTemplateCache(modelPath),
grpcProcesses: make(map[string]*process.Process),
ModelPath: modelPath,
models: make(map[string]*Model),
templates: templates.NewTemplateCache(modelPath),
}

return nml
@@ -127,6 +124,8 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

ml.mu.Lock()
defer ml.mu.Unlock()
model, err := loader(modelName, modelFile)
if err != nil {
return nil, err
@@ -136,8 +135,6 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
return nil, fmt.Errorf("loader didn't return a model")
}

ml.mu.Lock()
defer ml.mu.Unlock()
ml.models[modelName] = model

return model, nil
@@ -146,14 +143,13 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
func (ml *ModelLoader) ShutdownModel(modelName string) error {
ml.mu.Lock()
defer ml.mu.Unlock()

_, ok := ml.models[modelName]
model, ok := ml.models[modelName]
if !ok {
return fmt.Errorf("model %s not found", modelName)
}

retries := 1
for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
for model.GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", modelName)
dur := time.Duration(retries*2) * time.Second
if dur > retryTimeout {
@@ -185,8 +181,8 @@ func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
if !alive {
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
log.Warn().Msgf("Deleting the process in order to recreate it")
process, exists := ml.grpcProcesses[s]
if !exists {
process := m.Process()
if process == nil {
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
return m
}
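Two things change in loader.go: the per-loader grpcProcesses map disappears (the process now lives on the Model), and the mutex is taken before the loader callback runs, so creating and registering a model is one critical section. The generic sketch below — not the actual LoadModel — shows the kind of race the earlier ordering allowed: concurrent loads of the same name could each run the expensive loader and then overwrite one another's entry.

```go
package main

import (
	"fmt"
	"sync"
)

type registry struct {
	mu          sync.Mutex
	loaded      map[string]string
	loaderCalls int
}

// load takes the lock *before* invoking the loader, as the updated LoadModel does.
func (r *registry) load(name string, loader func() string) string {
	r.mu.Lock()
	defer r.mu.Unlock()

	if v, ok := r.loaded[name]; ok {
		return v // an earlier caller already created it
	}
	v := loader() // runs inside the critical section
	r.loaderCalls++
	r.loaded[name] = v
	return v
}

func main() {
	r := &registry{loaded: map[string]string{}}

	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			r.load("llama", func() string { return "backend handle" })
		}()
	}
	wg.Wait()

	fmt.Println("loader invocations:", r.loaderCalls) // 1, not 4
}
```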
4 changes: 2 additions & 2 deletions pkg/model/loader_test.go
@@ -63,7 +63,7 @@ var _ = Describe("ModelLoader", func() {

Context("LoadModel", func() {
It("should load a model and keep it in memory", func() {
mockModel = model.NewModel("foo", "test.model")
mockModel = model.NewModel("foo", "test.model", nil)

mockLoader := func(modelName, modelFile string) (*model.Model, error) {
return mockModel, nil
@@ -88,7 +88,7 @@

Context("ShutdownModel", func() {
It("should shutdown a loaded model", func() {
mockModel = model.NewModel("foo", "test.model")
mockModel = model.NewModel("foo", "test.model", nil)

mockLoader := func(modelName, modelFile string) (*model.Model, error) {
return mockModel, nil
18 changes: 16 additions & 2 deletions pkg/model/model.go
@@ -1,20 +1,32 @@
package model

import grpc "github.com/mudler/LocalAI/pkg/grpc"
import (
"sync"

grpc "github.com/mudler/LocalAI/pkg/grpc"
process "github.com/mudler/go-processmanager"
)

type Model struct {
ID string `json:"id"`
address string
client grpc.Backend
process *process.Process
sync.Mutex
}

func NewModel(ID, address string) *Model {
func NewModel(ID, address string, process *process.Process) *Model {
return &Model{
ID: ID,
address: address,
process: process,
}
}

func (m *Model) Process() *process.Process {
return m.process
}

func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
if m.client != nil {
return m.client
@@ -25,6 +37,8 @@ func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
enableWD = true
}

m.Lock()
defer m.Unlock()
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
return m.client
}
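With the process handle and a mutex embedded in Model, callers construct models with the three-argument NewModel and read the handle back through Process(); passing nil matches the external-backend branch in initializers.go, where there is no locally managed process. A minimal sketch, assuming the upstream module path github.com/mudler/LocalAI is available in go.mod:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/model"
)

func main() {
	// External backend reached only over the network: no local process to manage,
	// so the third argument is nil, just as initializers.go does for URI backends.
	m := model.NewModel("external-backend", "127.0.0.1:50051", nil)

	if m.Process() == nil {
		fmt.Printf("model %s has no local process to stop\n", m.ID)
	}
}
```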
33 changes: 18 additions & 15 deletions pkg/model/process.go
@@ -16,20 +16,22 @@ import (
)

func (ml *ModelLoader) deleteProcess(s string) error {
if _, exists := ml.grpcProcesses[s]; exists {
if err := ml.grpcProcesses[s].Stop(); err != nil {
log.Error().Err(err).Msgf("(deleteProcess) error while deleting grpc process %s", s)
if m, exists := ml.models[s]; exists {
process := m.Process()
if process != nil {
if err := process.Stop(); err != nil {
log.Error().Err(err).Msgf("(deleteProcess) error while deleting process %s", s)
}
}
}
delete(ml.grpcProcesses, s)
delete(ml.models, s)
return nil
}

func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
var err error = nil
for k, p := range ml.grpcProcesses {
if filter(k, p) {
for k, m := range ml.models {
if filter(k, m.Process()) {
e := ml.ShutdownModel(k)
err = errors.Join(err, e)
}
@@ -44,17 +46,20 @@ func (ml *ModelLoader) StopAllGRPC() error {
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
p, exists := ml.grpcProcesses[id]
p, exists := ml.models[id]
if !exists {
return -1, fmt.Errorf("no grpc backend found for %s", id)
}
return strconv.Atoi(p.PID)
if p.Process() == nil {
return -1, fmt.Errorf("no grpc backend found for %s", id)
}
return strconv.Atoi(p.Process().PID)
}

func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) error {
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) (*process.Process, error) {
// Make sure the process is executable
if err := os.Chmod(grpcProcess, 0700); err != nil {
return err
return nil, err
}

log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)
@@ -63,7 +68,7 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string

workDir, err := filepath.Abs(filepath.Dir(grpcProcess))
if err != nil {
return err
return nil, err
}

grpcControlProcess := process.New(
@@ -79,10 +84,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
ml.wd.AddAddressModelMap(serverAddress, id)
}

ml.grpcProcesses[id] = grpcControlProcess

if err := grpcControlProcess.Run(); err != nil {
return err
return grpcControlProcess, err
}

log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
@@ -116,5 +119,5 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
}
}()

return nil
return grpcControlProcess, nil
}