🔥 add LLaVA support and GPT vision API, multiple requests for llama.cpp, return JSON types (#1254)

* wip

* wip

* Make it functional

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip

* Small fixups

* do not inject space on role encoding, encode img at beginning of messages

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add examples/config defaults

* Add include dir of current source dir

* cleanup

* fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups

* Revert "fixups"

This reverts commit f1a4731.

* fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
mudler authored Nov 11, 2023
1 parent 3b4c5d5 commit 0eae727
Showing 28 changed files with 26,865 additions and 821 deletions.
5 changes: 4 additions & 1 deletion .env
@@ -66,4 +66,7 @@ MODELS_PATH=/models
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1
# PYTHON_GRPC_MAX_WORKERS=1

### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
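
The new LLAMACPP_PARALLEL variable sets how many parallel llama.cpp workers serve requests. As a rough standalone illustration (not code from this commit), a backend could read such a variable with a default of 1 like this:

package main

import (
    "fmt"
    "os"
    "strconv"
)

// parallelWorkers returns the configured number of parallel llama.cpp workers,
// falling back to 1 when LLAMACPP_PARALLEL is unset or not a positive integer.
func parallelWorkers() int {
    if v, err := strconv.Atoi(os.Getenv("LLAMACPP_PARALLEL")); err == nil && v > 0 {
        return v
    }
    return 1
}

func main() {
    fmt.Println("llama.cpp parallel workers:", parallelWorkers())
}
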
3 changes: 2 additions & 1 deletion api/backend/llm.go
@@ -26,7 +26,7 @@ type TokenUsage struct {
Completion int
}

func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model

grpcOpts := gRPCModelOpts(c)
@@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts.Prompt = s
opts.Images = images

tokenUsage := TokenUsage{}

1 change: 1 addition & 0 deletions api/backend/options.go
@@ -45,6 +45,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
DraftModel: c.DraftModel,
AudioPath: c.VallE.AudioPath,
Quantization: c.Quantization,
MMProj: c.MMProj,
LoraAdapter: c.LoraAdapter,
LoraBase: c.LoraBase,
NGQA: c.NGQA,
1 change: 1 addition & 0 deletions api/config/config.go
@@ -104,6 +104,7 @@ type LLMConfig struct {
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
}

type AutoGPTQ struct {
18 changes: 13 additions & 5 deletions api/openai/chat.go
@@ -81,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}

if input.ResponseFormat == "json_object" {
input.Grammar = grammar.JSONBNF
}

// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
@@ -140,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
}
}
r := config.Roles[role]
contentExists := i.Content != nil && *i.Content != ""
contentExists := i.Content != nil && i.StringContent != ""
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: *i.Content,
Content: i.StringContent,
MessageIndex: messageIndex,
}
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
@@ -166,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, " ", *i.Content)
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
@@ -180,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
}
} else {
if contentExists {
content = fmt.Sprint(*i.Content)
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
j, err := json.Marshal(i.FunctionCall)
@@ -334,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
// Note: this costs (in terms of CPU) another computation
config.Grammar = ""
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
images := []string{}
for _, m := range input.Messages {
images = append(images, m.StringImages...)
}
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
if err != nil {
log.Error().Msgf("inference error: %s", err.Error())
return
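
Taken together, the chat endpoint now accepts OpenAI-style vision messages and a JSON response format. Below is a minimal client sketch, assuming a LocalAI instance on localhost:8080 and a multimodal model configured under the hypothetical name "llava"; note that, per the check above, response_format is matched as the plain string "json_object" rather than OpenAI's object form:

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // A chat request mixing a text part and an image part, asking for JSON output.
    // Model name, endpoint, and image URL are placeholders for a local setup.
    body := []byte(`{
        "model": "llava",
        "response_format": "json_object",
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image as JSON."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
            ]
        }]
    }`)

    resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(string(out))
}
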
5 changes: 5 additions & 0 deletions api/openai/completion.go
@@ -12,6 +12,7 @@ import (
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/api/options"
"github.com/go-skynet/LocalAI/api/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -64,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
return fmt.Errorf("failed reading parameters from request:%w", err)
}

if input.ResponseFormat == "json_object" {
input.Grammar = grammar.JSONBNF
}

log.Debug().Msgf("Parameter Config: %+v", config)

if input.Stream {
7 changes: 6 additions & 1 deletion api/openai/inference.go
@@ -23,8 +23,13 @@ func ComputeChoices(
n = 1
}

images := []string{}
for _, m := range req.Messages {
images = append(images, m.StringImages...)
}

// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
if err != nil {
return result, backend.TokenUsage{}, err
}
65 changes: 64 additions & 1 deletion api/openai/request.go
@@ -2,8 +2,11 @@ package openai

import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
@@ -24,7 +27,7 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
input.Cancel = cancel
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", nil, err
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
}

modelFile := input.Model
@@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
return modelFile, input, nil
}

// getBase64Image checks whether the string is a URL; if it is, it downloads the image into memory,
// encodes it in base64, and returns the base64 string
func getBase64Image(s string) (string, error) {
if strings.HasPrefix(s, "http") {
// download the image
resp, err := http.Get(s)
if err != nil {
return "", err
}
defer resp.Body.Close()

// read the image data into memory
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}

// encode the image data in base64
encoded := base64.StdEncoding.EncodeToString(data)

// return the base64 string
return encoded, nil
}

// If the string is instead prefixed with "data:image/jpeg;base64,", drop the prefix
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
}
return "", fmt.Errorf("not valid string")
}

func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
@@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
}
}

// Decode each request's message content
index := 0
for i, m := range input.Messages {
switch content := m.Content.(type) {
case string:
input.Messages[i].StringContent = content
case []interface{}:
dat, _ := json.Marshal(content)
c := []schema.Content{}
json.Unmarshal(dat, &c)
for _, pp := range c {
if pp.Type == "text" {
input.Messages[i].StringContent = pp.Text
} else if pp.Type == "image_url" {
// Detect whether pp.ImageURL is a URL; if it is, download the image and encode it in base64:
base64, err := getBase64Image(pp.ImageURL.URL)
if err == nil {
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
index++
} else {
fmt.Print("Failed encoding image", err)
}
}
}
}
}

if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
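
As a standalone sketch (not part of the diff), the data-URL branch of getBase64Image amounts to stripping the prefix and keeping the raw base64 payload; note that, as committed, only the data:image/jpeg;base64, prefix is recognized, so data URLs for other image types fall through to the error path:

package main

import (
    "encoding/base64"
    "fmt"
    "strings"
)

func main() {
    // A tiny stand-in payload; a real request would carry a full JPEG.
    payload := base64.StdEncoding.EncodeToString([]byte{0xFF, 0xD8, 0xFF, 0xD9})
    dataURL := "data:image/jpeg;base64," + payload

    // Mirror the second branch of getBase64Image: strip the prefix and keep
    // only the raw base64 string that is later forwarded to the backend.
    encoded := strings.TrimPrefix(dataURL, "data:image/jpeg;base64,")

    if _, err := base64.StdEncoding.DecodeString(encoded); err != nil {
        fmt.Println("invalid base64 payload:", err)
        return
    }
    fmt.Println("base64 image payload ready, length:", len(encoded))
}
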
16 changes: 15 additions & 1 deletion api/schema/openai.go
@@ -55,11 +55,25 @@ type Choice struct {
Text string `json:"text,omitempty"`
}

type Content struct {
Type string `json:"type" yaml:"type"`
Text string `json:"text" yaml:"text"`
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
}

type ContentURL struct {
URL string `json:"url" yaml:"url"`
}

type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message content
Content *string `json:"content" yaml:"content"`
Content interface{} `json:"content" yaml:"content"`

StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`

// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
}
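
Content is now an interface{} so a message can carry either a plain string or an array of typed parts. A self-contained example of how such an array decodes into the new Content/ContentURL shapes (local copies of the structs above, YAML tags omitted):

package main

import (
    "encoding/json"
    "fmt"
)

// Local copies of the new types from api/schema/openai.go (YAML tags omitted).
type ContentURL struct {
    URL string `json:"url"`
}

type Content struct {
    Type     string     `json:"type"`
    Text     string     `json:"text"`
    ImageURL ContentURL `json:"image_url"`
}

func main() {
    // An OpenAI-style vision message content: one text part and one image part.
    raw := `[
        {"type": "text", "text": "What is in this picture?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
    ]`

    var parts []Content
    if err := json.Unmarshal([]byte(raw), &parts); err != nil {
        panic(err)
    }
    for _, p := range parts {
        fmt.Printf("type=%s text=%q url=%q\n", p.Type, p.Text, p.ImageURL.URL)
    }
}
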
21 changes: 17 additions & 4 deletions backend/cpp/llama/CMakeLists.txt
@@ -1,9 +1,22 @@

## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h)
install(TARGETS ${TARGET} LIBRARY)
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if (NOT MSVC)
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
endif()

set(TARGET grpc-server)
# END CLIP hack
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
link_directories("/opt/homebrew/lib")
include_directories("/opt/homebrew/include")
@@ -47,10 +60,10 @@ add_library(hw_grpc_proto
${hw_grpc_srcs}
${hw_grpc_hdrs}
${hw_proto_srcs}
${hw_proto_hdrs})
${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
add_executable(${TARGET} grpc-server.cpp json.hpp )
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
absl::flags_parse
gRPC::${_REFLECTION}
gRPC::${_GRPC_GRPCPP}
8 changes: 7 additions & 1 deletion backend/cpp/llama/Makefile
@@ -1,5 +1,5 @@

LLAMA_VERSION?=
LLAMA_VERSION?=d9b33fe95bd257b36c84ee5769cc048230067d6f

CMAKE_ARGS?=
BUILD_TYPE?=
@@ -27,11 +27,17 @@ llama.cpp/examples/grpc-server:
mkdir -p llama.cpp/examples/grpc-server
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp

rebuild:
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
rm -rf grpc-server
$(MAKE) grpc-server
