diff --git a/pkg/bridge/ai/api_server.go b/pkg/bridge/ai/api_server.go index 696734feb..db110f0a6 100644 --- a/pkg/bridge/ai/api_server.go +++ b/pkg/bridge/ai/api_server.go @@ -35,7 +35,6 @@ type BasicAPIServer struct { zipperAddr string credential string httpHandler http.Handler - logger *slog.Logger } // Serve starts the Basic API Server @@ -44,19 +43,20 @@ func Serve(config *Config, zipperListenAddr string, credential string, logger *s if err != nil { return err } - srv, err := NewBasicAPIServer(config, zipperListenAddr, provider, credential, logger) + srv, err := NewBasicAPIServer(config, zipperListenAddr, credential, provider, logger) if err != nil { return err } logger.Info("start AI Bridge service", "addr", config.Server.Addr, "provider", provider.Name()) - return srv.ServeAddr(config.Server.Addr) + return http.ListenAndServe(config.Server.Addr, srv.httpHandler) } -func BridgeHTTPHanlder(provider provider.LLMProvider, decorater func(http.Handler) http.Handler) http.Handler { +// NewServeMux creates a new http.ServeMux for the llm bridge server. +func NewServeMux(service *Service) *http.ServeMux { var ( + h = &Handler{service} mux = http.NewServeMux() - h = NewHandler(provider) ) // GET /overview mux.HandleFunc("/overview", h.HandleOverview) @@ -65,57 +65,59 @@ func BridgeHTTPHanlder(provider provider.LLMProvider, decorater func(http.Handle // POST /v1/chat/completions (OpenAI compatible interface) mux.HandleFunc("/v1/chat/completions", h.HandleChatCompletions) - return decorater(mux) + return mux +} + +// DecorateHandler decorates the http.Handler. +func DecorateHandler(h http.Handler, decorates ...func(handler http.Handler) http.Handler) http.Handler { + // decorate the http.Handler + for i := len(decorates) - 1; i >= 0; i-- { + h = decorates[i](h) + } + return h } // NewBasicAPIServer creates a new restful service -func NewBasicAPIServer(config *Config, zipperAddr string, provider provider.LLMProvider, credential string, logger *slog.Logger) (*BasicAPIServer, error) { +func NewBasicAPIServer(config *Config, zipperAddr, credential string, provider provider.LLMProvider, logger *slog.Logger) (*BasicAPIServer, error) { zipperAddr = parseZipperAddr(zipperAddr) - cp := NewCallerProvider(zipperAddr, DefaultExchangeMetadataFunc) + logger = logger.With("component", "bridge") + + service := NewService(zipperAddr, provider, &ServiceOptions{ + Logger: logger, + Tracer: otel.Tracer("yomo-llm-bridge"), + CredentialFunc: func(r *http.Request) (string, error) { return credential, nil }, + }) + + mux := NewServeMux(service) server := &BasicAPIServer{ zipperAddr: zipperAddr, credential: credential, - httpHandler: BridgeHTTPHanlder(provider, decorateReqContext(cp, logger, credential)), - logger: logger.With("component", "bridge"), + httpHandler: DecorateHandler(mux, decorateReqContext(service, logger)), } return server, nil } -// ServeAddr starts a http server that provides some endpoints to bridge up the http server and YoMo. -// User can chat to the http server and interact with the YoMo's stream function. -func (a *BasicAPIServer) ServeAddr(addr string) error { - return http.ListenAndServe(addr, a.httpHandler) -} - -// decorateReqContext decorates the context of the request, it injects a transID and a caller into the context. 
-func decorateReqContext(cp CallerProvider, logger *slog.Logger, credential string) func(handler http.Handler) http.Handler {
-	tracer := otel.Tracer("yomo-llm-bridge")
-
-	caller, err := cp.Provide(credential)
-	if err != nil {
-		logger.Info("can't load caller", "err", err)
-
-		return func(handler http.Handler) http.Handler {
-			return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				w.Header().Set("Content-Type", "application/json")
-				RespondWithError(w, http.StatusInternalServerError, err)
-			})
-		}
-	}
-
-	caller.SetTracer(tracer)
-
+// decorateReqContext decorates the context of the request. It injects a transID into the request's context,
+// logs the request information, and starts tracing the request.
+func decorateReqContext(service *Service, logger *slog.Logger) func(handler http.Handler) http.Handler {
 	host, _ := os.Hostname()

 	return func(handler http.Handler) http.Handler {
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			ctx := r.Context()

+			caller, err := service.LoadOrCreateCaller(r)
+			if err != nil {
+				RespondWithError(w, http.StatusBadRequest, err)
+				return
+			}
+			ctx = WithCallerContext(ctx, caller)
+
 			// trace every request
-			ctx, span := tracer.Start(
+			ctx, span := service.option.Tracer.Start(
 				ctx,
 				r.URL.Path,
 				trace.WithSpanKind(trace.SpanKindServer),
@@ -125,7 +127,6 @@ func decorateReqContext(cp CallerProvider, logger *slog.Logger, credential strin
 			transID := id.New(32)
 			ctx = WithTransIDContext(ctx, transID)
-			ctx = WithCallerContext(ctx, caller)

 			logger.Info("request", "method", r.Method, "path", r.URL.Path, "transID", transID)

@@ -136,24 +137,16 @@

 // Handler handles the http request.
 type Handler struct {
-	provider provider.LLMProvider
-}
-
-// NewHandler returns a new Handler.
-func NewHandler(provider provider.LLMProvider) *Handler {
-	return &Handler{provider}
+	service *Service
 }

 // HandleOverview is the handler for GET /overview
 func (h *Handler) HandleOverview(w http.ResponseWriter, r *http.Request) {
-	caller := FromCallerContext(r.Context())
-
 	w.Header().Set("Content-Type", "application/json")

-	tcs, err := register.ListToolCalls(caller.Metadata())
+	tcs, err := register.ListToolCalls(FromCallerContext(r.Context()).Metadata())
 	if err != nil {
-		w.WriteHeader(http.StatusInternalServerError)
-		json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
+		RespondWithError(w, http.StatusInternalServerError, err)
 		return
 	}

@@ -172,7 +165,6 @@ var baseSystemMessage = `You are a very helpful assistant.
Your job is to choose func (h *Handler) HandleInvoke(w http.ResponseWriter, r *http.Request) { var ( ctx = r.Context() - caller = FromCallerContext(ctx) transID = FromTransIDContext(ctx) ) defer r.Body.Close() @@ -185,14 +177,14 @@ func (h *Handler) HandleInvoke(w http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(r.Context(), RequestTimeout) defer cancel() - res, err := GetInvoke(ctx, req.Prompt, baseSystemMessage, transID, req.IncludeCallStack, caller, h.provider) + w.Header().Set("Content-Type", "application/json") + + res, err := h.service.GetInvoke(ctx, req.Prompt, baseSystemMessage, transID, FromCallerContext(ctx), req.IncludeCallStack) if err != nil { - w.Header().Set("Content-Type", "application/json") RespondWithError(w, http.StatusInternalServerError, err) return } - w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _ = json.NewEncoder(w).Encode(res) } @@ -201,7 +193,6 @@ func (h *Handler) HandleInvoke(w http.ResponseWriter, r *http.Request) { func (h *Handler) HandleChatCompletions(w http.ResponseWriter, r *http.Request) { var ( ctx = r.Context() - caller = FromCallerContext(ctx) transID = FromTransIDContext(ctx) ) defer r.Body.Close() @@ -214,7 +205,7 @@ func (h *Handler) HandleChatCompletions(w http.ResponseWriter, r *http.Request) ctx, cancel := context.WithTimeout(r.Context(), RequestTimeout) defer cancel() - if err := GetChatCompletions(ctx, req, transID, h.provider, caller, w); err != nil { + if err := h.service.GetChatCompletions(ctx, req, transID, FromCallerContext(ctx), w); err != nil { RespondWithError(w, http.StatusBadRequest, err) return } @@ -258,17 +249,17 @@ func getLocalIP() (string, error) { type callerContextKey struct{} // WithCallerContext adds the caller to the request context -func WithCallerContext(ctx context.Context, caller Caller) context.Context { +func WithCallerContext(ctx context.Context, caller *Caller) context.Context { return context.WithValue(ctx, callerContextKey{}, caller) } // FromCallerContext returns the caller from the request context -func FromCallerContext(ctx context.Context) Caller { - service, ok := ctx.Value(callerContextKey{}).(Caller) +func FromCallerContext(ctx context.Context) *Caller { + caller, ok := ctx.Value(callerContextKey{}).(*Caller) if !ok { return nil } - return service + return caller } type transIDContextKey struct{} diff --git a/pkg/bridge/ai/api_server_test.go b/pkg/bridge/ai/api_server_test.go index 5242c6161..235d27c63 100644 --- a/pkg/bridge/ai/api_server_test.go +++ b/pkg/bridge/ai/api_server_test.go @@ -4,13 +4,15 @@ import ( "bytes" "fmt" "io" - "log/slog" "net/http" "net/http/httptest" "testing" + "time" "github.com/stretchr/testify/assert" + "github.com/yomorun/yomo" "github.com/yomorun/yomo/ai" + "github.com/yomorun/yomo/core/metadata" "github.com/yomorun/yomo/pkg/bridge/ai/provider" "github.com/yomorun/yomo/pkg/bridge/ai/register" ) @@ -38,11 +40,19 @@ func TestServer(t *testing.T) { t.Fatal(err) } - cp := newMockCallerProvider() + flow := newMockDataFlow(newHandler(2 * time.Hour).handle) - cp.provideFunc = mockCallerProvideFunc(map[uint32][]mockFunctionCall{}) + newCaller := func(_ yomo.Source, _ yomo.StreamFunction, _ metadata.M, _ time.Duration) (*Caller, error) { + return mockCaller(nil), err + } + + service := newService("fake_zipper_addr", pd, newCaller, &ServiceOptions{ + SourceBuilder: func(_, _ string) yomo.Source { return flow }, + ReducerBuilder: func(_, _ string) yomo.StreamFunction { return flow }, + MetadataExchanger: func(_ string) 
(metadata.M, error) { return metadata.M{"hello": "llm bridge"}, nil },
+	})

-	handler := BridgeHTTPHanlder(pd, decorateReqContext(cp, slog.Default(), ""))
+	handler := DecorateHandler(NewServeMux(service), decorateReqContext(service, service.logger))

 	// create a test server
 	server := httptest.NewServer(handler)
diff --git a/pkg/bridge/ai/call_syncer.go b/pkg/bridge/ai/call_syncer.go
index 048b4e7a0..594fa9f85 100644
--- a/pkg/bridge/ai/call_syncer.go
+++ b/pkg/bridge/ai/call_syncer.go
@@ -6,9 +6,7 @@ import (
 	"time"

 	openai "github.com/sashabaranov/go-openai"
-	"github.com/yomorun/yomo"
 	"github.com/yomorun/yomo/ai"
-	"github.com/yomorun/yomo/serverless"
 )

 // CallSyncer fires a bunch of function calls and waits for the results of these calls.
@@ -223,39 +221,3 @@ func (f *callSyncer) background() {
 		}
 	}
 }
-
-// ToReducer converts a stream function to a reducer that can reduce the function calling result.
-func ToReducer(sfn yomo.StreamFunction, logger *slog.Logger, ch chan ReduceMessage) {
-	// set observe data tags
-	sfn.SetObserveDataTags(ai.ReducerTag)
-	// set reduce handler
-	sfn.SetHandler(func(ctx serverless.Context) {
-		invoke, err := ctx.LLMFunctionCall()
-		if err != nil {
-			ch <- ReduceMessage{ReqID: ""}
-			logger.Error("parse function calling invoke", "err", err.Error())
-			return
-		}
-		logger.Debug("sfn-reducer", "req_id", invoke.ReqID, "tool_call_id", invoke.ToolCallID, "result", string(invoke.Result))
-
-		message := openai.ChatCompletionMessage{
-			Role:       openai.ChatMessageRoleTool,
-			Content:    invoke.Result,
-			ToolCallID: invoke.ToolCallID,
-		}
-
-		ch <- ReduceMessage{ReqID: invoke.ReqID, Message: message}
-	})
-}
-
-// ToSource convert a yomo source to the source that can send function calling body to the llm function.
-func ToSource(source yomo.Source, logger *slog.Logger, ch chan TagFunctionCall) {
-	go func() {
-		for c := range ch {
-			buf, _ := c.FunctionCall.Bytes()
-			if err := source.Write(c.Tag, buf); err != nil {
-				logger.Error("send data to zipper", "err", err.Error())
-			}
-		}
-	}()
-}
diff --git a/pkg/bridge/ai/call_syncer_test.go b/pkg/bridge/ai/call_syncer_test.go
index 1eb6e525b..31884aafb 100644
--- a/pkg/bridge/ai/call_syncer_test.go
+++ b/pkg/bridge/ai/call_syncer_test.go
@@ -27,13 +27,10 @@ func TestTimeoutCallSyncer(t *testing.T) {
 	flow := newMockDataFlow(h.handle)
 	defer flow.Close()

-	reqs := make(chan TagFunctionCall)
-	ToSource(flow, slog.Default(), reqs)
+	req, _ := sourceWriteToChan(flow, slog.Default())
+	res, _ := reduceToChan(flow, slog.Default())

-	messages := make(chan ReduceMessage)
-	ToReducer(flow, slog.Default(), messages)
-
-	syncer := NewCallSyncer(slog.Default(), reqs, messages, time.Millisecond)
+	syncer := NewCallSyncer(slog.Default(), req, res, time.Millisecond)
 	go flow.run()

 	var (
@@ -61,13 +58,10 @@ func TestCallSyncer(t *testing.T) {
 	flow := newMockDataFlow(h.handle)
 	defer flow.Close()

-	reqs := make(chan TagFunctionCall)
-	ToSource(flow, slog.Default(), reqs)
-
-	messages := make(chan ReduceMessage)
-	ToReducer(flow, slog.Default(), messages)
+	req, _ := sourceWriteToChan(flow, slog.Default())
+	res, _ := reduceToChan(flow, slog.Default())

-	syncer := NewCallSyncer(slog.Default(), reqs, messages, 0)
+	syncer := NewCallSyncer(slog.Default(), req, res, 0)
 	go flow.run()

 	var (
@@ -118,7 +112,7 @@ func (h *handler) result() []openai.ChatCompletionMessage {
 	return want
 }

-// mockDataFlow mocks the data flow of ai bridge.
+// mockDataFlow mocks the data flow of llm bridge.
 // The data flow is: source -> handler -> reducer,
 // It is `Write() -> handler() -> reducer()` in this mock implementation.
 type mockDataFlow struct {
@@ -160,11 +154,11 @@ var _ yomo.StreamFunction = (*mockDataFlow)(nil)

 // The test will not use the following functions in this mock implementation.
 func (t *mockDataFlow) SetObserveDataTags(tag ...uint32) {}
+func (t *mockDataFlow) Connect() error { return nil }
 func (t *mockDataFlow) Init(fn func() error) error { panic("unimplemented") }
 func (t *mockDataFlow) SetCronHandler(spec string, fn core.CronHandler) error { panic("unimplemented") }
 func (t *mockDataFlow) SetPipeHandler(fn core.PipeHandler) error { panic("unimplemented") }
 func (t *mockDataFlow) SetWantedTarget(string) { panic("unimplemented") }
 func (t *mockDataFlow) Wait() { panic("unimplemented") }
-func (t *mockDataFlow) Connect() error { panic("unimplemented") }
 func (t *mockDataFlow) SetErrorHandler(fn func(err error)) { panic("unimplemented") }
 func (t *mockDataFlow) WriteWithTarget(_ uint32, _ []byte, _ string) error { panic("unimplemented") }
diff --git a/pkg/bridge/ai/caller.go b/pkg/bridge/ai/caller.go
index 33c4b69a7..44c5b1256 100644
--- a/pkg/bridge/ai/caller.go
+++ b/pkg/bridge/ai/caller.go
@@ -1,134 +1,46 @@
 package ai

 import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
 	"log/slog"
-	"net/http"
-	"strings"
 	"sync/atomic"
 	"time"

-	"github.com/hashicorp/golang-lru/v2/expirable"
 	openai "github.com/sashabaranov/go-openai"
 	"github.com/yomorun/yomo"
 	"github.com/yomorun/yomo/ai"
+	"github.com/yomorun/yomo/core"
 	"github.com/yomorun/yomo/core/metadata"
 	"github.com/yomorun/yomo/core/ylog"
-	"github.com/yomorun/yomo/pkg/bridge/ai/provider"
-	"github.com/yomorun/yomo/pkg/bridge/ai/register"
-	"github.com/yomorun/yomo/pkg/id"
-	"go.opentelemetry.io/otel/trace"
-	"go.opentelemetry.io/otel/trace/noop"
+	"github.com/yomorun/yomo/serverless"
 )

-var (
-	// CallerProviderCacheSize is the size of the caller provider cache
-	CallerProviderCacheSize = 1024
-	// CallerProviderCacheTTL is the time to live of the provider cache
-	CallerProviderCacheTTL = time.Minute * 0
-)
-
-// CallerProvider provides the caller, which is used to interact with YoMo's stream function.
-type CallerProvider interface {
-	Provide(credential string) (Caller, error)
-}
-
-type callerProvider struct {
-	zipperAddr  string
-	exFn        ExchangeMetadataFunc
-	provideFunc provideFunc
-	callers     *expirable.LRU[string, Caller]
-}
-
-type provideFunc func(string, string, ExchangeMetadataFunc) (Caller, error)
-
-// NewCallerProvider returns a new caller provider.
-func NewCallerProvider(zipperAddr string, exFn ExchangeMetadataFunc) CallerProvider {
-	return newCallerProvider(zipperAddr, exFn, NewCaller)
-}
-
-func newCallerProvider(zipperAddr string, exFn ExchangeMetadataFunc, provideFunc provideFunc) CallerProvider {
-	p := &callerProvider{
-		zipperAddr:  zipperAddr,
-		exFn:        exFn,
-		provideFunc: provideFunc,
-		callers:     expirable.NewLRU(CallerProviderCacheSize, func(_ string, caller Caller) { caller.Close() }, CallerProviderCacheTTL),
-	}
-
-	return p
-}
-
-// Provide provides the caller according to the credential.
-func (p *callerProvider) Provide(credential string) (Caller, error) {
-	caller, ok := p.callers.Get(credential)
-	if ok {
-		return caller, nil
-	}
-
-	caller, err := p.provideFunc(credential, p.zipperAddr, p.exFn)
-	if err != nil {
-		return nil, err
-	}
-	p.callers.Add(credential, caller)
-
-	return caller, nil
-}
-
 // Caller calls the invoke function and keeps the metadata and system prompt.
-type Caller interface {
-	// Call calls the invoke function.
-	CallSyncer
-	// SetSystemPrompt sets the system prompt of the caller.
-	SetSystemPrompt(string)
-	// GetSystemPrompt returns the system prompt of the caller.
-	GetSystemPrompt() string
-	// SetTracer sets the tracer of the caller.
-	SetTracer(trace.Tracer)
-	// GetTracer returns the tracer of the caller.
-	GetTracer() trace.Tracer
-	// Metadata returns the metadata of the caller.
-	Metadata() metadata.M
-	// Close closes the caller, if the caller is closed, the caller will not be reused.
-	Close() error
-}
-
-type caller struct {
+type Caller struct {
 	CallSyncer
-	source  yomo.Source
-	reducer yomo.StreamFunction
-
-	tracer     atomic.Value
-	credential string
+	source  yomo.Source
+	reducer yomo.StreamFunction

 	md           metadata.M
 	systemPrompt atomic.Value
 	logger       *slog.Logger
 }

 // NewCaller returns a new caller.
-func NewCaller(credential string, zipperAddr string, exFn ExchangeMetadataFunc) (Caller, error) {
+func NewCaller(source yomo.Source, reducer yomo.StreamFunction, md metadata.M, callTimeout time.Duration) (*Caller, error) {
 	logger := ylog.Default()

-	source, reqCh, err := ChanToSource(zipperAddr, credential, logger)
+	reqCh, err := sourceWriteToChan(source, logger)
 	if err != nil {
 		return nil, err
 	}

-	reducer, resCh, err := ReduceToChan(zipperAddr, credential, logger)
+	resCh, err := reduceToChan(reducer, logger)
 	if err != nil {
 		return nil, err
 	}

-	callSyncer := NewCallSyncer(logger, reqCh, resCh, 60*time.Second)
+	callSyncer := NewCallSyncer(logger, reqCh, resCh, callTimeout)

-	md, err := exFn(credential)
-	if err != nil {
-		return nil, err
-	}
-
-	caller := &caller{
+	caller := &Caller{
 		CallSyncer: callSyncer,
 		source:     source,
 		reducer:    reducer,
@@ -136,59 +48,73 @@ func NewCaller(credential string, zipperAddr string, exFn ExchangeMetadataFunc)
 		logger:     logger,
 	}

-	caller.SetSystemPrompt("")
-
 	return caller, nil
 }

-// ChanToSource creates a yomo source and a channel,
-// The ai.FunctionCall objects are continuously be received from the channel and be sent by the source.
-func ChanToSource(zipperAddr, credential string, logger *slog.Logger) (yomo.Source, chan<- TagFunctionCall, error) {
-	source := yomo.NewSource(
-		"fc-source",
-		zipperAddr,
-		yomo.WithSourceReConnect(),
-		yomo.WithCredential(credential),
-	)
+// sourceWriteToChan makes the source write the data received from the channel.
+// The TagFunctionCall objects are continuously received from the channel and sent by the source.
+func sourceWriteToChan(source yomo.Source, logger *slog.Logger) (chan<- TagFunctionCall, error) {
 	err := source.Connect()
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}

 	ch := make(chan TagFunctionCall)
-	ToSource(source, logger, ch)
+	go func() {
+		for c := range ch {
+			buf, _ := c.FunctionCall.Bytes()
+			if err := source.Write(c.Tag, buf); err != nil {
+				logger.Error("send data to zipper", "err", err.Error())
+			}
+		}
+	}()

-	return source, ch, nil
+	return ch, nil
 }

-// ReduceToChan creates a yomo stream function to reduce the messages and returns both.
-func ReduceToChan(zipperAddr, credential string, logger *slog.Logger) (yomo.StreamFunction, <-chan ReduceMessage, error) {
-	reducer := yomo.NewStreamFunction(
-		"ai-reducer",
-		zipperAddr,
-		yomo.WithSfnReConnect(),
-		yomo.WithSfnCredential(credential),
-		yomo.DisableOtelTrace(),
-	)
+// reduceToChan configures the reducer and returns a channel to accept messages from the reducer.
+func reduceToChan(reducer yomo.StreamFunction, logger *slog.Logger) (<-chan ReduceMessage, error) {
 	reducer.SetObserveDataTags(ai.ReducerTag)
 	messages := make(chan ReduceMessage)
-	ToReducer(reducer, logger, messages)
+
+	reducer.SetHandler(reduceFunc(messages, logger))

 	if err := reducer.Connect(); err != nil {
-		return reducer, nil, err
+		return nil, err
 	}

-	return reducer, messages, nil
+	return messages, nil
+}
+
+func reduceFunc(messages chan ReduceMessage, logger *slog.Logger) core.AsyncHandler {
+	return func(ctx serverless.Context) {
+		invoke, err := ctx.LLMFunctionCall()
+		if err != nil {
+			messages <- ReduceMessage{ReqID: ""}
+			logger.Error("parse function calling invoke", "err", err.Error())
+			return
+		}
+		logger.Debug("sfn-reducer", "req_id", invoke.ReqID, "tool_call_id", invoke.ToolCallID, "result", string(invoke.Result))
+
+		message := openai.ChatCompletionMessage{
+			Role:       openai.ChatMessageRoleTool,
+			Content:    invoke.Result,
+			ToolCallID: invoke.ToolCallID,
+		}
+
+		messages <- ReduceMessage{ReqID: invoke.ReqID, Message: message}
+	}
 }

 // SetSystemPrompt sets the system prompt
-func (c *caller) SetSystemPrompt(prompt string) {
+func (c *Caller) SetSystemPrompt(prompt string) {
 	c.systemPrompt.Store(prompt)
 }

 // GetSystemPrompt gets the system prompt
-func (c *caller) GetSystemPrompt() string {
+func (c *Caller) GetSystemPrompt() string {
 	if v := c.systemPrompt.Load(); v != nil {
 		return v.(string)
 	}
@@ -196,25 +122,12 @@ func (c *caller) GetSystemPrompt() string {
 	return ""
 }

 // Metadata returns the metadata of caller.
-func (c *caller) Metadata() metadata.M {
+func (c *Caller) Metadata() metadata.M {
 	return c.md
 }

-// SetTracer sets the otel tracer.
-func (c *caller) SetTracer(tracer trace.Tracer) {
-	c.tracer.Store(tracer)
-}
-
-// GetTracer gets the otel tracer.
-func (c *caller) GetTracer() trace.Tracer {
-	if v := c.tracer.Load(); v != nil {
-		return v.(trace.Tracer)
-	}
-	return noop.NewTracerProvider().Tracer("yomo-llm-bridge")
-}
-
 // Close closes the caller.
-func (c *caller) Close() error { +func (c *Caller) Close() error { _ = c.CallSyncer.Close() var err error @@ -228,501 +141,3 @@ func (c *caller) Close() error { return err } - -// GetInvoke returns the invoke response -func GetInvoke( - ctx context.Context, - userInstruction string, baseSystemMessage string, transID string, - includeCallStack bool, - caller Caller, provider provider.LLMProvider, -) (*ai.InvokeResponse, error) { - md := caller.Metadata().Clone() - // read tools attached to the metadata - tcs, err := register.ListToolCalls(md) - if err != nil { - return &ai.InvokeResponse{}, err - } - // prepare tools - tools := prepareToolCalls(tcs) - - chainMessage := ai.ChainMessage{} - messages := prepareMessages(baseSystemMessage, userInstruction, chainMessage, tools, true) - req := openai.ChatCompletionRequest{ - Messages: messages, - } - // with tools - if len(tools) > 0 { - req.Tools = tools - } - var ( - promptUsage int - completionUsage int - ) - _, span := caller.GetTracer().Start(ctx, "first_call") - chatCompletionResponse, err := provider.GetChatCompletions(ctx, req, md) - if err != nil { - return nil, err - } - span.End() - promptUsage = chatCompletionResponse.Usage.PromptTokens - completionUsage = chatCompletionResponse.Usage.CompletionTokens - - // convert ChatCompletionResponse to InvokeResponse - res, err := ai.ConvertToInvokeResponse(&chatCompletionResponse, tcs) - if err != nil { - return nil, err - } - // if no tool_calls fired, just return the llm text result - if res.FinishReason != string(openai.FinishReasonToolCalls) { - return res, nil - } - - // run llm function calls - ylog.Debug(">>>> start 1st call response", - "res_toolcalls", fmt.Sprintf("%+v", res.ToolCalls), - "res_assistant_msgs", fmt.Sprintf("%+v", res.AssistantMessage)) - - ylog.Debug(">> run function calls", "transID", transID, "res.ToolCalls", fmt.Sprintf("%+v", res.ToolCalls)) - - _, span = caller.GetTracer().Start(ctx, "run_sfn") - reqID := id.New(16) - llmCalls, err := caller.Call(ctx, transID, reqID, res.ToolCalls) - if err != nil { - return nil, err - } - span.End() - - ylog.Debug(">>>> start 2nd call with", "calls", fmt.Sprintf("%+v", llmCalls), "preceeding_assistant_message", fmt.Sprintf("%+v", res.AssistantMessage)) - - chainMessage.PreceedingAssistantMessage = res.AssistantMessage - chainMessage.ToolMessages = transToolMessage(llmCalls) - // do not attach toolMessage to prompt in 2nd call - messages2 := prepareMessages(baseSystemMessage, userInstruction, chainMessage, tools, false) - req2 := openai.ChatCompletionRequest{ - Messages: messages2, - } - _, span = caller.GetTracer().Start(ctx, "second_call") - chatCompletionResponse2, err := provider.GetChatCompletions(ctx, req2, md) - if err != nil { - return nil, err - } - span.End() - - chatCompletionResponse2.Usage.PromptTokens += promptUsage - chatCompletionResponse2.Usage.CompletionTokens += completionUsage - - res2, err := ai.ConvertToInvokeResponse(&chatCompletionResponse2, tcs) - if err != nil { - return nil, err - } - - // INFO: call stack infomation - if includeCallStack { - res2.ToolCalls = res.ToolCalls - res2.ToolMessages = transToolMessage(llmCalls) - } - ylog.Debug("<<<< complete 2nd call", "res2", fmt.Sprintf("%+v", res2)) - - return res2, err -} - -// GetChatCompletions accepts openai.ChatCompletionRequest and responds to http.ResponseWriter. 
-func GetChatCompletions( - ctx context.Context, - req openai.ChatCompletionRequest, transID string, - provider provider.LLMProvider, caller Caller, - w http.ResponseWriter, -) error { - reqCtx, reqSpan := caller.GetTracer().Start(ctx, "completions_request") - md := caller.Metadata().Clone() - - // 1. find all hosting tool sfn - tagTools, err := register.ListToolCalls(md) - if err != nil { - return err - } - // 2. add those tools to request - req = addToolsToRequest(req, tagTools) - - // 3. over write system prompt to request - req = overWriteSystemPrompt(req, caller.GetSystemPrompt()) - - var ( - promptUsage = 0 - completionUsage = 0 - totalUsage = 0 - reqMessages = req.Messages - toolCallsMap = make(map[int]openai.ToolCall) - toolCalls = []openai.ToolCall{} - assistantMessage = openai.ChatCompletionMessage{} - ) - // 4. request first chat for getting tools - if req.Stream { - _, firstCallSpan := caller.GetTracer().Start(reqCtx, "first_call_request") - var ( - flusher = eventFlusher(w) - isFunctionCall = false - ) - resStream, err := provider.GetChatCompletionsStream(reqCtx, req, md) - if err != nil { - return err - } - - var ( - i int // number of chunks - j int // number of tool call chunks - firstRespSpan trace.Span - respSpan trace.Span - ) - for { - if i == 0 { - _, firstRespSpan = caller.GetTracer().Start(reqCtx, "first_call_response_in_stream") - } - streamRes, err := resStream.Recv() - if err == io.EOF { - break - } - if err != nil { - return err - } - if len(streamRes.Choices) == 0 { - continue - } - if streamRes.Usage != nil { - promptUsage = streamRes.Usage.PromptTokens - completionUsage = streamRes.Usage.CompletionTokens - totalUsage = streamRes.Usage.TotalTokens - } - if tc := streamRes.Choices[0].Delta.ToolCalls; len(tc) > 0 { - isFunctionCall = true - if j == 0 { - firstCallSpan.End() - } - for _, t := range tc { - // this index should be toolCalls slice's index, the index field only appares in stream response - index := *t.Index - item, ok := toolCallsMap[index] - if !ok { - toolCallsMap[index] = openai.ToolCall{ - Index: t.Index, - ID: t.ID, - Type: t.Type, - Function: openai.FunctionCall{}, - } - item = toolCallsMap[index] - } - if t.Function.Arguments != "" { - item.Function.Arguments += t.Function.Arguments - } - if t.Function.Name != "" { - item.Function.Name = t.Function.Name - } - toolCallsMap[index] = item - } - j++ - } else if streamRes.Choices[0].FinishReason != openai.FinishReasonToolCalls { - _ = writeStreamEvent(w, flusher, streamRes) - } - if i == 0 && j == 0 && !isFunctionCall { - reqSpan.End() - recordTTFT(ctx, caller.GetTracer()) - _, respSpan = caller.GetTracer().Start(ctx, "response_in_stream(TBT)") - } - i++ - } - if !isFunctionCall { - respSpan.End() - return writeStreamDone(w, flusher) - } - firstRespSpan.End() - toolCalls = mapToSliceTools(toolCallsMap) - - assistantMessage = openai.ChatCompletionMessage{ - ToolCalls: toolCalls, - Role: openai.ChatMessageRoleAssistant, - } - reqSpan.End() - flusher.Flush() - } else { - _, firstCallSpan := caller.GetTracer().Start(reqCtx, "first_call") - resp, err := provider.GetChatCompletions(ctx, req, md) - if err != nil { - return err - } - reqSpan.End() - - promptUsage = resp.Usage.PromptTokens - completionUsage = resp.Usage.CompletionTokens - totalUsage = resp.Usage.CompletionTokens - - ylog.Debug(" #1 first call", "response", fmt.Sprintf("%+v", resp)) - // it is a function call - if resp.Choices[0].FinishReason == openai.FinishReasonToolCalls { - toolCalls = append(toolCalls, 
resp.Choices[0].Message.ToolCalls...) - assistantMessage = resp.Choices[0].Message - firstCallSpan.End() - } else { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) - return nil - } - } - - resCtx, resSpan := caller.GetTracer().Start(ctx, "completions_response") - defer resSpan.End() - - _, sfnSpan := caller.GetTracer().Start(resCtx, "run_sfn") - - // 5. find sfns that hit the function call - fnCalls := findTagTools(tagTools, toolCalls) - - // 6. run llm function calls - reqID := id.New(16) - llmCalls, err := caller.Call(ctx, transID, reqID, fnCalls) - if err != nil { - return err - } - sfnSpan.End() - - // 7. do the second call (the second call messages are from user input, first call resopnse and sfn calls result) - req.Messages = append(reqMessages, assistantMessage) - req.Messages = append(req.Messages, llmCalls...) - req.Tools = nil // reset tools field - - ylog.Debug(" #2 second call", "request", fmt.Sprintf("%+v", req)) - - if req.Stream { - _, secondCallSpan := caller.GetTracer().Start(resCtx, "second_call_request") - flusher := w.(http.Flusher) - resStream, err := provider.GetChatCompletionsStream(resCtx, req, md) - if err != nil { - return err - } - secondCallSpan.End() - - var ( - i int - secondRespSpan trace.Span - ) - for { - if i == 0 { - recordTTFT(resCtx, caller.GetTracer()) - _, secondRespSpan = caller.GetTracer().Start(resCtx, "second_call_response_in_stream(TBT)") - } - i++ - streamRes, err := resStream.Recv() - if err == io.EOF { - secondRespSpan.End() - return writeStreamDone(w, flusher) - } - if err != nil { - return err - } - if streamRes.Usage != nil { - streamRes.Usage.PromptTokens += promptUsage - streamRes.Usage.CompletionTokens += completionUsage - streamRes.Usage.TotalTokens += totalUsage - } - _ = writeStreamEvent(w, flusher, streamRes) - } - } else { - _, secondCallSpan := caller.GetTracer().Start(resCtx, "second_call") - - resp, err := provider.GetChatCompletions(resCtx, req, md) - if err != nil { - return err - } - - resp.Usage.PromptTokens += promptUsage - resp.Usage.CompletionTokens += completionUsage - resp.Usage.TotalTokens += totalUsage - - secondCallSpan.End() - w.Header().Set("Content-Type", "application/json") - return json.NewEncoder(w).Encode(resp) - } -} - -// ExchangeMetadataFunc is used to exchange metadata -type ExchangeMetadataFunc func(credential string) (metadata.M, error) - -// DefaultExchangeMetadataFunc is the default ExchangeMetadataFunc, It returns an empty metadata. 
-func DefaultExchangeMetadataFunc(credential string) (metadata.M, error) { - return metadata.M{}, nil -} - -func addToolsToRequest(req openai.ChatCompletionRequest, tagTools map[uint32]openai.Tool) openai.ChatCompletionRequest { - toolCalls := prepareToolCalls(tagTools) - - if len(toolCalls) > 0 { - req.Tools = toolCalls - } - - ylog.Debug(" #1 first call", "request", fmt.Sprintf("%+v", req)) - - return req -} - -func overWriteSystemPrompt(req openai.ChatCompletionRequest, sysPrompt string) openai.ChatCompletionRequest { - // do nothing if system prompt is empty - if sysPrompt == "" { - return req - } - // over write system prompt - isOverWrite := false - for i, msg := range req.Messages { - if msg.Role != "system" { - continue - } - req.Messages[i] = openai.ChatCompletionMessage{ - Role: msg.Role, - Content: sysPrompt, - } - isOverWrite = true - } - // append system prompt - if !isOverWrite { - req.Messages = append(req.Messages, openai.ChatCompletionMessage{ - Role: "system", - Content: sysPrompt, - }) - } - - ylog.Debug(" #1 first call after overwrite", "request", fmt.Sprintf("%+v", req)) - - return req -} - -func findTagTools(tagTools map[uint32]openai.Tool, toolCalls []openai.ToolCall) map[uint32][]*openai.ToolCall { - fnCalls := make(map[uint32][]*openai.ToolCall) - // functions may be more than one - for _, call := range toolCalls { - for tag, tc := range tagTools { - if tc.Function.Name == call.Function.Name && tc.Type == call.Type { - currentCall := call - fnCalls[tag] = append(fnCalls[tag], ¤tCall) - } - } - } - return fnCalls -} - -func writeStreamEvent(w http.ResponseWriter, flusher http.Flusher, streamRes openai.ChatCompletionStreamResponse) error { - if _, err := io.WriteString(w, "data: "); err != nil { - return err - } - if err := json.NewEncoder(w).Encode(streamRes); err != nil { - return err - } - if _, err := io.WriteString(w, "\n"); err != nil { - return err - } - flusher.Flush() - - return nil -} - -func writeStreamDone(w http.ResponseWriter, flusher http.Flusher) error { - _, err := io.WriteString(w, "data: [DONE]") - flusher.Flush() - - return err -} - -func prepareMessages(baseSystemMessage string, userInstruction string, chainMessage ai.ChainMessage, tools []openai.Tool, withTool bool) []openai.ChatCompletionMessage { - systemInstructions := []string{"## Instructions\n"} - - // only append if there are tool calls - if withTool { - for _, t := range tools { - systemInstructions = append(systemInstructions, "- ") - systemInstructions = append(systemInstructions, t.Function.Description) - systemInstructions = append(systemInstructions, "\n") - } - systemInstructions = append(systemInstructions, "\n") - } - - SystemPrompt := fmt.Sprintf("%s\n\n%s", baseSystemMessage, strings.Join(systemInstructions, "")) - - messages := []openai.ChatCompletionMessage{} - - // 1. system message - messages = append(messages, openai.ChatCompletionMessage{Role: "system", Content: SystemPrompt}) - - // 2. 
previous tool calls - // Ref: Tool Message Object in Messsages - // https://platform.openai.com/docs/guides/function-calling - // https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages - - if chainMessage.PreceedingAssistantMessage != nil { - // 2.1 assistant message - // try convert type of chainMessage.PreceedingAssistantMessage to type ChatCompletionMessage - assistantMessage, ok := chainMessage.PreceedingAssistantMessage.(openai.ChatCompletionMessage) - if ok { - ylog.Debug("======== add assistantMessage", "am", fmt.Sprintf("%+v", assistantMessage)) - messages = append(messages, assistantMessage) - } - - // 2.2 tool message - for _, tool := range chainMessage.ToolMessages { - tm := openai.ChatCompletionMessage{ - Role: "tool", - Content: tool.Content, - ToolCallID: tool.ToolCallID, - } - ylog.Debug("======== add toolMessage", "tm", fmt.Sprintf("%+v", tm)) - messages = append(messages, tm) - } - } - - // 3. user instruction - messages = append(messages, openai.ChatCompletionMessage{Role: "user", Content: userInstruction}) - - return messages -} - -func mapToSliceTools(m map[int]openai.ToolCall) []openai.ToolCall { - arr := make([]openai.ToolCall, len(m)) - for k, v := range m { - arr[k] = v - } - return arr -} - -func eventFlusher(w http.ResponseWriter) http.Flusher { - h := w.Header() - h.Set("Content-Type", "text/event-stream") - h.Set("Cache-Control", "no-cache, must-revalidate") - h.Set("x-content-type-options", "nosniff") - flusher := w.(http.Flusher) - return flusher -} - -func prepareToolCalls(tcs map[uint32]openai.Tool) []openai.Tool { - // prepare tools - toolCalls := make([]openai.Tool, len(tcs)) - idx := 0 - for _, tc := range tcs { - toolCalls[idx] = tc - idx++ - } - return toolCalls -} - -func transToolMessage(msgs []openai.ChatCompletionMessage) []ai.ToolMessage { - toolMessages := make([]ai.ToolMessage, len(msgs)) - for i, msg := range msgs { - toolMessages[i] = ai.ToolMessage{ - Role: msg.Role, - Content: msg.Content, - ToolCallID: msg.ToolCallID, - } - } - return toolMessages -} - -func recordTTFT(ctx context.Context, tracer trace.Tracer) { - _, span := tracer.Start(ctx, "TTFT") - span.End() - time.Sleep(time.Millisecond) -} diff --git a/pkg/bridge/ai/caller_test.go b/pkg/bridge/ai/caller_test.go index e812ae49a..655311a97 100644 --- a/pkg/bridge/ai/caller_test.go +++ b/pkg/bridge/ai/caller_test.go @@ -1,499 +1,44 @@ package ai import ( - "context" - "errors" - "net/http/httptest" "testing" + "time" - "github.com/hashicorp/golang-lru/v2/expirable" - openai "github.com/sashabaranov/go-openai" "github.com/stretchr/testify/assert" - "github.com/yomorun/yomo/ai" + "github.com/yomorun/yomo" "github.com/yomorun/yomo/core/metadata" - "github.com/yomorun/yomo/pkg/bridge/ai/provider" - "github.com/yomorun/yomo/pkg/bridge/ai/register" ) -func TestCallerInvoke(t *testing.T) { - type args struct { - providerMockData []provider.MockData - mockCallReqResp map[uint32][]mockFunctionCall - systemPrompt string - userInstruction string - baseSystemMessage string - } - tests := []struct { - name string - args args - wantRequest []openai.ChatCompletionRequest - wantUsage ai.TokenUsage - }{ - { - name: "invoke with tool call", - args: args{ - providerMockData: []provider.MockData{ - provider.MockChatCompletionResponse(toolCallResp, stopResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{ - // toolID should equal to toolCallResp's toolID - 0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, - }, - 
systemPrompt: "this is a system prompt", - userInstruction: "hi", - baseSystemMessage: "this is a base system message", - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Messages: []openai.ChatCompletionMessage{ - {Role: "system", Content: "this is a base system message\n\n## Instructions\n- \n\n"}, - {Role: "user", Content: "hi"}, - }, - Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, - }, - { - Messages: []openai.ChatCompletionMessage{ - {Role: "system", Content: "this is a base system message\n\n## Instructions\n"}, - {Role: "assistant", ToolCalls: []openai.ToolCall{{ID: "call_abc123", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\n\"location\": \"Boston, MA\"\n}"}}}}, - {Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_abc123"}, - {Role: "user", Content: "hi"}, - }, - }, - }, - wantUsage: ai.TokenUsage{PromptTokens: 95, CompletionTokens: 43}, - }, - { - name: "invoke without tool call", - args: args{ - providerMockData: []provider.MockData{ - provider.MockChatCompletionResponse(stopResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{}, - systemPrompt: "this is a system prompt", - userInstruction: "hi", - baseSystemMessage: "this is a base system message", - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Messages: []openai.ChatCompletionMessage{ - {Role: "system", Content: "this is a base system message\n\n## Instructions\n\n"}, - {Role: "user", Content: "hi"}, - }, - }, - }, - wantUsage: ai.TokenUsage{PromptTokens: 13, CompletionTokens: 26}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - register.SetRegister(register.NewDefault()) +func TestCaller(t *testing.T) { + cc := &testComponentCreator{flow: newMockDataFlow(newHandler(time.Millisecond).handle)} - pd, err := provider.NewMock("mock provider", tt.args.providerMockData...) 
- if err != nil { - t.Fatal(err) - } + md, err := cc.ExchangeMetadata("") + assert.NoError(t, err) - cp := newMockCallerProvider() + caller, err := NewCaller(cc.CreateSource(""), cc.CreateReducer(""), md, time.Minute) + assert.NoError(t, err) - cp.provideFunc = mockCallerProvideFunc(tt.args.mockCallReqResp) + defer caller.Close() - caller, err := cp.Provide("") - assert.NoError(t, err) + assert.Equal(t, md, caller.Metadata()) - caller.SetSystemPrompt(tt.args.systemPrompt) - - resp, err := GetInvoke(context.TODO(), tt.args.userInstruction, tt.args.baseSystemMessage, "transID", true, caller, pd) - assert.NoError(t, err) - - assert.Equal(t, tt.wantUsage, resp.TokenUsage) - assert.Equal(t, tt.wantRequest, pd.RequestRecords()) - }) - } + sysPrompt := "hello system prompt" + caller.SetSystemPrompt(sysPrompt) + assert.Equal(t, sysPrompt, caller.GetSystemPrompt()) } -func TestCallerChatCompletion(t *testing.T) { - type args struct { - providerMockData []provider.MockData - mockCallReqResp map[uint32][]mockFunctionCall - systemPrompt string - request openai.ChatCompletionRequest - } - tests := []struct { - name string - args args - wantRequest []openai.ChatCompletionRequest - }{ - { - name: "chat with tool call", - args: args{ - providerMockData: []provider.MockData{ - provider.MockChatCompletionResponse(toolCallResp, stopResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{ - // toolID should equal to toolCallResp's toolID - 0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, - }, - systemPrompt: "this is a system prompt", - request: openai.ChatCompletionRequest{ - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}}, - }, - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How is the weather today in Boston, MA?"}, - {Role: "system", Content: "this is a system prompt"}, - }, - Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, - }, - { - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How is the weather today in Boston, MA?"}, - {Role: "system", Content: "this is a system prompt"}, - {Role: "assistant", ToolCalls: []openai.ToolCall{{ID: "call_abc123", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\n\"location\": \"Boston, MA\"\n}"}}}}, - {Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_abc123"}, - }, - }, - }, - }, - { - name: "chat without tool call", - args: args{ - providerMockData: []provider.MockData{ - provider.MockChatCompletionResponse(stopResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{ - // toolID should equal to toolCallResp's toolID - 0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, - }, - systemPrompt: "You are an assistant.", - request: openai.ChatCompletionRequest{ - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How are you"}}, - }, - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How are you"}, - {Role: "system", Content: "You are an assistant."}, - }, - Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, - }, - }, - }, - { - name: "chat with tool call in stream", - args: args{ - providerMockData: 
[]provider.MockData{ - provider.MockChatCompletionStreamResponse(toolCallStreamResp, stopStreamResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{ - // toolID should equal to toolCallResp's toolID - 0x33: {{toolID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, - }, - systemPrompt: "You are a weather assistant", - request: openai.ChatCompletionRequest{ - Stream: true, - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}}, - }, - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Stream: true, - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How is the weather today in Boston, MA?"}, - {Role: "system", Content: "You are a weather assistant"}, - }, - Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, - }, - { - Stream: true, - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How is the weather today in Boston, MA?"}, - {Role: "system", Content: "You are a weather assistant"}, - {Role: "assistant", ToolCalls: []openai.ToolCall{{Index: toInt(0), ID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\"location\":\"Boston, MA\"}"}}}}, - {Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_9ctHOJqO3bYrpm2A6S7nHd5k"}, - }, - }, - }, - }, - { - name: "chat without tool call in stream", - args: args{ - providerMockData: []provider.MockData{ - provider.MockChatCompletionStreamResponse(stopStreamResp), - }, - mockCallReqResp: map[uint32][]mockFunctionCall{ - // toolID should equal to toolCallResp's toolID - 0x33: {{toolID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, - }, - systemPrompt: "You are a weather assistant", - request: openai.ChatCompletionRequest{ - Stream: true, - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}}, - }, - }, - wantRequest: []openai.ChatCompletionRequest{ - { - Stream: true, - Messages: []openai.ChatCompletionMessage{ - {Role: "user", Content: "How is the weather today in Boston, MA?"}, - {Role: "system", Content: "You are a weather assistant"}, - }, - Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - register.SetRegister(register.NewDefault()) - - pd, err := provider.NewMock("mock provider", tt.args.providerMockData...) 
- if err != nil { - t.Fatal(err) - } - - cp := newMockCallerProvider() - - cp.provideFunc = mockCallerProvideFunc(tt.args.mockCallReqResp) - - caller, err := cp.Provide("") - assert.NoError(t, err) - - caller.SetSystemPrompt(tt.args.systemPrompt) - - w := httptest.NewRecorder() - err = GetChatCompletions(context.TODO(), tt.args.request, "transID", pd, caller, w) - assert.NoError(t, err) - - assert.Equal(t, tt.wantRequest, pd.RequestRecords()) - }) - } +type testComponentCreator struct { + flow *mockDataFlow } -func newMockCallerProvider() *callerProvider { - cp := &callerProvider{ - zipperAddr: DefaultZipperAddr, - exFn: DefaultExchangeMetadataFunc, - callers: expirable.NewLRU(CallerProviderCacheSize, func(_ string, caller Caller) { caller.Close() }, CallerProviderCacheTTL), - } - return cp +func (c *testComponentCreator) CreateSource(_ string) yomo.Source { + return c.flow } -// mockCallerProvideFunc returns a mock caller provider, which is used for mockCallerProvider -// the request-response of caller be provided has been defined in advance, the request and response are defined in the `calls`. -func mockCallerProvideFunc(calls map[uint32][]mockFunctionCall) provideFunc { - // register function to register - for tag, call := range calls { - for _, c := range call { - register.RegisterFunction(tag, &openai.FunctionDefinition{Name: c.functionName}, uint64(tag), nil) - } - } - - return func(credential, _ string, _ ExchangeMetadataFunc) (Caller, error) { - caller := &caller{ - credential: credential, - md: metadata.M{"hello": "llm bridge"}, - } - - caller.SetSystemPrompt("") - caller.CallSyncer = &mockCallSyncer{calls: calls} - - return caller, nil - } -} - -type mockFunctionCall struct { - toolID string - functionName string - respContent string -} - -type mockCallSyncer struct { - calls map[uint32][]mockFunctionCall +func (c *testComponentCreator) CreateReducer(_ string) yomo.StreamFunction { + return c.flow } -// Call implements CallSyncer, it returns the mock response defined in advance. 
-func (m *mockCallSyncer) Call(ctx context.Context, transID string, reqID string, toolCalls map[uint32][]*openai.ToolCall) ([]openai.ChatCompletionMessage, error) { - res := []openai.ChatCompletionMessage{} - for tag, calls := range toolCalls { - mcs, ok := m.calls[tag] - if !ok { - return nil, errors.New("call not found") - } - mcm := make(map[string]mockFunctionCall, len(mcs)) - for _, mc := range mcs { - mcm[mc.toolID] = mc - } - for _, call := range calls { - mc, ok := mcm[call.ID] - if !ok { - return nil, errors.New("call not found") - } - res = append(res, openai.ChatCompletionMessage{ - ToolCallID: mc.toolID, - Role: openai.ChatMessageRoleTool, - Content: mc.respContent, - }) - } - } - return res, nil +func (c *testComponentCreator) ExchangeMetadata(_ string) (metadata.M, error) { + return metadata.M{"hello": "llm bridge"}, nil } - -func (m *mockCallSyncer) Close() error { return nil } - -func toInt(val int) *int { return &val } - -var stopStreamResp = `data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I'm"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" a"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" computer"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" program"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - -data: 
{"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" so"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" don't"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" have"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" feelings"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" but"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I'm"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" and"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" ready"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" 
help"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" whatever"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: 
{"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - -data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47}} - -data: [DONE]` - -var stopResp = `{ - "id": "chatcmpl-9blYknv9rHvr2dvCQKMeW21hlBpCX", - "object": "chat.completion", - "created": 1718787982, - "model": "gpt-4o-2024-05-13", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! I'm just a computer program, so I don't have feelings, but thanks for asking. How can I assist you today?" - }, - "logprobs": null, - "finish_reason": "stop" - } - ], - "usage": { - "prompt_tokens": 13, - "completion_tokens": 26, - "total_tokens": 39 - }, - "system_fingerprint": "fp_f4e629d0a5" -}` - -var toolCallStreamResp = `data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_9ctHOJqO3bYrpm2A6S7nHd5k","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Boston"}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: 
{"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" MA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} - -data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[],"usage":{"prompt_tokens":83,"completion_tokens":17,"total_tokens":100}}` - -var toolCallResp = `{ - "id": "chatcmpl-abc123", - "object": "chat.completion", - "created": 1699896916, - "model": "gpt-4-turbo-2024-04-09", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": null, - "tool_calls": [ - { - "id": "call_abc123", - "type": "function", - "function": { - "name": "get_current_weather", - "arguments": "{\n\"location\": \"Boston, MA\"\n}" - } - } - ] - }, - "logprobs": null, - "finish_reason": "tool_calls" - } - ], - "usage": { - "prompt_tokens": 82, - "completion_tokens": 17, - "total_tokens": 99 - } -}` diff --git a/pkg/bridge/ai/service.go b/pkg/bridge/ai/service.go new file mode 100644 index 000000000..fd970465e --- /dev/null +++ b/pkg/bridge/ai/service.go @@ -0,0 +1,640 @@ +package ai + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "strings" + "time" + + "github.com/hashicorp/golang-lru/v2/expirable" + openai "github.com/sashabaranov/go-openai" + "github.com/yomorun/yomo" + "github.com/yomorun/yomo/ai" + "github.com/yomorun/yomo/core/metadata" + "github.com/yomorun/yomo/core/ylog" + "github.com/yomorun/yomo/pkg/bridge/ai/provider" + "github.com/yomorun/yomo/pkg/bridge/ai/register" + "github.com/yomorun/yomo/pkg/id" + "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" +) + +// Service is the service layer for llm bridge server. +// service is responsible for handling the logic from handler layer. +type Service struct { + zipperAddr string + provider provider.LLMProvider + newCallerFunc newCallerFunc + callers *expirable.LRU[string, *Caller] + option *ServiceOptions + logger *slog.Logger +} + +// ServiceOptions is the option for creating service +type ServiceOptions struct { + // Logger is the logger for the service + Logger *slog.Logger + // Tracer is the tracer for the service + Tracer trace.Tracer + // CredentialFunc is the function for getting the credential from the request + CredentialFunc func(r *http.Request) (string, error) + // CallerCacheSize is the size of the caller's cache + CallerCacheSize int + // CallerCacheTTL is the time to live of the callers cache + CallerCacheTTL time.Duration + // CallerCallTimeout is the timeout for awaiting the function response. + CallerCallTimeout time.Duration + // SourceBuilder should builds an unconnected source. 
+
+// NewService creates a new service for handling the logic from the handler layer.
+func NewService(zipperAddr string, provider provider.LLMProvider, opt *ServiceOptions) *Service {
+	return newService(zipperAddr, provider, NewCaller, opt)
+}
+
+func initOption(opt *ServiceOptions) *ServiceOptions {
+	if opt == nil {
+		opt = &ServiceOptions{}
+	}
+	if opt.Tracer == nil {
+		opt.Tracer = noop.NewTracerProvider().Tracer("yomo-ai-bridge")
+	}
+	if opt.Logger == nil {
+		opt.Logger = ylog.Default()
+	}
+	if opt.CredentialFunc == nil {
+		opt.CredentialFunc = func(_ *http.Request) (string, error) { return "", nil }
+	}
+	if opt.CallerCacheSize == 0 {
+		opt.CallerCacheSize = 1
+	}
+	if opt.CallerCallTimeout == 0 {
+		opt.CallerCallTimeout = 60 * time.Second
+	}
+	if opt.SourceBuilder == nil {
+		opt.SourceBuilder = func(zipperAddr, credential string) yomo.Source {
+			return yomo.NewSource(
+				"fc-source",
+				zipperAddr,
+				yomo.WithSourceReConnect(), yomo.WithCredential(credential))
+		}
+	}
+	if opt.ReducerBuilder == nil {
+		opt.ReducerBuilder = func(zipperAddr, credential string) yomo.StreamFunction {
+			return yomo.NewStreamFunction(
+				"fc-reducer",
+				zipperAddr,
+				yomo.WithSfnReConnect(), yomo.WithSfnCredential(credential), yomo.DisableOtelTrace())
+		}
+	}
+	if opt.MetadataExchanger == nil {
+		opt.MetadataExchanger = func(credential string) (metadata.M, error) {
+			return metadata.New(), nil
+		}
+	}
+
+	return opt
+}
+
+func newService(zipperAddr string, provider provider.LLMProvider, ncf newCallerFunc, opt *ServiceOptions) *Service {
+	var onEvict = func(_ string, caller *Caller) {
+		caller.Close()
+	}
+
+	opt = initOption(opt)
+
+	service := &Service{
+		zipperAddr:    zipperAddr,
+		provider:      provider,
+		newCallerFunc: ncf,
+		callers:       expirable.NewLRU(opt.CallerCacheSize, onEvict, opt.CallerCacheTTL),
+		option:        opt,
+		logger:        opt.Logger,
+	}
+
+	return service
+}
+
+type newCallerFunc func(yomo.Source, yomo.StreamFunction, metadata.M, time.Duration) (*Caller, error)
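+
+// A minimal sketch of the per-credential caller cache, assuming a constructed
+// service and two requests that resolve to the same credential (the request
+// variables are hypothetical):
+//
+//	callerA, _ := svc.LoadOrCreateCaller(reqWithTokenX)
+//	callerB, _ := svc.LoadOrCreateCaller(reqWithTokenX)
+//	// callerA == callerB: the second lookup hits the LRU cache. Entries
+//	// expire after CallerCacheTTL, and eviction closes the Caller.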
+
+// LoadOrCreateCaller loads or creates the caller according to the http request.
+func (srv *Service) LoadOrCreateCaller(r *http.Request) (*Caller, error) {
+	credential, err := srv.option.CredentialFunc(r)
+	if err != nil {
+		return nil, err
+	}
+	return srv.loadOrCreateCaller(credential)
+}
+
+// GetInvoke returns the invoke response.
+func (srv *Service) GetInvoke(ctx context.Context, userInstruction, baseSystemMessage, transID string, caller *Caller, includeCallStack bool) (*ai.InvokeResponse, error) {
+	md := caller.Metadata().Clone()
+	// read tools attached to the metadata
+	tcs, err := register.ListToolCalls(md)
+	if err != nil {
+		return &ai.InvokeResponse{}, err
+	}
+	// prepare tools
+	tools := prepareToolCalls(tcs)
+
+	chainMessage := ai.ChainMessage{}
+	messages := srv.prepareMessages(baseSystemMessage, userInstruction, chainMessage, tools, true)
+	req := openai.ChatCompletionRequest{
+		Messages: messages,
+	}
+	// with tools
+	if len(tools) > 0 {
+		req.Tools = tools
+	}
+	var (
+		promptUsage     int
+		completionUsage int
+	)
+	_, span := srv.option.Tracer.Start(ctx, "first_call")
+	chatCompletionResponse, err := srv.provider.GetChatCompletions(ctx, req, md)
+	if err != nil {
+		return nil, err
+	}
+	span.End()
+	promptUsage = chatCompletionResponse.Usage.PromptTokens
+	completionUsage = chatCompletionResponse.Usage.CompletionTokens
+
+	// convert ChatCompletionResponse to InvokeResponse
+	res, err := ai.ConvertToInvokeResponse(&chatCompletionResponse, tcs)
+	if err != nil {
+		return nil, err
+	}
+	// if no tool_calls were fired, just return the llm text result
+	if res.FinishReason != string(openai.FinishReasonToolCalls) {
+		return res, nil
+	}
+
+	// run llm function calls
+	srv.logger.Debug(">>>> start 1st call response",
+		"res_toolcalls", fmt.Sprintf("%+v", res.ToolCalls),
+		"res_assistant_msgs", fmt.Sprintf("%+v", res.AssistantMessage))
+
+	srv.logger.Debug(">> run function calls", "transID", transID, "res.ToolCalls", fmt.Sprintf("%+v", res.ToolCalls))
+
+	_, span = srv.option.Tracer.Start(ctx, "run_sfn")
+	reqID := id.New(16)
+	llmCalls, err := caller.Call(ctx, transID, reqID, res.ToolCalls)
+	if err != nil {
+		return nil, err
+	}
+	span.End()
+
+	srv.logger.Debug(">>>> start 2nd call with", "calls", fmt.Sprintf("%+v", llmCalls), "preceeding_assistant_message", fmt.Sprintf("%+v", res.AssistantMessage))
+
+	chainMessage.PreceedingAssistantMessage = res.AssistantMessage
+	chainMessage.ToolMessages = transToolMessage(llmCalls)
+	// do not attach toolMessage to the prompt in the 2nd call
+	messages2 := srv.prepareMessages(baseSystemMessage, userInstruction, chainMessage, tools, false)
+	req2 := openai.ChatCompletionRequest{
+		Messages: messages2,
+	}
+	_, span = srv.option.Tracer.Start(ctx, "second_call")
+	chatCompletionResponse2, err := srv.provider.GetChatCompletions(ctx, req2, md)
+	if err != nil {
+		return nil, err
+	}
+	span.End()
+
+	chatCompletionResponse2.Usage.PromptTokens += promptUsage
+	chatCompletionResponse2.Usage.CompletionTokens += completionUsage
+
+	res2, err := ai.ConvertToInvokeResponse(&chatCompletionResponse2, tcs)
+	if err != nil {
+		return nil, err
+	}
+
+	// INFO: call stack information
+	if includeCallStack {
+		res2.ToolCalls = res.ToolCalls
+		res2.ToolMessages = transToolMessage(llmCalls)
+	}
+	srv.logger.Debug("<<<< complete 2nd call", "res2", fmt.Sprintf("%+v", res2))
+
+	return res2, err
+}
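+
+// A minimal sketch of driving GetInvoke, assuming a caller obtained from
+// LoadOrCreateCaller (the instruction and transID values are placeholders):
+//
+//	res, err := srv.GetInvoke(ctx, "how is the weather in Boston?",
+//		baseSystemMessage, "trans-1", caller, false)
+//	if err == nil {
+//		fmt.Println(res.FinishReason, res.TokenUsage)
+//	}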
+
+// GetChatCompletions accepts openai.ChatCompletionRequest and responds to http.ResponseWriter.
+func (srv *Service) GetChatCompletions(ctx context.Context, req openai.ChatCompletionRequest, transID string, caller *Caller, w http.ResponseWriter) error {
+	reqCtx, reqSpan := srv.option.Tracer.Start(ctx, "completions_request")
+	md := caller.Metadata().Clone()
+
+	// 1. find all tool-hosting sfns
+	tagTools, err := register.ListToolCalls(md)
+	if err != nil {
+		return err
+	}
+	// 2. add those tools to the request
+	req = srv.addToolsToRequest(req, tagTools)
+
+	// 3. overwrite the system prompt in the request
+	req = srv.overWriteSystemPrompt(req, caller.GetSystemPrompt())
+
+	var (
+		promptUsage      = 0
+		completionUsage  = 0
+		totalUsage       = 0
+		reqMessages      = req.Messages
+		toolCallsMap     = make(map[int]openai.ToolCall)
+		toolCalls        = []openai.ToolCall{}
+		assistantMessage = openai.ChatCompletionMessage{}
+	)
+	// 4. make the first call to get the tool calls
+	if req.Stream {
+		_, firstCallSpan := srv.option.Tracer.Start(reqCtx, "first_call_request")
+		var (
+			flusher        = eventFlusher(w)
+			isFunctionCall = false
+		)
+		resStream, err := srv.provider.GetChatCompletionsStream(reqCtx, req, md)
+		if err != nil {
+			return err
+		}
+
+		var (
+			i             int // number of chunks
+			j             int // number of tool call chunks
+			firstRespSpan trace.Span
+			respSpan      trace.Span
+		)
+		for {
+			if i == 0 {
+				_, firstRespSpan = srv.option.Tracer.Start(reqCtx, "first_call_response_in_stream")
+			}
+			streamRes, err := resStream.Recv()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return err
+			}
+			if len(streamRes.Choices) == 0 {
+				continue
+			}
+			if streamRes.Usage != nil {
+				promptUsage = streamRes.Usage.PromptTokens
+				completionUsage = streamRes.Usage.CompletionTokens
+				totalUsage = streamRes.Usage.TotalTokens
+			}
+			if tc := streamRes.Choices[0].Delta.ToolCalls; len(tc) > 0 {
+				isFunctionCall = true
+				if j == 0 {
+					firstCallSpan.End()
+				}
+				for _, t := range tc {
+					// this index is the index into the toolCalls slice; the index field only appears in stream responses
+					index := *t.Index
+					item, ok := toolCallsMap[index]
+					if !ok {
+						toolCallsMap[index] = openai.ToolCall{
+							Index:    t.Index,
+							ID:       t.ID,
+							Type:     t.Type,
+							Function: openai.FunctionCall{},
+						}
+						item = toolCallsMap[index]
+					}
+					if t.Function.Arguments != "" {
+						item.Function.Arguments += t.Function.Arguments
+					}
+					if t.Function.Name != "" {
+						item.Function.Name = t.Function.Name
+					}
+					toolCallsMap[index] = item
+				}
+				j++
+			} else if streamRes.Choices[0].FinishReason != openai.FinishReasonToolCalls {
+				_ = writeStreamEvent(w, flusher, streamRes)
+			}
+			if i == 0 && j == 0 && !isFunctionCall {
+				reqSpan.End()
+				recordTTFT(ctx, srv.option.Tracer)
+				_, respSpan = srv.option.Tracer.Start(ctx, "response_in_stream(TBT)")
+			}
+			i++
+		}
+		if !isFunctionCall {
+			respSpan.End()
+			return writeStreamDone(w, flusher)
+		}
+		firstRespSpan.End()
+		toolCalls = mapToSliceTools(toolCallsMap)
+
+		assistantMessage = openai.ChatCompletionMessage{
+			ToolCalls: toolCalls,
+			Role:      openai.ChatMessageRoleAssistant,
+		}
+		reqSpan.End()
+		flusher.Flush()
+	} else {
+		_, firstCallSpan := srv.option.Tracer.Start(reqCtx, "first_call")
+		resp, err := srv.provider.GetChatCompletions(ctx, req, md)
+		if err != nil {
+			return err
+		}
+		reqSpan.End()
+
+		promptUsage = resp.Usage.PromptTokens
+		completionUsage = resp.Usage.CompletionTokens
+		totalUsage = resp.Usage.TotalTokens
+
+		srv.logger.Debug(" #1 first call", "response", fmt.Sprintf("%+v", resp))
+		// it is a function call
+		if resp.Choices[0].FinishReason == openai.FinishReasonToolCalls {
+			toolCalls = append(toolCalls, resp.Choices[0].Message.ToolCalls...)
+			assistantMessage = resp.Choices[0].Message
+			firstCallSpan.End()
+		} else {
+			w.Header().Set("Content-Type", "application/json")
+			json.NewEncoder(w).Encode(resp)
+			return nil
+		}
+	}
+
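+	// What follows dispatches the accumulated tool calls to the hosting sfns
+	// and then makes the second call. As an illustration only (values borrowed
+	// from the tests below), one registered tool under tag 0x33 would yield:
+	//
+	//	fnCalls = map[uint32][]*openai.ToolCall{
+	//		0x33: {{ID: "call_abc123", Type: openai.ToolTypeFunction,
+	//			Function: openai.FunctionCall{Name: "get_current_weather"}}},
+	//	}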
+	resCtx, resSpan := srv.option.Tracer.Start(ctx, "completions_response")
+	defer resSpan.End()
+
+	_, sfnSpan := srv.option.Tracer.Start(resCtx, "run_sfn")
+
+	// 5. find the sfns that match the function calls
+	fnCalls := findTagTools(tagTools, toolCalls)
+
+	// 6. run llm function calls
+	reqID := id.New(16)
+	llmCalls, err := caller.Call(ctx, transID, reqID, fnCalls)
+	if err != nil {
+		return err
+	}
+	sfnSpan.End()
+
+	// 7. do the second call (its messages are from the user input, the first call response and the sfn call results)
+	req.Messages = append(reqMessages, assistantMessage)
+	req.Messages = append(req.Messages, llmCalls...)
+	req.Tools = nil // reset tools field
+
+	srv.logger.Debug(" #2 second call", "request", fmt.Sprintf("%+v", req))
+
+	if req.Stream {
+		_, secondCallSpan := srv.option.Tracer.Start(resCtx, "second_call_request")
+		flusher := w.(http.Flusher)
+		resStream, err := srv.provider.GetChatCompletionsStream(resCtx, req, md)
+		if err != nil {
+			return err
+		}
+		secondCallSpan.End()
+
+		var (
+			i              int
+			secondRespSpan trace.Span
+		)
+		for {
+			if i == 0 {
+				recordTTFT(resCtx, srv.option.Tracer)
+				_, secondRespSpan = srv.option.Tracer.Start(resCtx, "second_call_response_in_stream(TBT)")
+			}
+			i++
+			streamRes, err := resStream.Recv()
+			if err == io.EOF {
+				secondRespSpan.End()
+				return writeStreamDone(w, flusher)
+			}
+			if err != nil {
+				return err
+			}
+			if streamRes.Usage != nil {
+				streamRes.Usage.PromptTokens += promptUsage
+				streamRes.Usage.CompletionTokens += completionUsage
+				streamRes.Usage.TotalTokens += totalUsage
+			}
+			_ = writeStreamEvent(w, flusher, streamRes)
+		}
+	} else {
+		_, secondCallSpan := srv.option.Tracer.Start(resCtx, "second_call")
+
+		resp, err := srv.provider.GetChatCompletions(resCtx, req, md)
+		if err != nil {
+			return err
+		}
+
+		resp.Usage.PromptTokens += promptUsage
+		resp.Usage.CompletionTokens += completionUsage
+		resp.Usage.TotalTokens += totalUsage
+
+		secondCallSpan.End()
+		w.Header().Set("Content-Type", "application/json")
+		return json.NewEncoder(w).Encode(resp)
+	}
+}
+
+func (srv *Service) loadOrCreateCaller(credential string) (*Caller, error) {
+	caller, ok := srv.callers.Get(credential)
+	if ok {
+		return caller, nil
+	}
+	md, err := srv.option.MetadataExchanger(credential)
+	if err != nil {
+		return nil, err
+	}
+	caller, err = srv.newCallerFunc(
+		srv.option.SourceBuilder(srv.zipperAddr, credential),
+		srv.option.ReducerBuilder(srv.zipperAddr, credential),
+		md,
+		srv.option.CallerCallTimeout,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	srv.callers.Add(credential, caller)
+
+	return caller, nil
+}
+
+func (srv *Service) addToolsToRequest(req openai.ChatCompletionRequest, tagTools map[uint32]openai.Tool) openai.ChatCompletionRequest {
+	toolCalls := prepareToolCalls(tagTools)
+
+	if len(toolCalls) > 0 {
+		req.Tools = toolCalls
+	}
+
+	srv.logger.Debug(" #1 first call", "request", fmt.Sprintf("%+v", req))
+
+	return req
+}
+
+func (srv *Service) overWriteSystemPrompt(req openai.ChatCompletionRequest, sysPrompt string) openai.ChatCompletionRequest {
+	// do nothing if the system prompt is empty
+	if sysPrompt == "" {
+		return req
+	}
+	// overwrite the system prompt
+	isOverWrite := false
+	for i, msg := range req.Messages {
+		if msg.Role != "system" {
+			continue
+		}
+		req.Messages[i] = openai.ChatCompletionMessage{
+			Role:    msg.Role,
+			Content: sysPrompt,
+		}
+		isOverWrite = true
+	}
+	// append the system prompt if it was not overwritten
+	if !isOverWrite {
+		req.Messages = append(req.Messages, openai.ChatCompletionMessage{
+			Role:    "system",
+			Content: sysPrompt,
+		})
+	}
+
+	srv.logger.Debug(" #1 first call after overwrite", "request", fmt.Sprintf("%+v", req))
+
+	return req
+}
+
+func findTagTools(tagTools map[uint32]openai.Tool, toolCalls []openai.ToolCall) map[uint32][]*openai.ToolCall {
+	fnCalls := make(map[uint32][]*openai.ToolCall)
+	// more than one function may be called
+	for _, call := range toolCalls {
+		for tag, tc := range tagTools {
+			if tc.Function.Name == call.Function.Name && tc.Type == call.Type {
+				currentCall := call
+				fnCalls[tag] = append(fnCalls[tag], &currentCall)
+			}
+		}
+	}
+	return fnCalls
+}
+
+func writeStreamEvent(w http.ResponseWriter, flusher http.Flusher, streamRes openai.ChatCompletionStreamResponse) error {
+	if _, err := io.WriteString(w, "data: "); err != nil {
+		return err
+	}
+	if err := json.NewEncoder(w).Encode(streamRes); err != nil {
+		return err
+	}
+	if _, err := io.WriteString(w, "\n"); err != nil {
+		return err
+	}
+	flusher.Flush()
+
+	return nil
+}
+
+func writeStreamDone(w http.ResponseWriter, flusher http.Flusher) error {
+	_, err := io.WriteString(w, "data: [DONE]")
+	flusher.Flush()
+
+	return err
+}
+
+func (srv *Service) prepareMessages(baseSystemMessage string, userInstruction string, chainMessage ai.ChainMessage, tools []openai.Tool, withTool bool) []openai.ChatCompletionMessage {
+	systemInstructions := []string{"## Instructions\n"}
+
+	// only append the tool descriptions if there are tool calls
+	if withTool {
+		for _, t := range tools {
+			systemInstructions = append(systemInstructions, "- ")
+			systemInstructions = append(systemInstructions, t.Function.Description)
+			systemInstructions = append(systemInstructions, "\n")
+		}
+		systemInstructions = append(systemInstructions, "\n")
+	}
+
+	SystemPrompt := fmt.Sprintf("%s\n\n%s", baseSystemMessage, strings.Join(systemInstructions, ""))
+
+	messages := []openai.ChatCompletionMessage{}
+
+	// 1. system message
+	messages = append(messages, openai.ChatCompletionMessage{Role: "system", Content: SystemPrompt})
+
+	// 2. previous tool calls
+	// Ref: Tool Message Object in Messages
+	// https://platform.openai.com/docs/guides/function-calling
+	// https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages
+
+	if chainMessage.PreceedingAssistantMessage != nil {
+		// 2.1 assistant message
+		// try to convert chainMessage.PreceedingAssistantMessage to the ChatCompletionMessage type
+		assistantMessage, ok := chainMessage.PreceedingAssistantMessage.(openai.ChatCompletionMessage)
+		if ok {
+			srv.logger.Debug("======== add assistantMessage", "am", fmt.Sprintf("%+v", assistantMessage))
+			messages = append(messages, assistantMessage)
+		}
+
+		// 2.2 tool message
+		for _, tool := range chainMessage.ToolMessages {
+			tm := openai.ChatCompletionMessage{
+				Role:       "tool",
+				Content:    tool.Content,
+				ToolCallID: tool.ToolCallID,
+			}
+			srv.logger.Debug("======== add toolMessage", "tm", fmt.Sprintf("%+v", tm))
+			messages = append(messages, tm)
+		}
+	}
+
+	// 3. user instruction
+	messages = append(messages, openai.ChatCompletionMessage{Role: "user", Content: userInstruction})
+
+	return messages
+}
+
+func mapToSliceTools(m map[int]openai.ToolCall) []openai.ToolCall {
+	arr := make([]openai.ToolCall, len(m))
+	for k, v := range m {
+		arr[k] = v
+	}
+	return arr
+}
+
+func eventFlusher(w http.ResponseWriter) http.Flusher {
+	h := w.Header()
+	h.Set("Content-Type", "text/event-stream")
+	h.Set("Cache-Control", "no-cache, must-revalidate")
+	h.Set("x-content-type-options", "nosniff")
+	flusher := w.(http.Flusher)
+	return flusher
+}
+
+func prepareToolCalls(tcs map[uint32]openai.Tool) []openai.Tool {
+	// prepare tools
+	toolCalls := make([]openai.Tool, len(tcs))
+	idx := 0
+	for _, tc := range tcs {
+		toolCalls[idx] = tc
+		idx++
+	}
+	return toolCalls
+}
+
+func transToolMessage(msgs []openai.ChatCompletionMessage) []ai.ToolMessage {
+	toolMessages := make([]ai.ToolMessage, len(msgs))
+	for i, msg := range msgs {
+		toolMessages[i] = ai.ToolMessage{
+			Role:       msg.Role,
+			Content:    msg.Content,
+			ToolCallID: msg.ToolCallID,
+		}
+	}
+	return toolMessages
+}
+
+func recordTTFT(ctx context.Context, tracer trace.Tracer) {
+	_, span := tracer.Start(ctx, "TTFT")
+	span.End()
+	time.Sleep(time.Millisecond)
+}
diff --git a/pkg/bridge/ai/service_test.go b/pkg/bridge/ai/service_test.go
new file mode 100644
index 000000000..474593a31
--- /dev/null
+++ b/pkg/bridge/ai/service_test.go
@@ -0,0 +1,503 @@
+package ai
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	openai "github.com/sashabaranov/go-openai"
+	"github.com/stretchr/testify/assert"
+	"github.com/yomorun/yomo"
+	"github.com/yomorun/yomo/ai"
+	"github.com/yomorun/yomo/core/metadata"
+	"github.com/yomorun/yomo/pkg/bridge/ai/provider"
+	"github.com/yomorun/yomo/pkg/bridge/ai/register"
+)
+
+func TestServiceInvoke(t *testing.T) {
+	type args struct {
+		providerMockData  []provider.MockData
+		mockCallReqResp   map[uint32][]mockFunctionCall
+		systemPrompt      string
+		userInstruction   string
+		baseSystemMessage string
+	}
+	tests := []struct {
+		name        string
+		args        args
+		wantRequest []openai.ChatCompletionRequest
+		wantUsage   ai.TokenUsage
+	}{
+		{
+			name: "invoke with tool call",
+			args: args{
+				providerMockData: []provider.MockData{
+					provider.MockChatCompletionResponse(toolCallResp, stopResp),
+				},
+				mockCallReqResp: map[uint32][]mockFunctionCall{
+					// toolID should equal toolCallResp's toolID
+					0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}},
+				},
+				systemPrompt:      "this is a system prompt",
+				userInstruction:   "hi",
+				baseSystemMessage: "this is a base system message",
+			},
+			wantRequest: []openai.ChatCompletionRequest{
+				{
+					Messages: []openai.ChatCompletionMessage{
+						{Role: "system", Content: "this is a base system message\n\n## Instructions\n- \n\n"},
+						{Role: "user", Content: "hi"},
+					},
+					Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}},
+				},
+				{
+					Messages: []openai.ChatCompletionMessage{
+						{Role: "system", Content: "this is a base system message\n\n## Instructions\n"},
+						{Role: "assistant", ToolCalls: []openai.ToolCall{{ID: "call_abc123", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\n\"location\": \"Boston, MA\"\n}"}}}},
+						{Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_abc123"},
+						{Role: "user", Content: "hi"},
+					},
+				},
+			},
+			wantUsage: ai.TokenUsage{PromptTokens: 95, CompletionTokens: 43},
+		},
+		{
+			name: "invoke without tool call",
+			args: args{
+				providerMockData: []provider.MockData{
+					provider.MockChatCompletionResponse(stopResp),
+				},
+				mockCallReqResp:   map[uint32][]mockFunctionCall{},
+				systemPrompt:      "this is a system prompt",
+				userInstruction:   "hi",
+				baseSystemMessage: "this is a base system message",
+			},
+			wantRequest: []openai.ChatCompletionRequest{
+				{
+					Messages: []openai.ChatCompletionMessage{
+						{Role: "system", Content: "this is a base system message\n\n## Instructions\n\n"},
+						{Role: "user", Content: "hi"},
+					},
+				},
+			},
+			wantUsage: ai.TokenUsage{PromptTokens: 13, CompletionTokens: 26},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			register.SetRegister(register.NewDefault())
+
+			pd, err := provider.NewMock("mock provider", tt.args.providerMockData...)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			flow := newMockDataFlow(newHandler(2 * time.Hour).handle)
+
+			newCaller := func(_ yomo.Source, _ yomo.StreamFunction, _ metadata.M, _ time.Duration) (*Caller, error) {
+				return mockCaller(tt.args.mockCallReqResp), err
+			}
+
+			service := newService("fake_zipper_addr", pd, newCaller, &ServiceOptions{
+				SourceBuilder:     func(_, _ string) yomo.Source { return flow },
+				ReducerBuilder:    func(_, _ string) yomo.StreamFunction { return flow },
+				MetadataExchanger: func(_ string) (metadata.M, error) { return metadata.M{"hello": "llm bridge"}, nil },
+			})
+
+			caller, err := service.LoadOrCreateCaller(&http.Request{})
+			assert.NoError(t, err)
+
+			caller.SetSystemPrompt(tt.args.systemPrompt)
+
+			resp, err := service.GetInvoke(context.TODO(), tt.args.userInstruction, tt.args.baseSystemMessage, "transID", caller, true)
+			assert.NoError(t, err)
+
+			assert.Equal(t, tt.wantUsage, resp.TokenUsage)
+			assert.Equal(t, tt.wantRequest, pd.RequestRecords())
+		})
+	}
+}
+
+func TestServiceChatCompletion(t *testing.T) {
+	type args struct {
+		providerMockData []provider.MockData
+		mockCallReqResp  map[uint32][]mockFunctionCall
+		systemPrompt     string
+		request          openai.ChatCompletionRequest
+	}
+	tests := []struct {
+		name        string
+		args        args
+		wantRequest []openai.ChatCompletionRequest
+	}{
+		{
+			name: "chat with tool call",
+			args: args{
+				providerMockData: []provider.MockData{
+					provider.MockChatCompletionResponse(toolCallResp, stopResp),
+				},
+				mockCallReqResp: map[uint32][]mockFunctionCall{
+					// toolID should equal toolCallResp's toolID
+					0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}},
+				},
+				systemPrompt: "this is a system prompt",
+				request: openai.ChatCompletionRequest{
+					Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}},
+				},
+			},
+			wantRequest: []openai.ChatCompletionRequest{
+				{
+					Messages: []openai.ChatCompletionMessage{
+						{Role: "user", Content: "How is the weather today in Boston, MA?"},
+						{Role: "system", Content: "this is a system prompt"},
+					},
+					Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}},
+				},
+				{
+					Messages: []openai.ChatCompletionMessage{
+						{Role: "user", Content: "How is the weather today in Boston, MA?"},
+						{Role: "system", Content: "this is a system prompt"},
+						{Role: "assistant", ToolCalls: []openai.ToolCall{{ID: "call_abc123", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\n\"location\": \"Boston, MA\"\n}"}}}},
+						{Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_abc123"},
"call_abc123"}, + }, + }, + }, + }, + { + name: "chat without tool call", + args: args{ + providerMockData: []provider.MockData{ + provider.MockChatCompletionResponse(stopResp), + }, + mockCallReqResp: map[uint32][]mockFunctionCall{ + // toolID should equal to toolCallResp's toolID + 0x33: {{toolID: "call_abc123", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, + }, + systemPrompt: "You are an assistant.", + request: openai.ChatCompletionRequest{ + Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How are you"}}, + }, + }, + wantRequest: []openai.ChatCompletionRequest{ + { + Messages: []openai.ChatCompletionMessage{ + {Role: "user", Content: "How are you"}, + {Role: "system", Content: "You are an assistant."}, + }, + Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, + }, + }, + }, + { + name: "chat with tool call in stream", + args: args{ + providerMockData: []provider.MockData{ + provider.MockChatCompletionStreamResponse(toolCallStreamResp, stopStreamResp), + }, + mockCallReqResp: map[uint32][]mockFunctionCall{ + // toolID should equal to toolCallResp's toolID + 0x33: {{toolID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, + }, + systemPrompt: "You are a weather assistant", + request: openai.ChatCompletionRequest{ + Stream: true, + Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}}, + }, + }, + wantRequest: []openai.ChatCompletionRequest{ + { + Stream: true, + Messages: []openai.ChatCompletionMessage{ + {Role: "user", Content: "How is the weather today in Boston, MA?"}, + {Role: "system", Content: "You are a weather assistant"}, + }, + Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, + }, + { + Stream: true, + Messages: []openai.ChatCompletionMessage{ + {Role: "user", Content: "How is the weather today in Boston, MA?"}, + {Role: "system", Content: "You are a weather assistant"}, + {Role: "assistant", ToolCalls: []openai.ToolCall{{Index: toInt(0), ID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", Type: openai.ToolTypeFunction, Function: openai.FunctionCall{Name: "get_current_weather", Arguments: "{\"location\":\"Boston, MA\"}"}}}}, + {Role: "tool", Content: "temperature: 31°C", ToolCallID: "call_9ctHOJqO3bYrpm2A6S7nHd5k"}, + }, + }, + }, + }, + { + name: "chat without tool call in stream", + args: args{ + providerMockData: []provider.MockData{ + provider.MockChatCompletionStreamResponse(stopStreamResp), + }, + mockCallReqResp: map[uint32][]mockFunctionCall{ + // toolID should equal to toolCallResp's toolID + 0x33: {{toolID: "call_9ctHOJqO3bYrpm2A6S7nHd5k", functionName: "get_current_weather", respContent: "temperature: 31°C"}}, + }, + systemPrompt: "You are a weather assistant", + request: openai.ChatCompletionRequest{ + Stream: true, + Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "How is the weather today in Boston, MA?"}}, + }, + }, + wantRequest: []openai.ChatCompletionRequest{ + { + Stream: true, + Messages: []openai.ChatCompletionMessage{ + {Role: "user", Content: "How is the weather today in Boston, MA?"}, + {Role: "system", Content: "You are a weather assistant"}, + }, + Tools: []openai.Tool{{Type: openai.ToolTypeFunction, Function: &openai.FunctionDefinition{Name: "get_current_weather"}}}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
+			register.SetRegister(register.NewDefault())
+
+			pd, err := provider.NewMock("mock provider", tt.args.providerMockData...)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			flow := newMockDataFlow(newHandler(2 * time.Hour).handle)
+
+			newCaller := func(_ yomo.Source, _ yomo.StreamFunction, _ metadata.M, _ time.Duration) (*Caller, error) {
+				return mockCaller(tt.args.mockCallReqResp), err
+			}
+
+			service := newService("fake_zipper_addr", pd, newCaller, &ServiceOptions{
+				SourceBuilder:     func(_, _ string) yomo.Source { return flow },
+				ReducerBuilder:    func(_, _ string) yomo.StreamFunction { return flow },
+				MetadataExchanger: func(_ string) (metadata.M, error) { return metadata.M{"hello": "llm bridge"}, nil },
+			})
+
+			caller, err := service.LoadOrCreateCaller(&http.Request{})
+			assert.NoError(t, err)
+
+			caller.SetSystemPrompt(tt.args.systemPrompt)
+
+			w := httptest.NewRecorder()
+			err = service.GetChatCompletions(context.TODO(), tt.args.request, "transID", caller, w)
+			assert.NoError(t, err)
+
+			assert.Equal(t, tt.wantRequest, pd.RequestRecords())
+		})
+	}
+}
+
+// mockCaller returns a mock caller whose request-response pairs are defined
+// in advance by the `calls` map.
+func mockCaller(calls map[uint32][]mockFunctionCall) *Caller {
+	// register the functions into the register
+	for tag, call := range calls {
+		for _, c := range call {
+			register.RegisterFunction(tag, &openai.FunctionDefinition{Name: c.functionName}, uint64(tag), nil)
+		}
+	}
+
+	caller := &Caller{
+		CallSyncer: &mockCallSyncer{calls: calls},
+		md:         metadata.M{"hello": "llm bridge"},
+	}
+
+	return caller
+}
+
+type mockFunctionCall struct {
+	toolID       string
+	functionName string
+	respContent  string
+}
+
+type mockCallSyncer struct {
+	calls map[uint32][]mockFunctionCall
+}
+
+// Call implements CallSyncer. It returns the mock responses defined in advance.
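+// As an illustration (values borrowed from the test cases above), a call for
+// tag 0x33 with tool call ID "call_abc123" yields one tool message:
+//
+//	msgs, _ := syncer.Call(ctx, "transID", "reqID", map[uint32][]*openai.ToolCall{
+//		0x33: {{ID: "call_abc123"}},
+//	})
+//	// msgs[0].Role == "tool", msgs[0].Content == "temperature: 31°C"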
+func (m *mockCallSyncer) Call(ctx context.Context, transID string, reqID string, toolCalls map[uint32][]*openai.ToolCall) ([]openai.ChatCompletionMessage, error) { + res := []openai.ChatCompletionMessage{} + for tag, calls := range toolCalls { + mcs, ok := m.calls[tag] + if !ok { + return nil, errors.New("call not found") + } + mcm := make(map[string]mockFunctionCall, len(mcs)) + for _, mc := range mcs { + mcm[mc.toolID] = mc + } + for _, call := range calls { + mc, ok := mcm[call.ID] + if !ok { + return nil, errors.New("call not found") + } + res = append(res, openai.ChatCompletionMessage{ + ToolCallID: mc.toolID, + Role: openai.ChatMessageRoleTool, + Content: mc.respContent, + }) + } + } + return res, nil +} + +func (m *mockCallSyncer) Close() error { return nil } + +func toInt(val int) *int { return &val } + +var stopStreamResp = `data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I'm"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" a"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" computer"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" program"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + +data: 
{"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" so"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" don't"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" have"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" feelings"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" but"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I'm"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" and"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" ready"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" 
help"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" whatever"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: 
{"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + +data: {"id":"chatcmpl-9blY98pEJe6mXGKivCZyl61vxaUFq","object":"chat.completion.chunk","created":1718787945,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_f4e629d0a5","choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47}} + +data: [DONE]` + +var stopResp = `{ + "id": "chatcmpl-9blYknv9rHvr2dvCQKMeW21hlBpCX", + "object": "chat.completion", + "created": 1718787982, + "model": "gpt-4o-2024-05-13", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! I'm just a computer program, so I don't have feelings, but thanks for asking. How can I assist you today?" + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 13, + "completion_tokens": 26, + "total_tokens": 39 + }, + "system_fingerprint": "fp_f4e629d0a5" +}` + +var toolCallStreamResp = `data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_9ctHOJqO3bYrpm2A6S7nHd5k","type":"function","function":{"name":"get_current_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Boston"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: 
{"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" MA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + +data: {"id":"chatcmpl-9blTCqGy0TGLdK4sOYlGrNxbGGknW","object":"chat.completion.chunk","created":1718787638,"model":"gpt-4-turbo-2024-04-09","system_fingerprint":"fp_9d7f5c6195","choices":[],"usage":{"prompt_tokens":83,"completion_tokens":17,"total_tokens":100}}` + +var toolCallResp = `{ + "id": "chatcmpl-abc123", + "object": "chat.completion", + "created": 1699896916, + "model": "gpt-4-turbo-2024-04-09", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\n\"location\": \"Boston, MA\"\n}" + } + } + ] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 82, + "completion_tokens": 17, + "total_tokens": 99 + } +}`