From 58da2cc62b9c3e4596d4fdbd7ba3cbbdcc77be5c Mon Sep 17 00:00:00 2001 From: Bryan Moffatt Date: Thu, 25 Jan 2024 09:54:07 -0800 Subject: [PATCH] Plumb error info to X-ray (#544) * WIP: Plumb error info to x-ray * cleanup * fix xrayException shape - the json tags differ from InvokeResponse_Error * no stack should be empty list instead of nil * gofmt * stable order of the paths array --- lambda/invoke_loop.go | 46 ++++++++++++- lambda/invoke_loop_test.go | 106 ++++++++++++++++++++++++++++++ lambda/runtime_api_client.go | 14 ++-- lambda/runtime_api_client_test.go | 8 +-- 4 files changed, 165 insertions(+), 9 deletions(-) diff --git a/lambda/invoke_loop.go b/lambda/invoke_loop.go index 9e2d6598..338237ea 100644 --- a/lambda/invoke_loop.go +++ b/lambda/invoke_loop.go @@ -104,7 +104,13 @@ func handleInvoke(invoke *invoke, handler *handlerOptions) error { func reportFailure(invoke *invoke, invokeErr *messages.InvokeResponse_Error) error { errorPayload := safeMarshal(invokeErr) log.Printf("%s", errorPayload) - if err := invoke.failure(bytes.NewReader(errorPayload), contentTypeJSON); err != nil { + + causeForXRay, err := json.Marshal(makeXRayError(invokeErr)) + if err != nil { + return fmt.Errorf("unexpected error occured when serializing the function error cause for X-Ray: %v", err) + } + + if err := invoke.failure(bytes.NewReader(errorPayload), contentTypeJSON, causeForXRay); err != nil { return fmt.Errorf("unexpected error occurred when sending the function error to the API: %v", err) } return nil @@ -166,3 +172,41 @@ func safeMarshal(v interface{}) []byte { } return payload } + +type xrayException struct { + Type string `json:"type"` + Message string `json:"message"` + Stack []*messages.InvokeResponse_Error_StackFrame `json:"stack"` +} + +type xrayError struct { + WorkingDirectory string `json:"working_directory"` + Exceptions []xrayException `json:"exceptions"` + Paths []string `json:"paths"` +} + +func makeXRayError(invokeResponseError *messages.InvokeResponse_Error) *xrayError { + paths := make([]string, 0, len(invokeResponseError.StackTrace)) + visitedPaths := make(map[string]struct{}, len(invokeResponseError.StackTrace)) + for _, frame := range invokeResponseError.StackTrace { + if _, exists := visitedPaths[frame.Path]; !exists { + visitedPaths[frame.Path] = struct{}{} + paths = append(paths, frame.Path) + } + } + + cwd, _ := os.Getwd() + exceptions := []xrayException{{ + Type: invokeResponseError.Type, + Message: invokeResponseError.Message, + Stack: invokeResponseError.StackTrace, + }} + if exceptions[0].Stack == nil { + exceptions[0].Stack = []*messages.InvokeResponse_Error_StackFrame{} + } + return &xrayError{ + WorkingDirectory: cwd, + Paths: paths, + Exceptions: exceptions, + } +} diff --git a/lambda/invoke_loop_test.go b/lambda/invoke_loop_test.go index fab800b9..3374dc2a 100644 --- a/lambda/invoke_loop_test.go +++ b/lambda/invoke_loop_test.go @@ -90,6 +90,110 @@ func TestCustomErrorMarshaling(t *testing.T) { } } +func TestXRayCausePlumbing(t *testing.T) { + errors := []error{ + errors.New("barf"), + messages.InvokeResponse_Error{ + Type: "yoloError", + Message: "hello yolo", + StackTrace: []*messages.InvokeResponse_Error_StackFrame{ + {Label: "yolo", Path: "yolo", Line: 2}, + {Label: "hi", Path: "hello/hello", Line: 12}, + }, + }, + messages.InvokeResponse_Error{ + Type: "yoloError", + Message: "hello yolo", + StackTrace: []*messages.InvokeResponse_Error_StackFrame{ + {Label: "hi", Path: "hello/hello", Line: 12}, + {Label: "hihi", Path: "hello/hello", Line: 13}, + {Label: "yolo", Path: "yolo", Line: 2}, + {Label: "hi", Path: "hello/hello", Line: 14}, + }, + }, + messages.InvokeResponse_Error{ + Type: "yoloError", + Message: "hello yolo", + StackTrace: []*messages.InvokeResponse_Error_StackFrame{}, + }, + messages.InvokeResponse_Error{ + Type: "yoloError", + Message: "hello yolo", + }, + } + wd, _ := os.Getwd() + expected := []string{ + `{ + "working_directory":"` + wd + `", + "paths": [], + "exceptions": [{ + "type": "errorString", + "message": "barf", + "stack": [] + }] + }`, + `{ + "working_directory":"` + wd + `", + "paths": ["yolo", "hello/hello"], + "exceptions": [{ + "type": "yoloError", + "message": "hello yolo", + "stack": [ + {"label": "yolo", "path": "yolo", "line": 2}, + {"label": "hi", "path": "hello/hello", "line": 12} + ] + }] + }`, + `{ + "working_directory":"` + wd + `", + "paths": ["hello/hello", "yolo"], + "exceptions": [{ + "type": "yoloError", + "message": "hello yolo", + "stack": [ + {"label": "hi", "path": "hello/hello", "line": 12}, + {"label": "hihi", "path": "hello/hello", "line": 13}, + {"label": "yolo", "path": "yolo", "line": 2}, + {"label": "hi", "path": "hello/hello", "line": 14} + ] + }] + }`, + `{ + "working_directory":"` + wd + `", + "paths": [], + "exceptions": [{ + "type": "yoloError", + "message": "hello yolo", + "stack": [] + }] + }`, + `{ + "working_directory":"` + wd + `", + "paths": [], + "exceptions": [{ + "type": "yoloError", + "message": "hello yolo", + "stack": [] + }] + }`, + } + require.Equal(t, len(errors), len(expected)) + ts, record := runtimeAPIServer(``, len(errors)) + defer ts.Close() + n := 0 + handler := NewHandler(func() error { + defer func() { n++ }() + return errors[n] + }) + endpoint := strings.Split(ts.URL, "://")[1] + expectedError := fmt.Sprintf("failed to GET http://%s/2018-06-01/runtime/invocation/next: got unexpected status code: 410", endpoint) + assert.EqualError(t, startRuntimeAPILoop(endpoint, handler), expectedError) + for i := range errors { + assert.JSONEq(t, expected[i], string(record.xrayCauses[i])) + } + +} + func TestRuntimeAPIContextPlumbing(t *testing.T) { handler := NewHandler(func(ctx context.Context) (interface{}, error) { lc, _ := lambdacontext.FromContext(ctx) @@ -271,6 +375,7 @@ type requestRecord struct { nPosts int responses [][]byte contentTypes []string + xrayCauses []string } type eventMetadata struct { @@ -336,6 +441,7 @@ func runtimeAPIServer(eventPayload string, failAfter int, overrides ...eventMeta w.WriteHeader(http.StatusAccepted) record.responses = append(record.responses, response.Bytes()) record.contentTypes = append(record.contentTypes, r.Header.Get("Content-Type")) + record.xrayCauses = append(record.xrayCauses, r.Header.Get(headerXRayErrorCause)) default: w.WriteHeader(http.StatusBadRequest) } diff --git a/lambda/runtime_api_client.go b/lambda/runtime_api_client.go index 84384c29..158bd6b4 100644 --- a/lambda/runtime_api_client.go +++ b/lambda/runtime_api_client.go @@ -22,11 +22,13 @@ const ( headerCognitoIdentity = "Lambda-Runtime-Cognito-Identity" headerClientContext = "Lambda-Runtime-Client-Context" headerInvokedFunctionARN = "Lambda-Runtime-Invoked-Function-Arn" + headerXRayErrorCause = "Lambda-Runtime-Function-Xray-Error-Cause" trailerLambdaErrorType = "Lambda-Runtime-Function-Error-Type" trailerLambdaErrorBody = "Lambda-Runtime-Function-Error-Body" contentTypeJSON = "application/json" contentTypeBytes = "application/octet-stream" apiVersion = "2018-06-01" + xrayErrorCauseMaxSize = 1024 * 1024 ) type runtimeAPIClient struct { @@ -57,7 +59,7 @@ type invoke struct { // - An invoke is not complete until next() is called again! func (i *invoke) success(body io.Reader, contentType string) error { url := i.client.baseURL + i.id + "/response" - return i.client.post(url, body, contentType) + return i.client.post(url, body, contentType, nil) } // failure sends the payload to the Runtime API. This marks the function's invoke as a failure. @@ -65,9 +67,9 @@ func (i *invoke) success(body io.Reader, contentType string) error { // - The execution of the function process continues, and is billed, until next() is called again! // - A Lambda Function continues to be re-used for future invokes even after a failure. // If the error is fatal (panic, unrecoverable state), exit the process immediately after calling failure() -func (i *invoke) failure(body io.Reader, contentType string) error { +func (i *invoke) failure(body io.Reader, contentType string, causeForXRay []byte) error { url := i.client.baseURL + i.id + "/error" - return i.client.post(url, body, contentType) + return i.client.post(url, body, contentType, causeForXRay) } // next connects to the Runtime API and waits for a new invoke Request to be available. @@ -108,7 +110,7 @@ func (c *runtimeAPIClient) next() (*invoke, error) { }, nil } -func (c *runtimeAPIClient) post(url string, body io.Reader, contentType string) error { +func (c *runtimeAPIClient) post(url string, body io.Reader, contentType string, xrayErrorCause []byte) error { b := newErrorCapturingReader(body) req, err := http.NewRequest(http.MethodPost, url, b) if err != nil { @@ -118,6 +120,10 @@ func (c *runtimeAPIClient) post(url string, body io.Reader, contentType string) req.Header.Set("User-Agent", c.userAgent) req.Header.Set("Content-Type", contentType) + if xrayErrorCause != nil && len(xrayErrorCause) < xrayErrorCauseMaxSize { + req.Header.Set(headerXRayErrorCause, string(xrayErrorCause)) + } + resp, err := c.httpClient.Do(req) if err != nil { return fmt.Errorf("failed to POST to %s: %v", url, err) diff --git a/lambda/runtime_api_client_test.go b/lambda/runtime_api_client_test.go index 7ccd47fb..3f41403f 100644 --- a/lambda/runtime_api_client_test.go +++ b/lambda/runtime_api_client_test.go @@ -92,7 +92,7 @@ func TestClientDoneAndError(t *testing.T) { assert.NoError(t, err) }) t.Run(fmt.Sprintf("happy Error with payload[%d]", i), func(t *testing.T) { - err := invoke.failure(bytes.NewReader(payload), contentTypeJSON) + err := invoke.failure(bytes.NewReader(payload), contentTypeJSON, nil) assert.NoError(t, err) }) } @@ -105,7 +105,7 @@ func TestInvalidRequestsForMalformedEndpoint(t *testing.T) { require.Error(t, err) err = (&invoke{client: newRuntimeAPIClient("🚨")}).success(nil, "") require.Error(t, err) - err = (&invoke{client: newRuntimeAPIClient("🚨")}).failure(nil, "") + err = (&invoke{client: newRuntimeAPIClient("🚨")}).failure(nil, "", nil) require.Error(t, err) } @@ -145,7 +145,7 @@ func TestStatusCodes(t *testing.T) { require.NoError(t, err) }) t.Run("failure should not error", func(t *testing.T) { - err := invoke.failure(nil, "") + err := invoke.failure(nil, "", nil) require.NoError(t, err) }) } else { @@ -158,7 +158,7 @@ func TestStatusCodes(t *testing.T) { } }) t.Run("failure should error", func(t *testing.T) { - err := invoke.failure(nil, "") + err := invoke.failure(nil, "", nil) require.Error(t, err) if i != 301 && i != 302 && i != 303 { assert.Contains(t, err.Error(), "unexpected status code")