diff --git a/agent/go.sum b/agent/go.sum index 69c5c018ebe..802cf778818 100644 --- a/agent/go.sum +++ b/agent/go.sum @@ -491,8 +491,8 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= +gotest.tools/v3 v3.5.0 h1:Ljk6PdHdOhAb5aDMWXjDLMMhph+BpztA4v1QdqEW2eY= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= diff --git a/agent/handlers/task_server_setup.go b/agent/handlers/task_server_setup.go index ba8f7816a68..ea0dedc792f 100644 --- a/agent/handlers/task_server_setup.go +++ b/agent/handlers/task_server_setup.go @@ -77,7 +77,7 @@ func taskServerSetup( muxRouter.HandleFunc(tmdsv1.CredentialsPath, tmdsv1.CredentialsHandler(credentialsManager, auditLogger)) - tmdsAgentState := v4.NewTMDSAgentState(state, ecsClient, cluster, availabilityZone, vpcID, containerInstanceArn) + tmdsAgentState := v4.NewTMDSAgentState(state, statsEngine, ecsClient, cluster, availabilityZone, vpcID, containerInstanceArn) metricsFactory := metrics.NewNopEntryFactory() v2HandlersSetup(muxRouter, state, ecsClient, statsEngine, cluster, credentialsManager, auditLogger, availabilityZone, containerInstanceArn) @@ -153,8 +153,8 @@ func v4HandlersSetup(muxRouter *mux.Router, muxRouter.HandleFunc(tmdsv4.ContainerMetadataPath(), tmdsv4.ContainerMetadataHandler(tmdsAgentState, metricsFactory)) muxRouter.HandleFunc(tmdsv4.TaskMetadataPath(), tmdsv4.TaskMetadataHandler(tmdsAgentState, metricsFactory)) muxRouter.HandleFunc(tmdsv4.TaskMetadataWithTagsPath(), tmdsv4.TaskMetadataWithTagsHandler(tmdsAgentState, metricsFactory)) - muxRouter.HandleFunc(v4.ContainerStatsPath, v4.ContainerStatsHandler(state, statsEngine)) - muxRouter.HandleFunc(v4.TaskStatsPath, v4.TaskStatsHandler(state, statsEngine)) + muxRouter.HandleFunc(tmdsv4.ContainerStatsPath(), tmdsv4.ContainerStatsHandler(tmdsAgentState, metricsFactory)) + muxRouter.HandleFunc(tmdsv4.TaskStatsPath(), tmdsv4.TaskStatsHandler(tmdsAgentState, metricsFactory)) muxRouter.HandleFunc(v4.ContainerAssociationsPath, v4.ContainerAssociationsHandler(state)) muxRouter.HandleFunc(v4.ContainerAssociationPathWithSlash, v4.ContainerAssociationHandler(state)) muxRouter.HandleFunc(v4.ContainerAssociationPath, v4.ContainerAssociationHandler(state)) diff --git a/agent/handlers/v4/container_stats_handler.go b/agent/handlers/v4/container_stats_handler.go deleted file mode 100644 index 26245f20c66..00000000000 --- a/agent/handlers/v4/container_stats_handler.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. -package v4 - -import ( - "encoding/json" - "fmt" - "net/http" - - "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" - v3 "github.com/aws/amazon-ecs-agent/agent/handlers/v3" - "github.com/aws/amazon-ecs-agent/agent/stats" - "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/utils" - response "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state" - - "github.com/cihub/seelog" -) - -// ContainerStatsPath specifies the relative URI path for serving container stats. -var ContainerStatsPath = "/v4/" + utils.ConstructMuxVar(v3.V3EndpointIDMuxName, utils.AnythingButSlashRegEx) + "/stats" - -// ContainerStatsHandler returns the handler method for handling container stats requests. -func ContainerStatsHandler(state dockerstate.TaskEngineState, statsEngine stats.Engine) func(w http.ResponseWriter, r *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - taskArn, err := v3.GetTaskARNByRequest(r, state) - if err != nil { - errResponseJSON, err := json.Marshal(fmt.Sprintf("V4 container handler: unable to get task arn from request: %s", err.Error())) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusNotFound, errResponseJSON, utils.RequestTypeTaskStats) - return - } - - containerID, err := v3.GetContainerIDByRequest(r, state) - if err != nil { - responseJSON, err := json.Marshal(fmt.Sprintf("V4 container stats handler: unable to get container ID from request: %s", err.Error())) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusNotFound, responseJSON, utils.RequestTypeContainerStats) - return - } - - seelog.Infof("V4 container stats handler: writing response for container '%s'", containerID) - // v4 handler shares the same container states response format with v2 handler. - WriteV4ContainerStatsResponse(w, taskArn, containerID, statsEngine) - } -} - -// WriteContainerStatsResponse writes the container stats to response writer. -func WriteV4ContainerStatsResponse(w http.ResponseWriter, - taskARN string, - containerID string, - statsEngine stats.Engine) { - dockerStats, network_rate_stats, err := statsEngine.ContainerDockerStats(taskARN, containerID) - if err != nil { - errResponseJSON, err := json.Marshal("Unable to get container stats for: " + containerID) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusInternalServerError, errResponseJSON, utils.RequestTypeContainerStats) - return - } - - containerStatsResponse := response.StatsResponse{ - StatsJSON: dockerStats, - Network_rate_stats: network_rate_stats, - } - - responseJSON, err := json.Marshal(containerStatsResponse) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusOK, responseJSON, utils.RequestTypeContainerStats) -} diff --git a/agent/handlers/v4/stats_response.go b/agent/handlers/v4/stats_response.go index 5c293bc5e8f..edbf8c0a23e 100644 --- a/agent/handlers/v4/stats_response.go +++ b/agent/handlers/v4/stats_response.go @@ -25,7 +25,7 @@ import ( // NewV4TaskStatsResponse returns a new v4 task stats response object func NewV4TaskStatsResponse(taskARN string, state dockerstate.TaskEngineState, - statsEngine stats.Engine) (map[string]response.StatsResponse, error) { + statsEngine stats.Engine) (map[string]*response.StatsResponse, error) { containerMap, ok := state.ContainerMapByArn(taskARN) if !ok { @@ -34,14 +34,14 @@ func NewV4TaskStatsResponse(taskARN string, taskARN) } - resp := make(map[string]response.StatsResponse) + resp := make(map[string]*response.StatsResponse) for _, dockerContainer := range containerMap { containerID := dockerContainer.DockerID dockerStats, network_rate_stats, err := statsEngine.ContainerDockerStats(taskARN, containerID) if err != nil { seelog.Warnf("V4 task stats response: Unable to get stats for container '%s' for task '%s': %v", containerID, taskARN, err) - resp[containerID] = response.StatsResponse{} + resp[containerID] = &response.StatsResponse{} continue } @@ -50,7 +50,7 @@ func NewV4TaskStatsResponse(taskARN string, Network_rate_stats: network_rate_stats, } - resp[containerID] = statsResponse + resp[containerID] = &statsResponse } return resp, nil diff --git a/agent/handlers/v4/task_stats_handler.go b/agent/handlers/v4/task_stats_handler.go deleted file mode 100644 index fd26d88b65e..00000000000 --- a/agent/handlers/v4/task_stats_handler.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package v4 - -import ( - "encoding/json" - "fmt" - "net/http" - - "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" - v3 "github.com/aws/amazon-ecs-agent/agent/handlers/v3" - "github.com/aws/amazon-ecs-agent/agent/stats" - "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/utils" - "github.com/cihub/seelog" -) - -var TaskStatsPath = "/v4/" + utils.ConstructMuxVar(v3.V3EndpointIDMuxName, utils.AnythingButSlashRegEx) + "/task/stats" - -func TaskStatsHandler(state dockerstate.TaskEngineState, statsEngine stats.Engine) func(http.ResponseWriter, *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - taskArn, err := v3.GetTaskARNByRequest(r, state) - if err != nil { - errResponseJSON, err := json.Marshal(fmt.Sprintf("V4 task stats handler: unable to get task arn from request: %s", err.Error())) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusNotFound, errResponseJSON, utils.RequestTypeTaskStats) - return - } - WriteV4TaskStatsResponse(w, taskArn, state, statsEngine) - } -} - -// WriteV4TaskStatsResponse writes the task stats to response writer. -func WriteV4TaskStatsResponse(w http.ResponseWriter, - taskARN string, - state dockerstate.TaskEngineState, - statsEngine stats.Engine) { - - taskStatsResponse, err := NewV4TaskStatsResponse(taskARN, state, statsEngine) - if err != nil { - seelog.Warnf("Unable to get task stats for task '%s': %v", taskARN, err) - errResponseJSON, err := json.Marshal("Unable to get task stats for: " + taskARN) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusInternalServerError, errResponseJSON, utils.RequestTypeTaskStats) - return - } - - responseJSON, err := json.Marshal(taskStatsResponse) - if e := utils.WriteResponseIfMarshalError(w, err); e != nil { - return - } - utils.WriteJSONToResponse(w, http.StatusOK, responseJSON, utils.RequestTypeTaskStats) -} diff --git a/agent/handlers/v4/tmdsstate.go b/agent/handlers/v4/tmdsstate.go index 0dc72dac124..8c503e62ba0 100644 --- a/agent/handlers/v4/tmdsstate.go +++ b/agent/handlers/v4/tmdsstate.go @@ -17,6 +17,7 @@ import ( "github.com/aws/amazon-ecs-agent/agent/api" "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" + "github.com/aws/amazon-ecs-agent/agent/stats" "github.com/aws/amazon-ecs-agent/ecs-agent/logger" "github.com/aws/amazon-ecs-agent/ecs-agent/logger/field" tmdsv4 "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state" @@ -25,6 +26,7 @@ import ( // Implements AgentState interface for TMDS v4. type TMDSAgentState struct { state dockerstate.TaskEngineState + statsEngine stats.Engine ecsClient api.ECSClient cluster string availabilityZone string @@ -34,6 +36,7 @@ type TMDSAgentState struct { func NewTMDSAgentState( state dockerstate.TaskEngineState, + statsEngine stats.Engine, ecsClient api.ECSClient, cluster string, availabilityZone string, @@ -42,6 +45,7 @@ func NewTMDSAgentState( ) *TMDSAgentState { return &TMDSAgentState{ state: state, + statsEngine: statsEngine, ecsClient: ecsClient, cluster: cluster, availabilityZone: availabilityZone, @@ -149,3 +153,49 @@ func (s *TMDSAgentState) getTaskMetadata(v3EndpointID string, includeTags bool) return *taskResponse, nil } + +func (s *TMDSAgentState) GetContainerStats(v3EndpointID string) (tmdsv4.StatsResponse, error) { + taskARN, ok := s.state.TaskARNByV3EndpointID(v3EndpointID) + if !ok { + return tmdsv4.StatsResponse{}, tmdsv4.NewErrorStatsLookupFailure(fmt.Sprintf( + "V4 container handler: unable to get task arn from request: unable to get task Arn from v3 endpoint ID: %s", + v3EndpointID)) + } + + containerID, ok := s.state.DockerIDByV3EndpointID(v3EndpointID) + if !ok { + return tmdsv4.StatsResponse{}, tmdsv4.NewErrorStatsLookupFailure(fmt.Sprintf( + "V4 container stats handler: unable to get container ID from request: unable to get docker ID from v3 endpoint ID: %s", + v3EndpointID)) + } + + dockerStats, network_rate_stats, err := s.statsEngine.ContainerDockerStats(taskARN, containerID) + if err != nil { + return tmdsv4.StatsResponse{}, tmdsv4.NewErrorStatsFetchFailure( + fmt.Sprintf("Unable to get container stats for: %s", containerID), + err) + } + + return tmdsv4.StatsResponse{ + StatsJSON: dockerStats, + Network_rate_stats: network_rate_stats, + }, nil +} + +func (s *TMDSAgentState) GetTaskStats(v3EndpointID string) (map[string]*tmdsv4.StatsResponse, error) { + taskARN, ok := s.state.TaskARNByV3EndpointID(v3EndpointID) + if !ok { + return nil, tmdsv4.NewErrorStatsLookupFailure(fmt.Sprintf( + "V4 task stats handler: unable to get task arn from request: unable to get task Arn from v3 endpoint ID: %s", + v3EndpointID)) + } + + taskStatsResponse, err := NewV4TaskStatsResponse(taskARN, s.state, s.statsEngine) + if err != nil { + return nil, tmdsv4.NewErrorStatsFetchFailure( + fmt.Sprintf("Unable to get task stats for: %s", taskARN), + err) + } + + return taskStatsResponse, nil +} diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/logger/field/constants.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/logger/field/constants.go index d9f97940395..b9e94ebb4da 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/logger/field/constants.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/logger/field/constants.go @@ -10,7 +10,6 @@ // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either // express or implied. See the License for the specific language governing // permissions and limitations under the License. - package field const ( diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/handlers.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/handlers.go index 8ff2ffb935d..7b6a06ab220 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/handlers.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/handlers.go @@ -51,6 +51,18 @@ func TaskMetadataWithTagsPath() string { utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) } +// Returns a standard URI path for v4 container stats endpoint. +func ContainerStatsPath() string { + return fmt.Sprintf("/v4/%s/stats", + utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) +} + +// Returns a standard URI path for v4 task stats endpoint. +func TaskStatsPath() string { + return fmt.Sprintf("/v4/%s/task/stats", + utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) +} + // ContainerMetadataHandler returns the HTTP handler function for handling container metadata requests. func ContainerMetadataHandler( agentState state.AgentState, @@ -173,3 +185,79 @@ func getTaskErrorResponse(endpointContainerID string, err error) (int, string) { }) return http.StatusInternalServerError, "failed to get task metadata" } + +// Returns an HTTP handler for v4 container stats endpoint +func ContainerStatsHandler( + agentState state.AgentState, + metricsFactory metrics.EntryFactory, +) func(http.ResponseWriter, *http.Request) { + return statsHandler(agentState.GetContainerStats, metricsFactory, utils.RequestTypeContainerStats) +} + +// Returns an HTTP handler for v4 task stats endpoint +func TaskStatsHandler( + agentState state.AgentState, + metricsFactory metrics.EntryFactory, +) func(http.ResponseWriter, *http.Request) { + return statsHandler(agentState.GetTaskStats, metricsFactory, utils.RequestTypeTaskStats) +} + +// Generic function that returns an HTTP handler for container or task stats endpoint +// depending on the parameters. +func statsHandler[R state.StatsResponse | map[string]*state.StatsResponse]( + getStats func(string) (R, error), // container stats or task stats getter function + metricsFactory metrics.EntryFactory, + requestType string, // container stats or task stats request type +) func(http.ResponseWriter, *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + // Extract endpoint container ID + endpointContainerID := mux.Vars(r)[EndpointContainerIDMuxName] + + // Get stats + stats, err := getStats(endpointContainerID) + if err != nil { + logger.Error("Failed to get v4 stats", logger.Fields{ + field.TMDSEndpointContainerID: endpointContainerID, + field.Error: err, + field.RequestType: requestType, + }) + + responseCode, responseBody := getStatsErrorResponse(endpointContainerID, err) + utils.WriteJSONResponse(w, responseCode, responseBody, requestType) + + if utils.Is5XXStatus(responseCode) { + metricsFactory.New(metrics.InternalServerErrorMetricName).Done(err)() + } + + return + } + + // Write stats response + logger.Info("Writing response for v4 stats", logger.Fields{ + field.TMDSEndpointContainerID: endpointContainerID, + field.RequestType: requestType, + }) + utils.WriteJSONResponse(w, http.StatusOK, stats, requestType) + } +} + +// Returns appropriate HTTP status code and response body for stats endpoint error cases. +func getStatsErrorResponse(endpointContainerID string, err error) (int, string) { + // 404 if lookup failure + var errLookupFailure *state.ErrorStatsLookupFailure + if errors.As(err, &errLookupFailure) { + return http.StatusNotFound, errLookupFailure.ExternalReason() + } + + // 500 if any other known failure + var errStatsFetchFailure *state.ErrorStatsFetchFailure + if errors.As(err, &errStatsFetchFailure) { + return http.StatusInternalServerError, errStatsFetchFailure.ExternalReason() + } + + // 500 if unknown failure + logger.Error("Unknown error encountered when handling stats fetch error", logger.Fields{ + field.Error: err, + }) + return http.StatusInternalServerError, "failed to get stats" +} diff --git a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state/state.go b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state/state.go index df7f6532b51..dc86e36fae0 100644 --- a/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state/state.go +++ b/agent/vendor/github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state/state.go @@ -32,7 +32,7 @@ func (e *ErrorLookupFailure) Error() string { return fmt.Sprintf("container lookup failed: %s", e.externalReason) } -// General "catch-all" error to be returned when container metadata could not be +// General "catch-all" error to be returned when container or task metadata could not be // fetched for some reason type ErrorMetadataFetchFailure struct { externalReason string // Reason to be included in the response @@ -50,6 +50,46 @@ func (e *ErrorMetadataFetchFailure) ExternalReason() string { return e.externalReason } +// Error to be returned when container or task stats lookup failed due to a lookup failure +type ErrorStatsLookupFailure struct { + externalReason string // Reason to be returned in TMDS response +} + +func NewErrorStatsLookupFailure(externalReason string) *ErrorStatsLookupFailure { + return &ErrorStatsLookupFailure{externalReason} +} + +func (e *ErrorStatsLookupFailure) ExternalReason() string { + return e.externalReason +} + +func (e *ErrorStatsLookupFailure) Error() string { + return fmt.Sprintf("stats lookup failed: %s", e.externalReason) +} + +// General "catch-all" error to be returned when container or task stats could not +// be fetched for some reason. +type ErrorStatsFetchFailure struct { + externalReason string // Reason to be returned in TMDS response + cause error +} + +func NewErrorStatsFetchFailure(externalReason string, cause error) *ErrorStatsFetchFailure { + return &ErrorStatsFetchFailure{externalReason, cause} +} + +func (e *ErrorStatsFetchFailure) ExternalReason() string { + return e.externalReason +} + +func (e *ErrorStatsFetchFailure) Error() string { + return fmt.Sprintf("stats lookup failed: %s", e.externalReason) +} + +func (e *ErrorStatsFetchFailure) Unwrap() error { + return e.cause +} + // Interface for interacting with Agent State relevant to TMDS type AgentState interface { // Returns container metadata in v4 format for the container identified by the @@ -68,4 +108,16 @@ type AgentState interface { // Returns ErrorTaskLookupFailed if task lookup fails. // Returns ErrorMetadataFetchFailure if something else goes wrong. GetTaskMetadataWithTags(endpointContainerID string) (TaskResponse, error) + + // Returns container stats in v4 format for the container identified by the provided + // endpointContainerID. + // Returns ErrorStatsLookupFailure if container lookup fails. + // Returns ErrorStatsFetchFailure if something else goes wrong. + GetContainerStats(endpointContainerID string) (StatsResponse, error) + + // Returns task stats in v4 format for the task identified by the provided + // endpointContainerID. + // Returns ErrorStatsLookupFailure if container lookup fails. + // Returns ErrorStatsFetchFailure if something else goes wrong. + GetTaskStats(endpointContainerID string) (map[string]*StatsResponse, error) } diff --git a/ecs-agent/logger/field/constants.go b/ecs-agent/logger/field/constants.go index d9f97940395..b9e94ebb4da 100644 --- a/ecs-agent/logger/field/constants.go +++ b/ecs-agent/logger/field/constants.go @@ -10,7 +10,6 @@ // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either // express or implied. See the License for the specific language governing // permissions and limitations under the License. - package field const ( diff --git a/ecs-agent/tmds/handlers/v4/handlers.go b/ecs-agent/tmds/handlers/v4/handlers.go index 8ff2ffb935d..7b6a06ab220 100644 --- a/ecs-agent/tmds/handlers/v4/handlers.go +++ b/ecs-agent/tmds/handlers/v4/handlers.go @@ -51,6 +51,18 @@ func TaskMetadataWithTagsPath() string { utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) } +// Returns a standard URI path for v4 container stats endpoint. +func ContainerStatsPath() string { + return fmt.Sprintf("/v4/%s/stats", + utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) +} + +// Returns a standard URI path for v4 task stats endpoint. +func TaskStatsPath() string { + return fmt.Sprintf("/v4/%s/task/stats", + utils.ConstructMuxVar(EndpointContainerIDMuxName, utils.AnythingButSlashRegEx)) +} + // ContainerMetadataHandler returns the HTTP handler function for handling container metadata requests. func ContainerMetadataHandler( agentState state.AgentState, @@ -173,3 +185,79 @@ func getTaskErrorResponse(endpointContainerID string, err error) (int, string) { }) return http.StatusInternalServerError, "failed to get task metadata" } + +// Returns an HTTP handler for v4 container stats endpoint +func ContainerStatsHandler( + agentState state.AgentState, + metricsFactory metrics.EntryFactory, +) func(http.ResponseWriter, *http.Request) { + return statsHandler(agentState.GetContainerStats, metricsFactory, utils.RequestTypeContainerStats) +} + +// Returns an HTTP handler for v4 task stats endpoint +func TaskStatsHandler( + agentState state.AgentState, + metricsFactory metrics.EntryFactory, +) func(http.ResponseWriter, *http.Request) { + return statsHandler(agentState.GetTaskStats, metricsFactory, utils.RequestTypeTaskStats) +} + +// Generic function that returns an HTTP handler for container or task stats endpoint +// depending on the parameters. +func statsHandler[R state.StatsResponse | map[string]*state.StatsResponse]( + getStats func(string) (R, error), // container stats or task stats getter function + metricsFactory metrics.EntryFactory, + requestType string, // container stats or task stats request type +) func(http.ResponseWriter, *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + // Extract endpoint container ID + endpointContainerID := mux.Vars(r)[EndpointContainerIDMuxName] + + // Get stats + stats, err := getStats(endpointContainerID) + if err != nil { + logger.Error("Failed to get v4 stats", logger.Fields{ + field.TMDSEndpointContainerID: endpointContainerID, + field.Error: err, + field.RequestType: requestType, + }) + + responseCode, responseBody := getStatsErrorResponse(endpointContainerID, err) + utils.WriteJSONResponse(w, responseCode, responseBody, requestType) + + if utils.Is5XXStatus(responseCode) { + metricsFactory.New(metrics.InternalServerErrorMetricName).Done(err)() + } + + return + } + + // Write stats response + logger.Info("Writing response for v4 stats", logger.Fields{ + field.TMDSEndpointContainerID: endpointContainerID, + field.RequestType: requestType, + }) + utils.WriteJSONResponse(w, http.StatusOK, stats, requestType) + } +} + +// Returns appropriate HTTP status code and response body for stats endpoint error cases. +func getStatsErrorResponse(endpointContainerID string, err error) (int, string) { + // 404 if lookup failure + var errLookupFailure *state.ErrorStatsLookupFailure + if errors.As(err, &errLookupFailure) { + return http.StatusNotFound, errLookupFailure.ExternalReason() + } + + // 500 if any other known failure + var errStatsFetchFailure *state.ErrorStatsFetchFailure + if errors.As(err, &errStatsFetchFailure) { + return http.StatusInternalServerError, errStatsFetchFailure.ExternalReason() + } + + // 500 if unknown failure + logger.Error("Unknown error encountered when handling stats fetch error", logger.Fields{ + field.Error: err, + }) + return http.StatusInternalServerError, "failed to get stats" +} diff --git a/ecs-agent/tmds/handlers/v4/handlers_test.go b/ecs-agent/tmds/handlers/v4/handlers_test.go index 1d514359813..ca8791a7fdf 100644 --- a/ecs-agent/tmds/handlers/v4/handlers_test.go +++ b/ecs-agent/tmds/handlers/v4/handlers_test.go @@ -26,12 +26,14 @@ import ( "github.com/aws/amazon-ecs-agent/ecs-agent/metrics" mock_metrics "github.com/aws/amazon-ecs-agent/ecs-agent/metrics/mocks" + "github.com/aws/amazon-ecs-agent/ecs-agent/stats" "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/response" "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/utils" v2 "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v2" state "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state" mock_state "github.com/aws/amazon-ecs-agent/ecs-agent/tmds/handlers/v4/state/mocks" "github.com/aws/aws-sdk-go/aws" + "github.com/docker/docker/api/types" "github.com/gorilla/mux" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -107,8 +109,23 @@ var ( }}, }, } - now = time.Now() - credentialsID = "credentialsID" + now = time.Now() + credentialsID = "credentialsID" + containerStats = state.StatsResponse{ + StatsJSON: &types.StatsJSON{ + Stats: types.Stats{NumProcs: 2}, + Name: "name", + ID: "id", + Networks: map[string]types.NetworkStats{"a": {RxBytes: 5}}, + }, + Network_rate_stats: &stats.NetworkStatsPerSec{ + RxBytesPerSecond: 10, + TxBytesPerSecond: 15, + }, + } + taskStats = map[string]*state.StatsResponse{ + containerID: &containerStats, + } ) // Returns a standard agent task response @@ -306,8 +323,199 @@ func TestTaskMetadata(t *testing.T) { }) } +func TestContainerStatsPath(t *testing.T) { + assert.Equal(t, "/v4/{endpointContainerIDMuxName:[^/]*}/stats", ContainerStatsPath()) +} + +func TestTaskStatsPath(t *testing.T) { + assert.Equal(t, "/v4/{endpointContainerIDMuxName:[^/]*}/task/stats", TaskStatsPath()) +} + +func TestContainerStats(t *testing.T) { + // path for the stats endpoint + path := fmt.Sprintf("/v4/%s/stats", endpointContainerID) + + // helper function to setup mocks and a handler with container stats endpoint + setup := func() ( + *mock_state.MockAgentState, *gomock.Controller, *mock_metrics.MockEntryFactory, http.Handler, + ) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentState := mock_state.NewMockAgentState(ctrl) + metricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + router := mux.NewRouter() + router.HandleFunc( + ContainerStatsPath(), + ContainerStatsHandler(agentState, metricsFactory)) + + return agentState, ctrl, metricsFactory, router + } + + // Test cases start here + t.Run("stats lookup failure", func(t *testing.T) { + agentState, _, _, handler := setup() + agentState.EXPECT(). + GetContainerStats(endpointContainerID). + Return(state.StatsResponse{}, state.NewErrorStatsLookupFailure(externalReason)) + testTMDSRequest(t, handler, TMDSTestCase[string]{ + path: path, + expectedStatusCode: http.StatusNotFound, + expectedResponseBody: externalReason, + }) + }) + + internalServerErrorCases := []struct { + err error + responseBody string + }{ + { + err: state.NewErrorStatsFetchFailure(externalReason, errors.New("cause")), + responseBody: externalReason, + }, + { + err: errors.New("unknown error"), + responseBody: "failed to get stats", + }, + } + for _, tc := range internalServerErrorCases { + t.Run("stats fetch failure", func(t *testing.T) { + agentState, ctrl, metricsFactory, handler := setup() + + // Expectations + agentState.EXPECT(). + GetContainerStats(endpointContainerID). + Return(state.StatsResponse{}, tc.err) + + // Expect InternalServerError metric to be published with the error. + entry := mock_metrics.NewMockEntry(ctrl) + metricPublished := false // tracks if a metrics entry was published + entry.EXPECT().Done(tc.err).Return(func() { + // Set metricsPublished to true if metric was published. + metricPublished = true + }) + metricsFactory.EXPECT().New(metrics.InternalServerErrorMetricName).Return(entry) + + // Make test request + testTMDSRequest(t, handler, TMDSTestCase[string]{ + path: path, + expectedStatusCode: http.StatusInternalServerError, + expectedResponseBody: tc.responseBody, + }) + + // assert the metrics entry was published + assert.True(t, metricPublished) + }) + } + + t.Run("happy case", func(t *testing.T) { + agentState, _, _, handler := setup() + agentState.EXPECT(). + GetContainerStats(endpointContainerID). + Return(containerStats, nil) + testTMDSRequest(t, handler, TMDSTestCase[state.StatsResponse]{ + path: path, + expectedStatusCode: http.StatusOK, + expectedResponseBody: containerStats, + }) + }) +} + +func TestTaskStats(t *testing.T) { + // path for the stats endpoint + path := fmt.Sprintf("/v4/%s/task/stats", endpointContainerID) + + // helper function to setup mocks and a handler with container stats endpoint + setup := func() ( + *mock_state.MockAgentState, *gomock.Controller, *mock_metrics.MockEntryFactory, http.Handler, + ) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + agentState := mock_state.NewMockAgentState(ctrl) + metricsFactory := mock_metrics.NewMockEntryFactory(ctrl) + + router := mux.NewRouter() + router.HandleFunc( + TaskStatsPath(), + TaskStatsHandler(agentState, metricsFactory)) + + return agentState, ctrl, metricsFactory, router + } + + // Test cases start here + t.Run("stats lookup failure", func(t *testing.T) { + agentState, _, _, handler := setup() + agentState.EXPECT(). + GetTaskStats(endpointContainerID). + Return(nil, state.NewErrorStatsLookupFailure(externalReason)) + testTMDSRequest(t, handler, TMDSTestCase[string]{ + path: path, + expectedStatusCode: http.StatusNotFound, + expectedResponseBody: externalReason, + }) + }) + + internalServerErrorCases := []struct { + err error + responseBody string + }{ + { + err: state.NewErrorStatsFetchFailure(externalReason, errors.New("cause")), + responseBody: externalReason, + }, + { + err: errors.New("unknown error"), + responseBody: "failed to get stats", + }, + } + for _, tc := range internalServerErrorCases { + t.Run("stats fetch failure", func(t *testing.T) { + // setup + agentState, ctrl, metricsFactory, handler := setup() + metricPublished := false // tracks if a metrics entry was published + + // expect GetContainerStats to be called that should return an error + agentState.EXPECT(). + GetTaskStats(endpointContainerID). + Return(nil, tc.err) + + // expect InternalServerError metric to be published with the error. + // set metricsPublished to true if metric was published + entry := mock_metrics.NewMockEntry(ctrl) + entry.EXPECT().Done(tc.err).Return(func() { metricPublished = true }) + metricsFactory.EXPECT().New(metrics.InternalServerErrorMetricName).Return(entry) + + // Go + testTMDSRequest(t, handler, TMDSTestCase[string]{ + path: path, + expectedStatusCode: http.StatusInternalServerError, + expectedResponseBody: tc.responseBody, + }) + + // confirm the metrics entry was published + assert.True(t, metricPublished) + }) + } + + t.Run("happy case", func(t *testing.T) { + agentState, _, _, handler := setup() + agentState.EXPECT(). + GetTaskStats(endpointContainerID). + Return(taskStats, nil) + testTMDSRequest(t, handler, TMDSTestCase[map[string]*state.StatsResponse]{ + path: path, + expectedStatusCode: http.StatusOK, + expectedResponseBody: taskStats, + }) + }) +} + type TMDSResponse interface { - string | state.ContainerResponse | state.TaskResponse + string | + state.ContainerResponse | + state.TaskResponse | + state.StatsResponse | + map[string]*state.StatsResponse } type TMDSTestCase[R TMDSResponse] struct { diff --git a/ecs-agent/tmds/handlers/v4/state/mocks/state_mock.go b/ecs-agent/tmds/handlers/v4/state/mocks/state_mock.go index 2cf88f3c063..35d769eaca8 100644 --- a/ecs-agent/tmds/handlers/v4/state/mocks/state_mock.go +++ b/ecs-agent/tmds/handlers/v4/state/mocks/state_mock.go @@ -63,6 +63,21 @@ func (mr *MockAgentStateMockRecorder) GetContainerMetadata(arg0 interface{}) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetContainerMetadata", reflect.TypeOf((*MockAgentState)(nil).GetContainerMetadata), arg0) } +// GetContainerStats mocks base method. +func (m *MockAgentState) GetContainerStats(arg0 string) (state.StatsResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetContainerStats", arg0) + ret0, _ := ret[0].(state.StatsResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetContainerStats indicates an expected call of GetContainerStats. +func (mr *MockAgentStateMockRecorder) GetContainerStats(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetContainerStats", reflect.TypeOf((*MockAgentState)(nil).GetContainerStats), arg0) +} + // GetTaskMetadata mocks base method. func (m *MockAgentState) GetTaskMetadata(arg0 string) (state.TaskResponse, error) { m.ctrl.T.Helper() @@ -92,3 +107,18 @@ func (mr *MockAgentStateMockRecorder) GetTaskMetadataWithTags(arg0 interface{}) mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTaskMetadataWithTags", reflect.TypeOf((*MockAgentState)(nil).GetTaskMetadataWithTags), arg0) } + +// GetTaskStats mocks base method. +func (m *MockAgentState) GetTaskStats(arg0 string) (map[string]*state.StatsResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetTaskStats", arg0) + ret0, _ := ret[0].(map[string]*state.StatsResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetTaskStats indicates an expected call of GetTaskStats. +func (mr *MockAgentStateMockRecorder) GetTaskStats(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTaskStats", reflect.TypeOf((*MockAgentState)(nil).GetTaskStats), arg0) +} diff --git a/ecs-agent/tmds/handlers/v4/state/state.go b/ecs-agent/tmds/handlers/v4/state/state.go index df7f6532b51..0774ba52a2b 100644 --- a/ecs-agent/tmds/handlers/v4/state/state.go +++ b/ecs-agent/tmds/handlers/v4/state/state.go @@ -32,7 +32,7 @@ func (e *ErrorLookupFailure) Error() string { return fmt.Sprintf("container lookup failed: %s", e.externalReason) } -// General "catch-all" error to be returned when container metadata could not be +// General "catch-all" error to be returned when container or task metadata could not be // fetched for some reason type ErrorMetadataFetchFailure struct { externalReason string // Reason to be included in the response @@ -50,6 +50,46 @@ func (e *ErrorMetadataFetchFailure) ExternalReason() string { return e.externalReason } +// Error to be returned when container or task stats lookup failed due to a lookup failure +type ErrorStatsLookupFailure struct { + externalReason string // Reason to be returned in TMDS response +} + +func NewErrorStatsLookupFailure(externalReason string) *ErrorStatsLookupFailure { + return &ErrorStatsLookupFailure{externalReason} +} + +func (e *ErrorStatsLookupFailure) ExternalReason() string { + return e.externalReason +} + +func (e *ErrorStatsLookupFailure) Error() string { + return fmt.Sprintf("stats lookup failed: %s", e.externalReason) +} + +// General "catch-all" error to be returned when container or task stats could not +// be fetched for some reason. +type ErrorStatsFetchFailure struct { + externalReason string // Reason to be returned in TMDS response + cause error +} + +func NewErrorStatsFetchFailure(externalReason string, cause error) *ErrorStatsFetchFailure { + return &ErrorStatsFetchFailure{externalReason, cause} +} + +func (e *ErrorStatsFetchFailure) ExternalReason() string { + return e.externalReason +} + +func (e *ErrorStatsFetchFailure) Error() string { + return fmt.Sprintf("failed to get stats: %s: %v", e.externalReason, e.cause) +} + +func (e *ErrorStatsFetchFailure) Unwrap() error { + return e.cause +} + // Interface for interacting with Agent State relevant to TMDS type AgentState interface { // Returns container metadata in v4 format for the container identified by the @@ -68,4 +108,16 @@ type AgentState interface { // Returns ErrorTaskLookupFailed if task lookup fails. // Returns ErrorMetadataFetchFailure if something else goes wrong. GetTaskMetadataWithTags(endpointContainerID string) (TaskResponse, error) + + // Returns container stats in v4 format for the container identified by the provided + // endpointContainerID. + // Returns ErrorStatsLookupFailure if container lookup fails. + // Returns ErrorStatsFetchFailure if something else goes wrong. + GetContainerStats(endpointContainerID string) (StatsResponse, error) + + // Returns task stats in v4 format for the task identified by the provided + // endpointContainerID. + // Returns ErrorStatsLookupFailure if container lookup fails. + // Returns ErrorStatsFetchFailure if something else goes wrong. + GetTaskStats(endpointContainerID string) (map[string]*StatsResponse, error) } diff --git a/ecs-agent/tmds/handlers/v4/state/state_test.go b/ecs-agent/tmds/handlers/v4/state/state_test.go index a5b413fde22..51d151b3310 100644 --- a/ecs-agent/tmds/handlers/v4/state/state_test.go +++ b/ecs-agent/tmds/handlers/v4/state/state_test.go @@ -66,3 +66,59 @@ func TestAsErrorMetadataFetchFailure(t *testing.T) { require.False(t, errors.As(errors.New("other error"), &target)) }) } + +func TestUnwrapErrorStatsFetchFailure(t *testing.T) { + t.Run("unwrap works", func(t *testing.T) { + cause := errors.New("cause") + var err error = NewErrorStatsFetchFailure("external reason", cause) + assert.Equal(t, cause, errors.Unwrap(err)) + }) + t.Run("unwrap with no cause", func(t *testing.T) { + var err error = NewErrorStatsFetchFailure("external reason", nil) + assert.Nil(t, errors.Unwrap(err)) + }) +} + +func TestAsErrorStatsFetchFailure(t *testing.T) { + t.Run("as works no wrap", func(t *testing.T) { + var target *ErrorStatsFetchFailure + var err = NewErrorStatsFetchFailure("containerID", errors.New("cause")) + require.True(t, errors.As(err, &target)) + assert.Equal(t, err, target) + }) + t.Run("as works wrapped", func(t *testing.T) { + var err = NewErrorStatsFetchFailure("reason", nil) + var target *ErrorStatsFetchFailure + require.True(t, errors.As(errors.Wrap(err, "outer"), &target)) + assert.Equal(t, err, target) + }) + t.Run("as should fail when no match", func(t *testing.T) { + var target *ErrorStatsFetchFailure + require.False(t, errors.As(errors.New("other error"), &target)) + }) +} + +// Tests Error() method of ErrorStatsFetchFailure type +func TestErrorStatsFetchFailureMessage(t *testing.T) { + var err error = NewErrorStatsFetchFailure("external reason", errors.New("cause")) + assert.Equal(t, "failed to get stats: external reason: cause", err.Error()) +} + +func TestAsErrorStatsLookupFailure(t *testing.T) { + t.Run("as works no wrap", func(t *testing.T) { + var target *ErrorStatsLookupFailure + var err = NewErrorStatsLookupFailure("containerID") + require.True(t, errors.As(err, &target)) + assert.Equal(t, err, target) + }) + t.Run("as works wrapped", func(t *testing.T) { + var err = NewErrorStatsLookupFailure("reason") + var target *ErrorStatsLookupFailure + require.True(t, errors.As(errors.Wrap(err, "outer"), &target)) + assert.Equal(t, err, target) + }) + t.Run("as should fail when no match", func(t *testing.T) { + var target *ErrorStatsLookupFailure + require.False(t, errors.As(errors.New("other error"), &target)) + }) +}