Skip to content

Commit

Permalink
Consolidate status information onto /status endpoint (#952)
Browse files Browse the repository at this point in the history
* Consolidate status information onto /status endpoint

* Update API documentation for /status endpoint changes

* Update changelog

* Restructure status API based on PR feedback
  • Loading branch information
zalegrala authored Sep 15, 2021
1 parent 267b951 commit b80ef06
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 103 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
* [CHANGE] Renamed CLI flag from `--storage.trace.maintenance-cycle` to `--storage.trace.blocklist_poll`. This is a **breaking change** [#897](https://github.com/grafana/tempo/pull/897) (@mritunjaysharma394)
* [CHANGE] update jsonnet alerts and recording rules to use `job_selectors` and `cluster_selectors` for configurable unique identifier labels [#935](https://github.com/grafana/tempo/pull/935) (@kevinschoonover)
* [CHANGE] Modify generated tag keys in Vulture for easier filtering [#934](https://github.com/grafana/tempo/pull/934) (@zalegrala)
* [CHANGE] **BREAKING CHANGE** Consolidate status information onto /status endpoint [#952](https://github.com/grafana/tempo/pull/952) (@zalegrala)
The following endpoints moved.
`/runtime_config` moved to `/status/runtime_config`
`/config` moved to `/status/config`
`/services` moved to `/status/services`
* [FEATURE] Add runtime config handler [#936](https://github.com/grafana/tempo/pull/936) (@mapno)

## v1.1.0 / 2021-08-26
Expand Down
157 changes: 114 additions & 43 deletions cmd/tempo/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
"github.com/grafana/dskit/modules"
"github.com/grafana/dskit/services"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/pkg/errors"
"github.com/prometheus/common/version"
"github.com/weaveworks/common/middleware"
"github.com/weaveworks/common/server"
"github.com/weaveworks/common/signals"
Expand Down Expand Up @@ -246,10 +248,9 @@ func (t *App) Run() error {
}

// before starting servers, register /ready handler and gRPC health check service.
t.Server.HTTP.Path("/config").Handler(t.configHandler())
t.Server.HTTP.Path("/ready").Handler(t.readyHandler(sm))
t.Server.HTTP.Path("/services").Handler(t.servicesHandler())
t.Server.HTTP.Path("/status").Handler(t.statusHandler())
t.Server.HTTP.Path("/status").Handler(t.statusHandler()).Methods("GET")
t.Server.HTTP.Path("/status/{endpoint}").Handler(t.statusHandler()).Methods("GET")
grpc_health_v1.RegisterHealthServer(t.Server.GRPC, healthcheck.New(sm))

// Let's listen for events from this manager, and log them.
Expand Down Expand Up @@ -292,21 +293,32 @@ func (t *App) Run() error {
return sm.AwaitStopped(context.Background())
}

func (t *App) configHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
out, err := yaml.Marshal(t.cfg)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
func (t *App) writeStatusVersion(w io.Writer) error {
_, err := w.Write([]byte(version.Print("tempo") + "\n"))
if err != nil {
return err
}

w.Header().Set("Content-Type", "text/yaml")
w.WriteHeader(http.StatusOK)
if _, err := w.Write(out); err != nil {
level.Error(log.Logger).Log("msg", "error writing response", "err", err)
}
return nil
}

// writeStatusConfig serializes the application's configuration (t.cfg) to
// YAML and writes it to w, preceded by a "---" YAML document-start marker.
//
// It returns the marshalling error if the configuration cannot be encoded,
// or the first error reported by w while writing; otherwise nil.
func (t *App) writeStatusConfig(w io.Writer) error {
	cfgYAML, err := yaml.Marshal(t.cfg)
	if err != nil {
		return err
	}

	// Emit the document separator first, then the marshalled config; stop at
	// the first write that fails.
	for _, chunk := range [][]byte{[]byte("---\n"), cfgYAML} {
		if _, err := w.Write(chunk); err != nil {
			return err
		}
	}

	return nil
}

func (t *App) readyHandler(sm *services.Manager) http.HandlerFunc {
Expand Down Expand Up @@ -348,51 +360,108 @@ func (t *App) readyHandler(sm *services.Manager) http.HandlerFunc {

func (t *App) statusHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var errs []error
msg := bytes.Buffer{}

v := r.URL.Query()
_, ok := r.URL.Query()["endpoints"]
if len(v) == 0 || ok {
t.writeStatusEndpoints(&msg)
simpleEndpoints := map[string]func(io.Writer) error{
"version": t.writeStatusVersion,
"services": t.writeStatusServices,
"endpoints": t.writeStatusEndpoints,
// "runtime_config": t.overrides.WriteStatusRuntimeConfig,
"config": t.writeStatusConfig,
}

w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(http.StatusOK)
if _, err := w.Write(msg.Bytes()); err != nil {
level.Error(log.Logger).Log("msg", "error writing response", "err", err)
wrapStatus := func(endpoint string) {
msg.WriteString("GET /status/" + endpoint + "\n")

switch endpoint {
case "runtime_config":
err := t.overrides.WriteStatusRuntimeConfig(&msg, r)
if err != nil {
errs = append(errs, err)
}
default:
err := simpleEndpoints[endpoint](&msg)
if err != nil {
errs = append(errs, err)
}
}
}
}
}

func (t *App) servicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
msg := bytes.Buffer{}
vars := mux.Vars(r)

svcNames := make([]string, 0, len(t.serviceMap))
for name := range t.serviceMap {
svcNames = append(svcNames, name)
if endpoint, ok := vars["endpoint"]; ok {
wrapStatus(endpoint)
} else {
wrapStatus("version")
wrapStatus("services")
wrapStatus("endpoints")
wrapStatus("runtime_config")
wrapStatus("config")
}

sort.Strings(svcNames)
w.Header().Set("Content-Type", "text/plain")

for _, name := range svcNames {
service := t.serviceMap[name]
joinErrors := func(errs []error) error {
if len(errs) == 0 {
return nil
}
var err error

msg.WriteString(fmt.Sprintf("%s: %s\n", name, service.State()))
if err := service.FailureCase(); err != nil {
msg.WriteString(fmt.Sprintf(" Failure case: %s\n", err))
for _, e := range errs {
if e != nil {
err = errors.Wrap(err, e.Error())
}
}
return err
}

err := joinErrors(errs)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
} else {
w.WriteHeader(http.StatusOK)
}

w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(http.StatusOK)
if _, err := w.Write(msg.Bytes()); err != nil {
level.Error(log.Logger).Log("msg", "error writing response", "err", err)
}
}
}

func (t *App) writeStatusEndpoints(w io.Writer) {
// writeStatusServices renders a table of every service in t.serviceMap to w.
//
// Rows are ordered alphabetically by service name and carry three columns:
// the service name, its current state, and the text of its failure case
// (left empty when the service reports no failure). The table writer mirrors
// its rendered output to w; this function itself always returns nil.
func (t *App) writeStatusServices(w io.Writer) error {
	// Map iteration order is random, so collect and sort the names to get
	// stable output.
	names := make([]string, 0, len(t.serviceMap))
	for n := range t.serviceMap {
		names = append(names, n)
	}
	sort.Strings(names)

	rows := make([]table.Row, 0, len(names))
	for _, n := range names {
		svc := t.serviceMap[n]

		failure := ""
		if err := svc.FailureCase(); err != nil {
			failure = err.Error()
		}

		rows = append(rows, table.Row{n, svc.State(), failure})
	}

	tw := table.NewWriter()
	tw.SetOutputMirror(w)
	tw.AppendHeader(table.Row{"service name", "status", "failure case"})
	tw.AppendRows(rows)
	tw.AppendSeparator()
	tw.Render()

	return nil
}

func (t *App) writeStatusEndpoints(w io.Writer) error {
type endpoint struct {
name string
regex string
Expand All @@ -418,7 +487,7 @@ func (t *App) writeStatusEndpoints(w io.Writer) {
return nil
})
if err != nil {
level.Error(log.Logger).Log("msg", "error walking routes", "err", err)
return errors.Wrap(err, "error walking routes")
}

sort.Slice(endpoints[:], func(i, j int) bool {
Expand All @@ -438,8 +507,10 @@ func (t *App) writeStatusEndpoints(w io.Writer) {
x.AppendSeparator()
x.Render()

_, err = w.Write([]byte(fmt.Sprintf("\nAPI documentation: %s\n", apiDocs)))
_, err = w.Write([]byte(fmt.Sprintf("\nAPI documentation: %s\n\n", apiDocs)))
if err != nil {
level.Error(log.Logger).Log("msg", "error writing response", "err", err)
return errors.Wrap(err, "error writing status endpoints")
}

return nil
}
2 changes: 0 additions & 2 deletions cmd/tempo/app/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ func (t *App) initOverrides() (services.Service, error) {
}
t.overrides = overrides

t.Server.HTTP.Handle("/runtime_config", overrides.Handler())

return t.overrides, nil
}

Expand Down
51 changes: 32 additions & 19 deletions docs/tempo/website/api_docs/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ For the sake of clarity, in this document we have grouped API endpoints by servi

_(*) This endpoint is not always available, check the specific section for more details._

### Configuration

```
GET /config
```

Displays the configuration currently applied to Tempo (in YAML format), including default values and settings via CLI flags.
Sensitive data is masked. Please be aware that the exported configuration **doesn't include the per-tenant overrides**.


### Readiness probe

```
Expand All @@ -51,14 +41,6 @@ GET /ready

Returns status code 200 when Tempo is ready to serve traffic.

### Services

```
GET /services
```

Displays a list of services and their status. If a service failed it will show the failure case.

### Metrics

```
Expand Down Expand Up @@ -208,5 +190,36 @@ GET /status
```
Print all available information by default.

```
GET /status/version
```

Print the version information.

```
GET /status/services
```

Displays a list of services and their status. If a service failed it will show the failure case.

```
GET /status/endpoints
```

Displays status information about the API endpoints.

```
GET /status/runtime_config
```

Displays the override configuration.

Query Parameter:
- `endpoints`: Prints status information about the API endpoints.
- `mode = (diff)`: Used to show the difference between defaults and overrides.

```
GET /status/config
```

Displays the configuration currently applied to Tempo (in YAML format), including default values and settings via CLI flags.
Sensitive data is masked. Please be aware that the exported configuration **doesn't include the per-tenant overrides**.
Loading

0 comments on commit b80ef06

Please sign in to comment.