Skip to content

Commit

Permalink
Allow adding extra fields to error logs (#514)
Browse files Browse the repository at this point in the history
## Which problem is this PR solving?

As noted in #513, errors that occur while Refinery is trying to send data to Honeycomb happen asynchronously. It's hard to relate any errors that show up in the logs with the spans that caused them to occur. However, there is a way provided by libhoney ([metadata](https://github.com/honeycombio/libhoney-go/blob/25068939fe8240ccc45e4c86271b6f057c5f833d/transmission/response.go#L11)) that makes it possible to attach data to the error. Before now, Refinery has attached the dataset, api host, and environment to the metadata. This helps but is not enough.


## Short description of the changes

This PR attempts to leverage that technique further by adding a new configuration value called `AdditionalErrorFields`. It allows the user to specify a list of field names. In the event of a transmission error, these fields (if they exist) will be copied from the failing span into the metadata, and will therefore show up as identified fields in the logs. 

The default value is `trace.span_id`.

This also removes the remaining instances of the obsolete standard library `ioutil`.

Closes #513
  • Loading branch information
kentquirk authored Sep 16, 2022
1 parent 8142980 commit c6533de
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 40 deletions.
34 changes: 16 additions & 18 deletions app/app_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//go:build all || race
// +build all race

package app

Expand All @@ -9,7 +8,6 @@ import (
"context"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/http/httptest"
Expand Down Expand Up @@ -215,7 +213,7 @@ func post(t testing.TB, req *http.Request) {
resp, err := httpClient.Do(req)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
io.Copy(ioutil.Discard, resp.Body)
io.Copy(io.Discard, resp.Body)
resp.Body.Close()
}

Expand Down Expand Up @@ -333,7 +331,7 @@ func TestPeerRouting(t *testing.T) {
req.Header.Set("Content-Type", "application/json")

blob := `[` + string(spans[0]) + `]`
req.Body = ioutil.NopCloser(strings.NewReader(blob))
req.Body = io.NopCloser(strings.NewReader(blob))
post(t, req)
assert.Eventually(t, func() bool {
return len(senders[0].Events()) == 1
Expand Down Expand Up @@ -364,7 +362,7 @@ func TestPeerRouting(t *testing.T) {
"long": "this is a test of the emergency broadcast system",
"foo": "bar",
},
Metadata: map[string]string{
Metadata: map[string]any{
"api_host": "http://api.honeycomb.io",
"dataset": "dataset",
"environment": "",
Expand All @@ -383,7 +381,7 @@ func TestPeerRouting(t *testing.T) {
req.Header.Set("X-Honeycomb-Team", legacyAPIKey)
req.Header.Set("Content-Type", "application/json")

req.Body = ioutil.NopCloser(strings.NewReader(blob))
req.Body = io.NopCloser(strings.NewReader(blob))
post(t, req)
assert.Eventually(t, func() bool {
return len(senders[1].Events()) == 1
Expand Down Expand Up @@ -488,7 +486,7 @@ func TestEventsEndpoint(t *testing.T) {
"trace.trace_id": "1",
"foo": "bar",
},
Metadata: map[string]string{
Metadata: map[string]any{
"api_host": "http://api.honeycomb.io",
"dataset": "dataset",
"environment": "",
Expand Down Expand Up @@ -535,7 +533,7 @@ func TestEventsEndpoint(t *testing.T) {
"trace.trace_id": "1",
"foo": "bar",
},
Metadata: map[string]string{
Metadata: map[string]any{
"api_host": "http://api.honeycomb.io",
"dataset": "dataset",
"environment": "",
Expand Down Expand Up @@ -602,7 +600,7 @@ func TestEventsEndpointWithNonLegacyKey(t *testing.T) {
"trace.trace_id": "1",
"foo": "bar",
},
Metadata: map[string]string{
Metadata: map[string]any{
"api_host": "http://api.honeycomb.io",
"dataset": "dataset",
"environment": "test",
Expand Down Expand Up @@ -649,7 +647,7 @@ func TestEventsEndpointWithNonLegacyKey(t *testing.T) {
"trace.trace_id": "1",
"foo": "bar",
},
Metadata: map[string]string{
Metadata: map[string]any{
"api_host": "http://api.honeycomb.io",
"dataset": "dataset",
"environment": "test",
Expand Down Expand Up @@ -716,7 +714,7 @@ func BenchmarkTraces(b *testing.B) {

sender := &countingWriterSender{
WriterSender: transmission.WriterSender{
W: ioutil.Discard,
W: io.Discard,
},
}
_, graph := newStartedApp(b, sender, 11000, nil, false)
Expand All @@ -734,7 +732,7 @@ func BenchmarkTraces(b *testing.B) {
sender.resetCount()
for n := 0; n < b.N; n++ {
blob := `[` + string(spans[n%len(spans)]) + `]`
req.Body = ioutil.NopCloser(strings.NewReader(blob))
req.Body = io.NopCloser(strings.NewReader(blob))
post(b, req)
}
sender.waitForCount(b, b.N)
Expand All @@ -752,7 +750,7 @@ func BenchmarkTraces(b *testing.B) {
blob = append(blob, ',')
}
blob[len(blob)-1] = ']'
req.Body = ioutil.NopCloser(bytes.NewReader(blob))
req.Body = io.NopCloser(bytes.NewReader(blob))

post(b, req)
}
Expand All @@ -776,13 +774,13 @@ func BenchmarkTraces(b *testing.B) {
blob = append(blob, ',')
}
blob[len(blob)-1] = ']'
req.Body = ioutil.NopCloser(bytes.NewReader(blob))
req.Body = io.NopCloser(bytes.NewReader(blob))

resp, err := httpClient.Do(req)
assert.NoError(b, err)
if resp != nil {
assert.Equal(b, http.StatusOK, resp.StatusCode)
io.Copy(ioutil.Discard, resp.Body)
io.Copy(io.Discard, resp.Body)
resp.Body.Close()
}
}
Expand All @@ -799,7 +797,7 @@ func BenchmarkTraces(b *testing.B) {
func BenchmarkDistributedTraces(b *testing.B) {
sender := &countingWriterSender{
WriterSender: transmission.WriterSender{
W: ioutil.Discard,
W: io.Discard,
},
}

Expand Down Expand Up @@ -837,7 +835,7 @@ func BenchmarkDistributedTraces(b *testing.B) {
sender.resetCount()
for n := 0; n < b.N; n++ {
blob := `[` + string(spans[n%len(spans)]) + `]`
req.Body = ioutil.NopCloser(strings.NewReader(blob))
req.Body = io.NopCloser(strings.NewReader(blob))
req.URL.Host = addrs[n%len(addrs)]
post(b, req)
}
Expand All @@ -856,7 +854,7 @@ func BenchmarkDistributedTraces(b *testing.B) {
blob = append(blob, ',')
}
blob[len(blob)-1] = ']'
req.Body = ioutil.NopCloser(bytes.NewReader(blob))
req.Body = io.NopCloser(bytes.NewReader(blob))
req.URL.Host = addrs[n%len(addrs)]

post(b, req)
Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,6 @@ type Config interface {
GetGRPCTimeout() time.Duration

GetPeerTimeout() time.Duration

GetAdditionalErrorFields() []string
}
73 changes: 66 additions & 7 deletions config/config_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package config

import (
"io/ioutil"
"os"
"sync"
"testing"
Expand Down Expand Up @@ -78,11 +77,12 @@ func TestRedisPasswordEnvVar(t *testing.T) {
}
}

// creates two temporary toml files from the strings passed in and returns their filenames
func createTempConfigs(t *testing.T, configBody string, rulesBody string) (string, string) {
tmpDir, err := ioutil.TempDir("", "")
tmpDir, err := os.MkdirTemp("", "")
assert.NoError(t, err)

configFile, err := ioutil.TempFile(tmpDir, "*.toml")
configFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

if configBody != "" {
Expand All @@ -91,7 +91,7 @@ func createTempConfigs(t *testing.T, configBody string, rulesBody string) (strin
}
configFile.Close()

rulesFile, err := ioutil.TempFile(tmpDir, "*.toml")
rulesFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

if rulesBody != "" {
Expand Down Expand Up @@ -636,11 +636,11 @@ func TestQueryAuthToken(t *testing.T) {
}

func TestGRPCServerParameters(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "")
tmpDir, err := os.MkdirTemp("", "")
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)

configFile, err := ioutil.TempFile(tmpDir, "*.toml")
configFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

_, err = configFile.Write([]byte(`
Expand Down Expand Up @@ -668,7 +668,7 @@ func TestGRPCServerParameters(t *testing.T) {
assert.NoError(t, err)
configFile.Close()

rulesFile, err := ioutil.TempFile(tmpDir, "*.toml")
rulesFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

c, err := NewConfig(configFile.Name(), rulesFile.Name(), func(err error) {})
Expand All @@ -680,3 +680,62 @@ func TestGRPCServerParameters(t *testing.T) {
assert.Equal(t, 4*time.Minute, c.GetGRPCTime())
assert.Equal(t, 5*time.Minute, c.GetGRPCTimeout())
}

func TestHoneycombAdditionalErrorConfig(t *testing.T) {
config, rules := createTempConfigs(t, `
AdditionalErrorFields = [
"first",
"second"
]
[InMemCollector]
CacheCapacity=1000
[HoneycombMetrics]
MetricsHoneycombAPI="http://honeycomb.io"
MetricsAPIKey="1234"
MetricsDataset="testDatasetName"
MetricsReportingInterval=3
[HoneycombLogger]
LoggerHoneycombAPI="http://honeycomb.io"
LoggerAPIKey="1234"
LoggerDataset="loggerDataset"
LoggerSamplerEnabled=true
LoggerSamplerThroughput=10
`, "")
defer os.Remove(rules)
defer os.Remove(config)

c, err := NewConfig(config, rules, func(err error) {})
assert.NoError(t, err)

assert.Equal(t, []string{"first", "second"}, c.GetAdditionalErrorFields())
}

func TestHoneycombAdditionalErrorDefaults(t *testing.T) {
config, rules := createTempConfigs(t, `
[InMemCollector]
CacheCapacity=1000
[HoneycombMetrics]
MetricsHoneycombAPI="http://honeycomb.io"
MetricsAPIKey="1234"
MetricsDataset="testDatasetName"
MetricsReportingInterval=3
[HoneycombLogger]
LoggerHoneycombAPI="http://honeycomb.io"
LoggerAPIKey="1234"
LoggerDataset="loggerDataset"
LoggerSamplerEnabled=true
LoggerSamplerThroughput=10
`, "")
defer os.Remove(rules)
defer os.Remove(config)

c, err := NewConfig(config, rules, func(err error) {})
assert.NoError(t, err)

assert.Equal(t, []string{"trace.span_id"}, c.GetAdditionalErrorFields())
}
9 changes: 4 additions & 5 deletions config/config_test_reload_error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
package config

import (
"io/ioutil"
"os"
"sync"
"testing"
Expand All @@ -13,14 +12,14 @@ import (
)

func TestErrorReloading(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "")
tmpDir, err := os.MkdirTemp("", "")
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)

rulesFile, err := ioutil.TempFile(tmpDir, "*.toml")
rulesFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

configFile, err := ioutil.TempFile(tmpDir, "*.toml")
configFile, err := os.CreateTemp(tmpDir, "*.toml")
assert.NoError(t, err)

dummy := []byte(`
Expand Down Expand Up @@ -76,7 +75,7 @@ func TestErrorReloading(t *testing.T) {
}
}()

err = ioutil.WriteFile(rulesFile.Name(), []byte(`Sampler="InvalidSampler"`), 0644)
err = os.WriteFile(rulesFile.Name(), []byte(`Sampler="InvalidSampler"`), 0644)

if err != nil {
t.Error(err)
Expand Down
9 changes: 9 additions & 0 deletions config/file_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ type configContents struct {
DatasetPrefix string
QueryAuthToken string
GRPCServerParameters GRPCServerParameters
AdditionalErrorFields []string
}

type InMemoryCollectorCacheCapacity struct {
Expand Down Expand Up @@ -153,6 +154,7 @@ func NewConfig(config, rules string, errorCallback func(error)) (Config, error)
c.SetDefault("GRPCServerParameters.MaxConnectionAgeGrace", time.Duration(0))
c.SetDefault("GRPCServerParameters.Time", 10*time.Second)
c.SetDefault("GRPCServerParameters.Timeout", 2*time.Second)
c.SetDefault("AdditionalErrorFields", []string{"trace.span_id"})

c.SetConfigFile(config)
err := c.ReadInConfig()
Expand Down Expand Up @@ -870,3 +872,10 @@ func (f *fileConfig) GetPeerTimeout() time.Duration {

return f.conf.PeerManagement.Timeout
}

func (f *fileConfig) GetAdditionalErrorFields() []string {
f.mux.RLock()
defer f.mux.RUnlock()

return f.conf.AdditionalErrorFields
}
8 changes: 8 additions & 0 deletions config/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type MockConfig struct {
GRPCTime time.Duration
GRPCTimeout time.Duration
PeerTimeout time.Duration
AdditionalErrorFields []string

Mux sync.RWMutex
}
Expand Down Expand Up @@ -443,3 +444,10 @@ func (f *MockConfig) GetPeerTimeout() time.Duration {

return f.PeerTimeout
}

func (f *MockConfig) GetAdditionalErrorFields() []string {
f.Mux.RLock()
defer f.Mux.RUnlock()

return f.AdditionalErrorFields
}
13 changes: 13 additions & 0 deletions config_complete.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,25 @@ EnvironmentCacheTTL = "1h"
# are not typically needed in normal operation.
# Can be specified in the environment as REFINERY_QUERY_AUTH_TOKEN.
# If left unspecified, the /query endpoints are inaccessible.
# Not eligible for live reload.
# QueryAuthToken = "some-random-value"

# AddRuleReasonToTrace causes traces that are sent to Honeycomb to include the field `meta.refinery.reason`.
# This field contains text indicating which rule was evaluated that caused the trace to be included.
# Eligible for live reload.
# AddRuleReasonToTrace = true

# AdditionalErrorFields should be a list of span fields that should be included when logging
# errors that happen during ingestion of events (for example, the span too large error).
# This is primarily useful in trying to track down misbehaving senders in a large installation.
# The fields `dataset`, `apihost`, and `environment` are always included.
# If a field is not present in the span, it will not be present in the error log.
# Default is ["trace.span_id"].
# Eligible for live reload.
AdditionalErrorFields = [
"trace.span_id"
]

############################
## Implementation Choices ##
############################
Expand Down
3 changes: 0 additions & 3 deletions internal/peer/peers_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
//go:build all || race
// +build all race

package peer

import (
Expand Down
Loading

0 comments on commit c6533de

Please sign in to comment.