Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support layered (multiple) configuration files #1301

Merged
merged 8 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,17 @@ This communication can be managed in two ways: via an explicit list of peers in

## Configuration

Configuration is controlled by Refinery's two configuration files, which are generally referred to as `config.yaml` for general configuration and `rules.yaml` for sampling configuration.
Configuration is controlled by Refinery's two configuration files, which are generally referred to as `config.yaml` for general configuration and `rules.yaml` for sampling configuration. These files can be loaded from an accessible filesystem, or loaded with an unauthenticated GET request from a URL.

Learn more about `config.yaml` and all the parameters that control Refinery's operation in our [Refinery configuration documentation](https://docs.honeycomb.io/manage-data-volume/refinery/configuration/).

Learn more about `rules.yaml` and sampler configuration in our [Refinery sampling methods documentation](https://docs.honeycomb.io/manage-data-volume/refinery/sampling-methods/).

It is valid to specify more than one configuration source.
For example, it would be possible to have a common configuration file, plus a separate file containing only keys.
On the command line, specify multiple files by repeating the command line switch.
In environment variables, separate multiple config locations with commas.

## Running Refinery

Refinery is a typical Linux-style command line application, and supports several command line switches.
Expand Down
4 changes: 2 additions & 2 deletions config/cmdenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import (
// that this system uses reflection to establish the relationship between the
// config struct and the command line options.
type CmdEnv struct {
ConfigLocation string `short:"c" long:"config" env:"REFINERY_CONFIG" default:"/etc/refinery/refinery.yaml" description:"config file or URL to load"`
RulesLocation string `short:"r" long:"rules_config" env:"REFINERY_RULES_CONFIG" default:"/etc/refinery/rules.yaml" description:"config file or URL to load"`
ConfigLocations []string `short:"c" long:"config" env:"REFINERY_CONFIG" env-delim:"," default:"/etc/refinery/refinery.yaml" description:"config file or URL to load; can be specified more than once"`
RulesLocations []string `short:"r" long:"rules_config" env:"REFINERY_RULES_CONFIG" env-delim:"," default:"/etc/refinery/rules.yaml" description:"config file or URL to load; can be specified more than once"`
HTTPListenAddr string `long:"http-listen-address" env:"REFINERY_HTTP_LISTEN_ADDRESS" description:"HTTP listen address for incoming event traffic"`
PeerListenAddr string `long:"peer-listen-address" env:"REFINERY_PEER_LISTEN_ADDRESS" description:"Peer listen address for communication between Refinery instances"`
GRPCListenAddr string `long:"grpc-listen-address" env:"REFINERY_GRPC_LISTEN_ADDRESS" description:"gRPC listen address for OTLP traffic"`
Expand Down
134 changes: 90 additions & 44 deletions config/configLoadHelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"path/filepath"
"reflect"
"strconv"
"strings"

"github.com/creasty/defaults"
"github.com/pelletier/go-toml/v2"
Expand Down Expand Up @@ -108,17 +109,84 @@ func load(r io.Reader, format Format, into any) error {
}
}

func validateConfig(opts *CmdEnv) ([]string, error) {
location := opts.ConfigLocation
r, format, err := getReaderFor(location)
if err != nil {
return nil, err
// loadConfigsInto loads all the named configs into dest in the order they are
// listed.
//
// It returns the hex-encoded MD5 hash of the data read from the configs (if
// there's only one config, this is the hash of that config; if there are
// multiple, it's the hash of all of them concatenated together). If any
// config cannot be opened or loaded, it returns an empty hash and the error.
func loadConfigsInto(dest any, locations []string) (string, error) {
	// start a hash of the configs we read
	h := md5.New()
	for _, location := range locations {
		if err := loadOneConfigInto(dest, location, h); err != nil {
			return "", err
		}
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

// loadOneConfigInto loads the single config at location into dest, copying
// the data it reads into h as it goes. It is a separate function so that the
// deferred Close runs as soon as this config has been loaded, instead of
// keeping every reader open until the last config has been processed.
func loadOneConfigInto(dest any, location string, h io.Writer) error {
	// trim leading and trailing whitespace just in case
	location = strings.TrimSpace(location)
	r, format, err := getReaderFor(location)
	if err != nil {
		return err
	}
	defer r.Close()

	// write the data to the hash as we read it
	rdr := io.TeeReader(r, h)

	// when working on a struct, load only overwrites destination values that are
	// explicitly named. So we can just keep loading successive files into
	// the same object without losing data we've already specified.
	if err := load(rdr, format, dest); err != nil {
		return fmt.Errorf("loadConfigsInto unable to load config %s: %w", location, err)
	}
	return nil
}

func loadConfigsIntoMap(dest map[string]any, locations []string) error {
for _, location := range locations {
// trim leading and trailing whitespace just in case
location := strings.TrimSpace(location)
r, format, err := getReaderFor(location)
if err != nil {
return err
}
defer r.Close()

// when working on a map, when loading a nested object, load will overwrite the entire destination
// value, so we can't just keep loading successive files into the same object. Instead, we
// need to load into a new object and then merge it into the map.
temp := make(map[string]any)
if err := load(r, format, &temp); err != nil {
return fmt.Errorf("loadConfigsInto unable to load config %s: %w", location, err)
}
for k, v := range temp {
switch vm := v.(type) {
case map[string]any:
// if the value is a map, we need to merge its value into the existing map value, if any.
if dest[k] == nil {
// no existing value, just copy it over
dest[k] = vm
} else {
// this works without needing recursion because we know that
// configurations can never be more than two levels deep.
kentquirk marked this conversation as resolved.
Show resolved Hide resolved
for kk, vv := range vm {
dest[k].(map[string]any)[kk] = vv
}
}
default:
// everything else just gets copied over, including slices
dest[k] = v
}
}
}
defer r.Close()
return nil
}

var userData map[string]any
if err := load(r, format, &userData); err != nil {
return nil, fmt.Errorf("validateConfig unable to load config %s: %w", location, err)
// validateConfigs reads the configs from the given location and validates them.
// It returns a list of failures; if the list is empty, the config is valid.
// err is non-nil only for significant errors like a missing file.
func validateConfigs(opts *CmdEnv) ([]string, error) {
// first read the configs into a map so we can validate them
userData := make(map[string]any)
err := loadConfigsIntoMap(userData, opts.ConfigLocations)
if err != nil {
return nil, err
}

metadata, err := LoadConfigMetadata()
Expand All @@ -131,21 +199,14 @@ func validateConfig(opts *CmdEnv) ([]string, error) {
return failures, nil
}

// Basic validation worked. Now we need to reload it into the struct so that
// Basic validation worked. Now we need to reload everything into our struct so that
// we can apply defaults and options, and then validate a second time.

// we need a new reader for the source data
r2, _, err := getReaderFor(location)
var config configContents
_, err = loadConfigsInto(&config, opts.ConfigLocations)
if err != nil {
return nil, err
}
defer r2.Close()

var config configContents
if err := load(r2, format, &config); err != nil {
// this should never happen, since we already validated the config
return nil, fmt.Errorf("validateConfig unable to RELOAD config %s: %w", location, err)
}
// apply defaults and options
if err := defaults.Set(&config); err != nil {
return nil, fmt.Errorf("readConfigInto unable to apply defaults: %w", err)
Expand All @@ -166,12 +227,12 @@ func validateConfig(opts *CmdEnv) ([]string, error) {
if config.OTelMetrics.APIKey == "" {
config.OTelMetrics.APIKey = "InvalidHoneycombAPIKey"
}

if config.OTelTracing.APIKey == "" {
config.OTelTracing.APIKey = "InvalidHoneycombAPIKey"
}

// write it out to a YAML buffer
// The validator needs a map[string]any to work with, so we need to
// write it out to a buffer (we always use YAML) and then reload it.
buf := new(bytes.Buffer)
encoder := yaml.NewEncoder(buf)
encoder.SetIndent(2)
Expand All @@ -180,7 +241,7 @@ func validateConfig(opts *CmdEnv) ([]string, error) {
}

var rewrittenUserData map[string]any
if err := load(buf, format, &rewrittenUserData); err != nil {
if err := load(buf, FormatYAML, &rewrittenUserData); err != nil {
return nil, fmt.Errorf("validateConfig unable to reload hydrated config from buffer: %w", err)
}

Expand All @@ -189,17 +250,13 @@ func validateConfig(opts *CmdEnv) ([]string, error) {
return failures, nil
}

func validateRules(location string) ([]string, error) {
r, format, err := getReaderFor(location)
func validateRules(locations []string) ([]string, error) {
// first read the configs into a map so we can validate them
userData := make(map[string]any)
err := loadConfigsIntoMap(userData, locations)
if err != nil {
return nil, err
}
defer r.Close()

var userData map[string]any
if err := load(r, format, &userData); err != nil {
return nil, fmt.Errorf("validateRules unable to load config %s: %w", location, err)
}

metadata, err := LoadRulesMetadata()
if err != nil {
Expand All @@ -211,22 +268,11 @@ func validateRules(location string) ([]string, error) {
}

// readConfigInto reads the config from the given location and applies it to the given struct.
func readConfigInto(dest any, location string, opts *CmdEnv) (string, error) {
r, format, err := getReaderFor(location)
func readConfigInto(dest any, locations []string, opts *CmdEnv) (string, error) {
hash, err := loadConfigsInto(dest, locations)
if err != nil {
return "", err
return hash, err
}
defer r.Close()

// we're going to use a TeeReader to calculate the hash while also reading the data
h := md5.New()
rdr := io.TeeReader(r, h)

if err := load(rdr, format, dest); err != nil {
return "", fmt.Errorf("readConfigInto unable to load config %s: %w", location, err)
}
// the hash is now the MD5 of the config file
hash := hex.EncodeToString(h.Sum(nil))

// don't apply options and defaults if we're not given any
if opts == nil {
Expand Down
128 changes: 128 additions & 0 deletions config/configLoadHelpers_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package config

import (
"fmt"
"net/http"
"os"
"reflect"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)

func Test_formatFromFilename(t *testing.T) {
Expand Down Expand Up @@ -147,3 +151,127 @@ func Test_ConfigHashMetrics(t *testing.T) {
})
}
}

// createTempConfigs writes each string in cfgs to its own temporary YAML file
// and returns a slice of their filenames, in the same order as cfgs.
// Because we use t.TempDir() the files will be cleaned up automatically.
//
// Any failure to create, write, or close a file stops the test immediately:
// nothing that follows can work without the files, and continuing after a
// failed create would dereference a nil file.
func createTempConfigs(t *testing.T, cfgs ...string) []string {
	// report failures at the caller's line rather than inside this helper
	t.Helper()
	tmpDir := t.TempDir()

	var cfgFiles []string
	for _, cfg := range cfgs {
		configFile, err := os.CreateTemp(tmpDir, "cfg_*.yaml")
		if err != nil {
			t.Fatalf("creating temp config file: %v", err)
		}

		_, writeErr := configFile.WriteString(cfg)
		// always close, even if the write failed, so the handle is not leaked
		closeErr := configFile.Close()
		if writeErr != nil {
			t.Fatalf("writing temp config file %s: %v", configFile.Name(), writeErr)
		}
		if closeErr != nil {
			t.Fatalf("closing temp config file %s: %v", configFile.Name(), closeErr)
		}
		cfgFiles = append(cfgFiles, configFile.Name())
	}
	return cfgFiles
}

// setMap stores value in m under key. A key containing dots is treated as a
// path: every segment before the last names a nested map[string]any, which is
// created if the segment is not present yet, and the final segment is the key
// the value is stored under. It panics if a segment that is already present
// does not hold a map[string]any.
func setMap(m map[string]any, key string, value any) {
	cur := m
	for {
		head, rest, nested := strings.Cut(key, ".")
		if !nested {
			break
		}
		if _, present := cur[head]; !present {
			cur[head] = make(map[string]any)
		}
		// descend one level and keep going with the remainder of the path
		cur = cur[head].(map[string]any)
		key = rest
	}
	cur[key] = value
}

// makeYAML builds a YAML document from alternating key/value arguments:
// makeYAML("General.DatasetPrefix", "hello", "Network.ListenAddr", "0.0.0.0:8080").
// Keys must be strings and may be dotted paths, which setMap expands into
// nested maps.
//
// This is a test helper, so malformed input panics (with a message naming the
// problem) instead of returning an error: an odd number of arguments, a
// non-string key, or a value that cannot be marshaled.
func makeYAML(args ...any) string {
	if len(args)%2 != 0 {
		panic(fmt.Sprintf("makeYAML needs key/value pairs, got %d arguments", len(args)))
	}
	m := make(map[string]any)
	for i := 0; i < len(args); i += 2 {
		key, ok := args[i].(string)
		if !ok {
			panic(fmt.Sprintf("makeYAML key at argument %d must be a string, got %T", i, args[i]))
		}
		setMap(m, key, args[i+1])
	}
	b, err := yaml.Marshal(m)
	if err != nil {
		panic(err)
	}
	return string(b)
}

// Test_loadConfigsInto loads two layered config files into a configContents
// struct and checks the combined hash, that a value set in both files takes
// the second file's setting, and that values set in only one file are kept.
func Test_loadConfigsInto(t *testing.T) {
	base := makeYAML("General.ConfigurationVersion", 2, "General.ConfigReloadInterval", Duration(1*time.Second), "Network.ListenAddr", "0.0.0.0:8080")
	overlay := makeYAML("General.ConfigReloadInterval", Duration(2*time.Second), "General.DatasetPrefix", "hello")
	paths := createTempConfigs(t, base, overlay)

	var loaded configContents
	digest, err := loadConfigsInto(&loaded, paths)
	require.NoError(t, err)
	require.Equal(t, "2381a6563085f50ac56663b67ca85299", digest)

	// only in the first file
	require.Equal(t, 2, loaded.General.ConfigurationVersion)
	// in both files: the second one wins
	require.Equal(t, Duration(2*time.Second), loaded.General.ConfigReloadInterval)
	// only in the first file, in a different section
	require.Equal(t, "0.0.0.0:8080", loaded.Network.ListenAddr)
	// only in the second file
	require.Equal(t, "hello", loaded.General.DatasetPrefix)
}

func Test_loadConfigsIntoMap(t *testing.T) {
cm1 := makeYAML("General.ConfigurationVersion", 2, "General.ConfigReloadInterval", Duration(1*time.Second), "Network.ListenAddr", "0.0.0.0:8080")
cm2 := makeYAML("General.ConfigReloadInterval", Duration(2*time.Second), "General.DatasetPrefix", "hello")
cfgfiles := createTempConfigs(t, cm1, cm2)

cfg := map[string]any{}
err := loadConfigsIntoMap(cfg, cfgfiles)
require.NoError(t, err)
fmt.Println(cfg)
kentquirk marked this conversation as resolved.
Show resolved Hide resolved
gen := cfg["General"].(map[string]any)
require.Equal(t, 2, gen["ConfigurationVersion"])
require.Equal(t, "2s", gen["ConfigReloadInterval"])
require.Equal(t, "hello", gen["DatasetPrefix"])
net := cfg["Network"].(map[string]any)
require.Equal(t, "0.0.0.0:8080", net["ListenAddr"])
}

// Test_validateConfigs runs validateConfigs over one or more layered config
// files and compares the reported validation failures with the expected
// list. Each case writes its configs to temporary files first.
func Test_validateConfigs(t *testing.T) {
	// an empty (non-nil) slice: the expected result when validation passes
	noFailures := []string{}
	// the first layer is the same in every case
	base := makeYAML("General.ConfigurationVersion", 2, "General.ConfigReloadInterval", Duration(1*time.Second), "Network.ListenAddr", "0.1.2.3:8080")

	cases := []struct {
		name    string
		cfgs    []string
		want    []string
		wantErr bool
	}{
		{
			name:    "test1",
			cfgs:    []string{base},
			want:    noFailures,
			wantErr: false,
		},
		{
			name: "test2",
			cfgs: []string{
				base,
				makeYAML("General.ConfigReloadInterval", Duration(2*time.Second)),
			},
			want:    noFailures,
			wantErr: false,
		},
		{
			// the second layer sets a string field to an int
			name: "test3",
			cfgs: []string{
				base,
				makeYAML("General.ConfigReloadInterval", Duration(2*time.Second), "General.DatasetPrefix", 7),
			},
			want:    []string{"field General.DatasetPrefix must be a string but 7 is int"},
			wantErr: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			opts := &CmdEnv{ConfigLocations: createTempConfigs(t, tc.cfgs...)}
			got, err := validateConfigs(opts)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("validateConfigs() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("validateConfigs() = %v, want %v", got, tc.want)
		})
	}
}
Loading
Loading