From e822a9bd374c47762195503ec163eca35c7b0ee3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 12 May 2023 15:20:41 -0700 Subject: [PATCH 01/38] Add t-value sampler draft --- pkg/sampling/doc.go | 16 +++ pkg/sampling/go.mod | 11 ++ pkg/sampling/go.sum | 17 +++ pkg/sampling/tail.go | 132 ++++++++++++++++++ pkg/sampling/tail_test.go | 130 +++++++++++++++++ .../probabilisticsamplerprocessor/config.go | 33 ++++- .../probabilisticsamplerprocessor/go.mod | 3 + .../logsprocessor.go | 26 ++-- .../tracesprocessor.go | 120 ++++++++++++---- 9 files changed, 444 insertions(+), 44 deletions(-) create mode 100644 pkg/sampling/doc.go create mode 100644 pkg/sampling/go.mod create mode 100644 pkg/sampling/go.sum create mode 100644 pkg/sampling/tail.go create mode 100644 pkg/sampling/tail_test.go diff --git a/pkg/sampling/doc.go b/pkg/sampling/doc.go new file mode 100644 index 000000000000..39a46e4a4cc7 --- /dev/null +++ b/pkg/sampling/doc.go @@ -0,0 +1,16 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This implements a prototype for OTEP 226. 
+package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" diff --git a/pkg/sampling/go.mod b/pkg/sampling/go.mod new file mode 100644 index 000000000000..c69f61318218 --- /dev/null +++ b/pkg/sampling/go.mod @@ -0,0 +1,11 @@ +module github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling + +go 1.20 + +require github.com/stretchr/testify v1.8.2 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/pkg/sampling/go.sum b/pkg/sampling/go.sum new file mode 100644 index 000000000000..6a56e69bb33d --- /dev/null +++ b/pkg/sampling/go.sum @@ -0,0 +1,17 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/sampling/tail.go b/pkg/sampling/tail.go new file mode 100644 index 000000000000..3c9a0de33430 --- /dev/null +++ b/pkg/sampling/tail.go @@ -0,0 +1,132 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "bytes" + "encoding/binary" + "fmt" + "strconv" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +const ( + MinSamplingProb = 0x1p-56 + MaxAdjustedCount int64 = 0x1p+56 // i.e., 1 / MinSamplingProb +) + +var ( + ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") + ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") + ErrAdjustedCountRange = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") + ErrAdjustedCountOnlyInteger = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") +) + +type Threshold [7]byte + +func probabilityInRange(prob float64) bool { + return prob <= 1 && prob >= MinSamplingProb +} + +func AdjustedCountToTvalue(count uint64) (string, error) { + switch { + case count == 0: + // Special case. 
+ case count < 0: + return "", ErrProbabilityRange + case count > uint64(MaxAdjustedCount): + return "", ErrAdjustedCountRange + } + return strconv.FormatInt(int64(count), 10), nil +} + +// E.g., 3/7 w/ prec=2 -> "0x1.b7p-02" +func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { + // Probability cases + switch { + case prob == 1: + return "1", nil + case prob == 0: + return "0", nil + case !probabilityInRange(prob): + return "", ErrProbabilityRange + } + // Precision cases + switch { + case prec == -1: + // Default precision (see FormatFloat) + case prec == 0: + // Precision == 0 forces probabilities to be powers-of-two. + case prec <= 14: + // Precision is in-range + default: + return "", ErrPrecisionRange + + } + return strconv.FormatFloat(prob, format, prec, 64), nil +} + +func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { + number, err := strconv.ParseFloat(s, 64) // e.g., "0x1.b7p-02" -> approx 3/7 + if err != nil { + return 0, 0, err + } + + adjusted := 0.0 + switch { + case number == 0: + + case number < MinSamplingProb: + return 0, 0, ErrAdjustedCountRange + case number > float64(MaxAdjustedCount): + return 0, 0, ErrAdjustedCountRange + case number >= 1: + // It's an integer adjusted count; re-parse as an integer. 
+ integer, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, 0, ErrAdjustedCountOnlyInteger + } + adjusted = float64(integer) + number = 1 / adjusted + default: + adjusted = 1 / number + } + + return number, adjusted, nil +} + +func ProbabilityToThreshold(prob float64) (t Threshold, _ error) { + if !probabilityInRange(prob) { + return t, ErrProbabilityRange + } + + unsigned := uint64(prob * 0x1p+56) + var bytes [8]byte + binary.BigEndian.PutUint64(bytes[:], unsigned) + copy(t[:], bytes[1:]) + return t, nil +} + +func ThresholdToProbability(t Threshold) float64 { + var eight [8]byte + copy(eight[1:8], t[:]) + b56 := binary.BigEndian.Uint64(eight[:]) + return float64(b56) / 0x1p56 +} + +func (t Threshold) ShouldSample(id pcommon.TraceID) bool { + return bytes.Compare(id[9:16], t[:]) < 0 +} diff --git a/pkg/sampling/tail_test.go b/pkg/sampling/tail_test.go new file mode 100644 index 000000000000..c8c58f9fef37 --- /dev/null +++ b/pkg/sampling/tail_test.go @@ -0,0 +1,130 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sampling + +import ( + "fmt" + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +func must[T any](t T, err error) T { + if err != nil { + panic(err) + } + return t +} + +func mustNot[T any](t T, err error) error { + if err == nil { + return fmt.Errorf("expected an error, got nil") + } + return err +} + +func TestValidAdjustedCountToTvalue(t *testing.T) { + require.Equal(t, "0", must(AdjustedCountToTvalue(0))) + require.Equal(t, "1", must(AdjustedCountToTvalue(1))) + require.Equal(t, "2", must(AdjustedCountToTvalue(2))) + + const largest uint64 = 0x1p+56 + require.Equal(t, "72057594037927936", must(AdjustedCountToTvalue(largest))) + require.Equal(t, fmt.Sprint(largest-1), must(AdjustedCountToTvalue(largest-1))) +} + +func TestInvalidAdjustedCountToTvalue(t *testing.T) { + // Because unsigned, no too-small value. + require.Error(t, mustNot(AdjustedCountToTvalue(0x1p56+1))) + require.Error(t, mustNot(AdjustedCountToTvalue(math.MaxInt64))) +} + +func TestValidProbabilityToTvalue(t *testing.T) { + require.Equal(t, "0x1p-01", must(ProbabilityToTvalue(0.5, -1))) + require.Equal(t, "0x1p-56", must(ProbabilityToTvalue(0x1p-56, -1))) + require.Equal(t, "0x1.555p-02", must(ProbabilityToTvalue(1/3., 3))) +} + +func TestInvalidProbabilityToTvalue(t *testing.T) { + // Too small + require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, -1))) + require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 0))) + + // Too big + require.Error(t, mustNot(ProbabilityToTvalue(1.1, -1))) + require.Error(t, mustNot(ProbabilityToTvalue(1.1, 0))) + + // Bad precision + require.Error(t, mustNot(ProbabilityToTvalue(0.5, -3))) + require.Error(t, mustNot(ProbabilityToTvalue(0.5, 15))) +} + +func testTValueToProb(tv string) (float64, error) { + p, _, err := TvalueToProbabilityAndAdjustedCount(tv) + return p, err +} + +func testTValueToAdjCount(tv string) (float64, error) { + _, ac, err := TvalueToProbabilityAndAdjustedCount(tv) + return ac, err +} + +func 
TestTvalueToProbability(t *testing.T) { + require.Equal(t, 0.5, must(testTValueToProb("0.5"))) + require.Equal(t, 0.444, must(testTValueToProb("0.444"))) + require.Equal(t, 1.0, must(testTValueToProb("1"))) + require.Equal(t, 0.0, must(testTValueToProb("0"))) + + require.InEpsilon(t, 1/3., must(testTValueToProb("3")), 1e-9) +} + +func TestTvalueToAdjCount(t *testing.T) { + require.Equal(t, 2.0, must(testTValueToAdjCount("0.5"))) + require.Equal(t, 2.0, must(testTValueToAdjCount("2"))) + require.Equal(t, 3., must(testTValueToAdjCount("3"))) + require.Equal(t, 5., must(testTValueToAdjCount("5"))) + + require.InEpsilon(t, 1/0.444, must(testTValueToAdjCount("0.444")), 1e-9) + require.InEpsilon(t, 1/0.111111, must(testTValueToAdjCount("0.111111")), 1e-9) + + require.Equal(t, 1.0, must(testTValueToAdjCount("1"))) + require.Equal(t, 0.0, must(testTValueToAdjCount("0"))) +} + +func TestProbabilityToThreshold(t *testing.T) { + require.Equal(t, + Threshold{0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + must(ProbabilityToThreshold(0.5))) + require.Equal(t, + Threshold{0, 0, 0, 0, 0, 0, 0}, + must(ProbabilityToThreshold(0x1p-56))) + require.Equal(t, + Threshold{0, 0, 0, 0, 0, 0, 0xff}, + must(ProbabilityToThreshold(0x100p-56))) + require.Equal(t, + Threshold{0, 0, 0, 0, 0, 0, 0x01}, + must(ProbabilityToThreshold(0x1p-55))) + require.Equal(t, + Threshold{0, 0, 0, 0, 0, 0, 0x01}, + must(ProbabilityToThreshold(0x1p-55))) + require.Equal(t, + Threshold{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + must(ProbabilityToThreshold(1.0))) + + require.Equal(t, + Threshold{0x55, 0x53, 0xff, 0xff, 0xff, 0xff, 0xff}, + must(ProbabilityToThreshold(0x1.555p-2))) +} diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index 1e909ba40106..f68d4f5b17d6 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -16,6 +16,7 @@ package probabilisticsamplerprocessor // import 
"github.com/open-telemetry/opent import ( "fmt" + "math" "go.opentelemetry.io/collector/component" ) @@ -37,15 +38,23 @@ var validAttributeSource = map[AttributeSource]bool{ // Config has the configuration guiding the sampler processor. type Config struct { - // SamplingPercentage is the percentage rate at which traces or logs are going to be sampled. Defaults to zero, i.e.: no sample. - // Values greater or equal 100 are treated as "sample all traces/logs". + // SamplingPercentage is the percentage rate at which traces or logs are going to be sampled. Defaults to + // zero, i.e.: no sample. Values greater or equal 100 are treated as "sample all traces/logs". This is + // treated as having four significant figures when conveying the sampling probability. SamplingPercentage float32 `mapstructure:"sampling_percentage"` - // HashSeed allows one to configure the hashing seed. This is important in scenarios where multiple layers of collectors - // have different sampling rates: if they use the same seed all passing one layer may pass the other even if they have - // different sampling rates, configuring different seeds avoids that. + // HashSeed allows one to configure the legacy hashing seed. The current version of this protocol assumes + // that tracecontext v2 TraceIDs are being used, which ensures 7 bytes of randomness are available. We assume + // this is the case when HashSeed == 0. + // + // This is important in scenarios where multiple layers of collectors have different sampling rates: if they + // use the same seed all passing one layer may pass the other even if they have different sampling rates, + // configuring different seeds avoids that. HashSeed uint32 `mapstructure:"hash_seed"` + /////// + // Logs only fields below. + // AttributeSource (logs only) defines where to look for the attribute in from_attribute. The allowed values are // `traceID` or `record`. Default is `traceID`. 
AttributeSource `mapstructure:"attribute_source"` @@ -63,9 +72,19 @@ var _ component.Config = (*Config)(nil) // Validate checks if the processor configuration is valid func (cfg *Config) Validate() error { - if cfg.SamplingPercentage < 0 { - return fmt.Errorf("negative sampling rate: %.2f", cfg.SamplingPercentage) + ratio := float64(cfg.SamplingPercentage) / 100.0 + + switch { + case ratio < 0: + return fmt.Errorf("negative sampling rate: %.2f%%", cfg.SamplingPercentage) + case ratio == 0: + // Special case + case ratio < 0x1p-56: + return fmt.Errorf("sampling rate is too small: %.2f%%", cfg.SamplingPercentage) + case math.IsInf(ratio, 0) || math.IsNaN(ratio): + return fmt.Errorf("sampling rate is invalid: %.2f%%", cfg.SamplingPercentage) } + if cfg.AttributeSource != "" && !validAttributeSource[cfg.AttributeSource] { return fmt.Errorf("invalid attribute source: %v. Expected: %v or %v", cfg.AttributeSource, traceIDAttributeSource, recordAttributeSource) } diff --git a/processor/probabilisticsamplerprocessor/go.mod b/processor/probabilisticsamplerprocessor/go.mod index 7810f25ceb77..7fadfd269ee7 100644 --- a/processor/probabilisticsamplerprocessor/go.mod +++ b/processor/probabilisticsamplerprocessor/go.mod @@ -39,6 +39,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.0.0-00010101000000-000000000000 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_golang v1.15.1 // indirect @@ -88,3 +89,5 @@ retract ( replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil => ../../pkg/pdatautil replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest => ../../pkg/pdatatest + +replace 
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling diff --git a/processor/probabilisticsamplerprocessor/logsprocessor.go b/processor/probabilisticsamplerprocessor/logsprocessor.go index a61abc3e96bb..a8c2b0870e3e 100644 --- a/processor/probabilisticsamplerprocessor/logsprocessor.go +++ b/processor/probabilisticsamplerprocessor/logsprocessor.go @@ -29,12 +29,12 @@ import ( ) type logSamplerProcessor struct { - scaledSamplingRate uint32 - hashSeed uint32 - traceIDEnabled bool - samplingSource string - samplingPriority string - logger *zap.Logger + hashScaledSamplingRate uint32 + hashSeed uint32 + traceIDEnabled bool + samplingSource string + samplingPriority string + logger *zap.Logger } // newLogsProcessor returns a processor.LogsProcessor that will perform head sampling according to the given @@ -42,12 +42,12 @@ type logSamplerProcessor struct { func newLogsProcessor(ctx context.Context, set processor.CreateSettings, nextConsumer consumer.Logs, cfg *Config) (processor.Logs, error) { lsp := &logSamplerProcessor{ - scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor), - hashSeed: cfg.HashSeed, - traceIDEnabled: cfg.AttributeSource == traceIDAttributeSource, - samplingPriority: cfg.SamplingPriority, - samplingSource: cfg.FromAttribute, - logger: set.Logger, + hashScaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor), + hashSeed: cfg.HashSeed, + traceIDEnabled: cfg.AttributeSource == traceIDAttributeSource, + samplingPriority: cfg.SamplingPriority, + samplingSource: cfg.FromAttribute, + logger: set.Logger, } return processorhelper.NewLogsProcessor( @@ -78,7 +78,7 @@ func (lsp *logSamplerProcessor) processLogs(ctx context.Context, ld plog.Logs) ( lidBytes = value.Bytes().AsRaw() } } - priority := lsp.scaledSamplingRate + priority := lsp.hashScaledSamplingRate if lsp.samplingPriority != "" { if localPriority, ok := l.Attributes().Get(lsp.samplingPriority); ok { switch 
localPriority.Type() { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 1ea46d80201b..2e2a8590dcea 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -18,6 +18,8 @@ import ( "context" "strconv" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + "go.opencensus.io/stats" "go.opencensus.io/tag" "go.opentelemetry.io/collector/consumer" @@ -50,22 +52,53 @@ const ( numHashBuckets = 0x4000 // Using a power of 2 to avoid division. bitMaskHashBuckets = numHashBuckets - 1 percentageScaleFactor = numHashBuckets / 100.0 + + zeroTvalue = "t:0" ) type traceSamplerProcessor struct { - scaledSamplingRate uint32 - hashSeed uint32 - logger *zap.Logger + // Legacy hash-based calculation + hashScaledSamplingRate uint32 + hashSeed uint32 + + // Modern TraceID-randomness-based calculation + traceIDThreshold sampling.Threshold + tValueEncoding string + + logger *zap.Logger } // newTracesProcessor returns a processor.TracesProcessor that will perform head sampling according to the given // configuration. func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg *Config, nextConsumer consumer.Traces) (processor.Traces, error) { tsp := &traceSamplerProcessor{ + logger: set.Logger, + } + // README allows percents >100 to equal 100%, but t-value + // encoding does not. Correct it here. + pct := float64(cfg.SamplingPercentage) + if pct > 100 { + pct = 100 + } + + if cfg.HashSeed != 0 { // Adjust sampling percentage on private so recalculations are avoided. - scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor), - hashSeed: cfg.HashSeed, - logger: set.Logger, + tsp.hashScaledSamplingRate = uint32(pct * percentageScaleFactor) + tsp.hashSeed = cfg.HashSeed + } else { + // Encode t-value (OTEP 226), like %.4f. (See FormatFloat().) 
+ ratio := pct / 100 + tval, err := sampling.ProbabilityToTvalue(ratio, 'f', 4) + if err != nil { + return nil, err + } + threshold, err := sampling.ProbabilityToThreshold(ratio) + if err != nil { + return nil, err + } + + tsp.tValueEncoding = tval + tsp.traceIDThreshold = threshold } return processorhelper.NewTracesProcessor( @@ -77,6 +110,21 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } +func (tsp *traceSamplerProcessor) probabilitySampleFromTraceID(input pcommon.TraceID) (sample, consistent bool) { + // When the hash seed is set, fall back to the legacy behavior + // using the FNV hash. + if tsp.hashSeed != 0 { + // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources + // with various different criteria to generate trace id and perhaps were already sampled without hashing. + // Hashing here prevents bias due to such systems. + return computeHash(input[:], tsp.hashSeed)&bitMaskHashBuckets < tsp.hashScaledSamplingRate, false + } + + // Hash seed zero => assume tracecontext v2 + + return tsp.traceIDThreshold.ShouldSample(input), true +} + func (tsp *traceSamplerProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { td.ResourceSpans().RemoveIf(func(rs ptrace.ResourceSpans) bool { rs.ScopeSpans().RemoveIf(func(ils ptrace.ScopeSpans) bool { @@ -94,24 +142,48 @@ func (tsp *traceSamplerProcessor) processTraces(ctx context.Context, td ptrace.T return true } - _ = stats.RecordWithTags( - ctx, - []tag.Mutator{tag.Upsert(tagPolicyKey, "sampling_priority"), tag.Upsert(tagSampledKey, "true")}, - statCountTracesSampled.M(int64(1)), - ) - - // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources - // with various different criteria to generate trace id and perhaps were already sampled without hashing. 
- // Hashing here prevents bias due to such systems. - tidBytes := s.TraceID() - sampled := sp == mustSampleSpan || - computeHash(tidBytes[:], tsp.hashSeed)&bitMaskHashBuckets < tsp.scaledSamplingRate - - _ = stats.RecordWithTags( - ctx, - []tag.Mutator{tag.Upsert(tagPolicyKey, "trace_id_hash"), tag.Upsert(tagSampledKey, strconv.FormatBool(sampled))}, - statCountTracesSampled.M(int64(1)), - ) + forceSample := sp == mustSampleSpan + + probSample, consistent := tsp.probabilitySampleFromTraceID(s.TraceID()) + + sampled := forceSample || probSample + + if forceSample { + _ = stats.RecordWithTags( + ctx, + []tag.Mutator{tag.Upsert(tagPolicyKey, "sampling_priority"), tag.Upsert(tagSampledKey, "true")}, + statCountTracesSampled.M(int64(1)), + ) + } else { + _ = stats.RecordWithTags( + ctx, + []tag.Mutator{tag.Upsert(tagPolicyKey, "trace_id_hash"), tag.Upsert(tagSampledKey, strconv.FormatBool(sampled))}, + statCountTracesSampled.M(int64(1)), + ) + } + + if consistent { + // Attach the t-value! + ts := s.TraceState() + + // Get the t-value encoding. + enc := tsp.tValueEncoding + if !probSample { + // forceSample is implied, use the zero value. + enc = zeroTvalue + } + + raw := ts.AsRaw() + if raw == "" { + // No incoming t-value, i.e., the simple case. + ts.FromRaw(enc) + } else { + // Complex case: combine t-values. 
+ // TODO @@@ bring in code from + // https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/samplers/probability/consistent + } + } + return !sampled }) // Filter out empty ScopeMetrics From 1bc6017b33638ef1331623fb73826e2d0a79d94f Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 15 May 2023 16:28:33 -0700 Subject: [PATCH 02/38] copy/import tracestate parser package --- exporter/fileexporter/factory.go | 6 +- exporter/fileexporter/file_exporter.go | 34 ++- pkg/tracestate/tracestate.go | 190 ++++++++++++++++ pkg/tracestate/tracestate_test.go | 296 +++++++++++++++++++++++++ 4 files changed, 512 insertions(+), 14 deletions(-) create mode 100644 pkg/tracestate/tracestate.go create mode 100644 pkg/tracestate/tracestate_test.go diff --git a/exporter/fileexporter/factory.go b/exporter/fileexporter/factory.go index b5f821f40fde..41eba5cb4d7d 100644 --- a/exporter/fileexporter/factory.go +++ b/exporter/fileexporter/factory.go @@ -135,18 +135,20 @@ func createLogsExporter( } func newFileExporter(conf *Config, writer io.WriteCloser) *fileExporter { - return &fileExporter{ + e := &fileExporter{ path: conf.Path, formatType: conf.FormatType, file: writer, tracesMarshaler: tracesMarshalers[conf.FormatType], metricsMarshaler: metricsMarshalers[conf.FormatType], logsMarshaler: logsMarshalers[conf.FormatType], - exporter: buildExportFunc(conf), compression: conf.Compression, compressor: buildCompressor(conf.Compression), flushInterval: conf.FlushInterval, } + e.exporter = e.buildExportFunc(conf) + + return e } func buildFileWriter(cfg *Config) (io.WriteCloser, error) { diff --git a/exporter/fileexporter/file_exporter.go b/exporter/fileexporter/file_exporter.go index db3533c44d09..41bccff392ae 100644 --- a/exporter/fileexporter/file_exporter.go +++ b/exporter/fileexporter/file_exporter.go @@ -65,6 +65,14 @@ type fileExporter struct { stopTicker chan struct{} } +type binaryExporter struct { + *fileExporter +} + +type lineExporter struct { + *fileExporter +} + 
func (e *fileExporter) consumeTraces(_ context.Context, td ptrace.Traces) error { buf, err := e.tracesMarshaler.MarshalTraces(td) if err != nil { @@ -83,7 +91,7 @@ func (e *fileExporter) consumeMetrics(_ context.Context, md pmetric.Metrics) err return e.exporter(e, buf) } -func (e *fileExporter) consumeLogs(_ context.Context, ld plog.Logs) error { +func (e fileExporter) consumeLogs(_ context.Context, ld plog.Logs) error { buf, err := e.logsMarshaler.MarshalLogs(ld) if err != nil { return err @@ -92,20 +100,22 @@ func (e *fileExporter) consumeLogs(_ context.Context, ld plog.Logs) error { return e.exporter(e, buf) } -func exportMessageAsLine(e *fileExporter, buf []byte) error { +func (e lineExporter) Write(buf []byte) (int, error) { // Ensure only one write operation happens at a time. e.mutex.Lock() defer e.mutex.Unlock() - if _, err := e.file.Write(buf); err != nil { + n1, err := e.file.Write(buf) + if err != nil { return err } - if _, err := io.WriteString(e.file, "\n"); err != nil { + n2, err := io.WriteString(e.file, "\n") + if err != nil { return err } - return nil + return n1 + n2, nil } -func exportMessageAsBuffer(e *fileExporter, buf []byte) error { +func (e *binaryExporter) Write(buf []byte) (int, error) { // Ensure only one write operation happens at a time. e.mutex.Lock() defer e.mutex.Unlock() @@ -115,10 +125,10 @@ func exportMessageAsBuffer(e *fileExporter, buf []byte) error { binary.BigEndian.PutUint32(data, uint32(len(buf))) data = append(data, buf...) if err := binary.Write(e.file, binary.BigEndian, data); err != nil { - return err + return -1, err } - return nil + return len(data), nil } // startFlusher starts the flusher. 
@@ -172,13 +182,13 @@ func (e *fileExporter) Shutdown(context.Context) error { return e.file.Close() } -func buildExportFunc(cfg *Config) func(e *fileExporter, buf []byte) error { +func (e *fileExporter) buildExportFunc(cfg *Config) (io.Writer, error) { if cfg.FormatType == formatTypeProto { - return exportMessageAsBuffer + return binaryExporter{e} } // if the data format is JSON and needs to be compressed, telemetry data can't be written to file in JSON format. if cfg.FormatType == formatTypeJSON && cfg.Compression != "" { - return exportMessageAsBuffer + return binaryExporter{fileExporter: e}, nil } - return exportMessageAsLine + return lineExporter{fileExporter: e}, nil } diff --git a/pkg/tracestate/tracestate.go b/pkg/tracestate/tracestate.go new file mode 100644 index 000000000000..fd8a22430e96 --- /dev/null +++ b/pkg/tracestate/tracestate.go @@ -0,0 +1,190 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tracestate // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/tracestate" + +import ( + "fmt" + "strconv" + "strings" +) + +const ( + traceStateKey = "ot" + tValueSubkey = "t" + traceStateSizeLimit = 256 +) + +var ( + errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) +) + +type otelTraceState struct { + tvalueString string + tvalueParsed float64 + unknown []string +} + +func newTraceState() otelTraceState { + return otelTraceState{ + tvalueString: "", // empty => !hasTValue(); includes "t:" prefix + } +} + +func (otts otelTraceState) serialize() string { + var sb strings.Builder + semi := func() { + if sb.Len() != 0 { + _, _ = sb.WriteString(";") + } + } + + if otts.hasTValue() { + _, _ = sb.WriteString(otts.tvalueString) + } + for _, unk := range otts.unknown { + ex := 0 + if sb.Len() != 0 { + ex = 1 + } + if sb.Len()+ex+len(unk) > traceStateSizeLimit { + // Note: should this generate an explicit error? + break + } + semi() + _, _ = sb.WriteString(unk) + } + return sb.String() +} + +func isValueByte(r byte) bool { + if isLCAlphaNum(r) { + return true + } + if isUCAlpha(r) { + return true + } + switch r { + case '.', '_', '-', '+': + return true + default: + return false + } +} + +func isLCAlphaNum(r byte) bool { + if isLCAlpha(r) { + return true + } + return r >= '0' && r <= '9' +} + +func isLCAlpha(r byte) bool { + return r >= 'a' && r <= 'z' +} + +func isUCAlpha(r byte) bool { + return r >= 'A' && r <= 'Z' +} + +func parseOTelTraceState(ts string) (otelTraceState, error) { // nolint: revive + var tval string + var unknown []string + + if len(ts) == 0 { + return newTraceState(), nil + } + + if len(ts) > traceStateSizeLimit { + return newTraceState(), errTraceStateSyntax + } + + for len(ts) > 0 { + eqPos := 0 + for ; eqPos < len(ts); eqPos++ { + if eqPos == 0 { + if isLCAlpha(ts[eqPos]) { + continue + } + } else if isLCAlphaNum(ts[eqPos]) { + continue + } + break + } + if eqPos == 0 || eqPos == len(ts) || 
ts[eqPos] != ':' { + return newTraceState(), errTraceStateSyntax + } + + key := ts[0:eqPos] + tail := ts[eqPos+1:] + + sepPos := 0 + + for ; sepPos < len(tail); sepPos++ { + if isValueByte(tail[sepPos]) { + continue + } + break + } + + if key == tValueSubkey { + tval = ts[0 : sepPos+eqPos+1] + } else { + unknown = append(unknown, ts[0:sepPos+eqPos+1]) + } + + if sepPos < len(tail) && tail[sepPos] != ';' { + return newTraceState(), errTraceStateSyntax + } + + if sepPos == len(tail) { + break + } + + ts = tail[sepPos+1:] + + // test for a trailing ; + if ts == "" { + return newTraceState(), errTraceStateSyntax + } + } + + // @@@ Use ../sampling + tv, err := strconv.ParseFloat(tval, 64) + if err != nil { + err = fmt.Errorf("otel tracestate t-value: %w", strconv.ErrSyntax) + } + switch { + case tv < 0: + + case tv == 0: + case tv < 0x1p-56: + case tv > 0x1p+56: + } + + otts := newTraceState() + otts.unknown = unknown + otts.tvalueString = tval + otts.tvalueParsed = tv + + return otts, nil +} + +func parseError(key string, err error) error { + return fmt.Errorf("otel tracestate: %s-value %w", key, err) +} + +func (otts otelTraceState) hasTValue() bool { + return otts.tvalueString != "" +} diff --git a/pkg/tracestate/tracestate_test.go b/pkg/tracestate/tracestate_test.go new file mode 100644 index 000000000000..55bae10e3648 --- /dev/null +++ b/pkg/tracestate/tracestate_test.go @@ -0,0 +1,296 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package tracestate + +import ( + "errors" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func testName(in string) string { + x := strings.NewReplacer(":", "_", ";", "_").Replace(in) + if len(x) > 32 { + return "" + } + return x +} + +func TestNewTraceState(t *testing.T) { + otts := newTraceState() + require.False(t, otts.hasTValue()) + require.Equal(t, "", otts.serialize()) +} + +func TestTraceStatePRValueSerialize(t *testing.T) { + otts := newTraceState() + otts.tvalueString = "t:3" + otts.unknown = []string{"a:b", "c:d"} + require.True(t, otts.hasTValue()) + require.Equal(t, "t:3;a:b;c:d", otts.serialize()) +} + +func TestTraceStateSerializeOverflow(t *testing.T) { + long := "x:" + strings.Repeat(".", 254) + otts := newTraceState() + otts.unknown = []string{long} + // this drops the extra key, sorry! + require.Equal(t, long, otts.serialize()) + otts.tvalueString = "t:1" + require.Equal(t, "t:1", otts.serialize()) +} + +// func TestParseTraceStateForTraceID(t *testing.T) { +// type testCase struct { +// in string +// rval uint8 +// expectErr error +// } +// const notset = 255 +// for _, test := range []testCase{ +// // All are unsampled tests, i.e., `sampled` is not set in traceparent. 
+// {"r:2", 2, nil}, +// {"r:1;", notset, strconv.ErrSyntax}, +// {"r:1", 1, nil}, +// {"r:1=p:2", notset, strconv.ErrSyntax}, +// {"r:1;p:2=s:3", notset, strconv.ErrSyntax}, +// {":1;p:2=s:3", notset, strconv.ErrSyntax}, +// {":;p:2=s:3", notset, strconv.ErrSyntax}, +// {":;:", notset, strconv.ErrSyntax}, +// {":", notset, strconv.ErrSyntax}, +// {"", notset, nil}, +// {"r:;p=1", notset, strconv.ErrSyntax}, +// {"r:1", 1, nil}, +// {"r:10", 10, nil}, +// {"r:33", 33, nil}, +// {"r:61", 61, nil}, +// {"r:62", 62, nil}, // max r-value +// {"r:63", notset, strconv.ErrRange}, // out-of-range +// {"r:100", notset, strconv.ErrRange}, // out-of-range +// {"r:100001", notset, strconv.ErrRange}, // out-of-range +// {"p:64", notset, strconv.ErrRange}, +// {"p:100", notset, strconv.ErrRange}, +// {"r:1a", notset, strconv.ErrSyntax}, // not-hexadecimal +// {"p:-1", notset, strconv.ErrSyntax}, // non-negative +// } { +// t.Run(testName(test.in), func(t *testing.T) { +// // Note: passing isSampled=false as stated above. 
+// ts := pcommon.NewTraceState(test.in) +// otts, err := parseOTelTraceState(ts, false) + +// require.False(t, otts.hasTValue(), "should have no p-value") + +// if test.expectErr != nil { +// require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) +// } +// if test.rval != notset { +// require.True(t, otts.hasRValue()) +// require.Equal(t, test.rval, otts.rvalue) +// } else { +// require.False(t, otts.hasRValue(), "should have no r-value") +// } +// require.EqualValues(t, []string(nil), otts.unknown) + +// if test.expectErr == nil { +// // Require serialize to round-trip +// otts2, err := parseOTelTraceState(otts.serialize(), false) +// require.NoError(t, err) +// require.Equal(t, otts, otts2) +// } +// }) +// } +// } + +// func TestParseTraceStateSampled(t *testing.T) { +// type testCase struct { +// in string +// rval, pval uint8 +// expectErr error +// } +// const notset = 255 +// for _, test := range []testCase{ +// // All are sampled tests, i.e., `sampled` is set in traceparent. +// {"r:2;p:2", 2, 2, nil}, +// {"r:2;p:1", 2, 1, nil}, +// {"r:2;p:0", 2, 0, nil}, + +// {"r:1;p:1", 1, 1, nil}, +// {"r:1;p:0", 1, 0, nil}, + +// {"r:0;p:0", 0, 0, nil}, + +// {"r:62;p:0", 62, 0, nil}, +// {"r:62;p:62", 62, 62, nil}, + +// // The important special case: +// {"r:0;p:63", 0, 63, nil}, +// {"r:2;p:63", 2, 63, nil}, +// {"r:62;p:63", 62, 63, nil}, + +// // Inconsistent p causes unset p-value. +// {"r:2;p:3", 2, notset, errTraceStateInconsistent}, +// {"r:2;p:4", 2, notset, errTraceStateInconsistent}, +// {"r:2;p:62", 2, notset, errTraceStateInconsistent}, +// {"r:0;p:1", 0, notset, errTraceStateInconsistent}, +// {"r:1;p:2", 1, notset, errTraceStateInconsistent}, +// {"r:61;p:62", 61, notset, errTraceStateInconsistent}, + +// // Inconsistent r causes unset p-value and r-value. 
+// {"r:63;p:2", notset, notset, strconv.ErrRange}, +// {"r:120;p:2", notset, notset, strconv.ErrRange}, +// {"r:ab;p:2", notset, notset, strconv.ErrSyntax}, + +// // Syntax is tested before range errors +// {"r:ab;p:77", notset, notset, strconv.ErrSyntax}, + +// // p without r (when sampled) +// {"p:1", notset, 1, nil}, +// {"p:62", notset, 62, nil}, +// {"p:63", notset, 63, nil}, + +// // r without p (when sampled) +// {"r:2", 2, notset, nil}, +// {"r:62", 62, notset, nil}, +// {"r:0", 0, notset, nil}, +// } { +// t.Run(testName(test.in), func(t *testing.T) { +// // Note: passing isSampled=true as stated above. +// otts, err := parseOTelTraceState(test.in, true) + +// if test.expectErr != nil { +// require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) +// } else { +// require.NoError(t, err) +// } +// if test.pval != notset { +// require.True(t, otts.hasTValue()) +// require.Equal(t, test.pval, otts.pvalue) +// } else { +// require.False(t, otts.hasTValue(), "should have no p-value") +// } +// if test.rval != notset { +// require.True(t, otts.hasRValue()) +// require.Equal(t, test.rval, otts.rvalue) +// } else { +// require.False(t, otts.hasRValue(), "should have no r-value") +// } +// require.EqualValues(t, []string(nil), otts.unknown) + +// if test.expectErr == nil { +// // Require serialize to round-trip +// otts2, err := parseOTelTraceState(otts.serialize(), true) +// require.NoError(t, err) +// require.Equal(t, otts, otts2) +// } +// }) +// } +// } + +func TestParseTraceStateExtra(t *testing.T) { + type testCase struct { + in string + tval string + extra []string + expectErr error + } + const notset = "" + for _, test := range []testCase{ + {"t:2", "2", nil, nil}, + {"t:1;", notset, nil, strconv.ErrSyntax}, + {"t:1", "1", nil, nil}, + {"t:1=p:2", notset, nil, strconv.ErrSyntax}, + {"t:1;p:2=s:3", notset, nil, strconv.ErrSyntax}, + {":1;p:2=s:3", notset, nil, strconv.ErrSyntax}, + {":;p:2=s:3", notset, nil, strconv.ErrSyntax}, + {":;:", 
notset, nil, strconv.ErrSyntax}, + {":", notset, nil, strconv.ErrSyntax}, + {"", notset, nil, nil}, + {"t:;p=1", notset, nil, strconv.ErrSyntax}, + {"t:1", "1", nil, nil}, + {"t:10", "10", nil, nil}, + {"t:33", "33", nil, nil}, + {"t:61", "61", nil, nil}, + {"t:72057594037927936", "72057594037927936", nil, nil}, // max t-value = 0x1p+56 + {"t:0x1p-56", "0x1p-56", nil, nil}, // min t-value + + {"t:0x1p+57", notset, nil, strconv.ErrRange}, // out-of-range + {"t:72057594037927937", notset, nil, strconv.ErrRange}, // out-of-range + {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal + {"p:-1", notset, nil, strconv.ErrSyntax}, // non-negative + + // one field + {"e100:1", notset, []string{"e100:1"}, nil}, + + // two fields + {"e1:1;e2:2", notset, []string{"e1:1", "e2:2"}, nil}, + {"e1:1;e2:2", notset, []string{"e1:1", "e2:2"}, nil}, + + // one extra key, two ways + {"t:2;extra:stuff", "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;t:2", "2", []string{"extra:stuff"}, nil}, + + // two extra fields + {"e100:100;t:1;e101:101", "1", []string{"e100:100", "e101:101"}, nil}, + {"t:1;e100:100;e101:101", "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;t:1", "1", []string{"e100:100", "e101:101"}, nil}, + + // parse error prevents capturing unrecognized keys + {"1:1;u:V", notset, nil, strconv.ErrSyntax}, + {"X:1;u:V", notset, nil, strconv.ErrSyntax}, + {"x:1;u:V", notset, []string{"x:1", "u:V"}, nil}, + + // no trailing ; + {"x:1;", notset, nil, strconv.ErrSyntax}, + + // empty key + {"x:", notset, []string{"x:"}, nil}, + + // charset test + {"x:0X1FFF;y:.-_-.;z:", notset, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + + // size exceeded + {"x:" + strings.Repeat("_", 255), notset, nil, strconv.ErrSyntax}, + {"x:" + strings.Repeat("_", 254), notset, []string{"x:" + strings.Repeat("_", 254)}, nil}, + } { + t.Run(testName(test.in), func(t *testing.T) { + // Note: 
These tests are independent of sampling state, + // so both are tested. + otts, err := parseOTelTraceState(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) + } else { + require.NoError(t, err) + } + if test.tval != notset { + require.True(t, otts.hasTValue()) + require.Equal(t, "t:"+test.tval, otts.tvalueString) + } else { + + require.False(t, otts.hasTValue(), "should have no t-value") + } + require.EqualValues(t, test.extra, otts.unknown) + + // on success w/o t-value, serialize() should not modify + if !otts.hasTValue() && test.expectErr == nil { + require.Equal(t, test.in, otts.serialize()) + } + }) + } +} From d1fd891c84930325779aec5f005cfa893e7cbe7c Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 16 May 2023 12:27:41 -0700 Subject: [PATCH 03/38] test ot tracestate --- pkg/sampling/go.mod | 15 +- pkg/sampling/go.sum | 129 ++++++++++++++++++ pkg/sampling/tail.go | 50 +++---- pkg/sampling/tail_test.go | 102 +++++++++++--- pkg/{tracestate => sampling}/tracestate.go | 37 ++--- .../tracestate_test.go | 14 +- 6 files changed, 281 insertions(+), 66 deletions(-) rename pkg/{tracestate => sampling}/tracestate.go (84%) rename pkg/{tracestate => sampling}/tracestate_test.go (95%) diff --git a/pkg/sampling/go.mod b/pkg/sampling/go.mod index c69f61318218..e3e9f6c112a3 100644 --- a/pkg/sampling/go.mod +++ b/pkg/sampling/go.mod @@ -2,10 +2,23 @@ module github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling go 1.20 -require github.com/stretchr/testify v1.8.2 +require ( + github.com/stretchr/testify v1.8.2 + go.opentelemetry.io/collector/pdata v1.0.0-rcv0011 +) require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + go.uber.org/atomic v1.7.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/net v0.9.0 // indirect + 
golang.org/x/sys v0.7.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect + google.golang.org/grpc v1.54.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/pkg/sampling/go.sum b/pkg/sampling/go.sum index 6a56e69bb33d..dfcf22cd4962 100644 --- a/pkg/sampling/go.sum +++ b/pkg/sampling/go.sum @@ -1,17 +1,146 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf 
v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod 
h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/collector/pdata v0.66.0 h1:UdE5U6MsDNzuiWaXdjGx2lC3ElVqWmN/hiUE8vyvSuM= +go.opentelemetry.io/collector/pdata v0.66.0/go.mod h1:pqyaznLzk21m+1KL6fwOsRryRELL+zNM0qiVSn0MbVc= +go.opentelemetry.io/collector/pdata v1.0.0-rcv0011 h1:7lT0vseP89mHtUpvgmWYRvQZ0eY+SHbVsnXY20xkoMg= +go.opentelemetry.io/collector/pdata v1.0.0-rcv0011/go.mod h1:9vrXSQBeMRrdfGt9oMgYweqERJ8adaiQjN6LSbqRMMA= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= +go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod 
h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f h1:BWUVssLB0HVOSY78gIdvk1dTVYtT1y8SBWtPYuTJ/6w= +google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f/go.mod 
h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.51.0 h1:E1eGv1FTqoLIdnBCZufiSHgKjlqG6fKFf6pPWtMTh8U= +google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= +google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= +google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/pkg/sampling/tail.go b/pkg/sampling/tail.go index 3c9a0de33430..c4b251794b5c 100644 --- a/pkg/sampling/tail.go +++ b/pkg/sampling/tail.go @@ -15,7 +15,6 @@ package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" import ( - "bytes" "encoding/binary" "fmt" "strconv" @@ -23,20 +22,26 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" ) +// Threshold is an opaque type used to compare with the least-significant 7 bytes of the TraceID. 
+type Threshold struct { + // limit in range [1, 0x1p+56] + limit uint64 +} + const ( - MinSamplingProb = 0x1p-56 - MaxAdjustedCount int64 = 0x1p+56 // i.e., 1 / MinSamplingProb + MinSamplingProb = 0x1p-56 + MaxAdjustedCount = 0x1p+56 // i.e., 1 / MinSamplingProb + + LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 ) var ( ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") ErrAdjustedCountRange = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") - ErrAdjustedCountOnlyInteger = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") + ErrAdjustedCountOnlyInteger = fmt.Errorf("sampling adjusted count must be an integer") ) -type Threshold [7]byte - func probabilityInRange(prob float64) bool { return prob <= 1 && prob >= MinSamplingProb } @@ -90,15 +95,17 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { case number == 0: case number < MinSamplingProb: - return 0, 0, ErrAdjustedCountRange - case number > float64(MaxAdjustedCount): - return 0, 0, ErrAdjustedCountRange - case number >= 1: - // It's an integer adjusted count; re-parse as an integer. + return 0, 0, ErrProbabilityRange + case number > 1: + // Greater than 1 indicates adjusted count; re-parse + // as a decimal integer. 
integer, err := strconv.ParseInt(s, 10, 64) if err != nil { return 0, 0, ErrAdjustedCountOnlyInteger } + if integer > MaxAdjustedCount { + return 0, 0, ErrAdjustedCountRange + } adjusted = float64(integer) number = 1 / adjusted default: @@ -108,25 +115,20 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { return number, adjusted, nil } -func ProbabilityToThreshold(prob float64) (t Threshold, _ error) { +func ProbabilityToThreshold(prob float64) (Threshold, error) { if !probabilityInRange(prob) { - return t, ErrProbabilityRange + return Threshold{}, ErrProbabilityRange } - - unsigned := uint64(prob * 0x1p+56) - var bytes [8]byte - binary.BigEndian.PutUint64(bytes[:], unsigned) - copy(t[:], bytes[1:]) - return t, nil + return Threshold{ + limit: uint64(prob * 0x1p+56), + }, nil } func ThresholdToProbability(t Threshold) float64 { - var eight [8]byte - copy(eight[1:8], t[:]) - b56 := binary.BigEndian.Uint64(eight[:]) - return float64(b56) / 0x1p56 + return float64(t.limit) / MaxAdjustedCount } func (t Threshold) ShouldSample(id pcommon.TraceID) bool { - return bytes.Compare(id[9:16], t[:]) < 0 + value := binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask + return value < t.limit } diff --git a/pkg/sampling/tail_test.go b/pkg/sampling/tail_test.go index c8c58f9fef37..cfdddaf9f24d 100644 --- a/pkg/sampling/tail_test.go +++ b/pkg/sampling/tail_test.go @@ -15,11 +15,15 @@ package sampling import ( + "bytes" + "encoding/binary" "fmt" "math" + "math/rand" "testing" "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" ) func must[T any](t T, err error) T { @@ -53,23 +57,24 @@ func TestInvalidAdjustedCountToTvalue(t *testing.T) { } func TestValidProbabilityToTvalue(t *testing.T) { - require.Equal(t, "0x1p-01", must(ProbabilityToTvalue(0.5, -1))) - require.Equal(t, "0x1p-56", must(ProbabilityToTvalue(0x1p-56, -1))) - require.Equal(t, "0x1.555p-02", must(ProbabilityToTvalue(1/3., 3))) + 
require.Equal(t, "0x1p-01", must(ProbabilityToTvalue(0.5, 'x', -1))) + require.Equal(t, "0x1p-56", must(ProbabilityToTvalue(0x1p-56, 'x', -1))) + require.Equal(t, "0x1.555p-02", must(ProbabilityToTvalue(1/3., 'x', 3))) + require.Equal(t, "0", must(ProbabilityToTvalue(0, 'x', 3))) } func TestInvalidProbabilityToTvalue(t *testing.T) { // Too small - require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, -1))) - require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 0))) + require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 'x', -1))) + require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 'x', 0))) // Too big - require.Error(t, mustNot(ProbabilityToTvalue(1.1, -1))) - require.Error(t, mustNot(ProbabilityToTvalue(1.1, 0))) + require.Error(t, mustNot(ProbabilityToTvalue(1.1, 'x', -1))) + require.Error(t, mustNot(ProbabilityToTvalue(1.1, 'x', 0))) // Bad precision - require.Error(t, mustNot(ProbabilityToTvalue(0.5, -3))) - require.Error(t, mustNot(ProbabilityToTvalue(0.5, 15))) + require.Error(t, mustNot(ProbabilityToTvalue(0.5, 'x', -3))) + require.Error(t, mustNot(ProbabilityToTvalue(0.5, 'x', 15))) } func testTValueToProb(tv string) (float64, error) { @@ -106,25 +111,86 @@ func TestTvalueToAdjCount(t *testing.T) { func TestProbabilityToThreshold(t *testing.T) { require.Equal(t, - Threshold{0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + Threshold{0x1p+55}, must(ProbabilityToThreshold(0.5))) require.Equal(t, - Threshold{0, 0, 0, 0, 0, 0, 0}, + Threshold{1}, must(ProbabilityToThreshold(0x1p-56))) require.Equal(t, - Threshold{0, 0, 0, 0, 0, 0, 0xff}, + Threshold{0x100}, must(ProbabilityToThreshold(0x100p-56))) require.Equal(t, - Threshold{0, 0, 0, 0, 0, 0, 0x01}, + Threshold{2}, must(ProbabilityToThreshold(0x1p-55))) require.Equal(t, - Threshold{0, 0, 0, 0, 0, 0, 0x01}, - must(ProbabilityToThreshold(0x1p-55))) - require.Equal(t, - Threshold{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + Threshold{MaxAdjustedCount}, must(ProbabilityToThreshold(1.0))) require.Equal(t, - 
Threshold{0x55, 0x53, 0xff, 0xff, 0xff, 0xff, 0xff}, + Threshold{0x1.555p-2 * MaxAdjustedCount}, must(ProbabilityToThreshold(0x1.555p-2))) } + +// The two benchmarks below were used to choose the implementation for +// the Threshold type in this package. The results indicate that it +// is faster to compare a 56-bit number than to compare as 7 element []byte. + +type benchTIDs [1024]pcommon.TraceID + +func (tids *benchTIDs) init() { + for i := range tids { + binary.BigEndian.PutUint64(tids[i][:8], rand.Uint64()) + binary.BigEndian.PutUint64(tids[i][8:], rand.Uint64()) + } +} + +// BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op +func BenchmarkThresholdCompareAsUint64(b *testing.B) { + var tids benchTIDs + var comps [1024]uint64 + tids.init() + for i := range comps { + comps[i] = (rand.Uint64() % 0x1p+56) + 1 + } + + b.ReportAllocs() + b.ResetTimer() + yes := 0 + no := 0 + for i := 0; i < b.N; i++ { + tid := tids[i%len(tids)] + comp := comps[i%len(comps)] + // Read 8 bytes, mask to 7 bytes + val := binary.BigEndian.Uint64(tid[8:]) & (0x1p+56 - 1) + + if val < comp { + yes++ + } else { + no++ + } + } +} + +// BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op +func BenchmarkThresholdCompareAsBytes(b *testing.B) { + var tids benchTIDs + var comps [1024][7]byte + tids.init() + for i := range comps { + var e8 [8]byte + binary.BigEndian.PutUint64(e8[:], rand.Uint64()) + copy(comps[i][:], e8[1:]) + } + + b.ReportAllocs() + b.ResetTimer() + yes := 0 + no := 0 + for i := 0; i < b.N; i++ { + if bytes.Compare(tids[i%len(tids)][9:], comps[i%len(comps)][:]) <= 0 { + yes++ + } else { + no++ + } + } +} diff --git a/pkg/tracestate/tracestate.go b/pkg/sampling/tracestate.go similarity index 84% rename from pkg/tracestate/tracestate.go rename to pkg/sampling/tracestate.go index fd8a22430e96..17dc30214f7f 100644 --- a/pkg/tracestate/tracestate.go +++ b/pkg/sampling/tracestate.go @@ -12,7 +12,7 @@ // See the License for the 
specific language governing permissions and // limitations under the License. -package tracestate // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/tracestate" +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" import ( "fmt" @@ -32,7 +32,7 @@ var ( type otelTraceState struct { tvalueString string - tvalueParsed float64 + tvalueParsed Threshold unknown []string } @@ -138,6 +138,7 @@ func parseOTelTraceState(ts string) (otelTraceState, error) { // nolint: revive break } + // Here, handle recognized fields. if key == tValueSubkey { tval = ts[0 : sepPos+eqPos+1] } else { @@ -160,24 +161,26 @@ func parseOTelTraceState(ts string) (otelTraceState, error) { // nolint: revive } } - // @@@ Use ../sampling - tv, err := strconv.ParseFloat(tval, 64) - if err != nil { - err = fmt.Errorf("otel tracestate t-value: %w", strconv.ErrSyntax) - } - switch { - case tv < 0: - - case tv == 0: - case tv < 0x1p-56: - case tv > 0x1p+56: - } - otts := newTraceState() otts.unknown = unknown - otts.tvalueString = tval - otts.tvalueParsed = tv + if tval != "" { + if len(tval) == 1 { + return otts, fmt.Errorf("otel tracestate t-value: %w", strconv.ErrSyntax) + } + prob, _, err := TvalueToProbabilityAndAdjustedCount(tval[2:]) + if err != nil { + return otts, fmt.Errorf("otel tracestate t-value: %w", err) + } + + th, err := ProbabilityToThreshold(prob) + if err != nil { + return otts, fmt.Errorf("otel tracestate t-value: %w", err) + } + + otts.tvalueString = tval + otts.tvalueParsed = th + } return otts, nil } diff --git a/pkg/tracestate/tracestate_test.go b/pkg/sampling/tracestate_test.go similarity index 95% rename from pkg/tracestate/tracestate_test.go rename to pkg/sampling/tracestate_test.go index 55bae10e3648..237b79516490 100644 --- a/pkg/tracestate/tracestate_test.go +++ b/pkg/sampling/tracestate_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the 
License. -package tracestate +package sampling import ( "errors" @@ -210,6 +210,7 @@ func TestParseTraceStateExtra(t *testing.T) { } const notset = "" for _, test := range []testCase{ + // {"t:2", "2", nil, nil}, {"t:1;", notset, nil, strconv.ErrSyntax}, {"t:1", "1", nil, nil}, @@ -228,10 +229,11 @@ func TestParseTraceStateExtra(t *testing.T) { {"t:72057594037927936", "72057594037927936", nil, nil}, // max t-value = 0x1p+56 {"t:0x1p-56", "0x1p-56", nil, nil}, // min t-value - {"t:0x1p+57", notset, nil, strconv.ErrRange}, // out-of-range - {"t:72057594037927937", notset, nil, strconv.ErrRange}, // out-of-range - {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal - {"p:-1", notset, nil, strconv.ErrSyntax}, // non-negative + // various errors + {"t:0x1p+57", notset, nil, ErrAdjustedCountOnlyInteger}, // integer syntax + {"t:72057594037927937", notset, nil, ErrAdjustedCountRange}, // out-of-range + {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal + {"t:-1", notset, nil, ErrProbabilityRange}, // non-negative // one field {"e100:1", notset, []string{"e100:1"}, nil}, @@ -274,7 +276,7 @@ func TestParseTraceStateExtra(t *testing.T) { otts, err := parseOTelTraceState(test.in) if test.expectErr != nil { - require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) + require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) } else { require.NoError(t, err) } From 85e447257ce96810aedf5cd92f9c431cf57fcebf Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 16 May 2023 12:46:15 -0700 Subject: [PATCH 04/38] tidy --- pkg/sampling/tail.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/sampling/tail.go b/pkg/sampling/tail.go index c4b251794b5c..5c2cca04403f 100644 --- a/pkg/sampling/tail.go +++ b/pkg/sampling/tail.go @@ -120,15 +120,15 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { return Threshold{}, ErrProbabilityRange } return 
Threshold{ - limit: uint64(prob * 0x1p+56), + limit: uint64(prob * MaxAdjustedCount), }, nil } -func ThresholdToProbability(t Threshold) float64 { - return float64(t.limit) / MaxAdjustedCount -} - func (t Threshold) ShouldSample(id pcommon.TraceID) bool { value := binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask return value < t.limit } + +func (t Threshold) Probability() float64 { + return float64(t.limit) / MaxAdjustedCount +} From bb75f8aebfb68d6e912ecb806d00b24cc839973b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 16 May 2023 12:49:08 -0700 Subject: [PATCH 05/38] renames --- pkg/sampling/{tracestate.go => oteltracestate.go} | 0 pkg/sampling/{tracestate_test.go => oteltracestate_test.go} | 0 pkg/sampling/{tail.go => tvalue.go} | 0 pkg/sampling/{tail_test.go => tvalue_test.go} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename pkg/sampling/{tracestate.go => oteltracestate.go} (100%) rename pkg/sampling/{tracestate_test.go => oteltracestate_test.go} (100%) rename pkg/sampling/{tail.go => tvalue.go} (100%) rename pkg/sampling/{tail_test.go => tvalue_test.go} (100%) diff --git a/pkg/sampling/tracestate.go b/pkg/sampling/oteltracestate.go similarity index 100% rename from pkg/sampling/tracestate.go rename to pkg/sampling/oteltracestate.go diff --git a/pkg/sampling/tracestate_test.go b/pkg/sampling/oteltracestate_test.go similarity index 100% rename from pkg/sampling/tracestate_test.go rename to pkg/sampling/oteltracestate_test.go diff --git a/pkg/sampling/tail.go b/pkg/sampling/tvalue.go similarity index 100% rename from pkg/sampling/tail.go rename to pkg/sampling/tvalue.go diff --git a/pkg/sampling/tail_test.go b/pkg/sampling/tvalue_test.go similarity index 100% rename from pkg/sampling/tail_test.go rename to pkg/sampling/tvalue_test.go From 6a57b77308c551d5bfbb735114da9beaa42ab75d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 16 May 2023 17:28:58 -0700 Subject: [PATCH 06/38] testing two parsers w/ generic code --- 
pkg/sampling/anytracestate.go | 180 +++++++++++++++++ pkg/sampling/oteltracestate.go | 172 +++------------- pkg/sampling/oteltracestate_test.go | 191 ++---------------- pkg/sampling/w3ctracestate.go | 66 ++++++ pkg/sampling/w3ctracestate_test.go | 58 ++++++ .../probabilisticsamplerprocessor/config.go | 3 + 6 files changed, 354 insertions(+), 316 deletions(-) create mode 100644 pkg/sampling/anytracestate.go create mode 100644 pkg/sampling/w3ctracestate.go create mode 100644 pkg/sampling/w3ctracestate_test.go diff --git a/pkg/sampling/anytracestate.go b/pkg/sampling/anytracestate.go new file mode 100644 index 000000000000..777d2370f18d --- /dev/null +++ b/pkg/sampling/anytracestate.go @@ -0,0 +1,180 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "fmt" + "strconv" + "strings" +) + +const ( + traceStateSizeLimit = 256 +) + +var ( + errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) +) + +type anyTraceStateParser[Instance any] interface { + parseField(instance *Instance, key, input string) error +} + +type baseTraceState struct { + fields []string +} + +type baseTraceStateParser struct { +} + +func (bp baseTraceStateParser) parseField(instance *baseTraceState, _, input string) error { + instance.fields = append(instance.fields, input) + return nil +} + +type anyTraceStateSyntax[Instance any, Parser anyTraceStateParser[Instance]] struct { + separator byte + equality byte + valid string +} + +func (a *anyTraceStateSyntax[Instance, Parser]) serialize(base *baseTraceState, sb *strings.Builder) { + for _, field := range base.fields { + ex := 0 + if sb.Len() != 0 { + ex = 1 + } + if sb.Len()+ex+len(field) > traceStateSizeLimit { + // Note: should this generate an explicit error? 
+ break + } + a.separate(sb) + _, _ = sb.WriteString(field) + } +} + +func (a *anyTraceStateSyntax[Instance, Parser]) separate(sb *strings.Builder) { + if sb.Len() != 0 { + _ = sb.WriteByte(a.separator) + } +} + +var ( + w3cSyntax = anyTraceStateSyntax[w3CTraceState, w3CTraceStateParser]{ + separator: ',', + equality: '=', + valid: ";:._-+", + } + otelSyntax = anyTraceStateSyntax[otelTraceState, otelTraceStateParser]{ + separator: ';', + equality: ':', + valid: "._-+", + } +) + +func (syntax anyTraceStateSyntax[Instance, Parser]) parse(input string) (Instance, error) { + var parser Parser + var invalid Instance + var instance Instance + + if len(input) == 0 { + return invalid, nil + } + + if len(input) > traceStateSizeLimit { + return invalid, errTraceStateSyntax + } + + for len(input) > 0 { + eqPos := 0 + for ; eqPos < len(input); eqPos++ { + if eqPos == 0 { + if isLCAlpha(input[eqPos]) { + continue + } + } else if isLCAlphaNum(input[eqPos]) { + continue + } + break + } + if eqPos == 0 || eqPos == len(input) || input[eqPos] != syntax.equality { + return invalid, errTraceStateSyntax + } + + key := input[0:eqPos] + tail := input[eqPos+1:] + + sepPos := 0 + + for ; sepPos < len(tail); sepPos++ { + if syntax.isValueByte(tail[sepPos]) { + continue + } + break + } + + if err := parser.parseField(&instance, key, input[0:sepPos+eqPos+1]); err != nil { + return invalid, err + } + + if sepPos < len(tail) && tail[sepPos] != syntax.separator { + return invalid, errTraceStateSyntax + } + + if sepPos == len(tail) { + break + } + + input = tail[sepPos+1:] + + // test for a trailing ; + if input == "" { + return invalid, errTraceStateSyntax + } + } + return instance, nil +} + +func (syntax anyTraceStateSyntax[Instance, Parser]) isValueByte(r byte) bool { + if isLCAlphaNum(r) { + return true + } + if isUCAlpha(r) { + return true + } + return strings.ContainsRune(syntax.valid, rune(r)) +} + +func isLCAlphaNum(r byte) bool { + if isLCAlpha(r) { + return true + } + return r >= '0' && 
r <= '9' +} + +func isLCAlpha(r byte) bool { + return r >= 'a' && r <= 'z' +} + +func isUCAlpha(r byte) bool { + return r >= 'A' && r <= 'Z' +} + +func stripKey(key, input string) (string, error) { + if len(input) < len(key)+1 { + return "", errTraceStateSyntax + } + return input[len(key)+1:], nil +} diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 17dc30214f7f..884d033963b9 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -16,176 +16,54 @@ package sampling // import "github.com/open-telemetry/opentelemetry-collector-co import ( "fmt" - "strconv" "strings" ) -const ( - traceStateKey = "ot" - tValueSubkey = "t" - traceStateSizeLimit = 256 -) - -var ( - errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) -) - type otelTraceState struct { tvalueString string tvalueParsed Threshold - unknown []string + baseTraceState } -func newTraceState() otelTraceState { - return otelTraceState{ - tvalueString: "", // empty => !hasTValue(); includes "t:" prefix - } -} +type otelTraceStateParser struct{} -func (otts otelTraceState) serialize() string { - var sb strings.Builder - semi := func() { - if sb.Len() != 0 { - _, _ = sb.WriteString(";") +func (wp otelTraceStateParser) parseField(concrete *otelTraceState, key, input string) error { + switch { + case key == "t": + value, err := stripKey(key, input) + if err != nil { + return err } - } - if otts.hasTValue() { - _, _ = sb.WriteString(otts.tvalueString) - } - for _, unk := range otts.unknown { - ex := 0 - if sb.Len() != 0 { - ex = 1 + prob, _, err := TvalueToProbabilityAndAdjustedCount(value) + if err != nil { + return fmt.Errorf("otel tracestate t-value: %w", err) } - if sb.Len()+ex+len(unk) > traceStateSizeLimit { - // Note: should this generate an explicit error? 
- break + + th, err := ProbabilityToThreshold(prob) + if err != nil { + return fmt.Errorf("otel tracestate t-value: %w", err) } - semi() - _, _ = sb.WriteString(unk) - } - return sb.String() -} -func isValueByte(r byte) bool { - if isLCAlphaNum(r) { - return true - } - if isUCAlpha(r) { - return true - } - switch r { - case '.', '_', '-', '+': - return true - default: - return false - } -} + concrete.tvalueString = input + concrete.tvalueParsed = th -func isLCAlphaNum(r byte) bool { - if isLCAlpha(r) { - return true + return nil } - return r >= '0' && r <= '9' -} - -func isLCAlpha(r byte) bool { - return r >= 'a' && r <= 'z' -} -func isUCAlpha(r byte) bool { - return r >= 'A' && r <= 'Z' + return baseTraceStateParser{}.parseField(&concrete.baseTraceState, key, input) } -func parseOTelTraceState(ts string) (otelTraceState, error) { // nolint: revive - var tval string - var unknown []string - - if len(ts) == 0 { - return newTraceState(), nil - } - - if len(ts) > traceStateSizeLimit { - return newTraceState(), errTraceStateSyntax - } - - for len(ts) > 0 { - eqPos := 0 - for ; eqPos < len(ts); eqPos++ { - if eqPos == 0 { - if isLCAlpha(ts[eqPos]) { - continue - } - } else if isLCAlphaNum(ts[eqPos]) { - continue - } - break - } - if eqPos == 0 || eqPos == len(ts) || ts[eqPos] != ':' { - return newTraceState(), errTraceStateSyntax - } - - key := ts[0:eqPos] - tail := ts[eqPos+1:] - - sepPos := 0 - - for ; sepPos < len(tail); sepPos++ { - if isValueByte(tail[sepPos]) { - continue - } - break - } - - // Here, handle recognized fields. 
- if key == tValueSubkey { - tval = ts[0 : sepPos+eqPos+1] - } else { - unknown = append(unknown, ts[0:sepPos+eqPos+1]) - } - - if sepPos < len(tail) && tail[sepPos] != ';' { - return newTraceState(), errTraceStateSyntax - } - - if sepPos == len(tail) { - break - } - - ts = tail[sepPos+1:] +func (otts otelTraceState) serialize() string { + var sb strings.Builder - // test for a trailing ; - if ts == "" { - return newTraceState(), errTraceStateSyntax - } + if otts.hasTValue() { + _, _ = sb.WriteString(otts.tvalueString) } - otts := newTraceState() - otts.unknown = unknown - - if tval != "" { - if len(tval) == 1 { - return otts, fmt.Errorf("otel tracestate t-value: %w", strconv.ErrSyntax) - } - prob, _, err := TvalueToProbabilityAndAdjustedCount(tval[2:]) - if err != nil { - return otts, fmt.Errorf("otel tracestate t-value: %w", err) - } + otelSyntax.serialize(&otts.baseTraceState, &sb) - th, err := ProbabilityToThreshold(prob) - if err != nil { - return otts, fmt.Errorf("otel tracestate t-value: %w", err) - } - - otts.tvalueString = tval - otts.tvalueParsed = th - } - return otts, nil -} - -func parseError(key string, err error) error { - return fmt.Errorf("otel tracestate: %s-value %w", key, err) + return sb.String() } func (otts otelTraceState) hasTValue() bool { diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 237b79516490..0d76cf442d89 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -32,176 +32,30 @@ func testName(in string) string { } func TestNewTraceState(t *testing.T) { - otts := newTraceState() + otts := otelTraceState{} require.False(t, otts.hasTValue()) require.Equal(t, "", otts.serialize()) } func TestTraceStatePRValueSerialize(t *testing.T) { - otts := newTraceState() + otts := otelTraceState{} otts.tvalueString = "t:3" - otts.unknown = []string{"a:b", "c:d"} + otts.fields = []string{"a:b", "c:d"} require.True(t, otts.hasTValue()) require.Equal(t, "t:3;a:b;c:d", 
otts.serialize()) } func TestTraceStateSerializeOverflow(t *testing.T) { long := "x:" + strings.Repeat(".", 254) - otts := newTraceState() - otts.unknown = []string{long} + otts := otelTraceState{} + otts.fields = []string{long} // this drops the extra key, sorry! require.Equal(t, long, otts.serialize()) otts.tvalueString = "t:1" require.Equal(t, "t:1", otts.serialize()) } -// func TestParseTraceStateForTraceID(t *testing.T) { -// type testCase struct { -// in string -// rval uint8 -// expectErr error -// } -// const notset = 255 -// for _, test := range []testCase{ -// // All are unsampled tests, i.e., `sampled` is not set in traceparent. -// {"r:2", 2, nil}, -// {"r:1;", notset, strconv.ErrSyntax}, -// {"r:1", 1, nil}, -// {"r:1=p:2", notset, strconv.ErrSyntax}, -// {"r:1;p:2=s:3", notset, strconv.ErrSyntax}, -// {":1;p:2=s:3", notset, strconv.ErrSyntax}, -// {":;p:2=s:3", notset, strconv.ErrSyntax}, -// {":;:", notset, strconv.ErrSyntax}, -// {":", notset, strconv.ErrSyntax}, -// {"", notset, nil}, -// {"r:;p=1", notset, strconv.ErrSyntax}, -// {"r:1", 1, nil}, -// {"r:10", 10, nil}, -// {"r:33", 33, nil}, -// {"r:61", 61, nil}, -// {"r:62", 62, nil}, // max r-value -// {"r:63", notset, strconv.ErrRange}, // out-of-range -// {"r:100", notset, strconv.ErrRange}, // out-of-range -// {"r:100001", notset, strconv.ErrRange}, // out-of-range -// {"p:64", notset, strconv.ErrRange}, -// {"p:100", notset, strconv.ErrRange}, -// {"r:1a", notset, strconv.ErrSyntax}, // not-hexadecimal -// {"p:-1", notset, strconv.ErrSyntax}, // non-negative -// } { -// t.Run(testName(test.in), func(t *testing.T) { -// // Note: passing isSampled=false as stated above. 
-// ts := pcommon.NewTraceState(test.in) -// otts, err := parseOTelTraceState(ts, false) - -// require.False(t, otts.hasTValue(), "should have no p-value") - -// if test.expectErr != nil { -// require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) -// } -// if test.rval != notset { -// require.True(t, otts.hasRValue()) -// require.Equal(t, test.rval, otts.rvalue) -// } else { -// require.False(t, otts.hasRValue(), "should have no r-value") -// } -// require.EqualValues(t, []string(nil), otts.unknown) - -// if test.expectErr == nil { -// // Require serialize to round-trip -// otts2, err := parseOTelTraceState(otts.serialize(), false) -// require.NoError(t, err) -// require.Equal(t, otts, otts2) -// } -// }) -// } -// } - -// func TestParseTraceStateSampled(t *testing.T) { -// type testCase struct { -// in string -// rval, pval uint8 -// expectErr error -// } -// const notset = 255 -// for _, test := range []testCase{ -// // All are sampled tests, i.e., `sampled` is set in traceparent. -// {"r:2;p:2", 2, 2, nil}, -// {"r:2;p:1", 2, 1, nil}, -// {"r:2;p:0", 2, 0, nil}, - -// {"r:1;p:1", 1, 1, nil}, -// {"r:1;p:0", 1, 0, nil}, - -// {"r:0;p:0", 0, 0, nil}, - -// {"r:62;p:0", 62, 0, nil}, -// {"r:62;p:62", 62, 62, nil}, - -// // The important special case: -// {"r:0;p:63", 0, 63, nil}, -// {"r:2;p:63", 2, 63, nil}, -// {"r:62;p:63", 62, 63, nil}, - -// // Inconsistent p causes unset p-value. -// {"r:2;p:3", 2, notset, errTraceStateInconsistent}, -// {"r:2;p:4", 2, notset, errTraceStateInconsistent}, -// {"r:2;p:62", 2, notset, errTraceStateInconsistent}, -// {"r:0;p:1", 0, notset, errTraceStateInconsistent}, -// {"r:1;p:2", 1, notset, errTraceStateInconsistent}, -// {"r:61;p:62", 61, notset, errTraceStateInconsistent}, - -// // Inconsistent r causes unset p-value and r-value. 
-// {"r:63;p:2", notset, notset, strconv.ErrRange}, -// {"r:120;p:2", notset, notset, strconv.ErrRange}, -// {"r:ab;p:2", notset, notset, strconv.ErrSyntax}, - -// // Syntax is tested before range errors -// {"r:ab;p:77", notset, notset, strconv.ErrSyntax}, - -// // p without r (when sampled) -// {"p:1", notset, 1, nil}, -// {"p:62", notset, 62, nil}, -// {"p:63", notset, 63, nil}, - -// // r without p (when sampled) -// {"r:2", 2, notset, nil}, -// {"r:62", 62, notset, nil}, -// {"r:0", 0, notset, nil}, -// } { -// t.Run(testName(test.in), func(t *testing.T) { -// // Note: passing isSampled=true as stated above. -// otts, err := parseOTelTraceState(test.in, true) - -// if test.expectErr != nil { -// require.True(t, errors.Is(err, test.expectErr), "not expecting %v", err) -// } else { -// require.NoError(t, err) -// } -// if test.pval != notset { -// require.True(t, otts.hasTValue()) -// require.Equal(t, test.pval, otts.pvalue) -// } else { -// require.False(t, otts.hasTValue(), "should have no p-value") -// } -// if test.rval != notset { -// require.True(t, otts.hasRValue()) -// require.Equal(t, test.rval, otts.rvalue) -// } else { -// require.False(t, otts.hasRValue(), "should have no r-value") -// } -// require.EqualValues(t, []string(nil), otts.unknown) - -// if test.expectErr == nil { -// // Require serialize to round-trip -// otts2, err := parseOTelTraceState(otts.serialize(), true) -// require.NoError(t, err) -// require.Equal(t, otts, otts2) -// } -// }) -// } -// } - -func TestParseTraceStateExtra(t *testing.T) { +func TestParseOTelTraceState(t *testing.T) { type testCase struct { in string tval string @@ -210,29 +64,31 @@ func TestParseTraceStateExtra(t *testing.T) { } const notset = "" for _, test := range []testCase{ - // + // correct cases + {"", notset, nil, nil}, {"t:2", "2", nil, nil}, - {"t:1;", notset, nil, strconv.ErrSyntax}, {"t:1", "1", nil, nil}, + {"t:1", "1", nil, nil}, + {"t:10", "10", nil, nil}, + {"t:33", "33", nil, nil}, + {"t:61", "61", 
nil, nil}, + {"t:72057594037927936", "72057594037927936", nil, nil}, // max t-value = 0x1p+56 + {"t:0x1p-56", "0x1p-56", nil, nil}, // min t-value + + // syntax errors + {"t:1;", notset, nil, strconv.ErrSyntax}, {"t:1=p:2", notset, nil, strconv.ErrSyntax}, {"t:1;p:2=s:3", notset, nil, strconv.ErrSyntax}, {":1;p:2=s:3", notset, nil, strconv.ErrSyntax}, {":;p:2=s:3", notset, nil, strconv.ErrSyntax}, {":;:", notset, nil, strconv.ErrSyntax}, {":", notset, nil, strconv.ErrSyntax}, - {"", notset, nil, nil}, {"t:;p=1", notset, nil, strconv.ErrSyntax}, - {"t:1", "1", nil, nil}, - {"t:10", "10", nil, nil}, - {"t:33", "33", nil, nil}, - {"t:61", "61", nil, nil}, - {"t:72057594037927936", "72057594037927936", nil, nil}, // max t-value = 0x1p+56 - {"t:0x1p-56", "0x1p-56", nil, nil}, // min t-value + {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal - // various errors + // range errors {"t:0x1p+57", notset, nil, ErrAdjustedCountOnlyInteger}, // integer syntax {"t:72057594037927937", notset, nil, ErrAdjustedCountRange}, // out-of-range - {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal {"t:-1", notset, nil, ErrProbabilityRange}, // non-negative // one field @@ -271,9 +127,7 @@ func TestParseTraceStateExtra(t *testing.T) { {"x:" + strings.Repeat("_", 254), notset, []string{"x:" + strings.Repeat("_", 254)}, nil}, } { t.Run(testName(test.in), func(t *testing.T) { - // Note: These tests are independent of sampling state, - // so both are tested. 
- otts, err := parseOTelTraceState(test.in) + otts, err := otelSyntax.parse(test.in) if test.expectErr != nil { require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) @@ -284,10 +138,9 @@ func TestParseTraceStateExtra(t *testing.T) { require.True(t, otts.hasTValue()) require.Equal(t, "t:"+test.tval, otts.tvalueString) } else { - - require.False(t, otts.hasTValue(), "should have no t-value") + require.False(t, otts.hasTValue(), "should have no t-value: %s", otts.tvalueString) } - require.EqualValues(t, test.extra, otts.unknown) + require.EqualValues(t, test.extra, otts.fields) // on success w/o t-value, serialize() should not modify if !otts.hasTValue() && test.expectErr == nil { diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go new file mode 100644 index 000000000000..acab8fe7b977 --- /dev/null +++ b/pkg/sampling/w3ctracestate.go @@ -0,0 +1,66 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "fmt" + "strings" +) + +type w3CTraceState struct { + otelString string + otelParsed otelTraceState + baseTraceState +} + +type w3CTraceStateParser struct{} + +func (wp w3CTraceStateParser) parseField(concrete *w3CTraceState, key, input string) error { + switch { + case key == "ot": + value, err := stripKey(key, input) + if err != nil { + return err + } + + otts, err := otelSyntax.parse(value) + + if err != nil { + return fmt.Errorf("w3c tracestate otel value: %w", err) + } + + concrete.otelString = input + concrete.otelParsed = otts + return nil + } + + return baseTraceStateParser{}.parseField(&concrete.baseTraceState, key, input) +} + +func (wts w3CTraceState) serialize() string { + var sb strings.Builder + + if wts.hasOTelValue() { + _, _ = sb.WriteString(wts.otelString) + } + + w3cSyntax.serialize(&wts.baseTraceState, &sb) + + return sb.String() +} + +func (wts w3CTraceState) hasOTelValue() bool { + return wts.otelString != "" +} diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go new file mode 100644 index 000000000000..16f7d21b20f3 --- /dev/null +++ b/pkg/sampling/w3ctracestate_test.go @@ -0,0 +1,58 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sampling + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseW3CTraceState(t *testing.T) { + type testCase struct { + in string + otval string + expectErr error + } + const notset = "" + for _, test := range []testCase{ + // correct cases + {"ot=t:1", "t:1", nil}, + {"ot=t:100", "t:100", nil}, + } { + t.Run(testName(test.in), func(t *testing.T) { + otts, err := w3cSyntax.parse(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) + } else { + require.NoError(t, err) + } + if test.otval != notset { + require.True(t, otts.hasOTelValue()) + require.Equal(t, "ot="+test.otval, otts.otelString) + } else { + + require.False(t, otts.hasOTelValue(), "should have no otel value") + } + + // on success w/o t-value, serialize() should not modify + if !otts.hasOTelValue() && test.expectErr == nil { + require.Equal(t, test.in, otts.serialize()) + } + }) + } +} diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index f68d4f5b17d6..bb3679036154 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -43,6 +43,9 @@ type Config struct { // treated as having four significant figures when conveying the sampling probability. SamplingPercentage float32 `mapstructure:"sampling_percentage"` + // @@@ TODO + // SamplingOneInN int64 + // HashSeed allows one to configure the legacy hashing seed. The current version of this protocol assumes // that tracecontext v2 TraceIDs are being used, which ensures 7 bytes of randomness are available. We assume // this is the case when HashSeed == 0. 
From 7fa81300185b1bd50145ea4a7d13194d7157725b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 17 May 2023 13:18:09 -0700 Subject: [PATCH 07/38] integrated --- pkg/sampling/anytracestate.go | 24 +-- pkg/sampling/oteltracestate.go | 34 +++- pkg/sampling/tvalue.go | 64 +++++-- pkg/sampling/tvalue_test.go | 1 + pkg/sampling/w3ctracestate.go | 28 +-- pkg/sampling/w3ctracestate_test.go | 12 +- .../factory_test.go | 6 +- .../tracesprocessor.go | 173 +++++++++++++----- .../tracesprocessor_test.go | 2 +- 9 files changed, 239 insertions(+), 105 deletions(-) diff --git a/pkg/sampling/anytracestate.go b/pkg/sampling/anytracestate.go index 777d2370f18d..cf0ad246bec1 100644 --- a/pkg/sampling/anytracestate.go +++ b/pkg/sampling/anytracestate.go @@ -45,9 +45,9 @@ func (bp baseTraceStateParser) parseField(instance *baseTraceState, _, input str } type anyTraceStateSyntax[Instance any, Parser anyTraceStateParser[Instance]] struct { - separator byte - equality byte - valid string + separator byte + equality byte + allowPunct string } func (a *anyTraceStateSyntax[Instance, Parser]) serialize(base *baseTraceState, sb *strings.Builder) { @@ -72,15 +72,15 @@ func (a *anyTraceStateSyntax[Instance, Parser]) separate(sb *strings.Builder) { } var ( - w3cSyntax = anyTraceStateSyntax[w3CTraceState, w3CTraceStateParser]{ - separator: ',', - equality: '=', - valid: ";:._-+", + w3cSyntax = anyTraceStateSyntax[W3CTraceState, w3CTraceStateParser]{ + separator: ',', + equality: '=', + allowPunct: ";:._-+", } - otelSyntax = anyTraceStateSyntax[otelTraceState, otelTraceStateParser]{ - separator: ';', - equality: ':', - valid: "._-+", + otelSyntax = anyTraceStateSyntax[OTelTraceState, otelTraceStateParser]{ + separator: ';', + equality: ':', + allowPunct: "._-+", } ) @@ -154,7 +154,7 @@ func (syntax anyTraceStateSyntax[Instance, Parser]) isValueByte(r byte) bool { if isUCAlpha(r) { return true } - return strings.ContainsRune(syntax.valid, rune(r)) + return 
strings.ContainsRune(syntax.allowPunct, rune(r)) } func isLCAlphaNum(r byte) bool { diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 884d033963b9..fcd56cf5efaa 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -19,7 +19,7 @@ import ( "strings" ) -type otelTraceState struct { +type OTelTraceState struct { tvalueString string tvalueParsed Threshold baseTraceState @@ -27,7 +27,7 @@ type otelTraceState struct { type otelTraceStateParser struct{} -func (wp otelTraceStateParser) parseField(concrete *otelTraceState, key, input string) error { +func (wp otelTraceStateParser) parseField(instance *OTelTraceState, key, input string) error { switch { case key == "t": value, err := stripKey(key, input) @@ -45,19 +45,19 @@ func (wp otelTraceStateParser) parseField(concrete *otelTraceState, key, input s return fmt.Errorf("otel tracestate t-value: %w", err) } - concrete.tvalueString = input - concrete.tvalueParsed = th + instance.tvalueString = input + instance.tvalueParsed = th return nil } - return baseTraceStateParser{}.parseField(&concrete.baseTraceState, key, input) + return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) } -func (otts otelTraceState) serialize() string { +func (otts *OTelTraceState) serialize() string { var sb strings.Builder - if otts.hasTValue() { + if otts.TValue() != "" { _, _ = sb.WriteString(otts.tvalueString) } @@ -66,6 +66,24 @@ func (otts otelTraceState) serialize() string { return sb.String() } -func (otts otelTraceState) hasTValue() bool { +func (otts *OTelTraceState) HasTValue() bool { return otts.tvalueString != "" } + +func (otts *OTelTraceState) UnsetTValue() { + otts.tvalueString = "" + otts.tvalueParsed = Threshold{} +} + +func (otts *OTelTraceState) TValue() string { + return otts.tvalueString +} + +func (otts *OTelTraceState) TValueThreshold() Threshold { + return otts.tvalueParsed +} + +func (otts *OTelTraceState) SetTValue(encoded string, 
threshold Threshold) { + otts.tvalueString = encoded + otts.tvalueParsed = threshold +} diff --git a/pkg/sampling/tvalue.go b/pkg/sampling/tvalue.go index 5c2cca04403f..ae27daa81964 100644 --- a/pkg/sampling/tvalue.go +++ b/pkg/sampling/tvalue.go @@ -18,28 +18,51 @@ import ( "encoding/binary" "fmt" "strconv" + "strings" "go.opentelemetry.io/collector/pdata/pcommon" ) +const ( + // MinSamplingProb is one in 2^56. + MinSamplingProb = 0x1p-56 + + // MaxAdjustedCount is the adjusted count corresponding with + // MinSamplingProb (i.e., 1 / MinSamplingProb). 0x1p+56 + MaxAdjustedCount = 1 / MinSamplingProb + + // LeastHalfTraceIDThresholdMask is the mask to use on the + // least-significant half of the TraceID, i.e., bytes 8-15. + // Because this is a 56 bit mask, the result after masking is + // the unsigned value of bytes 9 through 15. + LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 + + // TValueZeroEncoding is the encoding for 0 adjusted count. + TValueZeroEncoding = "t:0" + TValueOneEncoding = "t:1" +) + // Threshold is an opaque type used to compare with the least-significant 7 bytes of the TraceID. type Threshold struct { - // limit in range [1, 0x1p+56] + // limit is in the range [0, 0x1p+56]. + // - 0 represents zero probability (no TraceID values are less-than) + // - 1 represents MinSamplingProb (1 TraceID value is less-than) + // - MaxAdjustedCount represents 100% sampling (all TraceID values are less-than). limit uint64 } -const ( - MinSamplingProb = 0x1p-56 - MaxAdjustedCount = 0x1p+56 // i.e., 1 / MinSamplingProb +var ( + // ErrProbabilityRange is returned when a value should be in the range [MinSamplingProb, 1]. + ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") - LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 -) + // ErrAdjustedCountRange is returned when a value should be in the range [1, MaxAdjustedCount]. 
+ ErrAdjustedCountRange = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") -var ( - ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") - ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") - ErrAdjustedCountRange = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") + // ErrAdjustedCountOnlyInteger is returned when a floating-point syntax is used to convey adjusted count. ErrAdjustedCountOnlyInteger = fmt.Errorf("sampling adjusted count must be an integer") + + // ErrPrecisionRange is returned when the precision argument is out of range. + ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") ) func probabilityInRange(prob float64) bool { @@ -63,9 +86,9 @@ func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { // Probability cases switch { case prob == 1: - return "1", nil + return TValueOneEncoding, nil case prob == 0: - return "0", nil + return TValueZeroEncoding, nil case !probabilityInRange(prob): return "", ErrProbabilityRange } @@ -81,11 +104,14 @@ func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { return "", ErrPrecisionRange } - return strconv.FormatFloat(prob, format, prec, 64), nil + return "t:" + strconv.FormatFloat(prob, format, prec, 64), nil } func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { - number, err := strconv.ParseFloat(s, 64) // e.g., "0x1.b7p-02" -> approx 3/7 + if !strings.HasPrefix(s, "t:") { + return 0, 0, strconv.ErrSyntax + } + number, err := strconv.ParseFloat(s[2:], 64) // e.g., "0x1.b7p-02" -> approx 3/7 if err != nil { return 0, 0, err } @@ -99,7 +125,7 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { case number > 1: // Greater than 1 indicates adjusted count; re-parse // as a decimal integer. 
- integer, err := strconv.ParseInt(s, 10, 64) + integer, err := strconv.ParseInt(s[2:], 10, 64) if err != nil { return 0, 0, ErrAdjustedCountOnlyInteger } @@ -116,7 +142,9 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { } func ProbabilityToThreshold(prob float64) (Threshold, error) { - if !probabilityInRange(prob) { + // Note: prob == 0 is an allowed special case. Because we + // use less-than, all spans are unsampled with Threshold{0}. + if prob != 0 && !probabilityInRange(prob) { return Threshold{}, ErrProbabilityRange } return Threshold{ @@ -132,3 +160,7 @@ func (t Threshold) ShouldSample(id pcommon.TraceID) bool { func (t Threshold) Probability() float64 { return float64(t.limit) / MaxAdjustedCount } + +func (t Threshold) Unsigned() uint64 { + return t.limit +} diff --git a/pkg/sampling/tvalue_test.go b/pkg/sampling/tvalue_test.go index cfdddaf9f24d..c1b0e5e9d4c6 100644 --- a/pkg/sampling/tvalue_test.go +++ b/pkg/sampling/tvalue_test.go @@ -61,6 +61,7 @@ func TestValidProbabilityToTvalue(t *testing.T) { require.Equal(t, "0x1p-56", must(ProbabilityToTvalue(0x1p-56, 'x', -1))) require.Equal(t, "0x1.555p-02", must(ProbabilityToTvalue(1/3., 'x', 3))) require.Equal(t, "0", must(ProbabilityToTvalue(0, 'x', 3))) + require.Equal(t, "0", must(ProbabilityToTvalue(0, 'f', 4))) } func TestInvalidProbabilityToTvalue(t *testing.T) { diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index acab8fe7b977..ff7eb9cbbde8 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -19,15 +19,18 @@ import ( "strings" ) -type w3CTraceState struct { - otelString string - otelParsed otelTraceState +type W3CTraceState struct { + otelParsed OTelTraceState baseTraceState } type w3CTraceStateParser struct{} -func (wp w3CTraceStateParser) parseField(concrete *w3CTraceState, key, input string) error { +func NewW3CTraceState(input string) (W3CTraceState, error) { + return w3cSyntax.parse(input) +} + +func (wp 
w3CTraceStateParser) parseField(instance *W3CTraceState, key, input string) error { switch { case key == "ot": value, err := stripKey(key, input) @@ -41,19 +44,20 @@ func (wp w3CTraceStateParser) parseField(concrete *w3CTraceState, key, input str return fmt.Errorf("w3c tracestate otel value: %w", err) } - concrete.otelString = input - concrete.otelParsed = otts + instance.otelParsed = otts return nil } - return baseTraceStateParser{}.parseField(&concrete.baseTraceState, key, input) + return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) } -func (wts w3CTraceState) serialize() string { +func (wts *W3CTraceState) Serialize() string { var sb strings.Builder - if wts.hasOTelValue() { - _, _ = sb.WriteString(wts.otelString) + ots := wts.otelParsed.serialize() + if ots != "" { + _, _ = sb.WriteString("ot=") + _, _ = sb.WriteString(ots) } w3cSyntax.serialize(&wts.baseTraceState, &sb) @@ -61,6 +65,6 @@ func (wts w3CTraceState) serialize() string { return sb.String() } -func (wts w3CTraceState) hasOTelValue() bool { - return wts.otelString != "" +func (wts *W3CTraceState) OTelValue() *OTelTraceState { + return &wts.otelParsed } diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index 16f7d21b20f3..ece0281e953e 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -34,7 +34,7 @@ func TestParseW3CTraceState(t *testing.T) { {"ot=t:100", "t:100", nil}, } { t.Run(testName(test.in), func(t *testing.T) { - otts, err := w3cSyntax.parse(test.in) + wts, err := w3cSyntax.parse(test.in) if test.expectErr != nil { require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) @@ -42,16 +42,16 @@ func TestParseW3CTraceState(t *testing.T) { require.NoError(t, err) } if test.otval != notset { - require.True(t, otts.hasOTelValue()) - require.Equal(t, "ot="+test.otval, otts.otelString) + require.True(t, wts.hasOTelValue()) + require.Equal(t, 
"ot="+test.otval, wts.otelString) } else { - require.False(t, otts.hasOTelValue(), "should have no otel value") + require.False(t, wts.hasOTelValue(), "should have no otel value") } // on success w/o t-value, serialize() should not modify - if !otts.hasOTelValue() && test.expectErr == nil { - require.Equal(t, test.in, otts.serialize()) + if !wts.hasOTelValue() && test.expectErr == nil { + require.Equal(t, test.in, wts.serialize()) } }) } diff --git a/processor/probabilisticsamplerprocessor/factory_test.go b/processor/probabilisticsamplerprocessor/factory_test.go index 35c59af0a597..d3dfcd9acc03 100644 --- a/processor/probabilisticsamplerprocessor/factory_test.go +++ b/processor/probabilisticsamplerprocessor/factory_test.go @@ -26,22 +26,22 @@ import ( func TestCreateDefaultConfig(t *testing.T) { cfg := createDefaultConfig() - assert.NotNil(t, cfg, "failed to create default config") assert.NoError(t, componenttest.CheckConfigStruct(cfg)) + assert.NotNil(t, cfg, "failed to create default config") } func TestCreateProcessor(t *testing.T) { cfg := createDefaultConfig() set := processortest.NewNopCreateSettings() tp, err := createTracesProcessor(context.Background(), set, cfg, consumertest.NewNop()) - assert.NotNil(t, tp) assert.NoError(t, err, "cannot create trace processor") + assert.NotNil(t, tp) } func TestCreateProcessorLogs(t *testing.T) { cfg := createDefaultConfig() set := processortest.NewNopCreateSettings() tp, err := createLogsProcessor(context.Background(), set, cfg, consumertest.NewNop()) - assert.NotNil(t, tp) assert.NoError(t, err, "cannot create logs processor") + assert.NotNil(t, tp) } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 2e2a8590dcea..514c966b4ac4 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -16,6 +16,7 @@ package probabilisticsamplerprocessor // import 
"github.com/open-telemetry/opent import ( "context" + "fmt" "strconv" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" @@ -30,6 +31,8 @@ import ( "go.uber.org/zap" ) +var ErrInconsistentTValue = fmt.Errorf("inconsistent OTel TraceState t-value set") + // samplingPriority has the semantic result of parsing the "sampling.priority" // attribute per OpenTracing semantic conventions. type samplingPriority int @@ -48,32 +51,47 @@ const ( // by the collector. doNotSampleSpan - // The constants help translate user friendly percentages to numbers direct used in sampling. + // Hashing method: The constants below help translate user friendly percentages + // to numbers direct used in sampling. numHashBuckets = 0x4000 // Using a power of 2 to avoid division. bitMaskHashBuckets = numHashBuckets - 1 percentageScaleFactor = numHashBuckets / 100.0 - - zeroTvalue = "t:0" ) -type traceSamplerProcessor struct { - // Legacy hash-based calculation +type traceSampler interface { + // shouldSample reports the result based on a probabilistic decision. + shouldSample(trace pcommon.TraceID) bool + + // updateSampled modifies the span assuming it will be + // sampled, probabilistically or otherwise. The "should" parameter + // is the result from shouldSample(), for the span's TraceID, which + // will not be recalculated. Returns an error when the incoming TraceState + // cannot be parsed. 
+ updateSampled(span ptrace.Span, should bool) error +} + +type traceProcessor struct { + sampler traceSampler + logger *zap.Logger +} + +type traceHashSampler struct { + // Hash-based calculation hashScaledSamplingRate uint32 hashSeed uint32 +} - // Modern TraceID-randomness-based calculation +type traceIDSampler struct { + // TraceID-randomness-based calculation traceIDThreshold sampling.Threshold - tValueEncoding string - logger *zap.Logger + // tValueEncoding includes the leading "t:" + tValueEncoding string } // newTracesProcessor returns a processor.TracesProcessor that will perform head sampling according to the given // configuration. func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg *Config, nextConsumer consumer.Traces) (processor.Traces, error) { - tsp := &traceSamplerProcessor{ - logger: set.Logger, - } // README allows percents >100 to equal 100%, but t-value // encoding does not. Correct it here. pct := float64(cfg.SamplingPercentage) @@ -81,10 +99,18 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * pct = 100 } + tp := &traceProcessor{ + logger: set.Logger, + } + if cfg.HashSeed != 0 { + ts := &traceHashSampler{} + // Adjust sampling percentage on private so recalculations are avoided. - tsp.hashScaledSamplingRate = uint32(pct * percentageScaleFactor) - tsp.hashSeed = cfg.HashSeed + ts.hashScaledSamplingRate = uint32(pct * percentageScaleFactor) + ts.hashSeed = cfg.HashSeed + + tp.sampler = ts } else { // Encode t-value (OTEP 226), like %.4f. (See FormatFloat().) ratio := pct / 100 @@ -92,13 +118,22 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * if err != nil { return nil, err } + // Parse the exact value of probability encoded at this precision. + ratio, _, err = sampling.TvalueToProbabilityAndAdjustedCount(tval) + if err != nil { + return nil, err + } + // Compute the sampling threshold from the exact probability. 
threshold, err := sampling.ProbabilityToThreshold(ratio) if err != nil { return nil, err } - tsp.tValueEncoding = tval - tsp.traceIDThreshold = threshold + ts := &traceIDSampler{} + ts.tValueEncoding = tval + ts.traceIDThreshold = threshold + + tp.sampler = ts } return processorhelper.NewTracesProcessor( @@ -106,26 +141,85 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * set, cfg, nextConsumer, - tsp.processTraces, + tp.processTraces, processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } -func (tsp *traceSamplerProcessor) probabilitySampleFromTraceID(input pcommon.TraceID) (sample, consistent bool) { - // When the hash seed is set, fall back to the legacy behavior - // using the FNV hash. - if tsp.hashSeed != 0 { - // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources - // with various different criteria to generate trace id and perhaps were already sampled without hashing. - // Hashing here prevents bias due to such systems. - return computeHash(input[:], tsp.hashSeed)&bitMaskHashBuckets < tsp.hashScaledSamplingRate, false +func (ts *traceHashSampler) shouldSample(input pcommon.TraceID) bool { + // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources + // with various different criteria to generate trace id and perhaps were already sampled without hashing. + // Hashing here prevents bias due to such systems. 
+ return computeHash(input[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate +} + +func (ts *traceHashSampler) updateSampled(ptrace.Span, bool) error { + // Nothing specified + return nil +} + +func (ts *traceIDSampler) shouldSample(input pcommon.TraceID) bool { + return ts.traceIDThreshold.ShouldSample(input) +} + +func (ts *traceIDSampler) updateSampled(span ptrace.Span, should bool) error { + state := span.TraceState() + raw := state.AsRaw() + + // Fast path for the case where there is no arriving TraceState. + if raw == "" { + if should { + state.FromRaw(ts.tValueEncoding) + } else { + state.FromRaw(sampling.TValueZeroEncoding) + } + return nil } - // Hash seed zero => assume tracecontext v2 + // Parse the arriving TraceState. + wts, err := sampling.NewW3CTraceState(raw) + if err != nil { + return err + } - return tsp.traceIDThreshold.ShouldSample(input), true + // Using the OTel trace state value: + otts := wts.OTelValue() + + // When this sampler decided not to sample, the t-value becomes zero. + // Incoming TValue consistency is not checked when this happens. + if !should { + otts.SetTValue("0", sampling.Threshold{}) + state.FromRaw(wts.Serialize()) + return nil + } + + arrivingHasNonZeroTValue := otts.HasTValue() && otts.TValueThreshold().Unsigned() != 0 + + if arrivingHasNonZeroTValue { + // Consistency check: if the TraceID is out of range + // (unless the TValue is zero), the TValue is a lie. + // If inconsistent, clear it. + if !otts.TValueThreshold().ShouldSample(span.TraceID()) { + // This value is returned below; the span continues + // with any t-value. + err = ErrInconsistentTValue + arrivingHasNonZeroTValue = false + otts.UnsetTValue() + } + } + + if arrivingHasNonZeroTValue && otts.TValueThreshold().Unsigned() < ts.traceIDThreshold.Unsigned() { + // Already-sampled case: test whether the unsigned value of the + // threshold is smaller than this sampler is configured with. + return err + } + + // Set the new effective t-value. 
+ otts.SetTValue(ts.tValueEncoding, ts.traceIDThreshold) + state.FromRaw(wts.Serialize()) + return err } -func (tsp *traceSamplerProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { +func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { td.ResourceSpans().RemoveIf(func(rs ptrace.ResourceSpans) bool { rs.ScopeSpans().RemoveIf(func(ils ptrace.ScopeSpans) bool { ils.Spans().RemoveIf(func(s ptrace.Span) bool { @@ -144,7 +238,7 @@ func (tsp *traceSamplerProcessor) processTraces(ctx context.Context, td ptrace.T forceSample := sp == mustSampleSpan - probSample, consistent := tsp.probabilitySampleFromTraceID(s.TraceID()) + probSample := tp.sampler.shouldSample(s.TraceID()) sampled := forceSample || probSample @@ -162,25 +256,10 @@ func (tsp *traceSamplerProcessor) processTraces(ctx context.Context, td ptrace.T ) } - if consistent { - // Attach the t-value! - ts := s.TraceState() - - // Get the t-value encoding. - enc := tsp.tValueEncoding - if !probSample { - // forceSample is implied, use the zero value. - enc = zeroTvalue - } - - raw := ts.AsRaw() - if raw == "" { - // No incoming t-value, i.e., the simple case. - ts.FromRaw(enc) - } else { - // Complex case: combine t-values. 
- // TODO @@@ bring in code from - // https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/samplers/probability/consistent + if sampled { + err := tp.sampler.updateSampled(s, probSample) + if err != nil { + tp.logger.Info("sampling t-value update failed", zap.Error(err)) } } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index afabc5fc45cf..56fe0780dc7c 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -99,7 +99,7 @@ func Test_tracesamplerprocessor_SamplingPercentageRange(t *testing.T) { cfg: &Config{ SamplingPercentage: 5, }, - numBatches: 1e5, + numBatches: 1e6, numTracesPerBatch: 2, acceptableDelta: 0.01, }, From 36230e7d59a6153f55cd2f7efc36a65ca8c45db0 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 17 May 2023 15:02:00 -0700 Subject: [PATCH 08/38] Comments --- pkg/sampling/anytracestate.go | 82 ++++++++++++++-------------------- pkg/sampling/doc.go | 16 ------- pkg/sampling/oteltracestate.go | 21 ++++++++- pkg/sampling/tvalue.go | 48 ++++++++++++++------ pkg/sampling/w3ctracestate.go | 35 ++++++++++++++- 5 files changed, 123 insertions(+), 79 deletions(-) delete mode 100644 pkg/sampling/doc.go diff --git a/pkg/sampling/anytracestate.go b/pkg/sampling/anytracestate.go index cf0ad246bec1..458868892832 100644 --- a/pkg/sampling/anytracestate.go +++ b/pkg/sampling/anytracestate.go @@ -20,70 +20,38 @@ import ( "strings" ) -const ( - traceStateSizeLimit = 256 -) - -var ( - errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) -) +// errTraceStateSyntax is returned for a variety of syntax errors. +var errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) +// anyTraceStateParser describes how to instance types recognize +// specific fields. 
type anyTraceStateParser[Instance any] interface { parseField(instance *Instance, key, input string) error } -type baseTraceState struct { - fields []string -} - -type baseTraceStateParser struct { -} - -func (bp baseTraceStateParser) parseField(instance *baseTraceState, _, input string) error { - instance.fields = append(instance.fields, input) - return nil -} - +// anyTraceStateSyntax describes a variable key/value syntax. type anyTraceStateSyntax[Instance any, Parser anyTraceStateParser[Instance]] struct { separator byte equality byte allowPunct string } -func (a *anyTraceStateSyntax[Instance, Parser]) serialize(base *baseTraceState, sb *strings.Builder) { +// serializeBase adds the base fields to the output. +func (syntax anyTraceStateSyntax[Instance, Parser]) serializeBase(base *baseTraceState, sb *strings.Builder) { for _, field := range base.fields { - ex := 0 - if sb.Len() != 0 { - ex = 1 - } - if sb.Len()+ex+len(field) > traceStateSizeLimit { - // Note: should this generate an explicit error? - break - } - a.separate(sb) + syntax.separate(sb) _, _ = sb.WriteString(field) } } -func (a *anyTraceStateSyntax[Instance, Parser]) separate(sb *strings.Builder) { +// separate adds a separator to the output. +func (syntax anyTraceStateSyntax[Instance, Parser]) separate(sb *strings.Builder) { if sb.Len() != 0 { - _ = sb.WriteByte(a.separator) + _ = sb.WriteByte(syntax.separator) } } -var ( - w3cSyntax = anyTraceStateSyntax[W3CTraceState, w3CTraceStateParser]{ - separator: ',', - equality: '=', - allowPunct: ";:._-+", - } - otelSyntax = anyTraceStateSyntax[OTelTraceState, otelTraceStateParser]{ - separator: ';', - equality: ':', - allowPunct: "._-+", - } -) - +// parse uses variable syntax to parse the input string into key/value fields. 
func (syntax anyTraceStateSyntax[Instance, Parser]) parse(input string) (Instance, error) { var parser Parser var invalid Instance @@ -93,10 +61,6 @@ func (syntax anyTraceStateSyntax[Instance, Parser]) parse(input string) (Instanc return invalid, nil } - if len(input) > traceStateSizeLimit { - return invalid, errTraceStateSyntax - } - for len(input) > 0 { eqPos := 0 for ; eqPos < len(input); eqPos++ { @@ -147,6 +111,9 @@ func (syntax anyTraceStateSyntax[Instance, Parser]) parse(input string) (Instanc return instance, nil } +// isValueByte determines whether the byte is valid as part of a +// tracestate value. This is based on the syntax, since the W3C syntax +// allows the OTel separator and equality symbol to appear in values. func (syntax anyTraceStateSyntax[Instance, Parser]) isValueByte(r byte) bool { if isLCAlphaNum(r) { return true @@ -157,6 +124,22 @@ func (syntax anyTraceStateSyntax[Instance, Parser]) isValueByte(r byte) bool { return strings.ContainsRune(syntax.allowPunct, rune(r)) } +// baseTraceState encodes not-specified fields as a list. They will +// be re-encoded when serialized. +type baseTraceState struct { + fields []string +} + +// baseTraceStateParser parses not-specified fields into a list. +type baseTraceStateParser struct{} + +// parseField adds to the list of not-specified fields. +func (bp baseTraceStateParser) parseField(instance *baseTraceState, _, input string) error { + instance.fields = append(instance.fields, input) + return nil +} + +// isLCAlphaNum returns true for a-z, 0-9 func isLCAlphaNum(r byte) bool { if isLCAlpha(r) { return true @@ -164,14 +147,17 @@ func isLCAlphaNum(r byte) bool { return r >= '0' && r <= '9' } +// isLCAlphaNum returns true for a-z func isLCAlpha(r byte) bool { return r >= 'a' && r <= 'z' } +// isLCAlphaNum returns true for A-Z func isUCAlpha(r byte) bool { return r >= 'A' && r <= 'Z' } +// stripKey removes a fixed prefix from an formatted string. 
func stripKey(key, input string) (string, error) { if len(input) < len(key)+1 { return "", errTraceStateSyntax diff --git a/pkg/sampling/doc.go b/pkg/sampling/doc.go deleted file mode 100644 index 39a46e4a4cc7..000000000000 --- a/pkg/sampling/doc.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This implements a prototype for OTEP 226. -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index fcd56cf5efaa..bc7a56320119 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -19,14 +19,25 @@ import ( "strings" ) +// OTelTraceState parses the sampling t-value. It accumulates other, +// unrecognized fields in the baseTraceState object. type OTelTraceState struct { tvalueString string tvalueParsed Threshold baseTraceState } +// otelSyntax describes the OTel trace state entry. +var otelSyntax = anyTraceStateSyntax[OTelTraceState, otelTraceStateParser]{ + separator: ';', + equality: ':', + allowPunct: "._-+", +} + +// otelTraceStateParser parses tracestate strings like `k1:v1;k2:v2` type otelTraceStateParser struct{} +// parseField recognizes and parses t-value entries. 
func (wp otelTraceStateParser) parseField(instance *OTelTraceState, key, input string) error { switch { case key == "t": @@ -54,6 +65,7 @@ func (wp otelTraceStateParser) parseField(instance *OTelTraceState, key, input s return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) } +// serialize generates the OTel tracestate encoding. Called by W3CTraceState.Serialize. func (otts *OTelTraceState) serialize() string { var sb strings.Builder @@ -61,28 +73,35 @@ func (otts *OTelTraceState) serialize() string { _, _ = sb.WriteString(otts.tvalueString) } - otelSyntax.serialize(&otts.baseTraceState, &sb) + otelSyntax.serializeBase(&otts.baseTraceState, &sb) return sb.String() } +// HasTValue indicates whether a non-empty t-value was received. func (otts *OTelTraceState) HasTValue() bool { return otts.tvalueString != "" } +// UnsetTValue clears the t-value, generally meant for use when the +// t-value is inconsistent. func (otts *OTelTraceState) UnsetTValue() { otts.tvalueString = "" otts.tvalueParsed = Threshold{} } +// TValue returns a whole encoding, including the leading "t:". func (otts *OTelTraceState) TValue() string { return otts.tvalueString } +// TValueThreshold returns the threshold used given the parsed t-value. func (otts *OTelTraceState) TValueThreshold() Threshold { return otts.tvalueParsed } +// SetTValue modifies the t-value. The user should supply the correct +// new threshold, it will not be re-calculated. func (otts *OTelTraceState) SetTValue(encoded string, threshold Threshold) { otts.tvalueString = encoded otts.tvalueParsed = threshold diff --git a/pkg/sampling/tvalue.go b/pkg/sampling/tvalue.go index ae27daa81964..1752cd59cd0e 100644 --- a/pkg/sampling/tvalue.go +++ b/pkg/sampling/tvalue.go @@ -39,16 +39,18 @@ const ( // TValueZeroEncoding is the encoding for 0 adjusted count. TValueZeroEncoding = "t:0" - TValueOneEncoding = "t:1" + + // TValueOneEncoding is the encoding for 100% sampling. 
+ TValueOneEncoding = "t:1" ) -// Threshold is an opaque type used to compare with the least-significant 7 bytes of the TraceID. +// Threshold used to compare with the least-significant 7 bytes of the TraceID. type Threshold struct { - // limit is in the range [0, 0x1p+56]. - // - 0 represents zero probability (no TraceID values are less-than) + // unsigned is in the range [0, MaxAdjustedCount] + // - 0 represents zero probability (0 TraceID values are less-than) // - 1 represents MinSamplingProb (1 TraceID value is less-than) // - MaxAdjustedCount represents 100% sampling (all TraceID values are less-than). - limit uint64 + unsigned uint64 } var ( @@ -65,23 +67,28 @@ var ( ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") ) +// probabilityInRange tests MinSamplingProb <= prob <= 1. func probabilityInRange(prob float64) bool { - return prob <= 1 && prob >= MinSamplingProb + return prob >= MinSamplingProb && prob <= 1 } +// AdjustedCountToTvalue encodes a t-value given an adjusted count. In +// this form, the encoding is a decimal integer. func AdjustedCountToTvalue(count uint64) (string, error) { switch { case count == 0: - // Special case. + return TValueZeroEncoding, nil case count < 0: return "", ErrProbabilityRange case count > uint64(MaxAdjustedCount): return "", ErrAdjustedCountRange } - return strconv.FormatInt(int64(count), 10), nil + return "t:" + strconv.FormatInt(int64(count), 10), nil } -// E.g., 3/7 w/ prec=2 -> "0x1.b7p-02" +// ProbabilityToTvalue encodes a t-value given a probability. In this +// form, the user controls floating-point format and precision. See +// strconv.FormatFloat() for an explanation of `format` and `prec`. 
func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { // Probability cases switch { @@ -107,6 +114,13 @@ func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { return "t:" + strconv.FormatFloat(prob, format, prec, 64), nil } +// TvalueToProbabilityAndAdjustedCount parses the t-value and returns +// both the probability and the adjusted count. In a Span-to-Metrics +// pipeline, users should count either the inverse of probability or +// the adjusted count. When the arriving t-value encodes adjusted +// count as opposed to probability, the adjusted count will be exactly +// the specified integer value; in these cases, probability corresponds +// with exactly implemented sampling ratio. func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { if !strings.HasPrefix(s, "t:") { return 0, 0, strconv.ErrSyntax @@ -141,6 +155,8 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { return number, adjusted, nil } +// ProbabilityToThreshold returns the sampling threshold exactly +// corresponding with the input probability. func ProbabilityToThreshold(prob float64) (Threshold, error) { // Note: prob == 0 is an allowed special case. Because we // use less-than, all spans are unsampled with Threshold{0}. @@ -148,19 +164,25 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { return Threshold{}, ErrProbabilityRange } return Threshold{ - limit: uint64(prob * MaxAdjustedCount), + unsigned: uint64(prob * MaxAdjustedCount), }, nil } +// ShouldSample returns true when the span passes this sampler's +// consistent sampling decision. func (t Threshold) ShouldSample(id pcommon.TraceID) bool { value := binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask - return value < t.limit + return value < t.unsigned } +// Probability is the sampling ratio in the range [MinSamplingProb, 1]. 
func (t Threshold) Probability() float64 { - return float64(t.limit) / MaxAdjustedCount + return float64(t.unsigned) / MaxAdjustedCount } +// Unsigned is an unsigned integer that scales with the sampling +// threshold. This is useful to compare two thresholds without +// floating point conversions. func (t Threshold) Unsigned() uint64 { - return t.limit + return t.unsigned } diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index ff7eb9cbbde8..eddb2e5362fb 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -19,17 +19,46 @@ import ( "strings" ) +// W3CTraceState represents a W3C tracestate header, which is +// organized into vendor-specific sections. OpenTelemetry specifies +// a section that uses "ot" as the vendor key, where the t-value +// used for consistent sampling may be encoded. +// +// Note that we do not implement the limits specified in +// https://www.w3.org/TR/trace-context/#tracestate-limits because at +// this point in the traces pipeline, the tracestate is no longer +// being propagated. Those are propagation limits, OTel does not +// specifically restrict TraceState. +// +// TODO: Should this package's tracestate support do more to implement +// those limits? See +// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/tracestate-handling.md, +// which indicates that OTel should use a limit of 256 bytes, while +// the W3C tracestate entry as a whole recommends a limit of 512 +// bytes. type W3CTraceState struct { otelParsed OTelTraceState baseTraceState } +// w3cSyntax describes the W3C tracestate entry. 
+var w3cSyntax = anyTraceStateSyntax[W3CTraceState, w3CTraceStateParser]{ + separator: ',', + equality: '=', + allowPunct: ";:._-+", +} + +// w3CTraceStateParser parses tracestate strings like `k1=v1,k2=v2` type w3CTraceStateParser struct{} +// NewW3CTraceState parses a W3C tracestate entry, especially tracking +// the OpenTelemetry entry where t-value resides for use in sampling +// decisions. func NewW3CTraceState(input string) (W3CTraceState, error) { return w3cSyntax.parse(input) } +// parseField recognizes the OpenTelemetry tracestate entry. func (wp w3CTraceStateParser) parseField(instance *W3CTraceState, key, input string) error { switch { case key == "ot": @@ -51,6 +80,8 @@ func (wp w3CTraceStateParser) parseField(instance *W3CTraceState, key, input str return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) } +// Serialize returns a W3C tracestate encoding, as would be encoded in +// a ptrace.Span.TraceState(). func (wts *W3CTraceState) Serialize() string { var sb strings.Builder @@ -60,11 +91,13 @@ func (wts *W3CTraceState) Serialize() string { _, _ = sb.WriteString(ots) } - w3cSyntax.serialize(&wts.baseTraceState, &sb) + w3cSyntax.serializeBase(&wts.baseTraceState, &sb) return sb.String() } +// OTelValue returns a reference to this value's OpenTelemetry trace +// state entry. 
func (wts *W3CTraceState) OTelValue() *OTelTraceState { return &wts.otelParsed } From 7bae35cd1acb323b29da11d9d4043834693d315a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 17 May 2023 15:45:47 -0700 Subject: [PATCH 09/38] revert two files --- exporter/fileexporter/factory.go | 6 ++--- exporter/fileexporter/file_exporter.go | 34 +++++++++----------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/exporter/fileexporter/factory.go b/exporter/fileexporter/factory.go index 41eba5cb4d7d..b5f821f40fde 100644 --- a/exporter/fileexporter/factory.go +++ b/exporter/fileexporter/factory.go @@ -135,20 +135,18 @@ func createLogsExporter( } func newFileExporter(conf *Config, writer io.WriteCloser) *fileExporter { - e := &fileExporter{ + return &fileExporter{ path: conf.Path, formatType: conf.FormatType, file: writer, tracesMarshaler: tracesMarshalers[conf.FormatType], metricsMarshaler: metricsMarshalers[conf.FormatType], logsMarshaler: logsMarshalers[conf.FormatType], + exporter: buildExportFunc(conf), compression: conf.Compression, compressor: buildCompressor(conf.Compression), flushInterval: conf.FlushInterval, } - e.exporter = e.buildExportFunc(conf) - - return e } func buildFileWriter(cfg *Config) (io.WriteCloser, error) { diff --git a/exporter/fileexporter/file_exporter.go b/exporter/fileexporter/file_exporter.go index 41bccff392ae..db3533c44d09 100644 --- a/exporter/fileexporter/file_exporter.go +++ b/exporter/fileexporter/file_exporter.go @@ -65,14 +65,6 @@ type fileExporter struct { stopTicker chan struct{} } -type binaryExporter struct { - *fileExporter -} - -type lineExporter struct { - *fileExporter -} - func (e *fileExporter) consumeTraces(_ context.Context, td ptrace.Traces) error { buf, err := e.tracesMarshaler.MarshalTraces(td) if err != nil { @@ -91,7 +83,7 @@ func (e *fileExporter) consumeMetrics(_ context.Context, md pmetric.Metrics) err return e.exporter(e, buf) } -func (e fileExporter) consumeLogs(_ context.Context, ld 
plog.Logs) error { +func (e *fileExporter) consumeLogs(_ context.Context, ld plog.Logs) error { buf, err := e.logsMarshaler.MarshalLogs(ld) if err != nil { return err @@ -100,22 +92,20 @@ func (e fileExporter) consumeLogs(_ context.Context, ld plog.Logs) error { return e.exporter(e, buf) } -func (e lineExporter) Write(buf []byte) (int, error) { +func exportMessageAsLine(e *fileExporter, buf []byte) error { // Ensure only one write operation happens at a time. e.mutex.Lock() defer e.mutex.Unlock() - n1, err := e.file.Write(buf) - if err != nil { + if _, err := e.file.Write(buf); err != nil { return err } - n2, err := io.WriteString(e.file, "\n") - if err != nil { + if _, err := io.WriteString(e.file, "\n"); err != nil { return err } - return n1 + n2, nil + return nil } -func (e *binaryExporter) Write(buf []byte) (int, error) { +func exportMessageAsBuffer(e *fileExporter, buf []byte) error { // Ensure only one write operation happens at a time. e.mutex.Lock() defer e.mutex.Unlock() @@ -125,10 +115,10 @@ func (e *binaryExporter) Write(buf []byte) (int, error) { binary.BigEndian.PutUint32(data, uint32(len(buf))) data = append(data, buf...) if err := binary.Write(e.file, binary.BigEndian, data); err != nil { - return -1, err + return err } - return len(data), nil + return nil } // startFlusher starts the flusher. @@ -182,13 +172,13 @@ func (e *fileExporter) Shutdown(context.Context) error { return e.file.Close() } -func (e *fileExporter) buildExportFunc(cfg *Config) (io.Writer, error) { +func buildExportFunc(cfg *Config) func(e *fileExporter, buf []byte) error { if cfg.FormatType == formatTypeProto { - return binaryExporter{e} + return exportMessageAsBuffer } // if the data format is JSON and needs to be compressed, telemetry data can't be written to file in JSON format. 
if cfg.FormatType == formatTypeJSON && cfg.Compression != "" { - return binaryExporter{fileExporter: e}, nil + return exportMessageAsBuffer } - return lineExporter{fileExporter: e}, nil + return exportMessageAsLine } From 9010a679b4dadd2e63fd690331cc1f6014712ef3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 1 Jun 2023 14:34:14 -0700 Subject: [PATCH 10/38] Update with r, s, and t-value. Now using regexps and strings.IndexByte() to iterate. --- pkg/sampling/anytracestate.go | 166 --------------------- pkg/sampling/common.go | 100 +++++++++++++ pkg/sampling/go.mod | 3 +- pkg/sampling/go.sum | 77 +--------- pkg/sampling/oteltracestate.go | 216 ++++++++++++++++++---------- pkg/sampling/oteltracestate_test.go | 185 +++++++++++++++--------- pkg/sampling/tvalue.go | 45 +++--- pkg/sampling/tvalue_test.go | 50 +++---- pkg/sampling/w3ctracestate.go | 192 ++++++++++++++----------- pkg/sampling/w3ctracestate_test.go | 56 ++++++-- 10 files changed, 557 insertions(+), 533 deletions(-) delete mode 100644 pkg/sampling/anytracestate.go create mode 100644 pkg/sampling/common.go diff --git a/pkg/sampling/anytracestate.go b/pkg/sampling/anytracestate.go deleted file mode 100644 index 458868892832..000000000000 --- a/pkg/sampling/anytracestate.go +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" - -import ( - "fmt" - "strconv" - "strings" -) - -// errTraceStateSyntax is returned for a variety of syntax errors. -var errTraceStateSyntax = fmt.Errorf("otel tracestate: %w", strconv.ErrSyntax) - -// anyTraceStateParser describes how to instance types recognize -// specific fields. -type anyTraceStateParser[Instance any] interface { - parseField(instance *Instance, key, input string) error -} - -// anyTraceStateSyntax describes a variable key/value syntax. -type anyTraceStateSyntax[Instance any, Parser anyTraceStateParser[Instance]] struct { - separator byte - equality byte - allowPunct string -} - -// serializeBase adds the base fields to the output. -func (syntax anyTraceStateSyntax[Instance, Parser]) serializeBase(base *baseTraceState, sb *strings.Builder) { - for _, field := range base.fields { - syntax.separate(sb) - _, _ = sb.WriteString(field) - } -} - -// separate adds a separator to the output. -func (syntax anyTraceStateSyntax[Instance, Parser]) separate(sb *strings.Builder) { - if sb.Len() != 0 { - _ = sb.WriteByte(syntax.separator) - } -} - -// parse uses variable syntax to parse the input string into key/value fields. 
-func (syntax anyTraceStateSyntax[Instance, Parser]) parse(input string) (Instance, error) { - var parser Parser - var invalid Instance - var instance Instance - - if len(input) == 0 { - return invalid, nil - } - - for len(input) > 0 { - eqPos := 0 - for ; eqPos < len(input); eqPos++ { - if eqPos == 0 { - if isLCAlpha(input[eqPos]) { - continue - } - } else if isLCAlphaNum(input[eqPos]) { - continue - } - break - } - if eqPos == 0 || eqPos == len(input) || input[eqPos] != syntax.equality { - return invalid, errTraceStateSyntax - } - - key := input[0:eqPos] - tail := input[eqPos+1:] - - sepPos := 0 - - for ; sepPos < len(tail); sepPos++ { - if syntax.isValueByte(tail[sepPos]) { - continue - } - break - } - - if err := parser.parseField(&instance, key, input[0:sepPos+eqPos+1]); err != nil { - return invalid, err - } - - if sepPos < len(tail) && tail[sepPos] != syntax.separator { - return invalid, errTraceStateSyntax - } - - if sepPos == len(tail) { - break - } - - input = tail[sepPos+1:] - - // test for a trailing ; - if input == "" { - return invalid, errTraceStateSyntax - } - } - return instance, nil -} - -// isValueByte determines whether the byte is valid as part of a -// tracestate value. This is based on the syntax, since the W3C syntax -// allows the OTel separator and equality symbol to appear in values. -func (syntax anyTraceStateSyntax[Instance, Parser]) isValueByte(r byte) bool { - if isLCAlphaNum(r) { - return true - } - if isUCAlpha(r) { - return true - } - return strings.ContainsRune(syntax.allowPunct, rune(r)) -} - -// baseTraceState encodes not-specified fields as a list. They will -// be re-encoded when serialized. -type baseTraceState struct { - fields []string -} - -// baseTraceStateParser parses not-specified fields into a list. -type baseTraceStateParser struct{} - -// parseField adds to the list of not-specified fields. 
-func (bp baseTraceStateParser) parseField(instance *baseTraceState, _, input string) error { - instance.fields = append(instance.fields, input) - return nil -} - -// isLCAlphaNum returns true for a-z, 0-9 -func isLCAlphaNum(r byte) bool { - if isLCAlpha(r) { - return true - } - return r >= '0' && r <= '9' -} - -// isLCAlphaNum returns true for a-z -func isLCAlpha(r byte) bool { - return r >= 'a' && r <= 'z' -} - -// isLCAlphaNum returns true for A-Z -func isUCAlpha(r byte) bool { - return r >= 'A' && r <= 'Z' -} - -// stripKey removes a fixed prefix from an formatted string. -func stripKey(key, input string) (string, error) { - if len(input) < len(key)+1 { - return "", errTraceStateSyntax - } - return input[len(key)+1:], nil -} diff --git a/pkg/sampling/common.go b/pkg/sampling/common.go new file mode 100644 index 000000000000..a40d0f72dfa4 --- /dev/null +++ b/pkg/sampling/common.go @@ -0,0 +1,100 @@ +package sampling + +import ( + "errors" + "strings" + + "go.uber.org/multierr" +) + +type KV struct { + Key string + Value string +} + +var ( + ErrTraceStateSize = errors.New("invalid tracestate size") + ErrTraceStateCount = errors.New("invalid tracestate item count") +) + +// keyValueScanner defines distinct scanner behaviors for lists of +// key-values. +type keyValueScanner struct { + // maxItems is 32 or -1 + maxItems int + // trim is set if OWS (optional whitespace) should be removed + trim bool + // separator is , or ; + separator byte + // equality is = or : + equality byte +} + +type commonTraceState struct { + kvs []KV +} + +func (cts commonTraceState) HasExtraValues() bool { + return len(cts.kvs) != 0 +} + +func (cts commonTraceState) ExtraValues() []KV { + return cts.kvs +} + +// trimOws removes optional whitespace on both ends of a string. 
+func trimOws(input string) string { + // Hard-codes the value of owsCharset + for len(input) > 0 && input[0] == ' ' || input[0] == '\t' { + input = input[1:] + } + for len(input) > 0 && input[len(input)-1] == ' ' || input[len(input)-1] == '\t' { + input = input[:len(input)-1] + } + return input +} + +func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) error) error { + var rval error + items := 0 + for input != "" { + items++ + if s.maxItems > 0 && items >= s.maxItems { + // W3C specifies max 32 entries, tested here + // instead of via the regexp. + return ErrTraceStateCount + } + + sep := strings.IndexByte(input, s.separator) + + var member string + if sep < 0 { + member = input + input = "" + } else { + member = input[:sep] + input = input[sep+1:] + } + + if s.trim { + // Trim only required for W3C; OTel does not + // specify whitespace for its value encoding. + member = trimOws(member) + } + + if member == "" { + // W3C allows empty list members. + continue + } + + eq := strings.IndexByte(member, s.equality) + if eq < 0 { + // A regexp should have rejected this input. 
+ continue + } + if err := f(member[:eq], member[eq+1:]); err != nil { + rval = multierr.Append(rval, err) + } + } + return rval +} diff --git a/pkg/sampling/go.mod b/pkg/sampling/go.mod index e3e9f6c112a3..feac2ad0590a 100644 --- a/pkg/sampling/go.mod +++ b/pkg/sampling/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/stretchr/testify v1.8.2 go.opentelemetry.io/collector/pdata v1.0.0-rcv0011 + go.uber.org/multierr v1.11.0 ) require ( @@ -12,8 +13,6 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect golang.org/x/text v0.9.0 // indirect diff --git a/pkg/sampling/go.sum b/pkg/sampling/go.sum index dfcf22cd4962..5a83b0329a63 100644 --- a/pkg/sampling/go.sum +++ b/pkg/sampling/go.sum @@ -1,146 +1,71 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod 
h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.opentelemetry.io/collector/pdata v0.66.0 h1:UdE5U6MsDNzuiWaXdjGx2lC3ElVqWmN/hiUE8vyvSuM= -go.opentelemetry.io/collector/pdata v0.66.0/go.mod h1:pqyaznLzk21m+1KL6fwOsRryRELL+zNM0qiVSn0MbVc= go.opentelemetry.io/collector/pdata v1.0.0-rcv0011 h1:7lT0vseP89mHtUpvgmWYRvQZ0eY+SHbVsnXY20xkoMg= go.opentelemetry.io/collector/pdata v1.0.0-rcv0011/go.mod h1:9vrXSQBeMRrdfGt9oMgYweqERJ8adaiQjN6LSbqRMMA= -go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= 
-go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.4.0 
h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f h1:BWUVssLB0HVOSY78gIdvk1dTVYtT1y8SBWtPYuTJ/6w= google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.51.0 h1:E1eGv1FTqoLIdnBCZufiSHgKjlqG6fKFf6pPWtMTh8U= -google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index bc7a56320119..2e54e89be1e2 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -1,108 +1,172 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" +package sampling import ( "fmt" - "strings" + "io" + "regexp" + "strconv" ) -// OTelTraceState parses the sampling t-value. It accumulates other, -// unrecognized fields in the baseTraceState object. type OTelTraceState struct { - tvalueString string - tvalueParsed Threshold - baseTraceState + commonTraceState + + // sampling r, s, and t-values + ru uint64 // r value parsed, as unsigned + r string // 14 ASCII hex digits + sf float64 // s value parsed, as a probability + s string // original float syntax preserved + tf float64 // t value parsed, as a probability + t string // original float syntax preserved } -// otelSyntax describes the OTel trace state entry. -var otelSyntax = anyTraceStateSyntax[OTelTraceState, otelTraceStateParser]{ - separator: ';', - equality: ':', - allowPunct: "._-+", -} +const ( + // hardMaxOTelLength is the maximum encoded size of an OTel + // tracestate value. + hardMaxOTelLength = 256 + + // chr = ucalpha / lcalpha / DIGIT / "." 
/ "_" / "-" + // ucalpha = %x41-5A ; A-Z + // lcalpha = %x61-7A ; a-z + // key = lcalpha *(lcalpha / DIGIT ) + // value = *(chr) + // list-member = key ":" value + // list = list-member *( ";" list-member ) + otelKeyRegexp = lcAlphaRegexp + lcDigitRegexp + `*` + otelValueRegexp = `[a-zA-Z0-9._\-]*` + otelMemberRegexp = `(?:` + otelKeyRegexp + `:` + otelValueRegexp + `)` + otelSemicolonMemberRegexp = `(?:` + `;` + otelMemberRegexp + `)` + otelTracestateRegexp = `^` + otelMemberRegexp + otelSemicolonMemberRegexp + `*$` +) -// otelTraceStateParser parses tracestate strings like `k1:v1;k2:v2` -type otelTraceStateParser struct{} +var ( + otelTracestateRe = regexp.MustCompile(otelTracestateRegexp) -// parseField recognizes and parses t-value entries. -func (wp otelTraceStateParser) parseField(instance *OTelTraceState, key, input string) error { - switch { - case key == "t": - value, err := stripKey(key, input) - if err != nil { - return err - } + ErrRandomValueRange = fmt.Errorf("r-value out of range") - prob, _, err := TvalueToProbabilityAndAdjustedCount(value) - if err != nil { - return fmt.Errorf("otel tracestate t-value: %w", err) - } + otelSyntax = keyValueScanner{ + maxItems: -1, + trim: false, + separator: ';', + equality: ':', + } +) - th, err := ProbabilityToThreshold(prob) - if err != nil { - return fmt.Errorf("otel tracestate t-value: %w", err) +func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { + if len(input) > hardMaxOTelLength { + return otts, ErrTraceStateSize + } + + if !otelTracestateRe.MatchString(input) { + return OTelTraceState{}, strconv.ErrSyntax + } + + err := otelSyntax.scanKeyValues(input, func(key, value string) error { + var err error + switch key { + case "r": + var unsigned uint64 + unsigned, err = strconv.ParseUint(value, 16, 64) + if err == nil { + if unsigned >= 0x1p56 { + err = ErrRandomValueRange + } else { + otts.r = value + otts.ru = unsigned + } + } + case "s": + var prob float64 + prob, _, err = 
EncodedToProbabilityAndAdjustedCount(value) + if err == nil { + otts.s = value + otts.sf = prob + } + case "t": + var prob float64 + prob, _, err = EncodedToProbabilityAndAdjustedCount(value) + if err == nil { + otts.t = value + otts.tf = prob + } + default: + otts.kvs = append(otts.kvs, KV{ + Key: key, + Value: value, + }) } + return err + }) - instance.tvalueString = input - instance.tvalueParsed = th + return otts, err +} - return nil - } +func (otts OTelTraceState) HasRValue() bool { + return otts.r != "" +} - return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) +func (otts OTelTraceState) RValue() string { + return otts.r } -// serialize generates the OTel tracestate encoding. Called by W3CTraceState.Serialize. -func (otts *OTelTraceState) serialize() string { - var sb strings.Builder +func (otts OTelTraceState) RValueUnsigned() uint64 { + return otts.ru +} - if otts.TValue() != "" { - _, _ = sb.WriteString(otts.tvalueString) - } +func (otts OTelTraceState) HasSValue() bool { + return otts.s != "" +} - otelSyntax.serializeBase(&otts.baseTraceState, &sb) +func (otts OTelTraceState) SValue() string { + return otts.s +} - return sb.String() +func (otts OTelTraceState) SValueProbability() float64 { + return otts.sf } -// HasTValue indicates whether a non-empty t-value was received. -func (otts *OTelTraceState) HasTValue() bool { - return otts.tvalueString != "" +func (otts OTelTraceState) HasTValue() bool { + return otts.t != "" } -// UnsetTValue clears the t-value, generally meant for use when the -// t-value is inconsistent. -func (otts *OTelTraceState) UnsetTValue() { - otts.tvalueString = "" - otts.tvalueParsed = Threshold{} +func (otts OTelTraceState) TValue() string { + return otts.t } -// TValue returns a whole encoding, including the leading "t:". 
-func (otts *OTelTraceState) TValue() string { - return otts.tvalueString +func (otts OTelTraceState) TValueProbability() float64 { + return otts.tf } -// TValueThreshold returns the threshold used given the parsed t-value. -func (otts *OTelTraceState) TValueThreshold() Threshold { - return otts.tvalueParsed +func (otts OTelTraceState) HasAnyValue() bool { + return otts.HasRValue() || otts.HasSValue() || otts.HasTValue() || otts.HasExtraValues() } -// SetTValue modifies the t-value. The user should supply the correct -// new threshold, it will not be re-calculated. -func (otts *OTelTraceState) SetTValue(encoded string, threshold Threshold) { - otts.tvalueString = encoded - otts.tvalueParsed = threshold +func (otts OTelTraceState) Serialize(w io.StringWriter) { + cnt := 0 + sep := func() { + if cnt != 0 { + w.WriteString(";") + } + cnt++ + } + if otts.HasRValue() { + sep() + w.WriteString("r:") + w.WriteString(otts.RValue()) + } + if otts.HasSValue() { + sep() + w.WriteString("s:") + w.WriteString(otts.SValue()) + } + if otts.HasTValue() { + sep() + w.WriteString("t:") + w.WriteString(otts.TValue()) + } + for _, kv := range otts.ExtraValues() { + sep() + w.WriteString(kv.Key) + w.WriteString(":") + w.WriteString(kv.Value) + } } diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 0d76cf442d89..017f48b0d2ec 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -31,121 +31,170 @@ func testName(in string) string { return x } -func TestNewTraceState(t *testing.T) { - otts := otelTraceState{} - require.False(t, otts.hasTValue()) - require.Equal(t, "", otts.serialize()) +func TestEmptyOTelTraceState(t *testing.T) { + // Empty value is invalid + _, err := NewOTelTraceState("") + require.Error(t, err) } -func TestTraceStatePRValueSerialize(t *testing.T) { - otts := otelTraceState{} - otts.tvalueString = "t:3" - otts.fields = []string{"a:b", "c:d"} - require.True(t, otts.hasTValue()) - 
require.Equal(t, "t:3;a:b;c:d", otts.serialize()) -} +func TestOTelTraceStateTValueSerialize(t *testing.T) { + const orig = "r:1;s:2;t:3;a:b;c:d" + otts, err := NewOTelTraceState(orig) + require.NoError(t, err) + require.True(t, otts.HasTValue()) + require.Equal(t, "3", otts.TValue()) + + require.True(t, otts.HasSValue()) + require.Equal(t, "2", otts.SValue()) -func TestTraceStateSerializeOverflow(t *testing.T) { - long := "x:" + strings.Repeat(".", 254) - otts := otelTraceState{} - otts.fields = []string{long} - // this drops the extra key, sorry! - require.Equal(t, long, otts.serialize()) - otts.tvalueString = "t:1" - require.Equal(t, "t:1", otts.serialize()) + require.True(t, otts.HasRValue()) + require.Equal(t, "1", otts.RValue()) + + require.True(t, otts.HasAnyValue()) + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, orig, w.String()) } func TestParseOTelTraceState(t *testing.T) { type testCase struct { in string + rval string + sval string tval string extra []string expectErr error } - const notset = "" + const ns = "" for _, test := range []testCase{ - // correct cases - {"", notset, nil, nil}, - {"t:2", "2", nil, nil}, - {"t:1", "1", nil, nil}, - {"t:1", "1", nil, nil}, - {"t:10", "10", nil, nil}, - {"t:33", "33", nil, nil}, - {"t:61", "61", nil, nil}, - {"t:72057594037927936", "72057594037927936", nil, nil}, // max t-value = 0x1p+56 - {"t:0x1p-56", "0x1p-56", nil, nil}, // min t-value + // t-value correct cases + {"t:2", ns, ns, "2", nil, nil}, + {"t:1", ns, ns, "1", nil, nil}, + {"t:1", ns, ns, "1", nil, nil}, + {"t:10", ns, ns, "10", nil, nil}, + {"t:33", ns, ns, "33", nil, nil}, + {"t:61", ns, ns, "61", nil, nil}, + {"t:72057594037927936", ns, ns, "72057594037927936", nil, nil}, // max t-value = 0x1p+56 + {"t:0x1p-56", ns, ns, "0x1p-56", nil, nil}, // min t-value // syntax errors - {"t:1;", notset, nil, strconv.ErrSyntax}, - {"t:1=p:2", notset, nil, strconv.ErrSyntax}, - {"t:1;p:2=s:3", notset, nil, strconv.ErrSyntax}, - {":1;p:2=s:3", 
notset, nil, strconv.ErrSyntax}, - {":;p:2=s:3", notset, nil, strconv.ErrSyntax}, - {":;:", notset, nil, strconv.ErrSyntax}, - {":", notset, nil, strconv.ErrSyntax}, - {"t:;p=1", notset, nil, strconv.ErrSyntax}, - {"t:$", notset, nil, strconv.ErrSyntax}, // not-hexadecimal + {"", ns, ns, ns, nil, strconv.ErrSyntax}, + {"t:1;", ns, ns, ns, nil, strconv.ErrSyntax}, + {"t:1=p:2", ns, ns, ns, nil, strconv.ErrSyntax}, + {"t:1;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, + {":1;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, + {":;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, + {":;:", ns, ns, ns, nil, strconv.ErrSyntax}, + {":", ns, ns, ns, nil, strconv.ErrSyntax}, + {"t:;p=1", ns, ns, ns, nil, strconv.ErrSyntax}, + {"t:$", ns, ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"t:0x1p+3", ns, ns, ns, nil, strconv.ErrSyntax}, // + is invalid // range errors - {"t:0x1p+57", notset, nil, ErrAdjustedCountOnlyInteger}, // integer syntax - {"t:72057594037927937", notset, nil, ErrAdjustedCountRange}, // out-of-range - {"t:-1", notset, nil, ErrProbabilityRange}, // non-negative + {"t:14.5", ns, ns, ns, nil, ErrAdjustedCountOnlyInteger}, // integer syntax + {"t:72057594037927937", ns, ns, ns, nil, ErrAdjustedCountRange}, // out-of-range + {"t:-1", ns, ns, ns, nil, ErrProbabilityRange}, // non-negative // one field - {"e100:1", notset, []string{"e100:1"}, nil}, + {"e100:1", ns, ns, ns, []string{"e100:1"}, nil}, // two fields - {"e1:1;e2:2", notset, []string{"e1:1", "e2:2"}, nil}, - {"e1:1;e2:2", notset, []string{"e1:1", "e2:2"}, nil}, + {"e1:1;e2:2", ns, ns, ns, []string{"e1:1", "e2:2"}, nil}, + {"e1:1;e2:2", ns, ns, ns, []string{"e1:1", "e2:2"}, nil}, // one extra key, two ways - {"t:2;extra:stuff", "2", []string{"extra:stuff"}, nil}, - {"extra:stuff;t:2", "2", []string{"extra:stuff"}, nil}, + {"t:2;extra:stuff", ns, ns, "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;t:2", ns, ns, "2", []string{"extra:stuff"}, nil}, // two extra fields - {"e100:100;t:1;e101:101", 
"1", []string{"e100:100", "e101:101"}, nil}, - {"t:1;e100:100;e101:101", "1", []string{"e100:100", "e101:101"}, nil}, - {"e100:100;e101:101;t:1", "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;t:1;e101:101", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"t:1;e100:100;e101:101", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;t:1", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, // parse error prevents capturing unrecognized keys - {"1:1;u:V", notset, nil, strconv.ErrSyntax}, - {"X:1;u:V", notset, nil, strconv.ErrSyntax}, - {"x:1;u:V", notset, []string{"x:1", "u:V"}, nil}, + {"1:1;u:V", ns, ns, ns, nil, strconv.ErrSyntax}, + {"X:1;u:V", ns, ns, ns, nil, strconv.ErrSyntax}, + {"x:1;u:V", ns, ns, ns, []string{"x:1", "u:V"}, nil}, + + // s-value + {"s:2;extra:stuff", ns, "2", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;s:2", ns, "2", ns, []string{"extra:stuff"}, nil}, + + // s-value range error + {"s:0x1p-58", ns, ns, ns, nil, ErrProbabilityRange}, + {"s:-1", ns, ns, ns, nil, ErrProbabilityRange}, + + // r-value + {"r:2;extra:stuff", "2", ns, ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;r:2", "2", ns, ns, []string{"extra:stuff"}, nil}, + {"r:ffffffffffffff", "ffffffffffffff", ns, ns, nil, nil}, + {"r:8888", "8888", ns, ns, nil, nil}, + {"r:0", "0", ns, ns, nil, nil}, + + // r-value range error (15 bytes of hex or more) + {"r:100000000000000", ns, ns, ns, nil, ErrRandomValueRange}, + {"r:fffffffffffffffff", ns, ns, ns, nil, strconv.ErrRange}, // no trailing ; - {"x:1;", notset, nil, strconv.ErrSyntax}, + {"x:1;", ns, ns, ns, nil, strconv.ErrSyntax}, // empty key - {"x:", notset, []string{"x:"}, nil}, + {"x:", ns, ns, ns, []string{"x:"}, nil}, // charset test - {"x:0X1FFF;y:.-_-.;z:", notset, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, - {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + {"x:0X1FFF;y:.-_-.;z:", ns, ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, 
nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, // size exceeded - {"x:" + strings.Repeat("_", 255), notset, nil, strconv.ErrSyntax}, - {"x:" + strings.Repeat("_", 254), notset, []string{"x:" + strings.Repeat("_", 254)}, nil}, + {"x:" + strings.Repeat("_", 255), ns, ns, ns, nil, ErrTraceStateSize}, + {"x:" + strings.Repeat("_", 254), ns, ns, ns, []string{"x:" + strings.Repeat("_", 254)}, nil}, } { t.Run(testName(test.in), func(t *testing.T) { - otts, err := otelSyntax.parse(test.in) + otts, err := NewOTelTraceState(test.in) if test.expectErr != nil { require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) } else { require.NoError(t, err) } - if test.tval != notset { - require.True(t, otts.hasTValue()) - require.Equal(t, "t:"+test.tval, otts.tvalueString) + if test.rval != ns { + require.True(t, otts.HasRValue()) + require.Equal(t, test.rval, otts.RValue()) } else { - require.False(t, otts.hasTValue(), "should have no t-value: %s", otts.tvalueString) + require.False(t, otts.HasRValue(), "should have no r-value: %s", otts.RValue()) } - require.EqualValues(t, test.extra, otts.fields) + if test.sval != ns { + require.True(t, otts.HasSValue()) + require.Equal(t, test.sval, otts.SValue()) + } else { + require.False(t, otts.HasSValue(), "should have no s-value: %s", otts.SValue()) + } + if test.tval != ns { + require.True(t, otts.HasTValue()) + require.Equal(t, test.tval, otts.TValue()) + } else { + require.False(t, otts.HasTValue(), "should have no t-value: %s", otts.TValue()) + } + var expect []KV + for _, ex := range test.extra { + k, v, _ := strings.Cut(ex, ":") + expect = append(expect, KV{ + Key: k, + Value: v, + }) + } + require.Equal(t, expect, otts.ExtraValues()) - // on success w/o t-value, serialize() should not modify - if !otts.hasTValue() && test.expectErr == nil { - require.Equal(t, test.in, otts.serialize()) + if test.expectErr != nil { + 
return } + // on success Serialize() should not modify + // test by re-parsing + var w strings.Builder + otts.Serialize(&w) + cpy, err := NewOTelTraceState(w.String()) + require.NoError(t, err) + require.Equal(t, otts, cpy) }) } } diff --git a/pkg/sampling/tvalue.go b/pkg/sampling/tvalue.go index 1752cd59cd0e..75106445b213 100644 --- a/pkg/sampling/tvalue.go +++ b/pkg/sampling/tvalue.go @@ -18,7 +18,6 @@ import ( "encoding/binary" "fmt" "strconv" - "strings" "go.opentelemetry.io/collector/pdata/pcommon" ) @@ -37,11 +36,11 @@ const ( // the unsigned value of bytes 9 through 15. LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 - // TValueZeroEncoding is the encoding for 0 adjusted count. - TValueZeroEncoding = "t:0" + // ProbabilityZeroEncoding is the encoding for 0 adjusted count. + ProbabilityZeroEncoding = "0" - // TValueOneEncoding is the encoding for 100% sampling. - TValueOneEncoding = "t:1" + // ProbabilityOneEncoding is the encoding for 100% sampling. + ProbabilityOneEncoding = "1" ) // Threshold used to compare with the least-significant 7 bytes of the TraceID. @@ -72,30 +71,31 @@ func probabilityInRange(prob float64) bool { return prob >= MinSamplingProb && prob <= 1 } -// AdjustedCountToTvalue encodes a t-value given an adjusted count. In -// this form, the encoding is a decimal integer. -func AdjustedCountToTvalue(count uint64) (string, error) { +// AdjustedCountToEncoded encodes a s-value or t-value given an +// adjusted count. In this form, the encoding is a decimal integer. +func AdjustedCountToEncoded(count uint64) (string, error) { switch { case count == 0: - return TValueZeroEncoding, nil + return ProbabilityZeroEncoding, nil case count < 0: return "", ErrProbabilityRange case count > uint64(MaxAdjustedCount): return "", ErrAdjustedCountRange } - return "t:" + strconv.FormatInt(int64(count), 10), nil + return strconv.FormatInt(int64(count), 10), nil } -// ProbabilityToTvalue encodes a t-value given a probability. 
In this -// form, the user controls floating-point format and precision. See -// strconv.FormatFloat() for an explanation of `format` and `prec`. -func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { +// ProbabilityToEncoded encodes a s-value or t-value given a +// probability. In this form, the user controls floating-point format +// and precision. See strconv.FormatFloat() for an explanation of +// `format` and `prec`. +func ProbabilityToEncoded(prob float64, format byte, prec int) (string, error) { // Probability cases switch { case prob == 1: - return TValueOneEncoding, nil + return ProbabilityOneEncoding, nil case prob == 0: - return TValueZeroEncoding, nil + return ProbabilityZeroEncoding, nil case !probabilityInRange(prob): return "", ErrProbabilityRange } @@ -111,21 +111,18 @@ func ProbabilityToTvalue(prob float64, format byte, prec int) (string, error) { return "", ErrPrecisionRange } - return "t:" + strconv.FormatFloat(prob, format, prec, 64), nil + return strconv.FormatFloat(prob, format, prec, 64), nil } -// TvalueToProbabilityAndAdjustedCount parses the t-value and returns +// EncodedToProbabilityAndAdjustedCount parses the t-value and returns // both the probability and the adjusted count. In a Span-to-Metrics // pipeline, users should count either the inverse of probability or // the adjusted count. When the arriving t-value encodes adjusted // count as opposed to probability, the adjusted count will be exactly // the specified integer value; in these cases, probability corresponds // with exactly implemented sampling ratio. 
-func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { - if !strings.HasPrefix(s, "t:") { - return 0, 0, strconv.ErrSyntax - } - number, err := strconv.ParseFloat(s[2:], 64) // e.g., "0x1.b7p-02" -> approx 3/7 +func EncodedToProbabilityAndAdjustedCount(s string) (float64, float64, error) { + number, err := strconv.ParseFloat(s, 64) // e.g., "0x1.b7p-02" -> approx 3/7 if err != nil { return 0, 0, err } @@ -139,7 +136,7 @@ func TvalueToProbabilityAndAdjustedCount(s string) (float64, float64, error) { case number > 1: // Greater than 1 indicates adjusted count; re-parse // as a decimal integer. - integer, err := strconv.ParseInt(s[2:], 10, 64) + integer, err := strconv.ParseInt(s, 10, 64) if err != nil { return 0, 0, ErrAdjustedCountOnlyInteger } diff --git a/pkg/sampling/tvalue_test.go b/pkg/sampling/tvalue_test.go index c1b0e5e9d4c6..afbbe84650ac 100644 --- a/pkg/sampling/tvalue_test.go +++ b/pkg/sampling/tvalue_test.go @@ -41,54 +41,54 @@ func mustNot[T any](t T, err error) error { } func TestValidAdjustedCountToTvalue(t *testing.T) { - require.Equal(t, "0", must(AdjustedCountToTvalue(0))) - require.Equal(t, "1", must(AdjustedCountToTvalue(1))) - require.Equal(t, "2", must(AdjustedCountToTvalue(2))) + require.Equal(t, "0", must(AdjustedCountToEncoded(0))) + require.Equal(t, "1", must(AdjustedCountToEncoded(1))) + require.Equal(t, "2", must(AdjustedCountToEncoded(2))) const largest uint64 = 0x1p+56 - require.Equal(t, "72057594037927936", must(AdjustedCountToTvalue(largest))) - require.Equal(t, fmt.Sprint(largest-1), must(AdjustedCountToTvalue(largest-1))) + require.Equal(t, "72057594037927936", must(AdjustedCountToEncoded(largest))) + require.Equal(t, fmt.Sprint(largest-1), must(AdjustedCountToEncoded(largest-1))) } -func TestInvalidAdjustedCountToTvalue(t *testing.T) { +func TestInvalidAdjustedCountToEncoded(t *testing.T) { // Because unsigned, no too-small value. 
- require.Error(t, mustNot(AdjustedCountToTvalue(0x1p56+1))) - require.Error(t, mustNot(AdjustedCountToTvalue(math.MaxInt64))) + require.Error(t, mustNot(AdjustedCountToEncoded(0x1p56+1))) + require.Error(t, mustNot(AdjustedCountToEncoded(math.MaxInt64))) } -func TestValidProbabilityToTvalue(t *testing.T) { - require.Equal(t, "0x1p-01", must(ProbabilityToTvalue(0.5, 'x', -1))) - require.Equal(t, "0x1p-56", must(ProbabilityToTvalue(0x1p-56, 'x', -1))) - require.Equal(t, "0x1.555p-02", must(ProbabilityToTvalue(1/3., 'x', 3))) - require.Equal(t, "0", must(ProbabilityToTvalue(0, 'x', 3))) - require.Equal(t, "0", must(ProbabilityToTvalue(0, 'f', 4))) +func TestValidProbabilityToEncoded(t *testing.T) { + require.Equal(t, "0x1p-01", must(ProbabilityToEncoded(0.5, 'x', -1))) + require.Equal(t, "0x1p-56", must(ProbabilityToEncoded(0x1p-56, 'x', -1))) + require.Equal(t, "0x1.555p-02", must(ProbabilityToEncoded(1/3., 'x', 3))) + require.Equal(t, "0", must(ProbabilityToEncoded(0, 'x', 3))) + require.Equal(t, "0", must(ProbabilityToEncoded(0, 'f', 4))) } -func TestInvalidProbabilityToTvalue(t *testing.T) { +func TestInvalidProbabilityToEncoded(t *testing.T) { // Too small - require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 'x', -1))) - require.Error(t, mustNot(ProbabilityToTvalue(0x1p-57, 'x', 0))) + require.Error(t, mustNot(ProbabilityToEncoded(0x1p-57, 'x', -1))) + require.Error(t, mustNot(ProbabilityToEncoded(0x1p-57, 'x', 0))) // Too big - require.Error(t, mustNot(ProbabilityToTvalue(1.1, 'x', -1))) - require.Error(t, mustNot(ProbabilityToTvalue(1.1, 'x', 0))) + require.Error(t, mustNot(ProbabilityToEncoded(1.1, 'x', -1))) + require.Error(t, mustNot(ProbabilityToEncoded(1.1, 'x', 0))) // Bad precision - require.Error(t, mustNot(ProbabilityToTvalue(0.5, 'x', -3))) - require.Error(t, mustNot(ProbabilityToTvalue(0.5, 'x', 15))) + require.Error(t, mustNot(ProbabilityToEncoded(0.5, 'x', -3))) + require.Error(t, mustNot(ProbabilityToEncoded(0.5, 'x', 15))) } func 
testTValueToProb(tv string) (float64, error) { - p, _, err := TvalueToProbabilityAndAdjustedCount(tv) + p, _, err := EncodedToProbabilityAndAdjustedCount(tv) return p, err } func testTValueToAdjCount(tv string) (float64, error) { - _, ac, err := TvalueToProbabilityAndAdjustedCount(tv) + _, ac, err := EncodedToProbabilityAndAdjustedCount(tv) return ac, err } -func TestTvalueToProbability(t *testing.T) { +func TestEncodedToProbability(t *testing.T) { require.Equal(t, 0.5, must(testTValueToProb("0.5"))) require.Equal(t, 0.444, must(testTValueToProb("0.444"))) require.Equal(t, 1.0, must(testTValueToProb("1"))) @@ -97,7 +97,7 @@ func TestTvalueToProbability(t *testing.T) { require.InEpsilon(t, 1/3., must(testTValueToProb("3")), 1e-9) } -func TestTvalueToAdjCount(t *testing.T) { +func TestEncodedToAdjCount(t *testing.T) { require.Equal(t, 2.0, must(testTValueToAdjCount("0.5"))) require.Equal(t, 2.0, must(testTValueToAdjCount("2"))) require.Equal(t, 3., must(testTValueToAdjCount("3"))) diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index eddb2e5362fb..7dc246c8a65b 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -1,103 +1,131 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" +package sampling import ( - "fmt" + "regexp" + "strconv" "strings" ) -// W3CTraceState represents a W3C tracestate header, which is -// organized into vendor-specific sections. OpenTelemetry specifies -// a section that uses "ot" as the vendor key, where the t-value -// used for consistent sampling may be encoded. -// -// Note that we do not implement the limits specified in -// https://www.w3.org/TR/trace-context/#tracestate-limits because at -// this point in the traces pipeline, the tracestate is no longer -// being propagated. Those are propagation limits, OTel does not -// specifically restrict TraceState. -// -// TODO: Should this package's tracestate support do more to implement -// those limits? See -// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/tracestate-handling.md, -// which indicates that OTel should use a limit of 256 bytes, while -// the W3C tracestate entry as a whole recommends a limit of 512 -// bytes. type W3CTraceState struct { - otelParsed OTelTraceState - baseTraceState + commonTraceState + otts OTelTraceState } -// w3cSyntax describes the W3C tracestate entry. -var w3cSyntax = anyTraceStateSyntax[W3CTraceState, w3CTraceStateParser]{ - separator: ',', - equality: '=', - allowPunct: ";:._-+", -} - -// w3CTraceStateParser parses tracestate strings like `k1=v1,k2=v2` -type w3CTraceStateParser struct{} +const ( + hardMaxW3CLength = 1024 + + // keyRegexp is not an exact test, it permits all the + // characters and then we check various conditions. 
+ + // key = simple-key / multi-tenant-key + // simple-key = lcalpha 0*255( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // multi-tenant-key = tenant-id "@" system-id + // tenant-id = ( lcalpha / DIGIT ) 0*240( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // system-id = lcalpha 0*13( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // lcalpha = %x61-7A ; a-z + + lcAlphaRegexp = `[a-z]` + lcDigitPunctRegexp = `[a-z0-9\-\*/_]` + lcDigitRegexp = `[a-z0-9]` + tenantIDRegexp = lcDigitRegexp + lcDigitPunctRegexp + `{0,240}` + systemIDRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `{0,13}` + multiTenantKeyRegexp = tenantIDRegexp + `@` + systemIDRegexp + simpleKeyRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `{0,255}` + keyRegexp = `(?:(?:` + simpleKeyRegexp + `)|(?:` + multiTenantKeyRegexp + `))` + + // value = 0*255(chr) nblk-chr + // nblk-chr = %x21-2B / %x2D-3C / %x3E-7E + // chr = %x20 / nblk-chr + // + // Note the use of double-quoted strings in two places below. + // This is for \x expansion in these two cases. Also note + // \x2d is a hyphen character, so a quoted \ (i.e., \\\x2d) + // appears below. + valueNonblankCharRegexp = "[\x21-\x2b\\\x2d-\x3c\x3e-\x7e]" + valueCharRegexp = "[\x20-\x2b\\\x2d-\x3c\x3e-\x7e]" + valueRegexp = valueCharRegexp + `{0,255}` + valueNonblankCharRegexp + + // tracestate = list-member 0*31( OWS "," OWS list-member ) + // list-member = (key "=" value) / OWS + + owsCharSet = ` \t` + owsRegexp = `[` + owsCharSet + `]*` + w3cMemberRegexp = `(?:` + keyRegexp + `=` + valueRegexp + `)|(?:` + owsRegexp + `)` + + // This regexp is large enough that regexp impl refuses to + // make 31 copies of it (i.e., `{0,31}`) so we use `*` below. + w3cOwsCommaMemberRegexp = `(?:` + owsRegexp + `,` + owsRegexp + w3cMemberRegexp + `)` + + // The limit to 31 of owsCommaMemberRegexp is applied in code. 
+ w3cTracestateRegexp = `^` + w3cMemberRegexp + w3cOwsCommaMemberRegexp + `*$` +) -// NewW3CTraceState parses a W3C tracestate entry, especially tracking -// the OpenTelemetry entry where t-value resides for use in sampling -// decisions. -func NewW3CTraceState(input string) (W3CTraceState, error) { - return w3cSyntax.parse(input) -} +var ( + w3cTracestateRe = regexp.MustCompile(w3cTracestateRegexp) -// parseField recognizes the OpenTelemetry tracestate entry. -func (wp w3CTraceStateParser) parseField(instance *W3CTraceState, key, input string) error { - switch { - case key == "ot": - value, err := stripKey(key, input) - if err != nil { - return err - } - - otts, err := otelSyntax.parse(value) + w3cSyntax = keyValueScanner{ + maxItems: 32, + trim: true, + separator: ',', + equality: '=', + } +) - if err != nil { - return fmt.Errorf("w3c tracestate otel value: %w", err) - } +func NewW3CTraceState(input string) (w3c W3CTraceState, _ error) { + if len(input) > hardMaxW3CLength { + return w3c, ErrTraceStateSize + } - instance.otelParsed = otts - return nil + if !w3cTracestateRe.MatchString(input) { + return w3c, strconv.ErrSyntax } - return baseTraceStateParser{}.parseField(&instance.baseTraceState, key, input) + err := w3cSyntax.scanKeyValues(input, func(key, value string) error { + switch key { + case "ot": + var err error + w3c.otts, err = NewOTelTraceState(value) + return err + default: + w3c.kvs = append(w3c.kvs, KV{ + Key: key, + Value: value, + }) + return nil + } + }) + return w3c, err } -// Serialize returns a W3C tracestate encoding, as would be encoded in -// a ptrace.Span.TraceState(). 
-func (wts *W3CTraceState) Serialize() string { - var sb strings.Builder - - ots := wts.otelParsed.serialize() - if ots != "" { - _, _ = sb.WriteString("ot=") - _, _ = sb.WriteString(ots) - } +func (w3c W3CTraceState) HasAnyValue() bool { + return w3c.HasOTelValue() || w3c.HasExtraValues() +} - w3cSyntax.serializeBase(&wts.baseTraceState, &sb) +func (w3c W3CTraceState) OTelValue() OTelTraceState { + return w3c.otts +} - return sb.String() +func (w3c W3CTraceState) HasOTelValue() bool { + return w3c.otts.HasAnyValue() } -// OTelValue returns a reference to this value's OpenTelemetry trace -// state entry. -func (wts *W3CTraceState) OTelValue() *OTelTraceState { - return &wts.otelParsed +func (w3c W3CTraceState) Serialize(w *strings.Builder) { + cnt := 0 + sep := func() { + if cnt != 0 { + w.WriteString(",") + } + cnt++ + } + if w3c.otts.HasAnyValue() { + sep() + w.WriteString("ot=") + w3c.otts.Serialize(w) + } + for _, kv := range w3c.ExtraValues() { + sep() + w.WriteString(kv.Key) + w.WriteString("=") + w.WriteString(kv.Value) + } } diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index ece0281e953e..21d5f3428a28 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -16,6 +16,7 @@ package sampling import ( "errors" + "strings" "testing" "github.com/stretchr/testify/require" @@ -24,35 +25,62 @@ import ( func TestParseW3CTraceState(t *testing.T) { type testCase struct { in string - otval string + rval string + sval string + tval string expectErr error } - const notset = "" + const ns = "" for _, test := range []testCase{ // correct cases - {"ot=t:1", "t:1", nil}, - {"ot=t:100", "t:100", nil}, + {"ot=t:1", ns, ns, "1", nil}, + {"ot=t:100", ns, ns, "100", nil}, + {"ot=s:100;t:200", ns, "100", "200", nil}, + {"ot=r:1", "1", ns, ns, nil}, + {"ot=r:1,unknown:value,other=something", "1", ns, ns, nil}, } { t.Run(testName(test.in), func(t *testing.T) { - wts, err := w3cSyntax.parse(test.in) + w3c, err 
:= NewW3CTraceState(test.in) if test.expectErr != nil { - require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) } else { require.NoError(t, err) } - if test.otval != notset { - require.True(t, wts.hasOTelValue()) - require.Equal(t, "ot="+test.otval, wts.otelString) + if test.rval != ns { + require.True(t, w3c.HasOTelValue()) + require.True(t, w3c.OTelValue().HasRValue()) + require.Equal(t, test.rval, w3c.OTelValue().RValue()) } else { - - require.False(t, wts.hasOTelValue(), "should have no otel value") + require.False(t, w3c.OTelValue().HasRValue(), "should have no r-value") + } + if test.sval != ns { + require.True(t, w3c.HasOTelValue()) + require.True(t, w3c.OTelValue().HasSValue()) + require.Equal(t, test.sval, w3c.OTelValue().SValue()) + } else { + require.False(t, w3c.OTelValue().HasSValue(), "should have no s-value") + } + if test.tval != ns { + require.True(t, w3c.HasOTelValue()) + require.True(t, w3c.OTelValue().HasTValue()) + require.Equal(t, test.tval, w3c.OTelValue().TValue()) + } else { + require.False(t, w3c.OTelValue().HasTValue(), "should have no t-value") } - // on success w/o t-value, serialize() should not modify - if !wts.hasOTelValue() && test.expectErr == nil { - require.Equal(t, test.in, wts.serialize()) + if test.expectErr != nil { + return } + // on success Serialize() should not modify + // test by re-parsing + var w strings.Builder + w3c.Serialize(&w) + cpy, err := NewW3CTraceState(w.String()) + require.NoError(t, err, "with %v", w.String()) + require.Equal(t, w3c, cpy, "with %v", w.String()) }) } } From 0e27e405af530f9746d7fd8b95301a4f6718c245 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 1 Jun 2023 14:47:00 -0700 Subject: [PATCH 11/38] fix sampler build --- pkg/sampling/oteltracestate.go | 30 ++++++++++++------- pkg/sampling/w3ctracestate.go | 8 
++--- .../tracesprocessor.go | 15 ++++++---- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 2e54e89be1e2..4122106e9a40 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -11,12 +11,12 @@ type OTelTraceState struct { commonTraceState // sampling r, s, and t-values - ru uint64 // r value parsed, as unsigned - r string // 14 ASCII hex digits - sf float64 // s value parsed, as a probability - s string // original float syntax preserved - tf float64 // t value parsed, as a probability - t string // original float syntax preserved + ru uint64 // r value parsed, as unsigned + r string // 14 ASCII hex digits + sf Threshold // s value parsed, as a probability + s string // original float syntax preserved + tf Threshold // t value parsed, as a probability + t string // original float syntax preserved } const ( @@ -79,14 +79,14 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { prob, _, err = EncodedToProbabilityAndAdjustedCount(value) if err == nil { otts.s = value - otts.sf = prob + otts.sf, _ = ProbabilityToThreshold(prob) } case "t": var prob float64 prob, _, err = EncodedToProbabilityAndAdjustedCount(value) if err == nil { otts.t = value - otts.tf = prob + otts.tf, _ = ProbabilityToThreshold(prob) } default: otts.kvs = append(otts.kvs, KV{ @@ -120,7 +120,7 @@ func (otts OTelTraceState) SValue() string { return otts.s } -func (otts OTelTraceState) SValueProbability() float64 { +func (otts OTelTraceState) SValueThreshold() Threshold { return otts.sf } @@ -132,10 +132,20 @@ func (otts OTelTraceState) TValue() string { return otts.t } -func (otts OTelTraceState) TValueProbability() float64 { +func (otts OTelTraceState) TValueThreshold() Threshold { return otts.tf } +func (otts *OTelTraceState) SetTValue(value string, threshold Threshold) { + otts.t = value + otts.tf = threshold +} + +func (otts *OTelTraceState) UnsetTValue() { + otts.t 
= "" + otts.tf = Threshold{} +} + func (otts OTelTraceState) HasAnyValue() bool { return otts.HasRValue() || otts.HasSValue() || otts.HasTValue() || otts.HasExtraValues() } diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index 7dc246c8a65b..48e4343a7884 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -1,9 +1,9 @@ package sampling import ( + "io" "regexp" "strconv" - "strings" ) type W3CTraceState struct { @@ -101,15 +101,15 @@ func (w3c W3CTraceState) HasAnyValue() bool { return w3c.HasOTelValue() || w3c.HasExtraValues() } -func (w3c W3CTraceState) OTelValue() OTelTraceState { - return w3c.otts +func (w3c W3CTraceState) OTelValue() *OTelTraceState { + return &w3c.otts } func (w3c W3CTraceState) HasOTelValue() bool { return w3c.otts.HasAnyValue() } -func (w3c W3CTraceState) Serialize(w *strings.Builder) { +func (w3c W3CTraceState) Serialize(w io.StringWriter) { cnt := 0 sep := func() { if cnt != 0 { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 514c966b4ac4..96b69ad97f42 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "strconv" + "strings" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" @@ -114,12 +115,12 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * } else { // Encode t-value (OTEP 226), like %.4f. (See FormatFloat().) ratio := pct / 100 - tval, err := sampling.ProbabilityToTvalue(ratio, 'f', 4) + tval, err := sampling.ProbabilityToEncoded(ratio, 'f', 4) if err != nil { return nil, err } // Parse the exact value of probability encoded at this precision. 
- ratio, _, err = sampling.TvalueToProbabilityAndAdjustedCount(tval) + ratio, _, err = sampling.EncodedToProbabilityAndAdjustedCount(tval) if err != nil { return nil, err } @@ -170,7 +171,7 @@ func (ts *traceIDSampler) updateSampled(span ptrace.Span, should bool) error { if should { state.FromRaw(ts.tValueEncoding) } else { - state.FromRaw(sampling.TValueZeroEncoding) + state.FromRaw(sampling.ProbabilityZeroEncoding) } return nil } @@ -188,7 +189,9 @@ func (ts *traceIDSampler) updateSampled(span ptrace.Span, should bool) error { // Incoming TValue consistency is not checked when this happens. if !should { otts.SetTValue("0", sampling.Threshold{}) - state.FromRaw(wts.Serialize()) + var w strings.Builder + wts.Serialize(&w) + state.FromRaw(w.String()) return nil } @@ -215,7 +218,9 @@ func (ts *traceIDSampler) updateSampled(span ptrace.Span, should bool) error { // Set the new effective t-value. otts.SetTValue(ts.tValueEncoding, ts.traceIDThreshold) - state.FromRaw(wts.Serialize()) + var w strings.Builder + wts.Serialize(&w) + state.FromRaw(w.String()) return err } From efcdc3db00a781c8430b7ae5c7a4c6265a1eb122 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 1 Jun 2023 16:51:57 -0700 Subject: [PATCH 12/38] add support for s-value for non-consistent mode --- pkg/sampling/oteltracestate.go | 55 ++++---- pkg/sampling/tvalue.go | 17 ++- pkg/sampling/w3ctracestate.go | 8 +- .../tracesprocessor.go | 121 +++++++++--------- 4 files changed, 110 insertions(+), 91 deletions(-) diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 4122106e9a40..d3306db11e68 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -11,12 +11,12 @@ type OTelTraceState struct { commonTraceState // sampling r, s, and t-values - ru uint64 // r value parsed, as unsigned - r string // 14 ASCII hex digits - sf Threshold // s value parsed, as a probability - s string // original float syntax preserved - tf Threshold // t value parsed, as a 
probability - t string // original float syntax preserved + ru Randomness // r value parsed, as unsigned + r string // 14 ASCII hex digits + sp float64 // s value parsed, as a probability + s string // original float syntax preserved + tt Threshold // t value parsed, as a threshold + t string // original float syntax preserved } const ( @@ -71,7 +71,9 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { err = ErrRandomValueRange } else { otts.r = value - otts.ru = unsigned + otts.ru = Randomness{ + unsigned: unsigned, + } } } case "s": @@ -79,14 +81,14 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { prob, _, err = EncodedToProbabilityAndAdjustedCount(value) if err == nil { otts.s = value - otts.sf, _ = ProbabilityToThreshold(prob) + otts.sp = prob } case "t": var prob float64 prob, _, err = EncodedToProbabilityAndAdjustedCount(value) if err == nil { otts.t = value - otts.tf, _ = ProbabilityToThreshold(prob) + otts.tt, _ = ProbabilityToThreshold(prob) } default: otts.kvs = append(otts.kvs, KV{ @@ -100,57 +102,62 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { return otts, err } -func (otts OTelTraceState) HasRValue() bool { +func (otts *OTelTraceState) HasRValue() bool { return otts.r != "" } -func (otts OTelTraceState) RValue() string { +func (otts *OTelTraceState) RValue() string { return otts.r } -func (otts OTelTraceState) RValueUnsigned() uint64 { +func (otts *OTelTraceState) RValueRandomness() Randomness { return otts.ru } -func (otts OTelTraceState) HasSValue() bool { +func (otts *OTelTraceState) HasSValue() bool { return otts.s != "" } -func (otts OTelTraceState) SValue() string { +func (otts *OTelTraceState) SValue() string { return otts.s } -func (otts OTelTraceState) SValueThreshold() Threshold { - return otts.sf +func (otts *OTelTraceState) SValueProbability() float64 { + return otts.sp } -func (otts OTelTraceState) HasTValue() bool { +func (otts *OTelTraceState) SetSValue(value 
string, probability float64) { + otts.s = value + otts.sp = probability +} + +func (otts *OTelTraceState) HasTValue() bool { return otts.t != "" } -func (otts OTelTraceState) TValue() string { +func (otts *OTelTraceState) TValue() string { return otts.t } -func (otts OTelTraceState) TValueThreshold() Threshold { - return otts.tf +func (otts *OTelTraceState) TValueThreshold() Threshold { + return otts.tt } func (otts *OTelTraceState) SetTValue(value string, threshold Threshold) { otts.t = value - otts.tf = threshold + otts.tt = threshold } func (otts *OTelTraceState) UnsetTValue() { otts.t = "" - otts.tf = Threshold{} + otts.tt = Threshold{} } -func (otts OTelTraceState) HasAnyValue() bool { +func (otts *OTelTraceState) HasAnyValue() bool { return otts.HasRValue() || otts.HasSValue() || otts.HasTValue() || otts.HasExtraValues() } -func (otts OTelTraceState) Serialize(w io.StringWriter) { +func (otts *OTelTraceState) Serialize(w io.StringWriter) { cnt := 0 sep := func() { if cnt != 0 { diff --git a/pkg/sampling/tvalue.go b/pkg/sampling/tvalue.go index 75106445b213..406172646ddf 100644 --- a/pkg/sampling/tvalue.go +++ b/pkg/sampling/tvalue.go @@ -52,6 +52,12 @@ type Threshold struct { unsigned uint64 } +// Randomness may be derived from r-value or TraceID. +type Randomness struct { + // randomness is in the range [0, MaxAdjustedCount-1] + unsigned uint64 +} + var ( // ErrProbabilityRange is returned when a value should be in the range [MinSamplingProb, 1]. ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") @@ -167,9 +173,14 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { // ShouldSample returns true when the span passes this sampler's // consistent sampling decision. 
-func (t Threshold) ShouldSample(id pcommon.TraceID) bool { - value := binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask - return value < t.unsigned +func (t Threshold) ShouldSample(rnd Randomness) bool { + return rnd.unsigned < t.unsigned +} + +func RandomnessFromTraceID(id pcommon.TraceID) Randomness { + return Randomness{ + unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, + } } // Probability is the sampling ratio in the range [MinSamplingProb, 1]. diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index 48e4343a7884..cd952f48791a 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -97,19 +97,19 @@ func NewW3CTraceState(input string) (w3c W3CTraceState, _ error) { return w3c, err } -func (w3c W3CTraceState) HasAnyValue() bool { +func (w3c *W3CTraceState) HasAnyValue() bool { return w3c.HasOTelValue() || w3c.HasExtraValues() } -func (w3c W3CTraceState) OTelValue() *OTelTraceState { +func (w3c *W3CTraceState) OTelValue() *OTelTraceState { return &w3c.otts } -func (w3c W3CTraceState) HasOTelValue() bool { +func (w3c *W3CTraceState) HasOTelValue() bool { return w3c.otts.HasAnyValue() } -func (w3c W3CTraceState) Serialize(w io.StringWriter) { +func (w3c *W3CTraceState) Serialize(w io.StringWriter) { cnt := 0 sep := func() { if cnt != 0 { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 96b69ad97f42..cecfa5bf93e7 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -16,7 +16,6 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "context" - "fmt" "strconv" "strings" @@ -32,8 +31,6 @@ import ( "go.uber.org/zap" ) -var ErrInconsistentTValue = fmt.Errorf("inconsistent OTel TraceState t-value set") - // samplingPriority has the semantic result of parsing the 
"sampling.priority" // attribute per OpenTracing semantic conventions. type samplingPriority int @@ -61,14 +58,13 @@ const ( type traceSampler interface { // shouldSample reports the result based on a probabilistic decision. - shouldSample(trace pcommon.TraceID) bool + shouldSample(tid pcommon.TraceID, rnd sampling.Randomness) bool - // updateSampled modifies the span assuming it will be + // updateTracestate modifies the OTelTraceState assuming it will be // sampled, probabilistically or otherwise. The "should" parameter // is the result from shouldSample(), for the span's TraceID, which - // will not be recalculated. Returns an error when the incoming TraceState - // cannot be parsed. - updateSampled(span ptrace.Span, should bool) error + // will not be recalculated. + updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) } type traceProcessor struct { @@ -80,6 +76,8 @@ type traceHashSampler struct { // Hash-based calculation hashScaledSamplingRate uint32 hashSeed uint32 + probability float64 + svalueEncoding string } type traceIDSampler struct { @@ -104,18 +102,20 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * logger: set.Logger, } + ratio := pct / 100 if cfg.HashSeed != 0 { ts := &traceHashSampler{} // Adjust sampling percentage on private so recalculations are avoided. ts.hashScaledSamplingRate = uint32(pct * percentageScaleFactor) ts.hashSeed = cfg.HashSeed + ts.probability = ratio + ts.svalueEncoding = strconv.FormatFloat(ratio, 'g', 4, 64) tp.sampler = ts } else { // Encode t-value (OTEP 226), like %.4f. (See FormatFloat().) 
- ratio := pct / 100 - tval, err := sampling.ProbabilityToEncoded(ratio, 'f', 4) + tval, err := sampling.ProbabilityToEncoded(ratio, 'g', 4) if err != nil { return nil, err } @@ -146,82 +146,67 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } -func (ts *traceHashSampler) shouldSample(input pcommon.TraceID) bool { +func (ts *traceHashSampler) shouldSample(tid pcommon.TraceID, _ sampling.Randomness) bool { // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources // with various different criteria to generate trace id and perhaps were already sampled without hashing. // Hashing here prevents bias due to such systems. - return computeHash(input[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate -} - -func (ts *traceHashSampler) updateSampled(ptrace.Span, bool) error { - // Nothing specified - return nil -} - -func (ts *traceIDSampler) shouldSample(input pcommon.TraceID) bool { - return ts.traceIDThreshold.ShouldSample(input) + return computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate } -func (ts *traceIDSampler) updateSampled(span ptrace.Span, should bool) error { - state := span.TraceState() - raw := state.AsRaw() +func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, _ sampling.Randomness, should bool, otts *sampling.OTelTraceState) { + if !should { + otts.SetSValue(sampling.ProbabilityZeroEncoding, 0) + return + } - // Fast path for the case where there is no arriving TraceState. - if raw == "" { - if should { - state.FromRaw(ts.tValueEncoding) - } else { - state.FromRaw(sampling.ProbabilityZeroEncoding) - } - return nil + if otts.HasSValue() && otts.SValueProbability() == 0 { + // Zero count in, zero count out. + otts.SetSValue(sampling.ProbabilityZeroEncoding, 0) + return } - // Parse the arriving TraceState. 
- wts, err := sampling.NewW3CTraceState(raw) - if err != nil { - return err + if !otts.HasSValue() { + otts.SetSValue(ts.svalueEncoding, ts.probability) + return } - // Using the OTel trace state value: - otts := wts.OTelValue() + product := ts.probability * otts.SValueProbability() + + otts.SetSValue(strconv.FormatFloat(product, 'g', 4, 64), product) +} + +func (ts *traceIDSampler) shouldSample(_ pcommon.TraceID, randomness sampling.Randomness) bool { + return ts.traceIDThreshold.ShouldSample(randomness) +} +func (ts *traceIDSampler) updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) { // When this sampler decided not to sample, the t-value becomes zero. // Incoming TValue consistency is not checked when this happens. if !should { - otts.SetTValue("0", sampling.Threshold{}) - var w strings.Builder - wts.Serialize(&w) - state.FromRaw(w.String()) - return nil + otts.SetTValue(sampling.ProbabilityZeroEncoding, sampling.Threshold{}) + return } - arrivingHasNonZeroTValue := otts.HasTValue() && otts.TValueThreshold().Unsigned() != 0 if arrivingHasNonZeroTValue { // Consistency check: if the TraceID is out of range // (unless the TValue is zero), the TValue is a lie. // If inconsistent, clear it. - if !otts.TValueThreshold().ShouldSample(span.TraceID()) { - // This value is returned below; the span continues - // with any t-value. - err = ErrInconsistentTValue + if !otts.TValueThreshold().ShouldSample(rnd) { arrivingHasNonZeroTValue = false otts.UnsetTValue() } } - if arrivingHasNonZeroTValue && otts.TValueThreshold().Unsigned() < ts.traceIDThreshold.Unsigned() { + if arrivingHasNonZeroTValue && + otts.TValueThreshold().Unsigned() < ts.traceIDThreshold.Unsigned() { // Already-sampled case: test whether the unsigned value of the // threshold is smaller than this sampler is configured with. - return err + return } - // Set the new effective t-value. 
otts.SetTValue(ts.tValueEncoding, ts.traceIDThreshold) - var w strings.Builder - wts.Serialize(&w) - state.FromRaw(w.String()) - return err + return } func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { @@ -241,9 +226,24 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( return true } + state := s.TraceState() + raw := state.AsRaw() + + // Parse the arriving TraceState. + wts, err := sampling.NewW3CTraceState(raw) + var randomness sampling.Randomness + if err != nil { + tp.logger.Info("span trace state", zap.Error(err)) + randomness = sampling.RandomnessFromTraceID(s.TraceID()) + } else if wts.OTelValue().HasRValue() { + randomness = wts.OTelValue().RValueRandomness() + } else { + randomness = sampling.RandomnessFromTraceID(s.TraceID()) + } + forceSample := sp == mustSampleSpan - probSample := tp.sampler.shouldSample(s.TraceID()) + probSample := tp.sampler.shouldSample(s.TraceID(), randomness) sampled := forceSample || probSample @@ -262,10 +262,11 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( } if sampled { - err := tp.sampler.updateSampled(s, probSample) - if err != nil { - tp.logger.Info("sampling t-value update failed", zap.Error(err)) - } + tp.sampler.updateTracestate(s.TraceID(), randomness, probSample, wts.OTelValue()) + + var w strings.Builder + wts.Serialize(&w) + state.FromRaw(w.String()) } return !sampled From 939c75869d1fe30be27b69f7f5fca62785c1736d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 10 Jul 2023 10:57:29 -0700 Subject: [PATCH 13/38] WIP --- .../probabilisticsamplerprocessor/config.go | 46 ++++++++++++++----- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index bb3679036154..7a673717b99f 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ 
b/processor/probabilisticsamplerprocessor/config.go @@ -38,22 +38,44 @@ var validAttributeSource = map[AttributeSource]bool{ // Config has the configuration guiding the sampler processor. type Config struct { - // SamplingPercentage is the percentage rate at which traces or logs are going to be sampled. Defaults to - // zero, i.e.: no sample. Values greater or equal 100 are treated as "sample all traces/logs". This is - // treated as having four significant figures when conveying the sampling probability. + // SamplingPercentage is the percentage rate at which traces or logs are going to be sampled. Defaults + // to zero, i.e.: no sample. Values greater or equal 100 are treated as "sample all traces/logs". This + // is treated as having four significant figures when conveying the sampling probability. SamplingPercentage float32 `mapstructure:"sampling_percentage"` - // @@@ TODO - // SamplingOneInN int64 + // HashSeed allows one to configure the hashing seed. This is important in scenarios where multiple + // layers of collectors have different sampling rates: if they use the same seed all passing one layer + // may pass the other even if they have different sampling rates, configuring different seeds avoids + // that. + HashSeed uint32 `mapstructure:"hash_seed"` - // HashSeed allows one to configure the legacy hashing seed. The current version of this protocol assumes - // that tracecontext v2 TraceIDs are being used, which ensures 7 bytes of randomness are available. We assume - // this is the case when HashSeed == 0. + // SamplerMode selects the sampling behavior. Supported values: // - // This is important in scenarios where multiple layers of collectors have different sampling rates: if they - // use the same seed all passing one layer may pass the other even if they have different sampling rates, - // configuring different seeds avoids that. - HashSeed uint32 `mapstructure:"hash_seed"` + // - "hash_seed_downsample": the legacy behavior of this + // processor. 
Using an FNV hash combined with the HashSeed + // value, this sampler performs a non-consistent + // probabilistic downsampling. The number of spans output + // is expected to equal SamplingPercentage (as a ratio) + // times the number of spans input. Statistically, a + // span-to-metrics pipeline based on this mechanism may have + // anomalous behavior. + // + // - "consistent_resample": Using an OTel-specified consistent + // sampling mechanism, this sampler selectively reduces the + // effective sampling probability of arriving spans. This + // can be useful to select a small fraction of complete + // traces from a stream with mixed sampling rates. The rate + // of spans passing through depends on how much sampling has + // already been applied. If an arriving span was head + // sampled at the same probability it passes through. If + // the span arrives with lower probability, a warning is + // logged because it means this sampler is configured with + // too large a sampling probability to ensure complete traces. + // + // - "consistent_downsample": Using an OTel-specified consistent + // sampling mechanism, this sampler reduces the effective sampling + // probability of each span by `Sampling + SamplerMode string `mapstructure:"sampler_mode"` /////// // Logs only fields below. 
From a31266c400296afa009b38156da64c9d3150cbd7 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 2 Aug 2023 14:47:34 -0700 Subject: [PATCH 14/38] use new proposed syntax see https://github.com/open-telemetry/opentelemetry-specification/issues/3602 --- pkg/sampling/{tvalue.go => encoding.go} | 0 pkg/sampling/{tvalue_test.go => encoding_test.go} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename pkg/sampling/{tvalue.go => encoding.go} (100%) rename pkg/sampling/{tvalue_test.go => encoding_test.go} (100%) diff --git a/pkg/sampling/tvalue.go b/pkg/sampling/encoding.go similarity index 100% rename from pkg/sampling/tvalue.go rename to pkg/sampling/encoding.go diff --git a/pkg/sampling/tvalue_test.go b/pkg/sampling/encoding_test.go similarity index 100% rename from pkg/sampling/tvalue_test.go rename to pkg/sampling/encoding_test.go From 690cd64f5239fe09c5342bc2636025bafc5b4909 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 2 Aug 2023 16:30:36 -0700 Subject: [PATCH 15/38] update tracestate libs for new encoding --- pkg/sampling/encoding.go | 160 ++++++++++++++-------------- pkg/sampling/encoding_test.go | 121 ++++++++++----------- pkg/sampling/oteltracestate.go | 70 +++--------- pkg/sampling/oteltracestate_test.go | 118 +++++++++----------- pkg/sampling/w3ctracestate_test.go | 132 +++++++++++------------ 5 files changed, 265 insertions(+), 336 deletions(-) diff --git a/pkg/sampling/encoding.go b/pkg/sampling/encoding.go index 406172646ddf..eeaed05e97e5 100644 --- a/pkg/sampling/encoding.go +++ b/pkg/sampling/encoding.go @@ -16,7 +16,9 @@ package sampling // import "github.com/open-telemetry/opentelemetry-collector-co import ( "encoding/binary" + "errors" "fmt" + "math" "strconv" "go.opentelemetry.io/collector/pdata/pcommon" @@ -26,21 +28,17 @@ const ( // MinSamplingProb is one in 2^56. MinSamplingProb = 0x1p-56 - // MaxAdjustedCount is the adjusted count corresponding with - // MinSamplingProb (i.e., 1 / MinSamplingProb). 
0x1p+56 - MaxAdjustedCount = 1 / MinSamplingProb - // LeastHalfTraceIDThresholdMask is the mask to use on the // least-significant half of the TraceID, i.e., bytes 8-15. // Because this is a 56 bit mask, the result after masking is // the unsigned value of bytes 9 through 15. - LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 + LeastHalfTraceIDThresholdMask = 1/MinSamplingProb - 1 // ProbabilityZeroEncoding is the encoding for 0 adjusted count. ProbabilityZeroEncoding = "0" // ProbabilityOneEncoding is the encoding for 100% sampling. - ProbabilityOneEncoding = "1" + ProbabilityOneEncoding = "" ) // Threshold used to compare with the least-significant 7 bytes of the TraceID. @@ -60,16 +58,13 @@ type Randomness struct { var ( // ErrProbabilityRange is returned when a value should be in the range [MinSamplingProb, 1]. - ErrProbabilityRange = fmt.Errorf("sampling probability out of range (0x1p-56 <= valid <= 1)") - - // ErrAdjustedCountRange is returned when a value should be in the range [1, MaxAdjustedCount]. - ErrAdjustedCountRange = fmt.Errorf("sampling adjusted count out of range (1 <= valid <= 0x1p+56)") + ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") - // ErrAdjustedCountOnlyInteger is returned when a floating-point syntax is used to convey adjusted count. - ErrAdjustedCountOnlyInteger = fmt.Errorf("sampling adjusted count must be an integer") + // ErrTValueSize is returned for t-values longer than 14 hex digits. + ErrTValueSize = errors.New("t-value exceeds 14 hex digits") - // ErrPrecisionRange is returned when the precision argument is out of range. - ErrPrecisionRange = fmt.Errorf("sampling precision out of range (-1 <= valid <= 14)") + // ErrRValueSize is returned for r-values != 14 hex digits. + ErrRValueSize = errors.New("r-value must have 14 hex digits") ) // probabilityInRange tests MinSamplingProb <= prob <= 1. 
@@ -77,25 +72,16 @@ func probabilityInRange(prob float64) bool { return prob >= MinSamplingProb && prob <= 1 } -// AdjustedCountToEncoded encodes a s-value or t-value given an -// adjusted count. In this form, the encoding is a decimal integer. -func AdjustedCountToEncoded(count uint64) (string, error) { - switch { - case count == 0: - return ProbabilityZeroEncoding, nil - case count < 0: - return "", ErrProbabilityRange - case count > uint64(MaxAdjustedCount): - return "", ErrAdjustedCountRange +// removeTrailingZeros eliminates trailing zeros from a string. +func removeTrailingZeros(in string) string { + for len(in) > 1 && in[len(in)-1] == '0' { + in = in[:len(in)-1] } - return strconv.FormatInt(int64(count), 10), nil + return in } -// ProbabilityToEncoded encodes a s-value or t-value given a -// probability. In this form, the user controls floating-point format -// and precision. See strconv.FormatFloat() for an explanation of -// `format` and `prec`. -func ProbabilityToEncoded(prob float64, format byte, prec int) (string, error) { +// ProbabilityToTValue encodes a t-value given a probability. +func ProbabilityToTValue(prob float64) (string, error) { // Probability cases switch { case prob == 1: @@ -105,57 +91,33 @@ func ProbabilityToEncoded(prob float64, format byte, prec int) (string, error) { case !probabilityInRange(prob): return "", ErrProbabilityRange } - // Precision cases - switch { - case prec == -1: - // Default precision (see FormatFloat) - case prec == 0: - // Precision == 0 forces probabilities to be powers-of-two. - case prec <= 14: - // Precision is in-range - default: - return "", ErrPrecisionRange + unsigned := uint64(math.Round(prob / MinSamplingProb)) - } - return strconv.FormatFloat(prob, format, prec, 64), nil + // Note fmt.Sprintf handles zero padding to 14 bytes as well as setting base=16. + // Otherwise could be done by hand using strconv.FormatUint(unsigned, 16) and + // padding to 14 bytes before removing the trailing zeros.
+ return removeTrailingZeros(fmt.Sprintf("%014x", unsigned)), nil } -// EncodedToProbabilityAndAdjustedCount parses the t-value and returns -// both the probability and the adjusted count. In a Span-to-Metrics -// pipeline, users should count either the inverse of probability or -// the adjusted count. When the arriving t-value encodes adjusted -// count as opposed to probability, the adjusted count will be exactly -// the specified integer value; in these cases, probability corresponds -// with exactly implemented sampling ratio. -func EncodedToProbabilityAndAdjustedCount(s string) (float64, float64, error) { - number, err := strconv.ParseFloat(s, 64) // e.g., "0x1.b7p-02" -> approx 3/7 - if err != nil { - return 0, 0, err +// TValueToProbability parses the t-value and returns +// the probability. +func TValueToProbability(s string) (float64, error) { + if len(s) > 14 { + return 0, ErrTValueSize + } + if s == ProbabilityOneEncoding { + return 1, nil } - adjusted := 0.0 - switch { - case number == 0: - - case number < MinSamplingProb: - return 0, 0, ErrProbabilityRange - case number > 1: - // Greater than 1 indicates adjusted count; re-parse - // as a decimal integer. - integer, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, 0, ErrAdjustedCountOnlyInteger - } - if integer > MaxAdjustedCount { - return 0, 0, ErrAdjustedCountRange - } - adjusted = float64(integer) - number = 1 / adjusted - default: - adjusted = 1 / number + unsigned, err := strconv.ParseUint(s, 16, 64) + if err != nil { + return 0, err } - return number, adjusted, nil + // Zero-padding is done by shifting 4 bit positions per + // missing hex digit. 
+ extend := 14 - len(s) + return float64(unsigned<<(4*extend)) * MinSamplingProb, nil } // ProbabilityToThreshold returns the sampling threshold exactly @@ -167,25 +129,29 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { return Threshold{}, ErrProbabilityRange } return Threshold{ - unsigned: uint64(prob * MaxAdjustedCount), + unsigned: uint64(prob / MinSamplingProb), }, nil } +// TValueToThreshold returns a Threshold, see +// Threshold.ShouldSample(Randomness) and Threshold.Probability(). +func TValueToThreshold(s string) (Threshold, error) { + prob, err := TValueToProbability(s) + if err != nil { + return Threshold{}, err + } + return ProbabilityToThreshold(prob) +} + // ShouldSample returns true when the span passes this sampler's // consistent sampling decision. func (t Threshold) ShouldSample(rnd Randomness) bool { return rnd.unsigned < t.unsigned } -func RandomnessFromTraceID(id pcommon.TraceID) Randomness { - return Randomness{ - unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, - } -} - // Probability is the sampling ratio in the range [MinSamplingProb, 1]. func (t Threshold) Probability() float64 { - return float64(t.unsigned) / MaxAdjustedCount + return float64(t.unsigned) * MinSamplingProb } // Unsigned is an unsigned integer that scales with the sampling @@ -194,3 +160,33 @@ func (t Threshold) Probability() float64 { func (t Threshold) Unsigned() uint64 { return t.unsigned } + +// RandomnessFromTraceID returns the Randomness value we compare with Threshold in ShouldSample. +func RandomnessFromTraceID(id pcommon.TraceID) Randomness { + return Randomness{ + unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, + } +} + +// Unsigned is an unsigned integer that scales with the randomness +// value. This is useful to compare two randomness values without +// floating point conversions. +func (r Randomness) Unsigned() uint64 { + return r.unsigned +} + +// RValueToRandomness parses 14 hex digits into a Randomness.
+func RValueToRandomness(s string) (Randomness, error) { + if len(s) != 14 { + return Randomness{}, ErrRValueSize + } + + unsigned, err := strconv.ParseUint(s, 16, 64) + if err != nil { + return Randomness{}, err + } + + return Randomness{ + unsigned: unsigned, + }, nil +} diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index afbbe84650ac..2cb31976a9b7 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -18,7 +18,6 @@ import ( "bytes" "encoding/binary" "fmt" - "math" "math/rand" "testing" @@ -40,74 +39,32 @@ func mustNot[T any](t T, err error) error { return err } -func TestValidAdjustedCountToTvalue(t *testing.T) { - require.Equal(t, "0", must(AdjustedCountToEncoded(0))) - require.Equal(t, "1", must(AdjustedCountToEncoded(1))) - require.Equal(t, "2", must(AdjustedCountToEncoded(2))) - - const largest uint64 = 0x1p+56 - require.Equal(t, "72057594037927936", must(AdjustedCountToEncoded(largest))) - require.Equal(t, fmt.Sprint(largest-1), must(AdjustedCountToEncoded(largest-1))) -} - -func TestInvalidAdjustedCountToEncoded(t *testing.T) { - // Because unsigned, no too-small value. 
- require.Error(t, mustNot(AdjustedCountToEncoded(0x1p56+1))) - require.Error(t, mustNot(AdjustedCountToEncoded(math.MaxInt64))) -} - -func TestValidProbabilityToEncoded(t *testing.T) { - require.Equal(t, "0x1p-01", must(ProbabilityToEncoded(0.5, 'x', -1))) - require.Equal(t, "0x1p-56", must(ProbabilityToEncoded(0x1p-56, 'x', -1))) - require.Equal(t, "0x1.555p-02", must(ProbabilityToEncoded(1/3., 'x', 3))) - require.Equal(t, "0", must(ProbabilityToEncoded(0, 'x', 3))) - require.Equal(t, "0", must(ProbabilityToEncoded(0, 'f', 4))) +func TestValidProbabilityToTValue(t *testing.T) { + require.Equal(t, "8", must(ProbabilityToTValue(0.5))) + require.Equal(t, "00000000000001", must(ProbabilityToTValue(0x1p-56))) + require.Equal(t, "55555555555554", must(ProbabilityToTValue(1/3.))) + require.Equal(t, "54", must(ProbabilityToTValue(0x54p-8))) // 0x54p-8 is approximately 1/3 + require.Equal(t, "01", must(ProbabilityToTValue(0x1p-8))) + require.Equal(t, "0", must(ProbabilityToTValue(0))) } -func TestInvalidProbabilityToEncoded(t *testing.T) { +func TestInvalidProbabilityToTValue(t *testing.T) { // Too small - require.Error(t, mustNot(ProbabilityToEncoded(0x1p-57, 'x', -1))) - require.Error(t, mustNot(ProbabilityToEncoded(0x1p-57, 'x', 0))) + require.Error(t, mustNot(ProbabilityToTValue(0x1p-57))) + require.Error(t, mustNot(ProbabilityToTValue(0x1p-57))) // Too big - require.Error(t, mustNot(ProbabilityToEncoded(1.1, 'x', -1))) - require.Error(t, mustNot(ProbabilityToEncoded(1.1, 'x', 0))) - - // Bad precision - require.Error(t, mustNot(ProbabilityToEncoded(0.5, 'x', -3))) - require.Error(t, mustNot(ProbabilityToEncoded(0.5, 'x', 15))) + require.Error(t, mustNot(ProbabilityToTValue(1.1))) + require.Error(t, mustNot(ProbabilityToTValue(1.1))) } -func testTValueToProb(tv string) (float64, error) { - p, _, err := EncodedToProbabilityAndAdjustedCount(tv) - return p, err -} +func TestTValueToProbability(t *testing.T) { + require.Equal(t, 0.5, must(TValueToProbability("8"))) + 
require.Equal(t, 0x444p-12, must(TValueToProbability("444"))) + require.Equal(t, 0.0, must(TValueToProbability("0"))) -func testTValueToAdjCount(tv string) (float64, error) { - _, ac, err := EncodedToProbabilityAndAdjustedCount(tv) - return ac, err -} - -func TestEncodedToProbability(t *testing.T) { - require.Equal(t, 0.5, must(testTValueToProb("0.5"))) - require.Equal(t, 0.444, must(testTValueToProb("0.444"))) - require.Equal(t, 1.0, must(testTValueToProb("1"))) - require.Equal(t, 0.0, must(testTValueToProb("0"))) - - require.InEpsilon(t, 1/3., must(testTValueToProb("3")), 1e-9) -} - -func TestEncodedToAdjCount(t *testing.T) { - require.Equal(t, 2.0, must(testTValueToAdjCount("0.5"))) - require.Equal(t, 2.0, must(testTValueToAdjCount("2"))) - require.Equal(t, 3., must(testTValueToAdjCount("3"))) - require.Equal(t, 5., must(testTValueToAdjCount("5"))) - - require.InEpsilon(t, 1/0.444, must(testTValueToAdjCount("0.444")), 1e-9) - require.InEpsilon(t, 1/0.111111, must(testTValueToAdjCount("0.111111")), 1e-9) - - require.Equal(t, 1.0, must(testTValueToAdjCount("1"))) - require.Equal(t, 0.0, must(testTValueToAdjCount("0"))) + // 0x55555554p-32 is very close to 1/3 + require.InEpsilon(t, 1/3., must(TValueToProbability("55555554")), 1e-9) } func TestProbabilityToThreshold(t *testing.T) { @@ -124,12 +81,48 @@ func TestProbabilityToThreshold(t *testing.T) { Threshold{2}, must(ProbabilityToThreshold(0x1p-55))) require.Equal(t, - Threshold{MaxAdjustedCount}, + Threshold{1 / MinSamplingProb}, must(ProbabilityToThreshold(1.0))) require.Equal(t, - Threshold{0x1.555p-2 * MaxAdjustedCount}, - must(ProbabilityToThreshold(0x1.555p-2))) + Threshold{0x555p-12 / MinSamplingProb}, + must(TValueToThreshold("555"))) + require.Equal(t, + Threshold{0x123p-20 / MinSamplingProb}, + must(TValueToThreshold("00123"))) +} + +func TestShouldSample(t *testing.T) { + // Test four boundary conditions for 50% sampling, + thresh := must(ProbabilityToThreshold(0.5)) + // Smallest TraceID that should 
sample. + require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0, // randomness starts here + 0, 0, 0, 0, 0, 0, + }))) + // Largest TraceID that should sample. + require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0x7f, // randomness starts here + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }))) + // Smallest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0x80, // randomness starts here + 0, 0, 0, 0, 0, 0, + }))) + // Largest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0xff, // randomness starts here + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }))) } // The two benchmarks below were used to choose the implementation for diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index d3306db11e68..a5ce95b349eb 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -1,7 +1,6 @@ package sampling import ( - "fmt" "io" "regexp" "strconv" @@ -10,13 +9,11 @@ import ( type OTelTraceState struct { commonTraceState - // sampling r, s, and t-values - ru Randomness // r value parsed, as unsigned - r string // 14 ASCII hex digits - sp float64 // s value parsed, as a probability - s string // original float syntax preserved - tt Threshold // t value parsed, as a threshold - t string // original float syntax preserved + // sampling r and t-values + rnd Randomness // r value parsed, as unsigned + r string // 14 ASCII hex digits + tt Threshold // t value parsed, as a threshold + t string // 1-14 ASCII hex digits } const ( @@ -41,8 +38,6 @@ const ( 
var ( otelTracestateRe = regexp.MustCompile(otelTracestateRegexp) - ErrRandomValueRange = fmt.Errorf("r-value out of range") - otelSyntax = keyValueScanner{ maxItems: -1, trim: false, @@ -64,31 +59,16 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { var err error switch key { case "r": - var unsigned uint64 - unsigned, err = strconv.ParseUint(value, 16, 64) - if err == nil { - if unsigned >= 0x1p56 { - err = ErrRandomValueRange - } else { - otts.r = value - otts.ru = Randomness{ - unsigned: unsigned, - } - } - } - case "s": - var prob float64 - prob, _, err = EncodedToProbabilityAndAdjustedCount(value) - if err == nil { - otts.s = value - otts.sp = prob + if otts.rnd, err = RValueToRandomness(value); err == nil { + otts.r = value + } else { + otts.rnd = Randomness{} } case "t": - var prob float64 - prob, _, err = EncodedToProbabilityAndAdjustedCount(value) - if err == nil { + if otts.tt, err = TValueToThreshold(value); err == nil { otts.t = value - otts.tt, _ = ProbabilityToThreshold(prob) + } else { + otts.tt = Threshold{} } default: otts.kvs = append(otts.kvs, KV{ @@ -111,24 +91,7 @@ func (otts *OTelTraceState) RValue() string { } func (otts *OTelTraceState) RValueRandomness() Randomness { - return otts.ru -} - -func (otts *OTelTraceState) HasSValue() bool { - return otts.s != "" -} - -func (otts *OTelTraceState) SValue() string { - return otts.s -} - -func (otts *OTelTraceState) SValueProbability() float64 { - return otts.sp -} - -func (otts *OTelTraceState) SetSValue(value string, probability float64) { - otts.s = value - otts.sp = probability + return otts.rnd } func (otts *OTelTraceState) HasTValue() bool { @@ -154,7 +117,7 @@ func (otts *OTelTraceState) UnsetTValue() { } func (otts *OTelTraceState) HasAnyValue() bool { - return otts.HasRValue() || otts.HasSValue() || otts.HasTValue() || otts.HasExtraValues() + return otts.HasRValue() || otts.HasTValue() || otts.HasExtraValues() } func (otts *OTelTraceState) Serialize(w 
io.StringWriter) { @@ -170,11 +133,6 @@ func (otts *OTelTraceState) Serialize(w io.StringWriter) { w.WriteString("r:") w.WriteString(otts.RValue()) } - if otts.HasSValue() { - sep() - w.WriteString("s:") - w.WriteString(otts.SValue()) - } if otts.HasTValue() { sep() w.WriteString("t:") diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 017f48b0d2ec..a9ecc055154d 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -38,17 +38,14 @@ func TestEmptyOTelTraceState(t *testing.T) { } func TestOTelTraceStateTValueSerialize(t *testing.T) { - const orig = "r:1;s:2;t:3;a:b;c:d" + const orig = "r:10000000000000;t:3;a:b;c:d" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasTValue()) require.Equal(t, "3", otts.TValue()) - require.True(t, otts.HasSValue()) - require.Equal(t, "2", otts.SValue()) - require.True(t, otts.HasRValue()) - require.Equal(t, "1", otts.RValue()) + require.Equal(t, "10000000000000", otts.RValue()) require.True(t, otts.HasAnyValue()) var w strings.Builder @@ -60,7 +57,6 @@ func TestParseOTelTraceState(t *testing.T) { type testCase struct { in string rval string - sval string tval string extra []string expectErr error @@ -68,86 +64,78 @@ func TestParseOTelTraceState(t *testing.T) { const ns = "" for _, test := range []testCase{ // t-value correct cases - {"t:2", ns, ns, "2", nil, nil}, - {"t:1", ns, ns, "1", nil, nil}, - {"t:1", ns, ns, "1", nil, nil}, - {"t:10", ns, ns, "10", nil, nil}, - {"t:33", ns, ns, "33", nil, nil}, - {"t:61", ns, ns, "61", nil, nil}, - {"t:72057594037927936", ns, ns, "72057594037927936", nil, nil}, // max t-value = 0x1p+56 - {"t:0x1p-56", ns, ns, "0x1p-56", nil, nil}, // min t-value + {"t:2", ns, "2", nil, nil}, + {"t:1", ns, "1", nil, nil}, + {"t:1", ns, "1", nil, nil}, + {"t:10", ns, "10", nil, nil}, + {"t:33", ns, "33", nil, nil}, + {"t:ab", ns, "ab", nil, nil}, + {"t:61", ns, "61", nil, nil}, // syntax errors - 
{"", ns, ns, ns, nil, strconv.ErrSyntax}, - {"t:1;", ns, ns, ns, nil, strconv.ErrSyntax}, - {"t:1=p:2", ns, ns, ns, nil, strconv.ErrSyntax}, - {"t:1;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, - {":1;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, - {":;p:2=s:3", ns, ns, ns, nil, strconv.ErrSyntax}, - {":;:", ns, ns, ns, nil, strconv.ErrSyntax}, - {":", ns, ns, ns, nil, strconv.ErrSyntax}, - {"t:;p=1", ns, ns, ns, nil, strconv.ErrSyntax}, - {"t:$", ns, ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal - {"t:0x1p+3", ns, ns, ns, nil, strconv.ErrSyntax}, // + is invalid - - // range errors - {"t:14.5", ns, ns, ns, nil, ErrAdjustedCountOnlyInteger}, // integer syntax - {"t:72057594037927937", ns, ns, ns, nil, ErrAdjustedCountRange}, // out-of-range - {"t:-1", ns, ns, ns, nil, ErrProbabilityRange}, // non-negative + {"", ns, ns, nil, strconv.ErrSyntax}, + {"t:1;", ns, ns, nil, strconv.ErrSyntax}, + {"t:1=p:2", ns, ns, nil, strconv.ErrSyntax}, + {"t:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":;:", ns, ns, nil, strconv.ErrSyntax}, + {":", ns, ns, nil, strconv.ErrSyntax}, + {"t:;p=1", ns, ns, nil, strconv.ErrSyntax}, + {"t:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"t:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid + {"t:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax + {"t:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative + + // too many digits + {"t:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, + {"t:100000000000000", ns, ns, nil, ErrTValueSize}, // one field - {"e100:1", ns, ns, ns, []string{"e100:1"}, nil}, + {"e100:1", ns, ns, []string{"e100:1"}, nil}, // two fields - {"e1:1;e2:2", ns, ns, ns, []string{"e1:1", "e2:2"}, nil}, - {"e1:1;e2:2", ns, ns, ns, []string{"e1:1", "e2:2"}, nil}, + {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, + {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, // one extra key, two 
ways - {"t:2;extra:stuff", ns, ns, "2", []string{"extra:stuff"}, nil}, - {"extra:stuff;t:2", ns, ns, "2", []string{"extra:stuff"}, nil}, + {"t:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;t:2", ns, "2", []string{"extra:stuff"}, nil}, // two extra fields - {"e100:100;t:1;e101:101", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"t:1;e100:100;e101:101", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"e100:100;e101:101;t:1", ns, ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;t:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"t:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;t:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, // parse error prevents capturing unrecognized keys - {"1:1;u:V", ns, ns, ns, nil, strconv.ErrSyntax}, - {"X:1;u:V", ns, ns, ns, nil, strconv.ErrSyntax}, - {"x:1;u:V", ns, ns, ns, []string{"x:1", "u:V"}, nil}, - - // s-value - {"s:2;extra:stuff", ns, "2", ns, []string{"extra:stuff"}, nil}, - {"extra:stuff;s:2", ns, "2", ns, []string{"extra:stuff"}, nil}, - - // s-value range error - {"s:0x1p-58", ns, ns, ns, nil, ErrProbabilityRange}, - {"s:-1", ns, ns, ns, nil, ErrProbabilityRange}, + {"1:1;u:V", ns, ns, nil, strconv.ErrSyntax}, + {"X:1;u:V", ns, ns, nil, strconv.ErrSyntax}, + {"x:1;u:V", ns, ns, []string{"x:1", "u:V"}, nil}, // r-value - {"r:2;extra:stuff", "2", ns, ns, []string{"extra:stuff"}, nil}, - {"extra:stuff;r:2", "2", ns, ns, []string{"extra:stuff"}, nil}, - {"r:ffffffffffffff", "ffffffffffffff", ns, ns, nil, nil}, - {"r:8888", "8888", ns, ns, nil, nil}, - {"r:0", "0", ns, ns, nil, nil}, + {"r:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;r:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"r:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, + {"r:88888888888888", "88888888888888", ns, nil, nil}, + {"r:00000000000000", "00000000000000", ns, 
nil, nil}, // r-value range error (15 bytes of hex or more) - {"r:100000000000000", ns, ns, ns, nil, ErrRandomValueRange}, - {"r:fffffffffffffffff", ns, ns, ns, nil, strconv.ErrRange}, + {"r:100000000000000", ns, ns, nil, ErrRValueSize}, + {"r:fffffffffffffffff", ns, ns, nil, ErrRValueSize}, // no trailing ; - {"x:1;", ns, ns, ns, nil, strconv.ErrSyntax}, + {"x:1;", ns, ns, nil, strconv.ErrSyntax}, // empty key - {"x:", ns, ns, ns, []string{"x:"}, nil}, + {"x:", ns, ns, []string{"x:"}, nil}, // charset test - {"x:0X1FFF;y:.-_-.;z:", ns, ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, - {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + {"x:0X1FFF;y:.-_-.;z:", ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, // size exceeded - {"x:" + strings.Repeat("_", 255), ns, ns, ns, nil, ErrTraceStateSize}, - {"x:" + strings.Repeat("_", 254), ns, ns, ns, []string{"x:" + strings.Repeat("_", 254)}, nil}, + {"x:" + strings.Repeat("_", 255), ns, ns, nil, ErrTraceStateSize}, + {"x:" + strings.Repeat("_", 254), ns, ns, []string{"x:" + strings.Repeat("_", 254)}, nil}, } { t.Run(testName(test.in), func(t *testing.T) { otts, err := NewOTelTraceState(test.in) @@ -163,12 +151,6 @@ func TestParseOTelTraceState(t *testing.T) { } else { require.False(t, otts.HasRValue(), "should have no r-value: %s", otts.RValue()) } - if test.sval != ns { - require.True(t, otts.HasSValue()) - require.Equal(t, test.sval, otts.SValue()) - } else { - require.False(t, otts.HasSValue(), "should have no s-value: %s", otts.SValue()) - } if test.tval != ns { require.True(t, otts.HasTValue()) require.Equal(t, test.tval, otts.TValue()) diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index 21d5f3428a28..4a9ab2ca2869 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -14,73 +14,73 @@ 
package sampling -import ( - "errors" - "strings" - "testing" +// import ( +// "errors" +// "strings" +// "testing" - "github.com/stretchr/testify/require" -) +// "github.com/stretchr/testify/require" +// ) -func TestParseW3CTraceState(t *testing.T) { - type testCase struct { - in string - rval string - sval string - tval string - expectErr error - } - const ns = "" - for _, test := range []testCase{ - // correct cases - {"ot=t:1", ns, ns, "1", nil}, - {"ot=t:100", ns, ns, "100", nil}, - {"ot=s:100;t:200", ns, "100", "200", nil}, - {"ot=r:1", "1", ns, ns, nil}, - {"ot=r:1,unknown:value,other=something", "1", ns, ns, nil}, - } { - t.Run(testName(test.in), func(t *testing.T) { - w3c, err := NewW3CTraceState(test.in) +// func TestParseW3CTraceState(t *testing.T) { +// type testCase struct { +// in string +// rval string +// sval string +// tval string +// expectErr error +// } +// const ns = "" +// for _, test := range []testCase{ +// // correct cases +// {"ot=t:1", ns, ns, "1", nil}, +// {"ot=t:100", ns, ns, "100", nil}, +// {"ot=s:100;t:200", ns, "100", "200", nil}, +// {"ot=r:1", "1", ns, ns, nil}, +// {"ot=r:1,unknown:value,other=something", "1", ns, ns, nil}, +// } { +// t.Run(testName(test.in), func(t *testing.T) { +// w3c, err := NewW3CTraceState(test.in) - if test.expectErr != nil { - require.True(t, errors.Is(err, test.expectErr), - "%q: not expecting %v wanted %v", test.in, err, test.expectErr, - ) - } else { - require.NoError(t, err) - } - if test.rval != ns { - require.True(t, w3c.HasOTelValue()) - require.True(t, w3c.OTelValue().HasRValue()) - require.Equal(t, test.rval, w3c.OTelValue().RValue()) - } else { - require.False(t, w3c.OTelValue().HasRValue(), "should have no r-value") - } - if test.sval != ns { - require.True(t, w3c.HasOTelValue()) - require.True(t, w3c.OTelValue().HasSValue()) - require.Equal(t, test.sval, w3c.OTelValue().SValue()) - } else { - require.False(t, w3c.OTelValue().HasSValue(), "should have no s-value") - } - if test.tval != ns { 
- require.True(t, w3c.HasOTelValue()) - require.True(t, w3c.OTelValue().HasTValue()) - require.Equal(t, test.tval, w3c.OTelValue().TValue()) - } else { - require.False(t, w3c.OTelValue().HasTValue(), "should have no t-value") - } +// if test.expectErr != nil { +// require.True(t, errors.Is(err, test.expectErr), +// "%q: not expecting %v wanted %v", test.in, err, test.expectErr, +// ) +// } else { +// require.NoError(t, err) +// } +// if test.rval != ns { +// require.True(t, w3c.HasOTelValue()) +// require.True(t, w3c.OTelValue().HasRValue()) +// require.Equal(t, test.rval, w3c.OTelValue().RValue()) +// } else { +// require.False(t, w3c.OTelValue().HasRValue(), "should have no r-value") +// } +// if test.sval != ns { +// require.True(t, w3c.HasOTelValue()) +// require.True(t, w3c.OTelValue().HasSValue()) +// require.Equal(t, test.sval, w3c.OTelValue().SValue()) +// } else { +// require.False(t, w3c.OTelValue().HasSValue(), "should have no s-value") +// } +// if test.tval != ns { +// require.True(t, w3c.HasOTelValue()) +// require.True(t, w3c.OTelValue().HasTValue()) +// require.Equal(t, test.tval, w3c.OTelValue().TValue()) +// } else { +// require.False(t, w3c.OTelValue().HasTValue(), "should have no t-value") +// } - if test.expectErr != nil { - return - } - // on success Serialize() should not modify - // test by re-parsing - var w strings.Builder - w3c.Serialize(&w) - cpy, err := NewW3CTraceState(w.String()) - require.NoError(t, err, "with %v", w.String()) - require.Equal(t, w3c, cpy, "with %v", w.String()) - }) - } -} +// if test.expectErr != nil { +// return +// } +// // on success Serialize() should not modify +// // test by re-parsing +// var w strings.Builder +// w3c.Serialize(&w) +// cpy, err := NewW3CTraceState(w.String()) +// require.NoError(t, err, "with %v", w.String()) +// require.Equal(t, w3c, cpy, "with %v", w.String()) +// }) +// } +// } From c8baf291814d3cd5f2e65ecebac2d102896b0555 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 2 Aug 
2023 16:30:51 -0700 Subject: [PATCH 16/38] wip working on probabilistic sampler with two new modes: downsampler and resampler --- .../probabilisticsamplerprocessor/config.go | 63 ++++++++--- .../tracesprocessor.go | 106 +++++++++--------- 2 files changed, 98 insertions(+), 71 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index f2c5b2bba343..2aa92268d3e7 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -6,7 +6,9 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "fmt" "math" + "strings" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" "go.opentelemetry.io/collector/component" ) @@ -49,21 +51,21 @@ type Config struct { // span-to-metrics pipeline based on this mechanism may have // anomalous behavior. // - // - "consistent_resample": Using an OTel-specified consistent - // sampling mechanism, this sampler selectively reduces the - // effective sampling probability of arriving spans. This - // can be useful to select a small fraction of complete - // traces from a stream with mixed sampling rates. The rate - // of spans passing through depends on how much sampling has - // already been applied. If an arriving span was head - // sampled at the same probability it passes through. If - // the span arrives with lower probability, a warning is - // logged because it means this sampler is configured with - // too large a sampling probability to ensure complete traces. + // - "resample": Using an OTel-specified consistent sampling + // mechanism, this sampler selectively reduces the effective + // sampling probability of arriving spans. This can be + // useful to select a small fraction of complete traces from + // a stream with mixed sampling rates. The rate of spans + // passing through depends on how much sampling has already + // been applied. 
If an arriving span was head sampled at + // the same probability it passes through. If the span + // arrives with lower probability, a warning is logged + // because it means this sampler is configured with too + // large a sampling probability to ensure complete traces. // - // - "consistent_downsample": Using an OTel-specified consistent - // sampling mechanism, this sampler reduces the effective sampling - // probability of each span by `Sampling + // - "downsample": Using an OTel-specified consistent sampling + // mechanism, this sampler reduces the effective sampling + // probability of each span by `SamplingProbability`. SamplerMode string `mapstructure:"sampler_mode"` /////// @@ -93,7 +95,9 @@ func (cfg *Config) Validate() error { return fmt.Errorf("negative sampling rate: %.2f%%", cfg.SamplingPercentage) case ratio == 0: // Special case - case ratio < 0x1p-56: + case ratio < sampling.MinSamplingProb: + return fmt.Errorf("sampling rate is too small: %.2f%%", cfg.SamplingPercentage) + case ratio > 1: return fmt.Errorf("sampling rate is too small: %.2f%%", cfg.SamplingPercentage) case math.IsInf(ratio, 0) || math.IsNaN(ratio): return fmt.Errorf("sampling rate is invalid: %.2f%%", cfg.SamplingPercentage) @@ -102,5 +106,34 @@ func (cfg *Config) Validate() error { if cfg.AttributeSource != "" && !validAttributeSource[cfg.AttributeSource] { return fmt.Errorf("invalid attribute source: %v. 
Expected: %v or %v", cfg.AttributeSource, traceIDAttributeSource, recordAttributeSource) } + + // Force the mode to lower case, check validity + if _, err := parseSamplerMode(cfg.SamplerMode); err != nil { + return err + } return nil } + +type samplerMode int + +const ( + modeUnset = iota + modeHashSeed + modeDownsample + modeResample +) + +func parseSamplerMode(s string) (samplerMode, error) { + switch strings.ToLower(s) { + case "resample": + return modeResample, nil + case "hash_seed": + return modeHashSeed, nil + case "downsample": + return modeDownsample, nil + case "": + return modeUnset, nil + default: + return modeUnset, fmt.Errorf("unknown sampler mode: %q", s) + } +} diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 71f150244362..e1a24bda96ec 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -46,12 +46,12 @@ const ( ) type traceSampler interface { - // shouldSample reports the result based on a probabilistic decision. - shouldSample(tid pcommon.TraceID, rnd sampling.Randomness) bool + // decide reports the result based on a probabilistic decision. + decide(s ptrace.Span) bool // updateTracestate modifies the OTelTraceState assuming it will be // sampled, probabilistically or otherwise. The "should" parameter - // is the result from shouldSample(), for the span's TraceID, which + // is the result from decide(), for the span's TraceID, which // will not be recalculated. 
updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) } @@ -69,7 +69,7 @@ type traceHashSampler struct { svalueEncoding string } -type traceIDSampler struct { +type traceResampler struct { // TraceID-randomness-based calculation traceIDThreshold sampling.Threshold @@ -77,6 +77,23 @@ type traceIDSampler struct { tValueEncoding string } +func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceState, error) { + state := s.TraceState() + raw := state.AsRaw() + + // Parse the arriving TraceState. + wts, err := sampling.NewW3CTraceState(raw) + var randomness sampling.Randomness + if err == nil && wts.OTelValue().HasRValue() { + // When the tracestate is OK and has r-value, use it. + randomness = wts.OTelValue().RValueRandomness() + } else { + // All other cases, use the TraceID. + randomness = sampling.RandomnessFromTraceID(s.TraceID()) + } + return randomness, wts, err +} + // newTracesProcessor returns a processor.TracesProcessor that will perform head sampling according to the given // configuration. func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg *Config, nextConsumer consumer.Traces) (processor.Traces, error) { @@ -91,8 +108,19 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * logger: set.Logger, } + // error ignored below b/c already checked once + mode, _ := parseSamplerMode(cfg.SamplerMode) + if mode == modeUnset { + if cfg.HashSeed != 0 { + mode = modeHashSeed + } else { + mode = modeDownsample + } + } + ratio := pct / 100 - if cfg.HashSeed != 0 { + switch mode { + case modeHashSeed: ts := &traceHashSampler{} // Adjust sampling percentage on private so recalculations are avoided. @@ -102,28 +130,25 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * ts.svalueEncoding = strconv.FormatFloat(ratio, 'g', 4, 64) tp.sampler = ts - } else { - // Encode t-value (OTEP 226), like %.4f. 
(See FormatFloat().) - tval, err := sampling.ProbabilityToEncoded(ratio, 'g', 4) - if err != nil { - return nil, err - } - // Parse the exact value of probability encoded at this precision. - ratio, _, err = sampling.EncodedToProbabilityAndAdjustedCount(tval) + case modeResample: + // Encode t-value: for cases where the incoming context has + tval, err := sampling.ProbabilityToTValue(ratio) if err != nil { return nil, err } // Compute the sampling threshold from the exact probability. - threshold, err := sampling.ProbabilityToThreshold(ratio) + threshold, err := sampling.TValueToThreshold(tval) if err != nil { return nil, err } - ts := &traceIDSampler{} + ts := &traceResampler{} ts.tValueEncoding = tval ts.traceIDThreshold = threshold tp.sampler = ts + case modeDownsample: + // TODO } return processorhelper.NewTracesProcessor( @@ -135,40 +160,24 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } -func (ts *traceHashSampler) shouldSample(tid pcommon.TraceID, _ sampling.Randomness) bool { +func (ts *traceHashSampler) decide(s ptrace.Span) bool { // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources // with various different criteria to generate trace id and perhaps were already sampled without hashing. // Hashing here prevents bias due to such systems. + tid := s.TraceID() return computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate } func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, _ sampling.Randomness, should bool, otts *sampling.OTelTraceState) { - if !should { - otts.SetSValue(sampling.ProbabilityZeroEncoding, 0) - return - } - - if otts.HasSValue() && otts.SValueProbability() == 0 { - // Zero count in, zero count out. 
- otts.SetSValue(sampling.ProbabilityZeroEncoding, 0) - return - } - - if !otts.HasSValue() { - otts.SetSValue(ts.svalueEncoding, ts.probability) - return - } - - product := ts.probability * otts.SValueProbability() - - otts.SetSValue(strconv.FormatFloat(product, 'g', 4, 64), product) + // No action, nothing is specified. } -func (ts *traceIDSampler) shouldSample(_ pcommon.TraceID, randomness sampling.Randomness) bool { +func (ts *traceResampler) decide(s ptrace.Span) bool { + rnd := randomnessFromSpan(s) return ts.traceIDThreshold.ShouldSample(randomness) } -func (ts *traceIDSampler) updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) { +func (ts *traceResampler) updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) { // When this sampler decided not to sample, the t-value becomes zero. // Incoming TValue consistency is not checked when this happens. if !should { @@ -202,8 +211,8 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( td.ResourceSpans().RemoveIf(func(rs ptrace.ResourceSpans) bool { rs.ScopeSpans().RemoveIf(func(ils ptrace.ScopeSpans) bool { ils.Spans().RemoveIf(func(s ptrace.Span) bool { - sp := parseSpanSamplingPriority(s) - if sp == doNotSampleSpan { + priority := parseSpanSamplingPriority(s) + if priority == doNotSampleSpan { // The OpenTelemetry mentions this as a "hint" we take a stronger // approach and do not sample the span since some may use it to // remove specific spans from traces. @@ -215,24 +224,9 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( return true } - state := s.TraceState() - raw := state.AsRaw() - - // Parse the arriving TraceState. 
- wts, err := sampling.NewW3CTraceState(raw) - var randomness sampling.Randomness - if err != nil { - tp.logger.Info("span trace state", zap.Error(err)) - randomness = sampling.RandomnessFromTraceID(s.TraceID()) - } else if wts.OTelValue().HasRValue() { - randomness = wts.OTelValue().RValueRandomness() - } else { - randomness = sampling.RandomnessFromTraceID(s.TraceID()) - } - - forceSample := sp == mustSampleSpan + probSample, otts := tp.sampler.decide(s) - probSample := tp.sampler.shouldSample(s.TraceID(), randomness) + forceSample := priority == mustSampleSpan sampled := forceSample || probSample From 7f47e4ac4bc7e4bdfc1ccf3308446ded927e878d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 3 Aug 2023 10:55:15 -0700 Subject: [PATCH 17/38] unsigned implement split --- pkg/sampling/encoding.go | 192 --------------------------------- pkg/sampling/encoding_test.go | 50 +++++---- pkg/sampling/oteltracestate.go | 2 +- pkg/sampling/probability.go | 37 +++++++ pkg/sampling/randomness.go | 58 ++++++++++ pkg/sampling/threshold.go | 96 +++++++++++++++++ 6 files changed, 224 insertions(+), 211 deletions(-) delete mode 100644 pkg/sampling/encoding.go create mode 100644 pkg/sampling/probability.go create mode 100644 pkg/sampling/randomness.go create mode 100644 pkg/sampling/threshold.go diff --git a/pkg/sampling/encoding.go b/pkg/sampling/encoding.go deleted file mode 100644 index eeaed05e97e5..000000000000 --- a/pkg/sampling/encoding.go +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" - -import ( - "encoding/binary" - "errors" - "fmt" - "math" - "strconv" - - "go.opentelemetry.io/collector/pdata/pcommon" -) - -const ( - // MinSamplingProb is one in 2^56. - MinSamplingProb = 0x1p-56 - - // LeastHalfTraceIDThresholdMask is the mask to use on the - // least-significant half of the TraceID, i.e., bytes 8-15. - // Because this is a 56 bit mask, the result after masking is - // the unsigned value of bytes 9 through 15. - LeastHalfTraceIDThresholdMask = 1/MinSamplingProb - 1 - - // ProbabilityZeroEncoding is the encoding for 0 adjusted count. - ProbabilityZeroEncoding = "0" - - // ProbabilityOneEncoding is the encoding for 100% sampling. - ProbabilityOneEncoding = "" -) - -// Threshold used to compare with the least-significant 7 bytes of the TraceID. -type Threshold struct { - // unsigned is in the range [0, MaxAdjustedCount] - // - 0 represents zero probability (0 TraceID values are less-than) - // - 1 represents MinSamplingProb (1 TraceID value is less-than) - // - MaxAdjustedCount represents 100% sampling (all TraceID values are less-than). - unsigned uint64 -} - -// Randomness may be derived from r-value or TraceID. -type Randomness struct { - // randomness is in the range [0, MaxAdjustedCount-1] - unsigned uint64 -} - -var ( - // ErrProbabilityRange is returned when a value should be in the range [MinSamplingProb, 1]. - ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") - - // ErrTValueSize is returned for t-values longer than 14 hex digits. - ErrTValueSize = errors.New("t-value exceeds 14 hex digits") - - // ErrRValueSize is returned for r-values != 14 hex digits. - ErrRValueSize = errors.New("r-value must have 14 hex digits") -) - -// probabilityInRange tests MinSamplingProb <= prob <= 1. 
-func probabilityInRange(prob float64) bool { - return prob >= MinSamplingProb && prob <= 1 -} - -// removeTrailingZeros elimiantes trailing zeros from a string. -func removeTrailingZeros(in string) string { - for len(in) > 1 && in[len(in)-1] == '0' { - in = in[:len(in)-1] - } - return in -} - -// ProbabilityToTValue encodes a t-value given a probability. -func ProbabilityToTValue(prob float64) (string, error) { - // Probability cases - switch { - case prob == 1: - return ProbabilityOneEncoding, nil - case prob == 0: - return ProbabilityZeroEncoding, nil - case !probabilityInRange(prob): - return "", ErrProbabilityRange - } - unsigned := uint64(math.Round(prob / MinSamplingProb)) - - // Note fmt.Sprintf handles zero padding to 14 bytes as well as setting base=16. - // Otherwise could be done by hand using strconv.FormatUint(unsigned, 16) and - // and padding to 14 bytes before removing the trailing zeros. - return removeTrailingZeros(fmt.Sprintf("%014x", unsigned)), nil -} - -// TValueToProbability parses the t-value and returns -// the probability. -func TValueToProbability(s string) (float64, error) { - if len(s) > 14 { - return 0, ErrTValueSize - } - if s == ProbabilityOneEncoding { - return 1, nil - } - - unsigned, err := strconv.ParseUint(s, 16, 64) - if err != nil { - return 0, err - } - - // Zero-padding is done by shifting 4 bit positions per - // missing hex digit. - extend := 14 - len(s) - return float64(unsigned<<(4*extend)) * MinSamplingProb, nil -} - -// ProbabilityToThreshold returns the sampling threshold exactly -// corresponding with the input probability. -func ProbabilityToThreshold(prob float64) (Threshold, error) { - // Note: prob == 0 is an allowed special case. Because we - // use less-than, all spans are unsampled with Threshold{0}. 
- if prob != 0 && !probabilityInRange(prob) { - return Threshold{}, ErrProbabilityRange - } - return Threshold{ - unsigned: uint64(prob / MinSamplingProb), - }, nil -} - -// TValueToThreshold return a Threshold, see -// Threshold.ShouldSample(TraceID) and Threshold.Probability(). -func TValueToThreshold(s string) (Threshold, error) { - prob, err := TValueToProbability(s) - if err != nil { - return Threshold{}, err - } - return ProbabilityToThreshold(prob) -} - -// ShouldSample returns true when the span passes this sampler's -// consistent sampling decision. -func (t Threshold) ShouldSample(rnd Randomness) bool { - return rnd.unsigned < t.unsigned -} - -// Probability is the sampling ratio in the range [MinSamplingProb, 1]. -func (t Threshold) Probability() float64 { - return float64(t.unsigned) * MinSamplingProb -} - -// Unsigned is an unsigned integer that scales with the sampling -// threshold. This is useful to compare two thresholds without -// floating point conversions. -func (t Threshold) Unsigned() uint64 { - return t.unsigned -} - -// Randomness is the value we compare with Threshold in ShouldSample. -func RandomnessFromTraceID(id pcommon.TraceID) Randomness { - return Randomness{ - unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, - } -} - -// Unsigned is an unsigned integer that scales with the randomness -// value. This is useful to compare two randomness values without -// floating point conversions. -func (r Randomness) Unsigned() uint64 { - return r.unsigned -} - -// RValueToRandomness parses 14 hex bytes into a Randomness. 
-func RValueToRandomness(s string) (Randomness, error) { - if len(s) != 14 { - return Randomness{}, ErrRValueSize - } - - unsigned, err := strconv.ParseUint(s, 16, 64) - if err != nil { - return Randomness{}, err - } - - return Randomness{ - unsigned: unsigned, - }, nil -} diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index 2cb31976a9b7..7fb9fa3f668f 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -39,32 +39,43 @@ func mustNot[T any](t T, err error) error { return err } +func probabilityToTValue(prob float64) (string, error) { + th, err := ProbabilityToThreshold(prob) + return string(th.TValue()), err +} + +func tValueToProbability(tv string) (float64, error) { + th, err := TValueToThreshold(tv) + return th.Probability(), err +} + func TestValidProbabilityToTValue(t *testing.T) { - require.Equal(t, "8", must(ProbabilityToTValue(0.5))) - require.Equal(t, "00000000000001", must(ProbabilityToTValue(0x1p-56))) - require.Equal(t, "55555555555554", must(ProbabilityToTValue(1/3.))) - require.Equal(t, "54", must(ProbabilityToTValue(0x54p-8))) // 0x54p-8 is approximately 1/3 - require.Equal(t, "01", must(ProbabilityToTValue(0x1p-8))) - require.Equal(t, "0", must(ProbabilityToTValue(0))) + require.Equal(t, "", must(probabilityToTValue(1.0))) + require.Equal(t, "8", must(probabilityToTValue(0.5))) + require.Equal(t, "00000000000001", must(probabilityToTValue(0x1p-56))) + require.Equal(t, "55555555555554", must(probabilityToTValue(1/3.))) + require.Equal(t, "54", must(probabilityToTValue(0x54p-8))) // 0x54p-8 is approximately 1/3 + require.Equal(t, "01", must(probabilityToTValue(0x1p-8))) + require.Equal(t, "0", must(probabilityToTValue(0))) } -func TestInvalidProbabilityToTValue(t *testing.T) { +func TestInvalidprobabilityToTValue(t *testing.T) { // Too small - require.Error(t, mustNot(ProbabilityToTValue(0x1p-57))) - require.Error(t, mustNot(ProbabilityToTValue(0x1p-57))) + require.Error(t, 
mustNot(probabilityToTValue(0x1p-57))) + require.Error(t, mustNot(probabilityToTValue(0x1p-57))) // Too big - require.Error(t, mustNot(ProbabilityToTValue(1.1))) - require.Error(t, mustNot(ProbabilityToTValue(1.1))) + require.Error(t, mustNot(probabilityToTValue(1.1))) + require.Error(t, mustNot(probabilityToTValue(1.1))) } func TestTValueToProbability(t *testing.T) { - require.Equal(t, 0.5, must(TValueToProbability("8"))) - require.Equal(t, 0x444p-12, must(TValueToProbability("444"))) - require.Equal(t, 0.0, must(TValueToProbability("0"))) + require.Equal(t, 0.5, must(tValueToProbability("8"))) + require.Equal(t, 0x444p-12, must(tValueToProbability("444"))) + require.Equal(t, 0.0, must(tValueToProbability("0"))) // 0x55555554p-32 is very close to 1/3 - require.InEpsilon(t, 1/3., must(TValueToProbability("55555554")), 1e-9) + require.InEpsilon(t, 1/3., must(tValueToProbability("55555554")), 1e-9) } func TestProbabilityToThreshold(t *testing.T) { @@ -81,14 +92,17 @@ func TestProbabilityToThreshold(t *testing.T) { Threshold{2}, must(ProbabilityToThreshold(0x1p-55))) require.Equal(t, - Threshold{1 / MinSamplingProb}, + AlwaysSampleThreshold, must(ProbabilityToThreshold(1.0))) + require.Equal(t, + NeverSampleThreshold, + must(ProbabilityToThreshold(0))) require.Equal(t, - Threshold{0x555p-12 / MinSamplingProb}, + Threshold{0x555p-12 * MaxAdjustedCount}, must(TValueToThreshold("555"))) require.Equal(t, - Threshold{0x123p-20 / MinSamplingProb}, + Threshold{0x123p-20 * MaxAdjustedCount}, must(TValueToThreshold("00123"))) } diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index a5ce95b349eb..71b60bfae547 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -68,7 +68,7 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { if otts.tt, err = TValueToThreshold(value); err == nil { otts.t = value } else { - otts.tt = Threshold{} + otts.tt = AlwaysSampleThreshold } default: otts.kvs = append(otts.kvs, 
KV{ diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go new file mode 100644 index 000000000000..35bebdf5c641 --- /dev/null +++ b/pkg/sampling/probability.go @@ -0,0 +1,37 @@ +package sampling + +import ( + "errors" + "math" +) + +var ( + // ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. + ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") +) + +// probabilityInRange tests MinSamplingProb <= prob <= 1. +func probabilityInRange(prob float64) bool { + return prob >= 1/MaxAdjustedCount && prob <= 1 +} + +func ProbabilityToThreshold(prob float64) (Threshold, error) { + // Probability cases + switch { + case prob == 1: + return AlwaysSampleThreshold, nil + case prob == 0: + return NeverSampleThreshold, nil + case !probabilityInRange(prob): + return AlwaysSampleThreshold, ErrProbabilityRange + } + unsigned := uint64(math.Round(prob * MaxAdjustedCount)) + return Threshold{ + unsigned: unsigned, + }, nil +} + +// Probability is the sampling ratio in the range [MinSamplingProb, 1]. +func (t Threshold) Probability() float64 { + return float64(t.unsigned) / MaxAdjustedCount +} diff --git a/pkg/sampling/randomness.go b/pkg/sampling/randomness.go new file mode 100644 index 000000000000..6dc6758dd9e1 --- /dev/null +++ b/pkg/sampling/randomness.go @@ -0,0 +1,58 @@ +package sampling + +import ( + "encoding/binary" + "errors" + "strconv" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +var ( + // ErrRValueSize is returned for r-values != NumHexDigits hex digits. + ErrRValueSize = errors.New("r-value must have 14 hex digits") +) + +const ( + // LeastHalfTraceIDThresholdMask is the mask to use on the + // least-significant half of the TraceID, i.e., bytes 8-15. + // Because this is a 56 bit mask, the result after masking is + // the unsigned value of bytes 9 through 15. 
+ LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 +) + +// Randomness may be derived from r-value or TraceID. +type Randomness struct { + // randomness is in the range [0, MaxAdjustedCount-1] + unsigned uint64 +} + +// Randomness is the value we compare with Threshold in ShouldSample. +func RandomnessFromTraceID(id pcommon.TraceID) Randomness { + return Randomness{ + unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, + } +} + +// Unsigned is an unsigned integer that scales with the randomness +// value. This is useful to compare two randomness values without +// floating point conversions. +func (r Randomness) Unsigned() uint64 { + return r.unsigned +} + +// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. +func RValueToRandomness(s string) (Randomness, error) { + if len(s) != NumHexDigits { + return Randomness{}, ErrRValueSize + } + + unsigned, err := strconv.ParseUint(s, hexBase, 64) + if err != nil { + return Randomness{}, err + } + + return Randomness{ + unsigned: unsigned, + }, nil +} diff --git a/pkg/sampling/threshold.go b/pkg/sampling/threshold.go new file mode 100644 index 000000000000..762ef5491c65 --- /dev/null +++ b/pkg/sampling/threshold.go @@ -0,0 +1,96 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "errors" + "strconv" + "strings" +) + +const ( + // MaxAdjustedCount is 2^56 i.e. 0x100000000000000 i.e., 1<<56. + MaxAdjustedCount = 1 << 56 + + // NumHexDigits is the number of hex digits equalling 56 bits. + NumHexDigits = 56 / 4 + + hexBase = 16 +) + +// Threshold used to compare with the least-significant 7 bytes of the TraceID. +type Threshold struct { + // unsigned is in the range [0, MaxAdjustedCount] + // - 0 represents never sampling (0 TraceID values are less-than) + // - 1 represents 1-in-MaxAdjustedCount (1 TraceID value is less-than) + // - MaxAdjustedCount represents always sampling (all TraceID values are less-than). + unsigned uint64 +} + +var ( + // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. + ErrTValueSize = errors.New("t-value exceeds 14 hex digits") + + NeverSampleThreshold = Threshold{unsigned: 0} + AlwaysSampleThreshold = Threshold{unsigned: MaxAdjustedCount} +) + +// TValueToThreshold returns a Threshold, see Threshold.ShouldSample(TraceID). +func TValueToThreshold(s string) (Threshold, error) { + if len(s) > NumHexDigits { + return AlwaysSampleThreshold, ErrTValueSize + } + if len(s) == 0 { + return AlwaysSampleThreshold, nil + } + + // Note that this handles zero correctly, but the inverse + // operation does not. I.e., "0" parses as unsigned == 0. + unsigned, err := strconv.ParseUint(s, hexBase, 64) + if err != nil { + return AlwaysSampleThreshold, err + } + + // Zero-padding is done by shifting 4 bits per absent hex digit. + extend := NumHexDigits - len(s) + return Threshold{ + unsigned: unsigned << (4 * extend), + }, nil +} + +func (th Threshold) TValue() string { + // Special cases + switch th.unsigned { + case MaxAdjustedCount: + // 100% sampling + return "" + case 0: + // 0% sampling. 
This is a special case, otherwise, the TrimRight + // below will return an empty matching the case above. + return "0" + } + // Add MaxAdjustedCount yields 15 hex digits with a leading "1". + allBits := MaxAdjustedCount + th.unsigned + // Then format and remove the most-significant hex digit. + digits := strconv.FormatUint(allBits, hexBase)[1:] + // Leaving NumHexDigits hex digits, with trailing zeros removed. + return strings.TrimRight(digits, "0") +} + +// ShouldSample returns true when the span passes this sampler's +// consistent sampling decision. +func (t Threshold) ShouldSample(rnd Randomness) bool { + return rnd.unsigned < t.unsigned +} From 422e0b255463bcdf000c92bcf42b542d528ee4d8 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 3 Aug 2023 16:02:58 -0700 Subject: [PATCH 18/38] two implementations --- pkg/sampling/encoding_test.go | 51 ++++----- pkg/sampling/impl.go | 31 ++++++ pkg/sampling/internal/bytes/probability.go | 39 +++++++ pkg/sampling/internal/bytes/randomness.go | 36 +++++++ pkg/sampling/internal/bytes/threshold.go | 101 ++++++++++++++++++ .../{ => internal/unsigned}/probability.go | 8 +- .../{ => internal/unsigned}/randomness.go | 27 ++--- .../{ => internal/unsigned}/threshold.go | 2 +- pkg/sampling/oteltracestate.go | 2 +- .../probabilisticsamplerprocessor/config.go | 2 +- 10 files changed, 248 insertions(+), 51 deletions(-) create mode 100644 pkg/sampling/impl.go create mode 100644 pkg/sampling/internal/bytes/probability.go create mode 100644 pkg/sampling/internal/bytes/randomness.go create mode 100644 pkg/sampling/internal/bytes/threshold.go rename pkg/sampling/{ => internal/unsigned}/probability.go (77%) rename pkg/sampling/{ => internal/unsigned}/randomness.go (55%) rename pkg/sampling/{ => internal/unsigned}/threshold.go (96%) diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index 7fb9fa3f668f..3e6eb9d65848 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -15,12 +15,13 
@@ package sampling import ( - "bytes" "encoding/binary" "fmt" "math/rand" "testing" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/bytes" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" ) @@ -80,16 +81,16 @@ func TestTValueToProbability(t *testing.T) { func TestProbabilityToThreshold(t *testing.T) { require.Equal(t, - Threshold{0x1p+55}, + must(TValueToThreshold("8")), must(ProbabilityToThreshold(0.5))) require.Equal(t, - Threshold{1}, + must(TValueToThreshold("00000000000001")), must(ProbabilityToThreshold(0x1p-56))) require.Equal(t, - Threshold{0x100}, + must(TValueToThreshold("000000000001")), must(ProbabilityToThreshold(0x100p-56))) require.Equal(t, - Threshold{2}, + must(TValueToThreshold("00000000000002")), must(ProbabilityToThreshold(0x1p-55))) require.Equal(t, AlwaysSampleThreshold, @@ -97,13 +98,6 @@ func TestProbabilityToThreshold(t *testing.T) { require.Equal(t, NeverSampleThreshold, must(ProbabilityToThreshold(0))) - - require.Equal(t, - Threshold{0x555p-12 * MaxAdjustedCount}, - must(TValueToThreshold("555"))) - require.Equal(t, - Threshold{0x123p-20 * MaxAdjustedCount}, - must(TValueToThreshold("00123"))) } func TestShouldSample(t *testing.T) { @@ -155,10 +149,14 @@ func (tids *benchTIDs) init() { // BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op func BenchmarkThresholdCompareAsUint64(b *testing.B) { var tids benchTIDs - var comps [1024]uint64 + var comps [1024]unsigned.Threshold tids.init() for i := range comps { - comps[i] = (rand.Uint64() % 0x1p+56) + 1 + var err error + comps[i], err = unsigned.ProbabilityToThreshold(rand.Float64()) + if err != nil { + b.Fatal(err) + } } b.ReportAllocs() @@ -166,12 +164,11 @@ func BenchmarkThresholdCompareAsUint64(b *testing.B) { yes := 0 no := 0 for i := 0; i < b.N; i++ { - tid := tids[i%len(tids)] - comp := 
comps[i%len(comps)] - // Read 8 bytes, mask to 7 bytes - val := binary.BigEndian.Uint64(tid[8:]) & (0x1p+56 - 1) + idx := i % len(tids) + tid := tids[idx] + comp := comps[idx] - if val < comp { + if comp.ShouldSample(unsigned.RandomnessFromTraceID(tid)) { yes++ } else { no++ @@ -182,12 +179,14 @@ func BenchmarkThresholdCompareAsUint64(b *testing.B) { // BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op func BenchmarkThresholdCompareAsBytes(b *testing.B) { var tids benchTIDs - var comps [1024][7]byte + var comps [1024]bytes.Threshold tids.init() for i := range comps { - var e8 [8]byte - binary.BigEndian.PutUint64(e8[:], rand.Uint64()) - copy(comps[i][:], e8[1:]) + var err error + comps[i], err = bytes.ProbabilityToThreshold(rand.Float64()) + if err != nil { + b.Fatal(err) + } } b.ReportAllocs() @@ -195,7 +194,11 @@ func BenchmarkThresholdCompareAsBytes(b *testing.B) { yes := 0 no := 0 for i := 0; i < b.N; i++ { - if bytes.Compare(tids[i%len(tids)][9:], comps[i%len(comps)][:]) <= 0 { + idx := i % len(tids) + tid := tids[idx] + comp := comps[idx] + + if comp.ShouldSample(bytes.RandomnessFromTraceID(tid)) { yes++ } else { no++ diff --git a/pkg/sampling/impl.go b/pkg/sampling/impl.go new file mode 100644 index 000000000000..0a3cbc4aff46 --- /dev/null +++ b/pkg/sampling/impl.go @@ -0,0 +1,31 @@ +package sampling + +import ( + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned" +) + +type Randomness = unsigned.Randomness +type Threshold = unsigned.Threshold + +func RValueToRandomness(s string) (Randomness, error) { + return unsigned.RValueToRandomness(s) +} + +func TValueToThreshold(s string) (Threshold, error) { + return unsigned.TValueToThreshold(s) +} + +func ProbabilityToThreshold(prob float64) (Threshold, error) { + return unsigned.ProbabilityToThreshold(prob) +} + +func RandomnessFromTraceID(tid pcommon.TraceID) Randomness { + return 
unsigned.RandomnessFromTraceID(tid) +} + +var AlwaysSampleThreshold = unsigned.AlwaysSampleThreshold +var NeverSampleThreshold = unsigned.NeverSampleThreshold +var ErrTValueSize = unsigned.ErrTValueSize +var ErrRValueSize = unsigned.ErrRValueSize diff --git a/pkg/sampling/internal/bytes/probability.go b/pkg/sampling/internal/bytes/probability.go new file mode 100644 index 000000000000..fb89c4e2ffe1 --- /dev/null +++ b/pkg/sampling/internal/bytes/probability.go @@ -0,0 +1,39 @@ +package bytes + +import ( + "encoding/binary" + "errors" + "math" +) + +// ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. +var ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") + +// probabilityInRange tests MinSamplingProb <= prob <= 1. +func probabilityInRange(prob float64) bool { + return prob >= 1/MaxAdjustedCount && prob <= 1 +} + +func ProbabilityToThreshold(prob float64) (Threshold, error) { + // Probability cases + switch { + case prob == 1: + return AlwaysSampleThreshold, nil + case prob == 0: + return NeverSampleThreshold, nil + case !probabilityInRange(prob): + return AlwaysSampleThreshold, ErrProbabilityRange + } + unsigned := uint64(math.Round(prob * MaxAdjustedCount)) + var th Threshold + binary.BigEndian.PutUint64(th.bytes[:], unsigned) + return th, nil +} + +// Probability is the sampling ratio in the range [MinSamplingProb, 1]. 
+func (t Threshold) Probability() float64 { + if t == AlwaysSampleThreshold { + return 1 + } + return float64(binary.BigEndian.Uint64(t.bytes[:])) / MaxAdjustedCount +} diff --git a/pkg/sampling/internal/bytes/randomness.go b/pkg/sampling/internal/bytes/randomness.go new file mode 100644 index 000000000000..407f6f0fcfb7 --- /dev/null +++ b/pkg/sampling/internal/bytes/randomness.go @@ -0,0 +1,36 @@ +package bytes + +import ( + "encoding/hex" + "errors" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// ErrRValueSize is returned for r-values != NumHexDigits hex digits. +var ErrRValueSize = errors.New("r-value must have 14 hex digits") + +// Randomness may be derived from r-value or TraceID. +type Randomness struct { + // bytes[0] is unused, so that the relevant portion of these 8 + // bytes align with the TraceID's second 8 bytes. + bytes [8]byte +} + +// Randomness is the value we compare with Threshold in ShouldSample. +func RandomnessFromTraceID(id pcommon.TraceID) Randomness { + var r Randomness + copy(r.bytes[1:], id[9:]) + return r +} + +// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. +func RValueToRandomness(s string) (Randomness, error) { + if len(s) != NumHexDigits { + return Randomness{}, ErrRValueSize + } + + var r Randomness + _, err := hex.Decode(r.bytes[1:], []byte(s)) + return r, err +} diff --git a/pkg/sampling/internal/bytes/threshold.go b/pkg/sampling/internal/bytes/threshold.go new file mode 100644 index 000000000000..2bb5680ae7c8 --- /dev/null +++ b/pkg/sampling/internal/bytes/threshold.go @@ -0,0 +1,101 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bytes + +import ( + "bytes" + "encoding/hex" + "errors" + "strconv" + "strings" +) + +const ( + // MaxAdjustedCount is 2^56 i.e. 0x100000000000000 i.e., 1<<56. + MaxAdjustedCount = 1 << 56 + + // NumHexDigits is the number of hex digits equalling 56 bits. + NumHexDigits = 56 / 4 + + hexBase = 16 +) + +// Threshold used to compare with the least-significant 7 bytes of the TraceID. +type Threshold struct { + bytes [8]byte +} + +var ( + // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. + ErrTValueSize = errors.New("t-value exceeds 14 hex digits") + + NeverSampleThreshold = Threshold{bytes: [8]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + }} + AlwaysSampleThreshold = Threshold{bytes: [8]byte{ + 1, 0, 0, 0, 0, 0, 0, 0, + }} + + hex14Zeros = func() (r [NumHexDigits]byte) { + for i := range r { + r[i] = '0' + } + return + }() +) + +// TValueToThreshold returns a Threshold, see Threshold.ShouldSample(TraceID). +func TValueToThreshold(s string) (Threshold, error) { + if len(s) > NumHexDigits { + return AlwaysSampleThreshold, ErrTValueSize + } + if len(s) == 0 { + return AlwaysSampleThreshold, nil + } + + // Fill with padding, then copy most-significant hex digits. 
+ hexPadded := hex14Zeros + copy(hexPadded[0:len(s)], s) + + var th Threshold + if _, err := hex.Decode(th.bytes[1:], hexPadded[:]); err != nil { + return AlwaysSampleThreshold, strconv.ErrSyntax // ErrSyntax for consistency w/ ../unsigned + } + return th, nil +} + +func (th Threshold) TValue() string { + // Special cases + switch { + case th == AlwaysSampleThreshold: + return "" + case th == NeverSampleThreshold: + return "0" + } + + var hexDigits [14]byte + _ = hex.Encode(hexDigits[:], th.bytes[1:]) + return strings.TrimRight(string(hexDigits[:]), "0") +} + +// ShouldSample returns true when the span passes this sampler's +// consistent sampling decision. +func (t Threshold) ShouldSample(rnd Randomness) bool { + if t == AlwaysSampleThreshold { + // 100% sampling case + return true + } + return bytes.Compare(rnd.bytes[1:], t.bytes[1:]) < 0 +} diff --git a/pkg/sampling/probability.go b/pkg/sampling/internal/unsigned/probability.go similarity index 77% rename from pkg/sampling/probability.go rename to pkg/sampling/internal/unsigned/probability.go index 35bebdf5c641..3f2d9656f2cd 100644 --- a/pkg/sampling/probability.go +++ b/pkg/sampling/internal/unsigned/probability.go @@ -1,14 +1,12 @@ -package sampling +package unsigned import ( "errors" "math" ) -var ( - // ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. - ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") -) +// ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. +var ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") // probabilityInRange tests MinSamplingProb <= prob <= 1. 
func probabilityInRange(prob float64) bool { diff --git a/pkg/sampling/randomness.go b/pkg/sampling/internal/unsigned/randomness.go similarity index 55% rename from pkg/sampling/randomness.go rename to pkg/sampling/internal/unsigned/randomness.go index 6dc6758dd9e1..ed9db8418f6f 100644 --- a/pkg/sampling/randomness.go +++ b/pkg/sampling/internal/unsigned/randomness.go @@ -1,4 +1,4 @@ -package sampling +package unsigned import ( "encoding/binary" @@ -8,18 +8,14 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" ) -var ( - // ErrRValueSize is returned for r-values != NumHexDigits hex digits. - ErrRValueSize = errors.New("r-value must have 14 hex digits") -) +// ErrRValueSize is returned for r-values != NumHexDigits hex digits. +var ErrRValueSize = errors.New("r-value must have 14 hex digits") -const ( - // LeastHalfTraceIDThresholdMask is the mask to use on the - // least-significant half of the TraceID, i.e., bytes 8-15. - // Because this is a 56 bit mask, the result after masking is - // the unsigned value of bytes 9 through 15. - LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 -) +// LeastHalfTraceIDThresholdMask is the mask to use on the +// least-significant half of the TraceID, i.e., bytes 8-15. +// Because this is a 56 bit mask, the result after masking is +// the unsigned value of bytes 9 through 15. +const LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 // Randomness may be derived from r-value or TraceID. type Randomness struct { @@ -34,13 +30,6 @@ func RandomnessFromTraceID(id pcommon.TraceID) Randomness { } } -// Unsigned is an unsigned integer that scales with the randomness -// value. This is useful to compare two randomness values without -// floating point conversions. -func (r Randomness) Unsigned() uint64 { - return r.unsigned -} - // RValueToRandomness parses NumHexDigits hex bytes into a Randomness. 
func RValueToRandomness(s string) (Randomness, error) { if len(s) != NumHexDigits { diff --git a/pkg/sampling/threshold.go b/pkg/sampling/internal/unsigned/threshold.go similarity index 96% rename from pkg/sampling/threshold.go rename to pkg/sampling/internal/unsigned/threshold.go index 762ef5491c65..3908a7403b2e 100644 --- a/pkg/sampling/threshold.go +++ b/pkg/sampling/internal/unsigned/threshold.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" +package unsigned import ( "errors" diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 71b60bfae547..6f881288bdb1 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -62,7 +62,7 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { if otts.rnd, err = RValueToRandomness(value); err == nil { otts.r = value } else { - otts.rnd = Randomness{} + otts.rnd = Randomness{} // @@@ } case "t": if otts.tt, err = TValueToThreshold(value); err == nil { diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index 2aa92268d3e7..2f298175faae 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -95,7 +95,7 @@ func (cfg *Config) Validate() error { return fmt.Errorf("negative sampling rate: %.2f%%", cfg.SamplingPercentage) case ratio == 0: // Special case - case ratio < sampling.MinSamplingProb: + case ratio < (1 / sampling.MaxAdjustedCount): return fmt.Errorf("sampling rate is too small: %.2f%%", cfg.SamplingPercentage) case ratio > 1: return fmt.Errorf("sampling rate is too small: %.2f%%", cfg.SamplingPercentage) From 787b9fd1790e226ad80142cc52fe450609060848 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 5 Sep 2023 16:10:52 -0700 Subject: [PATCH 
19/38] wip --- pkg/sampling/impl.go | 20 +++- pkg/sampling/internal/bytes/threshold.go | 13 +-- pkg/sampling/internal/unsigned/threshold.go | 4 + pkg/sampling/oteltracestate.go | 8 +- .../tracesprocessor.go | 98 +++++++++++-------- 5 files changed, 89 insertions(+), 54 deletions(-) diff --git a/pkg/sampling/impl.go b/pkg/sampling/impl.go index 0a3cbc4aff46..218026ab2d18 100644 --- a/pkg/sampling/impl.go +++ b/pkg/sampling/impl.go @@ -25,7 +25,19 @@ func RandomnessFromTraceID(tid pcommon.TraceID) Randomness { return unsigned.RandomnessFromTraceID(tid) } -var AlwaysSampleThreshold = unsigned.AlwaysSampleThreshold -var NeverSampleThreshold = unsigned.NeverSampleThreshold -var ErrTValueSize = unsigned.ErrTValueSize -var ErrRValueSize = unsigned.ErrRValueSize +func ThresholdLessThan(a, b Threshold) bool { + return unsigned.ThresholdLessThan(a, b) +} + +const MaxAdjustedCount = unsigned.MaxAdjustedCount + +var ( + AlwaysSampleThreshold = unsigned.AlwaysSampleThreshold + NeverSampleThreshold = unsigned.NeverSampleThreshold + + AlwaysSampleTValue = AlwaysSampleThreshold.TValue() + NeverSampleTValue = NeverSampleThreshold.TValue() + + ErrTValueSize = unsigned.ErrTValueSize + ErrRValueSize = unsigned.ErrRValueSize +) diff --git a/pkg/sampling/internal/bytes/threshold.go b/pkg/sampling/internal/bytes/threshold.go index 2bb5680ae7c8..ec33e137e5f4 100644 --- a/pkg/sampling/internal/bytes/threshold.go +++ b/pkg/sampling/internal/bytes/threshold.go @@ -41,12 +41,8 @@ var ( // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. 
ErrTValueSize = errors.New("t-value exceeds 14 hex digits") - NeverSampleThreshold = Threshold{bytes: [8]byte{ - 0, 0, 0, 0, 0, 0, 0, 0, - }} - AlwaysSampleThreshold = Threshold{bytes: [8]byte{ - 1, 0, 0, 0, 0, 0, 0, 0, - }} + NeverSampleThreshold = Threshold{bytes: [8]byte{0, 0, 0, 0, 0, 0, 0, 0}} + AlwaysSampleThreshold = Threshold{bytes: [8]byte{1, 0, 0, 0, 0, 0, 0, 0}} hex14Zeros = func() (r [NumHexDigits]byte) { for i := range r { @@ -99,3 +95,8 @@ func (t Threshold) ShouldSample(rnd Randomness) bool { } return bytes.Compare(rnd.bytes[1:], t.bytes[1:]) < 0 } + +func ThresholdLessThan(a, b Threshold) bool { + // Note full 8 byte compare + return bytes.Compare(a.bytes[:], b.bytes[:]) < 0 +} diff --git a/pkg/sampling/internal/unsigned/threshold.go b/pkg/sampling/internal/unsigned/threshold.go index 3908a7403b2e..eafb6b5fb9af 100644 --- a/pkg/sampling/internal/unsigned/threshold.go +++ b/pkg/sampling/internal/unsigned/threshold.go @@ -94,3 +94,7 @@ func (th Threshold) TValue() string { func (t Threshold) ShouldSample(rnd Randomness) bool { return rnd.unsigned < t.unsigned } + +func ThresholdLessThan(a, b Threshold) bool { + return a.unsigned < b.unsigned +} diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 6f881288bdb1..1d32ff07d01e 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -98,6 +98,10 @@ func (otts *OTelTraceState) HasTValue() bool { return otts.t != "" } +func (otts *OTelTraceState) HasNonZeroTValue() bool { + return otts.HasTValue() && otts.TValueThreshold() != NeverSampleThreshold +} + func (otts *OTelTraceState) TValue() string { return otts.t } @@ -106,9 +110,9 @@ func (otts *OTelTraceState) TValueThreshold() Threshold { return otts.tt } -func (otts *OTelTraceState) SetTValue(value string, threshold Threshold) { - otts.t = value +func (otts *OTelTraceState) SetTValue(threshold Threshold, encoded string) { otts.tt = threshold + otts.t = encoded } func (otts *OTelTraceState) 
UnsetTValue() { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index e1a24bda96ec..d4aab1f18d38 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -5,6 +5,7 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "context" + "fmt" "strconv" "strings" @@ -45,15 +46,17 @@ const ( percentageScaleFactor = numHashBuckets / 100.0 ) +var ErrInconsistentArrivingTValue = fmt.Errorf("inconsistent arriving t-value: span should not have been sampled") + type traceSampler interface { // decide reports the result based on a probabilistic decision. - decide(s ptrace.Span) bool + decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) // updateTracestate modifies the OTelTraceState assuming it will be // sampled, probabilistically or otherwise. The "should" parameter // is the result from decide(), for the span's TraceID, which // will not be recalculated. - updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) + updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) } type traceProcessor struct { @@ -88,10 +91,15 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS // When the tracestate is OK and has r-value, use it. randomness = wts.OTelValue().RValueRandomness() } else { - // All other cases, use the TraceID. + // Here we assume the trace was generated with a + // randomness flag, which we're not supposed to do in + // a head sampler. Until + // https://github.com/open-telemetry/opentelemetry-proto/pull/503 + // is addressed we simply assume it was random synthesizing + // an rv-value has questionable value for an out-of-band context. 
randomness = sampling.RandomnessFromTraceID(s.TraceID()) } - return randomness, wts, err + return randomness, &wts, err } // newTracesProcessor returns a processor.TracesProcessor that will perform head sampling according to the given @@ -132,21 +140,15 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * tp.sampler = ts case modeResample: // Encode t-value: for cases where the incoming context has - tval, err := sampling.ProbabilityToTValue(ratio) - if err != nil { - return nil, err - } - // Compute the sampling threshold from the exact probability. - threshold, err := sampling.TValueToThreshold(tval) + threshold, err := sampling.ProbabilityToThreshold(ratio) if err != nil { return nil, err } - ts := &traceResampler{} - ts.tValueEncoding = tval - ts.traceIDThreshold = threshold - - tp.sampler = ts + tp.sampler = &traceResampler{ + tValueEncoding: threshold.TValue(), + traceIDThreshold: threshold, + } case modeDownsample: // TODO } @@ -160,50 +162,59 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } -func (ts *traceHashSampler) decide(s ptrace.Span) bool { +func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources // with various different criteria to generate trace id and perhaps were already sampled without hashing. // Hashing here prevents bias due to such systems. 
tid := s.TraceID() - return computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate + decision := computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate + return decision, nil, nil } -func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, _ sampling.Randomness, should bool, otts *sampling.OTelTraceState) { +func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { // No action, nothing is specified. } -func (ts *traceResampler) decide(s ptrace.Span) bool { - rnd := randomnessFromSpan(s) - return ts.traceIDThreshold.ShouldSample(randomness) -} - -func (ts *traceResampler) updateTracestate(tid pcommon.TraceID, rnd sampling.Randomness, should bool, otts *sampling.OTelTraceState) { - // When this sampler decided not to sample, the t-value becomes zero. - // Incoming TValue consistency is not checked when this happens. - if !should { - otts.SetTValue(sampling.ProbabilityZeroEncoding, sampling.Threshold{}) - return +func (ts *traceResampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { + rnd, wts, err := randomnessFromSpan(s) + if err != nil { + // TODO: Configure fail-open vs fail-closed? + return true, nil, err } - arrivingHasNonZeroTValue := otts.HasTValue() && otts.TValueThreshold().Unsigned() != 0 - - if arrivingHasNonZeroTValue { + otts := wts.OTelValue() + if otts.HasNonZeroTValue() { // Consistency check: if the TraceID is out of range // (unless the TValue is zero), the TValue is a lie. // If inconsistent, clear it. if !otts.TValueThreshold().ShouldSample(rnd) { - arrivingHasNonZeroTValue = false + // Let this error log: we have a misconfigured + // upstream sampler and are unsetting its t-value. 
+ err = ErrInconsistentArrivingTValue otts.UnsetTValue() } } - if arrivingHasNonZeroTValue && - otts.TValueThreshold().Unsigned() < ts.traceIDThreshold.Unsigned() { - // Already-sampled case: test whether the unsigned value of the - // threshold is smaller than this sampler is configured with. + return ts.traceIDThreshold.ShouldSample(rnd), wts, err +} + +func (ts *traceResampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { + // When this sampler decided not to sample, the t-value becomes zero. + // Incoming TValue consistency is not checked when this happens. + if !should { + otts.SetTValue(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + return + } + if otts.HasNonZeroTValue() && + sampling.ThresholdLessThan(otts.TValueThreshold(), ts.traceIDThreshold) { + // Smaller thresholds are more selective, so when the existing + // threshold is less than the resampler, do nothing. return } - // Set the new effective t-value. - otts.SetTValue(ts.tValueEncoding, ts.traceIDThreshold) + + // If the existing t-value represents zero, the resampler raises it + // but this is a very fishy configuration. 
+ + otts.SetTValue(ts.traceIDThreshold, ts.tValueEncoding) return } @@ -224,7 +235,10 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( return true } - probSample, otts := tp.sampler.decide(s) + probSample, wts, err := tp.sampler.decide(s) + if err != nil { + tp.logger.Error("trace-state", zap.Error(err)) + } forceSample := priority == mustSampleSpan @@ -245,11 +259,11 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( } if sampled { - tp.sampler.updateTracestate(s.TraceID(), randomness, probSample, wts.OTelValue()) + tp.sampler.updateTracestate(s.TraceID(), probSample, wts.OTelValue()) var w strings.Builder wts.Serialize(&w) - state.FromRaw(w.String()) + s.TraceState().FromRaw(w.String()) } return !sampled From d7952104e2e4e6ba6e379b7e038ee988f31060d0 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 6 Sep 2023 15:57:43 -0700 Subject: [PATCH 20/38] Updates for OTEP 235 --- pkg/sampling/impl.go | 11 ++++ pkg/sampling/internal/unsigned/threshold.go | 2 + pkg/sampling/oteltracestate.go | 54 ++++++++++-------- pkg/sampling/oteltracestate_test.go | 62 ++++++++++----------- 4 files changed, 76 insertions(+), 53 deletions(-) diff --git a/pkg/sampling/impl.go b/pkg/sampling/impl.go index 218026ab2d18..7d81b8115ec0 100644 --- a/pkg/sampling/impl.go +++ b/pkg/sampling/impl.go @@ -6,25 +6,36 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned" ) +// Randomness represents individual trace randomness. type Randomness = unsigned.Randomness + +// Threshold represents sampling selectivity. type Threshold = unsigned.Threshold +// RValueToRandomness parses a R-value. func RValueToRandomness(s string) (Randomness, error) { return unsigned.RValueToRandomness(s) } +// TValueToThreshold parses a T-value. func TValueToThreshold(s string) (Threshold, error) { return unsigned.TValueToThreshold(s) } +// ProbabilityToThreshold computes a re-usable Threshold value. 
func ProbabilityToThreshold(prob float64) (Threshold, error) { return unsigned.ProbabilityToThreshold(prob) } +// RandomnessFromTraceID returns the randomness using the least +// significant 56 bits of the TraceID (without consideration for +// trace flags). func RandomnessFromTraceID(tid pcommon.TraceID) Randomness { return unsigned.RandomnessFromTraceID(tid) } +// ThresholdLessThan allows comparing thresholds directly. Smaller +// thresholds have smaller probabilities, larger adjusted counts. func ThresholdLessThan(a, b Threshold) bool { return unsigned.ThresholdLessThan(a, b) } diff --git a/pkg/sampling/internal/unsigned/threshold.go b/pkg/sampling/internal/unsigned/threshold.go index eafb6b5fb9af..5b94072aad66 100644 --- a/pkg/sampling/internal/unsigned/threshold.go +++ b/pkg/sampling/internal/unsigned/threshold.go @@ -95,6 +95,8 @@ func (t Threshold) ShouldSample(rnd Randomness) bool { return rnd.unsigned < t.unsigned } +// ThresholdLessThan allows direct comparison of Threshold values. +// Smaller thresholds equate with smaller probabilities. 
func ThresholdLessThan(a, b Threshold) bool { return a.unsigned < b.unsigned } diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 1d32ff07d01e..b0e40d4b9bf8 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -10,13 +10,18 @@ type OTelTraceState struct { commonTraceState // sampling r and t-values - rnd Randomness // r value parsed, as unsigned - r string // 14 ASCII hex digits - tt Threshold // t value parsed, as a threshold - t string // 1-14 ASCII hex digits + rnd Randomness // r value parsed, as unsigned + rvalue string // 14 ASCII hex digits + threshold Threshold // t value parsed, as a threshold + tvalue string // 1-14 ASCII hex digits } const ( + // RName is the OTel tracestate field for R-value + RName = "rv" + // TName is the OTel tracestate field for T-value + TName = "th" + // hardMaxOTelLength is the maximum encoded size of an OTel // tracestate value. hardMaxOTelLength = 256 @@ -58,17 +63,20 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { err := otelSyntax.scanKeyValues(input, func(key, value string) error { var err error switch key { - case "r": + case RName: if otts.rnd, err = RValueToRandomness(value); err == nil { - otts.r = value + otts.rvalue = value } else { - otts.rnd = Randomness{} // @@@ + // The zero-value for randomness implies always-sample; + // the threshold test is R < T, but T is not meaningful + // at zero, and this value implies zero adjusted count. 
+ otts.rnd = Randomness{} } - case "t": - if otts.tt, err = TValueToThreshold(value); err == nil { - otts.t = value + case TName: + if otts.threshold, err = TValueToThreshold(value); err == nil { + otts.tvalue = value } else { - otts.tt = AlwaysSampleThreshold + otts.threshold = AlwaysSampleThreshold } default: otts.kvs = append(otts.kvs, KV{ @@ -83,11 +91,11 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { } func (otts *OTelTraceState) HasRValue() bool { - return otts.r != "" + return otts.rvalue != "" } func (otts *OTelTraceState) RValue() string { - return otts.r + return otts.rvalue } func (otts *OTelTraceState) RValueRandomness() Randomness { @@ -95,7 +103,7 @@ func (otts *OTelTraceState) RValueRandomness() Randomness { } func (otts *OTelTraceState) HasTValue() bool { - return otts.t != "" + return otts.tvalue != "" } func (otts *OTelTraceState) HasNonZeroTValue() bool { @@ -103,21 +111,21 @@ func (otts *OTelTraceState) HasNonZeroTValue() bool { } func (otts *OTelTraceState) TValue() string { - return otts.t + return otts.tvalue } func (otts *OTelTraceState) TValueThreshold() Threshold { - return otts.tt + return otts.threshold } func (otts *OTelTraceState) SetTValue(threshold Threshold, encoded string) { - otts.tt = threshold - otts.t = encoded + otts.threshold = threshold + otts.tvalue = encoded } func (otts *OTelTraceState) UnsetTValue() { - otts.t = "" - otts.tt = Threshold{} + otts.tvalue = "" + otts.threshold = Threshold{} } func (otts *OTelTraceState) HasAnyValue() bool { @@ -134,12 +142,14 @@ func (otts *OTelTraceState) Serialize(w io.StringWriter) { } if otts.HasRValue() { sep() - w.WriteString("r:") + w.WriteString(RName) + w.WriteString(":") w.WriteString(otts.RValue()) } if otts.HasTValue() { sep() - w.WriteString("t:") + w.WriteString(TName) + w.WriteString(":") w.WriteString(otts.TValue()) } for _, kv := range otts.ExtraValues() { diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 
a9ecc055154d..5ed1a3187e20 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -38,7 +38,7 @@ func TestEmptyOTelTraceState(t *testing.T) { } func TestOTelTraceStateTValueSerialize(t *testing.T) { - const orig = "r:10000000000000;t:3;a:b;c:d" + const orig = "rv:10000000000000;th:3;a:b;c:d" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasTValue()) @@ -64,32 +64,32 @@ func TestParseOTelTraceState(t *testing.T) { const ns = "" for _, test := range []testCase{ // t-value correct cases - {"t:2", ns, "2", nil, nil}, - {"t:1", ns, "1", nil, nil}, - {"t:1", ns, "1", nil, nil}, - {"t:10", ns, "10", nil, nil}, - {"t:33", ns, "33", nil, nil}, - {"t:ab", ns, "ab", nil, nil}, - {"t:61", ns, "61", nil, nil}, + {"th:2", ns, "2", nil, nil}, + {"th:1", ns, "1", nil, nil}, + {"th:1", ns, "1", nil, nil}, + {"th:10", ns, "10", nil, nil}, + {"th:33", ns, "33", nil, nil}, + {"th:ab", ns, "ab", nil, nil}, + {"th:61", ns, "61", nil, nil}, // syntax errors {"", ns, ns, nil, strconv.ErrSyntax}, - {"t:1;", ns, ns, nil, strconv.ErrSyntax}, - {"t:1=p:2", ns, ns, nil, strconv.ErrSyntax}, - {"t:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;", ns, ns, nil, strconv.ErrSyntax}, + {"th:1=p:2", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;:", ns, ns, nil, strconv.ErrSyntax}, {":", ns, ns, nil, strconv.ErrSyntax}, - {"t:;p=1", ns, ns, nil, strconv.ErrSyntax}, - {"t:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal - {"t:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid - {"t:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax - {"t:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative + {"th:;p=1", ns, ns, nil, strconv.ErrSyntax}, + {"th:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"th:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid + 
{"th:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax + {"th:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative // too many digits - {"t:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, - {"t:100000000000000", ns, ns, nil, ErrTValueSize}, + {"th:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, + {"th:100000000000000", ns, ns, nil, ErrTValueSize}, // one field {"e100:1", ns, ns, []string{"e100:1"}, nil}, @@ -99,13 +99,13 @@ func TestParseOTelTraceState(t *testing.T) { {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, // one extra key, two ways - {"t:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, - {"extra:stuff;t:2", ns, "2", []string{"extra:stuff"}, nil}, + {"th:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;th:2", ns, "2", []string{"extra:stuff"}, nil}, // two extra fields - {"e100:100;t:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"t:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"e100:100;e101:101;t:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;th:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"th:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;th:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, // parse error prevents capturing unrecognized keys {"1:1;u:V", ns, ns, nil, strconv.ErrSyntax}, @@ -113,15 +113,15 @@ func TestParseOTelTraceState(t *testing.T) { {"x:1;u:V", ns, ns, []string{"x:1", "u:V"}, nil}, // r-value - {"r:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"extra:stuff;r:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"r:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, - {"r:88888888888888", "88888888888888", ns, nil, nil}, - {"r:00000000000000", "00000000000000", ns, nil, nil}, + {"rv:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;rv:22222222222222", 
"22222222222222", ns, []string{"extra:stuff"}, nil}, + {"rv:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, + {"rv:88888888888888", "88888888888888", ns, nil, nil}, + {"rv:00000000000000", "00000000000000", ns, nil, nil}, // r-value range error (15 bytes of hex or more) - {"r:100000000000000", ns, ns, nil, ErrRValueSize}, - {"r:fffffffffffffffff", ns, ns, nil, ErrRValueSize}, + {"rv:100000000000000", ns, ns, nil, ErrRValueSize}, + {"rv:fffffffffffffffff", ns, ns, nil, ErrRValueSize}, // no trailing ; {"x:1;", ns, ns, nil, strconv.ErrSyntax}, @@ -131,7 +131,7 @@ func TestParseOTelTraceState(t *testing.T) { // charset test {"x:0X1FFF;y:.-_-.;z:", ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, - {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;th:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, // size exceeded {"x:" + strings.Repeat("_", 255), ns, ns, nil, ErrTraceStateSize}, From 09000f7d92bbeb758e8b39780b06963aac02e4f3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 6 Sep 2023 16:07:22 -0700 Subject: [PATCH 21/38] wip TODO --- .../probabilisticsamplerprocessor/config.go | 12 ++++---- .../probabilisticsamplerprocessor/factory.go | 1 + .../tracesprocessor.go | 28 ++++++++++--------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index 2f298175faae..28d9b1b2d506 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -119,18 +119,18 @@ type samplerMode int const ( modeUnset = iota modeHashSeed - modeDownsample - modeResample + modeProportional + modeEqualizing ) func parseSamplerMode(s string) (samplerMode, error) { switch strings.ToLower(s) { - case "resample": - return modeResample, nil + case "equalizing": + return modeEqualizing, nil case "hash_seed": return modeHashSeed, nil 
- case "downsample": - return modeDownsample, nil + case "proportional": + return modeProportional, nil case "": return modeUnset, nil default: diff --git a/processor/probabilisticsamplerprocessor/factory.go b/processor/probabilisticsamplerprocessor/factory.go index 6659aabd6dad..8cd025c5fb9c 100644 --- a/processor/probabilisticsamplerprocessor/factory.go +++ b/processor/probabilisticsamplerprocessor/factory.go @@ -37,6 +37,7 @@ func NewFactory() processor.Factory { func createDefaultConfig() component.Config { return &Config{ AttributeSource: defaultAttributeSource, + SamplerMode: "equalizing", } } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index d4aab1f18d38..2ccedefc9928 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -66,13 +66,13 @@ type traceProcessor struct { type traceHashSampler struct { // Hash-based calculation - hashScaledSamplingRate uint32 - hashSeed uint32 - probability float64 - svalueEncoding string + hashScaledSamplerate uint32 + hashSeed uint32 + probability float64 + svalueEncoding string } -type traceResampler struct { +type traceEqualizer struct { // TraceID-randomness-based calculation traceIDThreshold sampling.Threshold @@ -122,7 +122,8 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * if cfg.HashSeed != 0 { mode = modeHashSeed } else { - mode = modeDownsample + // TODO: make this modeProportional + mode = modeEqualizing } } @@ -132,25 +133,26 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * ts := &traceHashSampler{} // Adjust sampling percentage on private so recalculations are avoided. 
- ts.hashScaledSamplingRate = uint32(pct * percentageScaleFactor) + ts.hashScaledSamplerate = uint32(pct * percentageScaleFactor) ts.hashSeed = cfg.HashSeed ts.probability = ratio ts.svalueEncoding = strconv.FormatFloat(ratio, 'g', 4, 64) tp.sampler = ts - case modeResample: + case modeEqualizing: // Encode t-value: for cases where the incoming context has threshold, err := sampling.ProbabilityToThreshold(ratio) if err != nil { return nil, err } - tp.sampler = &traceResampler{ + tp.sampler = &traceEqualizer{ tValueEncoding: threshold.TValue(), traceIDThreshold: threshold, } - case modeDownsample: + case modeProportional: // TODO + panic("Not implemented") } return processorhelper.NewTracesProcessor( @@ -167,7 +169,7 @@ func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState // with various different criteria to generate trace id and perhaps were already sampled without hashing. // Hashing here prevents bias due to such systems. tid := s.TraceID() - decision := computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplingRate + decision := computeHash(tid[:], ts.hashSeed)&bitMaskHashBuckets < ts.hashScaledSamplerate return decision, nil, nil } @@ -175,7 +177,7 @@ func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, should bool, o // No action, nothing is specified. } -func (ts *traceResampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { +func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { rnd, wts, err := randomnessFromSpan(s) if err != nil { // TODO: Configure fail-open vs fail-closed? 
@@ -197,7 +199,7 @@ func (ts *traceResampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, return ts.traceIDThreshold.ShouldSample(rnd), wts, err } -func (ts *traceResampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { +func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { // When this sampler decided not to sample, the t-value becomes zero. // Incoming TValue consistency is not checked when this happens. if !should { From a4d467b2983324a20a3064b5e252b6a2f0d6a73a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 6 Sep 2023 16:15:05 -0700 Subject: [PATCH 22/38] versions.yaml --- versions.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/versions.yaml b/versions.yaml index b5943968f20f..5e3b37a77e46 100644 --- a/versions.yaml +++ b/versions.yaml @@ -52,6 +52,7 @@ module-sets: - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/lokiexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/mezmoexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/opencensusexporter + - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/otelarrowexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/parquetexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter @@ -201,6 +202,7 @@ module-sets: - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/nsxtreceiver - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/oracledbreceiver + - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/otelarrowreceiver - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/otlpjsonfilereceiver - 
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/podmanreceiver - github.com/open-telemetry/opentelemetry-collector-contrib/receiver/postgresqlreceiver From e373b9b17204401baebe8f35f985631b95081dab Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 7 Sep 2023 16:24:05 -0700 Subject: [PATCH 23/38] Add proportional sampler mode; comment on TODOs; create SamplerMode type similar to configcomprsesion.CompressionType --- pkg/sampling/impl.go | 6 ++ pkg/sampling/internal/unsigned/randomness.go | 21 ++++- pkg/sampling/internal/unsigned/threshold.go | 2 +- pkg/sampling/oteltracestate.go | 10 ++ .../probabilisticsamplerprocessor/config.go | 36 +------ .../probabilisticsamplerprocessor/factory.go | 2 +- .../sampler_mode.go | 30 ++++++ .../sampler_mode_test.go | 47 ++++++++++ .../tracesprocessor.go | 94 +++++++++++++------ 9 files changed, 179 insertions(+), 69 deletions(-) create mode 100644 processor/probabilisticsamplerprocessor/sampler_mode.go create mode 100644 processor/probabilisticsamplerprocessor/sampler_mode_test.go diff --git a/pkg/sampling/impl.go b/pkg/sampling/impl.go index 7d81b8115ec0..072c4beabb20 100644 --- a/pkg/sampling/impl.go +++ b/pkg/sampling/impl.go @@ -34,6 +34,12 @@ func RandomnessFromTraceID(tid pcommon.TraceID) Randomness { return unsigned.RandomnessFromTraceID(tid) } +// RandomnessFromBits constructs a randomness using random bits. +// Bits should be in the range [0, MaxAdjustedCount). +func RandomnessFromBits(bits uint64) Randomness { + return unsigned.RandomnessFromBits(bits) +} + // ThresholdLessThan allows comparing thresholds directly. Smaller // thresholds have smaller probabilities, larger adjusted counts. 
func ThresholdLessThan(a, b Threshold) bool { diff --git a/pkg/sampling/internal/unsigned/randomness.go b/pkg/sampling/internal/unsigned/randomness.go index ed9db8418f6f..1252979098fa 100644 --- a/pkg/sampling/internal/unsigned/randomness.go +++ b/pkg/sampling/internal/unsigned/randomness.go @@ -17,19 +17,27 @@ var ErrRValueSize = errors.New("r-value must have 14 hex digits") // the unsigned value of bytes 9 through 15. const LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 -// Randomness may be derived from r-value or TraceID. +// Randomness may be derived from R-value or TraceID. type Randomness struct { - // randomness is in the range [0, MaxAdjustedCount-1] + // unsigned is in the range [0, MaxAdjustedCount-1] unsigned uint64 } -// Randomness is the value we compare with Threshold in ShouldSample. +// RandomnessFromTraceID returns randomness from a TraceID (assumes +// the traceparent random flag was set). func RandomnessFromTraceID(id pcommon.TraceID) Randomness { return Randomness{ unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, } } +// RandomnessFromBits returns randomness from 56 random bits. +func RandomnessFromBits(bits uint64) Randomness { + return Randomness{ + unsigned: bits & LeastHalfTraceIDThresholdMask, + } +} + // RValueToRandomness parses NumHexDigits hex bytes into a Randomness. func RValueToRandomness(s string) (Randomness, error) { if len(s) != NumHexDigits { @@ -45,3 +53,10 @@ func RValueToRandomness(s string) (Randomness, error) { unsigned: unsigned, }, nil } + +func (rnd Randomness) ToRValue() string { + // Note: adding MaxAdjustedCount then removing the leading byte accomplishes + // zero padding. 
+ return strconv.FormatUint(MaxAdjustedCount+rnd.unsigned, hexBase)[1:] + +} diff --git a/pkg/sampling/internal/unsigned/threshold.go b/pkg/sampling/internal/unsigned/threshold.go index 5b94072aad66..229ed2efc98b 100644 --- a/pkg/sampling/internal/unsigned/threshold.go +++ b/pkg/sampling/internal/unsigned/threshold.go @@ -92,7 +92,7 @@ func (th Threshold) TValue() string { // ShouldSample returns true when the span passes this sampler's // consistent sampling decision. func (t Threshold) ShouldSample(rnd Randomness) bool { - return rnd.unsigned < t.unsigned + return t == NeverSampleThreshold || rnd.unsigned < t.unsigned } // ThresholdLessThan allows direct comparison of Threshold values. diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index b0e40d4b9bf8..d379192db988 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -128,6 +128,16 @@ func (otts *OTelTraceState) UnsetTValue() { otts.threshold = Threshold{} } +func (otts *OTelTraceState) SetRValue(randomness Randomness) { + otts.rnd = randomness + otts.rvalue = randomness.ToRValue() +} + +func (otts *OTelTraceState) UnsetRValue() { + otts.rvalue = "" + otts.rnd = Randomness{} +} + func (otts *OTelTraceState) HasAnyValue() bool { return otts.HasRValue() || otts.HasTValue() || otts.HasExtraValues() } diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index 28d9b1b2d506..b909c786d6b6 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -6,7 +6,6 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "fmt" "math" - "strings" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" "go.opentelemetry.io/collector/component" @@ -51,7 +50,7 @@ type Config struct { // span-to-metrics pipeline based on this mechanism may have // anomalous behavior. 
// - // - "resample": Using an OTel-specified consistent sampling + // - "equalizing": Using an OTel-specified consistent sampling // mechanism, this sampler selectively reduces the effective // sampling probability of arriving spans. This can be // useful to select a small fraction of complete traces from @@ -63,10 +62,10 @@ type Config struct { // because it means this sampler is configured with too // large a sampling probability to ensure complete traces. // - // - "downsample": Using an OTel-specified consistent sampling + // - "proportional": Using an OTel-specified consistent sampling // mechanism, this sampler reduces the effective sampling // probability of each span by `SamplingProbability`. - SamplerMode string `mapstructure:"sampler_mode"` + SamplerMode SamplerMode `mapstructure:"sampler_mode"` /////// // Logs only fields below. @@ -106,34 +105,5 @@ func (cfg *Config) Validate() error { if cfg.AttributeSource != "" && !validAttributeSource[cfg.AttributeSource] { return fmt.Errorf("invalid attribute source: %v. 
Expected: %v or %v", cfg.AttributeSource, traceIDAttributeSource, recordAttributeSource) } - - // Force the mode to lower case, check validity - if _, err := parseSamplerMode(cfg.SamplerMode); err != nil { - return err - } return nil } - -type samplerMode int - -const ( - modeUnset = iota - modeHashSeed - modeProportional - modeEqualizing -) - -func parseSamplerMode(s string) (samplerMode, error) { - switch strings.ToLower(s) { - case "equalizing": - return modeEqualizing, nil - case "hash_seed": - return modeHashSeed, nil - case "proportional": - return modeProportional, nil - case "": - return modeUnset, nil - default: - return modeUnset, fmt.Errorf("unknown sampler mode: %q", s) - } -} diff --git a/processor/probabilisticsamplerprocessor/factory.go b/processor/probabilisticsamplerprocessor/factory.go index 8cd025c5fb9c..ae3a06e9fd07 100644 --- a/processor/probabilisticsamplerprocessor/factory.go +++ b/processor/probabilisticsamplerprocessor/factory.go @@ -37,7 +37,7 @@ func NewFactory() processor.Factory { func createDefaultConfig() component.Config { return &Config{ AttributeSource: defaultAttributeSource, - SamplerMode: "equalizing", + SamplerMode: DefaultMode, } } diff --git a/processor/probabilisticsamplerprocessor/sampler_mode.go b/processor/probabilisticsamplerprocessor/sampler_mode.go new file mode 100644 index 000000000000..454a1ad769ad --- /dev/null +++ b/processor/probabilisticsamplerprocessor/sampler_mode.go @@ -0,0 +1,30 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package probabilisticsamplerprocessor + +import "fmt" + +type SamplerMode string + +const ( + HashSeed SamplerMode = "hash_seed" + Equalizing SamplerMode = "equalizing" + Proportional SamplerMode = "proportional" + modeUnset SamplerMode = "" + + DefaultMode = Proportional +) + +func (sm *SamplerMode) UnmarshalText(in []byte) error { + switch mode := SamplerMode(in); mode { + case HashSeed, + Equalizing, + Proportional, + modeUnset: + *sm = mode + 
return nil + default: + return fmt.Errorf("unsupported sampler mode %q", mode) + } +} diff --git a/processor/probabilisticsamplerprocessor/sampler_mode_test.go b/processor/probabilisticsamplerprocessor/sampler_mode_test.go new file mode 100644 index 000000000000..13dbe59cc722 --- /dev/null +++ b/processor/probabilisticsamplerprocessor/sampler_mode_test.go @@ -0,0 +1,47 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package probabilisticsamplerprocessor + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestUnmarshalText(t *testing.T) { + tests := []struct { + samplerMode string + shouldError bool + }{ + { + samplerMode: "hash_seed", + }, + { + samplerMode: "equalizing", + }, + { + samplerMode: "proportional", + }, + { + samplerMode: "", + }, + { + samplerMode: "dunno", + shouldError: true, + }, + } + for _, tt := range tests { + t.Run(tt.samplerMode, func(t *testing.T) { + temp := modeUnset + err := temp.UnmarshalText([]byte(tt.samplerMode)) + if tt.shouldError { + assert.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, temp, SamplerMode(tt.samplerMode)) + }) + } +} diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 2ccedefc9928..d6abbec8960a 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -6,11 +6,10 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "context" "fmt" + "math/rand" "strconv" "strings" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" - "go.opencensus.io/stats" "go.opencensus.io/tag" "go.opentelemetry.io/collector/consumer" @@ -19,6 +18,8 @@ import ( "go.opentelemetry.io/collector/processor" "go.opentelemetry.io/collector/processor/processorhelper" "go.uber.org/zap" + + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" ) // samplingPriority has the semantic result of parsing the "sampling.priority" @@ -80,6 +81,10 @@ type traceEqualizer struct { tValueEncoding string } +type traceProportionalizer struct { + ratio float64 +} + func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceState, error) { state := s.TraceState() raw := state.AsRaw() @@ -90,14 +95,19 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS if err == nil && wts.OTelValue().HasRValue() { // When the tracestate is OK and has r-value, use it. randomness = wts.OTelValue().RValueRandomness() - } else { - // Here we assume the trace was generated with a - // randomness flag, which we're not supposed to do in - // a head sampler. Until - // https://github.com/open-telemetry/opentelemetry-proto/pull/503 - // is addressed we simply assume it was random synthesizing - // an rv-value has questionable value for an out-of-band context. + } else if true /* s.Flags()&0x2 == 0x2 */ { + // See https://github.com/open-telemetry/opentelemetry-proto/pull/503 randomness = sampling.RandomnessFromTraceID(s.TraceID()) + } else { + // Note: Creating an R-value here is the best we can + // do. Issue a warning? This is OK-ish for head + // sampling but kind of nonsense for tail sampling. + // This is especially nonsense if the caller has set a + // T-value already, (TODO: is it better to just assume + // the flag was set in a tail sampler? 
otherwise, + // inconsistent results) + randomness = sampling.RandomnessFromBits(uint64(rand.Int63n(sampling.MaxAdjustedCount))) + wts.OTelValue().SetRValue(randomness) } return randomness, &wts, err } @@ -117,19 +127,17 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * } // error ignored below b/c already checked once - mode, _ := parseSamplerMode(cfg.SamplerMode) - if mode == modeUnset { + if cfg.SamplerMode == modeUnset { if cfg.HashSeed != 0 { - mode = modeHashSeed + cfg.SamplerMode = HashSeed } else { - // TODO: make this modeProportional - mode = modeEqualizing + cfg.SamplerMode = DefaultMode } } ratio := pct / 100 - switch mode { - case modeHashSeed: + switch cfg.SamplerMode { + case HashSeed: ts := &traceHashSampler{} // Adjust sampling percentage on private so recalculations are avoided. @@ -139,8 +147,7 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * ts.svalueEncoding = strconv.FormatFloat(ratio, 'g', 4, 64) tp.sampler = ts - case modeEqualizing: - // Encode t-value: for cases where the incoming context has + case Equalizing: threshold, err := sampling.ProbabilityToThreshold(ratio) if err != nil { return nil, err @@ -150,9 +157,10 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * tValueEncoding: threshold.TValue(), traceIDThreshold: threshold, } - case modeProportional: - // TODO - panic("Not implemented") + case Proportional: + tp.sampler = &traceProportionalizer{ + ratio: ratio, + } } return processorhelper.NewTracesProcessor( @@ -184,16 +192,16 @@ func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, return true, nil, err } otts := wts.OTelValue() - if otts.HasNonZeroTValue() { - // Consistency check: if the TraceID is out of range - // (unless the TValue is zero), the TValue is a lie. - // If inconsistent, clear it. 
- if !otts.TValueThreshold().ShouldSample(rnd) { - // Let this error log: we have a misconfigured - // upstream sampler and are unsetting its t-value. - err = ErrInconsistentArrivingTValue - otts.UnsetTValue() - } + // Consistency check: if the TraceID is out of range + // (unless the TValue is zero), the TValue is a lie. + // If inconsistent, clear it. + if !otts.TValueThreshold().ShouldSample(rnd) { + // Let this error log: we have a misconfigured + // upstream sampler and are unsetting its t-value. + // TODO: Note this will happen if we've made up + // the rvalue, some fraction of the time. + err = ErrInconsistentArrivingTValue + otts.UnsetTValue() } return ts.traceIDThreshold.ShouldSample(rnd), wts, err @@ -220,6 +228,30 @@ func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, ott return } +func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { + rnd, wts, err := randomnessFromSpan(s) + if err != nil { + // TODO: Configure fail-open vs fail-closed? 
+ return true, nil, err + } + otts := wts.OTelValue() + incoming := 1.0 + if otts.HasTValue() { + incoming = otts.TValueThreshold().Probability() + } + threshold, err := sampling.ProbabilityToThreshold(incoming * ts.ratio) + otts.SetTValue(threshold, threshold.TValue()) + return threshold.ShouldSample(rnd), wts, err +} + +func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { + if !should { + otts.SetTValue(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + return + } + return +} + func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { td.ResourceSpans().RemoveIf(func(rs ptrace.ResourceSpans) bool { rs.ScopeSpans().RemoveIf(func(ils ptrace.ScopeSpans) bool { From fe6a085b3de58c9aa51741030fcca917b77b4326 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 4 Oct 2023 12:49:55 -0700 Subject: [PATCH 24/38] back from internal --- pkg/sampling/common.go | 34 ++- pkg/sampling/encoding_test.go | 181 +++++++++---- pkg/sampling/impl.go | 60 ----- pkg/sampling/internal/bytes/probability.go | 39 --- pkg/sampling/internal/bytes/randomness.go | 36 --- pkg/sampling/internal/bytes/threshold.go | 102 -------- pkg/sampling/internal/unsigned/randomness.go | 62 ----- pkg/sampling/oteltracestate.go | 80 ++++-- pkg/sampling/oteltracestate_test.go | 243 ++++++++++++++---- .../{internal/unsigned => }/probability.go | 5 +- pkg/sampling/randomness.go | 95 +++++++ .../{internal/unsigned => }/threshold.go | 60 ++--- pkg/sampling/w3ctracestate.go | 61 +++-- pkg/sampling/w3ctracestate_test.go | 177 +++++++------ 14 files changed, 679 insertions(+), 556 deletions(-) delete mode 100644 pkg/sampling/impl.go delete mode 100644 pkg/sampling/internal/bytes/probability.go delete mode 100644 pkg/sampling/internal/bytes/randomness.go delete mode 100644 pkg/sampling/internal/bytes/threshold.go delete mode 100644 pkg/sampling/internal/unsigned/randomness.go rename 
pkg/sampling/{internal/unsigned => }/probability.go (90%) create mode 100644 pkg/sampling/randomness.go rename pkg/sampling/{internal/unsigned => }/threshold.go (56%) diff --git a/pkg/sampling/common.go b/pkg/sampling/common.go index a40d0f72dfa4..68d17785811e 100644 --- a/pkg/sampling/common.go +++ b/pkg/sampling/common.go @@ -1,7 +1,11 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package sampling import ( "errors" + "io" "strings" "go.uber.org/multierr" @@ -13,8 +17,7 @@ type KV struct { } var ( - ErrTraceStateSize = errors.New("invalid tracestate size") - ErrTraceStateCount = errors.New("invalid tracestate item count") + ErrTraceStateSize = errors.New("invalid tracestate size") ) // keyValueScanner defines distinct scanner behaviors for lists of @@ -30,6 +33,7 @@ type keyValueScanner struct { equality byte } +// commonTraceState is embedded in both W3C and OTel trace states. type commonTraceState struct { kvs []KV } @@ -45,15 +49,17 @@ func (cts commonTraceState) ExtraValues() []KV { // trimOws removes optional whitespace on both ends of a string. func trimOws(input string) string { // Hard-codes the value of owsCharset - for len(input) > 0 && input[0] == ' ' || input[0] == '\t' { + for len(input) > 0 && (input[0] == ' ' || input[0] == '\t') { input = input[1:] } - for len(input) > 0 && input[len(input)-1] == ' ' || input[len(input)-1] == '\t' { + for len(input) > 0 && (input[len(input)-1] == ' ' || input[len(input)-1] == '\t') { input = input[:len(input)-1] } return input } +// scanKeyValues is common code to scan either W3C or OTel tracestate +// entries, as parameterized in the keyValueScanner struct. 
func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) error) error { var rval error items := 0 @@ -62,7 +68,7 @@ func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) e if s.maxItems > 0 && items >= s.maxItems { // W3C specifies max 32 entries, tested here // instead of via the regexp. - return ErrTraceStateCount + return ErrTraceStateSize } sep := strings.IndexByte(input, s.separator) @@ -98,3 +104,21 @@ func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) e } return rval } + +// serializer assists with checking and combining errors from +// (io.StringWriter).WriteString(). +type serializer struct { + writer io.StringWriter + err error +} + +// write handles errors from io.StringWriter. +func (ser *serializer) write(str string) { + _, err := ser.writer.WriteString(str) + ser.check(err) +} + +// check handles errors (e.g., from another serializer). +func (ser *serializer) check(err error) { + ser.err = multierr.Append(ser.err, err) +} diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index 3e6eb9d65848..09bb6f93b640 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -1,27 +1,16 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
+// SPDX-License-Identifier: Apache-2.0 package sampling import ( "encoding/binary" + "errors" "fmt" "math/rand" + "strconv" "testing" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/bytes" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" ) @@ -60,6 +49,28 @@ func TestValidProbabilityToTValue(t *testing.T) { require.Equal(t, "0", must(probabilityToTValue(0))) } +func TestThresholdLessThan(t *testing.T) { + require.True(t, ThresholdLessThan( + must(TValueToThreshold("4")), + must(TValueToThreshold("5")), + )) + + require.True(t, ThresholdLessThan( + must(TValueToThreshold("04")), + must(TValueToThreshold("4")), + )) + + require.False(t, ThresholdLessThan( + must(TValueToThreshold("4")), + must(TValueToThreshold("234")), + )) + + require.True(t, ThresholdLessThan( + must(TValueToThreshold("234")), + must(TValueToThreshold("4")), + )) +} + func TestInvalidprobabilityToTValue(t *testing.T) { // Too small require.Error(t, mustNot(probabilityToTValue(0x1p-57))) @@ -104,28 +115,28 @@ func TestShouldSample(t *testing.T) { // Test four boundary conditions for 50% sampling, thresh := must(ProbabilityToThreshold(0.5)) // Smallest TraceID that should sample. - require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0, // randomness starts here 0, 0, 0, 0, 0, 0, }))) // Largest TraceID that should sample. - require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x7f, // randomness starts here 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }))) // Smallest TraceID that should NOT sample. 
- require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x80, // randomness starts here 0, 0, 0, 0, 0, 0, }))) // Largest TraceID that should NOT sample. - require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{ + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xff, // randomness starts here @@ -133,57 +144,117 @@ func TestShouldSample(t *testing.T) { }))) } -// The two benchmarks below were used to choose the implementation for -// the Threshold type in this package. The results indicate that it -// is faster to compare a 56-bit number than to compare as 7 element []byte. +func TestRValueSyntax(t *testing.T) { + type testCase struct { + in string + expectErr error + } + for _, test := range []testCase{ + // correct cases + {"12341234123412", nil}, -type benchTIDs [1024]pcommon.TraceID + // wrong size + {"123412341234120", RValueSizeError("123412341234120")}, + {"1234123412341", RValueSizeError("1234123412341")}, + {"", RValueSizeError("")}, -func (tids *benchTIDs) init() { - for i := range tids { - binary.BigEndian.PutUint64(tids[i][:8], rand.Uint64()) - binary.BigEndian.PutUint64(tids[i][8:], rand.Uint64()) + // bad syntax + {"abcdefgabcdefg", strconv.ErrSyntax}, + } { + t.Run(testName(test.in), func(t *testing.T) { + rnd, err := RValueToRandomness(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + require.Equal(t, must(RValueToRandomness("00000000000000")), rnd) + } else { + require.NoError(t, err, "%q", test.in) + + val, err := strconv.ParseUint(test.in, 16, 64) + require.NoError(t, err) + + require.Equal(t, TraceIDToRandomness( + pcommon.TraceID{ + 
byte(rand.Intn(256)), // 0 + byte(rand.Intn(256)), // 1 + byte(rand.Intn(256)), // 2 + byte(rand.Intn(256)), // 3 + byte(rand.Intn(256)), // 4 + byte(rand.Intn(256)), // 5 + byte(rand.Intn(256)), // 6 + byte(rand.Intn(256)), // 7 + byte(rand.Intn(256)), // 8 + byte(val >> 48 & 0xff), // 9 + byte(val >> 40 & 0xff), // 10 + byte(val >> 32 & 0xff), // 11 + byte(val >> 24 & 0xff), // 12 + byte(val >> 16 & 0xff), // 13 + byte(val >> 8 & 0xff), // 14 + byte(val >> 0 & 0xff), // 15 + }, + ), rnd) + } + }) } } -// BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op -func BenchmarkThresholdCompareAsUint64(b *testing.B) { - var tids benchTIDs - var comps [1024]unsigned.Threshold - tids.init() - for i := range comps { - var err error - comps[i], err = unsigned.ProbabilityToThreshold(rand.Float64()) - if err != nil { - b.Fatal(err) - } +func TestTValueSyntax(t *testing.T) { + type testCase struct { + in string + expectErr error } + for _, test := range []testCase{ + // correct cases + {"", nil}, + {"1", nil}, - b.ReportAllocs() - b.ResetTimer() - yes := 0 - no := 0 - for i := 0; i < b.N; i++ { - idx := i % len(tids) - tid := tids[idx] - comp := comps[idx] + // syntax error + {"g", strconv.ErrSyntax}, + } { + t.Run(testName(test.in), func(t *testing.T) { + _, err := TValueToThreshold(test.in) - if comp.ShouldSample(unsigned.RandomnessFromTraceID(tid)) { - yes++ - } else { - no++ - } + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + } else { + require.NoError(t, err, "%q", test.in) + } + }) } } -// BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op -func BenchmarkThresholdCompareAsBytes(b *testing.B) { +// There were two benchmarks used to choose the implementation for the +// Threshold type in this package. The results indicate that it is +// faster to compare a 56-bit number than to compare as 7 element +// []byte. 
+ +type benchTIDs [1024]pcommon.TraceID + +func (tids *benchTIDs) init() { + for i := range tids { + binary.BigEndian.PutUint64(tids[i][:8], rand.Uint64()) + binary.BigEndian.PutUint64(tids[i][8:], rand.Uint64()) + } +} + +// The current implementation, using unsigned: +// +// BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op +// +// vs the tested and rejected, using bytes: +// +// BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op +func BenchmarkThresholdCompareAsUint64(b *testing.B) { var tids benchTIDs - var comps [1024]bytes.Threshold + var comps [1024]Threshold tids.init() for i := range comps { var err error - comps[i], err = bytes.ProbabilityToThreshold(rand.Float64()) + comps[i], err = ProbabilityToThreshold(rand.Float64()) if err != nil { b.Fatal(err) } @@ -198,7 +269,7 @@ func BenchmarkThresholdCompareAsBytes(b *testing.B) { tid := tids[idx] comp := comps[idx] - if comp.ShouldSample(bytes.RandomnessFromTraceID(tid)) { + if comp.ShouldSample(TraceIDToRandomness(tid)) { yes++ } else { no++ diff --git a/pkg/sampling/impl.go b/pkg/sampling/impl.go deleted file mode 100644 index 072c4beabb20..000000000000 --- a/pkg/sampling/impl.go +++ /dev/null @@ -1,60 +0,0 @@ -package sampling - -import ( - "go.opentelemetry.io/collector/pdata/pcommon" - - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned" -) - -// Randomness represents individual trace randomness. -type Randomness = unsigned.Randomness - -// Threshold represents sampling selectivity. -type Threshold = unsigned.Threshold - -// RValueToRandomness parses a R-value. -func RValueToRandomness(s string) (Randomness, error) { - return unsigned.RValueToRandomness(s) -} - -// TValueToThreshold parses a T-value. -func TValueToThreshold(s string) (Threshold, error) { - return unsigned.TValueToThreshold(s) -} - -// ProbabilityToThreshold computes a re-usable Threshold value. 
-func ProbabilityToThreshold(prob float64) (Threshold, error) { - return unsigned.ProbabilityToThreshold(prob) -} - -// RandomnessFromTraceID returns the randomness using the least -// significant 56 bits of the TraceID (without consideration for -// trace flags). -func RandomnessFromTraceID(tid pcommon.TraceID) Randomness { - return unsigned.RandomnessFromTraceID(tid) -} - -// RandomnessFromBits constructs a randomness using random bits. -// Bits should be in the range [0, MaxAdjustedCount). -func RandomnessFromBits(bits uint64) Randomness { - return unsigned.RandomnessFromBits(bits) -} - -// ThresholdLessThan allows comparing thresholds directly. Smaller -// thresholds have smaller probabilities, larger adjusted counts. -func ThresholdLessThan(a, b Threshold) bool { - return unsigned.ThresholdLessThan(a, b) -} - -const MaxAdjustedCount = unsigned.MaxAdjustedCount - -var ( - AlwaysSampleThreshold = unsigned.AlwaysSampleThreshold - NeverSampleThreshold = unsigned.NeverSampleThreshold - - AlwaysSampleTValue = AlwaysSampleThreshold.TValue() - NeverSampleTValue = NeverSampleThreshold.TValue() - - ErrTValueSize = unsigned.ErrTValueSize - ErrRValueSize = unsigned.ErrRValueSize -) diff --git a/pkg/sampling/internal/bytes/probability.go b/pkg/sampling/internal/bytes/probability.go deleted file mode 100644 index fb89c4e2ffe1..000000000000 --- a/pkg/sampling/internal/bytes/probability.go +++ /dev/null @@ -1,39 +0,0 @@ -package bytes - -import ( - "encoding/binary" - "errors" - "math" -) - -// ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. -var ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") - -// probabilityInRange tests MinSamplingProb <= prob <= 1. 
-func probabilityInRange(prob float64) bool { - return prob >= 1/MaxAdjustedCount && prob <= 1 -} - -func ProbabilityToThreshold(prob float64) (Threshold, error) { - // Probability cases - switch { - case prob == 1: - return AlwaysSampleThreshold, nil - case prob == 0: - return NeverSampleThreshold, nil - case !probabilityInRange(prob): - return AlwaysSampleThreshold, ErrProbabilityRange - } - unsigned := uint64(math.Round(prob * MaxAdjustedCount)) - var th Threshold - binary.BigEndian.PutUint64(th.bytes[:], unsigned) - return th, nil -} - -// Probability is the sampling ratio in the range [MinSamplingProb, 1]. -func (t Threshold) Probability() float64 { - if t == AlwaysSampleThreshold { - return 1 - } - return float64(binary.BigEndian.Uint64(t.bytes[:])) / MaxAdjustedCount -} diff --git a/pkg/sampling/internal/bytes/randomness.go b/pkg/sampling/internal/bytes/randomness.go deleted file mode 100644 index 407f6f0fcfb7..000000000000 --- a/pkg/sampling/internal/bytes/randomness.go +++ /dev/null @@ -1,36 +0,0 @@ -package bytes - -import ( - "encoding/hex" - "errors" - - "go.opentelemetry.io/collector/pdata/pcommon" -) - -// ErrRValueSize is returned for r-values != NumHexDigits hex digits. -var ErrRValueSize = errors.New("r-value must have 14 hex digits") - -// Randomness may be derived from r-value or TraceID. -type Randomness struct { - // bytes[0] is unused, so that the relevant portion of these 8 - // bytes align with the TraceID's second 8 bytes. - bytes [8]byte -} - -// Randomness is the value we compare with Threshold in ShouldSample. -func RandomnessFromTraceID(id pcommon.TraceID) Randomness { - var r Randomness - copy(r.bytes[1:], id[9:]) - return r -} - -// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. 
-func RValueToRandomness(s string) (Randomness, error) { - if len(s) != NumHexDigits { - return Randomness{}, ErrRValueSize - } - - var r Randomness - _, err := hex.Decode(r.bytes[1:], []byte(s)) - return r, err -} diff --git a/pkg/sampling/internal/bytes/threshold.go b/pkg/sampling/internal/bytes/threshold.go deleted file mode 100644 index ec33e137e5f4..000000000000 --- a/pkg/sampling/internal/bytes/threshold.go +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bytes - -import ( - "bytes" - "encoding/hex" - "errors" - "strconv" - "strings" -) - -const ( - // MaxAdjustedCount is 2^56 i.e. 0x100000000000000 i.e., 1<<56. - MaxAdjustedCount = 1 << 56 - - // NumHexDigits is the number of hex digits equalling 56 bits. - NumHexDigits = 56 / 4 - - hexBase = 16 -) - -// Threshold used to compare with the least-significant 7 bytes of the TraceID. -type Threshold struct { - bytes [8]byte -} - -var ( - // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. - ErrTValueSize = errors.New("t-value exceeds 14 hex digits") - - NeverSampleThreshold = Threshold{bytes: [8]byte{0, 0, 0, 0, 0, 0, 0, 0}} - AlwaysSampleThreshold = Threshold{bytes: [8]byte{1, 0, 0, 0, 0, 0, 0, 0}} - - hex14Zeros = func() (r [NumHexDigits]byte) { - for i := range r { - r[i] = '0' - } - return - }() -) - -// TValueToThreshold returns a Threshold, see Threshold.ShouldSample(TraceID). 
-func TValueToThreshold(s string) (Threshold, error) { - if len(s) > NumHexDigits { - return AlwaysSampleThreshold, ErrTValueSize - } - if len(s) == 0 { - return AlwaysSampleThreshold, nil - } - - // Fill with padding, then copy most-significant hex digits. - hexPadded := hex14Zeros - copy(hexPadded[0:len(s)], s) - - var th Threshold - if _, err := hex.Decode(th.bytes[1:], hexPadded[:]); err != nil { - return AlwaysSampleThreshold, strconv.ErrSyntax // ErrSyntax for consistency w/ ../unsigned - } - return th, nil -} - -func (th Threshold) TValue() string { - // Special cases - switch { - case th == AlwaysSampleThreshold: - return "" - case th == NeverSampleThreshold: - return "0" - } - - var hexDigits [14]byte - _ = hex.Encode(hexDigits[:], th.bytes[1:]) - return strings.TrimRight(string(hexDigits[:]), "0") -} - -// ShouldSample returns true when the span passes this sampler's -// consistent sampling decision. -func (t Threshold) ShouldSample(rnd Randomness) bool { - if t == AlwaysSampleThreshold { - // 100% sampling case - return true - } - return bytes.Compare(rnd.bytes[1:], t.bytes[1:]) < 0 -} - -func ThresholdLessThan(a, b Threshold) bool { - // Note full 8 byte compare - return bytes.Compare(a.bytes[:], b.bytes[:]) < 0 -} diff --git a/pkg/sampling/internal/unsigned/randomness.go b/pkg/sampling/internal/unsigned/randomness.go deleted file mode 100644 index 1252979098fa..000000000000 --- a/pkg/sampling/internal/unsigned/randomness.go +++ /dev/null @@ -1,62 +0,0 @@ -package unsigned - -import ( - "encoding/binary" - "errors" - "strconv" - - "go.opentelemetry.io/collector/pdata/pcommon" -) - -// ErrRValueSize is returned for r-values != NumHexDigits hex digits. -var ErrRValueSize = errors.New("r-value must have 14 hex digits") - -// LeastHalfTraceIDThresholdMask is the mask to use on the -// least-significant half of the TraceID, i.e., bytes 8-15. -// Because this is a 56 bit mask, the result after masking is -// the unsigned value of bytes 9 through 15. 
-const LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 - -// Randomness may be derived from R-value or TraceID. -type Randomness struct { - // unsigned is in the range [0, MaxAdjustedCount-1] - unsigned uint64 -} - -// RandomnessFromTraceID returns randomness from a TraceID (assumes -// the traceparent random flag was set). -func RandomnessFromTraceID(id pcommon.TraceID) Randomness { - return Randomness{ - unsigned: binary.BigEndian.Uint64(id[8:]) & LeastHalfTraceIDThresholdMask, - } -} - -// RandomnessFromBits returns randomness from 56 random bits. -func RandomnessFromBits(bits uint64) Randomness { - return Randomness{ - unsigned: bits & LeastHalfTraceIDThresholdMask, - } -} - -// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. -func RValueToRandomness(s string) (Randomness, error) { - if len(s) != NumHexDigits { - return Randomness{}, ErrRValueSize - } - - unsigned, err := strconv.ParseUint(s, hexBase, 64) - if err != nil { - return Randomness{}, err - } - - return Randomness{ - unsigned: unsigned, - }, nil -} - -func (rnd Randomness) ToRValue() string { - // Note: adding MaxAdjustedCount then removing the leading byte accomplishes - // zero padding. - return strconv.FormatUint(MaxAdjustedCount+rnd.unsigned, hexBase)[1:] - -} diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index d379192db988..a0d0732ef361 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -1,6 +1,10 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package sampling import ( + "fmt" "io" "regexp" "strconv" @@ -18,9 +22,9 @@ type OTelTraceState struct { const ( // RName is the OTel tracestate field for R-value - RName = "rv" + RName = "r" // TName is the OTel tracestate field for T-value - TName = "th" + TName = "t" // hardMaxOTelLength is the maximum encoded size of an OTel // tracestate value. 
@@ -49,15 +53,27 @@ var ( separator: ';', equality: ':', } + + // ErrInconsistentSampling is returned when a sampler update + // is illogical. It is safe to ignore. Samplers should avoid + // this condition using a ThresholdLessThan() test. + ErrInconsistentSampling = fmt.Errorf("cannot raise existing sampling probability") + ErrInconsistentZero = fmt.Errorf("cannot zero sampling probability") ) -func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { +func NewOTelTraceState(input string) (OTelTraceState, error) { + // Note: the default value has threshold == 0 and tvalue == "". + // It is important to recognize this as always-sample, meaning + // to check HasTValue() before using TValueThreshold(), since + // TValueThreshold() == NeverSampleThreshold when !HasTValue(). + otts := OTelTraceState{} + if len(input) > hardMaxOTelLength { return otts, ErrTraceStateSize } if !otelTracestateRe.MatchString(input) { - return OTelTraceState{}, strconv.ErrSyntax + return otts, strconv.ErrSyntax } err := otelSyntax.scanKeyValues(input, func(key, value string) error { @@ -70,12 +86,14 @@ func NewOTelTraceState(input string) (otts OTelTraceState, _ error) { // The zero-value for randomness implies always-sample; // the threshold test is R < T, but T is not meaningful // at zero, and this value implies zero adjusted count. 
+ otts.rvalue = "" otts.rnd = Randomness{} } case TName: if otts.threshold, err = TValueToThreshold(value); err == nil { otts.tvalue = value } else { + otts.tvalue = "" otts.threshold = AlwaysSampleThreshold } default: @@ -106,8 +124,8 @@ func (otts *OTelTraceState) HasTValue() bool { return otts.tvalue != "" } -func (otts *OTelTraceState) HasNonZeroTValue() bool { - return otts.HasTValue() && otts.TValueThreshold() != NeverSampleThreshold +func (otts *OTelTraceState) HasZeroTValue() bool { + return otts.HasTValue() && otts.TValueThreshold() == NeverSampleThreshold } func (otts *OTelTraceState) TValue() string { @@ -118,22 +136,36 @@ func (otts *OTelTraceState) TValueThreshold() Threshold { return otts.threshold } -func (otts *OTelTraceState) SetTValue(threshold Threshold, encoded string) { - otts.threshold = threshold - otts.tvalue = encoded +func (otts *OTelTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, encodedTValue string) error { + if otts.HasTValue() && ThresholdLessThan(otts.threshold, sampledThreshold) { + return ErrInconsistentSampling + } + otts.threshold = sampledThreshold + otts.tvalue = encodedTValue + return nil +} + +func (otts *OTelTraceState) AdjustedCount() float64 { + if !otts.HasTValue() { + return 1 + } + if otts.TValueThreshold() == NeverSampleThreshold { + return 0 + } + return 1.0 / otts.threshold.Probability() } -func (otts *OTelTraceState) UnsetTValue() { +func (otts *OTelTraceState) ClearTValue() { otts.tvalue = "" otts.threshold = Threshold{} } func (otts *OTelTraceState) SetRValue(randomness Randomness) { otts.rnd = randomness - otts.rvalue = randomness.ToRValue() + otts.rvalue = randomness.RValue() } -func (otts *OTelTraceState) UnsetRValue() { +func (otts *OTelTraceState) ClearRValue() { otts.rvalue = "" otts.rnd = Randomness{} } @@ -142,30 +174,32 @@ func (otts *OTelTraceState) HasAnyValue() bool { return otts.HasRValue() || otts.HasTValue() || otts.HasExtraValues() } -func (otts *OTelTraceState) Serialize(w 
io.StringWriter) { +func (otts *OTelTraceState) Serialize(w io.StringWriter) error { + ser := serializer{writer: w} cnt := 0 sep := func() { if cnt != 0 { - w.WriteString(";") + ser.write(";") } cnt++ } if otts.HasRValue() { sep() - w.WriteString(RName) - w.WriteString(":") - w.WriteString(otts.RValue()) + ser.write(RName) + ser.write(":") + ser.write(otts.RValue()) } if otts.HasTValue() { sep() - w.WriteString(TName) - w.WriteString(":") - w.WriteString(otts.TValue()) + ser.write(TName) + ser.write(":") + ser.write(otts.TValue()) } for _, kv := range otts.ExtraValues() { sep() - w.WriteString(kv.Key) - w.WriteString(":") - w.WriteString(kv.Value) + ser.write(kv.Key) + ser.write(":") + ser.write(kv.Value) } + return ser.err } diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 5ed1a3187e20..cb351ffaad4a 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -1,16 +1,5 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// SPDX-License-Identifier: Apache-2.0 package sampling @@ -24,11 +13,10 @@ import ( ) func testName(in string) string { - x := strings.NewReplacer(":", "_", ";", "_").Replace(in) - if len(x) > 32 { - return "" + if len(in) > 32 { + return in[:32] + "..." 
} - return x + return in } func TestEmptyOTelTraceState(t *testing.T) { @@ -38,14 +26,16 @@ func TestEmptyOTelTraceState(t *testing.T) { } func TestOTelTraceStateTValueSerialize(t *testing.T) { - const orig = "rv:10000000000000;th:3;a:b;c:d" + const orig = "r:10000000000000;t:3;a:b;c:d" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasTValue()) require.Equal(t, "3", otts.TValue()) + require.Equal(t, 0x3p-4, otts.TValueThreshold().Probability()) require.True(t, otts.HasRValue()) require.Equal(t, "10000000000000", otts.RValue()) + require.Equal(t, "10000000000000", otts.RValueRandomness().RValue()) require.True(t, otts.HasAnyValue()) var w strings.Builder @@ -53,6 +43,89 @@ func TestOTelTraceStateTValueSerialize(t *testing.T) { require.Equal(t, orig, w.String()) } +func TestOTelTraceStateZeroAdjustedCount(t *testing.T) { + const orig = "t:0" + otts, err := NewOTelTraceState(orig) + require.NoError(t, err) + require.True(t, otts.HasAnyValue()) + require.True(t, otts.HasTValue()) + require.True(t, otts.HasZeroTValue()) + require.Equal(t, "0", otts.TValue()) + require.Equal(t, 0.0, otts.TValueThreshold().Probability()) + + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, orig, w.String()) +} + +func TestOTelTraceStateRValuePValue(t *testing.T) { + // Ensures the caller can handle RValueSizeError and search + // for p-value in extra-values. + const orig = "r:3;p:2" + otts, err := NewOTelTraceState(orig) + require.Error(t, err) + require.True(t, errors.Is(err, RValueSizeError("3"))) + require.False(t, otts.HasRValue()) + + // The error is oblivious to the old r-value, but that's ok. 
+ require.Contains(t, err.Error(), "14 hex digits") + + require.Equal(t, []KV{{"p", "2"}}, otts.ExtraValues()) + + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, "p:2", w.String()) +} + +func TestOTelTraceStateTValueUpdate(t *testing.T) { + const orig = "r:abcdefabcdefab" + otts, err := NewOTelTraceState(orig) + require.NoError(t, err) + require.False(t, otts.HasTValue()) + require.True(t, otts.HasRValue()) + + th, _ := TValueToThreshold("3") + require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + + require.Equal(t, "3", otts.TValue()) + require.Equal(t, 0x3p-4, otts.TValueThreshold().Probability()) + + const updated = "r:abcdefabcdefab;t:3" + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, updated, w.String()) +} + +func TestOTelTraceStateRTUpdate(t *testing.T) { + otts, err := NewOTelTraceState("a:b") + require.NoError(t, err) + require.False(t, otts.HasTValue()) + require.False(t, otts.HasRValue()) + require.True(t, otts.HasAnyValue()) + + th, _ := TValueToThreshold("3") + require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + otts.SetRValue(must(RValueToRandomness("00000000000003"))) + + const updated = "r:00000000000003;t:3;a:b" + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, updated, w.String()) +} + +func TestOTelTraceStateRTClear(t *testing.T) { + otts, err := NewOTelTraceState("a:b;r:12341234123412;t:1234") + require.NoError(t, err) + + otts.ClearTValue() + otts.ClearRValue() + + const updated = "a:b" + var w strings.Builder + otts.Serialize(&w) + require.Equal(t, updated, w.String()) +} + func TestParseOTelTraceState(t *testing.T) { type testCase struct { in string @@ -64,32 +137,32 @@ func TestParseOTelTraceState(t *testing.T) { const ns = "" for _, test := range []testCase{ // t-value correct cases - {"th:2", ns, "2", nil, nil}, - {"th:1", ns, "1", nil, nil}, - {"th:1", ns, "1", nil, nil}, - {"th:10", ns, "10", nil, nil}, - {"th:33", ns, "33", nil, nil}, - {"th:ab", ns, "ab", nil, nil}, - 
{"th:61", ns, "61", nil, nil}, + {"t:2", ns, "2", nil, nil}, + {"t:1", ns, "1", nil, nil}, + {"t:1", ns, "1", nil, nil}, + {"t:10", ns, "10", nil, nil}, + {"t:33", ns, "33", nil, nil}, + {"t:ab", ns, "ab", nil, nil}, + {"t:61", ns, "61", nil, nil}, // syntax errors {"", ns, ns, nil, strconv.ErrSyntax}, - {"th:1;", ns, ns, nil, strconv.ErrSyntax}, - {"th:1=p:2", ns, ns, nil, strconv.ErrSyntax}, - {"th:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {"t:1;", ns, ns, nil, strconv.ErrSyntax}, + {"t:1=p:2", ns, ns, nil, strconv.ErrSyntax}, + {"t:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;:", ns, ns, nil, strconv.ErrSyntax}, {":", ns, ns, nil, strconv.ErrSyntax}, - {"th:;p=1", ns, ns, nil, strconv.ErrSyntax}, - {"th:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal - {"th:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid - {"th:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax - {"th:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative + {"t:;p=1", ns, ns, nil, strconv.ErrSyntax}, + {"t:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"t:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid + {"t:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax + {"t:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative // too many digits - {"th:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, - {"th:100000000000000", ns, ns, nil, ErrTValueSize}, + {"t:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, + {"t:100000000000000", ns, ns, nil, ErrTValueSize}, // one field {"e100:1", ns, ns, []string{"e100:1"}, nil}, @@ -99,13 +172,13 @@ func TestParseOTelTraceState(t *testing.T) { {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, // one extra key, two ways - {"th:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, - {"extra:stuff;th:2", ns, "2", []string{"extra:stuff"}, nil}, + {"t:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, + 
{"extra:stuff;t:2", ns, "2", []string{"extra:stuff"}, nil}, // two extra fields - {"e100:100;th:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"th:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"e100:100;e101:101;th:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;t:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"t:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;t:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, // parse error prevents capturing unrecognized keys {"1:1;u:V", ns, ns, nil, strconv.ErrSyntax}, @@ -113,15 +186,15 @@ func TestParseOTelTraceState(t *testing.T) { {"x:1;u:V", ns, ns, []string{"x:1", "u:V"}, nil}, // r-value - {"rv:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"extra:stuff;rv:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"rv:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, - {"rv:88888888888888", "88888888888888", ns, nil, nil}, - {"rv:00000000000000", "00000000000000", ns, nil, nil}, + {"r:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;r:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"r:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, + {"r:88888888888888", "88888888888888", ns, nil, nil}, + {"r:00000000000000", "00000000000000", ns, nil, nil}, // r-value range error (15 bytes of hex or more) - {"rv:100000000000000", ns, ns, nil, ErrRValueSize}, - {"rv:fffffffffffffffff", ns, ns, nil, ErrRValueSize}, + {"r:100000000000000", ns, ns, nil, RValueSizeError("100000000000000")}, + {"r:fffffffffffffffff", ns, ns, nil, RValueSizeError("fffffffffffffffff")}, // no trailing ; {"x:1;", ns, ns, nil, strconv.ErrSyntax}, @@ -131,7 +204,7 @@ func TestParseOTelTraceState(t *testing.T) { // charset test {"x:0X1FFF;y:.-_-.;z:", ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, - 
{"x1y2z3:1-2-3;y1:y_1;xy:-;th:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, // size exceeded {"x:" + strings.Repeat("_", 255), ns, ns, nil, ErrTraceStateSize}, @@ -180,3 +253,79 @@ func TestParseOTelTraceState(t *testing.T) { }) } } + +func TestUpdateTValueWithSampling(t *testing.T) { + type testCase struct { + // The input otel tracestate; no error conditions tested + in string + + // The incoming adjusted count; defined whether + // t-value is present or not. + adjCountIn float64 + + // the update probability; threshold and tvalue are + // derived from this + prob float64 + + // when update error is expected + updateErr error + + // output t-value + out string + + // output adjusted count + adjCountOut float64 + } + for _, test := range []testCase{ + // 8/16 in, 2/16 out + {"t:8", 2, 0x2p-4, nil, "t:2", 8}, + + // 1/16 in, 50% update (error) + {"t:1", 16, 0x8p-4, ErrInconsistentSampling, "t:1", 16}, + + // no sampling in, 1/16 update + {"", 1, 0x1p-4, nil, "t:1", 16}, + + // zero adj count in, 1/16 update (error) + {"t:0", 0, 0x1p-4, ErrInconsistentSampling, "t:0", 0}, + + // none in, 0% update + {"t:0", 0, 0, nil, "t:0", 0}, + + // 8/16 in, zero update + {"t:8", 2, 0, nil, "t:0", 0}, + + // none in, 100% update + {"", 1, 1, nil, "", 1}, + + // 1/2 in, 100% update (error) + {"t:8", 2, 1, ErrInconsistentSampling, "t:8", 2}, + } { + t.Run(test.in+"/"+test.out, func(t *testing.T) { + otts := OTelTraceState{} + if test.in != "" { + var err error + otts, err = NewOTelTraceState(test.in) + require.NoError(t, err) + } + + require.Equal(t, test.adjCountIn, otts.AdjustedCount()) + + newTh, err := ProbabilityToThreshold(test.prob) + require.NoError(t, err) + + upErr := otts.UpdateTValueWithSampling(newTh, newTh.TValue()) + + if test.updateErr != nil { + require.Equal(t, test.updateErr, upErr) + } + + var outData strings.Builder + err = otts.Serialize(&outData) 
+ require.NoError(t, err) + require.Equal(t, test.out, outData.String()) + + require.Equal(t, test.adjCountOut, otts.AdjustedCount()) + }) + } +} diff --git a/pkg/sampling/internal/unsigned/probability.go b/pkg/sampling/probability.go similarity index 90% rename from pkg/sampling/internal/unsigned/probability.go rename to pkg/sampling/probability.go index 3f2d9656f2cd..118d8121191d 100644 --- a/pkg/sampling/internal/unsigned/probability.go +++ b/pkg/sampling/probability.go @@ -1,4 +1,7 @@ -package unsigned +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling import ( "errors" diff --git a/pkg/sampling/randomness.go b/pkg/sampling/randomness.go new file mode 100644 index 000000000000..4a1c69e26c2a --- /dev/null +++ b/pkg/sampling/randomness.go @@ -0,0 +1,95 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling + +import ( + "encoding/binary" + "fmt" + "strconv" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// RValueSizeError indicates the size was not 14 bytes. This may allow +// parsing the legacy r-value. +type RValueSizeError string + +// numRandomnessValues equals MaxAdjustedCount--this variable has been +// introduced to improve readability. Recall that MaxAdjustedCount is +// 2**56 which is one greater than the maximum RValue +// ("ffffffffffffff", i.e., "100000000000000"). +const numRandomnessValues = MaxAdjustedCount + +// Error indicates that 14 bytes are needed. +func (r RValueSizeError) Error() string { + return fmt.Sprintf("r-value must have 14 hex digits: %q", string(r)) +} + +// LeastHalfTraceIDThresholdMask is the mask to use on the +// least-significant half of the TraceID, i.e., bytes 8-15. +// Because this is a 56 bit mask, the result after masking is +// the unsigned value of bytes 9 through 15. 
+// +// This helps extract 56 bits of randomness from the second half of +// the TraceID, as specified in https://www.w3.org/TR/trace-context-2/#randomness-of-trace-id +const LeastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 + +// Randomness may be derived from R-value or TraceID. +// +// Randomness contains 56 bits of randomness, derived in one of two ways, see: +// https://www.w3.org/TR/trace-context-2/#randomness-of-trace-id +type Randomness struct { + // unsigned is in the range [0, MaxAdjustedCount-1] + unsigned uint64 +} + +// TraceIDToRandomness returns randomness from a TraceID (assumes +// the traceparent random flag was set). +func TraceIDToRandomness(id pcommon.TraceID) Randomness { + // To get the 56 bits we want, take the second half of the trace ID, + leastHalf := binary.BigEndian.Uint64(id[8:]) + return Randomness{ + // Then apply the mask to get the least-significant 56 bits / 7 bytes. + // Equivalently stated: zero the most-significant 8 bits. + unsigned: leastHalf & LeastHalfTraceIDThresholdMask, + } +} + +// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. +func RValueToRandomness(s string) (Randomness, error) { + if len(s) != NumHexDigits { + return Randomness{}, RValueSizeError(s) + } + + unsigned, err := strconv.ParseUint(s, hexBase, 64) + if err != nil { + return Randomness{}, err + } + + return Randomness{ + unsigned: unsigned, + }, nil +} + +// ToRValue formats the r-value encoding. +func (rnd Randomness) RValue() string { + // The important part here is to format a full 14-byte hex + // string, including leading zeros. 
We could accomplish the + // same with custom code or with fmt.Sprintf directives, but + // here we let strconv.FormatUint fill in leading zeros, as + // follows: + // + // Format (numRandomnessValues+Randomness) as a hex string + // Strip the leading hex digit, which is a "1" by design + // + // For example, a randomness that requires two leading zeros + // (all in hexadecimal): + // + // randomness is 7 bytes: aabbccddeeff + // numRandomnessValues is 2^56: 100000000000000 + // randomness+numRandomnessValues: 100aabbccddeeff + // strip the leading "1": 00aabbccddeeff + return strconv.FormatUint(numRandomnessValues+rnd.unsigned, hexBase)[1:] + +} diff --git a/pkg/sampling/internal/unsigned/threshold.go b/pkg/sampling/threshold.go similarity index 56% rename from pkg/sampling/internal/unsigned/threshold.go rename to pkg/sampling/threshold.go index 229ed2efc98b..fae4d71da27e 100644 --- a/pkg/sampling/internal/unsigned/threshold.go +++ b/pkg/sampling/threshold.go @@ -1,18 +1,7 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package unsigned +// SPDX-License-Identifier: Apache-2.0 + +package sampling import ( "errors" @@ -25,8 +14,9 @@ const ( MaxAdjustedCount = 1 << 56 // NumHexDigits is the number of hex digits equalling 56 bits. 
- NumHexDigits = 56 / 4 + NumHexDigits = 56 / hexBits + hexBits = 4 hexBase = 16 ) @@ -47,7 +37,8 @@ var ( AlwaysSampleThreshold = Threshold{unsigned: MaxAdjustedCount} ) -// TValueToThreshold returns a Threshold, see Threshold.ShouldSample(TraceID). +// TValueToThreshold returns a Threshold. Because TValue strings +// have trailing zeros omitted, this function performs the reverse. func TValueToThreshold(s string) (Threshold, error) { if len(s) > NumHexDigits { return AlwaysSampleThreshold, ErrTValueSize @@ -56,36 +47,45 @@ func TValueToThreshold(s string) (Threshold, error) { return AlwaysSampleThreshold, nil } - // Note that this handles zero correctly, but the inverse - // operation does not. I.e., "0" parses as unsigned == 0. + // Having checked length above, there are no range errors + // possible. Parse the hex string to an unsigned valued. unsigned, err := strconv.ParseUint(s, hexBase, 64) if err != nil { - return AlwaysSampleThreshold, err + return AlwaysSampleThreshold, err // e.g. parse error } - // Zero-padding is done by shifting 4 bits per absent hex digit. - extend := NumHexDigits - len(s) + // The unsigned value requires shifting to account for the + // trailing zeros that were omitted by the encoding (see + // TValue for the reverse). Compute the number to shift by: + extendByHexZeros := NumHexDigits - len(s) return Threshold{ - unsigned: unsigned << (4 * extend), + unsigned: unsigned << (hexBits * extendByHexZeros), }, nil } +// TValue encodes a threshold, which is a variable-length hex string +// up to 14 characters. The empty string is returned for 100% +// sampling. func (th Threshold) TValue() string { // Special cases switch th.unsigned { case MaxAdjustedCount: - // 100% sampling + // 100% sampling. Samplers are specified not to + // include a TValue in this case. return "" case 0: - // 0% sampling. This is a special case, otherwise, the TrimRight - // below will return an empty matching the case above. + // 0% sampling. 
This is a special case, otherwise, + // the TrimRight below will return an empty string + // matching the case above. return "0" } - // Add MaxAdjustedCount yields 15 hex digits with a leading "1". - allBits := MaxAdjustedCount + th.unsigned - // Then format and remove the most-significant hex digit. - digits := strconv.FormatUint(allBits, hexBase)[1:] - // Leaving NumHexDigits hex digits, with trailing zeros removed. + + // For thresholds other than the extremes, format a full-width + // (14 digit) unsigned value with leading zeros, then, remove + // the trailing zeros. Use the logic for (Randomness).RValue(). + digits := Randomness(th).RValue() + + // Remove trailing zeros. return strings.TrimRight(digits, "0") } diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go index cd952f48791a..855ddb291339 100644 --- a/pkg/sampling/w3ctracestate.go +++ b/pkg/sampling/w3ctracestate.go @@ -1,9 +1,13 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package sampling import ( "io" "regexp" "strconv" + "strings" ) type W3CTraceState struct { @@ -12,7 +16,13 @@ type W3CTraceState struct { } const ( - hardMaxW3CLength = 1024 + hardMaxNumPairs = 32 + hardMaxW3CLength = 1024 + hardMaxKeyLength = 256 + hardMaxTenantLength = 241 + hardMaxSystemLength = 14 + + otelVendorCode = "ot" // keyRegexp is not an exact test, it permits all the // characters and then we check various conditions. 
@@ -27,10 +37,11 @@ const ( lcAlphaRegexp = `[a-z]` lcDigitPunctRegexp = `[a-z0-9\-\*/_]` lcDigitRegexp = `[a-z0-9]` - tenantIDRegexp = lcDigitRegexp + lcDigitPunctRegexp + `{0,240}` - systemIDRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `{0,13}` - multiTenantKeyRegexp = tenantIDRegexp + `@` + systemIDRegexp - simpleKeyRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `{0,255}` + multiTenantSep = `@` + tenantIDRegexp = lcDigitRegexp + lcDigitPunctRegexp + `*` // could be {0,hardMaxTenantLength-1} + systemIDRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `*` // could be {0,hardMaxSystemLength-1} + multiTenantKeyRegexp = tenantIDRegexp + multiTenantSep + systemIDRegexp + simpleKeyRegexp = lcAlphaRegexp + lcDigitPunctRegexp + `*` // could be {0,hardMaxKeyLength-1} keyRegexp = `(?:(?:` + simpleKeyRegexp + `)|(?:` + multiTenantKeyRegexp + `))` // value = 0*255(chr) nblk-chr @@ -49,22 +60,23 @@ const ( // list-member = (key "=" value) / OWS owsCharSet = ` \t` - owsRegexp = `[` + owsCharSet + `]*` - w3cMemberRegexp = `(?:` + keyRegexp + `=` + valueRegexp + `)|(?:` + owsRegexp + `)` + owsRegexp = `(?:[` + owsCharSet + `]*)` + w3cMemberRegexp = `(?:` + keyRegexp + `=` + valueRegexp + `)?` // This regexp is large enough that regexp impl refuses to // make 31 copies of it (i.e., `{0,31}`) so we use `*` below. - w3cOwsCommaMemberRegexp = `(?:` + owsRegexp + `,` + owsRegexp + w3cMemberRegexp + `)` + w3cOwsMemberOwsRegexp = `(?:` + owsRegexp + w3cMemberRegexp + owsRegexp + `)` + w3cCommaOwsMemberOwsRegexp = `(?:` + `,` + w3cOwsMemberOwsRegexp + `)` // The limit to 31 of owsCommaMemberRegexp is applied in code. 
- w3cTracestateRegexp = `^` + w3cMemberRegexp + w3cOwsCommaMemberRegexp + `*$` + w3cTracestateRegexp = `^` + w3cOwsMemberOwsRegexp + w3cCommaOwsMemberOwsRegexp + `*$` ) var ( w3cTracestateRe = regexp.MustCompile(w3cTracestateRegexp) w3cSyntax = keyValueScanner{ - maxItems: 32, + maxItems: hardMaxNumPairs, trim: true, separator: ',', equality: '=', @@ -81,8 +93,19 @@ func NewW3CTraceState(input string) (w3c W3CTraceState, _ error) { } err := w3cSyntax.scanKeyValues(input, func(key, value string) error { + if len(key) > hardMaxKeyLength { + return ErrTraceStateSize + } + if tenant, system, found := strings.Cut(key, multiTenantSep); found { + if len(tenant) > hardMaxTenantLength { + return ErrTraceStateSize + } + if len(system) > hardMaxSystemLength { + return ErrTraceStateSize + } + } switch key { - case "ot": + case otelVendorCode: var err error w3c.otts, err = NewOTelTraceState(value) return err @@ -109,23 +132,25 @@ func (w3c *W3CTraceState) HasOTelValue() bool { return w3c.otts.HasAnyValue() } -func (w3c *W3CTraceState) Serialize(w io.StringWriter) { +func (w3c *W3CTraceState) Serialize(w io.StringWriter) error { + ser := serializer{writer: w} cnt := 0 sep := func() { if cnt != 0 { - w.WriteString(",") + ser.write(",") } cnt++ } if w3c.otts.HasAnyValue() { sep() - w.WriteString("ot=") - w3c.otts.Serialize(w) + ser.write("ot=") + ser.check(w3c.otts.Serialize(w)) } for _, kv := range w3c.ExtraValues() { sep() - w.WriteString(kv.Key) - w.WriteString("=") - w.WriteString(kv.Value) + ser.write(kv.Key) + ser.write("=") + ser.write(kv.Value) } + return ser.err } diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index 4a9ab2ca2869..d5a1eef5ec2a 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -1,86 +1,107 @@ // Copyright The OpenTelemetry Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// SPDX-License-Identifier: Apache-2.0 package sampling -// import ( -// "errors" -// "strings" -// "testing" +import ( + "errors" + "strconv" + "strings" + "testing" -// "github.com/stretchr/testify/require" -// ) + "github.com/stretchr/testify/require" +) -// func TestParseW3CTraceState(t *testing.T) { -// type testCase struct { -// in string -// rval string -// sval string -// tval string -// expectErr error -// } -// const ns = "" -// for _, test := range []testCase{ -// // correct cases -// {"ot=t:1", ns, ns, "1", nil}, -// {"ot=t:100", ns, ns, "100", nil}, -// {"ot=s:100;t:200", ns, "100", "200", nil}, -// {"ot=r:1", "1", ns, ns, nil}, -// {"ot=r:1,unknown:value,other=something", "1", ns, ns, nil}, -// } { -// t.Run(testName(test.in), func(t *testing.T) { -// w3c, err := NewW3CTraceState(test.in) +func TestParseW3CTraceState(t *testing.T) { + type testCase struct { + in string + rval string + tval string + extra map[string]string + expectErr error + } + const ns = "" + for _, test := range []testCase{ + // correct cases + {"ot=t:1", ns, "1", nil, nil}, + {" ot=t:1 ", ns, "1", nil, nil}, + {"ot=t:1", ns, "1", nil, nil}, + {" ot=t:1 ", ns, "1", nil, nil}, + {" ot=t:1,other=value ", ns, "1", map[string]string{ + "other": "value", + }, nil}, + {"ot=t:1 , other=value", ns, "1", map[string]string{ + "other": "value", + }, nil}, + {",,,", ns, ns, nil, nil}, + {" , ot=t:1, , other=value ", ns, "1", map[string]string{ + "other": "value", + }, nil}, + {"ot=t:100;r:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, + {" ot=t:100;r:abcdabcdabcdff", 
"abcdabcdabcdff", "100", nil, nil}, + {"ot=t:100;r:abcdabcdabcdff ", "abcdabcdabcdff", "100", nil, nil}, + {"ot=r:11111111111111", "11111111111111", ns, nil, nil}, + {"ot=r:ffffffffffffff,unknown=value,other=something", "ffffffffffffff", ns, map[string]string{ + "other": "something", + "unknown": "value", + }, nil}, -// if test.expectErr != nil { -// require.True(t, errors.Is(err, test.expectErr), -// "%q: not expecting %v wanted %v", test.in, err, test.expectErr, -// ) -// } else { -// require.NoError(t, err) -// } -// if test.rval != ns { -// require.True(t, w3c.HasOTelValue()) -// require.True(t, w3c.OTelValue().HasRValue()) -// require.Equal(t, test.rval, w3c.OTelValue().RValue()) -// } else { -// require.False(t, w3c.OTelValue().HasRValue(), "should have no r-value") -// } -// if test.sval != ns { -// require.True(t, w3c.HasOTelValue()) -// require.True(t, w3c.OTelValue().HasSValue()) -// require.Equal(t, test.sval, w3c.OTelValue().SValue()) -// } else { -// require.False(t, w3c.OTelValue().HasSValue(), "should have no s-value") -// } -// if test.tval != ns { -// require.True(t, w3c.HasOTelValue()) -// require.True(t, w3c.OTelValue().HasTValue()) -// require.Equal(t, test.tval, w3c.OTelValue().TValue()) -// } else { -// require.False(t, w3c.OTelValue().HasTValue(), "should have no t-value") -// } + // syntax errors + {"-1=2", ns, ns, nil, strconv.ErrSyntax}, // invalid key char + {"=", ns, ns, nil, strconv.ErrSyntax}, // invalid empty key -// if test.expectErr != nil { -// return -// } -// // on success Serialize() should not modify -// // test by re-parsing -// var w strings.Builder -// w3c.Serialize(&w) -// cpy, err := NewW3CTraceState(w.String()) -// require.NoError(t, err, "with %v", w.String()) -// require.Equal(t, w3c, cpy, "with %v", w.String()) -// }) -// } -// } + // size errors + {strings.Repeat("x", hardMaxKeyLength+1) + "=v", ns, ns, nil, ErrTraceStateSize}, // too long simple key + {strings.Repeat("x", hardMaxTenantLength+1) + "@y=v", ns, ns, nil, 
ErrTraceStateSize}, // too long multitenant-id + {"y@" + strings.Repeat("x", hardMaxSystemLength+1) + "=v", ns, ns, nil, ErrTraceStateSize}, // too long system-id + {"x=" + strings.Repeat("y", hardMaxW3CLength-1), ns, ns, nil, ErrTraceStateSize}, + {strings.Repeat("x=y,", hardMaxNumPairs) + "x=y", ns, ns, nil, ErrTraceStateSize}, + } { + t.Run(testName(test.in), func(t *testing.T) { + w3c, err := NewW3CTraceState(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + } else { + require.NoError(t, err, "%q", test.in) + } + if test.rval != ns { + require.True(t, w3c.HasOTelValue()) + require.True(t, w3c.HasAnyValue()) + require.True(t, w3c.OTelValue().HasRValue()) + require.Equal(t, test.rval, w3c.OTelValue().RValue()) + } else { + require.False(t, w3c.OTelValue().HasRValue(), "should have no r-value") + } + if test.tval != ns { + require.True(t, w3c.HasOTelValue()) + require.True(t, w3c.HasAnyValue()) + require.True(t, w3c.OTelValue().HasTValue()) + require.Equal(t, test.tval, w3c.OTelValue().TValue()) + } else { + require.False(t, w3c.OTelValue().HasTValue(), "should have no t-value") + } + if test.extra != nil { + require.True(t, w3c.HasAnyValue()) + actual := map[string]string{} + for _, kv := range w3c.ExtraValues() { + actual[kv.Key] = kv.Value + } + require.Equal(t, test.extra, actual) + } + + if test.expectErr != nil { + return + } + // on success Serialize() should not modify + // test by re-parsing + var w strings.Builder + w3c.Serialize(&w) + cpy, err := NewW3CTraceState(w.String()) + require.NoError(t, err, "with %v", w.String()) + require.Equal(t, w3c, cpy, "with %v", w.String()) + }) + } +} From 396efb17905e144c2242b9169ba715af24d6cd1a Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 4 Oct 2023 14:13:40 -0700 Subject: [PATCH 25/38] wip --- pkg/sampling/threshold.go | 2 + .../tracesprocessor.go | 69 +++++++++---------- 2 files changed, 35 
insertions(+), 36 deletions(-) diff --git a/pkg/sampling/threshold.go b/pkg/sampling/threshold.go index fae4d71da27e..f725847b355f 100644 --- a/pkg/sampling/threshold.go +++ b/pkg/sampling/threshold.go @@ -18,6 +18,8 @@ const ( hexBits = 4 hexBase = 16 + + NeverSampleTValue = "0" ) // Threshold used to compare with the least-significant 7 bytes of the TraceID. diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index d6abbec8960a..1225cacaa2c6 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -57,7 +57,7 @@ type traceSampler interface { // sampled, probabilistically or otherwise. The "should" parameter // is the result from decide(), for the span's TraceID, which // will not be recalculated. - updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) + updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error } type traceProcessor struct { @@ -97,7 +97,8 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS randomness = wts.OTelValue().RValueRandomness() } else if true /* s.Flags()&0x2 == 0x2 */ { // See https://github.com/open-telemetry/opentelemetry-proto/pull/503 - randomness = sampling.RandomnessFromTraceID(s.TraceID()) + // which merged but unreleased at the time of writing. + randomness = sampling.TraceIDToRandomness(s.TraceID()) } else { // Note: Creating an R-value here is the best we can // do. Issue a warning? This is OK-ish for head @@ -106,7 +107,12 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS // T-value already, (TODO: is it better to just assume // the flag was set in a tail sampler? 
otherwise, // inconsistent results) - randomness = sampling.RandomnessFromBits(uint64(rand.Int63n(sampling.MaxAdjustedCount))) + randomness, _ = sampling.RValueToRandomness( + strconv.FormatUint( + sampling.MaxAdjustedCount+ + uint64(rand.Int63n(sampling.MaxAdjustedCount)), + 16)[1:], + ) wts.OTelValue().SetRValue(randomness) } return randomness, &wts, err @@ -181,8 +187,10 @@ func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState return decision, nil, nil } -func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { - // No action, nothing is specified. +func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { + // Note: Sampling SIG will not like this idea. What about using + // r:00000000000000;t:{ProbabilityToThreshold(pct/100.0)}? + return nil } func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { @@ -195,37 +203,23 @@ func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, // Consistency check: if the TraceID is out of range // (unless the TValue is zero), the TValue is a lie. // If inconsistent, clear it. - if !otts.TValueThreshold().ShouldSample(rnd) { - // Let this error log: we have a misconfigured - // upstream sampler and are unsetting its t-value. - // TODO: Note this will happen if we've made up - // the rvalue, some fraction of the time. + if otts.HasTValue() && !otts.TValueThreshold().ShouldSample(rnd) { err = ErrInconsistentArrivingTValue - otts.UnsetTValue() + otts.ClearTValue() } return ts.traceIDThreshold.ShouldSample(rnd), wts, err } -func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { +func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { // When this sampler decided not to sample, the t-value becomes zero. 
// Incoming TValue consistency is not checked when this happens. if !should { - otts.SetTValue(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) - return + return otts.UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) } - if otts.HasNonZeroTValue() && - sampling.ThresholdLessThan(otts.TValueThreshold(), ts.traceIDThreshold) { - // Smaller thresholds are more selective, so when the existing - // threshold is less than the resampler, do nothing. - return - } - - // If the existing t-value represents zero, the resampler raises it - // but this is a very fishy configuration. - - otts.SetTValue(ts.traceIDThreshold, ts.tValueEncoding) - return + // Spans that appear consistently sampled but arrive w/ zero + // adjusted count remain zero. + return otts.UpdateTValueWithSampling(ts.traceIDThreshold, ts.tValueEncoding) } func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { @@ -234,22 +228,22 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace // TODO: Configure fail-open vs fail-closed? 
return true, nil, err } - otts := wts.OTelValue() incoming := 1.0 - if otts.HasTValue() { - incoming = otts.TValueThreshold().Probability() + otts := wts.OTelValue() + if otts.HasZeroTValue() { + return true, wts, nil } - threshold, err := sampling.ProbabilityToThreshold(incoming * ts.ratio) - otts.SetTValue(threshold, threshold.TValue()) + incoming = otts.TValueThreshold().Probability() + threshold, _ := sampling.ProbabilityToThreshold(incoming * ts.ratio) + _ = otts.UpdateTValueWithSampling(threshold, threshold.TValue()) return threshold.ShouldSample(rnd), wts, err } -func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) { +func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { if !should { - otts.SetTValue(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) - return + return otts.UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) } - return + return nil } func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { @@ -293,7 +287,10 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( } if sampled { - tp.sampler.updateTracestate(s.TraceID(), probSample, wts.OTelValue()) + err := tp.sampler.updateTracestate(s.TraceID(), probSample, wts.OTelValue()) + if err != nil { + tp.logger.Debug("tracestate update", zap.Error(err)) + } var w strings.Builder wts.Serialize(&w) From 36de5dd3d729f1427ecc9e749ad25cb38bda6ceb Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 6 Oct 2023 16:02:55 -0700 Subject: [PATCH 26/38] fix existing tests --- pkg/sampling/threshold.go | 2 +- .../probabilisticsamplerprocessor/factory.go | 2 +- .../probabilisticsamplerprocessor/sampler_mode.go | 3 +-- .../tracesprocessor.go | 15 ++++++++------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/sampling/threshold.go 
b/pkg/sampling/threshold.go index f725847b355f..f12e41bbca75 100644 --- a/pkg/sampling/threshold.go +++ b/pkg/sampling/threshold.go @@ -94,7 +94,7 @@ func (th Threshold) TValue() string { // ShouldSample returns true when the span passes this sampler's // consistent sampling decision. func (t Threshold) ShouldSample(rnd Randomness) bool { - return t == NeverSampleThreshold || rnd.unsigned < t.unsigned + return rnd.unsigned < t.unsigned } // ThresholdLessThan allows direct comparison of Threshold values. diff --git a/processor/probabilisticsamplerprocessor/factory.go b/processor/probabilisticsamplerprocessor/factory.go index ae3a06e9fd07..089e457eed45 100644 --- a/processor/probabilisticsamplerprocessor/factory.go +++ b/processor/probabilisticsamplerprocessor/factory.go @@ -37,7 +37,7 @@ func NewFactory() processor.Factory { func createDefaultConfig() component.Config { return &Config{ AttributeSource: defaultAttributeSource, - SamplerMode: DefaultMode, + SamplerMode: modeUnset, } } diff --git a/processor/probabilisticsamplerprocessor/sampler_mode.go b/processor/probabilisticsamplerprocessor/sampler_mode.go index 454a1ad769ad..dc14d54f3d80 100644 --- a/processor/probabilisticsamplerprocessor/sampler_mode.go +++ b/processor/probabilisticsamplerprocessor/sampler_mode.go @@ -11,9 +11,8 @@ const ( HashSeed SamplerMode = "hash_seed" Equalizing SamplerMode = "equalizing" Proportional SamplerMode = "proportional" + DefaultMode SamplerMode = Proportional modeUnset SamplerMode = "" - - DefaultMode = Proportional ) func (sm *SamplerMode) UnmarshalText(in []byte) error { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 1225cacaa2c6..42796ebff699 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -230,13 +230,15 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace } 
incoming := 1.0 otts := wts.OTelValue() - if otts.HasZeroTValue() { - return true, wts, nil + if otts.HasTValue() { + incoming = otts.TValueThreshold().Probability() } - incoming = otts.TValueThreshold().Probability() threshold, _ := sampling.ProbabilityToThreshold(incoming * ts.ratio) - _ = otts.UpdateTValueWithSampling(threshold, threshold.TValue()) - return threshold.ShouldSample(rnd), wts, err + should := threshold.ShouldSample(rnd) + if should { + _ = otts.UpdateTValueWithSampling(threshold, threshold.TValue()) + } + return should, wts, err } func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { @@ -268,8 +270,7 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( tp.logger.Error("trace-state", zap.Error(err)) } - forceSample := priority == mustSampleSpan - + forceSample := priority == mustSampleSpan || wts.OTelValue().HasZeroTValue() sampled := forceSample || probSample if forceSample { From f1aa0ad265cb9555f48dbe32cd6aa1bc2aed7026 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 11 Oct 2023 17:21:51 -0700 Subject: [PATCH 27/38] :wip: --- .../sampler_mode.go | 2 + .../tracesprocessor.go | 24 +-- .../tracesprocessor_test.go | 156 ++++++++++++++++-- 3 files changed, 155 insertions(+), 27 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/sampler_mode.go b/processor/probabilisticsamplerprocessor/sampler_mode.go index dc14d54f3d80..a6108774d795 100644 --- a/processor/probabilisticsamplerprocessor/sampler_mode.go +++ b/processor/probabilisticsamplerprocessor/sampler_mode.go @@ -15,6 +15,8 @@ const ( modeUnset SamplerMode = "" ) +var AllModes = []SamplerMode{HashSeed, Equalizing, Proportional} + func (sm *SamplerMode) UnmarshalText(in []byte) error { switch mode := SamplerMode(in); mode { case HashSeed, diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 
42796ebff699..adf72ac709a2 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -57,7 +57,7 @@ type traceSampler interface { // sampled, probabilistically or otherwise. The "should" parameter // is the result from decide(), for the span's TraceID, which // will not be recalculated. - updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error + updateTracestate(tid pcommon.TraceID, should bool, wts *sampling.W3CTraceState) error } type traceProcessor struct { @@ -187,7 +187,7 @@ func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState return decision, nil, nil } -func (ts *traceHashSampler) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { +func (ts *traceHashSampler) updateTracestate(_ pcommon.TraceID, should bool, _ *sampling.W3CTraceState) error { // Note: Sampling SIG will not like this idea. What about using // r:00000000000000;t:{ProbabilityToThreshold(pct/100.0)}? return nil @@ -211,15 +211,15 @@ func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, return ts.traceIDThreshold.ShouldSample(rnd), wts, err } -func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { +func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, wts *sampling.W3CTraceState) error { // When this sampler decided not to sample, the t-value becomes zero. // Incoming TValue consistency is not checked when this happens. if !should { - return otts.UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + return wts.OTelValue().UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) } // Spans that appear consistently sampled but arrive w/ zero // adjusted count remain zero. 
- return otts.UpdateTValueWithSampling(ts.traceIDThreshold, ts.tValueEncoding) + return wts.OTelValue().UpdateTValueWithSampling(ts.traceIDThreshold, ts.tValueEncoding) } func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { @@ -241,9 +241,9 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace return should, wts, err } -func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, otts *sampling.OTelTraceState) error { +func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, wts *sampling.W3CTraceState) error { if !should { - return otts.UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + return wts.OTelValue().UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) } return nil } @@ -270,7 +270,7 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( tp.logger.Error("trace-state", zap.Error(err)) } - forceSample := priority == mustSampleSpan || wts.OTelValue().HasZeroTValue() + forceSample := priority == mustSampleSpan || (wts != nil && wts.OTelValue().HasZeroTValue()) sampled := forceSample || probSample if forceSample { @@ -287,14 +287,16 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( ) } - if sampled { - err := tp.sampler.updateTracestate(s.TraceID(), probSample, wts.OTelValue()) + if sampled && wts != nil { + err := tp.sampler.updateTracestate(s.TraceID(), probSample, wts) if err != nil { tp.logger.Debug("tracestate update", zap.Error(err)) } var w strings.Builder - wts.Serialize(&w) + if err := wts.Serialize(&w); err != nil { + tp.logger.Debug("tracestate serialize", zap.Error(err)) + } s.TraceState().FromRaw(w.String()) } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index 887cbf1798e4..6e00a2503d0c 100644 --- 
a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -289,24 +289,31 @@ func Test_tracesamplerprocessor_SpanSamplingPriority(t *testing.T) { sampled: true, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sink := new(consumertest.TracesSink) - tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), tt.cfg, sink) - require.NoError(t, err) + for _, mode := range AllModes { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sink := new(consumertest.TracesSink) + cfg := &Config{} + if tt.cfg != nil { + *cfg = *tt.cfg + } + cfg.SamplerMode = mode + tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), cfg, sink) + require.NoError(t, err) - err = tsp.ConsumeTraces(context.Background(), tt.td) - require.NoError(t, err) + err = tsp.ConsumeTraces(context.Background(), tt.td) + require.NoError(t, err) - sampledData := sink.AllTraces() - if tt.sampled { - require.Equal(t, 1, len(sampledData)) - assert.Equal(t, 1, sink.SpanCount()) - } else { - require.Equal(t, 0, len(sampledData)) - assert.Equal(t, 0, sink.SpanCount()) - } - }) + sampledData := sink.AllTraces() + if tt.sampled { + require.Equal(t, 1, len(sampledData)) + assert.Equal(t, 1, sink.SpanCount()) + } else { + require.Equal(t, 0, len(sampledData)) + assert.Equal(t, 0, sink.SpanCount()) + } + }) + } } } @@ -402,6 +409,123 @@ func initSpanWithAttribute(key string, value pcommon.Value, dest ptrace.Span) { value.CopyTo(dest.Attributes().PutEmpty(key)) } +// func Test_tracesamplerprocessor_TraceState(t *testing.T) { +// singleSpanWithAttrib := func(key string, attribValue pcommon.Value) ptrace.Traces { +// traces := ptrace.NewTraces() +// initSpanWithAttribute(key, attribValue, traces.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty()) +// return traces +// } +// tests := []struct { +// name 
string +// cfg *Config +// td ptrace.Traces +// sampled bool +// }{ +// { +// name: "must_sample", +// cfg: &Config{ +// SamplingPercentage: 0.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueInt(2)), +// sampled: true, +// }, +// { +// name: "must_sample_double", +// cfg: &Config{ +// SamplingPercentage: 0.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueDouble(1)), +// sampled: true, +// }, +// { +// name: "must_sample_string", +// cfg: &Config{ +// SamplingPercentage: 0.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueStr("1")), +// sampled: true, +// }, +// { +// name: "must_not_sample", +// cfg: &Config{ +// SamplingPercentage: 100.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueInt(0)), +// }, +// { +// name: "must_not_sample_double", +// cfg: &Config{ +// SamplingPercentage: 100.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueDouble(0)), +// }, +// { +// name: "must_not_sample_string", +// cfg: &Config{ +// SamplingPercentage: 100.0, +// }, +// td: singleSpanWithAttrib( +// "sampling.priority", +// pcommon.NewValueStr("0")), +// }, +// { +// name: "defer_sample_expect_not_sampled", +// cfg: &Config{ +// SamplingPercentage: 0.0, +// }, +// td: singleSpanWithAttrib( +// "no.sampling.priority", +// pcommon.NewValueInt(2)), +// }, +// { +// name: "defer_sample_expect_sampled", +// cfg: &Config{ +// SamplingPercentage: 100.0, +// }, +// td: singleSpanWithAttrib( +// "no.sampling.priority", +// pcommon.NewValueInt(2)), +// sampled: true, +// }, +// } +// for _, mode := range AllModes { +// for _, tt := range tests { +// t.Run(tt.name, func(t *testing.T) { +// sink := new(consumertest.TracesSink) +// cfg := &Config{} +// if tt.cfg != nil { +// *cfg = *tt.cfg +// } +// cfg.SamplerMode = mode +// tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), 
cfg, sink) +// require.NoError(t, err) + +// err = tsp.ConsumeTraces(context.Background(), tt.td) +// require.NoError(t, err) + +// sampledData := sink.AllTraces() +// if tt.sampled { +// require.Equal(t, 1, len(sampledData)) +// assert.Equal(t, 1, sink.SpanCount()) +// } else { +// require.Equal(t, 0, len(sampledData)) +// assert.Equal(t, 0, sink.SpanCount()) +// } +// }) +// } +// } +// } + // genRandomTestData generates a slice of ptrace.Traces with the numBatches elements which one with // numTracesPerBatch spans (ie.: each span has a different trace ID). All spans belong to the specified // serviceName. From 700734ef99f88caf0d005aa03cf9f37336e2d282 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 15 Nov 2023 15:29:28 -0800 Subject: [PATCH 28/38] Update for rejection threshold --- pkg/sampling/encoding_test.go | 70 +++--- pkg/sampling/oteltracestate.go | 9 +- pkg/sampling/oteltracestate_test.go | 43 ++-- pkg/sampling/probability.go | 20 +- pkg/sampling/threshold.go | 39 +-- .../tracesprocessor_test.go | 237 +++++++++--------- 6 files changed, 211 insertions(+), 207 deletions(-) diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index 09bb6f93b640..6975cabb045b 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -40,34 +40,34 @@ func tValueToProbability(tv string) (float64, error) { } func TestValidProbabilityToTValue(t *testing.T) { - require.Equal(t, "", must(probabilityToTValue(1.0))) + require.Equal(t, "0", must(probabilityToTValue(1.0))) require.Equal(t, "8", must(probabilityToTValue(0.5))) - require.Equal(t, "00000000000001", must(probabilityToTValue(0x1p-56))) - require.Equal(t, "55555555555554", must(probabilityToTValue(1/3.))) - require.Equal(t, "54", must(probabilityToTValue(0x54p-8))) // 0x54p-8 is approximately 1/3 - require.Equal(t, "01", must(probabilityToTValue(0x1p-8))) - require.Equal(t, "0", must(probabilityToTValue(0))) + require.Equal(t, "ffffffffffffff", 
must(probabilityToTValue(0x1p-56))) + require.Equal(t, "aaaaaaaaaaaaac", must(probabilityToTValue(1/3.))) + require.Equal(t, "55555555555558", must(probabilityToTValue(2/3.))) + require.Equal(t, "54", must(probabilityToTValue(1-0x54p-8))) // 0x54p-8 is approximately 1/3 + require.Equal(t, "01", must(probabilityToTValue(1-0x1p-8))) } -func TestThresholdLessThan(t *testing.T) { - require.True(t, ThresholdLessThan( - must(TValueToThreshold("4")), +func TestThresholdGreater(t *testing.T) { + require.True(t, ThresholdGreater( must(TValueToThreshold("5")), + must(TValueToThreshold("4")), )) - require.True(t, ThresholdLessThan( - must(TValueToThreshold("04")), + require.True(t, ThresholdGreater( must(TValueToThreshold("4")), + must(TValueToThreshold("04")), )) - require.False(t, ThresholdLessThan( - must(TValueToThreshold("4")), + require.False(t, ThresholdGreater( must(TValueToThreshold("234")), + must(TValueToThreshold("4")), )) - require.True(t, ThresholdLessThan( - must(TValueToThreshold("234")), + require.True(t, ThresholdGreater( must(TValueToThreshold("4")), + must(TValueToThreshold("234")), )) } @@ -83,11 +83,11 @@ func TestInvalidprobabilityToTValue(t *testing.T) { func TestTValueToProbability(t *testing.T) { require.Equal(t, 0.5, must(tValueToProbability("8"))) - require.Equal(t, 0x444p-12, must(tValueToProbability("444"))) - require.Equal(t, 0.0, must(tValueToProbability("0"))) + require.Equal(t, 1-0x444p-12, must(tValueToProbability("444"))) + require.Equal(t, 1.0, must(tValueToProbability("0"))) // 0x55555554p-32 is very close to 1/3 - require.InEpsilon(t, 1/3., must(tValueToProbability("55555554")), 1e-9) + require.InEpsilon(t, 1-1/3., must(tValueToProbability("55555554")), 1e-9) } func TestProbabilityToThreshold(t *testing.T) { @@ -95,48 +95,50 @@ func TestProbabilityToThreshold(t *testing.T) { must(TValueToThreshold("8")), must(ProbabilityToThreshold(0.5))) require.Equal(t, - must(TValueToThreshold("00000000000001")), + 
must(TValueToThreshold("ffffffffffffff")), must(ProbabilityToThreshold(0x1p-56))) require.Equal(t, - must(TValueToThreshold("000000000001")), + must(TValueToThreshold("ffffffffffff00")), must(ProbabilityToThreshold(0x100p-56))) require.Equal(t, - must(TValueToThreshold("00000000000002")), - must(ProbabilityToThreshold(0x1p-55))) + must(TValueToThreshold("00000000000010")), + must(ProbabilityToThreshold(1.0-0x1p-52))) require.Equal(t, AlwaysSampleThreshold, must(ProbabilityToThreshold(1.0))) - require.Equal(t, - NeverSampleThreshold, - must(ProbabilityToThreshold(0))) + + zt, err := ProbabilityToThreshold(0) + require.Equal(t, zt, AlwaysSampleThreshold) + require.Error(t, err) + require.Equal(t, err, ErrProbabilityRange) } func TestShouldSample(t *testing.T) { // Test four boundary conditions for 50% sampling, thresh := must(ProbabilityToThreshold(0.5)) - // Smallest TraceID that should sample. - require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // Smallest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0, // randomness starts here 0, 0, 0, 0, 0, 0, }))) - // Largest TraceID that should sample. - require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // Largest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x7f, // randomness starts here 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }))) - // Smallest TraceID that should NOT sample. - require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // Smallest TraceID that should sample. 
+ require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x80, // randomness starts here 0, 0, 0, 0, 0, 0, }))) - // Largest TraceID that should NOT sample. - require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // Largest TraceID that should sample. + require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ // 9 meaningless bytes 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xff, // randomness starts here @@ -207,10 +209,10 @@ func TestTValueSyntax(t *testing.T) { } for _, test := range []testCase{ // correct cases - {"", nil}, {"1", nil}, // syntax error + {"", ErrTValueEmpty}, {"g", strconv.ErrSyntax}, } { t.Run(testName(test.in), func(t *testing.T) { diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index a0d0732ef361..82354d9a8ef8 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -124,10 +124,6 @@ func (otts *OTelTraceState) HasTValue() bool { return otts.tvalue != "" } -func (otts *OTelTraceState) HasZeroTValue() bool { - return otts.HasTValue() && otts.TValueThreshold() == NeverSampleThreshold -} - func (otts *OTelTraceState) TValue() string { return otts.tvalue } @@ -137,7 +133,7 @@ func (otts *OTelTraceState) TValueThreshold() Threshold { } func (otts *OTelTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, encodedTValue string) error { - if otts.HasTValue() && ThresholdLessThan(otts.threshold, sampledThreshold) { + if otts.HasTValue() && ThresholdGreater(otts.threshold, sampledThreshold) { return ErrInconsistentSampling } otts.threshold = sampledThreshold @@ -147,9 +143,6 @@ func (otts *OTelTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, func (otts *OTelTraceState) AdjustedCount() float64 { if !otts.HasTValue() { - return 1 - } - if otts.TValueThreshold() == NeverSampleThreshold { return 0 } return 1.0 / 
otts.threshold.Probability() diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index cb351ffaad4a..474cff88077d 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -31,7 +31,7 @@ func TestOTelTraceStateTValueSerialize(t *testing.T) { require.NoError(t, err) require.True(t, otts.HasTValue()) require.Equal(t, "3", otts.TValue()) - require.Equal(t, 0x3p-4, otts.TValueThreshold().Probability()) + require.Equal(t, 1-0x3p-4, otts.TValueThreshold().Probability()) require.True(t, otts.HasRValue()) require.Equal(t, "10000000000000", otts.RValue()) @@ -43,15 +43,14 @@ func TestOTelTraceStateTValueSerialize(t *testing.T) { require.Equal(t, orig, w.String()) } -func TestOTelTraceStateZeroAdjustedCount(t *testing.T) { +func TestOTelTraceStateZero(t *testing.T) { const orig = "t:0" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasAnyValue()) require.True(t, otts.HasTValue()) - require.True(t, otts.HasZeroTValue()) require.Equal(t, "0", otts.TValue()) - require.Equal(t, 0.0, otts.TValueThreshold().Probability()) + require.Equal(t, 1.0, otts.TValueThreshold().Probability()) var w strings.Builder otts.Serialize(&w) @@ -88,7 +87,7 @@ func TestOTelTraceStateTValueUpdate(t *testing.T) { require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) require.Equal(t, "3", otts.TValue()) - require.Equal(t, 0x3p-4, otts.TValueThreshold().Probability()) + require.Equal(t, 1-0x3p-4, otts.TValueThreshold().Probability()) const updated = "r:abcdefabcdefab;t:3" var w strings.Builder @@ -277,29 +276,35 @@ func TestUpdateTValueWithSampling(t *testing.T) { adjCountOut float64 } for _, test := range []testCase{ - // 8/16 in, 2/16 out - {"t:8", 2, 0x2p-4, nil, "t:2", 8}, + // 8/16 in, sampled at (0x10-0xe)/0x10 = 2/16 => adjCount 8 + {"t:8", 2, 0x2p-4, nil, "t:e", 8}, - // 1/16 in, 50% update (error) - {"t:1", 16, 0x8p-4, ErrInconsistentSampling, "t:1", 16}, - - // no sampling in, 1/16 
update - {"", 1, 0x1p-4, nil, "t:1", 16}, + // 8/16 in, sampled at 14/16 => no update, adjCount 2 + {"t:8", 2, 0xep-4, nil, "t:8", 2}, - // zero adj count in, 1/16 update (error) - {"t:0", 0, 0x1p-4, ErrInconsistentSampling, "t:0", 0}, + // 1/16 in, 50% update (error) + {"t:f", 16, 0x8p-4, ErrInconsistentSampling, "t:f", 16}, - // none in, 0% update - {"t:0", 0, 0, nil, "t:0", 0}, + // 1/1 sampling in, 1/16 update + {"t:0", 1, 0x1p-4, nil, "t:f", 16}, - // 8/16 in, zero update - {"t:8", 2, 0, nil, "t:0", 0}, + // no t-value in, 1/16 update + {"", 0, 0x1p-4, nil, "t:f", 16}, // none in, 100% update - {"", 1, 1, nil, "", 1}, + {"", 0, 1, nil, "t:0", 1}, // 1/2 in, 100% update (error) {"t:8", 2, 1, ErrInconsistentSampling, "t:8", 2}, + + // 1/1 in, 0x1p-56 update + {"t:0", 1, 0x1p-56, nil, "t:ffffffffffffff", 0x1p56}, + + // 1/1 in, 0x1p-56 update + {"t:0", 1, 0x1p-56, nil, "t:ffffffffffffff", 0x1p56}, + + // 2/3 in, 1/3 update. Note that 0x555 + 0xaab = 0x1000. + {"t:555", 1 / (1 - 0x555p-12), 0x555p-12, nil, "t:aab", 1 / (1 - 0xaabp-12)}, } { t.Run(test.in+"/"+test.out, func(t *testing.T) { otts := OTelTraceState{} diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go index 118d8121191d..265079bca9bd 100644 --- a/pkg/sampling/probability.go +++ b/pkg/sampling/probability.go @@ -11,28 +11,28 @@ import ( // ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. var ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") +// MinSamplingProbability is the smallest representable probability +// and is the inverse of MaxAdjustedCount. +const MinSamplingProbability = 1.0 / MaxAdjustedCount + // probabilityInRange tests MinSamplingProb <= prob <= 1. 
func probabilityInRange(prob float64) bool { - return prob >= 1/MaxAdjustedCount && prob <= 1 + return prob >= MinSamplingProbability && prob <= 1 } func ProbabilityToThreshold(prob float64) (Threshold, error) { // Probability cases - switch { - case prob == 1: - return AlwaysSampleThreshold, nil - case prob == 0: - return NeverSampleThreshold, nil - case !probabilityInRange(prob): + if !probabilityInRange(prob) { return AlwaysSampleThreshold, ErrProbabilityRange } - unsigned := uint64(math.Round(prob * MaxAdjustedCount)) + scaled := uint64(math.Round(prob * MaxAdjustedCount)) + return Threshold{ - unsigned: unsigned, + unsigned: MaxAdjustedCount - scaled, }, nil } // Probability is the sampling ratio in the range [MinSamplingProb, 1]. func (t Threshold) Probability() float64 { - return float64(t.unsigned) / MaxAdjustedCount + return float64(MaxAdjustedCount-t.unsigned) / MaxAdjustedCount } diff --git a/pkg/sampling/threshold.go b/pkg/sampling/threshold.go index f12e41bbca75..81ea0b6d4abb 100644 --- a/pkg/sampling/threshold.go +++ b/pkg/sampling/threshold.go @@ -25,9 +25,9 @@ const ( // Threshold used to compare with the least-significant 7 bytes of the TraceID. type Threshold struct { // unsigned is in the range [0, MaxAdjustedCount] - // - 0 represents never sampling (0 TraceID values are less-than) - // - 1 represents 1-in-MaxAdjustedCount (1 TraceID value is less-than) - // - MaxAdjustedCount represents always sampling (all TraceID values are less-than). + // - 0 represents always sampling (0 Random values are less-than) + // - 1 represents sampling (MaxAdjustedCount-1)-in-MaxAdjustedCount + // - MaxAdjustedCount represents never sampling (probability 0) unsigned uint64 } @@ -35,8 +35,11 @@ var ( // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. 
ErrTValueSize = errors.New("t-value exceeds 14 hex digits") - NeverSampleThreshold = Threshold{unsigned: 0} - AlwaysSampleThreshold = Threshold{unsigned: MaxAdjustedCount} + // ErrTValueEmpty indicates no t-value was found, i.e., no threshold available. + ErrTValueEmpty = errors.New("t-value is empty") + + // AlwaysSampleThreshold represents 100% sampling. + AlwaysSampleThreshold = Threshold{unsigned: 0} ) // TValueToThreshold returns a Threshold. Because TValue strings @@ -46,7 +49,7 @@ func TValueToThreshold(s string) (Threshold, error) { return AlwaysSampleThreshold, ErrTValueSize } if len(s) == 0 { - return AlwaysSampleThreshold, nil + return AlwaysSampleThreshold, ErrTValueEmpty } // Having checked length above, there are no range errors @@ -69,19 +72,11 @@ func TValueToThreshold(s string) (Threshold, error) { // up to 14 characters. The empty string is returned for 100% // sampling. func (th Threshold) TValue() string { - // Special cases - switch th.unsigned { - case MaxAdjustedCount: - // 100% sampling. Samplers are specified not to - // include a TValue in this case. - return "" - case 0: - // 0% sampling. This is a special case, otherwise, - // the TrimRight below will return an empty string - // matching the case above. + // Always-sample is a special case because TrimRight() below + // will trim it to the empty string, which represents no t-value. + if th == AlwaysSampleThreshold { return "0" } - // For thresholds other than the extremes, format a full-width // (14 digit) unsigned value with leading zeros, then, remove // the trailing zeros. Use the logic for (Randomness).RValue(). @@ -94,11 +89,17 @@ func (th Threshold) TValue() string { // ShouldSample returns true when the span passes this sampler's // consistent sampling decision. func (t Threshold) ShouldSample(rnd Randomness) bool { - return rnd.unsigned < t.unsigned + return rnd.unsigned >= t.unsigned +} + +// ThresholdGreater allows direct comparison of Threshold values. 
+// Greater thresholds equate with smaller sampling probabilities. +func ThresholdGreater(a, b Threshold) bool { + return a.unsigned > b.unsigned } // ThresholdLessThan allows direct comparison of Threshold values. -// Smaller thresholds equate with smaller probabilities. +// Smaller thresholds equate with greater sampling probabilities. func ThresholdLessThan(a, b Threshold) bool { return a.unsigned < b.unsigned } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index 6e00a2503d0c..a3150e84cae1 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -398,6 +398,126 @@ func Test_parseSpanSamplingPriority(t *testing.T) { } } +// Test_tracesamplerprocessor_TraceState checks if handling of the context +// tracestate is correct. +func Test_tracesamplerprocessor_TraceState(t *testing.T) { + singleSpanWithAttrib := func(key string, attribValue pcommon.Value) ptrace.Traces { + traces := ptrace.NewTraces() + span := traces.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty() + span.TraceState().FromRaw("") + return traces + } + tests := []struct { + name string + cfg *Config + td ptrace.Traces + sampled bool + }{ + { + name: "must_sample", + cfg: &Config{ + SamplingPercentage: 0.0, + }, + td: singleSpanWithAttrib( + "sampling.priority", + pcommon.NewValueInt(2)), + sampled: true, + }, + { + name: "must_sample_double", + cfg: &Config{ + SamplingPercentage: 0.0, + }, + td: singleSpanWithAttrib( + "sampling.priority", + pcommon.NewValueDouble(1)), + sampled: true, + }, + { + name: "must_sample_string", + cfg: &Config{ + SamplingPercentage: 0.0, + }, + td: singleSpanWithAttrib( + "sampling.priority", + pcommon.NewValueStr("1")), + sampled: true, + }, + { + name: "must_not_sample", + cfg: &Config{ + SamplingPercentage: 100.0, + }, + td: singleSpanWithAttrib( + 
"sampling.priority", + pcommon.NewValueInt(0)), + }, + { + name: "must_not_sample_double", + cfg: &Config{ + SamplingPercentage: 100.0, + }, + td: singleSpanWithAttrib( + "sampling.priority", + pcommon.NewValueDouble(0)), + }, + { + name: "must_not_sample_string", + cfg: &Config{ + SamplingPercentage: 100.0, + }, + td: singleSpanWithAttrib( + "sampling.priority", + pcommon.NewValueStr("0")), + }, + { + name: "defer_sample_expect_not_sampled", + cfg: &Config{ + SamplingPercentage: 0.0, + }, + td: singleSpanWithAttrib( + "no.sampling.priority", + pcommon.NewValueInt(2)), + }, + { + name: "defer_sample_expect_sampled", + cfg: &Config{ + SamplingPercentage: 100.0, + }, + td: singleSpanWithAttrib( + "no.sampling.priority", + pcommon.NewValueInt(2)), + sampled: true, + }, + } + for _, tt := range tests { + for _, mode := range AllModes { + t.Run(tt.name, func(t *testing.T) { + sink := new(consumertest.TracesSink) + cfg := &Config{} + if tt.cfg != nil { + *cfg = *tt.cfg + } + cfg.SamplerMode = mode + tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), cfg, sink) + require.NoError(t, err) + + err = tsp.ConsumeTraces(context.Background(), tt.td) + require.NoError(t, err) + + sampledData := sink.AllTraces() + if tt.sampled { + require.Equal(t, 1, len(sampledData)) + assert.Equal(t, 1, sink.SpanCount()) + } else { + require.Equal(t, 0, len(sampledData)) + assert.Equal(t, 0, sink.SpanCount()) + } + }) + } + } +} + func getSpanWithAttributes(key string, value pcommon.Value) ptrace.Span { span := ptrace.NewSpan() initSpanWithAttribute(key, value, span) @@ -409,123 +529,6 @@ func initSpanWithAttribute(key string, value pcommon.Value, dest ptrace.Span) { value.CopyTo(dest.Attributes().PutEmpty(key)) } -// func Test_tracesamplerprocessor_TraceState(t *testing.T) { -// singleSpanWithAttrib := func(key string, attribValue pcommon.Value) ptrace.Traces { -// traces := ptrace.NewTraces() -// initSpanWithAttribute(key, attribValue, 
traces.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty()) -// return traces -// } -// tests := []struct { -// name string -// cfg *Config -// td ptrace.Traces -// sampled bool -// }{ -// { -// name: "must_sample", -// cfg: &Config{ -// SamplingPercentage: 0.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueInt(2)), -// sampled: true, -// }, -// { -// name: "must_sample_double", -// cfg: &Config{ -// SamplingPercentage: 0.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueDouble(1)), -// sampled: true, -// }, -// { -// name: "must_sample_string", -// cfg: &Config{ -// SamplingPercentage: 0.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueStr("1")), -// sampled: true, -// }, -// { -// name: "must_not_sample", -// cfg: &Config{ -// SamplingPercentage: 100.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueInt(0)), -// }, -// { -// name: "must_not_sample_double", -// cfg: &Config{ -// SamplingPercentage: 100.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueDouble(0)), -// }, -// { -// name: "must_not_sample_string", -// cfg: &Config{ -// SamplingPercentage: 100.0, -// }, -// td: singleSpanWithAttrib( -// "sampling.priority", -// pcommon.NewValueStr("0")), -// }, -// { -// name: "defer_sample_expect_not_sampled", -// cfg: &Config{ -// SamplingPercentage: 0.0, -// }, -// td: singleSpanWithAttrib( -// "no.sampling.priority", -// pcommon.NewValueInt(2)), -// }, -// { -// name: "defer_sample_expect_sampled", -// cfg: &Config{ -// SamplingPercentage: 100.0, -// }, -// td: singleSpanWithAttrib( -// "no.sampling.priority", -// pcommon.NewValueInt(2)), -// sampled: true, -// }, -// } -// for _, mode := range AllModes { -// for _, tt := range tests { -// t.Run(tt.name, func(t *testing.T) { -// sink := new(consumertest.TracesSink) -// cfg := &Config{} -// if tt.cfg != nil { -// 
*cfg = *tt.cfg -// } -// cfg.SamplerMode = mode -// tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), cfg, sink) -// require.NoError(t, err) - -// err = tsp.ConsumeTraces(context.Background(), tt.td) -// require.NoError(t, err) - -// sampledData := sink.AllTraces() -// if tt.sampled { -// require.Equal(t, 1, len(sampledData)) -// assert.Equal(t, 1, sink.SpanCount()) -// } else { -// require.Equal(t, 0, len(sampledData)) -// assert.Equal(t, 0, sink.SpanCount()) -// } -// }) -// } -// } -// } - // genRandomTestData generates a slice of ptrace.Traces with the numBatches elements which one with // numTracesPerBatch spans (ie.: each span has a different trace ID). All spans belong to the specified // serviceName. From a94b8e7f4bf9d57d1af06f69ce6d97b60fb82534 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 15 Nov 2023 16:14:09 -0800 Subject: [PATCH 29/38] fix preexisting tests --- .../tracesprocessor.go | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index adf72ac709a2..0880da4f657a 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -65,14 +65,14 @@ type traceProcessor struct { logger *zap.Logger } -type traceHashSampler struct { +type traceHasher struct { // Hash-based calculation hashScaledSamplerate uint32 hashSeed uint32 probability float64 - svalueEncoding string } +// traceEqualizer adjusts thresholds absolutely. Cannot be used with zero. type traceEqualizer struct { // TraceID-randomness-based calculation traceIDThreshold sampling.Threshold @@ -81,10 +81,15 @@ type traceEqualizer struct { tValueEncoding string } +// traceProportionalizer adjusts thresholds relatively. Cannot be used with zero. 
type traceProportionalizer struct { ratio float64 } +// zeroProbability is a bypass for all cases with Percent==0. +type zeroProbability struct { +} + func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceState, error) { state := s.TraceState() raw := state.AsRaw() @@ -141,31 +146,34 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * } } - ratio := pct / 100 - switch cfg.SamplerMode { - case HashSeed: - ts := &traceHashSampler{} - - // Adjust sampling percentage on private so recalculations are avoided. - ts.hashScaledSamplerate = uint32(pct * percentageScaleFactor) - ts.hashSeed = cfg.HashSeed - ts.probability = ratio - ts.svalueEncoding = strconv.FormatFloat(ratio, 'g', 4, 64) - - tp.sampler = ts - case Equalizing: - threshold, err := sampling.ProbabilityToThreshold(ratio) - if err != nil { - return nil, err - } + if pct == 0 { + tp.sampler = &zeroProbability{} + } else { + ratio := pct / 100 + switch cfg.SamplerMode { + case HashSeed: + ts := &traceHasher{} + + // Adjust sampling percentage on private so recalculations are avoided. 
+ ts.hashScaledSamplerate = uint32(pct * percentageScaleFactor) + ts.hashSeed = cfg.HashSeed + ts.probability = ratio + + tp.sampler = ts + case Equalizing: + threshold, err := sampling.ProbabilityToThreshold(ratio) + if err != nil { + return nil, err + } - tp.sampler = &traceEqualizer{ - tValueEncoding: threshold.TValue(), - traceIDThreshold: threshold, - } - case Proportional: - tp.sampler = &traceProportionalizer{ - ratio: ratio, + tp.sampler = &traceEqualizer{ + tValueEncoding: threshold.TValue(), + traceIDThreshold: threshold, + } + case Proportional: + tp.sampler = &traceProportionalizer{ + ratio: ratio, + } } } @@ -178,7 +186,7 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) } -func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { +func (ts *traceHasher) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { // If one assumes random trace ids hashing may seems avoidable, however, traces can be coming from sources // with various different criteria to generate trace id and perhaps were already sampled without hashing. // Hashing here prevents bias due to such systems. @@ -187,9 +195,8 @@ func (ts *traceHashSampler) decide(s ptrace.Span) (bool, *sampling.W3CTraceState return decision, nil, nil } -func (ts *traceHashSampler) updateTracestate(_ pcommon.TraceID, should bool, _ *sampling.W3CTraceState) error { - // Note: Sampling SIG will not like this idea. What about using - // r:00000000000000;t:{ProbabilityToThreshold(pct/100.0)}? +func (ts *traceHasher) updateTracestate(_ pcommon.TraceID, _ bool, _ *sampling.W3CTraceState) error { + // No changes; any t-value will pass through. return nil } @@ -215,7 +222,8 @@ func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, wts // When this sampler decided not to sample, the t-value becomes zero. 
// Incoming TValue consistency is not checked when this happens. if !should { - return wts.OTelValue().UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + wts.OTelValue().ClearTValue() + return nil } // Spans that appear consistently sampled but arrive w/ zero // adjusted count remain zero. @@ -243,11 +251,19 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace func (ts *traceProportionalizer) updateTracestate(tid pcommon.TraceID, should bool, wts *sampling.W3CTraceState) error { if !should { - return wts.OTelValue().UpdateTValueWithSampling(sampling.NeverSampleThreshold, sampling.NeverSampleTValue) + wts.OTelValue().ClearTValue() } return nil } +func (*zeroProbability) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { + return false, nil, nil +} + +func (*zeroProbability) updateTracestate(_ pcommon.TraceID, _ bool, _ *sampling.W3CTraceState) error { + return nil +} + func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) (ptrace.Traces, error) { td.ResourceSpans().RemoveIf(func(rs ptrace.ResourceSpans) bool { rs.ScopeSpans().RemoveIf(func(ils ptrace.ScopeSpans) bool { @@ -270,7 +286,7 @@ func (tp *traceProcessor) processTraces(ctx context.Context, td ptrace.Traces) ( tp.logger.Error("trace-state", zap.Error(err)) } - forceSample := priority == mustSampleSpan || (wts != nil && wts.OTelValue().HasZeroTValue()) + forceSample := priority == mustSampleSpan sampled := forceSample || probSample if forceSample { From 4edcbcbe884ebb3226e1854ab461b341e8b2de18 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 15 Nov 2023 16:59:16 -0800 Subject: [PATCH 30/38] basic yes/no t-value sampling test --- .../tracesprocessor_test.go | 95 +++++-------------- 1 file changed, 24 insertions(+), 71 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index 
a3150e84cae1..705809dc7895 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -5,6 +5,7 @@ package probabilisticsamplerprocessor import ( "context" + "fmt" "math" "math/rand" "testing" @@ -401,98 +402,49 @@ func Test_parseSpanSamplingPriority(t *testing.T) { // Test_tracesamplerprocessor_TraceState checks if handling of the context // tracestate is correct. func Test_tracesamplerprocessor_TraceState(t *testing.T) { + sid := idutils.UInt64ToSpanID(0xfefefefe) singleSpanWithAttrib := func(key string, attribValue pcommon.Value) ptrace.Traces { traces := ptrace.NewTraces() span := traces.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty() span.TraceState().FromRaw("") + span.SetTraceID(pcommon.TraceID{ + // Don't care (9 bytes) + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + // Trace randomness (7 bytes) + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) + span.SetSpanID(sid) return traces } tests := []struct { name string cfg *Config - td ptrace.Traces + key string + value pcommon.Value sampled bool }{ { - name: "must_sample", - cfg: &Config{ - SamplingPercentage: 0.0, - }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueInt(2)), - sampled: true, - }, - { - name: "must_sample_double", - cfg: &Config{ - SamplingPercentage: 0.0, - }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueDouble(1)), - sampled: true, - }, - { - name: "must_sample_string", + name: "yes_sample", cfg: &Config{ - SamplingPercentage: 0.0, + SamplingPercentage: 50, }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueStr("1")), + key: "n/a", + value: pcommon.NewValueInt(2), sampled: true, }, { - name: "must_not_sample", + name: "no_sample", cfg: &Config{ - SamplingPercentage: 100.0, + SamplingPercentage: 49, }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueInt(0)), - }, - { - name: 
"must_not_sample_double", - cfg: &Config{ - SamplingPercentage: 100.0, - }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueDouble(0)), - }, - { - name: "must_not_sample_string", - cfg: &Config{ - SamplingPercentage: 100.0, - }, - td: singleSpanWithAttrib( - "sampling.priority", - pcommon.NewValueStr("0")), - }, - { - name: "defer_sample_expect_not_sampled", - cfg: &Config{ - SamplingPercentage: 0.0, - }, - td: singleSpanWithAttrib( - "no.sampling.priority", - pcommon.NewValueInt(2)), - }, - { - name: "defer_sample_expect_sampled", - cfg: &Config{ - SamplingPercentage: 100.0, - }, - td: singleSpanWithAttrib( - "no.sampling.priority", - pcommon.NewValueInt(2)), - sampled: true, + key: "n/a", + value: pcommon.NewValueInt(2), + sampled: false, }, } for _, tt := range tests { - for _, mode := range AllModes { - t.Run(tt.name, func(t *testing.T) { + for _, mode := range []SamplerMode{Equalizing, Proportional} { + t.Run(fmt.Sprint(mode, "_", tt.name), func(t *testing.T) { sink := new(consumertest.TracesSink) cfg := &Config{} if tt.cfg != nil { @@ -501,8 +453,9 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { cfg.SamplerMode = mode tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), cfg, sink) require.NoError(t, err) + td := singleSpanWithAttrib(tt.key, tt.value) - err = tsp.ConsumeTraces(context.Background(), tt.td) + err = tsp.ConsumeTraces(context.Background(), td) require.NoError(t, err) sampledData := sink.AllTraces() From 3cdb957e7c44dfa9ba9afbae2ec57b1aa015e31d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 29 Nov 2023 15:25:01 -0800 Subject: [PATCH 31/38] add version for sampling pkg --- versions.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index a51193928e03..4990bb68c1a2 100644 --- a/versions.yaml +++ b/versions.yaml @@ -55,8 +55,8 @@ module-sets: - 
github.com/open-telemetry/opentelemetry-collector-contrib/exporter/lokiexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/mezmoexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/opencensusexporter - - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/parquetexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/opensearchexporter + - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/parquetexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter - github.com/open-telemetry/opentelemetry-collector-contrib/exporter/pulsarexporter @@ -127,6 +127,7 @@ module-sets: - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry + - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/azure From e50684733f8f9f8c0b6f4b488a2f5bf10a5177f1 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 7 Dec 2023 14:43:34 -0800 Subject: [PATCH 32/38] more testing --- pkg/sampling/oteltracestate.go | 4 +- pkg/sampling/oteltracestate_test.go | 94 +++++----- pkg/sampling/w3ctracestate_test.go | 24 +-- .../probabilisticsamplerprocessor/README.md | 47 +++++ .../probabilisticsamplerprocessor/go.mod | 3 +- .../tracesprocessor.go | 23 +-- .../tracesprocessor_test.go | 165 ++++++++++++++++-- 7 files changed, 264 insertions(+), 96 deletions(-) diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 82354d9a8ef8..8fccd096f56c 
100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -22,9 +22,9 @@ type OTelTraceState struct { const ( // RName is the OTel tracestate field for R-value - RName = "r" + RName = "rv" // TName is the OTel tracestate field for T-value - TName = "t" + TName = "th" // hardMaxOTelLength is the maximum encoded size of an OTel // tracestate value. diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index 474cff88077d..97df83f92bc3 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -26,7 +26,7 @@ func TestEmptyOTelTraceState(t *testing.T) { } func TestOTelTraceStateTValueSerialize(t *testing.T) { - const orig = "r:10000000000000;t:3;a:b;c:d" + const orig = "rv:10000000000000;th:3;a:b;c:d" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasTValue()) @@ -44,7 +44,7 @@ func TestOTelTraceStateTValueSerialize(t *testing.T) { } func TestOTelTraceStateZero(t *testing.T) { - const orig = "t:0" + const orig = "th:0" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.True(t, otts.HasAnyValue()) @@ -60,7 +60,7 @@ func TestOTelTraceStateZero(t *testing.T) { func TestOTelTraceStateRValuePValue(t *testing.T) { // Ensures the caller can handle RValueSizeError and search // for p-value in extra-values. 
- const orig = "r:3;p:2" + const orig = "rv:3;p:2" otts, err := NewOTelTraceState(orig) require.Error(t, err) require.True(t, errors.Is(err, RValueSizeError("3"))) @@ -77,7 +77,7 @@ func TestOTelTraceStateRValuePValue(t *testing.T) { } func TestOTelTraceStateTValueUpdate(t *testing.T) { - const orig = "r:abcdefabcdefab" + const orig = "rv:abcdefabcdefab" otts, err := NewOTelTraceState(orig) require.NoError(t, err) require.False(t, otts.HasTValue()) @@ -89,7 +89,7 @@ func TestOTelTraceStateTValueUpdate(t *testing.T) { require.Equal(t, "3", otts.TValue()) require.Equal(t, 1-0x3p-4, otts.TValueThreshold().Probability()) - const updated = "r:abcdefabcdefab;t:3" + const updated = "rv:abcdefabcdefab;th:3" var w strings.Builder otts.Serialize(&w) require.Equal(t, updated, w.String()) @@ -106,14 +106,14 @@ func TestOTelTraceStateRTUpdate(t *testing.T) { require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) otts.SetRValue(must(RValueToRandomness("00000000000003"))) - const updated = "r:00000000000003;t:3;a:b" + const updated = "rv:00000000000003;th:3;a:b" var w strings.Builder otts.Serialize(&w) require.Equal(t, updated, w.String()) } func TestOTelTraceStateRTClear(t *testing.T) { - otts, err := NewOTelTraceState("a:b;r:12341234123412;t:1234") + otts, err := NewOTelTraceState("a:b;rv:12341234123412;th:1234") require.NoError(t, err) otts.ClearTValue() @@ -136,32 +136,32 @@ func TestParseOTelTraceState(t *testing.T) { const ns = "" for _, test := range []testCase{ // t-value correct cases - {"t:2", ns, "2", nil, nil}, - {"t:1", ns, "1", nil, nil}, - {"t:1", ns, "1", nil, nil}, - {"t:10", ns, "10", nil, nil}, - {"t:33", ns, "33", nil, nil}, - {"t:ab", ns, "ab", nil, nil}, - {"t:61", ns, "61", nil, nil}, + {"th:2", ns, "2", nil, nil}, + {"th:1", ns, "1", nil, nil}, + {"th:1", ns, "1", nil, nil}, + {"th:10", ns, "10", nil, nil}, + {"th:33", ns, "33", nil, nil}, + {"th:ab", ns, "ab", nil, nil}, + {"th:61", ns, "61", nil, nil}, // syntax errors {"", ns, ns, nil, 
strconv.ErrSyntax}, - {"t:1;", ns, ns, nil, strconv.ErrSyntax}, - {"t:1=p:2", ns, ns, nil, strconv.ErrSyntax}, - {"t:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;", ns, ns, nil, strconv.ErrSyntax}, + {"th:1=p:2", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, {":;:", ns, ns, nil, strconv.ErrSyntax}, {":", ns, ns, nil, strconv.ErrSyntax}, - {"t:;p=1", ns, ns, nil, strconv.ErrSyntax}, - {"t:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal - {"t:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid - {"t:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax - {"t:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative + {"th:;p=1", ns, ns, nil, strconv.ErrSyntax}, + {"th:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"th:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid + {"th:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax + {"th:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative // too many digits - {"t:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, - {"t:100000000000000", ns, ns, nil, ErrTValueSize}, + {"th:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, + {"th:100000000000000", ns, ns, nil, ErrTValueSize}, // one field {"e100:1", ns, ns, []string{"e100:1"}, nil}, @@ -171,13 +171,13 @@ func TestParseOTelTraceState(t *testing.T) { {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, // one extra key, two ways - {"t:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, - {"extra:stuff;t:2", ns, "2", []string{"extra:stuff"}, nil}, + {"th:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;th:2", ns, "2", []string{"extra:stuff"}, nil}, // two extra fields - {"e100:100;t:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"t:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, - {"e100:100;e101:101;t:1", ns, "1", 
[]string{"e100:100", "e101:101"}, nil}, + {"e100:100;th:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"th:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;th:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, // parse error prevents capturing unrecognized keys {"1:1;u:V", ns, ns, nil, strconv.ErrSyntax}, @@ -185,15 +185,15 @@ func TestParseOTelTraceState(t *testing.T) { {"x:1;u:V", ns, ns, []string{"x:1", "u:V"}, nil}, // r-value - {"r:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"extra:stuff;r:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, - {"r:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, - {"r:88888888888888", "88888888888888", ns, nil, nil}, - {"r:00000000000000", "00000000000000", ns, nil, nil}, + {"rv:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;rv:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"rv:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, + {"rv:88888888888888", "88888888888888", ns, nil, nil}, + {"rv:00000000000000", "00000000000000", ns, nil, nil}, // r-value range error (15 bytes of hex or more) - {"r:100000000000000", ns, ns, nil, RValueSizeError("100000000000000")}, - {"r:fffffffffffffffff", ns, ns, nil, RValueSizeError("fffffffffffffffff")}, + {"rv:100000000000000", ns, ns, nil, RValueSizeError("100000000000000")}, + {"rv:fffffffffffffffff", ns, ns, nil, RValueSizeError("fffffffffffffffff")}, // no trailing ; {"x:1;", ns, ns, nil, strconv.ErrSyntax}, @@ -203,7 +203,7 @@ func TestParseOTelTraceState(t *testing.T) { // charset test {"x:0X1FFF;y:.-_-.;z:", ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, - {"x1y2z3:1-2-3;y1:y_1;xy:-;t:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;th:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, // size exceeded {"x:" + 
strings.Repeat("_", 255), ns, ns, nil, ErrTraceStateSize}, @@ -277,34 +277,34 @@ func TestUpdateTValueWithSampling(t *testing.T) { } for _, test := range []testCase{ // 8/16 in, sampled at (0x10-0xe)/0x10 = 2/16 => adjCount 8 - {"t:8", 2, 0x2p-4, nil, "t:e", 8}, + {"th:8", 2, 0x2p-4, nil, "th:e", 8}, // 8/16 in, sampled at 14/16 => no update, adjCount 2 - {"t:8", 2, 0xep-4, nil, "t:8", 2}, + {"th:8", 2, 0xep-4, nil, "th:8", 2}, // 1/16 in, 50% update (error) - {"t:f", 16, 0x8p-4, ErrInconsistentSampling, "t:f", 16}, + {"th:f", 16, 0x8p-4, ErrInconsistentSampling, "th:f", 16}, // 1/1 sampling in, 1/16 update - {"t:0", 1, 0x1p-4, nil, "t:f", 16}, + {"th:0", 1, 0x1p-4, nil, "th:f", 16}, // no t-value in, 1/16 update - {"", 0, 0x1p-4, nil, "t:f", 16}, + {"", 0, 0x1p-4, nil, "th:f", 16}, // none in, 100% update - {"", 0, 1, nil, "t:0", 1}, + {"", 0, 1, nil, "th:0", 1}, // 1/2 in, 100% update (error) - {"t:8", 2, 1, ErrInconsistentSampling, "t:8", 2}, + {"th:8", 2, 1, ErrInconsistentSampling, "th:8", 2}, // 1/1 in, 0x1p-56 update - {"t:0", 1, 0x1p-56, nil, "t:ffffffffffffff", 0x1p56}, + {"th:0", 1, 0x1p-56, nil, "th:ffffffffffffff", 0x1p56}, // 1/1 in, 0x1p-56 update - {"t:0", 1, 0x1p-56, nil, "t:ffffffffffffff", 0x1p56}, + {"th:0", 1, 0x1p-56, nil, "th:ffffffffffffff", 0x1p56}, // 2/3 in, 1/3 update. Note that 0x555 + 0xaab = 0x1000. 
- {"t:555", 1 / (1 - 0x555p-12), 0x555p-12, nil, "t:aab", 1 / (1 - 0xaabp-12)}, + {"th:555", 1 / (1 - 0x555p-12), 0x555p-12, nil, "th:aab", 1 / (1 - 0xaabp-12)}, } { t.Run(test.in+"/"+test.out, func(t *testing.T) { otts := OTelTraceState{} diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index d5a1eef5ec2a..ae06ddfff3fa 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -23,25 +23,25 @@ func TestParseW3CTraceState(t *testing.T) { const ns = "" for _, test := range []testCase{ // correct cases - {"ot=t:1", ns, "1", nil, nil}, - {" ot=t:1 ", ns, "1", nil, nil}, - {"ot=t:1", ns, "1", nil, nil}, - {" ot=t:1 ", ns, "1", nil, nil}, - {" ot=t:1,other=value ", ns, "1", map[string]string{ + {"ot=th:1", ns, "1", nil, nil}, + {" ot=th:1 ", ns, "1", nil, nil}, + {"ot=th:1", ns, "1", nil, nil}, + {" ot=th:1 ", ns, "1", nil, nil}, + {" ot=th:1,other=value ", ns, "1", map[string]string{ "other": "value", }, nil}, - {"ot=t:1 , other=value", ns, "1", map[string]string{ + {"ot=th:1 , other=value", ns, "1", map[string]string{ "other": "value", }, nil}, {",,,", ns, ns, nil, nil}, - {" , ot=t:1, , other=value ", ns, "1", map[string]string{ + {" , ot=th:1, , other=value ", ns, "1", map[string]string{ "other": "value", }, nil}, - {"ot=t:100;r:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, - {" ot=t:100;r:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, - {"ot=t:100;r:abcdabcdabcdff ", "abcdabcdabcdff", "100", nil, nil}, - {"ot=r:11111111111111", "11111111111111", ns, nil, nil}, - {"ot=r:ffffffffffffff,unknown=value,other=something", "ffffffffffffff", ns, map[string]string{ + {"ot=th:100;rv:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, + {" ot=th:100;rv:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, + {"ot=th:100;rv:abcdabcdabcdff ", "abcdabcdabcdff", "100", nil, nil}, + {"ot=rv:11111111111111", "11111111111111", ns, nil, nil}, + {"ot=rv:ffffffffffffff,unknown=value,other=something", 
"ffffffffffffff", ns, map[string]string{ "other": "something", "unknown": "value", }, nil}, diff --git a/processor/probabilisticsamplerprocessor/README.md b/processor/probabilisticsamplerprocessor/README.md index 8efe43dd3cb5..c46b4b90f810 100644 --- a/processor/probabilisticsamplerprocessor/README.md +++ b/processor/probabilisticsamplerprocessor/README.md @@ -54,6 +54,7 @@ The following configuration options can be modified: - `attribute_source` (default = traceID, optional): defines where to look for the attribute in from_attribute. The allowed values are `traceID` or `record`. - `from_attribute` (default = null, optional): The optional name of a log record attribute used for sampling purposes, such as a unique log record ID. The value of the attribute is only used if the trace ID is absent or if `attribute_source` is set to `record`. - `sampling_priority` (default = null, optional): The optional name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record. +- `sampler_mode` (default = "", optional): The optional sampling mode. One of "hash_seed", "equalizing", and "propotional". By default, when not explicitly set, if "hash_seed" is non-zero, the "hash_seed" mode will be configured, otherwise the "proportional" mode is selected. 
## Hashing @@ -68,6 +69,7 @@ Sample 15% of the logs: ```yaml processors: probabilistic_sampler: + sampler_mode: hash_seed sampling_percentage: 15 ``` @@ -76,6 +78,7 @@ Sample logs according to their logID attribute: ```yaml processors: probabilistic_sampler: + sampler_mode: hash_seed sampling_percentage: 15 attribute_source: record # possible values: one of record or traceID from_attribute: logID # value is required if the source is not traceID @@ -86,10 +89,54 @@ Sample logs according to the attribute `priority`: ```yaml processors: probabilistic_sampler: + sampler_mode: hash_seed sampling_percentage: 15 sampling_priority: priority ``` +## Consistent Probability Sampling + +This processor includes an implementation of the tail sampling logic +described in [OTEP +235](https://github.com/open-telemetry/oteps/pull/235), which encodes +probability sampling information in the OpenTelemetry sub-field of the +W3C TraceState. There are two modes supported. + +- `proportional`: In this mode, the `sampling_percentage` + configuration is applied such that the number of spans exiting the + sampling is proportional to the number of spans entering the + sampling, regardless of how much sampling was already applied. All + sampling percentages are valid in this mode. +- `equalizing`: In this mode, the `sampling_percentage` configuration + is applied such that spans exit the sampler reduced to a minimum + probability. When spans arrive with probability equal to the + configured sampling percentage, the spans pass through unmodified. + When spans arrive with probability smaller than the configured + sampling percentage, errors are reported. When spans arrive with + larger probability than the configured sampling percentage, they + will be reduced in number as spans exit with the configured sampling + percentage. 
+ +For example, to configure the proportional sampler, simply omit the +`hash_seed` field: + +``` +processors: + probabilistic_sampler: + # no hash_seed is set, uses proportional consistent by default + sampling_percentage: 10 +``` + +For example, to configure an equalizing sampler, set the mode explicitly: + +``` +processors: + probabilistic_sampler: + sampler_mode: equalizing + sampling_percentage: 10 +``` + +## Detailed examples Refer to [config.yaml](./testdata/config.yaml) for detailed examples on using the processor. diff --git a/processor/probabilisticsamplerprocessor/go.mod b/processor/probabilisticsamplerprocessor/go.mod index 19d3eb6d25a2..b814f591197f 100644 --- a/processor/probabilisticsamplerprocessor/go.mod +++ b/processor/probabilisticsamplerprocessor/go.mod @@ -4,6 +4,7 @@ go 1.20 require ( github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.90.0 + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.90.0 github.com/stretchr/testify v1.8.4 go.opencensus.io v0.24.0 go.opentelemetry.io/collector/component v0.90.0 @@ -46,7 +47,6 @@ require ( github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.0.0-00010101000000-000000000000 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_golang v1.17.0 // indirect @@ -113,4 +113,3 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden => ../../pkg/golden - diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go 
b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 0880da4f657a..200dae7ca0f5 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -6,7 +6,6 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opent import ( "context" "fmt" - "math/rand" "strconv" "strings" @@ -100,25 +99,14 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS if err == nil && wts.OTelValue().HasRValue() { // When the tracestate is OK and has r-value, use it. randomness = wts.OTelValue().RValueRandomness() - } else if true /* s.Flags()&0x2 == 0x2 */ { + } else { // See https://github.com/open-telemetry/opentelemetry-proto/pull/503 // which merged but unreleased at the time of writing. + // + // When we have an additional flag indicating this + // randomness is present we should inspect the flag + // and return that no randomness is available, here. randomness = sampling.TraceIDToRandomness(s.TraceID()) - } else { - // Note: Creating an R-value here is the best we can - // do. Issue a warning? This is OK-ish for head - // sampling but kind of nonsense for tail sampling. - // This is especially nonsense if the caller has set a - // T-value already, (TODO: is it better to just assume - // the flag was set in a tail sampler? 
otherwise, - // inconsistent results) - randomness, _ = sampling.RValueToRandomness( - strconv.FormatUint( - sampling.MaxAdjustedCount+ - uint64(rand.Int63n(sampling.MaxAdjustedCount)), - 16)[1:], - ) - wts.OTelValue().SetRValue(randomness) } return randomness, &wts, err } @@ -241,6 +229,7 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace if otts.HasTValue() { incoming = otts.TValueThreshold().Probability() } + threshold, _ := sampling.ProbabilityToThreshold(incoming * ts.ratio) should := threshold.ShouldSample(rnd) if should { diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index 705809dc7895..e968620944a8 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -20,6 +20,7 @@ import ( conventions "go.opentelemetry.io/collector/semconv/v1.6.1" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/idutils" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" ) func TestNewTracesProcessor(t *testing.T) { @@ -403,43 +404,158 @@ func Test_parseSpanSamplingPriority(t *testing.T) { // tracestate is correct. func Test_tracesamplerprocessor_TraceState(t *testing.T) { sid := idutils.UInt64ToSpanID(0xfefefefe) - singleSpanWithAttrib := func(key string, attribValue pcommon.Value) ptrace.Traces { + singleSpanWithAttrib := func(ts, key string, attribValue pcommon.Value) ptrace.Traces { traces := ptrace.NewTraces() span := traces.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty() - span.TraceState().FromRaw("") + span.TraceState().FromRaw(ts) + // This hard-coded TraceID will sample at 50% and not at 49%. + // The equivalent randomness is 0x80000000000000. 
span.SetTraceID(pcommon.TraceID{ // Don't care (9 bytes) 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // Trace randomness (7 bytes) 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }) + attribValue.CopyTo(span.Attributes().PutEmpty(key)) span.SetSpanID(sid) return traces } tests := []struct { - name string - cfg *Config - key string - value pcommon.Value - sampled bool + name string + cfg *Config + ts string + key string + value pcommon.Value + + // returns sampled, adjustedCount + sf func(SamplerMode) (bool, float64, string) }{ { - name: "yes_sample", + name: "yes_sample_tid", cfg: &Config{ SamplingPercentage: 50, }, - key: "n/a", - value: pcommon.NewValueInt(2), - sampled: true, + ts: "", + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(SamplerMode) (bool, float64, string) { return true, 2, "ot=th:8" }, + }, + { + name: "yes_sample_rv1", + cfg: &Config{ + SamplingPercentage: 1, + }, + // 99/100 = .FD70A3D70A3D70A3D + ts: "ot=rv:FD70A3D70A3D71", // note upper case passes through, is not generated + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(SamplerMode) (bool, float64, string) { + return true, 1 / 0.01, "ot=rv:FD70A3D70A3D71;th:fd70a3d70a3d71" + }, + }, + { + name: "no_sample_rv2", + cfg: &Config{ + SamplingPercentage: 1, + }, + ts: "ot=rv:FD70A3D70A3D70", + key: "n/a", + value: pcommon.NewValueInt(2), }, { name: "no_sample", cfg: &Config{ SamplingPercentage: 49, }, - key: "n/a", - value: pcommon.NewValueInt(2), - sampled: false, + key: "n/a", + value: pcommon.NewValueInt(2), + }, + { + name: "no_sample_rv1", + cfg: &Config{ + SamplingPercentage: 1, + }, + // 99/100 = .FD70A3D70A3D70A3D + ts: "ot=rv:FD70A3D70A3D70", + key: "n/a", + value: pcommon.NewValueInt(2), + }, + { + name: "yes_sample_rv2", + cfg: &Config{ + SamplingPercentage: 1, + }, + // 99/100 = .FD70A3D70A3D70A3D + ts: "ot=rv:fd70B000000000", + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(SamplerMode) (bool, float64, string) { + return true, 1 / 0.01, 
"ot=rv:fd70B000000000;th:fd70a3d70a3d71" + }, + }, + { + name: "yes_sample_priority", + cfg: &Config{ + SamplingPercentage: 1, + }, + ts: "", + key: "sampling.priority", + value: pcommon.NewValueInt(2), + sf: func(SamplerMode) (bool, float64, string) { return true, 0, "" }, + }, + { + name: "no_sample_priority", + cfg: &Config{ + SamplingPercentage: 99, + }, + ts: "", + key: "sampling.priority", + value: pcommon.NewValueInt(0), + }, + { + name: "incoming_50", + cfg: &Config{ + SamplingPercentage: 50, + }, + ts: "ot=rv:90000000000000;th:80000000000000", // note extra zeros! + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(mode SamplerMode) (bool, float64, string) { + if mode == Equalizing { + return true, 2, "ot=rv:90000000000000;th:8" + } + return false, 0, "" + }, + }, + { + name: "norvalue_50", + cfg: &Config{ + SamplingPercentage: 50, + }, + ts: "ot=th:8", + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(mode SamplerMode) (bool, float64, string) { + if mode == Equalizing { + return true, 2, "ot=th:8" + } + return false, 0, "" + }, + }, + { + name: "incoming_rvalue_99", + cfg: &Config{ + SamplingPercentage: 50, + }, + ts: "ot=rv:c0000000000000;th:8", + key: "n/a", + value: pcommon.NewValueInt(2), + sf: func(mode SamplerMode) (bool, float64, string) { + if mode == Equalizing { + return true, 2, "ot=rv:c0000000000000;th:8" + } + return true, 4, "ot=rv:c0000000000000;th:c" + }, }, } for _, tt := range tests { @@ -453,18 +569,35 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { cfg.SamplerMode = mode tsp, err := newTracesProcessor(context.Background(), processortest.NewNopCreateSettings(), cfg, sink) require.NoError(t, err) - td := singleSpanWithAttrib(tt.key, tt.value) + td := singleSpanWithAttrib(tt.ts, tt.key, tt.value) err = tsp.ConsumeTraces(context.Background(), td) require.NoError(t, err) sampledData := sink.AllTraces() - if tt.sampled { + + var expectSampled bool + var expectCount float64 + var expectTS string + if tt.sf != nil { 
+ expectSampled, expectCount, expectTS = tt.sf(mode) + } + if expectSampled { require.Equal(t, 1, len(sampledData)) assert.Equal(t, 1, sink.SpanCount()) + got := sink.AllTraces()[0].ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) + gotTs, err := sampling.NewW3CTraceState(got.TraceState().AsRaw()) + require.NoError(t, err) + if expectCount == 0 { + assert.Equal(t, 0.0, gotTs.OTelValue().AdjustedCount()) + } else { + assert.InEpsilon(t, expectCount, gotTs.OTelValue().AdjustedCount(), 1e-9) + } + require.Equal(t, expectTS, got.TraceState().AsRaw()) } else { require.Equal(t, 0, len(sampledData)) assert.Equal(t, 0, sink.SpanCount()) + require.Equal(t, "", expectTS) } }) } From 2cddfeb512974e6e257983e60fc0a663ae9102ab Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 7 Dec 2023 16:16:10 -0800 Subject: [PATCH 33/38] add probability to threshold with precision option --- pkg/sampling/probability.go | 17 ++++ .../probabilisticsamplerprocessor/config.go | 11 +++ .../probabilisticsamplerprocessor/factory.go | 7 +- .../tracesprocessor.go | 13 ++- .../tracesprocessor_test.go | 87 ++++++++++++------- 5 files changed, 101 insertions(+), 34 deletions(-) diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go index 265079bca9bd..7ab83b6e6dfb 100644 --- a/pkg/sampling/probability.go +++ b/pkg/sampling/probability.go @@ -5,6 +5,7 @@ package sampling import ( "errors" + "fmt" "math" ) @@ -25,6 +26,7 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { if !probabilityInRange(prob) { return AlwaysSampleThreshold, ErrProbabilityRange } + scaled := uint64(math.Round(prob * MaxAdjustedCount)) return Threshold{ @@ -32,6 +34,21 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { }, nil } +func ProbabilityToThresholdWithPrecision(prob float64, prec uint8) (Threshold, error) { + th, err := ProbabilityToThreshold(prob) + if err != nil || prec == 0 || prec > 14 { + return th, err + } + scaled := th.unsigned + divisor := uint64(1) << (4 * 
(14 - prec)) + rescaled := uint64(math.Round(float64(scaled/divisor)) * float64(divisor)) + + fmt.Printf("SCALED %x %x %x\n", scaled, divisor, rescaled) + return Threshold{ + unsigned: rescaled, + }, nil +} + // Probability is the sampling ratio in the range [MinSamplingProb, 1]. func (t Threshold) Probability() float64 { return float64(MaxAdjustedCount-t.unsigned) / MaxAdjustedCount diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index b909c786d6b6..9bb1a923c957 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -81,6 +81,10 @@ type Config struct { // SamplingPriority (logs only) allows to use a log record attribute designed by the `sampling_priority` key // to be used as the sampling priority of the log record. SamplingPriority string `mapstructure:"sampling_priority"` + + // How many hex digits of th: value to use, max, from 1 up to + // 14. Default is 3. + SamplingPrecision uint8 `mapstructure:"sampling_precision"` } var _ component.Config = (*Config)(nil) @@ -105,5 +109,12 @@ func (cfg *Config) Validate() error { if cfg.AttributeSource != "" && !validAttributeSource[cfg.AttributeSource] { return fmt.Errorf("invalid attribute source: %v. 
Expected: %v or %v", cfg.AttributeSource, traceIDAttributeSource, recordAttributeSource) } + + if cfg.SamplingPrecision == 0 { + return fmt.Errorf("invalid sampling precision: 0") + } else if cfg.SamplingPrecision > sampling.NumHexDigits { + return fmt.Errorf("sampling precision is too great, should be <= 14: %d", cfg.SamplingPrecision) + } + return nil } diff --git a/processor/probabilisticsamplerprocessor/factory.go b/processor/probabilisticsamplerprocessor/factory.go index 61cfdb555d15..25d5bd1d6f8c 100644 --- a/processor/probabilisticsamplerprocessor/factory.go +++ b/processor/probabilisticsamplerprocessor/factory.go @@ -20,6 +20,8 @@ import ( var onceMetrics sync.Once +const defaultPrecision = 3 + // NewFactory returns a new factory for the Probabilistic sampler processor. func NewFactory() processor.Factory { onceMetrics.Do(func() { @@ -36,8 +38,9 @@ func NewFactory() processor.Factory { func createDefaultConfig() component.Config { return &Config{ - AttributeSource: defaultAttributeSource, - SamplerMode: modeUnset, + AttributeSource: defaultAttributeSource, + SamplerMode: modeUnset, + SamplingPrecision: defaultPrecision, } } diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index 200dae7ca0f5..cb8cdda793a2 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -83,6 +83,7 @@ type traceEqualizer struct { // traceEqualizer adjusts thresholds relatively. Cannot be used with zero. type traceProportionalizer struct { ratio float64 + prec uint8 } // zeroProbability is a bypass for all cases with Percent==0. 
@@ -149,7 +150,7 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * tp.sampler = ts case Equalizing: - threshold, err := sampling.ProbabilityToThreshold(ratio) + threshold, err := sampling.ProbabilityToThresholdWithPrecision(ratio, cfg.SamplingPrecision) if err != nil { return nil, err } @@ -161,6 +162,7 @@ func newTracesProcessor(ctx context.Context, set processor.CreateSettings, cfg * case Proportional: tp.sampler = &traceProportionalizer{ ratio: ratio, + prec: cfg.SamplingPrecision, } } } @@ -224,13 +226,18 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace // TODO: Configure fail-open vs fail-closed? return true, nil, err } - incoming := 1.0 otts := wts.OTelValue() + if otts.HasTValue() && !otts.TValueThreshold().ShouldSample(rnd) { + err = ErrInconsistentArrivingTValue + otts.ClearTValue() + } + + incoming := 1.0 if otts.HasTValue() { incoming = otts.TValueThreshold().Probability() } - threshold, _ := sampling.ProbabilityToThreshold(incoming * ts.ratio) + threshold, _ := sampling.ProbabilityToThresholdWithPrecision(incoming*ts.ratio, ts.prec) should := threshold.ShouldSample(rnd) if should { _ = otts.UpdateTValueWithSampling(threshold, threshold.TValue()) diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index e968620944a8..90c3d590bb54 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -416,7 +416,9 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { // Trace randomness (7 bytes) 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }) - attribValue.CopyTo(span.Attributes().PutEmpty(key)) + if key != "" { + attribValue.CopyTo(span.Attributes().PutEmpty(key)) + } span.SetSpanID(sid) return traces } @@ -430,15 +432,23 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { // returns sampled, 
adjustedCount sf func(SamplerMode) (bool, float64, string) }{ + { + name: "simple_100", + cfg: &Config{ + SamplingPercentage: 100, + }, + ts: "", + sf: func(SamplerMode) (bool, float64, string) { + return true, 1, "ot=th:0" + }, + }, { name: "yes_sample_tid", cfg: &Config{ SamplingPercentage: 50, }, - ts: "", - key: "n/a", - value: pcommon.NewValueInt(2), - sf: func(SamplerMode) (bool, float64, string) { return true, 2, "ot=th:8" }, + ts: "", + sf: func(SamplerMode) (bool, float64, string) { return true, 2, "ot=th:8" }, }, { name: "yes_sample_rv1", @@ -446,9 +456,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { SamplingPercentage: 1, }, // 99/100 = .FD70A3D70A3D70A3D - ts: "ot=rv:FD70A3D70A3D71", // note upper case passes through, is not generated - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:FD70A3D70A3D71", // note upper case passes through, is not generated sf: func(SamplerMode) (bool, float64, string) { return true, 1 / 0.01, "ot=rv:FD70A3D70A3D71;th:fd70a3d70a3d71" }, @@ -458,17 +466,13 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { cfg: &Config{ SamplingPercentage: 1, }, - ts: "ot=rv:FD70A3D70A3D70", - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:FD70A3D70A3D70", }, { name: "no_sample", cfg: &Config{ SamplingPercentage: 49, }, - key: "n/a", - value: pcommon.NewValueInt(2), }, { name: "no_sample_rv1", @@ -476,9 +480,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { SamplingPercentage: 1, }, // 99/100 = .FD70A3D70A3D70A3D - ts: "ot=rv:FD70A3D70A3D70", - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:FD70A3D70A3D70", }, { name: "yes_sample_rv2", @@ -486,9 +488,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { SamplingPercentage: 1, }, // 99/100 = .FD70A3D70A3D70A3D - ts: "ot=rv:fd70B000000000", - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:fd70B000000000", sf: func(SamplerMode) (bool, float64, string) { return true, 1 / 0.01, 
"ot=rv:fd70B000000000;th:fd70a3d70a3d71" }, @@ -517,9 +517,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { cfg: &Config{ SamplingPercentage: 50, }, - ts: "ot=rv:90000000000000;th:80000000000000", // note extra zeros! - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:90000000000000;th:80000000000000", // note extra zeros! sf: func(mode SamplerMode) (bool, float64, string) { if mode == Equalizing { return true, 2, "ot=rv:90000000000000;th:8" @@ -532,9 +530,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { cfg: &Config{ SamplingPercentage: 50, }, - ts: "ot=th:8", - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=th:8", sf: func(mode SamplerMode) (bool, float64, string) { if mode == Equalizing { return true, 2, "ot=th:8" @@ -543,13 +539,11 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "incoming_rvalue_99", + name: "incoming_rvalue_75", cfg: &Config{ SamplingPercentage: 50, }, - ts: "ot=rv:c0000000000000;th:8", - key: "n/a", - value: pcommon.NewValueInt(2), + ts: "ot=rv:c0000000000000;th:8", sf: func(mode SamplerMode) (bool, float64, string) { if mode == Equalizing { return true, 2, "ot=rv:c0000000000000;th:8" @@ -557,6 +551,41 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { return true, 4, "ot=rv:c0000000000000;th:c" }, }, + { + name: "inconsistent_yes1", + cfg: &Config{ + SamplingPercentage: 100, + }, + ts: "ot=rv:40000000000000;th:8", + sf: func(SamplerMode) (bool, float64, string) { + return true, 1, "ot=rv:40000000000000;th:0" + }, + }, + { + name: "inconsistent_no1", + cfg: &Config{ + SamplingPercentage: 1, + }, + ts: "ot=rv:40000000000000;th:8", + sf: func(SamplerMode) (bool, float64, string) { + return false, 0, "" + }, + }, + { + name: "precision_3", + cfg: &Config{ + SamplingPercentage: 40, + SamplingPrecision: 3, + }, + ts: "ot=rv:a0000000000000", // ;th:8 TODO fix + // this tolerance (b/c prec) then add a test + // like this with inconsistent-yes i.e., + // testng 
that th:8 is discarded before th:999 + // is added + sf: func(SamplerMode) (bool, float64, string) { + return true, 1 / 0.4, "ot=rv:a0000000000000;th:999" + }, + }, } for _, tt := range tests { for _, mode := range []SamplerMode{Equalizing, Proportional} { From f69d6ee48c2d20d45f2198c4202e02d4e5d405c4 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 8 Dec 2023 13:23:42 -0800 Subject: [PATCH 34/38] ProbabilityToThresholdWithPrecision --- pkg/sampling/encoding_test.go | 96 +++++++++++++++++++++++++++++++++++ pkg/sampling/probability.go | 43 ++++++++++++---- 2 files changed, 130 insertions(+), 9 deletions(-) diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go index 6975cabb045b..84a6dfcb80e5 100644 --- a/pkg/sampling/encoding_test.go +++ b/pkg/sampling/encoding_test.go @@ -229,6 +229,102 @@ func TestTValueSyntax(t *testing.T) { } } +func TestProbabilityToThresholdWithPrecision(t *testing.T) { + type kase struct { + prob float64 + exact string + rounded []string + } + + for _, test := range []kase{ + // Note: remember 8 is half of 16: hex rounds up at 8+, down at 7-. 
+ { + 1 - 0x456789ap-28, + "456789a", + []string{ + "45678a", + "45679", + "4568", + "456", + "45", + "4", + }, + }, + // Add 3 leading zeros + { + 1 - 0x456789ap-40, + "000456789a", + []string{ + "00045678a", + "00045679", + "0004568", + "000456", + "00045", + "0004", + }, + }, + // Rounding up + { + 1 - 0x789abcdefp-40, + "0789abcdef", + []string{ + "0789abcdef", + "0789abcdf", + "0789abce", + "0789abd", + "0789ac", + "0789b", + "078a", + "079", + "08", + }, + }, + // Rounding down + { + 1 - 0x12345678p-32, + "12345678", + []string{ + "1234568", + "123456", + "12345", + "1234", + "123", + "12", + "1", + }, + }, + // Zeros + { + 1 - 0x80801p-28, + "0080801", + []string{ + "00808", + "008", + }, + }, + } { + t.Run(test.exact, func(t *testing.T) { + th, err := ProbabilityToThreshold(test.prob) + require.NoError(t, err) + require.Equal(t, th.TValue(), test.exact) + + for _, round := range test.rounded { + t.Run(round, func(t *testing.T) { + // Requested precision is independent of leading zeros, + // so strip them to calculate test precision. + strip := round + for strip[0] == '0' { + strip = strip[1:] + } + rth, err := ProbabilityToThresholdWithPrecision(test.prob, uint8(len(strip))) + require.NoError(t, err) + require.Equal(t, round, rth.TValue()) + }) + } + }) + } +} + // There were two benchmarks used to choose the implementation for the // Threshold type in this package. The results indicate that it is // faster to compare a 56-bit number than to compare as 7 element diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go index 7ab83b6e6dfb..0d723b28737a 100644 --- a/pkg/sampling/probability.go +++ b/pkg/sampling/probability.go @@ -5,13 +5,15 @@ package sampling import ( "errors" - "fmt" "math" ) // ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. 
var ErrProbabilityRange = errors.New("sampling probability out of range (0x1p-56 <= valid <= 1)") +// ErrPrecisionUnderflow is returned when a precision is too great for the range. +var ErrPrecisionUnderflow = errors.New("sampling precision underflow") + // MinSamplingProbability is the smallest representable probability // and is the inverse of MaxAdjustedCount. const MinSamplingProbability = 1.0 / MaxAdjustedCount @@ -35,17 +37,40 @@ func ProbabilityToThreshold(prob float64) (Threshold, error) { } func ProbabilityToThresholdWithPrecision(prob float64, prec uint8) (Threshold, error) { - th, err := ProbabilityToThreshold(prob) - if err != nil || prec == 0 || prec > 14 { - return th, err + // Assume full precision at 0. + if prec == 0 { + return ProbabilityToThreshold(prob) + } + if !probabilityInRange(prob) { + return AlwaysSampleThreshold, ErrProbabilityRange + } + + // Adjust precision considering the significance of leading + // zeros. If we can multiply the rejection probability by 16 + // and still be less than 1, then there is a leading zero of + // obligatory precision. + for reject := 1 - prob; reject*16 < 1; { + reject *= 16 + prec++ } - scaled := th.unsigned - divisor := uint64(1) << (4 * (14 - prec)) - rescaled := uint64(math.Round(float64(scaled/divisor)) * float64(divisor)) - fmt.Printf("SCALED %x %x %x\n", scaled, divisor, rescaled) + // Check if leading zeros plus precision is above the maximum. + // This is called underflow because the requested precision + // leads to complete no significant figures. 
+ if prec > NumHexDigits { + return AlwaysSampleThreshold, ErrPrecisionUnderflow + } + + scaled := uint64(math.Round(prob * MaxAdjustedCount)) + rscaled := MaxAdjustedCount - scaled + shift := 4 * (14 - prec) + half := uint64(1) << (shift - 1) + + rscaled = (rscaled + half) >> shift + rscaled = rscaled << shift + return Threshold{ - unsigned: rescaled, + unsigned: rscaled, }, nil } From cc029344cebe12f5e410e8387bfa77411c187318 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 8 Dec 2023 13:23:52 -0800 Subject: [PATCH 35/38] test coverage for equalizing and proportional --- .../tracesprocessor_test.go | 75 +++++++++++-------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go index 90c3d590bb54..17fe756d76f3 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor_test.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor_test.go @@ -428,12 +428,10 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { ts string key string value pcommon.Value - - // returns sampled, adjustedCount - sf func(SamplerMode) (bool, float64, string) + sf func(SamplerMode) (sampled bool, adjCount float64, tracestate string) }{ { - name: "simple_100", + name: "100 percent", cfg: &Config{ SamplingPercentage: 100, }, @@ -443,7 +441,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "yes_sample_tid", + name: "50 percent sampled", cfg: &Config{ SamplingPercentage: 50, }, @@ -451,31 +449,29 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { sf: func(SamplerMode) (bool, float64, string) { return true, 2, "ot=th:8" }, }, { - name: "yes_sample_rv1", + name: "1 percent sampled", cfg: &Config{ SamplingPercentage: 1, }, - // 99/100 = .FD70A3D70A3D70A3D + // 99/100 = .fd70a3d70a3d70a3d ts: "ot=rv:FD70A3D70A3D71", // note upper case passes through, is not generated sf: func(SamplerMode) 
(bool, float64, string) { return true, 1 / 0.01, "ot=rv:FD70A3D70A3D71;th:fd70a3d70a3d71" }, }, { - name: "no_sample_rv2", + name: "1 percent sampled with rvalue and precision", cfg: &Config{ SamplingPercentage: 1, + SamplingPrecision: 3, }, - ts: "ot=rv:FD70A3D70A3D70", - }, - { - name: "no_sample", - cfg: &Config{ - SamplingPercentage: 49, + ts: "ot=rv:FD70A3D70A3D71", + sf: func(SamplerMode) (bool, float64, string) { + return true, 1 / 0.01, "ot=rv:FD70A3D70A3D71;th:fd7" }, }, { - name: "no_sample_rv1", + name: "1 percent not sampled with rvalue", cfg: &Config{ SamplingPercentage: 1, }, @@ -483,7 +479,13 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { ts: "ot=rv:FD70A3D70A3D70", }, { - name: "yes_sample_rv2", + name: "49 percent not sampled", + cfg: &Config{ + SamplingPercentage: 49, + }, + }, + { + name: "1 percent sampled with rvalue", cfg: &Config{ SamplingPercentage: 1, }, @@ -494,7 +496,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "yes_sample_priority", + name: "sampled by priority", cfg: &Config{ SamplingPercentage: 1, }, @@ -504,7 +506,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { sf: func(SamplerMode) (bool, float64, string) { return true, 0, "" }, }, { - name: "no_sample_priority", + name: "not sampled by priority", cfg: &Config{ SamplingPercentage: 99, }, @@ -513,7 +515,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { value: pcommon.NewValueInt(0), }, { - name: "incoming_50", + name: "incoming 50 percent", cfg: &Config{ SamplingPercentage: 50, }, @@ -526,7 +528,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "norvalue_50", + name: "incoming 50 percent with no rvalue", cfg: &Config{ SamplingPercentage: 50, }, @@ -539,7 +541,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "incoming_rvalue_75", + name: "equalizing vs proportional", cfg: &Config{ SamplingPercentage: 50, }, @@ -552,7 +554,7 @@ func 
Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "inconsistent_yes1", + name: "inconsistent threshold arriving", cfg: &Config{ SamplingPercentage: 100, }, @@ -562,7 +564,7 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "inconsistent_no1", + name: "inconsistent threshold not samp,led", cfg: &Config{ SamplingPercentage: 1, }, @@ -572,18 +574,27 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { }, }, { - name: "precision_3", + name: "40 percent precision 3", cfg: &Config{ SamplingPercentage: 40, SamplingPrecision: 3, }, - ts: "ot=rv:a0000000000000", // ;th:8 TODO fix - // this tolerance (b/c prec) then add a test - // like this with inconsistent-yes i.e., - // testng that th:8 is discarded before th:999 - // is added + ts: "ot=rv:a0000000000000", sf: func(SamplerMode) (bool, float64, string) { - return true, 1 / 0.4, "ot=rv:a0000000000000;th:999" + return true, 1 / 0.4, "ot=rv:a0000000000000;th:99a" + }, + }, + { + name: "60 percent inconsistent resampled", + cfg: &Config{ + SamplingPercentage: 60, + SamplingPrecision: 4, + }, + // This th:8 is inconsistent with rv, is erased. 
But, the + // rv qualifies for the 60% sampling (th:666666 repeating) + ts: "ot=rv:70000000000000;th:8", + sf: func(SamplerMode) (bool, float64, string) { + return true, 1 / 0.6, "ot=rv:70000000000000;th:6666" }, }, } @@ -619,8 +630,10 @@ func Test_tracesamplerprocessor_TraceState(t *testing.T) { require.NoError(t, err) if expectCount == 0 { assert.Equal(t, 0.0, gotTs.OTelValue().AdjustedCount()) - } else { + } else if cfg.SamplingPrecision == 0 { assert.InEpsilon(t, expectCount, gotTs.OTelValue().AdjustedCount(), 1e-9) + } else { + assert.InEpsilon(t, expectCount, gotTs.OTelValue().AdjustedCount(), 1e-3) } require.Equal(t, expectTS, got.TraceState().AsRaw()) } else { From 1eecc4a1fe60e73eac45ca80b16945dd2be0b33c Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 8 Dec 2023 13:36:05 -0800 Subject: [PATCH 36/38] config test --- processor/probabilisticsamplerprocessor/config.go | 2 +- .../probabilisticsamplerprocessor/config_test.go | 5 ++++- .../testdata/config.yaml | 14 +++++--------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go index 9bb1a923c957..69017996845a 100644 --- a/processor/probabilisticsamplerprocessor/config.go +++ b/processor/probabilisticsamplerprocessor/config.go @@ -65,7 +65,7 @@ type Config struct { // - "proportional": Using an OTel-specified consistent sampling // mechanism, this sampler reduces the effective sampling // probability of each span by `SamplingProbability`. - SamplerMode SamplerMode `mapstructure:"sampler_mode"` + SamplerMode SamplerMode `mapstructure:"mode"` /////// // Logs only fields below. 
diff --git a/processor/probabilisticsamplerprocessor/config_test.go b/processor/probabilisticsamplerprocessor/config_test.go index 90711d343552..6ba11c6cd0d5 100644 --- a/processor/probabilisticsamplerprocessor/config_test.go +++ b/processor/probabilisticsamplerprocessor/config_test.go @@ -26,7 +26,8 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, ""), expected: &Config{ SamplingPercentage: 15.3, - HashSeed: 22, + SamplingPrecision: 4, + SamplerMode: "proportional", AttributeSource: "traceID", }, }, @@ -34,7 +35,9 @@ func TestLoadConfig(t *testing.T) { id: component.NewIDWithName(metadata.Type, "logs"), expected: &Config{ SamplingPercentage: 15.3, + SamplingPrecision: 3, HashSeed: 22, + SamplerMode: "hash_seed", AttributeSource: "record", FromAttribute: "foo", SamplingPriority: "bar", diff --git a/processor/probabilisticsamplerprocessor/testdata/config.yaml b/processor/probabilisticsamplerprocessor/testdata/config.yaml index a834def5d98c..2c9510e42c8f 100644 --- a/processor/probabilisticsamplerprocessor/testdata/config.yaml +++ b/processor/probabilisticsamplerprocessor/testdata/config.yaml @@ -7,21 +7,17 @@ processors: # The "sampling.priority" semantics have priority over trace id hashing and # can be used to control if given spans are sampled, ie.: forwarded, or not. probabilistic_sampler: + # mode may be "proportional", "equalizing", or "hash_seed" + mode: proportional # the percentage rate at which traces are going to be sampled. Defaults to # zero, i.e.: no sample. Values greater or equal 100 are treated as # "sample all traces". sampling_percentage: 15.3 - # hash_seed allows one to configure the hashing seed. This is important in - # scenarios where multiple layers of collectors are used to achieve the - # desired sampling rate, eg.: 10% on first layer and 10% on the - # second, resulting in an overall sampling rate of 1% (10% x 10%). 
- # If all layers use the same seed, all data passing one layer will also pass - # the next one, independent of the configured sampling rate. Having different - # seeds at different layers ensures that sampling rate in each layer work as - # intended. - hash_seed: 22 + sampling_precision: 4 probabilistic_sampler/logs: + # mode may be "proportional", "equalizing", or "hash_seed" + mode: hash_seed # the percentage rate at which logs are going to be sampled. Defaults to # zero, i.e.: no sample. Values greater or equal 100 are treated as # "sample all logs". From 2159107c3752ab6f52153e24c0a2c129b936e2c3 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 8 Dec 2023 13:45:13 -0800 Subject: [PATCH 37/38] comments and notes --- .../tracesprocessor.go | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/tracesprocessor.go b/processor/probabilisticsamplerprocessor/tracesprocessor.go index cb8cdda793a2..7124f2b2c990 100644 --- a/processor/probabilisticsamplerprocessor/tracesprocessor.go +++ b/processor/probabilisticsamplerprocessor/tracesprocessor.go @@ -104,7 +104,7 @@ func randomnessFromSpan(s ptrace.Span) (sampling.Randomness, *sampling.W3CTraceS // See https://github.com/open-telemetry/opentelemetry-proto/pull/503 // which merged but unreleased at the time of writing. // - // When we have an additional flag indicating this + // Note: When we have an additional flag indicating this // randomness is present we should inspect the flag // and return that no randomness is available, here. randomness = sampling.TraceIDToRandomness(s.TraceID()) @@ -193,16 +193,20 @@ func (ts *traceHasher) updateTracestate(_ pcommon.TraceID, _ bool, _ *sampling.W func (ts *traceEqualizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { rnd, wts, err := randomnessFromSpan(s) if err != nil { - // TODO: Configure fail-open vs fail-closed? 
- return true, nil, err + return false, nil, err } otts := wts.OTelValue() - // Consistency check: if the TraceID is out of range - // (unless the TValue is zero), the TValue is a lie. - // If inconsistent, clear it. - if otts.HasTValue() && !otts.TValueThreshold().ShouldSample(rnd) { - err = ErrInconsistentArrivingTValue - otts.ClearTValue() + // Consistency check: if the TraceID is out of range, the + // TValue is a lie. If inconsistent, clear it. + if otts.HasTValue() { + if !otts.TValueThreshold().ShouldSample(rnd) { + err = ErrInconsistentArrivingTValue + otts.ClearTValue() + } + } else if !otts.HasTValue() { + // Note: We could in this case attach another + // tracestate to signify that the incoming sampling + // threshold was at one point unknown. } return ts.traceIDThreshold.ShouldSample(rnd), wts, err @@ -223,10 +227,11 @@ func (ts *traceEqualizer) updateTracestate(tid pcommon.TraceID, should bool, wts func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTraceState, error) { rnd, wts, err := randomnessFromSpan(s) if err != nil { - // TODO: Configure fail-open vs fail-closed? - return true, nil, err + return false, nil, err } otts := wts.OTelValue() + // Consistency check: if the TraceID is out of range, the + // TValue is a lie. If inconsistent, clear it. if otts.HasTValue() && !otts.TValueThreshold().ShouldSample(rnd) { err = ErrInconsistentArrivingTValue otts.ClearTValue() @@ -235,6 +240,10 @@ func (ts *traceProportionalizer) decide(s ptrace.Span) (bool, *sampling.W3CTrace incoming := 1.0 if otts.HasTValue() { incoming = otts.TValueThreshold().Probability() + } else { + // Note: We could in this case attach another + // tracestate to signify that the incoming sampling + // threshold was at one point unknown. 
} threshold, _ := sampling.ProbabilityToThresholdWithPrecision(incoming*ts.ratio, ts.prec) From e0898a65054be3df6955897a84fe494e72e969cf Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 8 Dec 2023 13:51:13 -0800 Subject: [PATCH 38/38] update README --- .../probabilisticsamplerprocessor/README.md | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/README.md b/processor/probabilisticsamplerprocessor/README.md index c46b4b90f810..d9f37611ef98 100644 --- a/processor/probabilisticsamplerprocessor/README.md +++ b/processor/probabilisticsamplerprocessor/README.md @@ -54,7 +54,8 @@ The following configuration options can be modified: - `attribute_source` (default = traceID, optional): defines where to look for the attribute in from_attribute. The allowed values are `traceID` or `record`. - `from_attribute` (default = null, optional): The optional name of a log record attribute used for sampling purposes, such as a unique log record ID. The value of the attribute is only used if the trace ID is absent or if `attribute_source` is set to `record`. - `sampling_priority` (default = null, optional): The optional name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record. -- `sampler_mode` (default = "", optional): The optional sampling mode. One of "hash_seed", "equalizing", and "propotional". By default, when not explicitly set, if "hash_seed" is non-zero, the "hash_seed" mode will be configured, otherwise the "proportional" mode is selected. +- `mode` (default = "", optional): The optional sampling mode. One of "hash_seed", "equalizing", and "proportional". By default, when not explicitly set, if "hash_seed" is non-zero, the "hash_seed" mode will be configured, otherwise the "proportional" mode is selected. 
+- `sampling_precision` (default = 3, optional): The number of hexadecimal digits used to express the sampling rejection threshold. ## Hashing @@ -69,7 +70,7 @@ Sample 15% of the logs: ```yaml processors: probabilistic_sampler: - sampler_mode: hash_seed + mode: hash_seed sampling_percentage: 15 ``` @@ -78,7 +79,7 @@ Sample logs according to their logID attribute: ```yaml processors: probabilistic_sampler: - sampler_mode: hash_seed + mode: hash_seed sampling_percentage: 15 attribute_source: record # possible values: one of record or traceID from_attribute: logID # value is required if the source is not traceID @@ -89,7 +90,7 @@ Sample logs according to the attribute `priority`: ```yaml processors: probabilistic_sampler: - sampler_mode: hash_seed + mode: hash_seed sampling_percentage: 15 sampling_priority: priority ``` @@ -132,11 +133,18 @@ For example, to configure an equalizing sampler, set the mode explicitly: ``` processors: probabilistic_sampler: - sampler_mode: equalizing + mode: equalizing sampling_percentage: 10 ``` +The optional `sampling_precision` field determines how many +hexadecimal digits are used to express the sampling rejection +threshold. By default, 3 hex digits are used. For example, 60% +sampling is approximated as "666" with precision 3, because the +rejection threshold of 40% is approximated by `0x666` out of `0x1000`, +indicating a sampling probability of precisely 60.009765625%. + ## Detailed examples -Refer to [config.yaml](./testdata/config.yaml) for detailed -examples on using the processor. +Refer to [config.yaml](./testdata/config.yaml) for detailed examples +on using the processor.