open-telemetry · jmacd · May 12, 2023 · May 15, 2023 · May 16, 2023 · May 16, 2023
@@ -0,0 +1,100 @@
+package sampling
+
+import (
+	"errors"
+	"strings"
+
+	"go.uber.org/multierr"
+)
+
+// KV is a single key-value pair in which both the key and the value
+// are strings.
+type KV struct {
+	Key, Value string
+}
+
+// Sentinel errors describing an invalid tracestate.
+var (
+	// ErrTraceStateSize is the error value for a tracestate of invalid
+	// size. NOTE(review): nothing visible in this file returns it;
+	// presumably the callers that check overall size do — confirm.
+	ErrTraceStateSize  = errors.New("invalid tracestate size")
+	// ErrTraceStateCount is returned by scanKeyValues when the input
+	// holds more list items than the scanner's maxItems permits.
+	ErrTraceStateCount = errors.New("invalid tracestate item count")
+)
+
+// keyValueScanner defines distinct scanner behaviors for lists of
+// key-values.  Its fields are read by scanKeyValues to decide how the
+// input is split into members and how each member is split into a
+// key and a value.
+type keyValueScanner struct {
+	// maxItems is 32 or -1.  scanKeyValues applies an item-count
+	// limit only when this value is positive.
+	maxItems int
+	// trim is set if OWS (optional whitespace) should be removed
+	// from both ends of each list member before it is split.
+	trim bool
+	// separator is , or ; and delimits the members of the list.
+	separator byte
+	// equality is = or : and divides a member into key and value at
+	// its first occurrence.
+	equality byte
+}
+
+// commonTraceState carries a list of key-value pairs that its methods
+// expose as "extra values".
+type commonTraceState struct {
+	kvs []KV
+}
+
+// HasExtraValues reports whether at least one key-value pair is held.
+func (cts commonTraceState) HasExtraValues() bool {
+	return len(cts.kvs) > 0
+}
+
+// ExtraValues returns the held key-value pairs.  The slice is
+// returned as stored, not copied.
+func (cts commonTraceState) ExtraValues() []KV {
+	return cts.kvs
+}
+
+// trimOws removes optional whitespace (spaces and horizontal tabs) on
+// both ends of a string and returns the remaining substring.  An
+// empty or all-whitespace input yields "".
+func trimOws(input string) string {
+	// Hard-codes the value of owsCharset.
+	//
+	// The whitespace test is parenthesized on purpose: && binds
+	// tighter than ||, so without the grouping the second comparison
+	// would index the string even when it is empty and panic.
+	start, end := 0, len(input)
+	for start < end && (input[start] == ' ' || input[start] == '\t') {
+		start++
+	}
+	for end > start && (input[end-1] == ' ' || input[end-1] == '\t') {
+		end--
+	}
+	return input[start:end]
+}
+
+// scanKeyValues splits input on s.separator and calls f once for each
+// member that contains s.equality, passing the text before and after
+// the first equality byte as key and value.
+//
+// When s.trim is set, optional whitespace is stripped from both ends
+// of each member first.  Empty members and members without an
+// equality byte are skipped without calling f.
+//
+// Errors returned by f do not stop the scan; they are accumulated
+// with multierr and returned after the whole input is consumed.  When
+// s.maxItems is positive and the input holds more than s.maxItems
+// members (skipped members count too), the scan stops and
+// ErrTraceStateCount is returned on its own, without the accumulated
+// errors.
+func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) error) error {
+	var rval error
+	items := 0
+	for input != "" {
+		items++
+		if s.maxItems > 0 && items > s.maxItems {
+			// W3C specifies max 32 entries, tested here
+			// instead of via the regexp.  The comparison is
+			// strict so that exactly maxItems entries are
+			// still accepted.
+			return ErrTraceStateCount
+		}
+
+		sep := strings.IndexByte(input, s.separator)
+
+		var member string
+		if sep < 0 {
+			// Last member: consume the rest of the input.
+			member = input
+			input = ""
+		} else {
+			member = input[:sep]
+			input = input[sep+1:]
+		}
+
+		if s.trim {
+			// Trim only required for W3C; OTel does not
+			// specify whitespace for its value encoding.
+			member = trimOws(member)
+		}
+
+		if member == "" {
+			// W3C allows empty list members.
+			continue
+		}
+
+		eq := strings.IndexByte(member, s.equality)
+		if eq < 0 {
+			// A regexp should have rejected this input.
+			continue
+		}
+		if err := f(member[:eq], member[eq+1:]); err != nil {
+			rval = multierr.Append(rval, err)
+		}
+	}
+	return rval
+}
@@ -0,0 +1,207 @@
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sampling
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math/rand"
+	"testing"
+
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/bytes"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/collector/pdata/pcommon"
+)
+
+// must unwraps a (value, error) pair: it returns t when err is nil
+// and panics with err otherwise.
+func must[T any](t T, err error) T {
+	if err == nil {
+		return t
+	}
+	panic(err)
+}
+
+// mustNot is for calls that are expected to fail.  It ignores t and
+// returns err unchanged when err is non-nil; when err is nil it
+// returns a new error saying that an error was expected.
+//
+// The result is therefore never nil, so an assertion that only checks
+// the result for non-nil cannot fail.
+func mustNot[T any](t T, err error) error {
+	if err != nil {
+		return err
+	}
+	return fmt.Errorf("expected an error, got nil")
+}
+
+// probabilityToTValue converts prob with ProbabilityToThreshold and
+// returns the resulting threshold's TValue as a string, together with
+// the conversion error.  The TValue is computed even when the
+// conversion reports an error.
+func probabilityToTValue(prob float64) (string, error) {
+	threshold, err := ProbabilityToThreshold(prob)
+	tvalue := string(threshold.TValue())
+	return tvalue, err
+}
+
+// tValueToProbability parses tv with TValueToThreshold and returns
+// the resulting threshold's Probability, together with the parse
+// error.  The probability is computed even when parsing reports an
+// error.
+func tValueToProbability(tv string) (float64, error) {
+	threshold, err := TValueToThreshold(tv)
+	prob := threshold.Probability()
+	return prob, err
+}
+
+// TestValidProbabilityToTValue checks the T-value string produced for
+// a set of valid probabilities.
+func TestValidProbabilityToTValue(t *testing.T) {
+	cases := []struct {
+		prob float64
+		want string
+	}{
+		{1.0, ""},
+		{0.5, "8"},
+		{0x1p-56, "00000000000001"},
+		{1 / 3., "55555555555554"},
+		{0x54p-8, "54"}, // 0x54p-8 is approximately 1/3
+		{0x1p-8, "01"},
+		{0, "0"},
+	}
+	for _, tc := range cases {
+		require.Equal(t, tc.want, must(probabilityToTValue(tc.prob)))
+	}
+}
+
+// TestInvalidprobabilityToTValue checks that out-of-range
+// probabilities are rejected with an error.
+//
+// The error returned by probabilityToTValue is asserted directly.
+// Routing it through mustNot would make the check vacuous, because
+// mustNot substitutes its own error when the call succeeds and so
+// never returns nil.
+func TestInvalidprobabilityToTValue(t *testing.T) {
+	invalid := []float64{
+		0x1p-57, // Too small
+		1.1,     // Too big
+	}
+	for _, prob := range invalid {
+		_, err := probabilityToTValue(prob)
+		require.Error(t, err, "probability %v", prob)
+	}
+}
+
+// TestTValueToProbability checks the probability recovered from a set
+// of T-value strings.
+func TestTValueToProbability(t *testing.T) {
+	exact := []struct {
+		tvalue string
+		want   float64
+	}{
+		{"8", 0.5},
+		{"444", 0x444p-12},
+		{"0", 0.0},
+	}
+	for _, tc := range exact {
+		require.Equal(t, tc.want, must(tValueToProbability(tc.tvalue)))
+	}
+
+	// 0x55555554p-32 is very close to 1/3
+	oneThird := must(tValueToProbability("55555554"))
+	require.InEpsilon(t, 1/3., oneThird, 1e-9)
+}
+
+// TestProbabilityToThreshold checks that converting a probability
+// yields the same threshold as parsing the matching T-value, and that
+// probabilities 1 and 0 map to the always/never-sample thresholds.
+func TestProbabilityToThreshold(t *testing.T) {
+	pairs := []struct {
+		tvalue string
+		prob   float64
+	}{
+		{"8", 0.5},
+		{"00000000000001", 0x1p-56},
+		{"000000000001", 0x100p-56},
+		{"00000000000002", 0x1p-55},
+	}
+	for _, p := range pairs {
+		require.Equal(t,
+			must(TValueToThreshold(p.tvalue)),
+			must(ProbabilityToThreshold(p.prob)))
+	}
+
+	require.Equal(t, AlwaysSampleThreshold, must(ProbabilityToThreshold(1.0)))
+	require.Equal(t, NeverSampleThreshold, must(ProbabilityToThreshold(0)))
+}
+
+func TestShouldSample(t *testing.T) {
+	// Test four boundary conditions for 50% sampling,
+	thresh := must(ProbabilityToThreshold(0.5))
+	// Smallest TraceID that should sample.
+	require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{
+		// 9 meaningless bytes
+		0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee,
+		0, // randomness starts here
+		0, 0, 0, 0, 0, 0,
+	})))
+	// Largest TraceID that should sample.
+	require.True(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{
+		// 9 meaningless bytes
+		0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee,
+		0x7f, // randomness starts here
+		0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	})))
+	// Smallest TraceID that should NOT sample.
+	require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{
+		// 9 meaningless bytes
+		0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee,
+		0x80, // randomness starts here
+		0, 0, 0, 0, 0, 0,
+	})))
+	// Largest TraceID that should NOT sample.
+	require.False(t, thresh.ShouldSample(RandomnessFromTraceID(pcommon.TraceID{
+		// 9 meaningless bytes
+		0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee,
+		0xff, // randomness starts here
+		0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	})))
+}
+
+// The two benchmarks below were used to choose the implementation for
+// the Threshold type in this package.  The results indicate that it
+// is faster to compare a 56-bit number than to compare a 7-element []byte.
+
+// benchTIDs is a fixed set of 1024 trace IDs used as benchmark input.
+type benchTIDs [1024]pcommon.TraceID
+
+// init overwrites every trace ID with 16 bytes drawn from math/rand,
+// written as two big-endian uint64 halves (first half, then second).
+func (tids *benchTIDs) init() {
+	for i := 0; i < len(tids); i++ {
+		first, second := tids[i][:8], tids[i][8:]
+		binary.BigEndian.PutUint64(first, rand.Uint64())
+		binary.BigEndian.PutUint64(second, rand.Uint64())
+	}
+}
+
+// BenchmarkThresholdCompareAsUint64-10    	1000000000	         0.4515 ns/op	       0 B/op	       0 allocs/op
+//
+// BenchmarkThresholdCompareAsUint64 times ShouldSample for the
+// internal/unsigned Threshold implementation, cycling through 1024
+// random trace IDs paired with thresholds built from random
+// probabilities.
+func BenchmarkThresholdCompareAsUint64(b *testing.B) {
+	var (
+		tids  benchTIDs
+		comps [1024]unsigned.Threshold
+	)
+	tids.init()
+	for i := range comps {
+		th, err := unsigned.ProbabilityToThreshold(rand.Float64())
+		if err != nil {
+			b.Fatal(err)
+		}
+		comps[i] = th
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	var sampled, dropped int
+	for i := 0; i < b.N; i++ {
+		slot := i % len(tids)
+		id, th := tids[slot], comps[slot]
+
+		if th.ShouldSample(unsigned.RandomnessFromTraceID(id)) {
+			sampled++
+			continue
+		}
+		dropped++
+	}
+}
+
+// BenchmarkThresholdCompareAsBytes-10     	528679580	         2.288 ns/op	       0 B/op	       0 allocs/op
+//
+// BenchmarkThresholdCompareAsBytes times ShouldSample for the
+// internal/bytes Threshold implementation, cycling through 1024
+// random trace IDs paired with thresholds built from random
+// probabilities.
+func BenchmarkThresholdCompareAsBytes(b *testing.B) {
+	var (
+		tids  benchTIDs
+		comps [1024]bytes.Threshold
+	)
+	tids.init()
+	for i := range comps {
+		th, err := bytes.ProbabilityToThreshold(rand.Float64())
+		if err != nil {
+			b.Fatal(err)
+		}
+		comps[i] = th
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	var sampled, dropped int
+	for i := 0; i < b.N; i++ {
+		slot := i % len(tids)
+		id, th := tids[slot], comps[slot]
+
+		if th.ShouldSample(bytes.RandomnessFromTraceID(id)) {
+			sampled++
+			continue
+		}
+		dropped++
+	}
+}
@@ -0,0 +1,23 @@
+module github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling
+
+go 1.20
+
+require (
+	github.com/stretchr/testify v1.8.2
+	go.opentelemetry.io/collector/pdata v1.0.0-rcv0011
+	go.uber.org/multierr v1.11.0
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	golang.org/x/net v0.9.0 // indirect
+	golang.org/x/sys v0.7.0 // indirect
+	golang.org/x/text v0.9.0 // indirect
+	google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect
+	google.golang.org/grpc v1.54.0 // indirect
+	google.golang.org/protobuf v1.30.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)