Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Probabilistic sampler processor based on draft t-value/r-value encoding #24811

Closed
wants to merge 42 commits into from
Closed
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
e822a9b
Add t-value sampler draft
jmacd May 12, 2023
1bc6017
copy/import tracestate parser package
jmacd May 15, 2023
d1fd891
test ot tracestate
jmacd May 16, 2023
85e4472
tidy
jmacd May 16, 2023
bb75f8a
renames
jmacd May 16, 2023
6a57b77
testing two parsers w/ generic code
jmacd May 17, 2023
7fa8130
integrated
jmacd May 17, 2023
36230e7
Comments
jmacd May 17, 2023
7bae35c
revert two files
jmacd May 17, 2023
9010a67
Update with r, s, and t-value. Now using regexps and strings.IndexBy…
jmacd Jun 1, 2023
0e27e40
fix sampler build
jmacd Jun 1, 2023
efcdc3d
add support for s-value for non-consistent mode
jmacd Jun 1, 2023
939c758
WIP
jmacd Jul 10, 2023
b9a1e56
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Aug 2, 2023
a31266c
use new proposed syntax see https://github.com/open-telemetry/opentel…
jmacd Aug 2, 2023
690cd64
update tracestate libs for new encoding
jmacd Aug 2, 2023
c8baf29
wip working on probabilistic sampler with two new modes: downsampler …
jmacd Aug 2, 2023
7f47e4a
unsigned implement split
jmacd Aug 3, 2023
422e0b2
two implementations
jmacd Aug 3, 2023
787b9fd
wip
jmacd Sep 5, 2023
ed36f03
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Sep 6, 2023
d795210
Updates for OTEP 235
jmacd Sep 6, 2023
09000f7
wip TODO
jmacd Sep 6, 2023
a4d467b
versions.yaml
jmacd Sep 6, 2023
e373b9b
Add proportional sampler mode; comment on TODOs; create SamplerMode t…
jmacd Sep 7, 2023
fe6a085
back from internal
jmacd Oct 4, 2023
396efb1
wip
jmacd Oct 4, 2023
36de5dd
fix existing tests
jmacd Oct 6, 2023
f1aa0ad
:wip:
jmacd Oct 12, 2023
700734e
Update for rejection threshold
jmacd Nov 15, 2023
ae50bdd
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Nov 15, 2023
a94b8e7
fix preexisting tests
jmacd Nov 16, 2023
4edcbcb
basic yes/no t-value sampling test
jmacd Nov 16, 2023
53bf119
Merge branch 'main' of github.com:open-telemetry/opentelemetry-collec…
jmacd Nov 29, 2023
3cdb957
add version for sampling pkg
jmacd Nov 29, 2023
e506847
more testing
jmacd Dec 7, 2023
2cddfeb
add probability to threshold with precision option
jmacd Dec 8, 2023
f69d6ee
ProbabilityToThresholdWithPrecision
jmacd Dec 8, 2023
cc02934
test coverage for equalizing and proportional
jmacd Dec 8, 2023
1eecc4a
config test
jmacd Dec 8, 2023
2159107
comments and notes
jmacd Dec 8, 2023
e0898a6
update README
jmacd Dec 8, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions pkg/sampling/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package sampling

import (
"errors"
"strings"

"go.uber.org/multierr"
)

// KV is one key-value pair of strings, in the order Key then Value.
type KV struct {
	Key, Value string
}

var (
// ErrTraceStateSize is the sentinel error for a tracestate of invalid size.
// NOTE(review): no use of it is visible in this part of the file; confirm
// where it is returned.
ErrTraceStateSize = errors.New("invalid tracestate size")
// ErrTraceStateCount is the sentinel error for a tracestate with an invalid
// number of items. scanKeyValues returns it when its maxItems limit is hit.
ErrTraceStateCount = errors.New("invalid tracestate item count")
)

// keyValueScanner defines distinct scanner behaviors for lists of
// key-values. The fields select the item limit, whitespace handling,
// and the two delimiter bytes used by scanKeyValues.
type keyValueScanner struct {
// maxItems is 32 or -1; scanKeyValues only enforces the limit when
// the value is positive.
maxItems int
// trim is set if OWS (optional whitespace) should be removed
// from each list member before it is split into key and value.
trim bool
// separator is , or ; and delimits list members.
separator byte
// equality is = or : and delimits a key from its value.
equality byte
}

// commonTraceState holds a list of key-value pairs, exposed through
// HasExtraValues and ExtraValues.
type commonTraceState struct {
// kvs is the list returned as-is by ExtraValues.
kvs []KV
}

// HasExtraValues reports whether at least one key-value pair is stored.
func (cts commonTraceState) HasExtraValues() bool {
	return len(cts.kvs) > 0
}

// ExtraValues returns the stored key-value pairs. The returned slice is the
// internal slice itself, not a copy.
func (cts commonTraceState) ExtraValues() []KV {
return cts.kvs
}

// trimOws removes optional whitespace (spaces and horizontal tabs) from
// both ends of input and returns the remainder. Interior whitespace is
// left untouched. An empty or all-whitespace input yields "".
func trimOws(input string) string {
	// Hard-codes the value of owsCharset (space and tab). strings.Trim
	// is used rather than manual indexing so that an empty string, or one
	// that becomes empty while trimming, is never indexed.
	return strings.Trim(input, " \t")
}

// scanKeyValues splits input into list members on s.separator and calls f
// with the key and value of each member, where key and value are the text
// before and after the first s.equality byte in the member.
//
// Members are optionally trimmed of surrounding whitespace (s.trim). Empty
// members and members without an s.equality byte are skipped without
// calling f. Every piece delimited by s.separator counts toward the item
// limit, including skipped ones.
//
// If s.maxItems is positive and input contains more than s.maxItems
// members, scanning stops and ErrTraceStateCount is returned. Otherwise the
// result is the combination (via multierr) of all errors returned by f, or
// nil if there were none.
func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) error) error {
	var rval error
	items := 0
	for input != "" {
		items++
		// Exactly maxItems members are allowed; only the member after
		// that exceeds the limit.
		if s.maxItems > 0 && items > s.maxItems {
			// W3C specifies max 32 entries, tested here
			// instead of via the regexp.
			return ErrTraceStateCount
		}

		sep := strings.IndexByte(input, s.separator)

		var member string
		if sep < 0 {
			// Last member: consume the rest of the input.
			member = input
			input = ""
		} else {
			member = input[:sep]
			input = input[sep+1:]
		}

		if s.trim {
			// Trim only required for W3C; OTel does not
			// specify whitespace for its value encoding.
			member = trimOws(member)
		}

		if member == "" {
			// W3C allows empty list members.
			continue
		}

		eq := strings.IndexByte(member, s.equality)
		if eq < 0 {
			// A regexp should have rejected this input.
			continue
		}
		if err := f(member[:eq], member[eq+1:]); err != nil {
			// Keep scanning so that every callback error is reported.
			rval = multierr.Append(rval, err)
		}
	}
	return rval
}
207 changes: 207 additions & 0 deletions pkg/sampling/encoding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sampling

import (
"encoding/binary"
"fmt"
"math/rand"
"testing"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/bytes"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling/internal/unsigned"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/pdata/pcommon"
)

// must unwraps a (value, error) pair for use in tests: it returns the value
// when err is nil and panics with err otherwise.
func must[T any](t T, err error) T {
	if err == nil {
		return t
	}
	panic(err)
}

// mustNot is the counterpart of must for calls that are expected to fail.
// It discards the value and returns err when err is non-nil.
//
// When err is nil the expectation is violated and mustNot panics, like
// must does for the opposite case. Returning a synthetic error here would
// make an enclosing require.Error assertion pass even though the call under
// test succeeded.
func mustNot[T any](t T, err error) error {
	if err == nil {
		panic(fmt.Errorf("expected an error, got nil"))
	}
	return err
}

// probabilityToTValue converts prob to a threshold with
// ProbabilityToThreshold and returns that threshold's TValue as a string,
// together with the conversion error.
func probabilityToTValue(prob float64) (string, error) {
	threshold, err := ProbabilityToThreshold(prob)
	tvalue := string(threshold.TValue())
	return tvalue, err
}

// tValueToProbability parses tv with TValueToThreshold and returns the
// resulting threshold's Probability, together with the parse error.
func tValueToProbability(tv string) (float64, error) {
	threshold, err := TValueToThreshold(tv)
	prob := threshold.Probability()
	return prob, err
}

// TestValidProbabilityToTValue checks the t-value string produced for a set
// of valid sampling probabilities.
func TestValidProbabilityToTValue(t *testing.T) {
	cases := []struct {
		tvalue string
		prob   float64
	}{
		{"", 1.0},
		{"8", 0.5},
		{"00000000000001", 0x1p-56},
		{"55555555555554", 1 / 3.},
		{"54", 0x54p-8}, // 0x54p-8 is approximately 1/3
		{"01", 0x1p-8},
		{"0", 0},
	}
	for _, tc := range cases {
		require.Equal(t, tc.tvalue, must(probabilityToTValue(tc.prob)))
	}
}

func TestInvalidprobabilityToTValue(t *testing.T) {
// Too small
require.Error(t, mustNot(probabilityToTValue(0x1p-57)))
require.Error(t, mustNot(probabilityToTValue(0x1p-57)))

// Too big
require.Error(t, mustNot(probabilityToTValue(1.1)))
require.Error(t, mustNot(probabilityToTValue(1.1)))
}

// TestTValueToProbability checks the probability decoded from several
// t-value strings: three exact results and one approximate.
func TestTValueToProbability(t *testing.T) {
	exact := []struct {
		tvalue string
		prob   float64
	}{
		{"8", 0.5},
		{"444", 0x444p-12},
		{"0", 0.0},
	}
	for _, tc := range exact {
		require.Equal(t, tc.prob, must(tValueToProbability(tc.tvalue)))
	}

	// 0x55555554p-32 is very close to 1/3
	oneThird := must(tValueToProbability("55555554"))
	require.InEpsilon(t, 1/3., oneThird, 1e-9)
}

func TestProbabilityToThreshold(t *testing.T) {
require.Equal(t,
must(TValueToThreshold("8")),
must(ProbabilityToThreshold(0.5)))
require.Equal(t,
must(TValueToThreshold("00000000000001")),
must(ProbabilityToThreshold(0x1p-56)))
require.Equal(t,
must(TValueToThreshold("000000000001")),
must(ProbabilityToThreshold(0x100p-56)))
require.Equal(t,
must(TValueToThreshold("00000000000002")),
must(ProbabilityToThreshold(0x1p-55)))
require.Equal(t,
AlwaysSampleThreshold,
must(ProbabilityToThreshold(1.0)))
require.Equal(t,
NeverSampleThreshold,
must(ProbabilityToThreshold(0)))
}

// TestShouldSample checks the sampling decision at the four boundary
// TraceIDs of a 50% threshold.
func TestShouldSample(t *testing.T) {
	thresh := must(ProbabilityToThreshold(0.5))

	// traceID builds a 16-byte TraceID: 9 meaningless bytes (0xee), then
	// the randomness, which is lead followed by six copies of fill.
	traceID := func(lead, fill byte) pcommon.TraceID {
		var tid pcommon.TraceID
		for i := range tid {
			switch {
			case i < 9:
				tid[i] = 0xee
			case i == 9:
				tid[i] = lead
			default:
				tid[i] = fill
			}
		}
		return tid
	}

	boundaries := []struct {
		lead, fill byte
		sampled    bool
	}{
		{0x00, 0x00, true},  // Smallest TraceID that should sample.
		{0x7f, 0xff, true},  // Largest TraceID that should sample.
		{0x80, 0x00, false}, // Smallest TraceID that should NOT sample.
		{0xff, 0xff, false}, // Largest TraceID that should NOT sample.
	}
	for _, tc := range boundaries {
		decision := thresh.ShouldSample(RandomnessFromTraceID(traceID(tc.lead, tc.fill)))
		if tc.sampled {
			require.True(t, decision)
		} else {
			require.False(t, decision)
		}
	}
}

// The two benchmarks below were used to choose the implementation for
// the Threshold type in this package. The results indicate that it
// is faster to compare a 56-bit number than to compare a 7-element []byte.

// benchTIDs is a fixed array of 1024 trace IDs that the benchmarks in this
// file fill with random bytes (see init) and cycle through as input.
type benchTIDs [1024]pcommon.TraceID

// init fills every trace ID in the array with 16 random bytes, drawn as two
// big-endian uint64 values from math/rand.
func (tids *benchTIDs) init() {
	for i := 0; i < len(tids); i++ {
		id := &tids[i]
		binary.BigEndian.PutUint64(id[:8], rand.Uint64())
		binary.BigEndian.PutUint64(id[8:], rand.Uint64())
	}
}

// BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op
//
// BenchmarkThresholdCompareAsUint64 measures ShouldSample for the
// unsigned package's Threshold over 1024 random trace IDs, each paired
// with a threshold built from a random probability.
func BenchmarkThresholdCompareAsUint64(b *testing.B) {
	var (
		traceIDs   benchTIDs
		thresholds [1024]unsigned.Threshold
	)
	traceIDs.init()
	for i := range thresholds {
		th, err := unsigned.ProbabilityToThreshold(rand.Float64())
		thresholds[i] = th
		if err != nil {
			b.Fatal(err)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	sampled, dropped := 0, 0
	for n := 0; n < b.N; n++ {
		slot := n % len(traceIDs)
		traceID := traceIDs[slot]
		threshold := thresholds[slot]

		if !threshold.ShouldSample(unsigned.RandomnessFromTraceID(traceID)) {
			dropped++
			continue
		}
		sampled++
	}
}

// BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op
//
// BenchmarkThresholdCompareAsBytes measures ShouldSample for the bytes
// package's Threshold over 1024 random trace IDs, each paired with a
// threshold built from a random probability.
func BenchmarkThresholdCompareAsBytes(b *testing.B) {
	var (
		traceIDs   benchTIDs
		thresholds [1024]bytes.Threshold
	)
	traceIDs.init()
	for i := range thresholds {
		th, err := bytes.ProbabilityToThreshold(rand.Float64())
		thresholds[i] = th
		if err != nil {
			b.Fatal(err)
		}
	}

	b.ReportAllocs()
	b.ResetTimer()
	sampled, dropped := 0, 0
	for n := 0; n < b.N; n++ {
		slot := n % len(traceIDs)
		traceID := traceIDs[slot]
		threshold := thresholds[slot]

		if !threshold.ShouldSample(bytes.RandomnessFromTraceID(traceID)) {
			dropped++
			continue
		}
		sampled++
	}
}
23 changes: 23 additions & 0 deletions pkg/sampling/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling

go 1.20

require (
github.com/stretchr/testify v1.8.2
go.opentelemetry.io/collector/pdata v1.0.0-rcv0011
go.uber.org/multierr v1.11.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect
google.golang.org/grpc v1.54.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading