diff --git a/sdk/include/opentelemetry/sdk/trace/samplers/probability.h b/sdk/include/opentelemetry/sdk/trace/samplers/probability.h
new file mode 100644
index 0000000000..18bca7042a
--- /dev/null
+++ b/sdk/include/opentelemetry/sdk/trace/samplers/probability.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include "opentelemetry/sdk/trace/sampler.h"
+
+OPENTELEMETRY_BEGIN_NAMESPACE
+namespace sdk
+{
+namespace trace
+{
+namespace trace_api = opentelemetry::trace;
+/**
+ * The probability sampler, based on its configuration, either defers the
+ * decision to sample to its parent, or computes and returns a decision based on
+ * the provided trace_id and probability.
+ */
+class ProbabilitySampler : public Sampler
+{
+public:
+  /**
+   * @param probability the probability, nominally 1.0 >= probability >= 0.0, that
+   * ShouldSample returns RECORD_AND_SAMPLE for any random trace_id; values
+   * outside [0.0, 1.0] are clamped to the nearest bound rather than rejected
+   */
+  explicit ProbabilitySampler(double probability);
+
+  /**
+   * @return Returns either RECORD_AND_SAMPLE or NOT_RECORD. When parent_context
+   * is a non-remote parent, its sampled flag decides; otherwise trace_id is
+   * used as a pseudorandom value in conjunction with the predefined threshold
+   * to determine whether this trace should be sampled
+   */
+  SamplingResult ShouldSample(
+      const trace_api::SpanContext *parent_context,
+      trace_api::TraceId trace_id,
+      nostd::string_view /*name*/,
+      trace_api::SpanKind /*span_kind*/,
+      const trace_api::KeyValueIterable & /*attributes*/) noexcept override;
+
+  /**
+   * @return a description of the form ProbabilitySampler{0.000100}
+   */
+  std::string GetDescription() const noexcept override;
+
+private:
+  std::string sampler_description_;  // text returned by GetDescription(), built in the constructor
+  const uint64_t threshold_;         // probability converted to a value in [0, UINT64_MAX]
+};
+}  // namespace trace
+}  // namespace sdk
+OPENTELEMETRY_END_NAMESPACE
diff --git a/sdk/src/trace/CMakeLists.txt b/sdk/src/trace/CMakeLists.txt
index 98f949997e..1e6d302461 100644
--- a/sdk/src/trace/CMakeLists.txt
+++ b/sdk/src/trace/CMakeLists.txt
@@ -1 +1,2 @@
-add_library(opentelemetry_trace tracer_provider.cc tracer.cc span.cc samplers/parent_or_else.cc)
+add_library(opentelemetry_trace tracer_provider.cc tracer.cc span.cc
+            samplers/parent_or_else.cc samplers/probability.cc)
diff --git a/sdk/src/trace/samplers/probability.cc b/sdk/src/trace/samplers/probability.cc
new file mode 100644
index 0000000000..2f8c9453cd
--- /dev/null
+++ b/sdk/src/trace/samplers/probability.cc
@@ -0,0 +1,114 @@
+// Copyright 2020, Open Telemetry Authors
+// Copyright 2017, OpenCensus Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "opentelemetry/sdk/trace/samplers/probability.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+
+namespace trace_api = opentelemetry::trace;
+
+namespace
+{
+/**
+ * Converts a probability in [0, 1] to a threshold in [0, UINT64_MAX]
+ *
+ * @param probability a required value to be converted to uint64_t. Values <= 0
+ * (and NaN) yield 0; values >= 1 yield UINT64_MAX.
+ * @return Returns threshold value computed after converting probability to
+ * uint64_t datatype
+ */
+uint64_t CalculateThreshold(double probability) noexcept
+{
+  if (!(probability > 0.0)) return 0;  // negated comparison also sends NaN to 0
+  if (probability >= 1.0) return UINT64_MAX;
+
+  // We can't directly return probability * UINT64_MAX.
+  //
+  // UINT64_MAX is (2^64)-1, but as a double rounds up to 2^64.
+  // For probabilities >= 1-(2^-54), the product wraps to zero!
+  // Instead, calculate the high and low 32 bits separately.
+  const double product = UINT32_MAX * probability;
+  double hi_bits = 0.0;
+  const double lo_bits = std::ldexp(std::modf(product, &hi_bits), 32) + product;
+  return (static_cast<uint64_t>(hi_bits) << 32) + static_cast<uint64_t>(lo_bits);
+}
+
+/**
+ * @param trace_id a required value to be converted to uint64_t. trace_id must
+ * be at least 8 bytes long; only its first 8 bytes are used
+ * @return Returns threshold value computed after converting trace_id to
+ * uint64_t datatype
+ */
+uint64_t CalculateThresholdFromBuffer(const trace_api::TraceId &trace_id) noexcept
+{
+  // We only use the first 8 bytes of TraceId.
+  static_assert(trace_api::TraceId::kSize >= 8, "TraceID must be at least 8 bytes long.");
+
+  uint64_t res = 0;
+  // NOTE(review): assumes the id bytes sit at the start of the TraceId object - confirm.
+  std::memcpy(&res, &trace_id, sizeof(res));
+  const double probability = static_cast<double>(res) / UINT64_MAX;
+
+  return CalculateThreshold(probability);
+}
+}  // namespace
+
+OPENTELEMETRY_BEGIN_NAMESPACE
+namespace sdk
+{
+namespace trace
+{
+ProbabilitySampler::ProbabilitySampler(double probability)
+    : threshold_(CalculateThreshold(probability))
+{
+  if (probability > 1.0) probability = 1.0;
+  if (!(probability >= 0.0)) probability = 0.0;  // also normalizes NaN, matching threshold_
+  sampler_description_ = "ProbabilitySampler{" + std::to_string(probability) + "}";
+}
+
+SamplingResult ProbabilitySampler::ShouldSample(
+    const trace_api::SpanContext *parent_context,
+    trace_api::TraceId trace_id,
+    nostd::string_view /*name*/,
+    trace_api::SpanKind /*span_kind*/,
+    const trace_api::KeyValueIterable & /*attributes*/) noexcept
+{
+  if (parent_context && !parent_context->HasRemoteParent())
+  {
+    // A non-remote parent already carries a decision; follow it.
+    const Decision inherited =
+        parent_context->IsSampled() ? Decision::RECORD_AND_SAMPLE : Decision::NOT_RECORD;
+    return {inherited, nullptr};
+  }
+
+  if (threshold_ == 0) return {Decision::NOT_RECORD, nullptr};  // never sample at threshold 0
+  // Sample when the value derived from the trace id does not exceed the threshold.
+  if (CalculateThresholdFromBuffer(trace_id) <= threshold_)
+  {
+    return {Decision::RECORD_AND_SAMPLE, nullptr};
+  }
+
+  return {Decision::NOT_RECORD, nullptr};
+}
+
+std::string ProbabilitySampler::GetDescription() const noexcept
+{
+  return sampler_description_;
+}
+}  // namespace trace
+}  // namespace sdk
+OPENTELEMETRY_END_NAMESPACE
diff --git a/sdk/test/trace/BUILD b/sdk/test/trace/BUILD
index a3c3c77f91..280acc4fe3 100644
--- a/sdk/test/trace/BUILD
+++ b/sdk/test/trace/BUILD
@@ -74,3 +74,15 @@ cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+cc_test(
+    name = "probability_sampler_test",
+    srcs = [
+        "probability_sampler_test.cc",
+    ],
+    deps = [
+        "//sdk/src/trace",
+        "//sdk/src/common:random",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/sdk/test/trace/CMakeLists.txt
b/sdk/test/trace/CMakeLists.txt
index b4524da0f2..8326d62a7b 100644
--- a/sdk/test/trace/CMakeLists.txt
+++ b/sdk/test/trace/CMakeLists.txt
@@ -1,8 +1,8 @@
 foreach(testname tracer_provider_test span_data_test simple_processor_test
         tracer_test always_off_sampler_test always_on_sampler_test
-        parent_or_else_sampler_test)
+        parent_or_else_sampler_test probability_sampler_test)
   add_executable(${testname} "${testname}.cc")
   target_link_libraries(${testname} ${GTEST_BOTH_LIBRARIES}
-                        ${CMAKE_THREAD_LIBS_INIT} opentelemetry_trace)
+                        ${CMAKE_THREAD_LIBS_INIT} opentelemetry_common opentelemetry_trace)
   gtest_add_tests(TARGET ${testname} TEST_PREFIX trace. TEST_LIST ${testname})
 endforeach()
diff --git a/sdk/test/trace/probability_sampler_test.cc b/sdk/test/trace/probability_sampler_test.cc
new file mode 100644
index 0000000000..37e2a6c943
--- /dev/null
+++ b/sdk/test/trace/probability_sampler_test.cc
@@ -0,0 +1,222 @@
+#include "opentelemetry/sdk/trace/samplers/probability.h"
+#include "src/common/random.h"
+
+#include <gtest/gtest.h>
+#include <map>
+#include <string>
+
+using opentelemetry::sdk::common::Random;
+using opentelemetry::sdk::trace::Decision;
+using opentelemetry::sdk::trace::ProbabilitySampler;
+using opentelemetry::trace::SpanContext;
+
+namespace
+{
+/*
+ * Helper function for running probability sampler tests.
+ * Given a span context, sampler, and number of iterations this function
+ * will return the number of RECORD_AND_SAMPLE decisions based on randomly
+ * generated traces.
+ *
+ * @param context a required valid span context
+ * @param sampler a required valid sampler
+ * @param iterations a required number specifying the number of times to
+ * generate a random trace_id and check if it should sample using the provided
+ * sampler and context
+ */
+int RunShouldSampleCountDecision(SpanContext &context,
+                                 ProbabilitySampler &sampler,
+                                 int iterations)
+{
+  int actual_count = 0;
+
+  opentelemetry::trace::SpanKind span_kind = opentelemetry::trace::SpanKind::kInternal;
+
+  using M = std::map<std::string, int>;
+  M m1    = {{}};
+  opentelemetry::trace::KeyValueIterableView<M> view{m1};
+
+  for (int i = 0; i < iterations; ++i)
+  {
+    uint8_t buf[16] = {0};
+    Random::GenerateRandomBuffer(buf);
+
+    opentelemetry::trace::TraceId trace_id(buf);
+
+    auto result = sampler.ShouldSample(&context, trace_id, "", span_kind, view);
+    if (result.decision == Decision::RECORD_AND_SAMPLE)
+    {
+      ++actual_count;
+    }
+  }
+
+  return actual_count;
+}
+}  // namespace
+
+TEST(ProbabilitySampler, ShouldSampleWithoutContext)
+{
+  opentelemetry::trace::TraceId invalid_trace_id;
+
+  opentelemetry::trace::SpanKind span_kind = opentelemetry::trace::SpanKind::kInternal;
+
+  using M = std::map<std::string, int>;
+  M m1    = {{}};
+  opentelemetry::trace::KeyValueIterableView<M> view{m1};
+
+  ProbabilitySampler s1(0.01);
+
+  auto sampling_result = s1.ShouldSample(nullptr, invalid_trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  constexpr uint8_t buf[] = {0, 0, 0, 0, 0, 0, 0, 0x80, 0, 0, 0, 0, 0, 0, 0, 0};
+  opentelemetry::trace::TraceId valid_trace_id(buf);
+
+  sampling_result = s1.ShouldSample(nullptr, valid_trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::NOT_RECORD, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  ProbabilitySampler s2(0.50000001);
+
+  sampling_result = s2.ShouldSample(nullptr, valid_trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  ProbabilitySampler s3(0.49999999);
+
+  sampling_result = s3.ShouldSample(nullptr, valid_trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::NOT_RECORD, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  ProbabilitySampler s4(0.50000000);
+
+  sampling_result = s4.ShouldSample(nullptr, valid_trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+}
+
+TEST(ProbabilitySampler, ShouldSampleWithContext)
+{
+  opentelemetry::trace::TraceId trace_id;
+  opentelemetry::trace::SpanKind span_kind = opentelemetry::trace::SpanKind::kInternal;
+  SpanContext c1(false, false);  // presumably (sampled, has_remote_parent) - confirm
+  SpanContext c2(true, false);
+  SpanContext c3(false, true);
+  SpanContext c4(true, true);
+
+  using M = std::map<std::string, int>;
+  M m1    = {{}};
+  opentelemetry::trace::KeyValueIterableView<M> view{m1};
+
+  ProbabilitySampler s1(0.01);
+
+  auto sampling_result = s1.ShouldSample(&c1, trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::NOT_RECORD, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  sampling_result = s1.ShouldSample(&c2, trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  sampling_result = s1.ShouldSample(&c3, trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+
+  sampling_result = s1.ShouldSample(&c4, trace_id, "", span_kind, view);
+
+  ASSERT_EQ(Decision::RECORD_AND_SAMPLE, sampling_result.decision);
+  ASSERT_EQ(nullptr, sampling_result.attributes);
+}
+
+TEST(ProbabilitySampler, ProbabilitySamplerHalf)
+{
+  const double probability = 0.5;
+  const int iterations     = 100000;
+  const int expected_count = static_cast<int>(iterations * probability);
+  const int tolerance      = static_cast<int>(iterations * 0.01);
+
+  SpanContext c(true, true);
+  ProbabilitySampler s(probability);
+  const int actual_count = RunShouldSampleCountDecision(c, s, iterations);
+  ASSERT_LT(actual_count, expected_count + tolerance);
+  ASSERT_GT(actual_count, expected_count - tolerance);
+}
+
+TEST(ProbabilitySampler, ProbabilitySamplerOnePercent)
+{
+  const double probability = 0.01;
+  const int iterations     = 100000;
+  const int expected_count = static_cast<int>(iterations * probability);
+  const int tolerance      = static_cast<int>(iterations * 0.01);
+
+  SpanContext c(true, true);
+  ProbabilitySampler s(probability);
+  const int actual_count = RunShouldSampleCountDecision(c, s, iterations);
+  ASSERT_LT(actual_count, expected_count + tolerance);
+  ASSERT_GT(actual_count, expected_count - tolerance);
+}
+
+TEST(ProbabilitySampler, ProbabilitySamplerAll)
+{
+  const double probability = 1.0;
+  const int iterations     = 100000;
+  const int expected_count = static_cast<int>(iterations * probability);
+
+  SpanContext c(true, true);
+  ProbabilitySampler s(probability);
+  const int actual_count = RunShouldSampleCountDecision(c, s, iterations);
+
+  ASSERT_EQ(actual_count, expected_count);
+}
+
+TEST(ProbabilitySampler, ProbabilitySamplerNone)
+{
+  const double probability = 0.0;
+  const int iterations     = 100000;
+  const int expected_count = static_cast<int>(iterations * probability);
+
+  SpanContext c(true, true);
+  ProbabilitySampler s(probability);
+  const int actual_count = RunShouldSampleCountDecision(c, s, iterations);
+
+  ASSERT_EQ(actual_count, expected_count);
+}
+
+TEST(ProbabilitySampler, GetDescription)
+{
+  ProbabilitySampler s1(0.01);
+  ASSERT_EQ("ProbabilitySampler{0.010000}", s1.GetDescription());
+
+  ProbabilitySampler s2(0.00);
+  ASSERT_EQ("ProbabilitySampler{0.000000}", s2.GetDescription());
+
+  ProbabilitySampler s3(1.00);
+  ASSERT_EQ("ProbabilitySampler{1.000000}", s3.GetDescription());
+
+  ProbabilitySampler s4(0.102030405);
+  ASSERT_EQ("ProbabilitySampler{0.102030}", s4.GetDescription());
+
+  ProbabilitySampler s5(3.00);
+  ASSERT_EQ("ProbabilitySampler{1.000000}", s5.GetDescription());
+
+  ProbabilitySampler s6(-3.00);
+  ASSERT_EQ("ProbabilitySampler{0.000000}", s6.GetDescription());
+
+  ProbabilitySampler s7(1.00000000001);
+  ASSERT_EQ("ProbabilitySampler{1.000000}", s7.GetDescription());
+
+  ProbabilitySampler s8(-1.00000000001);
+  ASSERT_EQ("ProbabilitySampler{0.000000}", s8.GetDescription());
+
+  ProbabilitySampler s9(0.50);
+  ASSERT_EQ("ProbabilitySampler{0.500000}", s9.GetDescription());
+}