Skip to content

Commit

Permalink
Merge pull request #201 from DataDog/anmarchenko/telemetry_metrics_ne…
Browse files Browse the repository at this point in the history
…t_requests

[SDTEST-116] HTTP transport metrics and minor telemetry tweaks
  • Loading branch information
anmarchenko authored Jul 29, 2024
2 parents 3d14714 + 8034e41 commit edc5176
Show file tree
Hide file tree
Showing 33 changed files with 794 additions and 120 deletions.
36 changes: 27 additions & 9 deletions lib/datadog/ci/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
require_relative "../test_visibility/serializers/factories/test_level"
require_relative "../test_visibility/serializers/factories/test_suite_level"
require_relative "../test_visibility/transport"
require_relative "../transport/adapters/telemetry_webmock_safe_adapter"
require_relative "../transport/api/builder"
require_relative "../transport/remote_settings_api"
require_relative "../utils/identity"
require_relative "../utils/parsing"
require_relative "../utils/test_run"
require_relative "../worker"
Expand Down Expand Up @@ -62,15 +64,7 @@ def activate_ci!(settings)
return unless settings.ci.enabled

# Configure datadog gem for test visibility mode

# Configure telemetry

# in development environment Datadog's telemetry is disabled by default
# for test visibility we want to enable it by default unless explicitly disabled
# NOTE: before agentless mode is released, we only enable telemetry when running with Datadog Agent
env_telemetry_enabled = ENV[Core::Telemetry::Ext::ENV_ENABLED]
settings.telemetry.enabled = !settings.ci.agentless_mode_enabled &&
(env_telemetry_enabled.nil? || Utils::Parsing.convert_to_bool(env_telemetry_enabled))
configure_telemetry(settings)

# Test visibility uses its own remote settings
settings.remote.enabled = false
Expand Down Expand Up @@ -255,6 +249,30 @@ def check_dd_site(settings)
end
end

# Enables Datadog telemetry for test visibility and patches the datadog gem's
# telemetry layer so it reports on behalf of datadog-ci.
#
# Telemetry ends up enabled only when agentless mode is off and the environment
# variable named by Core::Telemetry::Ext::ENV_ENABLED is either unset or parses
# to true. Patching is best-effort: a failure is logged as a warning and never
# aborts configuration.
#
# @param settings settings object; reads `settings.ci.agentless_mode_enabled`,
#   writes `settings.telemetry.enabled`
def configure_telemetry(settings)
  # in development environment Datadog's telemetry is disabled by default
  # for test visibility we want to enable it by default unless explicitly disabled
  # NOTE: before agentless mode is released, we only enable telemetry when running with Datadog Agent
  env_telemetry_enabled = ENV[Core::Telemetry::Ext::ENV_ENABLED]
  settings.telemetry.enabled = !settings.ci.agentless_mode_enabled &&
    (env_telemetry_enabled.nil? || Utils::Parsing.convert_to_bool(env_telemetry_enabled))

  return unless settings.telemetry.enabled

  begin
    require "datadog/core/environment/identity"
    require "datadog/core/telemetry/http/adapters/net"

    # patch gem's identity to report datadog-ci library version instead of datadog gem version
    Core::Environment::Identity.include(CI::Utils::Identity)

    # patch gem's telemetry transport layer to use Net::HTTP instead of WebMock's Net::HTTP
    Core::Telemetry::Http::Adapters::Net.include(CI::Transport::Adapters::TelemetryWebmockSafeAdapter)
  rescue StandardError, LoadError => e
    # LoadError descends from ScriptError, not StandardError, so a bare `rescue`
    # would let a missing/renamed gem file escape this best-effort patching.
    Datadog.logger.warn("Failed to patch Datadog gem's telemetry layer: #{e}")
  end
end

# Reports whether Timecop is present: either the gem is among the loaded
# gem specs or the Timecop constant has been defined.
#
# @return [Boolean]
def timecop?
  return true if Gem.loaded_specs.key?("timecop")

  defined?(Timecop) ? true : false
end
Expand Down
8 changes: 4 additions & 4 deletions lib/datadog/ci/ext/telemetry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ module Telemetry

METRIC_MANUAL_API_EVENTS = "manual_api_events"

METRIC_CODE_COVERAGE_STARTED = "code_coverage_started"
METRIC_CODE_COVERAGE_FINISHED = "code_coverage_finished"

METRIC_EVENTS_ENQUEUED = "events_enqueued_for_serialization"
METRIC_ENDPOINT_PAYLOAD_BYTES = "endpoint_payload.bytes"
METRIC_ENDPOINT_PAYLOAD_REQUESTS = "endpoint_payload.requests"
METRIC_ENDPOINT_PAYLOAD_REQUESTS_MS = "endpoint_payload.requests_ms"
METRIC_ENDPOINT_PAYLOAD_REQUESTS_ERRORS = "endpoint_payload.requests_errors"
METRIC_ENDPOINT_PAYLOAD_BYTES = "endpoint_payload.bytes"
METRIC_ENDPOINT_PAYLOAD_EVENTS_COUNT = "endpoint_payload.events_count"
METRIC_ENDPOINT_PAYLOAD_EVENTS_SERIALIZATION_MS = "endpoint_payload.events_serialization_ms"
METRIC_ENDPOINT_PAYLOAD_DROPPED = "endpoint_payload.dropped"
Expand Down Expand Up @@ -52,6 +49,8 @@ module Telemetry
METRIC_ITR_UNSKIPPABLE = "itr_unskippable"
METRIC_ITR_FORCED_RUN = "itr_forced_run"

METRIC_CODE_COVERAGE_STARTED = "code_coverage_started"
METRIC_CODE_COVERAGE_FINISHED = "code_coverage_finished"
METRIC_CODE_COVERAGE_IS_EMPTY = "code_coverage.is_empty"
METRIC_CODE_COVERAGE_FILES = "code_coverage.files"
METRIC_CODE_COVERAGE_ERRORS = "code_coverage.errors"
Expand Down Expand Up @@ -98,6 +97,7 @@ module Endpoint
# Values describing why a request failed, as reported to telemetry.
module ErrorType
# request failed with an error other than a timeout
NETWORK = "network"
# request failed with a Timeout::Error
TIMEOUT = "timeout"
# a response was received but it was not successful (no error object present)
STATUS_CODE = "status_code"
end

module ExitCode
Expand Down
11 changes: 10 additions & 1 deletion lib/datadog/ci/test_optimisation/coverage/transport.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# frozen_string_literal: true

require_relative "event"
require_relative "../../ext/telemetry"
require_relative "../../transport/event_platform_transport"
require_relative "../../transport/telemetry"

module Datadog
module CI
Expand All @@ -10,6 +12,10 @@ module Coverage
class Transport < Datadog::CI::Transport::EventPlatformTransport
private

# Endpoint tag used for this transport's telemetry metrics; passed as
# `endpoint:` when reporting dropped payloads.
def telemetry_endpoint_tag
Ext::Telemetry::Endpoint::CODE_COVERAGE
end

def send_payload(encoded_payload)
api.citestcov_request(
path: Ext::Transport::TEST_COVERAGE_INTAKE_PATH,
Expand All @@ -19,7 +25,10 @@ def send_payload(encoded_payload)

def encode_events(events)
events.filter_map do |event|
next unless event.valid?
unless event.valid?
CI::Transport::Telemetry.endpoint_payload_dropped(1, endpoint: telemetry_endpoint_tag)
next
end

encoded = encoder.encode(event)
next if event_too_large?(event, encoded)
Expand Down
17 changes: 8 additions & 9 deletions lib/datadog/ci/test_visibility/transport.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
require "datadog/core/environment/identity"

require_relative "serializers/factories/test_level"
require_relative "../ext/telemetry"
require_relative "../ext/transport"
require_relative "../transport/event_platform_transport"
require_relative "../transport/telemetry"

module Datadog
module CI
Expand All @@ -31,6 +33,10 @@ def send_traces(traces)

private

# Endpoint tag used for this transport's telemetry metrics; passed as
# `endpoint:` when reporting dropped payloads.
def telemetry_endpoint_tag
Ext::Telemetry::Endpoint::TEST_CYCLE
end

def send_payload(encoded_payload)
api.citestcycle_request(
path: Datadog::CI::Ext::Transport::TEST_VISIBILITY_INTAKE_PATH,
Expand All @@ -46,21 +52,14 @@ def encode_events(traces)

# Serializes and encodes a single span of a trace.
#
# Returns the encoded event, or nil when the event is dropped because the
# serializer is invalid or the encoded payload is too large.
def encode_span(trace, span)
# `itr&.correlation_id` evaluates to nil when `itr` is nil
serializer = serializers_factory.serializer(trace, span, options: {itr_correlation_id: itr&.correlation_id})

if serializer.valid?
encoded = encoder.encode(serializer)

# NOTE(review): this inline size check looks redundant with the
# `event_too_large?` guard right below it — confirm which one should remain.
if encoded.size > max_payload_size
# This single event is too large, we can't flush it
Datadog.logger.warn("Dropping test event. Payload too large: '#{span.inspect}'")
Datadog.logger.warn(encoded)

return nil
end
return nil if event_too_large?(span, encoded)

encoded
else
# invalid events are logged and counted as one dropped payload in telemetry
Datadog.logger.warn("Invalid event skipped: #{serializer} Errors: #{serializer.validation_errors}")
CI::Transport::Telemetry.endpoint_payload_dropped(1, endpoint: telemetry_endpoint_tag)
nil
end
end
Expand Down
49 changes: 38 additions & 11 deletions lib/datadog/ci/transport/adapters/net.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
require "datadog/core/transport/response"
require "datadog/core/transport/ext"

require_relative "net_http_client"
require_relative "../gzip"
require_relative "../../ext/telemetry"
require_relative "../../ext/transport"

module Datadog
Expand All @@ -26,7 +28,7 @@ def initialize(hostname:, port:, ssl:, timeout_seconds:)
end

def open(&block)
req = net_http_client.new(hostname, port)
req = NetHttpClient.original_net_http.new(hostname, port)

req.use_ssl = ssl
req.open_timeout = req.read_timeout = timeout
Expand Down Expand Up @@ -63,6 +65,8 @@ class Response
include Datadog::Core::Transport::Response

attr_reader :http_response
# Stats for telemetry
attr_accessor :request_compressed, :request_size

def initialize(http_response)
@http_response = http_response
Expand All @@ -86,7 +90,10 @@ def code
end

# Whether the response carries a 2xx HTTP status code.
#
# @return [Boolean] false when no status code is available (`code` is nil)
def ok?
  http_code = code
  # guard first: calling between? on a nil code would raise NoMethodError
  return false if http_code.nil?

  http_code.between?(200, 299)
end

def unsupported?
Expand All @@ -98,11 +105,17 @@ def not_found?
end

# Whether the response carries a 4xx HTTP status code.
#
# @return [Boolean] false when no status code is available (`code` is nil)
def client_error?
  http_code = code
  # guard first: calling between? on a nil code would raise NoMethodError
  return false if http_code.nil?

  http_code.between?(400, 499)
end

# Whether the response carries a 5xx HTTP status code.
#
# @return [Boolean] false when no status code is available (`code` is nil)
def server_error?
  http_code = code
  # guard first: calling between? on a nil code would raise NoMethodError
  return false if http_code.nil?

  http_code.between?(500, 599)
end

def gzipped_content?
Expand All @@ -119,17 +132,31 @@ def gzipped_body?(body)
first_bytes.b == Ext::Transport::GZIP_MAGIC_NUMBER
end

def inspect
"#{super}, http_response:#{http_response}"
# Error associated with this response; always nil here.
# `telemetry_error_type` treats a nil error on a non-ok response as a
# status-code failure.
def error
nil
end
end

private
# Classifies a failed response for telemetry.
#
# @return [String, nil] nil for an ok response; otherwise the status-code type
#   when there is no error object, the timeout type for a Timeout::Error, and
#   the network type for any other error
def telemetry_error_type
  return if ok?

  failure = error
  return Ext::Telemetry::ErrorType::STATUS_CODE if failure.nil?

  if failure.is_a?(Timeout::Error)
    Ext::Telemetry::ErrorType::TIMEOUT
  else
    Ext::Telemetry::ErrorType::NETWORK
  end
end

def net_http_client
return ::Net::HTTP unless defined?(WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP)
# compatibility with Datadog::Tracing transport layer
# Always reports zero traces for this response.
def trace_count
0
end

WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP
# Extends the inherited `inspect` output (via `super`) by appending the
# wrapped `http_response`.
def inspect
"#{super}, http_response:#{http_response}"
end
end
end
end
Expand Down
17 changes: 17 additions & 0 deletions lib/datadog/ci/transport/adapters/net_http_client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module Datadog
  module CI
    module Transport
      module Adapters
        # Picks the Net::HTTP class to use for outgoing requests.
        module NetHttpClient
          class << self
            # Returns WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP
            # when that constant is defined, and ::Net::HTTP otherwise.
            def original_net_http
              if defined?(WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP)
                WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP
              else
                ::Net::HTTP
              end
            end
          end
        end
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

require_relative "net_http_client"

module Datadog
  module CI
    module Transport
      module Adapters
        # Mixin that, once included into an adapter class, prepends an `open`
        # implementation built on NetHttpClient.original_net_http.
        module TelemetryWebmockSafeAdapter
          class << self
            # Hook run on `include`: prepends InstanceMethods so its `open`
            # takes precedence over the one defined by the including class.
            def included(base)
              base.prepend(InstanceMethods)
            end
          end

          module InstanceMethods
            # Opens a connection configured from the including adapter's
            # @hostname, @port, @ssl and @timeout, then starts it with the block.
            def open(&block)
              connection = NetHttpClient.original_net_http.new(@hostname, @port)

              connection.use_ssl = @ssl
              connection.read_timeout = @timeout
              connection.open_timeout = @timeout

              connection.start(&block)
            end
          end
        end
      end
    end
  end
end
Loading

0 comments on commit edc5176

Please sign in to comment.