Skip to content

Commit

Permalink
feat: add opentelemetry plugin (#6119)
Browse files Browse the repository at this point in the history
Co-authored-by: roketyyang <roketyyang@tencent.com>
  • Loading branch information
yangxikun and roketyyang authored Jan 26, 2022
1 parent 14f0889 commit 7617e19
Show file tree
Hide file tree
Showing 8 changed files with 1,392 additions and 0 deletions.
347 changes: 347 additions & 0 deletions apisix/plugins/opentelemetry.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,347 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
local plugin_name = "opentelemetry"
local core = require("apisix.core")
local plugin = require("apisix.plugin")
local process = require("ngx.process")

local always_off_sampler_new = require("opentelemetry.trace.sampling.always_off_sampler").new
local always_on_sampler_new = require("opentelemetry.trace.sampling.always_on_sampler").new
local parent_base_sampler_new = require("opentelemetry.trace.sampling.parent_base_sampler").new
local trace_id_ratio_sampler_new =
require("opentelemetry.trace.sampling.trace_id_ratio_sampler").new

local exporter_client_new = require("opentelemetry.trace.exporter.http_client").new
local otlp_exporter_new = require("opentelemetry.trace.exporter.otlp").new
local batch_span_processor_new = require("opentelemetry.trace.batch_span_processor").new
local id_generator = require("opentelemetry.trace.id_generator")
local tracer_provider_new = require("opentelemetry.trace.tracer_provider").new

local span_kind = require("opentelemetry.trace.span_kind")
local span_status = require("opentelemetry.trace.span_status")
local resource_new = require("opentelemetry.resource").new
local attr = require("opentelemetry.attribute")

local context_storage = require("opentelemetry.context_storage")
local context = require("opentelemetry.context").new(context_storage)
local carrier_new = require("opentelemetry.trace.propagation.carrier").new
local trace_context = require("opentelemetry.trace.propagation.trace_context")

local ngx = ngx
local ngx_var = ngx.var
local table = table
local type = type
local pairs = pairs
local ipairs = ipairs
local unpack = unpack

local lrucache = core.lrucache.new({
type = 'plugin', count = 128, ttl = 24 * 60 * 60,
})


local attr_schema = {
type = "object",
properties = {
trace_id_source = {
type = "string",
enum = {"x-request-id", "random"},
description = "the source of trace id",
default = "random",
},
resource = {
type = "object",
description = "additional resource",
additionalProperties = {{type = "boolean"}, {type = "number"}, {type = "string"}},
},
collector = {
type = "object",
description = "opentelemetry collector",
properties = {
address = {type = "string", description = "host:port", default = "127.0.0.1:4317"},
request_timeout = {type = "integer", description = "second uint", default = 3},
request_headers = {
type = "object",
description = "http headers",
additionalProperties = {
one_of = {{type = "boolean"},{type = "number"}, {type = "string"}},
},
}
},
default = {address = "127.0.0.1:4317", request_timeout = 3}
},
batch_span_processor = {
type = "object",
description = "batch span processor",
properties = {
drop_on_queue_full = {
type = "boolean",
description = "if true, drop span when queue is full,"
.. " otherwise force process batches",
},
max_queue_size = {
type = "integer",
description = "maximum queue size to buffer spans for delayed processing",
},
batch_timeout = {
type = "number",
description = "maximum duration for constructing a batch",
},
inactive_timeout = {
type = "number",
description = "maximum duration for processing batches",
},
max_export_batch_size = {
type = "integer",
description = "maximum number of spans to process in a single batch",
}
},
default = {},
},
},
}

local schema = {
type = "object",
properties = {
sampler = {
type = "object",
properties = {
name = {
type = "string",
enum = {"always_on", "always_off", "trace_id_ratio", "parent_base"},
title = "sampling strategy",
default = "always_off"
},
options = {
type = "object",
properties = {
fraction = {
type = "number", title = "trace_id_ratio fraction", default = 0
},
root = {
type = "object",
title = "parent_base root sampler",
properties = {
name = {
type = "string",
enum = {"always_on", "always_off", "trace_id_ratio"},
title = "sampling strategy",
default = "always_off"
},
options = {
type = "object",
properties = {
fraction = {
type = "number",
title = "trace_id_ratio fraction parameter",
default = 0,
},
},
default = {fraction = 0}
}
},
default = {name = "always_off", options = {fraction = 0}}
},
},
default = {fraction = 0, root = {name = "always_off"}}
}
},
default = {name = "always_off", options = {fraction = 0, root = {name = "always_off"}}}
},
additional_attributes = {
type = "array",
items = {
type = "string",
minLength = 1,
}
}
}
}


local _M = {
version = 0.1,
priority = -1200, -- last running plugin, but before serverless post func
name = plugin_name,
schema = schema,
attr_schema = attr_schema,
}


function _M.check_schema(conf)
return core.schema.check(schema, conf)
end


local hostname
local sampler_factory
local plugin_info

function _M.init()
if process.type() ~= "worker" then
return
end

sampler_factory = {
always_off = always_off_sampler_new,
always_on = always_on_sampler_new,
parent_base = parent_base_sampler_new,
trace_id_ratio = trace_id_ratio_sampler_new,
}
hostname = core.utils.gethostname()

plugin_info = plugin.plugin_attr(plugin_name) or {}
local ok, err = core.schema.check(attr_schema, plugin_info)
if not ok then
core.log.error("failed to check the plugin_attr[", plugin_name, "]",
": ", err)
return
end

if plugin_info.trace_id_source == "x-request-id" then
id_generator.new_ids = function()
local trace_id = core.request.headers()["x-request-id"] or ngx_var.request_id
return trace_id, id_generator.new_span_id()
end
end
end


local function create_tracer_obj(conf)
-- create exporter
local exporter = otlp_exporter_new(exporter_client_new(plugin_info.collector.address,
plugin_info.collector.request_timeout,
plugin_info.collector.request_headers))
-- create span processor
local batch_span_processor = batch_span_processor_new(exporter,
plugin_info.batch_span_processor)
-- create sampler
local sampler
local sampler_name = conf.sampler.name
local sampler_options = conf.sampler.options
if sampler_name == "parent_base" then
local root_sampler
if sampler_options.root then
local name, fraction = sampler_options.root.name, sampler_options.root.options.fraction
root_sampler = sampler_factory[name](fraction)
else
root_sampler = always_off_sampler_new()
end
sampler = sampler_factory[sampler_name](root_sampler)
else
sampler = sampler_factory[sampler_name](sampler_options.fraction)
end
local resource_attrs = {attr.string("hostname", hostname)}
if plugin_info.resource then
if not plugin_info.resource["service.name"] then
table.insert(resource_attrs, attr.string("service.name", "APISIX"))
end
for k, v in pairs(plugin_info.resource) do
if type(v) == "string" then
table.insert(resource_attrs, attr.string(k, v))
end
if type(v) == "number" then
table.insert(resource_attrs, attr.double(k, v))
end
if type(v) == "boolean" then
table.insert(resource_attrs, attr.bool(k, v))
end
end
end
-- create tracer provider
local tp = tracer_provider_new(batch_span_processor, {
resource = resource_new(unpack(resource_attrs)),
sampler = sampler,
})
-- create tracer
return tp:tracer("opentelemetry-lua")
end


function _M.rewrite(conf, api_ctx)
local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, create_tracer_obj, conf)
if not tracer then
core.log.error("failed to fetch tracer object: ", err)
return
end

-- extract trace context from the headers of downstream HTTP request
local upstream_context = trace_context.extract(context, carrier_new())
local attributes = {
attr.string("service", api_ctx.service_name),
attr.string("route", api_ctx.route_name),
}
if conf.additional_attributes then
for _, key in ipairs(conf.additional_attributes) do
local val = api_ctx.var[key]
if val then
core.table.insert(attributes, attr.string(key, val))
end
end
end

local ctx = tracer:start(upstream_context, api_ctx.var.request_uri, {
kind = span_kind.client,
attributes = attributes,
})
ctx:attach()

-- inject trace context into the headers of upstream HTTP request
trace_context.inject(ctx, carrier_new())
end


function _M.body_filter(conf, api_ctx)
if ngx.arg[2] then
local upstream_status = core.response.get_upstream_status(api_ctx)
local ctx = context:current()
ctx:detach()

-- get span from current context
local span = ctx:span()
if upstream_status and upstream_status >= 500 then
span:set_status(span_status.error,
"upstream response status: " .. upstream_status)
end

span:finish()
end
end


-- body_filter maybe not called because of empty http body response
-- so we need to check if the span has finished in log phase
function _M.log(conf, api_ctx)
local ctx = context:current()
if ctx then
-- ctx:detach() is not necessary, because of ctx is stored in ngx.ctx
local upstream_status = core.response.get_upstream_status(api_ctx)

-- get span from current context
local span = ctx:span()
if upstream_status and upstream_status >= 500 then
span:set_status(span_status.error,
"upstream response status: " .. upstream_status)
end

span:finish()
end
end


return _M
16 changes: 16 additions & 0 deletions conf/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ plugins: # plugin list (sorted by priority)
# <- recommend to use priority (0, 100) for your custom plugins
- example-plugin # priority: 0
#- skywalking # priority: -1100
#- opentelemetry # priority: -1200
- aws-lambda # priority: -1899
- azure-functions # priority: -1900
- openwhisk # priority: -1901
Expand Down Expand Up @@ -412,6 +413,21 @@ plugin_attr:
service_name: APISIX
service_instance_name: APISIX Instance Name
endpoint_addr: http://127.0.0.1:12800
opentelemetry:
trace_id_source: x-request-id
resource:
service.name: APISIX
collector:
address: 127.0.0.1:4317
request_timeout: 3
request_headers:
Authorization: token
batch_span_processor:
drop_on_queue_full: false
max_queue_size: 1024
batch_timeout: 2
inactive_timeout: 1
max_export_batch_size: 16
prometheus:
export_uri: /apisix/prometheus/metrics
metric_prefix: apisix_
Expand Down
1 change: 1 addition & 0 deletions docs/en/latest/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
"plugins/prometheus",
"plugins/zipkin",
"plugins/skywalking",
"plugins/opentelemetry",
"plugins/node-status",
"plugins/datadog"
]
Expand Down
Loading

0 comments on commit 7617e19

Please sign in to comment.