Skip to content

Commit

Permalink
feature: add opentelemetry plugin. (apache#3891)
Browse files Browse the repository at this point in the history
  • Loading branch information
roketyyang committed Jan 17, 2022
1 parent 9f5ad21 commit 9069345
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 74 deletions.
77 changes: 33 additions & 44 deletions apisix/plugins/opentelemetry.lua
Original file line number Diff line number Diff line change
Expand Up @@ -50,31 +50,35 @@ local pairs = pairs
local ipairs = ipairs
local unpack = unpack

local hostname
local lrucache = core.lrucache.new({
type = 'plugin', count = 128, ttl = 24 * 60 * 60,
})


local attr_schema = {
type = "object",
properties = {
x_request_id_as_trace_id = {
type = "boolean",
description = "use x-request-id as new trace id",
default = false,
trace_id_source = {
type = "string",
enum = {"x-request-id", "random"},
description = "alternate use x-request-id as trace id",
default = "random",
},
resource = {
type = "object",
description = "additional resource",
additional_properties = {{type = "boolean"}, {type = "number"}, {type = "string"}},
additionalProperties = {{type = "boolean"}, {type = "number"}, {type = "string"}},
},
collector = {
type = "object",
description = "otel collector",
description = "opentelemetry collector",
properties = {
address = {type = "string", description = "host:port", default = "127.0.0.1:4317"},
request_timeout = {type = "integer", description = "second uint", default = 3},
request_headers = {
type = "object",
description = "http headers",
additional_properties = {
additionalProperties = {
one_of = {{type = "boolean"},{type = "number"}, {type = "string"}},
},
}
Expand Down Expand Up @@ -160,24 +164,17 @@ local schema = {
},
default = {name = "always_off", options = {fraction = 0, root = {name = "always_off"}}}
},
tags = {
additional_attributes = {
type = "array",
items = {
type = "object",
properties = {
position = {
type = "string",
enum = {"http", "arg", "cookie"}
},
name = {
type = "string", minLength = 1
}
}
type = "string",
minLength = 1,
}
}
}
}


local _M = {
version = 0.1,
priority = -1200, -- last running plugin, but before serverless post func
Expand All @@ -186,10 +183,13 @@ local _M = {
attr_schema = attr_schema,
}


function _M.check_schema(conf)
return core.schema.check(schema, conf)
end


local hostname
local sampler_factory
local plugin_info

Expand All @@ -214,22 +214,16 @@ function _M.init()
return
end

if plugin_info.x_request_id_as_trace_id then
if plugin_info.trace_id_source == "x-request-id" then
id_generator.new_ids = function()
local trace_id = ngx_req.get_headers()["x-request-id"] or ngx_var.request_id
return trace_id, id_generator.new_span_id()
end
end
end

local tracers = {}

local function fetch_tracer(conf, ctx)
local t = tracers[ctx.route_id]
if t and t.v == ctx.conf_version then
return t.tracer
end

local function create_tracer_obj(conf)
-- create exporter
local exporter = otlp_exporter_new(exporter_client_new(plugin_info.collector.address,
plugin_info.collector.request_timeout,
Expand Down Expand Up @@ -276,30 +270,33 @@ local function fetch_tracer(conf, ctx)
sampler = sampler,
})
-- create tracer
local tracer = tp:tracer("opentelemetry-lua")
tracers[ctx.route_id] = {tracer = tracer, v = ctx.conf_version}

return tracer
return tp:tracer("opentelemetry-lua")
end


function _M.access(conf, api_ctx)
local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, create_tracer_obj, conf)
if not tracer then
core.log.error("failed to fetch tracer object: ", err)
return
end

-- extract trace context from the headers of downstream HTTP request
local upstream_context = trace_context.extract(context, carrier_new())
local attributes = {
attr.string("service", api_ctx.service_name),
attr.string("route", api_ctx.route_name),
}
if conf.tags then
for _, tag in ipairs(conf.tags) do
local key = tag.position .. "_" .. tag.name
if conf.additional_attributes then
for _, key in ipairs(conf.additional_attributes) do
local val = api_ctx.var[key]
if val then
core.table.insert(attributes, attr.string(key, val))
end
end
end

local ctx, _ = fetch_tracer(conf, api_ctx):start(upstream_context, api_ctx.var.request_uri, {
local ctx, _ = tracer:start(upstream_context, api_ctx.var.request_uri, {
kind = span_kind.client,
attributes = attributes,
})
Expand All @@ -309,6 +306,7 @@ function _M.access(conf, api_ctx)
trace_context.inject(ctx, carrier_new())
end


function _M.body_filter(conf, ctx)
if ngx.arg[2] then
local upstream_status = core.response.get_upstream_status(ctx)
Expand All @@ -323,14 +321,5 @@ function _M.body_filter(conf, ctx)
end
end

function _M.destroy()
if process.type() ~= "worker" then
return
end

for _, t in pairs(tracers) do
t.tracer.provider:shutdown()
end
end

return _M
1 change: 1 addition & 0 deletions conf/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ plugins: # plugin list (sorted by priority)
# <- recommend to use priority (0, 100) for your custom plugins
- example-plugin # priority: 0
#- skywalking # priority: -1100
#- opentelemetry # priority: -1200
- aws-lambda # priority: -1899
- azure-functions # priority: -1900
- openwhisk # priority: -1901
Expand Down
11 changes: 5 additions & 6 deletions docs/en/latest/plugins/opentelemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ Just support reporting in `HTTP` with `Content-Type=application/x-protobuf`, the
| sampler.options.root.name | string | optional | always_off | ["always_on", "always_off", "trace_id_ratio"] | sampling strategy
| sampler.options.root.options | object | optional | {fraction = 0} | | sampling strategy parameters
| sampler.options.root.options.fraction | number | optional | 0 | [0, 1] | trace_id_ratio fraction
| tags | array[object] | optional | | | append to trace span attributes
| tags.position | string | required | | ["http", "arg", "cookie"] | where variable in
| tags.name | string | required | | | variable name
| additional_attributes | array[string] | optional | | | append to trace span attributes
| additional_attributes[0] | string | required | | | key of ctx.var

## How To Enable

Expand Down Expand Up @@ -95,18 +94,18 @@ We can set the collecting by specifying the configuration in `conf/config.yaml`.

| Name | Type | Default | Description |
| ------------ | ------ | -------- | ----------------------------------------------------- |
| x_request_id_as_trace_id | boolean | false | use current request id as new TraceID, you should make sure the request id is match regex pattern: `[0-9a-f]{32}`|
| trace_id_source | enum | random | alternate use x-request-id as trace id, valid value is `random` or `x-request-id`, if use `x-request-id`, please make sure it match regex pattern `[0-9a-f]{32}` |
| resource | object | | additional [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) append to trace |
| collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | otlp collector |
| collector.address | string | 127.0.0.1:4317 | collector address |
| collector.request_timeout | integer | 3 | report request timeout |
| collector.request_timeout | integer | 3 | report request timeout(second) |
| collector.request_headers | object | | report request http headers |
| batch_span_processor | object | | trace span processor |
| batch_span_processor.drop_on_queue_full | boolean | true | drop span when queue is full, otherwise force process batches |
| batch_span_processor.max_queue_size | integer | 2048 | maximum queue size to buffer spans for delayed processing |
| batch_span_processor.batch_timeout | number | 5 | maximum duration(second) for constructing a batch |
| batch_span_processor.max_export_batch_size | integer | 256 | maximum number of spans to process in a single batch |
| batch_span_processor.inactive_timeout | number | 2 | timer interval for processing batches |
| batch_span_processor.inactive_timeout | number | 2 | timer interval(second) for processing batches |

Here is an example:

Expand Down
8 changes: 4 additions & 4 deletions docs/zh/latest/plugins/opentelemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,18 @@ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f1

| 名称 | 类型 | 默认值 | 描述 |
| ------------ | ------ | -------- | ----------------------------------------------------- |
| x_request_id_as_trace_id | boolean | false | 使用当前请求 ID 作为新的 TraceID,必须确保当前请求 ID 是符合 TraceID 规范的:`[0-9a-f]{32}` |
| trace_id_source | enum | random | 合法的取值:`random``x-request-id`,允许使用当前请求 ID 代替随机 ID 作为新的 TraceID,必须确保当前请求 ID 是符合 TraceID 规范的:`[0-9a-f]{32}` |
| resource | object | | 追加到 trace 的额外 [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) |
| collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | 数据采集服务 |
| collector.address | string | 127.0.0.1:4317 | 数据采集服务地址 |
| collector.request_timeout | integer | 3 | 数据采集服务上报请求超时时长 |
| collector.request_timeout | integer | 3 | 数据采集服务上报请求超时时长,单位秒 |
| collector.request_headers | object | | 数据采集服务上报请求附加的 HTTP 请求头 |
| batch_span_processor | object | | trace span 处理器参数配置 |
| batch_span_processor.drop_on_queue_full | boolean | true | 当处理器缓存队列慢试,丢弃新到来的 span |
| batch_span_processor.max_queue_size | integer | 2048 | 处理器缓存队列容量最大值 |
| batch_span_processor.batch_timeout | number | 5 | 单位秒,构造一批 span 超时时长 |
| batch_span_processor.batch_timeout | number | 5 | 构造一批 span 超时时长,单位秒 |
| batch_span_processor.max_export_batch_size | integer | 256 | 一批 span 的数量,每次上报的 span 数量 |
| batch_span_processor.inactive_timeout | number | 2 | 每隔多长时间检查是否有一批 span 可以上报 |
| batch_span_processor.inactive_timeout | number | 2 | 每隔多长时间检查是否有一批 span 可以上报,单位秒 |

配置示例:

Expand Down
20 changes: 0 additions & 20 deletions t/plugin/opentelemetry.t
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ GET /opentracing
--- response_body
opentracing
--- wait: 1
--- no_error_log
[error]
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
Expand Down Expand Up @@ -195,8 +193,6 @@ opentracing
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
--- no_error_log
[error]
Expand Down Expand Up @@ -264,8 +260,6 @@ opentracing
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
--- no_error_log
[error]
Expand Down Expand Up @@ -337,8 +331,6 @@ GET /opentracing
--- response_body
opentracing
--- wait: 1
--- no_error_log
[error]
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
Expand Down Expand Up @@ -410,8 +402,6 @@ opentracing
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
--- no_error_log
[error]
Expand Down Expand Up @@ -487,8 +477,6 @@ GET /opentracing
--- response_body
opentracing
--- wait: 1
--- no_error_log
[error]
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
Expand Down Expand Up @@ -570,8 +558,6 @@ opentracing
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
--- no_error_log
[error]
Expand All @@ -583,8 +569,6 @@ traceparent: 00-00000000000000000000000000000001-0000000000000001-01
--- response_body
opentracing
--- wait: 1
--- no_error_log
[error]
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
Expand Down Expand Up @@ -670,8 +654,6 @@ GET /opentracing
--- response_body
opentracing
--- wait: 1
--- no_error_log
[error]
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
Expand All @@ -689,5 +671,3 @@ opentracing
--- grep_error_log eval
qr/opentelemetry export span/
--- grep_error_log_out
--- no_error_log
[error]

0 comments on commit 9069345

Please sign in to comment.