From a37fd4df2beb543c135d6044504cc8b9db068a67 Mon Sep 17 00:00:00 2001 From: roketyyang Date: Sun, 16 Jan 2022 19:00:50 +0800 Subject: [PATCH 01/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 275 +++++++++ docs/en/latest/plugins/opentelemetry.md | 154 +++++ docs/zh/latest/plugins/opentelemetry.md | 152 +++++ rockspec/apisix-master-0.rockspec | 1 + t/plugin/opentelemetry.t | 729 ++++++++++++++++++++++++ 5 files changed, 1311 insertions(+) create mode 100644 apisix/plugins/opentelemetry.lua create mode 100644 docs/en/latest/plugins/opentelemetry.md create mode 100644 docs/zh/latest/plugins/opentelemetry.md create mode 100644 t/plugin/opentelemetry.t diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua new file mode 100644 index 000000000000..c2f571e61cca --- /dev/null +++ b/apisix/plugins/opentelemetry.lua @@ -0,0 +1,275 @@ +local plugin_name = "opentelemetry" +local core = require("apisix.core") +local plugin = require("apisix.plugin") +local process = require("ngx.process") + +local always_off_sampler_new = require("opentelemetry.trace.sampling.always_off_sampler").new +local always_on_sampler_new = require("opentelemetry.trace.sampling.always_on_sampler").new +local parent_base_sampler_new = require("opentelemetry.trace.sampling.parent_base_sampler").new +local trace_id_ratio_sampler_new = require("opentelemetry.trace.sampling.trace_id_ratio_sampler").new + +local exporter_client_new = require("opentelemetry.trace.exporter.http_client").new +local otlp_exporter_new = require("opentelemetry.trace.exporter.otlp").new +local batch_span_processor_new = require("opentelemetry.trace.batch_span_processor").new +local id_generator = require("opentelemetry.trace.id_generator") +local tracer_provider_new = require("opentelemetry.trace.tracer_provider").new + +local span_kind = require("opentelemetry.trace.span_kind") +local span_status = require("opentelemetry.trace.span_status") +local resource_new = require("opentelemetry.resource").new +local attr = require("opentelemetry.attribute") + +local context_storage = require("opentelemetry.context_storage") +local context = require("opentelemetry.context").new(context_storage) +local carrier_new = require("opentelemetry.trace.propagation.carrier").new +local trace_context = require("opentelemetry.trace.propagation.trace_context") + +local ngx_var = ngx.var +local ngx_req = ngx.req + +local hostname + +local attr_schema = { + type = "object", + properties = { + x_request_id_as_trace_id = {type = "boolean", description = "use x-request-id as new trace id", default = false}, + resource = { + type = "object", + description = "additional resource", + additional_properties = {{type = "boolean"}, {type = "number"}, {type = "string"}}, + }, + collector = { + type = "object", + description = "otel collector", + properties = { + address = {type = "string", description = "host:port", default = "127.0.0.1:4317"}, + request_timeout = {type = "integer", description = "second uint", default = 3}, + request_headers = {type = "object", description = "http headers", + additional_properties = {one_of = {{type = "boolean"}, {type = "number"}, {type = "string"}}}} + }, + default = {address = "127.0.0.1:4317", request_timeout = 3} + }, + batch_span_processor = { + type = "object", + description = "batch span processor", + properties = { + drop_on_queue_full = {type = "boolean", description = "if true, drop span when queue is full, otherwise force process batches"}, + max_queue_size = {type = "integer", description = "maximum queue size to buffer spans for delayed processing"}, + batch_timeout = {type = "number", description = "maximum duration for constructing a batch"}, + inactive_timeout = {type = "number", description = "maximum duration for processing batches"}, + max_export_batch_size = {type = "integer", description = "maximum number of spans to process in a single batch"} + }, + default = {}, + }, + }, +} + +local schema = { + type = "object", + properties = { + sampler = { + type = "object", + properties = { + name = { + type = "string", + enum = {"always_on", "always_off", "trace_id_ratio", "parent_base"}, + title = "sampling strategy", + default = "always_off" + }, + options = { + type = "object", + properties = { + fraction = {type = "number", title = "trace_id_ratio fraction", default = 0}, + root = { + type = "object", + title = "parent_base root sampler", + properties = { + name = { + type = "string", + enum = {"always_on", "always_off", "trace_id_ratio"}, + title = "sampling strategy", + default = "always_off" + }, + options = { + type = "object", + properties = { + fraction = {type = "number", title = "trace_id_ratio fraction parameter", default = 0}, + }, + default = {fraction = 0} + } + }, + default = {name = "always_off", options = {fraction = 0}} + }, + }, + default = {fraction = 0, root = {name = "always_off"}} + } + }, + default = {name = "always_off", options = {fraction = 0, root = {name = "always_off"}}} + }, + tags = { + type = "array", + items = { + type = "object", + properties = { + position = { + type = "string", + enum = {"http", "arg", "cookie"} + }, + name = { + type = "string", minLength = 1 + } + } + } + } + } +} + +local _M = { + version = 0.1, + priority = -1100, -- last running plugin, but before serverless post func + name = plugin_name, + schema = schema, + attr_schema = attr_schema, +} + +function _M.check_schema(conf) + return core.schema.check(schema, conf) +end + +local sampler_factory +local plugin_info + +function _M.init() + if process.type() ~= "worker" then + return + end + + sampler_factory = { + always_off = always_off_sampler_new, + always_on = always_on_sampler_new, + parent_base = parent_base_sampler_new, + trace_id_ratio = trace_id_ratio_sampler_new, + } + hostname = core.utils.gethostname() + + plugin_info = plugin.plugin_attr(plugin_name) or {} + local ok, err = core.schema.check(attr_schema, plugin_info) + if not ok then + core.log.error("failed to check the plugin_attr[", plugin_name, "]", + ": ", err) + return + end + + if plugin_info.x_request_id_as_trace_id then + id_generator.new_ids = function() + return ngx_req.get_headers()["x-request-id"] or ngx_var.request_id, id_generator.new_span_id() + end + end +end + +local tracers = {} + +local function fetch_tracer(conf, ctx) + local t = tracers[ctx.route_id] + if t and t.v == ctx.conf_version then + return t.tracer + end + + -- create exporter + local exporter = otlp_exporter_new(exporter_client_new(plugin_info.collector.address, + plugin_info.collector.request_timeout, + plugin_info.collector.request_headers)) + -- create span processor + local batch_span_processor = batch_span_processor_new(exporter, plugin_info.batch_span_processor) + -- create sampler + local sampler + local sampler_name = conf.sampler.name + local sampler_options = conf.sampler.options + if sampler_name == "parent_base" then + local root_sampler + if sampler_options.root then + root_sampler = sampler_factory[sampler_options.root.name](sampler_options.root.options.fraction) + else + root_sampler = always_off_sampler_new() + end + sampler = sampler_factory[sampler_name](root_sampler) + else + sampler = sampler_factory[sampler_name](sampler_options.fraction) + end + local resource_attrs = {attr.string("hostname", hostname)} + if plugin_info.resource then + if not plugin_info.resource["service.name"] then + table.insert(resource_attrs, attr.string("service.name", "APISIX")) + end + for k, v in pairs(plugin_info.resource) do + if type(v) == "string" then + table.insert(resource_attrs, attr.string(k, v)) + end + if type(v) == "number" then + table.insert(resource_attrs, attr.double(k, v)) + end + if type(v) == "boolean" then + table.insert(resource_attrs, attr.bool(k, v)) + end + end + end + -- create tracer provider + local tp = tracer_provider_new(batch_span_processor, { + resource = resource_new(unpack(resource_attrs)), + sampler = sampler, + }) + -- create tracer + local tracer = tp:tracer("opentelemetry-lua") + tracers[ctx.route_id] = {tracer = tracer, v = ctx.conf_version} + + return tracer +end + +function _M.access(conf, ctx) + -- extract trace context from the headers of downstream HTTP request + local upstream_context = trace_context.extract(context, carrier_new()) + local attributes = {attr.string("service", ctx.service_name), attr.string("route", ctx.route_name)} + if conf.tags then + for _, tag in ipairs(conf.tags) do + local key = tag.position .. "_" .. tag.name + local val = ctx.var[key] + if val then + core.table.insert(attributes, attr.string(key, val)) + end + end + end + + local context, span = fetch_tracer(conf, ctx):start(upstream_context, ctx.var.request_uri, { + kind = span_kind.client, + attributes = attributes, + }) + context:attach() + + -- inject trace context into the headers of upstream HTTP request + trace_context.inject(context, carrier_new()) +end + +function _M.body_filter(conf, ctx) + if ngx.arg[2] then + local upstream_status = core.response.get_upstream_status(ctx) + -- get span from current context + local span = context:current():span() + if upstream_status and upstream_status >= 500 then + span:set_status(span_status.error, "upstream response status: " .. tostring(upstream_status)) + end + + span:finish() + end +end + +function _M.destroy() + if process.type() ~= "worker" then + return + end + + for _, t in pairs(tracers) do + t.tracer.provider:shutdown() + end +end + +return _M diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md new file mode 100644 index 000000000000..d92970766c10 --- /dev/null +++ b/docs/en/latest/plugins/opentelemetry.md @@ -0,0 +1,154 @@ +--- +title: opentelemetry +--- + + + +## Summary + +- [**Name**](#name) +- [**Attributes**](#attributes) +- [**How To Enable**](#how-to-enable) +- [**How to set collecting**](#how-to-set-collecting) +- [**Disable Plugin**](#disable-plugin) + +## Name + +opentelemetry report Tracing data according to [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification). + +Just support reporting in HTTP with Content-Type=application/x-protobuf, the specification: [OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 + +## Attributes + +| Name | Type | Requirement | Default | Valid | Description | +| ------------ | ------ | ------ | -------- | ------------ | ----------------------------------------------------- | +| sampler | object | optional | | | sampling config +| sampler.name | string | optional | always_off | ["always_on", "always_off", "trace_id_ratio", "parent_base"] | sampling strategy,always_on:sampling all;always_off:sampling nothing;trace_id_ratio:base trace id percentage;parent_base:use parent decision, otherwise determined by root +| sampler.options | object | optional | | {fraction = 0, root = {name = "always_off"}} | sampling strategy parameters +| sampler.options.fraction | number | optional | 0 | [0, 1] | trace_id_ratio fraction +| sampler.options.root | object | optional | {name = "always_off", options = {fraction = 0}} | | parent_base root sampler +| sampler.options.root.name | string | optional | always_off | ["always_on", "always_off", "trace_id_ratio"] | sampling strategy +| sampler.options.root.options | object | optional | {fraction = 0} | | sampling strategy parameters +| sampler.options.root.options.fraction | number | optional | 0 | [0, 1] | trace_id_ratio fraction +| tags | array[object] | optional | | | append to trace span attributes +| tags.position | string | required | | ["http", "arg", "cookie"] | where variable in +| tags.name | string | required | | | variable name + +## How To Enable + +First of all, enable the opentelemetry plugin in the `config.yaml`: + +``` +# Add this in config.yaml +plugins: + - ... # plugin you need + - opentelemetry +``` + +Then reload APISIX. + +Here's an example, enable the opentelemetry plugin on the specified route: + +```shell +curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "uris": [ + "/uid/*" + ], + "plugins": { + "opentelemetry": { + sampler": { + "name": "always_on", + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "10.110.149.175:8089": 1 + } + } +}' +``` + +## How to set collecting + +We can set the collecting by specifying the configuration in `conf/config.yaml`. + +| Name | Type | Default | Description | +| ------------ | ------ | -------- | ----------------------------------------------------- | +| x_request_id_as_trace_id | boolean | false | use current request id as new TraceID, you should make sure the request id is match regex pattern: `[0-9a-f]{32}`| +| resource | object | | additional [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) append to trace | +| collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | otlp collector | +| collector.address | string | 127.0.0.1:4317 | collector address | +| collector.request_timeout | integer | 3 | report request timeout | +| collector.request_headers | object | | report request http headers | +| batch_span_processor | object | | trace span processor | +| batch_span_processor.drop_on_queue_full | boolean | true | drop span when queue is full, otherwise force process batches | +| batch_span_processor.max_queue_size | integer | 2048 | maximum queue size to buffer spans for delayed processing | +| batch_span_processor.batch_timeout | number | 5 | maximum duration(second) for constructing a batch | +| batch_span_processor.max_export_batch_size | integer | 256 | maximum number of spans to process in a single batch | +| batch_span_processor.inactive_timeout | number | 2 | timer interval for processing batches | + +Here is an example: + +```yaml +plugin_attr: + opentelemetry: + resource: + service.name: APISIX + tenant.id: business_id + collector: + address: 192.168.8.211:4317 + request_timeout: 3 + request_headers: + foo: bar + batch_span_processor: + drop_on_queue_full: false + max_queue_size: 6 + batch_timeout: 2 + inactive_timeout: 1 + max_export_batch_size: 2 +``` + +## Disable Plugin + +When you want to disable the opentelemetry plugin on a route/service, it is very simple, +you can delete the corresponding JSON configuration in the plugin configuration, +no need to restart the service, it will take effect immediately: + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "uris": [ + "/uid/*" + ], + "plugins": { + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "10.110.149.175:8089": 1 + } + } +}' +``` diff --git a/docs/zh/latest/plugins/opentelemetry.md b/docs/zh/latest/plugins/opentelemetry.md new file mode 100644 index 000000000000..29feabb988b2 --- /dev/null +++ b/docs/zh/latest/plugins/opentelemetry.md @@ -0,0 +1,152 @@ +--- +title: opentelemetry +--- + + + +## 目录 + +- [名字](#名字) +- [属性](#属性) +- [如何启用](#如何启用) +- [如何设置数据上报](#如何设置数据上报) +- [禁用插件](#禁用插件) + +## 名字 + +opentelemetry 提供符合 [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification) 协议规范的 Tracing 数据上报。 + +只支持 HTTP 协议 application/x-protobuf 类型的数据上报,相关协议标准:[OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 + +## 属性 + +| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | +| ------------ | ------ | ------ | -------- | ------------ | ----------------------------------------------------- | +| sampler | object | 可选 | | | 采样配置 +| sampler.name | string | 可选 | always_off | ["always_on", "always_off", "trace_id_ratio", "parent_base"] | 采样算法,always_on:全采样;always_off:不采样;trace_id_ratio:基于 trace id 的百分比采样;parent_base:如果存在 tracing 上游,则使用上游的采样决定,否则使用配置的采样算法决策 +| sampler.options | object | 可选 | | {fraction = 0, root = {name = "always_off"}} | 采样算法参数 +| sampler.options.fraction | number | 可选 | 0 | [0, 1] | trace_id_ratio 采样算法的百分比 +| sampler.options.root | object | 可选 | {name = "always_off", options = {fraction = 0}} | | parent_base 采样算法在没有上游 tracing 时,会使用 root 采样算法做决策 +| sampler.options.root.name | string | 可选 | always_off | ["always_on", "always_off", "trace_id_ratio"] | 采样算法 +| sampler.options.root.options | object | 可选 | {fraction = 0} | | 采样算法参数 +| sampler.options.root.options.fraction | number | 可选 | 0 | [0, 1] | trace_id_ratio 采样算法的百分比 +| tags | array[object] | 可选 | | | 追加到 trace span 的属性 +| tags.position | string | 必须 | | ["http", "arg", "cookie"] | 变量的位置 +| tags.name | string | 必须 | | | 变量的名称 + +## 如何启用 + +首先,你需要在 `config.yaml` 里面启用 opentelemetry 插件: + +``` +# 加到 config.yaml +plugins: + - ... # plugin you need + - opentelemetry +``` + +然后重载 APISIX。 + +下面是一个示例,在指定的 route 上开启了 opentelemetry 插件: + +```shell +curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "uris": [ + "/uid/*" + ], + "plugins": { + "opentelemetry": { + sampler": { + "name": "always_on", + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "10.110.149.175:8089": 1 + } + } +}' +``` + +## 如何设置数据上报 + +我们可以通过指定 `conf/config.yaml` 中的配置来设置数据上报: + +| 名称 | 类型 | 默认值 | 描述 | +| ------------ | ------ | -------- | ----------------------------------------------------- | +| x_request_id_as_trace_id | boolean | false | 使用当前请求 ID 作为新的 TraceID,必须确保当前请求 ID 是符合 TraceID 规范的:`[0-9a-f]{32}` | +| resource | object | | 追加到 trace 的额外 [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) | +| collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | 数据采集服务 | +| collector.address | string | 127.0.0.1:4317 | 数据采集服务地址 | +| collector.request_timeout | integer | 3 | 数据采集服务上报请求超时时长 | +| collector.request_headers | object | | 数据采集服务上报请求附加的 HTTP 请求头 | +| batch_span_processor | object | | trace span 处理器参数配置 | +| batch_span_processor.drop_on_queue_full | boolean | true | 当处理器缓存队列慢试,丢弃新到来的 span | +| batch_span_processor.max_queue_size | integer | 2048 | 处理器缓存队列容量最大值 | +| batch_span_processor.batch_timeout | number | 5 | 单位秒,构造一批 span 超时时长 | +| batch_span_processor.max_export_batch_size | integer | 256 | 一批 span 的数量,每次上报的 span 数量 | +| batch_span_processor.inactive_timeout | number | 2 | 每隔多长时间检查是否有一批 span 可以上报 | + +配置示例: + +```yaml +plugin_attr: + opentelemetry: + resource: + service.name: APISIX + tenant.id: business_id + collector: + address: 192.168.8.211:4317 + request_timeout: 3 + request_headers: + foo: bar + batch_span_processor: + drop_on_queue_full: false + max_queue_size: 6 + batch_timeout: 2 + inactive_timeout: 1 + max_export_batch_size: 2 +``` + +## 禁用插件 + +当你想禁用一条路由/服务上的 opentelemetry 插件的时候,很简单,在插件的配置中把对应的 JSON 配置删除即可,无须重启服务,即刻生效: + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "uris": [ + "/uid/*" + ], + "plugins": { + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "10.110.149.175:8089": 1 + } + } +}' +``` diff --git a/rockspec/apisix-master-0.rockspec b/rockspec/apisix-master-0.rockspec index 6e391d0f4863..3865a63ddc43 100644 --- a/rockspec/apisix-master-0.rockspec +++ b/rockspec/apisix-master-0.rockspec @@ -73,6 +73,7 @@ dependencies = { "inspect == 3.1.1", "lualdap = 1.2.6-1", "lua-resty-rocketmq = 0.3.0-0", + "opentelemetry-lua = 0.1-1", } build = { diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t new file mode 100644 index 000000000000..27c88c149fdf --- /dev/null +++ b/t/plugin/opentelemetry.t @@ -0,0 +1,729 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +use t::APISIX 'no_plan'; + +add_block_preprocessor(sub { + my ($block) = @_; + + my $extra_yaml_config = <<_EOC_; +plugins: + - opentelemetry +plugin_attr: + opentelemetry: + batch_span_processor: + max_export_batch_size: 1 + inactive_timeout: 0.5 +_EOC_ + + $block->set_value("extra_yaml_config", $extra_yaml_config); + + my $extra_init_by_lua = <<_EOC_; + -- mock exporter http client + local client = require("opentelemetry.trace.exporter.http_client") + client.do_request = function() + ngx.log(ngx.INFO, "opentelemetry export span") + end +_EOC_ + + $block->set_value("extra_init_by_lua", $extra_init_by_lua); + + $block; +}); + +repeat_each(1); +no_long_string(); +no_root_location(); +log_level("debug"); + +run_tests; + +__DATA__ + +=== TEST 1: add plugin +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 2: trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- wait: 1 +--- no_error_log +[error] +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 3: use default always_off sampler +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 4: not trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +--- no_error_log +[error] + + + +=== TEST 5: use trace_id_ratio sampler, default fraction = 0 +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "trace_id_ratio" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "trace_id_ratio" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 6: not trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +--- no_error_log +[error] + + + +=== TEST 7: use trace_id_ratio sampler, fraction = 1.0 +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "trace_id_ratio", + "options": { + "fraction": 1.0 + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "trace_id_ratio", + "options": { + "fraction": 1.0 + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 8: trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- wait: 1 +--- no_error_log +[error] +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 9: use parent_base sampler, default root sampler = always_off +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base" + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 10: not trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +--- no_error_log +[error] + + + +=== TEST 11: use parent_base sampler, root sampler = always_on +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "always_on" + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "always_on" + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 12: trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- wait: 1 +--- no_error_log +[error] +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 13: use parent_base sampler, root sampler = trace_id_ratio with default fraction = 0 +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "trace_id_ratio" + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "trace_id_ratio" + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 14: not trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +--- no_error_log +[error] + + + +=== TEST 15: trigger opentelemetry, trace_flag = 1 +--- request +GET /opentracing +--- more_headers +traceparent: 00-00000000000000000000000000000001-0000000000000001-01 +--- response_body +opentracing +--- wait: 1 +--- no_error_log +[error] +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 16: use parent_base sampler, root sampler = trace_id_ratio with fraction = 1 +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "trace_id_ratio", + "options": { + "fraction": 1.0 + } + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]], + [[{ + "node": { + "value": { + "plugins": { + "opentelemetry": { + "sampler": { + "name": "parent_base", + "options": { + "root": { + "name": "trace_id_ratio", + "options": { + "fraction": 1.0 + } + } + } + } + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }, + "key": "/apisix/routes/1" + }, + "action": "set" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 17: trigger opentelemetry +--- request +GET /opentracing +--- response_body +opentracing +--- wait: 1 +--- no_error_log +[error] +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 18: not trigger opentelemetry, trace_flag = 0 +--- request +GET /opentracing +--- more_headers +traceparent: 00-00000000000000000000000000000001-0000000000000001-00 +--- response_body +opentracing +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +--- no_error_log +[error] From eaa9685e13eed94b4e955218a4f948a98fe2315f Mon Sep 17 00:00:00 2001 From: roketyyang Date: Sun, 16 Jan 2022 22:35:43 +0800 Subject: [PATCH 02/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 99 +++++++++++++++++++++++++------- docs/en/latest/config.json | 1 + 2 files changed, 78 insertions(+), 22 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index c2f571e61cca..688ab7abe01c 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -1,3 +1,19 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- local plugin_name = "opentelemetry" local core = require("apisix.core") local plugin = require("apisix.plugin") @@ -6,7 +22,8 @@ local process = require("ngx.process") local always_off_sampler_new = require("opentelemetry.trace.sampling.always_off_sampler").new local always_on_sampler_new = require("opentelemetry.trace.sampling.always_on_sampler").new local parent_base_sampler_new = require("opentelemetry.trace.sampling.parent_base_sampler").new -local trace_id_ratio_sampler_new = require("opentelemetry.trace.sampling.trace_id_ratio_sampler").new +local trace_id_ratio_sampler_new = + require("opentelemetry.trace.sampling.trace_id_ratio_sampler").new local exporter_client_new = require("opentelemetry.trace.exporter.http_client").new local otlp_exporter_new = require("opentelemetry.trace.exporter.otlp").new @@ -32,7 +49,11 @@ local hostname local attr_schema = { type = "object", properties = { - x_request_id_as_trace_id = {type = "boolean", description = "use x-request-id as new trace id", default = false}, + x_request_id_as_trace_id = { + type = "boolean", + description = "use x-request-id as new trace id", + default = false, + }, resource = { type = "object", description = "additional resource", @@ -44,8 +65,13 @@ local attr_schema = { properties = { address = {type = "string", description = "host:port", default = "127.0.0.1:4317"}, request_timeout = {type = "integer", description = "second uint", default = 3}, - request_headers = {type = "object", description = "http headers", - additional_properties = {one_of = {{type = "boolean"}, {type = "number"}, {type = "string"}}}} + request_headers = { + type = "object", + description = "http headers", + additional_properties = { + one_of = {{type = "boolean"},{type = "number"}, {type = "string"}}, + }, + } }, default = {address = "127.0.0.1:4317", request_timeout = 3} }, @@ -53,11 +79,27 @@ local attr_schema = { type = "object", description = "batch span processor", properties = { - drop_on_queue_full = {type = "boolean", description = "if true, drop span when queue is full, otherwise force process batches"}, - max_queue_size = {type = "integer", description = "maximum queue size to buffer spans for delayed processing"}, - batch_timeout = {type = "number", description = "maximum duration for constructing a batch"}, - inactive_timeout = {type = "number", description = "maximum duration for processing batches"}, - max_export_batch_size = {type = "integer", description = "maximum number of spans to process in a single batch"} + drop_on_queue_full = { + type = "boolean", + description = "if true, drop span when queue is full," + .. " otherwise force process batches", + }, + max_queue_size = { + type = "integer", + description = "maximum queue size to buffer spans for delayed processing", + }, + batch_timeout = { + type = "number", + description = "maximum duration for constructing a batch", + }, + inactive_timeout = { + type = "number", + description = "maximum duration for processing batches", + }, + max_export_batch_size = { + type = "integer", + description = "maximum number of spans to process in a single batch", + } }, default = {}, }, @@ -79,7 +121,9 @@ local schema = { options = { type = "object", properties = { - fraction = {type = "number", title = "trace_id_ratio fraction", default = 0}, + fraction = { + type = "number", title = "trace_id_ratio fraction", default = 0 + }, root = { type = "object", title = "parent_base root sampler", @@ -93,7 +137,11 @@ local schema = { options = { type = "object", properties = { - fraction = {type = "number", title = "trace_id_ratio fraction parameter", default = 0}, + fraction = { + type = "number", + title = "trace_id_ratio fraction parameter", + default = 0, + }, }, default = {fraction = 0} } @@ -126,7 +174,7 @@ local schema = { local _M = { version = 0.1, - priority = -1100, -- last running plugin, but before serverless post func + priority = -1200, -- last running plugin, but before serverless post func name = plugin_name, schema = schema, attr_schema = attr_schema, @@ -162,7 +210,8 @@ function _M.init() if plugin_info.x_request_id_as_trace_id then id_generator.new_ids = function() - return ngx_req.get_headers()["x-request-id"] or ngx_var.request_id, id_generator.new_span_id() + local trace_id = ngx_req.get_headers()["x-request-id"] or ngx_var.request_id + return trace_id, id_generator.new_span_id() end end end @@ -180,7 +229,8 @@ local function fetch_tracer(conf, ctx) plugin_info.collector.request_timeout, plugin_info.collector.request_headers)) -- create span processor - local batch_span_processor = batch_span_processor_new(exporter, plugin_info.batch_span_processor) + local batch_span_processor = batch_span_processor_new(exporter, + plugin_info.batch_span_processor) -- create sampler local sampler local sampler_name = conf.sampler.name @@ -188,7 +238,8 @@ local function fetch_tracer(conf, ctx) if sampler_name == "parent_base" then local root_sampler if sampler_options.root then - root_sampler = sampler_factory[sampler_options.root.name](sampler_options.root.options.fraction) + local name, fraction = sampler_options.root.name, sampler_options.root.options.fraction + root_sampler = sampler_factory[name](fraction) else root_sampler = always_off_sampler_new() end @@ -225,28 +276,31 @@ local function fetch_tracer(conf, ctx) return tracer end -function _M.access(conf, ctx) +function _M.access(conf, api_ctx) -- extract trace context from the headers of downstream HTTP request local upstream_context = trace_context.extract(context, carrier_new()) - local attributes = {attr.string("service", ctx.service_name), attr.string("route", ctx.route_name)} + local attributes = { + attr.string("service", api_ctx.service_name), + attr.string("route", api_ctx.route_name), + } if conf.tags then for _, tag in ipairs(conf.tags) do local key = tag.position .. "_" .. tag.name - local val = ctx.var[key] + local val = api_ctx.var[key] if val then core.table.insert(attributes, attr.string(key, val)) end end end - local context, span = fetch_tracer(conf, ctx):start(upstream_context, ctx.var.request_uri, { + local ctx, _ = fetch_tracer(conf, api_ctx):start(upstream_context, api_ctx.var.request_uri, { kind = span_kind.client, attributes = attributes, }) - context:attach() + ctx:attach() -- inject trace context into the headers of upstream HTTP request - trace_context.inject(context, carrier_new()) + trace_context.inject(ctx, carrier_new()) end function _M.body_filter(conf, ctx) @@ -255,7 +309,8 @@ function _M.body_filter(conf, ctx) -- get span from current context local span = context:current():span() if upstream_status and upstream_status >= 500 then - span:set_status(span_status.error, "upstream response status: " .. tostring(upstream_status)) + span:set_status(span_status.error, + "upstream response status: " .. tostring(upstream_status)) end span:finish() diff --git a/docs/en/latest/config.json b/docs/en/latest/config.json index dcbfe682cb0c..0409a898fbad 100644 --- a/docs/en/latest/config.json +++ b/docs/en/latest/config.json @@ -105,6 +105,7 @@ "plugins/prometheus", "plugins/zipkin", "plugins/skywalking", + "plugins/opentelemetry", "plugins/node-status", "plugins/datadog" ] From fc7e9f916413a9a6a86a97047dd250c87afcc1c8 Mon Sep 17 00:00:00 2001 From: roketyyang Date: Sun, 16 Jan 2022 23:33:04 +0800 Subject: [PATCH 03/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 8 +++- docs/en/latest/plugins/opentelemetry.md | 6 +-- docs/zh/latest/plugins/opentelemetry.md | 6 +-- t/plugin/opentelemetry.t | 60 +++++-------------------- 4 files changed, 25 insertions(+), 55 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index 688ab7abe01c..9d3f7971b81c 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -41,8 +41,14 @@ local context = require("opentelemetry.context").new(context_storage) local carrier_new = require("opentelemetry.trace.propagation.carrier").new local trace_context = require("opentelemetry.trace.propagation.trace_context") +local ngx = ngx local ngx_var = ngx.var local ngx_req = ngx.req +local table = table +local type = type +local pairs = pairs +local ipairs = ipairs +local unpack = unpack local hostname @@ -310,7 +316,7 @@ function _M.body_filter(conf, ctx) local span = context:current():span() if upstream_status and upstream_status >= 500 then span:set_status(span_status.error, - "upstream response status: " .. tostring(upstream_status)) + "upstream response status: " .. upstream_status) end span:finish() diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md index d92970766c10..721c5d4852bd 100644 --- a/docs/en/latest/plugins/opentelemetry.md +++ b/docs/en/latest/plugins/opentelemetry.md @@ -31,9 +31,9 @@ title: opentelemetry ## Name -opentelemetry report Tracing data according to [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification). +[OpenTelemetry](https://opentelemetry.io/) report Tracing data according to [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification). -Just support reporting in HTTP with Content-Type=application/x-protobuf, the specification: [OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 +Just support reporting in `HTTP` with `Content-Type=application/x-protobuf`, the specification: [OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 ## Attributes @@ -55,7 +55,7 @@ Just support reporting in HTTP with Content-Type=application/x-protobuf, the spe First of all, enable the opentelemetry plugin in the `config.yaml`: -``` +```yaml # Add this in config.yaml plugins: - ... # plugin you need diff --git a/docs/zh/latest/plugins/opentelemetry.md b/docs/zh/latest/plugins/opentelemetry.md index 29feabb988b2..93a3753973f2 100644 --- a/docs/zh/latest/plugins/opentelemetry.md +++ b/docs/zh/latest/plugins/opentelemetry.md @@ -31,9 +31,9 @@ title: opentelemetry ## 名字 -opentelemetry 提供符合 [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification) 协议规范的 Tracing 数据上报。 +[OpenTelemetry](https://opentelemetry.io/) 提供符合 [opentelemetry specification](https://github.com/open-telemetry/opentelemetry-specification) 协议规范的 Tracing 数据上报。 -只支持 HTTP 协议 application/x-protobuf 类型的数据上报,相关协议标准:[OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 +只支持 `HTTP` 协议,且请求类型为 `application/x-protobuf` 的数据上报,相关协议标准:[OTLP/HTTP Request](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp-request)。 ## 属性 @@ -55,7 +55,7 @@ opentelemetry 提供符合 [opentelemetry specification](https://github.com/open 首先,你需要在 `config.yaml` 里面启用 opentelemetry 插件: -``` +```yaml # 加到 config.yaml plugins: - ... # plugin you need diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 27c88c149fdf..99d078e85a76 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -42,6 +42,18 @@ _EOC_ $block->set_value("extra_init_by_lua", $extra_init_by_lua); + if (!$block->request) { + $block->set_value("request", "GET /t"); + } + + if (!$block->response_body) { + $block->set_value("response_body", "passed\n"); + } + + if (!$block->no_error_log && !$block->error_log) { + $block->set_value("no_error_log", "[error]"); + } + $block; }); @@ -107,12 +119,6 @@ __DATA__ ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -178,12 +184,6 @@ opentelemetry export span ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -253,12 +253,6 @@ qr/opentelemetry export span/ ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -334,12 +328,6 @@ qr/opentelemetry export span/ ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -411,12 +399,6 @@ opentelemetry export span ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -496,12 +478,6 @@ qr/opentelemetry export span/ ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -583,12 +559,6 @@ opentelemetry export span ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] @@ -691,12 +661,6 @@ opentelemetry export span ngx.say(body) } } ---- request -GET /t ---- response_body -passed ---- no_error_log -[error] From 9f5ad21f1aee90a44eec14f2fc7e5a82258da741 Mon Sep 17 00:00:00 2001 From: roketyyang Date: Mon, 17 Jan 2022 09:59:31 +0800 Subject: [PATCH 04/11] feature: add opentelemetry plugin. (#3891) --- docs/zh/latest/config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/zh/latest/config.json b/docs/zh/latest/config.json index 7ac810ca42d9..3ded6efb9188 100644 --- a/docs/zh/latest/config.json +++ b/docs/zh/latest/config.json @@ -103,6 +103,7 @@ "plugins/prometheus", "plugins/zipkin", "plugins/skywalking", + "plugins/opentelemetry", "plugins/node-status" ] }, From 90693450ae50152369f5692f5b43db0d74b1ca0d Mon Sep 17 00:00:00 2001 From: roketyyang Date: Mon, 17 Jan 2022 20:28:03 +0800 Subject: [PATCH 05/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 77 +++++++++++-------------- conf/config-default.yaml | 1 + docs/en/latest/plugins/opentelemetry.md | 11 ++-- docs/zh/latest/plugins/opentelemetry.md | 8 +-- t/plugin/opentelemetry.t | 20 ------- 5 files changed, 43 insertions(+), 74 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index 9d3f7971b81c..dc2901e4947b 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -50,31 +50,35 @@ local pairs = pairs local ipairs = ipairs local unpack = unpack -local hostname +local lrucache = core.lrucache.new({ + type = 'plugin', count = 128, ttl = 24 * 60 * 60, +}) + local attr_schema = { type = "object", properties = { - x_request_id_as_trace_id = { - type = "boolean", - description = "use x-request-id as new trace id", - default = false, + trace_id_source = { + type = "string", + enum = {"x-request-id", "random"}, + description = "alternate use x-request-id as trace id", + default = "random", }, resource = { type = "object", description = "additional resource", - additional_properties = {{type = "boolean"}, {type = "number"}, {type = "string"}}, + additionalProperties = {{type = "boolean"}, {type = "number"}, {type = "string"}}, }, collector = { type = "object", - description = "otel collector", + description = "opentelemetry collector", properties = { address = {type = "string", description = "host:port", default = "127.0.0.1:4317"}, request_timeout = {type = "integer", description = "second uint", default = 3}, request_headers = { type = "object", description = "http headers", - additional_properties = { + additionalProperties = { one_of = {{type = "boolean"},{type = "number"}, {type = "string"}}, }, } @@ -160,24 +164,17 @@ local schema = { }, default = {name = "always_off", options = {fraction = 0, root = {name = "always_off"}}} }, - tags = { + additional_attributes = { type = "array", items = { - type = "object", - properties = { - position = { - type = "string", - enum = {"http", "arg", "cookie"} - }, - name = { - type = "string", minLength = 1 - } - } + type = "string", + minLength = 1, } } } } + local _M = { version = 0.1, priority = -1200, -- last running plugin, but before serverless post func @@ -186,10 +183,13 @@ local _M = { attr_schema = attr_schema, } + function _M.check_schema(conf) return core.schema.check(schema, conf) end + +local hostname local sampler_factory local plugin_info @@ -214,7 +214,7 @@ function _M.init() return end - if plugin_info.x_request_id_as_trace_id then + if plugin_info.trace_id_source == "x-request-id" then id_generator.new_ids = function() local trace_id = ngx_req.get_headers()["x-request-id"] or ngx_var.request_id return trace_id, id_generator.new_span_id() @@ -222,14 +222,8 @@ function _M.init() end end -local tracers = {} - -local function fetch_tracer(conf, ctx) - local t = tracers[ctx.route_id] - if t and t.v == ctx.conf_version then - return t.tracer - end +local function create_tracer_obj(conf) -- create exporter local exporter = otlp_exporter_new(exporter_client_new(plugin_info.collector.address, plugin_info.collector.request_timeout, @@ -276,22 +270,25 @@ local function fetch_tracer(conf, ctx) sampler = sampler, }) -- create tracer - local tracer = tp:tracer("opentelemetry-lua") - tracers[ctx.route_id] = {tracer = tracer, v = ctx.conf_version} - - return tracer + return tp:tracer("opentelemetry-lua") end + function _M.access(conf, api_ctx) + local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, create_tracer_obj, conf) + if not tracer then + core.log.error("failed to fetch tracer object: ", err) + return + end + -- extract trace context from the headers of downstream HTTP request local upstream_context = trace_context.extract(context, carrier_new()) local attributes = { attr.string("service", api_ctx.service_name), attr.string("route", api_ctx.route_name), } - if conf.tags then - for _, tag in ipairs(conf.tags) do - local key = tag.position .. "_" .. tag.name + if conf.additional_attributes then + for _, key in ipairs(conf.additional_attributes) do local val = api_ctx.var[key] if val then core.table.insert(attributes, attr.string(key, val)) @@ -299,7 +296,7 @@ function _M.access(conf, api_ctx) end end - local ctx, _ = fetch_tracer(conf, api_ctx):start(upstream_context, api_ctx.var.request_uri, { + local ctx, _ = tracer:start(upstream_context, api_ctx.var.request_uri, { kind = span_kind.client, attributes = attributes, }) @@ -309,6 +306,7 @@ function _M.access(conf, api_ctx) trace_context.inject(ctx, carrier_new()) end + function _M.body_filter(conf, ctx) if ngx.arg[2] then local upstream_status = core.response.get_upstream_status(ctx) @@ -323,14 +321,5 @@ function _M.body_filter(conf, ctx) end end -function _M.destroy() - if process.type() ~= "worker" then - return - end - - for _, t in pairs(tracers) do - t.tracer.provider:shutdown() - end -end return _M diff --git a/conf/config-default.yaml b/conf/config-default.yaml index cbf97dd66931..65506083aded 100644 --- a/conf/config-default.yaml +++ b/conf/config-default.yaml @@ -379,6 +379,7 @@ plugins: # plugin list (sorted by priority) # <- recommend to use priority (0, 100) for your custom plugins - example-plugin # priority: 0 #- skywalking # priority: -1100 + #- opentelemetry # priority: -1200 - aws-lambda # priority: -1899 - azure-functions # priority: -1900 - openwhisk # priority: -1901 diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md index 721c5d4852bd..9183e276a395 100644 --- a/docs/en/latest/plugins/opentelemetry.md +++ b/docs/en/latest/plugins/opentelemetry.md @@ -47,9 +47,8 @@ Just support reporting in `HTTP` with `Content-Type=application/x-protobuf`, the | sampler.options.root.name | string | optional | always_off | ["always_on", "always_off", "trace_id_ratio"] | sampling strategy | sampler.options.root.options | object | optional | {fraction = 0} | | sampling strategy parameters | sampler.options.root.options.fraction | number | optional | 0 | [0, 1] | trace_id_ratio fraction -| tags | array[object] | optional | | | append to trace span attributes -| tags.position | string | required | | ["http", "arg", "cookie"] | where variable in -| tags.name | string | required | | | variable name +| additional_attributes | array[string] | optional | | | append to trace span attributes +| additional_attributes[0] | string | required | | | key of ctx.var ## How To Enable @@ -95,18 +94,18 @@ We can set the collecting by specifying the configuration in `conf/config.yaml`. | Name | Type | Default | Description | | ------------ | ------ | -------- | ----------------------------------------------------- | -| x_request_id_as_trace_id | boolean | false | use current request id as new TraceID, you should make sure the request id is match regex pattern: `[0-9a-f]{32}`| +| trace_id_source | enum | random | alternate use x-request-id as trace id, valid value is `random` or `x-request-id`, if use `x-request-id`, please make sure it match regex pattern `[0-9a-f]{32}` | | resource | object | | additional [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) append to trace | | collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | otlp collector | | collector.address | string | 127.0.0.1:4317 | collector address | -| collector.request_timeout | integer | 3 | report request timeout | +| collector.request_timeout | integer | 3 | report request timeout(second) | | collector.request_headers | object | | report request http headers | | batch_span_processor | object | | trace span processor | | batch_span_processor.drop_on_queue_full | boolean | true | drop span when queue is full, otherwise force process batches | | batch_span_processor.max_queue_size | integer | 2048 | maximum queue size to buffer spans for delayed processing | | batch_span_processor.batch_timeout | number | 5 | maximum duration(second) for constructing a batch | | batch_span_processor.max_export_batch_size | integer | 256 | maximum number of spans to process in a single batch | -| batch_span_processor.inactive_timeout | number | 2 | timer interval for processing batches | +| batch_span_processor.inactive_timeout | number | 2 | timer interval(second) for processing batches | Here is an example: diff --git a/docs/zh/latest/plugins/opentelemetry.md b/docs/zh/latest/plugins/opentelemetry.md index 93a3753973f2..d0e3e98b6f67 100644 --- a/docs/zh/latest/plugins/opentelemetry.md +++ b/docs/zh/latest/plugins/opentelemetry.md @@ -95,18 +95,18 @@ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f1 | 名称 | 类型 | 默认值 | 描述 | | ------------ | ------ | -------- | ----------------------------------------------------- | -| x_request_id_as_trace_id | boolean | false | 使用当前请求 ID 作为新的 TraceID,必须确保当前请求 ID 是符合 TraceID 规范的:`[0-9a-f]{32}` | +| trace_id_source | enum | random | 合法的取值:`random` 或 `x-request-id`,允许使用当前请求 ID 代替随机 ID 作为新的 TraceID,必须确保当前请求 ID 是符合 TraceID 规范的:`[0-9a-f]{32}` | | resource | object | | 追加到 trace 的额外 [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) | | collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | 数据采集服务 | | collector.address | string | 127.0.0.1:4317 | 数据采集服务地址 | -| collector.request_timeout | integer | 3 | 数据采集服务上报请求超时时长 | +| collector.request_timeout | integer | 3 | 数据采集服务上报请求超时时长,单位秒 | | collector.request_headers | object | | 数据采集服务上报请求附加的 HTTP 请求头 | | batch_span_processor | object | | trace span 处理器参数配置 | | batch_span_processor.drop_on_queue_full | boolean | true | 当处理器缓存队列慢试,丢弃新到来的 span | | batch_span_processor.max_queue_size | integer | 2048 | 处理器缓存队列容量最大值 | -| batch_span_processor.batch_timeout | number | 5 | 单位秒,构造一批 span 超时时长 | +| batch_span_processor.batch_timeout | number | 5 | 构造一批 span 超时时长,单位秒 | | batch_span_processor.max_export_batch_size | integer | 256 | 一批 span 的数量,每次上报的 span 数量 | -| batch_span_processor.inactive_timeout | number | 2 | 每隔多长时间检查是否有一批 span 可以上报 | +| batch_span_processor.inactive_timeout | number | 2 | 每隔多长时间检查是否有一批 span 可以上报,单位秒 | 配置示例: diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 99d078e85a76..0f54a12692a9 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -128,8 +128,6 @@ GET /opentracing --- response_body opentracing --- wait: 1 ---- no_error_log -[error] --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out @@ -195,8 +193,6 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out ---- no_error_log -[error] @@ -264,8 +260,6 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out ---- no_error_log -[error] @@ -337,8 +331,6 @@ GET /opentracing --- response_body opentracing --- wait: 1 ---- no_error_log -[error] --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out @@ -410,8 +402,6 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out ---- no_error_log -[error] @@ -487,8 +477,6 @@ GET /opentracing --- response_body opentracing --- wait: 1 ---- no_error_log -[error] --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out @@ -570,8 +558,6 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out ---- no_error_log -[error] @@ -583,8 +569,6 @@ traceparent: 00-00000000000000000000000000000001-0000000000000001-01 --- response_body opentracing --- wait: 1 ---- no_error_log -[error] --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out @@ -670,8 +654,6 @@ GET /opentracing --- response_body opentracing --- wait: 1 ---- no_error_log -[error] --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out @@ -689,5 +671,3 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out ---- no_error_log -[error] From 84084d3b7122e083f48cad1443ac0c08297f39b4 Mon Sep 17 00:00:00 2001 From: roketyyang Date: Tue, 18 Jan 2022 21:33:43 +0800 Subject: [PATCH 06/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 26 +- docs/en/latest/plugins/opentelemetry.md | 6 +- docs/zh/latest/plugins/opentelemetry.md | 5 +- t/plugin/opentelemetry.t | 440 +++++++++++++----------- 4 files changed, 263 insertions(+), 214 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index dc2901e4947b..4d61ef13d3ab 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -61,7 +61,7 @@ local attr_schema = { trace_id_source = { type = "string", enum = {"x-request-id", "random"}, - description = "alternate use x-request-id as trace id", + description = "the source of trace id", default = "random", }, resource = { @@ -307,11 +307,14 @@ function _M.access(conf, api_ctx) end -function _M.body_filter(conf, ctx) +function _M.body_filter(conf, api_ctx) if ngx.arg[2] then - local upstream_status = core.response.get_upstream_status(ctx) + local upstream_status = core.response.get_upstream_status(api_ctx) + local ctx = context:current(); + ctx:detach() + -- get span from current context - local span = context:current():span() + local span = ctx:span() if upstream_status and upstream_status >= 500 then span:set_status(span_status.error, "upstream response status: " .. upstream_status) @@ -322,4 +325,19 @@ function _M.body_filter(conf, ctx) end +function _M.log(conf, api_ctx) + local ctx = context:current(); + if ctx then + -- get span from current context + local span = ctx:span() + if upstream_status and upstream_status >= 500 then + span:set_status(span_status.error, + "upstream response status: " .. upstream_status) + end + + span:finish() + end +end + + return _M diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md index 9183e276a395..4f7d599cc381 100644 --- a/docs/en/latest/plugins/opentelemetry.md +++ b/docs/en/latest/plugins/opentelemetry.md @@ -47,8 +47,8 @@ Just support reporting in `HTTP` with `Content-Type=application/x-protobuf`, the | sampler.options.root.name | string | optional | always_off | ["always_on", "always_off", "trace_id_ratio"] | sampling strategy | sampler.options.root.options | object | optional | {fraction = 0} | | sampling strategy parameters | sampler.options.root.options.fraction | number | optional | 0 | [0, 1] | trace_id_ratio fraction -| additional_attributes | array[string] | optional | | | append to trace span attributes -| additional_attributes[0] | string | required | | | key of ctx.var +| additional_attributes | array[string] | optional | | | attributes (variable and its value) which will be appended to the trace span +| additional_attributes[0] | string | required | | | APISIX or Nginx variable, like `http_header` or `route_id` ## How To Enable @@ -94,7 +94,7 @@ We can set the collecting by specifying the configuration in `conf/config.yaml`. | Name | Type | Default | Description | | ------------ | ------ | -------- | ----------------------------------------------------- | -| trace_id_source | enum | random | alternate use x-request-id as trace id, valid value is `random` or `x-request-id`, if use `x-request-id`, please make sure it match regex pattern `[0-9a-f]{32}` | +| trace_id_source | enum | random | the source of trace id, the valid value is `random` or `x-request-id`. If `x-request-id` is set, the value of `x-request-id` request header will be used as trace id. Please make sure it match regex pattern `[0-9a-f]{32}` | | resource | object | | additional [resource](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/sdk.md) append to trace | | collector | object | {address = "127.0.0.1:4317", request_timeout = 3} | otlp collector | | collector.address | string | 127.0.0.1:4317 | collector address | diff --git a/docs/zh/latest/plugins/opentelemetry.md b/docs/zh/latest/plugins/opentelemetry.md index d0e3e98b6f67..fbf784e3095e 100644 --- a/docs/zh/latest/plugins/opentelemetry.md +++ b/docs/zh/latest/plugins/opentelemetry.md @@ -47,9 +47,8 @@ title: opentelemetry | sampler.options.root.name | string | 可选 | always_off | ["always_on", "always_off", "trace_id_ratio"] | 采样算法 | sampler.options.root.options | object | 可选 | {fraction = 0} | | 采样算法参数 | sampler.options.root.options.fraction | number | 可选 | 0 | [0, 1] | trace_id_ratio 采样算法的百分比 -| tags | array[object] | 可选 | | | 追加到 trace span 的属性 -| tags.position | string | 必须 | | ["http", "arg", "cookie"] | 变量的位置 -| tags.name | string | 必须 | | | 变量的名称 +| additional_attributes | array[string] | optional | | | 追加到 trace span 的额外属性(变量名为 key,变量值为 value) +| additional_attributes[0] | string | required | | | APISIX or Nginx 变量,例如 `http_header` or `route_id` ## 如何启用 diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 0f54a12692a9..83a9dce8e9d7 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -20,7 +20,8 @@ use t::APISIX 'no_plan'; add_block_preprocessor(sub { my ($block) = @_; - my $extra_yaml_config = <<_EOC_; + if (!$block->extra_yaml_config) { + my $extra_yaml_config = <<_EOC_; plugins: - opentelemetry plugin_attr: @@ -29,24 +30,27 @@ plugin_attr: max_export_batch_size: 1 inactive_timeout: 0.5 _EOC_ + $block->set_value("extra_yaml_config", $extra_yaml_config); + } - $block->set_value("extra_yaml_config", $extra_yaml_config); - my $extra_init_by_lua = <<_EOC_; - -- mock exporter http client - local client = require("opentelemetry.trace.exporter.http_client") - client.do_request = function() - ngx.log(ngx.INFO, "opentelemetry export span") - end + if (!$block->extra_init_by_lua) { + my $extra_init_by_lua = <<_EOC_; +-- mock exporter http client +local client = require("opentelemetry.trace.exporter.http_client") +client.do_request = function() + ngx.log(ngx.INFO, "opentelemetry export span") +end _EOC_ - $block->set_value("extra_init_by_lua", $extra_init_by_lua); + $block->set_value("extra_init_by_lua", $extra_init_by_lua); + } if (!$block->request) { $block->set_value("request", "GET /t"); } - if (!$block->response_body) { + if (!defined $block->response_body) { $block->set_value("response_body", "passed\n"); } @@ -88,28 +92,6 @@ __DATA__ "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "always_on" - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -154,25 +136,6 @@ opentelemetry export span "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -218,28 +181,6 @@ qr/opentelemetry export span/ "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "trace_id_ratio" - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -288,31 +229,6 @@ qr/opentelemetry export span/ "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "trace_id_ratio", - "options": { - "fraction": 1.0 - } - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -360,28 +276,6 @@ opentelemetry export span "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "parent_base" - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -432,33 +326,6 @@ qr/opentelemetry export span/ "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "parent_base", - "options": { - "root": { - "name": "always_on" - } - } - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -511,33 +378,6 @@ opentelemetry export span "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "parent_base", - "options": { - "root": { - "name": "trace_id_ratio" - } - } - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -606,36 +446,6 @@ opentelemetry export span "type": "roundrobin" }, "uri": "/opentracing" - }]], - [[{ - "node": { - "value": { - "plugins": { - "opentelemetry": { - "sampler": { - "name": "parent_base", - "options": { - "root": { - "name": "trace_id_ratio", - "options": { - "fraction": 1.0 - } - } - } - } - } - }, - "upstream": { - "nodes": { - "127.0.0.1:1980": 1 - }, - "type": "roundrobin" - }, - "uri": "/opentracing" - }, - "key": "/apisix/routes/1" - }, - "action": "set" }]] ) @@ -671,3 +481,225 @@ opentracing --- grep_error_log eval qr/opentelemetry export span/ --- grep_error_log_out + + + +=== TEST 19: set additional_attributes +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/services/1', + ngx.HTTP_PUT, + [[{ + "name": "service_name", + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "name": "route_name", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "additional_attributes": [ + "http_user_agent", + "arg_foo", + "cookie_token", + "remote_addr" + ] + } + }, + "uri": "/opentracing", + "service_id": "1" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } + + + +=== TEST 20: trigger opentelemetry, test trace_id_source=x-request-id, custom resource, additional_attributes +--- extra_yaml_config +plugins: + - opentelemetry +plugin_attr: + opentelemetry: + trace_id_source: x-request-id + resource: + service.name: test + test_key: test_val + batch_span_processor: + max_export_batch_size: 1 + inactive_timeout: 0.5 +--- extra_init_by_lua + local core = require("apisix.core") + local otlp = require("opentelemetry.trace.exporter.otlp") + otlp.export_spans = function(self, spans) + if (#spans ~= 1) then + ngx.log(ngx.ERR, "unexpected spans length: ", #spans) + return + end + + local span = spans[1] + if span:context().trace_id ~= "01010101010101010101010101010101" then + ngx.log(ngx.ERR, "unexpected trace id: ", span:context().trace_id) + return + end + + if span.name ~= "/opentracing?foo=bar&a=b" then + ngx.log(ngx.ERR, "expect span name: /opentracing?foo=bar&a=b, but got ", span.name) + return + end + + local expected_resource_attrs = { + test_key = "test_val", + } + expected_resource_attrs["service.name"] = "test" + expected_resource_attrs["telemetry.sdk.language"] = "lua" + expected_resource_attrs["telemetry.sdk.name"] = "opentelemetry-lua" + expected_resource_attrs["telemetry.sdk.version"] = "0.1.1" + expected_resource_attrs["hostname"] = core.utils.gethostname() + local actual_resource_attrs = span.tracer.provider.resource:attributes() + if #actual_resource_attrs ~= 6 then + ngx.log(ngx.ERR, "expect len(actual_resource) = 6, but got ", #actual_resource_attrs) + return + end + for _, attr in ipairs(actual_resource_attrs) do + local expected_val = expected_resource_attrs[attr.key] + if not expected_val then + ngx.log(ngx.ERR, "unexpected resource attr key: ", attr.key) + return + end + if attr.value.string_value ~= expected_val then + ngx.log(ngx.ERR, "unexpected resource attr val: ", attr.value.string_value) + return + end + end + + local expected_attributes = { + service = "service_name", + route = "route_name", + http_user_agent = "test_nginx", + arg_foo = "bar", + cookie_token = "auth_token", + remote_addr = "127.0.0.1", + } + if #span.attributes ~= 6 then + ngx.log(ngx.ERR, "expect len(span.attributes) = 6, but got ", #span.attributes) + return + end + for _, attr in ipairs(span.attributes) do + local expected_val = expected_attributes[attr.key] + if not expected_val then + ngx.log(ngx.ERR, "unexpected attr key: ", attr.key) + return + end + if attr.value.string_value ~= expected_val then + ngx.log(ngx.ERR, "unexpected attr val: ", attr.value.string_value) + return + end + end + + ngx.log(ngx.INFO, "opentelemetry export span") + end +--- request +GET /opentracing?foo=bar&a=b +--- more_headers +X-Request-Id: 01010101010101010101010101010101 +User-Agent: test_nginx +Cookie: token=auth_token; +--- response_body +opentracing +--- wait: 1 +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span + + + +=== TEST 21: create route for /specific_status +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "name": "route_name", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + } + } + }, + "uri": "/specific_status", + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } + + + +=== TEST 22: 500 status, test span.status +--- extra_init_by_lua + local otlp = require("opentelemetry.trace.exporter.otlp") + otlp.export_spans = function(self, spans) + if (#spans ~= 1) then + ngx.log(ngx.ERR, "unexpected spans length: ", #spans) + return + end + + local span = spans[1] + if span.status.code ~= 2 then + ngx.log(ngx.ERR, "unexpected status.code: ", span.status.code) + end + if span.status.message ~= "upstream response status: 500" then + ngx.log(ngx.ERR, "unexpected status.message: ", span.status.message) + end + + ngx.log(ngx.INFO, "opentelemetry export span") + end +--- request +GET /specific_status +--- more_headers +X-Test-Upstream-Status: 500 +--- error_code: 500 +--- response_body +upstream status: 500 +--- wait: 1 +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span From d8aa4d8bc1b5f66efba0e4d289780d3f7bb4b394 Mon Sep 17 00:00:00 2001 From: roketyyang Date: Tue, 18 Jan 2022 21:42:54 +0800 Subject: [PATCH 07/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index 4d61ef13d3ab..b95ed7b8236c 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -328,6 +328,7 @@ end function _M.log(conf, api_ctx) local ctx = context:current(); if ctx then + local upstream_status = core.response.get_upstream_status(api_ctx) -- get span from current context local span = ctx:span() if upstream_status and upstream_status >= 500 then From 04aef3122196376a4f62004db3bcb879b5e28dea Mon Sep 17 00:00:00 2001 From: roketyyang Date: Thu, 20 Jan 2022 12:42:57 +0800 Subject: [PATCH 08/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 7 ++++--- t/plugin/opentelemetry.t | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index b95ed7b8236c..be7f64ef3e6e 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -274,7 +274,7 @@ local function create_tracer_obj(conf) end -function _M.access(conf, api_ctx) +function _M.rewrite(conf, api_ctx) local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, create_tracer_obj, conf) if not tracer then core.log.error("failed to fetch tracer object: ", err) @@ -310,7 +310,7 @@ end function _M.body_filter(conf, api_ctx) if ngx.arg[2] then local upstream_status = core.response.get_upstream_status(api_ctx) - local ctx = context:current(); + local ctx = context:current() ctx:detach() -- get span from current context @@ -326,9 +326,10 @@ end function _M.log(conf, api_ctx) - local ctx = context:current(); + local ctx = context:current() if ctx then local upstream_status = core.response.get_upstream_status(api_ctx) + -- get span from current context local span = ctx:span() if upstream_status and upstream_status >= 500 then diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 83a9dce8e9d7..61fa9e8a6f0b 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -703,3 +703,25 @@ upstream status: 500 qr/opentelemetry export span/ --- grep_error_log_out opentelemetry export span + + + +=== TEST 23: test response empty body +--- extra_init_by_lua + local otlp = require("opentelemetry.trace.exporter.otlp") + otlp.export_spans = function(self, spans) + ngx.log(ngx.INFO, "opentelemetry export span") + end + + local opentelemetry = require("apisix.plugins.opentelemetry") + opentelemetry.body_filter = function() + ngx.log(ngx.INFO, "mock response empty body") + end +--- request +GET /specific_status +--- response_body +--- wait: 1 +--- grep_error_log eval +qr/opentelemetry export span/ +--- grep_error_log_out +opentelemetry export span From 0f077639fe7a2817d572f1d93e10ec86b4d31f3c Mon Sep 17 00:00:00 2001 From: roketyyang Date: Fri, 21 Jan 2022 20:53:44 +0800 Subject: [PATCH 09/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 6 ++++-- t/plugin/opentelemetry.t | 27 +++++++++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index be7f64ef3e6e..fec3d0cacea3 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -43,7 +43,6 @@ local trace_context = require("opentelemetry.trace.propagation.trace_context") local ngx = ngx local ngx_var = ngx.var -local ngx_req = ngx.req local table = table local type = type local pairs = pairs @@ -216,7 +215,7 @@ function _M.init() if plugin_info.trace_id_source == "x-request-id" then id_generator.new_ids = function() - local trace_id = ngx_req.get_headers()["x-request-id"] or ngx_var.request_id + local trace_id = core.request.headers()["x-request-id"] or ngx_var.request_id return trace_id, id_generator.new_span_id() end end @@ -325,9 +324,12 @@ function _M.body_filter(conf, api_ctx) end +-- body_filter maybe not called because of empty http body response +-- so we need to check if the span has finished in log phase function _M.log(conf, api_ctx) local ctx = context:current() if ctx then + -- ctx:detach() is not necessary, because of ctx is stored in ngx.ctx local upstream_status = core.response.get_upstream_status(api_ctx) -- get span from current context diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 61fa9e8a6f0b..4bf4acd2e8ad 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -71,6 +71,7 @@ run_tests; __DATA__ === TEST 1: add plugin +--- SKIP --- config location /t { content_by_lua_block { @@ -105,6 +106,7 @@ __DATA__ === TEST 2: trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -118,6 +120,7 @@ opentelemetry export span === TEST 3: use default always_off sampler +--- SKIP --- config location /t { content_by_lua_block { @@ -149,6 +152,7 @@ opentelemetry export span === TEST 4: not trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -160,6 +164,7 @@ qr/opentelemetry export span/ === TEST 5: use trace_id_ratio sampler, default fraction = 0 +--- SKIP --- config location /t { content_by_lua_block { @@ -194,6 +199,7 @@ qr/opentelemetry export span/ === TEST 6: not trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -205,6 +211,7 @@ qr/opentelemetry export span/ === TEST 7: use trace_id_ratio sampler, fraction = 1.0 +--- SKIP --- config location /t { content_by_lua_block { @@ -242,6 +249,7 @@ qr/opentelemetry export span/ === TEST 8: trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -255,6 +263,7 @@ opentelemetry export span === TEST 9: use parent_base sampler, default root sampler = always_off +--- SKIP --- config location /t { content_by_lua_block { @@ -289,6 +298,7 @@ opentelemetry export span === TEST 10: not trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -300,6 +310,7 @@ qr/opentelemetry export span/ === TEST 11: use parent_base sampler, root sampler = always_on +--- SKIP --- config location /t { content_by_lua_block { @@ -339,6 +350,7 @@ qr/opentelemetry export span/ === TEST 12: trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -352,6 +364,7 @@ opentelemetry export span === TEST 13: use parent_base sampler, root sampler = trace_id_ratio with default fraction = 0 +--- SKIP --- config location /t { content_by_lua_block { @@ -391,6 +404,7 @@ opentelemetry export span === TEST 14: not trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -402,6 +416,7 @@ qr/opentelemetry export span/ === TEST 15: trigger opentelemetry, trace_flag = 1 +--- SKIP --- request GET /opentracing --- more_headers @@ -417,6 +432,7 @@ opentelemetry export span === TEST 16: use parent_base sampler, root sampler = trace_id_ratio with fraction = 1 +--- SKIP --- config location /t { content_by_lua_block { @@ -459,6 +475,7 @@ opentelemetry export span === TEST 17: trigger opentelemetry +--- SKIP --- request GET /opentracing --- response_body @@ -472,6 +489,7 @@ opentelemetry export span === TEST 18: not trigger opentelemetry, trace_flag = 0 +--- SKIP --- request GET /opentracing --- more_headers @@ -485,6 +503,7 @@ qr/opentelemetry export span/ === TEST 19: set additional_attributes +--- SKIP --- config location /t { content_by_lua_block { @@ -621,6 +640,7 @@ plugin_attr: ngx.log(ngx.INFO, "opentelemetry export span") end +--- SKIP --- request GET /opentracing?foo=bar&a=b --- more_headers @@ -712,13 +732,8 @@ opentelemetry export span otlp.export_spans = function(self, spans) ngx.log(ngx.INFO, "opentelemetry export span") end - - local opentelemetry = require("apisix.plugins.opentelemetry") - opentelemetry.body_filter = function() - ngx.log(ngx.INFO, "mock response empty body") - end --- request -GET /specific_status +HEAD /specific_status --- response_body --- wait: 1 --- grep_error_log eval From d83a5e42bec082c09a1491796962830c37b6395c Mon Sep 17 00:00:00 2001 From: roketyyang Date: Fri, 21 Jan 2022 21:47:32 +0800 Subject: [PATCH 10/11] feature: add opentelemetry plugin. (#3891) --- t/plugin/opentelemetry.t | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/t/plugin/opentelemetry.t b/t/plugin/opentelemetry.t index 4bf4acd2e8ad..0c142667a1ab 100644 --- a/t/plugin/opentelemetry.t +++ b/t/plugin/opentelemetry.t @@ -71,7 +71,6 @@ run_tests; __DATA__ === TEST 1: add plugin ---- SKIP --- config location /t { content_by_lua_block { @@ -106,7 +105,6 @@ __DATA__ === TEST 2: trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -120,7 +118,6 @@ opentelemetry export span === TEST 3: use default always_off sampler ---- SKIP --- config location /t { content_by_lua_block { @@ -152,7 +149,6 @@ opentelemetry export span === TEST 4: not trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -164,7 +160,6 @@ qr/opentelemetry export span/ === TEST 5: use trace_id_ratio sampler, default fraction = 0 ---- SKIP --- config location /t { content_by_lua_block { @@ -199,7 +194,6 @@ qr/opentelemetry export span/ === TEST 6: not trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -211,7 +205,6 @@ qr/opentelemetry export span/ === TEST 7: use trace_id_ratio sampler, fraction = 1.0 ---- SKIP --- config location /t { content_by_lua_block { @@ -249,7 +242,6 @@ qr/opentelemetry export span/ === TEST 8: trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -263,7 +255,6 @@ opentelemetry export span === TEST 9: use parent_base sampler, default root sampler = always_off ---- SKIP --- config location /t { content_by_lua_block { @@ -298,7 +289,6 @@ opentelemetry export span === TEST 10: not trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -310,7 +300,6 @@ qr/opentelemetry export span/ === TEST 11: use parent_base sampler, root sampler = always_on ---- SKIP --- config location /t { content_by_lua_block { @@ -350,7 +339,6 @@ qr/opentelemetry export span/ === TEST 12: trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -364,7 +352,6 @@ opentelemetry export span === TEST 13: use parent_base sampler, root sampler = trace_id_ratio with default fraction = 0 ---- SKIP --- config location /t { content_by_lua_block { @@ -404,7 +391,6 @@ opentelemetry export span === TEST 14: not trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -416,7 +402,6 @@ qr/opentelemetry export span/ === TEST 15: trigger opentelemetry, trace_flag = 1 ---- SKIP --- request GET /opentracing --- more_headers @@ -432,7 +417,6 @@ opentelemetry export span === TEST 16: use parent_base sampler, root sampler = trace_id_ratio with fraction = 1 ---- SKIP --- config location /t { content_by_lua_block { @@ -475,7 +459,6 @@ opentelemetry export span === TEST 17: trigger opentelemetry ---- SKIP --- request GET /opentracing --- response_body @@ -489,7 +472,6 @@ opentelemetry export span === TEST 18: not trigger opentelemetry, trace_flag = 0 ---- SKIP --- request GET /opentracing --- more_headers @@ -503,7 +485,6 @@ qr/opentelemetry export span/ === TEST 19: set additional_attributes ---- SKIP --- config location /t { content_by_lua_block { @@ -640,7 +621,6 @@ plugin_attr: ngx.log(ngx.INFO, "opentelemetry export span") end ---- SKIP --- request GET /opentracing?foo=bar&a=b --- more_headers From e602fce6f7d45ab688f7c823465b4cf406f80efe Mon Sep 17 00:00:00 2001 From: roketyyang Date: Tue, 25 Jan 2022 10:48:30 +0800 Subject: [PATCH 11/11] feature: add opentelemetry plugin. (#3891) --- apisix/plugins/opentelemetry.lua | 2 +- conf/config-default.yaml | 15 +++++++++++++++ docs/en/latest/plugins/opentelemetry.md | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index fec3d0cacea3..12d61c99fc3b 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -295,7 +295,7 @@ function _M.rewrite(conf, api_ctx) end end - local ctx, _ = tracer:start(upstream_context, api_ctx.var.request_uri, { + local ctx = tracer:start(upstream_context, api_ctx.var.request_uri, { kind = span_kind.client, attributes = attributes, }) diff --git a/conf/config-default.yaml b/conf/config-default.yaml index 65506083aded..17b1838a974f 100644 --- a/conf/config-default.yaml +++ b/conf/config-default.yaml @@ -407,6 +407,21 @@ plugin_attr: service_name: APISIX service_instance_name: APISIX Instance Name endpoint_addr: http://127.0.0.1:12800 + opentelemetry: + trace_id_source: x-request-id + resource: + service.name: APISIX + collector: + address: 127.0.0.1:4317 + request_timeout: 3 + request_headers: + Authorization: token + batch_span_processor: + drop_on_queue_full: false + max_queue_size: 1024 + batch_timeout: 2 + inactive_timeout: 1 + max_export_batch_size: 16 prometheus: export_uri: /apisix/prometheus/metrics metric_prefix: apisix_ diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md index 4f7d599cc381..d2610eb02159 100644 --- a/docs/en/latest/plugins/opentelemetry.md +++ b/docs/en/latest/plugins/opentelemetry.md @@ -90,7 +90,7 @@ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f1 ## How to set collecting -We can set the collecting by specifying the configuration in `conf/config.yaml`. +You can set the collecting by specifying the configuration in `conf/config.yaml`. | Name | Type | Default | Description | | ------------ | ------ | -------- | ----------------------------------------------------- |