Skip to content

Commit

Permalink
feat(tags) allow utf-8 in tags
Browse files Browse the repository at this point in the history
This change expands the range of accepted characters in tags from a limited set of ASCII characters to almost all utf-8 sequences.

Exceptions:
* ',' and '/' are reserved for filtering tags with "and" and "or" and are not allowed in tags
* Non-printable ASCII (like the space character, ' ') is not allowed

Attempting to tag an entity with an invalid utf-8 sequence will raise an error.
  • Loading branch information
kikito authored and dndx committed Feb 1, 2021
1 parent cb98863 commit 98d4e66
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 31 deletions.
14 changes: 12 additions & 2 deletions kong/api/endpoints.lua
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ local ERRORS_HTTP_CODES = {
[Errors.codes.FOREIGN_KEYS_UNRESOLVED] = 400,
}

local TAGS_AND_REGEX
local TAGS_OR_REGEX
do
-- printable ASCII (0x21-0x7E except ','(0x2C) and '/'(0x2F),
-- plus non-ASCII utf8 (0x80-0xF4)
local tag_bytes = [[\x21-\x2B\x2D\x2E\x30-\x7E\x80-\xF4]]

TAGS_AND_REGEX = "^([" .. tag_bytes .. "]+(?:,|$))+$"
TAGS_OR_REGEX = "^([" .. tag_bytes .. "]+(?:/|$))+$"
end

local function get_message(default, ...)
local message
Expand Down Expand Up @@ -149,11 +159,11 @@ local function extract_options(args, schema, context)
tags = tags[1]
end

if re_match(tags, [=[^([a-zA-Z0-9\.\-\_~]+(?:,|$))+$]=], 'jo') then
if re_match(tags, TAGS_AND_REGEX, 'jo') then
-- 'a,b,c' or 'a'
options.tags_cond = 'and'
options.tags = split(tags, ',')
elseif re_match(tags, [=[^([a-zA-Z0-9\.\-\_~]+(?:/|$))+$]=], 'jo') then
elseif re_match(tags, TAGS_OR_REGEX, 'jo') then
-- 'a/b/c'
options.tags_cond = 'or'
options.tags = split(tags, '/')
Expand Down
4 changes: 2 additions & 2 deletions kong/db/dao/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ local function validate_options_value(self, options)
end
elseif #options.tags > 5 then
errors.tags = "cannot query more than 5 tags"
elseif not match(concat(options.tags), "^[%w%.%-%_~]+$") then
errors.tags = "must only contain alphanumeric and '., -, _, ~' characters"
elseif not match(concat(options.tags), "^[\033-\043\045\046\048-\126\128-\244]+$") then
errors.tags = "must only contain printable ascii (except `,` and `/`) or valid utf-8"
elseif #options.tags > 1 and options.tags_cond ~= "and" and options.tags_cond ~= "or" then
errors.tags_cond = "must be a either 'and' or 'or' when more than one tag is specified"
end
Expand Down
38 changes: 34 additions & 4 deletions kong/db/schema/typedefs.lua
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,43 @@ local function validate_name(name)
end


local function validate_utf8_name(name)

local ok, index = utils.validate_utf8(name)
local function validate_utf8_string(str)
local ok, index = utils.validate_utf8(str)

if not ok then
return nil, "invalid utf-8 character sequence detected at position " .. tostring(index)
end

return true
end


local function validate_tag(tag)

local ok, err = validate_utf8_string(tag)
if not ok then
return nil, err
end

-- printable ASCII (33-126 except ','(44) and '/'(47),
-- plus non-ASCII utf8 (128-244)
if not match(tag, "^[\033-\043\045\046\048-\126\128-\244]+$") then
return nil,
"invalid tag '" .. tag ..
"': expected printable ascii (except `,` and `/`) or valid utf-8 sequences"
end

return true
end


local function validate_utf8_name(name)

local ok, err = validate_utf8_string(name)
if not ok then
return nil, err
end

if not match(name, "^[%w%.%-%_~\128-\244]+$") then
return nil,
"invalid value '" .. name ..
Expand Down Expand Up @@ -369,9 +398,10 @@ typedefs.key = Schema.define {
typedefs.tag = Schema.define {
type = "string",
required = true,
match = "^[%w%.%-%_~]+$",
custom_validator = validate_tag,
}


typedefs.tags = Schema.define {
type = "set",
elements = typedefs.tag,
Expand Down
52 changes: 34 additions & 18 deletions spec/02-integration/03-db/07-tags_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,9 @@ for _, strategy in helpers.each_strategy() do
assert.is_nil(rows)
assert.match([[tags: must be a table]], err)

rows, err, _, _ = db.services:page(nil, nil, { tags = { "oops", "@_@" }, tags_cond = 'and' })
rows, err, _, _ = db.services:page(nil, nil, { tags = { "oops", string.char(255) }, tags_cond = 'and' })
assert.is_nil(rows)
assert.match([[tags: must only contain alphanumeric and]], err)
assert.match([[tags: must only contain printable ascii]], err)

rows, err, _, _ = db.services:page(nil, nil, { tags = { "1", "2", "3", "4", "5", "6" } })
assert.is_nil(rows)
Expand All @@ -407,22 +407,38 @@ for _, strategy in helpers.each_strategy() do

end)

describe("#db errors if tag value is invalid", function()
assert.has_error(function()
bp.services:insert({
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "invalid tag" }
})
end, string.format('[%s] schema violation (tags.1: invalid value: invalid tag)', strategy))

assert.has_error(function()
bp.services:insert({
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "foo,bar" }
})
end, string.format('[%s] schema violation (tags.1: invalid value: foo,bar)', strategy))
it("#db errors if tag value is invalid", function()
local ok, err = pcall(bp.services.insert, bp.services, {
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "tag with spaces" }
})
assert.is_falsy(ok)
assert.matches("invalid tag", err)

local ok, err = pcall(bp.services.insert, bp.services, {
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "tag,with,commas" }
})
assert.is_falsy(ok)
assert.matches("invalid tag", err)

local ok, err = pcall(bp.services.insert, bp.services, {
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "tag/with/slashes" }
})
assert.is_falsy(ok)
assert.matches("invalid tag", err)

local ok, err = pcall(bp.services.insert, bp.services, {
host = "invalid-tag.com",
name = "service-invalid-tag",
tags = { "tag-with-invalid-utf8" .. string.char(255) }
})
assert.is_falsy(ok)
assert.matches("invalid utf%-8", err)
end)


Expand Down
24 changes: 19 additions & 5 deletions spec/02-integration/04-admin_api/14-tags_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ describe("Admin API - tags", function()
for i = 1, 2 do
local consumer = {
username = "adminapi-filter-by-tag-" .. i,
tags = { "corp_a", "consumer"..i }
tags = { "corp_a", "consumer"..i, "🦍" }
}
local row, err, err_t = bp.consumers:insert(consumer)
assert.is_nil(err)
Expand Down Expand Up @@ -50,6 +50,19 @@ describe("Admin API - tags", function()
end
end)

it("filter by single unicode tag", function()
local res = assert(client:send {
method = "GET",
path = "/consumers?tags=🦍"
})
local body = assert.res_status(200, res)
local json = cjson.decode(body)
assert.equals(2, #json.data)
for i = 1, 2 do
assert.contains("🦍", json.data[i].tags)
end
end)

it("filter by multiple tags with AND", function()
local res = assert(client:send {
method = "GET",
Expand All @@ -58,9 +71,10 @@ describe("Admin API - tags", function()
local body = assert.res_status(200, res)
local json = cjson.decode(body)
assert.equals(1, #json.data)
assert.equals(2, #json.data[1].tags)
assert.equals(3, #json.data[1].tags)
assert.contains('corp_a', json.data[1].tags)
assert.contains('consumer1', json.data[1].tags)
assert.contains('🦍', json.data[1].tags)
end)

it("filter by multiple tags with OR", function()
Expand Down Expand Up @@ -102,7 +116,7 @@ describe("Admin API - tags", function()

local res = assert(client:send {
method = "GET",
path = "/consumers?tags=foo@bar"
path = "/consumers?tags=" .. string.char(255)
})
local body = assert.res_status(400, res)
local json = cjson.decode(body)
Expand Down Expand Up @@ -176,11 +190,11 @@ describe("Admin API - tags", function()
it("/tags/:tags with invalid :tags value", function()
local res = assert(client:send {
method = "GET",
path = "/tags/@_@"
path = "/tags/" .. string.char(255)
})
local body = assert.res_status(400, res)
local json = cjson.decode(body)
assert.equals("invalid value: @_@", json.message)
assert.matches("invalid utf%-8", json.message)
end)

it("/tags ignores ?tags= query", function()
Expand Down

0 comments on commit 98d4e66

Please sign in to comment.