Skip to content

Commit

Permalink
fix(outputs.iotdb): Handle paths that contain illegal characters (#14519
Browse files Browse the repository at this point in the history
)

Co-authored-by: SeanGaluzzi <SeanGaluzzi@users.noreply.github.com>
Co-authored-by: SeanGaluzzi <sean.galuzzi@argo.consulting>
  • Loading branch information
3 people authored Jan 23, 2024
1 parent 439df81 commit 4c1d8e3
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 2 deletions.
15 changes: 15 additions & 0 deletions plugins/outputs/iotdb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,19 @@ to use them.
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
# convert_tags_to = "device_id"

## Handling of unsupported characters
## Some characters are not supported in path names, depending on the IoTDB version.
## A guide with suggestions on valid paths can be found here:
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
##
## Available values are:
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` any word containing forbidden characters
## such as @ $ # : [ ] { } ( ) space
## - "0.13" -- enclose in `` any word containing forbidden characters
## such as space
##
## Keep this section commented if you don't want to sanitize the path
# sanitize_tag = "1.3"
```
55 changes: 53 additions & 2 deletions plugins/outputs/iotdb/iotdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
Expand All @@ -21,6 +22,12 @@ import (
//go:embed sample.conf
var sampleConfig string

var (
	// forbiddenBacktick matches a word whose backticks are misplaced, i.e. a
	// backtick appears anywhere other than as one enclosing pair.
	//   `word`                              <- doesn't match
	//   ``word , `wo`rd` , `word , word`    <- match
	forbiddenBacktick = regexp.MustCompile("^[^\x60].*?[\x60]+.*?[^\x60]$|^[\x60].*[\x60]+.*[\x60]$|^[\x60]+.*[^\x60]$|^[^\x60].*[\x60]+$")

	// allowedBacktick matches a word that starts and ends with a backtick.
	allowedBacktick = regexp.MustCompile("^[\x60].*[\x60]$")
)

type IoTDB struct {
Host string `toml:"host"`
Port string `toml:"port"`
Expand All @@ -30,9 +37,11 @@ type IoTDB struct {
ConvertUint64To string `toml:"uint64_conversion"`
TimeStampUnit string `toml:"timestamp_precision"`
TreatTagsAs string `toml:"convert_tags_to"`
SanitizeTags string `toml:"sanitize_tag"`
Log telegraf.Logger `toml:"-"`

session *client.Session
sanityRegex []*regexp.Regexp
session *client.Session
}

type recordsWithTags struct {
Expand Down Expand Up @@ -74,6 +83,22 @@ func (s *IoTDB) Init() error {
s.Password = config.NewSecret([]byte("root"))
}

switch s.SanitizeTags {
case "0.13":
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_:@#${}\x60]")

regex := []*regexp.Regexp{matchUnsupportedCharacter}
s.sanityRegex = append(s.sanityRegex, regex...)

// from version 1.x.x IoTDB changed the allowed keys in nodes
case "1.0", "1.1", "1.2", "1.3":
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_\x60]")
matchNumericString := regexp.MustCompile(`^\d+$`)

regex := []*regexp.Regexp{matchUnsupportedCharacter, matchNumericString}
s.sanityRegex = append(s.sanityRegex, regex...)
}

s.Log.Info("Initialization completed.")
return nil
}
Expand Down Expand Up @@ -229,6 +254,28 @@ func (s *IoTDB) convertMetricsToRecordsWithTags(metrics []telegraf.Metric) (*rec
return rwt, nil
}

// validateTag checks whether a tag value can be used as an IoTDB path node
// and quotes it when necessary.
//
// It returns an error when the tag is the reserved word "root" or when it
// contains backticks that do not form a single enclosing pair. A tag that is
// already enclosed in backticks is returned unchanged. Any other tag is
// wrapped in backticks if it matches one of the patterns in s.sanityRegex,
// and is returned as-is when none of them match.
func (s *IoTDB) validateTag(tag string) (string, error) {
	switch {
	case tag == "root":
		// IoTDB uses "root" as a keyword; it may only appear at the start of the path.
		return "", errors.New("cannot use 'root' as tag")
	case tag == "`", forbiddenBacktick.MatchString(tag):
		// Every alternative of forbiddenBacktick needs at least two characters,
		// so a lone backtick has to be rejected explicitly; otherwise it would
		// fall through and be emitted unquoted.
		return "", errors.New("cannot use ` in tag names")
	case allowedBacktick.MatchString(tag):
		// The tag is already enclosed in backticks, keep it untouched.
		return tag, nil
	}

	// Enclose the tag in backticks as soon as one sanitization pattern matches.
	for _, pattern := range s.sanityRegex {
		if pattern.MatchString(tag) {
			return "`" + tag + "`", nil
		}
	}

	return tag, nil
}

// modify recordsWithTags according to 'TreatTagsAs' Configuration
func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
switch s.TreatTagsAs {
Expand All @@ -251,7 +298,11 @@ func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
for index, tags := range rwt.TagsList { // for each record
topic := []string{rwt.DeviceIDList[index]}
for _, tag := range tags { // for each tag, append it's Value
topic = append(topic, tag.Value)
tagValue, err := s.validateTag(tag.Value) // validates tag
if err != nil {
return err
}
topic = append(topic, tagValue)
}
rwt.DeviceIDList[index] = strings.Join(topic, ".")
}
Expand Down
63 changes: 63 additions & 0 deletions plugins/outputs/iotdb/iotdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,69 @@ func TestMetricConversionToRecordsWithTags(t *testing.T) {
}
}

// TestTagSanitization exercises validateTag for each supported value of the
// sanitize_tag setting. Every case lists the inputs, the values validateTag
// is expected to return for them, and whether validateTag must return an
// error for all inputs of the case.
func TestTagSanitization(t *testing.T) {
	tests := []struct {
		name     string
		plugin   *IoTDB
		expected []string
		input    []string
		wantErr  bool
	}{
		{ // don't sanitize tags that contain only supported characters on IoTDB V1.3
			name:     "Don't Sanitize Tags V1.3",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"word", "`word`", "word_"},
			input:    []string{"word", "`word`", "word_"},
		},
		{ // sanitize tags containing unsupported characters on IoTDB V1.3 by enclosing them in backticks
			name:     "Sanitize Tags V1.3",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"`wo rd`", "`@`", "`$`", "`#`", "`:`", "`{`", "`}`", "`1`", "`1234`"},
			input:    []string{"wo rd", "@", "$", "#", ":", "{", "}", "1", "1234"},
		},
		{ // forbidden word and forbidden syntax on IoTDB V1.3
			name:     "Errors V1.3",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"", ""},
			input:    []string{"root", "wo`rd"},
			wantErr:  true,
		},
		{ // don't sanitize tags that contain only supported characters on IoTDB V0.13
			name:     "Don't Sanitize Tags V0.13",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
			input:    []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
		},
		{ // sanitize tags containing unsupported characters on IoTDB V0.13 by enclosing them in backticks
			name:     "Sanitize Tags V0.13",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"`wo rd`", "`\\`"},
			input:    []string{"wo rd", "\\"},
		},
		{ // forbidden word and forbidden syntax on IoTDB V0.13
			name:     "Errors V0.13",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"", ""},
			input:    []string{"root", "wo`rd"},
			wantErr:  true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tt.plugin.Log = &testutil.Logger{}
			require.NoError(t, tt.plugin.Init())

			actuals := make([]string, 0, len(tt.input))
			for _, input := range tt.input {
				actual, err := tt.plugin.validateTag(input)
				// Check the error explicitly: an empty result alone does not
				// prove that validateTag rejected the input.
				if tt.wantErr {
					require.Errorf(t, err, "input %q", input)
				} else {
					require.NoErrorf(t, err, "input %q", input)
				}
				actuals = append(actuals, actual)
			}

			require.EqualValues(t, tt.expected, actuals)
		})
	}
}

// Test tags handling, which means testing function `modifyRecordsWithTags`
func TestTagsHandling(t *testing.T) {
var testTimestamp = time.Date(2022, time.July, 20, 12, 25, 33, 44, time.UTC)
Expand Down
15 changes: 15 additions & 0 deletions plugins/outputs/iotdb/sample.conf
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,18 @@
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
# convert_tags_to = "device_id"

## Handling of unsupported characters
## Some characters are not supported in path names, depending on the IoTDB version.
## A guide with suggestions on valid paths can be found here:
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
##
## Available values are:
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` any word containing forbidden characters
## such as @ $ # : [ ] { } ( ) space
## - "0.13" -- enclose in `` any word containing forbidden characters
## such as space
##
## Keep this section commented if you don't want to sanitize the path
# sanitize_tag = "1.3"

0 comments on commit 4c1d8e3

Please sign in to comment.