From d51dcef7a87c8cbebbf3d32f3b031b886d567b58 Mon Sep 17 00:00:00 2001 From: Steffen Siering Date: Fri, 31 Jan 2020 21:05:24 +0100 Subject: [PATCH] Timestamp fractions pattern (#15911) * Add support for `f` in date patterns * Use `f` pattern in timestamp encoder * update check * Add godoc * typo --- libbeat/common/dtfmt/builder.go | 24 +++++- libbeat/common/dtfmt/doc.go | 48 +++++------ libbeat/common/dtfmt/dtfmt_test.go | 66 ++++++++++----- libbeat/common/dtfmt/elems.go | 16 ++-- libbeat/common/dtfmt/fmt.go | 11 ++- libbeat/common/dtfmt/prog.go | 38 +++++---- libbeat/common/dtfmt/util.go | 124 +++++++++++++++++++++++------ libbeat/outputs/codec/common.go | 4 +- 8 files changed, 237 insertions(+), 94 deletions(-) diff --git a/libbeat/common/dtfmt/builder.go b/libbeat/common/dtfmt/builder.go index 85af091e95f..d1789760afe 100644 --- a/libbeat/common/dtfmt/builder.go +++ b/libbeat/common/dtfmt/builder.go @@ -109,6 +109,26 @@ func (b *builder) nanoOfSecond(digits int) { } } +func (b *builder) fractNanoOfSecond(digits int) { + const fractDigits = 3 + + if digits <= 0 { + return + } + + // cap number of digits at 9, as we do not support higher precision and + // would remove trailing zeroes anyway. + if digits > 9 { + digits = 9 + } + + minDigits := fractDigits + if digits < minDigits { + minDigits = digits + } + b.add(paddedNumber{ftNanoOfSecond, 9 - digits, minDigits, digits, fractDigits, false}) +} + func (b *builder) secondOfMinute(digits int) { b.appendDecimal(ftSecondOfMinute, digits, 2) } @@ -223,12 +243,12 @@ func (b *builder) appendDecimalValue(ft fieldType, minDigits, maxDigits int, sig if minDigits <= 1 { b.add(unpaddedNumber{ft, maxDigits, signed}) } else { - b.add(paddedNumber{ft, 0, minDigits, maxDigits, signed}) + b.add(paddedNumber{ft, 0, minDigits, maxDigits, 0, signed}) } } func (b *builder) appendExtDecimal(ft fieldType, divExp, minDigits, maxDigits int) { - b.add(paddedNumber{ft, divExp, minDigits, maxDigits, false}) + b.add(paddedNumber{ft, divExp, minDigits, maxDigits, 0, false}) } func (b *builder) appendDecimal(ft fieldType, minDigits, maxDigits int) { diff --git a/libbeat/common/dtfmt/doc.go b/libbeat/common/dtfmt/doc.go index a204929f5ec..3906f94442a 100644 --- a/libbeat/common/dtfmt/doc.go +++ b/libbeat/common/dtfmt/doc.go @@ -22,29 +22,31 @@ // // Symbol Meaning Type Supported Examples // ------ ------- ------- --------- ------- -// G era text no AD -// C century of era (>=0) number no 20 -// Y year of era (>=0) year yes 1996 -// -// x weekyear year yes 1996 -// w week of weekyear number yes 27 -// e day of week number yes 2 -// E day of week text yes Tuesday; Tue -// -// y year year yes 1996 -// D day of year number yes 189 -// M month of year month yes July; Jul; 07 -// d day of month number yes 10 -// -// a halfday of day text yes PM -// K hour of halfday (0~11) number yes 0 -// h clockhour of halfday (1~12) number yes 12 -// -// H hour of day (0~23) number yes 0 -// k clockhour of day (1~24) number yes 24 -// m minute of hour number yes 30 -// s second of minute number yes 55 -// S fraction of second millis no 978 +// G era text no AD +// C century of era (>=0) number no 20 +// Y year of era (>=0) year yes 1996 +// +// x weekyear year yes 1996 +// w week of weekyear number yes 27 +// e day of week number yes 2 +// E day of week text yes Tuesday; Tue +// +// y year year yes 1996 +// D day of year number yes 189 +// M month of year month yes July; Jul; 07 +// d day of month number yes 10 +// +// a halfday of day text yes PM +// K hour of halfday (0~11) number yes 0 +// h clockhour of halfday (1~12) number yes 12 +// +// H hour of day (0~23) number yes 0 +// k clockhour of day (1~24) number yes 24 +// m minute of hour number yes 30 +// s second of minute number yes 55 +// S fraction of second nanoseconds yes 978000 +// f fraction of seconds nanoseconds yes 123456789 +// multiple of 3 // // z time zone text no Pacific Standard Time; PST // Z time zone offset/id zone no -0800; -08:00; America/Los_Angeles diff --git a/libbeat/common/dtfmt/dtfmt_test.go b/libbeat/common/dtfmt/dtfmt_test.go index d017bdbec17..10dbc14d6db 100644 --- a/libbeat/common/dtfmt/dtfmt_test.go +++ b/libbeat/common/dtfmt/dtfmt_test.go @@ -82,10 +82,37 @@ func TestFormat(t *testing.T) { {mkTime(8, 5, 24, 0), "kk:mm:ss aa", "09:05:24 AM"}, {mkTime(20, 5, 24, 0), "k:m:s a", "21:5:24 PM"}, {mkTime(20, 5, 24, 0), "kk:mm:ss aa", "21:05:24 PM"}, - {mkTime(1, 2, 3, 123), "S", "1"}, - {mkTime(1, 2, 3, 123), "SS", "12"}, - {mkTime(1, 2, 3, 123), "SSS", "123"}, - {mkTime(1, 2, 3, 123), "SSSS", "1230"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "S", "1"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "SS", "12"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "SSS", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "SSSS", "1230"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "f", "1"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "ff", "12"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "fff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "ffff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "fffff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "ffffff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "fffffff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "ffffffff", "123"}, + {mkTime(1, 2, 3, 123*time.Millisecond), "fffffffff", "123"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "f", "0"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "ff", "00"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "fff", "000"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "ffff", "0001"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "fffff", "00012"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "ffffff", "000123"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "fffffff", "000123"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "ffffffff", "000123"}, + {mkTime(1, 2, 3, 123*time.Microsecond), "fffffffff", "000123"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "f", "0"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "ff", "00"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "fff", "000"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "ffff", "000"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "fffff", "000"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "ffffff", "000"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "fffffff", "0000001"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "ffffffff", "00000012"}, + {mkTime(1, 2, 3, 123*time.Nanosecond), "fffffffff", "000000123"}, // literals {time.Now(), "--=++,_!/?\\[]{}@#$%^&*()", "--=++,_!/?\\[]{}@#$%^&*()"}, @@ -94,24 +121,27 @@ func TestFormat(t *testing.T) { {time.Now(), "'plain' '' 'text'", "plain ' text"}, {time.Now(), "'plain '' text'", "plain ' text"}, - // beats timestamp - {mkDateTime(2017, 1, 2, 4, 6, 7, 123), + // timestamps with microseconds precision only + {mkDateTime(2017, 1, 2, 4, 6, 7, 123*time.Millisecond), + "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", + "2017-01-02T04:06:07.123Z"}, + {mkDateTime(2017, 1, 2, 4, 6, 7, 123456*time.Microsecond), "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "2017-01-02T04:06:07.123Z"}, // beats timestamp - {mkDateTimeWithLocation(2017, 1, 2, 4, 6, 7, 123, time.FixedZone("PST", -8*60*60)), - "yyyy-MM-dd'T'HH:mm:ss.SSSz", + {mkDateTimeWithLocation(2017, 1, 2, 4, 6, 7, 123*time.Millisecond, time.FixedZone("PST", -8*60*60)), + "yyyy-MM-dd'T'HH:mm:ss.fffffffffz", "2017-01-02T04:06:07.123-08:00"}, // beats nanoseconds timestamp - {mkDateTime(2017, 1, 2, 4, 6, 7, 123), - "yyyy-MM-dd'T'HH:mm:ss.nnnnnnnnn'Z'", - "2017-01-02T04:06:07.123000000Z"}, + {mkDateTime(2017, 1, 2, 4, 6, 7, 123*time.Nanosecond), + "yyyy-MM-dd'T'HH:mm:ss.fffffffff'Z'", + "2017-01-02T04:06:07.000000123Z"}, - {mkDateTimeWithLocation(2017, 1, 2, 4, 6, 7, 123, time.FixedZone("PST", -8*60*60)), - "yyyy-MM-dd'T'HH:mm:ss.nnnnnnnnnz", - "2017-01-02T04:06:07.123000000-08:00"}, + {mkDateTimeWithLocation(2017, 1, 2, 4, 6, 7, 123*time.Millisecond, time.FixedZone("PST", -8*60*60)), + "yyyy-MM-dd'T'HH:mm:ss.fffffffffz", + "2017-01-02T04:06:07.123-08:00"}, } for i, test := range tests { @@ -132,14 +162,14 @@ func mkDate(y, m, d int) time.Time { return mkDateTime(y, m, d, 0, 0, 0, 0) } -func mkTime(h, m, s, S int) time.Time { +func mkTime(h, m, s int, S time.Duration) time.Time { return mkDateTime(2000, 1, 1, h, m, s, S) } -func mkDateTime(y, M, d, h, m, s, S int) time.Time { +func mkDateTime(y, M, d, h, m, s int, S time.Duration) time.Time { return mkDateTimeWithLocation(y, M, d, h, m, s, S, time.UTC) } -func mkDateTimeWithLocation(y, M, d, h, m, s, S int, l *time.Location) time.Time { - return time.Date(y, time.Month(M), d, h, m, s, S*1000000, l) +func mkDateTimeWithLocation(y, M, d, h, m, s int, S time.Duration, l *time.Location) time.Time { + return time.Date(y, time.Month(M), d, h, m, s, int(S), l) } diff --git a/libbeat/common/dtfmt/elems.go b/libbeat/common/dtfmt/elems.go index 5582f48c6ff..c443683348f 100644 --- a/libbeat/common/dtfmt/elems.go +++ b/libbeat/common/dtfmt/elems.go @@ -44,10 +44,10 @@ type unpaddedNumber struct { } type paddedNumber struct { - ft fieldType - divExp int - minDigits, maxDigits int - signed bool + ft fieldType + divExp int + minDigits, maxDigits, fractDigits int + signed bool } type textField struct { @@ -188,10 +188,14 @@ func (n unpaddedNumber) compile() (prog, error) { } func (n paddedNumber) compile() (prog, error) { - if n.divExp == 0 { + switch { + case n.fractDigits != 0: + return makeProg(opExtNumFractPadded, byte(n.ft), byte(n.divExp), byte(n.maxDigits), byte(n.fractDigits)) + case n.divExp == 0: return makeProg(opNumPadded, byte(n.ft), byte(n.maxDigits)) + default: + return makeProg(opExtNumPadded, byte(n.ft), byte(n.divExp), byte(n.maxDigits)) } - return makeProg(opExtNumPadded, byte(n.ft), byte(n.divExp), byte(n.maxDigits)) } func (n twoDigitYear) compile() (prog, error) { diff --git a/libbeat/common/dtfmt/fmt.go b/libbeat/common/dtfmt/fmt.go index c8243022d98..0e8d49ee2b0 100644 --- a/libbeat/common/dtfmt/fmt.go +++ b/libbeat/common/dtfmt/fmt.go @@ -59,8 +59,6 @@ func releaseCtx(c *ctx) { // If pattern is invalid an error is returned. func NewFormatter(pattern string) (*Formatter, error) { b := newBuilder() - - // pattern: yyyy-MM-dd'T'HH:mm:ss.fffffffff'Z' err := parsePatternTo(b, pattern) if err != nil { return nil, err @@ -136,7 +134,6 @@ func (f *Formatter) Format(t time.Time) (string, error) { } func parsePatternTo(b *builder, pattern string) error { - // pattern: yyyy-MM-dd'T'HH:mm:ss.fffffffff'Z' for i := 0; i < len(pattern); { tok, tokText, err := parseToken(pattern, &i) if err != nil { @@ -213,8 +210,8 @@ func parsePatternTo(b *builder, pattern string) error { case 'S': // fraction of second b.nanoOfSecond(tokLen) - case 'z': // timezone offset - b.timeZoneOffsetText() + case 'f': // faction of second (without zeros) + b.fractNanoOfSecond(tokLen) case 'n': // nano second // if timestamp layout use `n`, it always return 9 digits nanoseconds. @@ -223,6 +220,9 @@ func parsePatternTo(b *builder, pattern string) error { } b.nanoOfSecond(tokLen) + case 'z': // timezone offset + b.timeZoneOffsetText() + case '\'': // literal if tokLen == 1 { b.appendRune(rune(tokText[0])) @@ -243,7 +243,6 @@ func parseToken(pattern string, i *int) (rune, string, error) { start := *i idx := start length := len(pattern) - // pattern: yyyy-MM-dd'T'HH:mm:ss.fffffffff'Z' r, w := utf8.DecodeRuneInString(pattern[idx:]) idx += w if ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z') { diff --git a/libbeat/common/dtfmt/prog.go b/libbeat/common/dtfmt/prog.go index a2d4517c83e..73e65085bdd 100644 --- a/libbeat/common/dtfmt/prog.go +++ b/libbeat/common/dtfmt/prog.go @@ -27,20 +27,21 @@ type prog struct { } const ( - opNone byte = iota - opCopy1 // copy next byte - opCopy2 // copy next 2 bytes - opCopy3 // copy next 3 bytes - opCopy4 // copy next 4 bytes - opCopyShort // [op, len, content[len]] - opCopyLong // [op, len1, len, content[len1<<8 + len]] - opNum // [op, ft] - opNumPadded // [op, ft, digits] - opExtNumPadded // [op, ft, divExp, digits] - opZeros // [op, count] - opTwoDigit // [op, ft] - opTextShort // [op, ft] - opTextLong // [op, ft] + opNone byte = iota + opCopy1 // copy next byte + opCopy2 // copy next 2 bytes + opCopy3 // copy next 3 bytes + opCopy4 // copy next 4 bytes + opCopyShort // [op, len, content[len]] + opCopyLong // [op, len1, len, content[len1<<8 + len]] + opNum // [op, ft] + opNumPadded // [op, ft, digits] + opExtNumPadded // [op, ft, divExp, digits] + opExtNumFractPadded // [op, ft, divExp, digits, fractDigits] + opZeros // [op, count] + opTwoDigit // [op, ft] + opTextShort // [op, ft] + opTextLong // [op, ft] ) var pow10Table [10]int @@ -108,6 +109,15 @@ func (p prog) eval(bytes []byte, ctx *ctx, t time.Time) ([]byte, error) { return bytes, err } bytes = appendPadded(bytes, v/div, digits) + case opExtNumFractPadded: + ft, divExp, digits, fractDigits := fieldType(p.p[i]), int(p.p[i+1]), int(p.p[i+2]), int(p.p[i+3]) + div := pow10Table[divExp] + i += 4 + v, err := getIntField(ft, ctx, t) + if err != nil { + return bytes, err + } + bytes = appendFractPadded(bytes, v/div, digits, fractDigits) case opZeros: digits := int(p.p[i]) i++ diff --git a/libbeat/common/dtfmt/util.go b/libbeat/common/dtfmt/util.go index f6b5a0c48d1..4341bc552e8 100644 --- a/libbeat/common/dtfmt/util.go +++ b/libbeat/common/dtfmt/util.go @@ -18,44 +18,122 @@ package dtfmt import ( - "math" "strconv" ) +// appendUnpadded appends the string representation of the integer value to the +// buffer. func appendUnpadded(bs []byte, i int) []byte { return strconv.AppendInt(bs, int64(i), 10) } -func appendPadded(bs []byte, i, sz int) []byte { - if i < 0 { +// appendPadded appends a number value as string to the buffer. The string will +// be prefixed with '0' in case the encoded string value is takes less then +// 'digits' bytes. +// +// for example: +// appendPadded(..., 10, 5) -> 00010 +// appendPadded(..., 12345, 5) -> 12345 +func appendPadded(bs []byte, val, digits int) []byte { + if val < 0 { bs = append(bs, '-') - i = -i + val = -1 } - if i < 10 { - for ; sz > 1; sz-- { - bs = append(bs, '0') + // compute number of initial padding zeroes + var padDigits int + switch { + case val < 10: + padDigits = digits - 1 + case val < 100: + padDigits = digits - 2 + case val < 1000: + padDigits = digits - 3 + case val < 10000: + padDigits = digits - 4 + case val < 100000: + padDigits = digits - 5 + case val < 1000000: + padDigits = digits - 6 + case val < 10000000: + padDigits = digits - 7 + case val < 100000000: + padDigits = digits - 8 + case val < 1000000000: + padDigits = digits - 9 + default: + padDigits = digits - 1 + for tmp := val; tmp > 10; tmp = tmp / 10 { + padDigits-- } - return append(bs, byte(i)+'0') } - if i < 100 { - for ; sz > 2; sz-- { - bs = append(bs, '0') - } - return strconv.AppendInt(bs, int64(i), 10) + for i := 0; i < padDigits; i++ { + bs = append(bs, '0') } - digits := 0 - if i < 1000 { - digits = 3 - } else if i < 10000 { - digits = 4 - } else { - digits = int(math.Log10(float64(i))) + 1 + // encode value + if val < 10 { + return append(bs, byte(val)+'0') } - for ; sz > digits; sz-- { - bs = append(bs, '0') + return strconv.AppendInt(bs, int64(val), 10) +} + +// appendFractPadded appends a number value as string to the buffer. +// The string will be prefixed with '0' in case the value is smaller than +// a value that can be represented with 'digits'. +// Trailing zeroes at the end will be removed, such that only a multiple of fractSz +// digits will be printed. If the value is 0, a total of 'fractSz' zeros will +// be printed. +// +// for example: +// appendFractPadded(..., 0, 9, 3) -> "000" +// appendFractPadded(..., 123000, 9, 3) -> "000123" +// appendFractPadded(..., 120000, 9, 3) -> "000120" +// appendFractPadded(..., 120000010, 9, 3) -> "000120010" +// appendFractPadded(..., 123456789, 6, 3) -> "123456" +func appendFractPadded(bs []byte, val, digits, fractSz int) []byte { + if fractSz == 0 || digits <= fractSz { + return appendPadded(bs, val, digits) } - return strconv.AppendInt(bs, int64(i), 10) + initalLen := len(bs) + bs = appendPadded(bs, val, digits) + + // find and remove trailing zeroes, such that a multiple of fractSz is still + // serialized + + // find index range of last digits in buffer, such that a multiple of fractSz + // will be kept if the range of digits is removed. + // invariant: 0 <= end - begin <= fractSz + end := len(bs) + digits = end - initalLen + begin := initalLen + ((digits-1)/fractSz)*fractSz + + // remove trailing zeros, such that a multiple of fractSz digits will be + // present in the final buffer. At minimum fractSz digits will always be + // reported. + for { + if !allZero(bs[begin:end]) { + break + } + + digits -= (end - begin) + end = begin + begin -= fractSz + + if digits <= fractSz { + break + } + } + + return bs[:end] +} + +func allZero(buf []byte) bool { + for _, b := range buf { + if b != '0' { + return false + } + } + return true } diff --git a/libbeat/outputs/codec/common.go b/libbeat/outputs/codec/common.go index a168f352e44..5ed8cbd1333 100644 --- a/libbeat/outputs/codec/common.go +++ b/libbeat/outputs/codec/common.go @@ -36,9 +36,9 @@ func MakeTimestampEncoder() func(*time.Time, structform.ExtVisitor) error { func MakeUTCOrLocalTimestampEncoder(localTime bool) func(*time.Time, structform.ExtVisitor) error { var dtPattern string if localTime { - dtPattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSz" + dtPattern = "yyyy-MM-dd'T'HH:mm:ss.fffffffffz" } else { - dtPattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'" + dtPattern = "yyyy-MM-dd'T'HH:mm:ss.fffffffff'Z'" } formatter, err := dtfmt.NewFormatter(dtPattern)