From 92a970456baf20ca5d41c85bb568d36f850c434c Mon Sep 17 00:00:00 2001 From: Hikmatulloh Hari Mukti Date: Tue, 17 Sep 2024 08:30:55 +0700 Subject: [PATCH] chore: proto.UnmarshalValue simplify utf8 string decoding --- proto/value_unmarshal.go | 19 ++++-------- proto/value_unmarshal_internal_test.go | 41 ++++---------------------- 2 files changed, 11 insertions(+), 49 deletions(-) diff --git a/proto/value_unmarshal.go b/proto/value_unmarshal.go index d384aed0..e2284bc2 100755 --- a/proto/value_unmarshal.go +++ b/proto/value_unmarshal.go @@ -224,31 +224,22 @@ func UnmarshalValue(b []byte, arch byte, baseType basetype.BaseType, profileType } return SliceString(vals), nil } - b = trimRightZero(b) return String(utf8String(b)), nil } return Value{}, fmt.Errorf("type %s(%d) is not supported: %w", baseType, baseType, ErrTypeNotSupported) } -// trimRightZero returns a subslice of b up to the null-terminated -// string ('\x00') and discard the remaining bytes, as these are likely -// padding bytes used to meet the desired length. -func trimRightZero(b []byte) []byte { - for i := range b { - if b[i] == 0 { - return b[:i] - } - } - return b -} - // utf8String converts b into a valid UTF-8 string. If it encounters -// utf8.RuneError character, it will discard that character. +// a utf8.RuneError character, it discards it. It stops when all bytes +// have been decoded or when it reaches a null terminator '\x00'. 
func utf8String(b []byte) string { buf := make([]byte, 0, 255) for len(b) > 0 { r, size := utf8.DecodeRune(b) + if r == 0 { + break + } if r != utf8.RuneError { buf = utf8.AppendRune(buf, r) } diff --git a/proto/value_unmarshal_internal_test.go b/proto/value_unmarshal_internal_test.go index 680d2601..ccd2cf18 100644 --- a/proto/value_unmarshal_internal_test.go +++ b/proto/value_unmarshal_internal_test.go @@ -9,47 +9,18 @@ import ( "testing" ) -func TestTrimRightZero(t *testing.T) { - tt := []struct { - str string - expected string - }{ - {str: "", expected: ""}, - {str: "\x00", expected: ""}, - {str: "Open Water", expected: "Open Water"}, - {str: "Open Water\x00", expected: "Open Water"}, - {str: "Open Water\x00\x00", expected: "Open Water"}, - {str: "Walk or jog lightly.\x00��", expected: "Walk or jog lightly."}, - {str: "Walk or jog lightly.��", expected: "Walk or jog lightly.��"}, - } - - for _, tc := range tt { - t.Run(tc.str, func(t *testing.T) { - res := trimRightZero([]byte(tc.str)) - if string(res) != tc.expected { - t.Fatalf("expected: %s, got: %s", tc.expected, res) - } - }) - } -} - -func BenchmarkTrimRightZero(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = trimRightZero([]byte("")) - _ = trimRightZero([]byte("\x00")) - _ = trimRightZero([]byte("Open Water")) - _ = trimRightZero([]byte("Open Water\x00")) - _ = trimRightZero([]byte("Open Water\x00\x00")) - _ = trimRightZero([]byte("Walk or jog lightly.\x00��")) - } -} - func TestUTF8String(t *testing.T) { tt := []struct { in []byte out string }{ + {in: []byte(""), out: ""}, + {in: []byte("\x00"), out: ""}, + {in: []byte("Open Water"), out: "Open Water"}, + {in: []byte("Open Water\x00"), out: "Open Water"}, + {in: []byte("Open Water\x00\x00"), out: "Open Water"}, {in: []byte("Walk or jog lightly.��"), out: "Walk or jog lightly."}, + {in: []byte("Walk or jog lightly.\x00��"), out: "Walk or jog lightly."}, {in: []byte("0000000000000�0000000"), out: "00000000000000000000"}, {in: 
[]byte("0000000000000\xe80000000"), out: "00000000000000000000"}, }