From c5e5dc90bb7ebc744c353f3988fa81743160fce1 Mon Sep 17 00:00:00 2001 From: Wang Lei Date: Thu, 17 May 2018 11:01:09 +0800 Subject: [PATCH 1/3] ReadFileInto skips a single leading UTF8 BOM sequence if it exists. --- read.go | 12 ++++++++++++ read_test.go | 11 +++++++++++ testdata/notepad.ini | 3 +++ 3 files changed, 26 insertions(+) create mode 100644 testdata/notepad.ini diff --git a/read.go b/read.go index 5b840a9..285fdbe 100644 --- a/read.go +++ b/read.go @@ -221,6 +221,9 @@ func ReadStringInto(config interface{}, str string) error { // ReadFileInto reads gcfg formatted data from the file filename and sets the // values into the corresponding fields in config. +// +// For compatibility with files created on Windows, ReadFileInto skips a +// single leading UTF8 BOM sequence if it exists. func ReadFileInto(config interface{}, filename string) error { f, err := os.Open(filename) if err != nil { @@ -231,6 +234,15 @@ func ReadFileInto(config interface{}, filename string) error { if err != nil { return err } + + //Skips a single leading UTF8 BOM sequence if it exists. 
+ if len(src) > 3 { + bom := src[:3] + if bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF { + src = src[3:] + } + } + fset := token.NewFileSet() file := fset.AddFile(filename, fset.Base(), len(src)) return readInto(config, fset, file, src) diff --git a/read_test.go b/read_test.go index 6cfeaa1..bd8f63d 100644 --- a/read_test.go +++ b/read_test.go @@ -339,6 +339,17 @@ func TestReadFileIntoUnicode(t *testing.T) { } } +func TestReadFileIntoNotepad(t *testing.T) { + res := &struct{ X甲 struct{ X乙 string } }{} + err := ReadFileInto(res, "testdata/notepad.ini") + if err != nil { + t.Error(err) + } + if "丁" != res.X甲.X乙 { + t.Errorf("got %q, wanted %q", res.X甲.X乙, "丁") + } +} + func TestReadStringIntoSubsectDefaults(t *testing.T) { type subsect struct { Color string diff --git a/testdata/notepad.ini b/testdata/notepad.ini new file mode 100644 index 0000000..08673b8 --- /dev/null +++ b/testdata/notepad.ini @@ -0,0 +1,3 @@ +; Comment line +[甲] +乙=丁 # Update 乙 to 丁 by notepad on windows From 2ca7dc409c0dfa69b082ad313ad1c4574fc447cc Mon Sep 17 00:00:00 2001 From: Wang Lei Date: Thu, 17 May 2018 17:22:00 +0800 Subject: [PATCH 2/3] 1. > to >= 2. Make helper function, say skipLeadingUtf8Bom([]byte) []byte 3. Add table-driven unit tests for skipLeadingUtf8Bom --- read.go | 21 ++++++++++++++------- read_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/read.go b/read.go index 285fdbe..ce03b89 100644 --- a/read.go +++ b/read.go @@ -235,15 +235,22 @@ func ReadFileInto(config interface{}, filename string) error { return err } - //Skips a single leading UTF8 BOM sequence if it exists. - if len(src) > 3 { - bom := src[:3] - if bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF { - src = src[3:] - } - } + // Skips a single leading UTF8 BOM sequence if it exists. 
+ src = skipLeadingUtf8Bom(src) fset := token.NewFileSet() file := fset.AddFile(filename, fset.Base(), len(src)) return readInto(config, fset, file, src) } + +func skipLeadingUtf8Bom(src []byte) []byte{ + if len(src) >= 3 { + bom := src[:3] + if bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF { + return src[3:] + } + } + return src +} + + diff --git a/read_test.go b/read_test.go index bd8f63d..779c752 100644 --- a/read_test.go +++ b/read_test.go @@ -7,6 +7,7 @@ import ( "os" "reflect" "testing" + "bytes" ) const ( @@ -415,3 +416,28 @@ func TestPanics(t *testing.T) { testPanic(t, tt.id, tt.config, tt.gcfg) } } + +var utf8bomtests = []struct{ + id string + in []byte + out []byte +}{ + {"0 bytes input",[]byte{}, []byte{}}, + {"3 bytes input (BOM only)",[]byte{0xEF,0xBB,0xBF}, []byte{}}, + {"3 bytes input (comment only, without BOM)",[]byte(";c\n"), []byte(";c\n")}, + {"normal input with BOM",[]byte("\xEF\xBB\xBF[section]\nname=value"), []byte("[section]\nname=value")}, + {"normal input without BOM",[]byte("[section]\nname=value"), []byte("[section]\nname=value")}, +} + +func testUtf8Bom(t *testing.T, id string, in, out []byte){ + got := skipLeadingUtf8Bom([]byte(in)) + if !bytes.Equal(got,out) { + t.Errorf("%s.", id) + } +} + +func TestUtf8Boms(t *testing.T) { + for _, tt := range utf8bomtests { + testUtf8Bom(t, tt.id, tt.in, tt.out) + } +} \ No newline at end of file From 99a6611d695d8e3106158b7fb7fbbe2e13c4377d Mon Sep 17 00:00:00 2001 From: Wang Lei Date: Fri, 18 May 2018 02:26:40 +0800 Subject: [PATCH 3/3] Make the code look more beautiful. 
--- read.go | 15 ++++++++------- read_test.go | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/read.go b/read.go index ce03b89..0679665 100644 --- a/read.go +++ b/read.go @@ -1,6 +1,7 @@ package gcfg import ( + "bytes" "fmt" "io" "io/ioutil" @@ -13,6 +14,7 @@ import ( ) var unescape = map[rune]rune{'\\': '\\', '"': '"', 'n': '\n', 't': '\t'} +var utf8Bom = []byte("\ufeff") // no error: invalid literals should be caught by scanner func unquote(s string) string { @@ -243,14 +245,13 @@ func ReadFileInto(config interface{}, filename string) error { return readInto(config, fset, file, src) } -func skipLeadingUtf8Bom(src []byte) []byte{ - if len(src) >= 3 { - bom := src[:3] - if bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF { - return src[3:] +func skipLeadingUtf8Bom(src []byte) []byte { + lengthUtf8Bom := len(utf8Bom) + + if len(src) >= lengthUtf8Bom { + if bytes.Equal(src[:lengthUtf8Bom], utf8Bom) { + return src[lengthUtf8Bom:] } } return src } - - diff --git a/read_test.go b/read_test.go index 779c752..6d3eecc 100644 --- a/read_test.go +++ b/read_test.go @@ -1,13 +1,13 @@ package gcfg import ( + "bytes" "encoding" "fmt" "math/big" "os" "reflect" "testing" - "bytes" ) const ( @@ -417,21 +417,21 @@ func TestPanics(t *testing.T) { } } -var utf8bomtests = []struct{ - id string +var utf8bomtests = []struct { + id string in []byte out []byte }{ - {"0 bytes input",[]byte{}, []byte{}}, - {"3 bytes input (BOM only)",[]byte{0xEF,0xBB,0xBF}, []byte{}}, - {"3 bytes input (comment only, without BOM)",[]byte(";c\n"), []byte(";c\n")}, - {"normal input with BOM",[]byte("\xEF\xBB\xBF[section]\nname=value"), []byte("[section]\nname=value")}, - {"normal input without BOM",[]byte("[section]\nname=value"), []byte("[section]\nname=value")}, + {"0 bytes input", []byte{}, []byte{}}, + {"3 bytes input (BOM only)", []byte("\ufeff"), []byte{}}, + {"3 bytes input (comment only, without BOM)", []byte(";c\n"), []byte(";c\n")}, + {"normal input with 
BOM", []byte("\ufeff[section]\nname=value"), []byte("[section]\nname=value")}, + {"normal input without BOM", []byte("[section]\nname=value"), []byte("[section]\nname=value")}, } -func testUtf8Bom(t *testing.T, id string, in, out []byte){ +func testUtf8Bom(t *testing.T, id string, in, out []byte) { got := skipLeadingUtf8Bom([]byte(in)) - if !bytes.Equal(got,out) { + if !bytes.Equal(got, out) { t.Errorf("%s.", id) } } @@ -440,4 +440,4 @@ func TestUtf8Boms(t *testing.T) { for _, tt := range utf8bomtests { testUtf8Bom(t, tt.id, tt.in, tt.out) } -} \ No newline at end of file +}