Skip to content

Commit

Permalink
Merge pull request #89 from yoheimuta/support-utf8-bom
Browse files Browse the repository at this point in the history
Support UTF-8-BOM files
  • Loading branch information
yoheimuta authored Jun 26, 2024
2 parents bd0bfef + d117c9d commit 1e1fd66
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 16 deletions.
1 change: 1 addition & 0 deletions _testdata/bom.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
syntax = "proto3";
34 changes: 18 additions & 16 deletions lexer/scanner/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
TCOMMA // ,
TDOT // .
TMINUS // -
TBOM // Byte Order Mark

// Keywords
TSYNTAX
Expand Down Expand Up @@ -64,22 +65,23 @@ const (

func asMiscToken(ch rune) Token {
m := map[rune]Token{
';': TSEMICOLON,
':': TCOLON,
'=': TEQUALS,
'"': TQUOTE,
'\'': TQUOTE,
'(': TLEFTPAREN,
')': TRIGHTPAREN,
'{': TLEFTCURLY,
'}': TRIGHTCURLY,
'[': TLEFTSQUARE,
']': TRIGHTSQUARE,
'<': TLESS,
'>': TGREATER,
',': TCOMMA,
'.': TDOT,
'-': TMINUS,
';': TSEMICOLON,
':': TCOLON,
'=': TEQUALS,
'"': TQUOTE,
'\'': TQUOTE,
'(': TLEFTPAREN,
')': TRIGHTPAREN,
'{': TLEFTCURLY,
'}': TRIGHTCURLY,
'[': TLEFTSQUARE,
']': TRIGHTSQUARE,
'<': TLESS,
'>': TGREATER,
',': TCOMMA,
'.': TDOT,
'-': TMINUS,
'\uFEFF': TBOM,
}
if t, ok := m[ch]; ok {
return t
Expand Down
11 changes: 11 additions & 0 deletions parser/proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ func (p *Proto) Accept(v Visitor) {
//
// See https://developers.google.com/protocol-buffers/docs/reference/proto3-spec#proto_file
func (p *Parser) ParseProto() (*Proto, error) {
p.parseBOM()

syntaxComments := p.ParseComments()
syntax, err := p.ParseSyntax()
if err != nil {
Expand All @@ -55,6 +57,15 @@ func (p *Parser) ParseProto() (*Proto, error) {
}, nil
}

// parseBOM skips a byte order mark token if it is the next token from the lexer.
//
// It advances the lexer by one token. When that token is scanner.TBOM it is
// left consumed; otherwise the advance is undone with UnNext.
//
// See https://protobuf.com/docs/language-spec#source-code-representation
func (p *Parser) parseBOM() {
	p.lex.Next()
	if p.lex.Token != scanner.TBOM {
		// Not a BOM: undo the read. Called directly rather than via defer,
		// because nothing else runs before the function returns.
		p.lex.UnNext()
	}
}

// protoBody = { import | package | option | topLevelDef | emptyStatement }
// topLevelDef = message | enum | service | extend
// See https://developers.google.com/protocol-buffers/docs/reference/proto3-spec#proto_file
Expand Down
27 changes: 27 additions & 0 deletions parser/proto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2242,6 +2242,33 @@ message foo {
},
},
},
{
name: "parsing a UTF-8-BOM file",
input: string([]byte{
0xEF, 0xBB, 0xBF,
}) + `
syntax = "proto3";
`,
wantProto: &parser.Proto{
Syntax: &parser.Syntax{
ProtobufVersion: "proto3",
ProtobufVersionQuote: `"proto3"`,
Meta: meta.Meta{
Pos: meta.Position{
Offset: 4,
Line: 2,
Column: 1,
},
LastPos: meta.Position{
Offset: 21,
Line: 2,
Column: 18,
},
},
},
Meta: &parser.ProtoMeta{},
},
},
}

for _, test := range tests {
Expand Down

0 comments on commit 1e1fd66

Please sign in to comment.