Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parser #557

Merged
merged 10 commits into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,14 @@ func (d *Decoder) nodeToValue(node ast.Node) (any, error) {
}
return nil, errors.ErrSyntax(fmt.Sprintf("cannot convert %q to boolean", fmt.Sprint(v)), n.Value.GetToken())
case token.StringTag:
return d.nodeToValue(n.Value)
v, err := d.nodeToValue(n.Value)
if err != nil {
return nil, err
}
if v == nil {
return "", nil
}
return fmt.Sprint(v), nil
case token.MappingTag:
return d.nodeToValue(n.Value)
default:
Expand Down
110 changes: 95 additions & 15 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,18 @@ func (p *parser) parseMap(ctx *context) (*ast.MappingNode, error) {
tk = ctx.currentToken()
}
for tk.Column() == keyTk.Column() {
typ := tk.Type()
if ctx.isFlow && typ == token.SequenceEndType {
// [
// key: value
// ] <=
break
}
if !p.isMapToken(tk) {
return nil, errors.ErrSyntax("non-map value is specified", tk.RawToken())
}
cm := p.parseHeadComment(ctx)
if tk.Type() == token.MappingEndType {
if typ == token.MappingEndType {
// a: {
// b: c
// } <=
Expand Down Expand Up @@ -644,6 +651,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
keyCol := key.GetToken().Position.Column
keyLine := key.GetToken().Position.Line

if tk.Column() != keyCol && tk.Line() == keyLine && (tk.GroupType() == TokenGroupMapKey || tk.GroupType() == TokenGroupMapKeyValue) {
// a: b:
// ^
//
// a: b: c
// ^
return nil, errors.ErrSyntax("mapping value is not allowed in this context", tk.RawToken())
}

if tk.Column() == keyCol && p.isMapToken(tk) {
// in this case,
// ----
Expand Down Expand Up @@ -673,9 +689,6 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName {
// key: <value does not defined>
// &anchor
//
// key: <value does not defined>
// &anchor
return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken())
}

Expand Down Expand Up @@ -932,17 +945,7 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) {
comment := p.parseHeadComment(ctx)
ctx.goNext() // skip sequence entry token

valueTk := ctx.currentToken()
if valueTk == nil {
node, err := newNullNode(ctx, ctx.createNullToken(seqTk))
if err != nil {
return nil, err
}
seqNode.Values = append(seqNode.Values, node)
break
}

value, err := p.parseToken(ctx.withIndex(uint(len(seqNode.Values))), valueTk)
value, err := p.parseSequenceValue(ctx.withIndex(uint(len(seqNode.Values))), seqTk)
if err != nil {
return nil, err
}
Expand All @@ -968,6 +971,83 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) {
return seqNode, nil
}

// parseSequenceValue parses the value belonging to the sequence entry token
// seqTk. When the entry has no value of its own, a null node (or an anchor
// wrapping a null token) is produced instead; otherwise the current token is
// handed to parseToken.
func (p *parser) parseSequenceValue(ctx *context, seqTk *Token) (ast.Node, error) {
	tk := ctx.currentToken()
	if tk == nil {
		return newNullNode(ctx, ctx.insertNullToken(seqTk))
	}
	if ctx.isComment() {
		tk = ctx.nextNotCommentToken()
	}

	entryCol, entryLine := seqTk.Column(), seqTk.Line()

	// emptyAnchor builds an anchor node whose value is a freshly created
	// null token, then advances the context by one token.
	emptyAnchor := func() (ast.Node, error) {
		group := &TokenGroup{
			Type:   TokenGroupAnchor,
			Tokens: []*Token{tk, ctx.createNullToken(tk)},
		}
		node, err := p.parseAnchor(ctx.withGroup(group), group)
		if err != nil {
			return nil, err
		}
		ctx.goNext()
		return node, nil
	}

	// The cases are order-sensitive: they are evaluated top to bottom and
	// the first match wins.
	switch {
	case tk.Column() == entryCol && tk.Type() == token.SequenceEntryType:
		// The entry is immediately followed by a sibling entry:
		// ----
		// - <value is not defined>
		// -
		return newNullNode(ctx, ctx.insertNullToken(seqTk))

	case tk.Line() == entryLine && tk.GroupType() == TokenGroupAnchorName &&
		ctx.nextToken().Column() == entryCol && ctx.nextToken().Type() == token.SequenceEntryType:
		// An anchor without a value, followed by a sibling entry:
		// ----
		// - &anchor
		// -
		return emptyAnchor()

	case tk.Column() <= entryCol && tk.GroupType() == TokenGroupAnchorName:
		// An anchor at or left of the entry column:
		// - <value is not defined>
		// &anchor
		return nil, errors.ErrSyntax("anchor is not allowed in this sequence context", tk.RawToken())

	case tk.Column() < entryCol:
		// The following token is left of the entry column:
		// ----
		//   - <value is not defined>
		// next
		return newNullNode(ctx, ctx.insertNullToken(seqTk))

	case tk.Line() == entryLine && tk.GroupType() == TokenGroupAnchorName &&
		ctx.nextToken().Column() < entryCol:
		// An anchor without a value, followed by a token left of the entry column:
		// ----
		//   - &anchor
		// next
		return emptyAnchor()
	}

	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, err
	}
	return value, nil
}

func (p *parser) parseDirective(ctx *context, g *TokenGroup) (*ast.DirectiveNode, error) {
node, err := newDirectiveNode(ctx, g.First())
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions parser/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,10 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) {
}

func isScalarType(tk *Token) bool {
switch tk.GroupType() {
case TokenGroupMapKey, TokenGroupMapKeyValue:
return false
}
typ := tk.Type()
return typ == token.AnchorType ||
typ == token.AliasType ||
Expand Down
15 changes: 15 additions & 0 deletions scanner/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,25 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token {
} else {
tk = token.New(string(source), string(c.obuf), pos)
}
c.setTokenTypeByPrevTag(tk)
c.resetBuffer()
return tk
}

// setTokenTypeByPrevTag sets tk.Type to token.StringType when the token
// returned by c.lastToken() is a tag whose value is not a key of
// token.ReservedTagKeywordMap. In every other case tk is left untouched.
func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
	prev := c.lastToken()
	if prev == nil || prev.Type != token.TagType {
		return
	}
	_, reserved := token.ReservedTagKeywordMap[token.ReservedTagKeyword(prev.Value)]
	if reserved {
		return
	}
	tk.Type = token.StringType
}

func (c *Context) lastToken() *token.Token {
if len(c.tokens) != 0 {
return c.tokens[len(c.tokens)-1]
Expand Down
33 changes: 26 additions & 7 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,17 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
}

// validateDocumentSeparatorMarker returns an invalid-token error when
// foundDocumentSeparatorMarker reports a document separator in src, and nil
// otherwise. The error's token carries the current origin buffer and the
// scanner position.
func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error {
	if !s.foundDocumentSeparatorMarker(src) {
		return nil
	}
	invalid := token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos())
	return ErrInvalidToken(invalid)
}

func (s *Scanner) foundDocumentSeparatorMarker(src []rune) bool {
if len(src) < 3 {
return nil
return false
}
var marker string
if len(src) == 3 {
Expand All @@ -526,12 +535,7 @@ func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) erro
return r == ' ' || r == '\t' || r == '\n' || r == '\r'
})
}
if marker == "---" || marker == "..." {
return ErrInvalidToken(
token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()),
)
}
return nil
return marker == "---" || marker == "..."
}

func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) {
Expand Down Expand Up @@ -701,6 +705,14 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
ctx.addBuf(c)
ctx.updateDocumentNewLineState()
s.progressLine(ctx)
if ctx.next() {
if s.foundDocumentSeparatorMarker(ctx.src[ctx.idx:]) {
value := ctx.bufferedSrc()
ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
ctx.clear()
s.breakDocument(ctx)
}
}
} else if s.isFirstCharAtLine && c == ' ' {
ctx.addDocumentIndent(s.column)
s.progressColumn(ctx, 1)
Expand Down Expand Up @@ -1319,6 +1331,13 @@ func (s *Scanner) scan(ctx *Context) error {
return err
}
case '\t':
if ctx.existsBuffer() && s.lastDelimColumn == 0 {
// tab indent for plain text (yaml-test-suite's spec-example-7-12-plain-lines).
s.indentNum++
ctx.addOriginBuf(c)
s.progressColumn(ctx, 1)
continue
}
if err := s.scanTab(ctx, c); err != nil {
return err
}
Expand Down
12 changes: 1 addition & 11 deletions yaml_test_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ var failureTestNames = []string{
"anchors-on-empty-scalars", // no json.
"aliases-in-flow-objects", // no json.
"aliases-in-explicit-block-mapping", // no json.
"aliases-in-implicit-block-mapping",
"bare-document-after-document-end-marker",
"block-mapping-with-missing-keys", // no json.
"block-mapping-with-missing-keys", // no json.
"block-mapping-with-missing-values",
"block-mapping-with-multiline-scalars",
"block-scalar-with-more-spaces-than-first-content-line",
Expand Down Expand Up @@ -86,22 +84,14 @@ var failureTestNames = []string{
"spec-example-8-19-compact-block-mappings", // no json.
"spec-example-6-19-secondary-tag-handle",
"spec-example-6-24-verbatim-tags",
"spec-example-6-28-non-specific-tags",
"spec-example-6-4-line-prefixes",
"spec-example-6-6-line-folding",
"spec-example-6-6-line-folding-1-3",
"spec-example-6-8-flow-folding",
"spec-example-7-12-plain-lines",
"spec-example-7-19-single-pair-flow-mappings",
"spec-example-7-20-single-pair-explicit-entry",
"spec-example-7-24-flow-nodes",
"spec-example-8-10-folded-lines-8-13-final-empty-lines",
"spec-example-8-15-block-sequence-entry-types",
"spec-example-8-17-explicit-block-mapping-entries",
"spec-example-8-2-block-indentation-indicator",
"spec-example-9-3-bare-documents",
"spec-example-9-4-explicit-documents",
"spec-example-9-5-directives-documents",
"spec-example-9-6-stream",
"spec-example-9-6-stream-1-3",
"syntax-character-edge-cases/00", // no json.
Expand Down