Skip to content

Commit

Permalink
Port stale simple_keys fix to v2 (#543)
Browse files Browse the repository at this point in the history
This should simplify the logic and significantly improve
performance in edge cases as found and reported on #537
by CJ Cullen.
  • Loading branch information
cjcullen authored and niemeyer committed Nov 19, 2019
1 parent 770b8da commit 36babc3
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 34 deletions.
12 changes: 12 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,18 @@ var unmarshalTests = []struct {
"a: 5.5\n",
&struct{ A jsonNumberT }{"5.5"},
},
{
`
a:
b
b:
? a
: a`,
&M{"a": "b",
"b": M{
"a": "a",
}},
},
}

type M map[interface{}]interface{}
Expand Down
10 changes: 10 additions & 0 deletions limit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ var limitTests = []struct {
{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
}

func (s *S) TestLimits(c *C) {
Expand Down Expand Up @@ -82,6 +84,14 @@ func Benchmark1000KBMaps(b *testing.B) {
benchmark(b, "1000kb of maps")
}

func BenchmarkDeepSlice(b *testing.B) {
benchmark(b, "1000kb slice nested at max-depth")
}

func BenchmarkDeepFlow(b *testing.B) {
benchmark(b, "1000kb slice nested in maps at max-depth")
}

func benchmark(b *testing.B, name string) {
for _, t := range limitTests {
if t.name != name {
Expand Down
69 changes: 35 additions & 34 deletions scannerc.go
Original file line number Diff line number Diff line change
Expand Up @@ -634,13 +634,12 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
need_more_tokens = true
} else {
// Check if any potential simple key may occupy the head position.
if !yaml_parser_stale_simple_keys(parser) {
return false
}

for i := range parser.simple_keys {
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
simple_key := &parser.simple_keys[i]
if simple_key.possible && simple_key.token_number == parser.tokens_parsed {
if simple_key.token_number < parser.tokens_parsed {
break
}
if yaml_simple_key_is_valid(parser, simple_key) && simple_key.token_number == parser.tokens_parsed {
need_more_tokens = true
break
}
Expand Down Expand Up @@ -678,11 +677,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
return false
}

// Remove obsolete potential simple keys.
if !yaml_parser_stale_simple_keys(parser) {
return false
}

// Check the indentation level against the current column.
if !yaml_parser_unroll_indent(parser, parser.mark.column) {
return false
Expand Down Expand Up @@ -837,27 +831,28 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool {
"found character that cannot start any token")
}

// Check the list of potential simple keys and remove the positions that
// cannot contain simple keys anymore.
func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool {
// Check for a potential simple key for each flow level.
for i := range parser.simple_keys {
simple_key := &parser.simple_keys[i]

// The specification requires that a simple key
//
// - is limited to a single line,
// - is shorter than 1024 characters.
if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) {

// Check if the potential simple key to be removed is required.
if simple_key.required {
return yaml_parser_set_scanner_error(parser,
"while scanning a simple key", simple_key.mark,
"could not find expected ':'")
}
simple_key.possible = false
func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) bool {
if !simple_key.possible {
return false
}

// The 1.2 specification says:
//
// "If the ? indicator is omitted, parsing needs to see past the
// implicit key to recognize it as such. To limit the amount of
// lookahead required, the “:” indicator must appear at most 1024
// Unicode characters beyond the start of the key. In addition, the key
// is restricted to a single line."
//
if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
// Check if the potential simple key to be removed is required.
if simple_key.required {
return yaml_parser_set_scanner_error(parser,
"while scanning a simple key", simple_key.mark,
"could not find expected ':'")
}
simple_key.possible = false
return false
}
return true
}
Expand All @@ -879,8 +874,8 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
possible: true,
required: required,
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
mark: parser.mark,
}
simple_key.mark = parser.mark

if !yaml_parser_remove_simple_key(parser) {
return false
Expand Down Expand Up @@ -912,7 +907,12 @@ const max_flow_level = 10000
// Increase the flow level and resize the simple key list if needed.
func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
// Reset the simple key on the next level.
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
possible: false,
required: false,
token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
mark: parser.mark,
})

// Increase the flow level.
parser.flow_level++
Expand Down Expand Up @@ -1286,7 +1286,8 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
simple_key := &parser.simple_keys[len(parser.simple_keys)-1]

// Have we found a simple key?
if simple_key.possible {
if yaml_simple_key_is_valid(parser, simple_key) {

// Create the KEY token and insert it into the queue.
token := yaml_token_t{
typ: yaml_KEY_TOKEN,
Expand Down

0 comments on commit 36babc3

Please sign in to comment.