Skip to content

Commit

Permalink
Fix substring and count unicode handling
Browse files Browse the repository at this point in the history
This is done by working with the runes directly, both when counting length of strings as well as when slicing them to substrings.

Fixes #2799

Signed-off-by: Anders Eknert <anders.eknert@bisnode.com>
  • Loading branch information
anderseknert authored and patrick-east committed Oct 23, 2020
1 parent e8c5e12 commit c84109a
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 6 deletions.
23 changes: 23 additions & 0 deletions test/cases/testdata/aggregates/test-aggregates-0027.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cases:
- modules:
- |
package generated
p = x {
count("abcde", x)
}
note: aggregates/count string
query: data.generated.p = x
want_result:
- x: 5
- modules:
- |
package generated
p = x {
count("åäö", x)
}
note: aggregates/count string
query: data.generated.p = x
want_result:
- x: 3
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0889.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_result:
- x: cde
- modules:
- |
package generated
p = x {
substring("åäö", 0, 2, x)
}
note: 'strings/substring: unicode'
query: data.generated.p = x
want_result:
- x: åä
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0890.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_result:
- x: cdefgh
- modules:
- |
package generated
p = x {
substring("aåäö", 2, -1, x)
}
note: 'strings/substring: remainder unicode'
query: data.generated.p = x
want_result:
- x: äö
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0891.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_result:
- x: cdefgh
- modules:
- |
package generated
p = x {
substring("abcdefghåäö", 2, 10000, x)
}
note: 'strings/substring: too long unicode'
query: data.generated.p = x
want_result:
- x: cdefghåäö
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0892.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_error: negative offset
want_error_code: eval_builtin_error
- modules:
- |
package generated
p = x {
substring("åäö", -1, -1, x)
}
note: 'strings/substring: offset negative unicode'
query: data.generated.p = x
want_error: negative offset
want_error_code: eval_builtin_error
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0893.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_result:
- x: ""
- modules:
- |
package generated
p = x {
substring("åäö", 3, -1, x)
}
note: 'strings/substring: offset too long unicode'
query: data.generated.p = x
want_result:
- x: ""
11 changes: 11 additions & 0 deletions test/cases/testdata/strings/test-strings-0894.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ cases:
query: data.generated.p = x
want_result:
- x: ""
- modules:
- |
package generated
p = x {
substring("åäö", 4, -1, x)
}
note: 'strings/substring: offset too long 2 unicode'
query: data.generated.p = x
want_result:
- x: ""
2 changes: 1 addition & 1 deletion topdown/aggregates.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func builtinCount(a ast.Value) (ast.Value, error) {
case ast.Set:
return ast.IntNumberTerm(a.Len()).Value, nil
case ast.String:
return ast.IntNumberTerm(len(a)).Value, nil
return ast.IntNumberTerm(len([]rune(a))).Value, nil
}
return nil, builtins.NewOperandTypeErr(1, a, "array", "object", "set")
}
Expand Down
11 changes: 6 additions & 5 deletions topdown/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,12 @@ func builtinSubstring(a, b, c ast.Value) (ast.Value, error) {
if err != nil {
return nil, err
}
runes := []rune(base)

startIndex, err := builtins.IntOperand(b, 2)
if err != nil {
return nil, err
} else if startIndex >= len(base) {
} else if startIndex >= len(runes) {
return ast.String(""), nil
} else if startIndex < 0 {
return nil, fmt.Errorf("negative offset")
Expand All @@ -124,13 +125,13 @@ func builtinSubstring(a, b, c ast.Value) (ast.Value, error) {

var s ast.String
if length < 0 {
s = ast.String(base[startIndex:])
s = ast.String(runes[startIndex:])
} else {
upto := startIndex + length
if len(base) < upto {
upto = len(base)
if len(runes) < upto {
upto = len(runes)
}
s = ast.String(base[startIndex:upto])
s = ast.String(runes[startIndex:upto])
}

return s, nil
Expand Down

0 comments on commit c84109a

Please sign in to comment.