From 8cbfbb2a769529c408e642d8b6403c5b420a8ac6 Mon Sep 17 00:00:00 2001 From: "Zhang, Eric" <59079351+ericzhang6222@users.noreply.github.com> Date: Mon, 25 May 2020 15:20:34 +0800 Subject: [PATCH] #234 Added new func to std_seq.go. (#240) 1. Updated `seq` library to add functions that are used for manipulating sequenced data structures including string and array. These functions are: `contains`, `has_prefix`, `has_suffix`, `sub`, `split` and `join`. 2. Changed some APIs signature, like changed `//seq.sub(subject, old, new)` to `//seq.sub(old, new, subject)`. It can help to support following code: ``` let rmspace = //seq.sub(" ", ""); rmspace("hello world"), rmspace("a b c")] ``` --- docs/README.md | 2 +- docs/std-seq.md | 86 +++++++++++++- docs/std-str.md | 69 ------------ examples/grpc/grpc-proto.arrai | 12 +- examples/grpc/grpc.arrai | 2 +- examples/grpc/proto-util.arrai | 14 +-- internal/shell/shell_cmd_test.go | 2 +- internal/shell/shell_test.go | 12 +- rel/value_set_array.go | 2 +- syntax/std_seq.go | 173 +++++++++++++++++++++++++++- syntax/std_seq_array_helper.go | 187 +++++++++++++++++++++++++++++++ syntax/std_seq_bytes_helper.go | 38 +++++++ syntax/std_seq_contains_test.go | 73 ++++++++++++ syntax/std_seq_join_test.go | 81 +++++++++++++ syntax/std_seq_prefix_test.go | 67 +++++++++++ syntax/std_seq_split_test.go | 98 ++++++++++++++++ syntax/std_seq_sub_test.go | 75 +++++++++++++ syntax/std_seq_suffix_test.go | 60 ++++++++++ syntax/std_seq_test.go | 4 +- syntax/std_str.go | 36 ------ syntax/std_str_test.go | 42 ------- syntax/test_helpers.go | 4 + 22 files changed, 962 insertions(+), 177 deletions(-) create mode 100644 syntax/std_seq_array_helper.go create mode 100644 syntax/std_seq_bytes_helper.go create mode 100644 syntax/std_seq_contains_test.go create mode 100644 syntax/std_seq_join_test.go create mode 100644 syntax/std_seq_prefix_test.go create mode 100644 syntax/std_seq_split_test.go create mode 100644 syntax/std_seq_sub_test.go create mode 100644 
syntax/std_seq_suffix_test.go diff --git a/docs/README.md b/docs/README.md index e018b045..de70a965 100644 --- a/docs/README.md +++ b/docs/README.md @@ -432,7 +432,7 @@ External libraries may be accessed via package references. 1. **`//math`:** math functions and constants such as `//math.sin` and `//math.pi`. 2. **`//str`:** string functions such as `//str.upper` and - `//str.join`. + `//str.lower`. 3. **`//fn`:** higher order functions such as `//fn.fix` and `//fn.fixt`. See the [standard library reference](std.md) for full documentation on all packages. 2. **`//{./path}`** provides access to other arrai files relative to the current diff --git a/docs/std-seq.md b/docs/std-seq.md index a5fcb56c..42c912fb 100644 --- a/docs/std-seq.md +++ b/docs/std-seq.md @@ -1,21 +1,101 @@ # seq -The `seq` library contains functions that are used for string manipulations. +The `seq` library contains functions that are used for manipulating sequenced data structures including string and array. -## `//seq.concat(seqs <: array) <: array`
`concat(seqs <: string) <: string` +## `//seq.concat(seqs <: array) <: array`
`//seq.concat(seqs <: string) <: string` `concat` takes an array of sequences `seqs` and returns a sequence that is the concatenation of the sequences in the array. +Usage: | example | equals | |:-|:-| | `//seq.concat(["ba", "na", "na"])` | `"banana"` | | `//seq.concat([[1, 2], [3, 4, 5]])` | `[1, 2, 3, 4, 5]` | -## `//seq.repeat(n <: number, seq <: array) <: array`
`repeat(n <: number, seq <: string) <: string` +## `//seq.contains(sub <: array, subject <: array) <: bool`
`//seq.contains(sub <: string, subject <: string) <: bool` + +`contains` checks whether sequence `sub` is contained in sequence `subject` and returns true if it is, or false otherwise. + +Usage: +| example | equals | +|:-|:-| +| `//seq.contains("substring", "the full string which has substring")` | `true` | +| `//seq.contains("microwave", "just some random sentence")` | `false` | +| `//seq.contains([1,2,3,4,5], [1,2,3,4,5])` | `true` | +| `//seq.contains([['B','C']],[['A', 'B'], ['B','C'],['D','E']])` | `true` | + +## `//seq.has_prefix(prefix <: array, subject <: array) <: bool`
`//seq.has_prefix(prefix <: string, subject <: string) <: bool` + +`has_prefix` checks whether the sequence `subject` is prefixed by sequence `prefix` and returns true if it is, or false otherwise. + +Usage: +| example | equals | +|:-|:-| +| `//seq.has_prefix("I'm", "I'm running out of stuff to write")` | `true` | +| `//seq.has_prefix("to write", "I'm running out of stuff to write")` | `false` | +| `//seq.has_prefix(['A'],['A','B','C'])` | `true` | +| `//seq.has_prefix([1, 2],[1, 2, 3])` | `true` | +| `//seq.has_prefix([[1, 2]],[[1, 2], [3]])` | `true` | + + +## `//seq.has_suffix(suffix <: array, subject <: array) <: bool`
`//seq.has_suffix(suffix <: string, subject <: string) <: bool` + +`has_suffix` checks whether the sequence `subject` is suffixed by sequence `suffix` and returns true if it is, or false otherwise. + +Usage: +| example | equals | +|:-|:-| +| `//seq.has_suffix("I'm", "I'm running out of stuff to write")` | `false` | +| `//seq.has_suffix("to write", "I'm running out of stuff to write")` | `true` | +| `//seq.has_suffix(['E'],['A','B','C','D','E'])` | `true` | +| `//seq.has_suffix([[3, 4]],[[1 ,2], [3, 4]])` | `true` | + +## `//seq.join(joiner <: array, subject <: array) <: array`
`//seq.join(joiner <: string, subject <: array_of_string) <: string` + +`join` returns a concatenated sequence with each member of sequence `subject` delimited by sequence `joiner`. + +Usage: +| example | equals | +|:-|:-| +| `//seq.join(", ", ["pew", "another pew", "and more pews"])` | `"pew, another pew, and more pews"` | +| `//seq.join(" ", ["this", "is", "a", "sentence"])` | `"this is a sentence"` | +| `//seq.join("", ["this", "is", "a", "sentence"])` | `"thisisasentence"` | +| `//seq.join([0], [[1, 2], [3, 4], [5, 6]])` | `[1, 2, 0, 3, 4, 0, 5, 6]` | +| `//seq.join([0], [[2, [3, 4]], [5, 6]])` | `[2, [3, 4], 0, 5, 6]` | +| `//seq.join([[0],[1]], [[[1, 2], [3, 4]],[[5, 6],[7, 8]]])` | `[[1, 2], [3, 4], [0], [1], [5, 6], [7, 8]]` | + +## `//seq.split(delimiter <: array, subject <: array) <: array`
`//seq.split(delimiter <: string, subject <: string) <: array of string` + +`split` splits sequence `subject` based on the provided sequence `delimiter`. It returns an array of the sequences that result from splitting `subject`. + +Usage: +| example | equals | +|:-|:-| +| `//seq.split(" ", "deliberately adding spaces to demonstrate the split function")` | `["deliberately", "adding", "spaces", "to", "demonstrate", "the", "split", "function"]` | +| `//seq.split("random stuff", "this is just a random sentence")` | `["this is just a random sentence"]` | +| `//seq.split([1],[1, 2, 3])` | `[[],[2,3]]` | +| `//seq.split([3],[1, 2, 3])` | `[[1,2],[]]` | +| `//seq.split(['A'],['B', 'A', 'C', 'A', 'D', 'E'])` | `[['B'],['C'], ['D', 'E']]` | +| `//seq.split([['C','D'],['E','F']],[['A','B'], ['C','D'], ['E','F'], ['G']])` | `[[['A','B']], [['G']]]` | + +## `//seq.sub(old <: array, new <: array, subject <: array) <: array`
`//seq.sub(old <: string, new <: string, subject <: string) <: string` + +`sub` replaces occurrences of sequence `old` in sequence `subject` with sequence `new`. It returns the modified sequence. + +Usage: +| example | equals | +|:-|:-| +| `//seq.sub("old string", "new sentence", "this is the old string")` | `"this is the new sentence"` | +| `//seq.sub("string", "stuff", "just another sentence")` | `"just another sentence"` | +| `//seq.sub([1], [2], [1, 2, 3])` | `[2, 2, 3]` | +| `//seq.sub([[2,2]], [[4,4]], [[1,1], [2,2], [3,3]])`| `[[1,1], [4,4], [3,3]]` | + +## `//seq.repeat(n <: number, seq <: array) <: array`
`//seq.repeat(n <: number, seq <: string) <: string` `repeat` returns a sequence that contains `seq` repeated `n` times. +Usage: | example | equals | |:-|:-| | `//seq.repeat(2, "hots")` | `"hotshots"` | diff --git a/docs/std-str.md b/docs/std-str.md index 2af6c359..4e97dfd7 100644 --- a/docs/std-str.md +++ b/docs/std-str.md @@ -2,41 +2,6 @@ The `str` library contains functions that are used for string manipulations. -## `//str.contains(str <: string, substr <: string) <: bool` - -`contains` checks whether `substr` is contained in `str`. It returns a -boolean. - -Usage: - -| example | equals | -|:-|:-| -| `//str.contains("the full string which has substring", "substring")` | `true` | -| `//str.contains("just some random sentence", "microwave")` | `{}` which is equal to `false` | - -## `//str.sub(s <: string, old <: string, new <: string) <: string` - -`sub` replaces occurrences of `old` in `s` with `new`. It returns the modified string. - -Usage: - -| example | equals | -|:-|:-| -| `//str.sub("this is the old string", "old string", "new sentence")` | `"this is the new sentence"` | -| `//str.sub("just another sentence", "string", "stuff")` | `"just another sentence"` | - -## `//str.split(s <: string, delimiter <: string) <: array of string` - -`split` splits the string `s` based on the provided `delimiter`. It returns an array of strings -which are split from the string `s`. - -Usage: - -| example | equals | -|:-|:-| -| `//str.split("deliberately adding spaces to demonstrate the split function", " ")` | `["deliberately", "adding", "spaces", "to", "demonstrate", "the", "split", "function"]` | -| `//str.split("this is just a random sentence", "random stuff")` | `["this is just a random sentence"]` | - ## `//str.lower(s <: string) <: string` `lower` returns the string `s` with all of the character converted to lowercase. 
@@ -72,37 +37,3 @@ Usage: |:-|:-| | `//str.title("laser noises pew pew pew")` | `"Laser Noises Pew Pew Pew"` | | `//str.title("pew")` | `"Pew"` | - -## `//str.has_prefix(s <: string, prefix <: string) <: bool` - -`has_prefix` checks whether the string `s` is prefixed by `prefix`. It returns a boolean. - -Usage: - -| example | equals | -|:-|:-| -| `//str.has_prefix("I'm running out of stuff to write", "I'm")` | `true` | -| `//str.has_prefix("I'm running out of stuff to write", "to write")` | `{}` which is equal to `false` | - -## `//str.has_suffix(s <: string, suffix <: string) <: bool` - -`has_suffix` checks whether the string `s` is suffixed by `suffix`. It returns a boolean. - -Usage: - -| example | equals | -|:-|:-| -| `//str.has_suffix("I'm running out of stuff to write", "I'm")` | `{}` which is equal to `false` | -| `//str.has_suffix("I'm running out of stuff to write", "to write")` | `true` | - -## `//str.join(s <: array_of_string, delimiter <: string) <: string` - -`join` returns a concatenated string with each member of `s` delimited by `delimiter` - -Usage: - -| example | equals | -|:-|:-| -| `//str.join(["pew", "another pew", "and more pews"], ", ")` | `"pew, another pew, and more pews"` | -| `//str.join(["this", "is", "a", "sentence"], " ")` | `"this is a sentence"` | -| `//str.join(["this", "is", "a", "sentence"], "")` | `"thisisasentence"` | diff --git a/examples/grpc/grpc-proto.arrai b/examples/grpc/grpc-proto.arrai index ed194a84..db1923af 100644 --- a/examples/grpc/grpc-proto.arrai +++ b/examples/grpc/grpc-proto.arrai @@ -2,7 +2,7 @@ let grpc = //{./grpc}; let wrap = "wrap" <: app.attrs.patterns; let proto = //{./proto-util}(wrap); -let endpoints = app.endpoints where !//str.has_prefix(.@item.name, "enum "); +let endpoints = app.endpoints where !//seq.has_prefix("enum ", .@item.name); //archive.tar.tar({ app.name + ".proto": $` // THIS IS AUTOGENERATED BY sysl // @@ -24,8 +24,8 @@ let endpoints = app.endpoints where !//str.has_prefix(.@item.name, 
"enum "); ${cond (app.endpoints: $` ${//rel.union((endpoints >> (.params >> cond ( - //str.contains(grpc.type(.), "google.protobuf"): $` - import "${//str.sub(grpc.type(.), ".", "/")}.proto";`, + //seq.contains("google.protobuf", grpc.type(.)): $` + import "${//seq.sub(".", "/", grpc.type(.))}.proto";`, ) => .@item )) => .@item)::\i:\n} service ${app.name} { @@ -37,14 +37,14 @@ let endpoints = app.endpoints where !//str.has_prefix(.@item.name, "enum "); }`:::\n} ${endpoints >> proto.wrapSequence(.).grpcType::\i} ${cond (wrap: endpoints) >> - let retTokens = //str.split(ep.ret("ok"), " "); - let retName = //str.sub(//seq.concat(retTokens -- {"sequence", "of"}), ".", ""); + let retTokens = //seq.split(" ", ep.ret("ok")); + let retName = //seq.sub(".", "", //seq.concat(retTokens -- {"sequence", "of"})); let attr = ep.attrs(retName + "_rpcId"); let epi = proto.endpointInfo(ep); $` message ${epi.paramName} { ${ep.params >> - let name = //str.sub(.name, "-", ""); + let name = //seq.sub("-", "", .name); $`${grpc.type(.)} req${name} = ${.attrs(name + "_rpcId")};` ::\i} } diff --git a/examples/grpc/grpc.arrai b/examples/grpc/grpc.arrai index 0c17c397..b43578f2 100644 --- a/examples/grpc/grpc.arrai +++ b/examples/grpc/grpc.arrai @@ -13,7 +13,7 @@ ), 'sequence': 'repeated ' + type(t.sequence) *: cond ( - //str.contains(t.type_ref, "google-protobuf"): //str.sub(t.type_ref, "-", "."), + //seq.contains("google-protobuf", t.type_ref): //seq.sub("-", ".", t.type_ref), *: t.type_ref, ), ), diff --git a/examples/grpc/proto-util.arrai b/examples/grpc/proto-util.arrai index b090d77c..816fd9be 100644 --- a/examples/grpc/proto-util.arrai +++ b/examples/grpc/proto-util.arrai @@ -1,21 +1,21 @@ let grpc = //{./grpc}; \wrap ( - field: \. $`${grpc.type(.)} ${//str.sub(.key, "-", "")} = ${.attrs.rpcId};`, + field: \. 
$`${grpc.type(.)} ${//seq.sub("-", "", .key)} = ${.attrs.rpcId};`, imports: \fields - fields where(//str.contains(grpc.type(.@item), "google.protobuf")) >> - $`import "${//str.sub(grpc.type(.), ".", "/")}.proto";`, + fields where(//seq.contains("google.protobuf", grpc.type(.@item))) >> + $`import "${//seq.sub(".", "/", grpc.type(.))}.proto";`, endpointInfo: \ep - let method = //str.sub(//str.title(//str.lower(ep.name)), "-", ""); + let method = //seq.sub("-", "", //str.title(//str.lower(ep.name))); let paramName = cond ( wrap: method + "Request", *: $"${ep.params >> grpc.type(.)::, }", ); let streamRes = cond ( - ep.attrs.stream: //str.sub(ep.ret("ok"), "sequence of", "stream"), - *: //str.sub(ep.ret("ok"), "sequence of ", "") + "s", + ep.attrs.stream: //seq.sub("sequence of", "stream", ep.ret("ok")), + *: //seq.sub("sequence of ", "", ep.ret("ok")) + "s", ); let responseName = cond (wrap: method + "Response", *: streamRes); ( @@ -26,7 +26,7 @@ let grpc = //{./grpc}; ), wrapSequence: \ep - let type = //str.sub(ep.ret("ok"), "sequence of ", ""); + let type = //seq.sub("sequence of ", "", ep.ret("ok")); let wrapType = type + "s"; let name = //str.lower(type) + "s"; ( diff --git a/internal/shell/shell_cmd_test.go b/internal/shell/shell_cmd_test.go index d1faca5b..c2cbd475 100644 --- a/internal/shell/shell_cmd_test.go +++ b/internal/shell/shell_cmd_test.go @@ -14,7 +14,7 @@ func TestIsCommand(t *testing.T) { t.Parallel() assert.True(t, isCommand("/hi")) - assert.False(t, isCommand("//str.join")) + assert.False(t, isCommand("//seq.join")) } func TestTryRunCommand(t *testing.T) { diff --git a/internal/shell/shell_test.go b/internal/shell/shell_test.go index b48942ce..12322540 100644 --- a/internal/shell/shell_test.go +++ b/internal/shell/shell_test.go @@ -151,12 +151,12 @@ func TestGetLastToken(t *testing.T) { assert.Equal(t, "//str", getLastToken([]rune("//str"))) assert.Equal(t, "//", getLastToken([]rune("//"))) assert.Equal(t, "///", getLastToken([]rune("///"))) - 
assert.Equal(t, "//", getLastToken([]rune("//str.contains(//"))) - assert.Equal(t, "//arch", getLastToken([]rune("//str.contains(//arch"))) - assert.Equal(t, "tuple.", getLastToken([]rune("//str.contains(tuple."))) + assert.Equal(t, "//", getLastToken([]rune("//seq.contains(//"))) + assert.Equal(t, "//arch", getLastToken([]rune("//seq.contains(//arch"))) + assert.Equal(t, "tuple.", getLastToken([]rune("//seq.contains(tuple."))) assert.Equal(t, "x.", getLastToken([]rune("x."))) assert.Equal(t, "x", getLastToken([]rune("x"))) - assert.Equal(t, "", getLastToken([]rune("//str.contains("))) + assert.Equal(t, "", getLastToken([]rune("//seq.contains("))) assert.Equal(t, "", getLastToken([]rune(""))) } @@ -166,14 +166,14 @@ func TestTabCompletionStdlib(t *testing.T) { stdlibNames := stdlib.Names().OrderedNames() assertTabCompletion(t, append(stdlibNames, "{"), 0, "//\t", nil) - assertTabCompletion(t, append(stdlibNames, "{"), 0, "//str.contains(//\t", nil) + assertTabCompletion(t, append(stdlibNames, "{"), 0, "//seq.contains(//\t", nil) prefix := "s" assertTabCompletionWithPrefix(t, prefix, stdlibNames, "//%s\t", nil) assertTabCompletionWithPrefix(t, prefix, stdlibNames, "x(//%s\t", nil) assertTabCompletionWithPrefix(t, prefix, stdlibNames, "x(//%s\t + random)", nil) - lib := "str" + lib := "seq" strlib := stdlib.MustGet(lib).(rel.Tuple).Names().OrderedNames() assertTabCompletionWithPrefix(t, prefix, strlib, "//"+lib+".%s\t", nil) for i := 0; i < len(strlib); i++ { diff --git a/rel/value_set_array.go b/rel/value_set_array.go index 18794e82..33c96219 100644 --- a/rel/value_set_array.go +++ b/rel/value_set_array.go @@ -156,7 +156,7 @@ func (a Array) Kind() int { return arrayKind } -// Bool returns true iff the tuple has attributes. +// IsTrue returns true if the tuple has attributes. 
func (a Array) IsTrue() bool { return a.count > 0 } diff --git a/syntax/std_seq.go b/syntax/std_seq.go index 4259ad2e..63f4436f 100644 --- a/syntax/std_seq.go +++ b/syntax/std_seq.go @@ -7,13 +7,13 @@ import ( "github.com/arr-ai/arrai/rel" ) -func stdSeqConcat(arg rel.Value) rel.Value { - if set, is := arg.(rel.Set); is { +func stdSeqConcat(seq rel.Value) rel.Value { + if set, is := seq.(rel.Set); is { if !set.IsTrue() { return rel.None } } - values := arg.(rel.Array).Values() + values := seq.(rel.Array).Values() if len(values) == 0 { return rel.None } @@ -64,5 +64,172 @@ func stdSeq() rel.Attr { return rel.NewTupleAttr("seq", rel.NewNativeFunctionAttr("concat", stdSeqConcat), rel.NewNativeFunctionAttr("repeat", stdSeqRepeat), + createNestedFuncAttr("contains", 2, func(args ...rel.Value) rel.Value { //nolint:dupl + sub, subject := args[0], args[1] + switch subject := subject.(type) { + case rel.String: + return rel.NewBool(strings.Contains(mustAsString(subject), mustAsString(sub))) + case rel.Array: + return arrayContains(sub, subject) + case rel.Bytes: + return rel.NewBool(strings.Contains(asString(subject), asString(sub))) + case rel.GenericSet: + emptySet, isSet := sub.(rel.GenericSet) + return rel.NewBool(isSet && !emptySet.IsTrue()) + } + + return rel.NewBool(false) + }), + createNestedFuncAttr("has_prefix", 2, func(args ...rel.Value) rel.Value { //nolint:dupl + prefix, subject := args[0], args[1] + switch subject := subject.(type) { + case rel.String: + return rel.NewBool(strings.HasPrefix(mustAsString(subject), mustAsString(prefix))) + case rel.Array: + return arrayHasPrefix(prefix, subject) + case rel.Bytes: + return rel.NewBool(strings.HasPrefix(asString(subject), asString(prefix))) + case rel.GenericSet: + emptySet, isSet := prefix.(rel.GenericSet) + return rel.NewBool(isSet && !emptySet.IsTrue()) + } + + return rel.NewBool(false) + }), + createNestedFuncAttr("has_suffix", 2, func(args ...rel.Value) rel.Value { //nolint:dupl + suffix, subject := args[0], 
args[1] + switch subject := subject.(type) { + case rel.String: + return rel.NewBool(strings.HasSuffix(mustAsString(subject), mustAsString(suffix))) + case rel.Array: + return arrayHasSuffix(suffix, subject) + case rel.Bytes: + return rel.NewBool(strings.HasSuffix(asString(subject), asString(suffix))) + case rel.GenericSet: + emptySet, isSet := suffix.(rel.GenericSet) + return rel.NewBool(isSet && !emptySet.IsTrue()) + } + + return rel.NewBool(false) + }), + createNestedFuncAttr("sub", 3, func(args ...rel.Value) rel.Value { + old, new, subject := args[0], args[1], args[2] + switch subject := subject.(type) { + case rel.String: + return rel.NewString( + []rune( + strings.ReplaceAll( + mustAsString(subject), + mustAsString(old), + mustAsString(new), + ), + ), + ) + case rel.Array: + return arraySub(old, new, subject) + case rel.Bytes: + _, oldIsSet := old.(rel.GenericSet) + _, newIsSet := new.(rel.GenericSet) + if !oldIsSet && newIsSet { + return rel.NewBytes([]byte(strings.ReplaceAll(subject.String(), + old.String(), ""))) + } else if oldIsSet && !newIsSet { + return rel.NewBytes([]byte(strings.ReplaceAll(subject.String(), + "", new.String()))) + } + return rel.NewBytes([]byte(strings.ReplaceAll(subject.String(), + old.String(), new.String()))) + case rel.GenericSet: + _, oldIsSet := old.(rel.GenericSet) + _, newIsSet := new.(rel.GenericSet) + if oldIsSet && newIsSet { + return subject + } else if oldIsSet && !newIsSet { + return new + } else if !oldIsSet && newIsSet { + return subject + } + } + + panic(fmt.Errorf("sub: unsupported args: %s, %s, %s", old, new, subject)) + }), + createNestedFuncAttr("split", 2, func(args ...rel.Value) rel.Value { + delimiter, subject := args[0], args[1] + switch subject := subject.(type) { + case rel.String: + splitted := strings.Split(mustAsString(subject), mustAsString(delimiter)) + vals := make([]rel.Value, 0, len(splitted)) + for _, s := range splitted { + vals = append(vals, rel.NewString([]rune(s))) + } + return 
rel.NewArray(vals...) + case rel.Array: + return arraySplit(delimiter, subject) + case rel.Bytes: + return bytesSplit(delimiter, subject) + case rel.GenericSet: + switch delimiter.(type) { + case rel.String: + return rel.NewArray(subject) + case rel.Array, rel.Bytes: + return rel.NewArray(rel.NewArray()) + case rel.GenericSet: + return subject + } + } + + panic(fmt.Errorf("split: unsupported args: %s, %s", delimiter, subject)) + }), + createNestedFuncAttr("join", 2, func(args ...rel.Value) rel.Value { + joiner, subject := args[0], args[1] + switch subject := subject.(type) { + case rel.Array: + switch subject.Values()[0].(type) { + case rel.String: + // if subject is rel.String + return strJoin(args...) + case rel.Value: + if _, isStr := joiner.(rel.String); isStr { + return strJoin(args...) + } + return arrayJoin(joiner, subject) + } + case rel.Bytes: + if _, isSet := joiner.(rel.GenericSet); isSet { + return subject + } + return bytesJoin(joiner.(rel.Bytes), subject) + case rel.GenericSet: + switch joiner.(type) { + case rel.String: + // if joiner is rel.String + return strJoin(args...) 
+ case rel.Array, rel.GenericSet, rel.Bytes: + return subject + } + } + + panic(fmt.Errorf("join: unsupported args: %s, %s", joiner, subject)) + }), ) } + +func strJoin(args ...rel.Value) rel.Value { + joiner, subject := args[0], args[1] + strs := subject.(rel.Set) + toJoin := make([]string, 0, strs.Count()) + for i, ok := strs.(rel.Set).ArrayEnumerator(); ok && i.MoveNext(); { + toJoin = append(toJoin, mustAsString(i.Current())) + } + return rel.NewString([]rune(strings.Join(toJoin, mustAsString(joiner)))) +} + +func asString(val rel.Value) string { + switch val := val.(type) { + case rel.Bytes: + return val.String() + case rel.Set: + return mustAsString(val) + } + panic("value can't be converted to a string") +} diff --git a/syntax/std_seq_array_helper.go b/syntax/std_seq_array_helper.go new file mode 100644 index 00000000..191e87c6 --- /dev/null +++ b/syntax/std_seq_array_helper.go @@ -0,0 +1,187 @@ +package syntax + +import ( + "github.com/arr-ai/arrai/rel" +) + +// Checks if array subject contains sub. +func arrayContains(sub rel.Value, subject rel.Array) rel.Value { + subArray := convert2Array(sub) + return rel.NewBool(search(subject.Values(), subArray.Values()) > -1) +} + +// Substitutes all old in subject with new. +func arraySub(old, new rel.Value, subject rel.Array) rel.Value { + // Convert to array to facilitate process + oldArray := convert2Array(old) + newArray := convert2Array(new) + + result := make([]rel.Value, 0, subject.Count()) + if !old.IsTrue() { + for _, e := range subject.Values() { + result = append(append(result, newArray.Values()...), e) + } + result = append(result, newArray.Values()...) + } else { + subjectVals := subject.Values() + for { + if i := search(subjectVals, oldArray.Values()); i >= 0 { + result = append(append(result, subjectVals[:i]...), newArray.Values()...) + subjectVals = subjectVals[i+oldArray.Count():] + } else { + result = append(result, subjectVals...) + break + } + } + } + + return rel.NewArray(result...) 
+} + +// Splits array subject by delimiter. +func arraySplit(delimiter rel.Value, subject rel.Array) rel.Value { + delimiterArray := convert2Array(delimiter) + var result []rel.Value + + if !delimiterArray.IsTrue() { + for _, e := range subject.Values() { + result = append(result, rel.NewArray(e)) + } + } else { + subjectVals := subject.Values() + for { + if i := search(subjectVals, delimiterArray.Values()); i >= 0 { + result = append(result, rel.NewArray(subjectVals[:i]...)) + subjectVals = subjectVals[i+delimiterArray.Count():] + } else { + result = append(result, rel.NewArray(subjectVals...)) + break + } + } + } + + return rel.NewArray(result...) +} + +// Joins array joiner to subject. +// The type of subject element must be rel.Array, it can help to make sure the API output is clear and will not confuse. +// For example: +// `//seq.join([0], [1, 2])`, it can return [1, 0, 2] or [1, 2], +// `//seq.join([], [1, [2, 3]])`, it can return [1, 2, 3] or [1, [2, 3]]. +// All of the results make sense. It can see the output can't be sure in above cases, it is not good. +func arrayJoin(joiner rel.Value, subject rel.Array) rel.Value { + joinerArray := convert2Array(joiner) + + result := make([]rel.Value, 0, subject.Count()) + for i, value := range subject.Values() { + if i > 0 { + result = append(result, joinerArray.Values()...) + } + switch vArray := value.(type) { + case rel.Array: + result = append(result, vArray.Values()...) + case rel.Value: + panic("the type of subject element must be rel.Array") + } + } + + return rel.NewArray(result...) +} + +// Check if array subject starts with prefix. 
+func arrayHasPrefix(prefix rel.Value, subject rel.Array) rel.Value { + prefixArray := convert2Array(prefix) + + if !prefixArray.IsTrue() && subject.IsTrue() { + return rel.NewBool(true) + } + if subject.Count() < prefixArray.Count() { + return rel.NewBool(false) + } + + prefixVals := prefixArray.Values() + prefixOffset := 0 + arrayEnum, _ := subject.ArrayEnumerator() + for arrayEnum.MoveNext() { + if prefixOffset < prefixArray.Count() && arrayEnum.Current().Equal(prefixVals[prefixOffset]) { + prefixOffset++ + if prefixOffset == prefixArray.Count() { + break + } + } else { + return rel.NewBool(false) + } + } + + return rel.NewBool(true) +} + +// Check if array subject ends with suffix. +func arrayHasSuffix(suffix rel.Value, subject rel.Array) rel.Value { + suffixArray := convert2Array(suffix) + + if !suffixArray.IsTrue() && subject.IsTrue() { + return rel.NewBool(true) + } + if subject.Count() < suffixArray.Count() { + return rel.NewBool(false) + } + + subjectVals := subject.Values() + suffixVals := suffixArray.Values() + suffixOffset := suffixArray.Count() - 1 + + for _, val := range subjectVals[subject.Count()-1:] { + if suffixOffset > -1 && val.Equal(suffixVals[suffixOffset]) { + suffixOffset-- + if suffixOffset == -1 { + break + } + } else { + return rel.NewBool(false) + } + } + + return rel.NewBool(true) +} + +func convert2Array(val rel.Value) rel.Array { + switch val := val.(type) { + case rel.Array: + return val + case rel.GenericSet: + valArray, _ := rel.AsArray(val) + return valArray + } + + panic("it supports types rel.Array and rel.GenericSet only.") +} + +// Searches array sub in subject and return the first indedx if found, or return -1. +// It is brute force approach, can be improved later if it is necessary. 
+// Case: subject=[1,2,3,4], sub=[2], return 1 +// Case: subject=[1,2,3,4], sub=[2,3], return 1 +// Case: subject=[1,2,3,4], sub=[2,5], return -1 +func search(subject, sub []rel.Value) int { + subjectOffset, subOffset := 0, 0 + + for ; subjectOffset < len(subject); subjectOffset++ { + if subOffset < len(sub) && subject[subjectOffset].Equal(sub[subOffset]) { + subOffset++ + } else { + if subOffset > 0 && subOffset < len(sub) { + subOffset = 0 + subjectOffset-- + } + } + if subOffset == len(sub) { + break + } + } + + if subjectOffset < len(subject) { + // see len(sub) > 1 + return (subjectOffset + 1) - len(sub) + } + return -1 +} diff --git a/syntax/std_seq_bytes_helper.go b/syntax/std_seq_bytes_helper.go new file mode 100644 index 00000000..f79f1c11 --- /dev/null +++ b/syntax/std_seq_bytes_helper.go @@ -0,0 +1,38 @@ +package syntax + +import ( + "strings" + + "github.com/arr-ai/arrai/rel" +) + +// Joins byte array joiner to subject. +func bytesJoin(joiner, subject rel.Bytes) rel.Value { + result := make([]byte, 0, subject.Count()) + for index, e := range subject.Bytes() { + if index > 0 && index < subject.Count() { + result = append(result, joiner.Bytes()...) + } + result = append(result, e) + } + + return rel.NewBytes(result) +} + +// Splits byte array subject by delimiter. +func bytesSplit(delimiter rel.Value, subject rel.Bytes) rel.Value { + var splitted []string + + switch delimiter := delimiter.(type) { + case rel.Bytes: + splitted = strings.Split(subject.String(), delimiter.String()) + case rel.GenericSet: + splitted = strings.Split(subject.String(), mustAsString(delimiter)) + } + + result := make([]rel.Value, 0, len(splitted)) + for _, s := range splitted { + result = append(result, rel.NewBytes([]byte(s)).(rel.Value)) + } + return rel.NewArray(result...) 
+} diff --git a/syntax/std_seq_contains_test.go b/syntax/std_seq_contains_test.go new file mode 100644 index 00000000..1ce8b86d --- /dev/null +++ b/syntax/std_seq_contains_test.go @@ -0,0 +1,73 @@ +package syntax + +import "testing" + +func TestStrContains(t *testing.T) { + t.Parallel() + + AssertCodesEvalToSameValue(t, `true`, `//seq.contains("A", "A")`) + + AssertCodesEvalToSameValue(t, `true `, `//seq.contains("", "this is a test") `) + AssertCodesEvalToSameValue(t, `true `, `//seq.contains("is a test", "this is a test") `) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains("is not a test", "this is a test")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains("a is", "this is a test")`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.contains("", "A")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains("A", "")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains("", "")`) + + AssertCodeErrors(t, `//seq.contains(1, "ABC")`, "") +} + +func TestArrayContains(t *testing.T) { //nolint:dupl + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['A'],['A', 'D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['E'],['A','C','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['C'],['B','C','D'])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['L','M','N'],['L','M','N','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['B','C'],['T','B','C','X','Y'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['C','D','E'],['1','3','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['A1','B1','C1','D1','E1'],['A1','B1','C1','D1','E1'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains(['B1','C2','E3'],['A','B1','C2','D','E3'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains(['A2','B3','C4','E5'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.contains(['A','B','C','D','E','F'],['A','B','C','D','E'])`) + + 
AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['A1','B2','C3'], ['A', 'A1', 'B2','C3','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains(['B4','C5'],['A', 'A', 'B4','C5','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([['B1','C1']],[['A', 'B'], ['B1','C1'],['D','E']])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([1,2], [1,2,3,4,5])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([1,2,3,4,5], [1,2,3,4,5])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([[1,2],[3,4],[5]], [[1,2],[3,4],[5]])`) + + AssertCodesEvalToSameValue(t, `false`, `//seq.contains([1], [])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([], [1])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.contains([], [])`) + + AssertCodeErrors(t, `//seq.contains(1, [1,2,3,4,5])`, "") + AssertCodeErrors(t, `//seq.contains('A',['A','B','C','D','E'])`, "") +} + +func TestBytesContains(t *testing.T) { + t.Parallel() + // hello bytes - 104 101 108 108 111 + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains(//unicode.utf8.encode('hello'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains({ |@, @byte| (0, 104)},//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains({ |@, @byte| (0, 111)},//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains({ |@, @byte| (0, 108),(0, 108)},//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains(//unicode.utf8.encode('h'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `false`, + `//seq.contains(//unicode.utf8.encode('A'),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains(//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `true`, + `//seq.contains(//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) +} diff --git 
a/syntax/std_seq_join_test.go b/syntax/std_seq_join_test.go new file mode 100644 index 00000000..51a8ac0c --- /dev/null +++ b/syntax/std_seq_join_test.go @@ -0,0 +1,81 @@ +package syntax + +import "testing" + +func TestStrJoin(t *testing.T) { + t.Parallel() + + AssertCodesEvalToSameValue(t, `""`, `//seq.join(",",[])`) + + AssertCodesEvalToSameValue(t, `"this is a test" `, `//seq.join(" ",["this", "is", "a", "test"])`) + AssertCodesEvalToSameValue(t, `"this"`, `//seq.join(",",["this"])`) + AssertCodesEvalToSameValue(t, `"You and me"`, `//seq.join(" and ",["You", "me"])`) + AssertCodesEvalToSameValue(t, `"AB"`, `//seq.join("",['A','B'])`) + + AssertCodesEvalToSameValue(t, `"Youme"`, `//seq.join("",["You", "me"])`) + AssertCodesEvalToSameValue(t, `""`, `//seq.join(",",[])`) + AssertCodesEvalToSameValue(t, `",,"`, `//seq.join(",",["", "", ""])`) + + // It is not supported + // AssertCodesEvalToSameValue(t, `""`, `//seq.join("",[])`) + + AssertCodeErrors(t, `//seq.join("this", 2)`, "") +} + +func TestArrayJoin(t *testing.T) { + t.Parallel() + // joiner "" is translated to rel.GenericSet + AssertCodesEvalToSameValue(t, `[1, 2, 0, 3, 4, 0, 5, 6]`, `//seq.join([0], [[1, 2], [3, 4], [5, 6]])`) + AssertCodesEvalToSameValue(t, `[2, [3, 4], 0, 5, 6]`, `//seq.join([0], [[2, [3, 4]], [5, 6]])`) + AssertCodesEvalToSameValue(t, `[1, 2, 10, 11, 3, 4, 10, 11, 5, 6]`, + `//seq.join([10,11], [[1, 2], [3, 4], [5, 6]])`) + AssertCodesEvalToSameValue(t, `[[1, 2], [3, 4], 0, [5, 6], [7, 8]]`, + `//seq.join([0], [[[1, 2], [3, 4]],[[5, 6],[7, 8]]])`) + + AssertCodesEvalToSameValue(t, `[1, 2, [10], [11], 3, 4, [10], [11], 5, 6]`, + `//seq.join([[10],[11]], [[1, 2], [3, 4], [5, 6]])`) + AssertCodesEvalToSameValue(t, `[[1, 2], [3, 4], [0], [1], [5, 6], [7, 8]]`, + `//seq.join([[0],[1]], [[[1, 2], [3, 4]],[[5, 6],[7, 8]]])`) + + AssertCodesEvalToSameValue(t, `['AA', 'AB', 'BB', 'AB', 'CC', 'DD']`, + `//seq.join(['AB'], [['AA'], ['BB'], ['CC' , 'DD']])`) + AssertCodesEvalToSameValue(t, `['AA', 
'AB', 'BB', ['CC', 'DD']]`, + `//seq.join(['AB'], [['AA'], ['BB' ,['CC' , 'DD']]])`) + + // Test cases the delimiter is [] + AssertCodesEvalToSameValue(t, `[1,2]`, `//seq.join([],[[1],[2]])`) + AssertCodesEvalToSameValue(t, `[]`, `//seq.join([],[])`) + AssertCodesEvalToSameValue(t, `[]`, `//seq.join([1],[])`) + + AssertCodesEvalToSameValue(t, `[1, 2, 3, 4]`, `//seq.join([], [[1, 2], [3, 4]])`) + AssertCodesEvalToSameValue(t, `[[1, 2], 3, 4]`, `//seq.join([], [[[1, 2]], [3, 4]])`) + AssertCodesEvalToSameValue(t, `[[1, 2], [3, 4], 5]`, `//seq.join([], [[[1, 2]], [[3,4], 5]])`) + + AssertCodeErrors(t, `//seq.join(1, [1,2,3,4,5])`, "") + AssertCodeErrors(t, `//seq.join('A', [1,2])`, "") + AssertCodeErrors(t, `//seq.join([],[1,2])`, "") + AssertCodeErrors(t, `//seq.join([1],[1,2])`, "") + AssertCodeErrors(t, `//seq.join([0], [1,2,3,4,5])`, "") + AssertCodeErrors(t, `//seq.join(['A'], ['AT','BB', 'CD'])`, "") +} + +func TestBytesJoin(t *testing.T) { + t.Parallel() + // joiner "" is translated to rel.GenericSet + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('hhehlhlho')`, + `//seq.join({ |@, @byte| (0, 104)},//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, + `{ |@, @byte| (0, 104), (1, 108), (2, 111), (3, 101), (4, 108), (5, 111),`+ + `(6, 108), (7, 108), (8, 111), (9, 108), (10, 108), (11, 111), (12, 111) }`, + `//seq.join({ |@, @byte| (0, 108), (1, 111)},{ |@, @byte| (0, 104), (1, 101),`+ + ` (2, 108), (3, 108), (4, 111) })`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('hateatlatlato')`, + `//seq.join(//unicode.utf8.encode('at'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('')`, + `//seq.join(//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('hello')`, + `//seq.join(//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('')`, + 
`//seq.join(//unicode.utf8.encode('h'),//unicode.utf8.encode(''))`) +} diff --git a/syntax/std_seq_prefix_test.go b/syntax/std_seq_prefix_test.go new file mode 100644 index 00000000..dd4395d5 --- /dev/null +++ b/syntax/std_seq_prefix_test.go @@ -0,0 +1,67 @@ +package syntax + +import "testing" + +func TestStrPrefix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("ABC","ABC")`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("A","ABCDE")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("AB","ABCDE")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix("BCD","ABCDE")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix("CD","ABCDE")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix("CD","")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("","ABCD")`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("","")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix("A","")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix("","A")`) + + AssertCodeErrors(t, `//seq.has_prefix(1,"ABC")`, "") +} + +func TestArrayPrefix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A'],['A'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A','B'],['A','B'])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A','B'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A','B','C'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(['A','B','C','D'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(['B'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(['B','C'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, 
`//seq.has_prefix(['A','B','C','D','E','F'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(['A','B','C','D','E','F'],['A','B','C','D','E'])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix([1, 2],[1, 2, 3])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix([[1, 2]],[[1, 2], [3]])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix([], [])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(['A','B','C','D','E','F'],[])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix([],['A','B','C','D','E'])`) + + AssertCodeErrors(t, `//seq.has_prefix(1,[1,2,3])`, "") + AssertCodeErrors(t, `//seq.has_prefix('A',['A','B','C'])`, "") +} + +func TestBytesPrefix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, + `//seq.has_prefix(//unicode.utf8.encode('hello'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(//unicode.utf8.encode('h'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(//unicode.utf8.encode('he'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(//unicode.utf8.encode('e'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(//unicode.utf8.encode('l'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(//unicode.utf8.encode('o'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true `, `//seq.has_prefix(//unicode.utf8.encode('h'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true `, `//seq.has_prefix(//unicode.utf8.encode('he'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true `, + `//seq.has_prefix(//unicode.utf8.encode('hello'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + 
AssertCodesEvalToSameValue(t, `false`, `//seq.has_prefix(//unicode.utf8.encode('o'),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_prefix(//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) +} diff --git a/syntax/std_seq_split_test.go b/syntax/std_seq_split_test.go new file mode 100644 index 00000000..42b94684 --- /dev/null +++ b/syntax/std_seq_split_test.go @@ -0,0 +1,98 @@ +package syntax + +import "testing" + +func TestStrSplit(t *testing.T) { + t.Parallel() + + AssertCodesEvalToSameValue(t, `[[], "B", "CD"] `, `//seq.split("A","ABACD")`) + AssertCodesEvalToSameValue(t, `["ABAC", []] `, `//seq.split("D","ABACD")`) + + AssertCodesEvalToSameValue(t, `["this", "is", "a", "test"]`, `//seq.split(" ","this is a test") `) + AssertCodesEvalToSameValue(t, `["this is a test"] `, `//seq.split(",","this is a test") `) + AssertCodesEvalToSameValue(t, `["th", " ", " a test"] `, `//seq.split("is","this is a test")`) + AssertCodeErrors(t, `//seq.split(1, "this is a test")`, "") + + AssertCodesEvalToSameValue(t, + `["t", "h", "i", "s", " ", "i", "s", " ", "a", " ", "t", "e", "s", "t"]`, + `//seq.split("","this is a test")`) + + // As https://github.com/arr-ai/arrai/issues/268, `{}`, `[]` and `""` means empty set in arr.ai + // And the intent for //seq.split is to return an array, so it should be expressed as such. 
+ // `""` -> empty string, `[]` -> empty array and `{}` -> empty set + AssertCodesEvalToSameValue(t, `[]`, `//seq.split("","") `) + + AssertCodesEvalToSameValue(t, `[""]`, `//seq.split(",","") `) + + AssertCodeErrors(t, `//seq.split(1,"ABC")`, "") +} + +func TestArraySplit(t *testing.T) { //nolint:dupl + t.Parallel() + AssertCodesEvalToSameValue(t, `[['A'], ['B']]`, + `//seq.split([],['A', 'B'])`) + AssertCodesEvalToSameValue(t, `[[], ['B']]`, + `//seq.split(['A'],['A', 'B'])`) + AssertCodesEvalToSameValue(t, `[['A'], []]`, + `//seq.split(['B'],['A', 'B'])`) + AssertCodesEvalToSameValue(t, `[[],['B'],['C', 'D', 'E']]`, + `//seq.split(['A'],['A', 'B', 'A', 'C', 'D', 'E'])`) + + AssertCodesEvalToSameValue(t, `[['B'],['C'], ['D', 'E']]`, + `//seq.split(['A'],['B', 'A', 'C', 'A', 'D', 'E'])`) + AssertCodesEvalToSameValue(t, `[['A', 'B', 'C']]`, + `//seq.split(['F'],['A', 'B', 'C'])`) + AssertCodesEvalToSameValue(t, `[[['A','B'], ['C','D'], ['E','F']]]`, + `//seq.split([['F','F']],[['A','B'], ['C','D'], ['E','F']])`) + AssertCodesEvalToSameValue(t, `[[['A','B']], [['E','F']]]`, + `//seq.split([['C','D']],[['A','B'], ['C','D'], ['E','F']])`) + AssertCodesEvalToSameValue(t, `[[['A','B']], [['E','F'],['G']]]`, + `//seq.split([['C','D']],[['A','B'], ['C','D'], ['E','F'], ['G']])`) + + AssertCodesEvalToSameValue(t, `[[['A','B']], [['G']]]`, + `//seq.split([['C','D'],['E','F']],[['A','B'], ['C','D'], ['E','F'], ['G']])`) + AssertCodesEvalToSameValue(t, `[[['A','B'], ['C','D'], ['E','F'], ['G']]]`, + `//seq.split([['C','D'],['E','T']],[['A','B'], ['C','D'], ['E','F'], ['G']])`) + + AssertCodesEvalToSameValue(t, `[[],[2,3]]`, `//seq.split([1],[1, 2, 3])`) + AssertCodesEvalToSameValue(t, `[[1,2],[]]`, `//seq.split([3],[1, 2, 3])`) + AssertCodesEvalToSameValue(t, `[[1],[3]]`, `//seq.split([2],[1, 2, 3])`) + AssertCodesEvalToSameValue(t, `[[[1,2]],[[5,6]]]`, `//seq.split([[3,4]],[[1,2],[3,4],[5,6]])`) + AssertCodesEvalToSameValue(t, `[[[1,2]], [[3,4]]]`, `//seq.split([],[[1,2], 
[3,4]])`) + AssertCodesEvalToSameValue(t, `[['A'],['B'],['A']]`, `//seq.split([],['A', 'B', 'A'])`) + AssertCodesEvalToSameValue(t, `[]`, `//seq.split([],[])`) + AssertCodesEvalToSameValue(t, `[[]]`, `//seq.split(['A'],[])`) + + AssertCodeErrors(t, `//seq.split(1,[1,2,3])`, "") + AssertCodeErrors(t, `//seq.split('A',['A','B'])`, "") +} + +func TestBytesSplit(t *testing.T) { + t.Parallel() + // hello bytes - 104 101 108 108 111 + AssertCodesEvalToSameValue(t, + `[//unicode.utf8.encode('y'),//unicode.utf8.encode('e'),//unicode.utf8.encode('s')]`, + `//seq.split(//unicode.utf8.encode(""),//unicode.utf8.encode("yes"))`) + AssertCodesEvalToSameValue(t, + `[//unicode.utf8.encode("this"), //unicode.utf8.encode("is"),`+ + ` //unicode.utf8.encode("a"), //unicode.utf8.encode("test")]`, + `//seq.split(//unicode.utf8.encode(" "),//unicode.utf8.encode("this is a test"))`) + AssertCodesEvalToSameValue(t, + `[//unicode.utf8.encode("this is a test")]`, + `//seq.split(//unicode.utf8.encode("get"),//unicode.utf8.encode("this is a test"))`) + + AssertCodesEvalToSameValue(t, `[[], //unicode.utf8.encode("B"), //unicode.utf8.encode("CD")]`, + `//seq.split(//unicode.utf8.encode("A"),//unicode.utf8.encode("ABACD"))`) + AssertCodesEvalToSameValue(t, `[//unicode.utf8.encode("ABAC"), []]`, + `//seq.split(//unicode.utf8.encode("D"),//unicode.utf8.encode("ABACD"))`) + + AssertCodesEvalToSameValue(t, + `//unicode.utf8.encode("")`, + `//seq.split(//unicode.utf8.encode(""),//unicode.utf8.encode(""))`) + AssertCodesEvalToSameValue(t, + `[//unicode.utf8.encode("A"),//unicode.utf8.encode("B"),//unicode.utf8.encode("C")]`, + `//seq.split(//unicode.utf8.encode(""),//unicode.utf8.encode("ABC"))`) + AssertCodesEvalToSameValue(t, + `[//unicode.utf8.encode("")]`, + `//seq.split(//unicode.utf8.encode(","),//unicode.utf8.encode(""))`) +} diff --git a/syntax/std_seq_sub_test.go b/syntax/std_seq_sub_test.go new file mode 100644 index 00000000..9107d111 --- /dev/null +++ b/syntax/std_seq_sub_test.go @@ -0,0 
+1,75 @@ +package syntax + +import "testing" + +func TestStrSub(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `" BC"`, `//seq.sub( "A", " ","ABC")`) + AssertCodesEvalToSameValue(t, `"this is not a test"`, `//seq.sub("aaa", "is", "this is not a test")`) + AssertCodesEvalToSameValue(t, `"this is a test"`, `//seq.sub("is not", "is", "this is not a test")`) + AssertCodesEvalToSameValue(t, `"this is a test"`, `//seq.sub("not ", "","this is not a test")`) + AssertCodesEvalToSameValue(t, `"t1his is not1 a t1est1"`, `//seq.sub("t", "t1","this is not a test")`) + AssertCodesEvalToSameValue(t, `"this is still a test"`, + `//seq.sub( "doesn't matter", "hello there","this is still a test")`) + AssertCodeErrors(t, `//seq.sub("hello there", "test", 1)`, "") + ///////////////// + AssertCodesEvalToSameValue(t, `""`, `//seq.sub( "","", "")`) + AssertCodesEvalToSameValue(t, `"A"`, `//seq.sub( "","A", "")`) + AssertCodesEvalToSameValue(t, `""`, `//seq.sub( "A","", "")`) + AssertCodesEvalToSameValue(t, `"ABC"`, `//seq.sub( "","", "ABC")`) + AssertCodesEvalToSameValue(t, `"EAEBECE"`, `//seq.sub( "", "E","ABC")`) + AssertCodesEvalToSameValue(t, `"BC"`, `//seq.sub( "A", "","ABC")`) + + AssertCodeErrors(t, `//seq.sub(1,'B','BCD')`, "") +} + +func TestArraySub(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `['T', 'B', 'T', 'C', 'D', 'E']`, + `//seq.sub(['A'], ['T'], ['A', 'B', 'A', 'C', 'D', 'E'])`) + AssertCodesEvalToSameValue(t, `[['A', 'B'], ['T','C'],['A','D']]`, + `//seq.sub([['A','C']], [['T','C']], [['A', 'B'], ['A','C'],['A','D']])`) + AssertCodesEvalToSameValue(t, `[2, 2, 3]`, `//seq.sub([1], [2], [1, 2, 3])`) + AssertCodesEvalToSameValue(t, `[[1,1], [4,4], [3,3]]`, `//seq.sub([[2,2]], [[4,4]], [[1,1], [2,2], [3,3]])`) + + AssertCodeErrors(t, `//seq.sub(1,'B',[1,2,3])`, "") + AssertCodeErrors(t, `//seq.sub(1,'B',['A','B','C'])`, "") +} + +func TestArraySubEdgeCases(t *testing.T) { + AssertCodesEvalToSameValue(t, `[]`, `//seq.sub( [],[], [])`) + 
AssertCodesEvalToSameValue(t, `[1]`, `//seq.sub( [],[1], [])`) + AssertCodesEvalToSameValue(t, `[1,2]`, `//seq.sub( [],[1,2], [])`) + AssertCodesEvalToSameValue(t, `[[1,2]]`, `//seq.sub( [],[[1,2]], [])`) + AssertCodesEvalToSameValue(t, `[]`, `//seq.sub( [1],[], [])`) + AssertCodesEvalToSameValue(t, `[1,2,3]`, `//seq.sub( [],[], [1,2,3])`) + AssertCodesEvalToSameValue(t, `[[1,2],3]`, `//seq.sub( [],[], [[1,2],3])`) + AssertCodesEvalToSameValue(t, `[4,1,4,2,4,3,4]`, `//seq.sub( [], [4],[1,2,3])`) + AssertCodesEvalToSameValue(t, `[4,[1,2],4,[3,4],4]`, `//seq.sub( [], [4],[[1,2],[3,4]])`) + AssertCodesEvalToSameValue(t, `[[4],[1,2],[4],[3,4],[4]]`, `//seq.sub( [], [[4]],[[1,2],[3,4]])`) + AssertCodesEvalToSameValue(t, `[1,3]`, `//seq.sub([2], [],[1,2,3])`) +} + +func TestBytesSub(t *testing.T) { + t.Parallel() + // hello bytes - 104 101 108 108 111 + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('oello')`, + `//seq.sub({ |@, @byte| (0, 104)},{ |@, @byte| (0, 111)},//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('hehho')`, + `//seq.sub({ |@, @byte| (0, 108)},{ |@, @byte| (0, 104)},//unicode.utf8.encode('hello'))`) + /////////////////// + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('')`, + `//seq.sub(//unicode.utf8.encode(''),//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('')`, + `//seq.sub(//unicode.utf8.encode('a'),//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('a')`, + `//seq.sub(//unicode.utf8.encode(''),//unicode.utf8.encode('a'),//unicode.utf8.encode(''))`) + + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('hello')`, + `//seq.sub(//unicode.utf8.encode(''),//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('ello')`, + `//seq.sub(//unicode.utf8.encode('h'),//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) + 
+ AssertCodesEvalToSameValue(t, `//unicode.utf8.encode('thtetltltot')`, + `//seq.sub(//unicode.utf8.encode(''),//unicode.utf8.encode('t'),//unicode.utf8.encode('hello'))`) +} diff --git a/syntax/std_seq_suffix_test.go b/syntax/std_seq_suffix_test.go new file mode 100644 index 00000000..dc1b4a44 --- /dev/null +++ b/syntax/std_seq_suffix_test.go @@ -0,0 +1,60 @@ +package syntax + +import "testing" + +func TestStrSuffix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix("ABCDE","ABCDE")`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix("E","ABCDE")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix("DE","ABCDE")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix("CD", "ABCDE")`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix("D","ABCDE")`) + + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix("D","")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix("","")`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix("","ABCDE")`) + + AssertCodeErrors(t, `//seq.has_suffix(1,"ABC")`, "") +} + +func TestArraySuffix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(['A','B'],['A','B'])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(['E'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(['E'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix( ['D','E'],['A','B','C','D','E'])`) + + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(['D'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(['C','D'],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(['A','B','C','D','E','F'],['A','B','C','D','E'])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([3, 4],[1 ,2, 3, 4])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([3, 4],[[1 ,2], 3, 4])`) + 
AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([3, 4],[3, 4])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([[3, 4]],[[1 ,2], [3, 4]])`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([],['A','B','C','D','E'])`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix([], [])`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(['D','E'],[])`) + + AssertCodeErrors(t, `//seq.has_suffix(1,[1,2])`, "") + AssertCodeErrors(t, `//seq.has_suffix('A',['A','B'])`, "") +} + +func TestBytesSuffix(t *testing.T) { + t.Parallel() + AssertCodesEvalToSameValue(t, `true`, + `//seq.has_suffix(//unicode.utf8.encode('hello'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(//unicode.utf8.encode('o'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(//unicode.utf8.encode('lo'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(//unicode.utf8.encode('e'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(//unicode.utf8.encode('ell'),//unicode.utf8.encode('hello'))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(//unicode.utf8.encode('h'),//unicode.utf8.encode('hello'))`) + + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(//unicode.utf8.encode(''),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `false`, `//seq.has_suffix(//unicode.utf8.encode('o'),//unicode.utf8.encode(''))`) + AssertCodesEvalToSameValue(t, `true`, `//seq.has_suffix(//unicode.utf8.encode(''),//unicode.utf8.encode('hello'))`) +} diff --git a/syntax/std_seq_test.go b/syntax/std_seq_test.go index b171cd84..21ef808e 100644 --- a/syntax/std_seq_test.go +++ b/syntax/std_seq_test.go @@ -1,6 +1,8 @@ package syntax -import "testing" +import ( + "testing" +) func TestSeqConcat(t *testing.T) { t.Parallel() diff --git a/syntax/std_str.go b/syntax/std_str.go index c98fc2f9..d6833073 
100644 --- a/syntax/std_str.go +++ b/syntax/std_str.go @@ -71,47 +71,11 @@ var ( func stdStr() rel.Attr { return rel.NewTupleAttr("str", - createNestedFuncAttr("contains", 2, func(args ...rel.Value) rel.Value { - return rel.NewBool(strings.Contains(mustAsString(args[0]), mustAsString(args[1]))) - }), rel.NewAttr("expand", stdStrExpand), - createNestedFuncAttr("has_prefix", 2, func(args ...rel.Value) rel.Value { - return rel.NewBool(strings.HasPrefix(mustAsString(args[0]), mustAsString(args[1]))) - }), - createNestedFuncAttr("has_suffix", 2, func(args ...rel.Value) rel.Value { - return rel.NewBool(strings.HasPrefix(mustAsString(args[0]), mustAsString(args[1]))) - }), - createNestedFuncAttr("join", 2, func(args ...rel.Value) rel.Value { - strs := args[0].(rel.Set) - toJoin := make([]string, 0, strs.Count()) - for i, ok := strs.(rel.Set).ArrayEnumerator(); ok && i.MoveNext(); { - toJoin = append(toJoin, mustAsString(i.Current())) - } - return rel.NewString([]rune(strings.Join(toJoin, mustAsString(args[1])))) - }), createNestedFuncAttr("lower", 1, func(args ...rel.Value) rel.Value { return rel.NewString([]rune(strings.ToLower(mustAsString(args[0])))) }), rel.NewAttr("repr", stdStrRepr), - createNestedFuncAttr("split", 2, func(args ...rel.Value) rel.Value { - splitted := strings.Split(mustAsString(args[0]), mustAsString(args[1])) - vals := make([]rel.Value, 0, len(splitted)) - for _, s := range splitted { - vals = append(vals, rel.NewString([]rune(s))) - } - return rel.NewArray(vals...) 
- }), - createNestedFuncAttr("sub", 3, func(args ...rel.Value) rel.Value { - return rel.NewString( - []rune( - strings.ReplaceAll( - mustAsString(args[0]), - mustAsString(args[1]), - mustAsString(args[2]), - ), - ), - ) - }), createNestedFuncAttr("title", 1, func(args ...rel.Value) rel.Value { return rel.NewString([]rune(strings.Title(mustAsString(args[0])))) }), diff --git a/syntax/std_str_test.go b/syntax/std_str_test.go index 878623bf..eb39fcd7 100644 --- a/syntax/std_str_test.go +++ b/syntax/std_str_test.go @@ -4,31 +4,6 @@ import ( "testing" ) -func TestStrSub(t *testing.T) { - t.Parallel() - AssertCodesEvalToSameValue(t, - `"this is a test"`, - `//str.sub("this is not a test", "is not", "is")`) - AssertCodesEvalToSameValue(t, - `"this is a test"`, - `//str.sub("this is not a test", "not ", "")`) - AssertCodesEvalToSameValue(t, - `"this is still a test"`, - `//str.sub("this is still a test", "doesn't matter", "hello there")`) - AssertCodeErrors(t, `//str.sub("hello there", "test", 1)`, "") -} - -func TestStrSplit(t *testing.T) { - t.Parallel() - AssertCodesEvalToSameValue(t, - `["t", "h", "i", "s", " ", "i", "s", " ", "a", " ", "t", "e", "s", "t"]`, - `//str.split("this is a test", "")`) - AssertCodesEvalToSameValue(t, `["this", "is", "a", "test"]`, `//str.split("this is a test", " ") `) - AssertCodesEvalToSameValue(t, `["this is a test"] `, `//str.split("this is a test", ",") `) - AssertCodesEvalToSameValue(t, `["th", " ", " a test"] `, `//str.split("this is a test", "is")`) - AssertCodeErrors(t, `//str.split("this is a test", 1)`, "") -} - func TestStrLower(t *testing.T) { t.Parallel() AssertCodesEvalToSameValue(t, `"" `, `//str.lower("") `) @@ -55,20 +30,3 @@ func TestStrTitle(t *testing.T) { AssertCodesEvalToSameValue(t, `"This Is A Test"`, `//str.title("this is a test")`) AssertCodeErrors(t, `//str.title(123)`, "") } - -func TestStrContains(t *testing.T) { - t.Parallel() - AssertCodesEvalToSameValue(t, `true `, `//str.contains("this is a test", "") `) - 
AssertCodesEvalToSameValue(t, `true `, `//str.contains("this is a test", "is a test") `) - AssertCodesEvalToSameValue(t, `false`, `//str.contains("this is a test", "is not a test")`) - AssertCodeErrors(t, `//str.contains(123, 124)`, "") -} - -func TestStrJoin(t *testing.T) { - t.Parallel() - AssertCodesEvalToSameValue(t, `"" `, `//str.join([], ",") `) - AssertCodesEvalToSameValue(t, `",," `, `//str.join(["", "", ""], ",") `) - AssertCodesEvalToSameValue(t, `"this is a test" `, `//str.join(["this", "is", "a", "test"], " ")`) - AssertCodesEvalToSameValue(t, `"this" `, `//str.join(["this"], ",") `) - AssertCodeErrors(t, `//str.join("this", 2)`, "") -} diff --git a/syntax/test_helpers.go b/syntax/test_helpers.go index ae051fc7..f44f9eb9 100644 --- a/syntax/test_helpers.go +++ b/syntax/test_helpers.go @@ -2,6 +2,7 @@ package syntax import ( "errors" + "fmt" "testing" "github.com/arr-ai/arrai/rel" @@ -84,6 +85,9 @@ func AssertCodeErrors(t *testing.T, code, errString string) bool { if assert.NoError(t, err, "parsing code: %s", code) { codeExpr := pc.CompileExpr(ast) _, err := codeExpr.Eval(rel.EmptyScope) + if err == nil { + panic(fmt.Sprintf("the code `%s` didn't generate any error", code)) + } assert.EqualError(t, errors.New(err.Error()[:len(errString)]), errString) } return false