Skip to content

Commit

Permalink
sql,util: support encoding of JSON and Array inverted index spans for @>
Browse files Browse the repository at this point in the history
This commit adds support for encoding spans of JSON and Array inverted
keys for the purpose of evaluating contains (@>) predicates. It ensures
that the resulting spans produce correct results for all possible inputs
to the contains (@>) predicate.

Release note: None
  • Loading branch information
rytaft committed Nov 5, 2020
1 parent 91ef6f9 commit 9fd78ea
Show file tree
Hide file tree
Showing 12 changed files with 814 additions and 11 deletions.
11 changes: 11 additions & 0 deletions pkg/roachpb/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2029,6 +2029,17 @@ func (s Span) EqualValue(o Span) bool {
return s.Key.Equal(o.Key) && s.EndKey.Equal(o.EndKey)
}

// Compare orders two Spans lexicographically, first by start key and then,
// when the start keys are byte-wise equal, by end key.
//
// It returns 0 if both the start and end keys of s and o are equal, -1 if s
// sorts before o (s starts earlier, or the starts are equal and s ends
// earlier), and +1 otherwise.
func (s Span) Compare(o Span) int {
	// The start key decides the ordering unless the two starts tie.
	if byStart := bytes.Compare(s.Key, o.Key); byStart != 0 {
		return byStart
	}
	// Tie on the start key: fall back to comparing the end keys.
	return bytes.Compare(s.EndKey, o.EndKey)
}

// Overlaps returns true WLOG for span A and B iff:
// 1. Both spans contain one key (just the start key) and they are equal; or
// 2. The span with only one key is contained inside the other span; or
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/lex/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"encode.go",
"experimental_keywords.go",
"keywords.go", # keep
"reserved_keywords.go",
"tokens.go", # keep
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/lex",
Expand Down
68 changes: 68 additions & 0 deletions pkg/sql/rowenc/index_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,51 @@ func EncodeInvertedIndexTableKeys(
return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType())
}

// EncodeContainingInvertedIndexSpans takes in a key prefix and returns the
// spans that must be scanned in the inverted index to evaluate a contains (@>)
// predicate with the given datum, which should be a container (either JSON
// or Array). These spans should be used to find the objects in the index that
// contain the given json or array. In other words, if we have a predicate
// x @> y, this function should use the value of y to find the spans to scan
// in an inverted index on x.
//
// The spans returned by EncodeContainingInvertedIndexSpans represent the
// intersection of unions. For example, if the returned results are:
//
//     { {["a", "b"), ["c", "d")}, {["e", "f")} }
//
// the expression should be evaluated as:
//
//            INTERSECTION
//             /        \
//          UNION     ["e", "f")
//         /     \
//   ["a", "b") ["c", "d")
//
// The input inKey is prefixed to all returned keys.
//
// Return values:
//   - spans: one roachpb.Spans (a union) per element of the intersection. It
//     is nil when val is NULL or when an error is returned.
//   - tight: always true for arrays; for JSON it is whatever the json package
//     reports. NOTE(review): "tight" presumably means the spans match the
//     predicate exactly, with no false positives — confirm against callers.
//   - err: an assertion failure if the datum is neither JSON nor an array, or
//     any error produced while encoding the keys.
func EncodeContainingInvertedIndexSpans(
	evalCtx *tree.EvalContext, val tree.Datum, inKey []byte, version descpb.IndexDescriptorVersion,
) (spans []roachpb.Spans, tight bool, err error) {
	if val == tree.DNull {
		return nil, false, nil
	}
	// Dispatch on the unwrapped datum rather than on val itself: if val is a
	// wrapper around the real container, a type assertion on val would panic
	// even though the wrapped value is a perfectly valid JSON or array datum.
	datum := tree.UnwrapDatum(evalCtx, val)
	switch datum.ResolvedType().Family() {
	case types.JsonFamily:
		return json.EncodeContainingInvertedIndexSpans(inKey, datum.(*tree.DJSON).JSON)
	case types.ArrayFamily:
		arraySpans, err := encodeContainingArrayInvertedIndexSpans(datum.(*tree.DArray), inKey, version)
		if err != nil {
			return nil, false, err
		}
		// Spans for array inverted indexes are always tight.
		return arraySpans, true, nil
	}
	return nil, false, errors.AssertionFailedf(
		"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
	)
}

// encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for
// the given input array, one per entry in the array. The input inKey is
// prefixed to all returned keys.
Expand Down Expand Up @@ -842,6 +887,29 @@ func encodeArrayInvertedIndexTableKeys(
return outKeys, nil
}

// encodeContainingArrayInvertedIndexSpans builds the spans to scan in an
// inverted index in order to evaluate a contains (@>) predicate whose
// right-hand side is the given array. Every returned key is prefixed with
// inKey.
//
// For a non-empty array, the keys are produced by
// encodeArrayInvertedIndexTableKeys and each key becomes its own
// single-span roachpb.Spans. For an empty array the result is a single span
// keyed on inKey alone, since every array contains the empty array.
func encodeContainingArrayInvertedIndexSpans(
	val *tree.DArray, inKey []byte, version descpb.IndexDescriptorVersion,
) (spans []roachpb.Spans, err error) {
	if val.Array.Len() == 0 {
		// All arrays contain the empty array, so the only constraint is the
		// key prefix itself.
		prefixOnly := roachpb.Span{Key: inKey}
		return []roachpb.Spans{{prefixOnly}}, nil
	}

	arrayKeys, err := encodeArrayInvertedIndexTableKeys(val, inKey, version)
	if err != nil {
		return nil, err
	}

	// Wrap each encoded key in its own one-element Spans.
	spans = make([]roachpb.Spans, 0, len(arrayKeys))
	for _, k := range arrayKeys {
		spans = append(spans, roachpb.Spans{roachpb.Span{Key: k}})
	}
	return spans, nil
}

// EncodeGeoInvertedIndexTableKeys is the equivalent of EncodeInvertedIndexTableKeys
// for Geography and Geometry.
func EncodeGeoInvertedIndexTableKeys(
Expand Down
97 changes: 97 additions & 0 deletions pkg/sql/rowenc/index_encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,103 @@ func TestInvertedIndexKey(t *testing.T) {
}
}

// TestEncodeContainingArrayInvertedIndexSpans verifies that the spans produced
// by EncodeContainingInvertedIndexSpans for an array agree with direct
// evaluation of the @> operator on the same pair of arrays.
func TestEncodeContainingArrayInvertedIndexSpans(t *testing.T) {
	testCases := []struct {
		value    string
		contains string
		expected bool
	}{
		// Each case asks whether the array in `value` contains the array in
		// `contains`. The answer is computed both directly and via
		// EncodeInvertedIndexTableKeys + EncodeContainingInvertedIndexSpans,
		// and both must equal `expected`.
		{`{}`, `{}`, true},
		{`{}`, `{1}`, false},
		{`{1}`, `{}`, true},
		{`{1}`, `{1}`, true},
		{`{1}`, `{1, 2}`, false},
		{`{1, 2}`, `{1}`, true},
		{`{1, 2}`, `{2}`, true},
		{`{1, 2}`, `{1, 2}`, true},
		{`{1, 2}`, `{1, 2, 1}`, true},
		{`{1, 2, 3}`, `{1, 2, 4}`, false},
		{`{1, 2, 3}`, `{}`, true},
	}

	evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
	parseArray := func(s string) tree.Datum {
		parsed, _, parseErr := tree.ParseDArrayFromString(&evalCtx, s, types.Int)
		if parseErr != nil {
			t.Fatalf("Failed to parse array %s: %v", s, parseErr)
		}
		return parsed
	}

	version := descpb.EmptyArraysInInvertedIndexesVersion
	for _, tc := range testCases {
		haystack := parseArray(tc.value)
		needle := parseArray(tc.contains)

		// Sanity check: direct evaluation of `value @> contains` must agree
		// with the expectation recorded in the test case.
		res, err := tree.ArrayContains(&evalCtx, haystack.(*tree.DArray), needle.(*tree.DArray))
		if err != nil {
			t.Fatal(err)
		}
		if bool(*res) != tc.expected {
			t.Fatalf(
				"expected value of %s @> %s did not match actual value. Expected: %v. Got: %s",
				tc.value, tc.contains, tc.expected, res.String(),
			)
		}

		// Now derive the same answer from the inverted index encoding.
		keys, err := EncodeInvertedIndexTableKeys(haystack, nil, version)
		if err != nil {
			t.Fatal(err)
		}

		spansSlice, _, err := EncodeContainingInvertedIndexSpans(&evalCtx, needle, nil, version)
		if err != nil {
			t.Fatal(err)
		}

		// The returned spans are an intersection of unions. The outer loop is
		// the intersection (every group must match), and the inner loops are
		// the union (any span in the group matching any key is enough).
		actual := true
	intersection:
		for _, union := range spansSlice {
			for _, sp := range union {
				if sp.EndKey == nil {
					// ContainsKey expects that the EndKey is filled in.
					sp.EndKey = sp.Key.PrefixEnd()
				}
				for _, k := range keys {
					if sp.ContainsKey(k) {
						// This group is satisfied; move on to the next one.
						continue intersection
					}
				}
			}
			// No span in this group matched any key, so the intersection fails.
			actual = false
			break
		}

		switch {
		case actual == tc.expected:
			// Index-based result agrees with the expectation.
		case tc.expected:
			t.Errorf("expected %s to contain %s but it did not",
				tc.value, tc.contains)
		default:
			t.Errorf("expected %s not to contain %s but it did",
				tc.value, tc.contains)
		}
	}
}

type arrayEncodingTest struct {
name string
datum tree.DArray
Expand Down
8 changes: 8 additions & 0 deletions pkg/util/encoding/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,14 @@ func EncodeNotNullAscending(b []byte) []byte {
return append(b, encodedNotNull)
}

// EncodeJSONObjectSpanStartAscending appends to b the encoding of the first
// possible value for JSON objects, which is \x00\xff, and returns the
// extended slice.
//
// Non-objects (i.e., scalars and arrays) start with \x00\x01 or \x00\x03 (see
// AddJSONPathTerminator and EncodeArrayAscending), so every object is ordered
// after them.
func EncodeJSONObjectSpanStartAscending(b []byte) []byte {
	b = append(b, escape, escaped00)
	return b
}

// EncodeArrayAscending encodes a value used to signify membership of an array for JSON objects.
func EncodeArrayAscending(b []byte) []byte {
return append(b, escape, escapedJSONArray)
Expand Down
3 changes: 3 additions & 0 deletions pkg/util/json/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ go_library(
deps = [
"//pkg/geo",
"//pkg/geo/geopb",
"//pkg/roachpb",
"//pkg/sql/pgwire/pgcode",
"//pkg/sql/pgwire/pgerror",
"//pkg/util/encoding",
Expand All @@ -39,8 +40,10 @@ go_test(
deps = [
"//pkg/sql/pgwire/pgerror",
"//pkg/util/encoding",
"//pkg/util/randutil",
"//pkg/util/timeutil",
"//pkg/util/unique",
"//vendor/github.com/cockroachdb/apd/v2:apd",
"//vendor/github.com/stretchr/testify/require",
],
)
11 changes: 11 additions & 0 deletions pkg/util/json/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"strconv"
"unsafe"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/errors"
)
Expand Down Expand Up @@ -714,6 +715,16 @@ func (j *jsonEncoded) encodeInvertedIndexKeys(b []byte) ([][]byte, error) {
return decoded.encodeInvertedIndexKeys(b)
}

// encodeContainingInvertedIndexSpans decodes j and forwards the call, with
// the same b and root arguments, to the decoded value's
// encodeContainingInvertedIndexSpans. If decoding fails, the decode error is
// returned along with nil spans and false.
func (j *jsonEncoded) encodeContainingInvertedIndexSpans(
	b []byte, root bool,
) ([]roachpb.Spans, bool, error) {
	full, decodeErr := j.decode()
	if decodeErr != nil {
		return nil, false, decodeErr
	}
	spans, tight, err := full.encodeContainingInvertedIndexSpans(b, root)
	return spans, tight, err
}

// numInvertedIndexEntries implements the JSON interface.
func (j *jsonEncoded) numInvertedIndexEntries() (int, error) {
if j.isScalar() || j.containerLen == 0 {
Expand Down
Loading

0 comments on commit 9fd78ea

Please sign in to comment.