Skip to content

Commit

Permalink
index creation
Browse files Browse the repository at this point in the history
  • Loading branch information
jordanlewis committed May 16, 2024
1 parent 39a713a commit 510bbc3
Show file tree
Hide file tree
Showing 24 changed files with 347 additions and 16 deletions.
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ go_library(
"//pkg/util/uint128",
"//pkg/util/ulid",
"//pkg/util/uuid",
"//pkg/util/vector/vectorpb",
"@com_github_cockroachdb_apd_v3//:apd",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_errors//hintdetail",
Expand Down
14 changes: 12 additions & 2 deletions pkg/sql/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -1806,12 +1806,17 @@ func countExpectedRowsForInvertedIndex(
ctx context.Context, txn descs.Txn,
) error {
var stmt string
geoConfig := idx.GetGeoConfig()
if geoConfig.IsEmpty() {
if geoConfig := idx.GetGeoConfig(); geoConfig.IsEmpty() {
stmt = fmt.Sprintf(
`SELECT coalesce(sum_int(crdb_internal.num_inverted_index_entries(%s, %d)), 0) FROM [%d AS t]`,
colNameOrExpr, idx.GetVersion(), desc.GetID(),
)
} else if vectorConfig := idx.GetVectorConfig(); vectorConfig.IsEmpty() {
nLists := vectorConfig.GetIvfFlat().NLists
stmt = fmt.Sprintf(
`SELECT coalesce(sum_int(crdb_internal.num_inverted_index_entries(%s, %d)), 0) + least(%d, count(colNameOrExpr)) FROM [%d AS t]`,
colNameOrExpr, idx.GetVersion(), nLists, desc.GetID(),
)
} else {
stmt = fmt.Sprintf(
`SELECT coalesce(sum_int(crdb_internal.num_geo_inverted_index_entries(%d, %d, %s)), 0) FROM [%d AS t]`,
Expand All @@ -1832,6 +1837,11 @@ func countExpectedRowsForInvertedIndex(
return errors.New("failed to verify inverted index count")
}
expectedCount = int64(tree.MustBeDInt(row[0]))
// For ivf indexes, the expected count is the sum of the number of
// entries in the inverted index and the number of rows in the table.
if len(row) > 1 {
expectedCount += int64(tree.MustBeDInt(row[1]))
}
return nil
})
}); err != nil {
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/catalog/catpb/enum.proto
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,7 @@ enum InvertedIndexColumnKind {
// TRIGRAM is the trigram kind of inverted index column. It's only valid on
// text columns.
TRIGRAM = 1;
// IVFFLAT is the IVFFLAT kind of inverted index column. It's only valid on
// vector columns.
IVFFLAT = 2;
}
2 changes: 1 addition & 1 deletion pkg/sql/catalog/colinfo/col_type_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func ColumnTypeIsInvertedIndexable(t *types.T) bool {
switch t.Family() {
case types.ArrayFamily:
return t.ArrayContents().Family() != types.RefCursorFamily
case types.JsonFamily, types.StringFamily:
case types.JsonFamily, types.StringFamily, types.PGVectorFamily:
return true
}
return ColumnTypeIsOnlyInvertedIndexable(t)
Expand Down
19 changes: 18 additions & 1 deletion pkg/sql/catalog/descs/hydrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,24 @@ func hydrate(
if !isHydratable(desc) {
return nil
}
return typedesc.HydrateTypesInDescriptor(ctx, desc, typeLookupFunc)
err := typedesc.HydrateTypesInDescriptor(ctx, desc, typeLookupFunc)
if err != nil {
return err
}
if tableDesc, ok := desc.(catalog.TableDescriptor); ok {
for _, idx := range tableDesc.NonDropIndexes() {
vectorConfig := idx.GetVectorConfig()
if vectorConfig.IsEmpty() {
continue
}
ivfFlat := vectorConfig.GetIvfFlat()
if ivfFlat == nil {
continue
}
ivfFlat.Centroids = nil // scan centroids
}
}
return nil
}

// makeTypeLookupFuncForHydration builds a typedesc.TypeLookupFunc for the
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/colenc/inverted.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func (b *BatchEncoder) encodeInvertedSecondaryIndex(
vec = b.b.ColVecs()[i]
}
indexGeoConfig := index.GetGeoConfig()
indexVectorConfig := index.GetVectorConfig()
for row := 0; row < b.count; row++ {
if kys[row] == nil {
continue
Expand All @@ -67,6 +68,10 @@ func (b *BatchEncoder) encodeInvertedSecondaryIndex(
if keys, err = rowenc.EncodeGeoInvertedIndexTableKeys(ctx, val, kys[row], indexGeoConfig); err != nil {
return err
}
} else if !indexVectorConfig.IsEmpty() {
if keys, err = rowenc.EncodeVectorInvertedIndexTableKeys(ctx, val, kys[row], indexVectorConfig); err != nil {
return err
}
} else {
if keys, err = rowenc.EncodeInvertedIndexTableKeys(val, kys[row], index.GetVersion()); err != nil {
return err
Expand Down
27 changes: 27 additions & 0 deletions pkg/sql/create_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
"github.com/cockroachdb/cockroach/pkg/util/vector/vectorpb"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -437,6 +438,32 @@ func populateInvertedIndexDescriptor(
default:
return newUndefinedOpclassError(invCol.OpClass)
}
case types.PGVectorFamily:
width := column.GetType().Width()
if width == 0 {
return pgerror.New(pgcode.InvalidObjectDefinition, "column does not have dimensions")
}
indexDesc.VectorConfig = vectorpb.Config{
IndexType: &vectorpb.Config_IvfFlat{
IvfFlat: &vectorpb.IVFFlatConfig{
// lists defaults to 100.
NLists: 100,
},
},
Dimensions: width,
}
indexDesc.InvertedColumnKinds[0] = catpb.InvertedIndexColumnKind_IVFFLAT
switch invCol.OpClass {
// The default operator class is "vector_l2_ops".
case "vector_l2_ops", "":
indexDesc.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_L2
case "vector_ip_ops":
indexDesc.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_IP
case "vector_cosine_ops":
indexDesc.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_COSINE
default:
return newUndefinedOpclassError(invCol.OpClass)
}
default:
return tabledesc.NewInvalidInvertedColumnError(column.GetName(), column.GetType().Name())
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -11387,7 +11387,7 @@ opt_index_access_method:
{
/* FORCE DOC */
switch $2 {
case "gin", "gist":
case "gin", "gist", "ivfflat":
$$.val = true
case "btree":
$$.val = false
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/rowenc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ go_library(
"//pkg/util/trigram",
"//pkg/util/tsearch",
"//pkg/util/unique",
"//pkg/util/vector",
"//pkg/util/vector/vectorpb",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_redact//:redact",
],
Expand Down
35 changes: 33 additions & 2 deletions pkg/sql/rowenc/index_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/trigram"
"github.com/cockroachdb/cockroach/pkg/util/tsearch"
"github.com/cockroachdb/cockroach/pkg/util/unique"
"github.com/cockroachdb/cockroach/pkg/util/vector"
"github.com/cockroachdb/cockroach/pkg/util/vector/vectorpb"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -624,10 +626,13 @@ func EncodeInvertedIndexKeys(
} else {
val = tree.DNull
}
indexGeoConfig := index.GetGeoConfig()
if !indexGeoConfig.IsEmpty() {
config := index.GetVectorConfig()
if indexGeoConfig := index.GetGeoConfig(); !indexGeoConfig.IsEmpty() {
return EncodeGeoInvertedIndexTableKeys(ctx, val, keyPrefix, indexGeoConfig)
} else if vectorConfig := config; !vectorConfig.IsEmpty() {
return EncodeVectorInvertedIndexTableKeys(ctx, val, keyPrefix, vectorConfig)
}

return EncodeInvertedIndexTableKeys(val, keyPrefix, index.GetVersion())
}

Expand Down Expand Up @@ -1087,6 +1092,32 @@ func EncodeGeoInvertedIndexTableKeys(
}
}

// EncodeVectorInvertedIndexTableKeys is the equivalent of
// EncodeInvertedIndexTableKeys for vectors. It yields at most one key per
// datum, built from keyPrefix, the centroid closest to the vector (as chosen by
// vector.GetClosestCentroid for vectorConfig), and the vector itself.
//
// A NULL datum produces no keys. An error is returned when the input vector
// or the selected centroid does not have exactly vectorConfig.Dimensions
// dimensions, or when the closest centroid cannot be determined.
func EncodeVectorInvertedIndexTableKeys(
	_ context.Context, val tree.Datum, keyPrefix []byte, vectorConfig vectorpb.Config,
) ([][]byte, error) {
	if val == tree.DNull {
		return nil, nil
	}
	vec := tree.MustBeDPGVector(val).T
	dims := int(vectorConfig.Dimensions)
	// Reject a mis-sized input vector before it is handed to the centroid
	// search, so the search never sees a vector of the wrong dimensionality.
	if len(vec) != dims {
		return nil, errors.Errorf("vector has %d dimensions, expected %d", len(vec), vectorConfig.Dimensions)
	}
	centroid, err := vector.GetClosestCentroid(vec, vectorConfig)
	if err != nil {
		return nil, err
	}
	if len(centroid) != dims {
		return nil, errors.Errorf("centroid has %d dimensions, expected %d", len(centroid), vectorConfig.Dimensions)
	}
	// The buffer will be used to encode the centroid and the input vector. 4 bytes per
	// float32 times 2 vectors. The size is computed in int so that the
	// multiplication is not performed in the narrower Dimensions type.
	b := make([]byte, 0, len(keyPrefix)+1+4*2*dims)
	b = append(b, keyPrefix...)
	// NOTE(review): the result of EncodeIvfCentroidVector is discarded. A callee
	// cannot grow the caller's slice header, so as written the returned key holds
	// only keyPrefix. If the helper returns the appended slice, this presumably
	// should read `b = encoding.EncodeIvfCentroidVector(...)` — confirm against
	// the helper's signature.
	encoding.EncodeIvfCentroidVector(b, centroid, vec)
	return [][]byte{b}, nil
}

func encodeGeoKeys(
inKey []byte, geoKeys []geoindex.Key, bbox geopb.BoundingBox,
) (keys [][]byte, err error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util/errorutil/unimplemented",
"//pkg/util/protoutil",
"//pkg/util/vector/vectorpb",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_redact//:redact",
"@com_github_lib_pq//oid",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/vector/vectorpb"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -348,6 +349,33 @@ func processColNodeType(
}
indexSpec.secondary.GeoConfig = geoindex.DefaultGeographyIndexConfig()
b.IncrementSchemaChangeIndexCounter("geography_inverted")
case types.PGVectorFamily:
width := columnType.Type.Width()
if width == 0 {
panic(pgerror.New(pgcode.InvalidObjectDefinition, "column does not have dimensions"))
}
indexSpec.secondary.VectorConfig = &vectorpb.Config{
IndexType: &vectorpb.Config_IvfFlat{
IvfFlat: &vectorpb.IVFFlatConfig{
// lists defaults to 100.
NLists: 100,
},
},
Dimensions: width,
}
invertedKind = catpb.InvertedIndexColumnKind_IVFFLAT
switch columnNode.OpClass {
// The default operator class is "vector_l2_ops".
case "vector_l2_ops", "":
indexSpec.secondary.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_L2
case "vector_ip_ops":
indexSpec.secondary.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_IP
case "vector_cosine_ops":
indexSpec.secondary.VectorConfig.DistanceFunction = vectorpb.DistanceFunction_COSINE
default:
panic(newUndefinedOpclassError(columnNode.OpClass))
}
b.IncrementSchemaChangeIndexCounter("ivfflat_inverted")
case types.StringFamily:
// Check the opclass of the last column in the list, which is the column
// we're going to inverted index.
Expand Down Expand Up @@ -930,7 +958,7 @@ func maybeAddIndexPredicate(b BuildCtx, n *tree.CreateIndex, idxSpec *indexSpec)
}

// maybeApplyStorageParameters apply any storage parameters into the index spec,
// this is only used for GeoConfig today.
// this is only used for GeoConfig and VectorConfig today.
func maybeApplyStorageParameters(b BuildCtx, n *tree.CreateIndex, idxSpec *indexSpec) {
if len(n.StorageParams) == 0 {
return
Expand All @@ -939,6 +967,9 @@ func maybeApplyStorageParameters(b BuildCtx, n *tree.CreateIndex, idxSpec *index
if idxSpec.secondary.GeoConfig != nil {
dummyIndexDesc.GeoConfig = *idxSpec.secondary.GeoConfig
}
if idxSpec.secondary.VectorConfig != nil {
dummyIndexDesc.VectorConfig = *idxSpec.secondary.VectorConfig
}
storageParamSetter := &indexstorageparam.Setter{
IndexDesc: dummyIndexDesc,
}
Expand All @@ -951,6 +982,11 @@ func maybeApplyStorageParameters(b BuildCtx, n *tree.CreateIndex, idxSpec *index
} else {
idxSpec.secondary.GeoConfig = nil
}
if !dummyIndexDesc.VectorConfig.IsEmpty() {
idxSpec.secondary.VectorConfig = &dummyIndexDesc.VectorConfig
} else {
idxSpec.secondary.VectorConfig = nil
}
}

// fallbackIfRelationIsNotTable falls back if a relation element is
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/schemachanger/scexec/scmutationexec/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ func addNewIndexMutation(
if opIndex.GeoConfig != nil {
idx.GeoConfig = *opIndex.GeoConfig
}
if opIndex.VectorConfig != nil {
idx.VectorConfig = *opIndex.VectorConfig
}
return enqueueIndexMutation(tbl, idx, state, descpb.DescriptorMutation_ADD)
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/schemachanger/scpb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ go_proto_library(
"//pkg/sql/sem/catid", # keep
"//pkg/sql/sem/semenumpb",
"//pkg/sql/types",
"//pkg/util/vector/vectorpb",
"@com_github_gogo_protobuf//gogoproto",
],
)
Expand All @@ -60,6 +61,7 @@ proto_library(
"//pkg/sql/catalog/catpb:catpb_proto",
"//pkg/sql/sem/semenumpb:semenumpb_proto",
"//pkg/sql/types:types_proto",
"//pkg/util/vector/vectorpb:vectorpb_proto",
"@com_github_gogo_protobuf//gogoproto:gogo_proto",
],
)
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/schemachanger/scpb/elements.proto
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import "sql/catalog/catpb/function.proto";
import "sql/types/types.proto";
import "gogoproto/gogo.proto";
import "geo/geopb/config.proto";
import "util/vector/vectorpb/config.proto";

option (gogoproto.equal_all) = true;

Expand Down Expand Up @@ -262,6 +263,8 @@ message Index {
// Invisibility specifies index invisibility to the optimizer.
double invisibility = 25;

cockroach.vector.vectorindex.Config vector_config = 26 [(gogoproto.nullable) = true];

reserved 3, 4, 5, 6, 7;
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -6414,6 +6414,22 @@ SELECT
Volatility: volatility.Stable,
CalledOnNullInput: true,
},
tree.Overload{
Types: tree.ParamTypes{
{Name: "val", Typ: types.PGVector},
{Name: "version", Typ: types.Int},
},
ReturnType: tree.FixedReturnType(types.Int),
Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) {
if args[0] == tree.DNull {
return tree.DZero, nil
}
return tree.NewDInt(tree.DInt(1)), nil
},
Info: "This function is used only by CockroachDB's developers for testing purposes.",
Volatility: volatility.Stable,
CalledOnNullInput: true,
},
),

"crdb_internal.assignment_cast": makeBuiltin(
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sem/builtins/fixed_oids.go
Original file line number Diff line number Diff line change
Expand Up @@ -2592,6 +2592,7 @@ var builtinOidsArray = []string{
2626: `inner_product(v1: vector, v2: vector) -> float`,
2627: `vector_dims(vector: vector) -> int`,
2628: `vector_norm(vector: vector) -> float`,
2629: `crdb_internal.num_inverted_index_entries(val: vector, version: int) -> int`,
}

var builtinOidsBySignature map[string]oid.Oid
Expand Down
Loading

0 comments on commit 510bbc3

Please sign in to comment.