Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keys: support splitting Ranges on tenant-id prefixed keys #48138

Merged
merged 3 commits into from
Apr 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion c-deps/libroach/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,23 @@ WARN_UNUSED_RESULT bool IsInt(rocksdb::Slice* buf) {
return false;
}

WARN_UNUSED_RESULT bool DecodeTablePrefix(rocksdb::Slice* buf, uint64_t* tbl) {
WARN_UNUSED_RESULT bool StripTenantPrefix(rocksdb::Slice* buf) {
if (buf->size() == 0) {
return true;
}
// kTenantPrefix is guaranteed to be a single byte.
if ((*buf)[0] != kTenantPrefix[0]) {
return true;
}
buf->remove_prefix(1);
uint64_t tid;
return DecodeUvarint64(buf, &tid);
}

WARN_UNUSED_RESULT bool DecodeTenantAndTablePrefix(rocksdb::Slice* buf, uint64_t* tbl) {
if (!StripTenantPrefix(buf)) {
return false;
}
if (!IsInt(buf) || !DecodeUvarint64(buf, tbl)) {
return false;
}
Expand Down
14 changes: 8 additions & 6 deletions c-deps/libroach/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,13 @@ rocksdb::Slice KeyPrefix(const rocksdb::Slice& src);
// if it is of type int.
WARN_UNUSED_RESULT bool IsInt(rocksdb::Slice* buf);

// DecodeTablePrefix validates that the given key has a table prefix. On
// completion, buf holds the remainder of the key (with the prefix removed) and
// tbl stores the decoded descriptor ID of the table.
//
// TODO(nvanbenschoten): support tenant ID prefix.
WARN_UNUSED_RESULT bool DecodeTablePrefix(rocksdb::Slice* buf, uint64_t* tbl);
// StripTenantPrefix validates that the given key has a tenant prefix. On
// completion, buf holds the remainder of the key (with the prefix removed).
WARN_UNUSED_RESULT bool StripTenantPrefix(rocksdb::Slice* buf);

// DecodeTenantAndTablePrefix validates that the given key has a tenant and
// table prefix. On completion, buf holds the remainder of the key (with the
// prefix removed) and tbl stores the decoded descriptor ID of the table.
WARN_UNUSED_RESULT bool DecodeTenantAndTablePrefix(rocksdb::Slice* buf, uint64_t* tbl);

} // namespace cockroach
32 changes: 32 additions & 0 deletions c-deps/libroach/encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,35 @@ TEST(Libroach, Encoding) {
EXPECT_EQ(*it, out);
}
}

TEST(Libroach, DecodeTenantAndTablePrefix) {
// clang-format off
std::vector<std::pair<std::string, uint64_t>> cases{
{{'\x89'}, 1LLU},
{{'\xf6', '\xff'}, 255LLU},
{{'\xf7', '\xff', '\xff'}, 65535LLU},
{{'\xf8', '\xff', '\xff', '\xff'}, 16777215LLU},
{{'\xf9', '\xff', '\xff', '\xff', '\xff'}, 4294967295LLU},
{{'\xfa', '\xff', '\xff', '\xff', '\xff', '\xff'}, 1099511627775LLU},
{{'\xfb', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 281474976710655LLU},
{{'\xfc', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 72057594037927935LLU},
{{'\xfd', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 18446744073709551615LLU},
{{'\xfe', '\x8d', '\x89'}, 1LLU},
{{'\xfe', '\x8d', '\xf6', '\xff'}, 255LLU},
{{'\xfe', '\x8d', '\xf7', '\xff', '\xff'}, 65535LLU},
{{'\xfe', '\x8d', '\xf8', '\xff', '\xff', '\xff'}, 16777215LLU},
{{'\xfe', '\x8d', '\xf9', '\xff', '\xff', '\xff', '\xff'}, 4294967295LLU},
{{'\xfe', '\x8d', '\xfa', '\xff', '\xff', '\xff', '\xff', '\xff'}, 1099511627775LLU},
{{'\xfe', '\x8d', '\xfb', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 281474976710655LLU},
{{'\xfe', '\x8d', '\xfc', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 72057594037927935LLU},
{{'\xfe', '\x8d', '\xfd', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff', '\xff'}, 18446744073709551615LLU},
};
// clang-format on

for (auto it = cases.begin(); it != cases.end(); it++) {
rocksdb::Slice slice(it->first);
uint64_t tbl = 0;
EXPECT_TRUE(DecodeTenantAndTablePrefix(&slice, &tbl));
EXPECT_EQ(it->second, tbl);
}
}
1 change: 1 addition & 0 deletions c-deps/libroach/keys.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const rocksdb::Slice kLocalRangeIDPrefix("\x01\x69", 2);
const rocksdb::Slice kLocalRangeIDReplicatedInfix("\x72", 1);
const rocksdb::Slice kLocalRangeAppliedStateSuffix("\x72\x61\x73\x6b", 4);
const rocksdb::Slice kMeta2KeyMax("\x03\xff\xff", 3);
const rocksdb::Slice kTenantPrefix("\xfe", 1);
const rocksdb::Slice kMinKey("", 0);
const rocksdb::Slice kMaxKey("\xff\xff", 2);

Expand Down
46 changes: 26 additions & 20 deletions c-deps/libroach/row_counter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,46 @@ using namespace cockroach;
int RowCounter::GetRowPrefixLength(rocksdb::Slice* key) {
size_t n = key->size();

if (!IsInt(key)) {
// Strip tenant ID prefix to get a "SQL key" starting with a table ID.
rocksdb::Slice buf = rocksdb::Slice(*key);
if (!StripTenantPrefix(&buf)) {
return 0;
}
size_t sql_n = key->size();

if (!IsInt(&buf)) {
// Not a table key, so the row prefix is the entire key.
return n;
}

// The column ID length is encoded as a varint and we take advantage of the
// fact that the column ID itself will be encoded in 0-9 bytes and thus the
// length of the column ID data will fit in a single byte.
rocksdb::Slice buf = rocksdb::Slice(*key);
buf.remove_prefix(n - 1);
// The column family ID length is encoded as a varint and we take advantage of
// the fact that the column family ID itself will be encoded in 0-9 bytes and
// thus the length of the column family ID data will fit in a single byte.
buf.remove_prefix(sql_n - 1);

if (!IsInt(&buf)) {
// The last byte is not a valid column ID suffix.
// The last byte is not a valid column family ID suffix.
return 0;
}

uint64_t col_id_len;
if (!DecodeUvarint64(&buf, &col_id_len)) {
uint64_t col_fam_id_len;
if (!DecodeUvarint64(&buf, &col_fam_id_len)) {
return 0;
}

if (col_id_len > uint64_t(n - 1)) {
// The column ID length was impossible. colIDLen is the length of
// the encoded column ID suffix. We add 1 to account for the byte
// holding the length of the encoded column ID and if that total
// (colIDLen+1) is greater than the key suffix (n == len(buf))
// then we bail. Note that we don't consider this an error because
// EnsureSafeSplitKey can be called on keys that look like table
// keys but which do not have a column ID length suffix (e.g
// by SystemConfig.ComputeSplitKey).
if (col_fam_id_len > uint64_t(sql_n - 1)) {
// The column family ID length was impossible. colFamIDLen is the length of
// the encoded column family ID suffix. We add 1 to account for the byte
// holding the length of the encoded column family ID and if that total
// (colFamIDLen+1) is greater than the key suffix (sqlN == len(sqlKey)) then
// we bail. Note that we don't consider this an error because
// EnsureSafeSplitKey can be called on keys that look like table keys but
// which do not have a column family ID length suffix (e.g by
// SystemConfig.ComputeSplitKey).
return 0;
}

return n - int(col_id_len) - 1;
return n - int(col_fam_id_len) - 1;
}

// EnsureSafeSplitKey transforms the SQL table key argumnet such that it is a
Expand Down Expand Up @@ -95,7 +101,7 @@ bool RowCounter::Count(const rocksdb::Slice& key) {
prev_key.assign(decoded_key.data(), decoded_key.size());

uint64_t tbl;
if (!DecodeTablePrefix(&decoded_key, &tbl)) {
if (!DecodeTenantAndTablePrefix(&decoded_key, &tbl)) {
return false;
}

Expand Down
2 changes: 0 additions & 2 deletions c-deps/libroach/row_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ const int MaxReservedDescID = 49;
// RowCounter counts how many distinct rows appear in the KVs that is is shown
// via `Count`. Note: the `DataSize` field of the BulkOpSummary is *not*
// populated by this and should be set separately.
//
// TODO(nvanbenschoten): support tenant ID prefix.
struct RowCounter {
RowCounter(cockroach::roachpb::BulkOpSummary* summary) : summary(summary) {}
bool Count(const rocksdb::Slice& key);
Expand Down
11 changes: 7 additions & 4 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ var (
// possible suggested compaction keys for a store.
LocalStoreSuggestedCompactionsMax = LocalStoreSuggestedCompactionsMin.PrefixEnd()

// The global keyspace includes the meta{1,2}, system, and SQL keys.
// The global keyspace includes the meta{1,2}, system, system tenant SQL
// keys, and non-system tenant SQL keys.

// 1. Meta keys
//
Expand Down Expand Up @@ -261,7 +262,7 @@ var (
// TimeseriesKeyMax is the maximum value for any timeseries data.
TimeseriesKeyMax = TimeseriesPrefix.PrefixEnd()

// 3. SQL keys
// 3. System tenant SQL keys
//
// TODO(nvanbenschoten): Figure out what to do with all of these. At a
// minimum, prefix them all with "System".
Expand All @@ -287,8 +288,10 @@ var (
// UserTableDataMin is the start key of user structured data.
UserTableDataMin = SystemSQLCodec.TablePrefix(MinUserDescID)

// tenantPrefix is the prefix for all non-system tenant keys.
tenantPrefix = roachpb.Key{tenantPrefixByte}
// 4. Non-system tenant SQL keys
//
// TenantPrefix is the prefix for all non-system tenant keys.
TenantPrefix = roachpb.Key{tenantPrefixByte}
TenantTableDataMin = MakeTenantPrefix(roachpb.MinTenantID)
TenantTableDataMax = MakeTenantPrefix(roachpb.MaxTenantID).PrefixEnd()
)
Expand Down
1 change: 1 addition & 0 deletions pkg/keys/gen_cpp_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ namespace cockroach {
genKey(keys.LocalRangeIDReplicatedInfix, "LocalRangeIDReplicatedInfix")
genKey(keys.LocalRangeAppliedStateSuffix, "LocalRangeAppliedStateSuffix")
genKey(keys.Meta2KeyMax, "Meta2KeyMax")
genKey(keys.TenantPrefix, "TenantPrefix")
genKey(keys.MinKey, "MinKey")
genKey(keys.MaxKey, "MaxKey")
fmt.Fprintf(f, "\n")
Expand Down
59 changes: 34 additions & 25 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,44 +689,53 @@ func DecodeTableIDIndexID(key []byte) ([]byte, uint32, uint32, error) {
// prefix of every key for the same row. (Any key with this maximal prefix is
// also guaranteed to be part of the input key's row.)
// For secondary index keys, the row prefix is defined as the entire key.
//
// TODO(nvanbenschoten): support tenant ID prefix.
func GetRowPrefixLength(key roachpb.Key) (int, error) {
n := len(key)
if encoding.PeekType(key) != encoding.Int {

// Strip tenant ID prefix to get a "SQL key" starting with a table ID.
sqlKey, _, err := DecodeTenantPrefix(key)
if err != nil {
return 0, errors.Errorf("%s: not a valid table key", key)
}
sqlN := len(sqlKey)

if encoding.PeekType(sqlKey) != encoding.Int {
// Not a table key, so the row prefix is the entire key.
return n, nil
}
// The column ID length is encoded as a varint and we take advantage of the
// fact that the column ID itself will be encoded in 0-9 bytes and thus the
// length of the column ID data will fit in a single byte.
buf := key[n-1:]
if encoding.PeekType(buf) != encoding.Int {
// The last byte is not a valid column ID suffix.
// The column family ID length is encoded as a varint and we take advantage
// of the fact that the column family ID itself will be encoded in 0-9 bytes
// and thus the length of the column family ID data will fit in a single
// byte.
colFamIDLenByte := sqlKey[sqlN-1:]
if encoding.PeekType(colFamIDLenByte) != encoding.Int {
// The last byte is not a valid column family ID suffix.
return 0, errors.Errorf("%s: not a valid table key", key)
}

// Strip off the family ID / column ID suffix from the buf. The last byte of the buf
// contains the length of the column ID suffix (which might be 0 if the buf
// does not contain a column ID suffix).
_, colIDLen, err := encoding.DecodeUvarintAscending(buf)
// Strip off the column family ID suffix from the buf. The last byte of the
// buf contains the length of the column family ID suffix, which might be 0
// if the buf does not contain a column ID suffix or if the column family is
// 0 (see the optimization in MakeFamilyKey).
_, colFamIDLen, err := encoding.DecodeUvarintAscending(colFamIDLenByte)
if err != nil {
return 0, err
}
// Note how this next comparison (and by extension the code after it) is overflow-safe. There
// are more intuitive ways of writing this that aren't as safe. See #18628.
if colIDLen > uint64(n-1) {
// The column ID length was impossible. colIDLen is the length of
// the encoded column ID suffix. We add 1 to account for the byte
// holding the length of the encoded column ID and if that total
// (colIDLen+1) is greater than the key suffix (n == len(buf))
// then we bail. Note that we don't consider this an error because
// EnsureSafeSplitKey can be called on keys that look like table
// keys but which do not have a column ID length suffix (e.g
// by SystemConfig.ComputeSplitKey).
// Note how this next comparison (and by extension the code after it) is
// overflow-safe. There are more intuitive ways of writing this that aren't
// as safe. See #18628.
if colFamIDLen > uint64(sqlN-1) {
// The column family ID length was impossible. colFamIDLen is the length
// of the encoded column family ID suffix. We add 1 to account for the
// byte holding the length of the encoded column family ID and if that
// total (colFamIDLen+1) is greater than the key suffix (sqlN ==
// len(sqlKey)) then we bail. Note that we don't consider this an error
// because EnsureSafeSplitKey can be called on keys that look like table
// keys but which do not have a column family ID length suffix (e.g by
// SystemConfig.ComputeSplitKey).
return 0, errors.Errorf("%s: malformed table key", key)
}
return len(key) - int(colIDLen) - 1, nil
return n - int(colFamIDLen) - 1, nil
}

// EnsureSafeSplitKey transforms an SQL table key such that it is a valid split key
Expand Down
69 changes: 51 additions & 18 deletions pkg/keys/keys_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,26 +526,53 @@ func TestMakeFamilyKey(t *testing.T) {
}

func TestEnsureSafeSplitKey(t *testing.T) {
e := func(vals ...uint64) roachpb.Key {
var k roachpb.Key
tenSysCodec := SystemSQLCodec
ten5Codec := MakeSQLCodec(roachpb.MakeTenantID(5))
encInt := encoding.EncodeUvarintAscending
encInts := func(c SQLCodec, vals ...uint64) roachpb.Key {
k := c.TenantPrefix()
for _, v := range vals {
k = encoding.EncodeUvarintAscending(k, v)
k = encInt(k, v)
}
return k
}
es := func(vals ...uint64) roachpb.Key {
return encInts(tenSysCodec, vals...)
}
e5 := func(vals ...uint64) roachpb.Key {
return encInts(ten5Codec, vals...)
}

goodData := []struct {
in roachpb.Key
expected roachpb.Key
}{
{e(1, 2, 0), e(1, 2)}, // /Table/1/2/0 -> /Table/1/2
{e(1, 2, 1), e(1)}, // /Table/1/2/1 -> /Table/1
{e(1, 2, 2), e()}, // /Table/1/2/2 -> /Table
{e(1, 2, 3, 0), e(1, 2, 3)}, // /Table/1/2/3/0 -> /Table/1/2/3
{e(1, 2, 3, 1), e(1, 2)}, // /Table/1/2/3/1 -> /Table/1/2
{e(1, 2, 200, 2), e(1, 2)}, // /Table/1/2/200/2 -> /Table/1/2
{e(1, 2, 3, 4, 1), e(1, 2, 3)}, // /Table/1/2/3/4/1 -> /Table/1/2/3
// TODO(nvanbenschoten): add test cases for tenant keys.
{es(), es()}, // Not a table key
{es(1, 2, 0), es(1, 2)}, // /Table/1/2/0 -> /Table/1/2
{es(1, 2, 1), es(1)}, // /Table/1/2/1 -> /Table/1
{es(1, 2, 2), es()}, // /Table/1/2/2 -> /Table
{es(1, 2, 3, 0), es(1, 2, 3)}, // /Table/1/2/3/0 -> /Table/1/2/3
{es(1, 2, 3, 1), es(1, 2)}, // /Table/1/2/3/1 -> /Table/1/2
{es(1, 2, 200, 2), es(1, 2)}, // /Table/1/2/200/2 -> /Table/1/2
{es(1, 2, 3, 4, 1), es(1, 2, 3)}, // /Table/1/2/3/4/1 -> /Table/1/2/3
// Same test cases, but for tenant 5.
{e5(), e5()}, // Not a table key
{e5(1, 2, 0), e5(1, 2)}, // /Tenant/5/Table/1/2/0 -> /Tenant/5/Table/1/2
{e5(1, 2, 1), e5(1)}, // /Tenant/5/Table/1/2/1 -> /Tenant/5/Table/1
{e5(1, 2, 2), e5()}, // /Tenant/5/Table/1/2/2 -> /Tenant/5/Table
{e5(1, 2, 3, 0), e5(1, 2, 3)}, // /Tenant/5/Table/1/2/3/0 -> /Tenant/5/Table/1/2/3
{e5(1, 2, 3, 1), e5(1, 2)}, // /Tenant/5/Table/1/2/3/1 -> /Tenant/5/Table/1/2
{e5(1, 2, 200, 2), e5(1, 2)}, // /Tenant/5/Table/1/2/200/2 -> /Tenant/5/Table/1/2
{e5(1, 2, 3, 4, 1), e5(1, 2, 3)}, // /Tenant/5/Table/1/2/3/4/1 -> /Tenant/5/Table/1/2/3
// Test cases using SQL encoding functions.
{MakeFamilyKey(tenSysCodec.IndexPrefix(1, 2), 0), es(1, 2)}, // /Table/1/2/0 -> /Table/1/2
{MakeFamilyKey(tenSysCodec.IndexPrefix(1, 2), 1), es(1, 2)}, // /Table/1/2/1 -> /Table/1/2
{MakeFamilyKey(encInt(tenSysCodec.IndexPrefix(1, 2), 3), 0), es(1, 2, 3)}, // /Table/1/2/3/0 -> /Table/1/2/3
{MakeFamilyKey(encInt(tenSysCodec.IndexPrefix(1, 2), 3), 1), es(1, 2, 3)}, // /Table/1/2/3/1 -> /Table/1/2/3
{MakeFamilyKey(ten5Codec.IndexPrefix(1, 2), 0), e5(1, 2)}, // /Tenant/5/Table/1/2/0 -> /Table/1/2
{MakeFamilyKey(ten5Codec.IndexPrefix(1, 2), 1), e5(1, 2)}, // /Tenant/5/Table/1/2/1 -> /Table/1/2
{MakeFamilyKey(encInt(ten5Codec.IndexPrefix(1, 2), 3), 0), e5(1, 2, 3)}, // /Tenant/5/Table/1/2/3/0 -> /Table/1/2/3
{MakeFamilyKey(encInt(ten5Codec.IndexPrefix(1, 2), 3), 1), e5(1, 2, 3)}, // /Tenant/5/Table/1/2/3/1 -> /Table/1/2/3
}
for i, d := range goodData {
out, err := EnsureSafeSplitKey(d.in)
Expand All @@ -572,18 +599,24 @@ func TestEnsureSafeSplitKey(t *testing.T) {
err string
}{
// Column ID suffix size is too large.
{e(1), "malformed table key"},
{e(1, 2), "malformed table key"},
{es(1), "malformed table key"},
{es(1, 2), "malformed table key"},
// The table ID is invalid.
{e(200)[:1], "insufficient bytes to decode uvarint value"},
{es(200)[:1], "insufficient bytes to decode uvarint value"},
// The index ID is invalid.
{e(1, 200)[:2], "insufficient bytes to decode uvarint value"},
{es(1, 200)[:2], "insufficient bytes to decode uvarint value"},
// The column ID suffix is invalid.
{e(1, 2, 200)[:3], "insufficient bytes to decode uvarint value"},
{es(1, 2, 200)[:3], "insufficient bytes to decode uvarint value"},
// Exercises a former overflow bug. We decode a uint(18446744073709551610) which, if casted
// to int carelessly, results in -6.
{encoding.EncodeVarintAscending(SystemSQLCodec.TablePrefix(999), 322434), "malformed table key"},
// TODO(nvanbenschoten): add test cases for tenant keys.
{encoding.EncodeVarintAscending(tenSysCodec.TablePrefix(999), 322434), "malformed table key"},
// Same test cases, but for tenant 5.
{e5(1), "malformed table key"},
{e5(1, 2), "malformed table key"},
{e5(200)[:3], "insufficient bytes to decode uvarint value"},
{e5(1, 200)[:4], "insufficient bytes to decode uvarint value"},
{e5(1, 2, 200)[:5], "insufficient bytes to decode uvarint value"},
{encoding.EncodeVarintAscending(ten5Codec.TablePrefix(999), 322434), "malformed table key"},
}
for i, d := range errorData {
_, err := EnsureSafeSplitKey(d.in)
Expand Down
2 changes: 1 addition & 1 deletion pkg/keys/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func MakeTenantPrefix(tenID roachpb.TenantID) roachpb.Key {
if tenID == roachpb.SystemTenantID {
return nil
}
return encoding.EncodeUvarintAscending(tenantPrefix, tenID.ToUint64())
return encoding.EncodeUvarintAscending(TenantPrefix, tenID.ToUint64())
}

// DecodeTenantPrefix determines the tenant ID from the key prefix, returning
Expand Down
Loading