Skip to content

Commit

Permalink
Merge #79546 #80182
Browse files Browse the repository at this point in the history
79546: [CRDB-2743] sql: create builltin generator crdb_internal.probe_range r=Santamaura a=Santamaura

Previously there was difficulty in diagnosing kv layer health when
an incident occurs. This patch introduces the new virtual table
crdb_internal.probe_range which utilizes the kvprober to probe
each range to determine if the range can be reached or not.

resolves #61695

Release note: None

![Screen Shot 2022-04-20 at 1 15 17 PM](https://user-images.githubusercontent.com/17861665/164293795-d1021d6b-4eca-46f3-8ebb-37389a094301.png)



80182: backupccl: Fix SHOW BACKUPS on a bucket's base directory. r=benbardin a=benbardin

Fixes #77930. Tested on GS and AWS.

Release note (enterprise change): Fix bug where backups in the base directory of a cloud storage bucket would not be discovered by SHOW BACKUPS. These backups will now appear correctly.

Co-authored-by: Santamaura <santamaura@cockroachlabs.com>
Co-authored-by: Ben Bardin <bardin@cockroachlabs.com>
  • Loading branch information
3 people committed Apr 25, 2022
3 parents 0059cd6 + 5a951cd + e7a03f5 commit 6bbb282
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 14 deletions.
19 changes: 19 additions & 0 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -3155,6 +3155,25 @@ table. Returns an error if validation fails.</p>
</span></td></tr></tbody>
</table>

### TUPLE{INT AS RANGE_ID, STRING AS ERROR, INT AS END_TO_END_LATENCY_MS, STRING AS VERBOSE_TRACE} functions

<table>
<thead><tr><th>Function &rarr; Returns</th><th>Description</th></tr></thead>
<tbody>
<tr><td><a name="crdb_internal.probe_ranges"></a><code>crdb_internal.probe_ranges(timeout: <a href="interval.html">interval</a>, probe_type: unknown_enum) &rarr; tuple{int AS range_id, string AS error, int AS end_to_end_latency_ms, string AS verbose_trace}</code></td><td><span class="funcdesc"><p>Returns rows of range data based on the results received when using the prober.
Parameters
timeout: interval for the maximum time the user wishes the prober to probe a range.
probe_type: enum indicating which kind of probe the prober should conduct (options are read or write).
Example usage
number of failed write probes: select count(1) from crdb_internal.probe_ranges(INTERVAL ‘1000ms’, ‘write’) where error != ‘’;
50 slowest probes: select range_id, error, end_to_end_latency_ms from crdb_internal.probe_ranges(INTERVAL ‘1000ms’, true) order by end_to_end_latency_ms desc limit 50;
Notes
If a probe should fail, the latency will be set to MaxInt64 in order to naturally sort above other latencies.
Read probes are cheaper than write probes. If write probes have already ran, it’s not necessary to also run a read probe.
A write probe will effectively probe reads as well.</p>
</span></td></tr></tbody>
</table>

### UUID functions

<table>
Expand Down
11 changes: 8 additions & 3 deletions pkg/ccl/backupccl/manifest_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"net/url"
"path"
"regexp"
"sort"
"strings"

Expand Down Expand Up @@ -102,6 +103,11 @@ const (
backupMetadataDirectory = "metadata"
)

// On some cloud storage platforms (i.e. GS, S3), backups in a base bucket may
// omit a leading slash. However, backups in a subdirectory of a base bucket
// will contain one.
var backupPathRE = regexp.MustCompile("^/?[^\\/]+/[^\\/]+/[^\\/]+/" + backupManifestName + "$")

var writeMetadataSST = settings.RegisterBoolSetting(
settings.TenantWritable,
"kv.bulkio.write_metadata_sst.enabled",
Expand Down Expand Up @@ -1128,13 +1134,12 @@ func ListFullBackupsInCollection(
) ([]string, error) {
var backupPaths []string
if err := store.List(ctx, "", listingDelimDataSlash, func(f string) error {
if ok, err := path.Match("/*/*/*/"+backupManifestName, f); err != nil {
return err
} else if ok {
if backupPathRE.MatchString(f) {
backupPaths = append(backupPaths, f)
}
return nil
}); err != nil {
// Can't happen, just required to handle the error for lint.
return nil, err
}
for i, backupPath := range backupPaths {
Expand Down
21 changes: 10 additions & 11 deletions pkg/kv/kvprober/kvprober.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@ type Metrics struct {
ProbePlanFailures *metric.Counter
}

// proberOps is an interface that the prober will use to run ops against some
// proberOpsI is an interface that the prober will use to run ops against some
// system. This interface exists so that ops can be mocked for tests.
type proberOps interface {
type proberOpsI interface {
Read(key interface{}) func(context.Context, *kv.Txn) error
Write(key interface{}) func(context.Context, *kv.Txn) error
}
Expand All @@ -157,12 +157,11 @@ type proberTxn interface {
TxnRootKV(context.Context, func(context.Context, *kv.Txn) error) error
}

// proberOpsImpl is used to probe the kv layer.
type proberOpsImpl struct {
}
// ProberOps collects the methods used to probe the KV layer.
type ProberOps struct{}

// We attempt to commit a txn that reads some data at the key.
func (p *proberOpsImpl) Read(key interface{}) func(context.Context, *kv.Txn) error {
func (p *ProberOps) Read(key interface{}) func(context.Context, *kv.Txn) error {
return func(ctx context.Context, txn *kv.Txn) error {
_, err := txn.Get(ctx, key)
return err
Expand All @@ -176,7 +175,7 @@ func (p *proberOpsImpl) Read(key interface{}) func(context.Context, *kv.Txn) err
// there is no need to clean up data at the key post range split / merge.
// Note that MVCC tombstones may be left by the probe, but this is okay, as
// GC will clean it up.
func (p *proberOpsImpl) Write(key interface{}) func(context.Context, *kv.Txn) error {
func (p *ProberOps) Write(key interface{}) func(context.Context, *kv.Txn) error {
return func(ctx context.Context, txn *kv.Txn) error {
// Use a single batch so that the entire txn requires a single pass
// through Raft. It's not strictly necessary that we Put before we
Expand Down Expand Up @@ -277,10 +276,10 @@ func (p *Prober) Start(ctx context.Context, stopper *stop.Stopper) error {

// Doesn't return an error. Instead increments error type specific metrics.
func (p *Prober) readProbe(ctx context.Context, db *kv.DB, pl planner) {
p.readProbeImpl(ctx, &proberOpsImpl{}, &proberTxnImpl{db: p.db}, pl)
p.readProbeImpl(ctx, &ProberOps{}, &proberTxnImpl{db: p.db}, pl)
}

func (p *Prober) readProbeImpl(ctx context.Context, ops proberOps, txns proberTxn, pl planner) {
func (p *Prober) readProbeImpl(ctx context.Context, ops proberOpsI, txns proberTxn, pl planner) {
if !readEnabled.Get(&p.settings.SV) {
return
}
Expand Down Expand Up @@ -335,10 +334,10 @@ func (p *Prober) readProbeImpl(ctx context.Context, ops proberOps, txns proberTx

// Doesn't return an error. Instead increments error type specific metrics.
func (p *Prober) writeProbe(ctx context.Context, db *kv.DB, pl planner) {
p.writeProbeImpl(ctx, &proberOpsImpl{}, &proberTxnImpl{db: p.db}, pl)
p.writeProbeImpl(ctx, &ProberOps{}, &proberTxnImpl{db: p.db}, pl)
}

func (p *Prober) writeProbeImpl(ctx context.Context, ops proberOps, txns proberTxn, pl planner) {
func (p *Prober) writeProbeImpl(ctx context.Context, ops proberOpsI, txns proberTxn, pl planner) {
if !writeEnabled.Get(&p.settings.SV) {
return
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/generator_probe_ranges
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# LogicTest: local fakedist

query ITIT colnames
SELECT * FROM crdb_internal.probe_ranges(INTERVAL '1000ms', 'read') WHERE range_id < 0
----
range_id error end_to_end_latency_ms verbose_trace

query I
SELECT count(1) FROM crdb_internal.probe_ranges(INTERVAL '1000ms', 'read') WHERE error != ''
----
0

# Test that the trace has a string matching `proposing command` to verify trace events
# from the kvserver write path are received.
query I
SELECT count(1) FROM crdb_internal.probe_ranges(INTERVAL '1000ms', 'write') WHERE range_id = 1 AND verbose_trace LIKE '%proposing command%'
----
1
4 changes: 4 additions & 0 deletions pkg/sql/sem/builtins/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ go_library(
"all_builtins.go",
"builtins.go",
"generator_builtins.go",
"generator_probe_ranges.go",
"geo_builtins.go",
"math_builtins.go",
"notice.go",
Expand Down Expand Up @@ -39,6 +40,7 @@ go_library(
"//pkg/keys",
"//pkg/kv",
"//pkg/kv/kvclient",
"//pkg/kv/kvprober",
"//pkg/kv/kvserver/kvserverbase",
"//pkg/roachpb",
"//pkg/security",
Expand All @@ -50,6 +52,7 @@ go_library(
"//pkg/sql/catalog/catconstants",
"//pkg/sql/catalog/descpb",
"//pkg/sql/catalog/descs",
"//pkg/sql/catalog/typedesc",
"//pkg/sql/colexecerror",
"//pkg/sql/lex",
"//pkg/sql/lexbase",
Expand Down Expand Up @@ -77,6 +80,7 @@ go_library(
"//pkg/util",
"//pkg/util/arith",
"//pkg/util/bitarray",
"//pkg/util/contextutil",
"//pkg/util/duration",
"//pkg/util/encoding",
"//pkg/util/errorutil",
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sem/builtins/all_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func init() {
initOverlapsBuiltins()
initReplicationBuiltins()
initPgcryptoBuiltins()
initProbeRangesBuiltins()

AllBuiltinNames = make([]string, 0, len(builtins))
AllAggregateBuiltinNames = make([]string, 0, len(aggregates))
Expand Down
Loading

0 comments on commit 6bbb282

Please sign in to comment.