Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
98399: roachtest: move HealthChecker, DiskUsageTracker/Logger to roachtestutil pkg r=benbardin a=msbutler

 Epic: none

 Release note: none

98593: sql: use lower admission control priority in schema changes r=ajwerner a=ajwerner

This will change the transaction priority for schema changes and the corresponding validation transactions to be run at the BulkPri priority level.

Epic: none

Fixes: #98592

Release note: None

98625: skip-test: add 'Epic: None' to commit msg r=smg260 a=smg260

.. to satisfy `lint-epic-issue-refs` when a skip test PR is created

Epic: none

Release note: None

98675: storage: enable flushable ingestion r=nicktrav a=jbowens

Add a new cluster version that bumps the Pebble format major version to FormatFlushableIngest, enabling the use of flushable ingestions. Future work will introduce a cluster setting allowing disabling flushable ingestions.

Epic: None
Release note: None
Informs #97194.

Co-authored-by: Michael Butler <butler@cockroachlabs.com>
Co-authored-by: ajwerner <awerner32@gmail.com>
Co-authored-by: Miral Gadani <miral@cockroachlabs.com>
Co-authored-by: Jackson Owens <jackson@cockroachlabs.com>
  • Loading branch information
5 people committed Mar 15, 2023
5 parents f8ca1e6 + a525208 + ae71a42 + 184d216 + d22420f commit 125aa4a
Show file tree
Hide file tree
Showing 20 changed files with 389 additions and 316 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,4 +299,4 @@ trace.opentelemetry.collector string address of an OpenTelemetry trace collecto
trace.snapshot.rate duration 0s if non-zero, interval at which background trace snapshots are captured
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 1000022.2-80 set the active cluster version in the format '<major>.<minor>'
version version 1000022.2-82 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,6 @@
<tr><td><div id="setting-trace-snapshot-rate" class="anchored"><code>trace.snapshot.rate</code></div></td><td>duration</td><td><code>0s</code></td><td>if non-zero, interval at which background trace snapshots are captured</td></tr>
<tr><td><div id="setting-trace-span-registry-enabled" class="anchored"><code>trace.span_registry.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://&lt;ui&gt;/#/debug/tracez</td></tr>
<tr><td><div id="setting-trace-zipkin-collector" class="anchored"><code>trace.zipkin.collector</code></div></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as &lt;host&gt;:&lt;port&gt;. If no port is specified, 9411 will be used.</td></tr>
<tr><td><div id="setting-version" class="anchored"><code>version</code></div></td><td>version</td><td><code>1000022.2-80</code></td><td>set the active cluster version in the format &#39;&lt;major&gt;.&lt;minor&gt;&#39;</td></tr>
<tr><td><div id="setting-version" class="anchored"><code>version</code></div></td><td>version</td><td><code>1000022.2-82</code></td><td>set the active cluster version in the format &#39;&lt;major&gt;.&lt;minor&gt;&#39;</td></tr>
</tbody>
</table>
3 changes: 2 additions & 1 deletion pkg/ccl/backupccl/restore_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/interval"
Expand Down Expand Up @@ -1876,7 +1877,7 @@ func revalidateIndexes(
runner := descs.NewHistoricalInternalExecTxnRunner(hlc.Timestamp{}, func(ctx context.Context, fn descs.InternalExecFn) error {
return execCfg.InternalDB.DescsTxn(ctx, func(ctx context.Context, txn descs.Txn) error {
return fn(ctx, txn)
})
}, isql.WithPriority(admissionpb.BulkNormalPri))
})

invalidIndexes := make(map[descpb.ID][]descpb.IndexID)
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/testdata/declarative-rules/deprules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
dep
----
debug declarative-print-rules 1000022.2-80 dep
debug declarative-print-rules 1000022.2-82 dep
deprules
----
- name: 'CheckConstraint transitions to ABSENT uphold 2-version invariant: PUBLIC->VALIDATED'
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/testdata/declarative-rules/oprules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
op
----
debug declarative-print-rules 1000022.2-80 op
debug declarative-print-rules 1000022.2-82 op
rules
----
[]
8 changes: 8 additions & 0 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,10 @@ const (
// progress of each job in the system.jobs table.
V23_1JobInfoTableIsBackfilled

// V23_1EnableFlushableIngest upgrades the Pebble format major version to
// FormatFlushableIngest, which enables use of flushable ingestion.
V23_1EnableFlushableIngest

// *************************************************
// Step (1): Add new versions here.
// Do not add new versions to a patch release.
Expand Down Expand Up @@ -837,6 +841,10 @@ var rawVersionsSingleton = keyedVersions{
Key: V23_1JobInfoTableIsBackfilled,
Version: roachpb.Version{Major: 22, Minor: 2, Internal: 80},
},
{
Key: V23_1EnableFlushableIngest,
Version: roachpb.Version{Major: 22, Minor: 2, Internal: 82},
},

// *************************************************
// Step (2): Add new versions here.
Expand Down
7 changes: 7 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,22 @@ go_library(
name = "roachtestutil",
srcs = [
"commandbuilder.go",
"disk_usage.go",
"health_checker.go",
"jaeger.go",
"validation_check.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil",
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/option",
"//pkg/cmd/roachtest/test",
"//pkg/roachprod/install",
"//pkg/roachprod/logger",
"//pkg/testutils/sqlutils",
"//pkg/util/humanizeutil",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
],
)
Expand Down
183 changes: 183 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/disk_usage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package roachtestutil

import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/errors"
)

// DiskUsageLogger regularly logs the disk space used by the nodes in the cluster.
type DiskUsageLogger struct {
// t is the test whose logger the child loggers used by Runner are derived from.
t test.Test
// c is the cluster whose nodes are polled for disk usage.
c cluster.Cluster
// doneCh is closed by Done to tell Runner to return.
doneCh chan struct{}
}

// NewDiskUsageLogger builds a DiskUsageLogger bound to the given test and
// cluster, with a freshly allocated (open) done channel.
func NewDiskUsageLogger(t test.Test, c cluster.Cluster) *DiskUsageLogger {
	dul := new(DiskUsageLogger)
	dul.t, dul.c = t, c
	dul.doneCh = make(chan struct{})
	return dul
}

// Done instructs the Runner to terminate by closing the done channel. It must
// be called at most once: closing an already-closed channel panics.
func (dul *DiskUsageLogger) Done() {
close(dul.doneCh)
}

// Runner blocks, waking once a minute to log the disk usage of every node in
// the cluster, largest first. It returns nil once Done() has been called, the
// context's error if ctx is done first, or an error if either child logger
// cannot be created.
func (dul *DiskUsageLogger) Runner(ctx context.Context) error {
	// nodeUsage pairs a one-based node index with its measured usage in bytes.
	type nodeUsage struct {
		nodeNum int
		bytes   int
	}

	l, err := dul.t.L().ChildLogger("diskusage")
	if err != nil {
		return err
	}
	// The per-node commands log through a logger that keeps their output off
	// stdout and stderr.
	quietLogger, err := dul.t.L().ChildLogger("diskusage-exec", logger.QuietStdout, logger.QuietStderr)
	if err != nil {
		return err
	}

	// report takes one measurement of every node and logs a single summary line.
	report := func() {
		var measured []nodeUsage
		for node := 1; node <= dul.c.Spec().NodeCount; node++ {
			size, err := GetDiskUsageInBytes(ctx, dul.c, quietLogger, node)
			if err != nil {
				// This can trigger spuriously as compactions remove files out from under `du`.
				l.Printf("%s", errors.Wrapf(err, "node #%d", node))
				// Failed nodes are recorded as -1 so they sort last.
				size = -1
			}
			measured = append(measured, nodeUsage{nodeNum: node, bytes: size})
		}

		// Largest usage first.
		sort.Slice(measured, func(a, b int) bool {
			return measured[b].bytes < measured[a].bytes
		})

		var parts []string
		for _, m := range measured {
			human := humanizeutil.IBytes(int64(m.bytes))
			parts = append(parts, fmt.Sprintf("n#%d: %s", m.nodeNum, human))
		}
		l.Printf("%s\n", strings.Join(parts, ", "))
	}

	tick := time.NewTicker(time.Minute)
	defer tick.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-dul.doneCh:
			return nil
		case <-tick.C:
			report()
		}
	}
}

// DiskUsageTracker can grab the disk usage of the provided cluster.
//
// TODO(msbutler): deprecate this, once restore roachtests also use prom setup.
type DiskUsageTracker struct {
// c is the cluster whose nodes are queried for disk usage.
c cluster.Cluster
// l is the logger handed to the disk usage commands; failure messages from
// GetDiskUsage are also written to it.
l *logger.Logger
}

// GetDiskUsage sums the disk usage for the given nodes in megabytes.
//
// Per-node figures come from GetDiskUsageInBytes; the byte total is divided by
// 1e6 using integer division. If any node cannot be measured, the failure
// (including the underlying error) is logged and 0 is returned, so a zero
// result can mean "unknown" rather than "no data on disk".
func (du *DiskUsageTracker) GetDiskUsage(ctx context.Context, nodes option.NodeListOption) int {
	var totalBytes int
	for _, n := range nodes {
		cur, err := GetDiskUsageInBytes(ctx, du.c, du.l, n)
		if err != nil {
			// Include the cause so a zero result can be diagnosed from the log.
			du.l.Printf("Unable to get disk usage for node %d: %v", n, err)
			return 0
		}
		totalBytes += cur
	}
	return totalBytes / 1e6
}

// NewDiskUsageTracker returns a DiskUsageTracker for cluster c. The tracker
// logs through a "disk-usage" child of parentLogger created with
// logger.QuietStdout; an error is returned if that child logger cannot be
// created.
func NewDiskUsageTracker(
	c cluster.Cluster, parentLogger *logger.Logger,
) (*DiskUsageTracker, error) {
	tracker := &DiskUsageTracker{c: c}

	var err error
	if tracker.l, err = parentLogger.ChildLogger("disk-usage", logger.QuietStdout); err != nil {
		return nil, err
	}
	return tracker, nil
}

// GetDiskUsageInBytes returns the size of the store directory on the given
// node, in bytes. nodeIdx starts at one.
//
// The size is obtained by running `du -sk` on the node and multiplying the
// reported kibibyte count by 1024. A failing command is retried until it
// succeeds or ctx is done, in which case ctx.Err() is returned. An error is
// also returned if the command output cannot be parsed as an integer.
func GetDiskUsageInBytes(
	ctx context.Context, c cluster.Cluster, logger *logger.Logger, nodeIdx int,
) (int, error) {
	var result install.RunResultDetails
	for {
		var err error
		// `du` can warn if files get removed out from under it (which
		// happens during RocksDB compactions, for example). Discard its
		// stderr to avoid breaking Atoi later.
		// TODO(bdarnell): Refactor this stack to not combine stdout and
		// stderr so we don't need to do this (and the Warning check
		// below).
		result, err = c.RunWithDetailsSingleNode(
			ctx,
			logger,
			c.Node(nodeIdx),
			"du -sk {store-dir} 2>/dev/null | grep -oE '^[0-9]+'",
		)
		if err != nil {
			if ctx.Err() != nil {
				return 0, ctx.Err()
			}
			// If `du` fails, retry.
			// TODO(bdarnell): is this worth doing? It was originally added
			// because of the "files removed out from under it" problem, but
			// that doesn't result in a command failure, just a stderr
			// message.
			logger.Printf("retrying disk usage computation after spurious error: %s", err)
			continue
		}

		break
	}

	// We need this check because sometimes the first line of the roachprod output is a warning
	// about adding an ip to a list of known hosts.
	if strings.Contains(result.Stdout, "Warning") {
		lines := strings.Split(result.Stdout, "\n")
		// Guard against a warning with nothing after it; indexing the second
		// line unconditionally would panic.
		if len(lines) < 2 {
			return 0, errors.Newf("unexpected disk usage output: %q", result.Stdout)
		}
		result.Stdout = lines[1]
	}

	size, err := strconv.Atoi(strings.TrimSpace(result.Stdout))
	if err != nil {
		return 0, err
	}

	// `du -k` reports 1024-byte units.
	return size * 1024, nil
}
Loading

0 comments on commit 125aa4a

Please sign in to comment.