Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.1: roachtest: use highmem instances for some tests #88553

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pkg/cmd/roachtest/spec/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type ClusterSpec struct {
NodeCount int
// CPUs is the number of CPUs per node.
CPUs int
HighMem bool
SSDs int
RAID0 bool
VolumeSize int
Expand Down Expand Up @@ -81,6 +82,9 @@ func ClustersCompatible(s1, s2 ClusterSpec) bool {
// String implements fmt.Stringer.
func (s ClusterSpec) String() string {
str := fmt.Sprintf("n%dcpu%d", s.NodeCount, s.CPUs)
if s.HighMem {
str += "m"
}
if s.Geo {
str += "-Geo"
}
Expand Down Expand Up @@ -183,11 +187,11 @@ func (s *ClusterSpec) RoachprodOpts(
// based on the cloud and CPU count.
switch s.Cloud {
case AWS:
machineType = AWSMachineType(s.CPUs)
machineType = AWSMachineType(s.CPUs, s.HighMem)
case GCE:
machineType = GCEMachineType(s.CPUs)
machineType = GCEMachineType(s.CPUs, s.HighMem)
case Azure:
machineType = AzureMachineType(s.CPUs)
machineType = AzureMachineType(s.CPUs, s.HighMem)
}
}

Expand Down
53 changes: 37 additions & 16 deletions pkg/cmd/roachtest/spec/machine_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,63 @@ package spec
import "fmt"

// AWSMachineType selects a machine type given the desired number of CPUs.
//
// In the two-argument form, highmem switches the instance family from c5d to
// m5d. The CPU count is matched against ascending upper bounds (2, 4, 8, 16,
// 36, 72, 96), so a request is rounded up to the first size that covers it.
// The function panics when cpus exceeds 96. The result has the form
// "<family>.<size>".
func AWSMachineType(cpus int) string {
func AWSMachineType(cpus int, highmem bool) string {
// TODO(erikgrinaker): These have significantly less RAM than
// their GCE counterparts. Consider harmonizing them.
family := "c5d" // 2 GB RAM per CPU
if highmem {
family = "m5d" // 4 GB RAM per CPU
}

var size string
// Cases are evaluated in order, so the first bound that is >= cpus wins.
switch {
case cpus <= 2:
return "c5d.large"
size = "large"
case cpus <= 4:
return "c5d.xlarge"
size = "xlarge"
case cpus <= 8:
return "c5d.2xlarge"
size = "2xlarge"
case cpus <= 16:
return "c5d.4xlarge"
size = "4xlarge"
// NOTE(review): with highmem set, the next two sizes are combined with the
// m5d family (m5d.9xlarge, m5d.18xlarge) — confirm that AWS offers these
// sizes for m5d.
case cpus <= 36:
return "c5d.9xlarge"
size = "9xlarge"
case cpus <= 72:
return "c5d.18xlarge"
size = "18xlarge"
case cpus <= 96:
// There is no c5d.24xlarge.
return "m5d.24xlarge"
size = "24xlarge"
default:
panic(fmt.Sprintf("no aws machine type with %d cpus", cpus))
}

// There is no c5d.24xlarge.
// The largest size therefore uses the m5d family even when highmem is false.
if family == "c5d" && size == "24xlarge" {
family = "m5d"
}

return fmt.Sprintf("%s.%s", family, size)
}

// GCEMachineType selects a machine type given the desired number of CPUs.
//
// In the two-argument form the result is "n1-<kind>-<cpus>", where kind is
// "highmem" when highmem is set, otherwise "standard" for cpus <= 16 and
// "highcpu" for cpus > 16.
func GCEMachineType(cpus int) string {
// TODO(peter): This is awkward: below 16 cpus, use n1-standard so that the
// machines have a decent amount of RAM. We could use customer machine
func GCEMachineType(cpus int, highmem bool) string {
// TODO(peter): This is awkward: at or below 16 cpus, use n1-standard so that
// the machines have a decent amount of RAM. We could use custom machine
// configurations, but the rules for the amount of RAM per CPU need to be
// determined (you can't request any arbitrary amount of RAM).
if cpus < 16 {
return fmt.Sprintf("n1-standard-%d", cpus)
series := "n1"
kind := "standard" // 3.75 GB RAM per CPU
// highmem takes precedence: the highcpu branch is only reached when it is false.
if highmem {
kind = "highmem" // 6.5 GB RAM per CPU
} else if cpus > 16 {
kind = "highcpu" // 0.9 GB RAM per CPU
}
return fmt.Sprintf("n1-highcpu-%d", cpus)
// NOTE(review): cpus is formatted into the name as-is, with no rounding or
// validation — presumably callers pass a count that GCE offers for the
// chosen kind; confirm.
return fmt.Sprintf("%s-%s-%d", series, kind, cpus)
}

// AzureMachineType selects a machine type given the desired number of CPUs.
func AzureMachineType(cpus int) string {
func AzureMachineType(cpus int, highmem bool) string {
if highmem {
panic("highmem not implemented for Azure")
}
switch {
case cpus <= 2:
return "Standard_D2_v3"
Expand Down
11 changes: 11 additions & 0 deletions pkg/cmd/roachtest/spec/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ func CPU(n int) Option {
return nodeCPUOption(n)
}

// nodeHighMemOption is the Option returned by HighMem; its boolean value is
// the setting stored in ClusterSpec.HighMem.
type nodeHighMemOption bool

// apply writes the option's value into spec.HighMem.
func (o nodeHighMemOption) apply(spec *ClusterSpec) {
	highMem := bool(o)
	spec.HighMem = highMem
}

// HighMem returns an Option that sets the cluster spec's HighMem flag to
// enabled; passing true requests nodes with additional memory per CPU.
func HighMem(enabled bool) Option {
	opt := nodeHighMemOption(enabled)
	return opt
}

type volumeSizeOption int

func (o volumeSizeOption) apply(spec *ClusterSpec) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/tests/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,8 @@ func registerRestore(r registry.Registry) {
clusterOpts = append(clusterOpts, spec.VolumeSize(largeVolumeSize))
testName += fmt.Sprintf("/pd-volume=%dGB", largeVolumeSize)
}
// Has been seen to OOM: https://github.com/cockroachdb/cockroach/issues/71805
clusterOpts = append(clusterOpts, spec.HighMem(true))

r.Add(registry.TestSpec{
Name: testName,
Expand Down
4 changes: 3 additions & 1 deletion pkg/cmd/roachtest/tests/tpcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,7 @@ func registerTPCC(r registry.Registry) {
registerTPCCBenchSpec(r, tpccBenchSpec{
Nodes: 9,
CPUs: 4,
HighMem: true, // can OOM otherwise: https://github.com/cockroachdb/cockroach/issues/73376
Distribution: multiRegion,
LoadConfig: multiLoadgen,

Expand Down Expand Up @@ -829,6 +830,7 @@ func (l tpccBenchLoadConfig) numLoadNodes(d tpccBenchDistribution) int {
type tpccBenchSpec struct {
Nodes int
CPUs int
HighMem bool
Chaos bool
AdmissionControlDisabled bool
Distribution tpccBenchDistribution
Expand Down Expand Up @@ -891,7 +893,7 @@ func registerTPCCBenchSpec(r registry.Registry, b tpccBenchSpec) {
nameParts = append(nameParts, "no-admission")
}

opts := []spec.Option{spec.CPU(b.CPUs)}
opts := []spec.Option{spec.CPU(b.CPUs), spec.HighMem(b.HighMem)}
switch b.Distribution {
case singleZone:
// No specifier.
Expand Down