Skip to content

Commit

Permalink
Merge #32475
Browse files Browse the repository at this point in the history
32475: roachprod: introduce a --local-ssd-no-ext4-barrier flag for create r=andreimatei a=andreimatei

Use of nobarrier is common for roachprod clusters. It might even
become the default for roachtest clusters. This patch brings first-class
support for it into roachprod, which learns to mount a drive
accordingly.

Release note: None

Co-authored-by: Andrei Matei <andrei@cockroachlabs.com>
  • Loading branch information
craig[bot] and andreimatei committed Nov 26, 2018
2 parents a7e13e8 + e30ef35 commit 69ad6b8
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 30 deletions.
6 changes: 5 additions & 1 deletion pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1336,8 +1336,12 @@ func main() {

createCmd.Flags().DurationVarP(&createVMOpts.Lifetime,
"lifetime", "l", 12*time.Hour, "Lifetime of the cluster")
createCmd.Flags().BoolVar(&createVMOpts.UseLocalSSD,
createCmd.Flags().BoolVar(&createVMOpts.SSDOpts.UseLocalSSD,
"local-ssd", true, "Use local SSD")
createCmd.Flags().BoolVar(&createVMOpts.SSDOpts.NoExt4Barrier,
"local-ssd-no-ext4-barrier", false,
`Mount the local SSD with the "-o nobarrier" flag. `+
`Ignored if --local-ssd=false is specified.`)
createCmd.Flags().IntVarP(&numNodes,
"nodes", "n", 4, "Total number of nodes, distributed across all clouds")
createCmd.Flags().StringSliceVarP(&createVMOpts.VMProviders,
Expand Down
22 changes: 19 additions & 3 deletions pkg/cmd/roachprod/vm/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
}

var machineType string
if opts.UseLocalSSD {
if opts.SSDOpts.UseLocalSSD {
machineType = p.opts.SSDMachineType
} else {
machineType = p.opts.MachineType
Expand Down Expand Up @@ -553,6 +553,22 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
_ = data.Instances[0].InstanceID // silence unused warning
}

// Create AWS startup script file.
extraMountOpts := ""
// Dynamic args.
if opts.SSDOpts.UseLocalSSD {
if opts.SSDOpts.NoExt4Barrier {
extraMountOpts = "nobarrier"
}
}
filename, err := writeStartupScript(extraMountOpts)
if err != nil {
return errors.Wrapf(err, "could not write GCE startup script to temp file")
}
defer func() {
_ = os.Remove(filename)
}()

args := []string{
"ec2", "run-instances",
"--associate-public-ip-address",
Expand All @@ -564,11 +580,11 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
"--security-group-ids", sgID,
"--subnet-id", subnetID,
"--tag-specifications", tagSpecs,
"--user-data", awsStartupScript,
"--user-data", "file://" + filename,
}

// The local NVMe devices are automatically mapped. Otherwise, we need to map an EBS data volume.
if !opts.UseLocalSSD {
if !opts.SSDOpts.UseLocalSSD {
args = append(args,
"--block-device-mapping",
// Size is measured in GB. gp2 type derives guaranteed iops from size.
Expand Down
54 changes: 46 additions & 8 deletions pkg/cmd/roachprod/vm/aws/support.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,35 @@ package aws

import (
	"encoding/json"
	"io/ioutil"
	"log"
	"os"
	"os/exec"
	"strings"
	"text/template"

	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/vm"
	"github.com/pkg/errors"
)

// Both M5 and I3 machines expose their EBS or local SSD volumes as NVMe block devices, but
// the actual device numbers vary a bit between the two types.
// This user-data script will create a filesystem, mount the data volume, and chmod 777.
// Both M5 and I3 machines expose their EBS or local SSD volumes as NVMe block
// devices, but the actual device numbers vary a bit between the two types.
// This user-data script will create a filesystem, mount the data volume, and
// chmod 777.
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html
const awsStartupScript = `#!/usr/bin/env bash
//
// This is a template because the instantiator needs to optionally configure the
// mounting options. The script cannot take arguments since it is to be invoked
// by the aws tool which cannot pass args.
const awsStartupScriptTemplate = `#!/usr/bin/env bash
# Script for setting up a GCE machine for roachprod use.
set -x
sudo apt-get update
sudo apt-get install -qy --no-install-recommends mdadm
mount_opts="discard,defaults"
{{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}}
disks=()
mountpoint="/mnt/data1"
for d in $(ls /dev/nvme?n1); do
Expand All @@ -53,18 +65,18 @@ elif [ "${#disks[@]}" -eq "1" ]; then
mkdir -p ${mountpoint}
disk=${disks[0]}
mkfs.ext4 -E nodiscard ${disk}
mount -o discard,defaults ${disk} ${mountpoint}
mount -o ${mount_opts} ${disk} ${mountpoint}
chmod 777 ${mountpoint}
echo "${disk} ${mountpoint} ext4 discard,defaults 1 1" | tee -a /etc/fstab
echo "${disk} ${mountpoint} ext4 ${mount_opts} 1 1" | tee -a /etc/fstab
else
echo "${#disks[@]} disks mounted, creating ${mountpoint} using RAID 0"
mkdir -p ${mountpoint}
raiddisk="/dev/md0"
mdadm --create ${raiddisk} --level=0 --raid-devices=${#disks[@]} "${disks[@]}"
mkfs.ext4 -E nodiscard ${raiddisk}
mount -o discard,defaults ${raiddisk} ${mountpoint}
mount -o ${mount_opts} ${raiddisk} ${mountpoint}
chmod 777 ${mountpoint}
echo "${raiddisk} ${mountpoint} ext4 discard,defaults 1 1" | tee -a /etc/fstab
echo "${raiddisk} ${mountpoint} ext4 ${mount_opts} 1 1" | tee -a /etc/fstab
fi
sudo apt-get install -qy chrony
Expand All @@ -80,6 +92,32 @@ sudo sh -c 'echo "root - nofile 65536\n* - nofile 65536" > /etc/security/limits.
sudo touch /mnt/data1/.roachprod-initialized
`

// writeStartupScript renders awsStartupScriptTemplate into a newly created
// temp file and returns the path to that file.
//
// extraMountOpts, if not empty, is appended to the default mount options. It is
// a comma-separated list of options for the "mount -o" flag.
//
// On success the caller owns the file and should delete it after use. On
// failure the temp file is removed here, since the caller never learns its
// path and could not clean it up.
func writeStartupScript(extraMountOpts string) (string, error) {
	type tmplParams struct {
		ExtraMountOpts string
	}

	args := tmplParams{ExtraMountOpts: extraMountOpts}

	// Parse before creating the file so that a malformed template (a
	// programming error, hence the panic via Must) cannot leave a stray file
	// behind.
	t := template.Must(template.New("start").Parse(awsStartupScriptTemplate))

	tmpfile, err := ioutil.TempFile("", "aws-startup-script")
	if err != nil {
		return "", err
	}

	err = t.Execute(tmpfile, args)
	// Always close, but report a Close failure only when rendering itself
	// succeeded, so the first error is the one the caller sees.
	if cerr := tmpfile.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best-effort cleanup: the original error is more useful to the
		// caller than a secondary failure to remove the file.
		_ = os.Remove(tmpfile.Name())
		return "", err
	}
	return tmpfile.Name(), nil
}

// runCommand is used to invoke an AWS command for which no output is expected.
func runCommand(args []string) error {
cmd := exec.Command("aws", args...)
Expand Down
25 changes: 15 additions & 10 deletions pkg/cmd/roachprod/vm/gce/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,6 @@ func (p *Provider) Create(names []string, opts vm.CreateOpts) error {
"`roachprod gc --gce-project=%s` cronjob\n", p.opts.Project)
}

// Create GCE startup script file.
filename, err := writeStartupScript()
if err != nil {
return errors.Wrapf(err, "could not write GCE startup script to temp file")
}
defer func() {
_ = os.Remove(filename)
}()

if !opts.GeoDistributed {
p.opts.Zones = []string{p.opts.Zones[0]}
}
Expand Down Expand Up @@ -262,10 +253,24 @@ func (p *Provider) Create(names []string, opts vm.CreateOpts) error {
args = append(args, "--service-account", p.opts.ServiceAccount)
}

extraMountOpts := ""
// Dynamic args.
if opts.UseLocalSSD {
if opts.SSDOpts.UseLocalSSD {
args = append(args, "--local-ssd", "interface=SCSI")
if opts.SSDOpts.NoExt4Barrier {
extraMountOpts = "nobarrier"
}
}

// Create GCE startup script file.
filename, err := writeStartupScript(extraMountOpts)
if err != nil {
return errors.Wrapf(err, "could not write GCE startup script to temp file")
}
defer func() {
_ = os.Remove(filename)
}()

args = append(args, "--machine-type", p.opts.MachineType)
args = append(args, "--labels", fmt.Sprintf("lifetime=%s", opts.Lifetime))

Expand Down
33 changes: 26 additions & 7 deletions pkg/cmd/roachprod/vm/gce/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,22 @@ package gce

import (
"io/ioutil"
"text/template"
)

// Startup script used to find/format/mount all local SSDs in GCE.
// Each disk is mounted to /mnt/data<disknum> and chmoded to all users.
const gceLocalSSDStartupScript = `#!/usr/bin/env bash
//
// This is a template because the instantiator needs to optionally configure the
// mounting options. The script cannot take arguments since it is to be invoked
// by the gcloud tool which cannot pass args.
const gceLocalSSDStartupScriptTemplate = `#!/usr/bin/env bash
# Script for setting up a GCE machine for roachprod use.
mount_opts="discard,defaults"
{{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}}
disknum=0
# Assume google.
for d in $(ls /dev/disk/by-id/google-local-ssd-*); do
let "disknum++"
grep -e "${d}" /etc/fstab > /dev/null
Expand All @@ -32,8 +41,8 @@ for d in $(ls /dev/disk/by-id/google-local-ssd-*); do
mountpoint="/mnt/data${disknum}"
sudo mkdir -p "${mountpoint}"
sudo mkfs.ext4 -F ${d}
sudo mount -o discard,defaults ${d} ${mountpoint}
echo "${d} ${mountpoint} ext4 discard,defaults 1 1" | sudo tee -a /etc/fstab
sudo mount -o ${mount_opts} ${d} ${mountpoint}
echo "${d} ${mountpoint} ext4 ${mount_opts} 1 1" | sudo tee -a /etc/fstab
else
echo "Disk ${disknum}: ${d} already mounted, skipping..."
fi
Expand Down Expand Up @@ -63,17 +72,27 @@ EOF
sysctl --system # reload sysctl settings
`

// write the startup script to a temp file.
// writeStartupScript writes the startup script to a temp file.
// Returns the path to the file.
// After use, the caller should delete the temp file.
func writeStartupScript() (string, error) {
//
// extraMountOpts, if not empty, is appended to the default mount options. It is
// a comma-separated list of options for the "mount -o" flag.
func writeStartupScript(extraMountOpts string) (string, error) {
type tmplParams struct {
ExtraMountOpts string
}

args := tmplParams{ExtraMountOpts: extraMountOpts}

tmpfile, err := ioutil.TempFile("", "gce-startup-script")
if err != nil {
return "", err
}
defer tmpfile.Close()

if _, err := tmpfile.WriteString(gceLocalSSDStartupScript); err != nil {
t := template.Must(template.New("start").Parse(gceLocalSSDStartupScriptTemplate))
if err := t.Execute(tmpfile, args); err != nil {
return "", err
}
return tmpfile.Name(), nil
Expand Down
7 changes: 6 additions & 1 deletion pkg/cmd/roachprod/vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,15 @@ func (vl List) ProviderIDs() []string {

// CreateOpts is the set of options when creating VMs.
type CreateOpts struct {
// NOTE(review): this top-level UseLocalSSD appears next to SSDOpts.UseLocalSSD
// in the rendered diff; presumably it is the removed line that SSDOpts
// replaces — confirm against the actual file.
UseLocalSSD bool
// Lifetime is bound to the --lifetime flag of the create command ("Lifetime
// of the cluster").
Lifetime time.Duration
// GeoDistributed, when false, makes the GCE provider restrict itself to the
// first configured zone.
GeoDistributed bool
// VMProviders is bound to a string-slice flag of the create command;
// presumably the names of the clouds to create VMs in — confirm.
VMProviders []string
// SSDOpts groups the settings that control local SSD use and mounting.
SSDOpts struct {
// UseLocalSSD is bound to the --local-ssd flag (default true). When set,
// the AWS provider picks its SSD machine type instead of mapping an EBS
// data volume, and the GCE provider requests a local SSD.
UseLocalSSD bool
// NoExt4Barrier, if set, makes the "-o nobarrier" flag be used when
// mounting the SSD. Ignored if UseLocalSSD is not set.
NoExt4Barrier bool
}
}

// ProviderFlags is a hook point for Providers to supply additional,
Expand Down

0 comments on commit 69ad6b8

Please sign in to comment.