Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement snapshot restore #8131

Merged
merged 11 commits into from
Jun 15, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions command/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"operator snapshot restore": func() (cli.Command, error) {
return &OperatorSnapshotRestoreCommand{
Meta: meta,
}, nil
},

"plan": func() (cli.Command, error) {
return &JobPlanCommand{
Expand Down
12 changes: 9 additions & 3 deletions command/operator_snapshot_inspect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"path/filepath"
"testing"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/command/agent"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
Expand All @@ -14,7 +15,7 @@ import (
func TestOperatorSnapshotInspect_Works(t *testing.T) {
t.Parallel()

snapPath := generateSnapshotFile(t)
snapPath := generateSnapshotFile(t, nil)

ui := new(cli.MockUi)
cmd := &OperatorSnapshotInspectCommand{Meta: Meta{Ui: ui}}
Expand Down Expand Up @@ -67,24 +68,29 @@ func TestOperatorSnapshotInspect_HandlesFailure(t *testing.T) {

}

func generateSnapshotFile(t *testing.T) string {
func generateSnapshotFile(t *testing.T, prepare func(srv *agent.TestAgent, client *api.Client, url string)) string {

tmpDir, err := ioutil.TempDir("", "nomad-tempdir")
require.NoError(t, err)

t.Cleanup(func() { os.RemoveAll(tmpDir) })

srv, _, url := testServer(t, false, func(c *agent.Config) {
srv, api, url := testServer(t, false, func(c *agent.Config) {
c.DevMode = false
c.DataDir = filepath.Join(tmpDir, "server")

c.Client.Enabled = false
c.AdvertiseAddrs.HTTP = "127.0.0.1"
c.AdvertiseAddrs.RPC = "127.0.0.1"
c.AdvertiseAddrs.Serf = "127.0.0.1"
})

defer srv.Shutdown()

if prepare != nil {
prepare(srv, api, url)
}

ui := new(cli.MockUi)
cmd := &OperatorSnapshotSaveCommand{Meta: Meta{Ui: ui}}

Expand Down
95 changes: 95 additions & 0 deletions command/operator_snapshot_restore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package command

import (
"fmt"
"os"
"strings"

"github.com/hashicorp/nomad/api"
"github.com/posener/complete"
)

// OperatorSnapshotRestoreCommand is the CLI command behind
// "operator snapshot restore". It embeds Meta, from which it obtains its UI,
// flag set, and API client.
type OperatorSnapshotRestoreCommand struct {
Meta
}

// Help returns the long-form usage text for the command: the invocation
// syntax, a description of what a restore does and why it is risky, an
// example, and the general options appended by generalOptionsUsage. The
// result has leading and trailing whitespace trimmed.
func (c *OperatorSnapshotRestoreCommand) Help() string {
	// The usage line and the example spell out the full command path
	// ("operator snapshot restore") so they agree with what Name returns.
	helpText := `
Usage: nomad operator snapshot restore [options] FILE

Restores an atomic, point-in-time snapshot of the state of the Nomad servers
which includes jobs, nodes, allocations, periodic jobs, and ACLs.

Restores involve a potentially dangerous low-level Raft operation that is not
designed to handle server failures during a restore. This command is primarily
intended to be used when recovering from a disaster, restoring into a fresh
cluster of Nomad servers.

If ACLs are enabled, a management token must be supplied in order to perform
snapshot operations.

To restore a snapshot from the file "backup.snap":

$ nomad operator snapshot restore backup.snap

General Options:

` + generalOptionsUsage()
	return strings.TrimSpace(helpText)
}

// AutocompleteFlags returns the flag completions for this command, which are
// exactly those Meta supplies for the client flag set.
func (c *OperatorSnapshotRestoreCommand) AutocompleteFlags() complete.Flags {
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)
	return clientFlags
}

// AutocompleteArgs returns the predictor for the command's positional
// argument. Run requires exactly one argument and opens it as a file, so
// completion offers file names rather than nothing.
func (c *OperatorSnapshotRestoreCommand) AutocompleteArgs() complete.Predictor {
	return complete.PredictFiles("*")
}

// Synopsis returns the one-line description of the command.
func (c *OperatorSnapshotRestoreCommand) Synopsis() string {
	const synopsis = "Restore snapshot of Nomad server state"
	return synopsis
}

// Name returns the full command path; Run also passes it to Meta.FlagSet as
// the flag set's name.
func (c *OperatorSnapshotRestoreCommand) Name() string {
	const name = "operator snapshot restore"
	return name
}

func (c *OperatorSnapshotRestoreCommand) Run(args []string) int {
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }

if err := flags.Parse(args); err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse args: %v", err))
return 1
}

// Check for misuse
args = flags.Args()
if len(args) != 1 {
c.Ui.Error("This command takes one: <filename>")
notnoop marked this conversation as resolved.
Show resolved Hide resolved
c.Ui.Error(commandErrorText(c))
return 1
}

snap, err := os.Open(args[0])
if err != nil {
c.Ui.Error(fmt.Sprintf("Error opening snapshot file: %q", err))
return 1
}
defer snap.Close()

// Set up a client.
client, err := c.Meta.Client()
if err != nil {
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
return 1
}

// Fetch the current configuration.
notnoop marked this conversation as resolved.
Show resolved Hide resolved
_, err = client.Operator().SnapshotRestore(snap, &api.WriteOptions{})
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to get restore snapshot: %v", err))
return 1
}

c.Ui.Output("Snapshot Restored")
return 0
}
96 changes: 96 additions & 0 deletions command/operator_snapshot_restore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package command

import (
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
)

// TestOperatorSnapshotRestore_Works takes a snapshot from a server on which a
// job has been registered, restores that snapshot into a second server that
// does not know the job, and checks that the job is then present in the second
// server's state.
func TestOperatorSnapshotRestore_Works(t *testing.T) {
	t.Parallel()

	tmpDir, err := ioutil.TempDir("", "nomad-tempdir")
	require.NoError(t, err)
	defer os.RemoveAll(tmpDir)

	// The callback registers a job on the source server; generateSnapshotFile
	// invokes it before running the snapshot save command.
	snapshotPath := generateSnapshotFile(t, func(srv *agent.TestAgent, client *api.Client, url string) {
		sampleJob := `
job "snapshot-test-job" {
type = "service"
datacenters = [ "dc1" ]
group "group1" {
count = 1
task "task1" {
driver = "exec"
resources = {
cpu = 1000
memory = 512
}
}
}

}`

		ui := new(cli.MockUi)
		cmd := &JobRunCommand{Meta: Meta{Ui: ui}}
		// "-" as the job file argument makes the command read the job from
		// the injected stdin reader.
		cmd.JobGetter.testStdin = strings.NewReader(sampleJob)

		code := cmd.Run([]string{"--address=" + url, "-detach", "-"})
		require.Zero(t, code)
	})

	// Second server, with its own data directory, to restore into.
	srv, _, url := testServer(t, false, func(c *agent.Config) {
		c.DevMode = false
		c.DataDir = filepath.Join(tmpDir, "server1")

		c.Client.Enabled = false
		c.AdvertiseAddrs.HTTP = "127.0.0.1"
		c.AdvertiseAddrs.RPC = "127.0.0.1"
		c.AdvertiseAddrs.Serf = "127.0.0.1"
	})

	defer srv.Shutdown()

	// job is not found before restore
	j, err := srv.Agent.Server().State().JobByID(nil, structs.DefaultNamespace, "snapshot-test-job")
	require.NoError(t, err)
	require.Nil(t, j)

	ui := new(cli.MockUi)
	cmd := &OperatorSnapshotRestoreCommand{Meta: Meta{Ui: ui}}

	code := cmd.Run([]string{"--address=" + url, snapshotPath})
	require.Empty(t, ui.ErrorWriter.String())
	require.Zero(t, code)
	require.Contains(t, ui.OutputWriter.String(), "Snapshot Restored")

	foundJob, err := srv.Agent.Server().State().JobByID(nil, structs.DefaultNamespace, "snapshot-test-job")
	require.NoError(t, err)
	// Guard the dereference below: a job missing after restore should fail
	// this test cleanly instead of panicking on a nil pointer.
	require.NotNil(t, foundJob)
	require.Equal(t, "snapshot-test-job", foundJob.ID)
}

// TestOperatorSnapshotRestore_Fails checks that the restore command exits with
// status 1 both when given the wrong number of arguments and when the snapshot
// file cannot be opened.
func TestOperatorSnapshotRestore_Fails(t *testing.T) {
	t.Parallel()

	mockUI := new(cli.MockUi)
	restore := &OperatorSnapshotRestoreCommand{Meta: Meta{Ui: mockUI}}

	// Three positional arguments is misuse; the usage hint must be printed.
	tooManyArgs := []string{"some", "bad", "args"}
	status := restore.Run(tooManyArgs)
	require.Equal(t, 1, status)
	require.Contains(t, mockUI.ErrorWriter.String(), commandErrorText(restore))

	// Start the second scenario from an empty error buffer.
	mockUI.ErrorWriter.Reset()

	// A path that cannot be opened must surface a "no such file" error.
	missingFile := []string{"/unicorns/leprechauns"}
	status = restore.Run(missingFile)
	require.Equal(t, 1, status)
	require.Contains(t, mockUI.ErrorWriter.String(), "no such file")
}
18 changes: 18 additions & 0 deletions command/operator_snapshot_save_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,21 @@ func TestOperatorSnapshotSave_Works(t *testing.T) {
require.NoError(t, err)
require.NotZero(t, meta.Index)
}

// TestOperatorSnapshotSave_Fails checks that the save command exits with
// status 1 both when given too many arguments and when given a path that
// produces a "no such file" error.
func TestOperatorSnapshotSave_Fails(t *testing.T) {
	t.Parallel()

	mockUI := new(cli.MockUi)
	save := &OperatorSnapshotSaveCommand{Meta: Meta{Ui: mockUI}}

	// Three positional arguments is misuse; the usage hint must be printed.
	tooManyArgs := []string{"some", "bad", "args"}
	status := save.Run(tooManyArgs)
	require.Equal(t, 1, status)
	require.Contains(t, mockUI.ErrorWriter.String(), commandErrorText(save))

	// Start the second scenario from an empty error buffer.
	mockUI.ErrorWriter.Reset()

	// A path under a directory that presumably does not exist must be
	// rejected with a "no such file" error.
	badPath := []string{"/unicorns/leprechauns"}
	status = save.Run(badPath)
	require.Equal(t, 1, status)
	require.Contains(t, mockUI.ErrorWriter.String(), "no such file")
}