Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

csi: plugins track jobs in addition to allocations, and use job information to set expected counts #8699

Merged
merged 20 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5d9ad99
nomad/structs/csi: add explicit job support
langmartin Aug 18, 2020
15c59f9
nomad/state/state_store: capture job updates directly
langmartin Aug 18, 2020
8ae586d
command/agent/csi_endpoint: add the JobDescriptionToApi
langmartin Aug 19, 2020
1db9d68
api/csi: add JobDescription
langmartin Aug 19, 2020
a1f2141
api/nodes: CSIInfo needs the AllocID
langmartin Aug 20, 2020
01d6d1e
command/agent/csi_endpoint: AllocID was missing, JobDescription
langmartin Aug 20, 2020
4ed3f86
nomad/state/state_store: restore provider & version
langmartin Aug 22, 2020
1970511
Update nomad/state/state_store.go
langmartin Aug 24, 2020
47b0b9f
nomad/structs/csi: comment the map types
langmartin Aug 24, 2020
1fe8961
nomad/state/state_store: boilerplate left in by accident
langmartin Aug 24, 2020
62e5e09
nomad/structs/csi: IsEmpty handles jobs correctly, nil summary == 0
langmartin Aug 24, 2020
c1056b9
nomad/csi_endpoint_test: plugin lifecycle with job
langmartin Aug 24, 2020
6fa359e
nomad/state/state_store: cleanup plugins when allocs missing
langmartin Aug 24, 2020
875d9f1
nomad/structs/csi: headfake the linter
langmartin Aug 24, 2020
f4d19d6
nomad/structs/csi: nil safe methods
langmartin Aug 25, 2020
857a5bb
api/csi: back out api change
langmartin Aug 26, 2020
85f22b2
command/agent/csi_endpoint: back out api change
langmartin Aug 26, 2020
464c166
nomad/structs/csi: don't track version
langmartin Aug 26, 2020
2de6679
nomad/state/state_store: typos
langmartin Aug 26, 2020
3e96d1c
nomad/structs/csi: comments, remove AsSlice
langmartin Aug 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions api/csi.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,10 @@ type CSIPlugin struct {
Allocations []*AllocationListStub
ControllersHealthy int
ControllersExpected int
ControllerJobs []JobDescription
NodesHealthy int
NodesExpected int
NodeJobs []JobDescription
CreateIndex uint64
ModifyIndex uint64
}
Expand Down Expand Up @@ -246,6 +248,12 @@ func (v CSIPluginIndexSort) Swap(i, j int) {
v[i], v[j] = v[j], v[i]
}

// JobDescription is the API representation of a job associated with a CSI
// plugin. It is the element type of the plugin's ControllerJobs and NodeJobs
// lists.
type JobDescription struct {
	// Namespace is the namespace the job is registered in.
	Namespace string

	// ID is the job's identifier.
	ID string

	// Version is the job version recorded for the plugin.
	Version uint64
}

// CSIPlugins returns a handle on the CSIPlugins endpoint
func (c *Client) CSIPlugins() *CSIPlugins {
return &CSIPlugins{client: c}
Expand Down
1 change: 1 addition & 0 deletions api/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ type CSIControllerInfo struct {
// as plugin health changes on the node.
type CSIInfo struct {
PluginID string
AllocID string
Healthy bool
HealthDescription string
UpdateTime time.Time
Expand Down
24 changes: 22 additions & 2 deletions command/agent/csi_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,13 @@ func structsCSIPluginToApi(plug *structs.CSIPlugin) *api.CSIPlugin {
Allocations: make([]*api.AllocationListStub, 0, len(plug.Allocations)),
ControllerRequired: plug.ControllerRequired,
ControllersHealthy: plug.ControllersHealthy,
ControllersExpected: len(plug.Controllers),
ControllersExpected: plug.ControllersExpected,
Controllers: make(map[string]*api.CSIInfo, len(plug.Controllers)),
ControllerJobs: make([]api.JobDescription, 0, plug.ControllersExpected),
NodesHealthy: plug.NodesHealthy,
NodesExpected: len(plug.Nodes),
NodesExpected: plug.NodesExpected,
Nodes: make(map[string]*api.CSIInfo, len(plug.Nodes)),
NodeJobs: make([]api.JobDescription, 0, plug.NodesExpected),
CreateIndex: plug.CreateIndex,
ModifyIndex: plug.ModifyIndex,
}
Expand All @@ -299,9 +301,26 @@ func structsCSIPluginToApi(plug *structs.CSIPlugin) *api.CSIPlugin {
out.Allocations = append(out.Allocations, structsAllocListStubToApi(a))
}

for _, jd := range plug.ControllerJobs.AsSlice() {
out.ControllerJobs = append(out.ControllerJobs, structsJobDescriptionToApi(jd))
}

for _, jd := range plug.NodeJobs.AsSlice() {
out.NodeJobs = append(out.NodeJobs, structsJobDescriptionToApi(jd))
}

return out
}

// structsJobDescriptionToApi copies the Namespace, ID, and Version of an
// internal structs.JobDescription into its api.JobDescription counterpart.
func structsJobDescriptionToApi(desc structs.JobDescription) api.JobDescription {
	var out api.JobDescription
	out.Namespace = desc.Namespace
	out.ID = desc.ID
	out.Version = desc.Version
	return out
}

// structsCSIVolumeToApi converts CSIVolume, creating the allocation array
func structsCSIVolumeToApi(vol *structs.CSIVolume) *api.CSIVolume {
if vol == nil {
Expand Down Expand Up @@ -358,6 +377,7 @@ func structsCSIInfoToApi(info *structs.CSIInfo) *api.CSIInfo {
}
out := &api.CSIInfo{
PluginID: info.PluginID,
AllocID: info.AllocID,
Healthy: info.Healthy,
HealthDescription: info.HealthDescription,
UpdateTime: info.UpdateTime,
Expand Down
11 changes: 4 additions & 7 deletions command/agent/csi_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
)

Expand All @@ -33,11 +32,13 @@ func TestHTTP_CSIEndpointPlugin(t *testing.T) {
out, ok := obj.(*api.CSIPlugin)
require.True(t, ok)

require.Equal(t, 1, out.ControllersExpected)
// ControllersExpected is 0 because this plugin was created without a job,
// and the expected count is only set from job information
require.Equal(t, 0, out.ControllersExpected)
require.Equal(t, 1, out.ControllersHealthy)
require.Len(t, out.Controllers, 1)

require.Equal(t, 2, out.NodesExpected)
require.Equal(t, 0, out.NodesExpected)
require.Equal(t, 2, out.NodesHealthy)
require.Len(t, out.Nodes, 2)
})
Expand Down Expand Up @@ -92,11 +93,7 @@ func TestHTTP_CSIEndpointVolume(t *testing.T) {
out, ok := raw.(*api.CSIVolume)
require.True(t, ok)

pretty.Log(out)

require.Equal(t, 1, out.ControllersExpected)
require.Equal(t, 1, out.ControllersHealthy)
require.Equal(t, 2, out.NodesExpected)
require.Equal(t, 2, out.NodesHealthy)
})
}
Expand Down
77 changes: 77 additions & 0 deletions nomad/csi_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,83 @@ func TestCSIPluginEndpoint_RegisterViaFingerprint(t *testing.T) {
require.Nil(t, resp2.Plugin)
}

func TestCSIPluginEndpoint_RegisterViaJob(t *testing.T) {
langmartin marked this conversation as resolved.
Show resolved Hide resolved
t.Parallel()
srv, shutdown := TestServer(t, nil)
defer shutdown()
testutil.WaitForLeader(t, srv.RPC)

codec := rpcClient(t, srv)

// Register a job that creates the plugin
job := mock.Job()
job.TaskGroups[0].Tasks[0].CSIPluginConfig = &structs.TaskCSIPluginConfig{
ID: "foo",
Type: structs.CSIPluginTypeNode,
}

req1 := &structs.JobRegisterRequest{
Job: job,
WriteRequest: structs.WriteRequest{Region: "global"},
}
resp1 := &structs.JobRegisterResponse{}
err := msgpackrpc.CallWithCodec(codec, "Job.Register", req1, resp1)
require.NoError(t, err)

// Verify that the plugin exists and is unhealthy
req2 := &structs.CSIPluginGetRequest{
ID: "foo",
QueryOptions: structs.QueryOptions{Region: "global"},
}
resp2 := &structs.CSIPluginGetResponse{}
err = msgpackrpc.CallWithCodec(codec, "CSIPlugin.Get", req2, resp2)
require.NoError(t, err)
require.NotNil(t, resp2.Plugin)
require.Zero(t, resp2.Plugin.ControllersHealthy)
require.Zero(t, resp2.Plugin.NodesHealthy)
require.Equal(t, job.ID, resp2.Plugin.NodeJobs[structs.DefaultNamespace][job.ID].ID)

// Health depends on node fingerprints
deleteNodes := state.CreateTestCSIPlugin(srv.fsm.State(), "foo")
defer deleteNodes()

resp2.Plugin = nil
err = msgpackrpc.CallWithCodec(codec, "CSIPlugin.Get", req2, resp2)
require.NoError(t, err)
require.NotNil(t, resp2.Plugin)
require.NotZero(t, resp2.Plugin.ControllersHealthy)
require.NotZero(t, resp2.Plugin.NodesHealthy)
require.Equal(t, job.ID, resp2.Plugin.NodeJobs[structs.DefaultNamespace][job.ID].ID)

// All fingerprints failing makes the plugin unhealthy, but does not delete it
deleteNodes()
err = msgpackrpc.CallWithCodec(codec, "CSIPlugin.Get", req2, resp2)
require.NoError(t, err)
require.NotNil(t, resp2.Plugin)
require.Zero(t, resp2.Plugin.ControllersHealthy)
require.Zero(t, resp2.Plugin.NodesHealthy)
require.Equal(t, job.ID, resp2.Plugin.NodeJobs[structs.DefaultNamespace][job.ID].ID)

// Job deregistration is necessary to gc the plugin
req3 := &structs.JobDeregisterRequest{
JobID: job.ID,
Purge: true,
WriteRequest: structs.WriteRequest{
Region: "global",
Namespace: structs.DefaultNamespace,
},
}
resp3 := &structs.JobDeregisterResponse{}
err = msgpackrpc.CallWithCodec(codec, "Job.Deregister", req3, resp3)
require.NoError(t, err)

// Plugin has been gc'ed
resp2.Plugin = nil
err = msgpackrpc.CallWithCodec(codec, "CSIPlugin.Get", req2, resp2)
require.NoError(t, err)
require.Nil(t, resp2.Plugin)
}

func TestCSIPluginEndpoint_DeleteViaGC(t *testing.T) {
t.Parallel()
srv, shutdown := TestServer(t, func(c *Config) {
Expand Down
Loading