Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: nomad debug new /agent/host #8325

Merged
merged 28 commits into from
Jul 2, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fa0f7e9
command/agent/host: collect host data, multi platform
langmartin Jun 16, 2020
95746f1
nomad/structs/structs: new HostDataRequest/Response
langmartin Jun 19, 2020
ae93f83
client/agent_endpoint: add RPC endpoint
langmartin Jun 30, 2020
d91e0fe
nomad/client_agent_endpoint: add Agent Host with forwarding
langmartin Jun 30, 2020
018c8b1
nomad/client_agent_endpoint: use findClientConn
langmartin Jun 30, 2020
4e3d9e7
command/agent/agent_endpoint: add Host
langmartin Jun 18, 2020
3002a47
api/agent: add the Host endpoint
langmartin Jun 18, 2020
1fbefe8
command/debug: call agent hosts
langmartin Jun 30, 2020
e224ca6
command/agent/host/host_test: log -> assertions
langmartin Jul 1, 2020
41a06a6
command/agent/host: eliminate calling external programs
langmartin Jul 1, 2020
4e88fad
command/agent/host/windows: add back in host stubs
langmartin Jul 1, 2020
163b5da
command/agent/host: uname returns an error string, windows
langmartin Jul 1, 2020
a7d6f56
command/agent/host/host: lint
langmartin Jul 1, 2020
0c9d9ac
command/agent/host/windows: name conflict
langmartin Jul 1, 2020
d0ccc2f
command/agent/host: use sockaddr dumpSocket for networks
langmartin Jul 1, 2020
77c40a8
api/agent: match the new network type
langmartin Jul 1, 2020
cbd2931
command/debug: capture /agent/host error in the bundle
langmartin Jul 1, 2020
93a9797
command/agent/agent_endpoint: get the token from Server or Client
langmartin Jul 1, 2020
6cee4ab
command/agent/agent_endpoint: use localServer in forwarding
langmartin Jul 1, 2020
9986bd5
nomad/structs/structs: finish comments
langmartin Jul 1, 2020
b39a3f7
command/agent/host/windows: don't use the Must varieties, error
langmartin Jul 1, 2020
c4f59b8
command/agent/host: redact environment variables a couple of ways
langmartin Jul 1, 2020
1dfdac5
api/agent: remove unused Systemd field
langmartin Jul 1, 2020
07d4f3d
command/agent/host/windows: return errors correctly
langmartin Jul 1, 2020
bb23c96
client/agent_endpoint: require ACLs or EnableDebug
langmartin Jul 1, 2020
a43468d
nomad/client_agent_endpoint: require ACLs or EnableDebug
langmartin Jul 1, 2020
c402022
command/agent/agent_endpoint: require ACLs or EnableDebug
langmartin Jul 1, 2020
7d41f78
command/agent/agent_endpoint: use http.MethodGet
langmartin Jul 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions api/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,32 @@ func (a *Agent) Health() (*AgentHealthResponse, error) {
return nil, fmt.Errorf("unable to unmarshal response with status %d: %v", resp.StatusCode, err)
}

// Host fetches debugging context about the operating system hosting an agent
// from the /v1/agent/host endpoint.
//
// A non-empty serverID is stored in q.Params under "server_id" and a non-empty
// nodeID under "node_id". A nil q is replaced by an empty QueryOptions; a
// non-nil q has its Params map created if needed and populated in place.
func (a *Agent) Host(serverID, nodeID string, q *QueryOptions) (*HostDataResponse, error) {
	if q == nil {
		q = &QueryOptions{}
	}
	if q.Params == nil {
		q.Params = make(map[string]string)
	}

	// Only pass along the identifiers the caller actually supplied.
	targets := map[string]string{
		"server_id": serverID,
		"node_id":   nodeID,
	}
	for param, id := range targets {
		if id != "" {
			q.Params[param] = id
		}
	}

	out := new(HostDataResponse)
	if _, err := a.client.query("/v1/agent/host", out, q); err != nil {
		return nil, err
	}
	return out, nil
}

// Monitor returns a channel which will receive streaming logs from the agent
// Providing a non-nil stopCh can be used to close the connection and stop log streaming
func (a *Agent) Monitor(stopCh <-chan struct{}, q *QueryOptions) (<-chan *StreamFrame, <-chan error) {
Expand Down Expand Up @@ -438,3 +464,23 @@ type AgentHealth struct {
// Message describes why the agent is unhealthy
Message string `json:"message"`
}

type HostData struct {
OS string
Network []map[string]string
ResolvConf string
Hosts string
Systemd string
langmartin marked this conversation as resolved.
Show resolved Hide resolved
Environment map[string]string
Disk map[string]DiskUsage
}

// DiskUsage reports the size and used space of one filesystem. The MB suffix
// on the field names indicates megabytes.
type DiskUsage struct {
// DiskMB is the total size.
DiskMB int64
// UsedMB is the portion of DiskMB that is in use.
UsedMB int64
}

// HostDataResponse is the value Agent.Host decodes the /v1/agent/host reply
// into.
type HostDataResponse struct {
// AgentID identifies the agent the data belongs to.
AgentID string
// HostData is the collected host context; it is omitted from JSON when nil.
HostData *HostData `json:",omitempty"`
}
20 changes: 20 additions & 0 deletions client/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/command/agent/host"
"github.com/hashicorp/nomad/command/agent/monitor"
"github.com/hashicorp/nomad/command/agent/pprof"
"github.com/hashicorp/nomad/helper"
Expand Down Expand Up @@ -210,3 +211,22 @@ OUTER:
return
}
}

// Host collects data about the host environment running the agent.
//
// The token in args is resolved first. A resolution error is returned as-is,
// and a resolved ACL object that does not allow agent read yields
// structs.ErrPermissionDenied. On success reply receives this client's node ID
// and the data produced by host.MakeHostData.
func (a *Agent) Host(args *structs.QueryOptions, reply *structs.HostDataResponse) error {
	aclObj, err := a.c.ResolveToken(args.AuthToken)
	switch {
	case err != nil:
		return err
	case aclObj != nil && !aclObj.AllowAgentRead():
		return structs.ErrPermissionDenied
	}

	hostData, err := host.MakeHostData()
	if err != nil {
		return err
	}

	reply.HostData = hostData
	reply.AgentID = a.c.NodeID()
	return nil
}
82 changes: 82 additions & 0 deletions client/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,3 +353,85 @@ func TestAgentProfile_ACL(t *testing.T) {
})
}
}

// TestAgentHost verifies that a client configured with EnableDebug answers the
// Agent.Host RPC with host data tagged with its own node ID.
func TestAgentHost(t *testing.T) {
	t.Parallel()

	// Bring up a server for the client to connect to.
	srv, stopServer := nomad.TestServer(t, nil)
	defer stopServer()
	testutil.WaitForLeader(t, srv.RPC)

	cl, stopClient := TestClient(t, func(cfg *config.Config) {
		cfg.EnableDebug = true
		cfg.Servers = []string{srv.GetConfig().RPCAddr.String()}
	})
	defer stopClient()

	var (
		args  structs.QueryOptions
		reply structs.HostDataResponse
	)
	require.NoError(t, cl.ClientRPC("Agent.Host", &args, &reply))

	require.NotNil(t, reply.HostData)
	require.Equal(t, cl.NodeID(), reply.AgentID)
}

// TestAgentHost_ACL verifies the Agent.Host RPC against an ACL-enabled client:
// a token with only a node write policy is denied, while a token with an agent
// read policy and the root token both receive host data.
func TestAgentHost_ACL(t *testing.T) {
	t.Parallel()

	srv, rootToken, stopServer := nomad.TestACLServer(t, nil)
	defer stopServer()
	testutil.WaitForLeader(t, srv.RPC)

	cl, stopClient := TestClient(t, func(cfg *config.Config) {
		cfg.ACLEnabled = true
		cfg.Servers = []string{srv.GetConfig().RPCAddr.String()}
	})
	defer stopClient()

	// Token carrying agent read: expected to be accepted.
	allowed := mock.CreatePolicyAndToken(t, srv.State(), 1005, "valid", mock.AgentPolicy(acl.PolicyRead))

	// Token carrying only node write: expected to be rejected.
	denied := mock.CreatePolicyAndToken(t, srv.State(), 1009, "invalid", mock.NodePolicy(acl.PolicyWrite))

	type testCase struct {
		name       string
		token      string
		wantDenied bool
	}
	cases := []testCase{
		{name: "bad token", token: denied.SecretID, wantDenied: true},
		{name: "good token", token: allowed.SecretID},
		{name: "root token", token: rootToken.SecretID},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			args := structs.QueryOptions{AuthToken: tc.token}
			var reply structs.HostDataResponse

			err := cl.ClientRPC("Agent.Host", &args, &reply)
			if tc.wantDenied {
				require.EqualError(t, err, structs.ErrPermissionDenied.Error())
				return
			}
			require.NoError(t, err)
			require.NotEmpty(t, reply.HostData)
		})
	}
}
65 changes: 65 additions & 0 deletions command/agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/go-msgpack/codec"
"github.com/hashicorp/nomad/acl"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/command/agent/host"
"github.com/hashicorp/nomad/command/agent/pprof"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/serf/serf"
Expand Down Expand Up @@ -657,3 +658,67 @@ type healthResponseAgent struct {
Ok bool `json:"ok"`
Message string `json:"message,omitempty"`
}

// AgentHostRequest runs on servers and clients, and captures information about the host system to add
// to the nomad debug archive.
func (s *HTTPServer) AgentHostRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
if req.Method != "GET" {
langmartin marked this conversation as resolved.
Show resolved Hide resolved
return nil, CodedError(405, ErrInvalidMethod)
}

var secret string
s.parseToken(req, &secret)

// Check agent read permissions
if aclObj, err := s.agent.Client().ResolveToken(secret); err != nil {
langmartin marked this conversation as resolved.
Show resolved Hide resolved
return nil, err
} else if aclObj != nil && !aclObj.AllowAgentRead() {
return nil, structs.ErrPermissionDenied
}

serverID := req.URL.Query().Get("server_id")
nodeID := req.URL.Query().Get("node_id")

if serverID != "" && nodeID != "" {
return nil, CodedError(400, "Can only forward to either client node or server")
}

// If no other node is specified, return our local host's data
if serverID == "" && nodeID == "" {
data, err := host.MakeHostData()
if err != nil {
return nil, CodedError(500, err.Error())
}
return data, nil
}

args := &structs.HostDataRequest{
ServerID: serverID,
NodeID: nodeID,
}

s.parse(resp, req, &args.QueryOptions.Region, &args.QueryOptions)

var reply structs.HostDataResponse
var rpcErr error

// serverID is set, so forward to that server
if serverID != "" {
rpcErr = s.agent.Server().RPC("Agent.Host", &args, &reply)
return reply, rpcErr
}

// Make the RPC. The RPC endpoint actually forwards the request to the correct
// agent, but we need to use the correct RPC interface.
localClient, remoteClient, _ := s.rpcHandlerForNode(nodeID)

if localClient {
rpcErr = s.agent.Client().ClientRPC("Agent.Host", &args, &reply)
} else if remoteClient {
rpcErr = s.agent.Client().RPC("Agent.Host", &args, &reply)
} else {
rpcErr = fmt.Errorf("node not found: %s", nodeID)
}

return reply, rpcErr
}
7 changes: 7 additions & 0 deletions command/agent/host/darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// +build darwin

package host

// mountedPaths lists the paths whose disk usage is reported on darwin builds;
// only the root path "/" is returned.
func mountedPaths() []string {
	roots := make([]string, 0, 1)
	roots = append(roots, "/")
	return roots
}
95 changes: 95 additions & 0 deletions command/agent/host/host.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package host

import (
"os"
"strings"
)

// HostData is the snapshot of host debugging context assembled by
// MakeHostData.
type HostData struct {
// OS is the value returned by uname().
OS string
// Network is the value returned by network().
Network []map[string]string
// ResolvConf is the value returned by resolvConf().
ResolvConf string
// Hosts is the value returned by etcHosts().
Hosts string
// Environment is the process environment as returned by environment().
Environment map[string]string
// Disk maps each path from mountedPaths() to its usage; paths whose usage
// could not be read are absent.
Disk map[string]DiskUsage
}

// DiskUsage holds the figures computed by diskUsage for one path. Both values
// are the underlying totals divided by 1048576 and truncated to an integer.
type DiskUsage struct {
// DiskMB is the total size of the filesystem.
DiskMB int64
// UsedMB is the total size minus the space reported as available.
UsedMB int64
}

// MakeHostData assembles the HostData snapshot for the local host from the
// package's collectors: uname, network, resolvConf, etcHosts, environment and
// per-path disk usage.
//
// A path from mountedPaths whose disk usage cannot be read is left out of
// Disk. The returned error is always nil.
func MakeHostData() (*HostData, error) {
	usage := make(map[string]DiskUsage)
	for _, mount := range mountedPaths() {
		if u, err := diskUsage(mount); err == nil {
			usage[mount] = u
		}
	}

	data := &HostData{Disk: usage}
	data.OS = uname()
	data.Network = network()
	data.ResolvConf = resolvConf()
	data.Hosts = etcHosts()
	data.Environment = environment()
	return data, nil
}

// diskUsage computes the DiskUsage for path from the figures makeDf reports.
//
// Used space is total minus available. Both results are divided by 1048576
// and truncated to int64. An error from makeDf is returned with a zero
// DiskUsage.
func diskUsage(path string) (du DiskUsage, err error) {
	const perMB = float64(1048576)

	stat, err := makeDf(path)
	if err != nil {
		return du, err
	}

	total := float64(stat.total())
	// Bavail is blocks available to unprivileged users, Bfree includes reserved blocks
	avail := float64(stat.available())

	du.DiskMB = int64(total / perMB)
	du.UsedMB = int64((total - avail) / perMB)
	return du, nil
}

// environment returns the process environment as a map from variable name to
// value.
//
// Each os.Environ entry is split at its first "=", so values that themselves
// contain "=" are preserved intact. An entry with no "=" at all is recorded
// with an empty value instead of indexing past the end of the split result.
func environment() map[string]string {
	entries := os.Environ()
	env := make(map[string]string, len(entries))

	for _, e := range entries {
		kv := strings.SplitN(e, "=", 2)
		if len(kv) < 2 {
			// Malformed entry without a separator: keep the name, no value.
			env[kv[0]] = ""
			continue
		}
		env[kv[0]] = kv[1]
	}
	return env
}
langmartin marked this conversation as resolved.
Show resolved Hide resolved

// slurp returns the full contents of the file at path as a string.
//
// If the file cannot be opened, the error's text is returned in place of the
// contents. If a read fails part-way, whatever was read up to that point is
// returned. The file is closed before returning.
func slurp(path string) string {
	fh, err := os.Open(path)
	if err != nil {
		return err.Error()
	}
	defer fh.Close()

	var sb strings.Builder
	buf := make([]byte, 512)
	for {
		n, err := fh.Read(buf)
		// Read may return data together with an error (including io.EOF), so
		// always keep the n bytes that were delivered before checking err.
		sb.Write(buf[:n])
		if err != nil {
			break
		}
	}
	return sb.String()
}
27 changes: 27 additions & 0 deletions command/agent/host/host_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package host

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestHostUtils(t *testing.T) {
mounts := mountedPaths()
require.NotEmpty(t, mounts)

du, err := diskUsage("/")
require.NoError(t, err)
require.NotZero(t, du.DiskMB)
require.NotZero(t, du.UsedMB)
}

// TestMakeHostData checks that MakeHostData succeeds and fills in the OS,
// Network, ResolvConf, Hosts and Disk fields.
func TestMakeHostData(t *testing.T) {
	data, err := MakeHostData()
	require.NoError(t, err)

	// Every collected section is expected to carry something.
	require.NotEmpty(t, data.OS)
	require.NotEmpty(t, data.Network)
	require.NotEmpty(t, data.ResolvConf)
	require.NotEmpty(t, data.Hosts)
	require.NotEmpty(t, data.Disk)
}
Loading