diff --git a/api/agent.go b/api/agent.go index b3a43d56698e..39bfb95443cd 100644 --- a/api/agent.go +++ b/api/agent.go @@ -239,6 +239,32 @@ func (a *Agent) Health() (*AgentHealthResponse, error) { return nil, fmt.Errorf("unable to unmarshal response with status %d: %v", resp.StatusCode, err) } +// Host returns debugging context about the agent's host operating system +func (a *Agent) Host(serverID, nodeID string, q *QueryOptions) (*HostDataResponse, error) { + if q == nil { + q = &QueryOptions{} + } + if q.Params == nil { + q.Params = make(map[string]string) + } + + if serverID != "" { + q.Params["server_id"] = serverID + } + + if nodeID != "" { + q.Params["node_id"] = nodeID + } + + var resp HostDataResponse + _, err := a.client.query("/v1/agent/host", &resp, q) + if err != nil { + return nil, err + } + + return &resp, nil +} + // Monitor returns a channel which will receive streaming logs from the agent // Providing a non-nil stopCh can be used to close the connection and stop log streaming func (a *Agent) Monitor(stopCh <-chan struct{}, q *QueryOptions) (<-chan *StreamFrame, <-chan error) { @@ -438,3 +464,22 @@ type AgentHealth struct { // Message describes why the agent is unhealthy Message string `json:"message"` } + +type HostData struct { + OS string + Network []map[string]string + ResolvConf string + Hosts string + Environment map[string]string + Disk map[string]DiskUsage +} + +type DiskUsage struct { + DiskMB int64 + UsedMB int64 +} + +type HostDataResponse struct { + AgentID string + HostData *HostData `json:",omitempty"` +} diff --git a/client/agent_endpoint.go b/client/agent_endpoint.go index ee6e5c11338f..8f6d3bf4baec 100644 --- a/client/agent_endpoint.go +++ b/client/agent_endpoint.go @@ -8,6 +8,7 @@ import ( "time" "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/command/agent/monitor" "github.com/hashicorp/nomad/command/agent/pprof" "github.com/hashicorp/nomad/helper" @@ -210,3 +211,24 @@ OUTER: return } } + +// Host collects data about the host evironment running the agent +func (a *Agent) Host(args *structs.QueryOptions, reply *structs.HostDataResponse) error { + aclObj, err := a.c.ResolveToken(args.AuthToken) + if err != nil { + return err + } + if (aclObj != nil && !aclObj.AllowAgentRead()) || + (aclObj == nil && !a.c.config.EnableDebug) { + return structs.ErrPermissionDenied + } + + data, err := host.MakeHostData() + if err != nil { + return err + } + + reply.AgentID = a.c.NodeID() + reply.HostData = data + return nil +} diff --git a/client/agent_endpoint_test.go b/client/agent_endpoint_test.go index e976688d0a70..507ad42c2b14 100644 --- a/client/agent_endpoint_test.go +++ b/client/agent_endpoint_test.go @@ -353,3 +353,85 @@ func TestAgentProfile_ACL(t *testing.T) { }) } } + +func TestAgentHost(t *testing.T) { + t.Parallel() + + // start server and client + s1, cleanup := nomad.TestServer(t, nil) + defer cleanup() + + testutil.WaitForLeader(t, s1.RPC) + + c, cleanupC := TestClient(t, func(c *config.Config) { + c.Servers = []string{s1.GetConfig().RPCAddr.String()} + c.EnableDebug = true + }) + defer cleanupC() + + req := structs.QueryOptions{} + var resp structs.HostDataResponse + + err := c.ClientRPC("Agent.Host", &req, &resp) + require.NoError(t, err) + + require.NotNil(t, resp.HostData) + require.Equal(t, c.NodeID(), resp.AgentID) +} + +func TestAgentHost_ACL(t *testing.T) { + t.Parallel() + + s, root, cleanupS := nomad.TestACLServer(t, nil) + defer cleanupS() + testutil.WaitForLeader(t, s.RPC) + + c, cleanupC := TestClient(t, func(c *config.Config) { + c.ACLEnabled = true + c.Servers = []string{s.GetConfig().RPCAddr.String()} + }) + defer cleanupC() + + policyGood := mock.AgentPolicy(acl.PolicyRead) + tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1005, "valid", policyGood) + + policyBad := mock.NodePolicy(acl.PolicyWrite) + tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1009, "invalid", policyBad) + + cases := []struct { + Name string + Token string + authErr bool + }{ + { + Name: "bad token", + Token: tokenBad.SecretID, + authErr: true, + }, + { + Name: "good token", + Token: tokenGood.SecretID, + }, + { + Name: "root token", + Token: root.SecretID, + }, + } + + for _, tc := range cases { + t.Run(tc.Name, func(t *testing.T) { + req := structs.QueryOptions{ + AuthToken: tc.Token, + } + var resp structs.HostDataResponse + + err := c.ClientRPC("Agent.Host", &req, &resp) + if tc.authErr { + require.EqualError(t, err, structs.ErrPermissionDenied.Error()) + } else { + require.NoError(t, err) + require.NotEmpty(t, resp.HostData) + } + }) + } +} diff --git a/client/config/config.go b/client/config/config.go index 61678ed7ac5b..2323d9add548 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -21,6 +21,7 @@ import ( var ( // DefaultEnvBlacklist is the default set of environment variables that are // filtered when passing the environment variables of the host to a task. + // duplicated in command/agent/host, update that if this changes. DefaultEnvBlacklist = strings.Join([]string{ "CONSUL_TOKEN", "CONSUL_HTTP_TOKEN", diff --git a/command/agent/agent_endpoint.go b/command/agent/agent_endpoint.go index c50e4a0fb7a1..23c5be215592 100644 --- a/command/agent/agent_endpoint.go +++ b/command/agent/agent_endpoint.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/nomad/acl" cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/command/agent/pprof" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/serf/serf" @@ -657,3 +658,83 @@ type healthResponseAgent struct { Ok bool `json:"ok"` Message string `json:"message,omitempty"` } + +// AgentHostRequest runs on servers and clients, and captures information about the host system to add +// to the nomad debug archive. +func (s *HTTPServer) AgentHostRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + if req.Method != http.MethodGet { + return nil, CodedError(405, ErrInvalidMethod) + } + + var secret string + s.parseToken(req, &secret) + + // Check agent read permissions + var aclObj *acl.ACL + var enableDebug bool + var err error + if srv := s.agent.Server(); srv != nil { + aclObj, err = srv.ResolveToken(secret) + enableDebug = srv.GetConfig().EnableDebug + } else { + // Not a Server; use the Client for token resolution + aclObj, err = s.agent.Client().ResolveToken(secret) + enableDebug = s.agent.Client().GetConfig().EnableDebug + } + if err != nil { + return nil, err + } + + if (aclObj != nil && !aclObj.AllowAgentRead()) || + (aclObj == nil && !enableDebug) { + return nil, structs.ErrPermissionDenied + } + + serverID := req.URL.Query().Get("server_id") + nodeID := req.URL.Query().Get("node_id") + + if serverID != "" && nodeID != "" { + return nil, CodedError(400, "Can only forward to either client node or server") + } + + // If no other node is specified, return our local host's data + if serverID == "" && nodeID == "" { + data, err := host.MakeHostData() + if err != nil { + return nil, CodedError(500, err.Error()) + } + return data, nil + } + + args := &structs.HostDataRequest{ + ServerID: serverID, + NodeID: nodeID, + } + + s.parse(resp, req, &args.QueryOptions.Region, &args.QueryOptions) + + var reply structs.HostDataResponse + var rpcErr error + + // serverID is set, so forward to that server + if serverID != "" { + rpcErr = s.agent.Server().RPC("Agent.Host", &args, &reply) + return reply, rpcErr + } + + // Make the RPC. The RPC endpoint actually forwards the request to the correct + // agent, but we need to use the correct RPC interface. + localClient, remoteClient, localServer := s.rpcHandlerForNode(nodeID) + + if localClient { + rpcErr = s.agent.Client().ClientRPC("Agent.Host", &args, &reply) + } else if remoteClient { + rpcErr = s.agent.Client().RPC("Agent.Host", &args, &reply) + } else if localServer { + rpcErr = s.agent.Server().RPC("Agent.Host", &args, &reply) + } else { + rpcErr = fmt.Errorf("node not found: %s", nodeID) + } + + return reply, rpcErr +} diff --git a/command/agent/host/darwin.go b/command/agent/host/darwin.go new file mode 100644 index 000000000000..856ac91637ae --- /dev/null +++ b/command/agent/host/darwin.go @@ -0,0 +1,7 @@ +// +build darwin + +package host + +func mountedPaths() []string { + return []string{"/"} +} diff --git a/command/agent/host/host.go b/command/agent/host/host.go new file mode 100644 index 000000000000..1940a01d22c8 --- /dev/null +++ b/command/agent/host/host.go @@ -0,0 +1,123 @@ +package host + +import ( + "io/ioutil" + "os" + "strings" +) + +type HostData struct { + OS string + Network []map[string]string + ResolvConf string + Hosts string + Environment map[string]string + Disk map[string]DiskUsage +} + +type DiskUsage struct { + DiskMB int64 + UsedMB int64 +} + +func MakeHostData() (*HostData, error) { + du := make(map[string]DiskUsage) + for _, path := range mountedPaths() { + u, err := diskUsage(path) + if err != nil { + continue + } + du[path] = u + } + + return &HostData{ + OS: uname(), + Network: network(), + ResolvConf: resolvConf(), + Hosts: etcHosts(), + Environment: environment(), + Disk: du, + }, nil +} + +// diskUsage calculates the DiskUsage +func diskUsage(path string) (du DiskUsage, err error) { + s, err := makeDf(path) + if err != nil { + return du, err + } + + disk := float64(s.total()) + // Bavail is blocks available to unprivileged users, Bfree includes reserved blocks + free := float64(s.available()) + used := disk - free + mb := float64(1048576) + + disk = disk / mb + used = used / mb + + du.DiskMB = int64(disk) + du.UsedMB = int64(used) + return du, nil +} + +var ( + envRedactSet = makeEnvRedactSet() +) + +// environment returns the process environment in a map +func environment() map[string]string { + env := make(map[string]string) + + for _, e := range os.Environ() { + s := strings.SplitN(e, "=", 2) + k := s[0] + up := strings.ToUpper(k) + v := s[1] + + _, redact := envRedactSet[k] + if redact || + strings.Contains(up, "TOKEN") || + strings.Contains(up, "SECRET") { + v = "" + } + + env[k] = v + } + return env +} + +// makeEnvRedactSet creates a set of well known environment variables that should be +// redacted in the output +func makeEnvRedactSet() map[string]struct{} { + // Duplicated from config.DefaultEnvBlacklist in order to avoid an import cycle + configDefault := []string{ + "CONSUL_TOKEN", + "CONSUL_HTTP_TOKEN", + "VAULT_TOKEN", + "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN", + "GOOGLE_APPLICATION_CREDENTIALS", + } + + set := make(map[string]struct{}) + for _, e := range configDefault { + set[e] = struct{}{} + } + + return set +} + +// slurp returns the file contents as a string, returning an error string +func slurp(path string) string { + fh, err := os.Open(path) + if err != nil { + return err.Error() + } + + bs, err := ioutil.ReadAll(fh) + if err != nil { + return err.Error() + } + + return string(bs) +} diff --git a/command/agent/host/host_test.go b/command/agent/host/host_test.go new file mode 100644 index 000000000000..12131fa87a41 --- /dev/null +++ b/command/agent/host/host_test.go @@ -0,0 +1,42 @@ +package host + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHostUtils(t *testing.T) { + mounts := mountedPaths() + require.NotEmpty(t, mounts) + + du, err := diskUsage("/") + require.NoError(t, err) + require.NotZero(t, du.DiskMB) + require.NotZero(t, du.UsedMB) +} + +func TestMakeHostData(t *testing.T) { + // setenv variables that should be redacted + prev := os.Getenv("VAULT_TOKEN") + os.Setenv("VAULT_TOKEN", "foo") + defer os.Setenv("VAULT_TOKEN", prev) + + os.Setenv("BOGUS_TOKEN", "foo") + os.Setenv("BOGUS_SECRET", "foo") + os.Setenv("ryanSECRETS", "foo") + + host, err := MakeHostData() + require.NoError(t, err) + require.NotEmpty(t, host.OS) + require.NotEmpty(t, host.Network) + require.NotEmpty(t, host.ResolvConf) + require.NotEmpty(t, host.Hosts) + require.NotEmpty(t, host.Disk) + require.NotEmpty(t, host.Environment) + require.Equal(t, "", host.Environment["VAULT_TOKEN"]) + require.Equal(t, "", host.Environment["BOGUS_TOKEN"]) + require.Equal(t, "", host.Environment["BOGUS_SECRET"]) + require.Equal(t, "", host.Environment["ryanSECRETS"]) +} diff --git a/command/agent/host/linux.go b/command/agent/host/linux.go new file mode 100644 index 000000000000..f886282ce28e --- /dev/null +++ b/command/agent/host/linux.go @@ -0,0 +1,42 @@ +// +build linux + +package host + +import ( + "bufio" + "os" + "strings" +) + +// mountedPaths produces a list of mounts +func mountedPaths() []string { + fh, err := os.Open("/proc/mounts") + if err != nil { + return []string{err.Error()} + } + rd := bufio.NewReader(fh) + + var paths []string + for { + str, err := rd.ReadString('\n') + if err != nil { + break + } + + ln := strings.Split(str, " ") + switch ln[2] { + case "autofs", "binfmt_misc", "cgroup", "debugfs", + "devpts", "devtmpfs", + "fusectl", "fuse.lxcfs", + "hugetlbfs", "mqueue", + "proc", "pstore", "rpc_pipefs", "securityfs", + "sysfs", "tmpfs", "vboxsf": + continue + default: + } + + paths = append(paths, ln[1]) + } + + return paths +} diff --git a/command/agent/host/network.go b/command/agent/host/network.go new file mode 100644 index 000000000000..6f8d522c2739 --- /dev/null +++ b/command/agent/host/network.go @@ -0,0 +1,83 @@ +package host + +import ( + "fmt" + + sockaddr "github.com/hashicorp/go-sockaddr" +) + +// network uses go-sockaddr to capture our view of the network +// on error, return the text of the error +func network() (output []map[string]string) { + ifaddrs, err := sockaddr.GetAllInterfaces() + if err != nil { + output = append(output, map[string]string{"error": err.Error()}) + return output + } + + for _, inf := range ifaddrs { + output = append(output, dumpSockAddr(inf.SockAddr)) + } + + return output +} + +// dumpSockAddr is adapted from +// https://github.com/hashicorp/go-sockaddr/blob/c7188e74f6acae5a989bdc959aa779f8b9f42faf/cmd/sockaddr/command/dump.go#L144-L244 +func dumpSockAddr(sa sockaddr.SockAddr) map[string]string { + output := make(map[string]string) + + // Attributes for all SockAddr types + for _, attr := range sockaddr.SockAddrAttrs() { + output[string(attr)] = sockaddr.SockAddrAttr(sa, attr) + } + + // Attributes for all IP types (both IPv4 and IPv6) + if sa.Type()&sockaddr.TypeIP != 0 { + ip := *sockaddr.ToIPAddr(sa) + for _, attr := range sockaddr.IPAttrs() { + output[string(attr)] = sockaddr.IPAddrAttr(ip, attr) + } + } + + if sa.Type() == sockaddr.TypeIPv4 { + ipv4 := *sockaddr.ToIPv4Addr(sa) + for _, attr := range sockaddr.IPv4Attrs() { + output[string(attr)] = sockaddr.IPv4AddrAttr(ipv4, attr) + } + } + + if sa.Type() == sockaddr.TypeIPv6 { + ipv6 := *sockaddr.ToIPv6Addr(sa) + for _, attr := range sockaddr.IPv6Attrs() { + output[string(attr)] = sockaddr.IPv6AddrAttr(ipv6, attr) + } + } + + if sa.Type() == sockaddr.TypeUnix { + us := *sockaddr.ToUnixSock(sa) + for _, attr := range sockaddr.UnixSockAttrs() { + output[string(attr)] = sockaddr.UnixSockAttr(us, attr) + } + } + + // Developer-focused arguments + { + arg1, arg2 := sa.DialPacketArgs() + output["DialPacket"] = fmt.Sprintf("%+q %+q", arg1, arg2) + } + { + arg1, arg2 := sa.DialStreamArgs() + output["DialStream"] = fmt.Sprintf("%+q %+q", arg1, arg2) + } + { + arg1, arg2 := sa.ListenPacketArgs() + output["ListenPacket"] = fmt.Sprintf("%+q %+q", arg1, arg2) + } + { + arg1, arg2 := sa.ListenStreamArgs() + output["ListenStream"] = fmt.Sprintf("%+q %+q", arg1, arg2) + } + + return output +} diff --git a/command/agent/host/unix.go b/command/agent/host/unix.go new file mode 100644 index 000000000000..a5fae0a58911 --- /dev/null +++ b/command/agent/host/unix.go @@ -0,0 +1,68 @@ +// +build !windows + +package host + +import ( + "strings" + "syscall" + + "golang.org/x/sys/unix" +) + +// uname returns the syscall like `uname -a` +func uname() string { + u := &unix.Utsname{} + err := unix.Uname(u) + if err != nil { + return err.Error() + } + + uname := strings.Join([]string{ + nullStr(u.Machine[:]), + nullStr(u.Nodename[:]), + nullStr(u.Release[:]), + nullStr(u.Sysname[:]), + nullStr(u.Version[:]), + }, " ") + + return uname +} + +func etcHosts() string { + return slurp("/etc/hosts") +} + +func resolvConf() string { + return slurp("/etc/resolv.conf") +} + +func nullStr(bs []byte) string { + // find the null byte + var i int + var b byte + for i, b = range bs { + if b == 0 { + break + } + } + + return string(bs[:i]) +} + +type df struct { + s *syscall.Statfs_t +} + +func makeDf(path string) (*df, error) { + var s syscall.Statfs_t + err := syscall.Statfs(path, &s) + return &df{s: &s}, err +} + +func (d *df) total() uint64 { + return d.s.Blocks * uint64(d.s.Bsize) +} + +func (d *df) available() uint64 { + return d.s.Bavail * uint64(d.s.Bsize) +} diff --git a/command/agent/host/windows.go b/command/agent/host/windows.go new file mode 100644 index 000000000000..2b15988b9a51 --- /dev/null +++ b/command/agent/host/windows.go @@ -0,0 +1,65 @@ +// +build windows + +package host + +import ( + "os" + "syscall" + "unsafe" +) + +func uname() string { + return "" +} + +func resolvConf() string { + return "" +} + +func etcHosts() string { + return "" +} + +func mountedPaths() (disks []string) { + for _, c := range "ABCDEFGHIJKLMNOPQRSTUVWXYZ" { + d := string(c) + ":\\" + _, err := os.Stat(d) + if err == nil { + disks = append(disks, d) + } + } + return disks +} + +type df struct { + size int64 + avail int64 +} + +func makeDf(path string) (*df, error) { + h, err := syscall.LoadDLL("kernel32.dll") + if err != nil { + return nil, err + } + + c, err := h.FindProc("GetDiskFreeSpaceExW") + if err != nil { + return nil, err + } + + df := &df{} + + c.Call(uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))), + uintptr(unsafe.Pointer(&df.size)), + uintptr(unsafe.Pointer(&df.avail))) + + return df, nil +} + +func (d *df) total() uint64 { + return uint64(d.size) +} + +func (d *df) available() uint64 { + return uint64(d.avail) +} diff --git a/command/agent/http.go b/command/agent/http.go index ae715e8bd41b..87b51c0aca89 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -291,6 +291,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.mux.HandleFunc("/v1/agent/servers", s.wrap(s.AgentServersRequest)) s.mux.HandleFunc("/v1/agent/keyring/", s.wrap(s.KeyringOperationRequest)) s.mux.HandleFunc("/v1/agent/health", s.wrap(s.HealthRequest)) + s.mux.HandleFunc("/v1/agent/host", s.wrap(s.AgentHostRequest)) // Monitor is *not* an untrusted endpoint despite the log contents // potentially containing unsanitized user input. Monitor, like @@ -661,6 +662,7 @@ func (s *HTTPServer) parseToken(req *http.Request, token *string) { } // parse is a convenience method for endpoints that need to parse multiple flags +// It sets r to the region and b to the QueryOptions in req func (s *HTTPServer) parse(resp http.ResponseWriter, req *http.Request, r *string, b *structs.QueryOptions) bool { s.parseRegion(req, r) s.parseToken(req, &b.AuthToken) diff --git a/command/debug.go b/command/debug.go index 46ae6cd9da39..063b5de33bbc 100644 --- a/command/debug.go +++ b/command/debug.go @@ -281,6 +281,7 @@ func (c *DebugCommand) collect(client *api.Client) error { c.startMonitors(client) c.collectPeriodic(client) c.collectPprofs(client) + c.collectAgentHosts(client) return nil } @@ -346,6 +347,38 @@ func (c *DebugCommand) startMonitor(path, idKey, nodeID string, client *api.Clie } } +// collectAgentHosts calls collectAgentHost for each selected node +func (c *DebugCommand) collectAgentHosts(client *api.Client) { + for _, n := range c.nodeIDs { + c.collectAgentHost("client", n, client) + } + + for _, n := range c.serverIDs { + c.collectAgentHost("server", n, client) + } + +} + +// collectAgentHost gets the agent host data +func (c *DebugCommand) collectAgentHost(path, id string, client *api.Client) { + var host *api.HostDataResponse + var err error + if path == "server" { + host, err = client.Agent().Host(id, "", nil) + } else { + host, err = client.Agent().Host("", id, nil) + } + + path = filepath.Join(path, id) + + if err != nil { + c.writeBytes(path, "agent-host.log", []byte(err.Error())) + return + } + + c.writeJSON(path, "agent-host.json", host) +} + // collectPprofs captures the /agent/pprof for each listed node func (c *DebugCommand) collectPprofs(client *api.Client) { for _, n := range c.nodeIDs { diff --git a/nomad/client_agent_endpoint.go b/nomad/client_agent_endpoint.go index 131f5fa215dc..df7debcdb0bc 100644 --- a/nomad/client_agent_endpoint.go +++ b/nomad/client_agent_endpoint.go @@ -12,6 +12,7 @@ import ( log "github.com/hashicorp/go-hclog" sframer "github.com/hashicorp/nomad/client/lib/streamframer" cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/command/agent/monitor" "github.com/hashicorp/nomad/command/agent/pprof" "github.com/hashicorp/nomad/helper" @@ -313,46 +314,18 @@ func (a *Agent) forwardFor(serverID, region string) (*serverParts, error) { } func (a *Agent) forwardMonitorClient(conn io.ReadWriteCloser, args cstructs.MonitorRequest, encoder *codec.Encoder, decoder *codec.Decoder) { - nodeID := args.NodeID - - snap, err := a.srv.State().Snapshot() - if err != nil { - handleStreamResultError(err, nil, encoder) - return - } + // Get the Connection to the client either by fowarding to another server + // or creating direct stream - node, err := snap.NodeByID(nil, nodeID) + state, srv, err := a.findClientConn(args.NodeID) if err != nil { handleStreamResultError(err, helper.Int64ToPtr(500), encoder) return } - if node == nil { - err := fmt.Errorf("Unknown node %q", nodeID) - handleStreamResultError(err, helper.Int64ToPtr(400), encoder) - return - } - - if err := nodeSupportsRpc(node); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(400), encoder) - return - } - - // Get the Connection to the client either by fowarding to another server - // or creating direct stream var clientConn net.Conn - state, ok := a.srv.getNodeConn(nodeID) - if !ok { - // Determine the server that has a connection to the node - srv, err := a.srv.serverWithNodeConn(nodeID, a.srv.Region()) - if err != nil { - var code *int64 - if structs.IsErrNoNodeConn(err) { - code = helper.Int64ToPtr(404) - } - handleStreamResultError(err, code, encoder) - return - } + + if state == nil { conn, err := a.srv.streamingRpc(srv, "Agent.Monitor") if err != nil { handleStreamResultError(err, nil, encoder) @@ -402,54 +375,128 @@ func (a *Agent) forwardMonitorServer(conn io.ReadWriteCloser, server *serverPart structs.Bridge(conn, serverConn) return } - func (a *Agent) forwardProfileClient(args *structs.AgentPprofRequest, reply *structs.AgentPprofResponse) error { - nodeID := args.NodeID + state, srv, err := a.findClientConn(args.NodeID) + + if err != nil { + return err + } + + if srv != nil { + return a.srv.forwardServer(srv, "Agent.Profile", args, reply) + } + + // NodeRpc + rpcErr := NodeRpc(state.Session, "Agent.Profile", args, reply) + if rpcErr != nil { + return rpcErr + } + + return nil +} + +// Host returns data about the agent's host system for the `debug` command. +func (a *Agent) Host(args *structs.HostDataRequest, reply *structs.HostDataResponse) error { + + aclObj, err := a.srv.ResolveToken(args.AuthToken) + if err != nil { + return err + } + if (aclObj != nil && !aclObj.AllowAgentRead()) || + (aclObj == nil && !a.srv.config.EnableDebug) { + return structs.ErrPermissionDenied + } + + // Forward to different region if necessary + // this would typically be done in a.srv.forward() but since + // we are targeting a specific server, not just the leader + // we must manually handle region forwarding here. + region := args.RequestRegion() + if region == "" { + return fmt.Errorf("missing target RPC") + } + + if region != a.srv.config.Region { + // Mark that we are forwarding + args.SetForwarded() + return a.srv.forwardRegion(region, "Agent.Host", args, reply) + } + + // Targeting a client node, forward request to node + if args.NodeID != "" { + client, srv, err := a.findClientConn(args.NodeID) + + if err != nil { + return err + } + + if srv != nil { + return a.srv.forwardServer(srv, "Agent.Host", args, reply) + } + + return NodeRpc(client.Session, "Agent.Host", args, reply) + } + + // Handle serverID not equal to ours + if args.ServerID != "" { + srv, err := a.forwardFor(args.ServerID, region) + if err != nil { + return err + } + if srv != nil { + return a.srv.forwardServer(srv, "Agent.Host", args, reply) + } + } + + data, err := host.MakeHostData() + if err != nil { + return err + } + + reply.AgentID = a.srv.serf.LocalMember().Name + reply.HostData = data + return nil +} +// findClientConn is a helper that returns a connection to the client node or, if the client +// is connected to a different server, a serverParts describing the server to which the +// client bound RPC should be forwarded. +func (a *Agent) findClientConn(nodeID string) (*nodeConnState, *serverParts, error) { snap, err := a.srv.State().Snapshot() if err != nil { - return structs.NewErrRPCCoded(500, err.Error()) + return nil, nil, structs.NewErrRPCCoded(500, err.Error()) } node, err := snap.NodeByID(nil, nodeID) if err != nil { - return structs.NewErrRPCCoded(500, err.Error()) + return nil, nil, structs.NewErrRPCCoded(500, err.Error()) } if node == nil { err := fmt.Errorf("Unknown node %q", nodeID) - return structs.NewErrRPCCoded(404, err.Error()) + return nil, nil, structs.NewErrRPCCoded(404, err.Error()) } if err := nodeSupportsRpc(node); err != nil { - return structs.NewErrRPCCoded(400, err.Error()) + return nil, nil, structs.NewErrRPCCoded(400, err.Error()) } // Get the Connection to the client either by fowarding to another server // or creating direct stream state, ok := a.srv.getNodeConn(nodeID) - if !ok { - // Determine the server that has a connection to the node - srv, err := a.srv.serverWithNodeConn(nodeID, a.srv.Region()) - if err != nil { - code := 500 - if structs.IsErrNoNodeConn(err) { - code = 404 - } - return structs.NewErrRPCCoded(code, err.Error()) - } + if ok { + return state, nil, nil + } - rpcErr := a.srv.forwardServer(srv, "Agent.Profile", args, reply) - if rpcErr != nil { - return rpcErr - } - } else { - // NodeRpc - rpcErr := NodeRpc(state.Session, "Agent.Profile", args, reply) - if rpcErr != nil { - return rpcErr + // Determine the server that has a connection to the node + srv, err := a.srv.serverWithNodeConn(nodeID, a.srv.Region()) + if err != nil { + code := 500 + if structs.IsErrNoNodeConn(err) { + code = 404 } + return nil, nil, structs.NewErrRPCCoded(code, err.Error()) } - return nil + return nil, srv, nil } diff --git a/nomad/client_agent_endpoint_test.go b/nomad/client_agent_endpoint_test.go index e61f6fc8ac98..499a0594937d 100644 --- a/nomad/client_agent_endpoint_test.go +++ b/nomad/client_agent_endpoint_test.go @@ -809,3 +809,206 @@ func TestAgentProfile_ACL(t *testing.T) { }) } } + +func TestAgentHost_Server(t *testing.T) { + t.Parallel() + + // start servers + s1, cleanup := TestServer(t, func(c *Config) { + c.BootstrapExpect = 2 + c.EnableDebug = true + }) + defer cleanup() + + s2, cleanup := TestServer(t, func(c *Config) { + c.BootstrapExpect = 2 + c.EnableDebug = true + }) + defer cleanup() + + TestJoin(t, s1, s2) + testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForLeader(t, s2.RPC) + + // determine leader and nonleader + servers := []*Server{s1, s2} + var nonLeader *Server + var leader *Server + for _, s := range servers { + if !s.IsLeader() { + nonLeader = s + } else { + leader = s + } + } + + c, cleanupC := client.TestClient(t, func(c *config.Config) { + c.Servers = []string{s2.GetConfig().RPCAddr.String()} + c.EnableDebug = true + }) + defer cleanupC() + + testutil.WaitForResult(func() (bool, error) { + nodes := s2.connectedNodes() + return len(nodes) == 1, nil + }, func(err error) { + t.Fatalf("should have a clients") + }) + + cases := []struct { + desc string + serverID string + nodeID string + origin *Server + expectedErr string + expectedAgentID string + }{ + { + desc: "remote leader", + serverID: "leader", + origin: nonLeader, + expectedAgentID: leader.serf.LocalMember().Name, + }, + { + desc: "remote server", + serverID: nonLeader.serf.LocalMember().Name, + origin: leader, + expectedAgentID: nonLeader.serf.LocalMember().Name, + }, + { + desc: "serverID is current leader", + serverID: "leader", + origin: leader, + expectedAgentID: leader.serf.LocalMember().Name, + }, + { + desc: "serverID is current server", + serverID: nonLeader.serf.LocalMember().Name, + origin: nonLeader, + expectedAgentID: nonLeader.serf.LocalMember().Name, + }, + { + desc: "serverID is unknown", + serverID: uuid.Generate(), + origin: nonLeader, + expectedErr: "unknown Nomad server", + expectedAgentID: "", + }, + { + desc: "local client", + nodeID: c.NodeID(), + origin: s2, + expectedErr: "", + expectedAgentID: c.NodeID(), + }, + { + desc: "remote client", + nodeID: c.NodeID(), + origin: s1, + expectedErr: "", + expectedAgentID: c.NodeID(), + }, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + req := structs.HostDataRequest{ + ServerID: tc.serverID, + NodeID: tc.nodeID, + QueryOptions: structs.QueryOptions{Region: "global"}, + } + + reply := structs.HostDataResponse{} + + err := tc.origin.RPC("Agent.Host", &req, &reply) + if tc.expectedErr != "" { + require.Contains(t, err.Error(), tc.expectedErr) + } else { + require.Nil(t, err) + require.NotEmpty(t, reply.HostData) + } + + require.Equal(t, tc.expectedAgentID, reply.AgentID) + }) + } +} + +func TestAgentHost_ACL(t *testing.T) { + t.Parallel() + + // start server + s, root, cleanupS := TestACLServer(t, nil) + defer cleanupS() + testutil.WaitForLeader(t, s.RPC) + + policyBad := mock.NamespacePolicy("other", "", []string{acl.NamespaceCapabilityReadFS}) + tokenBad := mock.CreatePolicyAndToken(t, s.State(), 1005, "invalid", policyBad) + + policyGood := mock.AgentPolicy(acl.PolicyRead) + tokenGood := mock.CreatePolicyAndToken(t, s.State(), 1009, "valid", policyGood) + + cases := []struct { + Name string + Token string + ExpectedErr string + }{ + { + Name: "bad token", + Token: tokenBad.SecretID, + ExpectedErr: "Permission denied", + }, + { + Name: "good token", + Token: tokenGood.SecretID, + }, + { + Name: "root token", + Token: root.SecretID, + }, + } + + for _, tc := range cases { + t.Run(tc.Name, func(t *testing.T) { + req := structs.HostDataRequest{ + QueryOptions: structs.QueryOptions{ + Namespace: structs.DefaultNamespace, + Region: "global", + AuthToken: tc.Token, + }, + } + + var resp structs.HostDataResponse + + err := s.RPC("Agent.Host", &req, &resp) + if tc.ExpectedErr != "" { + require.Equal(t, tc.ExpectedErr, err.Error()) + } else { + require.NoError(t, err) + require.NotNil(t, resp.HostData) + } + }) + } +} + +func TestAgentHost_ACLDebugRequired(t *testing.T) { + t.Parallel() + + // start server + s, cleanupS := TestServer(t, func(c *Config) { + c.EnableDebug = false + }) + defer cleanupS() + testutil.WaitForLeader(t, s.RPC) + + req := structs.HostDataRequest{ + QueryOptions: structs.QueryOptions{ + Namespace: structs.DefaultNamespace, + Region: "global", + }, + } + + var resp structs.HostDataResponse + + err := s.RPC("Agent.Host", &req, &resp) + require.Equal(t, "Permission denied", err.Error()) +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 97d3d4d062c9..b415fac59970 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -32,6 +32,7 @@ import ( "golang.org/x/crypto/blake2b" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/command/agent/pprof" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/args" @@ -1482,6 +1483,20 @@ type NodeConnQueryResponse struct { QueryMeta } +// HostDataRequest is used by /agent/host to retrieve data about the agent's host system. If +// ServerID or NodeID is specified, the request is forwarded to the remote agent +type HostDataRequest struct { + ServerID string + NodeID string + QueryOptions +} + +// HostDataResponse contains the HostData content +type HostDataResponse struct { + AgentID string + HostData *host.HostData +} + // EmitNodeEventsRequest is a request to update the node events source // with a new client-side event type EmitNodeEventsRequest struct { diff --git a/vendor/github.com/hashicorp/nomad/api/agent.go b/vendor/github.com/hashicorp/nomad/api/agent.go index b3a43d56698e..39bfb95443cd 100644 --- a/vendor/github.com/hashicorp/nomad/api/agent.go +++ b/vendor/github.com/hashicorp/nomad/api/agent.go @@ -239,6 +239,32 @@ func (a *Agent) Health() (*AgentHealthResponse, error) { return nil, fmt.Errorf("unable to unmarshal response with status %d: %v", resp.StatusCode, err) } +// Host returns debugging context about the agent's host operating system +func (a *Agent) Host(serverID, nodeID string, q *QueryOptions) (*HostDataResponse, error) { + if q == nil { + q = &QueryOptions{} + } + if q.Params == nil { + q.Params = make(map[string]string) + } + + if serverID != "" { + q.Params["server_id"] = serverID + } + + if nodeID != "" { + q.Params["node_id"] = nodeID + } + + var resp HostDataResponse + _, err := a.client.query("/v1/agent/host", &resp, q) + if err != nil { + return nil, err + } + + return &resp, nil +} + // Monitor returns a channel which will receive streaming logs from the agent // Providing a non-nil stopCh can be used to close the connection and stop log streaming func (a *Agent) Monitor(stopCh <-chan struct{}, q *QueryOptions) (<-chan *StreamFrame, <-chan error) { @@ -438,3 +464,22 @@ type AgentHealth struct { // Message describes why the agent is unhealthy Message string `json:"message"` } + +type HostData struct { + OS string + Network []map[string]string + ResolvConf string + Hosts string + Environment map[string]string + Disk map[string]DiskUsage +} + +type DiskUsage struct { + DiskMB int64 + UsedMB int64 +} + +type HostDataResponse struct { + AgentID string + HostData *HostData `json:",omitempty"` +}