Skip to content

Commit

Permalink
rkt: logging enhancements (#4044)
Browse files Browse the repository at this point in the history
* Added extra debug logging; extended timeout; added jitter.

* small log changes

* increase timeout

* remove unneccessary uuid
  • Loading branch information
angrycub authored and dadgar committed Mar 28, 2018
1 parent f28370f commit 33e57bf
Showing 1 changed file with 53 additions and 15 deletions.
68 changes: 53 additions & 15 deletions client/driver/rkt.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"io/ioutil"
"log"
"math/rand"
"net"
"os"
"os/exec"
Expand Down Expand Up @@ -61,7 +62,7 @@ const (
rktCmd = "rkt"

// rktNetworkDeadline is how long to wait for container network to start
rktNetworkDeadline = 5 * time.Second
rktNetworkDeadline = 1 * time.Minute
)

// RktDriver is a driver for running images via Rkt
Expand Down Expand Up @@ -119,18 +120,25 @@ type rktPID struct {
}

// Retrieve pod status for the pod with the given UUID.
func rktGetStatus(uuid string) (*rktv1.Pod, error) {
func rktGetStatus(uuid string, logger *log.Logger) (*rktv1.Pod, error) {
statusArgs := []string{
"status",
"--format=json",
uuid,
}
var outBuf bytes.Buffer
var outBuf, errBuf bytes.Buffer
cmd := exec.Command(rktCmd, statusArgs...)
cmd.Stdout = &outBuf
cmd.Stderr = ioutil.Discard
cmd.Stderr = &errBuf
if err := cmd.Run(); err != nil {
return nil, err
if outBuf.Len() > 0 {
logger.Printf("[DEBUG] driver.rkt: status output for UUID %s: %q", uuid, elide(outBuf))
}
if errBuf.Len() == 0 {
return nil, err
}
logger.Printf("[DEBUG] driver.rkt: status error output for UUID %s: %q", uuid, elide(errBuf))
return nil, fmt.Errorf("%s. stderr: %q", err, elide(errBuf))
}
var status rktv1.Pod
if err := json.Unmarshal(outBuf.Bytes(), &status); err != nil {
Expand Down Expand Up @@ -159,11 +167,14 @@ func rktGetManifest(uuid string) (*appcschema.PodManifest, error) {
return &manifest, nil
}

func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string) (*cstructs.DriverNetwork, error) {
func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, logger *log.Logger) (*cstructs.DriverNetwork, error) {
deadline := time.Now().Add(rktNetworkDeadline)
var lastErr error
try := 0

for time.Now().Before(deadline) {
if status, err := rktGetStatus(uuid); err == nil {
try++
if status, err := rktGetStatus(uuid, logger); err == nil {
for _, net := range status.Networks {
if !net.IP.IsGlobalUnicast() {
continue
Expand All @@ -183,6 +194,7 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string) (*c
return nil, lastErr
}

// This is a successful landing.
return &cstructs.DriverNetwork{
PortMap: portmap,
IP: status.Networks[0].IP.String(),
Expand All @@ -198,7 +210,9 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string) (*c
lastErr = fmt.Errorf("getting status failed: %v", err)
}

time.Sleep(400 * time.Millisecond)
waitTime := getJitteredNetworkRetryTime()
logger.Printf("[DEBUG] driver.rkt: getting network info for pod UUID %s failed attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime)
time.Sleep(waitTime)
}
return nil, fmt.Errorf("timed out, last error: %v", lastErr)
}
Expand Down Expand Up @@ -236,7 +250,7 @@ func rktRemove(uuid string) error {
cmd.Stderr = errBuf
if err := cmd.Run(); err != nil {
if msg := errBuf.String(); len(msg) > 0 {
return fmt.Errorf("error removing pod %q: %s", uuid, msg)
return fmt.Errorf("error removing pod: %s", msg)
}
return err
}
Expand Down Expand Up @@ -355,6 +369,10 @@ func (d *RktDriver) Fingerprint(req *cstructs.FingerprintRequest, resp *cstructs
return nil
}

// Output version information when the fingerprinter first sees rkt
if req.Node.Attributes[rktDriverAttr] != "1" {
d.logger.Printf("[DEBUG] driver.rkt: detect version: %s", strings.Replace(out, "\n", " ", -1))
}
resp.AddAttribute(rktDriverAttr, "1")
resp.AddAttribute("driver.rkt.version", rktMatches[1])
resp.AddAttribute("driver.rkt.appc.version", appcMatches[1])
Expand Down Expand Up @@ -627,7 +645,7 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
err, outBuf.String(), errBuf.String())
}
uuid := strings.TrimSpace(outBuf.String())
d.logger.Printf("[DEBUG] driver.rkt: pod %q for task %q prepared, UUID is: %s", img, d.taskName, uuid)
d.logger.Printf("[DEBUG] driver.rkt: pod %q for task %q prepared. (UUID: %s)", img, d.taskName, uuid)
runArgs = append(runArgs, uuid)

// The task's environment is set via --set-env flags above, but the rkt
Expand Down Expand Up @@ -677,10 +695,10 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
// Only return a driver network if *not* using host networking
var driverNetwork *cstructs.DriverNetwork
if network != "host" {
d.logger.Printf("[DEBUG] driver.rkt: retrieving network information for pod %q (UUID %s) for task %q", img, uuid, d.taskName)
driverNetwork, err = rktGetDriverNetwork(uuid, driverConfig.PortMap)
d.logger.Printf("[DEBUG] driver.rkt: retrieving network information for pod %q (UUID: %s) for task %q", img, uuid, d.taskName)
driverNetwork, err = rktGetDriverNetwork(uuid, driverConfig.PortMap, d.logger)
if err != nil && !pluginClient.Exited() {
d.logger.Printf("[WARN] driver.rkt: network status retrieval for pod %q (UUID %s) for task %q failed. Last error: %v", img, uuid, d.taskName, err)
d.logger.Printf("[WARN] driver.rkt: network status retrieval for pod %q (UUID: %s) for task %q failed. Last error: %v", img, uuid, d.taskName, err)

// If a portmap was given, this turns into a fatal error
if len(driverConfig.PortMap) != 0 {
Expand Down Expand Up @@ -820,12 +838,32 @@ func (h *rktHandle) run() {

// Remove the pod
if err := rktRemove(h.uuid); err != nil {
h.logger.Printf("[ERR] driver.rkt: error removing pod %q - must gc manually: %v", h.uuid, err)
h.logger.Printf("[ERR] driver.rkt: error removing pod (UUID: %q) - must gc manually: %v", h.uuid, err)
} else {
h.logger.Printf("[DEBUG] driver.rkt: removed pod %q", h.uuid)
h.logger.Printf("[DEBUG] driver.rkt: removed pod (UUID: %q)", h.uuid)
}

// Send the results
h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, werr)
close(h.waitCh)
}

// Create a time with a 0 to 100ms jitter for rktGetDriverNetwork retries
func getJitteredNetworkRetryTime() time.Duration {
return time.Duration(900+rand.Intn(100)) * time.Millisecond
}

// Conditionally elide a buffer to an arbitrary length
func elideToLen(inBuf bytes.Buffer, length int) bytes.Buffer {
if inBuf.Len() > length {
inBuf.Truncate(length)
inBuf.WriteString("...")
}
return inBuf
}

// Conditionally elide a buffer to an 80 character string
func elide(inBuf bytes.Buffer) string {
tempBuf := elideToLen(inBuf, 80)
return tempBuf.String()
}

0 comments on commit 33e57bf

Please sign in to comment.