Skip to content

Commit

Permalink
criu swrk: return child error to caller
Browse files Browse the repository at this point in the history
In the podman CI we are seeing a weird flake during criu version
detection[1]. The write to the socket just fails with broken pipe.
The logical thing to assume here is that the child exited. However, the
current code neither reports the child's error from Wait nor tries to
capture its output. This commit fixes both. The cleanup error is
now added to the returned error so the caller sees both.

As errors.Join from the standard library is used, bump the minimum Go
version to 1.20.

[1] containers/podman#18856

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
  • Loading branch information
Luap99 authored and rst0git committed Jul 15, 2024
1 parent ae18f42 commit 64ad5bb
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 11 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ jobs:
strategy:
fail-fast: false
matrix:
go-version: [1.18.x, 1.19.x, 1.20.x]
# Versions must be quoted: YAML parses a bare 1.20 as the float 1.2, which makes setup-go install Go 1.2.x.
go-version: ['1.20', '1.21', '1.22']
criu_branch: [master, criu-dev]

steps:

- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
# needed for codecov
fetch-depth: 0
Expand All @@ -32,7 +32,7 @@ jobs:
sudo make -C criu install-criu PREFIX=/usr
- name: Install Go ${{ matrix.go-version }}
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/checkpoint-restore/go-criu/v7

go 1.18
go 1.20

require (
github.com/spf13/cobra v1.8.0
Expand Down
29 changes: 24 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package criu

import (
"bytes"

Check failure on line 4 in main.go

View workflow job for this annotation

GitHub Actions / test (1.2, master)

package bytes is not in std (/opt/hostedtoolcache/go/1.2.2/x64/src/bytes)

Check failure on line 4 in main.go

View workflow job for this annotation

GitHub Actions / test (1.2, criu-dev)

package bytes is not in std (/opt/hostedtoolcache/go/1.2.2/x64/src/bytes)
"errors"
"fmt"

Check failure on line 6 in main.go

View workflow job for this annotation

GitHub Actions / test (1.2, master)

package fmt is not in std (/opt/hostedtoolcache/go/1.2.2/x64/src/fmt)

Check failure on line 6 in main.go

View workflow job for this annotation

GitHub Actions / test (1.2, criu-dev)

package fmt is not in std (/opt/hostedtoolcache/go/1.2.2/x64/src/fmt)
"os"
"os/exec"
"strconv"
"strings"
"syscall"

"github.com/checkpoint-restore/go-criu/v7/rpc"
Expand All @@ -17,6 +19,7 @@ type Criu struct {
swrkCmd *exec.Cmd
swrkSk *os.File
swrkPath string
output *bytes.Buffer
}

// MakeCriu returns the Criu object required for most operations
Expand Down Expand Up @@ -44,9 +47,12 @@ func (c *Criu) Prepare() error {
srv := os.NewFile(uintptr(fds[1]), "criu-xprt-srv")
defer srv.Close()

out := new(bytes.Buffer)
args := []string{"swrk", strconv.Itoa(fds[1])}
// #nosec G204
cmd := exec.Command(c.swrkPath, args...)
cmd.Stdout = out
cmd.Stderr = out

err = cmd.Start()
if err != nil {
Expand All @@ -61,13 +67,20 @@ func (c *Criu) Prepare() error {
}

// Cleanup cleans up
func (c *Criu) Cleanup() {
func (c *Criu) Cleanup() error {
var errs []error
if c.swrkCmd != nil {
c.swrkSk.Close()
if err := c.swrkSk.Close(); err != nil {
errs = append(errs, err)
}
c.swrkSk = nil
_ = c.swrkCmd.Wait()
if err := c.swrkCmd.Wait(); err != nil {
errs = append(errs, fmt.Errorf("criu swrk failed: %w (%s)", err, strings.TrimSpace(c.output.String())))
}
c.swrkCmd = nil
c.output = nil
}
return errors.Join(errs...)
}

func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) {
Expand Down Expand Up @@ -99,7 +112,7 @@ func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) e
return nil
}

func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (*rpc.CriuResp, error) {
func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (_ *rpc.CriuResp, retErr error) {
var resp *rpc.CriuResp

req := rpc.CriuReq{
Expand All @@ -121,7 +134,13 @@ func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy N
return nil, err
}

defer c.Cleanup()
defer func() {
// append any cleanup errors to the returned error
err := c.Cleanup()
if err != nil {
retErr = errors.Join(retErr, err)
}
}()
}

for {
Expand Down
11 changes: 9 additions & 2 deletions phaul/client.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package phaul

import (
"errors"
"fmt"

"github.com/checkpoint-restore/go-criu/v7"
Expand Down Expand Up @@ -55,7 +56,7 @@ func isLastIter(iter int, stats *stats.DumpStatsEntry, prevStats *stats.DumpStat
}

// Migrate function
func (pc *Client) Migrate() error {
func (pc *Client) Migrate() (retErr error) {
criu := criu.MakeCriu()
psi := rpc.CriuPageServerInfo{
Fd: proto.Int32(int32(pc.cfg.Memfd)),
Expand All @@ -72,7 +73,13 @@ func (pc *Client) Migrate() error {
return err
}

defer criu.Cleanup()
defer func() {
// append any cleanup errors to the returned error
err := criu.Cleanup()
if err != nil {
retErr = errors.Join(retErr, err)
}
}()

imgs, err := preparePhaulImages(pc.cfg.Wdir)
if err != nil {
Expand Down

0 comments on commit 64ad5bb

Please sign in to comment.