Skip to content

Commit

Permalink
Merge pull request opencontainers#3546 from kolyshkin/criu-add-ignore…
Browse files Browse the repository at this point in the history
…-cgroup

checkpoint/restore: implement --manage-cgroups-mode ignore
  • Loading branch information
mrunalp committed Jan 27, 2023
2 parents a1c51c5 + c4aa452 commit 0147921
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 101 deletions.
104 changes: 57 additions & 47 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ checkpointed.`,
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: soft|full|strict|ignore (default: soft)"},
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
},
Expand Down Expand Up @@ -67,17 +67,6 @@ checkpointed.`,
return err
}

// these are the mandatory criu options for a container
if err := setPageServer(context, options); err != nil {
return err
}
if err := setManageCgroupsMode(context, options); err != nil {
return err
}
if err := setEmptyNsMask(context, options); err != nil {
return err
}

err = container.Checkpoint(options)
if err == nil && !(options.LeaveRunning || options.PreDump) {
// Destroy the container unless we tell CRIU to keep it.
Expand Down Expand Up @@ -119,59 +108,80 @@ func prepareImagePaths(context *cli.Context) (string, string, error) {
return imagePath, parentPath, nil
}

func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) error {
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
imagePath, parentPath, err := prepareImagePaths(context)
if err != nil {
return nil, err
}

opts := &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
ParentImage: parentPath,
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
FileLocks: context.Bool("file-locks"),
PreDump: context.Bool("pre-dump"),
AutoDedup: context.Bool("auto-dedup"),
LazyPages: context.Bool("lazy-pages"),
StatusFd: context.Int("status-fd"),
LsmProfile: context.String("lsm-profile"),
LsmMountContext: context.String("lsm-mount-context"),
}

// CRIU options below may or may not be set.

if psOpt := context.String("page-server"); psOpt != "" {
address, port, err := net.SplitHostPort(psOpt)

if err != nil || address == "" || port == "" {
return errors.New("Use --page-server ADDRESS:PORT to specify page server")
return nil, errors.New("Use --page-server ADDRESS:PORT to specify page server")
}
portInt, err := strconv.Atoi(port)
if err != nil {
return errors.New("Invalid port number")
return nil, errors.New("Invalid port number")
}
options.PageServer = libcontainer.CriuPageServerInfo{
opts.PageServer = libcontainer.CriuPageServerInfo{
Address: address,
Port: int32(portInt),
}
}
return nil
}

func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) error {
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
switch cgOpt {
case "soft":
options.ManageCgroupsMode = criu.CriuCgMode_SOFT
case "full":
options.ManageCgroupsMode = criu.CriuCgMode_FULL
case "strict":
options.ManageCgroupsMode = criu.CriuCgMode_STRICT
default:
return errors.New("Invalid manage cgroups mode")
}
switch context.String("manage-cgroups-mode") {
case "":
// do nothing
case "soft":
opts.ManageCgroupsMode = criu.CriuCgMode_SOFT
case "full":
opts.ManageCgroupsMode = criu.CriuCgMode_FULL
case "strict":
opts.ManageCgroupsMode = criu.CriuCgMode_STRICT
case "ignore":
opts.ManageCgroupsMode = criu.CriuCgMode_IGNORE
default:
return nil, errors.New("Invalid manage-cgroups-mode value")
}
return nil
}

var namespaceMapping = map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: unix.CLONE_NEWNET,
}

func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
/* Runc doesn't manage network devices and their configuration */
// runc doesn't manage network devices and their configuration.
nsmask := unix.CLONE_NEWNET

for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return fmt.Errorf("namespace %q is not supported", ns)
if context.IsSet("empty-ns") {
namespaceMapping := map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: unix.CLONE_NEWNET,
}

for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return nil, fmt.Errorf("namespace %q is not supported", ns)
}
nsmask |= f
}
nsmask |= f
}

options.EmptyNs = uint32(nsmask)
return nil
opts.EmptyNs = uint32(nsmask)

return opts, nil
}
7 changes: 2 additions & 5 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1560,11 +1560,8 @@ func (c *Container) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return err
}

if cgroups.IsCgroup2UnifiedMode() {
return nil
}
// the stuff below is cgroupv1-specific

// TODO(@kolyshkin): should we use c.cgroupManager.GetPaths()
// instead of reading /proc/pid/cgroup?
path := fmt.Sprintf("/proc/%d/cgroup", pid)
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion man/runc-checkpoint.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ together with **criu lazy-pages**. See
: Do a pre-dump, i.e. dump container's memory information only, leaving the
container running. See [criu iterative migration](https://criu.org/Iterative_migration).

**--manage-cgroups-mode** **soft**|**full**|**strict**.
**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**.
: Cgroups mode. Default is **soft**. See
[criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups).

Expand Down
7 changes: 6 additions & 1 deletion man/runc-restore.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,15 @@ image files directory.
: Allow checkpoint/restore of file locks. See
[criu --file-locks option](https://criu.org/CLI/opt/--file-locks).

**--manage-cgroups-mode** **soft**|**full**|**strict**.
**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**.
: Cgroups mode. Default is **soft**. See
[criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups).

: In particular, to restore the container into a different cgroup,
**--manage-cgroups-mode ignore** must be used during both
**checkpoint** and **restore**, and the _container_id_ (or
**cgroupsPath** property in OCI config, if set) must be changed.

**--bundle**|**-b** _path_
: Path to the root of the bundle directory. Default is current directory.

Expand Down
30 changes: 1 addition & 29 deletions restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package main
import (
"os"

"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
Expand Down Expand Up @@ -53,7 +52,7 @@ using the runc checkpoint command.`,
cli.StringFlag{
Name: "manage-cgroups-mode",
Value: "",
Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'",
Usage: "cgroups mode: soft|full|strict|ignore (default: soft)",
},
cli.StringFlag{
Name: "bundle, b",
Expand Down Expand Up @@ -113,9 +112,6 @@ using the runc checkpoint command.`,
if err != nil {
return err
}
if err := setEmptyNsMask(context, options); err != nil {
return err
}
status, err := startContainer(context, CT_ACT_RESTORE, options)
if err != nil {
return err
Expand All @@ -126,27 +122,3 @@ using the runc checkpoint command.`,
return nil
},
}

// criuOptions assembles the CRIU options from the command-line flags found
// in context. The image and parent image paths are resolved through
// prepareImagePaths; if that fails, its error is returned and no options
// are produced.
func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
	imgDir, parentImg, err := prepareImagePaths(context)
	if err != nil {
		return nil, err
	}

	opts := new(libcontainer.CriuOpts)

	// Paths.
	opts.ImagesDirectory = imgDir
	opts.WorkDirectory = context.String("work-path")
	opts.ParentImage = parentImg

	// Boolean switches, copied verbatim from the flags of the same name.
	opts.LeaveRunning = context.Bool("leave-running")
	opts.TcpEstablished = context.Bool("tcp-established")
	opts.ExternalUnixConnections = context.Bool("ext-unix-sk")
	opts.ShellJob = context.Bool("shell-job")
	opts.FileLocks = context.Bool("file-locks")
	opts.PreDump = context.Bool("pre-dump")
	opts.AutoDedup = context.Bool("auto-dedup")
	opts.LazyPages = context.Bool("lazy-pages")

	// Remaining scalar settings.
	opts.StatusFd = context.Int("status-fd")
	opts.LsmProfile = context.String("lsm-profile")
	opts.LsmMountContext = context.String("lsm-mount-context")

	return opts, nil
}
66 changes: 59 additions & 7 deletions tests/integration/checkpoint.bats
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,14 @@ function simple_cr() {
# TCP port for lazy migration
port=27277

__runc checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox &
__runc checkpoint \
--lazy-pages \
--page-server 0.0.0.0:${port} \
--status-fd ${lazy_w} \
--manage-cgroups-mode=ignore \
--work-path ./work-dir \
--image-path ./image-dir \
test_busybox &
cpt_pid=$!

# wait for lazy page server to be ready
Expand All @@ -246,14 +253,18 @@ function simple_cr() {
lp_pid=$!

# Restore lazily from checkpoint.
# The restored container needs a different name (as well as systemd
# unit name, in case systemd cgroup driver is used) as the checkpointed
# container is not yet destroyed. It is only destroyed at that point
# in time when the last page is lazily transferred to the destination.
#
# The restored container needs a different name and a different cgroup
# (and a different systemd unit name, in case systemd cgroup driver is
# used) as the checkpointed container is not yet destroyed. It is only
# destroyed at that point in time when the last page is lazily
# transferred to the destination.
#
# Killing the CRIU on the checkpoint side will let the container
# continue to run if the migration failed at some point.
[ -v RUNC_USE_SYSTEMD ] && set_cgroups_path
runc_restore_with_pipes ./image-dir test_busybox_restore --lazy-pages
runc_restore_with_pipes ./image-dir test_busybox_restore \
--lazy-pages \
--manage-cgroups-mode=ignore

wait $cpt_pid

Expand Down Expand Up @@ -405,3 +416,44 @@ function simple_cr() {
# busybox should be back up and running
testcontainer test_busybox running
}

# Checkpoint a container with --manage-cgroups-mode ignore, change the cgroup
# path in the config, restore with the same mode, and verify that the restored
# container's init process lives in the new cgroup while the old one is gone.
@test "checkpoint then restore into a different cgroup (via --manage-cgroups-mode ignore)" {
	set_resources_limit
	set_cgroups_path
	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]
	testcontainer test_busybox running

	local orig_path
	orig_path=$(get_cgroup_path "pids")
	# Check that the cgroup exists.
	test -d "$orig_path"

	runc checkpoint --work-path ./work-dir --manage-cgroups-mode ignore test_busybox
	# Show any errors from the dump log (with 5 lines of leading context);
	# "|| true" keeps a no-match grep from failing the test.
	grep -B 5 Error ./work-dir/dump.log || true
	[ "$status" -eq 0 ]
	testcontainer test_busybox checkpointed
	# Check that the cgroup is gone.
	# NB: "[ ! -d ... ]" is used rather than "! test -d ...", because a
	# command negated with "!" never fails the test unless it is the
	# last command in it.
	[ ! -d "$orig_path" ]

	# Restore into a different cgroup.
	set_cgroups_path # Changes the path.
	runc restore -d --manage-cgroups-mode ignore --pid-file pid \
		--work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
	# Same as above, for the restore log.
	grep -B 5 Error ./work-dir/restore.log || true
	[ "$status" -eq 0 ]
	testcontainer test_busybox running

	# Check that the old cgroup path doesn't exist.
	[ ! -d "$orig_path" ]

	# Check that the new path exists.
	local new_path
	new_path=$(get_cgroup_path "pids")
	test -d "$new_path"

	# Check that container's init is in the new cgroup.
	# The pid file was written by "runc restore --pid-file pid" above;
	# the trailing "$" anchors the match at the end of a line.
	local pid
	pid=$(cat "pid")
	grep -q "${REL_CGROUPS_PATH}$" "/proc/$pid/cgroup"
}
30 changes: 19 additions & 11 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,27 @@ function set_cgroups_path() {
update_config '.linux.cgroupsPath |= "'"${OCI_CGROUPS_PATH}"'"'
}

# Get a value from a cgroup file.
function get_cgroup_value() {
local source=$1
local cgroup var current

# Get a path to cgroup directory, based on controller name.
# Parameters:
# $1: controller name (like "pids") or a file name (like "pids.max").
function get_cgroup_path() {
if [ -v CGROUP_V2 ]; then
cgroup=$CGROUP_PATH
else
var=${source%%.*} # controller name (e.g. memory)
var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH)
eval cgroup=\$"${var}${REL_CGROUPS_PATH}"
echo "$CGROUP_PATH"
return
fi
cat "$cgroup/$source"

local var cgroup
var=${1%%.*} # controller name (e.g. memory)
var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH)
eval cgroup=\$"${var}${REL_CGROUPS_PATH}"
echo "$cgroup"
}

# Print the contents of a cgroup file.
# Parameters:
#  $1: cgroup file name (like "pids.max"); it is also passed to
#      get_cgroup_path to find the directory the file is read from.
function get_cgroup_value() {
	local file=$1 dir
	dir="$(get_cgroup_path "$file")"
	cat "$dir/$file"
}

# Helper to check if a value in a cgroup file matches the expected one.
Expand Down

0 comments on commit 0147921

Please sign in to comment.