Skip to content

Commit

Permalink
libct/capabilities: create capabilities map based on current environment
Browse files Browse the repository at this point in the history
Commit 5fb831a changed the behavior of runc
to match the OCI runtime spec, which now describes that unknown or unavailable
capabilities should be ignored.

While this change addressed situations where a capability was requested that's
not supported by the current kernel ("unknown capabilities"), it did not take
into account situations where the kernel *supports* a capability, but that
capability is not *available* in the current environment.

This causes issues if, for example, runc is running in a docker-in-docker setup,
and the outer container does not have all known capabilities enabled, either
on purpose (for example, Talos version 0.13 drops two capabilities (kexec + module
loading) from all processes but PID 1), or because the outer container was created
by an older version of docker or runc, which did not yet support newer capabilities.

This patch attempts to address this problem by limiting the list of "known" capa-
bilities to the set of effective capabilities for the current process. This code
is based on the code in containerd's "caps" package, with some modifications:

- the full list of capabilities uses github.com/syndtr/gocapability, instead of
  a self-defined list. Containerd removed the use of github.com/syndtr/gocapability,
  but this dependency is still in use in runc, so this change makes it a closer
  match to the current code.
- functions were un-exported, as we don't intend them to be used externally.
- a sync.Once was added to the .current() function, so that /proc/self/status
  is only parsed once. This assumes effective capabilities do not change during
  runc's lifecycle.

There are some things left to be looked at:

1. current() may return an error when failing to parse /proc/self/status, but this
   error is currently ignored. If an error occurs in this code, it will mean that
   *no* capabilities are known. While this will be logged as warning when attempting
   to apply capabilities, it's not a very desirable situation. We'll have to decide
   what to do in that situation, which could be "panic" (runc unable to run success-
   fully), or "fall back to a safe/default list".
2. the current code applies the same list (effective caps) to every "type" (ambient,
   inheritable, bounding, ...). When applying capabilities, should each of those
   types in the container's spec be limited to the _corresponding_ type in the
   current process's capabilities?
3. integration test: we may want an integration test for this.
4. do we want to upstream this functionality to github.com/syndtr/gocapability ?

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
  • Loading branch information
thaJeztah committed Oct 8, 2021
1 parent 3a5223d commit f30ff03
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 14 deletions.
17 changes: 3 additions & 14 deletions libcontainer/capabilities/capabilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package capabilities

import (
"sort"
"strings"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
Expand All @@ -15,8 +14,7 @@ import (
const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT

var (
capabilityMap map[string]capability.Cap
capTypes = []capability.CapType{
capTypes = []capability.CapType{
capability.BOUNDING,
capability.PERMITTED,
capability.INHERITABLE,
Expand All @@ -25,16 +23,6 @@ var (
}
)

// init fills capabilityMap with one entry per capability returned by
// capability.List() whose value does not exceed capability.CAP_LAST_CAP.
// Keys are the capability's String() form, upper-cased and prefixed with
// "CAP_".
func init() {
	capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1)
	for _, known := range capability.List() {
		if known <= capability.CAP_LAST_CAP {
			name := "CAP_" + strings.ToUpper(known.String())
			capabilityMap[name] = known
		}
	}
}

// New creates a new Caps from the given Capabilities config. Unknown Capabilities
// or Capabilities that are unavailable in the current environment are ignored,
// printing a warning instead.
Expand Down Expand Up @@ -69,6 +57,7 @@ func New(capConfig *configs.Capabilities) (*Caps, error) {
// are not returned, but appended to unknownCaps.
func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap {
var out []capability.Cap
capabilityMap, _ := current()
for _, c := range caps {
if v, ok := capabilityMap[c]; !ok {
unknownCaps[c] = struct{}{}
Expand Down Expand Up @@ -102,7 +91,7 @@ func (c *Caps) ApplyBoundingSet() error {
return c.pid.Apply(capability.BOUNDING)
}

// Apply sets all the capabilities for the current process in the config.
// ApplyCaps sets all the capabilities for the current process in the config.
func (c *Caps) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
for _, g := range capTypes {
Expand Down
105 changes: 105 additions & 0 deletions libcontainer/capabilities/capabilities_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package capabilities

import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"sync"

"github.com/syndtr/gocapability/capability"
)

// fromBitmap parses a uint64 bitmap into a capability map. Bit i of v
// corresponds to capability.Cap(i); every set bit whose String() is not
// "unknown" is added to the result under the key "CAP_" + the upper-cased
// name. Unknown cap numbers are ignored.
//
// The returned map is always non-nil (it is empty when no known bit is set).
func fromBitmap(v uint64) map[string]capability.Cap {
	// The map must be allocated up front: assigning into a nil map panics.
	res := make(map[string]capability.Cap)
	for i := 0; i <= 63; i++ {
		if (v>>i)&0x1 == 0 {
			continue
		}
		c := capability.Cap(i)
		if s := c.String(); s != "unknown" {
			res["CAP_"+strings.ToUpper(s)] = c
		}
	}
	return res
}

// parseProcPIDStatus reads r line by line, in the "Key: value" layout of a
// /proc/<PID>/status file, and returns the capability bitmaps it finds.
//
// Only the keys CapInh, CapPrm, CapEff, CapBnd and CapAmb are considered;
// their values are parsed as hexadecimal uint64 and stored under the matching
// capability.CapType. Lines without a ":" separator and lines with any other
// key are skipped. Keys that never appear in r are absent from the result.
//
// An error is returned if one of the recognised values is not valid
// hexadecimal (the underlying strconv error is wrapped), or if reading from r
// fails.
func parseProcPIDStatus(r io.Reader) (map[capability.CapType]uint64, error) {
	res := make(map[capability.CapType]uint64)
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		pair := strings.SplitN(line, ":", 2)
		if len(pair) != 2 {
			continue
		}
		k := strings.TrimSpace(pair[0])
		v := strings.TrimSpace(pair[1])

		// Map the status key to its capability set; anything else is not
		// a capability line and is skipped before attempting to parse it.
		var capType capability.CapType
		switch k {
		case "CapInh":
			capType = capability.INHERITABLE
		case "CapPrm":
			capType = capability.PERMITTED
		case "CapEff":
			capType = capability.EFFECTIVE
		case "CapBnd":
			capType = capability.BOUNDING
		case "CapAmb":
			capType = capability.AMBIENT
		default:
			continue
		}

		ui64, err := strconv.ParseUint(v, 16, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse line %q: %w", line, err)
		}
		res[capType] = ui64
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return res, nil
}

var (
	// curCaps caches the capability map computed by the first call to
	// current(). It stays nil if that computation failed.
	curCaps map[string]capability.Cap
	// curCapsErr caches the error (if any) from the first call to current().
	curCapsErr error
	// curCapsOnce ensures /proc/self/status is opened and parsed only once.
	curCapsOnce sync.Once
)

// current returns a map of the effective known caps of the current process.
//
// The map is built from the CapEff entry of /proc/self/status on the first
// call; the result and any error are cached and returned unchanged by every
// later call. On error the returned map is nil.
func current() (map[string]capability.Cap, error) {
	curCapsOnce.Do(func() {
		// Use a closure-local err and assign it to curCapsErr explicitly:
		// writing `f, curCapsErr := ...` here would declare a new local
		// variable that shadows the package-level one and lose the error.
		f, err := os.Open("/proc/self/status")
		if err != nil {
			curCapsErr = err
			return
		}
		defer f.Close()

		caps, err := parseProcPIDStatus(f)
		if err != nil {
			curCapsErr = err
			return
		}
		curCaps = fromBitmap(caps[capability.EFFECTIVE])
	})
	return curCaps, curCapsErr
}

0 comments on commit f30ff03

Please sign in to comment.