Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race checking for process exit and waiting for exec fifo #2185

Merged
merged 2 commits into from
Dec 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 43 additions & 40 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,22 +265,24 @@ func (c *linuxContainer) Exec() error {

// exec waits for the container's exec FIFO (execFifoFilename under c.root)
// to be opened for reading and processes the result, returning an error if
// the init process is found to be dead first.
//
// NOTE(review): this span is rendered diff output with removed and added
// lines interleaved and no +/- markers, so it does not read as a single
// function body. The lines from `fifoOpen := make(...)` through the first
// `return err` look like the pre-change implementation; the lines from
// `pid := ...` onward look like its replacement. Confirm against the merged
// file before relying on either half.
func (c *linuxContainer) exec() error {
path := filepath.Join(c.root, execFifoFilename)

// Presumably the pre-change body: race a process-exit poller against the
// FIFO open, using fifoOpen as the signal that stops the poller.
fifoOpen := make(chan struct{})
select {
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
return errors.New("container process is already dead")
case result := <-awaitFifoOpen(path):
close(fifoOpen)
if result.err != nil {
return result.err
}
f := result.file
defer f.Close()
if err := readFromExecFifo(f); err != nil {
return err
// Presumably the post-change body: start one blocking open in the
// background, then alternate between waiting for it and checking the init
// process state every 100ms.
pid := c.initProcess.pid()
blockingFifoOpenCh := awaitFifoOpen(path)
for {
select {
case result := <-blockingFifoOpenCh:
return handleFifoResult(result)

case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
return errors.New("container process is already dead")
}
return nil
}
}
// Presumably a removed line from the pre-change body; the FIFO removal
// now happens in handleFifoResult via os.Remove(f.Name()).
return os.Remove(path)
}
}

Expand All @@ -295,38 +297,39 @@ func readFromExecFifo(execFifo io.Reader) error {
return nil
}

// awaitProcessExit watches the process identified by pid from a background
// goroutine and returns a channel that is closed once the process is
// considered gone.
//
// Every 100ms the goroutine calls system.Stat(pid); if that call fails or
// reports the Zombie state, the returned channel is closed and the goroutine
// ends. The goroutine also ends, without closing the returned channel, as
// soon as a receive on exit succeeds (a value is sent or exit is closed).
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
	dead := make(chan struct{})
	poll := func() {
		for {
			select {
			case <-time.After(100 * time.Millisecond):
				// A Stat failure is treated the same as a zombie:
				// either way the process can no longer make progress.
				if st, err := system.Stat(pid); err != nil || st.State == system.Zombie {
					close(dead)
					return
				}
			case <-exit:
				return
			}
		}
	}
	go poll()
	return dead
}

// awaitFifoOpen opens the exec FIFO at path for reading in a background
// goroutine and returns a channel on which the single open result is
// delivered.
//
// The open is delegated to fifoOpen with block=true, i.e. without
// O_NONBLOCK. The channel is unbuffered, so the goroutine stays parked on
// the send until the caller receives the result.
func awaitFifoOpen(path string) <-chan openResult {
	fifoOpened := make(chan openResult)
	go func() {
		// Open exactly once and send exactly once. Callers receive a single
		// value, so a second open/send pair would block forever and leak
		// both the goroutine and any file it managed to open.
		fifoOpened <- fifoOpen(path, true)
	}()
	return fifoOpened
}

// fifoOpen opens the file at path read-only and wraps the outcome in an
// openResult.
//
// When block is false the open is performed with O_NONBLOCK added to the
// flags; when block is true plain O_RDONLY is used. On failure the result
// carries only err, wrapped with the message "open exec fifo for reading";
// on success it carries only the opened file, which the caller must close.
func fifoOpen(path string, block bool) openResult {
	mode := os.O_RDONLY | syscall.O_NONBLOCK
	if block {
		mode = os.O_RDONLY
	}
	file, openErr := os.OpenFile(path, mode, 0)
	if openErr == nil {
		return openResult{file: file}
	}
	return openResult{err: newSystemErrorWithCause(openErr, "open exec fifo for reading")}
}

func handleFifoResult(result openResult) error {
if result.err != nil {
return result.err
}
f := result.file
defer f.Close()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this deferred close would function correctly w.r.t. the os.Remove call a few lines down.
On Windows, we cannot rename/delete/move a file with open file handles.

Should the code be refactored so that this works on all platforms?

Thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This moves existing code that was previously on line 279. It is also explicitly in a file that only runs on Linux.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should have placed my previous comment on the original code.

if err := readFromExecFifo(f); err != nil {
return err
}
return os.Remove(f.Name())
}

type openResult struct {
file *os.File
err error
Expand Down
12 changes: 10 additions & 2 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,16 @@ function wait_for_container() {
local attempts=$1
local delay=$2
local cid=$3
# optionally wait for a specific status
local wait_for_status="${4:-}"
local i

for ((i = 0; i < attempts; i++)); do
runc state $cid
if [[ "$status" -eq 0 ]]; then
return 0
if [[ "${output}" == *"${wait_for_status}"* ]]; then
return 0
fi
fi
sleep $delay
done
Expand All @@ -237,12 +241,16 @@ function wait_for_container_inroot() {
local attempts=$1
local delay=$2
local cid=$3
# optionally wait for a specific status
local wait_for_status="${4:-}"
local i

for ((i = 0; i < attempts; i++)); do
ROOT=$4 runc state $cid
if [[ "$status" -eq 0 ]]; then
return 0
if [[ "${output}" == *"${wait_for_status}"* ]]; then
return 0
fi
fi
sleep $delay
done
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/tty.bats
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ EOF
__runc run test_busybox
) &

wait_for_container 15 1 test_busybox
wait_for_container 15 1 test_busybox running
testcontainer test_busybox running

# Kill the container.
Expand Down