Skip to content

Commit

Permalink
fix pexpect child shutdown race (#1331) (#1336)
Browse files Browse the repository at this point in the history
* ensure that any stdout data produced by the child after the last `expect()` call is consumed

Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com>
  • Loading branch information
TheRealHaoLiu and nitzmahone authored Feb 13, 2024
1 parent c311e81 commit c129775
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 6 deletions.
13 changes: 11 additions & 2 deletions ansible_runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,11 @@ def run(self):
child.logfile_read = stdout_handle
except pexpect.exceptions.ExceptionPexpect as e:
child = collections.namedtuple(
'MissingProcess', 'exitstatus isalive close'
'MissingProcess', 'exitstatus isalive expect close'
)(
exitstatus=127,
isalive=lambda: False,
expect=lambda *args, **kwargs: None,
close=lambda: None,
)

Expand Down Expand Up @@ -339,9 +340,17 @@ def _decode(x):
Runner.handle_termination(child.pid, is_cancel=False)
self.timed_out = True

# fix for https://github.com/ansible/ansible-runner/issues/1330
# Since we're (ab)using pexpect's logging callback as our source of stdout data, we need to pump the stream one last
# time, in case any new output was written by the child between the last return from expect and its termination. Ideally
# this would have an arbitrarily large timeout value as well, in case a ridiculous amount of data was written, but just
# invoking one last pump should cover the vast majority of real-world cases.
child.expect(pexpect.EOF, timeout=5)

# close the child to ensure no more output will be written before we close the stream interposers
child.close()
stdout_handle.close()
stderr_handle.close()
child.close()
self.rc = child.exitstatus if not (self.timed_out or self.canceled) else 254

if self.canceled:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# part of the regression test for https://github.com/ansible/ansible-runner/issues/1330

- hosts: localhost
gather_facts: no
tasks:
# sleep significantly longer than the configured pexpect timeout; the cancel callback will inject
# additional delay before the next process status sampling interval that can cause further output to be lost;
# if all is well, we'll do another loop over the child output until it's all been consumed...
- raw: sleep 2
22 changes: 18 additions & 4 deletions test/integration/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
import json
import os
import re
import pytest
import six
import sys
import time

from ansible_runner import Runner
from test.utils.common import iterate_timeout

from ansible_runner.exceptions import AnsibleRunnerException
import pytest

from test.utils.common import iterate_timeout
from ansible_runner import Runner, run
from ansible_runner.exceptions import AnsibleRunnerException


@pytest.mark.xfail(reason='Test is unstable')
Expand Down Expand Up @@ -281,3 +282,16 @@ def test_set_extra_vars(rc):
with open(os.path.join(rc.artifact_dir, 'stdout')) as f:
if 'hello there' in f.read():
break


# regression test for https://github.com/ansible/ansible-runner/issues/1330
def test_pexpect_timeout(project_fixtures):
r = run(
private_data_dir=str(project_fixtures / 'pexpect_timeout_data_loss'),
playbook='pb.yml',
settings={"pexpect_timeout": 0.1}, # set the pexpect timeout very low
cancel_callback=lambda: time.sleep(3) or False, # induce enough delay in the child polling loop that the child will exit before being polled again
)

# ensure we got playbook_on_stats; if pexpect ate it, we won't...
assert any(ev for ev in r.events if ev.get('event', None) == 'playbook_on_stats')

0 comments on commit c129775

Please sign in to comment.