Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2820 fix hanging child process #2824

Merged
merged 13 commits into from
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ Changelog](https://keepachangelog.com/en/1.0.0/).
- Automatic jumping to the bottom in the telemetry overview windows. #1850
- 2-second delay when the Island server starts, and it's not running on AWS. #1636
- Malformed MSSQL agent launch command. #2018
- A bug where the Log4Shell exploiter that could leave LDAP servers and child
processes running. #2820

### Security
- Log files are created with random names and secure permissions. #1761, #2775
Expand Down
28 changes: 24 additions & 4 deletions monkey/infection_monkey/exploit/log4shell_utils/ldap_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
import threading
import time
from pathlib import Path
from typing import Optional

from ldaptor.protocols.ldap.ldapserver import LDAPServer
from twisted.internet.protocol import ServerFactory

from common.common_consts.timeouts import LONG_REQUEST_TIMEOUT
from common.utils.code_utils import insecure_generate_random_string

# WARNING: It was observed that this LDAP server would raise an exception and fail to start if
# multiple Python threads attempt to start multiple LDAP servers simultaneously. It was
# thought that since each LDAP server is started in its own process, there would be no
Expand Down Expand Up @@ -90,7 +94,12 @@ def __init__(
:param storage_dir: A directory where the LDAP server can safely store files it needs during
runtime.
"""
self._reactor_startup_completed = multiprocessing.Event()

# Unlike a forked process, spawned process does not inherit threads or unnecessary file
# descriptors from the parent process. Therefore we use a spawn context for new processes,
# so we carry over as little baggage as possible from the parent process.
self._context = multiprocessing.get_context("spawn")
self._reactor_startup_completed = self._context.Event()
self._ldap_server_port = ldap_server_port
self._http_server_ip = http_server_ip
self._http_server_port = http_server_port
Expand All @@ -110,14 +119,18 @@ def run(self):
# has been stopped. To work around this, the reactor is configured and run in a separate
# process. This allows us to run multiple LDAP servers sequentially or simultaneously and
# stop each one when we're done with it.
self._server_process = multiprocessing.Process(
target=self._run_twisted_reactor, daemon=True
self._server_process = self._context.Process(
name=f"{threading.current_thread()}-LDAPServer-{insecure_generate_random_string(n=8)}",
target=self._run_twisted_reactor,
daemon=True,
)

self._server_process.start()
reactor_running = self._reactor_startup_completed.wait(REACTOR_START_TIMEOUT_SEC)

if not reactor_running:
logger.error("The LDAP server failed to start, stopping the server process...")
self.stop(timeout=LONG_REQUEST_TIMEOUT)
raise LDAPServerStartError("An unknown error prevented the LDAP server from starting")

logger.debug("The LDAP exploit server has successfully started")
Expand All @@ -134,6 +147,8 @@ def _run_twisted_reactor(self):
# successfully started.
threading.Thread(target=self._check_if_reactor_startup_completed, daemon=True).start()
reactor.run()
logger.debug("Control returned from twisted to LDAPExploitServer")
logger.debug("Exiting twisted process")

def _check_if_reactor_startup_completed(self):
from twisted.internet import reactor
Expand Down Expand Up @@ -178,7 +193,7 @@ def _output_twisted_logs_to_python_logger():
log_observer = log.PythonLoggingObserver()
log_observer.start()

def stop(self, timeout: float = None):
def stop(self, timeout: Optional[float] = None):
"""
Stops the LDAP server.

Expand All @@ -187,6 +202,9 @@ def stop(self, timeout: float = None):
terminates. If `timeout` is a positive floating point number, this method
blocks for at most `timeout` seconds.
"""
if self._server_process is None:
return

if self._server_process.is_alive():
logger.debug("Stopping LDAP exploit server")

Expand All @@ -197,5 +215,7 @@ def stop(self, timeout: float = None):

if self._server_process.is_alive():
logger.warning("Timed out while waiting for the LDAP exploit server to stop")
logger.warning("Forcefully killing the LDAP server process")
self._server_process.kill()
else:
logger.debug("Successfully stopped the LDAP exploit server")
1 change: 1 addition & 0 deletions monkey/infection_monkey/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def main():
except Exception as err:
logger.exception(f"An unexpected error occurred while running the agent: {err}")
finally:
logger.debug("Stopping the queue listener")
queue_listener.stop()


Expand Down
11 changes: 8 additions & 3 deletions monkey/infection_monkey/monkey.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import contextlib
import logging
import multiprocessing
import os
Expand Down Expand Up @@ -103,11 +102,14 @@ def __init__(self, args, ipc_logger_queue: multiprocessing.Queue, log_path: Path
logger.info(f"Agent ID: {self._agent_id}")
logger.info(f"Process ID: {os.getpid()}")

# Spawn the manager before the acquiring the singleton in case the file handle gets copied
# over to the manager process
self._manager = multiprocessing.get_context("spawn").Manager()

self._singleton = SystemSingleton()
self._opts = self._get_arguments(args)

self._ipc_logger_queue = ipc_logger_queue
self._manager = multiprocessing.get_context("spawn").Manager()

self._agent_event_forwarder = None
self._agent_event_queue = self._setup_agent_event_queue()
Expand Down Expand Up @@ -454,8 +456,11 @@ def cleanup(self):
self._plugin_event_forwarder.stop()
self._agent_event_forwarder.stop()
self._delete_plugin_dir()
with contextlib.suppress(AssertionError):
self._manager.shutdown()
try:
self._singleton.unlock()
except AssertionError as err:
logger.warning(f"Failed to release the singleton: {err}")

logger.info("Agent is shutting down")

Expand Down