Skip to content

Commit

Permalink
Use current user's IDs when reading/writing files within mounted directories in containers (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
wraymo authored Oct 14, 2021
1 parent 134416a commit c042a37
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 53 deletions.
1 change: 0 additions & 1 deletion components/compression-job-handler/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
python-Levenshtein
PyYAML==5.4

setuptools~=45.2.0
msgpack~=1.0.2
zstandard~=0.15.2
mysql-connector-python==8.0.26
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from clp_py_utils.clp_config import CLPConfig

CONTAINER_CLP_INSTALL_PREFIX = '/opt'

def check_dependencies():
try:
Expand Down
7 changes: 4 additions & 3 deletions components/package-template/src/sbin/compress
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if clp_home is None:
sys.exit(-1)
load_bundled_python_lib_path(clp_home)

from clp.package_utils import check_env
from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX
from clp_py_utils.core import read_yaml_config_file
from clp_py_utils.clp_package_config import CLPPackageConfig
from pydantic import ValidationError
Expand Down Expand Up @@ -99,9 +99,10 @@ def main(argv):
# TODO: check path and perform path conversion
docker_exec_cmd = [
'docker', 'exec',
'--workdir', '/root/clp',
'--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-u', f'{os.getuid()}:{os.getgid()}',
clp_package_config.cluster_name,
'sbin/native/compress', '--config', f'/root/.{clp_package_config.cluster_name}.yaml'
'sbin/native/compress', '--config', f'{CONTAINER_CLP_INSTALL_PREFIX}/.{clp_package_config.cluster_name}.yaml'
]
for path in parsed_args.paths:
docker_exec_cmd.append(path)
Expand Down
15 changes: 8 additions & 7 deletions components/package-template/src/sbin/decompress
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ if clp_home is None:
sys.exit(-1)
load_bundled_python_lib_path(clp_home)

from clp.package_utils import check_env
from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX
from clp_py_utils.core import read_yaml_config_file
from clp_py_utils.clp_package_config import CLPPackageConfig
from clp_py_utils.clp_config import CLPConfig
Expand Down Expand Up @@ -120,7 +120,7 @@ def main(argv):
# Parse and validate config file
container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml'
host_config_file_path = clp_home / container_clp_config_file_name
container_config_file_path = f'/root/{container_clp_config_file_name}'
container_config_file_path = f'{CONTAINER_CLP_INSTALL_PREFIX}/{container_clp_config_file_name}'

try:
clp_config = CLPConfig.parse_obj(read_yaml_config_file(host_config_file_path))
Expand All @@ -131,7 +131,7 @@ def main(argv):
log.error(ex)
return -1

docker_clp_home = pathlib.Path('/') / 'root' / 'clp'
docker_clp_home = pathlib.Path(CONTAINER_CLP_INSTALL_PREFIX) / 'clp'
docker_extraction_dir = pathlib.Path('/') / 'mnt' / '_extraction_dir_'

host_data_directory = clp_home / pathlib.Path(clp_config.data_directory).relative_to(docker_clp_home)
Expand All @@ -149,15 +149,16 @@ def main(argv):
'--hostname', container_name,
'--name', container_name,
'-v', f'{clp_home}:{docker_clp_home}',
'-v', f'{extraction_dir}:{docker_extraction_dir}'
'-v', f'{extraction_dir}:{docker_extraction_dir}',
'-u', f'{os.getuid()}:{os.getgid()}'
]
if not clp_config.data_directory.startswith('/root/clp'):
if not clp_config.data_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}')
if not clp_config.logs_directory.startswith('/root/clp'):
if not clp_config.logs_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}')
if not clp_config.archive_output.directory.startswith('/root/clp'):
if not clp_config.archive_output.directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(
f'{host_archive_out_directory}:{clp_config.archive_output.directory}')
Expand Down
7 changes: 4 additions & 3 deletions components/package-template/src/sbin/search
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if clp_home is None:
sys.exit(-1)
load_bundled_python_lib_path(clp_home)

from clp.package_utils import check_env
from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX
from clp_py_utils.core import read_yaml_config_file
from clp_py_utils.clp_package_config import CLPPackageConfig
from pydantic import ValidationError
Expand Down Expand Up @@ -98,9 +98,10 @@ def main(argv):

docker_exec_cmd = [
'docker', 'exec',
'--workdir', '/root/clp',
'--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-u', f'{os.getuid()}:{os.getgid()}',
clp_cluster_name,
'sbin/native/search', '--config', f'/root/.{clp_cluster_name}.yaml',
'sbin/native/search', '--config', f'{CONTAINER_CLP_INSTALL_PREFIX}/.{clp_cluster_name}.yaml',
parsed_args.wildcard_query
]
if parsed_args.file_path:
Expand Down
40 changes: 23 additions & 17 deletions components/package-template/src/sbin/start-clp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ if clp_home is None:
sys.exit(-1)
load_bundled_python_lib_path(clp_home)

from clp.package_utils import prepare_package_and_config, check_dependencies
from clp.package_utils import prepare_package_and_config, check_dependencies, CONTAINER_CLP_INSTALL_PREFIX
from clp_py_utils.core import read_yaml_config_file
from clp_py_utils.clp_package_config import CLPPackageConfig, ArchiveOutput as PackageArchiveOutput
from clp_py_utils.clp_config import Database, ArchiveOutput, CLPConfig, Scheduler
Expand Down Expand Up @@ -87,7 +87,8 @@ def start_sql_db(cluster_name: str, clp_config: CLPConfig, host_data_directory:
'-e', f'MYSQL_ROOT_PASSWORD={clp_config.database.password}',
'-e', f'MYSQL_USER={clp_config.database.username}',
'-e', f'MYSQL_PASSWORD={clp_config.database.password}',
'-e', f'MYSQL_DATABASE=initial_database'
'-e', f'MYSQL_DATABASE=initial_database',
'-u', f'{os.getuid()}:{os.getgid()}'
]
if publish_ports:
database_startup_cmd.append('-p')
Expand All @@ -108,14 +109,16 @@ def create_sql_db_tables(cluster_name: str, container_config_file_path: str):
# Initialize database tables
log.info('Initializing scheduler database tables')
database_table_creation_commands = [
['python3', '/root/clp/lib/python3/site-packages/clp_py_utils/initialize-clp-metadata-db.py',
['python3', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/clp_py_utils/initialize-clp-metadata-db.py',
'--config', container_config_file_path],
['python3', '/root/clp/lib/python3/site-packages/clp_py_utils/initialize-orchestration-db.py',
['python3', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/clp_py_utils/initialize-orchestration-db.py',
'--config', container_config_file_path]
]
for command in database_table_creation_commands:
docker_exec_cmd = ['docker', 'exec', '-it',
'-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages', cluster_name]
'-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages',
'-u', f'{os.getuid()}:{os.getgid()}',
cluster_name]
docker_exec_cmd += command
log.debug(' '.join(docker_exec_cmd))
max_attempts = 20
Expand Down Expand Up @@ -172,10 +175,11 @@ def start_scheduler(cluster_name: str, clp_config: CLPConfig, container_config_f
'--config', container_config_file_path]
log.info('Starting scheduler service')
docker_exec_cmd = [
'docker', 'exec', '--detach', '--workdir', '/root/clp',
'-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages',
'docker', 'exec', '--detach', '--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages',
'-e', f'BROKER_URL=amqp://{clp_config.scheduler.username}:{clp_config.scheduler.password}@localhost:5672',
# rabbitmq runs on scheduler node
'-u', f'{os.getuid()}:{os.getgid()}',
cluster_name
]
docker_exec_cmd += scheduler_startup_cmd
Expand All @@ -188,22 +192,24 @@ def start_scheduler(cluster_name: str, clp_config: CLPConfig, container_config_f


def start_worker(cluster_name: str, clp_config: CLPConfig, num_cpus: int):
worker_startup_cmd = ['/root/clp/bin/celery', '-A', 'job_orchestration.executor', 'worker',
worker_startup_cmd = [f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/bin/celery',
'-A', 'job_orchestration.executor', 'worker',
'--concurrency', str(num_cpus),
'--loglevel', 'WARNING',
'-Q', 'compression']
log.info("Starting CLP worker")
docker_exec_cmd = [
'docker', 'exec', '--detach',
'--workdir', '/root/clp',
'-e', 'CLP_HOME=/root/clp',
'--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-e', f'CLP_HOME={CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-e', f'CLP_DATA_DIR={clp_config.data_directory}',
'-e', f'CLP_LOGS_DIR={clp_config.logs_directory}',
'-e', 'PYTHONPATH=/root/clp/lib/python3/site-packages',
'-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages',
'-e', f'BROKER_URL=amqp://{clp_config.scheduler.username}:{clp_config.scheduler.password}'
f'@{clp_config.scheduler.host}:5672',
'-e', f'RESULT_BACKEND=rpc://{clp_config.scheduler.username}:{clp_config.scheduler.password}'
f'@{clp_config.scheduler.host}:5672',
'-u', f'{os.getuid()}:{os.getgid()}',
cluster_name
]
docker_exec_cmd += worker_startup_cmd
Expand Down Expand Up @@ -340,10 +346,10 @@ def main(argv):
# Create temporary clp config file which we mount into the container
# Prepare package and initialize all required directories if necessary
# Note: config file is also updated with absolute path
docker_clp_home = pathlib.Path('/') / 'root' / 'clp'
docker_clp_home = pathlib.Path(CONTAINER_CLP_INSTALL_PREFIX) / 'clp'
container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml'
host_config_file_path = clp_home / container_clp_config_file_name
container_config_file_path = f'/root/{container_clp_config_file_name}'
container_config_file_path = f'{CONTAINER_CLP_INSTALL_PREFIX}/{container_clp_config_file_name}'

# Persist config file used for container
if not host_config_file_path.exists() and need_to_start_scheduler:
Expand Down Expand Up @@ -378,7 +384,7 @@ def main(argv):
'--network', clp_package_config.cluster_name,
'--hostname', f'{clp_package_config.cluster_name}',
'--name', f'{clp_package_config.cluster_name}',
'-v', f'{clp_home}:/root/clp',
'-v', f'{clp_home}:{CONTAINER_CLP_INSTALL_PREFIX}/clp',
'-v', f'{uncompressed_log_dir}:{uncompressed_log_dir}'
]
if parsed_args.publish_ports:
Expand All @@ -388,13 +394,13 @@ def main(argv):
clp_execution_env_startup_cmd += ports_to_publish

# Mount data, logs, archive output directory if it is outside of the package
if not clp_config.data_directory.startswith('/root/clp'):
if not clp_config.data_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}')
if not clp_config.logs_directory.startswith('/root/clp'):
if not clp_config.logs_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}')
if not clp_config.archive_output.directory.startswith('/root/clp'):
if not clp_config.archive_output.directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
clp_execution_env_startup_cmd.append('-v')
clp_execution_env_startup_cmd.append(f'{host_archive_out_directory}:{clp_config.archive_output.directory}')
clp_execution_env_startup_cmd.append(clp_execution_env_container)
Expand Down
19 changes: 7 additions & 12 deletions tools/packager/build-clp-package.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def main(argv):
return -1

host_working_dir.mkdir(parents=True, exist_ok=True)
pip_cache_directory = pathlib.PurePath('/tmp')
container_working_directory = pathlib.PurePath('/tmp/out')
versioned_artifact_name = f'{packaging_config.artifact_name}-{packaging_config.arch}-v{packaging_config.version}'
artifact_dir = (host_working_dir / versioned_artifact_name).resolve()
Expand Down Expand Up @@ -209,34 +210,28 @@ def main(argv):
container_exec_prefix = [
'docker', 'exec', '-it',
'-e', f'WORKING_DIR={container_working_directory}',
'-e', f'CACHE_DIR={pip_cache_directory}',
'-e', f'ARTIFACT_NAME={versioned_artifact_name}',
'-e', f'BUILD_PARALLELISM={build_parallelization}',
'-w', str(container_working_directory),
'-u', f'{os.getuid()}:{os.getgid()}',
build_environment_container_name
]

# Run the component installation scripts
install_cmds = [
[str(container_install_scripts_dir / 'install-celery.sh')],
[str(container_install_scripts_dir / 'install-python-component.sh'), 'job-orchestration'],
[str(container_install_scripts_dir / 'install-python-component.sh'), 'clp-py-utils'],
[str(container_install_scripts_dir / 'install-python-component.sh'), 'compression-job-handler'],
[str(container_install_scripts_dir / 'install-python-component.sh'), 'job-orchestration'],
[str(container_install_scripts_dir / 'install-core.sh')],
[str(container_install_scripts_dir / 'install-core.sh')]
]
for cmd in install_cmds:
container_exec_cmd = container_exec_prefix + cmd
log.info(' '.join(container_exec_cmd))
subprocess.run(container_exec_cmd, check=True)

# Set current user as owner of built files and build tar
cmds = [
f'chown -R {os.getuid()}:{os.getgid()} {container_working_directory}',
f'tar -czf {versioned_artifact_name}.tar.gz {versioned_artifact_name}',
f'chown -R {os.getuid()}:{os.getgid()} {versioned_artifact_name}.tar.gz'
]
for cmd in cmds:
container_exec_cmd = container_exec_prefix + cmd.split()
subprocess.run(container_exec_cmd, check=True)
archive_cmd = f'tar -czf {versioned_artifact_name}.tar.gz {versioned_artifact_name}'
subprocess.run(container_exec_prefix + archive_cmd.split(), check=True)
except subprocess.CalledProcessError as ex:
print(ex.stdout)
log.error('Failed to build CLP')
Expand Down
9 changes: 0 additions & 9 deletions tools/packager/install-scripts/install-celery.sh

This file was deleted.

2 changes: 1 addition & 1 deletion tools/packager/install-scripts/install-python-component.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ echo "Installing ${component_name}"

cd ${WORKING_DIR}/${component_name}

xargs --max-args=1 --max-procs=16 \
PIP_CACHE_DIR=${CACHE_DIR} xargs --max-args=1 --max-procs=16 \
pip install --target ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages < requirements.txt

cp -R ${python_package_name} ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages

0 comments on commit c042a37

Please sign in to comment.