diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py index 146df7db0..66eb5e19e 100644 --- a/components/clp-py-utils/clp_py_utils/clp_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -1,47 +1,59 @@ +import pathlib import typing from pydantic import BaseModel, validator -from clp_py_utils.pretty_size import pretty_size +from .core import get_config_value, make_config_path_absolute, read_yaml_config_file, validate_path_could_be_dir + +# Constants +CLP_DEFAULT_CREDENTIALS_FILE_PATH = pathlib.Path('etc') / 'credentials.yml' class Database(BaseModel): - type: str - host: str - port: int - username: str - password: str - name: str + type: str = 'mariadb' + host: str = 'localhost' + port: int = 3306 + name: str = 'clp-db' ssl_cert: typing.Optional[str] = None auto_commit: bool = False compress: bool = True + username: typing.Optional[str] = None + password: typing.Optional[str] = None + @validator('type') def validate_database_type(cls, field): - supported_database_type = ['mysql', 'mariadb', 'bundled'] - if field not in supported_database_type: - raise ValueError(f'must be one of the following {"|".join(supported_database_type)}') + supported_database_types = ['mysql', 'mariadb'] + if field not in supported_database_types: + raise ValueError(f"database.type must be one of the following {'|'.join(supported_database_types)}") return field - def get_mysql_connection_params(self): - # Currently, mysql's connector parameter is the same as mariadb - connection_params = { - 'host': self.host, - 'port': self.port, - 'user': self.username, - 'password': self.password, - 'database': self.name, - 'compress': self.compress, - 'autocommit': self.auto_commit - } - if self.ssl_cert: - connection_params['ssl_cert'] = self.ssl_cert - return connection_params + @validator('name') + def validate_database_name(cls, field): + if '' == field: + raise ValueError("database.name cannot be empty.") + return field - def get_mariadb_connection_params(self): - # Currently, mysql's connector parameter is the same as mysql + @validator('host') + def validate_database_host(cls, field): + if '' == field: + raise ValueError("database.host cannot be empty.") + return field + + def ensure_credentials_loaded(self): + if self.username is None or self.password is None: + raise ValueError("Credentials not loaded.") + + def get_mysql_connection_params(self, disable_localhost_socket_connection: bool = False): + self.ensure_credentials_loaded() + + host = self.host + if disable_localhost_socket_connection and 'localhost' == self.host: + host = '127.0.0.1' + + # Currently, mysql's connection parameters are the same as mariadb connection_params = { - 'host': self.host, + 'host': host, 'port': self.port, 'user': self.username, 'password': self.password, @@ -53,10 +65,17 @@ def get_mariadb_connection_params(self): connection_params['ssl_cert'] = self.ssl_cert return connection_params - def get_clp_connection_params_and_type(self): + def get_clp_connection_params_and_type(self, disable_localhost_socket_connection: bool = False): + self.ensure_credentials_loaded() + + host = self.host + if disable_localhost_socket_connection and 'localhost' == self.host: + host = '127.0.0.1' + connection_params_and_type = { - 'type': 'mysql', # hard code this as mysql as CLP only support "mysql" for global database - 'host': self.host, + # NOTE: clp-core does not distinguish between mysql and mariadb + 'type': 'mysql', + 'host': host, 'port': self.port, 'username': 
self.username, 'password': self.password, @@ -71,114 +90,132 @@ def get_clp_connection_params_and_type(self): class Scheduler(BaseModel): - host: str - jobs_poll_delay: int + jobs_poll_delay: int = 1 # seconds -class SchedulerQueue(BaseModel): - host: str - port: int - username: str - password: str +class Queue(BaseModel): + host: str = 'localhost' + port: int = 5672 + username: typing.Optional[str] + password: typing.Optional[str] -class ArchiveOutput(BaseModel): - type: str # Support only 'fs' type for now - directory: str - target_archive_size: int - target_dictionaries_size: int - target_encoded_file_size: int - target_segment_size: int - @validator('type') - def validate_type(cls, field): - if 'fs' != field: - raise ValueError('only fs type is supported in the opensource distribution') - return field +class ArchiveOutput(BaseModel): + directory: pathlib.Path = pathlib.Path('var') / 'data' / 'archives' + target_archive_size: int = 256 * 1024 * 1024 # 256 MB + target_dictionaries_size: int = 32 * 1024 * 1024 # 32 MB + target_encoded_file_size: int = 256 * 1024 * 1024 # 256 MB + target_segment_size: int = 256 * 1024 * 1024 # 256 MB @validator('target_archive_size') def validate_target_archive_size(cls, field): if field <= 0: - raise ValueError('target_archive_size parameter must be greater than 0') + raise ValueError('target_archive_size must be greater than 0') return field @validator('target_dictionaries_size') def validate_target_dictionaries_size(cls, field): if field <= 0: - raise ValueError('target_dictionaries_size parameter must be greater than 0') + raise ValueError('target_dictionaries_size must be greater than 0') return field @validator('target_encoded_file_size') def validate_target_encoded_file_size(cls, field): if field <= 0: - raise ValueError('target_encoded_file_size parameter must be greater than 0') + raise ValueError('target_encoded_file_size must be greater than 0') return field @validator('target_segment_size') def validate_target_segment_size(cls, field): if field <= 0: - raise ValueError('target_segment_size parameter must be greater than 0') + raise ValueError('target_segment_size must be greater than 0') return field + def make_config_paths_absolute(self, clp_home: pathlib.Path): + self.directory = make_config_path_absolute(clp_home, self.directory) + + def dump_to_primitive_dict(self): + d = self.dict() + # Turn directory (pathlib.Path) into a primitive string + d['directory'] = str(d['directory']) + return d + class CLPConfig(BaseModel): - input_logs_dfs_path: str - database: Database - scheduler: Scheduler - scheduler_queue: SchedulerQueue - archive_output: ArchiveOutput - data_directory: str - logs_directory: str - - def generate_config_file_content_with_comments(self): - file_content = [ - f'# A path containing any logs you which to compress. 
Must be reachable by all workers.', - f'# - This path will be exposed inside the docker container.', - f'# - This path should not be any path that exists in the container image (an Ubuntu image) (e.g., /var/log).', - f'# - Limitations: Docker follow symlink outside context, therefore, we recommend avoiding symbolic links', - f'input_logs_dfs_path: {self.input_logs_dfs_path}', - f'', - f'database:', - f' type: {self.database.type}', - f' host: {self.database.host}', - f' port: {self.database.port}', - f' username: {self.database.username}', - f' password: {self.database.password}', - f' name: {self.database.name}', - f'', - f'scheduler:', - f' host: {self.scheduler.host}', - f' jobs_poll_delay: {self.scheduler.jobs_poll_delay} # Seconds', - f'', - f'scheduler_queue:', - f' host: {self.scheduler_queue.host}', - f' port: {self.scheduler_queue.port}', - f' username: {self.scheduler_queue.username}', - f' password: {self.scheduler_queue.password}', - f'', - f'# Where archives should be output to', - f'# Note: Only one output type may be specified', - f'archive_output:', - f' type: {self.archive_output.type}', - f' directory: "{self.archive_output.directory}"', - f'', - f' # How much data CLP should try to compress into each archive', - f' target_archive_size: {self.archive_output.target_archive_size} # {pretty_size(self.archive_output.target_archive_size)}', - f'', - f' # How large the dictionaries should be allowed to get before the archive is closed and a new one is created', - f' target_dictionaries_size: {self.archive_output.target_dictionaries_size} # {pretty_size(self.archive_output.target_dictionaries_size)}', - f'', - f' # How large each encoded file should be before being split into a new encoded file', - f' target_encoded_file_size: {self.archive_output.target_encoded_file_size} # {pretty_size(self.archive_output.target_encoded_file_size)}', - f'', - f' # How much data CLP should try to fit into each segment within an archive', - f' target_segment_size: {self.archive_output.target_segment_size} # {pretty_size(self.archive_output.target_segment_size)}', - f'', - f'# Location where other data is stored', - f'data_directory: "{self.data_directory}"', - f'', - f'# Location where logs are stored', - f'logs_directory: "{self.logs_directory}"', - f'', - ] - return '\n'.join(file_content) + execution_container: str = 'ghcr.io/y-scope/clp/clp-execution-x86-ubuntu-focal:main' + + input_logs_directory: pathlib.Path = pathlib.Path('/') + + database: Database = Database() + scheduler: Scheduler = Scheduler() + queue: Queue = Queue() + credentials_file_path: pathlib.Path = CLP_DEFAULT_CREDENTIALS_FILE_PATH + + archive_output: ArchiveOutput = ArchiveOutput() + data_directory: pathlib.Path = pathlib.Path('var') / 'data' + logs_directory: pathlib.Path = pathlib.Path('var') / 'log' + + def make_config_paths_absolute(self, clp_home: pathlib.Path): + self.input_logs_directory = make_config_path_absolute(clp_home, self.input_logs_directory) + self.credentials_file_path = make_config_path_absolute(clp_home, self.credentials_file_path) + self.archive_output.make_config_paths_absolute(clp_home) + self.data_directory = make_config_path_absolute(clp_home, self.data_directory) + self.logs_directory = make_config_path_absolute(clp_home, self.logs_directory) + + def validate_input_logs_dir(self): + # NOTE: This can't be a pydantic validator since input_logs_dir might be a package-relative + # path that will only be resolved after pydantic validation + input_logs_dir = self.input_logs_directory + if not 
input_logs_dir.exists(): + raise ValueError(f"input_logs_directory '{input_logs_dir}' doesn't exist.") + if not input_logs_dir.is_dir(): + raise ValueError(f"input_logs_directory '{input_logs_dir}' is not a directory.") + + def validate_archive_output_dir(self): + try: + validate_path_could_be_dir(self.archive_output.directory) + except ValueError as ex: + raise ValueError(f"archive_output.directory is invalid: {ex}") + + def validate_data_dir(self): + try: + validate_path_could_be_dir(self.data_directory) + except ValueError as ex: + raise ValueError(f"data_directory is invalid: {ex}") + + def validate_logs_dir(self): + try: + validate_path_could_be_dir(self.logs_directory) + except ValueError as ex: + raise ValueError(f"logs_directory is invalid: {ex}") + + def load_database_credentials_from_file(self): + config = read_yaml_config_file(self.credentials_file_path) + if config is None: + raise ValueError(f"Credentials file '{self.credentials_file_path}' is empty.") + try: + self.database.username = get_config_value(config, 'db.user') + self.database.password = get_config_value(config, 'db.password') + except KeyError as ex: + raise ValueError(f"Credentials file '{self.credentials_file_path}' does not contain key '{ex}'.") + + def load_queue_credentials_from_file(self): + config = read_yaml_config_file(self.credentials_file_path) + if config is None: + raise ValueError(f"Credentials file '{self.credentials_file_path}' is empty.") + try: + self.queue.username = get_config_value(config, "queue.user") + self.queue.password = get_config_value(config, "queue.password") + except KeyError as ex: + raise ValueError(f"Credentials file '{self.credentials_file_path}' does not contain key '{ex}'.") + + def dump_to_primitive_dict(self): + d = self.dict() + d['archive_output'] = self.archive_output.dump_to_primitive_dict() + # Turn paths into primitive strings + d['input_logs_directory'] = str(self.input_logs_directory) + d['credentials_file_path'] = str(self.credentials_file_path) + d['data_directory'] = str(self.data_directory) + d['logs_directory'] = str(self.logs_directory) + return d diff --git a/components/clp-py-utils/clp_py_utils/clp_io_config.py b/components/clp-py-utils/clp_py_utils/clp_io_config.py index 88b810562..0d987a5f6 100644 --- a/components/clp-py-utils/clp_py_utils/clp_io_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_io_config.py @@ -11,13 +11,11 @@ class PathsToCompress(BaseModel): class InputConfig(BaseModel): - type: str list_path: str path_prefix_to_remove: str = None class OutputConfig(BaseModel): - type: str target_archive_size: int target_dictionaries_size: int target_segment_size: int diff --git a/components/clp-py-utils/clp_py_utils/clp_package_config.py b/components/clp-py-utils/clp_py_utils/clp_package_config.py deleted file mode 100644 index 4c4d71ae0..000000000 --- a/components/clp-py-utils/clp_py_utils/clp_package_config.py +++ /dev/null @@ -1,60 +0,0 @@ -from pydantic import BaseModel, validator - -from clp_py_utils.pretty_size import pretty_size - - -# Limited set of configurations operation found in clp_config.py -class ArchiveOutput(BaseModel): - target_archive_size: int - target_dictionaries_size: int - target_encoded_file_size: int - target_segment_size: int - - @validator('target_archive_size') - def validate_target_archive_size(cls, field): - if field <= 0: - raise ValueError('target_archive_size parameter must be greater than 0') - return field - - @validator('target_dictionaries_size') - def validate_target_dictionaries_size(cls, field): - if field 
<= 0: - raise ValueError('target_dictionaries_size parameter must be greater than 0') - return field - - @validator('target_encoded_file_size') - def validate_target_encoded_file_size(cls, field): - if field <= 0: - raise ValueError('target_encoded_file_size parameter must be greater than 0') - return field - - @validator('target_segment_size') - def validate_target_segment_size(cls, field): - if field <= 0: - raise ValueError('target_segment_size parameter must be greater than 0') - return field - - -class CLPPackageConfig(BaseModel): - cluster_name: str - archive_output: ArchiveOutput - - def generate_package_config_file_content_with_comments(self): - file_content = [ - f'cluster_name: {self.cluster_name}', - f'', - f'archive_output:', - f' # How much data CLP should try to compress into each archive', - f' target_archive_size: {str(self.archive_output.target_archive_size)} # {pretty_size(self.archive_output.target_archive_size)}', - f'', - f' # How large the dictionaries should be allowed to get before the archive is closed and a new one is created', - f' target_dictionaries_size: {str(self.archive_output.target_dictionaries_size)} # {pretty_size(self.archive_output.target_dictionaries_size)}', - f'', - f' # How large each encoded file should be before being split into a new encoded file', - f' target_encoded_file_size: {str(self.archive_output.target_encoded_file_size)} # {pretty_size(self.archive_output.target_encoded_file_size)}', - f'', - f' # How much data CLP should try to fit into each segment within an archive', - f' target_segment_size: {str(self.archive_output.target_segment_size)} # {pretty_size(self.archive_output.target_segment_size)}', - f'' - ] - return '\n'.join(file_content) diff --git a/components/clp-py-utils/clp_py_utils/core.py b/components/clp-py-utils/clp_py_utils/core.py index db202d432..56fe198e6 100644 --- a/components/clp-py-utils/clp_py_utils/core.py +++ b/components/clp-py-utils/clp_py_utils/core.py @@ -1,11 +1,54 @@ import pathlib import yaml +from yaml.parser import ParserError + + +def get_config_value(config, key): + """ + Gets a value from the given dictionary using a dot-separated configuration + key, where each dot represents a deeper dictionary. NOTE: This method does + not support keys that contain dots since that is indistinguishable from a + deeper dictionary. 
+ + :param config: + :param key: + """ + singular_keys = key.split('.') + current_config = config + for current_key in singular_keys: + current_config = current_config[current_key] + return current_config + + +def make_config_path_absolute(default_root: pathlib.Path, config_path: pathlib.Path): + """ + Turns relative paths into absolute paths by prefixing them with the + default_root + + :param default_root: + :param config_path: + """ + if config_path.is_absolute(): + return config_path + else: + return default_root / config_path def read_yaml_config_file(yaml_config_file_path: pathlib.Path): with open(yaml_config_file_path, 'r') as yaml_config_file: - config = yaml.safe_load(yaml_config_file) - if config is None: - raise Exception(f'Unable to parse configuration from {yaml_config_file_path}.') + try: + config = yaml.safe_load(yaml_config_file) + except ParserError as ex: + raise ValueError(f"Unable to parse configuration from {yaml_config_file_path}: {ex}") return config + + +def validate_path_could_be_dir(path: pathlib.Path): + part = path + while True: + if part.exists(): + if not part.is_dir(): + raise ValueError(f"{part} is not a directory.") + return + part = part.parent diff --git a/components/clp-py-utils/clp_py_utils/create-db-tables.py b/components/clp-py-utils/clp_py_utils/create-db-tables.py new file mode 100644 index 000000000..b591a0d0a --- /dev/null +++ b/components/clp-py-utils/clp_py_utils/create-db-tables.py @@ -0,0 +1,43 @@ +import argparse +import logging +import pathlib +import subprocess +import sys + +# Setup logging +# Create logger +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) +# Setup console logging +logging_console_handler = logging.StreamHandler() +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") +logging_console_handler.setFormatter(logging_formatter) +logger.addHandler(logging_console_handler) + + +def main(argv): + args_parser = argparse.ArgumentParser(description="Creates database tables for CLP.") + args_parser.add_argument('--config', required=True, help="Database config file.") + parsed_args = args_parser.parse_args(argv[1:]) + + config_file_path = pathlib.Path(parsed_args.config) + + script_dir = pathlib.Path(__file__).parent.resolve() + + cmd = [ + 'python3', str(script_dir / 'initialize-clp-metadata-db.py'), + '--config', str(config_file_path) + ] + subprocess.run(cmd, check=True) + + cmd = [ + 'python3', str(script_dir / 'initialize-orchestration-db.py'), + '--config', str(config_file_path) + ] + subprocess.run(cmd, check=True) + + return 0 + + +if '__main__' == __name__: + sys.exit(main(sys.argv)) diff --git a/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py b/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py index 7d02488f1..81de60324 100644 --- a/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py +++ b/components/clp-py-utils/clp_py_utils/initialize-clp-metadata-db.py @@ -4,34 +4,34 @@ import sys from contextlib import closing -from pydantic import ValidationError -from sql_adapter import SQL_Adapter - -from clp_py_utils.clp_config import CLPConfig +from clp_py_utils.clp_config import Database from clp_py_utils.core import read_yaml_config_file +from sql_adapter import SQL_Adapter # Setup logging # Create logger -logger = logging.getLogger(__name__) +logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter('%(asctime)s 
[%(levelname)s] %(message)s')
+logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
 logging_console_handler.setFormatter(logging_formatter)
 logger.addHandler(logging_console_handler)


 def main(argv):
-    args_parser = argparse.ArgumentParser(description='Setup CLP metadata tables compression and search.')
-    args_parser.add_argument('--config', required=True, help='CLP package config file.')
+    args_parser = argparse.ArgumentParser(description="Sets up CLP's metadata tables.")
+    args_parser.add_argument('--config', required=True, help="Database config file.")
     parsed_args = args_parser.parse_args(argv[1:])

     try:
-        clp_config = CLPConfig.parse_obj(read_yaml_config_file(parsed_args.config))
-        sql_adapter = SQL_Adapter(clp_config.database)
-        clp_db_connection_params = clp_config.database.get_clp_connection_params_and_type()
+        config = read_yaml_config_file(parsed_args.config)
+        if config is None:
+            raise ValueError(f"Database configuration file '{parsed_args.config}' is empty.")
+        database_config = Database.parse_obj(config)
+        sql_adapter = SQL_Adapter(database_config)
+        clp_db_connection_params = database_config.get_clp_connection_params_and_type(True)
         table_prefix = clp_db_connection_params['table_prefix']
-        with closing(sql_adapter.create_connection()) as metadata_db, \
+        with closing(sql_adapter.create_connection(True)) as metadata_db, \
                 closing(metadata_db.cursor(dictionary=True)) as metadata_db_cursor:
             metadata_db_cursor.execute(f"""
                 CREATE TABLE IF NOT EXISTS `{table_prefix}archives` (
@@ -45,8 +45,7 @@ def main(argv):
                     UNIQUE KEY `archive_id` (`id`) USING BTREE,
                     PRIMARY KEY (`pagination_id`)
                 );
-                """
-            )
+                """)

             metadata_db_cursor.execute(f"""
                 CREATE TABLE IF NOT EXISTS `{table_prefix}files` (
@@ -63,17 +62,11 @@ def main(argv):
                     PRIMARY KEY (`id`)
                 ) ROW_FORMAT=DYNAMIC
                 ;
-                """
-            )
+                """)

             metadata_db.commit()

-            logger.info('Successfully created clp metadata tables for compression and search')
-
-    except ValidationError as err:
-        logger.error(err)
-        return -1
-    except Exception as ex:
-        logger.error(ex)
+    except:
+        logger.exception("Failed to create clp metadata tables.")
         return -1

     return 0
diff --git a/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py b/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py
index 68161c229..3412a7895 100644
--- a/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py
+++ b/components/clp-py-utils/clp_py_utils/initialize-orchestration-db.py
@@ -4,32 +4,32 @@ import sys
 from contextlib import closing

-from pydantic import ValidationError
-from sql_adapter import SQL_Adapter
-
-from clp_py_utils.clp_config import CLPConfig
+from clp_py_utils.clp_config import Database
 from clp_py_utils.core import read_yaml_config_file
+from sql_adapter import SQL_Adapter

 # Setup logging
 # Create logger
-logger = logging.getLogger(__name__)
+logger = logging.getLogger(__file__)
 logger.setLevel(logging.INFO)
 # Setup console logging
 logging_console_handler = logging.StreamHandler()
-logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
+logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
 logging_console_handler.setFormatter(logging_formatter)
 logger.addHandler(logging_console_handler)


 def main(argv):
-    args_parser = argparse.ArgumentParser(description='Setup metadata tables for job orchestration.')
-    args_parser.add_argument('--config', required=True, help='CLP package config file.')
+    args_parser = argparse.ArgumentParser(description="Sets up metadata tables for job orchestration.")
+    args_parser.add_argument('--config', required=True, help="Database config file.")
     parsed_args = args_parser.parse_args(argv[1:])

     try:
-        clp_config = CLPConfig.parse_obj(read_yaml_config_file(parsed_args.config))
-        sql_adapter = SQL_Adapter(clp_config.database)
-        with closing(sql_adapter.create_connection()) as scheduling_db, \
+        config = read_yaml_config_file(parsed_args.config)
+        if config is None:
+            raise ValueError(f"Database configuration file '{parsed_args.config}' is empty.")
+        database_config = Database.parse_obj(config)
+        sql_adapter = SQL_Adapter(database_config)
+        with closing(sql_adapter.create_connection(True)) as scheduling_db, \
                 closing(scheduling_db.cursor(dictionary=True)) as scheduling_db_cursor:
             scheduling_db_cursor.execute("""
                 CREATE TABLE IF NOT EXISTS `compression_jobs` (
@@ -50,8 +50,7 @@ def main(argv):
                     INDEX `JOB_STATUS` (`job_status`) USING BTREE
                 ) ROW_FORMAT=DYNAMIC
                 ;
-                """
-            )
+                """)

             scheduling_db_cursor.execute("""
                 CREATE TABLE IF NOT EXISTS `compression_tasks` (
@@ -73,17 +72,11 @@ def main(argv):
                         REFERENCES `compression_jobs` (`job_id`) ON UPDATE NO ACTION ON DELETE NO ACTION
                 ) ROW_FORMAT=DYNAMIC
                 ;
-                """
-            )
+                """)

             scheduling_db.commit()

-            logger.info('Successfully created compression_jobs and compression_tasks orchestration tables')
-
-    except ValidationError as err:
-        logger.error(err)
-        return -1
-    except Exception as ex:
-        logger.error(ex)
+    except:
+        logger.exception("Failed to create scheduling tables.")
         return -1

     return 0
diff --git a/components/clp-py-utils/clp_py_utils/sql_adapter.py b/components/clp-py-utils/clp_py_utils/sql_adapter.py
index a42fa79c9..40b3156ee 100644
--- a/components/clp-py-utils/clp_py_utils/sql_adapter.py
+++ b/components/clp-py-utils/clp_py_utils/sql_adapter.py
@@ -11,9 +11,11 @@ class SQL_Adapter:
     def __init__(self, database_config: Database):
         self.database_config = database_config

-    def create_mysql_connection(self) -> mysql.connector.MySQLConnection:
+    def create_mysql_connection(self, disable_localhost_socket_connection: bool = False) -> \
+            mysql.connector.MySQLConnection:
         try:
-            connection = mysql.connector.connect(**self.database_config.get_mysql_connection_params())
+            connection = mysql.connector.connect(
+                **self.database_config.get_mysql_connection_params(disable_localhost_socket_connection))
         except mysql.connector.Error as err:
             if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                 logging.error('Database access denied.')
@@ -25,19 +27,20 @@ def create_mysql_connection(self) -> mysql.connector.MySQLConnection:
         else:
             return connection

-    def create_mariadb_connection(self) -> mariadb.connection:
+    def create_mariadb_connection(self, disable_localhost_socket_connection: bool = False) -> mariadb.connection:
         try:
-            connection = mariadb.connect(**self.database_config.get_mysql_connection_params())
+            connection = mariadb.connect(
+                **self.database_config.get_mysql_connection_params(disable_localhost_socket_connection))
         except mariadb.Error as err:
             logging.error(f'Error connecting to MariaDB: {err}')
             raise err
         else:
             return connection

-    def create_connection(self):
+    def create_connection(self, disable_localhost_socket_connection: bool = False):
         if 'mysql' == self.database_config.type:
-            return self.create_mysql_connection()
+            return self.create_mysql_connection(disable_localhost_socket_connection)
         elif 'mariadb' == self.database_config.type:
-            return self.create_mariadb_connection()
+            return self.create_mariadb_connection(disable_localhost_socket_connection)
         else:
             raise NotImplementedError
diff --git
a/components/clp-py-utils/constraints.txt b/components/clp-py-utils/constraints.txt new file mode 100644 index 000000000..4c25d21ba --- /dev/null +++ b/components/clp-py-utils/constraints.txt @@ -0,0 +1,2 @@ +# Constrain typing-extensions to 4.1.1 to support Ubuntu 18.04 +typing-extensions==4.1.1 diff --git a/components/compression-job-handler/.gitignore b/components/compression-job-handler/.gitignore index 4b0a0fbd9..d5b2b7a87 100644 --- a/components/compression-job-handler/.gitignore +++ b/components/compression-job-handler/.gitignore @@ -1 +1 @@ -clp-config.yaml \ No newline at end of file +clp-config.yml \ No newline at end of file diff --git a/components/compression-job-handler/compression_job_handler/compression_job_handler.py b/components/compression-job-handler/compression_job_handler/compression_job_handler.py index e88983615..310caaac9 100644 --- a/components/compression-job-handler/compression_job_handler/compression_job_handler.py +++ b/components/compression-job-handler/compression_job_handler/compression_job_handler.py @@ -8,7 +8,6 @@ from contextlib import closing import msgpack -import mysql.connector import zstandard import zstandard as zstd from pydantic import ValidationError @@ -232,79 +231,79 @@ def handle_job(scheduling_db, scheduling_db_cursor, clp_io_config: ClpIoConfig, try: job_completed_with_errors = False - if 'fs' == clp_io_config.input.type: - # Create new job in the sql database - scheduling_db_cursor.execute( - 'INSERT INTO compression_jobs (clp_config) VALUES (%s);', - (zstd_cctx.compress(msgpack.packb(clp_io_config.dict(exclude_none=True, exclude_unset=True))),) - ) - scheduling_db.commit() - scheduling_job_id = scheduling_db_cursor.lastrowid - - # Create job-specific logger - job_str = f'job-{scheduling_job_id}' - job_logger = logging.getLogger(job_str) - job_logger.setLevel(logging.INFO) - combined_log_file_path = f'{logs_dir_abs}/{job_str}.log' - job_logger_file_handler = logging.FileHandler(combined_log_file_path) - job_logger_file_handler.setFormatter(logging_formatter) - job_logger.addHandler(logging_console_handler) - job_logger.addHandler(job_logger_file_handler) - - job_logger.debug(f'Starting job {scheduling_job_id}') - - paths_to_compress_buffer = PathsToCompressBuffer( - scheduler_db_cursor=scheduling_db_cursor, - maintain_file_ordering=False, - empty_directories_allowed=True, - target_archive_size=clp_io_config.output.target_archive_size, - file_size_to_trigger_compression=clp_io_config.output.target_archive_size * 2, - scheduling_job_id=scheduling_job_id, - zstd_cctx=zstd_cctx - ) - - # Compress all files at once to try and satisfy the target number of archives - job_logger.info("Iterating and partitioning files into tasks.") - # TODO: Handle file not found - with open(pathlib.Path(clp_io_config.input.list_path).resolve(), 'r') as f: - for path_idx, path in enumerate(f, start=1): - stripped_path = path.strip() - if '' == stripped_path: - # Skip empty paths - continue - path = pathlib.Path(stripped_path) - - try: - file, empty_directory = validate_path_and_get_info(fs_logs_required_parent_dir, path) - except ValueError as ex: - job_logger.error(str(ex)) - job_completed_with_errors = True - continue - - if file: - paths_to_compress_buffer.add_file(file) - elif empty_directory: - paths_to_compress_buffer.add_empty_directory(empty_directory) - - if path.is_dir(): - for internal_path in path.rglob('*'): - try: - file, empty_directory = validate_path_and_get_info( - fs_logs_required_parent_dir, internal_path) - except ValueError as ex: - 
job_logger.error(str(ex)) - job_completed_with_errors = True - continue - - if file: - paths_to_compress_buffer.add_file(file) - elif empty_directory: - paths_to_compress_buffer.add_empty_directory(empty_directory) - - if path_idx % 10000 == 0: - scheduling_db.commit() - - paths_to_compress_buffer.flush() + # Create new job in the sql database + scheduling_db_cursor.execute( + 'INSERT INTO compression_jobs (clp_config) VALUES (%s);', + (zstd_cctx.compress(msgpack.packb(clp_io_config.dict(exclude_none=True, exclude_unset=True))),) + ) + scheduling_db.commit() + scheduling_job_id = scheduling_db_cursor.lastrowid + + # Create job-specific logger + job_str = f'job-{scheduling_job_id}' + # FIXME: This will write to the current working directory which may require elevated privileges + job_logger = logging.getLogger(job_str) + job_logger.setLevel(logging.INFO) + combined_log_file_path = f'{logs_dir_abs}/{job_str}.log' + job_logger_file_handler = logging.FileHandler(combined_log_file_path) + job_logger_file_handler.setFormatter(logging_formatter) + job_logger.addHandler(logging_console_handler) + job_logger.addHandler(job_logger_file_handler) + + job_logger.debug(f'Starting job {scheduling_job_id}') + + paths_to_compress_buffer = PathsToCompressBuffer( + scheduler_db_cursor=scheduling_db_cursor, + maintain_file_ordering=False, + empty_directories_allowed=True, + target_archive_size=clp_io_config.output.target_archive_size, + file_size_to_trigger_compression=clp_io_config.output.target_archive_size * 2, + scheduling_job_id=scheduling_job_id, + zstd_cctx=zstd_cctx + ) + + # Compress all files at once to try and satisfy the target number of archives + job_logger.info("Iterating and partitioning files into tasks.") + # TODO: Handle file not found + with open(pathlib.Path(clp_io_config.input.list_path).resolve(), 'r') as f: + for path_idx, path in enumerate(f, start=1): + stripped_path = path.strip() + if '' == stripped_path: + # Skip empty paths + continue + path = pathlib.Path(stripped_path) + + try: + file, empty_directory = validate_path_and_get_info(fs_logs_required_parent_dir, path) + except ValueError as ex: + job_logger.error(str(ex)) + job_completed_with_errors = True + continue + + if file: + paths_to_compress_buffer.add_file(file) + elif empty_directory: + paths_to_compress_buffer.add_empty_directory(empty_directory) + + if path.is_dir(): + for internal_path in path.rglob('*'): + try: + file, empty_directory = validate_path_and_get_info( + fs_logs_required_parent_dir, internal_path) + except ValueError as ex: + job_logger.error(str(ex)) + job_completed_with_errors = True + continue + + if file: + paths_to_compress_buffer.add_file(file) + elif empty_directory: + paths_to_compress_buffer.add_empty_directory(empty_directory) + + if path_idx % 10000 == 0: + scheduling_db.commit() + + paths_to_compress_buffer.flush() # Ensure all of the scheduled task and the total number of tasks # in the job row has been updated and committed @@ -340,14 +339,14 @@ def handle_job(scheduling_db, scheduling_db_cursor, clp_io_config: ClpIoConfig, # Using fetchall() here t results = scheduling_db_cursor.fetchall() + # TODO Why is this necessary in the newest MariaDB/MySQL? 
+ scheduling_db.commit() if len(results) > 1: logging.error("Duplicated job_id") logging.error(str(results)) if len(results) == 0: time.sleep(1) continue - if isinstance(scheduling_db, mysql.connector.MySQLConnection): - scheduling_db.commit() # clear the query cache job_row = results[0] job_status = job_row['job_status'] @@ -384,9 +383,7 @@ def handle_job(scheduling_db, scheduling_db_cursor, clp_io_config: ClpIoConfig, job_logger.info(f'handler for job_status "{job_status}" is not implemented') raise NotImplementedError - scheduling_db.commit() # clear the query cache time.sleep(1) - except Exception as ex: if job_logger: job_logger.exception(f'Exception while processing {job_str}.') @@ -416,7 +413,7 @@ def handle_jobs(sql_adapter: SQL_Adapter, clp_io_config: ClpIoConfig, logs_dir_a zstd_cctx = zstd.ZstdCompressor(level=3) # Connect to SQL Database - with closing(sql_adapter.create_connection()) as scheduling_db, \ + with closing(sql_adapter.create_connection(True)) as scheduling_db, \ closing(scheduling_db.cursor(dictionary=True)) as scheduling_db_cursor: # Execute new compression job handle_job(scheduling_db=scheduling_db, scheduling_db_cursor=scheduling_db_cursor, clp_io_config=clp_io_config, @@ -449,7 +446,7 @@ def main(argv): sql_adapter = SQL_Adapter(clp_config.database) clp_io_config = ClpIoConfig( - input=InputConfig(type='fs', list_path=str(pathlib.Path(parsed_args.log_list_path).resolve())), + input=InputConfig(list_path=str(pathlib.Path(parsed_args.log_list_path).resolve())), output=OutputConfig.parse_obj(clp_config.archive_output) ) diff --git a/components/compression-job-handler/constraints.txt b/components/compression-job-handler/constraints.txt new file mode 100644 index 000000000..4c25d21ba --- /dev/null +++ b/components/compression-job-handler/constraints.txt @@ -0,0 +1,2 @@ +# Constrain typing-extensions to 4.1.1 to support Ubuntu 18.04 +typing-extensions==4.1.1 diff --git a/components/job-orchestration/constraints.txt b/components/job-orchestration/constraints.txt new file mode 100644 index 000000000..4c25d21ba --- /dev/null +++ b/components/job-orchestration/constraints.txt @@ -0,0 +1,2 @@ +# Constrain typing-extensions to 4.1.1 to support Ubuntu 18.04 +typing-extensions==4.1.1 diff --git a/components/job-orchestration/job_orchestration/executor/celeryconfig.py b/components/job-orchestration/job_orchestration/executor/celeryconfig.py index 768b5f4ec..81d18652c 100644 --- a/components/job-orchestration/job_orchestration/executor/celeryconfig.py +++ b/components/job-orchestration/job_orchestration/executor/celeryconfig.py @@ -1,10 +1,17 @@ import os -result_persistent = True + +# Worker settings +# Force workers to consume only one task at a time worker_prefetch_multiplier = 1 +imports = ['job_orchestration.executor.compression.task'] + +# Queue settings task_queue_max_priority = 3 -imports = 'job_orchestration.executor.compression.task' task_routes = {'job_orchestration.executor.compression.task.compress': 'compression'} task_create_missing_queues = True +# Results backend settings +result_persistent = True + broker_url = os.getenv('BROKER_URL') -result_backend = os.getenv('RESULT_BACKEND') \ No newline at end of file +result_backend = os.getenv('RESULT_BACKEND') diff --git a/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py b/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py index a19e1bebf..4f886bf39 100644 --- 
a/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py +++ b/components/job-orchestration/job_orchestration/executor/compression/fs_to_fs_compress_method.py @@ -7,21 +7,22 @@ import subprocess import sys -import celery.utils.nodenames import yaml from celery.utils.log import get_task_logger from clp_py_utils.clp_io_config import ClpIoConfig, PathsToCompress -def compress(clp_config: ClpIoConfig, clp_home_str: str, data_dir_str: str, logs_dir_str: str, - job_id_str: str, task_id_str: str, paths_to_compress: PathsToCompress, database_connection_params): +def compress(clp_config: ClpIoConfig, clp_home_str: str, data_dir_str: str, archive_output_dir_str: str, + logs_dir_str: str, job_id_str: str, task_id_str: str, paths_to_compress: PathsToCompress, + database_connection_params): """ Compresses files from an FS into archives on an FS :param clp_config: ClpIoConfig :param clp_home_str: :param data_dir_str: + :param archive_output_dir_str: :param logs_dir_str: :param job_id_str: :param task_id_str: @@ -59,10 +60,10 @@ def compress(clp_config: ClpIoConfig, clp_home_str: str, data_dir_str: str, logs db_config_file.close() # Start assembling compression command - archives_dir = data_dir / 'archives' + archive_output_dir = pathlib.Path(archive_output_dir_str).resolve() compression_cmd = [ str(clp_home / 'bin' / 'clp'), - 'c', str(archives_dir), + 'c', str(archive_output_dir), '--print-archive-stats-progress', '--target-dictionaries-size', str(clp_config.output.target_dictionaries_size), diff --git a/components/job-orchestration/job_orchestration/executor/compression/task.py b/components/job-orchestration/job_orchestration/executor/compression/task.py index 3460c9818..ecd037a62 100644 --- a/components/job-orchestration/job_orchestration/executor/compression/task.py +++ b/components/job-orchestration/job_orchestration/executor/compression/task.py @@ -5,19 +5,19 @@ import pika from celery.utils.log import get_task_logger +from clp_py_utils.clp_io_config import ClpIoConfig, PathsToCompress from job_orchestration.executor.celery import app from . 
import fs_to_fs_compress_method logger = get_task_logger(__name__) -from clp_py_utils.clp_io_config import ClpIoConfig, PathsToCompress - @app.task() def compress(job_id: int, task_id: int, clp_io_config_json: str, paths_to_compress_json: str, database_connection_params): clp_home = os.getenv('CLP_HOME') data_dir = os.getenv('CLP_DATA_DIR') + archive_output_dir = os.getenv('CLP_ARCHIVE_OUTPUT_DIR') logs_dir = os.getenv('CLP_LOGS_DIR') celery_broker_url = os.getenv('BROKER_URL') @@ -39,13 +39,9 @@ def compress(job_id: int, task_id: int, clp_io_config_json: str, paths_to_compre channel.tx_commit() logger.info(f'COMPRESSION STARTED job_id={job_id} task_id={task_id}') - if 'fs' == clp_io_config.input.type and 'fs' == clp_io_config.output.type: - compression_successful, worker_output = \ - fs_to_fs_compress_method.compress( - clp_io_config, clp_home, data_dir, logs_dir, str(job_id), str(task_id), - paths_to_compress, database_connection_params) - else: - raise NotImplementedError + compression_successful, worker_output = \ + fs_to_fs_compress_method.compress(clp_io_config, clp_home, data_dir, archive_output_dir, logs_dir, str(job_id), + str(task_id), paths_to_compress, database_connection_params) if compression_successful: message['status'] = 'COMPLETED' diff --git a/components/job-orchestration/job_orchestration/scheduler/scheduler.py b/components/job-orchestration/job_orchestration/scheduler/scheduler.py index a6d0c92bd..8bb8f5f96 100644 --- a/components/job-orchestration/job_orchestration/scheduler/scheduler.py +++ b/components/job-orchestration/job_orchestration/scheduler/scheduler.py @@ -13,27 +13,25 @@ from pydantic import ValidationError from clp_py_utils.clp_config import CLPConfig, Database +from clp_py_utils.core import read_yaml_config_file from clp_py_utils.sql_adapter import SQL_Adapter from job_orchestration.executor.compression.task import compress from job_orchestration.scheduler.results_consumer import ReconnectingResultsConsumer from job_orchestration.scheduler.scheduler_data \ - import Job, Task, TaskUpdate, TaskCompletionUpdate, TaskFailureUpdate + import Job, Task, TaskCompletionUpdate, TaskFailureUpdate, TaskUpdate # Setup logging # Create logger console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) -console_handler.setFormatter( - logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s')) -log = logging.getLogger('scheduler') -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) +console_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s")) +logger = logging.getLogger(__file__) +logger.addHandler(console_handler) +logger.setLevel(logging.DEBUG) scheduled_jobs = {} jobs_lock = threading.Lock() -from clp_py_utils.core import read_yaml_config_file - def fetch_new_task_metadata(db_cursor) -> list: db_cursor.execute( @@ -56,34 +54,31 @@ def fetch_new_task_metadata(db_cursor) -> list: def update_task_metadata(db_cursor, task_id, kv: typing.Dict[str, typing.Any]): if not len(kv): - log.error("Must specify at least one field to update") + logger.error("Must specify at least one field to update") raise ValueError field_set_expressions = [f'{k}="{v}"' for k, v in kv.items()] - query = f'UPDATE compression_tasks SET {", ".join(field_set_expressions)} ' \ - f'WHERE task_id={task_id};' + query = f'UPDATE compression_tasks SET {", ".join(field_set_expressions)} WHERE task_id={task_id};' db_cursor.execute(query) def update_job_metadata(db_cursor, job_id, kv): if not len(kv): - log.error("Must 
specify at least one field to update") + logger.error("Must specify at least one field to update") raise ValueError field_set_expressions = [f'{k}="{v}"' for k, v in kv.items()] - query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} ' \ - f'WHERE job_id={job_id};' + query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} WHERE job_id={job_id};' db_cursor.execute(query) def increment_job_metadata(db_cursor, job_id, kv): if not len(kv): - log.error("Must specify at least one field to increment") + logger.error("Must specify at least one field to increment") raise ValueError field_set_expressions = [f'{k}={k}+{v}' for k, v in kv.items()] - query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} ' \ - f'WHERE job_id={job_id};' + query = f'UPDATE compression_jobs SET {", ".join(field_set_expressions)} WHERE job_id={job_id};' db_cursor.execute(query) @@ -92,7 +87,7 @@ def schedule_task(job: Job, task: Task, database_config: Database, dctx: zstanda (job.job_id, task.task_id, job.get_clp_config_json(dctx), task.get_clp_paths_to_compress_json(dctx), - database_config.get_clp_connection_params_and_type()), + database_config.get_clp_connection_params_and_type(True)), task_id=str(task.task_id), queue='compression', priority=task.priority) @@ -103,13 +98,13 @@ def search_and_schedule_new_tasks(db_conn, db_cursor, database_config: Database) global scheduled_jobs global jobs_lock - log.debug('Search and schedule new tasks') + logger.debug('Search and schedule new tasks') dctx = zstandard.ZstdDecompressor() # Fetch new task for task_row in fetch_new_task_metadata(db_cursor): - log.debug(f"Found task with job_id={task_row['job_id']} task_id={task_row['task_id']}") + logger.debug(f"Found task with job_id={task_row['job_id']} task_id={task_row['task_id']}") # Only Add database credentials to ephemeral task specification passed to workers task = Task.parse_obj(task_row) @@ -173,7 +168,7 @@ def task_results_consumer(sql_adapter: SQL_Adapter, celery_broker_url): def callback(ch, method, properties, body): global scheduled_jobs global jobs_lock - global log + global logger try: # Validate message body @@ -183,12 +178,12 @@ def callback(ch, method, properties, body): elif 'FAILED' == task_update.status: task_update = TaskFailureUpdate.parse_raw(body) except ValidationError as err: - log.error(err) + logger.error(err) exit(-1) - with closing(sql_adapter.create_connection()) as db_conn, \ + with closing(sql_adapter.create_connection(True)) as db_conn, \ closing(db_conn.cursor(dictionary=True)) as db_cursor, jobs_lock: - log.debug(f'Task update received: ' + logger.debug(f'Task update received: ' f'job_id={task_update.job_id} ' f'task_id={task_update.task_id} ' f'status={task_update.status}') @@ -202,7 +197,7 @@ def callback(ch, method, properties, body): # It could be that previous scheduler crashed. 
# The only thing we can do is to log, and discard the message # to prevent infinite loop - log.warning(f'Discarding untracked task update: {task_update.json()}') + logger.warning(f'Discarding untracked task update: {task_update.json()}') ch.basic_ack(method.delivery_tag) return @@ -220,13 +215,13 @@ def callback(ch, method, properties, body): elif 'COMPLETED' == task_update.status: # Update sent by worker when task finishes if 'COMPRESSING' != task.task_status: - log.warning(f'Discarding untracked task update: {task_update.json()}') + logger.warning(f'Discarding untracked task update: {task_update.json()}') ch.basic_ack(method.delivery_tag) raise NotImplementedError task_duration = max(int((now - task.task_start_time).total_seconds()), 1) - log.info(f'Task job-{task_update.job_id}-task-{task_update.task_id} ' + logger.info(f'Task job-{task_update.job_id}-task-{task_update.task_id} ' f'completed in {task_duration} second.') update_task_metadata(db_cursor, task_update.task_id, dict( @@ -241,8 +236,8 @@ def callback(ch, method, properties, body): num_tasks_completed=1 )) elif 'FAILED' == task_update.status: - log.warning(f'Marking job_id={task_update.job_id} as failed.') - log.warning(str(task_update.error_message)) + logger.warning(f'Marking job_id={task_update.job_id} as failed.') + logger.warning(str(task_update.error_message)) update_task_metadata(db_cursor, task_update.task_id, dict( task_status=task_update.status, task_duration=int((now - task.task_start_time).total_seconds()) @@ -275,7 +270,7 @@ def callback(ch, method, properties, body): except Exception as error: # Transaction failure, rollback, don't send ACK and simply reprocess the msg again - log.error(f'Database update failed: {error}.') + logger.error(f'Database update failed: {error}.') db_conn.rollback() consumer = ReconnectingResultsConsumer(celery_broker_url, callback) @@ -285,7 +280,6 @@ def callback(ch, method, properties, body): def main(argv): - global scheduled_jobs args_parser = argparse.ArgumentParser() args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') args = args_parser.parse_args(argv[1:]) @@ -297,14 +291,14 @@ def main(argv): try: clp_config = CLPConfig.parse_obj(read_yaml_config_file(config_path)) except ValidationError as err: - log.error(err) + logger.error(err) except Exception as ex: - log.error(ex) + logger.error(ex) # read_yaml_config_file already logs the parsing error inside pass else: # Collect new jobs from the database - log.info('Starting CLP job scheduler') + logger.info('Starting CLP job scheduler') sql_adapter = SQL_Adapter(clp_config.database) results_consumer = task_results_consumer(sql_adapter, celery_broker_url) @@ -312,18 +306,18 @@ def main(argv): while True: try: # Start Job Processing Loop - with closing(sql_adapter.create_connection()) as db_conn, \ + with closing(sql_adapter.create_connection(True)) as db_conn, \ closing(db_conn.cursor(dictionary=True)) as db_cursor: search_and_schedule_new_tasks(db_conn, db_cursor, sql_adapter.database_config) update_completed_jobs(db_conn, db_cursor) except Exception as ex: - log.error('Error in scheduling: ') - log.error(ex) + logger.error('Error in scheduling: ') + logger.error(ex) finally: try: time.sleep(clp_config.scheduler.jobs_poll_delay) except KeyboardInterrupt: - log.info('Gracefully shutting down') + logger.info('Gracefully shutting down') break if results_consumer: @@ -331,12 +325,12 @@ def main(argv): results_consumer._consumer.stop() except RuntimeError as err: if 'IOLoop is not reentrant and is 
already running' != str(err):
-                log.error(err)
+                logger.error(err)
                 raise RuntimeError
             else:
                 # Normal graceful shutdown path
                 pass
-    log.info('Scheduler stopped')
+    logger.info('Scheduler stopped')


 if '__main__' == __name__:
diff --git a/components/package-template/src/.gitignore b/components/package-template/src/.gitignore
index 3283d6cd6..e69de29bb 100644
--- a/components/package-template/src/.gitignore
+++ b/components/package-template/src/.gitignore
@@ -1 +0,0 @@
-etc/clp-config.yaml
diff --git a/components/package-template/src/README.md b/components/package-template/src/README.md
index 96883fde3..3f4f2cc85 100644
--- a/components/package-template/src/README.md
+++ b/components/package-template/src/README.md
@@ -3,9 +3,44 @@
 Compressed Log Processor (CLP) is a tool that compresses text logs and allows
 users to search the compressed data without decompression. CLP's compression
 ratio is significantly higher than gzip.

-## Getting started
+## Usage
+
+### Starting CLP
+
+```shell
+sbin/start-clp
+```
+
+### Compressing logs
+
+```shell
+sbin/compress <uncompressed log files/directories>
+```
+
+For more options, run the script with the `--help` option.
+
+### Decompressing logs
+
+```shell
+sbin/decompress -d <output directory>
+```
+For more options, run the script with the `--help` option.
+
+### Searching logs
+
+```bash
+sbin/search <wildcard query>
+```

-CLP can be run in Docker containers, in one of two modes:
+CLP supports two wildcard characters:
+* `*` which matches 0 or more characters
+* `?` which matches any single character
+
+For more options, run the script with the `--help` option.
+
+## Deployment options
+
+CLP can be run in Docker containers, in one of two deployments:
 * On a single-node (typically for development and testing)
 * Across multiple nodes

@@ -14,96 +49,88 @@
 ### Requirements

 * [Docker](https://docs.docker.com/engine/install/)
-  * `docker` should be in the user's path, and
-  * [runnable without superuser privileges](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)
+  * `docker` should be in the user's path
+  * `docker` should be [runnable without superuser privileges](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user)
+    (without sudo)
-* Plenty of disk space
 * Python3
   * For systems with a version < 3.7, run `pip3 install -r requirements-pre-3.7.txt`

-### Starting CLP
+### Configuration

-```bash
-./sbin/start-clp --uncompressed-logs-dir <uncompressed logs directory>
-```
-
-Note that running CLP in containers means that the `uncompressed-logs-dir` must be mounted inside the container.
-Therefore:
-* The `uncompressed-logs-dir` must not include symbolic links to items **outside** of the directory
-* Changing `uncompressed-logs-dir` requires restarting CLP.
-
-### Stopping CLP
-
-```bash
-./sbin/stop-clp
-```
+* If necessary, you can uncomment and modify any configurations in
+  `etc/clp-config.yml`
+  * You can use a configuration file at a different location, but you will need
+    to pass the location to any CLP command you run
+    (`sbin/<script> --config <config file path>`).
+* Note: In most cases, changing any configurations will require restarting CLP.

 ## Multi-node deployment

+A multi-node deployment comprises a control node and one or more worker nodes.
+Any node can be a control node.
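The Setup section below walks through the required configuration edits; as a rough sketch (the hostname `ctrl-node` and the `/mnt/dfs` path are hypothetical), a node's `etc/clp-config.yml` in such a deployment would contain settings like:

```yaml
# Hypothetical multi-node values; the keys are the same ones shipped in
# etc/clp-config.yml
input_logs_directory: "/mnt/dfs/logs"  # logs on the distributed filesystem

database:
  host: "ctrl-node"  # hostname/IP of the control node

queue:
  host: "ctrl-node"  # hostname/IP of the control node
```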
+
 ### Requirements

 * The single-node deployment requirements
-* For the scheduler node, port 3306 and 5672 must be available and accessible from all compute nodes
-* A distributed file system mounted at the same path on all nodes
+* A distributed filesystem mounted at the same path on all nodes
+* Your uncompressed logs should be on the distributed filesystem

-### Starting the scheduler
+### Setup

-```bash
-sbin/start-clp --start-scheduler-only --publish-ports \
-    --uncompressed-logs-dir <uncompressed logs directory>
-```
+The easiest way to set up a multi-node deployment is as follows:

-### Starting the worker(s)
+* Copy the package to a location on your distributed filesystem
+* Modify `etc/clp-config.yml`:
+  * Uncomment the settings if they are commented out.
+  * Set `input_logs_directory` to the location of your logs on the distributed
+    filesystem.
+  * Set the `host` in the `database` section to the hostname/IP of your control
+    node.
+  * Set the `host` in the `queue` section to the hostname/IP of your control
+    node.
+* You can now skip to the next sections to see how to start/stop the components.

-```bash
-sbin/start-clp --start-worker-only --publish-ports \
-    --uncompressed-logs-dir <uncompressed logs directory>
-```
+If you don't want to store data within the package, you can change the
+configuration file as follows:

-### Stopping components
+* Set `directory` in the `archive_output` section to a location on the
+  distributed filesystem (outside the package).
+* Set `data_directory` and `logs_directory` to locations on the distributed
+  filesystem (outside the package).
+
+### Starting the control components
+
+On the control node, run the following commands (these must be started in the
+order below):

-Every component can be stopped by:
 ```bash
-./sbin/stop-clp
+sbin/start-clp db
+sbin/start-clp queue
+sbin/start-clp scheduler
 ```

-## Usage
+### Starting the worker-node components

-Once CLP is started, you can use it as follows.
-
-### Compressing logs
+On every node where you want to run workers, run this command:

 ```bash
-./sbin/compress
+sbin/start-clp worker
 ```

-Note:
-* The uncompressed logs must be within `uncompressed-logs-dir`
-* CLP is designed to compress text logs
-
-For more options, run the script with the `--help` option.
+### Stopping components

-### Decompressing logs
+To stop an individual component on a node, you can use:

-To decompress all compressed logs:
 ```bash
-./sbin/decompress -d <output directory>
+sbin/stop-clp <component>
 ```
-For more options, run the script with the `--help` option.
+
+To stop all components on a node, you can use:

-### Searching logs
-
-To search all logs for a given wildcard query:
 ```bash
-./sbin/search
+sbin/stop-clp
 ```

-CLP supports two wildcard characters:
-* `*` which matches 0 or more characters
-* `?` which matches any single character
-
-For more options, run the script with the `--help` option.
-
 ## Troubleshooting

 ### ModuleNotFoundError
diff --git a/components/package-template/src/etc/clp-config.yaml.template b/components/package-template/src/etc/clp-config.yaml.template
deleted file mode 100644
index e10728715..000000000
--- a/components/package-template/src/etc/clp-config.yaml.template
+++ /dev/null
@@ -1,14 +0,0 @@
-clp_cluster_name: clp-mini-cluster
-
-archive_output:
-  # How much data CLP should try to compress into each archive
-  target_archive_size: 268435456  # 256MB
-
-  # How large the dictionaries should be allowed to get before the archive is closed and a new one is created
-  target_dictionaries_size: 33554432  # 32MB
-
-  # How large each encoded file should be before being split into a new encoded file
-  target_encoded_file_size: 268435456  # 256MB
-
-  # How much data CLP should try to fit into each segment within an archive
-  target_segment_size: 268435456  # 256MB
diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml
new file mode 100644
index 000000000..32672690b
--- /dev/null
+++ b/components/package-template/src/etc/clp-config.yml
@@ -0,0 +1,46 @@
+## A path containing any logs you wish to compress. Must be reachable by all
+## workers.
+## - This path will be exposed inside the container, so symbolic links to files
+##   outside this path will be ignored.
+#input_logs_directory: "/"
+#
+## File containing credentials for services
+#credentials_file_path: "etc/credentials.yml"
+#
+#database:
+#  type: "mariadb"  # "mariadb" or "mysql"
+#  host: "localhost"
+#  port: 3306
+#  name: "clp-db"
+#
+#scheduler:
+#  jobs_poll_delay: 1  # seconds
+#
+#queue:
+#  host: "localhost"
+#  port: 5672
+#
+## Where archives should be output to
+#archive_output:
+#  directory: "var/data/archives"
+#
+#  # How much data CLP should try to compress into each archive
+#  target_archive_size: 268435456  # 256 MB
+#
+#  # How large the dictionaries should be allowed to get before the archive is
+#  # closed and a new one is created
+#  target_dictionaries_size: 33554432  # 32 MB
+#
+#  # How large each encoded file should be before being split into a new encoded
+#  # file
+#  target_encoded_file_size: 268435456  # 256 MB
+#
+#  # How much data CLP should try to fit into each segment within an archive
+#  target_segment_size: 268435456  # 256 MB
+#
+## Location where other data (besides archives) is stored. It will be created if
+## it doesn't exist.
+#data_directory: "var/data"
+#
+## Location where logs are stored. It will be created if it doesn't exist.
+#logs_directory: "var/log" diff --git a/components/package-template/src/etc/credentials.template.yml b/components/package-template/src/etc/credentials.template.yml new file mode 100644 index 000000000..61feba2c5 --- /dev/null +++ b/components/package-template/src/etc/credentials.template.yml @@ -0,0 +1,9 @@ +## Database credentials +#db: +# user: "user" +# password: "pass" +# +## Queue credentials +#queue: +# user: "user" +# password: "pass" diff --git a/components/package-template/src/etc/mysql/conf.d/logging.cnf b/components/package-template/src/etc/mysql/conf.d/logging.cnf new file mode 100644 index 000000000..a9f80a7ce --- /dev/null +++ b/components/package-template/src/etc/mysql/conf.d/logging.cnf @@ -0,0 +1,10 @@ +[mysqld] +# https://dev.mysql.com/doc/refman/8.0/en/log-destinations.html +# Enable the general log +general_log=1 +# Set the destination file +general_log_file=/var/log/mysql/mysql.log + +# https://dev.mysql.com/doc/refman/8.0/en/error-log-destination-configuration.html +# Send error logs to file +log_error=/var/log/mysql/mysql-error.log diff --git a/components/package-template/src/lib/python3/site-packages/clp/package_utils.py b/components/package-template/src/lib/python3/site-packages/clp/package_utils.py index d922b0f82..11aa533e0 100644 --- a/components/package-template/src/lib/python3/site-packages/clp/package_utils.py +++ b/components/package-template/src/lib/python3/site-packages/clp/package_utils.py @@ -1,71 +1,245 @@ -import json +import enum +import errno import pathlib +import secrets +import socket import subprocess +import typing -from clp_py_utils.clp_config import CLPConfig +import yaml + +from clp_py_utils.clp_config import CLPConfig, CLP_DEFAULT_CREDENTIALS_FILE_PATH +from clp_py_utils.core import \ + get_config_value, \ + make_config_path_absolute, \ + read_yaml_config_file, \ + validate_path_could_be_dir + +# CONSTANTS +# Component names +DB_COMPONENT_NAME = 'db' +QUEUE_COMPONENT_NAME = 'queue' +SCHEDULER_COMPONENT_NAME = 'scheduler' +WORKER_COMPONENT_NAME = 'worker' + +# Paths +CONTAINER_CLP_HOME = pathlib.Path('/') / 'opt' / 'clp' +CONTAINER_INPUT_LOGS_ROOT_DIR = pathlib.Path('/') / 'mnt' / 'logs' +CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH = pathlib.Path('etc') / 'clp-config.yml' + +DOCKER_MOUNT_TYPE_STRINGS = [ + 'bind' +] + + +class DockerMountType(enum.IntEnum): + BIND = 0 + + +class DockerMount: + def __init__(self, type: DockerMountType, src: pathlib.Path, dst: pathlib.Path, is_read_only: bool = False): + self.__type = type + self.__src = src + self.__dst = dst + self.__is_read_only = is_read_only + + def __str__(self): + mount_str = f"type={DOCKER_MOUNT_TYPE_STRINGS[self.__type]},src={self.__src},dst={self.__dst}" + if self.__is_read_only: + mount_str += ",readonly" + return mount_str + + +class CLPDockerMounts: + def __init__(self, clp_home: pathlib.Path, docker_clp_home: pathlib.Path): + self.input_logs_dir: typing.Optional[DockerMount] = None + self.clp_home: typing.Optional[DockerMount] = DockerMount(DockerMountType.BIND, clp_home, docker_clp_home) + self.data_dir: typing.Optional[DockerMount] = None + self.logs_dir: typing.Optional[DockerMount] = None + self.archives_output_dir: typing.Optional[DockerMount] = None -CONTAINER_CLP_INSTALL_PREFIX = '/opt' def check_dependencies(): try: - subprocess.run('command -v git', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) + subprocess.run("command -v docker", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True) except subprocess.CalledProcessError: - raise 
EnvironmentError('git is not installed on the path.')
-
+        raise EnvironmentError("docker is not installed or available on the path")
     try:
-        subprocess.run('command -v docker', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
         subprocess.run(['docker', 'ps'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
     except subprocess.CalledProcessError:
-        raise EnvironmentError('docker is not installed on the path or cannot run without superuser privileges (sudo).')
-
-
-def check_env(cluster_name: str):
-    check_docker_network_bridge_cmd = ['docker', 'network', 'inspect', cluster_name]
-    proc = subprocess.run(check_docker_network_bridge_cmd, stdout=subprocess.PIPE,
-                          stderr=subprocess.PIPE)
-    if 0 != proc.returncode:
-        raise EnvironmentError(f'Failed to inspect docker network bridge {cluster_name}')
-
-    bridge_bridge_specification = json.loads(proc.stdout.decode('utf-8'))[0]
-    required_containers = {cluster_name}
-    for container_id, container in bridge_bridge_specification['Containers'].items():
-        try:
-            required_containers.remove(container['Name'])
-        except KeyError:
-            pass
-
-    if required_containers:
-        raise EnvironmentError(f'The required container is not started: {",".join(required_containers)}')
-
-
-def prepare_package_and_config(clp_config: CLPConfig, clp_home: pathlib.Path, docker_clp_home: pathlib.Path):
-    host_data_directory = pathlib.Path(clp_config.data_directory)
-    if '' == host_data_directory.anchor:
-        # In the config file, we assume prefix is clp_home inside the docker (/root/clp)
-        host_data_directory = clp_home / clp_config.data_directory
-        clp_config.data_directory = str(docker_clp_home / clp_config.data_directory)
-    host_data_directory.mkdir(parents=True, exist_ok=True)
-
-    host_log_directory = pathlib.Path(clp_config.logs_directory)
-    if '' == host_log_directory.anchor:
-        # In the config file, we assume prefix is clp_home, inside the docker (/root/clp)
-        host_log_directory = clp_home / clp_config.logs_directory
-        clp_config.logs_directory = str(docker_clp_home / clp_config.logs_directory)
-    host_log_directory.mkdir(parents=True, exist_ok=True)
-
-    host_archive_output_directory = pathlib.Path(clp_config.archive_output.directory)
-    if '' == host_archive_output_directory.anchor:
-        # In the config file, we assume prefix is clp_home, inside the docker (/root/clp)
-        host_archive_output_directory = clp_home / clp_config.archive_output.directory
-        clp_config.archive_output.directory = \
-            str(docker_clp_home / clp_config.archive_output.directory)
-    host_archive_output_directory.mkdir(parents=True, exist_ok=True)
-
-    return host_data_directory, host_log_directory, host_archive_output_directory, clp_config
-
-
-def make_config_path_absolute(clp_home: pathlib.Path, config_path: pathlib.Path):
-    if config_path.is_absolute():
-        return config_path
+        raise EnvironmentError("docker cannot run without superuser privileges (sudo).")
+
+
+def container_exists(container_name):
+    cmd = ['docker', 'ps', '-q', '-f', f'name={container_name}']
+    proc = subprocess.run(cmd, stdout=subprocess.PIPE)
+    # Iterate over output *lines* (not characters); any non-empty line is the ID
+    # of a running container matching the given name
+    for line in proc.stdout.decode('utf-8').splitlines():
+        if line != "":
+            return True
+
+    return False
+
+
+def validate_port(port_name: str, hostname: str, port: int):
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        sock.bind((hostname, port))
+        sock.close()
+    except OSError as e:
+        if e.errno == errno.EADDRINUSE:
+            raise ValueError(f"{port_name} {hostname}:{port} is already in use. 
Please choose a different port.") + else: + raise ValueError(f"{port_name} {hostname}:{port} is invalid: {e.strerror}.") + + +def is_path_already_mounted(mounted_host_root: pathlib.Path, mounted_container_root: pathlib.Path, + host_path: pathlib.Path, container_path: pathlib.Path): + try: + host_path_relative_to_mounted_root = host_path.relative_to(mounted_host_root) + except ValueError: + return False + + try: + container_path_relative_to_mounted_root = container_path.relative_to(mounted_container_root) + except ValueError: + return False + + return host_path_relative_to_mounted_root == container_path_relative_to_mounted_root + + +def generate_container_config(clp_config: CLPConfig, clp_home: pathlib.Path): + """ + Copies the given config and sets up mounts mapping the relevant host paths into the container + + :param clp_config: + :param clp_home: + """ + container_clp_config = clp_config.copy(deep=True) + + docker_mounts = CLPDockerMounts(clp_home, CONTAINER_CLP_HOME) + + input_logs_dir = clp_config.input_logs_directory.resolve() + container_clp_config.input_logs_directory = CONTAINER_INPUT_LOGS_ROOT_DIR / \ + input_logs_dir.relative_to(input_logs_dir.anchor) + docker_mounts.input_logs_dir = DockerMount(DockerMountType.BIND, input_logs_dir, + container_clp_config.input_logs_directory, True) + + container_clp_config.data_directory = CONTAINER_CLP_HOME / 'var' / 'data' + if not is_path_already_mounted(clp_home, CONTAINER_CLP_HOME, clp_config.data_directory, + container_clp_config.data_directory): + docker_mounts.data_dir = DockerMount(DockerMountType.BIND, clp_config.data_directory, + container_clp_config.data_directory) + + container_clp_config.logs_directory = CONTAINER_CLP_HOME / 'var' / 'log' + if not is_path_already_mounted(clp_home, CONTAINER_CLP_HOME, clp_config.logs_directory, + container_clp_config.logs_directory): + docker_mounts.logs_dir = DockerMount(DockerMountType.BIND, clp_config.logs_directory, + container_clp_config.logs_directory) + + container_clp_config.archive_output.directory = pathlib.Path('/') / 'mnt' / 'archive-output' + if not is_path_already_mounted(clp_home, CONTAINER_CLP_HOME, clp_config.archive_output.directory, + container_clp_config.archive_output.directory): + docker_mounts.archives_output_dir = DockerMount(DockerMountType.BIND, clp_config.archive_output.directory, + container_clp_config.archive_output.directory) + + return container_clp_config, docker_mounts + + +def validate_config_key_existence(config, key): + try: + value = get_config_value(config, key) + except KeyError: + raise ValueError(f"{key} must be specified in CLP's configuration.") + return value + + +def validate_and_load_config_file(config_file_path: pathlib.Path, default_config_file_path: pathlib.Path, + clp_home: pathlib.Path): + if config_file_path.exists(): + raw_clp_config = read_yaml_config_file(config_file_path) + if raw_clp_config is None: + clp_config = CLPConfig() + else: + clp_config = CLPConfig.parse_obj(raw_clp_config) else: - return clp_home / config_path + if config_file_path != default_config_file_path: + raise ValueError(f"Config file '{config_file_path}' does not exist.") + + clp_config = CLPConfig() + + clp_config.make_config_paths_absolute(clp_home) + + # Make data and logs directories node-specific + hostname = socket.gethostname() + clp_config.data_directory /= hostname + clp_config.logs_directory /= hostname + + return clp_config + + +def generate_credentials_file(credentials_file_path: pathlib.Path): + credentials = { + DB_COMPONENT_NAME: { + 'user': 'clp-user', + 
'password': secrets.token_urlsafe(8) + }, + QUEUE_COMPONENT_NAME: { + 'user': 'clp-user', + 'password': secrets.token_urlsafe(8) + }, + } + + with open(credentials_file_path, 'w') as f: + yaml.safe_dump(credentials, f) + + +def validate_credentials_file_path(clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool): + credentials_file_path = clp_config.credentials_file_path + if not credentials_file_path.exists(): + if make_config_path_absolute(clp_home, CLP_DEFAULT_CREDENTIALS_FILE_PATH) == credentials_file_path \ + and generate_default_file: + generate_credentials_file(credentials_file_path) + else: + raise ValueError(f"Credentials file path '{credentials_file_path}' does not exist.") + elif not credentials_file_path.is_file(): + raise ValueError(f"Credentials file path '{credentials_file_path}' is not a file.") + + +def validate_and_load_db_credentials_file(clp_config: CLPConfig, clp_home: pathlib.Path, generate_default_file: bool): + validate_credentials_file_path(clp_config, clp_home, generate_default_file) + clp_config.load_database_credentials_from_file() + + +def validate_and_load_queue_credentials_file(clp_config: CLPConfig, clp_home: pathlib.Path, + generate_default_file: bool): + validate_credentials_file_path(clp_config, clp_home, generate_default_file) + clp_config.load_queue_credentials_from_file() + + +def validate_db_config(clp_config: CLPConfig, data_dir: pathlib.Path, logs_dir: pathlib.Path): + try: + validate_path_could_be_dir(data_dir) + except ValueError as ex: + raise ValueError(f"database data directory is invalid: {ex}") + + try: + validate_path_could_be_dir(logs_dir) + except ValueError as ex: + raise ValueError(f"database logs directory is invalid: {ex}") + + validate_port("database.port", clp_config.database.host, clp_config.database.port) + + +def validate_queue_config(clp_config: CLPConfig, logs_dir: pathlib.Path): + try: + validate_path_could_be_dir(logs_dir) + except ValueError as ex: + raise ValueError(f"queue logs directory is invalid: {ex}") + + validate_port("queue.port", clp_config.queue.host, clp_config.queue.port) + + +def validate_worker_config(clp_config: CLPConfig): + clp_config.validate_input_logs_dir() + clp_config.validate_archive_output_dir() diff --git a/components/package-template/src/sbin/compress b/components/package-template/src/sbin/compress index 6691d616d..5f69994dd 100755 --- a/components/package-template/src/sbin/compress +++ b/components/package-template/src/sbin/compress @@ -5,123 +5,160 @@ import os import pathlib import subprocess import sys +import uuid # Setup logging # Create logger -log = logging.getLogger('clp') -log.setLevel(logging.INFO) +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") logging_console_handler.setFormatter(logging_formatter) -log.addHandler(logging_console_handler) +logger.addHandler(logging_console_handler) def get_clp_home(): - clp_home = None + # Determine CLP_HOME from an environment variable or this script's path + _clp_home = None if 'CLP_HOME' in os.environ: - clp_home = pathlib.Path(os.environ['CLP_HOME']) + _clp_home = pathlib.Path(os.environ['CLP_HOME']) else: for path in pathlib.Path(__file__).resolve().parents: if 'sbin' == path.name: - clp_home = path.parent + _clp_home = path.parent break - if clp_home 
is None: - log.error('CLP_HOME is not set and could not be determined automatically.') + if _clp_home is None: + logger.error("CLP_HOME is not set and could not be determined automatically.") return None - elif not clp_home.exists(): - log.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - log.error('Failed to load python3 packages bundled with CLP.') - return -1 + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) - -from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX -from clp_py_utils.core import read_yaml_config_file -from clp_py_utils.clp_package_config import CLPPackageConfig -from pydantic import ValidationError +import yaml +from clp.package_utils import \ + CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, \ + CONTAINER_CLP_HOME, \ + CONTAINER_INPUT_LOGS_ROOT_DIR, \ + generate_container_config, \ + validate_and_load_config_file, \ + validate_and_load_db_credentials_file def main(argv): - args_parser = argparse.ArgumentParser(description='Startup script for CLP') - args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') - args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to compress.') - args_parser.add_argument('-f', '--input-list', dest='input_list', help='A file listing all paths to compress.') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Compresses files/directories") + args_parser.add_argument('--config', '-c', default=str(default_config_file_path), + help="CLP package configuration file.") + args_parser.add_argument('paths', metavar='PATH', nargs='*', help="Paths to compress.") + args_parser.add_argument('-f', '--input-list', dest='input_list', help="A file listing all paths to compress.") + parsed_args = args_parser.parse_args(argv[1:]) - # Infer config file path + # Validate and load config file try: - if not parsed_args.config: - # Did not provide a config file - default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' - if not default_clp_package_config_file.exists(): - raise FileNotFoundError - log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') - package_config_file_path = default_clp_package_config_file - else: - # Provided a config file - package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) - except FileNotFoundError: - log.error('Did not provide a clp package config file or the specified config file does not exist.') - return + config_file_path = pathlib.Path(parsed_args.config) + clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_logs_dir() - try: - clp_package_config = 
CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path))
-    except ValidationError as err:
-        log.error(err)
-        return
-    except Exception as ex:
-        # read_yaml_config_file already logs the parsing error inside
-        return
-
-    clp_cluster_name = clp_package_config.cluster_name
-    try:
-        check_env(clp_cluster_name)
-    except EnvironmentError as ex:
-        log.error(ex)
+        validate_and_load_db_credentials_file(clp_config, clp_home, False)
+    except:
+        logger.exception("Failed to load config.")
         return -1
 
-    docker_exec_cmd = [
-        'docker', 'exec',
-        '--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp',
+    container_name = f'clp-compressor-{str(uuid.uuid4())[-4:]}'
+
+    container_clp_config, mounts = generate_container_config(clp_config, clp_home)
+    container_config_filename = f'.{container_name}-config.yml'
+    container_config_file_path_on_host = clp_config.logs_directory / container_config_filename
+    with open(container_config_file_path_on_host, 'w') as f:
+        yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
+
+    container_start_cmd = [
+        'docker', 'run',
+        '-i',
+        '--rm',
+        '--network', 'host',
+        '-w', str(CONTAINER_CLP_HOME),
         '-u', f'{os.getuid()}:{os.getgid()}',
-        clp_package_config.cluster_name,
-        'sbin/native/compress', '--config', f'{CONTAINER_CLP_INSTALL_PREFIX}/.{clp_package_config.cluster_name}.yaml'
+        '--name', container_name,
+        '--mount', str(mounts.clp_home),
+    ]
+    necessary_mounts = [
+        mounts.input_logs_dir,
+        mounts.data_dir,
+        mounts.logs_dir,
+        mounts.archives_output_dir
+    ]
+    for mount in necessary_mounts:
+        if mount:
+            container_start_cmd.append('--mount')
+            container_start_cmd.append(str(mount))
+    container_start_cmd.append(clp_config.execution_container)
+
+    compress_cmd = [
+        str(CONTAINER_CLP_HOME / 'sbin' / 'native' / 'compress'),
+        '--config', str(container_clp_config.logs_directory / container_config_filename),
+        '--remove-path-prefix', str(CONTAINER_INPUT_LOGS_ROOT_DIR)
     ]
     for path in parsed_args.paths:
-        path = str(pathlib.Path(path).resolve())
-        docker_exec_cmd.append(path)
+        # Resolve path and prefix it with CONTAINER_INPUT_LOGS_ROOT_DIR
+        resolved_path = pathlib.Path(path).resolve()
+        path = str(CONTAINER_INPUT_LOGS_ROOT_DIR / resolved_path.relative_to(resolved_path.anchor))
+        compress_cmd.append(path)
     if parsed_args.input_list is not None:
-        # Validate all paths in input list
-        all_paths_valid = True
-        with open(parsed_args.input_list, 'r') as f:
-            for line in f:
-                path = pathlib.Path(line.rstrip())
-                if not path.is_absolute():
-                    log.error(f'Invalid relative path in input list: {path}')
-                    all_paths_valid = False
-        if not all_paths_valid:
-            raise ValueError("--input-list must only contain absolute paths")
-
-        docker_exec_cmd.append('--input-list')
-        docker_exec_cmd.append(parsed_args.input_list)
-    log.debug(docker_exec_cmd)
-    subprocess.run(docker_exec_cmd)
+        # Get unused output path
+        while True:
+            output_list_filename = f'{uuid.uuid4()}.txt'
+            output_list_path = clp_config.logs_directory / output_list_filename
+            if not output_list_path.exists():
+                break
+
+        try:
+            with open(output_list_path, 'w') as output_list:
+                # Validate all paths in input list
+                all_paths_valid = True
+                with open(parsed_args.input_list, 'r') as f:
+                    for line in f:
+                        input_path = pathlib.Path(line.rstrip())
+                        # Check absoluteness *before* resolving; .resolve() makes
+                        # any path absolute and would defeat this validation
+                        if not input_path.is_absolute():
+                            logger.error(f"Invalid relative path in input list: {input_path}")
+                            all_paths_valid = False
+                            continue
+                        resolved_path = input_path.resolve()
+                        path = CONTAINER_INPUT_LOGS_ROOT_DIR / resolved_path.relative_to(resolved_path.anchor)
+                        output_list.write(f'{path}\n')
+                if not all_paths_valid:
+                    raise ValueError("--input-list must only contain absolute paths")
+        except Exception:
+            # Only remove the list on failure; on success it must survive until
+            # the containerized compressor has read it
+            output_list_path.unlink()
+            raise
+
+        compress_cmd.append('--input-list')
+        compress_cmd.append(str(container_clp_config.logs_directory / output_list_filename))
+
+    cmd = container_start_cmd + compress_cmd
+    subprocess.run(cmd, check=True)
+
+    # Remove generated files
+    if parsed_args.input_list is not None:
+        output_list_path.unlink()
+    container_config_file_path_on_host.unlink()
 
     return 0
diff --git a/components/package-template/src/sbin/decompress b/components/package-template/src/sbin/decompress
index 9678bd234..c5c550cb9 100755
--- a/components/package-template/src/sbin/decompress
+++ b/components/package-template/src/sbin/decompress
@@ -3,197 +3,160 @@
 import argparse
 import logging
 import os
 import pathlib
-import shutil
 import subprocess
 import sys
 import uuid
 
 # Setup logging
 # Create logger
-log = logging.getLogger('clp')
-log.setLevel(logging.DEBUG)
+logger = logging.getLogger('clp')
+logger.setLevel(logging.DEBUG)
 # Setup console logging
 logging_console_handler = logging.StreamHandler()
-logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s')
+logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s")
 logging_console_handler.setFormatter(logging_formatter)
-log.addHandler(logging_console_handler)
+logger.addHandler(logging_console_handler)
 
 
 def get_clp_home():
-    clp_home = None
+    # Determine CLP_HOME from an environment variable or this script's path
+    _clp_home = None
     if 'CLP_HOME' in os.environ:
-        clp_home = pathlib.Path(os.environ['CLP_HOME'])
+        _clp_home = pathlib.Path(os.environ['CLP_HOME'])
     else:
        for path in pathlib.Path(__file__).resolve().parents:
            if 'sbin' == path.name:
-                clp_home = path.parent
+                _clp_home = path.parent
                break
-    if clp_home is None:
-        log.error('CLP_HOME is not set and could not be determined automatically.')
+    if _clp_home is None:
+        logger.error("CLP_HOME is not set and could not be determined automatically.")
         return None
-    elif not clp_home.exists():
-        log.error('CLP_HOME does not exist.')
+    elif not _clp_home.exists():
+        logger.error("CLP_HOME set to nonexistent path.")
         return None
 
-    return clp_home.resolve()
+    return _clp_home.resolve()
 
 
-def load_bundled_python_lib_path(clp_home):
-    python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages'
+def load_bundled_python_lib_path(_clp_home):
+    python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages'
     if not python_site_packages_path.is_dir():
-        log.error('Failed to load python3 packages bundled with CLP.')
-        return -1
+        logger.error("Failed to load python3 packages bundled with CLP.")
+        return False
+
     # Add packages to the front of the path
     sys.path.insert(0, str(python_site_packages_path))
+    return True
+
 
 clp_home = get_clp_home()
-if clp_home is None:
+if clp_home is None or not load_bundled_python_lib_path(clp_home):
     sys.exit(-1)
-load_bundled_python_lib_path(clp_home)
 
-from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX
-from clp_py_utils.core import read_yaml_config_file
-from clp_py_utils.clp_package_config import CLPPackageConfig
-from clp_py_utils.clp_config import CLPConfig
-from pydantic import ValidationError
+import yaml
+from clp.package_utils import \
+    CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, \
+    CONTAINER_CLP_HOME, \
+    DockerMount, \
+    DockerMountType, \
+    generate_container_config, \
+    validate_and_load_config_file, \
+    validate_and_load_db_credentials_file, \
+    validate_path_could_be_dir
 
 
 def main(argv):
-    args_parser = 
argparse.ArgumentParser(description='Script to decompress logs') - args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') - args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to decompress.') - args_parser.add_argument('-f', '--files-from', help='A file listing all files to decompress.') - args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', default='.', help='Decompress files into DIR') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Decompresses logs") + args_parser.add_argument('--config', '-c', type=str, default=str(default_config_file_path), + help="CLP package configuration file.") + args_parser.add_argument('paths', metavar='PATH', nargs='*', help="Files to decompress.") + args_parser.add_argument('-f', '--files-from', help="A file listing all files to decompress.") + args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', default='.', help="Decompress files into DIR") parsed_args = args_parser.parse_args(argv[1:]) - # Infer config file path + # Validate and load config file try: - if not parsed_args.config: - # Did not provide a config file - default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' - if not default_clp_package_config_file.exists(): - raise FileNotFoundError - log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') - package_config_file_path = default_clp_package_config_file - else: - # Provided a config file - package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) - except FileNotFoundError: - log.error('Did not provide a clp package config file or the specified config file does not exist.') - return + config_file_path = pathlib.Path(parsed_args.config) + clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_logs_dir() - try: - clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) - except ValidationError as err: - log.error(err) - return - except Exception as ex: - # read_yaml_config_file already logs the parsing error inside - return - - # Validate paths were specified using only one method - if len(parsed_args.paths) > 0 and parsed_args.files_from is not None: - args_parser.error( - "Paths cannot be specified both on the command line and through a file.") + validate_and_load_db_credentials_file(clp_config, clp_home, False) + except: + logger.exception("Failed to load config.") return -1 - files_to_decompress_path = None + paths_to_decompress_file_path = None if parsed_args.files_from: - files_to_decompress_path = pathlib.Path(parsed_args.files_from).resolve(strict=True) + paths_to_decompress_file_path = pathlib.Path(parsed_args.files_from) # Validate extraction directory extraction_dir = pathlib.Path(parsed_args.extraction_dir).resolve() - if extraction_dir.exists() and not extraction_dir.is_dir(): - log.error(f'extraction-dir ({extraction_dir}) is not a valid directory.') - return -1 - extraction_dir.mkdir(exist_ok=True) - - clp_cluster_name = clp_package_config.cluster_name try: - check_env(clp_cluster_name) - except EnvironmentError as ex: - log.error(ex) - return -1 - - # Parse and validate config file - container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml' - host_config_file_path = clp_home / container_clp_config_file_name - container_config_file_path = 
f'{CONTAINER_CLP_INSTALL_PREFIX}/{container_clp_config_file_name}' - - try: - clp_config = CLPConfig.parse_obj(read_yaml_config_file(host_config_file_path)) - except ValidationError as err: - log.error(err) - return -1 - except Exception as ex: - log.error(ex) + validate_path_could_be_dir(extraction_dir) + except ValueError as ex: + logger.error(f"extraction-dir is invalid: {ex}") return -1 + extraction_dir.mkdir(exist_ok=True) - docker_clp_home = pathlib.Path(CONTAINER_CLP_INSTALL_PREFIX) / 'clp' - docker_extraction_dir = pathlib.Path('/') / 'mnt' / '_extraction_dir_' + container_name = f'clp-decompressor-{str(uuid.uuid4())[-4:]}' - host_data_directory = clp_home / pathlib.Path(clp_config.data_directory).relative_to(docker_clp_home) - host_log_directory = clp_home / pathlib.Path(clp_config.logs_directory).relative_to(docker_clp_home) - host_archive_out_directory = \ - clp_home / pathlib.Path(clp_config.archive_output.directory).relative_to(docker_clp_home) + container_clp_config, mounts = generate_container_config(clp_config, clp_home) + container_config_filename = f'.{container_name}-config.yml' + container_config_file_path_on_host = clp_config.logs_directory / container_config_filename + with open(container_config_file_path_on_host, 'w') as f: + yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) - # Start execution environment - clp_execution_env_container = 'whywhywhywhywhywhy/clp-execution-env:x86-ubuntu-focal-20210919' - container_name = f'{clp_cluster_name}-decompressor-{uuid.uuid4()}'[:62] # max docker hostname = 63 chars - clp_execution_env_startup_cmd = [ - 'docker', 'run', '-di', + container_start_cmd = [ + 'docker', 'run', + '-i', '--rm', - '--network', clp_cluster_name, - '--hostname', container_name, + '--network', 'host', + '-w', str(CONTAINER_CLP_HOME), + '-u', f'{os.getuid()}:{os.getgid()}', '--name', container_name, - '-v', f'{clp_home}:{docker_clp_home}', - '-v', f'{extraction_dir}:{docker_extraction_dir}', - '-u', f'{os.getuid()}:{os.getgid()}' + '--mount', str(mounts.clp_home), ] - if not clp_config.data_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'): - clp_execution_env_startup_cmd.append('-v') - clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}') - if not clp_config.logs_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'): - clp_execution_env_startup_cmd.append('-v') - clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}') - if not clp_config.archive_output.directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'): - clp_execution_env_startup_cmd.append('-v') - clp_execution_env_startup_cmd.append( - f'{host_archive_out_directory}:{clp_config.archive_output.directory}') - clp_execution_env_startup_cmd.append(clp_execution_env_container) - subprocess.run(clp_execution_env_startup_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) - try: - # Copy config file into container - copy_cmd = ['docker', 'cp', host_config_file_path, f'{container_name}:{container_config_file_path}'] - subprocess.run(copy_cmd) - - docker_exec_cmd = [ - 'docker', 'exec', - '--workdir', str(docker_clp_home), - container_name, - 'sbin/native/decompress', '--config', container_config_file_path, - '-d', str(docker_extraction_dir) - ] - for path in parsed_args.paths: - docker_exec_cmd.append(path) - temporary_files_to_decompress_path = None - if files_to_decompress_path: - # Copy list to logs directory - temp_list_name = f'{uuid.uuid4()}-decompress-paths.txt' - 
temporary_files_to_decompress_path = host_log_directory / temp_list_name
-            shutil.copyfile(files_to_decompress_path, temporary_files_to_decompress_path)
-
-            docker_exec_cmd.append('--files-from')
-            docker_exec_cmd.append(pathlib.Path(clp_config.logs_directory) / temp_list_name)
-        logging.info(docker_exec_cmd)
-        subprocess.run(docker_exec_cmd)
-        if files_to_decompress_path:
-            temporary_files_to_decompress_path.unlink()
-    finally:
-        docker_stop_cmd = ['docker', 'stop', container_name]
-        subprocess.run(docker_stop_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # Set up mounts
+    container_extraction_dir = pathlib.Path('/') / 'mnt' / 'extraction-dir'
+    necessary_mounts = [
+        mounts.data_dir,
+        mounts.logs_dir,
+        mounts.archives_output_dir,
+        DockerMount(DockerMountType.BIND, extraction_dir, container_extraction_dir),
+    ]
+    container_paths_to_decompress_file_path = None
+    if paths_to_decompress_file_path:
+        container_paths_to_decompress_file_path = pathlib.Path('/') / 'mnt' / 'paths-to-decompress.txt'
+        necessary_mounts.append(
+            DockerMount(DockerMountType.BIND, paths_to_decompress_file_path, container_paths_to_decompress_file_path))
+    for mount in necessary_mounts:
+        if mount:
+            container_start_cmd.append('--mount')
+            container_start_cmd.append(str(mount))
+
+    container_start_cmd.append(clp_config.execution_container)
+
+    decompress_cmd = [
+        str(CONTAINER_CLP_HOME / 'sbin' / 'native' / 'decompress'),
+        '--config', str(container_clp_config.logs_directory / container_config_filename),
+        '-d', str(container_extraction_dir)
+    ]
+    for path in parsed_args.paths:
+        decompress_cmd.append(path)
+    if container_paths_to_decompress_file_path:
+        # native/decompress takes the path list via -f/--files-from; it has no
+        # --input-list option
+        decompress_cmd.append('--files-from')
+        decompress_cmd.append(str(container_paths_to_decompress_file_path))
+
+    cmd = container_start_cmd + decompress_cmd
+    subprocess.run(cmd, check=True)
+
+    # Remove generated files
+    container_config_file_path_on_host.unlink()
 
     return 0
diff --git a/components/package-template/src/sbin/native/compress b/components/package-template/src/sbin/native/compress
index 48aabd0d4..956ca9215 100755
--- a/components/package-template/src/sbin/native/compress
+++ b/components/package-template/src/sbin/native/compress
@@ -9,7 +9,7 @@
 import uuid
 
 # Setup logging
 # Create logger
-logger = logging.getLogger('compress')
+logger = logging.getLogger(__file__)
 logger.setLevel(logging.INFO)
 # Setup console logging
 logging_console_handler = logging.StreamHandler()
@@ -19,113 +19,117 @@
 logger.addHandler(logging_console_handler)
 
 
 def get_clp_home():
-    clp_home = None
+    # Determine CLP_HOME from an environment variable or this script's path
+    _clp_home = None
     if 'CLP_HOME' in os.environ:
-        clp_home = pathlib.Path(os.environ['CLP_HOME'])
+        _clp_home = pathlib.Path(os.environ['CLP_HOME'])
     else:
         for path in pathlib.Path(__file__).resolve().parents:
             if 'sbin' == path.name:
-                clp_home = path.parent
+                _clp_home = path.parent
                 break
-    if clp_home is None:
-        logging.error('CLP_HOME is not set and could not be determined automatically.')
+    if _clp_home is None:
+        logger.error("CLP_HOME is not set and could not be determined automatically.")
         return None
-    elif not clp_home.exists():
-        logger.error('CLP_HOME does not exist.')
+    elif not _clp_home.exists():
+        logger.error("CLP_HOME set to nonexistent path.")
         return None
 
-    return clp_home.resolve()
+    return _clp_home.resolve()
 
 
-def load_bundled_python_lib_path(clp_home):
-    python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages'
+def load_bundled_python_lib_path(_clp_home):
+    
python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - logger.error('Failed to load python3 packages bundled with CLP.') - return -1 + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) -from pydantic import ValidationError -from clp.package_utils import make_config_path_absolute -from clp_py_utils.clp_config import CLPConfig -from clp_py_utils.clp_io_config import InputConfig, OutputConfig, ClpIoConfig -from clp_py_utils.core import read_yaml_config_file +from clp.package_utils import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, validate_and_load_config_file +from clp_py_utils.clp_io_config import ClpIoConfig, InputConfig, OutputConfig from clp_py_utils.sql_adapter import SQL_Adapter from compression_job_handler.compression_job_handler import handle_jobs def main(argv): - args_parser = argparse.ArgumentParser(description='Compress log files.') - args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') - args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to compress.') - args_parser.add_argument('-f', '--input-list', dest='input_list', help='A file listing all paths to compress.') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Compresses log files.") + args_parser.add_argument('--config', '-c', default=str(default_config_file_path), + help="CLP package configuration file.") + args_parser.add_argument('paths', metavar='PATH', nargs='*', help="Paths to compress.") + args_parser.add_argument('-f', '--input-list', dest='input_list', help="A file listing all paths to compress.") args_parser.add_argument('--remove-path-prefix', metavar='DIR', - help='Remove the given path prefix from each compressed file/dir.') - args_parser.add_argument('--no-progress-reporting', action='store_true', help='Disables progress reporting.') + help="Removes the given path prefix from each compressed file/dir.") + args_parser.add_argument('--no-progress-reporting', action='store_true', help="Disables progress reporting.") parsed_args = args_parser.parse_args(argv[1:]) # Validate some input paths were specified if parsed_args.input_list is None and len(parsed_args.paths) == 0: - args_parser.error('No paths specified.') + args_parser.error("No paths specified.") # Validate paths were specified using only one method if len(parsed_args.paths) > 0 and parsed_args.input_list is not None: - args_parser.error('Paths cannot be specified on the command line AND through a file.') + args_parser.error("Paths cannot be specified on the command line AND through a file.") - # Load configuration - clp_config_file_path = pathlib.Path(parsed_args.config) + # Validate and load config file try: - clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path)) - except ValidationError as err: - logger.error(err) - except FileNotFoundError as err: - logger.error(f'CLP config file not found at "{str(clp_config_file_path)}"') - except Exception as ex: - logger.error(ex) - else: + config_file_path = pathlib.Path(parsed_args.config) + clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) 
+ clp_config.validate_input_logs_dir() + clp_config.validate_logs_dir() + except: + logger.exception("Failed to load config.") + return -1 - logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory)) - - comp_jobs_dir = logs_dir / 'comp-jobs' - comp_jobs_dir.mkdir(parents=True, exist_ok=True) - - if parsed_args.input_list is None: - # Write paths to file - log_list_path = comp_jobs_dir / f'{str(uuid.uuid4())}.txt' - with open(log_list_path, 'w') as f: - for path in parsed_args.paths: - stripped_path = path.strip() - if '' == stripped_path: - # Skip empty paths - continue - resolved_path = pathlib.Path(stripped_path).resolve() - - f.write(str(resolved_path) + '\n') - else: - # Copy to jobs directory - log_list_path = pathlib.Path(parsed_args.input_list).resolve() - shutil.copy(log_list_path, comp_jobs_dir / log_list_path.name) - - logger.info(f'Compression job submitted to compression-job-handler.') - - mysql_adapter = SQL_Adapter(clp_config.database) - clp_io_config = ClpIoConfig( - input=InputConfig(type='fs', list_path=str(log_list_path)), - output=OutputConfig.parse_obj(clp_config.archive_output) - ) - - # Execute compression-job-handler.handle_jobs - logs_directory_abs = str(pathlib.Path(clp_config.logs_directory).resolve()) - handle_jobs(sql_adapter=mysql_adapter, clp_io_config=clp_io_config, logs_dir_abs=logs_directory_abs, - fs_logs_required_parent_dir=pathlib.Path(clp_config.input_logs_dfs_path), - no_progress_reporting=parsed_args.no_progress_reporting) + comp_jobs_dir = clp_config.logs_directory / 'comp-jobs' + comp_jobs_dir.mkdir(parents=True, exist_ok=True) + + if parsed_args.input_list is None: + # Write paths to file + log_list_path = comp_jobs_dir / f'{str(uuid.uuid4())}.txt' + with open(log_list_path, 'w') as f: + for path in parsed_args.paths: + stripped_path = path.strip() + if '' == stripped_path: + # Skip empty paths + continue + resolved_path = pathlib.Path(stripped_path).resolve() + + f.write(f"{resolved_path}\n") + else: + # Copy to jobs directory + log_list_path = pathlib.Path(parsed_args.input_list).resolve() + shutil.copy(log_list_path, comp_jobs_dir / log_list_path.name) + + logger.info("Compression job submitted to compression-job-handler.") + + mysql_adapter = SQL_Adapter(clp_config.database) + clp_input_config = InputConfig(list_path=str(log_list_path)) + if parsed_args.remove_path_prefix: + clp_input_config.path_prefix_to_remove = parsed_args.remove_path_prefix + clp_io_config = ClpIoConfig( + input=clp_input_config, + output=OutputConfig.parse_obj(clp_config.archive_output) + ) + + # Execute compression-job-handler.handle_jobs + logs_directory_abs = str(pathlib.Path(clp_config.logs_directory).resolve()) + handle_jobs(sql_adapter=mysql_adapter, clp_io_config=clp_io_config, logs_dir_abs=logs_directory_abs, + fs_logs_required_parent_dir=pathlib.Path(clp_config.input_logs_directory), + no_progress_reporting=parsed_args.no_progress_reporting) + + return 0 if '__main__' == __name__: diff --git a/components/package-template/src/sbin/native/decompress b/components/package-template/src/sbin/native/decompress index 02129a728..679468242 100755 --- a/components/package-template/src/sbin/native/decompress +++ b/components/package-template/src/sbin/native/decompress @@ -9,63 +9,64 @@ import uuid # Setup logging # Create logger -logger = logging.getLogger(__name__) +logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = 
logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") logging_console_handler.setFormatter(logging_formatter) logger.addHandler(logging_console_handler) def get_clp_home(): - clp_home = None + # Determine CLP_HOME from an environment variable or this script's path + _clp_home = None if 'CLP_HOME' in os.environ: - clp_home = pathlib.Path(os.environ['CLP_HOME']) + _clp_home = pathlib.Path(os.environ['CLP_HOME']) else: for path in pathlib.Path(__file__).resolve().parents: if 'sbin' == path.name: - clp_home = path.parent + _clp_home = path.parent break - if clp_home is None: - logger.error('CLP_HOME is not set and could not be determined automatically.') + if _clp_home is None: + logger.error("CLP_HOME is not set and could not be determined automatically.") return None - elif not clp_home.exists(): - logger.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - logger.error('Failed to load python3 packages bundled with CLP.') - sys.exit(-1) + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) -from clp.package_utils import make_config_path_absolute -from clp_py_utils.clp_config import CLPConfig -from clp_py_utils.core import read_yaml_config_file import yaml +from clp.package_utils import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, validate_and_load_config_file +from clp_py_utils.clp_config import CLPConfig def decompress_paths(paths, list_path: pathlib.Path, clp_config: CLPConfig, archives_dir: pathlib.Path, logs_dir: pathlib.Path, extraction_dir: pathlib.Path): # Generate database config file for clp - db_config_file_path = logs_dir / f'decompress-db-config-{uuid.uuid4()}.yml' - db_config_file = open(db_config_file_path, 'w') - yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(), db_config_file) - db_config_file.close() + db_config_file_path = logs_dir / f'.decompress-db-config-{uuid.uuid4()}.yml' + with open(db_config_file_path, 'w') as f: + yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(True), f) decompression_cmd = [ str(clp_home / 'bin' / 'clp'), @@ -74,7 +75,7 @@ def decompress_paths(paths, list_path: pathlib.Path, clp_config: CLPConfig, ] files_to_decompress_list_path = None if list_path is not None: - decompression_cmd.append("-f") + decompression_cmd.append('-f') decompression_cmd.append(str(list_path)) elif len(paths) > 0: # Write paths to file @@ -86,15 +87,14 @@ def decompress_paths(paths, list_path: pathlib.Path, clp_config: CLPConfig, decompression_cmd.append('-f') decompression_cmd.append(str(files_to_decompress_list_path)) - proc = subprocess.run(decompression_cmd, close_fds=True) - return_code = proc.returncode + proc = subprocess.Popen(decompression_cmd) + return_code = proc.wait() if 0 != return_code: - 
logger.error(f'Decompression failed, return_code={return_code}')
+        logger.error(f"Decompression failed, return_code={return_code}")
         return return_code
 
     # Remove generated files
     if files_to_decompress_list_path is not None:
-        # Remove path list
         files_to_decompress_list_path.unlink()
     db_config_file_path.unlink()
 
@@ -102,41 +102,38 @@
 
 
 def main(argv):
-    args_parser = argparse.ArgumentParser(description='Decompresses logs')
-    args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.')
-    args_parser.add_argument('paths', metavar='PATH', nargs='*', help='Paths to decompress.')
-    args_parser.add_argument('-f', '--files-from', help='Decompress all paths in the given list.')
-    args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', help='Decompress files into DIR', default='.')
+    default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH
+
+    args_parser = argparse.ArgumentParser(description="Decompresses logs.")
+    # NOTE: --config has a default, so it must not also be marked required
+    args_parser.add_argument('--config', '-c', default=str(default_config_file_path),
+                             help="CLP configuration file.")
+    args_parser.add_argument('paths', metavar='PATH', nargs='*', help="Paths to decompress.")
+    args_parser.add_argument('-f', '--files-from', help="Decompress all paths in the given list.")
+    args_parser.add_argument('-d', '--extraction-dir', metavar='DIR', help="Decompress files into DIR", default='.')
     parsed_args = args_parser.parse_args(argv[1:])
 
     # Validate paths were specified using only one method
     if len(parsed_args.paths) > 0 and parsed_args.files_from is not None:
-        args_parser.error('Paths cannot be specified both on the command line and through a file.')
-        return -1
+        args_parser.error("Paths cannot be specified both on the command line and through a file.")
 
     # Validate extraction directory
     extraction_dir = pathlib.Path(parsed_args.extraction_dir)
     if not extraction_dir.is_dir():
-        logger.error(f'extraction-dir ({extraction_dir}) is not a valid directory.')
+        logger.error(f"extraction-dir ({extraction_dir}) is not a valid directory.")
         return -1
 
-    # Load configuration
-    clp_config_file_path = pathlib.Path(parsed_args.config)
+    # Validate and load config file
     try:
-        clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path))
-    except FileNotFoundError:
-        logger.error(f'CLP config file not found at "{clp_config_file_path}"')
+        config_file_path = pathlib.Path(parsed_args.config)
+        clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home)
+        clp_config.validate_archive_output_dir()
+        clp_config.validate_logs_dir()
+    except:
+        logger.exception("Failed to load config.")
         return -1
-    except Exception as ex:
-        logger.error(ex)
-        return -1
-
-    logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory))
-    archives_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.archive_output.directory))
 
-    return_code = \
-        decompress_paths(parsed_args.paths, parsed_args.files_from, clp_config, archives_dir, logs_dir, extraction_dir)
-    return return_code
+    return decompress_paths(parsed_args.paths, parsed_args.files_from, clp_config, clp_config.archive_output.directory,
+                            clp_config.logs_directory, extraction_dir)
 
 
 if '__main__' == __name__:
diff --git a/components/package-template/src/sbin/native/search b/components/package-template/src/sbin/native/search
index 8b1c49bd4..58c5e41a5 100755
--- a/components/package-template/src/sbin/native/search
+++ 
b/components/package-template/src/sbin/native/search @@ -9,95 +9,92 @@ import uuid # Setup logging # Create logger -logger = logging.getLogger(__name__) +logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") logging_console_handler.setFormatter(logging_formatter) logger.addHandler(logging_console_handler) def get_clp_home(): - clp_home = None + # Determine CLP_HOME from an environment variable or this script's path + _clp_home = None if 'CLP_HOME' in os.environ: - clp_home = pathlib.Path(os.environ['CLP_HOME']) + _clp_home = pathlib.Path(os.environ['CLP_HOME']) else: for path in pathlib.Path(__file__).resolve().parents: if 'sbin' == path.name: - clp_home = path.parent + _clp_home = path.parent break - if clp_home is None: - logger.error('CLP_HOME is not set and could not be determined automatically.') + if _clp_home is None: + logger.error("CLP_HOME is not set and could not be determined automatically.") return None - elif not clp_home.exists(): - logger.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - logger.error('Failed to load python3 packages bundled with CLP.') - sys.exit(-1) + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) -from clp.package_utils import make_config_path_absolute -from clp_py_utils.clp_config import CLPConfig -from clp_py_utils.core import read_yaml_config_file import yaml +from clp.package_utils import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, validate_and_load_config_file def main(argv): - args_parser = argparse.ArgumentParser(description='Searches the compressed logs.') - args_parser.add_argument('--config', '-c', required=True, help='CLP configuration file.') - args_parser.add_argument('wildcard_query', help='Wildcard query.') - args_parser.add_argument("--file-path", help="File to search.") + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Searches the compressed logs.") + args_parser.add_argument('--config', '-c', required=True, help="CLP configuration file.") + args_parser.add_argument('wildcard_query', help="Wildcard query.") + args_parser.add_argument('--file-path', help="File to search.") parsed_args = args_parser.parse_args(argv[1:]) - # Load configuration - clp_config_file_path = pathlib.Path(parsed_args.config) + # Validate and load config file try: - clp_config = CLPConfig.parse_obj(read_yaml_config_file(clp_config_file_path)) - except FileNotFoundError: - logger.error(f'CLP config file not found at "{clp_config_file_path}"') + config_file_path = pathlib.Path(parsed_args.config) + 
clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_logs_dir() + except: + logger.exception("Failed to load config.") return -1 - except Exception as ex: - logger.error(ex) - return -1 - - logs_dir = make_config_path_absolute(clp_home, pathlib.Path(clp_config.logs_directory)) - archives_dir = make_config_path_absolute(clp_home, - pathlib.Path(clp_config.archive_output.directory)) # Generate database config file for clp - db_config_file_path = logs_dir / f'decompress-db-config-{uuid.uuid4()}.yml' + db_config_file_path = clp_config.logs_directory / f'decompress-db-config-{uuid.uuid4()}.yml' db_config_file = open(db_config_file_path, 'w') - yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(), db_config_file) + yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(True), db_config_file) db_config_file.close() - search_cmd = [ + cmd = [ str(clp_home / 'bin' / 'clg'), - str(archives_dir), parsed_args.wildcard_query, + str(clp_config.archive_output.directory), parsed_args.wildcard_query, '--db-config-file', str(db_config_file_path), ] if parsed_args.file_path is not None: - search_cmd.append(parsed_args.file_path) + cmd.append(parsed_args.file_path) - proc = subprocess.run(search_cmd, close_fds=True) - return_code = proc.returncode + proc = subprocess.Popen(cmd) + return_code = proc.wait() if 0 != return_code: - logger.error(f'Search failed, return_code={return_code}') + logger.error(f"Search failed, return_code={return_code}") return return_code # Remove generated files diff --git a/components/package-template/src/sbin/search b/components/package-template/src/sbin/search index 7a80fe1ba..3d8b303b1 100755 --- a/components/package-template/src/sbin/search +++ b/components/package-template/src/sbin/search @@ -5,109 +5,128 @@ import os import pathlib import subprocess import sys +import uuid # Setup logging # Create logger -log = logging.getLogger('clp') -log.setLevel(logging.DEBUG) +logger = logging.getLogger(__file__) +logger.setLevel(logging.DEBUG) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") logging_console_handler.setFormatter(logging_formatter) -log.addHandler(logging_console_handler) +logger.addHandler(logging_console_handler) def get_clp_home(): - clp_home = None + # Determine CLP_HOME from an environment variable or this script's path + _clp_home = None if 'CLP_HOME' in os.environ: - clp_home = pathlib.Path(os.environ['CLP_HOME']) + _clp_home = pathlib.Path(os.environ['CLP_HOME']) else: for path in pathlib.Path(__file__).resolve().parents: if 'sbin' == path.name: - clp_home = path.parent + _clp_home = path.parent break - if clp_home is None: - log.error('CLP_HOME is not set and could not be determined automatically.') + if _clp_home is None: + logger.error("CLP_HOME is not set and could not be determined automatically.") return None - elif not clp_home.exists(): - log.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 
'site-packages' if not python_site_packages_path.is_dir(): - log.error('Failed to load python3 packages bundled with CLP.') - return -1 + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) -from clp.package_utils import check_env, CONTAINER_CLP_INSTALL_PREFIX -from clp_py_utils.core import read_yaml_config_file -from clp_py_utils.clp_package_config import CLPPackageConfig -from pydantic import ValidationError +import yaml +from clp.package_utils import \ + CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, \ + CONTAINER_CLP_HOME, \ + generate_container_config, \ + validate_and_load_config_file, \ + validate_and_load_db_credentials_file def main(argv): - args_parser = argparse.ArgumentParser(description='Searches the compressed logs.') - args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Searches the compressed logs.") + args_parser.add_argument('--config', '-c', default=str(default_config_file_path), + help="CLP package configuration file.") args_parser.add_argument('wildcard_query', help="Wildcard query.") args_parser.add_argument('--file-path', help="File to search.") parsed_args = args_parser.parse_args(argv[1:]) - # Infer config file path - try: - if not parsed_args.config: - # Did not provide a config file - default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' - if not default_clp_package_config_file.exists(): - raise FileNotFoundError - log.debug(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') - package_config_file_path = default_clp_package_config_file - else: - # Provided a config file - package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) - except FileNotFoundError: - log.error('Did not provide a clp package config file or the specified config file does not exist.') - return - + # Validate and load config file try: - clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) - except ValidationError as err: - log.error(err) - return - except Exception as ex: - # read_yaml_config_file already logs the parsing error inside - return - - clp_cluster_name = clp_package_config.cluster_name - try: - check_env(clp_cluster_name) - except EnvironmentError as ex: - logging.error(ex) + config_file_path = pathlib.Path(parsed_args.config) + clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) + clp_config.validate_logs_dir() + + # Validate and load necessary credentials + validate_and_load_db_credentials_file(clp_config, clp_home, True) + except: + logger.exception("Failed to load config.") return -1 - docker_exec_cmd = [ - 'docker', 'exec', - '--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp', + container_name = f'clp-search-{str(uuid.uuid4())[-4:]}' + + container_clp_config, mounts = generate_container_config(clp_config, clp_home) + container_config_filename = f'.{container_name}-config.yml' + container_config_file_path_on_host = clp_config.logs_directory / container_config_filename + with open(container_config_file_path_on_host, 'w') as f: + 
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) + + container_start_cmd = [ + 'docker', 'run', + '-i', + '--rm', + '--network', 'host', + '-w', str(CONTAINER_CLP_HOME), '-u', f'{os.getuid()}:{os.getgid()}', - clp_cluster_name, - 'sbin/native/search', '--config', f'{CONTAINER_CLP_INSTALL_PREFIX}/.{clp_cluster_name}.yaml', - parsed_args.wildcard_query + '--name', container_name, + '--mount', str(mounts.clp_home), + ] + necessary_mounts = [ + mounts.logs_dir, + mounts.archives_output_dir, + ] + for mount in necessary_mounts: + if mount: + container_start_cmd.append('--mount') + container_start_cmd.append(str(mount)) + container_start_cmd.append(clp_config.execution_container) + + search_cmd = [ + str(CONTAINER_CLP_HOME / 'sbin' / 'native' / 'search'), + '--config', str(container_clp_config.logs_directory / container_config_filename), + parsed_args.wildcard_query, ] if parsed_args.file_path: - docker_exec_cmd.append('--file-path') - docker_exec_cmd.append(parsed_args.file_path) - subprocess.run(docker_exec_cmd) + search_cmd.append('--file-path') + search_cmd.append(parsed_args.file_path) + cmd = container_start_cmd + search_cmd + subprocess.run(cmd, check=True) + + # Remove generated files + container_config_file_path_on_host.unlink() return 0 diff --git a/components/package-template/src/sbin/start-clp b/components/package-template/src/sbin/start-clp index 23f924a6f..29f298480 100755 --- a/components/package-template/src/sbin/start-clp +++ b/components/package-template/src/sbin/start-clp @@ -4,439 +4,474 @@ import logging import multiprocessing import os import pathlib -import secrets import socket import subprocess import sys import time +import uuid # Setup logging # Create logger -log = logging.getLogger('clp') -log.setLevel(logging.INFO) +logger = logging.getLogger('clp') +logger.setLevel(logging.INFO) # Setup console logging logging_console_handler = logging.StreamHandler() -logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s') +logging_formatter = logging.Formatter("%(asctime)s [%(levelname)s] [%(name)s] %(message)s") logging_console_handler.setFormatter(logging_formatter) -log.addHandler(logging_console_handler) +logger.addHandler(logging_console_handler) def get_clp_home(): - clp_home = None + # Determine CLP_HOME from an environment variable or this script's path + _clp_home = None if 'CLP_HOME' in os.environ: - clp_home = pathlib.Path(os.environ['CLP_HOME']) + _clp_home = pathlib.Path(os.environ['CLP_HOME']) else: for path in pathlib.Path(__file__).resolve().parents: if 'sbin' == path.name: - clp_home = path.parent + _clp_home = path.parent break - if clp_home is None: - log.error('CLP_HOME is not set and could not be determined automatically.') + if _clp_home is None: + logger.error("CLP_HOME is not set and could not be determined automatically.") return None - elif not clp_home.exists(): - log.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - log.error('Failed to load python3 packages bundled with CLP.') - return -1 + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add 
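packages to the front of the path
     sys.path.insert(0, str(python_site_packages_path))
+    return True
+

 clp_home = get_clp_home()
-if clp_home is None:
+if clp_home is None or not load_bundled_python_lib_path(clp_home):
     sys.exit(-1)
-load_bundled_python_lib_path(clp_home)

A note before the docker-heavy code below: every DockerMount built in these scripts is handed to docker as a single '--mount' string. The clp.package_utils implementation is not part of this diff; judging from the hand-written strings it replaces (type=bind,src=...,dst=...[,readonly]), its string form plausibly reduces to something like this sketch (class and method names here are illustrative, not the real package_utils API):

import pathlib

class BindMount:
    # Mimics the '--mount' strings the old code built by hand, e.g.
    # 'type=bind,src=/host/logs/rabbitmq,dst=/var/log/rabbitmq,readonly'
    def __init__(self, src: pathlib.Path, dst: pathlib.Path, readonly: bool = False):
        self.src = src
        self.dst = dst
        self.readonly = readonly

    def __str__(self) -> str:
        s = f'type=bind,src={self.src},dst={self.dst}'
        return f'{s},readonly' if self.readonly else s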
+import yaml
+from clp.package_utils import \
+    CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, \
+    CONTAINER_CLP_HOME, \
+    DB_COMPONENT_NAME, \
+    QUEUE_COMPONENT_NAME, \
+    SCHEDULER_COMPONENT_NAME, \
+    WORKER_COMPONENT_NAME, \
+    check_dependencies, \
+    container_exists, \
+    CLPDockerMounts, \
+    DockerMount, \
+    DockerMountType, \
+    generate_container_config, \
+    validate_and_load_config_file, \
+    validate_and_load_db_credentials_file, \
+    validate_and_load_queue_credentials_file, \
+    validate_db_config, \
+    validate_queue_config, \
+    validate_worker_config
+from clp_py_utils.clp_config import CLPConfig
+
+
+def append_docker_port_settings_for_host_ips(hostname: str, host_port: int, container_port: int, cmd: list):
+    # Note: We use a set because gethostbyname_ex can return the same IP twice for one hostname
+    for ip in set(socket.gethostbyname_ex(hostname)[2]):
+        cmd.append('-p')
+        cmd.append(f'{ip}:{host_port}:{container_port}')
+
+def wait_for_database_to_init(container_name: str, clp_config: CLPConfig, timeout: int):
+    # Try to connect to the database
+    begin_time = time.time()
+    container_exec_cmd = [
+        'docker', 'exec',
+        '-it',
+        container_name
+    ]
+    mysqladmin_cmd = [
+        'mysqladmin', 'ping',
+        '--silent',
+        '-h', '127.0.0.1',
+        '-u', str(clp_config.database.username),
+        f'--password={clp_config.database.password}'
+    ]
+    cmd = container_exec_cmd + mysqladmin_cmd
+    while True:
+        try:
+            subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True)
+            return True
+        except subprocess.CalledProcessError:
+            if time.time() - begin_time > timeout:
+                break
+            time.sleep(1)
 
-def provision_docker_network_bridge(clp_cluster_name: str):
-    cmd = ['docker', 'network', 'create', '--driver', 'bridge', clp_cluster_name]
-    log.info('Provision docker network bridge')
-    log.debug(' '.join(cmd))
-    try:
-        subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
-    except subprocess.CalledProcessError:
-        log.error(f'Cluster "{clp_cluster_name}" has already been provisioned.')
-        raise EnvironmentError
+    logger.error("Timeout while waiting for database to initialize.")
+    return False
 
-def start_sql_db(cluster_name: str, clp_config: CLPConfig, host_data_directory: pathlib.Path, publish_ports: bool):
-    log.info(f'Starting scheduler {clp_config.database.type} database')
+def start_db(instance_id: str, clp_config: CLPConfig, conf_dir: pathlib.Path):
+    logger.info("Starting database...")
 
-    persistent_storage_path = host_data_directory / 'db'
-    persistent_storage_path.mkdir(exist_ok=True, parents=True)
+    container_name = f'clp-{DB_COMPONENT_NAME}-{instance_id}'
+    if container_exists(container_name):
+        logger.info("Database already running.")
+        return
 
-    database_startup_cmd = [
-        'docker', 'run', '-d',
-        '--network', cluster_name,
-        '--hostname', f'{clp_config.database.host}',
-        '--name', f'{clp_config.database.host}',
-        '-v', f'{str(persistent_storage_path)}:/var/lib/mysql',
+    db_data_dir = 
clp_config.data_directory / DB_COMPONENT_NAME + db_logs_dir = clp_config.logs_directory / DB_COMPONENT_NAME + + validate_db_config(clp_config, db_data_dir, db_logs_dir) + + # Create directories + db_data_dir.mkdir(exist_ok=True, parents=True) + db_logs_dir.mkdir(exist_ok=True, parents=True) + + # Start container + mounts = [ + DockerMount(DockerMountType.BIND, conf_dir / 'mysql' / 'conf.d', pathlib.Path('/') / 'etc' / 'mysql' / 'conf.d', + True), + DockerMount(DockerMountType.BIND, db_data_dir, pathlib.Path('/') / 'var' / 'lib' / 'mysql'), + DockerMount(DockerMountType.BIND, db_logs_dir, pathlib.Path('/') / 'var' / 'log' / 'mysql'), + ] + cmd = [ + 'docker', 'run', + '-d', + '--rm', + '--name', container_name, '-e', f'MYSQL_ROOT_PASSWORD={clp_config.database.password}', '-e', f'MYSQL_USER={clp_config.database.username}', '-e', f'MYSQL_PASSWORD={clp_config.database.password}', - '-e', f'MYSQL_DATABASE=initial_database', - '-u', f'{os.getuid()}:{os.getgid()}' + '-e', f'MYSQL_DATABASE={clp_config.database.name}', + '-u', f'{os.getuid()}:{os.getgid()}', ] - if publish_ports: - database_startup_cmd.append('-p') - database_startup_cmd.append(f'{str(clp_config.database.port)}:{str(clp_config.database.port)}') + for mount in mounts: + cmd.append('--mount') + cmd.append(str(mount)) + append_docker_port_settings_for_host_ips(clp_config.database.host, clp_config.database.port, 3306, cmd) if 'mysql' == clp_config.database.type: - database_startup_cmd.append('mysql:8.0.23') + cmd.append('mysql:8.0.23') elif 'mariadb' == clp_config.database.type: - database_startup_cmd.append('mariadb:10.6.4-focal') - log.debug(' '.join(database_startup_cmd)) - try: - subprocess.run(database_startup_cmd, stdout=subprocess.PIPE, check=True) - except subprocess.CalledProcessError: - log.error(f'Unable to start "{clp_config.database.type}" inside docker') - raise EnvironmentError - - -def create_sql_db_tables(cluster_name: str, container_config_file_path: str): - # Initialize database tables - log.info('Initializing scheduler database tables') - database_table_creation_commands = [ - ['python3', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/clp_py_utils/initialize-clp-metadata-db.py', - '--config', container_config_file_path], - ['python3', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/clp_py_utils/initialize-orchestration-db.py', - '--config', container_config_file_path] + cmd.append('mariadb:10.6.4-focal') + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + + if not wait_for_database_to_init(container_name, clp_config, 30): + raise EnvironmentError("Database did not initialize in time") + + logger.info("Started database.") + + +def create_db_tables(instance_id: str, clp_config: CLPConfig, container_clp_config: CLPConfig, mounts: CLPDockerMounts): + logger.info("Creating database tables...") + + container_name = f'clp-db-table-creator-{instance_id}' + + # Create database config file + db_config_filename = f'{container_name}.yml' + db_config_file_path = clp_config.logs_directory / db_config_filename + with open(db_config_file_path, 'w') as f: + yaml.safe_dump(container_clp_config.database.dict(), f) + + clp_site_packages_dir = CONTAINER_CLP_HOME / 'lib' / 'python3' / 'site-packages' + container_start_cmd = [ + 'docker', 'run', + '-i', + '--network', 'host', + '--rm', + '--name', container_name, + '-e', f'PYTHONPATH={clp_site_packages_dir}', + '-u', f'{os.getuid()}:{os.getgid()}', + '--mount', str(mounts.clp_home), ] - for command in database_table_creation_commands: - 
docker_exec_cmd = ['docker', 'exec', '-it', - '-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages', - cluster_name] - docker_exec_cmd += command - log.debug(' '.join(docker_exec_cmd)) - max_attempts = 20 - for attempt in range(max_attempts + 1): - if attempt == max_attempts: - log.error('Unable to connect to the database with the provided credentials') - raise EnvironmentError - try: - subprocess.run(docker_exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) - except subprocess.CalledProcessError: - log.debug('Waiting for database to be ready') - time.sleep(1) # database not ready - else: - break - log.debug('Scheduler database tables initialization completed') + necessary_mounts = [mounts.data_dir, mounts.logs_dir] + for mount in necessary_mounts: + if mount: + container_start_cmd.append('--mount') + container_start_cmd.append(str(mount)) + container_start_cmd.append(clp_config.execution_container) + + clp_py_utils_dir = clp_site_packages_dir / 'clp_py_utils' + create_tables_cmd = [ + 'python3', + str(clp_py_utils_dir / 'create-db-tables.py'), + '--config', str(container_clp_config.logs_directory / db_config_filename), + ] + + cmd = container_start_cmd + create_tables_cmd + logger.debug(' '.join(cmd)) + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + + db_config_file_path.unlink() + logger.info("Created database tables.") -def start_scheduler_queue(host_config_file_path: pathlib.Path, cluster_name: str, clp_config: CLPConfig, - host_logs_dir: pathlib.Path, publish_ports: bool): - log.info('Starting scheduler queue') + +def start_queue(instance_id: str, clp_config: CLPConfig): + logger.info("Starting queue...") + + container_name = f'clp-{QUEUE_COMPONENT_NAME}-{instance_id}' + if container_exists(container_name): + logger.info("Queue already running.") + return + + queue_logs_dir = clp_config.logs_directory / QUEUE_COMPONENT_NAME + validate_queue_config(clp_config, queue_logs_dir) log_filename = 'rabbitmq.log' # Generate config file + config_filename = f'{container_name}.conf' + host_config_file_path = clp_config.logs_directory / config_filename with open(host_config_file_path, 'w') as f: - f.write(f'listeners.tcp.default = {clp_config.scheduler_queue.port}\n') - f.write(f'default_user = {clp_config.scheduler_queue.username}\n') - f.write(f'default_pass = {clp_config.scheduler_queue.password}\n') - f.write(f'log.file = {log_filename}\n') + f.write(f"default_user = {clp_config.queue.username}\n") + f.write(f"default_pass = {clp_config.queue.password}\n") + f.write(f"log.file = {log_filename}\n") - # Create logs directory that can be mounted into rabbitmq container - host_rabbitmq_logs_dir = host_logs_dir / 'rabbitmq' - host_rabbitmq_logs_dir.mkdir(parents=True, exist_ok=True) + # Create directories + queue_logs_dir.mkdir(exist_ok=True, parents=True) # Start container rabbitmq_logs_dir = pathlib.Path('/') / 'var' / 'log' / 'rabbitmq' + mounts = [ + DockerMount(DockerMountType.BIND, host_config_file_path, + pathlib.Path('/') / 'etc' / 'rabbitmq' / 'rabbitmq.conf', True), + DockerMount(DockerMountType.BIND, queue_logs_dir, rabbitmq_logs_dir), + ] + rabbitmq_pid_file_path = pathlib.Path('/') / 'tmp' / 'rabbitmq.pid' cmd = [ 'docker', 'run', - '-di', - '--network', cluster_name, - '--hostname', clp_config.scheduler_queue.host, - '--name', f'{cluster_name}-rabbitmq', + '-d', + '--rm', + '--name', container_name, # Override RABBITMQ_LOGS since the image sets it to *only* log to stdout - '--env', f'RABBITMQ_LOGS={rabbitmq_logs_dir / 
log_filename}', - # Mount the config file into the container - '--mount', f'type=bind,src={host_config_file_path},dst=/etc/rabbitmq/rabbitmq.conf,readonly', - # Mount the logs directory in the container so logs are persisted on the host - '--mount', f'type=bind,src={host_rabbitmq_logs_dir},dst={rabbitmq_logs_dir}', + '-e', f'RABBITMQ_LOGS={rabbitmq_logs_dir / log_filename}', + '-e', f'RABBITMQ_PID_FILE={rabbitmq_pid_file_path}', '-u', f'{os.getuid()}:{os.getgid()}', ] - if publish_ports: - cmd.append('-p') - cmd.append(f'{clp_config.scheduler_queue.port}:{clp_config.scheduler_queue.port}') + append_docker_port_settings_for_host_ips(clp_config.queue.host, clp_config.queue.port, 5672, cmd) + for mount in mounts: + cmd.append('--mount') + cmd.append(str(mount)) cmd.append('rabbitmq:3.9.8') - log.debug(' '.join(cmd)) - try: - subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) - except subprocess.CalledProcessError: - logging.exception('Unable to start scheduler queue') - return False + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) - return True + # Wait for queue to start up + cmd = [ + 'docker', 'exec', '-it', container_name, + 'rabbitmqctl', 'wait', str(rabbitmq_pid_file_path), + ] + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + + logger.info("Started queue.") + + +def start_scheduler(instance_id: str, clp_config: CLPConfig, container_clp_config: CLPConfig, mounts: CLPDockerMounts): + logger.info("Starting scheduler...") + + container_name = f'clp-{SCHEDULER_COMPONENT_NAME}-{instance_id}' + if container_exists(container_name): + logger.info("Scheduler already running.") + return + container_config_filename = f'{container_name}.yml' + container_config_file_path = clp_config.logs_directory / container_config_filename + with open(container_config_file_path, 'w') as f: + yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f) -def start_scheduler(cluster_name: str, clp_config: CLPConfig, container_config_file_path: str): - scheduler_startup_cmd = ['python3', '-u', '-m', 'job_orchestration.scheduler.scheduler', - '--config', container_config_file_path] - log.info('Starting scheduler service') - docker_exec_cmd = [ - 'docker', 'exec', '--detach', '--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp', - '-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages', - '-e', f'BROKER_URL=amqp://{clp_config.scheduler_queue.username}:{clp_config.scheduler_queue.password}' - f'@{clp_config.scheduler_queue.host}:{clp_config.scheduler_queue.port}', - cluster_name + clp_site_packages_dir = CONTAINER_CLP_HOME / 'lib' / 'python3' / 'site-packages' + container_start_cmd = [ + 'docker', 'run', + '-di', + '--network', 'host', + '-w', str(CONTAINER_CLP_HOME), + '--rm', + '--name', container_name, + '-e', f'PYTHONPATH={clp_site_packages_dir}', + '-e', f'BROKER_URL=amqp://' + f'{container_clp_config.queue.username}:{container_clp_config.queue.password}@' + f'{container_clp_config.queue.host}:{container_clp_config.queue.port}', + '-u', f'{os.getuid()}:{os.getgid()}', + '--mount', str(mounts.clp_home), ] - docker_exec_cmd += scheduler_startup_cmd - log.debug(docker_exec_cmd) - try: - subprocess.run(docker_exec_cmd) - except subprocess.CalledProcessError: - log.error('Failed to start clp scheduler service') - raise EnvironmentError - - -def start_worker(cluster_name: str, clp_config: CLPConfig, num_cpus: int): - worker_startup_cmd = [f'{CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages/bin/celery', - '-A', 'job_orchestration.executor', 'worker', - 
'--concurrency', str(num_cpus), - '--loglevel', 'WARNING', - '-Q', 'compression'] - log.info("Starting CLP worker") - docker_exec_cmd = [ - 'docker', 'exec', '--detach', - '--workdir', f'{CONTAINER_CLP_INSTALL_PREFIX}/clp', - '-e', f'CLP_HOME={CONTAINER_CLP_INSTALL_PREFIX}/clp', - '-e', f'CLP_DATA_DIR={clp_config.data_directory}', - '-e', f'CLP_LOGS_DIR={clp_config.logs_directory}', - '-e', f'PYTHONPATH={CONTAINER_CLP_INSTALL_PREFIX}/clp/lib/python3/site-packages', - '-e', f'BROKER_URL=amqp://{clp_config.scheduler_queue.username}:{clp_config.scheduler_queue.password}' - f'@{clp_config.scheduler_queue.host}:{clp_config.scheduler_queue.port}', - '-e', f'RESULT_BACKEND=rpc://{clp_config.scheduler_queue.username}:{clp_config.scheduler_queue.password}' - f'@{clp_config.scheduler_queue.host}:{clp_config.scheduler_queue.port}', - cluster_name + necessary_mounts = [ + mounts.logs_dir, ] - docker_exec_cmd += worker_startup_cmd - log.debug(docker_exec_cmd) - try: - subprocess.run(docker_exec_cmd) - except subprocess.CalledProcessError: - log.error('Failed to start CLP worker') - raise EnvironmentError - - -def generate_default_package_config(package_config_file_path: pathlib.Path): - clp_package_config = CLPPackageConfig( - cluster_name='clp-mini-cluster', - archive_output=PackageArchiveOutput( - target_archive_size=268435456, # 256MB - target_dictionaries_size=33554432, # 32MB - target_encoded_file_size=268435456, # 256MB - target_segment_size=268435456 # 256MB - ) - ) - with open(package_config_file_path, 'w') as config_file: - config_file.write(clp_package_config.generate_package_config_file_content_with_comments()) + for mount in necessary_mounts: + if mount: + container_start_cmd.append('--mount') + container_start_cmd.append(str(mount)) + container_start_cmd.append(clp_config.execution_container) + + scheduler_cmd = [ + 'python3', '-u', '-m', + 'job_orchestration.scheduler.scheduler', + '--config', str(container_clp_config.logs_directory / container_config_filename), + ] + cmd = container_start_cmd + scheduler_cmd + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + + logger.info("Started scheduler.") + + +def start_worker(instance_id: str, clp_config: CLPConfig, container_clp_config: CLPConfig, num_cpus: int, + mounts: CLPDockerMounts): + logger.info("Starting worker...") + + container_name = f'clp-{WORKER_COMPONENT_NAME}-{instance_id}' + if container_exists(container_name): + logger.info("Worker already running.") + return + + validate_worker_config(clp_config) + + # Create necessary directories + clp_config.archive_output.directory.mkdir(parents=True, exist_ok=True) + + clp_site_packages_dir = CONTAINER_CLP_HOME / 'lib' / 'python3' / 'site-packages' + container_start_cmd = [ + 'docker', 'run', + '-di', + '--network', 'host', + '-w', str(CONTAINER_CLP_HOME), + '--rm', + '--name', container_name, + '-e', f'PYTHONPATH={clp_site_packages_dir}', + '-e', f'BROKER_URL=amqp://' + f'{container_clp_config.queue.username}:{container_clp_config.queue.password}@' + f'{container_clp_config.queue.host}:{container_clp_config.queue.port}', + '-e', f'RESULT_BACKEND=rpc://' + f'{container_clp_config.queue.username}:{container_clp_config.queue.password}' + f'@{container_clp_config.queue.host}:{container_clp_config.queue.port}', + '-e', f'CLP_HOME={CONTAINER_CLP_HOME}', + '-e', f'CLP_DATA_DIR={container_clp_config.data_directory}', + '-e', f'CLP_ARCHIVE_OUTPUT_DIR={container_clp_config.archive_output.directory}', + '-e', f'CLP_LOGS_DIR={container_clp_config.logs_directory}', + '-u', 
f'{os.getuid()}:{os.getgid()}', + '--mount', str(mounts.clp_home), + ] + necessary_mounts = [ + mounts.data_dir, + mounts.logs_dir, + mounts.archives_output_dir, + mounts.input_logs_dir, + ] + for mount in necessary_mounts: + if mount: + container_start_cmd.append('--mount') + container_start_cmd.append(str(mount)) + container_start_cmd.append(clp_config.execution_container) + + worker_cmd = [ + str(clp_site_packages_dir / 'bin' / 'celery'), + '-A', + 'job_orchestration.executor', + 'worker', + '--concurrency', str(num_cpus), + '--loglevel', 'WARNING', + '-Q', 'compression', + ] + cmd = container_start_cmd + worker_cmd + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + + logger.info("Started worker.") def main(argv): - args_parser = argparse.ArgumentParser(description='Startup script for CLP') - args_parser.add_argument('--uncompressed-logs-dir', type=str, required=True, - help='The directory containing uncompressed logs.') - args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') - args_parser.add_argument('--num-cpus', type=int, default=0, - help='Number of logical CPU cores to use for compression') - args_parser.add_argument('--publish-ports', action='store_true', help='Publish container ports to the host port') - args_parser.add_argument('--start-scheduler-only', action='store_true', help='Start only scheduler service') - args_parser.add_argument('--start-worker-only', action='store_true', help='Start only worker service') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Starts CLP") + args_parser.add_argument('--config', '-c', default=str(default_config_file_path), + help="CLP package configuration file.") + + component_args_parser = args_parser.add_subparsers(dest='component_name') + component_args_parser.add_parser(DB_COMPONENT_NAME) + component_args_parser.add_parser(QUEUE_COMPONENT_NAME) + component_args_parser.add_parser(SCHEDULER_COMPONENT_NAME) + worker_args_parser = component_args_parser.add_parser(WORKER_COMPONENT_NAME) + worker_args_parser.add_argument('--num-cpus', type=int, default=0, + help="Number of logical CPU cores to use for compression") parsed_args = args_parser.parse_args(argv[1:]) - # Check required system dependencies + if parsed_args.component_name: + component_name = parsed_args.component_name + else: + component_name = "" + try: check_dependencies() - except EnvironmentError as ex: - log.error(ex) - return + except: + logger.exception("Dependency checking failed.") + return -1 - # Infer components to enable - startup_component_count = parsed_args.start_scheduler_only + parsed_args.start_worker_only - if startup_component_count > 1: - log.error('--start-scheduler-only and --start-worker-only are mutually exclusive') - return - if not parsed_args.start_scheduler_only and not parsed_args.start_worker_only: - need_to_start_scheduler = True - need_to_start_worker = True - else: - need_to_start_scheduler = parsed_args.start_scheduler_only - need_to_start_worker = parsed_args.start_worker_only + # Validate and load config file + try: + config_file_path = pathlib.Path(parsed_args.config) + clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home) + + # Validate and load necessary credentials + if component_name in ['', DB_COMPONENT_NAME, SCHEDULER_COMPONENT_NAME]: + validate_and_load_db_credentials_file(clp_config, clp_home, True) + if component_name in ['', QUEUE_COMPONENT_NAME, 
SCHEDULER_COMPONENT_NAME, WORKER_COMPONENT_NAME]: + validate_and_load_queue_credentials_file(clp_config, clp_home, True) + + clp_config.validate_data_dir() + clp_config.validate_logs_dir() + except: + logger.exception("Failed to load config.") + return -1 + # Get the number of CPU cores to use + num_cpus = multiprocessing.cpu_count() + if WORKER_COMPONENT_NAME == component_name and parsed_args.num_cpus != 0: + num_cpus = parsed_args.num_cpus - # Infer number of CPU cores used for compression - num_cpus = parsed_args.num_cpus - if 0 == num_cpus: - num_cpus = multiprocessing.cpu_count() + container_clp_config, mounts = generate_container_config(clp_config, clp_home) - # Validate uncompressed-log-dir - uncompressed_log_dir = pathlib.Path(parsed_args.uncompressed_logs_dir).resolve() - if not (uncompressed_log_dir.exists() and uncompressed_log_dir.is_dir()): - log.error(f'The specified uncompressed log directory path is invalid: {uncompressed_log_dir}') - return + # Create necessary directories + clp_config.data_directory.mkdir(parents=True, exist_ok=True) + clp_config.logs_directory.mkdir(parents=True, exist_ok=True) - # Infer config file path try: - if not parsed_args.config: - # Did not provide a config file - default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' - if not default_clp_package_config_file.exists(): - log.info('Generating a default config file.') - generate_default_package_config(default_clp_package_config_file) - log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') - package_config_file_path = default_clp_package_config_file + # Create instance-id file + instance_id_file_path = clp_config.logs_directory / 'instance-id' + if instance_id_file_path.exists(): + with open(instance_id_file_path, 'r') as f: + instance_id = f.readline() else: - # Provided a config file - package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True) - except FileNotFoundError: - log.error('Did not provide a clp package config file or the specified config file does not exist.') - return - - # Parse and validate config file path - try: - clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path)) - - if need_to_start_scheduler: - # Generate a clp config from a clp package config (a reduced set of clp config) - # This config file will be used to start CLP - clp_config = CLPConfig( - input_logs_dfs_path=str(uncompressed_log_dir), - database=Database( - type='mariadb', - host=f'{clp_package_config.cluster_name}-db', - port=3306, - username='clp-user', - password=f'clp-{secrets.token_urlsafe(8)}', - name='initial_database' - ), - scheduler=Scheduler( - host=f'{clp_package_config.cluster_name}', - jobs_poll_delay=1 - ), - scheduler_queue=SchedulerQueue( - host=f'{clp_package_config.cluster_name}-queue', - port=5672, - username='clp-user', - password=f'clp-{secrets.token_urlsafe(8)}' - ), - archive_output=ArchiveOutput( - type='fs', - directory=f'var/data/{clp_package_config.cluster_name}/archives', - target_archive_size=clp_package_config.archive_output.target_archive_size, - target_dictionaries_size=clp_package_config.archive_output.target_dictionaries_size, - target_encoded_file_size=clp_package_config.archive_output.target_encoded_file_size, - target_segment_size=clp_package_config.archive_output.target_segment_size - ), - data_directory=f'var/data/{clp_package_config.cluster_name}', - logs_directory=f'var/log/{clp_package_config.cluster_name}' - ) - - # If ports are published, 
user wants to run CLP in distributed mode - # Host parameter will be the "host"'s hostname instead of docker network hostname - if parsed_args.publish_ports: - host_hostname = socket.gethostname() - clp_config.database.host = host_hostname - clp_config.scheduler.host = host_hostname + instance_id = str(uuid.uuid4())[-4:] + with open(instance_id_file_path, 'w') as f: + f.write(instance_id) + f.flush() + + conf_dir = clp_home / 'etc' + + # Start components + if '' == component_name or DB_COMPONENT_NAME == component_name: + start_db(instance_id, clp_config, conf_dir) + create_db_tables(instance_id, clp_config, container_clp_config, mounts) + if '' == component_name or QUEUE_COMPONENT_NAME == component_name: + start_queue(instance_id, clp_config) + if '' == component_name or SCHEDULER_COMPONENT_NAME == component_name: + start_scheduler(instance_id, clp_config, container_clp_config, mounts) + if '' == component_name or WORKER_COMPONENT_NAME == component_name: + start_worker(instance_id, clp_config, container_clp_config, num_cpus, mounts) except Exception as ex: - log.error(ex) - return + # Stop CLP + subprocess.run(['python3', str(clp_home / 'sbin' / 'stop-clp')], check=True) - try: - # Create temporary clp config file which we mount into the container - # Prepare package and initialize all required directories if necessary - # Note: config file is also updated with absolute path - docker_clp_home = pathlib.Path(CONTAINER_CLP_INSTALL_PREFIX) / 'clp' - container_clp_config_file_name = f'.{clp_package_config.cluster_name}.yaml' - host_config_file_path = clp_home / container_clp_config_file_name - container_config_file_path = f'{CONTAINER_CLP_INSTALL_PREFIX}/{container_clp_config_file_name}' - - # Persist config file used for container - if not host_config_file_path.exists() and need_to_start_scheduler: - host_data_directory, host_log_directory, host_archive_out_directory, clp_config = \ - prepare_package_and_config(clp_config, clp_home, docker_clp_home) - with open(host_config_file_path, 'w') as config_file: - config_file.write(clp_config.generate_config_file_content_with_comments()) + if type(ex) == ValueError: + logger.error(f"Failed to start CLP: {ex}") else: - try: - clp_config = CLPConfig.parse_obj(read_yaml_config_file(host_config_file_path)) - host_data_directory = clp_home / pathlib.Path(clp_config.data_directory).relative_to(docker_clp_home) - host_log_directory = clp_home / pathlib.Path(clp_config.logs_directory).relative_to(docker_clp_home) - host_archive_out_directory = \ - clp_home / pathlib.Path(clp_config.archive_output.directory).relative_to(docker_clp_home) - except Exception as ex: - log.error(ex) - return - - # Setup basic networking infrastructure - provision_docker_network_bridge(clp_package_config.cluster_name) - - if need_to_start_scheduler: - # Optimize, start database as early as possible (slow process) - log.info('Starting CLP scheduler') - log.debug('Starting CLP scheduler database service') - start_sql_db(clp_package_config.cluster_name, clp_config, host_data_directory, parsed_args.publish_ports) - - # Start execution environment - clp_execution_env_container = 'ghcr.io/y-scope/clp/clp-execution-x86-ubuntu-focal:main' - clp_execution_env_startup_cmd = [ - 'docker', 'run', '-di', - '--network', clp_package_config.cluster_name, - '--hostname', f'{clp_package_config.cluster_name}', - '--name', f'{clp_package_config.cluster_name}', - '-v', f'{clp_home}:{CONTAINER_CLP_INSTALL_PREFIX}/clp', - '-v', f'{uncompressed_log_dir}:{uncompressed_log_dir}', - '-u', 
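f'{os.getuid()}:{os.getgid()}',
-            ]
-
-            # Mount data, logs, archive output directory if it is outside of the package
-            if not clp_config.data_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
-                clp_execution_env_startup_cmd.append('-v')
-                clp_execution_env_startup_cmd.append(f'{host_data_directory}:{clp_config.data_directory}')
-            if not clp_config.logs_directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
-                clp_execution_env_startup_cmd.append('-v')
-                clp_execution_env_startup_cmd.append(f'{host_log_directory}:{clp_config.logs_directory}')
-            if not clp_config.archive_output.directory.startswith(f'{CONTAINER_CLP_INSTALL_PREFIX}/clp'):
-                clp_execution_env_startup_cmd.append('-v')
-                clp_execution_env_startup_cmd.append(f'{host_archive_out_directory}:{clp_config.archive_output.directory}')
-            clp_execution_env_startup_cmd.append(clp_execution_env_container)
-            log.debug(' '.join(clp_execution_env_startup_cmd))
-            subprocess.run(clp_execution_env_startup_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
-
-            # Copy config file into container
-            copy_cmd = ['docker', 'cp', str(host_config_file_path),
-                        f'{clp_package_config.cluster_name}:{container_config_file_path}']
-            log.debug(' '.join(copy_cmd))
-            subprocess.run(copy_cmd)
-
-        if need_to_start_scheduler:
-            rabbitmq_config_file_path = clp_home / '.rabbitmq.conf'
-            if not start_scheduler_queue(rabbitmq_config_file_path, clp_package_config.cluster_name, clp_config,
-                                         host_log_directory, parsed_args.publish_ports):
-                return -1
-            create_sql_db_tables(clp_package_config.cluster_name, container_config_file_path)
-            start_scheduler(clp_package_config.cluster_name, clp_config, container_config_file_path)
-        if need_to_start_worker:
-            start_worker(clp_package_config.cluster_name, clp_config, num_cpus)
-    except subprocess.CalledProcessError as ex:
-        log.error(ex.stdout.decode('utf-8'))
-        log.error(f'Failed to provision "{clp_package_config.cluster_name}"')
-    except EnvironmentError as ex:
-        log.error(ex)
-        log.error(f'Failed to provision "{clp_package_config.cluster_name}"')
+            logger.exception("Failed to start CLP.")
+        return -1
 
     return 0

One behavioral detail of the new start path deserves a callout: if any component fails to start, the handler above first invokes stop-clp to roll back whatever did start, and only then reports the failure. Reduced to its skeleton (a sketch under the assumption that stop_script points at sbin/stop-clp; not the script's literal code):

import subprocess

def start_components(starters, stop_script):
    try:
        for start in starters:
            start()
    except Exception:
        # Best-effort rollback of partially started components,
        # then re-raise so the caller still sees the original error
        subprocess.run(['python3', stop_script], check=True)
        raise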
diff --git a/components/package-template/src/sbin/stop-clp b/components/package-template/src/sbin/stop-clp
index 0abbf493c..7c4f9cf68 100755
--- a/components/package-template/src/sbin/stop-clp
+++ b/components/package-template/src/sbin/stop-clp
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
-
 import argparse
-import json
 import logging
 import os
 import pathlib
@@ -10,113 +8,150 @@ import sys
 
 # Setup logging
 # Create logger
-log = logging.getLogger('clp')
-log.setLevel(logging.INFO)
+logger = logging.getLogger('clp')
+logger.setLevel(logging.INFO)
 # Setup console logging
 logging_console_handler = logging.StreamHandler()
 logging_formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(name)s] %(message)s')
 logging_console_handler.setFormatter(logging_formatter)
-log.addHandler(logging_console_handler)
+logger.addHandler(logging_console_handler)
 
 
 def get_clp_home():
-    clp_home = None
+    # Determine CLP_HOME from an environment variable or this script's path
+    _clp_home = None
    if 'CLP_HOME' in os.environ:
-        clp_home = pathlib.Path(os.environ['CLP_HOME'])
+        _clp_home = pathlib.Path(os.environ['CLP_HOME'])
     else:
         for path in pathlib.Path(__file__).resolve().parents:
             if 'sbin' == path.name:
-                clp_home = path.parent
+                _clp_home = path.parent
                 break
-    if clp_home is None:
-        log.error('CLP_HOME is not set and could not be determined automatically.')
+    if _clp_home is None:
+        logger.error("CLP_HOME is not set 
and could not be determined automatically.") return None - elif not clp_home.exists(): - log.error('CLP_HOME does not exist.') + elif not _clp_home.exists(): + logger.error("CLP_HOME set to nonexistent path.") return None - return clp_home.resolve() + return _clp_home.resolve() -def load_bundled_python_lib_path(clp_home): - python_site_packages_path = clp_home / 'lib' / 'python3' / 'site-packages' +def load_bundled_python_lib_path(_clp_home): + python_site_packages_path = _clp_home / 'lib' / 'python3' / 'site-packages' if not python_site_packages_path.is_dir(): - log.error('Failed to load python3 packages bundled with CLP.') - return -1 + logger.error("Failed to load python3 packages bundled with CLP.") + return False + # Add packages to the front of the path sys.path.insert(0, str(python_site_packages_path)) + return True + clp_home = get_clp_home() -if clp_home is None: +if clp_home is None or not load_bundled_python_lib_path(clp_home): sys.exit(-1) -load_bundled_python_lib_path(clp_home) - -from clp_py_utils.core import read_yaml_config_file -from clp_py_utils.clp_package_config import CLPPackageConfig -from pydantic import ValidationError +from clp.package_utils import \ + CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH, \ + DB_COMPONENT_NAME, \ + QUEUE_COMPONENT_NAME, \ + SCHEDULER_COMPONENT_NAME, \ + WORKER_COMPONENT_NAME, \ + container_exists, \ + validate_and_load_config_file, \ + validate_and_load_db_credentials_file, \ + validate_and_load_queue_credentials_file -def inspect_docker_network_bridge(clp_cluster_name: str): - cmd = ['docker', 'network', 'inspect', clp_cluster_name] - log.info('Inspecting docker network bridge') - log.debug(' '.join(cmd)) - proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if 0 != proc.returncode: - log.error(f'Cluster "{clp_cluster_name}" has not been provisioned.') - raise EnvironmentError - bridge_bridge_specification = json.loads(proc.stdout.decode('utf-8'))[0] +def stop_container(container_name: str): + if not container_exists(container_name): + return - return bridge_bridge_specification + logger.info(f"Stopping {container_name}...") + cmd = ['docker', 'stop', container_name] + subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True) + logger.info(f"Stopped {container_name}.") def main(argv): - args_parser = argparse.ArgumentParser(description='Startup script for CLP') - args_parser.add_argument('--config', '-c', type=str, help='CLP package configuration file.') + default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH + + args_parser = argparse.ArgumentParser(description="Stops CLP") + args_parser.add_argument('--config', '-c', default=str(default_config_file_path), + help="CLP package configuration file.") + + component_args_parser = args_parser.add_subparsers(dest='component_name') + component_args_parser.add_parser(DB_COMPONENT_NAME) + component_args_parser.add_parser(QUEUE_COMPONENT_NAME) + component_args_parser.add_parser(SCHEDULER_COMPONENT_NAME) + component_args_parser.add_parser(WORKER_COMPONENT_NAME) + parsed_args = args_parser.parse_args(argv[1:]) - # Infer config file path - try: - if not parsed_args.config: - # Did not provide a config file - default_clp_package_config_file = clp_home / 'etc' / 'clp-config.yaml' - if not default_clp_package_config_file.exists(): - raise FileNotFoundError - log.info(f'Using default config file at {default_clp_package_config_file.relative_to(pathlib.Path.cwd())}') - package_config_file_path = default_clp_package_config_file - else: - # Provided a config file - 
package_config_file_path = pathlib.Path(parsed_args.config).resolve(strict=True)
-    except FileNotFoundError:
-        log.error('Did not provide a clp package config file or the specified config file does not exist.')
-        return
+    if parsed_args.component_name:
+        component_name = parsed_args.component_name
+    else:
+        component_name = ""
 
+    # Validate and load config file
     try:
-        clp_package_config = CLPPackageConfig.parse_obj(read_yaml_config_file(package_config_file_path))
-    except ValidationError as err:
-        log.error(err)
-        return
-    except Exception as ex:
-        # read_yaml_config_file already logs the parsing error inside
-        return
+        config_file_path = pathlib.Path(parsed_args.config)
+        clp_config = validate_and_load_config_file(config_file_path, default_config_file_path, clp_home)
+
+        # Validate and load necessary credentials
+        if component_name in ['', DB_COMPONENT_NAME]:
+            validate_and_load_db_credentials_file(clp_config, clp_home, False)
+        if component_name in ['', QUEUE_COMPONENT_NAME, SCHEDULER_COMPONENT_NAME, WORKER_COMPONENT_NAME]:
+            validate_and_load_queue_credentials_file(clp_config, clp_home, False)
+    except:
+        logger.exception("Failed to load config.")
+        return -1
 
-    clp_cluster_name = clp_package_config.cluster_name
     try:
-        bridge_bridge_specification = inspect_docker_network_bridge(clp_cluster_name)
-        for container_id in bridge_bridge_specification['Containers']:
-            # Stop and remove container
-            log.debug(f'Removing container {container_id}')
-            subprocess.run(['docker', 'stop', container_id], stdout=subprocess.DEVNULL)
-            subprocess.run(['docker', 'rm', container_id], stdout=subprocess.DEVNULL)
-        log.debug(f'Removing docker network bridge {clp_cluster_name}')
-        subprocess.run(['docker', 'network', 'rm', clp_cluster_name], stdout=subprocess.DEVNULL)
-    except EnvironmentError:
-        log.error(f'Failed to decommission "{clp_cluster_name}"')
-    else:
-        log.info(f'Successfully decommissioned "{clp_cluster_name}"')
+        # Read instance ID from file
+        logs_dir = clp_config.logs_directory
+        instance_id_file_path = logs_dir / 'instance-id'
+        if not (logs_dir.exists() and logs_dir.is_dir() and instance_id_file_path.exists()):
+            # No instance ID file, so nothing to do
+            return 0
+        with open(instance_id_file_path, 'r') as f:
+            instance_id = f.readline()
+
+        if '' == component_name or WORKER_COMPONENT_NAME == component_name:
+            stop_container(f'clp-{WORKER_COMPONENT_NAME}-{instance_id}')
+        if '' == component_name or SCHEDULER_COMPONENT_NAME == component_name:
+            container_name = f'clp-{SCHEDULER_COMPONENT_NAME}-{instance_id}'
+            stop_container(container_name)
+
+            container_config_file_path = logs_dir / f'{container_name}.yml'
+            if container_config_file_path.exists():
+                container_config_file_path.unlink()
+        if '' == component_name or QUEUE_COMPONENT_NAME == component_name:
+            container_name = f'clp-{QUEUE_COMPONENT_NAME}-{instance_id}'
+            stop_container(container_name)
+
+            queue_config_file_path = logs_dir / f'{container_name}.conf'
+            if queue_config_file_path.exists():
+                queue_config_file_path.unlink()
+        if '' == component_name or DB_COMPONENT_NAME == component_name:
+            stop_container(f'clp-{DB_COMPONENT_NAME}-{instance_id}')
+
+        if '' == component_name:
+            # NOTE: We can only remove the instance ID file if all containers have been stopped.
+            # Currently, we only remove the instance file when all containers are stopped at once.
+            # If a single container is stopped, it's expensive to check if the others are running,
+            # so instead we don't remove the instance file. In the worst case, a user will have to
+            # remove it manually.
+            instance_id_file_path.unlink()
+    except:
+        logger.exception("Failed to stop CLP.")
+        return -1
+
+    return 0
 
 
 if '__main__' == __name__:
-    main(sys.argv)
+    sys.exit(main(sys.argv))
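The piece of shared state that ties start-clp and stop-clp together is the instance ID: start-clp persists a short random ID under the logs directory and embeds it in every container name, and stop-clp reads the same file to find those containers. The convention, condensed into a sketch (not the scripts' literal code):

import pathlib
import uuid

def get_or_create_instance_id(logs_dir: pathlib.Path) -> str:
    # Reuse an existing ID so repeated starts and stops target the same containers
    instance_id_file = logs_dir / 'instance-id'
    if instance_id_file.exists():
        return instance_id_file.read_text()
    instance_id = str(uuid.uuid4())[-4:]  # the scripts keep only the last 4 chars
    instance_id_file.write_text(instance_id)
    return instance_id

# Container names are then derived as f'clp-{component}-{instance_id}'.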
diff --git a/tools/packager/install-scripts/install-python-component.sh b/tools/packager/install-scripts/install-python-component.sh
index d40bf6885..430ca9454 100755
--- a/tools/packager/install-scripts/install-python-component.sh
+++ b/tools/packager/install-scripts/install-python-component.sh
@@ -1,8 +1,11 @@
 #!/bin/bash
+# Exit on error
+set -e
+
 cUsage="Usage: ${BASH_SOURCE[0]} <component-name>"
 if [ "$#" -lt 1 ] ; then
-    echo $cUsage
+    echo "$cUsage"
     exit 1
 fi
 component_name=$1
@@ -10,9 +13,26 @@ python_package_name=${component_name//-/_}
 
 echo "Installing ${component_name}"
 
-cd ${WORKING_DIR}/${component_name}
+cd "${WORKING_DIR}/${component_name}" || exit 1
+
+num_reqs_processed=0
+while IFS= read -r -d '' req ; do
+    if [ -z "$req" ] ; then
+        continue
+    fi
+
+    PIP_CACHE_DIR=${CACHE_DIR} pip3 install \
+        -c constraints.txt \
+        --target "${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages" \
+        "$req" &
 
-PIP_CACHE_DIR=${CACHE_DIR} xargs --max-args=1 --max-procs=16 \
-    pip install --target ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages < requirements.txt
+    (( ++num_reqs_processed ))
+    if (( num_reqs_processed % BUILD_PARALLELISM == 0 )) ; then
+        # Wait after starting every $BUILD_PARALLELISM jobs
+        wait
+    fi
+done < <(sed 's/#.*//' requirements.txt | tr '\n' '\0')
+# Wait for remaining jobs to finish
+wait
 
-cp -R ${python_package_name} ${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages
+cp -R "${python_package_name}" "${WORKING_DIR}/${ARTIFACT_NAME}/lib/python3/site-packages"
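For readers who don't parse sed/tr pipelines at a glance: the process substitution feeding the loop strips '#' comments from requirements.txt and NUL-delimits the entries, so read -r -d '' consumes one requirement at a time and the -z check skips lines that comment-stripping emptied. The same filtering written out in Python for clarity (illustrative only; the build uses the shell version above, which does not trim whitespace):

def read_requirements(path: str = 'requirements.txt') -> list:
    # Equivalent of: sed 's/#.*//' requirements.txt | tr '\n' '\0'
    # plus the loop's skip-empty check (with whitespace also trimmed here)
    reqs = []
    with open(path) as f:
        for line in f:
            req = line.split('#', 1)[0].strip()
            if req:
                reqs.append(req)
    return reqs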