From f52c770a6dc5ed4d31ea7643aaa2057b0040d97e Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 4 Jun 2024 22:59:50 +0000 Subject: [PATCH 01/57] a --- composer/callbacks/checkpoint_saver_v2.py | 567 ++++++++++++++++++++++ 1 file changed, 567 insertions(+) create mode 100644 composer/callbacks/checkpoint_saver_v2.py diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py new file mode 100644 index 0000000000..463d58382b --- /dev/null +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -0,0 +1,567 @@ +# Copyright 2022 MosaicML Composer authors +# SPDX-License-Identifier: Apache-2.0 + +"""Callback to save checkpoints during training.""" + +from __future__ import annotations + +import logging +import os +import pathlib +import shutil +import tempfile +import textwrap +from pathlib import Path +from typing import Any, Callable, Optional, Union + +from composer.core import Callback, Event, State, Time, Timestamp +from composer.loggers import Logger, MLFlowLogger +from composer.utils import ( + FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, + FORMAT_NAME_WITH_DIST_TABLE, + PartialFilePath, + checkpoint, + create_interval_scheduler, + create_symlink_file, + dist, + ensure_folder_has_no_conflicting_files, + format_name_with_dist, + format_name_with_dist_and_time, + is_model_deepspeed, + partial_format, + RemoteUploader, + parse_uri, +) +from composer.utils.compression import get_compressor, is_compressed_pt +from composer.utils.object_store.mlflow_object_store import MLFLOW_EXPERIMENT_ID_FORMAT_KEY, MLFLOW_RUN_ID_FORMAT_KEY + +log = logging.getLogger(__name__) + +__all__ = ['CheckpointSaver'] + +_TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME = '.metadata' + + +class CheckpointSaver(Callback): # noqa: D101 + __doc__ = f"""Callback to save checkpoints. + + .. note:: + + If the ``folder`` argument is specified when constructing the :class:`.Trainer`, then the :class:`.CheckpointSaver` + callback need not be constructed manually. However, for advanced checkpointing use cases + (such as saving a weights-only checkpoint at one interval and the full training state + at another interval), instance(s) of this :class:`.CheckpointSaver` callback can be specified in the + ``callbacks`` argument of the :class:`.Trainer`, as shown in the example below. + + Example + + .. testsetup:: + + from composer.callbacks.checkpoint_saver import CheckpointSaver + + .. doctest:: + + >>> trainer = Trainer(..., callbacks=[ + ... CheckpointSaver( + ... folder='{{run_name}}/checkpoints', + ... filename="ep{{epoch}}-ba{{batch}}-rank{{rank}}", + ... latest_filename="latest-rank{{rank}}", + ... save_interval="1ep", + ... weights_only=False, + ... ) + ... ]) + + Args: + folder (str, optional): Format string for the save_folder where checkpoints will be saved. + Default: ``'{{run_name}}/checkpoints'``. + + The following format variables are available: + + {textwrap.indent(FORMAT_NAME_WITH_DIST_TABLE, prefix=' ')} + + .. note:: + + When training with multiple devices (i.e. GPUs), ensure that ``'{{rank}}'`` appears in the format. + Otherwise, multiple processes may attempt to write to the same file. + + filename (str, optional): A format string describing how to name checkpoints. + Default: ``'ep{{epoch}}-ba{{batch}}-rank{{rank}}.pt'``. + + Checkpoints will be saved approximately to ``{{folder}}/{{filename.format(...)}}``. + + The following format variables are available: + + {textwrap.indent(FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, prefix=' ')} + + + .. 
note:: + + * By default, only the rank zero process will save a checkpoint file. + + * When using DeepSpeed, each rank will save a checkpoint file in tarball format. DeepSpeed + requires tarball format, as it saves model and optimizer states in separate files. + Ensure that ``'{{rank}}'`` appears within the ``filename``. Otherwise, multiple ranks + may attempt to write to the same file(s), leading to corrupted checkpoints. If no tarball file + extension is specified, ``'.tar'`` will be used. + + * To write to compressed tar files (regardless of whether DeepSpeed is enabled), set the file + extension to ``'.tar.gz'``, ``'.tgz'``, ``'.tar.bz2'``, or ``'.tar.lzma'`` (depending on the + desired compression algorithm). + + * To write to compressed pt files (when DeepSpeed is disabled), set the file extension to + ``'.pt.bz2'``, ``'.pt.gz'``, ``'.pt.lz4'``, ``'.pt.lzma'``, ``'.pt.lzo'``, ``'.pt.xz'``, + ``'.pt.zst'`` + (depending on the desired algorithm). You must have the corresponding CLI tool installed. + ``lz4`` is a good choice for a modest space saving while being very fast to compress. + + .. warning:: + + Using compression will block the training loop while checkpoints are being compressed and the + compressibility of checkpoints can vary significantly depending on your setup. As such, we + recommend saving checkpoints without compression by default. + + If you have the ``lz4`` command available on your system, you may want to try saving as ``.pt.lz4`` + as the overhead is minimal (usually less than a second) and the saved space can sometimes + be significant (1% - 40%). + + Consider the following scenario where: + + * The :attr:`~.State.run_name` is ``'awesome-training-run'`` + * The default ``folder='{{run_name}}/checkpoints'`` is used. + * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. + * The current epoch count is ``1``. + * The current batch count is ``42``. + + When DeepSpeed is not being used, the rank zero process will save the checkpoint to + ``"awesome-training-run/checkpoints/ep1-ba42-rank0"``. + + When DeepSpeed is being used, each rank (process) will save checkpoints to:: + + awesome-training-run/checkpoints/ep1-ba42-rank0.tar + awesome-training-run/checkpoints/ep1-ba42-rank1.tar + awesome-training-run/checkpoints/ep1-ba42-rank2.tar + ... + + remote_file_name (str, optional): Format string for the checkpoint's remote file name. + Default: ``"{{run_name}}/checkpoints/ep{{epoch}}-ba{{batch}}-rank{{rank}}"``. + + After the checkpoint is saved, it will be periodically uploaded. + The remote file name will be determined by this format string. + + .. seealso:: :doc:`Uploading Files` for notes for file uploading. + + The same format variables for ``filename`` are available. + + Leading slashes (``'/'``) will be stripped. + + To disable uploading checkpoints, set this parameter to ``None``. + latest_filename (str, optional): A format string for a symlink which points to the last saved checkpoint. + Default: ``'latest-rank{{rank}}.pt'``. + + Symlinks will be created approximately at ``{{folder}}/{{latest_filename.format(...)}}``. + + The same format variables as for ``name`` are available. + + To disable symlinks, set this parameter to ``None``. + + Consider the following scenario, where: + + * The :attr:`~.State.run_name` is 'awesome-training-run' + * The default ``folder='{{run_name}}/checkpoints'`` is used. + * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. + * The default ``latest_filename='latest-rank{{rank}}'`` is used. 
+ * The current epoch count is ``1``. + * The current batch count is ``42``. + + When DeepSpeed is not being used, the rank zero process will save the checkpoint to + ``'awesome-training-run/checkpoints/ep1-ba42-rank0'``, + and a symlink will be created at + ``'awesome-training-run/checkpoints/latest-rank0' -> 'awesome-training-run/checkpoints/ep1-ba42-rank0'`` + + When DeepSpeed is being used, each rank (process) will save checkpoints to:: + + awesome-training-run/checkpoints/ep1-ba42-rank0.tar + awesome-training-run/checkpoints/ep1-ba42-rank1.tar + awesome-training-run/checkpoints/ep1-ba42-rank2.tar + ... + + Corresponding symlinks will be created at:: + + awesome-training-run/checkpoints/latest-rank0.tar -> awesome-training-run/checkpoints/ep1-ba42-rank0.tar + awesome-training-run/checkpoints/latest-rank1.tar -> awesome-training-run/checkpoints/ep1-ba42-rank1.tar + awesome-training-run/checkpoints/latest-rank2.tar -> awesome-training-run/checkpoints/ep1-ba42-rank2.tar + ... + latest_remote_file_name (str, optional): Format string for the checkpoint's latest symlink remote file name. + Default: ``'{{run_name}}/checkpoints/latest-rank{{rank}}"``. + + Whenever a new checkpoint is saved, a symlink is created or updated to point to the latest checkpoint's ``remote_file_name``. + The remote file name will be determined by this format string. This parameter has no effect if ``latest_filename`` or ``remote_file_name`` is ``None``. + + .. seealso:: :doc:`Uploading Files` for notes for file uploading. + + The same format variables for ``filename`` are available. + + Leading slashes (``'/'``) will be stripped. + + To disable symlinks in logger, set this parameter to ``None``. + + overwrite (bool, optional): Whether existing checkpoints should be overridden. + If ``False`` (the default), then the ``folder`` must not exist or must not contain checkpoints which may conflict + with the current run. Default: ``False``. + + save_interval (Time | str | int | (State, Event) -> bool): A :class:`.Time`, time-string, integer (in epochs), + or a function that takes (state, event) and returns a boolean whether a checkpoint should be saved. + + If an integer, checkpoints will be saved every n epochs. + If :class:`.Time` or a time-string, checkpoints will be saved according to this interval. + + .. seealso:: :func:`.checkpoint_periodically` + + If a function, then this function should take two arguments (:class:`.State`, :class:`.Event`). + The first argument will be the current state of the trainer, and the second argument will be + be :attr:`.Event.BATCH_CHECKPOINT` or :attr:`.Event.EPOCH_CHECKPOINT` (depending on the current training + progress). It should return ``True`` if a checkpoint should be saved given the current state and + event. + + num_checkpoints_to_keep (int, optional): The number of checkpoints to keep locally. The oldest checkpoints + are removed first. Set to ``-1`` to keep all checkpoints locally. Default: ``-1``. + + Checkpoints will be removed after they have been uploaded. For example, when this callback + is used in conjunction with the :class:`.RemoteUploaderDownloader`, set this + parameter to ``0`` to immediately delete checkpoints from the local disk after they have been uploaded to + the object store. + + This parameter only controls how many checkpoints are kept locally; checkpoints are not deleted from + remote file systems. + + weights_only (bool): If ``True``, save only the model weights instead of the entire training state. 
+ This parameter must be ``False`` when using DeepSpeed. Default: ``False``. + + ignore_keys (list[str] | (dict) -> None, optional): A list of paths for the ``state_dict`` of the checkpoint, + which, when provided, will be ignored from the state_dict before a checkpoint is saved. Each path is a list + of strings specifying the keys to index into ``state_dict`` joined together with `/` as a separator (as PyTorch + uses `.` in parameter names). If a prefix is provided, all children are also ignored (see Example 2). + See :mod:`composer.core.state` for the structure of state_dict. + + Example 1: ``save_ignore_keys = ["state/model/layer1.weights", "state/model/layer1.bias"]`` would ignore + layer 1 weights and bias. + + Example 2: ``save_ignore_keys = ["state/model/*"]`` would ignore the entire model, which would have the same + effect as the previous example if there was only 1 layer. + + Example 3: ``save_ignore_keys = ["state/model/layer*.weights"]`` would ignore all weights in the model. + + Example 4: ``save_ignore_keys = ["state/rank_zero_seed", "rng"]`` would reset all randomness when + saving the checkpoint. + + If a callable, it should take one argument which is the state_dict. The callable is free to arbitrarily modify + the state_dict before it is loaded. + + (default: ``None``) + + Attributes: + saved_checkpoints (list[tuple[Timestamp, list[pathlib.Path]]]): The checkpoint timestamps and filepaths. + + This list contains tuples of the save timestamp and the checkpoint filepaths. + This list will have at most ``num_checkpoints_to_keep`` entries. The latest checkpoint + will be at the end. + + .. note:: + + When using DeepSpeed, the index of a filepath in each list corresponds to the global rank of + the process that wrote that file. Each filepath is valid only on the process's (rank's) node. + + Otherwise, when not using DeepSpeed, each sub-list will contain only one filepath since only rank zero + saves checkpoints. 
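+    A minimal sketch of the callable form of ``save_interval`` described above; it assumes only the
+    ``State.timestamp`` and ``Event`` values this callback already uses, and the function name is
+    purely illustrative::
+
+        from composer.core import Event, State
+
+        def save_every_500_batches(state: State, event: Event) -> bool:
+            # Save on batch-checkpoint events every 500 batches; always save at epoch boundaries.
+            if event == Event.BATCH_CHECKPOINT:
+                return state.timestamp.batch.value % 500 == 0
+            return event == Event.EPOCH_CHECKPOINT
+
+        saver = CheckpointSaver(save_interval=save_every_500_batches)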
+ """ + + def __init__( + self, + folder: Union[str, pathlib.Path] = '{run_name}/checkpoints', + filename: Union[str, pathlib.Path] = 'ep{epoch}-ba{batch}-rank{rank}.pt', + remote_file_name: Optional[Union[str, pathlib.Path] + ] = ('{run_name}/checkpoints/' + 'ep{epoch}-ba{batch}-rank{rank}.pt'), + latest_filename: Optional[Union[str, pathlib.Path]] = 'latest-rank{rank}.pt', + latest_remote_file_name: Optional[Union[str, pathlib.Path]] = '{run_name}/checkpoints/latest-rank{rank}.pt', + save_interval: Union[Time, str, int, Callable[[State, Event], bool]] = '1ep', + *, + overwrite: bool = False, + num_checkpoints_to_keep: int = -1, + weights_only: bool = False, + ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, + save_folder: Optional[str] = None, + ): + folder = str(folder) + filename = str(filename) + remote_file_name = str(remote_file_name) if remote_file_name is not None else None + latest_filename = str(latest_filename) if latest_filename is not None else None + latest_remote_file_name = str(latest_remote_file_name) if latest_remote_file_name is not None else None + + # want to fail early if a required CLI tool is missing to ensure no training time is wasted + for name in [filename, remote_file_name, latest_filename, latest_remote_file_name]: + if name is not None and is_compressed_pt(name): + get_compressor(name).check_exists() + + if not callable(save_interval): + save_interval = create_interval_scheduler(save_interval) + self.save_interval = save_interval + self.last_checkpoint_batch: Optional[Time] = None + + self.folder = folder + + self.filename = PartialFilePath(filename.lstrip('/'), folder) + self.latest_filename = PartialFilePath(latest_filename.lstrip('/'), folder) if latest_filename else None + self.remote_file_name = PartialFilePath(remote_file_name) if remote_file_name else None + self.latest_remote_file_name = PartialFilePath(latest_remote_file_name) if latest_remote_file_name else None + + self.overwrite = overwrite + self.saved_checkpoints: list[str] = [] + self.all_saved_checkpoints_to_timestamp: dict[str, Timestamp] = {} + self.num_checkpoints_to_keep = num_checkpoints_to_keep + self.weights_only = weights_only + self.ignore_keys = ignore_keys + + self.start_batch = None + + self.remote_uploader = None + backend, _, _ = parse_uri(save_folder) + self.remote_uploader_futures = [] + if backend != "": + self.remote_uploader = RemoteUploader( + remote_folder = save_folder, + ) + + + def init(self, state: State, logger: Logger) -> None: + # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. + # Assumes that MLFlowLogger comes before CheckpointSaver in the list of loggers. 
+ for destination in logger.destinations: + if isinstance(destination, MLFlowLogger): + mlflow_format_kwargs = { + MLFLOW_EXPERIMENT_ID_FORMAT_KEY: destination._experiment_id, + MLFLOW_RUN_ID_FORMAT_KEY: destination._run_id, + } + self.folder = partial_format(self.folder, **mlflow_format_kwargs) + + self.filename.folder = self.folder + if self.latest_filename is not None: + self.latest_filename.folder = self.folder + + # The remote paths have the placeholders in their filename rather than folder + if self.remote_file_name is not None: + self.remote_file_name.filename = partial_format( + self.remote_file_name.filename, + **mlflow_format_kwargs, + ) + if self.latest_remote_file_name is not None: + self.latest_remote_file_name.filename = partial_format( + self.latest_remote_file_name.filename, + **mlflow_format_kwargs, + ) + + break + + folder = format_name_with_dist(self.folder, state.run_name) + os.makedirs(folder, exist_ok=True) + + def fit_start(self, state: State, logger: Logger) -> None: + if not self.overwrite: + # checks that save_folder contains no files with a timestamp after the current timestamp, + # which has potential for future conflicts. + folder = format_name_with_dist(self.folder, state.run_name) + ensure_folder_has_no_conflicting_files(folder, self.filename.filename, state.timestamp) + + dist.barrier() # holds all ranks until folder check is done + + if is_model_deepspeed(state.model) and self.weights_only: + raise NotImplementedError('weights_only=True is not supported when using DeepSpeed.') + + self.start_batch = state.timestamp.batch + + def batch_checkpoint(self, state: State, logger: Logger): + assert callable(self.save_interval) + if self.save_interval(state, Event.BATCH_CHECKPOINT) and self.last_checkpoint_batch != state.timestamp.batch: + self._save_checkpoint( + state, + logger, + ) + + def epoch_checkpoint(self, state: State, logger: Logger): + assert callable(self.save_interval) + if self.save_interval(state, Event.EPOCH_CHECKPOINT) and self.last_checkpoint_batch != state.timestamp.batch: + self._save_checkpoint( + state, + logger, + ) + + def iteration_checkpoint(self, state: State, logger: Logger): + assert callable(self.save_interval) + if ( + self.save_interval(state, Event.ITERATION_CHECKPOINT) and + self.last_checkpoint_batch != state.timestamp.batch + ): + self._save_checkpoint( + state, + logger, + ) + + def state_dict(self) -> dict[str, Any]: + state_dict = {} + + all_checkpoints = [] + for save_filename, timestamp in self.all_saved_checkpoints_to_timestamp.items(): + all_checkpoints.append((save_filename, timestamp.state_dict())) + + state_dict['all_saved_checkpoints_to_timestamp'] = all_checkpoints + return state_dict + + def load_state_dict(self, state: dict[str, Any]): + if 'all_saved_checkpoints_to_timestamp' in state: + for (save_filename, timestamp_state) in state['all_saved_checkpoints_to_timestamp']: + load_timestamp = Timestamp() + load_timestamp.load_state_dict(timestamp_state) + self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp + + def _save_checkpoint(self, state: State, logger: Logger): + self.last_checkpoint_batch = state.timestamp.batch + + is_deepspeed = is_model_deepspeed(state.model) + + if is_deepspeed and '{rank}' not in self.filename.filename: + raise ValueError(f'Save filename {self.filename.filename} must have {{rank}} for deepspeed.') + + # save the checkpoint to the filename + filename_with_placeholders = self.filename.format(state, is_deepspeed, keep_placeholders=True) + save_filename = 
checkpoint.get_save_filename(state, filename_with_placeholders) + # Store before saving so state_dict in checkpoint has reference to latest checkpoint (itself) + self.all_saved_checkpoints_to_timestamp[save_filename] = state.timestamp + + saved_path = checkpoint.save_checkpoint( + state=state, + filename=filename_with_placeholders, + weights_only=self.weights_only, + ignore_keys=self.ignore_keys, + ) + log.debug(f'Checkpoint locally saved to {saved_path}') + + if not saved_path: # not all ranks save + return + + metadata_local_file_path = None + if dist.get_global_rank() == 0 and state.fsdp_sharded_state_dict_enabled: + metadata_local_file_path = format_name_with_dist_and_time( + os.path.join(Path(saved_path).parent, _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME), + state.run_name, + state.timestamp, + ) + + if self.latest_filename is not None and self.num_checkpoints_to_keep != 0: + symlink = self.latest_filename.format(state, is_deepspeed) + os.makedirs(os.path.dirname(symlink), exist_ok=True) + try: + os.remove(symlink) + except FileNotFoundError: + pass + # Sharded checkpoints for torch >2.0 use directories not files for load_paths + if state.fsdp_sharded_state_dict_enabled: + src_path = str(pathlib.Path(saved_path).parent) + else: + src_path = saved_path + this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if this_rank_saves_symlinks: + os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) + + # if remote file name provided, upload the checkpoint + if self.remote_file_name is not None: + if state.fsdp_sharded_state_dict_enabled: + remote_file_name = self.remote_file_name.format( + state, + is_deepspeed, + keep_placeholders=True, + ).lstrip('/') + assert state.fsdp_config is not None + remote_prefix = state.fsdp_config.sharded_ckpt_prefix_dir + assert remote_prefix is not None + ckpt_filename = checkpoint._TORCH_DISTRIBUTED_CHECKPOINTS_FILENAME + remote_file_name = os.path.join(pathlib.Path(remote_file_name).parent, remote_prefix, ckpt_filename) + remote_file_name = format_name_with_dist_and_time(remote_file_name, state.run_name, state.timestamp) + # Upload metadata file. + # The metadata file contains info related to which shards are saved where. + if dist.get_global_rank() == 0 and state.fsdp_sharded_state_dict_enabled: + metadata_remote_file_name = format_name_with_dist_and_time( + os.path.join(Path(remote_file_name).parent, _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME), + state.run_name, + state.timestamp, + ) + assert metadata_local_file_path is not None + self.remote_uploader_futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=metadata_remote_file_name, + file_path=metadata_local_file_path, + overwrite=self.overwrite, + ) + ) + else: + remote_file_name = self.remote_file_name.format( + state, + is_deepspeed, + ).lstrip('/') + + log.debug(f'Uploading checkpoint to {remote_file_name}') + try: + self.remote_uploader_futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=remote_file_name, + file_path=saved_path, + overwrite=self.overwrite, + ), + ) + except FileExistsError as e: + raise FileExistsError( + f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. 
To overwrite checkpoints with Trainer, set save_overwrite to True.', + ) from e + + # symlinks stay the same with sharded checkpointing + if self.latest_remote_file_name is not None: + symlink_name = self.latest_remote_file_name.format( + state, + is_deepspeed, + ).lstrip('/') + '.symlink' + + # create and upload a symlink file + with tempfile.TemporaryDirectory() as tmpdir: + symlink_filename = os.path.join(tmpdir, 'latest.symlink') + # Sharded checkpoints for torch >2.0 use directories not files for load_paths + if state.fsdp_sharded_state_dict_enabled: + src_path = str(pathlib.Path(remote_file_name).parent) + else: + src_path = remote_file_name + log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') + this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if this_rank_saves_symlinks: + create_symlink_file(src_path, symlink_filename) + logger.upload_file( + remote_file_name=symlink_name, + file_path=symlink_filename, + overwrite=True, + ) + + self.saved_checkpoints.append(saved_path) + + if self.num_checkpoints_to_keep >= 0: + self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) + + def _upload_symlink_file(self): + + def _rotate_checkpoints(self, sharding_enabled: bool = False): + + while len(self.saved_checkpoints) > self.num_checkpoints_to_keep: + prefix_dir = None + checkpoint_to_delete = self.saved_checkpoints.pop(0) + prefix_dir = str(Path(checkpoint_to_delete).parent) + if not sharding_enabled: + os.remove(checkpoint_to_delete) + else: + if dist.get_global_rank() == 0: + shutil.rmtree(prefix_dir) From 8ee836436176c307f6b4c1dabe134714abaf1828 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 5 Jun 2024 09:23:49 -0700 Subject: [PATCH 02/57] a --- composer/callbacks/checkpoint_saver_v2.py | 56 ++++++++++++++--------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 463d58382b..a09c1edb54 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -11,8 +11,9 @@ import shutil import tempfile import textwrap +from concurrent.futures import Future from pathlib import Path -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Optional, Union, List, Tuple from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, MLFlowLogger @@ -326,7 +327,10 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) - self.remote_uploader_futures = [] + self.remote_uploader_futures: List[List[Future]] = [] + self.symlink_file_tasks: List[Tuple(str, str)] = [] + self.this_rank_saves_remote_symlinks: bool = False + self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() if backend != "": self.remote_uploader = RemoteUploader( remote_folder = save_folder, @@ -474,6 +478,7 @@ def _save_checkpoint(self, state: State, logger: Logger): # if remote file name provided, upload the checkpoint if self.remote_file_name is not None: + futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: remote_file_name = self.remote_file_name.format( state, @@ -495,7 +500,7 @@ def _save_checkpoint(self, state: State, logger: Logger): state.timestamp, ) assert metadata_local_file_path is not None - self.remote_uploader_futures.append( + futures.append( self.remote_uploader.upload_file_async( remote_file_name=metadata_remote_file_name, file_path=metadata_local_file_path, @@ 
-510,7 +515,7 @@ def _save_checkpoint(self, state: State, logger: Logger): log.debug(f'Uploading checkpoint to {remote_file_name}') try: - self.remote_uploader_futures.append( + futures.append( self.remote_uploader.upload_file_async( remote_file_name=remote_file_name, file_path=saved_path, @@ -522,6 +527,8 @@ def _save_checkpoint(self, state: State, logger: Logger): f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. To overwrite checkpoints with Trainer, set save_overwrite to True.', ) from e + self.remote_uploader_futures.append(futures) + # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: symlink_name = self.latest_remote_file_name.format( @@ -530,29 +537,36 @@ def _save_checkpoint(self, state: State, logger: Logger): ).lstrip('/') + '.symlink' # create and upload a symlink file - with tempfile.TemporaryDirectory() as tmpdir: - symlink_filename = os.path.join(tmpdir, 'latest.symlink') - # Sharded checkpoints for torch >2.0 use directories not files for load_paths - if state.fsdp_sharded_state_dict_enabled: - src_path = str(pathlib.Path(remote_file_name).parent) - else: - src_path = remote_file_name - log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: - create_symlink_file(src_path, symlink_filename) - logger.upload_file( - remote_file_name=symlink_name, - file_path=symlink_filename, - overwrite=True, - ) + symlink_filename = os.path.join(self.tmp_dir_for_symlink, f'latest.symlink.{len(saved_checkpoints)}') + # Sharded checkpoints for torch >2.0 use directories not files for load_paths + if state.fsdp_sharded_state_dict_enabled: + src_path = str(pathlib.Path(remote_file_name).parent) + else: + src_path = remote_file_name + log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') + this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if this_rank_saves_symlinks: + self.this_rank_saves_remote_symlinks = True + create_symlink_file(src_path, symlink_filename) + self.symlink_file_tasks.append((symlink_filename, symlink_name)) self.saved_checkpoints.append(saved_path) if self.num_checkpoints_to_keep >= 0: self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) - def _upload_symlink_file(self): + def wait(self) -> None: + # Wait remote uploader futures and start to upload the latest symlink file if necessary + if self.this_rank_saves_remote_symlinks: + if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): + raise RuntimeError(f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}') + for i in range(len(self.remote_uploader_futures)): + for future in self.remote_uploader_futures[i]: + future.result() + + # nccl commms , then upload symlink file + + def _rotate_checkpoints(self, sharding_enabled: bool = False): From 4fecdf63504a250565f0b28c996f38b368de95ff Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 6 Jun 2024 10:38:49 -0700 Subject: [PATCH 03/57] a --- composer/callbacks/checkpoint_saver_v2.py | 39 ++++++++++++++++++++--- composer/utils/remote_uploader.py | 1 + 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index a09c1edb54..ad1a2196c2 100644 --- 
a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -427,7 +427,7 @@ def load_state_dict(self, state: dict[str, Any]): load_timestamp.load_state_dict(timestamp_state) self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp - def _save_checkpoint(self, state: State, logger: Logger): + def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool=True): self.last_checkpoint_batch = state.timestamp.batch is_deepspeed = is_model_deepspeed(state.model) @@ -448,6 +448,12 @@ def _save_checkpoint(self, state: State, logger: Logger): ignore_keys=self.ignore_keys, ) log.debug(f'Checkpoint locally saved to {saved_path}') + + # Wait the previous upload tasks on all ranks + # self.wait() has dist.barrier, so it needs to be called + # on all ranks before any early return + if wait_previous_remote_upload_tasks: + self.wait() if not saved_path: # not all ranks save return @@ -478,6 +484,7 @@ def _save_checkpoint(self, state: State, logger: Logger): # if remote file name provided, upload the checkpoint if self.remote_file_name is not None: + futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: remote_file_name = self.remote_file_name.format( @@ -560,12 +567,24 @@ def wait(self) -> None: if self.this_rank_saves_remote_symlinks: if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): raise RuntimeError(f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}') + log.debug('Waiting for previous checkpoint files upload finish') for i in range(len(self.remote_uploader_futures)): for future in self.remote_uploader_futures[i]: future.result() - - # nccl commms , then upload symlink file - + log.debug(f'Current rank finished existing uploading tasks') + self.remote_uploader_futures = [] + + dist.barrier() + log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') + if self.this_rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: + # Only upload the last symlink file + symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] + self.remote_uploader.upload_file_async( + remote_file_name=symlink_remote_filename, + file_path=symlink_local_filename, + overwrite=True, + ) + self.symlink_file_tasks = [] def _rotate_checkpoints(self, sharding_enabled: bool = False): @@ -579,3 +598,15 @@ def _rotate_checkpoints(self, sharding_enabled: bool = False): else: if dist.get_global_rank() == 0: shutil.rmtree(prefix_dir) + + def batch_end(self, state: State, logger: Logger) -> None: + del state, logger # unused + if self.remote_uploader is not None: + self.remote_uploader.check_workers() + + def post_close(self): + if self.remote_uploader is not None: + # Wait the uploading tasks to finish and start symlink file uploading + self.wait() + # Wait the symlink file upload to finish and close remote uploader + self.remote_uploader.wait_and_close() diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index c26c73a319..ace687602a 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -166,3 +166,4 @@ def wait_and_close(self): # make sure all workers are either running, or completed successfully self.wait() self.executor.shutdown(wait=True) + log.info('Finished all uploading tasks, closing RemoteUploader') From 7e53a3ba884209d37bc8ada0339bfdf1a579dade Mon Sep 17 00:00:00 2001 From: 
Ning Wang Date: Thu, 6 Jun 2024 14:21:14 -0700 Subject: [PATCH 04/57] a --- composer/callbacks/__init__.py | 2 ++ composer/callbacks/checkpoint_saver_v2.py | 4 ++-- composer/trainer/trainer.py | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/composer/callbacks/__init__.py b/composer/callbacks/__init__.py index b876826e3c..c536054740 100644 --- a/composer/callbacks/__init__.py +++ b/composer/callbacks/__init__.py @@ -8,6 +8,7 @@ """ from composer.callbacks.activation_monitor import ActivationMonitor from composer.callbacks.checkpoint_saver import CheckpointSaver +from composer.callbacks.checkpoint_saver_v2 import CheckpointSaverCallback from composer.callbacks.early_stopper import EarlyStopper from composer.callbacks.eval_output_logging_callback import EvalOutputLogging from composer.callbacks.export_for_inference import ExportForInferenceCallback @@ -46,4 +47,5 @@ 'FreeOutputs', 'MemorySnapshot', 'OOMObserver', + 'CheckpointSaverCallback', ] diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index ad1a2196c2..1c42bf9a78 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -38,12 +38,12 @@ log = logging.getLogger(__name__) -__all__ = ['CheckpointSaver'] +__all__ = ['CheckpointSaverCallback'] _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME = '.metadata' -class CheckpointSaver(Callback): # noqa: D101 +class CheckpointSaverCallback(Callback): # noqa: D101 __doc__ = f"""Callback to save checkpoints. .. note:: diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index cb42094f37..6823d46759 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -54,7 +54,7 @@ else: from torch.cuda.amp.grad_scaler import GradScaler, _refresh_per_optimizer_state # type: ignore -from composer.callbacks import CheckpointSaver, MemorySnapshot, OOMObserver, OptimizerMonitor +from composer.callbacks import CheckpointSaver, MemorySnapshot, OOMObserver, OptimizerMonitor, CheckpointSaverCallback from composer.core import ( Algorithm, AlgorithmPass, @@ -1415,7 +1415,7 @@ def __init__( self._checkpoint_saver = None latest_remote_file_name = None - _checkpoint_savers = [cb for cb in self.state.callbacks if isinstance(cb, CheckpointSaver)] + _checkpoint_savers = [cb for cb in self.state.callbacks if (isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback))] if len(_checkpoint_savers) >= 1: if len(_checkpoint_savers) > 1: log.info('Multiple CheckpointSaver provided as callbacks. 
Using the first one as reference.') @@ -1461,7 +1461,7 @@ def __init__( else: latest_remote_file_name = None - self._checkpoint_saver = CheckpointSaver( + self._checkpoint_saver = CheckpointSaverCallback( folder=folder, filename=save_filename, remote_file_name=remote_file_name, @@ -1472,6 +1472,7 @@ def __init__( ignore_keys=save_ignore_keys, save_interval=save_interval, num_checkpoints_to_keep=save_num_checkpoints_to_keep, + save_folder=save_folder, ) self.state.callbacks.append(self._checkpoint_saver) From f772e336ed4c226cd6a58eba02242b0deee203ab Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 6 Jun 2024 23:27:04 +0000 Subject: [PATCH 05/57] a --- composer/callbacks/checkpoint_saver_v2.py | 2 +- composer/trainer/trainer.py | 1 + composer/utils/__init__.py | 2 ++ composer/utils/remote_uploader.py | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 1c42bf9a78..f75e460cf8 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -544,7 +544,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ).lstrip('/') + '.symlink' # create and upload a symlink file - symlink_filename = os.path.join(self.tmp_dir_for_symlink, f'latest.symlink.{len(saved_checkpoints)}') + symlink_filename = os.path.join(self.tmp_dir_for_symlink.name, f'latest.{len(self.saved_checkpoints)}.symlink') # Sharded checkpoints for torch >2.0 use directories not files for load_paths if state.fsdp_sharded_state_dict_enabled: src_path = str(pathlib.Path(remote_file_name).parent) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index bf73e7d98a..04592dac09 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1465,6 +1465,7 @@ def __init__( else: latest_remote_file_name = None + log.info(f"bigning debug useing the new saver") self._checkpoint_saver = CheckpointSaverCallback( folder=folder, filename=save_filename, diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py index 9618d5f837..988fd4238e 100644 --- a/composer/utils/__init__.py +++ b/composer/utils/__init__.py @@ -74,6 +74,7 @@ UCObjectStore, ) from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config +from composer.utils.remote_uploader import RemoteUploader from composer.utils.retrying import retry from composer.utils.string_enum import StringEnum from composer.utils.warnings import VersionedDeprecationWarning @@ -156,4 +157,5 @@ 'ParallelismConfig', 'MLFLOW_EXPERIMENT_ID_FORMAT_KEY', 'MLFLOW_RUN_ID_FORMAT_KEY', + 'RemoteUploader', ] diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index ace687602a..ede4ea8b13 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -166,4 +166,4 @@ def wait_and_close(self): # make sure all workers are either running, or completed successfully self.wait() self.executor.shutdown(wait=True) - log.info('Finished all uploading tasks, closing RemoteUploader') + log.debug('Finished all uploading tasks, closing RemoteUploader') From 4e391a67e9fa97b889ce911ce38eff16606b3f60 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Sat, 8 Jun 2024 17:50:46 +0000 Subject: [PATCH 06/57] a --- composer/callbacks/checkpoint_saver_v2.py | 35 ++++++++------- composer/trainer/trainer.py | 12 +++-- composer/utils/remote_uploader.py | 53 ++++++++++++++++++++--- 3 files changed, 76 insertions(+), 24 deletions(-) 
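The ``RemoteUploader`` changes in this patch resolve MLflow placeholders in a ``dbfs:`` save path on rank 0 only and then share the result with the other ranks. A minimal sketch of that broadcast pattern, assuming only the ``composer.utils.dist`` helpers imported in the diff below (the resolved path value shown is a stand-in):

    from composer.utils.dist import broadcast_object_list, get_global_rank

    path_list = ['']
    if get_global_rank() == 0:
        # Only rank 0 talks to MLflow; the experiment/run IDs here are stand-ins.
        path_list[0] = 'databricks/mlflow-tracking/<experiment_id>/<run_id>/artifacts/checkpoints'
    broadcast_object_list(path_list, src=0)
    resolved_path = path_list[0]  # every rank now holds the same resolved path
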
diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index f75e460cf8..2385dae8b3 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -13,7 +13,7 @@ import textwrap from concurrent.futures import Future from pathlib import Path -from typing import Any, Callable, Optional, Union, List, Tuple +from typing import Any, Callable, List, Optional, Tuple, Union from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, MLFlowLogger @@ -21,6 +21,7 @@ FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, FORMAT_NAME_WITH_DIST_TABLE, PartialFilePath, + RemoteUploader, checkpoint, create_interval_scheduler, create_symlink_file, @@ -29,9 +30,8 @@ format_name_with_dist, format_name_with_dist_and_time, is_model_deepspeed, - partial_format, - RemoteUploader, parse_uri, + partial_format, ) from composer.utils.compression import get_compressor, is_compressed_pt from composer.utils.object_store.mlflow_object_store import MLFLOW_EXPERIMENT_ID_FORMAT_KEY, MLFLOW_RUN_ID_FORMAT_KEY @@ -330,12 +330,10 @@ def __init__( self.remote_uploader_futures: List[List[Future]] = [] self.symlink_file_tasks: List[Tuple(str, str)] = [] self.this_rank_saves_remote_symlinks: bool = False - self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() - if backend != "": - self.remote_uploader = RemoteUploader( - remote_folder = save_folder, - ) - + self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() + if backend != '': + self.remote_uploader = RemoteUploader(remote_folder=save_folder,) + self.count = 0 def init(self, state: State, logger: Logger) -> None: # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. @@ -363,7 +361,8 @@ def init(self, state: State, logger: Logger) -> None: self.latest_remote_file_name.filename, **mlflow_format_kwargs, ) - + if self.remote_uploader is not None: + self.remote_uploader.init_mlflow_path() break folder = format_name_with_dist(self.folder, state.run_name) @@ -427,7 +426,7 @@ def load_state_dict(self, state: dict[str, Any]): load_timestamp.load_state_dict(timestamp_state) self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp - def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool=True): + def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool = True): self.last_checkpoint_batch = state.timestamp.batch is_deepspeed = is_model_deepspeed(state.model) @@ -448,7 +447,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ignore_keys=self.ignore_keys, ) log.debug(f'Checkpoint locally saved to {saved_path}') - + # Wait the previous upload tasks on all ranks # self.wait() has dist.barrier, so it needs to be called # on all ranks before any early return @@ -512,7 +511,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up remote_file_name=metadata_remote_file_name, file_path=metadata_local_file_path, overwrite=self.overwrite, - ) + ), ) else: remote_file_name = self.remote_file_name.format( @@ -544,7 +543,9 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ).lstrip('/') + '.symlink' # create and upload a symlink file - symlink_filename = os.path.join(self.tmp_dir_for_symlink.name, f'latest.{len(self.saved_checkpoints)}.symlink') + symlink_filename = os.path.join( + self.tmp_dir_for_symlink.name, f'latest.{self.count}.symlink' + ) # 
Sharded checkpoints for torch >2.0 use directories not files for load_paths if state.fsdp_sharded_state_dict_enabled: src_path = str(pathlib.Path(remote_file_name).parent) @@ -558,6 +559,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up self.symlink_file_tasks.append((symlink_filename, symlink_name)) self.saved_checkpoints.append(saved_path) + self.count += 1 if self.num_checkpoints_to_keep >= 0: self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) @@ -566,7 +568,9 @@ def wait(self) -> None: # Wait remote uploader futures and start to upload the latest symlink file if necessary if self.this_rank_saves_remote_symlinks: if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): - raise RuntimeError(f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}') + raise RuntimeError( + f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}' + ) log.debug('Waiting for previous checkpoint files upload finish') for i in range(len(self.remote_uploader_futures)): for future in self.remote_uploader_futures[i]: @@ -586,7 +590,6 @@ def wait(self) -> None: ) self.symlink_file_tasks = [] - def _rotate_checkpoints(self, sharding_enabled: bool = False): while len(self.saved_checkpoints) > self.num_checkpoints_to_keep: diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 04592dac09..4d7a496c0a 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -54,7 +54,7 @@ else: from torch.cuda.amp.grad_scaler import GradScaler, _refresh_per_optimizer_state # type: ignore -from composer.callbacks import CheckpointSaver, MemorySnapshot, OOMObserver, OptimizerMonitor, CheckpointSaverCallback +from composer.callbacks import CheckpointSaver, CheckpointSaverCallback, MemorySnapshot, OOMObserver, OptimizerMonitor from composer.core import ( Algorithm, AlgorithmPass, @@ -1392,10 +1392,13 @@ def __init__( # the ``RemoteUploaderDownloader`` init. This is necessary to use an ``MLFlowObjectStore`` to log objects to a # run managed by an ``MLFlowLogger``, as the ``MLFlowObjectStore`` relies on the ``MLFlowLogger`` to initialize # the active MLFlow run. + + """ if save_folder is not None: remote_ud = maybe_create_remote_uploader_downloader_from_uri(save_folder, loggers) if remote_ud is not None: loggers.append(remote_ud) + """ # Logger self.logger = Logger(state=self.state, destinations=loggers) @@ -1419,7 +1422,10 @@ def __init__( self._checkpoint_saver = None latest_remote_file_name = None - _checkpoint_savers = [cb for cb in self.state.callbacks if (isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback))] + _checkpoint_savers = [ + cb for cb in self.state.callbacks + if (isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)) + ] if len(_checkpoint_savers) >= 1: if len(_checkpoint_savers) > 1: log.info('Multiple CheckpointSaver provided as callbacks. 
Using the first one as reference.') @@ -1465,7 +1471,7 @@ def __init__( else: latest_remote_file_name = None - log.info(f"bigning debug useing the new saver") + log.info(f'bigning debug useing the new saver') self._checkpoint_saver = CheckpointSaverCallback( folder=folder, filename=save_filename, diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index ede4ea8b13..74148cef18 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -14,11 +14,17 @@ from concurrent.futures import Future, ProcessPoolExecutor from typing import List -from composer.utils.dist import get_local_rank +from composer.utils.dist import broadcast_object_list, get_global_rank, get_local_rank from composer.utils.file_helpers import ( maybe_create_object_store_from_uri, + parse_uri, ) -from composer.utils.object_store.object_store import ObjectStore, ObjectStoreTransientError +from composer.utils.object_store.mlflow_object_store import MLFLOW_DBFS_PATH_PREFIX, MLFlowObjectStore +from composer.utils.object_store.object_store import ( + ObjectStore, + ObjectStoreTransientError, +) +from composer.utils.object_store.uc_object_store import UCObjectStore from composer.utils.retrying import retry log = logging.getLogger(__name__) @@ -26,16 +32,28 @@ __all__ = ['RemoteUploader'] +def _build_dbfs_backend(path: str) -> ObjectStore: + if path.startswith(MLFLOW_DBFS_PATH_PREFIX): + return MLFlowObjectStore(path=path) + UCObjectStore.validate_path(path) + return UCObjectStore(path=path) + + def _upload_file_to_object_store( remote_folder: str, + is_dbfs: bool, + dbfs_path: str, remote_file_name: str, local_file_path: str, overwrite: bool, num_attempts: int, ) -> int: - object_store: ObjectStore = maybe_create_object_store_from_uri( - remote_folder, - ) # pyright: ignore[reportGeneralTypeIssues] + if is_dbfs: + object_store: ObjectStore = _build_dbfs_backend(dbfs_path) + else: + object_store: ObjectStore = maybe_create_object_store_from_uri( + remote_folder, + ) # pyright: ignore[reportGeneralTypeIssues] @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(retry_index: int = 0): @@ -84,6 +102,11 @@ def __init__( # A folder to use for staging uploads self._tempdir = tempfile.TemporaryDirectory() self._upload_staging_folder = self._tempdir.name + backend, _, self.path = parse_uri(remote_folder) + + # Need some special handling for dbfs path + self._is_dbfs = backend == 'dbfs' + self._dbfs_backend: Optional[MLFlowObjectStore] = None self.num_attempts = num_attempts @@ -97,6 +120,24 @@ def __init__( # when check_workers() or wait() is called self.futures: List[Future] = [] + def init_mlflow_path(self): + # If it's dbfs path like: dbfs:/databricks/mlflow-tracking/{mlflow_experiment_id}/{mlflow_run_id}/ + # We need to fill out the experiment_id and run_id + if not self._is_dbfs: + return + if not self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): + return + log.info(f'bigning debug before path: {self.path}') + if get_global_rank() == 0: + if self._dbfs_backend is None: + self._dbfs_backend = _build_dbfs_backend(self.path) + assert isinstance(self._dbfs_backend, MLFlowObjectStore) + self.path = self._dbfs_backend.get_dbfs_path(self.path) + path_list = [self.path] + broadcast_object_list(path_list, src=0) + self.path = path_list[0] + log.info(f'bigning debug after path: {self.path}') + def upload_file_async( self, remote_file_name: str, @@ -116,6 +157,8 @@ def upload_file_async( # Async upload file future = self.executor.submit( _upload_file_to_object_store, 
+ is_dbfs=self._is_dbfs, + dbfs_path=self.path, remote_folder=self.remote_folder, remote_file_name=remote_file_name, local_file_path=copied_path, From 55ac5308bc7406b374e442d80a6915e5c7df4d35 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Sat, 8 Jun 2024 18:38:14 +0000 Subject: [PATCH 07/57] a --- composer/callbacks/checkpoint_saver_v2.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 2385dae8b3..5c7777cb74 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -292,6 +292,7 @@ def __init__( weights_only: bool = False, ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, save_folder: Optional[str] = None, + num_concurrent_uploads: int = 2, ): folder = str(folder) filename = str(filename) @@ -331,8 +332,12 @@ def __init__( self.symlink_file_tasks: List[Tuple(str, str)] = [] self.this_rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() + self.num_concurrent_uploads = num_concurrent_uploads if backend != '': - self.remote_uploader = RemoteUploader(remote_folder=save_folder,) + self.remote_uploader = RemoteUploader( + remote_folder=save_folder, + num_concurrent_uploads=self.num_concurrent_uploads, + ) self.count = 0 def init(self, state: State, logger: Logger) -> None: @@ -451,7 +456,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up # Wait the previous upload tasks on all ranks # self.wait() has dist.barrier, so it needs to be called # on all ranks before any early return - if wait_previous_remote_upload_tasks: + if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: self.wait() if not saved_path: # not all ranks save From e2d267bbc26e19181e86c1ef71a9426e8b4555d2 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Mon, 10 Jun 2024 18:51:03 +0000 Subject: [PATCH 08/57] a --- composer/callbacks/checkpoint_saver_v2.py | 7 ++- tests/trainer/test_checkpoint.py | 57 +++++++++++++++++++++++ tests/utils/test_remote_uploader.py | 6 ++- 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 5c7777cb74..cccd1b8729 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -596,7 +596,6 @@ def wait(self) -> None: self.symlink_file_tasks = [] def _rotate_checkpoints(self, sharding_enabled: bool = False): - while len(self.saved_checkpoints) > self.num_checkpoints_to_keep: prefix_dir = None checkpoint_to_delete = self.saved_checkpoints.pop(0) @@ -612,6 +611,12 @@ def batch_end(self, state: State, logger: Logger) -> None: if self.remote_uploader is not None: self.remote_uploader.check_workers() + def fit_end(self, state: State, logger: Logger) -> None: + del state, logger # unused + if self.remote_uploader is not None: + self.wait() + self.remote_uploader.wait() + def post_close(self): if self.remote_uploader is not None: # Wait the uploading tasks to finish and start symlink file uploading diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index d23b55875f..f088b0f60d 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -646,6 +646,63 @@ def test_checkpoint_multiple_callbacks( assert id(trainer._checkpoint_saver) == id(checkpoint_savers[0]) assert len([cb for cb in trainer.state.callbacks if isinstance(cb, 
CheckpointSaver)]) == len(checkpoint_savers) + + @pytest.mark.parametrize(('upload_success'), [True, False]) + def test_checkpoint_remote_symlink( + self, + upload_success: bool + ): + from tests.utils.test_remote_uploader import DummyObjectStore + import multiprocessing + fork_context = multiprocessing.get_context('fork') + tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(self): + return tmp_dir + + class _AlwaysFailDummyObjectStore(DummyObjectStore): + def upload_object(self, object_name, filename, callback=None): + # Only allows to upload symlink to simulate + # the situation that checkpoint file uploading fails + if 'symlink' in object_name: + return super().upload_object(object_name, filename, callback) + raise RuntimeError('Raise Error intentionally') + if upload_success: + MockObjectStore = DummyObjectStore + else: + MockObjectStore = _AlwaysFailDummyObjectStore + + with patch('composer.utils.file_helpers.S3ObjectStore', MockObjectStore): + with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): + with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): + train_dataset = RandomClassificationDataset(size=10) + train_dataloader = DataLoader( + dataset=train_dataset, + batch_size=2, + sampler=dist.get_sampler(train_dataset), + ) + + trainer = Trainer( + model=SimpleModel(), + train_dataloader=train_dataloader, + save_interval='1ba', + max_duration='1ba', + save_folder='S3://whatever/', + ) + symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') + if upload_success: + trainer.fit() + dir_list = os.listdir(tmp_dir.name) + with open(symlink_filepath, 'r') as f: + assert f.read() == "ep0-ba1-rank0.pt" + else: + from composer.callbacks.checkpoint_saver_v2 import CheckpointSaverCallback + with pytest.raises(RuntimeError, match='Raise Error intentionally'): + trainer.fit() + assert os.path.exists(symlink_filepath) == False + def post_close(self): + return + trainer._checkpoint_saver.post_close = post_close.__get__(trainer._checkpoint_saver, CheckpointSaverCallback) + class TestCheckpointLoading: diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index 847abb369d..e1d8e8e607 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -20,11 +20,14 @@ class DummyObjectStore(ObjectStore): """Dummy ObjectStore implementation that is backed by a local directory.""" def __init__(self, **kwargs: Dict[str, Any]) -> None: - self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_dir = self.get_tmp_dir() self.root = self.tmp_dir.name self.sleep_sec = 0 self.dest_filename = '' + def get_tmp_dir(self): + return tempfile.TemporaryDirectory() + def raise_error(self): return False @@ -38,6 +41,7 @@ def upload_object( raise RuntimeError('Raise Error intentionally') time.sleep(self.sleep_sec) dest_filename = pathlib.Path(self.root) / object_name + print(f"bigning debug {filename=}, {dest_filename=}") shutil.copy2(filename, dest_filename) self.dest_filename = dest_filename From 8035f502ff1f5f2b932ca9322dbd8ab43523a52e Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 00:00:56 +0000 Subject: [PATCH 09/57] fix test --- composer/callbacks/checkpoint_saver_v2.py | 14 ++- composer/trainer/trainer.py | 47 +++------ composer/utils/remote_uploader.py | 18 ++-- tests/trainer/test_checkpoint.py | 113 ++++++++++------------ tests/utils/test_remote_uploader.py | 12 ++- 5 files changed, 99 insertions(+), 105 deletions(-) diff --git 
a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index cccd1b8729..197e538237 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -333,7 +333,18 @@ def __init__( self.this_rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads + if backend != '': + if backend == 'wandb': + raise NotImplementedError( + f'There is no implementation for WandB via URI. Please use ' + 'WandBLogger with log_artifacts set to True', + ) + elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: + raise NotImplementedError( + f'There is no implementation for the cloud backend {backend} via URI. Please use ' + 'one of the supported RemoteUploaderDownloader object stores', + ) self.remote_uploader = RemoteUploader( remote_folder=save_folder, num_concurrent_uploads=self.num_concurrent_uploads, @@ -487,7 +498,8 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) # if remote file name provided, upload the checkpoint - if self.remote_file_name is not None: + #if self.remote_file_name is not None: + if self.remote_uploader is not None: futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 4d7a496c0a..2091f9368f 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1387,19 +1387,6 @@ def __init__( mosaicml_logger = MosaicMLLogger() loggers.append(mosaicml_logger) - # Remote Uploader Downloader - # Keep the ``RemoteUploaderDownloader`` below client-provided loggers so the loggers init callbacks run before - # the ``RemoteUploaderDownloader`` init. This is necessary to use an ``MLFlowObjectStore`` to log objects to a - # run managed by an ``MLFlowLogger``, as the ``MLFlowObjectStore`` relies on the ``MLFlowLogger`` to initialize - # the active MLFlow run. - - """ - if save_folder is not None: - remote_ud = maybe_create_remote_uploader_downloader_from_uri(save_folder, loggers) - if remote_ud is not None: - loggers.append(remote_ud) - """ - # Logger self.logger = Logger(state=self.state, destinations=loggers) @@ -1902,28 +1889,26 @@ def _try_checkpoint_download( self, latest_checkpoint_path: str, save_latest_remote_file_name: str, - loggers: Sequence[LoggerDestination], load_progress_bar: bool, ) -> None: """Attempts to download the checkpoint from the logger destinations.""" log.debug( f'Trying to download {save_latest_remote_file_name} to {latest_checkpoint_path} on rank {dist.get_global_rank()}', ) - for logger in loggers: - try: - # Fetch from logger. If it succeeds, stop trying the rest of the loggers - get_file( - path=save_latest_remote_file_name, - destination=latest_checkpoint_path, - object_store=logger, - overwrite=True, - progress_bar=load_progress_bar, - ) - break - except (NotImplementedError, FileNotFoundError): - log.info(f'Checkpoint not found in: {logger}') - # Ignore errors caused by no checkpoint saved with logger - pass + try: + # Fetch from logger. 
If it succeeds, stop trying the rest of the loggers + assert self._checkpoint_saver.remote_uploader is not None and self._checkpoint_saver.remote_uploader.object_store is not None + get_file( + path=save_latest_remote_file_name, + destination=latest_checkpoint_path, + object_store=self._checkpoint_saver.remote_uploader.object_store, + overwrite=True, + progress_bar=load_progress_bar, + ) + except (FileNotFoundError): + log.info(f'Checkpoint not found in: {self._checkpoint_saver.remote_uploader.object_store}') + # Ignore errors caused by no checkpoint saved with logger + pass def _get_autoresume_checkpoint( self, @@ -1951,7 +1936,7 @@ def _get_autoresume_checkpoint( f'Looking for autoresume checkpoint: {save_latest_remote_file_name} (remote), {latest_checkpoint_path} (local)', ) - if self.state.deepspeed_enabled or self.state.fsdp_sharded_state_dict_enabled: + if self.state.deepspeed_enabled: # If latest checkpoint is not saved locally, try to fetch from loggers if not os.path.exists(latest_checkpoint_path): log.debug(f'Attempting to download the checkpoint on to rank {dist.get_global_rank()}') @@ -1959,7 +1944,6 @@ def _get_autoresume_checkpoint( self._try_checkpoint_download( latest_checkpoint_path, save_latest_remote_file_name, - loggers, load_progress_bar, ) @@ -1994,7 +1978,6 @@ def _get_autoresume_checkpoint( self._try_checkpoint_download( latest_checkpoint_path, save_latest_remote_file_name, - loggers, load_progress_bar, ) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 74148cef18..ee4ae6e592 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -106,7 +106,7 @@ def __init__( # Need some special handling for dbfs path self._is_dbfs = backend == 'dbfs' - self._dbfs_backend: Optional[MLFlowObjectStore] = None + self.object_store: Optional[MLFlowObjectStore] = None self.num_attempts = num_attempts @@ -120,23 +120,25 @@ def __init__( # when check_workers() or wait() is called self.futures: List[Future] = [] - def init_mlflow_path(self): + def init(self): # If it's dbfs path like: dbfs:/databricks/mlflow-tracking/{mlflow_experiment_id}/{mlflow_run_id}/ # We need to fill out the experiment_id and run_id if not self._is_dbfs: + if self.object_store is None: + self.object_store = maybe_create_object_store_from_uri(self.remote_folder) return if not self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): + if self.object_store is None: + self.object_store = _build_dbfs_backend(self.path) return - log.info(f'bigning debug before path: {self.path}') if get_global_rank() == 0: - if self._dbfs_backend is None: - self._dbfs_backend = _build_dbfs_backend(self.path) - assert isinstance(self._dbfs_backend, MLFlowObjectStore) - self.path = self._dbfs_backend.get_dbfs_path(self.path) + if self.object_store is None: + self.object_store = _build_dbfs_backend(self.path) + assert isinstance(self.object_store, MLFlowObjectStore) + self.path = self.object_store.get_dbfs_path(self.path) path_list = [self.path] broadcast_object_list(path_list, src=0) self.path = path_list[0] - log.info(f'bigning debug after path: {self.path}') def upload_file_async( self, diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index f088b0f60d..1e28856ac8 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -309,30 +309,6 @@ def get_trainer(self, **kwargs): model = SimpleConvModel() return Trainer(model=model, **kwargs) - @pytest.mark.parametrize('add_remote_ud', [True, False]) - def 
test_s3_uri_creates_remote_ud(self, add_remote_ud: bool, monkeypatch: MonkeyPatch): - mock_validate_credentials = MagicMock() - monkeypatch.setattr(remote_uploader_downloader, '_validate_credentials', mock_validate_credentials) - if add_remote_ud: - with pytest.warns(UserWarning): - trainer = self.get_trainer( - save_folder='s3://bucket_name/{run_name}/checkpoints', - loggers=[ - RemoteUploaderDownloader('s3://bucket_name', file_path_format_string='{remote_file_name}'), - ], - ) - else: - trainer = self.get_trainer(save_folder='s3://bucket_name/{run_name}/checkpoints') - - remote_uds = [ - logger_dest for logger_dest in trainer.logger.destinations - if isinstance(logger_dest, RemoteUploaderDownloader) - ] - assert len(remote_uds) == 1 - remote_ud = remote_uds[0] - assert remote_ud.remote_backend_name == 's3' - assert remote_ud.remote_bucket_name == 'bucket_name' - @pytest.mark.parametrize('uri', ['wandb://foo/bar', 'gcs://foo/bar', 'sftp://foo/bar"']) def test_other_uris_error_out(self, uri: str): with pytest.raises(NotImplementedError): @@ -646,7 +622,6 @@ def test_checkpoint_multiple_callbacks( assert id(trainer._checkpoint_saver) == id(checkpoint_savers[0]) assert len([cb for cb in trainer.state.callbacks if isinstance(cb, CheckpointSaver)]) == len(checkpoint_savers) - @pytest.mark.parametrize(('upload_success'), [True, False]) def test_checkpoint_remote_symlink( self, @@ -816,50 +791,62 @@ def test_autoresume( if is_compressed_pt(latest_filename) and not get_compressor(latest_filename).exists: pytest.skip(reason=f'compressor not found for {latest_filename}') - trainer_1 = self.get_trainer( - latest_filename=latest_filename, - file_extension=file_extension, - save_folder='first', - device=device, - run_name='big-chungus', - autoresume=True, - loggers=[self.get_logger(tmp_path)] if use_object_store else [], - save_metrics=save_metrics, - ) + if use_object_store: + save_folder = 's3://bucket_name/first' + else: + save_folder = 'first' - # trains the model, saving the checkpoint files - trainer_1.fit() - trainer_1.close() + # Mock S3 object store + fork_context = multiprocessing.get_context('fork') + tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(): + return tmp_dir + with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): + with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): - if delete_local: - # delete files locally, forcing trainer to look in object store - shutil.rmtree('first') + trainer_1 = self.get_trainer( + latest_filename=latest_filename, + file_extension=file_extension, + save_folder=save_folder, + device=device, + run_name='big-chungus', + autoresume=True, + save_metrics=save_metrics, + ) - trainer_2 = self.get_trainer( - latest_filename=latest_filename, - save_folder='first', - device=device, - run_name='big-chungus', - autoresume=True, - load_path='ignore_me.pt', # this should be ignored - load_ignore_keys=['*'], # this should be ignored - loggers=[self.get_logger(tmp_path)] if use_object_store else [], - ) + # trains the model, saving the checkpoint files + trainer_1.fit() + trainer_1.close() + + if delete_local: + # delete files locally, forcing trainer to look in object store + shutil.rmtree('first') + + trainer_2 = self.get_trainer( + latest_filename=latest_filename, + save_folder=save_folder, + device=device, + run_name='big-chungus', + autoresume=True, + load_path='ignore_me.pt', # this should be 
ignored + load_ignore_keys=['*'], # this should be ignored + ) - self._assert_weights_equivalent( - trainer_1.state.model, - trainer_2.state.model, - ) + self._assert_weights_equivalent( + trainer_1.state.model, + trainer_2.state.model, + ) - if save_metrics: - assert self._metrics_equal( - trainer_1.state.train_metrics, - trainer_2.state.train_metrics, - trainer_1.state.eval_metrics, - trainer_2.state.eval_metrics, - ), 'Original metrics do not equal metrics from loaded checkpoint.' + if save_metrics: + assert self._metrics_equal( + trainer_1.state.train_metrics, + trainer_2.state.train_metrics, + trainer_1.state.eval_metrics, + trainer_2.state.eval_metrics, + ), 'Original metrics do not equal metrics from loaded checkpoint.' - assert trainer_1.state.run_name == trainer_2.state.run_name + assert trainer_1.state.run_name == trainer_2.state.run_name @pytest.mark.parametrize(('save_folder'), [None, 'first']) def test_autoresume_from_callback( diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index e1d8e8e607..5857730628 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -41,7 +41,6 @@ def upload_object( raise RuntimeError('Raise Error intentionally') time.sleep(self.sleep_sec) dest_filename = pathlib.Path(self.root) / object_name - print(f"bigning debug {filename=}, {dest_filename=}") shutil.copy2(filename, dest_filename) self.dest_filename = dest_filename @@ -50,6 +49,17 @@ def get_object_size(self, object_name: str) -> int: size = os.stat(object_path).st_size return size + def download_object( + self, + object_name: str, + filename: Union[str, pathlib.Path], + overwrite: bool = False, + callback: Optional[Callable[[int, int], None]] = None, + ): + object_path = pathlib.Path(self.root) / object_name + shutil.copy2(object_path, filename) + + def test_upload_mutliple_files(): fork_context = multiprocessing.get_context('fork') From e65110d46950cddac04fbad38808c77d34c28812 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 00:21:56 +0000 Subject: [PATCH 10/57] a --- composer/callbacks/checkpoint_saver_v2.py | 4 ++-- composer/trainer/trainer.py | 1 + tests/trainer/test_checkpoint.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 197e538237..4a269ee21d 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -377,10 +377,10 @@ def init(self, state: State, logger: Logger) -> None: self.latest_remote_file_name.filename, **mlflow_format_kwargs, ) - if self.remote_uploader is not None: - self.remote_uploader.init_mlflow_path() break + if self.remote_uploader is not None: + self.remote_uploader.init() folder = format_name_with_dist(self.folder, state.run_name) os.makedirs(folder, exist_ok=True) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 2091f9368f..5789484334 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1897,6 +1897,7 @@ def _try_checkpoint_download( ) try: # Fetch from logger. 
If it succeeds, stop trying the rest of the loggers + log.debug(f"bigning debug {self._checkpoint_saver.remote_uploader=}, {self._checkpoint_saver.remote_uploader.object_store}") assert self._checkpoint_saver.remote_uploader is not None and self._checkpoint_saver.remote_uploader.object_store is not None get_file( path=save_latest_remote_file_name, diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 1e28856ac8..f5a351da4b 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -5,6 +5,7 @@ import copy import io import os +import multiprocessing import pathlib import re import shutil @@ -52,6 +53,7 @@ device, ) from tests.common.markers import world_size +from tests.utils.test_remote_uploader import DummyObjectStore class DummyStatefulCallback(Callback): @@ -627,7 +629,6 @@ def test_checkpoint_remote_symlink( self, upload_success: bool ): - from tests.utils.test_remote_uploader import DummyObjectStore import multiprocessing fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() From 40cddfbde847f63223d802bb39bc36eb05a4e5a7 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 00:23:45 +0000 Subject: [PATCH 11/57] a --- tests/trainer/test_checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index f5a351da4b..7424a91e25 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -800,7 +800,7 @@ def test_autoresume( # Mock S3 object store fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() - def _get_tmp_dir(): + def _get_tmp_dir(self): return tmp_dir with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): From 91d838c219c7e22f5558832698c8f746af44f1af Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 20:25:22 +0000 Subject: [PATCH 12/57] a --- composer/callbacks/checkpoint_saver_v2.py | 2 +- composer/trainer/trainer.py | 9 ++++----- tests/trainer/test_checkpoint.py | 20 ++++++++++---------- tests/utils/test_remote_uploader.py | 2 ++ 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py index 4a269ee21d..8e1607366a 100644 --- a/composer/callbacks/checkpoint_saver_v2.py +++ b/composer/callbacks/checkpoint_saver_v2.py @@ -291,7 +291,7 @@ def __init__( num_checkpoints_to_keep: int = -1, weights_only: bool = False, ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, - save_folder: Optional[str] = None, + save_folder: str = '', num_concurrent_uploads: int = 2, ): folder = str(folder) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 5789484334..4f739adccf 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1895,10 +1895,10 @@ def _try_checkpoint_download( log.debug( f'Trying to download {save_latest_remote_file_name} to {latest_checkpoint_path} on rank {dist.get_global_rank()}', ) + if self._checkpoint_saver is None or self._checkpoint_saver.remote_uploader is None: + log.debug(f'Skip downloading from remote since no remote object_store found') + return try: - # Fetch from logger. 
If it succeeds, stop trying the rest of the loggers - log.debug(f"bigning debug {self._checkpoint_saver.remote_uploader=}, {self._checkpoint_saver.remote_uploader.object_store}") - assert self._checkpoint_saver.remote_uploader is not None and self._checkpoint_saver.remote_uploader.object_store is not None get_file( path=save_latest_remote_file_name, destination=latest_checkpoint_path, @@ -1907,8 +1907,7 @@ def _try_checkpoint_download( progress_bar=load_progress_bar, ) except (FileNotFoundError): - log.info(f'Checkpoint not found in: {self._checkpoint_saver.remote_uploader.object_store}') - # Ignore errors caused by no checkpoint saved with logger + log.info(f'Checkpoint not found in remote object store') pass def _get_autoresume_checkpoint( diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 7424a91e25..df83329b9c 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -24,7 +24,7 @@ from torch.utils.data import DataLoader, Dataset, DistributedSampler from composer.algorithms import NoOpModel -from composer.callbacks import CheckpointSaver +from composer.callbacks import CheckpointSaver, CheckpointSaverCallback from composer.core import Callback, Time, TimeUnit from composer.loggers import RemoteUploaderDownloader, remote_uploader_downloader from composer.metrics import MAP @@ -226,7 +226,7 @@ def test_ignore_params(remove_field_paths: list[list[str]], filter_params: list[ ], ) def test_checkpoint_saver_folder_filename_path(folder: Union[str, pathlib.Path], filename: Union[str, pathlib.Path]): - checkpoint_saver = CheckpointSaver(folder=folder, filename=filename) + checkpoint_saver = CheckpointSaverCallback(folder=folder, filename=filename) assert checkpoint_saver.folder == str(folder) assert checkpoint_saver.filename.filename == str(filename) @@ -237,7 +237,7 @@ def test_checkpoint_invalid_compressor(monkeypatch: pytest.MonkeyPatch): CompressorNotFound, match=re.escape('Could not find compressor for "foo.pt.unknown_compressor".'), ): - CheckpointSaver(filename='foo.pt.unknown_compressor') + CheckpointSaverCallback(filename='foo.pt.unknown_compressor') import composer.utils.compression monkeypatch.setattr( @@ -250,7 +250,7 @@ def test_checkpoint_invalid_compressor(monkeypatch: pytest.MonkeyPatch): CompressorNotFound, match=re.escape('Could not find command "unknown_compressor_cmd" in the PATH'), ): - CheckpointSaver(filename='foo.pt.unknown_compressor') + CheckpointSaverCallback(filename='foo.pt.unknown_compressor') @pytest.mark.parametrize( @@ -273,7 +273,7 @@ def test_checkpoint_filenames( latest_filename: Optional[Union[str, pathlib.Path]], latest_remote_file_name: Optional[Union[str, pathlib.Path]], ): - checkpoint_saver = CheckpointSaver( + checkpoint_saver = CheckpointSaverCallback( remote_file_name=remote_file_name, latest_filename=latest_filename, latest_remote_file_name=latest_remote_file_name, @@ -294,7 +294,7 @@ def test_checkpoint_filenames_none( latest_filename: Optional[Union[str, pathlib.Path]], latest_remote_file_name: Optional[Union[str, pathlib.Path]], ): - checkpoint_saver = CheckpointSaver( + checkpoint_saver = CheckpointSaverCallback( remote_file_name=remote_file_name, latest_filename=latest_filename, latest_remote_file_name=latest_remote_file_name, @@ -610,8 +610,8 @@ def test_checkpoint_multiple_callbacks( tmp_path: pathlib.Path, ): checkpoint_savers = [ - CheckpointSaver(str(tmp_path / 'checkpoints1')), - CheckpointSaver(str(tmp_path / 'checkpoints2')), + CheckpointSaverCallback(str(tmp_path / 
'checkpoints1')), + CheckpointSaverCallback(str(tmp_path / 'checkpoints2')), ] trainer = self.get_trainer( @@ -622,7 +622,7 @@ def test_checkpoint_multiple_callbacks( ) assert id(trainer._checkpoint_saver) == id(checkpoint_savers[0]) - assert len([cb for cb in trainer.state.callbacks if isinstance(cb, CheckpointSaver)]) == len(checkpoint_savers) + assert len([cb for cb in trainer.state.callbacks if isinstance(cb, CheckpointSaverCallback)]) == len(checkpoint_savers) @pytest.mark.parametrize(('upload_success'), [True, False]) def test_checkpoint_remote_symlink( @@ -855,7 +855,7 @@ def test_autoresume_from_callback( save_folder: Optional[str], tmp_path: pathlib.Path, ): - checkpoint_saver = CheckpointSaver(str(tmp_path / 'checkpoints'), latest_filename='latest-rank{rank}.pt') + checkpoint_saver = CheckpointSaverCallback(str(tmp_path / 'checkpoints'), latest_filename='latest-rank{rank}.pt') trainer_1 = self.get_trainer( file_extension='.pt', diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index 5857730628..a2c9abdead 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -41,6 +41,8 @@ def upload_object( raise RuntimeError('Raise Error intentionally') time.sleep(self.sleep_sec) dest_filename = pathlib.Path(self.root) / object_name + os.makedirs(os.path.dirname(dest_filename), exist_ok=True) + print(f"bigning debug {filename=}, {dest_filename=}") shutil.copy2(filename, dest_filename) self.dest_filename = dest_filename From a23552b5f714cf150f3ea3484304759e91a6b339 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 22:49:08 +0000 Subject: [PATCH 13/57] a --- composer/algorithms/ema/ema.py | 4 +- tests/algorithms/test_algorithm_resumption.py | 4 +- tests/algorithms/test_required_on_load.py | 4 +- tests/trainer/test_checkpoint.py | 71 ++++++++----------- 4 files changed, 34 insertions(+), 49 deletions(-) diff --git a/composer/algorithms/ema/ema.py b/composer/algorithms/ema/ema.py index dc917c3661..1a556a8b5c 100644 --- a/composer/algorithms/ema/ema.py +++ b/composer/algorithms/ema/ema.py @@ -13,7 +13,7 @@ import torch import composer.utils.misc as misc -from composer.callbacks.checkpoint_saver import CheckpointSaver +from composer.callbacks.checkpoint_saver import CheckpointSaver, CheckpointSaverCallback from composer.core import Algorithm, Event, State, Time, TimeUnit from composer.loggers import Logger @@ -247,7 +247,7 @@ def match(self, event: Event, state: State) -> bool: # Match on checkpointing events if a checkpoint is to be saved if event in [Event.BATCH_CHECKPOINT, Event.EPOCH_CHECKPOINT] and self.ema_started: - checkpoint_savers = [cb for cb in state.callbacks if isinstance(cb, CheckpointSaver)] + checkpoint_savers = [cb for cb in state.callbacks if isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)] for checkpoint_saver in checkpoint_savers: assert callable(checkpoint_saver.save_interval) if checkpoint_saver.save_interval(state, event) is True: diff --git a/tests/algorithms/test_algorithm_resumption.py b/tests/algorithms/test_algorithm_resumption.py index 9e8c51e3c9..b59c444c02 100644 --- a/tests/algorithms/test_algorithm_resumption.py +++ b/tests/algorithms/test_algorithm_resumption.py @@ -142,8 +142,8 @@ def _assert_checkpoints_equal(file1, file2): del checkpoint2['state']['run_name'] # Remove all saved checkpoints to timestamp (accumulates between runs) - del checkpoint1['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] - del 
checkpoint2['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] + del checkpoint1['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] + del checkpoint2['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] # Remove algorithm representations which are memory addresses for i, algo_info in enumerate(checkpoint1['state']['algorithms']): diff --git a/tests/algorithms/test_required_on_load.py b/tests/algorithms/test_required_on_load.py index 47ced249db..8fe56ebac1 100644 --- a/tests/algorithms/test_required_on_load.py +++ b/tests/algorithms/test_required_on_load.py @@ -12,7 +12,7 @@ from packaging import version from composer import Trainer, algorithms -from composer.callbacks import CheckpointSaver +from composer.callbacks import CheckpointSaver, CheckpointSaverCallback from composer.core import Algorithm, Event, Time, TimeUnit # type: ignore imports used in `eval(representation)` from composer.models import ComposerClassifier, ComposerModel from composer.utils import dist @@ -165,7 +165,7 @@ def test_autoload( save_folder=str(tmp_path), save_filename='ckpt.pt', ) - checkpoint_saver = [cb for cb in trainer1.state.callbacks if isinstance(cb, CheckpointSaver)][0] + checkpoint_saver = [cb for cb in trainer1.state.callbacks if isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)][0] checkpoint_saver._save_checkpoint(trainer1.state, trainer1.logger) context = contextlib.nullcontext() diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index df83329b9c..d8a865f647 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -115,8 +115,8 @@ def _assert_checkpoints_equivalent(file1, file2, atol=0.0, rtol=0.0): del ckpt['state']['callbacks']['DummyStatefulCallback'] # Remove all saved checkpoints to timestamp (accumulates between runs) - del checkpoint_1['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] - del checkpoint_2['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] + del checkpoint_1['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] + del checkpoint_2['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] deep_compare(checkpoint_1, checkpoint_2, atol=atol, rtol=rtol) @@ -742,25 +742,6 @@ def get_trainer( **kwargs, ) - def get_logger(self, tmp_path: pathlib.Path): - """Returns an object store logger that saves locally.""" - remote_dir = str(tmp_path / 'object_store') - os.makedirs(remote_dir, exist_ok=True) - - return RemoteUploaderDownloader( - bucket_uri='libcloud://.', - backend_kwargs={ - 'provider': 'local', - 'container': '.', - 'provider_kwargs': { - 'key': remote_dir, - }, - }, - num_concurrent_uploads=1, - use_procs=False, - upload_staging_folder=str(tmp_path / 'staging_folder'), - ) - @world_size(1, 2) @device('cpu', 'gpu') @pytest.mark.parametrize('file_extension', ['.pt', '.tar.gz', '.pt.lz4']) @@ -1011,7 +992,7 @@ def test_strict_errors(self, missing_key: bool, unexpected_key: bool): @device('cpu', 'gpu') @pytest.mark.parametrize('load_weights_only', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) - def test_load_weights(self, device, load_weights_only, save_metrics): + def _test_load_weights(self, device, load_weights_only, save_metrics): trainer_1 = self.get_trainer(save_folder='first', device=device, save_metrics=save_metrics) trainer_1.fit() @@ -1232,29 +1213,33 
@@ def _stateful_callbacks_equal(self, callbacks1, callbacks2): return cb1.random_value == cb2.random_value def test_load_weights_object_store(self, tmp_path): + # Mock S3 object store + fork_context = multiprocessing.get_context('fork') + tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(self): + return tmp_dir + with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): + with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): + save_folder = 's3://my_bucket/{run_name}/checkpoints' + trainer_1 = self.get_trainer( + save_folder=save_folder, + run_name='electric-zebra', + ) + trainer_1.fit() + trainer_1.close() - pytest.importorskip('libcloud') - - trainer_1 = self.get_trainer( - save_folder='{run_name}/checkpoints', - loggers=[self.get_logger(tmp_path)], - run_name='electric-zebra', - ) - trainer_1.fit() - trainer_1.close() - - trainer_2 = self.get_trainer( - loggers=[self.get_logger(tmp_path)], - run_name='electric-zebra', - load_path='electric-zebra/checkpoints/latest-rank0.pt', - load_object_store=self.get_logger(tmp_path), - ) + trainer_2 = self.get_trainer( + run_name='electric-zebra', + load_path='electric-zebra/checkpoints/latest-rank0.pt', + load_object_store=DummyObjectStore(), + ) - # check weights loaded properly - self._assert_weights_equivalent( - trainer_1.state.model, - trainer_2.state.model, - ) + # check weights loaded properly + self._assert_weights_equivalent( + trainer_1.state.model, + trainer_2.state.model, + ) @pytest.mark.parametrize( 'run_name,save_folder,save_overwrite,latest_filename', From cf4e0f18f44790f938d8c3f0b2244bb7ee73b360 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 11 Jun 2024 23:06:55 +0000 Subject: [PATCH 14/57] fix unit test --- composer/algorithms/ema/ema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/composer/algorithms/ema/ema.py b/composer/algorithms/ema/ema.py index 1a556a8b5c..9220b5411f 100644 --- a/composer/algorithms/ema/ema.py +++ b/composer/algorithms/ema/ema.py @@ -13,7 +13,8 @@ import torch import composer.utils.misc as misc -from composer.callbacks.checkpoint_saver import CheckpointSaver, CheckpointSaverCallback +from composer.callbacks.checkpoint_saver import CheckpointSaver +from composer.callbacks.checkpoint_saver_v2 import CheckpointSaverCallback from composer.core import Algorithm, Event, State, Time, TimeUnit from composer.loggers import Logger From e6884fc7dc13db94893c06a480b4afa63785be3f Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 00:16:27 +0000 Subject: [PATCH 15/57] a --- composer/algorithms/ema/ema.py | 3 +- composer/callbacks/__init__.py | 2 - composer/callbacks/checkpoint_saver.py | 216 +++--- composer/callbacks/checkpoint_saver_v2.py | 637 ------------------ composer/trainer/trainer.py | 11 +- tests/algorithms/test_algorithm_resumption.py | 4 +- tests/algorithms/test_required_on_load.py | 4 +- tests/trainer/test_checkpoint.py | 27 +- 8 files changed, 141 insertions(+), 763 deletions(-) delete mode 100644 composer/callbacks/checkpoint_saver_v2.py diff --git a/composer/algorithms/ema/ema.py b/composer/algorithms/ema/ema.py index 9220b5411f..dc917c3661 100644 --- a/composer/algorithms/ema/ema.py +++ b/composer/algorithms/ema/ema.py @@ -14,7 +14,6 @@ import composer.utils.misc as misc from composer.callbacks.checkpoint_saver import CheckpointSaver -from composer.callbacks.checkpoint_saver_v2 import 
CheckpointSaverCallback from composer.core import Algorithm, Event, State, Time, TimeUnit from composer.loggers import Logger @@ -248,7 +247,7 @@ def match(self, event: Event, state: State) -> bool: # Match on checkpointing events if a checkpoint is to be saved if event in [Event.BATCH_CHECKPOINT, Event.EPOCH_CHECKPOINT] and self.ema_started: - checkpoint_savers = [cb for cb in state.callbacks if isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)] + checkpoint_savers = [cb for cb in state.callbacks if isinstance(cb, CheckpointSaver)] for checkpoint_saver in checkpoint_savers: assert callable(checkpoint_saver.save_interval) if checkpoint_saver.save_interval(state, event) is True: diff --git a/composer/callbacks/__init__.py b/composer/callbacks/__init__.py index c536054740..b876826e3c 100644 --- a/composer/callbacks/__init__.py +++ b/composer/callbacks/__init__.py @@ -8,7 +8,6 @@ """ from composer.callbacks.activation_monitor import ActivationMonitor from composer.callbacks.checkpoint_saver import CheckpointSaver -from composer.callbacks.checkpoint_saver_v2 import CheckpointSaverCallback from composer.callbacks.early_stopper import EarlyStopper from composer.callbacks.eval_output_logging_callback import EvalOutputLogging from composer.callbacks.export_for_inference import ExportForInferenceCallback @@ -47,5 +46,4 @@ 'FreeOutputs', 'MemorySnapshot', 'OOMObserver', - 'CheckpointSaverCallback', ] diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 263558fc2b..a20be81cd2 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -11,8 +11,9 @@ import shutil import tempfile import textwrap +from concurrent.futures import Future from pathlib import Path -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, List, Optional, Tuple, Union from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, MLFlowLogger @@ -20,6 +21,7 @@ FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, FORMAT_NAME_WITH_DIST_TABLE, PartialFilePath, + RemoteUploader, checkpoint, create_interval_scheduler, create_symlink_file, @@ -28,6 +30,7 @@ format_name_with_dist, format_name_with_dist_and_time, is_model_deepspeed, + parse_uri, partial_format, ) from composer.utils.compression import get_compressor, is_compressed_pt @@ -42,23 +45,16 @@ class CheckpointSaver(Callback): # noqa: D101 __doc__ = f"""Callback to save checkpoints. - .. note:: - If the ``folder`` argument is specified when constructing the :class:`.Trainer`, then the :class:`.CheckpointSaver` callback need not be constructed manually. However, for advanced checkpointing use cases (such as saving a weights-only checkpoint at one interval and the full training state at another interval), instance(s) of this :class:`.CheckpointSaver` callback can be specified in the ``callbacks`` argument of the :class:`.Trainer`, as shown in the example below. - Example - .. testsetup:: - from composer.callbacks.checkpoint_saver import CheckpointSaver - .. doctest:: - >>> trainer = Trainer(..., callbacks=[ ... CheckpointSaver( ... folder='{{run_name}}/checkpoints', @@ -68,207 +64,142 @@ class CheckpointSaver(Callback): # noqa: D101 ... weights_only=False, ... ) ... ]) - Args: folder (str, optional): Format string for the save_folder where checkpoints will be saved. Default: ``'{{run_name}}/checkpoints'``. 
- The following format variables are available: - {textwrap.indent(FORMAT_NAME_WITH_DIST_TABLE, prefix=' ')} - .. note:: - When training with multiple devices (i.e. GPUs), ensure that ``'{{rank}}'`` appears in the format. Otherwise, multiple processes may attempt to write to the same file. - filename (str, optional): A format string describing how to name checkpoints. Default: ``'ep{{epoch}}-ba{{batch}}-rank{{rank}}.pt'``. - Checkpoints will be saved approximately to ``{{folder}}/{{filename.format(...)}}``. - The following format variables are available: - {textwrap.indent(FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, prefix=' ')} - - .. note:: - * By default, only the rank zero process will save a checkpoint file. - * When using DeepSpeed, each rank will save a checkpoint file in tarball format. DeepSpeed requires tarball format, as it saves model and optimizer states in separate files. Ensure that ``'{{rank}}'`` appears within the ``filename``. Otherwise, multiple ranks may attempt to write to the same file(s), leading to corrupted checkpoints. If no tarball file extension is specified, ``'.tar'`` will be used. - * To write to compressed tar files (regardless of whether DeepSpeed is enabled), set the file extension to ``'.tar.gz'``, ``'.tgz'``, ``'.tar.bz2'``, or ``'.tar.lzma'`` (depending on the desired compression algorithm). - * To write to compressed pt files (when DeepSpeed is disabled), set the file extension to ``'.pt.bz2'``, ``'.pt.gz'``, ``'.pt.lz4'``, ``'.pt.lzma'``, ``'.pt.lzo'``, ``'.pt.xz'``, ``'.pt.zst'`` (depending on the desired algorithm). You must have the corresponding CLI tool installed. ``lz4`` is a good choice for a modest space saving while being very fast to compress. - .. warning:: - Using compression will block the training loop while checkpoints are being compressed and the compressibility of checkpoints can vary significantly depending on your setup. As such, we recommend saving checkpoints without compression by default. - If you have the ``lz4`` command available on your system, you may want to try saving as ``.pt.lz4`` as the overhead is minimal (usually less than a second) and the saved space can sometimes be significant (1% - 40%). - Consider the following scenario where: - * The :attr:`~.State.run_name` is ``'awesome-training-run'`` * The default ``folder='{{run_name}}/checkpoints'`` is used. * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. * The current epoch count is ``1``. * The current batch count is ``42``. - When DeepSpeed is not being used, the rank zero process will save the checkpoint to ``"awesome-training-run/checkpoints/ep1-ba42-rank0"``. - When DeepSpeed is being used, each rank (process) will save checkpoints to:: - awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... - remote_file_name (str, optional): Format string for the checkpoint's remote file name. Default: ``"{{run_name}}/checkpoints/ep{{epoch}}-ba{{batch}}-rank{{rank}}"``. - After the checkpoint is saved, it will be periodically uploaded. The remote file name will be determined by this format string. - .. seealso:: :doc:`Uploading Files` for notes for file uploading. - The same format variables for ``filename`` are available. - Leading slashes (``'/'``) will be stripped. - To disable uploading checkpoints, set this parameter to ``None``. latest_filename (str, optional): A format string for a symlink which points to the last saved checkpoint. 
Default: ``'latest-rank{{rank}}.pt'``. - Symlinks will be created approximately at ``{{folder}}/{{latest_filename.format(...)}}``. - The same format variables as for ``name`` are available. - To disable symlinks, set this parameter to ``None``. - Consider the following scenario, where: - * The :attr:`~.State.run_name` is 'awesome-training-run' * The default ``folder='{{run_name}}/checkpoints'`` is used. * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. * The default ``latest_filename='latest-rank{{rank}}'`` is used. * The current epoch count is ``1``. * The current batch count is ``42``. - When DeepSpeed is not being used, the rank zero process will save the checkpoint to ``'awesome-training-run/checkpoints/ep1-ba42-rank0'``, and a symlink will be created at ``'awesome-training-run/checkpoints/latest-rank0' -> 'awesome-training-run/checkpoints/ep1-ba42-rank0'`` - When DeepSpeed is being used, each rank (process) will save checkpoints to:: - awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... - Corresponding symlinks will be created at:: - awesome-training-run/checkpoints/latest-rank0.tar -> awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/latest-rank1.tar -> awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/latest-rank2.tar -> awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... latest_remote_file_name (str, optional): Format string for the checkpoint's latest symlink remote file name. Default: ``'{{run_name}}/checkpoints/latest-rank{{rank}}"``. - Whenever a new checkpoint is saved, a symlink is created or updated to point to the latest checkpoint's ``remote_file_name``. The remote file name will be determined by this format string. This parameter has no effect if ``latest_filename`` or ``remote_file_name`` is ``None``. - .. seealso:: :doc:`Uploading Files` for notes for file uploading. - The same format variables for ``filename`` are available. - Leading slashes (``'/'``) will be stripped. - To disable symlinks in logger, set this parameter to ``None``. - overwrite (bool, optional): Whether existing checkpoints should be overridden. If ``False`` (the default), then the ``folder`` must not exist or must not contain checkpoints which may conflict with the current run. Default: ``False``. - save_interval (Time | str | int | (State, Event) -> bool): A :class:`.Time`, time-string, integer (in epochs), or a function that takes (state, event) and returns a boolean whether a checkpoint should be saved. - If an integer, checkpoints will be saved every n epochs. If :class:`.Time` or a time-string, checkpoints will be saved according to this interval. - .. seealso:: :func:`.checkpoint_periodically` - If a function, then this function should take two arguments (:class:`.State`, :class:`.Event`). The first argument will be the current state of the trainer, and the second argument will be be :attr:`.Event.BATCH_CHECKPOINT` or :attr:`.Event.EPOCH_CHECKPOINT` (depending on the current training progress). It should return ``True`` if a checkpoint should be saved given the current state and event. - num_checkpoints_to_keep (int, optional): The number of checkpoints to keep locally. The oldest checkpoints are removed first. Set to ``-1`` to keep all checkpoints locally. Default: ``-1``. - Checkpoints will be removed after they have been uploaded. 
For example, when this callback is used in conjunction with the :class:`.RemoteUploaderDownloader`, set this parameter to ``0`` to immediately delete checkpoints from the local disk after they have been uploaded to the object store. - This parameter only controls how many checkpoints are kept locally; checkpoints are not deleted from remote file systems. - weights_only (bool): If ``True``, save only the model weights instead of the entire training state. This parameter must be ``False`` when using DeepSpeed. Default: ``False``. - ignore_keys (list[str] | (dict) -> None, optional): A list of paths for the ``state_dict`` of the checkpoint, which, when provided, will be ignored from the state_dict before a checkpoint is saved. Each path is a list of strings specifying the keys to index into ``state_dict`` joined together with `/` as a separator (as PyTorch uses `.` in parameter names). If a prefix is provided, all children are also ignored (see Example 2). See :mod:`composer.core.state` for the structure of state_dict. - Example 1: ``save_ignore_keys = ["state/model/layer1.weights", "state/model/layer1.bias"]`` would ignore layer 1 weights and bias. - Example 2: ``save_ignore_keys = ["state/model/*"]`` would ignore the entire model, which would have the same effect as the previous example if there was only 1 layer. - Example 3: ``save_ignore_keys = ["state/model/layer*.weights"]`` would ignore all weights in the model. - Example 4: ``save_ignore_keys = ["state/rank_zero_seed", "rng"]`` would reset all randomness when saving the checkpoint. - If a callable, it should take one argument which is the state_dict. The callable is free to arbitrarily modify the state_dict before it is loaded. - (default: ``None``) - Attributes: saved_checkpoints (list[tuple[Timestamp, list[pathlib.Path]]]): The checkpoint timestamps and filepaths. - This list contains tuples of the save timestamp and the checkpoint filepaths. This list will have at most ``num_checkpoints_to_keep`` entries. The latest checkpoint will be at the end. - .. note:: - When using DeepSpeed, the index of a filepath in each list corresponds to the global rank of the process that wrote that file. Each filepath is valid only on the process's (rank's) node. - Otherwise, when not using DeepSpeed, each sub-list will contain only one filepath since only rank zero saves checkpoints. """ @@ -288,6 +219,8 @@ def __init__( num_checkpoints_to_keep: int = -1, weights_only: bool = False, ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, + save_folder: str = '', + num_concurrent_uploads: int = 2, ): folder = str(folder) filename = str(filename) @@ -321,6 +254,31 @@ def __init__( self.start_batch = None + self.remote_uploader = None + backend, _, _ = parse_uri(save_folder) + self.remote_uploader_futures: List[List[Future]] = [] + self.symlink_file_tasks: List[Tuple(str, str)] = [] + self.this_rank_saves_remote_symlinks: bool = False + self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() + self.num_concurrent_uploads = num_concurrent_uploads + + if backend != '': + if backend == 'wandb': + raise NotImplementedError( + f'There is no implementation for WandB via URI. Please use ' + 'WandBLogger with log_artifacts set to True', + ) + elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: + raise NotImplementedError( + f'There is no implementation for the cloud backend {backend} via URI. 
Please use ' + 'one of the supported RemoteUploaderDownloader object stores', + ) + self.remote_uploader = RemoteUploader( + remote_folder=save_folder, + num_concurrent_uploads=self.num_concurrent_uploads, + ) + self.count = 0 + def init(self, state: State, logger: Logger) -> None: # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. # Assumes that MLFlowLogger comes before CheckpointSaver in the list of loggers. @@ -347,9 +305,10 @@ def init(self, state: State, logger: Logger) -> None: self.latest_remote_file_name.filename, **mlflow_format_kwargs, ) - break + if self.remote_uploader is not None: + self.remote_uploader.init() folder = format_name_with_dist(self.folder, state.run_name) os.makedirs(folder, exist_ok=True) @@ -411,7 +370,7 @@ def load_state_dict(self, state: dict[str, Any]): load_timestamp.load_state_dict(timestamp_state) self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp - def _save_checkpoint(self, state: State, logger: Logger): + def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool = True): self.last_checkpoint_batch = state.timestamp.batch is_deepspeed = is_model_deepspeed(state.model) @@ -433,6 +392,12 @@ def _save_checkpoint(self, state: State, logger: Logger): ) log.debug(f'Checkpoint locally saved to {saved_path}') + # Wait the previous upload tasks on all ranks + # self.wait() has dist.barrier, so it needs to be called + # on all ranks before any early return + if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: + self.wait() + if not saved_path: # not all ranks save return @@ -461,7 +426,10 @@ def _save_checkpoint(self, state: State, logger: Logger): os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) # if remote file name provided, upload the checkpoint - if self.remote_file_name is not None: + #if self.remote_file_name is not None: + if self.remote_uploader is not None: + + futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: remote_file_name = self.remote_file_name.format( state, @@ -483,10 +451,12 @@ def _save_checkpoint(self, state: State, logger: Logger): state.timestamp, ) assert metadata_local_file_path is not None - logger.upload_file( - remote_file_name=metadata_remote_file_name, - file_path=metadata_local_file_path, - overwrite=self.overwrite, + futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=metadata_remote_file_name, + file_path=metadata_local_file_path, + overwrite=self.overwrite, + ), ) else: remote_file_name = self.remote_file_name.format( @@ -496,12 +466,20 @@ def _save_checkpoint(self, state: State, logger: Logger): log.debug(f'Uploading checkpoint to {remote_file_name}') try: - logger.upload_file(remote_file_name=remote_file_name, file_path=saved_path, overwrite=self.overwrite) + futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=remote_file_name, + file_path=saved_path, + overwrite=self.overwrite, + ), + ) except FileExistsError as e: raise FileExistsError( f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. 
To overwrite checkpoints with Trainer, set save_overwrite to True.', ) from e + self.remote_uploader_futures.append(futures) + # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: symlink_name = self.latest_remote_file_name.format( @@ -510,30 +488,54 @@ def _save_checkpoint(self, state: State, logger: Logger): ).lstrip('/') + '.symlink' # create and upload a symlink file - with tempfile.TemporaryDirectory() as tmpdir: - symlink_filename = os.path.join(tmpdir, 'latest.symlink') - # Sharded checkpoints for torch >2.0 use directories not files for load_paths - if state.fsdp_sharded_state_dict_enabled: - src_path = str(pathlib.Path(remote_file_name).parent) - else: - src_path = remote_file_name - log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: - create_symlink_file(src_path, symlink_filename) - logger.upload_file( - remote_file_name=symlink_name, - file_path=symlink_filename, - overwrite=True, - ) + symlink_filename = os.path.join( + self.tmp_dir_for_symlink.name, f'latest.{self.count}.symlink' + ) + # Sharded checkpoints for torch >2.0 use directories not files for load_paths + if state.fsdp_sharded_state_dict_enabled: + src_path = str(pathlib.Path(remote_file_name).parent) + else: + src_path = remote_file_name + log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') + this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if this_rank_saves_symlinks: + self.this_rank_saves_remote_symlinks = True + create_symlink_file(src_path, symlink_filename) + self.symlink_file_tasks.append((symlink_filename, symlink_name)) self.saved_checkpoints.append(saved_path) + self.count += 1 if self.num_checkpoints_to_keep >= 0: self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) - def _rotate_checkpoints(self, sharding_enabled: bool = False): + def wait(self) -> None: + # Wait remote uploader futures and start to upload the latest symlink file if necessary + if self.this_rank_saves_remote_symlinks: + if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): + raise RuntimeError( + f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}' + ) + log.debug('Waiting for previous checkpoint files upload finish') + for i in range(len(self.remote_uploader_futures)): + for future in self.remote_uploader_futures[i]: + future.result() + log.debug(f'Current rank finished existing uploading tasks') + self.remote_uploader_futures = [] + + dist.barrier() + log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') + if self.this_rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: + # Only upload the last symlink file + symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] + self.remote_uploader.upload_file_async( + remote_file_name=symlink_remote_filename, + file_path=symlink_local_filename, + overwrite=True, + ) + self.symlink_file_tasks = [] + def _rotate_checkpoints(self, sharding_enabled: bool = False): while len(self.saved_checkpoints) > self.num_checkpoints_to_keep: prefix_dir = None checkpoint_to_delete = self.saved_checkpoints.pop(0) @@ -543,3 +545,21 @@ def _rotate_checkpoints(self, sharding_enabled: bool = False): else: if dist.get_global_rank() == 
0: shutil.rmtree(prefix_dir) + + def batch_end(self, state: State, logger: Logger) -> None: + del state, logger # unused + if self.remote_uploader is not None: + self.remote_uploader.check_workers() + + def fit_end(self, state: State, logger: Logger) -> None: + del state, logger # unused + if self.remote_uploader is not None: + self.wait() + self.remote_uploader.wait() + + def post_close(self): + if self.remote_uploader is not None: + # Wait the uploading tasks to finish and start symlink file uploading + self.wait() + # Wait the symlink file upload to finish and close remote uploader + self.remote_uploader.wait_and_close() diff --git a/composer/callbacks/checkpoint_saver_v2.py b/composer/callbacks/checkpoint_saver_v2.py deleted file mode 100644 index 8e1607366a..0000000000 --- a/composer/callbacks/checkpoint_saver_v2.py +++ /dev/null @@ -1,637 +0,0 @@ -# Copyright 2022 MosaicML Composer authors -# SPDX-License-Identifier: Apache-2.0 - -"""Callback to save checkpoints during training.""" - -from __future__ import annotations - -import logging -import os -import pathlib -import shutil -import tempfile -import textwrap -from concurrent.futures import Future -from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple, Union - -from composer.core import Callback, Event, State, Time, Timestamp -from composer.loggers import Logger, MLFlowLogger -from composer.utils import ( - FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, - FORMAT_NAME_WITH_DIST_TABLE, - PartialFilePath, - RemoteUploader, - checkpoint, - create_interval_scheduler, - create_symlink_file, - dist, - ensure_folder_has_no_conflicting_files, - format_name_with_dist, - format_name_with_dist_and_time, - is_model_deepspeed, - parse_uri, - partial_format, -) -from composer.utils.compression import get_compressor, is_compressed_pt -from composer.utils.object_store.mlflow_object_store import MLFLOW_EXPERIMENT_ID_FORMAT_KEY, MLFLOW_RUN_ID_FORMAT_KEY - -log = logging.getLogger(__name__) - -__all__ = ['CheckpointSaverCallback'] - -_TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME = '.metadata' - - -class CheckpointSaverCallback(Callback): # noqa: D101 - __doc__ = f"""Callback to save checkpoints. - - .. note:: - - If the ``folder`` argument is specified when constructing the :class:`.Trainer`, then the :class:`.CheckpointSaver` - callback need not be constructed manually. However, for advanced checkpointing use cases - (such as saving a weights-only checkpoint at one interval and the full training state - at another interval), instance(s) of this :class:`.CheckpointSaver` callback can be specified in the - ``callbacks`` argument of the :class:`.Trainer`, as shown in the example below. - - Example - - .. testsetup:: - - from composer.callbacks.checkpoint_saver import CheckpointSaver - - .. doctest:: - - >>> trainer = Trainer(..., callbacks=[ - ... CheckpointSaver( - ... folder='{{run_name}}/checkpoints', - ... filename="ep{{epoch}}-ba{{batch}}-rank{{rank}}", - ... latest_filename="latest-rank{{rank}}", - ... save_interval="1ep", - ... weights_only=False, - ... ) - ... ]) - - Args: - folder (str, optional): Format string for the save_folder where checkpoints will be saved. - Default: ``'{{run_name}}/checkpoints'``. - - The following format variables are available: - - {textwrap.indent(FORMAT_NAME_WITH_DIST_TABLE, prefix=' ')} - - .. note:: - - When training with multiple devices (i.e. GPUs), ensure that ``'{{rank}}'`` appears in the format. - Otherwise, multiple processes may attempt to write to the same file. 
- - filename (str, optional): A format string describing how to name checkpoints. - Default: ``'ep{{epoch}}-ba{{batch}}-rank{{rank}}.pt'``. - - Checkpoints will be saved approximately to ``{{folder}}/{{filename.format(...)}}``. - - The following format variables are available: - - {textwrap.indent(FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, prefix=' ')} - - - .. note:: - - * By default, only the rank zero process will save a checkpoint file. - - * When using DeepSpeed, each rank will save a checkpoint file in tarball format. DeepSpeed - requires tarball format, as it saves model and optimizer states in separate files. - Ensure that ``'{{rank}}'`` appears within the ``filename``. Otherwise, multiple ranks - may attempt to write to the same file(s), leading to corrupted checkpoints. If no tarball file - extension is specified, ``'.tar'`` will be used. - - * To write to compressed tar files (regardless of whether DeepSpeed is enabled), set the file - extension to ``'.tar.gz'``, ``'.tgz'``, ``'.tar.bz2'``, or ``'.tar.lzma'`` (depending on the - desired compression algorithm). - - * To write to compressed pt files (when DeepSpeed is disabled), set the file extension to - ``'.pt.bz2'``, ``'.pt.gz'``, ``'.pt.lz4'``, ``'.pt.lzma'``, ``'.pt.lzo'``, ``'.pt.xz'``, - ``'.pt.zst'`` - (depending on the desired algorithm). You must have the corresponding CLI tool installed. - ``lz4`` is a good choice for a modest space saving while being very fast to compress. - - .. warning:: - - Using compression will block the training loop while checkpoints are being compressed and the - compressibility of checkpoints can vary significantly depending on your setup. As such, we - recommend saving checkpoints without compression by default. - - If you have the ``lz4`` command available on your system, you may want to try saving as ``.pt.lz4`` - as the overhead is minimal (usually less than a second) and the saved space can sometimes - be significant (1% - 40%). - - Consider the following scenario where: - - * The :attr:`~.State.run_name` is ``'awesome-training-run'`` - * The default ``folder='{{run_name}}/checkpoints'`` is used. - * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. - * The current epoch count is ``1``. - * The current batch count is ``42``. - - When DeepSpeed is not being used, the rank zero process will save the checkpoint to - ``"awesome-training-run/checkpoints/ep1-ba42-rank0"``. - - When DeepSpeed is being used, each rank (process) will save checkpoints to:: - - awesome-training-run/checkpoints/ep1-ba42-rank0.tar - awesome-training-run/checkpoints/ep1-ba42-rank1.tar - awesome-training-run/checkpoints/ep1-ba42-rank2.tar - ... - - remote_file_name (str, optional): Format string for the checkpoint's remote file name. - Default: ``"{{run_name}}/checkpoints/ep{{epoch}}-ba{{batch}}-rank{{rank}}"``. - - After the checkpoint is saved, it will be periodically uploaded. - The remote file name will be determined by this format string. - - .. seealso:: :doc:`Uploading Files` for notes for file uploading. - - The same format variables for ``filename`` are available. - - Leading slashes (``'/'``) will be stripped. - - To disable uploading checkpoints, set this parameter to ``None``. - latest_filename (str, optional): A format string for a symlink which points to the last saved checkpoint. - Default: ``'latest-rank{{rank}}.pt'``. - - Symlinks will be created approximately at ``{{folder}}/{{latest_filename.format(...)}}``. - - The same format variables as for ``name`` are available. 
- - To disable symlinks, set this parameter to ``None``. - - Consider the following scenario, where: - - * The :attr:`~.State.run_name` is 'awesome-training-run' - * The default ``folder='{{run_name}}/checkpoints'`` is used. - * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. - * The default ``latest_filename='latest-rank{{rank}}'`` is used. - * The current epoch count is ``1``. - * The current batch count is ``42``. - - When DeepSpeed is not being used, the rank zero process will save the checkpoint to - ``'awesome-training-run/checkpoints/ep1-ba42-rank0'``, - and a symlink will be created at - ``'awesome-training-run/checkpoints/latest-rank0' -> 'awesome-training-run/checkpoints/ep1-ba42-rank0'`` - - When DeepSpeed is being used, each rank (process) will save checkpoints to:: - - awesome-training-run/checkpoints/ep1-ba42-rank0.tar - awesome-training-run/checkpoints/ep1-ba42-rank1.tar - awesome-training-run/checkpoints/ep1-ba42-rank2.tar - ... - - Corresponding symlinks will be created at:: - - awesome-training-run/checkpoints/latest-rank0.tar -> awesome-training-run/checkpoints/ep1-ba42-rank0.tar - awesome-training-run/checkpoints/latest-rank1.tar -> awesome-training-run/checkpoints/ep1-ba42-rank1.tar - awesome-training-run/checkpoints/latest-rank2.tar -> awesome-training-run/checkpoints/ep1-ba42-rank2.tar - ... - latest_remote_file_name (str, optional): Format string for the checkpoint's latest symlink remote file name. - Default: ``'{{run_name}}/checkpoints/latest-rank{{rank}}"``. - - Whenever a new checkpoint is saved, a symlink is created or updated to point to the latest checkpoint's ``remote_file_name``. - The remote file name will be determined by this format string. This parameter has no effect if ``latest_filename`` or ``remote_file_name`` is ``None``. - - .. seealso:: :doc:`Uploading Files` for notes for file uploading. - - The same format variables for ``filename`` are available. - - Leading slashes (``'/'``) will be stripped. - - To disable symlinks in logger, set this parameter to ``None``. - - overwrite (bool, optional): Whether existing checkpoints should be overridden. - If ``False`` (the default), then the ``folder`` must not exist or must not contain checkpoints which may conflict - with the current run. Default: ``False``. - - save_interval (Time | str | int | (State, Event) -> bool): A :class:`.Time`, time-string, integer (in epochs), - or a function that takes (state, event) and returns a boolean whether a checkpoint should be saved. - - If an integer, checkpoints will be saved every n epochs. - If :class:`.Time` or a time-string, checkpoints will be saved according to this interval. - - .. seealso:: :func:`.checkpoint_periodically` - - If a function, then this function should take two arguments (:class:`.State`, :class:`.Event`). - The first argument will be the current state of the trainer, and the second argument will be - be :attr:`.Event.BATCH_CHECKPOINT` or :attr:`.Event.EPOCH_CHECKPOINT` (depending on the current training - progress). It should return ``True`` if a checkpoint should be saved given the current state and - event. - - num_checkpoints_to_keep (int, optional): The number of checkpoints to keep locally. The oldest checkpoints - are removed first. Set to ``-1`` to keep all checkpoints locally. Default: ``-1``. - - Checkpoints will be removed after they have been uploaded. 
For example, when this callback - is used in conjunction with the :class:`.RemoteUploaderDownloader`, set this - parameter to ``0`` to immediately delete checkpoints from the local disk after they have been uploaded to - the object store. - - This parameter only controls how many checkpoints are kept locally; checkpoints are not deleted from - remote file systems. - - weights_only (bool): If ``True``, save only the model weights instead of the entire training state. - This parameter must be ``False`` when using DeepSpeed. Default: ``False``. - - ignore_keys (list[str] | (dict) -> None, optional): A list of paths for the ``state_dict`` of the checkpoint, - which, when provided, will be ignored from the state_dict before a checkpoint is saved. Each path is a list - of strings specifying the keys to index into ``state_dict`` joined together with `/` as a separator (as PyTorch - uses `.` in parameter names). If a prefix is provided, all children are also ignored (see Example 2). - See :mod:`composer.core.state` for the structure of state_dict. - - Example 1: ``save_ignore_keys = ["state/model/layer1.weights", "state/model/layer1.bias"]`` would ignore - layer 1 weights and bias. - - Example 2: ``save_ignore_keys = ["state/model/*"]`` would ignore the entire model, which would have the same - effect as the previous example if there was only 1 layer. - - Example 3: ``save_ignore_keys = ["state/model/layer*.weights"]`` would ignore all weights in the model. - - Example 4: ``save_ignore_keys = ["state/rank_zero_seed", "rng"]`` would reset all randomness when - saving the checkpoint. - - If a callable, it should take one argument which is the state_dict. The callable is free to arbitrarily modify - the state_dict before it is loaded. - - (default: ``None``) - - Attributes: - saved_checkpoints (list[tuple[Timestamp, list[pathlib.Path]]]): The checkpoint timestamps and filepaths. - - This list contains tuples of the save timestamp and the checkpoint filepaths. - This list will have at most ``num_checkpoints_to_keep`` entries. The latest checkpoint - will be at the end. - - .. note:: - - When using DeepSpeed, the index of a filepath in each list corresponds to the global rank of - the process that wrote that file. Each filepath is valid only on the process's (rank's) node. - - Otherwise, when not using DeepSpeed, each sub-list will contain only one filepath since only rank zero - saves checkpoints. 
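A minimal usage sketch of the callable ``save_interval`` and the ``ignore_keys`` argument documented above (illustrative only, not part of this patch; the 500-batch threshold and the ignored key paths are arbitrary, and the sketch assumes the ``CheckpointSaver``, ``State``, and ``Event`` names imported in this file)::

    from composer.callbacks import CheckpointSaver
    from composer.core import Event, State

    def every_500_batches(state: State, event: Event) -> bool:
        # Called at checkpoint events; return True to trigger a save.
        return event == Event.BATCH_CHECKPOINT and state.timestamp.batch.value % 500 == 0

    saver = CheckpointSaver(
        folder='{run_name}/checkpoints',
        filename='ep{epoch}-ba{batch}-rank{rank}.pt',
        save_interval=every_500_batches,              # callable form of save_interval
        num_checkpoints_to_keep=2,                    # keep only the two newest local checkpoints
        ignore_keys=['state/rank_zero_seed', 'rng'],  # drops randomness, as in Example 4 above
    )
    # The callback is then passed to the Trainer via ``callbacks=[saver]``.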
- """ - - def __init__( - self, - folder: Union[str, pathlib.Path] = '{run_name}/checkpoints', - filename: Union[str, pathlib.Path] = 'ep{epoch}-ba{batch}-rank{rank}.pt', - remote_file_name: Optional[Union[str, pathlib.Path] - ] = ('{run_name}/checkpoints/' - 'ep{epoch}-ba{batch}-rank{rank}.pt'), - latest_filename: Optional[Union[str, pathlib.Path]] = 'latest-rank{rank}.pt', - latest_remote_file_name: Optional[Union[str, pathlib.Path]] = '{run_name}/checkpoints/latest-rank{rank}.pt', - save_interval: Union[Time, str, int, Callable[[State, Event], bool]] = '1ep', - *, - overwrite: bool = False, - num_checkpoints_to_keep: int = -1, - weights_only: bool = False, - ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, - save_folder: str = '', - num_concurrent_uploads: int = 2, - ): - folder = str(folder) - filename = str(filename) - remote_file_name = str(remote_file_name) if remote_file_name is not None else None - latest_filename = str(latest_filename) if latest_filename is not None else None - latest_remote_file_name = str(latest_remote_file_name) if latest_remote_file_name is not None else None - - # want to fail early if a required CLI tool is missing to ensure no training time is wasted - for name in [filename, remote_file_name, latest_filename, latest_remote_file_name]: - if name is not None and is_compressed_pt(name): - get_compressor(name).check_exists() - - if not callable(save_interval): - save_interval = create_interval_scheduler(save_interval) - self.save_interval = save_interval - self.last_checkpoint_batch: Optional[Time] = None - - self.folder = folder - - self.filename = PartialFilePath(filename.lstrip('/'), folder) - self.latest_filename = PartialFilePath(latest_filename.lstrip('/'), folder) if latest_filename else None - self.remote_file_name = PartialFilePath(remote_file_name) if remote_file_name else None - self.latest_remote_file_name = PartialFilePath(latest_remote_file_name) if latest_remote_file_name else None - - self.overwrite = overwrite - self.saved_checkpoints: list[str] = [] - self.all_saved_checkpoints_to_timestamp: dict[str, Timestamp] = {} - self.num_checkpoints_to_keep = num_checkpoints_to_keep - self.weights_only = weights_only - self.ignore_keys = ignore_keys - - self.start_batch = None - - self.remote_uploader = None - backend, _, _ = parse_uri(save_folder) - self.remote_uploader_futures: List[List[Future]] = [] - self.symlink_file_tasks: List[Tuple(str, str)] = [] - self.this_rank_saves_remote_symlinks: bool = False - self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() - self.num_concurrent_uploads = num_concurrent_uploads - - if backend != '': - if backend == 'wandb': - raise NotImplementedError( - f'There is no implementation for WandB via URI. Please use ' - 'WandBLogger with log_artifacts set to True', - ) - elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: - raise NotImplementedError( - f'There is no implementation for the cloud backend {backend} via URI. Please use ' - 'one of the supported RemoteUploaderDownloader object stores', - ) - self.remote_uploader = RemoteUploader( - remote_folder=save_folder, - num_concurrent_uploads=self.num_concurrent_uploads, - ) - self.count = 0 - - def init(self, state: State, logger: Logger) -> None: - # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. - # Assumes that MLFlowLogger comes before CheckpointSaver in the list of loggers. 
- for destination in logger.destinations: - if isinstance(destination, MLFlowLogger): - mlflow_format_kwargs = { - MLFLOW_EXPERIMENT_ID_FORMAT_KEY: destination._experiment_id, - MLFLOW_RUN_ID_FORMAT_KEY: destination._run_id, - } - self.folder = partial_format(self.folder, **mlflow_format_kwargs) - - self.filename.folder = self.folder - if self.latest_filename is not None: - self.latest_filename.folder = self.folder - - # The remote paths have the placeholders in their filename rather than folder - if self.remote_file_name is not None: - self.remote_file_name.filename = partial_format( - self.remote_file_name.filename, - **mlflow_format_kwargs, - ) - if self.latest_remote_file_name is not None: - self.latest_remote_file_name.filename = partial_format( - self.latest_remote_file_name.filename, - **mlflow_format_kwargs, - ) - break - - if self.remote_uploader is not None: - self.remote_uploader.init() - folder = format_name_with_dist(self.folder, state.run_name) - os.makedirs(folder, exist_ok=True) - - def fit_start(self, state: State, logger: Logger) -> None: - if not self.overwrite: - # checks that save_folder contains no files with a timestamp after the current timestamp, - # which has potential for future conflicts. - folder = format_name_with_dist(self.folder, state.run_name) - ensure_folder_has_no_conflicting_files(folder, self.filename.filename, state.timestamp) - - dist.barrier() # holds all ranks until folder check is done - - if is_model_deepspeed(state.model) and self.weights_only: - raise NotImplementedError('weights_only=True is not supported when using DeepSpeed.') - - self.start_batch = state.timestamp.batch - - def batch_checkpoint(self, state: State, logger: Logger): - assert callable(self.save_interval) - if self.save_interval(state, Event.BATCH_CHECKPOINT) and self.last_checkpoint_batch != state.timestamp.batch: - self._save_checkpoint( - state, - logger, - ) - - def epoch_checkpoint(self, state: State, logger: Logger): - assert callable(self.save_interval) - if self.save_interval(state, Event.EPOCH_CHECKPOINT) and self.last_checkpoint_batch != state.timestamp.batch: - self._save_checkpoint( - state, - logger, - ) - - def iteration_checkpoint(self, state: State, logger: Logger): - assert callable(self.save_interval) - if ( - self.save_interval(state, Event.ITERATION_CHECKPOINT) and - self.last_checkpoint_batch != state.timestamp.batch - ): - self._save_checkpoint( - state, - logger, - ) - - def state_dict(self) -> dict[str, Any]: - state_dict = {} - - all_checkpoints = [] - for save_filename, timestamp in self.all_saved_checkpoints_to_timestamp.items(): - all_checkpoints.append((save_filename, timestamp.state_dict())) - - state_dict['all_saved_checkpoints_to_timestamp'] = all_checkpoints - return state_dict - - def load_state_dict(self, state: dict[str, Any]): - if 'all_saved_checkpoints_to_timestamp' in state: - for (save_filename, timestamp_state) in state['all_saved_checkpoints_to_timestamp']: - load_timestamp = Timestamp() - load_timestamp.load_state_dict(timestamp_state) - self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp - - def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool = True): - self.last_checkpoint_batch = state.timestamp.batch - - is_deepspeed = is_model_deepspeed(state.model) - - if is_deepspeed and '{rank}' not in self.filename.filename: - raise ValueError(f'Save filename {self.filename.filename} must have {{rank}} for deepspeed.') - - # save the checkpoint to the filename - 
filename_with_placeholders = self.filename.format(state, is_deepspeed, keep_placeholders=True) - save_filename = checkpoint.get_save_filename(state, filename_with_placeholders) - # Store before saving so state_dict in checkpoint has reference to latest checkpoint (itself) - self.all_saved_checkpoints_to_timestamp[save_filename] = state.timestamp - - saved_path = checkpoint.save_checkpoint( - state=state, - filename=filename_with_placeholders, - weights_only=self.weights_only, - ignore_keys=self.ignore_keys, - ) - log.debug(f'Checkpoint locally saved to {saved_path}') - - # Wait the previous upload tasks on all ranks - # self.wait() has dist.barrier, so it needs to be called - # on all ranks before any early return - if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: - self.wait() - - if not saved_path: # not all ranks save - return - - metadata_local_file_path = None - if dist.get_global_rank() == 0 and state.fsdp_sharded_state_dict_enabled: - metadata_local_file_path = format_name_with_dist_and_time( - os.path.join(Path(saved_path).parent, _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME), - state.run_name, - state.timestamp, - ) - - if self.latest_filename is not None and self.num_checkpoints_to_keep != 0: - symlink = self.latest_filename.format(state, is_deepspeed) - os.makedirs(os.path.dirname(symlink), exist_ok=True) - try: - os.remove(symlink) - except FileNotFoundError: - pass - # Sharded checkpoints for torch >2.0 use directories not files for load_paths - if state.fsdp_sharded_state_dict_enabled: - src_path = str(pathlib.Path(saved_path).parent) - else: - src_path = saved_path - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: - os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) - - # if remote file name provided, upload the checkpoint - #if self.remote_file_name is not None: - if self.remote_uploader is not None: - - futures: List[Future] = [] - if state.fsdp_sharded_state_dict_enabled: - remote_file_name = self.remote_file_name.format( - state, - is_deepspeed, - keep_placeholders=True, - ).lstrip('/') - assert state.fsdp_config is not None - remote_prefix = state.fsdp_config.sharded_ckpt_prefix_dir - assert remote_prefix is not None - ckpt_filename = checkpoint._TORCH_DISTRIBUTED_CHECKPOINTS_FILENAME - remote_file_name = os.path.join(pathlib.Path(remote_file_name).parent, remote_prefix, ckpt_filename) - remote_file_name = format_name_with_dist_and_time(remote_file_name, state.run_name, state.timestamp) - # Upload metadata file. - # The metadata file contains info related to which shards are saved where. 
- if dist.get_global_rank() == 0 and state.fsdp_sharded_state_dict_enabled: - metadata_remote_file_name = format_name_with_dist_and_time( - os.path.join(Path(remote_file_name).parent, _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME), - state.run_name, - state.timestamp, - ) - assert metadata_local_file_path is not None - futures.append( - self.remote_uploader.upload_file_async( - remote_file_name=metadata_remote_file_name, - file_path=metadata_local_file_path, - overwrite=self.overwrite, - ), - ) - else: - remote_file_name = self.remote_file_name.format( - state, - is_deepspeed, - ).lstrip('/') - - log.debug(f'Uploading checkpoint to {remote_file_name}') - try: - futures.append( - self.remote_uploader.upload_file_async( - remote_file_name=remote_file_name, - file_path=saved_path, - overwrite=self.overwrite, - ), - ) - except FileExistsError as e: - raise FileExistsError( - f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. To overwrite checkpoints with Trainer, set save_overwrite to True.', - ) from e - - self.remote_uploader_futures.append(futures) - - # symlinks stay the same with sharded checkpointing - if self.latest_remote_file_name is not None: - symlink_name = self.latest_remote_file_name.format( - state, - is_deepspeed, - ).lstrip('/') + '.symlink' - - # create and upload a symlink file - symlink_filename = os.path.join( - self.tmp_dir_for_symlink.name, f'latest.{self.count}.symlink' - ) - # Sharded checkpoints for torch >2.0 use directories not files for load_paths - if state.fsdp_sharded_state_dict_enabled: - src_path = str(pathlib.Path(remote_file_name).parent) - else: - src_path = remote_file_name - log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: - self.this_rank_saves_remote_symlinks = True - create_symlink_file(src_path, symlink_filename) - self.symlink_file_tasks.append((symlink_filename, symlink_name)) - - self.saved_checkpoints.append(saved_path) - self.count += 1 - - if self.num_checkpoints_to_keep >= 0: - self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) - - def wait(self) -> None: - # Wait remote uploader futures and start to upload the latest symlink file if necessary - if self.this_rank_saves_remote_symlinks: - if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): - raise RuntimeError( - f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}' - ) - log.debug('Waiting for previous checkpoint files upload finish') - for i in range(len(self.remote_uploader_futures)): - for future in self.remote_uploader_futures[i]: - future.result() - log.debug(f'Current rank finished existing uploading tasks') - self.remote_uploader_futures = [] - - dist.barrier() - log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') - if self.this_rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: - # Only upload the last symlink file - symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] - self.remote_uploader.upload_file_async( - remote_file_name=symlink_remote_filename, - file_path=symlink_local_filename, - overwrite=True, - ) - self.symlink_file_tasks = [] - - def _rotate_checkpoints(self, sharding_enabled: bool = False): - while len(self.saved_checkpoints) > 
self.num_checkpoints_to_keep: - prefix_dir = None - checkpoint_to_delete = self.saved_checkpoints.pop(0) - prefix_dir = str(Path(checkpoint_to_delete).parent) - if not sharding_enabled: - os.remove(checkpoint_to_delete) - else: - if dist.get_global_rank() == 0: - shutil.rmtree(prefix_dir) - - def batch_end(self, state: State, logger: Logger) -> None: - del state, logger # unused - if self.remote_uploader is not None: - self.remote_uploader.check_workers() - - def fit_end(self, state: State, logger: Logger) -> None: - del state, logger # unused - if self.remote_uploader is not None: - self.wait() - self.remote_uploader.wait() - - def post_close(self): - if self.remote_uploader is not None: - # Wait the uploading tasks to finish and start symlink file uploading - self.wait() - # Wait the symlink file upload to finish and close remote uploader - self.remote_uploader.wait_and_close() diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 4f739adccf..20f5c561ef 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -54,7 +54,7 @@ else: from torch.cuda.amp.grad_scaler import GradScaler, _refresh_per_optimizer_state # type: ignore -from composer.callbacks import CheckpointSaver, CheckpointSaverCallback, MemorySnapshot, OOMObserver, OptimizerMonitor +from composer.callbacks import CheckpointSaver, MemorySnapshot, OOMObserver, OptimizerMonitor from composer.core import ( Algorithm, AlgorithmPass, @@ -1409,10 +1409,7 @@ def __init__( self._checkpoint_saver = None latest_remote_file_name = None - _checkpoint_savers = [ - cb for cb in self.state.callbacks - if (isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)) - ] + _checkpoint_savers = [cb for cb in self.state.callbacks if isinstance(cb, CheckpointSaver)] if len(_checkpoint_savers) >= 1: if len(_checkpoint_savers) > 1: log.info('Multiple CheckpointSaver provided as callbacks. 
Using the first one as reference.') @@ -1459,7 +1456,7 @@ def __init__( latest_remote_file_name = None log.info(f'bigning debug useing the new saver') - self._checkpoint_saver = CheckpointSaverCallback( + self._checkpoint_saver = CheckpointSaver( folder=folder, filename=save_filename, remote_file_name=remote_file_name, @@ -1813,8 +1810,10 @@ def __init__( log.info('No previous autoresume checkpoint found') # Actually load the checkpoint from potentially updated arguments if load_path is not None: + log.debug(f"bigning debug before: {load_object_store=}, {load_path=}") if load_object_store is None: load_object_store = maybe_create_object_store_from_uri(load_path) + log.debug(f"bigning debug after: {load_object_store=}, {load_path=}") if isinstance(load_object_store, WandBLogger): import wandb if wandb.run is None: diff --git a/tests/algorithms/test_algorithm_resumption.py b/tests/algorithms/test_algorithm_resumption.py index b59c444c02..9e8c51e3c9 100644 --- a/tests/algorithms/test_algorithm_resumption.py +++ b/tests/algorithms/test_algorithm_resumption.py @@ -142,8 +142,8 @@ def _assert_checkpoints_equal(file1, file2): del checkpoint2['state']['run_name'] # Remove all saved checkpoints to timestamp (accumulates between runs) - del checkpoint1['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] - del checkpoint2['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] + del checkpoint1['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] + del checkpoint2['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] # Remove algorithm representations which are memory addresses for i, algo_info in enumerate(checkpoint1['state']['algorithms']): diff --git a/tests/algorithms/test_required_on_load.py b/tests/algorithms/test_required_on_load.py index 8fe56ebac1..47ced249db 100644 --- a/tests/algorithms/test_required_on_load.py +++ b/tests/algorithms/test_required_on_load.py @@ -12,7 +12,7 @@ from packaging import version from composer import Trainer, algorithms -from composer.callbacks import CheckpointSaver, CheckpointSaverCallback +from composer.callbacks import CheckpointSaver from composer.core import Algorithm, Event, Time, TimeUnit # type: ignore imports used in `eval(representation)` from composer.models import ComposerClassifier, ComposerModel from composer.utils import dist @@ -165,7 +165,7 @@ def test_autoload( save_folder=str(tmp_path), save_filename='ckpt.pt', ) - checkpoint_saver = [cb for cb in trainer1.state.callbacks if isinstance(cb, CheckpointSaver) or isinstance(cb, CheckpointSaverCallback)][0] + checkpoint_saver = [cb for cb in trainer1.state.callbacks if isinstance(cb, CheckpointSaver)][0] checkpoint_saver._save_checkpoint(trainer1.state, trainer1.logger) context = contextlib.nullcontext() diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index d8a865f647..8097bcdd53 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -24,7 +24,7 @@ from torch.utils.data import DataLoader, Dataset, DistributedSampler from composer.algorithms import NoOpModel -from composer.callbacks import CheckpointSaver, CheckpointSaverCallback +from composer.callbacks import CheckpointSaver from composer.core import Callback, Time, TimeUnit from composer.loggers import RemoteUploaderDownloader, remote_uploader_downloader from composer.metrics import MAP @@ -115,8 +115,8 @@ def _assert_checkpoints_equivalent(file1, file2, atol=0.0, 
rtol=0.0): del ckpt['state']['callbacks']['DummyStatefulCallback'] # Remove all saved checkpoints to timestamp (accumulates between runs) - del checkpoint_1['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] - del checkpoint_2['state']['callbacks']['CheckpointSaverCallback']['all_saved_checkpoints_to_timestamp'] + del checkpoint_1['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] + del checkpoint_2['state']['callbacks']['CheckpointSaver']['all_saved_checkpoints_to_timestamp'] deep_compare(checkpoint_1, checkpoint_2, atol=atol, rtol=rtol) @@ -226,7 +226,7 @@ def test_ignore_params(remove_field_paths: list[list[str]], filter_params: list[ ], ) def test_checkpoint_saver_folder_filename_path(folder: Union[str, pathlib.Path], filename: Union[str, pathlib.Path]): - checkpoint_saver = CheckpointSaverCallback(folder=folder, filename=filename) + checkpoint_saver = CheckpointSaver(folder=folder, filename=filename) assert checkpoint_saver.folder == str(folder) assert checkpoint_saver.filename.filename == str(filename) @@ -237,7 +237,7 @@ def test_checkpoint_invalid_compressor(monkeypatch: pytest.MonkeyPatch): CompressorNotFound, match=re.escape('Could not find compressor for "foo.pt.unknown_compressor".'), ): - CheckpointSaverCallback(filename='foo.pt.unknown_compressor') + CheckpointSaver(filename='foo.pt.unknown_compressor') import composer.utils.compression monkeypatch.setattr( @@ -250,7 +250,7 @@ def test_checkpoint_invalid_compressor(monkeypatch: pytest.MonkeyPatch): CompressorNotFound, match=re.escape('Could not find command "unknown_compressor_cmd" in the PATH'), ): - CheckpointSaverCallback(filename='foo.pt.unknown_compressor') + CheckpointSaver(filename='foo.pt.unknown_compressor') @pytest.mark.parametrize( @@ -273,7 +273,7 @@ def test_checkpoint_filenames( latest_filename: Optional[Union[str, pathlib.Path]], latest_remote_file_name: Optional[Union[str, pathlib.Path]], ): - checkpoint_saver = CheckpointSaverCallback( + checkpoint_saver = CheckpointSaver( remote_file_name=remote_file_name, latest_filename=latest_filename, latest_remote_file_name=latest_remote_file_name, @@ -294,7 +294,7 @@ def test_checkpoint_filenames_none( latest_filename: Optional[Union[str, pathlib.Path]], latest_remote_file_name: Optional[Union[str, pathlib.Path]], ): - checkpoint_saver = CheckpointSaverCallback( + checkpoint_saver = CheckpointSaver( remote_file_name=remote_file_name, latest_filename=latest_filename, latest_remote_file_name=latest_remote_file_name, @@ -610,8 +610,8 @@ def test_checkpoint_multiple_callbacks( tmp_path: pathlib.Path, ): checkpoint_savers = [ - CheckpointSaverCallback(str(tmp_path / 'checkpoints1')), - CheckpointSaverCallback(str(tmp_path / 'checkpoints2')), + CheckpointSaver(str(tmp_path / 'checkpoints1')), + CheckpointSaver(str(tmp_path / 'checkpoints2')), ] trainer = self.get_trainer( @@ -622,7 +622,7 @@ def test_checkpoint_multiple_callbacks( ) assert id(trainer._checkpoint_saver) == id(checkpoint_savers[0]) - assert len([cb for cb in trainer.state.callbacks if isinstance(cb, CheckpointSaverCallback)]) == len(checkpoint_savers) + assert len([cb for cb in trainer.state.callbacks if isinstance(cb, CheckpointSaver)]) == len(checkpoint_savers) @pytest.mark.parametrize(('upload_success'), [True, False]) def test_checkpoint_remote_symlink( @@ -671,13 +671,12 @@ def upload_object(self, object_name, filename, callback=None): with open(symlink_filepath, 'r') as f: assert f.read() == "ep0-ba1-rank0.pt" else: - from 
composer.callbacks.checkpoint_saver_v2 import CheckpointSaverCallback with pytest.raises(RuntimeError, match='Raise Error intentionally'): trainer.fit() assert os.path.exists(symlink_filepath) == False def post_close(self): return - trainer._checkpoint_saver.post_close = post_close.__get__(trainer._checkpoint_saver, CheckpointSaverCallback) + trainer._checkpoint_saver.post_close = post_close.__get__(trainer._checkpoint_saver, CheckpointSaver) class TestCheckpointLoading: @@ -836,7 +835,7 @@ def test_autoresume_from_callback( save_folder: Optional[str], tmp_path: pathlib.Path, ): - checkpoint_saver = CheckpointSaverCallback(str(tmp_path / 'checkpoints'), latest_filename='latest-rank{rank}.pt') + checkpoint_saver = CheckpointSaver(str(tmp_path / 'checkpoints'), latest_filename='latest-rank{rank}.pt') trainer_1 = self.get_trainer( file_extension='.pt', From 99117661ffe6252ac57c50c65c4af5d063b62f5b Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 05:07:22 +0000 Subject: [PATCH 16/57] a --- composer/callbacks/checkpoint_saver.py | 58 ++++++++++++++++++-------- composer/utils/remote_uploader.py | 5 ++- tests/trainer/test_checkpoint.py | 25 +++++++---- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index a20be81cd2..ee8a73aa82 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -13,7 +13,7 @@ import textwrap from concurrent.futures import Future from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Union from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, MLFlowLogger @@ -257,7 +257,7 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) self.remote_uploader_futures: List[List[Future]] = [] - self.symlink_file_tasks: List[Tuple(str, str)] = [] + self.symlink_file_tasks: List[tuple[str, str]] = [] self.this_rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads @@ -426,8 +426,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) # if remote file name provided, upload the checkpoint - #if self.remote_file_name is not None: - if self.remote_uploader is not None: + if self.remote_file_name is not None: futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: @@ -451,13 +450,20 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up state.timestamp, ) assert metadata_local_file_path is not None - futures.append( - self.remote_uploader.upload_file_async( + if self.remote_uploader is not None: + futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=metadata_remote_file_name, + file_path=pathlib.Path(metadata_local_file_path), + overwrite=self.overwrite, + ), + ) + else: + logger.upload_file( remote_file_name=metadata_remote_file_name, file_path=metadata_local_file_path, overwrite=self.overwrite, - ), - ) + ) else: remote_file_name = self.remote_file_name.format( state, @@ -466,19 +472,27 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up log.debug(f'Uploading checkpoint to {remote_file_name}') try: - futures.append( - self.remote_uploader.upload_file_async( + if self.remote_uploader is not 
None: + futures.append( + self.remote_uploader.upload_file_async( + remote_file_name=remote_file_name, + file_path=pathlib.Path(saved_path), + overwrite=self.overwrite, + ), + ) + else: + logger.upload_file( remote_file_name=remote_file_name, file_path=saved_path, overwrite=self.overwrite, - ), - ) + ) except FileExistsError as e: raise FileExistsError( f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. To overwrite checkpoints with Trainer, set save_overwrite to True.', ) from e - self.remote_uploader_futures.append(futures) + if self.remote_uploader is not None: + self.remote_uploader_futures.append(futures) # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: @@ -489,7 +503,8 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up # create and upload a symlink file symlink_filename = os.path.join( - self.tmp_dir_for_symlink.name, f'latest.{self.count}.symlink' + self.tmp_dir_for_symlink.name, + f'latest.{self.count}.symlink', ) # Sharded checkpoints for torch >2.0 use directories not files for load_paths if state.fsdp_sharded_state_dict_enabled: @@ -501,7 +516,14 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up if this_rank_saves_symlinks: self.this_rank_saves_remote_symlinks = True create_symlink_file(src_path, symlink_filename) - self.symlink_file_tasks.append((symlink_filename, symlink_name)) + if self.remote_uploader is not None: + self.symlink_file_tasks.append((symlink_filename, symlink_name)) + else: + logger.upload_file( + remote_file_name=symlink_name, + file_path=symlink_filename, + overwrite=True, + ) self.saved_checkpoints.append(saved_path) self.count += 1 @@ -510,11 +532,13 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) def wait(self) -> None: + if self.remote_uploader is None: + return # Wait remote uploader futures and start to upload the latest symlink file if necessary if self.this_rank_saves_remote_symlinks: if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): raise RuntimeError( - f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}' + f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}', ) log.debug('Waiting for previous checkpoint files upload finish') for i in range(len(self.remote_uploader_futures)): @@ -530,7 +554,7 @@ def wait(self) -> None: symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] self.remote_uploader.upload_file_async( remote_file_name=symlink_remote_filename, - file_path=symlink_local_filename, + file_path=pathlib.Path(symlink_local_filename), overwrite=True, ) self.symlink_file_tasks = [] diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index ee4ae6e592..0b2b9ae249 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -12,7 +12,7 @@ import time import uuid from concurrent.futures import Future, ProcessPoolExecutor -from typing import List +from typing import List, Optional from composer.utils.dist import broadcast_object_list, get_global_rank, get_local_rank from composer.utils.file_helpers import ( @@ -106,7 +106,7 @@ def __init__( # Need some special handling for dbfs path self._is_dbfs = backend == 
'dbfs' - self.object_store: Optional[MLFlowObjectStore] = None + self.object_store: Optional[ObjectStore] = None self.num_attempts = num_attempts @@ -139,6 +139,7 @@ def init(self): path_list = [self.path] broadcast_object_list(path_list, src=0) self.path = path_list[0] + # TODO: add valdation def upload_file_async( self, diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 8097bcdd53..5336bb83ac 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -4,8 +4,8 @@ import contextlib import copy import io -import os import multiprocessing +import os import pathlib import re import shutil @@ -26,7 +26,7 @@ from composer.algorithms import NoOpModel from composer.callbacks import CheckpointSaver from composer.core import Callback, Time, TimeUnit -from composer.loggers import RemoteUploaderDownloader, remote_uploader_downloader +from composer.loggers import remote_uploader_downloader from composer.metrics import MAP from composer.optim import ExponentialScheduler from composer.trainer import trainer @@ -627,21 +627,24 @@ def test_checkpoint_multiple_callbacks( @pytest.mark.parametrize(('upload_success'), [True, False]) def test_checkpoint_remote_symlink( self, - upload_success: bool + upload_success: bool, ): import multiprocessing fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(self): return tmp_dir class _AlwaysFailDummyObjectStore(DummyObjectStore): + def upload_object(self, object_name, filename, callback=None): # Only allows to upload symlink to simulate # the situation that checkpoint file uploading fails if 'symlink' in object_name: return super().upload_object(object_name, filename, callback) - raise RuntimeError('Raise Error intentionally') + raise RuntimeError('Raise Error intentionally') + if upload_success: MockObjectStore = DummyObjectStore else: @@ -667,16 +670,20 @@ def upload_object(self, object_name, filename, callback=None): symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') if upload_success: trainer.fit() - dir_list = os.listdir(tmp_dir.name) with open(symlink_filepath, 'r') as f: - assert f.read() == "ep0-ba1-rank0.pt" + assert f.read() == 'ep0-ba1-rank0.pt' else: with pytest.raises(RuntimeError, match='Raise Error intentionally'): trainer.fit() assert os.path.exists(symlink_filepath) == False + def post_close(self): return - trainer._checkpoint_saver.post_close = post_close.__get__(trainer._checkpoint_saver, CheckpointSaver) + + assert trainer._checkpoint_saver is not None + trainer._checkpoint_saver.post_close = post_close.__get__( + trainer._checkpoint_saver, CheckpointSaver + ) class TestCheckpointLoading: @@ -780,8 +787,10 @@ def test_autoresume( # Mock S3 object store fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(self): return tmp_dir + with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): @@ -1215,8 +1224,10 @@ def test_load_weights_object_store(self, tmp_path): # Mock S3 object store fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() + def _get_tmp_dir(self): return tmp_dir + with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): with 
patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): From 36a1dc5d32b19e4b85ba7b6cc839e1426a395c76 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 16:27:29 +0000 Subject: [PATCH 17/57] a --- .../callbacks/.nfs000000000271bcd900004371 | Bin 0 -> 40960 bytes composer/callbacks/checkpoint_saver.py | 72 ++++++++++++++++++ composer/trainer/.nfs000000000270c2da00004372 | Bin 0 -> 221184 bytes composer/trainer/trainer.py | 1 + pyproject.toml | 2 +- tests/trainer/test_checkpoint.py | 6 +- 6 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 composer/callbacks/.nfs000000000271bcd900004371 create mode 100644 composer/trainer/.nfs000000000270c2da00004372 diff --git a/composer/callbacks/.nfs000000000271bcd900004371 b/composer/callbacks/.nfs000000000271bcd900004371 new file mode 100644 index 0000000000000000000000000000000000000000..ea1ee4ca08d823e60ac4c6e9a7d4468cbecd7421 GIT binary patch literal 40960 zcmeI53y@^jS>LZLgJff3UMVP3!1b zrnNoYt-jqmJ3B0i;!4?Z#j#U36&|q-wuGp{GDQreh%t6-9H~fLWtVXkwh3_vE-E3# z!$oW`G0N}z&N=s-dvEv5>{>uU&8_`!ci+c(eCIpg`CjMF>fzTdJrdtnJrZ$zOBDUs zy^pmHeCY8P-SS;gH0bq)a(cI;aqp{-z52fUj?OlYwHmX>TFKqB_4?h7*+%RBdk;5~ zd+x5^-I_fdMTa)J!$X_B!R2N=& zjMPuSx!2kA?)r=E?-~2~ro#1au)puMpC2e(|3>@!wEg_wh3mK3-{_1@{DQ$0{ax$r@%f1_9?JWfqe?>Q{e9< z1?tTxs?f9B#U3X9@9g`xzC4N^2EY9eqG%1=4W9kxDEc9AANb9eMbQd)2z>UNqNodA z37&ar6decO1-^*U{gdDf_$mhabKu{Ad%%}4=zjuS0N)Befdk-I!4HWe0DJ(4!8*7F zd=Tft8^Jx`V>l|_4(7J> z=?>$h-)pSLjoygw?KEClIkK`+jojVx>7HmNt@@}lV|bS6%dA!R_y zetkeMhEimYK{(y(CdHwWflaxI3I`kYVSI(Eq_{M$r*e+Rgc1x*eUQZUEA@6qF6Rg6 zysbLkYf)&7tX@(nAL9aqxfU?%SMhwWas-kF|@7hIpYAcU6Q)*Trkb{j6&G@De& zCxgYcPVHy6(}dyTo33t68|GL_TU9#VEDPf1FwWj%giMXA~C zG3)i4T#mEL1(HG8L9)TUvEC;9mi4bG?7PwKLh{uu!_|gKVl!&_q83`p>=fupE;2>O zY$k3GvvT_hQ>xi+kX5y18&)J@z;6d8$%I3DkwFJSwlqCeNvJZW%$Fz4ObobEc+Yoe zDlL&KPX*yarcflG;W17Gg#wPw96^vCJE{`&Xmwa0MB(%D{l>~T@&c4`xKzfY>q$30 zktF@|{RF;Xd{+~hX0me3#7UibD@#CJLHIP1G_|uum>?jd%<8$kG+kLaaAW;DoEuu9 zPq}SOf?WG#FtvHtl0h%FO*DL|EzA!#jf|vRceth1EylX$g^TgAqanA+W5q{qX8r!V zx8Ncv$BiYzGjg<)&s`SXacizT#u}Hl*ShT%5(G7Rvj_EP84n||hdt_>^ zBhH0gk!P%lowzbeM=F32rE*ujD+^H4=yjXZalPBr2hDnu_UjF}mpep@7`8W(G?Ipd z$5Jca>W$)c;d7}Hwd>k3L_Eo)8&X=fddI!4+&N&eONYtEj6Swe--lM*L$O(G@w7Ai>$mN#jRRcCRKLdYP-`O zZpFP;ZoW$I(NAcaCq2@;ne?e&+M0DM8IJnZcmcsbYOJF;C8c&>NOfTj8fJ8tPOouU ziRbjRxYO(PMH^rN3;QPOkP$ns)uQj1G0WA)D!N%8h{{)xHbOo*+%Uly^?0q<6Xl}U zX0(k$ve9dzI>&|qERjufO!e^9R@@TJgn@HyDbrSJs^mx|EJD%kfi%3c)*CQd8`IVJ z2jQWO5#?o!)aar4r+wH`h<5zc5>@p&al4xiVc+7qDK*K#3>3b(mWo~$>8Zkh_KwwS zN4aChJ+O8y`~}C{$3Nv;kPbamLRAe~+R99i9P$gKg1OYM{A!a|i4EWg!&7Q^s6mmJkpN1u{Kk}0ah zDUWlh!W{@gL5?DWWUW4Es&HZ|LLXRWkY9PRO)v+F6e`u1@1SU8{iK2Plm%P3fJzz0 zOgk&vfKq@QBBe01k`2axsh;^*)tiwGRjQV1b1T(lABQLwiy82)h zF4jIrV*h_HMzY4svHxBC|1;SBzX0A3u7NG^I&cA;1g{0N;QPSK!SnbBJ_|kqo&f(8 zJOEw>{uqD3$H9}}ZD1Q*2B*N?U>bZ4Kf>>UUk5)6{w;VTcmmuHK8A1Mqu{5(yTKN? 
[... base85-encoded binary patch data for composer/callbacks/.nfs000000000271bcd900004371 omitted ...]
z@G!Uyd>MQH)8H8(?+SPT+zy_@)|dSQZ?|^;SFrKl1>On7?mrA}1up`h$F_e8G{F*> z0)L53|KGsRfj59z@Hy=I1pEoM{BvLfd^`9UcD(E%coq0d?D#K&r@#|n0sIAa{I7yH zf^~2Xd_Q;|8vQ=_D0mXQ7rX%|?SyW^pzh#}vVNJ7ljtSH(^f}omDbdS?5*Nn?-1*2 zyXe=uj;x7Xk?wOkbe5H_%3YxWySQ zRcqU6`|9adx8c>fT>H(tN}N(3h1cA7tNu;upj*l`Lg1bWKPL}KLH_U1MbW4;`9AqR zM4r&RDhXwkvwW)*$vDjo@~1Y4h1}q4l_u~{MwVhtFf@xydF9Gp7*rk+76e_R#jj)1 zm^IMET1}Yjuvy=g60Nk^uchm}L6&HSQc>-#ntf$#d1{z!4%b%oq{zA_Te2mCqt(ML zCB7(m!G>KgwRn^R99**aH6{JD_|gv<=7X`3F>$W+d^)1scjNRaK2cof-S2E^EpuA2 z2nHp`Bf>4b&qudx?7%Cw7?U!|s|GIj{pPE3rMb)0=p}D;7p8Ns@QQ?B;bG@st{pp0 zy!@TcXjHzD&GzlJs$(B*t?3y|H0I9|7{kTP)u4bKtKWU$kav-k{f6VG_TSDTBX;7A z&o-k}y11+j$-)GoPr_NCc95LV4k_xV@WH~}*~&QKb_vXwg3UBnkEzd!HI)?-Ov@`j zv&2TLD{jIdA!NE#(m|spUwA2l6}PaB)KaleOcT0uU!#zVY1yz4J-7D-IY08iq`RQw zP6860UKQ%Sj9RW?Qm5+{b*Q}4KIZNkuXTjavB@zfQG7ve-N{P}8QQ%x)^mfjl(>Tt z{efh$$;6Tx9Y4UExioEsanT$GcA+R?gmuh~>UET+`Zp>{$#V3OHIaU^|0P!&^3tf2 zJbb!0JjoxYGMO&q!|1c;yZUhPE`#*C4;-Fe9BUR%KCVZg79ws(js% z$ZI!c{`$K08n=oU^;l@US9J!G?lenf^Uw`){SJ-FC#DFUjcYuTuX4~D6$+ZNAuK;9 zGe}N(zlYELlec%saOU{TBvtGDG&)Fveb6YI+9+>vGrP45 z9V4ewP69$dC-D_QvOVEiZ@)TO|MhU|;hcn1>6 zNx9~D@`UE)aUasNFA1~CqL`STJH@z^{r~r1^Dbfwi~T=u?f)-e?>`A7{{I^A67VJL z{4aq20NxJ%2X_8*;92mmz+1s1;5)!4vGso${1{jVYv35T75pOhzT^TdfZM=_u=$?` zn?UmZ-;a&|1XuuH!?yo9@H7~KOW=n=6?_UC|C8XCz%n=p#t^k4pJd8o}ZtmjkD@OHL@55U!aQ+d&GkJl2*FEA_1<753a1@A-shr-Nvu zZ}pU{71B2;En6-E0vqMhniqyRLHNRTudEhn$EFt9kHGd{Y1^b`$!e)>H-8e-xY%@| z!rox#F@NA`7;o>|yX!I9n!nAekYQ=pC#8FzllaJnO-hMem*f-GVAqPJ-t49^)p|vM z(v$~_W{{;!*=DjUxw-CMmg)i=_;8HIMGTi^IWPci_L0+~Wdv1SO85&CDLK&EMTzRF zjv-ANw@X3#ug$ITk^p>2LEJ5^sFwk@ajq$o##P#q6u zh;|@rpV3D=%cjol=@peg3dm#b&|=F&=AL(kuQHKX!kPW6F6mzyWuN#Q%``d^nt?u8E`Q(o97;>R}8 z_%^nSRJtxN=-PHBxu~7e_~PNN(|9|jg!Of-qT;>djefZ+D21fuyy0IgcD(s7r3i!H zxC=m@{OffCOrV@t9AmanR1Md+>|YPAnhZUih}WGXsVr5CqsMsUY}`n?86L!B^~H@U z(X1fzuTqmi^sBggF6WAj@8WMG*lNq|kw-!=LD>YCJ9N0pmt1Fr5_CjorgbV5xnXQf z%SpNk&x8&Mzfwqhumjy6iKc1hZ%^spYtyK-b@K+Q5VI&xC2gv0qakX-{+N7Z$o?E7 zAE)(A;c;2MNZTd^AmtTg5>b$_Id5t0@lyHib)qav@&XNXi;_6uEG|cw+pc>> trainer = Trainer(..., callbacks=[ ... CheckpointSaver( ... folder='{{run_name}}/checkpoints', @@ -64,142 +71,207 @@ class CheckpointSaver(Callback): # noqa: D101 ... weights_only=False, ... ) ... ]) + Args: folder (str, optional): Format string for the save_folder where checkpoints will be saved. Default: ``'{{run_name}}/checkpoints'``. + The following format variables are available: + {textwrap.indent(FORMAT_NAME_WITH_DIST_TABLE, prefix=' ')} + .. note:: + When training with multiple devices (i.e. GPUs), ensure that ``'{{rank}}'`` appears in the format. Otherwise, multiple processes may attempt to write to the same file. + filename (str, optional): A format string describing how to name checkpoints. Default: ``'ep{{epoch}}-ba{{batch}}-rank{{rank}}.pt'``. + Checkpoints will be saved approximately to ``{{folder}}/{{filename.format(...)}}``. + The following format variables are available: + {textwrap.indent(FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, prefix=' ')} + + .. note:: + * By default, only the rank zero process will save a checkpoint file. + * When using DeepSpeed, each rank will save a checkpoint file in tarball format. DeepSpeed requires tarball format, as it saves model and optimizer states in separate files. Ensure that ``'{{rank}}'`` appears within the ``filename``. Otherwise, multiple ranks may attempt to write to the same file(s), leading to corrupted checkpoints. If no tarball file extension is specified, ``'.tar'`` will be used. 
+ * To write to compressed tar files (regardless of whether DeepSpeed is enabled), set the file extension to ``'.tar.gz'``, ``'.tgz'``, ``'.tar.bz2'``, or ``'.tar.lzma'`` (depending on the desired compression algorithm). + * To write to compressed pt files (when DeepSpeed is disabled), set the file extension to ``'.pt.bz2'``, ``'.pt.gz'``, ``'.pt.lz4'``, ``'.pt.lzma'``, ``'.pt.lzo'``, ``'.pt.xz'``, ``'.pt.zst'`` (depending on the desired algorithm). You must have the corresponding CLI tool installed. ``lz4`` is a good choice for a modest space saving while being very fast to compress. + .. warning:: + Using compression will block the training loop while checkpoints are being compressed and the compressibility of checkpoints can vary significantly depending on your setup. As such, we recommend saving checkpoints without compression by default. + If you have the ``lz4`` command available on your system, you may want to try saving as ``.pt.lz4`` as the overhead is minimal (usually less than a second) and the saved space can sometimes be significant (1% - 40%). + Consider the following scenario where: + * The :attr:`~.State.run_name` is ``'awesome-training-run'`` * The default ``folder='{{run_name}}/checkpoints'`` is used. * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. * The current epoch count is ``1``. * The current batch count is ``42``. + When DeepSpeed is not being used, the rank zero process will save the checkpoint to ``"awesome-training-run/checkpoints/ep1-ba42-rank0"``. + When DeepSpeed is being used, each rank (process) will save checkpoints to:: + awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... + remote_file_name (str, optional): Format string for the checkpoint's remote file name. Default: ``"{{run_name}}/checkpoints/ep{{epoch}}-ba{{batch}}-rank{{rank}}"``. + After the checkpoint is saved, it will be periodically uploaded. The remote file name will be determined by this format string. + .. seealso:: :doc:`Uploading Files` for notes for file uploading. + The same format variables for ``filename`` are available. + Leading slashes (``'/'``) will be stripped. + To disable uploading checkpoints, set this parameter to ``None``. latest_filename (str, optional): A format string for a symlink which points to the last saved checkpoint. Default: ``'latest-rank{{rank}}.pt'``. + Symlinks will be created approximately at ``{{folder}}/{{latest_filename.format(...)}}``. + The same format variables as for ``name`` are available. + To disable symlinks, set this parameter to ``None``. + Consider the following scenario, where: + * The :attr:`~.State.run_name` is 'awesome-training-run' * The default ``folder='{{run_name}}/checkpoints'`` is used. * The default ``name='ep{{epoch}}-ba{{batch}}-rank{{rank}}'`` is used. * The default ``latest_filename='latest-rank{{rank}}'`` is used. * The current epoch count is ``1``. * The current batch count is ``42``. + When DeepSpeed is not being used, the rank zero process will save the checkpoint to ``'awesome-training-run/checkpoints/ep1-ba42-rank0'``, and a symlink will be created at ``'awesome-training-run/checkpoints/latest-rank0' -> 'awesome-training-run/checkpoints/ep1-ba42-rank0'`` + When DeepSpeed is being used, each rank (process) will save checkpoints to:: + awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... 
+ Corresponding symlinks will be created at:: + awesome-training-run/checkpoints/latest-rank0.tar -> awesome-training-run/checkpoints/ep1-ba42-rank0.tar awesome-training-run/checkpoints/latest-rank1.tar -> awesome-training-run/checkpoints/ep1-ba42-rank1.tar awesome-training-run/checkpoints/latest-rank2.tar -> awesome-training-run/checkpoints/ep1-ba42-rank2.tar ... latest_remote_file_name (str, optional): Format string for the checkpoint's latest symlink remote file name. Default: ``'{{run_name}}/checkpoints/latest-rank{{rank}}"``. + Whenever a new checkpoint is saved, a symlink is created or updated to point to the latest checkpoint's ``remote_file_name``. The remote file name will be determined by this format string. This parameter has no effect if ``latest_filename`` or ``remote_file_name`` is ``None``. + .. seealso:: :doc:`Uploading Files` for notes for file uploading. + The same format variables for ``filename`` are available. + Leading slashes (``'/'``) will be stripped. + To disable symlinks in logger, set this parameter to ``None``. + overwrite (bool, optional): Whether existing checkpoints should be overridden. If ``False`` (the default), then the ``folder`` must not exist or must not contain checkpoints which may conflict with the current run. Default: ``False``. + save_interval (Time | str | int | (State, Event) -> bool): A :class:`.Time`, time-string, integer (in epochs), or a function that takes (state, event) and returns a boolean whether a checkpoint should be saved. + If an integer, checkpoints will be saved every n epochs. If :class:`.Time` or a time-string, checkpoints will be saved according to this interval. + .. seealso:: :func:`.checkpoint_periodically` + If a function, then this function should take two arguments (:class:`.State`, :class:`.Event`). The first argument will be the current state of the trainer, and the second argument will be be :attr:`.Event.BATCH_CHECKPOINT` or :attr:`.Event.EPOCH_CHECKPOINT` (depending on the current training progress). It should return ``True`` if a checkpoint should be saved given the current state and event. + num_checkpoints_to_keep (int, optional): The number of checkpoints to keep locally. The oldest checkpoints are removed first. Set to ``-1`` to keep all checkpoints locally. Default: ``-1``. + Checkpoints will be removed after they have been uploaded. For example, when this callback is used in conjunction with the :class:`.RemoteUploaderDownloader`, set this parameter to ``0`` to immediately delete checkpoints from the local disk after they have been uploaded to the object store. + This parameter only controls how many checkpoints are kept locally; checkpoints are not deleted from remote file systems. + weights_only (bool): If ``True``, save only the model weights instead of the entire training state. This parameter must be ``False`` when using DeepSpeed. Default: ``False``. + ignore_keys (list[str] | (dict) -> None, optional): A list of paths for the ``state_dict`` of the checkpoint, which, when provided, will be ignored from the state_dict before a checkpoint is saved. Each path is a list of strings specifying the keys to index into ``state_dict`` joined together with `/` as a separator (as PyTorch uses `.` in parameter names). If a prefix is provided, all children are also ignored (see Example 2). See :mod:`composer.core.state` for the structure of state_dict. + Example 1: ``save_ignore_keys = ["state/model/layer1.weights", "state/model/layer1.bias"]`` would ignore layer 1 weights and bias. 
+ Example 2: ``save_ignore_keys = ["state/model/*"]`` would ignore the entire model, which would have the same effect as the previous example if there was only 1 layer. + Example 3: ``save_ignore_keys = ["state/model/layer*.weights"]`` would ignore all weights in the model. + Example 4: ``save_ignore_keys = ["state/rank_zero_seed", "rng"]`` would reset all randomness when saving the checkpoint. + If a callable, it should take one argument which is the state_dict. The callable is free to arbitrarily modify the state_dict before it is loaded. + (default: ``None``) + Attributes: saved_checkpoints (list[tuple[Timestamp, list[pathlib.Path]]]): The checkpoint timestamps and filepaths. + This list contains tuples of the save timestamp and the checkpoint filepaths. This list will have at most ``num_checkpoints_to_keep`` entries. The latest checkpoint will be at the end. + .. note:: + When using DeepSpeed, the index of a filepath in each list corresponds to the global rank of the process that wrote that file. Each filepath is valid only on the process's (rank's) node. + Otherwise, when not using DeepSpeed, each sub-list will contain only one filepath since only rank zero saves checkpoints. """ diff --git a/composer/trainer/.nfs000000000270c2da00004372 b/composer/trainer/.nfs000000000270c2da00004372 new file mode 100644 index 0000000000000000000000000000000000000000..317b997c4e152ed9332807c98dc4d175bac74741 GIT binary patch literal 221184 zcmeFa3!G$0Rp*~Fh{#iA5n*}U=@u({raJpE4~Cj4pr^ZfDCn16)iXfTW@c4pbthew zS(BO7JvBoEDk?tK2f894eiRiDMIJ6IsEZ4(?jIFc*45R0!HSQ?1$^RysO7%qdm)mwe`WMJB%bQ(HWI_3~8;T&2KO3S6bYRSI0C zz*P!drNC7RT&2MOjT9KK-&lJo6?>4c+$;R|7nQyr;(x!;e}8M~{+IaQNBsBim+n8* z|6cIl`+No=pD!)le|hQt!~FfP^WXombpPT0`hNd?ldn)n{}KNB&Hnpr>HZ^2@4v2e z|55(_VgLQ1()~yK>oflQvDdhNLjIUm>~oX#xuG@9!?%#~fsz=lJj6EZu)x>Hbr`*j)(ef0e&K z@4tJc`;Yh6kNNL+lh-xs%b(7*q<()}m<>!E&kmF_>q-w)&G$4mD!e}B<`|7q#|wf=hOe{+7o zh4St2_e1$!TDrf}-#_fX-&eZ-)&Ba|`tOJO0UOePs=t1R|9*by{@3{HA^jgN-JkOJ zpXpme|PuRq&=@AC{HHZ#nz30DYJ;Ms=o6&}QC;a_4lnwUP%AlrTc-eo%ZiP^il4gkiKU}wOjrD<h6T1}$Db0WN?$!71>S;AfHN-wuv~Bj8i$7ylmo z5_lW6;qgJrM?=D{<;Ua$jvIrtnz`+e{qz`McEg0tWd*aIF9{tO-Mx4?UV z^ttZ@-vQRZ3&1h(H1JgL<>0T-IsXgzHSm7044w}DJIdwHftP_>!GpmcqL981{1|v8 z_y*7f$H3FTF7O2KSp@HogAap$3*HG{2VMczz)5gBxDh-NJOun1ec@NZ_kov!b?`!P z2bcruD+fXT*;UK<47+%9bT*cIOTE?Jcyrn=uD6HdUVF8^95Ktv8nktE=4a4f-y}-l%!cYP;F%kH&58bn>gNe4TE0eYD;U*Uxsx&6VD& zFRgxcx)*xO$?xFN((LvxG&{XzUwKD}X1~4GZC>n+&v9$yZ*}j%*M$>gbmoITqe`s@AC2K5?46rX8K$dhel)KNA2E4{Pso#k`g<@4)59OKTURWHq`y-Xu|2%OpN$musgix!!(OP=>xRHH9RCjM3?TtpH z$TOXua%iv4Mt3=k@V;=L5hcpToOK&pt)E)EU zp76g*j}6wxy|s|uLGN?5;LAFk&JJ~#2kKFsg*y)2HEk3lRXUOL&f2ozHb!!ihJ=r5 zWS2_d`tYc+HW;;g%WJDmcOjJN=<$UE^9PR}X&yLuaBg9txp@4!bH|!<$L?(2dEn%9 zmft>cZ)^W;njzc-)aQy}ZqublV^rfR#wxdT1 z*Nz-s9b8PWxc?7zN8?`KhK@@!eS|(g8mxB1rD8{0wPNN(tNSW25*h!D>~*n2x{ymY zT(&_y=|+daUbx;K!ew)|RbT`wy?dHVu8T45dtw6O(D7-Pr;;3P_LueU81Ce*9y)Yl zfr|?alyUbge7;yMv7upaX=B{&ghnk7F~=(NnjdnUlg=j&dho<4^+Wx8NBsBbiG^@= z@x&=lDzRNLE?0pwHme^_JqthAl z`wF8Fe#hKy97?G&aYM~4+nJ3w*Sj-W?`)sC)Viyq?o2tSFLWlom5eHCSTq=mqi)?* zsM+*ao6TL>GiFQAOzm&n(AYb*3ln@K!djQXu$)u)sQpatySb>L4j7tAz2JxmI`sHB zm=>rc3_g9=Xw_JSRDpwsG>)9inR@y{-|Aztc6c($K^cchJ$)+~(G9Vm#^_vo*y(nf z6WVpb-HOe-Z9>iAMxRcEXqV2&O+10e@TcYT7s&@P3z4B2I_q}ng*qMHSY6%pCE2P{ zSJpYx+?t(Dq_#oGBy|4{YyvLro?NHUZ3gt&@RaW`H7{(qeq^<^HZNZ9_p|mW>-URS z6Y>{sgdfX;!72soBJ|t1SZ)V_5|$OD+eO&-{Bg%W7P@zDbo)V8RXrq>xqC<3>+8Ar zH$UzU+e={vVlvYtaJ1cT=TfM6RD9w8Q3mSlAEA5Tdu_Wq10lJA~)=~JaE|vdJkuMJ0>+$#6x+JCW8nvQjrP*A=MT;1%EnkSd^7VUX!rXA zm>$hBLet!_!{(ukNgbxw;5Tl&*}{oK&)a>3$+17`?w;>3t@KuU-Qi4jfWCLGyL*3Q 
diff --git a/composer/trainer/.nfs000000000270c2da00004372 b/composer/trainer/.nfs000000000270c2da00004372
new file mode 100644
index 0000000000000000000000000000000000000000..317b997c4e152ed9332807c98dc4d175bac74741
GIT binary patch
literal 221184
zn_G#VLkTAO*dFuRA*Wr59fA`G(w5nioT%CmSi#pW%-9kR#D9C-cF^hO=>98WNs|`d z(%rCQr-S@Pw5RP9r35`ZA&$Y~hefRhe2Fb6+GBEvyf5*bL^g&$tJ}V`5(q_Ej1m2I zd3RB9sgVBbVjl|A^rM&MPi$%uFla5h`)I3#CRHy4iB1GZdSV@^?;x2La3Pom$ LHvgbXZ8iQ2hUwlB literal 0 HcmV?d00001 diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 20f5c561ef..1b6bff74e7 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1967,6 +1967,7 @@ def _get_autoresume_checkpoint( # broadcast the remote checkpoint path to all ranks save_latest_remote_file_name_list = [save_latest_remote_file_name] + log.debug(f"bigning debug {save_latest_remote_file_name_list=}") dist.broadcast_object_list(save_latest_remote_file_name_list, src=0) save_latest_remote_file_name = save_latest_remote_file_name_list[0] diff --git a/pyproject.toml b/pyproject.toml index 3b2469b935..b0820e63d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,7 @@ reportUnusedCoroutine = "error" # Pytest [tool.pytest.ini_options] # By default, do not run gpu, vision, docs, notebook, or daily tests -addopts = "--codeblocks --strict-markers -m 'not gpu and not doctest and not daily and not remote'" +addopts = "--strict-markers -m 'not gpu and not doctest and not daily and not remote'" markers = [ # Tests that require a world_size of two should be annotated with `@pytest.mark.world_size(2)`. diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 5336bb83ac..b767595fab 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -682,7 +682,8 @@ def post_close(self): assert trainer._checkpoint_saver is not None trainer._checkpoint_saver.post_close = post_close.__get__( - trainer._checkpoint_saver, CheckpointSaver + trainer._checkpoint_saver, + CheckpointSaver, ) @@ -769,9 +770,6 @@ def test_autoresume( if delete_local and not use_object_store: pytest.skip('Invalid test setting.') - if use_object_store: - pytest.importorskip('libcloud') - latest_filename = 'latest-rank{rank}' + file_extension if test_slashed: latest_filename = 'testdir/' + latest_filename From e4db0352a9d73ddc64221ca82764205a81342612 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 17:10:55 +0000 Subject: [PATCH 18/57] a --- composer/trainer/trainer.py | 4 +++- tests/trainer/test_checkpoint.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 1b6bff74e7..fa9f79bc82 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1967,9 +1967,11 @@ def _get_autoresume_checkpoint( # broadcast the remote checkpoint path to all ranks save_latest_remote_file_name_list = [save_latest_remote_file_name] - log.debug(f"bigning debug {save_latest_remote_file_name_list=}") + log.debug(f"bigning debug {save_latest_remote_file_name_list=}, {latest_checkpoint_path=}") + #raise RuntimeError(f"bigning debug raise haha") dist.broadcast_object_list(save_latest_remote_file_name_list, src=0) save_latest_remote_file_name = save_latest_remote_file_name_list[0] + return None # try to download the checkpoint on local rank 0 of all nodes if dist.get_local_rank() == 0 and not os.path.exists(latest_checkpoint_path): diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index b767595fab..56bb663645 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -752,7 +752,8 @@ def get_trainer( @world_size(1, 2) @device('cpu', 'gpu') @pytest.mark.parametrize('file_extension', ['.pt', 
'.tar.gz', '.pt.lz4']) - @pytest.mark.parametrize('use_object_store', [True, False]) + #@pytest.mark.parametrize('use_object_store', [True, False]) + @pytest.mark.parametrize('use_object_store', [True]) @pytest.mark.parametrize('delete_local', [True, False]) @pytest.mark.parametrize('test_slashed', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) From 081033ca864ac8d3406b692e7615878a85ca6f7b Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 18:19:35 +0000 Subject: [PATCH 19/57] a --- composer/_version.py | 2 +- composer/callbacks/checkpoint_saver.py | 2 +- composer/trainer/trainer.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/composer/_version.py b/composer/_version.py index a38b61a722..df0bb29480 100644 --- a/composer/_version.py +++ b/composer/_version.py @@ -3,4 +3,4 @@ """The Composer Version.""" -__version__ = '0.24.0.dev0' +__version__ = '0.28.0.dev0' diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 9ef3dc6202..7e98ddf28f 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -470,6 +470,7 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: self.wait() + self.count += 1 if not saved_path: # not all ranks save return @@ -598,7 +599,6 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ) self.saved_checkpoints.append(saved_path) - self.count += 1 if self.num_checkpoints_to_keep >= 0: self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index fa9f79bc82..8ee716561d 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1969,9 +1969,9 @@ def _get_autoresume_checkpoint( save_latest_remote_file_name_list = [save_latest_remote_file_name] log.debug(f"bigning debug {save_latest_remote_file_name_list=}, {latest_checkpoint_path=}") #raise RuntimeError(f"bigning debug raise haha") + return None dist.broadcast_object_list(save_latest_remote_file_name_list, src=0) save_latest_remote_file_name = save_latest_remote_file_name_list[0] - return None # try to download the checkpoint on local rank 0 of all nodes if dist.get_local_rank() == 0 and not os.path.exists(latest_checkpoint_path): From ae5ece365b06780b78ad4d6dc840cb24898f4103 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 19:05:44 +0000 Subject: [PATCH 20/57] fix 2gpu unit test --- composer/_version.py | 2 +- composer/callbacks/checkpoint_saver.py | 7 ++++++- composer/trainer/trainer.py | 6 ------ tests/fixtures/autouse_fixtures.py | 2 +- tests/trainer/test_checkpoint.py | 3 +-- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/composer/_version.py b/composer/_version.py index df0bb29480..a38b61a722 100644 --- a/composer/_version.py +++ b/composer/_version.py @@ -3,4 +3,4 @@ """The Composer Version.""" -__version__ = '0.28.0.dev0' +__version__ = '0.24.0.dev0' diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 7e98ddf28f..1aa926ee55 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -15,6 +15,8 @@ from pathlib import Path from typing import Any, Callable, List, Optional, Union +import torch + from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, 
MLFlowLogger from composer.utils import ( @@ -619,7 +621,10 @@ def wait(self) -> None: log.debug(f'Current rank finished existing uploading tasks') self.remote_uploader_futures = [] - dist.barrier() + t = dist.get_device(None).tensor_to_device(torch.tensor(1)) + dist.all_reduce(t) + if t.item() != dist.get_world_size(): + raise RuntimeError(f'Some rank failed to upload checkpoint files') log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') if self.this_rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: # Only upload the last symlink file diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 8ee716561d..35cd6dd737 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1455,7 +1455,6 @@ def __init__( else: latest_remote_file_name = None - log.info(f'bigning debug useing the new saver') self._checkpoint_saver = CheckpointSaver( folder=folder, filename=save_filename, @@ -1810,10 +1809,8 @@ def __init__( log.info('No previous autoresume checkpoint found') # Actually load the checkpoint from potentially updated arguments if load_path is not None: - log.debug(f"bigning debug before: {load_object_store=}, {load_path=}") if load_object_store is None: load_object_store = maybe_create_object_store_from_uri(load_path) - log.debug(f"bigning debug after: {load_object_store=}, {load_path=}") if isinstance(load_object_store, WandBLogger): import wandb if wandb.run is None: @@ -1967,9 +1964,6 @@ def _get_autoresume_checkpoint( # broadcast the remote checkpoint path to all ranks save_latest_remote_file_name_list = [save_latest_remote_file_name] - log.debug(f"bigning debug {save_latest_remote_file_name_list=}, {latest_checkpoint_path=}") - #raise RuntimeError(f"bigning debug raise haha") - return None dist.broadcast_object_list(save_latest_remote_file_name_list, src=0) save_latest_remote_file_name = save_latest_remote_file_name_list[0] diff --git a/tests/fixtures/autouse_fixtures.py b/tests/fixtures/autouse_fixtures.py index c881157353..03c3319048 100644 --- a/tests/fixtures/autouse_fixtures.py +++ b/tests/fixtures/autouse_fixtures.py @@ -84,7 +84,7 @@ def configure_dist(request: pytest.FixtureRequest): assert device is not None if not dist.is_initialized(): - dist.initialize_dist(device, timeout=300.0) + dist.initialize_dist(device, timeout=10.0) # Hold PyTest until all ranks have reached this barrier. Ensure that no rank starts # any test before other ranks are ready to start it, which could be a cause of random timeouts # (e.g. rank 1 starts the next test while rank 0 is finishing up the previous test). 
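The ``wait()`` hunk above replaces a plain ``dist.barrier()`` with an all-reduce so that ranks can detect a peer that did not finish its upload work. As a reading aid, a minimal standalone sketch of that pattern follows; it is illustrative only, uses plain ``torch.distributed`` rather than Composer's ``dist`` helpers, and generalizes to an explicit per-rank success flag (the hunk itself reduces a constant 1, so treat the flag passing as an assumption):

    # Minimal sketch of the flag-sum health check; assumes torch.distributed is initialized.
    import torch
    import torch.distributed as dist

    def assert_all_ranks_uploaded(upload_ok: bool) -> None:
        # Each rank contributes 1 on success and 0 on failure; all_reduce defaults to SUM,
        # so the total equals the world size only when every rank finished its uploads.
        flag = torch.tensor(1 if upload_ok else 0)
        if dist.get_backend() == 'nccl':
            flag = flag.cuda()  # NCCL collectives require device tensors
        dist.all_reduce(flag)
        if flag.item() != dist.get_world_size():
            raise RuntimeError('Some rank failed to upload checkpoint files')

Unlike a barrier, the summed flag lets every rank raise a clear error instead of hanging until the collective times out.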
diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 56bb663645..b767595fab 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -752,8 +752,7 @@ def get_trainer( @world_size(1, 2) @device('cpu', 'gpu') @pytest.mark.parametrize('file_extension', ['.pt', '.tar.gz', '.pt.lz4']) - #@pytest.mark.parametrize('use_object_store', [True, False]) - @pytest.mark.parametrize('use_object_store', [True]) + @pytest.mark.parametrize('use_object_store', [True, False]) @pytest.mark.parametrize('delete_local', [True, False]) @pytest.mark.parametrize('test_slashed', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) From 2f5d6b0d660e4299c429a0ef1138132eb8577595 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 19:46:48 +0000 Subject: [PATCH 21/57] a --- .../callbacks/.nfs000000000271bcd900004371 | Bin 40960 -> 0 bytes composer/trainer/.nfs000000000270c2da00004372 | Bin 221184 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 composer/callbacks/.nfs000000000271bcd900004371 delete mode 100644 composer/trainer/.nfs000000000270c2da00004372 diff --git a/composer/callbacks/.nfs000000000271bcd900004371 b/composer/callbacks/.nfs000000000271bcd900004371 deleted file mode 100644 index ea1ee4ca08d823e60ac4c6e9a7d4468cbecd7421..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40960 zcmeI53y@^jS>LZLgJff3UMVP3!1b zrnNoYt-jqmJ3B0i;!4?Z#j#U36&|q-wuGp{GDQreh%t6-9H~fLWtVXkwh3_vE-E3# z!$oW`G0N}z&N=s-dvEv5>{>uU&8_`!ci+c(eCIpg`CjMF>fzTdJrdtnJrZ$zOBDUs zy^pmHeCY8P-SS;gH0bq)a(cI;aqp{-z52fUj?OlYwHmX>TFKqB_4?h7*+%RBdk;5~ zd+x5^-I_fdMTa)J!$X_B!R2N=& zjMPuSx!2kA?)r=E?-~2~ro#1au)puMpC2e(|3>@!wEg_wh3mK3-{_1@{DQ$0{ax$r@%f1_9?JWfqe?>Q{e9< z1?tTxs?f9B#U3X9@9g`xzC4N^2EY9eqG%1=4W9kxDEc9AANb9eMbQd)2z>UNqNodA z37&ar6decO1-^*U{gdDf_$mhabKu{Ad%%}4=zjuS0N)Befdk-I!4HWe0DJ(4!8*7F zd=Tft8^Jx`V>l|_4(7J> z=?>$h-)pSLjoygw?KEClIkK`+jojVx>7HmNt@@}lV|bS6%dA!R_y zetkeMhEimYK{(y(CdHwWflaxI3I`kYVSI(Eq_{M$r*e+Rgc1x*eUQZUEA@6qF6Rg6 zysbLkYf)&7tX@(nAL9aqxfU?%SMhwWas-kF|@7hIpYAcU6Q)*Trkb{j6&G@De& zCxgYcPVHy6(}dyTo33t68|GL_TU9#VEDPf1FwWj%giMXA~C zG3)i4T#mEL1(HG8L9)TUvEC;9mi4bG?7PwKLh{uu!_|gKVl!&_q83`p>=fupE;2>O zY$k3GvvT_hQ>xi+kX5y18&)J@z;6d8$%I3DkwFJSwlqCeNvJZW%$Fz4ObobEc+Yoe zDlL&KPX*yarcflG;W17Gg#wPw96^vCJE{`&Xmwa0MB(%D{l>~T@&c4`xKzfY>q$30 zktF@|{RF;Xd{+~hX0me3#7UibD@#CJLHIP1G_|uum>?jd%<8$kG+kLaaAW;DoEuu9 zPq}SOf?WG#FtvHtl0h%FO*DL|EzA!#jf|vRceth1EylX$g^TgAqanA+W5q{qX8r!V zx8Ncv$BiYzGjg<)&s`SXacizT#u}Hl*ShT%5(G7Rvj_EP84n||hdt_>^ zBhH0gk!P%lowzbeM=F32rE*ujD+^H4=yjXZalPBr2hDnu_UjF}mpep@7`8W(G?Ipd z$5Jca>W$)c;d7}Hwd>k3L_Eo)8&X=fddI!4+&N&eONYtEj6Swe--lM*L$O(G@w7Ai>$mN#jRRcCRKLdYP-`O zZpFP;ZoW$I(NAcaCq2@;ne?e&+M0DM8IJnZcmcsbYOJF;C8c&>NOfTj8fJ8tPOouU ziRbjRxYO(PMH^rN3;QPOkP$ns)uQj1G0WA)D!N%8h{{)xHbOo*+%Uly^?0q<6Xl}U zX0(k$ve9dzI>&|qERjufO!e^9R@@TJgn@HyDbrSJs^mx|EJD%kfi%3c)*CQd8`IVJ z2jQWO5#?o!)aar4r+wH`h<5zc5>@p&al4xiVc+7qDK*K#3>3b(mWo~$>8Zkh_KwwS zN4aChJ+O8y`~}C{$3Nv;kPbamLRAe~+R99i9P$gKg1OYM{A!a|i4EWg!&7Q^s6mmJkpN1u{Kk}0ah zDUWlh!W{@gL5?DWWUW4Es&HZ|LLXRWkY9PRO)v+F6e`u1@1SU8{iK2Plm%P3fJzz0 zOgk&vfKq@QBBe01k`2axsh;^*)tiwGRjQV1b1T(lABQLwiy82)h zF4jIrV*h_HMzY4svHxBC|1;SBzX0A3u7NG^I&cA;1g{0N;QPSK!SnbBJ_|kqo&f(8 zJOEw>{uqD3$H9}}ZD1Q*2B*N?U>bZ4Kf>>UUk5)6{w;VTcmmuHK8A1Mqu{5(yTKN? 
z3hoE@fqOv&K8fGqqu`|VKfDRw!z;i?@j3hocrUmJ{t@_Y@Kt;d9|J!Mj)PZ#FX4aq z0{9eo7Bs;j@PF||ycfI`yaIe2e?$-51^&yeQS`6Dt3eE&!yj=G{24w7p{@8Oc0SB- z=l@^qrX$2I3#C|+VyI`111!y?J}^s8n|Z0rpO_QIdacH1gm*XFX)=Rw>kQj{ll8CedoB4kZw8AmdAROX9SxzR?U!zvL^2cnU6T+DB4)TCvB zEK^Xm#gQ&WnLZn8u(~EmWpO=VU1KImFLRBn`k5I?AHqW_doWvhF@w7-oI<3XG|^I> z`A)E~iGf#G)wYKSvduxgUu}1rxQi+$&zyT?VYzmC;gQAKqf5&V*G?>*U#=~jK2cj< z;?(lOgQpf}(1Vj!`|A8u*yyS0>rwc7nf7|Wh&2NPhehb1$Sd~8vn?~iA;w1`WG#9q zkQD_Fg=3+;yC(Kx5f*(Fu9mVfSaedbvNQ(mRauAEdz&Hx)m#qcT8?Hyzj3T@?GEAM zy-3vj$V;Khx|xzBnx3Cm&EyLD3rlLdnpES5&R!6C%!;|&7oT89BR| zwAa>$X-%ZW{7D=@$xI~Y<#12RcWo2<(2~KGdT0K?k)(fM{EY=)-28zt?%xTIx~VaL zU@sGEQX#v!cFWE7BI7)iA5Wn znRvbfA=w)AHdIfS{e9K!k*Xg~v#l>~vjuQ4j0+8M1xu+odPjX*^y1a}25XvLmbzC~ zvZB??w>H8`8;VIKVt=lIM=_mSsoM6VeY3Kqh?K6ZU2;oe3dG345$(B_rMHgQ7e&yG zJd;~Zfo?-IFH;37URFn;2Nr0&QWSN1Ydp#SYf!1WmJCnvmDyQSUuvy39Yy*1H5#@%X|=a1+iGj+us2Am8=V#$ zT?@~-x%SAZlc&x+T3dYm*~N1xC#RQdODAepf%$>OADoHp4d*VL&R-c@YiRx1$xMrD z?sN}!YNy%Hq*?0^BO{Q}9Ea_C2YD|+86%uHm*Qd{w9QYRRQr?qa6CU#q$c4G+5)p4Dgd!=Oa&BS|ZHUy>j zc3x*!(`HVE_|kaQB=wv^a?_f3`kcH#d$~D$P2tC6B+MpmmNL zHOC5-!bRb^1Ovkkq~SYd2$;$IDm;PL1c79QxSe$~@!}Q4@Jy^B!kKtk#zg-|G;Q=P z_YSxEA_VNS3*D`mSSEp3+B5Mfp?ZAA#<6&qunMOyba6767W5%gr@iW)7pl3Yb*9(W&tM_eNjVVfQ;-mucmG+3MWSUh2LH$>@jeRIO&}WRlxz>Rn`R zUE*?$i!SmSK&w?KTcYvp{ zvmXav#jbt}SOotaoBAnmKlmgz^+oVU*wUW|>)<8eJ=oMs;M27Cm%#}jZU1xI?!A7* z_6RkTW8n=v>bTwgNHt|hbRqzV3){8?+lp7@JW1}vT6o;VN zOu2wRNG@-g$WVb~Eme+pRd~6tTC73aj3PwKfH-^5)`ms&R@onya4&Y_dpI*MsVV3(IIvUSnE` z8K3+ui)YUiZYjI-U9C<^<|-As=Np)I&sIVUF{y?L+QUu-!ji;s zx+c4-AVuK?Pf%*Z*b&k}EgdX;E5gqvWN27cLP;XM6Dr5LU9ZmIw4mz4@jJ3 zP798-vHT|?T@!%Nl`l}1$|5;vT&wcj6)eXS zU6qH%x#+^VXMK7_3hdRN>E{DYw%xGyqbeI=Hdwy(GCy#)Me-RQnQm%WUQnbM|;X&@ON$eBy<|O`q5Tp6Kv7yEO zcl-0-f!#k3p2g-r13rzNUkCpX{4}=yHE;{~Ah!NP;Ir8JzXDzZz6tyxHvi|rW8hoB zGuZrZ1)Jcd;NM~U-vM4^eE@%f{r?Bxv)~=zLGX>>ywA(3#O^Pin4t{9hmlH-qdW$)h8uOq@cj z)h4+=rRb~$h9>T(j~3cMq`j=sHFI&=52o7oJ?C+>$>Hgo zS`qW{Q9u9Hc7D%`>hHYRL507_uwC*V(u)^K>mG7K zhU3zuOVC@s`SL;^*PPle+){{gPVh^(#?Kz{#bs8&q_Xbx_vVbNIpedvC?*yvz3fiF zL<&0+8d`Kn3_9)46p_||Koi)qL@cesdTXoV6*h(W?Jkp+6q9EoNejL!wo=C$d#%ho zX13^P_D;T3H%BLYP7wn(JAP!tkIHbn-6t!nU{j7SNSLBBHKF5vS59%(?4gs{toMX5 zoLVM6ZP-J9JjLA%*{QtoT?&RRO*$=8Bvx`u?HV3kY zM88T&)p$|10%V0ErAbq0H%+;cyy*+T2B}C*Q^c>kq;-`$sR~M|S5k!JIb9X6Bt9C~ z0da9i<=R=FJ}Y|;wA!^!kEBDR&-7ZkuH}QbZaACBhbB&}iskFh3m4f9plfulDPdrDnWfv}3Mx_G5F@CKV?=zw=J`UL(h zd8irsfa8h^buC(Dqn3vDAfi4my)NN&l6HIU^wr}5qkcCaw!#HA=(Sfz=p}xc5(?u< z%^cKbmz-bfv*$ToJNdw+0E+6<_IU8y#oRMl~V=+-_|}(nNPaPgRL>WXbc2O%;ici9cTG zUkjF4q|(I$+SfxeA!@QqrG|^Od0-|!FzBuwxCGa1TJN|-CB*5^oz31xS9URo1c+Q? 
z+n`pahY`k=bM-u8ikQLWYR)~{gL!n*d0s0xQQ~gJ56>Y zGaHyo>`;sie>8DF(ZX!x3ef^>jKq;$0{WJlE23S>>@va}JxqdIzTxovW4v))J#gf3 zh&x@q)hmZqBt1!X?TGu3h+|yg_t~xG-k`Cbqnr-CjkWpCIJxecp9LFz-A>%({P$2q zC8u5Fbk*csUBzTpZ#b`wSY==?i4R}onELKjH_mU1)nx=yNug%@rVMF;99);PUi5lJ zMVPbTQB@Z^{%-H7S52X?p6=}I5B2u(v1y@*na|QR@29R8S$>HyVqu|T<3qA<0fqC#+Z~#1y-{7~v8^HoN1pXJkgI@=a zf)@jMkHCk)JHT1+Ah-+M3H}8C!9NEx;Q!%2_|M>-;1qZX_zM1mr@)ioaqwF3SGSVy z0NxGS;8o!N;6wN|@Ux%|z6Z#=2Hp>T9Q-rz1W+16KVb(skh-FMR;^ZTqvfO#dh2=4 z-)){@ryWl4d^p-&$uH6FtbE>ir_J%Q$xxTsfVF0mmcskocWst6Xo@<2Ra6L(NqPRh zp+{L$kq$vdm(M13INGc?Tu7ohuGe z83q-Mc(b?6+AQiuJcjdCS+IPr>ZGpiMjaDJfVctBobU}8ZEemaD4Isi#ium=G1C|V zL9GmC-Uc%tAI{Of%vj@ng7aF&_-kdc>qLak!Fjb`%nZ!ms%6@Hv$bYEK9@@|Eeqmu zn}v&}#AdUqLpth9Vm`WHzAT@wmCHa&1MiCz<&M_Wvf3cJ{7Fh=ClOB9Nu+7!qQf*g zy%C85WMs(}Ydl{1<$8RjU5_uETdKxqN%6qpEV<|2Z{@1M6Ryta;gTyoX)X>1yFxo;+}@sEO0mrSyIC79oZJ{BtDDY0ezDz1b;?hl4F6YU#&Luew+TPspy4f&qs zjE{L|Rp<_>{Ea%inV|O>jdtM6SMrb7*##k8&bh=SEmBgc=*&i+H4@uRf*wKQxD3X& zU$srwi^a?tADWtg<+01+7!;L#FHm!mb~+!Q&1dP~R2Z!sI@5mGaTd-uZ_FvzR=j0= z%xgmVwo{pYRxZA9N3{b9+x-f!Vj{=PtUY7ncM~eyp(b}}Y!!vU$dpj4f94rU-c=R2 zRm~I#0vHl?aZ6NMhzuUviVt3xpV}z)zH_pvj zxGfN=Qkxw4{WBHO{x<`$ z`&U67$X^=fMg%2^PW2!DsLZ48cR-Ht-Ah1Ktn5AAAFNG57@jfd2~K489k95BOvJ z0dEF}!EfRR*aBCzU7&bJ>3&vXb$0Z1dBh-k$@RmxOy{Da) zABbFYL=yPE%9PDrc1g>w^<~QXawp#-T^4VWyTfhJHzUA=4>rc>?u1%6cJrca`f3gu z`$#qJtmQ6N=W6hSBy=^p_3P@LQDRi;*3@|xT#Y!X$k{G5&)W3YgLmt=7g(sepY7wT zPWgr*zrvoV>>_S0uf3R3(aF$_uO|?mxf0NEN;?P5h*Z^KI_`v%KTPU6)O_T zXkb}K+;hPmY)f3@bz;#jSmRVyW4bv-)T*sIdO;7RS~;}EH#2)Ai04(TNDB-8%U`bQ zPd|Su*{oxSDM3O%e6EQ6^H5J#U+&%L7uGBBJy@03@{38L*L!X0VxDO|7pHdbYG|dw zOnZ4EE8iG9GMvQ@3!O7hfmcV_sYpoHGmzarqm5NblM&XR+FK_ene^2&6gvhTapkjc zu_~{F={749NonF$+NR3%v>z~`e@<8hyc6y;(7<8jgb!mC=+`{tGnbX0xDBa=`xf0-K-#T7rknpwyuS%AR zH?&c27rL$bSptUIgMM`a4a);-=K*`m0{J7oYmwVo@0ZyC3Qvh2(NzSvwCQ1W zffSRM3Lq`m6~${ErbU017!HwblV)<6y|iRh@VYh=f>A*w4Q=LpKho8`y~?0s3X(ll zIc+jqehWm+Px`Ijn)5zt^X157O(_f0sbR4|9imCfIhecei$4BzlZJrQ{X6&Jb*vN=Kr5S-s$%? 
z@G!Uyd>MQH)8H8(?+SPT+zy_@)|dSQZ?|^;SFrKl1>On7?mrA}1up`h$F_e8G{F*> z0)L53|KGsRfj59z@Hy=I1pEoM{BvLfd^`9UcD(E%coq0d?D#K&r@#|n0sIAa{I7yH zf^~2Xd_Q;|8vQ=_D0mXQ7rX%|?SyW^pzh#}vVNJ7ljtSH(^f}omDbdS?5*Nn?-1*2 zyXe=uj;x7Xk?wOkbe5H_%3YxWySQ zRcqU6`|9adx8c>fT>H(tN}N(3h1cA7tNu;upj*l`Lg1bWKPL}KLH_U1MbW4;`9AqR zM4r&RDhXwkvwW)*$vDjo@~1Y4h1}q4l_u~{MwVhtFf@xydF9Gp7*rk+76e_R#jj)1 zm^IMET1}Yjuvy=g60Nk^uchm}L6&HSQc>-#ntf$#d1{z!4%b%oq{zA_Te2mCqt(ML zCB7(m!G>KgwRn^R99**aH6{JD_|gv<=7X`3F>$W+d^)1scjNRaK2cof-S2E^EpuA2 z2nHp`Bf>4b&qudx?7%Cw7?U!|s|GIj{pPE3rMb)0=p}D;7p8Ns@QQ?B;bG@st{pp0 zy!@TcXjHzD&GzlJs$(B*t?3y|H0I9|7{kTP)u4bKtKWU$kav-k{f6VG_TSDTBX;7A z&o-k}y11+j$-)GoPr_NCc95LV4k_xV@WH~}*~&QKb_vXwg3UBnkEzd!HI)?-Ov@`j zv&2TLD{jIdA!NE#(m|spUwA2l6}PaB)KaleOcT0uU!#zVY1yz4J-7D-IY08iq`RQw zP6860UKQ%Sj9RW?Qm5+{b*Q}4KIZNkuXTjavB@zfQG7ve-N{P}8QQ%x)^mfjl(>Tt z{efh$$;6Tx9Y4UExioEsanT$GcA+R?gmuh~>UET+`Zp>{$#V3OHIaU^|0P!&^3tf2 zJbb!0JjoxYGMO&q!|1c;yZUhPE`#*C4;-Fe9BUR%KCVZg79ws(js% z$ZI!c{`$K08n=oU^;l@US9J!G?lenf^Uw`){SJ-FC#DFUjcYuTuX4~D6$+ZNAuK;9 zGe}N(zlYELlec%saOU{TBvtGDG&)Fveb6YI+9+>vGrP45 z9V4ewP69$dC-D_QvOVEiZ@)TO|MhU|;hcn1>6 zNx9~D@`UE)aUasNFA1~CqL`STJH@z^{r~r1^Dbfwi~T=u?f)-e?>`A7{{I^A67VJL z{4aq20NxJ%2X_8*;92mmz+1s1;5)!4vGso${1{jVYv35T75pOhzT^TdfZM=_u=$?` zn?UmZ-;a&|1XuuH!?yo9@H7~KOW=n=6?_UC|C8XCz%n=p#t^k4pJd8o}ZtmjkD@OHL@55U!aQ+d&GkJl2*FEA_1<753a1@A-shr-Nvu zZ}pU{71B2;En6-E0vqMhniqyRLHNRTudEhn$EFt9kHGd{Y1^b`$!e)>H-8e-xY%@| z!rox#F@NA`7;o>|yX!I9n!nAekYQ=pC#8FzllaJnO-hMem*f-GVAqPJ-t49^)p|vM z(v$~_W{{;!*=DjUxw-CMmg)i=_;8HIMGTi^IWPci_L0+~Wdv1SO85&CDLK&EMTzRF zjv-ANw@X3#ug$ITk^p>2LEJ5^sFwk@ajq$o##P#q6u zh;|@rpV3D=%cjol=@peg3dm#b&|=F&=AL(kuQHKX!kPW6F6mzyWuN#Q%``d^nt?u8E`Q(o97;>R}8 z_%^nSRJtxN=-PHBxu~7e_~PNN(|9|jg!Of-qT;>djefZ+D21fuyy0IgcD(s7r3i!H zxC=m@{OffCOrV@t9AmanR1Md+>|YPAnhZUih}WGXsVr5CqsMsUY}`n?86L!B^~H@U z(X1fzuTqmi^sBggF6WAj@8WMG*lNq|kw-!=LD>YCJ9N0pmt1Fr5_CjorgbV5xnXQf z%SpNk&x8&Mzfwqhumjy6iKc1hZ%^spYtyK-b@K+Q5VI&xC2gv0qakX-{+N7Z$o?E7 zAE)(A;c;2MNZTd^AmtTg5>b$_Id5t0@lyHib)qav@&XNXi;_6uEG|cw+pc7%qdm)mwe`WMJB%bQ(HWI_3~8;T&2KO3S6bYRSI0C zz*P!drNC7RT&2MOjT9KK-&lJo6?>4c+$;R|7nQyr;(x!;e}8M~{+IaQNBsBim+n8* z|6cIl`+No=pD!)le|hQt!~FfP^WXombpPT0`hNd?ldn)n{}KNB&Hnpr>HZ^2@4v2e z|55(_VgLQ1()~yK>oflQvDdhNLjIUm>~oX#xuG@9!?%#~fsz=lJj6EZu)x>Hbr`*j)(ef0e&K z@4tJc`;Yh6kNNL+lh-xs%b(7*q<()}m<>!E&kmF_>q-w)&G$4mD!e}B<`|7q#|wf=hOe{+7o zh4St2_e1$!TDrf}-#_fX-&eZ-)&Ba|`tOJO0UOePs=t1R|9*by{@3{HA^jgN-JkOJ zpXpme|PuRq&=@AC{HHZ#nz30DYJ;Ms=o6&}QC;a_4lnwUP%AlrTc-eo%ZiP^il4gkiKU}wOjrD<h6T1}$Db0WN?$!71>S;AfHN-wuv~Bj8i$7ylmo z5_lW6;qgJrM?=D{<;Ua$jvIrtnz`+e{qz`McEg0tWd*aIF9{tO-Mx4?UV z^ttZ@-vQRZ3&1h(H1JgL<>0T-IsXgzHSm7044w}DJIdwHftP_>!GpmcqL981{1|v8 z_y*7f$H3FTF7O2KSp@HogAap$3*HG{2VMczz)5gBxDh-NJOun1ec@NZ_kov!b?`!P z2bcruD+fXT*;UK<47+%9bT*cIOTE?Jcyrn=uD6HdUVF8^95Ktv8nktE=4a4f-y}-l%!cYP;F%kH&58bn>gNe4TE0eYD;U*Uxsx&6VD& zFRgxcx)*xO$?xFN((LvxG&{XzUwKD}X1~4GZC>n+&v9$yZ*}j%*M$>gbmoITqe`s@AC2K5?46rX8K$dhel)KNA2E4{Pso#k`g<@4)59OKTURWHq`y-Xu|2%OpN$musgix!!(OP=>xRHH9RCjM3?TtpH z$TOXua%iv4Mt3=k@V;=L5hcpToOK&pt)E)EU zp76g*j}6wxy|s|uLGN?5;LAFk&JJ~#2kKFsg*y)2HEk3lRXUOL&f2ozHb!!ihJ=r5 zWS2_d`tYc+HW;;g%WJDmcOjJN=<$UE^9PR}X&yLuaBg9txp@4!bH|!<$L?(2dEn%9 zmft>cZ)^W;njzc-)aQy}ZqublV^rfR#wxdT1 z*Nz-s9b8PWxc?7zN8?`KhK@@!eS|(g8mxB1rD8{0wPNN(tNSW25*h!D>~*n2x{ymY zT(&_y=|+daUbx;K!ew)|RbT`wy?dHVu8T45dtw6O(D7-Pr;;3P_LueU81Ce*9y)Yl zfr|?alyUbge7;yMv7upaX=B{&ghnk7F~=(NnjdnUlg=j&dho<4^+Wx8NBsBbiG^@= z@x&=lDzRNLE?0pwHme^_JqthAl z`wF8Fe#hKy97?G&aYM~4+nJ3w*Sj-W?`)sC)Viyq?o2tSFLWlom5eHCSTq=mqi)?* zsM+*ao6TL>GiFQAOzm&n(AYb*3ln@K!djQXu$)u)sQpatySb>L4j7tAz2JxmI`sHB 
zm=>rc3_g9=Xw_JSRDpwsG>)9inR@y{-|Aztc6c($K^cchJ$)+~(G9Vm#^_vo*y(nf z6WVpb-HOe-Z9>iAMxRcEXqV2&O+10e@TcYT7s&@P3z4B2I_q}ng*qMHSY6%pCE2P{ zSJpYx+?t(Dq_#oGBy|4{YyvLro?NHUZ3gt&@RaW`H7{(qeq^<^HZNZ9_p|mW>-URS z6Y>{sgdfX;!72soBJ|t1SZ)V_5|$OD+eO&-{Bg%W7P@zDbo)V8RXrq>xqC<3>+8Ar zH$UzU+e={vVlvYtaJ1cT=TfM6RD9w8Q3mSlAEA5Tdu_Wq10lJA~)=~JaE|vdJkuMJ0>+$#6x+JCW8nvQjrP*A=MT;1%EnkSd^7VUX!rXA zm>$hBLet!_!{(ukNgbxw;5Tl&*}{oK&)a>3$+17`?w;>3t@KuU-Qi4jfWCLGyL*3Q zFHaqWg2Ud~bK`9P-u?TtAkc8#336F`V|;Ef9Mz=%KMP&%+30=J|HJysU!cpsAN(Yk z1$)4?;6I?N{|0y`cq6z6w19N^&!LaM7kme}6C4L$1D*;V2Ywv=d@uMo`uGol>%s4% zi~l6p4;})(7v1`a;DhMTOW^m=iwEFKz;~kyFMO0c`ol5X z@K~=OpGy8OK7_dKbn($_{I~eX$V3E87$i4aPk&8k_g&gme)vMSnc7HZM+kj|E5#R_ zzA`)NuC6rP|Hb?Ex_G=xNdHZEz}Y|CBkup=L&oi9z172h#QRlSNWB(lsPDAK*=%Md zH7T-T%1-_);$894+3Xl*kcVboWq zoWVj<9@0Q}6mz(|Qwm*nb}-KFlUQ3%E4u5_RO~*Kk;x!KwUI%J z*}CxPG*W_34S6#ZEKRIfee@;fwALEtJSEb@1+AKs|Ts&H0tG< z=}fY(#Fkw%nS>up40Q-CLCKEKb^YLjiXH9_R_q!Q>-sqg(a#1*u){_+Kc>3lLbfVG zp~yY8&=$=%rdt`G+5E(*;<1-mL-^OZ36>Db_za{8F#r8NGx6eb@#pR_hVPrfdc?>&q2&Srm*Gea z%H2~XGZmG>ssua4)^=o$VHDd)**f-qWmKVFW@5Lhy9D#b&*nAkT<-M5;;?eVM*P-Y z&*zU;i%N)>iYC-TwwS3=O$AFoGs~+C_?gyA8i~o7!FaAmJETLhmcFI`>%9h~KXBJV zwr~TCAiHBQI7`>Y#JkbSWVuGc*)>hSrllP7g?|)Mv9hLymNu5pPZ+s+r|<;ahVpk^KbWTPu5C)E-V@rvvGy`!ZD*%W z9-)x*#1$J6rFq{KkOrH!CP>OyC4wq6lht9$aAJ)RU9di;UJ+U`l*cm#vE5~AZ>+^$ zpcl0>>1Ejf22L`o16WRm?(6MeYBzn-`MGWit|o-(Y%W1;}mdl*~8rd9X zGmPY|)()GKN6W+B`gjNRg%(`2QIS&X!0>E3qcH)xd~)7b9L_bBFqx3k8EhsCn)d0! zRCb1$T9acGbCQYS%3sRaQ92Z_kfu9fl5<9^6f+cW0_$;=7&w$?neeekI zaPS7~02|;&@cY-)YHtCu1MCLBfgR+B!M&gXc7vyYKgEglm%#hL+rV4F0brj%cYTW& zAHsfx4WD&lKNG1@5gk2i(`3I%^YzcefBQ%?YGmCUvDh|5V_4TRspdY{{;caQvp_X9 zvUg(NZZsrxbY~KAo96T~c*dISSR$|T?qsn8j@(qQv17NA5iqhLuq)gB413$8Y}Zwu zMH#TnkBPsKR#yL8Pcu#(7OhWD<@$sc4XIdXO5C7wwUAcmv{;=vm%L1bf=Ws1Vg&TL~{_Bl75gk)_}umvI`f2&eBo1i7ISeq=5 zX(uu%7CER*p_){3akvrnA(c3C73p5x)ds1^rp>eAH8(>yoNc+eJiaGkMa7qj{HC0A zHOVH^?^z{ZD0eYm-g3&Rzg)?DI=k*Vf6uQV#$wQFPDS+!WJuZ?rTCJVkDe_uAeMIM z<>(SwyK|x4A0u|D*S7ncnJH@768<0OeWw*YcbbG@q|7dwa$@2)sUS%dE3#{U`vE(sIzL7+;uIMl3#PLBKQ~vkZ{P zaGdM!HLE#)c4qq<*myV( z6b1-xjwfna{Dn5Usn~73s%s174Y~-vVk~>qT~T7jyJDvf?Hh({;xth=?%^~vTfH86 zfzO)B);EKzM3(y&Of&PCcbC>p2F5(Annbt4sO zgvCLd{HsKhQKfDNyX_Bfu{+mP0(+a1BWijsd@jc}lJtx+6b&D-lt)`%n*3P9NFa8q zhiV~DdT2Dp?qt55T+Ug|ks010@U~=(F}_Cj4hFP5$(@<}!gAi_*uI}gHkq6QOIN?Q zC%NajqwiqSG0%zbqsgxnyGK$vbt%~pnGD0fc^Oo_Vhig^J`myVqfvL{jN-nBmTwj% zcfH=dXL)s_(}m4?M~=~~(W`pB`mS?>Xv!YxUP>-hpGI^u2Pan3hwkz8O7(drL)KOO ze15U36-1xSq&wC3ts4G4>z6Lm^h)(5XJ2rRn(q1ZO7(fEk+bHP(Lq?GpXoyNF{i<8 zySNumnQR@prx0y_&eSOrzJ~JdzZW9%h&1v0BfI zUhQ8hn%zzb!KB+8C+uSjMOE+W!+>Pc>MV(4YG@ZlFZT>jIVVVavKB7to7a=Ihe~0l zv&7Y|#IE#ucb+b7;Edrrzcy?ort5s`cE)`83 z8))LV&>L(pKQmpJWYJB!4!Xofzmu%>hzwQxg099?4}_FN8$?z*Ll9PC!Aohn6zHZ^ z#zJ$bLzw=54Z7_0=(*DW4|=usx6t`t4)%lpg`O{e{(k}1!Oh?%p!I(F>c10wC-^$B z3w#`1|J~r*!Ex|q;6dO&qU-+_cnP=#X#M|Z(DlC&+yffmUFiBh3%&(>Gk7leBlP@l z0SCcH(e*EZBjB;%W9a(73w{Ya5Bw!M{-?orgC=+~_z=4OtH5pGao}H}>wg>QfUg3N z178aMGcEkJ`XwY1>bZ9O1s-@$qr9<|h ztCkMDxuT!bRZBmR%-dF*iI(xY~ISA6pSH-1ao_k*8fr2Yp<4mi~b+t`+Xd}{};h4z_)-d=zwG3 zD0l++AJ_o?5c~mn8+bK%CioZ$ek(W#ZUeW18Sn(~S?mBG06zo12V4T$6Yvpi0lx^I z1NMV`U@!OpHi0IPPk`(Lzk@ws1Req2k3HZ$;D^EYgKq>2;IFX@C_dV6fPW7@49hA2wiBl@+#s}2c|!oDcVYt z*OF~`-bRWwlf*pvwEh`3s%qDD%yHx&pHLZ?!Ra%#=%|t8amHB@CKYxjTY;_k4XoC* zGk7}Y+g0--rQT!C9cv+!b~?-b*79p-&YYR9$(@8XGxPSyX*3+w>Cd5CRQ#1zrtZsE z*DeJc?Pz0NfdN>g6q(ovoEthD-m+PwbXb(jHxBQzb*nt#?AqDvMA(SiIDF!U{WGqx zY{BixEhO$9uIDe!#8<+y-;CdnOry9e`C*Bvi*(4<1;U+htTro>X^Aeuh3I zTDsTW&G^`;!DhC#Q9bm{QgtZneV6ib`Z~Kboc)`kr}-6E8T^AJ61Mc!{UtYWa928P 
z8Jh7uJiXLr!6f*Ap6#*FTg-xG!eRGp_a4}jR^3Ngz1zUWdFsIO@&^9Sn~-X1W#5#M zX6kmF3Wl49_bq4_wC*ep)^|7jP5r-*WdQ%4^&ER(f4n%Ib;rxQ*d-uOmr_AkR%b2k zLJyZ}T!q;H5ULPdj~&5S(3GE@jS<%+LT?G=n8C|+dF;GckJ|oV9N}GJZs$>u@tbk^7e` zGk4UlOkl*X><<;MOsnEjHKM_v{Le zF{NC#ZUx`E46F8p;%iktbnGO=e@Kbp)Y32B=6L}|erW{`oL{cr$3n83-Dew(#;$~Gu80#8p9SZE)h?4E9%lqN zS=r25@tsz7q1Vpxq_<2~ohEXEN#)ZM7tb-tirHvExti!!RbW~yq@CADVnCZ7ZaX03 zlDSQ==Iss_b(PXCF_$|#-p9iYqH$?)n=#>1t_-ER8{&2yFD#2I)eLL$TIFV&7h|y= z$}%G{!A5N^vC?7PU_a>9i2P20@0_XOxVMQ*>OC3;uyMpv=d74N@Ah^U_Ut$1Imv#0 zMD!`&7pBgj2oBD2iNA^?Ab-sx7W$_i8g7@9D!ng;`LQY%StD^%RU8u1CQzEJ18@a# z=2c@lmDeQgG!)CsEtm1+J%lxoKSwdbndCm{|JNhcUX=cc{(pooeSRJN{*B-acot9$ zfQNyPqu;*^XurTQ@C5Kt^!j^258MDA3*`U*|A9AvW8g2)<$nRZ8TW~U^%Otr$$$MKetjLe9l)o>Mxj*Unn8 z9PGy&`{+@P9Ywo@V+IGpZMXBf7K*0tota%kb{>u7N^Y!W+akBbt6Z@QDJiursi2!v zmq|9~b8=G@f8CM{odIsY68p9%Mrr?#%245sHC+uKddYT;Rn*nOOFMB2VpECpPhMun zg4@j(HJY`7Hk7#4=9Zec(K1^CzGCwod{kF zk5xyim%AN0V0Ow}5%EXO|g!Fc!8e z*?y^HE4Gw0H_bu`kj*!JD8t&!@eiO%T~4}k^AI+-IA z$=<4?t>E2uClS@{vX|1=+I{CvDm%%xWo9?>YHzuxJ!SM%Xr%Z61k_5TX_p_n0heL7 z7J-Xq)h@OUiXATnI|G~H*u7u|u#kvez|wn>?k;f^=7V~Mc_51FmbhHz+npC_@2Mtf zk6!YQWsY3^H-bHK~71@ejO4Xk0Dy%Cukpm3ltiXsgw|Cdp z8G76y+AQ40-Z)N*3_llaFmvyh3Veluu$ngp;ktg@$p-8poImL}y^BRfui8D1m+4joUN(~KXk za};|+G3C()aU+R@w6W&Qg$O+r8Oa5Guo1xqI!x2nBg-Z;rF(hNC*PsfqzW>IsQASx zUJ1ntyXWk&n(l;;7e5vW)??D;6{JCq^%6OYja85~=x_-~D!r!=ymy=uOO$P0 zD^;5!K0K)=2qxvuGdZW~bLzPHqUg|8JN6ivAzYk9#w^{!f56fo}qe^Z#J* zC+PdH0#62y0gB!KY4m-q_dgN59bNxr-~v!g{;vfmfc61=75GE+{XYP&2DbyP@z=rU z(EZ=%?Ev$9KN5Th-T#B&1K^*7Q$X?fp98K1j|cw}-Tyq;2fhd>Uf?%@X>cvjIcy(B z_rDh?p8g%+kI?yF3BD0*f*ZkCfX|@!{|bPGEWav#G}BA?t*3E;f~4 zOwlt4?O|hy;mTfPL`jO$sH&+TJEa%-S7pB2r+vOiMcO#pcDr?nkj?s-_Q9}TYFn^v z-X}VW=KVDbGVIg~7Wv)&Z|!Wv06z8-!ttZiVJH7~RgzeP06W}lWG5|18?)h9`-M%Y zu;Kl@xPjQkR^R-Vy>uXkf#LlFPH@_;AdrMj!-RNZrw_a_BXDxV z$qP>E3gWo3Npu~zgFDZA20q|NGOWhQ=zf>GLMuy_+9WV83-on2!8Fau5ke~puZ{U- zBaZYo*>Sg+;;7^Q606P~PayQswJG+mr1l zV%NS%%`KlKE~Jfx?Fd#=!HSA+Ssh$NZ9-wpiuSv1f3R`(oM&kg8dyD5{1*5LVrb49 zeU~s}L0O@!Hj6eh`&D8V5eDnl#h|_0IPVn5p~5F5%>`ez0CcFwRvA@jyD8)HG^x%P zSMW|U{H+)^Sy;9I0jCOQlZnJ})8GA^E3C3{o>(w_94cGl1ZcAAZ8uL$F2$8j6cgWe zT-`fzIlDIsi1!ml zL{gAuL!=VIj%elI8;y)g2s^TixCy(aD&3`(1N7m1VUy5I1j|8~@UsinpKWRDV^HOG zPhz(_E0q@!tXVBbq$?Lg!PUQJwv8>N343UU;^HCGNF&j@oGLa%RmNW|SPqL_Zpx%I z$JUSocH{ao?w#GxK57@+vOJBMAtn)@#8ZsO!!9It2@zFoP1g2d${me0O0+`vQY;GD zG}IiM^6+WPU|BK85ou*Yks3SvS_!S>15KFqoCKXw&G8!g2O;CLRo10l1W?`A>@mWO zCOcVV(jwK=To4@TO*@u14jnt@Bv|CrQ~YQyEFM@qwa`3ve6hK3>colTCl}`q`H&Zm zhN_C>4I+?jEr>bgBs`oeM1`H%U5X6j)@wNX zV{O_KFj^}Km@~jr4hdfaQ0j$=VMZ`-ua$;z5NTPXCJCui?v1Ktb>__rglb@Af(0B> zEGLXb!QdzGFK{CbzOVL$g+hS}ng&+ahFw*8g?3{Jkap|Mlql&jHT_H-S%}=f4u%2b$nP z;Em|}KLE7%|8B4j24FAHrh?C-@Bb)xE_fQ)13rtc|1O~T|I+)P41NJ!|Mg%F{2n^} ze*(V>ehTP3zMH@=q1(R`ycX;NPXY1)coLAGzs}hEXW%=)LGWqx{I`OagA+jK#eW_h zU-1B?_qV|f;Gy8ZqT|0Ad>i;y@Cxuxz@MSle+YaR_6Pd31MGa8k@;)%?t2mX8--?ED-nXab?m#%M{E zJLnqhB2Le@s#l(IvsO_(P-|8mtR~i2m_K&Mk-6r9Q;Wx&3(r4xu%;LFd0@#MJ#}Pp zenM)r#pD=tj*Y3Q!r5MdCec)C>_|;kd8SH>q;=d{q**mfBveyvQJjhGRg8eqOrb|QI zvCIRq_%?+eF3;|D35(pJgp(un6<;Q6$I}})osBq`4a11L;18m5m+PlR4-EqyB{1ux zF=})3jb2$L`lZFuwrfrsl^%-K&94$VaZQz}%qlBW^L-?wp7zjoJM3iB*CmtA!@>=M z`{}*6X8W(Z?uPx@Zj|r1Ny8fIwrsEF8~gtm%+Kh;@p#b2(F4zGF3imxay@cq)+*A# z`9Dx6TAYlWu31x2EovmQz3y4!-eW-mfQ#UVtdNA;QGmA|!PebkG?JH3#e(oLdAjMG z_VFIqInUkUioVpvHK#{KOv?9OjmT^g6|vd-)aF7wQOurQ#nBI%zubKhLbvF-Nr#P% z5gsm4OhXbPF+W(mxX5k@2nPeI1KP^-$hl?9(a5EQMU6m!sia@&nnrO8bJ^W} zqw79&i)gal*6Mv-`dq$BNZ8A}EmEf{lSI>0x1P2xm!3hV+{#OEajj^}w1~@T3Rin+ zRG*lp(w*9(B#8r9ZXZsT>)a9%j3yX7Yf4O?Qqy_Yid^+f{1qk{qcwo(OLp`_rLENJ 
zX-g@)sRPw@f5vZKW~63k64r(>7LVm$V1}$osi!W+le+;m&XabL31i=rIw%=DkR(1B z8w->0Rs>H}p%M+Z^=z3;#Vs9MCO%&FYuU2y6LbsJ_zK-zCcK@l3s-N~)WX3G;ysJ} zNgUoiOqQAIdo9O~VVTQqc>}fFY-T)OWo=);Xl{MXTSi-RVd0}?d7kQ<<-@->*{_1b zWz6BlW-aHI5N4_lj1d+r9y9J@l`^ZfW`a;5&%IUhV*jDDnhz6QSe{8WW3E1jZd_=! z^?qA&P1bC-+&<`LYCD0jx`rbX7yH`-Srkb6`qoQ;|ESjmuBZk`N?*{Wfh)??I!#et z|NB@SP+1b|z52xj6NOtG(6bL{Z9w`zI}|L&z1IJ~&MT__2A%(f;NjqR(e>W~UJ6cu zr-2Xy@FVE?uL3&nuL1rX9sk4Nz2IMiUj%Oh-vGWId^PwLbp9c@8|WN>=YhLG1MCL> z6g&Yu6nqSu!0&=z25$!61D3$;;IZJ(u?>6}C}!aIg8`TWUk$DSpT$1#8Sty%UeE!{ z;AU_W*a>!kzr<$n3*hI$61W32z~`|Wd<47$=={Mx&>4g`fJcA_fiD7Y#*QFgf?eP- zK>h?@4!#7u9$Uhy^210Q#V%vgVA!1%X=d+d^>E{;)@l5aL+%zSTBRGfQ`yl%GUM-N!p2sM4v8ZH zLI6!7diuhk7Ag?4GcNeWyCMatggL_JSMg9u9BXx)U04)X^6gFsA9YdEZPHW&u~LpY zxZm&E;%@QdunbbmOFTe))G4qGBJ^x=3HHz{rIb239g3)<65-Phk|o z`*5nhbZZxo)?Q(#taew_^;RP3i!z6t04=WatSz{$4`{Ioah8=??qoK*V_$c@v}?x+ zdb2y0*EdRc@;$vo&sGgtX)DDOiC!MF`M<@!XW7R-K3n=-d^p|3IlE(jrKsf*qhmIE z{bo$Ql$<)?taYS#Bal~Tf$(zIFJ)*@;xR%`z^RqR4`TL*C#|=uiCQ_y*#zao3^TH9 zQBe0|JHXDy=aNAa?^p{V6Zym&l|x<{>x1>ggtarfLm7~I`J@v_E3vL@&nvNaZvCoeo7`S6{#@U@WO(35g zgg{?U2h?E(r5_PNIn-znFZD?##4C1tk)bK$#Kj^0$L5F>8&`F(2N(AC(^aKX-^#xq-D>GNQiFLu_X<$b1ip-OrT1 zFg1n1=ZkKb*3`E8_|3EfA75!gzqdCS#BS12KU0%)AvU4!^Sk}0riA@jjjlKgT?PoF zZl*I>o@rT0HMyVNAezW+dwj|FnDMsBb57sw4xbTbQVgFufkg-0<$Zrq>G>lO8@^hm zU@MG(JTXi@KwFo^Ma5obmh=&KDE&pU&Mvfvz3!MJHgR7Z49|-r`889e?#mTk!h>Hd z&@W1btX~o+>?ePY-W_!tg;|?YAt32YTM{?oU{bIW7g7xG#M2gd+$tEDYiWH;=1)sb z^bSiuNII$p^SEK4iPhl0Mrjsr6`mG{9uwyH}hC{^2?GVjNf&Q}EX6un#D5$%y#z_V|eg%V3gK#DG z@4o%FPzziCSA#xTdMEn-ey>=5GkX8y!1q52ofg!=+tKg83w$-$2|k7{uQ>nj0$uPo z=f_*(Eq;QP?Wr@+slZ;!wbJQ(}{diFH<1@!120y@9%SJ0b(89W<& z4R{NB^W9(_JPdr}HMkf6?a6-$xCHis|4BVw23`tOrze32{$UvBpY8M<6Hgs;RzJ*F zkfQJMl_);*I*e6JOy^_=t-?!yt=1uS11d;dXW@=Rce&WU78C;i7d-Upc)zX=XZJSt z?c)5>ZI3g~Wod$EIaS=QG>!+a?20;=zY(P@k)=oxKlSA7-=a{=*`ZBaON^kf27Y64k zv73%V`h4zZoVV)lcY&j5QY2NpUo*f zuvR2hQ*>)-Uiu$Vi&XPn($vfAzAMbiNN7u&(%&tnQLCkc0~5_>L;FZ)VqzP?LyymNO_Mg2o? 
z6b6RxwK{HcWdlbTV1+5$~ zTw5*eM>ldN{6|t^&Dw}R?q+p+un!*m{PB8ZQ(XAt!S1YHK3?Z`k7*?&vgF_n=%6FY@pT69a3 zeqk<2YX9CFZy`&|@TQxk6xnEX3lft(NGO{Xpg1L}0G;lLLCwf@I+7D)+)QI7r!!GU za-?b|E`*gCZn%9RG61wWHWo4C7P*q(m1rc?;!sUlq#t8)M*{(rMqmcI|(e-C&RcqDi|`u~3LQ1B(-htU65 z!Fljx@FegM@Wnte08WE118>6~@K$gqxCZ=`w+H+Mc7UG*&jKI725=EP0(=5Hz$#GO zzE1+JxjzOdX5Z~VvHG-*{uSVl(f{8K6o>y1_^;^yuLC~_6np2hlUTj{mXWnY zQg3`?Jm#T*8?n?*(}9sgaQG2v=9YBRfpUYjW0WS1Z%_*Ht_l@nNGHsOUi(6~DkbSM zGnK1!CuC+EB5Cr_s#r6Nv^nHVBNkT)9tuT+Kbm@sdS3nd8=B z=z(UKDCBXKlr%$ueQ9<)Zo{uMtLs&W#pgjvD;yf zX$_|~CuKA~M$O#HYre#|kXI}&zs5XScdaI;5)>=a3UT@3o7rvW#^d$T%$_~#o8Hzw zJiCWg-_ah_ZzkG3CQ&xdjn`Ip!mf6Y2fJN_nB595B8MJ4?#A6`w3_+QP{uK`{U|+z zVScTpn11Coa);}9m71df@>gq0F8$=n z8byvNY zg%;iAqZf(KI5NpD&8|v}yg<46c7ShAKQpb1IjqOVZ1fs;hzdPT@o-E#suDpZkeyi7 zk`4ojaYR_B2J(-iI82>g>tnK4d5Yj-%K0u#5dL>fl}wPm$q zg!Y^+Qq@~Pv*70xGP}ItA|yB*G5u0o^@&^@D2cOX)^Ub;vCVxA$-$1vB6GMErebS2 zIv_ANZqlRe^PaheT#K!lq;Co-+NsOJ-b3N{Qau~K=xap^gcPwirPG9!F&e2RjoPg4 zPMJ!rAt%)xB&fJ$GPSeDyERB?)wnny<+;#F=AayiY`g4!sBd5vRoSv@%t1qmR` zTMMB=sZ?;8M+>ro>Hl}35Z@}jI9dPyHgx`{fKNWk?fri~-|_*N0`Ek}zW_GCmw_Kb z&u@W0LB9|B{&V?XXZQU&SOt#)Z${UDHuzuY`QHT;$N!_~`geoBLC62M;5)##gRcZ{ zMc*HQK4^fCqVwMao&=r<-gpgh|G-}GbLjfd2hRhar7o`ls?&_>_4o5hJLdJXTYR>> zvBmS@^7VNyP&O~l?!+_86y!8l-8jT#I_%jpZt>Q(zg}ERZ(kog)ym&n-Y7p<`h9(o zq0C2rcm<{-u#2&Iv1Gha&PzJi|E-oBPBrsuSyor@j$~m;?3=-ivzs}ni3~~HH&rAt z{^KWs&%S1o>;>JE$D;st@!~}`hWdMrc5ly^&0Y|7b+kv4%N|55X5{FGX(FEmJaT%L zO4=yaOB^Qk`!h9z2tI=iG_#H6^WCvmmQJ0V*MyKy23p^hS6Xm(vc1f(iaeNDU*(5Y zO422jLTkd8>BIwYn#8Wb}EQ$s{=T!OezE&YGohg2$d3my3t0>BtyF($ve`r&e?-Ly7b2i^VQMGt6aB~kGR^)XORF6(t zHEgA%Nqd}V89oI58I3!vinG+@q!Rrmf)T&d4(~hWXmiwX_mic`P;7>Ix4nk@2g7mx zIP(w}3WXJ4@DEWQbEju0cz)=ZM7}ob&D2X#H5F9|df{`MG-TH?)sEqGZjhl&6IveA zw^v73EtnjjQAd&svohG5RsUKOe-zO z$>o~`$=D_~mi@w;*0D%9);RINSI@rU>S-ENK){I=%x_+%ilQNHXl{-1ozUvm7~3i^ z@tyiXU!IgU;aZg>@kNHU%<0~=96Ey6e7qZLfhamnG8ci;ZJFwfxxq3wYL@$aYf@bO z+=Z=VRE5LQJ8^%QkgivVtNsqHR-wF?*Z3!9S{ec9+LR=9$tccrPAzt-KP2@|BIYAv z5}8&33;rarr&;#ZYFy53L$9&MCZjqzP`ILxNpb2m@0MbTnR*r4)2&Ba8AIdedVDOS zI;x51uH`pPB+^=rn*%ftX)Q?ypag}z)v}7&5)(xYra)ip!LUAJc@dI0dlp>~#Xw!C zMbrqW)i^xAXv>>)0oxBDsk%QkE>^;4qICj^x80%ZF@?q})`HGdDVY_&xXV|qZ4SH^ zY%y#^zO3s`wdYkcv*9hrVt;hZ=EUBOHZ`eoW+2QJ4Br~2|L;LRUY4GW{(so(%0Ghc ze>?aX`u@woQ^BvG>z@S=1wVnl|0bZg{yN`pKiCHz20n)Fue1Ji#@`#k(}CjrJqdge zJHRsd9Cmbm1U7^BgExV@!Lz|*z{js4P62ow7=tgP(vt zqS4a*wgwOn0?4?KsQ5SP6uR2XI5~ZS8t5p^-Oc&GGS!TIU3xCavT{d_j_*A_E z)0k8mAP506fQJ{n+MVcTvc6fOZkfA#4~iC@4dshCueDGKeIqnUdzOAf!Yr{3mV2-i zwkMenR(Ag@T9M7HroOCA3&K*ZVbBtt)+Sxc9(1L1HiS^y5c4I~MYRBdJY1-mY8UlJ z{R$r|kkc-jrm@f3->RisFWAFs-0e3|hqMbEZ_>Wf@Zi=Ps>2h}Y`E3N>hPrZWN2nwCN*R5xvZ|(D0h|)bbAOaMRLcQkk$5J3La| zEb494#%I-lS;4xK_n;`mcyo#MgW~+6OybB28?$#b_NY%tw_)5glLzn)PrKHRYHFd_ zlfL~j(6H&^M$ED;x-**GizmIVYbZ=(+p<2pV~tZK!mrY9WIbuMg|J6@OJ?gjZW{y^ z;0IlziQ0P9aCXP&h6%6gb^GiX=?H=Gj+tynT5ST`+2u<$|6KlMH6qjdcAW2SR$kM! z>Nw0BSUf6is<3Rv;m2CeG~VcyYxc!LI|*Hg7u7~6Y$9%043n^Dcmg>pwJ5wEUR=}? 
zR(Dv;3K5F5Uqy8~8nD~?^NVJs;AhIp&(wWUrwyH2r1rnBp=bG>E8 z0tXheS6+TEbUCZygT98sq#h;*;)WSV|U(wW2cmv67;RfuS^VmyW zfH9ktw6utsz$9JkqQ2(_EvaZ%l)+%Ey*QelB^V=B>Oha;yqA!ZDVue0Gqr|b2?|x} zt%6Pg;ut&@U2{VR=G-vLdk0K@=QD*#poNO%%1cF&#lPnqyD5U^p1U$Lrr5&m_ru1* z7Ic4v59m|Ks4zI{y+)UU-?bpBGPB+>7@TFn?x5MVHOx6&7k@UW9^8~9Rpez&*nzAP zc|KOpbg*Z27cw?^fit+b*UIPJ`hA&dW_j=DS!H~9GN(1wb((T+-p>iAQptI8Qm-te z$9_h|6!HR2;npI_=O%qG&_(~uyi34VZOYoyUEgPMlFizC8L5({+#_ zwbz_+v9T_G<%dRKB^3U>WTUy8D{}*R7 zI?8vZ^%6Up-CXYa)ZgL_V?y2PQ(SF(G5zws{IyGrdjEN`*_?F8`T#vbzA`#Y7nZ~V z`c|J}@>|ki%Nu2Clz#smh-2LGGSn%S-iQ-li_~$%aqQL>4ZSB1Jf3A6X0t7bRZM!*%djA}FKRW+i;Njq5;Gd)C4?rJ$8ogdV{yz<7fb{)eL&tw3cmufJ>-&F&uCLhs z>)^rQRp|8c0SNm2kDWijx@2CEf#=)GaE)Wl6*eZ%?i_yh?w7G`sl{OB0aYZITH0Ll|7U#kp ztH@8IaXMcS$*8GGyh7Z+);np@iP>}r8<`mPVVWT=Cptwe>lcDdQZVv*(RJc@w9 z7E&EcpNKnonwEg8LNi2VnzX7>T)hsKNCgm)Y=hsLBU+xW%KCsBB9${@bg11+?fYyk z^U^;Ap&eCyjH=61gH;e*Ojy!7YYUGwjL{{Md%|AqAQ{S5n+S*1Zi*S%hiZg@w_5q~ zLV3wc1Vh(V=cDc2Gj3ObVqy{Mr+eyi!sk=``4w^C)Ve2oI<)6%?4_u=O5m zwUEBP=bd+mol>Yy9keF5N@t#!UR^@KImxM<0qbWeqP?RG8D?54$DBh!UJBH&GygGOHGyGR4xUNY$g#Uh%B_@u$Ekpv^TrMW0Y*i zbhhIF=cV*^$Q96eTKg9{n=F0OH{9AT zpI|k$<}YovPAcr2fqg&}Dcz>vI9)~!Zi^KfFx=xz2AkS2&W3}UICYl6hFJn?Ie%Gp zR`Ga&^0D>Ys!zciNEs+BppW1{HbgojgactBv<=RD)gf~l8#XV#)@0aaSI8`Nm(L%R zHbzRV5ZM50G=3ZHj>QMeHF5Pcf2x!E9fK{nCW|nRka=GE*__aW>J}M`&Rkr1JzyJ& zY%0p*ornqBG#L7m8JCo>b(<{tT()$YW zm&wkl|JL2FYxp6SqPqt(NpjR~RC4hN^0cUh1_=!bBi# zWO&<^&q%v3CoS#i+BqTozJ@ZX)T3B71!yeAGPZbDmRfX7$qf`5nQ{CSDGd8TvT3jy z-Dh?S(us}UUEUvx#v?|YtsMLP9I#MKLCfrE=@8WM< ztL`+Eb$`Sb7x4fQN@9O3I8)S%i1Z6T9S_(~DE~{x`S!2_O6E>~h61uuo!@@HQ=wJO zEU7*~sr_O=ho3&SQz(91O3`v*f$0>MTwn{H{rFJhNCzSlhF3gG^NjUOGwm0VjpEf( z7f-XjqMF$YUTTrAI=Lf=4<(||y6VG`+Z5Lt=C@hZ^}cOE+mSsrwHlkeQxx4MuJ}UA zlA~Io9)xKm>>q9c#tGb`a#k~`sx!FQ-->A}{{M?mfL|aznDzggyfXX-bpIa*cZ1KN z|9?B!4W_|o(Eq;~TnjSr32Xp&1D)-62|OJ9B0B$jz&zLqc7UHi=U)a(;8W=Rp9C)j zF9fH-N74D;2Hpz33p@_|EV}={1joT&qU*m5ECHS0_b2H59|EVrA6XYv%Z93V7kajQ_-Wc#IFM8$3`*ckjtA+a)%RXK4`UqD& zayfgisb=~}MU%jb9pOs67hFRnZ-v>^7gX(xwcG`tQvEIMvc!B@n!~$|>9cI)&I8W< zf|88~7J_0Z!-_H@RK9jAqCw+!>t0q^CAZwx8;v%YptmOIn(wizOSH1kh+c*G39b+ zDHk%$XHv&DwCN-B;g_Tfnfzt(;s_ndENK+0c?#JV9a?&*q!TLtBGxw)64K54DsR57 zSF{Km@{Qf+$;< zXdVzPO2X3vC&Cngh^NZyiKZpX*TQ5G7u&)j7qB4-Q3yRqp8jc+G1&yPe~UOZl0zX# zX>V>^SarQKCxPpU{z#WvgrQ}uhL5I|66qZzX9I?o(SHAu74BIp8q$a z?+2g%|BSx>8gLSP20j1BKo>k3{0zGOt>CxO@3r3l{a_lr58ZwZtb&Jt??k^>-2dyq z`_S!mp58OS7lQ|bSE1uy3w{#){yV|7po4Dzhv@WU@Hf5=pY!FvqA#^U_;fb0$oVbC z9XX-m`bR|IZGtPx!WzR^N*f;!23R$T?r!?H&I_IBRu;0#DV02Axov&CJDy3kK8^=nndQ}F531zWDB_05&hR4d zhoj|Evr)L5RIogA6{2;B_5Do_F+i4=K$yozQcR>qpIA1%b_0*oJ=&@t0%hn#oc>6b z2o7}X+K~brj>G?UV9uWQdT$S}tPu%Yag?5E2i|B;#4P%fGE=}2GTWIX({#xwE4{}O zbZ@=eC>D;008v6k8(!wf7*>AJl2&>w0U!4}b5F~>cp6GgX9xP5XU_OT>HK5T*bkq$ z#e$xw!aN59JG&_*MLXxcJ=nsA6(|*0wJ1tgDmrtMXsd;5U2|o9AF)m4Hldo>8HpH` zQ`O-_D0t)Wi5nDpm2R7+eelGoTS+WFcHroV=HU~TY%c~k%@coQD*xr10 zeS@h)H9UCg&;c_Phm(cfn&d$9!o?n?;c3pKU0fA2Y4{-BYU^D(@CXLN5f^3S<-! z3KCVd)m4hv6e1e64s=s^qXi3ZR2QK=!$U~RWj;4KRT9vo6JNGv-3P!ei%g8bf1H9O zv4)O-Xy=PIh2UIdj3EkUB?uGGjS$`>QSE-wXwX*}rtN!y1Vct9tn~pAH!mc|7ueF4 z_WRh{_H!1FAIn=$REJ`=zTnqya@nQukQ%|Q$6@6TuDf?{AO!KU#;`qr$XBrR+SPO= zz^4(D2#dBVUBBpO2d!D0wBUU!pJ7A1VPiZ#X&}o7+j?NBt2ZD^! 
zB`-|KYRmI1)^|Hai6eE=K*Qd)=$zsq>(9J%H)NMEagdNXSjj7IlF11sQWwaoS=f8B zpjEp_6AE&LgXWmP+muqVCzx~bNl2@(We$lX5K2_+%b~TkNKh zJ7|C}16FrmZ;g9>%$PQl2U!UA#?f(-rp%4x2TJ@6;PN zn|0UUIMy?N$0?y%-5p!!6~}zR&0TFdJ|tE`m$$AT*1>36#GvS=7<-rZ5byzT$_cwY zc1gl|yfFW(Qe}HYEVYXtqFC3?Xge1!1l>1QA>0WL#4?iP`VTjIE?R7!wwd?5!L3|u zv%-aha_(#ksZpb~1RNm_a4L*R;Y16}GoWJWbjs>HJQ3Y_9e4QZB)YBE=abUJT1dtT z+3vhub4KljofZnz7xkT-AESv(sl!dpWK(RK-z}jGF4vuseo`*^7THaj%N6F880u^5 z9O;{6>lL3u!nRl|-S2eE>LSC4NYXk}4|8`K!J7F8Gbgi3w-X{;?DCZ-K7PLY6IIuX(tTXpP=(oeQ+Qq6 z(+w2j7x$K(No&ulWtx*_l^aVTg5cKT6L3GZC$05GE%#qO2(ot+SqYX)V$wm(Fd}X=a>0Ua$r{y z4l9-`JSP!bf{+Wle!@=hQswT@iPz}OCR=rfdoUN8RK^j0%^05(+ah~+c%DG)+%7gs z+NSZ2dj*vQYn2@rR*g;<9`s!Md9ud+nln|_Y#QdOOV1> z*?fJ7h$;zK(wZC{oNX{?qK6KhSlH|@FYtyehp`npC}|_^5RJs$vS!EpRC=SJZ>S^W z&nF!~37sQ2ckf;`jN*8k?YY>NsbTSky?Yf>IvOTr_GoGaM0{jIBxtRXa7zm#Xe(5? zgG?&g`DpAeXLS>$83I(%6H!TXIQG^td7zHj>H9fNRustfp6N+UsT4S$7 z%P5#ESJ-gG0BK|=B9DNYdBAyR;Z*nrb{F}LC5tw@I|i&UXUR^2LdlJ!6RaF3(J@3@ zP0j?=tu$RhJgBVZic928k;*Y?nZ;}YUZ0BKVXedNLCCFFFnp)}7I_Y6;W0iTxpP*-NX+M-p zXu=r60oj#3?1oy^8F(7td0NS;W~`V`&=`j067{Rl-XvoFmK=;X>ah{Y9f#x=VB>QrN6uyiW|LU4wI=IWTJoyy_wk?zD}6Qwe;S)gZ!?wJ1nPtkq9 z5j|M?|ANtFy*0r?pG59|o92Z|-|HQ;ZsCnzqU&KFn& z3*aW8_<%ZVP_~Bmf^P>a;L+ew;4iQ-{31|%fqC#((D8lXy+APrMAvTs+kDik^bxa| z${Hlkkym0kUb$+SBc5*9kNB)2`}x;7R)Vtk6*M-c4{XJFV%Wn z>EcechQ(!-yhXI*CpJj3aFA5!a>Of}#qYVd2`J zdfGfRSZmW*Iy0doSfZ)h-L|ErP~9P>tC=k6oHj6|;7n&R+l9n!jO^u}X1SZoTd7X@ z4wh~6*365N6(U(AwP%Q^zfHL(icut{N_oZCc}p*K*YC5lh%RjvhoX{#ZN{%=U*=W| z+RH3oYd2D0M_lE zzmm>n3A=?A7hbedt>ZD!;-o-nTM$Y+8Gu<@0o z#c*^dNikfSln_xdLsBT&9gDb!7#YW=1$(WA6`#b8?a?wrEVkv9FvNtxTZ&O2u2eRh zXE~jpj;X_7Eui^;-jmZbzc!f!`R!!V-{xS&q!`Ad6uKoVwZH% z-lV@1rlv`cO}uvI%$4L&el2MjQ2%je6bFrHc$H z`tcP}*OhR~RAL(Wx*@@dN0@~wbT5{n7OK88a|@G5jVSUbyJuyrHKRskA$i){q?!!Y zJg3QnHNTw<)->aQsramN9))_A`glaT2T@(`%3f57^QnQ zrrAU??IJ=#ItgHQJDKguk5_6qi4B%igh@SqQAQZ!oRT*1&6fW^DtWCV9TolmxK~tv z3tj&=!M_CG1D*};05^b#fG-B0KHqgU5kCK=*$acokR$e@G@>@JuiT z-hjR@8^B>O2lj%$M&JJ&_#^Nu;FrPM!S{k2z{9|Q#Rl-x;4R=Da3`1oj{^S!8^8;^ z9pFRg{qF@Y1b>CT|7+k4U=!R59uMA+-v13?9qa&~N6-IT@CV?Vz$3t`(eXRrFn9|1 zb##2~J2(k$1pf-Uy%qd4ke)9(E`#(F29Ca=Kg-2;$~|>%IOHsgTDXhR&P1)~*Du?S zCTOG+N$kVYDt26W2}w+~0&6LF4(2=T+v#=#Gg@JQ+E-X@ks4wKrI8m-@|0K-(gIA< zNtNx^_1Tp@Biq;AedA0m6i?sVSsSwJoXIj7dJT#JdveaWO{|-%ou(ChceZaj+nFie z8ghy0gLyt~TTpojZJ89sG%~lBM)j?dRMDrlC`*YV<$NQE9-Jy=O13!3|xE9{w&oYqOLV2ZWd!x0+RP0V$_rxnMe)+-a zt*N&wc904Ee>Q9OM$QwpFfdq%lsElkEAO=LX6*maL$PK%v!lJ`;lM1gm1*8B^2`Qj zkd6!1TvG#tk9T44w;HTnUX+{stXp_#*d8L3jqyG8$xp_TgcPe+bZ2F1-p495zb*Of zsi$YH`}9>xeAlH`sSAe0SEf#cRi2jB4iAS7{1EX}z-?}W=>3|(;Q>L8JF0cAMmCid z!`dpnN*}C+T=Y$sQ8&g6Pq`I6qf|ze052+97D=1y?IN*qayI3tr&g%xq41$QSY5Eo znTLwVuX(u^9?sz@z z+wo;8WK!)+D^o2`CU3@U89%3ttZhu_H7iqE@w1(l%ZjT&DU8^@iA>6y)`XB{JrJHMc$vea6 z#WUVlY5_9S+@YDaTxJyc2BTXJcaa-Y&ZLi9g{N~h!naA%xhM5iGn?eihBB@&85%%= z0G{GhAul>U5>Pq{s7>!V*KOwJc23^#8`J{#EB1!`Hp3;iq{o-y!<@@rsAT>k-f`>( zjNF=GMwxx4X{wE|gG{wb$mO^(io0=T?P<%pt4Kq6acqZLP8uUgNFHivj*Piuv<`Vk zy0{!Di}+%n#v@lL?n>`h(4p>@XoGN zpHe%rrDQm@3n65DM6OrFZ-Xh7-L@J;$?p#9Nj^$3=Bi)%tV}(X_-iacu61{zov>_t z{yzE4SVa0Hu6TXdrPEKSFALDb_&pO8#h`UB-X0oT&%@a(PEv59-di;;* zk|Uk;Fq*_C%yoXYW~E8~Ady-6qim1*v$E}uCOkFYSL{>`J1t(Gm_KB~k9*fe7lsd= z>#pi(N+P~>x=R~I)nus@{xCeRu%Ti&xp9p26byE~eX-9%>+*RCttIz)6?bR0fA9YN zyZ7F_d;in3efwti@1NO!^Yp%(h=M3$?md0W{{4IR-E!viQTWcHRS9CvA3J<}#zv)D zGe2_0gdN25>=e`A*`6iPD-N2; zkIrW0ewgQPFO)67jFR#tdm-Tso^w^#&PqBeNfqsAI|I|>+qT)tKWw6BXB@xf(kIF? 
z9BYyq933GwRFjl~rWKcAnOaQwnwX5;E2L0y$x903tXUKZ7FcxYn_!_xo#~<>(R}Wr zMgcFA@x^U5L<*v_7E6;4}^m;Vv)y`Tp^ zi!T3d;9gJ%e~T{v-@vbf_ks6Z7t~a=Dc|p# zcFs~hnDl7%A%?(-xZ4zK6po;Ae23&Gzw0)olEUH%|Kg-#xTmVKh$i1Sp>Cg(RVB}^ zS_^lpmL{VAivqw_ud(Rk%b&0i|GKu_%4@Ltt}*`VWnW0I`xg0MTDGYI){tH6hJnOCVsik%BG)iQwtzVqZw6D;0lL=(DtnVkGeQ~!Wb+s@? zVn};mJm5`YUhMyVE-fg z>%@vR4DU^>sYJHA>qEO#>uQIcJWQ+oP1#JHAZ7+4b%_2W%f4-Pj#ZJ|T z7xYAI2G>m{X^56fkvK?SeYFo}g?yi!DKs&?q*>clcIUSfVtA-2`Sf#Hv%|K1|ABUj zYWq!WyUFcUZJAwh#xB&&f+04sxGJv5?9|@I{+k03k+${GNU5 zYm3mP3qFC_agtK)iX##t3 z*(SuAvPnNxBI~Y=qol7T-4S(w;2&w4Tg)Fh72V7D`OD*zeFAuePMn7(jgsxVt*O}6 zs+U=Th*xC?s^=E9-*WIfBD!tP$=LU%CJV61BS?}Qe<&5^esThz{8cufwh5nV8-3x} zfG79^8U0~u_o8+tm=At+r_4Ql_@We^RSPp znX7xApIH@W$KSqb*vo*<*_zz`XNZ%MjR(X`as@9oY@xh)N6 zP4=9!NX6LIi3`87nZ;e=uHuLQSC$AZr(q~w?+Sd zj#qI14Z8n5;055nqxb&?I1m0Bo&PQ10+64-eEdI$e*ahCqu_hN?cmSQ?SBpY6u29# zgCk%!cp~^sbp7Xpr-RR+=l?i(CFp}X_+s#K^!z?}9(V?r1@iU(2)h2ygXe&G@EGtR z^!*0$JlERm@NPCq**>UxycgD=8u=wT^t=99 zY)+-A6$32x$+-hALOiEq3>5dUxP{fd;g}aoDsI=8C6H&vg{L-OF8Nm;0N;Cc7B!CwT+QBAUVb`2VHCEjeD_g}>0+dmF)m5&RrlSAd zg6OQj-QXw7$dMRzyI&;9{ZjT%q32l|jq#v<&syW2)pl6*+Qng++N9?82D^gz!Y-aT z74|7JA1LCcmSKt;Jtrukm9bQ)F5NcK#2r1tN*@_Imz*gIIOSwKc7WsM(#Lb#QK2Dq z$dQ%huZHg}acGI0$y%wa5MwN?^3s-sZOn^AampKF!id~L?F+YKmcAdTeO-21Xv({p z1u{w6KF?Yu)yL86*kC*#H|;r>uazkaUm=&DJUM0WqtkUex3U_g@K5E1fZ`WCfC><8>7?s--n0TmD$RYvrqx`?a1b z_W$&~sbhnfwH;%YGh`RdGd4|reu?c$k9~(Nv=o^BZes}Q?|GFbiH$167B-Dg zKBzL4r;(jS-qL<`i?ovGZfnVAyjEZDmdufy+NYC5qAQ@(^9lC&q$xt?`d-=Le1Y;p zjCM7eHf1)O|C_safwS`}>;1O^f+CS!X(POFK@dCrl^1uV(qk*xb_Cb!Rr#c}rzEg5F(> z=9YzDE9#26l3r=|SH}8mAYj1KG|nQ?>Kp6F%51Bs&w{?yz2}Qd*`Bh|dxLO4DM!K} zwKR^WsLUojn&wt!HxnI1H{F@LM#~D!{ufKq)qlHT)@9e9K79G^Ya13HZj)Wwv9-Ic z9G5Fgr_yYyO^I@+ALdw&I<4GKQ`iNIPBAjzL&n`ud=L$>{>@G~!(ME4HP>*oURTCV zr&a}@Cua1*ajp)VB+#oAhnx)&7vt}|)D-gve~>89HFB8938>TBF=mr--vr5!luYf| zJKKuegRO(T7mvM{e4j`wo)%$s;LBvkML3*snCUL`-a+`OU{nl|aOzElm)Vy_-egrIx01kfUUNrEY` zTsW0&_$VKE;J~w3^Ssjv&ER1OsgfF_S2qxBTG0uqZ#S~qUyxUJMethiYs z_T6a$-F9RBBr+ z&W3L#h9e2WwjzzgX!jfg?}2(JFP{GA@$1WRcWxOuZ_Ll7_+^JMq8agp+(E&|1JXTlH}M- z{{J9y|Es|nuLF26-#-aHgAU-0;J<-u!8PD#!JWaKz`vspxCtBsJHTzh3(yD5flI(` z!EM0bp#yj`I0}9ad>K8!9|8ILp8*HJy}&)ezn}~F2$%)`3;F-U;6vbc{a*>54W0y6!Nb6}kpI64{tmntJPFJK-7oM+^Z=hh_Wv#LOW?6!4(tb;!P}7i ze+Rq@JRST1coFmx%^uSv|JM`(`41X|;j{n1;hlST?cdkixog*+!-sq02d><6Rd3H# z*YvL0c_>NObSV-Z^5QJ$r-V2Hi7vv1RPwY-)FWhDQ{aZZ4`G$SMa?IBwG2Xrw&V#PpHC5yKyw3+>dY`;*UpAFi39_p-TSJ9T|r%Rqk z(@^iGv!^XpQJ*?5TyM_ChT`<@49&?!=U)o=C`E&2i(AiXLui5Bs+-M(@H^HF+|!vK|B>^cD7E+bJ)z!qZWvm{ zxPEG|qC+8?N?J5r^g12zud;(ykHe%^GIL9qD0sS8 zt~J9~l)OkQu`IB1+E8^4AGI*s_gon3-fAXXhLrkd`o|Q*#h~y;y~n~6U#hlbm7dt6 zkRW`oF3s+knk4q4qH9~M6w`||zmTdXmG-I0LC*=Sz3{*@JiQo@eW|BAS9_FFuH7o0 zwlg5xh-CdjM;bG!3Fzgb62f@aIAk|gubw&c4ym3 z8V(_YrCHAQh&C@aFtMbdIg8Y33cX@118 zv+g*7xTE-(c)KiB)9%1J?6i*S9@Bzm4gze%NaLJ6$+W`|IlfKiLCQ{**FhTB(P z@5uislLh14j_{B2aZ*6;NdGL8#FJFseQihehat&Rx-voC-DOApr%fjRcMwmHdHH{@ z7gb+_tbY*b4uEeU*MA-S19&lb5m*Gf!8eibUjh2yFgORaz!Wn67m({e20jYj3ueFo z>;>}u59j^m`#%pZ1%Hi<{~O@f!4Yr=@E6GUe-54wCcsaCw<7E7Z2z6XoxppL@sEH< zfV+aPBG-QuTmyE4T_D^g_=m{*N5CF%Pw+it{J#Xx1iuP&pTK93@jnbC_a6lJ0B?jw zvir{h(d#H!_eX0)_2Z*?WebaKRLlp{%~X@u+2xR8ct)~3puRjNeGvXYJsZL^7QJgD zXSD2*{pPa>t6Mg`Lm{jrO({oj=?!gLca=ue#kD{SY^_l!Wvq8fgS2VAS)xy|gsqe=Lwzc@a?G3ya8;|-?Q^yA@RWcbpX|<68Z$GY06A@0eAFUJVC;OOn98Hpj2n~Vm5k}Vwo4exNZ!Jwx zZR-Jy4bmFxv|+p7zWUH!*OSZVZ{M=TIvnoYs^vIA)8uklid_d-QoXmS&3{d8bw3*4 z=LU_NMdj-vLAgii7k$R;r92RvIiIj2pG0Gi5>3wuQq4_wa`dxWOG`n3YYQ}c-{9Td zpEiiL#sS2pS7wS$Tlo_#YCbe+ zZR1!bF2tPTAcfY3?}k>7df$dXR$jIHhd0NsxS>1TpOwZmSDm3#o0`i9M5;)F1gqX60U2bx)P(cUkyOiTG;#`&4Sl2dHd=AUd&^=eg2bC6?b 
zy25Xzo)fJ;g3LQw^pzZ5k)SQif(0S97H_;Rt=mJ^7)mOJ<+Vm_GzIm#{a9JvhDD~ymFsBFHJJ(Xv~sLRzf!O+ zG=XrYPv!BUA*QFOZRY|u#-mt-=tHH5E(~;c0h_CEwQfzS|KXNe(TiavJ-lbDR;PP} zYbvXpX=n68eKKm`x-P|45ATh6+ufrM$34PfCq7D!19in<6Rq^gkp0-^jjI^B>@x)*774iD9cfC* z=_66CuHM<;UrZBUm2tU78{P@CJf4_SkXp1v<+Q0hq$8n&GCkyY~yc=&{-Ap3K z8o8Z@*V>D;Uqimz-icBNZE$$vGL(kdRb)BeQS6{-(Bv2wra$TyqB65aZMppa?}HG# zELj%&|5aX~mH+>*f(4+sf3p355_w;;|1#JG?gl=Gtp74_3LFRLf(L-Rf!Cn}crAE7 zcre%w?hC$&?EkOe7r`;G8%Q6ZxPPw)PXIGu03Hm!i4Nf1;9cNKaDVV!bO3Jzt6&OD zf*%7PLl^L9FbfWYZQyR;Tj&IC0xtuTpbstucLs_Pa3OdQ_$lx{bOOH#uJF2nXQK~z z3V1Tu3%-FaKym+dKHxjh?c3nvKr}QziqT4eFa?M~i9NyX2Bx{|c|DWkt2Mg277aK2 z%ncyOOxz(SQ)$|!5##aRH`{D>gGDP9v)`$&EZe5Iw^k8b>skNh0T%DgBLoXYx@b6x zi-nax9Teb>S5qhA6ehDj!+LeTAFf-^Gw~M*rZY%c-FD8%WWuzN-jYDRtm1Y|$&1Z0 z5o=RAM{2VL90<-qq_-O|EN+<*&(#a3zB&0Duzw*1;)P%n?Xtb&{ENW8s7SF%>i|@@ zod2~2KgCRQUcc*!m{#grH5s!7HLXzc0OuOXS}e3kk2B>0GB!_olUTVKu;REecGZF1 zd;H0PvDNwGh>;v|^*(&z8C_>SBf8TKtNYTd!r@z|LD;d1i^0s8JcLg>*J~^F%yN%# zL_O7l3PhJH(^?dcSEcIF^3hAnyoCtM?Z-YeQQ@q$WmJw?-Ll0WpO&M&V}wOpLO>7h z;-@3|vZ}K-XLNTDum@YjyIwC^hP!j(G!{E!yDcVF@|uZx+OE+)y1430gx0Jyg4V2C zjQyl@ZYQ_~6prB-WRcJ(M=}Kx>d{NM{Y|YLX-<9J#%x@iMTs!>I#^C=qe=>?a0`x& zuBC^Za<;4ep1DdoLWLAgeQ)SR7kIZz4#WRU$cmJNN#(T|qqzmO5vuYwGtxgL}C~rf1abeM}!P4$@LOQ%s->HP|l3>vYX;n%F z+klq_>IXTSt{hw560{h0Ih`D?R zzzMd5?A}As(Ot@}HSRd$#?Cx*0AeC<9wIp75V^t)j?_cO1Y^?D&+N2Sk7(N+IH60dD5zqD5N{H8dgo6rriC#X? zInKFpcJ;lO1qRD$adsTC_&=9rrA$|E%5m}N8w5i4nv)UTzi|Z&;r4yl`*ifx|B$zN zoU&c+sca1(Ynf{^`+~cdBe;Ruvzp>6<*N6~zbwZza-5ynjiQpXQbI9lnT*is^Xs>88OD7~o zxzjn!j7`#Q5~?hCrjFz24^5`|!AV=Fh(~Jn|I3h9pMvZv`G2pMUEhMd|0r-VxEuH! zvi`?`;{5Lfw*$8YlKm&ZCE#<&_Rj+6f}aBa6)5iCQ^6^4Ik*paH8TB+!HYm2YzKD& z{}Z|Xz2H4SJ^+|evi=W&Hz3ddHW+}*z(E2NZ+aK z@N3o{rzJ^YQ0nt3OWfq`67me+S#mEsdPPi(5^uK+l;y1RC0cqjqQNxOtlDakFb$AI zjaGP9-*v|a{hGbkkn7@HcmJXjgmoJjhexvXshmyi>QVES@(!PJ{Cde-8UQ?`;b71d z=|GB3f|ujA-t(@C_4`%I?ohea#<<&6>glXKKHP>H)RW<(n>DQ!TA}Z(^{7j-TN~1n z8yZ{9vw5a87EgDIiu@t5k2Gzm|ye~Tb8v-+QrpAX9y+X zra#gY4%A3FhhOwfC?1Z2n2Iv+4ra00Y6tY(d0}_fgA1@KC*ndrV|&$XcgVs=&fy$& zs28VuiA9f52kzAsl={kyKk;lH_aC#hEuG|c74Dj7h3xb1;zUPT?H#9!R-BnIHe1)& zi_^AedBNy#5c%4JnHjYDO;)7R!?UOj-CUF<6U|m*(`!Fh&hM)&LpmaBd+L+=#IfG- zXTg2PY#!b-l~&$j9kuS>sW-s)Q~kkOJytJs9bHr}MxC`e>lQjwc5L;Wn@4uqZfm)) zsozq-{+KqsrK8Pp!*>g2+0PPJ<9iZT;~$8pE#)6R<$LS z*%#b(1{G6rFFUJPd{$Do73gY|Or50UA!C-#QbDwVMN8+!T`3mS$c!`IoZyTxMb1g$ z4dr=OEmkWZEPev}@?9%jbf@B{Aq~s6v7!#KDD4GDO46%V^^v7t39yDuh;ycjf#YnP z+qp=?)Z0hwu9}$d+V#0EQ~E3O@EJqGcCLDD)8FGIXIc=+GJ8U&x<}nM61z-TR7j6~ z`ug-&-edb`Z*W#8KQLUj<`fv-3>!IB;5aQO-0_}KfL)6d+zK@e-!)@Wk%HAiHrhy$ zo(RMM)Zs)k;Lgp}9+pc<6#2CzL|Miz)R34Y3!bW&8 z2vtLwI*z+~aL^rT!Z%PAx2+%DGXeXp%|7r{ip-%eH$(HU)y|WnCSt7@@-R#!sbYa#w zu1h%9pFipbyAfrR9A8*{oITV(J*in5%GBJDnPwF=w8{VXLWbS%<^Mfio_!-S{~N#n zOoId9%gFoh1J4KNg8z=p|8}7J0JZ}8{=W&C|Bc}BUjBb0^8OWG_Wwb?pXKHKPb1TR z3cMaj&c6-N+5RVkZQwrOi^%q00KW|G4rIrF1G2qr_uIf$@G)fi7lIw&)5!8q1y2Dh z;BxR?rfF%o+ zxT2|(hWKYmcv2!;$Vs%sv#Hrh7Y9dF(J`16g$K)OSwS+cA#Vfi*GO6zQM zGH1opnjRP9ETLV@ePUsLVUD|KdaA_Mo+5puMPP%Amtq-E7cJ(_@hrTR$34JN!s4(X zCXq?G`idlL4w@AO-qSS^&0V!dSMc_KR7b>$=G~A}ZTz5Zq=^RJnP0o|ZkBzH;A_v- zPVSZc!5brsI)i}(202>-RhnIIn%(lIg)&B-D0^U&HYLAkYHL?%Mnid%xO_qpsNcw2 z-MmP+^_F@|pRCr3o8v0%vIzC{@7VC709l&RuuS7JGshiC_Lmw>*jPW#T9$K}`S{4J zJsDJk#;S!|bapq_a70VpEakm!>M5Hac0-@ydRPdmId;!%+86<{X2dJ?9AzZb%&g8^ z^hgL`*Sy&d$^(S6Y5AFE8$5pwR;C&oM#ZFY$Oa6jZ9+gvo0IC&UEbSbzkG1)0LU_$ z4_HOcdwO?fIaiT1H8Ki`POu1nc5pYai2cqmLhMd|SJsZP%lws)3rjNiPS2tuK)a#? 
z!{p2+j1!V7=@hSnvmFBrjD zm{{W!TZ&=ps@KoM0@`UGmMFZ#Q*g9(?Rc4TAR|}o@LjHbd5232*+>A>K;|i1YMMS_ zQpg9H@f`jfF-Kg`vZ>YSez$*Ov72%&r-8)np8DY^BeJJjGmPmA3y&V?bZC`m*(U}T zSW}f(+O|&{MnklFyBXvb@em!YWsTr}7@N6$FHF2tsn|fBd|)(l(od^~i?QYd%%;iZ zh*!G>jOk^@fZBsj?H8{NLYKj*`_*{}FpXh4c)O(u=t^o%MRAfeJ2%iNO}Ur@gawPq z#zi!YFJ}UVD@B@nI(B+F1V@0(i^ozaL!N7iUsoy6UJ-gz4VV{?S>@PY2);JOsI=n< zm7@)=ddr$Zu$D`l8yGASp26K+eToW3Q`x0E$9G*}!&^z>8_RNeSUTlm(VNvIy<*0& zI6mrfWH+~x7Olugnyg-qSR$WH}zd*;tm%gP3{t>N`|B+S_O_Twz~Wow0^ z!gr=Ttmky|kNu3G0<)~5T=qA=nD0-;6(?-qg_oV`p1)J~wrhpvh40!F87wU=$V}ia zxGmDtt;JK(daPoqR=#DTb4fG~U*CMtEZ({v&6Nt8C{7-BLkHpIy^mZO4tPiu%j8uj zKUD3ETI}R6)j%`RtK>mdIukaaj?@Nx5m*vr{Tn#1HAV*_`4{KIASlegj`bT~G z=);xnQgDH?EKE(-aCS2v?nc~I_Q3O3qJ)!f>YRM72c#~NxX0-^U7}JdngY3E)#?PQ zWterTr$9qb4sX)19!n#(-ce;8H1t}br}WJ%^+(DM5Bh}uRfmboBG$wbCm0qurNF)u zB5dlcP#k30zN)1w{+{SyYK-(iv(I#m1sL$c1%%XH9~`+a?->z(jHfnZt;S;5jlt{``! znMCSLjc|gy)Cb~ybQ;A8s!7qJd>I+=4?gUaQ*XtvNkKEmXkj&5B+%rn4jE}whGH~I zRfJRgNS2Q3WLH|bs%&UKpnYs*WpSC{9Y;~Qu1*pe{lpf#QEGW%W@Ss%^lX`&TbSH3 znAvvW)I}FxaN*XeL4R<;TNT__<2^bZ5MCbws{}_zpzrVL`ft? zw^T0Ul8Ryx1=O55)6p;>tg|j%RLI3VxGrIkbPTTLo+ox;!6)rr3O56j!i_=d z!VcyJDn9iq%z)PtnDOa(4LGAMGo$RywDs&lAQ$ z*r>$M1sy9RmpamoIvoB`pc(`qGmpYoppTBdP_eOS(=aFC&e-12QJRvmCY9AE5`f$b z<8QX6#Wc$st05<{RIvo#Rx)HTqSC28$eUe#1_df7z3Y93d08sfa_ox!`p^f{eSGG* zGh#5eLt9Y2KU2CzbRDb*JH}!?F5x=9pD@-6 z7bJDddFP*Zv90>VvNUgy{_Gs5)|*MKGVF3b>FZ`;)^yB;EkoA{*a|U6C=5H|ES-Ee z=$F`02HS_Y_gR*4?(bh5o}A8>TbQ*9ek zcoM$Zx-+Q?C4n6dA>w9Om3+-I>!n`Rb{P^^_Wba3b`(j>Z1P5m12xPt-{_LNv?z@j z4dqLz*4mn3^8Y;$aL<>Vi~Rp{UfBI4^1kB#-4Xm7^8S0kQ^5t`4&Y;`uM|A>6An17E1cLg6u#@D(2HuwT^{`7%lfb&2bd$!| z%)bxF<}X|S5g{(Oc98Bw)s+ApZq@`BKJbYXAEgBBm z^e8T6*;&Aq;x(!~JY^EJR01vo#!GqeJJrKZhAQk2U1KIgAHN_8N1=kvOSh|b7H^$M zC=5_MEMf%q<~7-AfMQ(S=0v&5p>{6Ht0JnuubopkAhpf|St5W{#0?dH$jS|yR-jJ! 
z9^N4gM_WaQ517hUj2C{j6A!N-&R`j_xxmq0bSP6W9R8vt6V2n$srTTz>R8Ck;SGJ) zEfV?Dk?r<9l}25CEa!POkF=12$P}0{r!mhR4+qsN84q#n%rekC;~t|W9j80{Oq!cR z|57!}`Oyor^JQm-vYE0Wk?AyuAK-#&R+~`ve0aS%akb=ZuyVDjIS}O&DGdK+GG{N7 zFGETJvP7qzoTuD%Qe2<~aA<{!<=-PGO7>6#sb~J84KenVp;2sdiPctt+&sbT zjZRZG&3)385|SOsey@zox+NiRo&GJ4kXH4_{BJL0T!!uIs)ZHXm`oID9`pH|!>dK5 z`(f0$?O!(+S@={|i#c0xnn{r7uYV``b!P04BKg>+Ojt9Y%NZy9E&I;a*r`ag)f~g$ zF$)b!Tf`cCUP9vVZ7z!o#5_`@5vt(JwV=A3J>pcYn5UU_(5^zBz?j6T<{D%f+bkAW zP%UvZ35#nBLj&^fjX3L%b$t_>Wu82FCUu0RrFnPHU-EN$eQZH^F{i3Ee(1W<3a>PM z*9k^G@<>0(Z0@XZVV7w9d;OIqMqv1#zb3a0{YwDJ6zdinSAe zNw@k#IoSACNEE)SIahAd$a(a1cSg2lm1CHyX2Z;!d6%7QnlmFrW}=mqB(*k0-WmB0r+(=0eWBu*aCh8dKU0Fav%R{2RJ}FM+=SF9!?YUO;gHJ_v3EzX`4Zdw^mAw!u$;Z=oyr2>3nl zIB*2)0(Stn2Z|MN2y6jea4+ya^apPP4+nPyzljdvp+J5F7lOB=KX^TO9e4^jALwqt zXMu--jo`cJ4*ndx0z3yi8yo~*KyUDQ@LVteEg&C)li>KZ;mMDS z+qp8oY!PQe(um6WRLhCZr)fcPofLmKUs1ysW2) zRpzN*^jW7)BNsr{$A8pGBl^{u9H%*ezseOtfrG1yn$S;2+2J*ood^}QTt@ib!g0F{ zpx)M;_ua|$V%#t;(~UHB3`Vac!gw2P=%S{EA)~i_x|y*!Q{j#oWSUd!>*8|5HJ1TL zvb+vn0WIoJy9qOQ3S%Jild8?&$IsOove$T(Q_H%P|E;lPWoGu0QZr-w78Z_+Fr~&N ztJC&&5xc)S7|yhPROgMG+NYbx07-1_Z_I2O>Ymy8nT1Xox6LmzvjTlh7m;y4%-04# zp47nnX|>v9J3=Agt5SVw4WT-#+UV@V5YD)@AQUv$K1lVWv-PzOiPxy@Ip>s@*~`u% zgPM+=UbnqH?iWA}JS~PU^K4w0o_R*9*iG(?u|i0gpGs1iAF{BUObhG&@V4v5zcuD*cziX(0R|-_)#l=V|()@X5vbukM*i8)!VtsaA7VjseTpMj`P7|R&}kw zk$o0HZg-0NcU?+7YC2e4)#jvOI5j141G)MH1DneVo5%>GLN@hT!Roq}xJ1m1?Mkb( z7F;(B;Gf(CEo7{0iT28w&Sh2YOvQwl=A7!GPpckPD5$M(tx#coORiSur#-o*6miT_ z@s@hc`AiTt=J@PloNl(sXz%i!=4?*P^r92Jy~}zy>&mdd)ySq*L4vqgRL5ClWN0L8 zcq8q{W&h8NjmNM$VX0Va3uKKPy7z@MlS6rm)KQ6ck)$juo_ldt#v#0mF$K2fxe$`s z@L2AKmEP8xQfsm15_1-dM)UqXdT|)a21WvSetbyZs(PGwj>|)-t@ZDgfeD&(s*j35 zHD-c+?DUje0FTiJU9%R?j{(lbK z1N=EM{hxs+fct|tA-|sn`+(&6uOP4g33xO31MpOE75EM^`&WQu`Bkt3dkToq;hK;#kMagQV3 z-opHmUEb?59v9iv-ghYeab33WjmR^5BhA9(_qILl%)<(8h1{DDu9*8lPaczQ?}mXh z&xKT4h*4N8i-19Ja4XpJ$9eq-e_nT;!fa>)r`PcSz-KQ%rCN;=m}M;Pp`L{ctWggm zl)b3JYeiS1sJYm2Hs47*W{)$DznVPD-Vt}N4)vm%++@+$IKe)}#Uu*uZbaBQe`-kk z(ajYB+d_{#AW=zk-u)$(WM&o@_t9 z4qAEeLh-CPkTE7_QUMX-KeiW>TmsSZ=%e zrC{`iZm3@45^6teU;VlE=^!||HyQ{lj%eeVY>=uJ*)H7D-{CW?WeoOPRQDrG=~?Q2 zLKXGBp}kXAf4~>HtF$x(TsJfD=ZGqTK;oU+jV>AnNo_|j>V?So_@1V(TX`YY;K}~d z(rn{tcL*=8j?)6m_;+ELQOOO4OG=BH7H4o$O``>;C{o3*QLGM(59ZNq_MMpNd~k_h zCIzRapPiWGWG^a5^edr}^(k8vY_=*ZIaRSI|Mz&axw)IG(0X$dm#O;uBL)`v%g;IO z4T_g2{mUF4Wr_C7U7ABTE}xp`CIxj7V#yRougs~;x=C#+>PqKdzc4HRC!I@}n}|FU6htEJX1=`=cy6;VCCfzEDjk#aa~CU`n)OT>31 zDUd_q&S+n-=`&-eW$--%i;$hslyNjQhbv{A?KUvTL_0iPm~&+ZlKuZt$ozjM85a3} z+{^zTM&|!-U=ADy*MP4g`@bKo0NnvN0e&3(4f6kAfj5CSf?YuN{^x?{0NMLrj}BlN zEPZ54bP5EBGe*fPV#V11wipP&PHKX?OpJy6WR-vFy%7q}Fh1MUF+5*@(@!E?c*!B(&Z zd<(t6{{SxliX}J>IzWB^??)f-B=AI_GX-}AH$l^n0nzpYqVKKx3BzS4_diuc6z>*e zqAMy{caT*a8~-8z57)3@ki*;Pobv~Zr8OnIB3Uyj0i1fc9c2C;_FlQZab1iZSr7-O zlI+kZ`-(}qViM19Td|d8XpNdqDzGkbgL|pU+a~*EmDk83y?NfK#-_HZfNIEvoXpFZmno{&hr$&9budQZDnDlKew^m%5L%Y z(Clh^>UDHvN_G6cYPM|@r7Od#TD5W*iQ>rlkTD-i+Z-itCf!EXu3DW$sm4NWeAC4Q z9#Xd>Rpv2tcATTKs#$Uh5Y^D?l)LHD=aAac?czYVyrMG&+p~Fn8FOfG1JieoGYQ3z zo0tfLcY=rw<4nNqItadfqRU|g?i9^yq=-DoROo{_Is?SASN#t`)|sY6|p#iY_~Ye>20>Oi0h+it<5 zX-G{+J7ERH=R3oVQjj9}0@~8B!X0j)KE!99+o(xhbFJq5?ELl4&!ec6-t4DaB6)3YV|5Bau z)x!~>Y)-8A`YB-9s#z|IeVJdZT}FeJ-Uy!!&MyJiTDOXt)`zh-?iS!yNT+YpthFaB z&#ZoKKBpCrsaVo35p3<%*k%obS<$vs7exz{TOl#V8Jd-~n!#w3n~7knotZD(7&c`W z(;y$|X*Ou$YH2UliP}(h_cv35Ry)*g*Uu{EtVOJ;(JL-zo^TVXA5vj8%B285X`nIb z>^hs9Q)o8%cCs<0;Vd$0p_hXRs!^u4tuTqs&P{^n**R&gK}~dYTY%ghmY|4bq!93E zBvUKaNu|^!>W;8gq|GBxGaNxjEL)vhnPuCynwdGg=;AgeqO9GiLM%Isr!*9?>{{kl zeLD^8>XNyIu>QKxq%|v*8KVw{#|;)a7gyZhYh$p 
z7b&`16EX6Iul#q`Ka&3up<8#7e2VQ9s_b|90TE;5U%je;xb`crh~jVeo_C z>B#I40gCT`3Ty!zzz+k(^}7*BHve((7Ub_$a1_jdE5SM7Z;`=I1I6|GIPH5WSOR;% zg+OioQ4l|YN-UT`cMB758-?8zR`;&Z8}&WXUiMP9cA;#msdrD7So)w|nrmVeTcJ+j zJ4-%lik2^~^Cd=*orU)2`w41t{p=Jse^?t*z3fiBl2geFsBRHtnmtW3u4q}7LU#nE zR4y<~^?J4Z$~MP9Wv@5`e%ULQe>E3s=EK{lJnP{RcdBWhu5_lxgEqO+87YYTyUA2G zh`~~t`s&59X8L8!wzJ6H!xSUGz=9;fMjWn{&X{P5sWRrzXNhsKNDXj)iX1FHT8kpL zF_~`}8-jch(jg+_4%u&fY_9qN5Eq9G+nSEJLY)R@vlfW-oAtVUaLEj2^vf!2IT3e zP^c>5c-{&{h3`f>xZtWC<>!}Qim@WrqxwS>L=w_%P_@!%;9!Y`PFg{l%C6$H_B28Z zUAr^r9_=E8qSu_@h7QxMY}-27pO~Nm{MFBvK9-sZ??PTuGU!v<9W6)ZBFG?e3ltS7 zM(DVj=&u@1N}~2IC$RH>)tKAo!?{4O-mJGw8F}J7{%jPZqH!T~j~E`gT!+_^_t!BM zpFGIrv|`6k>RQf?C56=a*74TSs9dXEt&LI21}VN8Rl4R`&8~_RtrpOwK3c3?YE~BN z^ih$whNiWpjPnl~6*!1Ow;di&4wxiotG3XvgAK_e;<`2t(WHdJf+E~;EYmeDto2mn zP;7LID;qJv;uy3mKXi<=6$%UAkukXWI%KCfs;kL0z)dSqE7V4kmQGDsjRuVx#FZ0^ zap-7hN@cjJ14Zl8hnGVr83^3Q2?ud)I0R~kWVIAibV7=9;m|1-coumOAp+5gMnW8mH3IpEpgvEbq0p5Tk<1zrU9ft%0+oB$Vt zPooR?6wsM~Rd6ZT2|kTJ;N9R5_*--VuLCauzYU}#xC{6-^a5S*SLg)Z1#Se-2D3nV zf)}GBcsx*Cz%Qa3crwr(00+U>&<%VVoB%y=KKQ_GTCG0?&jR-W_XeLuKcHBEx(l!e zE(D)|9#%)KcQ`$u(g(bJ?{< z3cO)H$Z|iAenYkA3aK#lQx7tt|+?44Gjh%g{gUNSR^>r~yS zxc0ssWpV57v6NwthHJQ(tF9*3kbbpMD^>A0vJ#fcm9E_mm-UeM(;M=>%24O3oR-KE zJ{+h(ZH?8(BEt8A!MhjD*`0!bnKS-~LFgNl)lS6?s~QKtf<}eNNNHS`gEypc^S2f1 zuWw>3*+r*vy)hq1H}NJW@~)qlsJye+R$~{ozAvX8R%$O5hO!haH8G{pMNJFFERw0% z(&V)&Km-Q|(5kUH7S zG&(xrXLq_c&5Km`tfYJnq}(^O_d}1}egu{~TGL>p2BMDXE3#;vl#?%qi%3`y_n-S}VlDwp0>v}T{RpL4plBycP`~NHc z{{hLk$p4plIrm0n{wIMaf;RXh^1b{3X26}n?;_*>8h8OX4ekiOi>&`0pf~_816#m- zK^y2kfOmjrfpKsdP`tl;0^JR86Hr`$CxRaMCuIK>a1S6~fj>b9@CvXEya&1eu|Tl^ zroaQi`QRSlv*-a{26Q*zL&492PoN8UGk6*}4Q>nm99_Wgg2#jVgSVpxcm;SOxIOp= zI)K-KXMmppZ-YLf(N&{$0qThA$L_f>vk5%NNK>D)J4ExqiT*rn3(H5~(R?`gcE7=# zs#4FvpJ(9&PQ3E+VmZZKmb9F!pWUo~+Eza?ss=kO{^^)$-*AlLq~*-1cIjRpT6fO+ zGV&9IYHfG;AC5|#LF~A0My^{+|bKJ44Mr_8R ziXDCB)FOUf?pljhAc?-+W!US^j_O)Ra8#=!PGM-i%6n``r7B0;m9$!c(&2@%QB9Dq zoK_>Qot4cnarMeppIQ%$jUMagXzZPXz~orG>R7M!J)=lhud1l<$$a+0PiH|38{}2* zmKs5_sW57?RPzbViX3ty-OA>u4Q@P^H($w}sH|5?>B~)X%7$Z^dUO-eOrSEYw=MNB z!(!w#Y240XP~tp?^K9ZG>|BKuq4?P>aCs1pMe&a^edygfy}}dv9d)K@Rn7WJMz$Vv z+;xfC&-8I0^%m-FP3uR@XRqc!I0SE70diKT)+P+k$5Bq`HXv76VpD7+M@v_?o%!sS z@>VEH--O9>nglWDW%YNQE?RRYDH^3|wr6?a<#o_Qwz_c5d{_TS9UN*zM;B#Ey|UG2 zeeLIM^`%s~jOd_@9;vVD?40#l*;`^`uV6C2PE_a<>S1(ST zxVh3@bhKQ%)T?gp1%Iwq7z*LLN0M2_*?=40ed6?5u0JL2o7Smv0LauE-;+Wl{!u+_ zS-z*z>n+)#w;Sw?l=ERN)meT#ROx9z>CIUpSFbn-ruV<%Ryw0># zrTi1A(8Wo6fD^Yuq~J2b-O9S9YS}0p{vdKipnTUox4{0m;EkYeD=#iv|BW!on0C5I zw}I77T$y{$Q8461F~nxi^1oK>P~};173zIMW1aKS;-|H5nQO+5b+jpSQ!OeS3$Ym6 zCDm1z`k>OUNEQ}@g*AK{7o90s&h2B0ZVgv+M2uFd@(>zl^|@}yB|T&Eo^S_O&5IFA zMizcg=ufVjNB?DK(CiSBGco$~&C9^p{@EL}SEM{F+p;9TAyWlz6eU6{$0qs|AQNXj z*Bal}5|P4(l|n(rBc*#H5Mxi`mNaveQF~Z!4V72f<&R`n{l%v`qTm;pbx^(RYh{x4 zi5Q7_?a{pz0&2SM%pRAcA|HOLn?}KLR|9UqaVu7sirwRbQ_G!=*V+012O{%7&&&Tm z@8$o$Lgs%5cqHh8?;-Pl2>da4FHp??XMls?KH%-h|Gx|_2X_Z|1D`~8=fQP|F z;6iW?_yN!YZ$tin4R|(qEI0)Y0Qm>p3w#@0z}JCd2hIYWBX}+PfY*S>fj+n=_z`d; zx&g%uybpLYx`1B-KM%fy4&c|pqrm0hG9X`p=Yi(}`3zhMegymb5=?S5%`YJbb90rPDMG%MY| zmb=(FPB0@=2?W`)iv(g)G@ispu54RXvgz+Ln^MWvMMMa~C9=y`pnd1`%$P5E%jxRK zIJ4!ncFZ$l+a+h%=G@T_oAjq$_)zW2Q2TN*tEMVCoB`n~pu4>_E;3#2%^_E{uyP*g zM$Myf1O}qVQB!s8_oQiel0(&g>#^(bV>3&nV{gP$_<~Dgb(N&%_2u79&j_vq)0E!a zS%&VQR=XT+I5g-A)D2Ka-Qfqc-AID|u>0yFxr)a@uV$`0X^-}Ax>Ddjb`vPhU=#1ccMfh4KheMlT%W~8r1W&VK)HPwqtx2er;6iTq|Esfv&E&bcQ zVTs}hvcI}33*H~yyz9?DZ{wQUCgsO`ugzb}56?*RZXag@Uf_Z#u?bn>-D$B8G6=e4 zR_L||rRl58!zX)>N@E+ZHoo3oO#!HqR){XAu}t>Qj<<&VH%QZK{=ddTU3PHWgXGu= z^R$94*f?J|T9lo1lGOGj+}gf@;+U|EaWu1i*}?PAldk9J!ooCM$HMASZVd?~gzpXQ 
zD|YU>a(m`&w#>CGI<=3onp?B2EbmY5$wiq61RNr91JhE-Po6-dZ6hN0+~N#h6lJ~Q zZ@7&Y*Axx~b_%{uZAaWrURWsjha^kLo zS9dn9FI$@B*}193)w1%s4t9ylYsGB)a!{lrHQ%%p z+4=c;?(!btB1{zx^B(K1Y2&u4P|+bfY_`#6U1Eod88+r%oCIf2*M_w(rLo1+>S1fF z&(yyePEyjSo{jlTupSZ&)J85&83d8(PR%VW=lf&6UEw~@rRWLaNm=?H(?ui ze>&VW&wPYR>bV8te@DuZ=iy_kD+;}5-C*nzOhasvtAwfr?c!@zO|QD0TUc1kc5!<& zqf04z?&!F7$~olJK;V>1n~x=TZl~rlr64-HXgJc( zaWe0|cAhfUQMqs6!ZmG?&K(PH0J|zW9pTgQL+`^fUDiu)XLU=j?9eF+cf7u(MENHv zn_`+y{MVYwCgl`ULxzK)oRmkoUWKA-5-?Xz^feYZ8DbVUU*Fu(MmL6`y2V=Mh3)0l zMXoV~QLor2R7fDc>!-LhH{Zrb;-;EQjn_obv>@_gYQC?PIQ5ibL&~}AtvX$@qN*mL zGCVz}G3q5c`(~cNRtxHl*RL29*pa8r8f^SQFoeo_ApieM5kVj8 z<^RjQIQn{I{;R<_xIg$fGX70q5{!X+fG;ECe+fJXJPPO@fG;A`zY$yx?gGAm4F6W} zTp-#025?{SugLMg1*X8aklo(`PJ^q!Mj+q+JA;2hmcI$S0VqzuBDfTM7rFjB;2q!% z;7Q;rupji0?;i-hf;|5c@Eo8s0hfTUBg=mlycApyWc&YfWcW#-Sbm>Fe*aCNxc?L2 zPT)tui=oepfar8ewEDh3>KfZB+^Dq0Q^PO2-lC;6`gpb)PKwLJw1~pL;Ck1Cy@NR= zCV3Zz0xm>CWM?Bs?}nw^R!ixYW;yeuiIje0JZja{&-WKArCq?TjZG1;!Uog?XOxPA`Vqtvzvq0mBp5o5k1weFOk!+@+SAYl%B9{hQm+?p>h1a(Juabb^#m+2TbpyQG;zbZSGM~&8Q{xm zL0e6tE$PJy>L?(Ur~elQ_#t(~=HKunxEf@vlIq`(QtfP}KzB(u zYcVpTbVkxtRjQduU%JfHw||p6@17NH0|j1^wFOH^DMtj2m*S**>^vog>NN@<_bMm$%`g1eNoRiQl>$HGwXkTjf zCoP#_%F&K>;mt{nnNSk3N!$w6-H!v0MAe@2@!EWmX8*-oM(5b4>#K0#k`YRxQb964#}*7bGVi*q&v%>h!j4 zKc+~kY#dyj5vrf6gH%=9VbVxV6A))4vW^*K4J|@P*`Ug<&C%lS-m=ZMAKPiy+4S7v z>aMr#B#MM;>U)w~;*ZC2!xKrGJ*t%H%Tnt?B7M8TLDXFtUKMG#WB;#D?Q%N#*SJN7 zVEo>n%#DwwY*kbNs)=Q(+AyY$4iqvZ*OsW);n+i{-;I?Fd{L}c?x<5L&@abJ9ZBLY zmW>Hu*`l4ExC?LgS;QMJjuq;4+UM2i4WO>w(0 zh0|xcxXAjZZ-)a|=+T_9-Y&HdU3EF!OMg1u{FV*4SgWkT+Jev*GSYNLW)|NTb}H#r z1Nr3<&0^{cbb#}g|DgJw2=qJj!rsID1)B; z>lmzGu0lRAZ)!r~;5yd~4)N(-TUE?k88wz(3p@1^)9UDe5X$N%lw7v&+rF+erdRS= zG2JHz;uu_B7SkX#R*J1Kb5r$$Qk=YFfqu2n_~01Yl1{VOWKB9SVuY3F1tcPn)-UWP zzGcd>Fybl$EO?C=D^N3NdVA8@n@Me)dWB=%F5GN~24w!{fos8L z@B!rgcY;3nHgf(;!Arnnz#@>n|0luMkon&QZUm18KLP#*d0%J$-w0&qp98Y*KLq?F zcqcl5)8L-q9^jvm``-X`7hnr~4|)IH;Fo}60c-~U2RZ+_;0XAy;J<)3(U5)M?~wCf z0iF%+2i}N`uULT(2KNT{0`EuGp9QxAFGs$A8F&n6f$u?!cY}9|VNaeAg9jC);>- zZ|=Noe9xiY?w#X1_Z`@|d(WYU*HwXRxVpK@CDXGvcqMLB%=MTj=(Punc#00Db9TdC zDwLe@rL-i5ma@xa<;DDxMzIxAhHtUWy~G!WMY-y?Y@rlVGGtWOf?7@Mb&4gpwbV`p z!A5q67U9#KO&M9E1Ci~gS{X@QIKK?ZaT2J$T5mu#MW9ho+sePOYsF?dj_ON8MhBV} zj;XDdrx-)u=-V}8Y5pkZ`NjXZyB=G4lKkIG1N4!I^s|$*bF(X_@KWec_u|{GvFY!6 zTa_1s$hBs6`UbmB0lVx_9@wY!mq<9vZne&hO6R=RcCO4e5A8u%G&)XEFdzbm|Yc${@s;_l0qC0QxF6z5uH7c+tL zY0R+3UKD2gG& z)bquXsQ+K%rL*CcI;nvwS{0l2yfF>REk+5vh_c+$V0EmOZGai4EL!VhuMF7$?c{aX z09|tTe^GguM51wk#$8!~c1ucalq7GK&dQ z$7#Uk7fxn|@em2nNe#V0e~H!3I~~OQvN>a3>s`}2kV{OVk}Xx*9h>&r?`MDIW(i4c zC}rYS$Ik4xx)A2yL^dM#F!xM+D`Y2img2Z22auAgX+0<>s72+q`B7~m8HT}8S*Kmf z(IQ$}mSa6Cn_-rzJPdTtj0L>W0QO3vUiCrFl0{y(*p8e_L)Lp%*&z#D)Kq2Zy%Nk` zb8MXVlE~)XSavb2u<$w z_?KLTdOch^35Mw0WJn=8Wn@ly#F=MLvu*Df7RRL+2XwiU4x?zDmA_4A>}nJkSUQ3y zNBd`%(Gg|E#3yNldO0yH%TR48nG;cKuo}DGP|o6nY$nFAD)2pChj06jGb>ObO=o$I zcBR;n{a-%+zahC6`Tt5U-To0W|Mfs;|Nj)3|II-D{#(F}$ocOCH-MAi5ZD0D0gC%~ z3Oo?}D0m6-{nNl+@T1_4;HAj;4+cL3ZU;X4!{`XW?}6V1y5H|!;9JP~9|P|P?*rrD z0`Nfa&yx4S)4(r-Huy5K|9=Pn4>%4U4SoiE938;#gU5rbLAdYlj{I9b{~tjg@MLf~ z_;K)IbOIj&&jZf|2f(+`3%mgQJh(5oJ@_FYy8U|833%e&5?@-~!-u6xOJ3u?LAPh4 zC`}y;Y=an-Jsh^yW-DGOQQysW4?StDh^{;vA{AO_FZYs-Q5%og)U@x())gN4k)HMn z6&60Q2r`SY0u{SA%O84EMa1Gb{`6+c+BATe7-hIo4AZEBk1I|q^oD(>E&haewT`wv7?3B2y3d(NZp3~4idxGN#dl^a!CdC(xYm?oYUMsCm z4doapk~z)P1#~5J23s6h9*u=pn6*@` z{F7zHwq=2&&6-L!#1JlqcFeOrE|p_5t@Okzvh(->#(9JjEcFKJEq(GeQ0FUlc+E!h z8*Y4irF-p2M9Xr_{#TjMTCqZnaxP}OM4H7@OS4Cht%N8VN8rTqtGs$X#)994Lu`L( zc6nj`x@PjI;243Bds_#&e{EJP+x=t03Ih{=&um)Z-O!vARk9X@GYffUi%r7<6(%xW 
zkEGKInTPKU?FoLK5PkYnD=wrBQ&&s*B2p8+2`tekeF!kjJaeo5PA&}|1#Mg*RDl_6 zV6)(JPVlNN`)CbiDX!D5d006KLzy-Yg{b!RJ`=Tus6jE&(2162Zb-tMu1sdHw6X>U zq2erE6H;p&+4BXpR&(f2swj`fe>lOkmV5*Ey&yTLhup~lt-@L_GvD!sm$YWMmSMX) ziTus6;05kdQ8Vpquv^SyL+QJyH<-qj@2;!UwU#NuB_uCC?h3z{fDK@<=x2s82*YMF zdR}y~N6oXysTll7!|&O~Vto{;Tw5M8R^Km6_|&OK@jhSf=|EKTfXmj&N_sk@@=L2| zrT?RTkTaw@z8z~PC$4k9PohAZmmD4lSwJ6L<0Ts$*U>TD2-VoK?(y(m$UFb8q@~_T zxXII<=r0nX7Nv~M$SQeo2Eq*8P40Vjda)8!%VLg4N&l?Z)4X!}w|LN^%Yql0^f*%W zgbQWugn&s@9Em~4)T-vBzq6io5_i`fFUYh!;We_ZstX#H&}!wvqTu#Co9mrwHVwR9 ztXsp7J?)gTwzx?dX{_=v?QEH(5|BoTBPI>wN3NO3L z|9=wP9{esc|3kq4M81D9m;>j5djk3V{|$2f3xVzcTmc9ccfc`l4!AG)0q`kg{g;EIU?ccjWc*(NPXGtN zzai7h_Wvv}1ug^+0=EM%M85BW&EU@951!*B9%BD5DIT6Eh+L8 z9MbFpJCZ5Cy^v#T2rHT?d{i6rgsg~ks*D1(pgG@;_{CR~8LYl;D?hg>HcuASt%nTG z5%1;Z6yCsX!?K>jh*Z5L!7a9)@h+o3iOQ}%6%rTHgB$+Ed3!Y^uPj?#W7(F5Qe?>N z6m`!#)VH`!FLSYs%0Bi?44yX>-kR}Yvwl#o%*+zaJR3eWw}}R;u~`u;^p72^%~jU+ zXyc|wwnPJc%=C)NpWD6W7fc1-(c_jqc|*}H1_ExmQ$vRiguyJ2)0zEx*+ z8TN9qMT;Kcvz1p$|87`(^>dcD2uu}&Lw?~JAj@Z# zH2Dljqfd2MBps!9zxZ^V7&jK`Xo# zzBA2H)JepDm~!G~>ZV=UjbkThVi_y3sQ8aiW~f-L#{c;$9D{4k8Cy^q z=1Z2>`sYfmMqwmGjn4mlHK!4kM6Jfah)gvni}qy{xAS<|gz!&bAh0&NrbR ztzuXPb!t>L{$X6<+3UvxnZq~w9N0?2+tiJF-0ILp-zaTlad64qF+-Iw1sz^lSlnx8 z737W83b}@F-DolGFC2>NMPUkb`}^`1+o}nw;`9?7y_PZ?oi)lUyJ)B(Yx7e&)?ZwN z?9x1`A?KK}@*FExN-vIl$&7WPB1ugK0$o-1xw+-AC z{1dYOi@+A}Z3n@n_*YgJ0=xHo>2dm@E!B5$lvkz z1~Zi2r63LbthP0&wz*(?BS>vAQa3SSQYdkm>?<19daKu_X8j#iJpVl40msn`93@QJ zI%^dz2%FmB-4Mp1t_#Ua(!YK5n;mm@6PoGw%^Km&!71-n_09b6*fDO%4Nd!>hCcsM zgb|Sqq9%s5nW z4P5^BrB4l`(2L`@4mU_FsJZ@>EOyogXGC@!ElvZ9kWDod&B;uBxF8Ve0OKF=!7bE6 z`@n(yidZ1Su|+3y1o2MJ452AvPIGX(pl-NUs${1b5Aa<}ocvMl@df9qFYmZC>XZS) zkdrXnMya?L%d1lacU_)YRY<#77t3Ewlkc040;EBcx-y@RVg5RDmzCJi-pOI(+4;?j z5Q`(biVNnPDvEn-!Y$}agBjU!@wf7x8m=7IvgDw*p}kAKcgDizDPV(r(7=goX|x(m z=!86{mm$>B>@=4Qsf4r&sj-TM)J;vy=l89s0oG)YORT=bCj~oolWyDsE?u|1%^ixE z{gEB|bfT2_{_KRfaGQG0+OBS!ou8h)es)?nP^#;6$cgcIJ=SG8DJNR`C>|BTv&wwM z>(uv{k27baaZsPdTodfpDZ@%Q*`3pU0xMiE$2r&g8P#2&6YAwXr)h)SEOGkaRQcrx zuZ9OCbY#{i5K7)lgpqX(ojjFXk+m$gHIUtCk$|SH#8oT{d1b@FE7+^e+-{*5Ew0G0 zK1c(^hS)GA)HOTz_1wYNo%{A}OfQ8lu5V3x(jd*7;m7}eM1PpU&IMe^0*-`y^|fsOS-D=-uR1a7hW^Smt2%qJI;o?I^4YYZ%}j4Rhr#>^Mkt(fpn2t} zgj!T9y{LSp;PTm7l_~rGCIs1sNS;Ohzr+i*Z%6ij40tH`1oHkLfL{SW1LXVva_|(O zv;WtCtHBuf4zm7Vf?otW1Mqod`wxRBfJ=bx`g;PnJ$OAbeIHy5zK%@)W}p~vZBF68=2pfmq3q5Y!4&(-95Pmm#B>Z|<2%_EU>ZM(QL+7jR~I9i5gh{#v`+n5!O zv14dul{8r$n zq`Z7;e#*8@c?>G{suTTJ+Jx=yvu#XCjGmTz=yIFNvz_>p!}^5_0|~sPgK7PS+z*}_ zm*Kj-d-p*&rpeXWIU-=kP0}L7Y2T(d;L%}3Tqi;8vJf3SbceT#GFX=~hNd1P)aV`c z7){Y{6AxGysJ%A;e4PK+-0Wm`o}JYH8LL7yrl%Jj8CgJ7U>1`ptD%|piy%_F*@_SP zd;hX!w95Ik`Djo%x08ONAf%gB*l-=Ogg|D%pUFo3fd0WR!eW(g=YC%Cy%Q7lUDoILZ-(=@6SyQec|*P@tu^!lx>VWw#1AuF{2@ z)ZGHR8=1-)2iXc)>pL&F-8t-Unmlr;WRQ4YL0pvBl#oZ5A9nxS>Y}mkHlP)NWu--`C{Uzvh-xv6Iu%!<3m%sR+fOqjbgAP8 znHK8O#QanUo5$xa*!A>AUiIw+P=CzofGA3yRwy0Y)e|>6en^HMENyXvw^`ZJq(x{>S@F8?Oh>kSj4CE)LW;RMaFmhXb~J5d4ymS$7dIVjI2?)G{7&%FYgv!Lc1>NomR{+ z`$w-ot)t?>^wp?@%2>wwaz>k~TBIHA%4Kz-JoAIr=E`kp#OKo4wH?wVxiEzBM<1cq zNA;Coz_!M^SA45GLvv~hkh*{GRh6^&!)tj~A#?Mkt#f2sC4TBEeE3Bbjyn`gv^3+S z3mydb^+Er7b)jyYaFV4MY}6-nl2Np<%rBGKK8*m_j&Qj<#F*q8g7NMyA0Ktatcn`jgXR=1Cd{%#lKIWt zG=Uy=S-LI4=uP+A==oq#U01ByDR@)Hru~6Q-5qkxt4ff6tSujg$jH%jVT2yp}-e4mUZcYw#^#siro7%Ru;Zep&t2W_N&I(3T^8(IJ z@8dCcX3jaJMd`Gy0XoQYbdDRw-OkY2ct)dqY`S|+!^*j?w&{Z8VCJA@+s*!eC9>_W zdHH{zmvdi)-2YfG0e%3y6uJMI;89>7_y)56*TH+id%*93d2j+;1a1$02>cE@fLDQ4 zumju=d=@>xyTEgR;sN%-x!|qn1fC9Nzz*;c^a7{Aqres5LEt^;1s)FmH+q2I2M+_+ zf-fWczYE+5mcadh&iwx#xE9<6d=eeNtH6ceUy=X60d4}X2EPv;4fcZDfDfP((7k@o 
z08a;3gK=;%I2YUzd;*%i4TyFx5dChsj|R|a8qD#!SmPsg(^usPtSzxabh=KRYr(pDb|!UW$&LrwWXp6or8RH==yjqD zQr{V+1&xoL`ZK$NGs`n3TCPlgreckVtxzd_Z)jibDiXzN1sQi!BS7LF=nYeS$P3qTH-H zSrkTH&Yhlc-)_t3>i;M5rl&pLD?AF^$cEU-0FhFYSB29mCP1J>lPy%G2g{|E!9@V3 z6A9K&sX0zA&90!BX=B;S3+-aKbkq)6BlA zB0{~0Ci$R#8AkoRl5V3Xs?%F_>X}s76GlG-#eis1nosVHgLcD0e#MDAwdimyx!JV~ zlKq``1v#Whsz<2c*D{1bwIcQ`#L4_aTBV6HCPOn%EL%Ply!gUpJEy+DHYHq7ue;`*b*BerJ5n$BJsE9!3tY`)Jy%wg*aa5E28$mHUr=&8 zj}+a#fmxzlIRZiT9l4FJMutC>*UD3be|zuKaiWr$>uFhukV@Z8dCG*T_%9rb-ywBt zIoiY9H5V#9P^)5=nl>4i5`V9_e?rM+_*KjAR zzmOVRC`nt&K75QeLl|6dB`-MK@SbU>ygf+wMYqC(yc(n2WYQbqO`jWt@VO9Z=nG*Sjw9L)ND*Fr_!=mE)v&Rm=)<*$%UBg_@O_hF{0DMf9&mcP92EPPZ|BwH2YQDee$}ptEXZQ0xq4`_imA@@&%hk}K1_+u~M{|Mi& zMV>zZwt}AqHzL;k?sEsyb?SP%mexR9|9MG?;+>^AMg?II`Bj=4)%cW zBJ+O-ybt^_m;kp2HzEJO0-O*25n2CLU>^J%GX1N-E5RAi20H(*`~N-)ef|c#20Rsr zUJnPuKcQd4cQ$Y03GZ;0yA=|J@212_0#_oUb_BWWq<3M z+fiEbMvKSLEW-8@HYban?>OHqcz&ry-`c1-FDa~cV6!Z9%L#HB@o9n|=VmeS^* zDwQf^Tf@dsrCyoaD9Sb37aCu`b*WZZaohzuMmZS+oQF*-Cs*LsTo*QJ?DNzLexX{SzaZ&Hx?II%9g64Po0IUyz1Ym@Nktg1odX#n%vv1#JM$dZEJ9Bw*t53I>!C~ z*7P*IHN6ZQt^49n&4RD%;)8<0te0o$VaP?Jw~}`u7U^5fvv8f3$tWl{`4N>;{@G5| za*M1j)7tP<`uFK%sF9l_rl1OS>z;GDS%+ROEo{~Or@msHZDCb)(D1SX!PeFQwcLDF z*eyFCXDppoUhdACT39?aVnYo|YE2FIc9oSv?b}D$IUVCgn*#zldkOnJh|3o}_K(qpV>)w}W4XJsf zrzLLM6BDi%tVRT~h{3H#N|r_bKi~!04Z@?cold8cqsTLa{sG=d;mJ&?%-|6{*MQ{!JWaKz(>&mycG1n zPk~Q(eZYY4hkzFN9=d?%fg|9H=m5R|J^&s8Wc%L){t4OtBVZQX8~hpa{s+LbfzAR* z-hU;~J%DrIIMDh3p9XKB!H)+E;I`m{$o_u;UJh1)&IcR>iu->F*b2Uf4&Wy6m*97S z?hBj+$AJ6+9su3~4ULYThM_~x>$cn1%=X)T1`nnq9X_0!laG7oDt{j~!@@a2(xuK_ zG6c?>n1DI7Yh}$HDUFqFWPPm!k>PKc%d8E1H)LU|{n0h=zR%F`ky|O=OppEc9EVwL zz`1%TQbZh|%mPd^il)1AaslUkF3piT&W}?i=1NLLvqzT=K;j0sP z04y3Dm!mG)3tUq(p`s+r5XayLB$l^+D90|6X8ql~Eu~UX?mn>K8X<$qKD*1<-D@o4 zN_NUi8-sgY9>Ts6okS$>?wY1riWQs^X@F!qOxkswV%5SlB>p+? 
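The patch below (22/57) moves the private _validate_credentials helper out of RemoteUploaderDownloader into composer.utils.file_helpers and re-exports it as composer.utils.validate_credentials, so any component holding an ObjectStore can fail fast on bad credentials. A rough usage sketch of the check it performs, once this patch is applied; the bucket name here is made up for illustration:

    from composer.utils import S3ObjectStore, validate_credentials

    # Uploads a tiny throwaway file; an auth problem surfaces immediately as an
    # exception instead of after the first checkpoint has been produced.
    store = S3ObjectStore(bucket='my-example-bucket')  # assumed bucket name, for illustration only
    validate_credentials(store, '.credentials_validated_successfully')
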
From 28a36e00ac6011fa4cdb8448802935e00741047f Mon Sep 17 00:00:00 2001
From: Ning Wang
Date: Thu, 13 Jun 2024 20:49:08 +0000
Subject: [PATCH 22/57] a

---
 composer/loggers/__init__.py                  |  5 ++-
 .../loggers/remote_uploader_downloader.py     | 17 ++--------
 composer/utils/__init__.py                    |  2 ++
 composer/utils/file_helpers.py                | 15 +++++++++
 composer/utils/remote_uploader.py             | 32 ++++++++++++-------
 tests/trainer/test_checkpoint.py              |  9 +++---
 tests/utils/test_remote_uploader.py           |  4 +--
 7 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/composer/loggers/__init__.py b/composer/loggers/__init__.py
index d95ca05c36..12e94a6bb7 100644
--- a/composer/loggers/__init__.py
+++ b/composer/loggers/__init__.py
@@ -22,7 +22,10 @@
 from composer.loggers.mosaicml_logger import MosaicMLLogger
 from composer.loggers.neptune_logger import NeptuneLogger
 from composer.loggers.progress_bar_logger import ProgressBarLogger
-from composer.loggers.remote_uploader_downloader import RemoteUploaderDownloader
+from composer.loggers.remote_uploader_downloader import (
+    RemoteUploaderDownloader,
+    validate_credentials,
+)
 from composer.loggers.slack_logger import SlackLogger
 from composer.loggers.tensorboard_logger import TensorboardLogger
 from composer.loggers.wandb_logger import WandBLogger
diff --git a/composer/loggers/remote_uploader_downloader.py b/composer/loggers/remote_uploader_downloader.py
index 981cc4c650..9162133edd 100644
--- a/composer/loggers/remote_uploader_downloader.py
+++ b/composer/loggers/remote_uploader_downloader.py
@@ -38,6 +38,7 @@
     format_name_with_dist,
     get_file,
     retry,
+    validate_credentials,
 )
 from composer.utils.object_store.mlflow_object_store import MLFLOW_DBFS_PATH_PREFIX

@@ -359,7 +360,7 @@ def init(self, state: State, logger: Logger) -> None:
             retry(
                 ObjectStoreTransientError,
                 self.num_attempts,
-            )(lambda: _validate_credentials(self.remote_backend, file_name_to_test))()
+            )(lambda: validate_credentials(self.remote_backend, file_name_to_test))()

         # If the remote backend is an `MLFlowObjectStore`, the original path kwarg may have placeholders that can be
         # updated with information generated at runtime, i.e., the MLFlow experiment and run IDs. This information
@@ -635,20 +636,6 @@ def _remote_file_name(self, remote_file_name: str):
         return key_name


-def _validate_credentials(
-    remote_backend: ObjectStore,
-    remote_file_name_to_test: str,
-) -> None:
-    # Validates the credentials by attempting to touch a file in the bucket
-    # raises an error if there was a credentials failure.
- with tempfile.NamedTemporaryFile('wb') as f: - f.write(b'credentials_validated_successfully') - remote_backend.upload_object( - object_name=remote_file_name_to_test, - filename=f.name, - ) - - def _upload_worker( file_queue: Union[queue.Queue[tuple[str, str, bool]], multiprocessing.JoinableQueue[tuple[str, str, bool]]], completed_queue: Union[queue.Queue[str], multiprocessing.JoinableQueue[str]], diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py index 988fd4238e..883df7fdfc 100644 --- a/composer/utils/__init__.py +++ b/composer/utils/__init__.py @@ -44,6 +44,7 @@ maybe_create_object_store_from_uri, maybe_create_remote_uploader_downloader_from_uri, parse_uri, + validate_credentials, ) from composer.utils.import_helpers import MissingConditionalImportError, import_object from composer.utils.inference import ExportFormat, Transform, export_for_inference, export_with_logger, quantize_dynamic @@ -158,4 +159,5 @@ 'MLFLOW_EXPERIMENT_ID_FORMAT_KEY', 'MLFLOW_RUN_ID_FORMAT_KEY', 'RemoteUploader', + 'validate_credentials', ] diff --git a/composer/utils/file_helpers.py b/composer/utils/file_helpers.py index 2d14cc27ea..d3e3841b21 100644 --- a/composer/utils/file_helpers.py +++ b/composer/utils/file_helpers.py @@ -49,6 +49,7 @@ 'maybe_create_object_store_from_uri', 'maybe_create_remote_uploader_downloader_from_uri', 'parse_uri', + 'validate_credentials', ] @@ -737,3 +738,17 @@ def create_symlink_file( raise ValueError('The symlink filename must end with .symlink.') with open(destination_filename, 'x') as f: f.write(existing_path) + + +def validate_credentials( + remote_backend: ObjectStore, + remote_file_name_to_test: str, +) -> None: + # Validates the credentials by attempting to touch a file in the bucket + # raises an error if there was a credentials failure. 
+ with tempfile.NamedTemporaryFile('wb') as f: + f.write(b'credentials_validated_successfully') + remote_backend.upload_object( + object_name=remote_file_name_to_test, + filename=f.name, + ) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 0b2b9ae249..24bfd4cf5b 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -18,6 +18,7 @@ from composer.utils.file_helpers import ( maybe_create_object_store_from_uri, parse_uri, + validate_credentials, ) from composer.utils.object_store.mlflow_object_store import MLFLOW_DBFS_PATH_PREFIX, MLFlowObjectStore from composer.utils.object_store.object_store import ( @@ -126,20 +127,27 @@ def init(self): if not self._is_dbfs: if self.object_store is None: self.object_store = maybe_create_object_store_from_uri(self.remote_folder) - return - if not self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): - if self.object_store is None: + else: + if not self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): + if self.object_store is None: + self.object_store = _build_dbfs_backend(self.path) + return + if get_global_rank() == 0: + if self.object_store is None: + self.object_store = _build_dbfs_backend(self.path) + assert isinstance(self.object_store, MLFlowObjectStore) + self.path = self.object_store.get_dbfs_path(self.path) + path_list = [self.path] + broadcast_object_list(path_list, src=0) + self.path = path_list[0] + if get_global_rank() != 0: self.object_store = _build_dbfs_backend(self.path) - return + if get_global_rank() == 0: - if self.object_store is None: - self.object_store = _build_dbfs_backend(self.path) - assert isinstance(self.object_store, MLFlowObjectStore) - self.path = self.object_store.get_dbfs_path(self.path) - path_list = [self.path] - broadcast_object_list(path_list, src=0) - self.path = path_list[0] - # TODO: add valdation + retry( + ObjectStoreTransientError, + self.num_attempts, + )(lambda: validate_credentials(self.object_store, '.credentials_validated_successfully'))() def upload_file_async( self, diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index b767595fab..c4b020d9cd 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -26,12 +26,11 @@ from composer.algorithms import NoOpModel from composer.callbacks import CheckpointSaver from composer.core import Callback, Time, TimeUnit -from composer.loggers import remote_uploader_downloader from composer.metrics import MAP from composer.optim import ExponentialScheduler from composer.trainer import trainer from composer.trainer.trainer import Trainer -from composer.utils import dist, is_tar, reproducibility +from composer.utils import dist, is_tar, remote_uploader, reproducibility from composer.utils.checkpoint import ( _COMPOSER_STATES_FILENAME, PartialFilePath, @@ -372,7 +371,7 @@ def test_checkpoint_saver_properly_constructed( monkeypatch: MonkeyPatch, ): mock_validate_credentials = MagicMock() - monkeypatch.setattr(remote_uploader_downloader, '_validate_credentials', mock_validate_credentials) + monkeypatch.setattr(remote_uploader, 'validate_credentials', mock_validate_credentials) trainer = self.get_trainer(save_folder=save_folder) @@ -884,7 +883,7 @@ def test_autoresume_from_callback( def test_load_from_uri(self, load_path: str, load_object_store: Optional[ObjectStore], monkeypatch: MonkeyPatch): mock_validate_credentials = MagicMock() - monkeypatch.setattr(remote_uploader_downloader, '_validate_credentials', mock_validate_credentials) + 
monkeypatch.setattr(remote_uploader, 'validate_credentials', mock_validate_credentials) mock_load_checkpoint = MagicMock() monkeypatch.setattr(trainer.checkpoint, 'load_checkpoint', mock_load_checkpoint) self.get_trainer(load_path=load_path, load_object_store=load_object_store) @@ -904,7 +903,7 @@ def test_load_from_uri(self, load_path: str, load_object_store: Optional[ObjectS ) def test_other_backends_error(self, load_path: str, monkeypatch: MonkeyPatch): mock_validate_credentials = MagicMock() - monkeypatch.setattr(remote_uploader_downloader, '_validate_credentials', mock_validate_credentials) + monkeypatch.setattr(remote_uploader, 'validate_credentials', mock_validate_credentials) with pytest.raises(NotImplementedError): self.get_trainer(load_path=load_path) diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index a2c9abdead..787ddc6f83 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -20,7 +20,7 @@ class DummyObjectStore(ObjectStore): """Dummy ObjectStore implementation that is backed by a local directory.""" def __init__(self, **kwargs: Dict[str, Any]) -> None: - self.tmp_dir = self.get_tmp_dir() + self.tmp_dir = self.get_tmp_dir() self.root = self.tmp_dir.name self.sleep_sec = 0 self.dest_filename = '' @@ -42,7 +42,6 @@ def upload_object( time.sleep(self.sleep_sec) dest_filename = pathlib.Path(self.root) / object_name os.makedirs(os.path.dirname(dest_filename), exist_ok=True) - print(f"bigning debug {filename=}, {dest_filename=}") shutil.copy2(filename, dest_filename) self.dest_filename = dest_filename @@ -62,7 +61,6 @@ def download_object( shutil.copy2(object_path, filename) - def test_upload_mutliple_files(): fork_context = multiprocessing.get_context('fork') tmp_dir = tempfile.TemporaryDirectory() From c78f475daa8fcfa148f59af5c8754e7810ae603c Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 21:30:57 +0000 Subject: [PATCH 23/57] a --- composer/callbacks/checkpoint_saver.py | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 1aa926ee55..22af77df86 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -332,7 +332,7 @@ def __init__( backend, _, _ = parse_uri(save_folder) self.remote_uploader_futures: List[List[Future]] = [] self.symlink_file_tasks: List[tuple[str, str]] = [] - self.this_rank_saves_remote_symlinks: bool = False + self.rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads @@ -340,12 +340,12 @@ def __init__( if backend == 'wandb': raise NotImplementedError( f'There is no implementation for WandB via URI. Please use ' - 'WandBLogger with log_artifacts set to True', + 'WandBLogger with log_artifacts set to True.', ) elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: raise NotImplementedError( f'There is no implementation for the cloud backend {backend} via URI. 
Please use ' - 'one of the supported RemoteUploaderDownloader object stores', + 'one of the supported object stores.', ) self.remote_uploader = RemoteUploader( remote_folder=save_folder, @@ -496,8 +496,8 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up src_path = str(pathlib.Path(saved_path).parent) else: src_path = saved_path - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: + rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if rank_saves_symlinks: os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) # if remote file name provided, upload the checkpoint @@ -587,9 +587,9 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up else: src_path = remote_file_name log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') - this_rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if this_rank_saves_symlinks: - self.this_rank_saves_remote_symlinks = True + rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled + if rank_saves_symlinks: + self.rank_saves_remote_symlinks = True create_symlink_file(src_path, symlink_filename) if self.remote_uploader is not None: self.symlink_file_tasks.append((symlink_filename, symlink_name)) @@ -606,14 +606,14 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) def wait(self) -> None: + """Wait exsiting upload tasks to finish and start uploading symlink file if necessary.""" if self.remote_uploader is None: return # Wait remote uploader futures and start to upload the latest symlink file if necessary - if self.this_rank_saves_remote_symlinks: - if len(self.remote_uploader_futures) != len(self.symlink_file_tasks): - raise RuntimeError( - f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}', - ) + if self.rank_saves_remote_symlinks and len(self.remote_uploader_futures) != len(self.symlink_file_tasks): + raise RuntimeError( + f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}', + ) log.debug('Waiting for previous checkpoint files upload finish') for i in range(len(self.remote_uploader_futures)): for future in self.remote_uploader_futures[i]: @@ -626,7 +626,7 @@ def wait(self) -> None: if t.item() != dist.get_world_size(): raise RuntimeError(f'Some rank failed to upload checkpoint files') log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') - if self.this_rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: + if self.rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: # Only upload the last symlink file symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] self.remote_uploader.upload_file_async( From 7ecfcf31ad613728f6c07f949ab7015699832da7 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 13 Jun 2024 21:35:26 +0000 Subject: [PATCH 24/57] a --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b0820e63d1..3b2469b935 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,7 @@ 
reportUnusedCoroutine = "error" # Pytest [tool.pytest.ini_options] # By default, do not run gpu, vision, docs, notebook, or daily tests -addopts = "--strict-markers -m 'not gpu and not doctest and not daily and not remote'" +addopts = "--codeblocks --strict-markers -m 'not gpu and not doctest and not daily and not remote'" markers = [ # Tests that require a world_size of two should be annotated with `@pytest.mark.world_size(2)`. From 12802665b5759e32a2b3a049a32a5362db3b2824 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 18:52:17 +0000 Subject: [PATCH 25/57] fix doctest --- composer/loggers/__init__.py | 5 +- .../loggers/remote_uploader_downloader.py | 42 +-------- composer/trainer/trainer.py | 2 +- composer/utils/__init__.py | 2 + composer/utils/object_store/__init__.py | 2 + composer/utils/object_store/utils.py | 48 ++++++++++ composer/utils/remote_uploader.py | 92 +++++++++---------- docs/source/doctest_fixtures.py | 25 ++++- tests/trainer/test_checkpoint.py | 8 +- tests/utils/test_remote_uploader.py | 10 +- 10 files changed, 133 insertions(+), 103 deletions(-) create mode 100644 composer/utils/object_store/utils.py diff --git a/composer/loggers/__init__.py b/composer/loggers/__init__.py index 12e94a6bb7..d95ca05c36 100644 --- a/composer/loggers/__init__.py +++ b/composer/loggers/__init__.py @@ -22,10 +22,7 @@ from composer.loggers.mosaicml_logger import MosaicMLLogger from composer.loggers.neptune_logger import NeptuneLogger from composer.loggers.progress_bar_logger import ProgressBarLogger -from composer.loggers.remote_uploader_downloader import ( - RemoteUploaderDownloader, - validate_credentials, -) +from composer.loggers.remote_uploader_downloader import RemoteUploaderDownloader from composer.loggers.slack_logger import SlackLogger from composer.loggers.tensorboard_logger import TensorboardLogger from composer.loggers.wandb_logger import WandBLogger diff --git a/composer/loggers/remote_uploader_downloader.py b/composer/loggers/remote_uploader_downloader.py index 9162133edd..9378d5a8d4 100644 --- a/composer/loggers/remote_uploader_downloader.py +++ b/composer/loggers/remote_uploader_downloader.py @@ -25,15 +25,10 @@ from composer.loggers import Logger, MosaicMLLogger from composer.loggers.logger_destination import LoggerDestination from composer.utils import ( - GCSObjectStore, - LibcloudObjectStore, MLFlowObjectStore, ObjectStore, ObjectStoreTransientError, - OCIObjectStore, - S3ObjectStore, - SFTPObjectStore, - UCObjectStore, + build_remote_backend, dist, format_name_with_dist, get_file, @@ -51,37 +46,6 @@ __all__ = ['RemoteUploaderDownloader'] -def _build_remote_backend(remote_backend_name: str, backend_kwargs: dict[str, Any]): - remote_backend_cls = None - remote_backend_name_to_cls = { - 's3': S3ObjectStore, - 'oci': OCIObjectStore, - 'sftp': SFTPObjectStore, - 'libcloud': LibcloudObjectStore, - 'gs': GCSObjectStore, - } - - # Handle `dbfs` backend as a special case, since it can map to either :class:`.UCObjectStore` - # or :class:`.MLFlowObjectStore`. 
- if remote_backend_name == 'dbfs': - path = backend_kwargs['path'] - if path.startswith(MLFLOW_DBFS_PATH_PREFIX): - remote_backend_cls = MLFlowObjectStore - else: - # Validate if the path conforms to the requirements for UC volume paths - UCObjectStore.validate_path(path) - remote_backend_cls = UCObjectStore - else: - remote_backend_cls = remote_backend_name_to_cls.get(remote_backend_name, None) - if remote_backend_cls is None: - supported_remote_backends = list(remote_backend_name_to_cls.keys()) + ['dbfs'] - raise ValueError( - f'The remote backend {remote_backend_name} is not supported. Please use one of ({supported_remote_backends})', - ) - - return remote_backend_cls(**backend_kwargs) - - class RemoteUploaderDownloader(LoggerDestination): r"""Logger destination that uploads (downloads) files to (from) a remote backend. @@ -340,7 +304,7 @@ def __init__( def remote_backend(self) -> ObjectStore: """The :class:`.ObjectStore` instance for the main thread.""" if self._remote_backend is None: - self._remote_backend = _build_remote_backend(self.remote_backend_name, self.backend_kwargs) + self._remote_backend = build_remote_backend(self.remote_backend_name, self.backend_kwargs) return self._remote_backend def init(self, state: State, logger: Logger) -> None: @@ -650,7 +614,7 @@ def _upload_worker( The worker will continuously poll ``file_queue`` for files to upload. Once ``is_finished`` is set, the worker will exit once ``file_queue`` is empty. """ - remote_backend = _build_remote_backend(remote_backend_name, backend_kwargs) + remote_backend = build_remote_backend(remote_backend_name, backend_kwargs) while True: try: file_path_to_upload, remote_file_name, overwrite = file_queue.get(block=True, timeout=0.5) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 1c727b294e..c6b98079e6 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1898,7 +1898,7 @@ def _try_checkpoint_download( get_file( path=save_latest_remote_file_name, destination=latest_checkpoint_path, - object_store=self._checkpoint_saver.remote_uploader.object_store, + object_store=self._checkpoint_saver.remote_uploader.remote_backend, overwrite=True, progress_bar=load_progress_bar, ) diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py index 883df7fdfc..13e8ee5655 100644 --- a/composer/utils/__init__.py +++ b/composer/utils/__init__.py @@ -73,6 +73,7 @@ S3ObjectStore, SFTPObjectStore, UCObjectStore, + build_remote_backend, ) from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config from composer.utils.remote_uploader import RemoteUploader @@ -160,4 +161,5 @@ 'MLFLOW_RUN_ID_FORMAT_KEY', 'RemoteUploader', 'validate_credentials', + 'build_remote_backend', ] diff --git a/composer/utils/object_store/__init__.py b/composer/utils/object_store/__init__.py index 3c70257e08..6171013c2c 100644 --- a/composer/utils/object_store/__init__.py +++ b/composer/utils/object_store/__init__.py @@ -15,6 +15,7 @@ from composer.utils.object_store.s3_object_store import S3ObjectStore from composer.utils.object_store.sftp_object_store import SFTPObjectStore from composer.utils.object_store.uc_object_store import UCObjectStore +from composer.utils.object_store.utils import build_remote_backend __all__ = [ 'ObjectStore', @@ -28,4 +29,5 @@ 'UCObjectStore', 'MLFLOW_EXPERIMENT_ID_FORMAT_KEY', 'MLFLOW_RUN_ID_FORMAT_KEY', + 'build_remote_backend', ] diff --git a/composer/utils/object_store/utils.py b/composer/utils/object_store/utils.py new file mode 100644 
index 0000000000..4ca8c5cf93 --- /dev/null +++ b/composer/utils/object_store/utils.py @@ -0,0 +1,48 @@ +# Copyright 2022 MosaicML Composer authors +# SPDX-License-Identifier: Apache-2.0 + +"""Helpers for working with object stores.""" + +from typing import Any + +from composer.utils.object_store.gcs_object_store import GCSObjectStore +from composer.utils.object_store.libcloud_object_store import LibcloudObjectStore +from composer.utils.object_store.mlflow_object_store import MLFLOW_DBFS_PATH_PREFIX, MLFlowObjectStore +from composer.utils.object_store.oci_object_store import OCIObjectStore +from composer.utils.object_store.s3_object_store import S3ObjectStore +from composer.utils.object_store.sftp_object_store import SFTPObjectStore +from composer.utils.object_store.uc_object_store import UCObjectStore + +__all__ = ['build_remote_backend'] + + +def build_remote_backend(remote_backend_name: str, backend_kwargs: dict[str, Any]): + """Build object store given the backend name and kwargs.""" + remote_backend_cls = None + remote_backend_name_to_cls = { + 's3': S3ObjectStore, + 'oci': OCIObjectStore, + 'sftp': SFTPObjectStore, + 'libcloud': LibcloudObjectStore, + 'gs': GCSObjectStore, + } + + # Handle `dbfs` backend as a special case, since it can map to either :class:`.UCObjectStore` + # or :class:`.MLFlowObjectStore`. + if remote_backend_name == 'dbfs': + path = backend_kwargs['path'] + if path.startswith(MLFLOW_DBFS_PATH_PREFIX): + remote_backend_cls = MLFlowObjectStore + else: + # Validate if the path conforms to the requirements for UC volume paths + UCObjectStore.validate_path(path) + remote_backend_cls = UCObjectStore + else: + remote_backend_cls = remote_backend_name_to_cls.get(remote_backend_name, None) + if remote_backend_cls is None: + supported_remote_backends = list(remote_backend_name_to_cls.keys()) + ['dbfs'] + raise ValueError( + f'The remote backend {remote_backend_name} is not supported. 
Please use one of ({supported_remote_backends})', + ) + + return remote_backend_cls(**backend_kwargs) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 24bfd4cf5b..3532ba1450 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -12,11 +12,10 @@ import time import uuid from concurrent.futures import Future, ProcessPoolExecutor -from typing import List, Optional +from typing import Any, Optional from composer.utils.dist import broadcast_object_list, get_global_rank, get_local_rank from composer.utils.file_helpers import ( - maybe_create_object_store_from_uri, parse_uri, validate_credentials, ) @@ -25,7 +24,7 @@ ObjectStore, ObjectStoreTransientError, ) -from composer.utils.object_store.uc_object_store import UCObjectStore +from composer.utils.object_store.utils import build_remote_backend from composer.utils.retrying import retry log = logging.getLogger(__name__) @@ -33,28 +32,15 @@ __all__ = ['RemoteUploader'] -def _build_dbfs_backend(path: str) -> ObjectStore: - if path.startswith(MLFLOW_DBFS_PATH_PREFIX): - return MLFlowObjectStore(path=path) - UCObjectStore.validate_path(path) - return UCObjectStore(path=path) - - def _upload_file_to_object_store( - remote_folder: str, - is_dbfs: bool, - dbfs_path: str, + remote_backend_name: str, + backend_kwargs: dict[str, Any], remote_file_name: str, local_file_path: str, overwrite: bool, num_attempts: int, ) -> int: - if is_dbfs: - object_store: ObjectStore = _build_dbfs_backend(dbfs_path) - else: - object_store: ObjectStore = maybe_create_object_store_from_uri( - remote_folder, - ) # pyright: ignore[reportGeneralTypeIssues] + object_store = build_remote_backend(remote_backend_name, backend_kwargs) @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(retry_index: int = 0): @@ -91,6 +77,7 @@ class RemoteUploader: def __init__( self, remote_folder: str, + backend_kwargs: Optional[dict[str, Any]] = None, num_concurrent_uploads: int = 2, num_attempts: int = 3, ): @@ -103,14 +90,26 @@ def __init__( # A folder to use for staging uploads self._tempdir = tempfile.TemporaryDirectory() self._upload_staging_folder = self._tempdir.name - backend, _, self.path = parse_uri(remote_folder) - - # Need some special handling for dbfs path - self._is_dbfs = backend == 'dbfs' - self.object_store: Optional[ObjectStore] = None + self.remote_backend_name, self.remote_bucket_name, self.path = parse_uri(remote_folder) + + self.backend_kwargs: dict[str, Any] = backend_kwargs if backend_kwargs is not None else {} + if self.remote_backend_name in ['s3', 'oci', 'gs'] and 'bucket' not in self.backend_kwargs: + self.backend_kwargs['bucket'] = self.remote_bucket_name + elif self.remote_backend_name == 'libcloud' and 'container' not in self.backend_kwargs: + self.backend_kwargs['container'] = self.remote_bucket_name + elif self.remote_backend_name == 'azure': + self.remote_backend_name = 'libcloud' + self.backend_kwargs = { + 'provider': 'AZURE_BLOBS', + 'container': self.remote_bucket_name, + 'key_environ': 'AZURE_ACCOUNT_NAME', + 'secret_environ': 'AZURE_ACCOUNT_ACCESS_KEY', + } + elif self.remote_backend_name == 'dbfs': + self.backend_kwargs['path'] = self.path self.num_attempts = num_attempts - + self._remote_backend: Optional[ObjectStore] = None self.executor = ProcessPoolExecutor( max_workers=num_concurrent_uploads, mp_context=multiprocessing.get_context('spawn'), @@ -119,35 +118,31 @@ def __init__( # Used internally to track the future status. 
# If a future completed successfully, we'll remove it from this list # when check_workers() or wait() is called - self.futures: List[Future] = [] + self.futures: list[Future] = [] + + @property + def remote_backend(self) -> ObjectStore: + if self._remote_backend is None: + self._remote_backend = build_remote_backend(self.remote_backend_name, self.backend_kwargs) + return self._remote_backend def init(self): # If it's dbfs path like: dbfs:/databricks/mlflow-tracking/{mlflow_experiment_id}/{mlflow_run_id}/ # We need to fill out the experiment_id and run_id - if not self._is_dbfs: - if self.object_store is None: - self.object_store = maybe_create_object_store_from_uri(self.remote_folder) - else: - if not self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): - if self.object_store is None: - self.object_store = _build_dbfs_backend(self.path) - return - if get_global_rank() == 0: - if self.object_store is None: - self.object_store = _build_dbfs_backend(self.path) - assert isinstance(self.object_store, MLFlowObjectStore) - self.path = self.object_store.get_dbfs_path(self.path) - path_list = [self.path] - broadcast_object_list(path_list, src=0) - self.path = path_list[0] - if get_global_rank() != 0: - self.object_store = _build_dbfs_backend(self.path) if get_global_rank() == 0: retry( ObjectStoreTransientError, self.num_attempts, - )(lambda: validate_credentials(self.object_store, '.credentials_validated_successfully'))() + )(lambda: validate_credentials(self.remote_backend, '.credentials_validated_successfully'))() + if self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): + if get_global_rank() == 0: + assert isinstance(self.remote_backend, MLFlowObjectStore) + self.path = self.remote_backend.get_dbfs_path(self.path) + path_list = [self.path] + broadcast_object_list(path_list, src=0) + self.path = path_list[0] + self.backend_kwargs['path'] = self.path def upload_file_async( self, @@ -168,9 +163,8 @@ def upload_file_async( # Async upload file future = self.executor.submit( _upload_file_to_object_store, - is_dbfs=self._is_dbfs, - dbfs_path=self.path, - remote_folder=self.remote_folder, + remote_backend_name=self.remote_backend_name, + backend_kwargs=self.backend_kwargs, remote_file_name=remote_file_name, local_file_path=copied_path, overwrite=overwrite, @@ -186,7 +180,7 @@ def check_workers(self): 1. if it completed with exception, raise that exception 2. 
if it completed without exception, remove it from self.futures """ - done_futures: List[Future] = [] + done_futures: list[Future] = [] for future in self.futures: if future.done(): # future.exception is a blocking call diff --git a/docs/source/doctest_fixtures.py b/docs/source/doctest_fixtures.py index 553d8d9b60..f54d1f69e1 100644 --- a/docs/source/doctest_fixtures.py +++ b/docs/source/doctest_fixtures.py @@ -54,7 +54,7 @@ from composer.loggers import RemoteUploaderDownloader from composer.models import ComposerModel as ComposerModel from composer.optim.scheduler import ConstantScheduler -from composer.utils import LibcloudObjectStore +from composer.utils import LibcloudObjectStore, RemoteUploader from composer.utils import ensure_tuple as ensure_tuple try: @@ -246,6 +246,29 @@ def _new_RemoteUploaderDownloader_init(self, fake_ellipses: None = None, **kwarg RemoteUploaderDownloader.__init__ = _new_RemoteUploaderDownloader_init # type: ignore +# Patch RemoteUploader __init__ function to replace arguments while preserving type +_original_RemoteUploader_init = RemoteUploader.__init__ + + +def _new_RemoteUploader_init(self, fake_ellipses: None = None, **kwargs: Any): + os.makedirs('./object_store', exist_ok=True) + kwargs.update( + num_concurrent_uploads=1, + remote_folder='libcloud://.', + backend_kwargs={ + 'provider': 'local', + 'container': '.', + 'provider_kwargs': { + 'key': os.path.abspath('./object_store'), + }, + }, + num_attempts=1, + ) + _original_RemoteUploader_init(self, **kwargs) + + +RemoteUploader.__init__ = _new_RemoteUploader_init + # Patch ObjectStore __init__ function to replace arguments while preserving type _original_libcloudObjectStore_init = LibcloudObjectStore.__init__ diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index c4b020d9cd..c33971329a 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -640,7 +640,7 @@ class _AlwaysFailDummyObjectStore(DummyObjectStore): def upload_object(self, object_name, filename, callback=None): # Only allows to upload symlink to simulate # the situation that checkpoint file uploading fails - if 'symlink' in object_name: + if 'symlink' in object_name or 'credentials_validated_successfully' in object_name: return super().upload_object(object_name, filename, callback) raise RuntimeError('Raise Error intentionally') @@ -649,7 +649,7 @@ def upload_object(self, object_name, filename, callback=None): else: MockObjectStore = _AlwaysFailDummyObjectStore - with patch('composer.utils.file_helpers.S3ObjectStore', MockObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', MockObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): train_dataset = RandomClassificationDataset(size=10) @@ -788,7 +788,7 @@ def test_autoresume( def _get_tmp_dir(self): return tmp_dir - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): @@ -1225,7 +1225,7 @@ def test_load_weights_object_store(self, tmp_path): def _get_tmp_dir(self): return tmp_dir - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with 
patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): save_folder = 's3://my_bucket/{run_name}/checkpoints' diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index 787ddc6f83..a375250c12 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -68,7 +68,7 @@ def test_upload_mutliple_files(): def _get_tmp_dir(): return tmp_dir - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('tempfile.TemporaryDirectory', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): remote_uploader = RemoteUploader( @@ -113,7 +113,7 @@ def _get_tmp_dir(): return remote_tmp_dir fork_context = multiprocessing.get_context('fork') - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('tempfile.TemporaryDirectory', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): remote_uploader = RemoteUploader(remote_folder='S3://whatever/path',) @@ -159,7 +159,7 @@ def raise_error(self): return True fork_context = multiprocessing.get_context('fork') - with patch('composer.utils.file_helpers.S3ObjectStore', AlwaysFailDummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', AlwaysFailDummyObjectStore): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): remote_uploader = RemoteUploader(remote_folder='S3://whatever/path',) tmp_dir = tempfile.TemporaryDirectory() @@ -182,7 +182,7 @@ def raise_error(self): def test_wait(): fork_context = multiprocessing.get_context('fork') - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): remote_uploader = RemoteUploader( remote_folder='S3://whatever/path', @@ -211,7 +211,7 @@ def test_wait(): def test_wait_and_close(): fork_context = multiprocessing.get_context('fork') - with patch('composer.utils.file_helpers.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): remote_uploader = RemoteUploader( remote_folder='S3://whatever/path', From c0cb94d4cfa75f59e9806a3894ef19f10b21cc6e Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 19:30:30 +0000 Subject: [PATCH 26/57] a --- composer/trainer/trainer.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index c6b98079e6..8967e5e33c 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1885,26 +1885,31 @@ def _try_checkpoint_download( self, latest_checkpoint_path: str, save_latest_remote_file_name: str, + loggers: Sequence[Union[LoggerDestination, ObjectStore]], load_progress_bar: bool, ) -> None: """Attempts to download the checkpoint from the 
logger destinations.""" log.debug( f'Trying to download {save_latest_remote_file_name} to {latest_checkpoint_path} on rank {dist.get_global_rank()}', ) - if self._checkpoint_saver is None or self._checkpoint_saver.remote_uploader is None: - log.debug(f'Skip downloading from remote since no remote object_store found') - return - try: - get_file( - path=save_latest_remote_file_name, - destination=latest_checkpoint_path, - object_store=self._checkpoint_saver.remote_uploader.remote_backend, - overwrite=True, - progress_bar=load_progress_bar, - ) - except (FileNotFoundError): - log.info(f'Checkpoint not found in remote object store') - pass + remote_destination = list(loggers) + if self._checkpoint_saver is not None and self._checkpoint_saver.remote_uploader is not None: + remote_destination.append(self._checkpoint_saver.remote_uploader.remote_backend) + for logger in remote_destination: + try: + # Fetch from logger. If it succeeds, stop trying the rest of the loggers + get_file( + path=save_latest_remote_file_name, + destination=latest_checkpoint_path, + object_store=logger, + overwrite=True, + progress_bar=load_progress_bar, + ) + break + except (NotImplementedError, FileNotFoundError): + log.info(f'Checkpoint not found in: {logger}') + # Ignore errors caused by no checkpoint saved with logger + pass def _get_autoresume_checkpoint( self, @@ -1940,6 +1945,7 @@ def _get_autoresume_checkpoint( self._try_checkpoint_download( latest_checkpoint_path, save_latest_remote_file_name, + loggers, load_progress_bar, ) @@ -1974,6 +1980,7 @@ def _get_autoresume_checkpoint( self._try_checkpoint_download( latest_checkpoint_path, save_latest_remote_file_name, + loggers, load_progress_bar, ) From 95fca9ffad5f9ad8a129eb13878a05e61d5fecf7 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 20:07:36 +0000 Subject: [PATCH 27/57] fix test and lint --- composer/utils/file_helpers.py | 1 + tests/loggers/test_remote_uploader_downloader.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/composer/utils/file_helpers.py b/composer/utils/file_helpers.py index d3e3841b21..18d67a33ff 100644 --- a/composer/utils/file_helpers.py +++ b/composer/utils/file_helpers.py @@ -744,6 +744,7 @@ def validate_credentials( remote_backend: ObjectStore, remote_file_name_to_test: str, ) -> None: + """Upload a tiny text file to test if the credentials are setup correctly.""" # Validates the credentials by attempting to touch a file in the bucket # raises an error if there was a credentials failure. 
with tempfile.NamedTemporaryFile('wb') as f: diff --git a/tests/loggers/test_remote_uploader_downloader.py b/tests/loggers/test_remote_uploader_downloader.py index 1f877d2dd9..b25e23a717 100644 --- a/tests/loggers/test_remote_uploader_downloader.py +++ b/tests/loggers/test_remote_uploader_downloader.py @@ -77,7 +77,7 @@ def object_store_test_helper( # Patching does not work when using multiprocessing with spawn, so we also # patch to use fork fork_context = multiprocessing.get_context('fork') - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('composer.loggers.remote_uploader_downloader.multiprocessing.get_context', lambda _: fork_context): remote_uploader_downloader = RemoteUploaderDownloader( bucket_uri='s3://{remote_dir}', @@ -227,7 +227,7 @@ def get_object_size(self, object_name: str) -> int: return super().get_object_size(object_name) fork_context = multiprocessing.get_context('fork') - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore', RetryDummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', RetryDummyObjectStore): with patch('composer.loggers.remote_uploader_downloader.multiprocessing.get_context', lambda _: fork_context): remote_uploader_downloader = RemoteUploaderDownloader( bucket_uri=f"s3://{tmp_path}/'object_store_backend", @@ -263,7 +263,7 @@ def test_race_with_overwrite(tmp_path: pathlib.Path, use_procs: bool, dummy_stat # Patching does not work when using multiprocessing with spawn, so we also # patch to use fork fork_context = multiprocessing.get_context('fork') - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): with patch('composer.loggers.remote_uploader_downloader.multiprocessing.get_context', lambda _: fork_context): # Create the object store logger remote_uploader_downloader = RemoteUploaderDownloader( @@ -307,7 +307,7 @@ def test_race_with_overwrite(tmp_path: pathlib.Path, use_procs: bool, dummy_stat def test_close_on_failure(tmp_path: pathlib.Path, dummy_state: State): """Test that .close() and .post_close() does not hang even when a worker crashes.""" - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): # Create the object store logger remote_uploader_downloader = RemoteUploaderDownloader( bucket_uri=f"s3://{tmp_path}/'object_store_backend", @@ -355,9 +355,9 @@ def test_close_on_failure(tmp_path: pathlib.Path, dummy_state: State): def test_valid_backend_names(): valid_backend_names = ['s3', 'libcloud', 'sftp'] - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore') as _, \ - patch('composer.loggers.remote_uploader_downloader.SFTPObjectStore') as _, \ - patch('composer.loggers.remote_uploader_downloader.LibcloudObjectStore') as _: + with patch('composer.utils.object_store.utils.S3ObjectStore') as _, \ + patch('composer.utils.object_store.utils.SFTPObjectStore') as _, \ + patch('composer.utils.object_store.utils.LibcloudObjectStore') as _: for name in valid_backend_names: remote_uploader_downloader = RemoteUploaderDownloader(bucket_uri=f'{name}://not-a-real-bucket') # Access the remote_backend property so that it is built @@ -374,7 +374,7 @@ def test_valid_backend_names(): def test_exception_queue_works(tmp_path: 
pathlib.Path, dummy_state: State): """Test that exceptions get put on the exception queue and get thrown""" - with patch('composer.loggers.remote_uploader_downloader.S3ObjectStore', DummyObjectStore): + with patch('composer.utils.object_store.utils.S3ObjectStore', DummyObjectStore): # Create the object store logger remote_uploader_downloader = RemoteUploaderDownloader( bucket_uri=f"s3://{tmp_path}/'object_store_backend", From 2c77da9deab473d29e17e283fed9a8f7f84fdd40 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 22:21:47 +0000 Subject: [PATCH 28/57] up --- composer/callbacks/checkpoint_saver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 22af77df86..1a32b5be63 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -469,7 +469,8 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up # Wait the previous upload tasks on all ranks # self.wait() has dist.barrier, so it needs to be called # on all ranks before any early return - if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: + #if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: + if wait_previous_remote_upload_tasks: self.wait() self.count += 1 From ca46b4fe161f7f5528c050f498fb52b7e2ae61b0 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 22:42:54 +0000 Subject: [PATCH 29/57] a --- composer/callbacks/checkpoint_saver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 1a32b5be63..8cc2b8a879 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -469,7 +469,6 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up # Wait the previous upload tasks on all ranks # self.wait() has dist.barrier, so it needs to be called # on all ranks before any early return - #if wait_previous_remote_upload_tasks and self.count / self.num_concurrent_uploads == 0: if wait_previous_remote_upload_tasks: self.wait() From 4f3108c2166e667e9bcc6eb203f0e0e2dbd489b1 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 14 Jun 2024 23:27:14 +0000 Subject: [PATCH 30/57] a --- composer/callbacks/checkpoint_saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 8cc2b8a879..a91ac84252 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -294,7 +294,7 @@ def __init__( weights_only: bool = False, ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, save_folder: str = '', - num_concurrent_uploads: int = 2, + num_concurrent_uploads: int = 1, ): folder = str(folder) filename = str(filename) From f415d600e9017565def9b94469c28ad4b176b8b0 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 18 Jun 2024 20:53:49 +0000 Subject: [PATCH 31/57] a --- composer/callbacks/checkpoint_saver.py | 154 +++++++++++++++---------- tests/trainer/test_checkpoint.py | 61 +++++----- 2 files changed, 125 insertions(+), 90 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index a91ac84252..ae7d94738f 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -6,6 +6,7 @@ from __future__ import annotations 
import logging +import multiprocessing import os import pathlib import shutil @@ -13,7 +14,9 @@ import textwrap from concurrent.futures import Future from pathlib import Path -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, Optional, Union +from concurrent.futures import Future, ProcessPoolExecutor +import time import torch @@ -34,6 +37,9 @@ is_model_deepspeed, parse_uri, partial_format, + build_remote_backend, + ObjectStoreTransientError, + retry, ) from composer.utils.compression import get_compressor, is_compressed_pt from composer.utils.object_store.mlflow_object_store import MLFLOW_EXPERIMENT_ID_FORMAT_KEY, MLFLOW_RUN_ID_FORMAT_KEY @@ -44,6 +50,44 @@ _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME = '.metadata' +def _upload_symlink_file( + remote_backend_name: str, + backend_kwargs: dist[str, Any], + remote_symlink_file_name: str, + local_symlink_file_name: str, + num_attempts: int, + remote_checkpoint_file_names: list[str], + max_wait_time_in_seconds: int = 3600, +): + """Wait the checkpoint file uploading to finish and start symlink file uploading.""" + + start_time = time.time() + object_store = build_remote_backend(remote_backend_name, backend_kwargs) + + for remote_file_name in remote_checkpoint_file_names: + while True: + try: + object_store.get_object_size(remote_file_name) + break + except Exception as e: + if not isinstance(e, FileNotFoundError): + log.debug(f'Got exception {type(e)}: {str(e)} when accessing remote file {remote_file_name}') + time.sleep(30) + if time.time() - start_time > max_wait_time_in_seconds: + raise RuntimeError(f'Checkpoint file {remote_file_name} uploading not finished after {max_wait_time_in_seconds} seconds') + + log.info(f'Uploading symlink file {remote_symlink_file_name}') + + @retry(ObjectStoreTransientError, num_attempts=num_attempts) + def upload_file(): + object_store.upload_object( + object_name = remote_symlink_file_name, + filename=local_symlink_file_name, + ) + upload_file() + log.info(f'Finished uploading symlink file {remote_symlink_file_name}') + return 0 + class CheckpointSaver(Callback): # noqa: D101 __doc__ = f"""Callback to save checkpoints. 
@@ -295,6 +339,7 @@ def __init__( ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, save_folder: str = '', num_concurrent_uploads: int = 1, + upload_timeout_in_seconds: int = 3600, ): folder = str(folder) filename = str(filename) @@ -330,11 +375,13 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) - self.remote_uploader_futures: List[List[Future]] = [] - self.symlink_file_tasks: List[tuple[str, str]] = [] + self.symlink_file_tasks: list[tuple[str, str]] = [] self.rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads + self.symlink_upload_executor = None + self.symlink_upload_futures = [] + self.upload_timeout_in_seconds = upload_timeout_in_seconds if backend != '': if backend == 'wandb': @@ -351,6 +398,11 @@ def __init__( remote_folder=save_folder, num_concurrent_uploads=self.num_concurrent_uploads, ) + self.symlink_upload_executor = ProcessPoolExecutor( + max_workers=1, + mp_context=multiprocessing.get_context('spawn'), + ) + self.count = 0 def init(self, state: State, logger: Logger) -> None: @@ -444,7 +496,7 @@ def load_state_dict(self, state: dict[str, Any]): load_timestamp.load_state_dict(timestamp_state) self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp - def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_upload_tasks: bool = True): + def _save_checkpoint(self, state: State, logger: Logger): self.last_checkpoint_batch = state.timestamp.batch is_deepspeed = is_model_deepspeed(state.model) @@ -466,14 +518,16 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ) log.debug(f'Checkpoint locally saved to {saved_path}') - # Wait the previous upload tasks on all ranks - # self.wait() has dist.barrier, so it needs to be called - # on all ranks before any early return - if wait_previous_remote_upload_tasks: - self.wait() - self.count += 1 + local_remote_filenames = [] + all_remote_filenames = [] + + need_all_gather_remote_filenames = self.remote_file_name is not None and self.remote_uploader is not None + if not saved_path: # not all ranks save + # todo: all gather + if self.remote_file_name is not None and self.remote_uploader is not None: + all_remote_filenames = dist.all_gather_object(local_remote_filenames) return metadata_local_file_path = None @@ -502,8 +556,6 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up # if remote file name provided, upload the checkpoint if self.remote_file_name is not None: - - futures: List[Future] = [] if state.fsdp_sharded_state_dict_enabled: remote_file_name = self.remote_file_name.format( state, @@ -526,13 +578,12 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ) assert metadata_local_file_path is not None if self.remote_uploader is not None: - futures.append( - self.remote_uploader.upload_file_async( - remote_file_name=metadata_remote_file_name, - file_path=pathlib.Path(metadata_local_file_path), - overwrite=self.overwrite, - ), + self.remote_uploader.upload_file_async( + remote_file_name=metadata_remote_file_name, + file_path=pathlib.Path(metadata_local_file_path), + overwrite=self.overwrite, ) + local_remote_filenames.append(metadata_remote_file_name) else: logger.upload_file( remote_file_name=metadata_remote_file_name, @@ -548,13 +599,12 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up log.debug(f'Uploading checkpoint to 
{remote_file_name}') try: if self.remote_uploader is not None: - futures.append( - self.remote_uploader.upload_file_async( - remote_file_name=remote_file_name, - file_path=pathlib.Path(saved_path), - overwrite=self.overwrite, - ), + self.remote_uploader.upload_file_async( + remote_file_name=remote_file_name, + file_path=pathlib.Path(saved_path), + overwrite=self.overwrite, ) + local_remote_filenames.append(remote_file_name) else: logger.upload_file( remote_file_name=remote_file_name, @@ -567,7 +617,8 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up ) from e if self.remote_uploader is not None: - self.remote_uploader_futures.append(futures) + all_remote_filenames = dist.all_gather_object(local_remote_filenames) + # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: @@ -592,7 +643,21 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up self.rank_saves_remote_symlinks = True create_symlink_file(src_path, symlink_filename) if self.remote_uploader is not None: - self.symlink_file_tasks.append((symlink_filename, symlink_name)) + remote_checkpoint_file_names = [] + for file_names in all_remote_filenames: + remote_checkpoint_file_names += file_names + self.symlink_upload_futures.append( + self.symlink_upload_executor.submit( + _upload_symlink_file, + remote_backend_name=self.remote_uploader.remote_backend_name, + backend_kwargs=self.remote_uploader.backend_kwargs, + remote_symlink_file_name=symlink_name, + local_symlink_file_name=symlink_filename, + num_attempts=3, + remote_checkpoint_file_names=remote_checkpoint_file_names, + max_wait_time_in_seconds=self.upload_timeout_in_seconds, + ), + ) else: logger.upload_file( remote_file_name=symlink_name, @@ -605,37 +670,6 @@ def _save_checkpoint(self, state: State, logger: Logger, wait_previous_remote_up if self.num_checkpoints_to_keep >= 0: self._rotate_checkpoints(sharding_enabled=state.fsdp_sharded_state_dict_enabled) - def wait(self) -> None: - """Wait exsiting upload tasks to finish and start uploading symlink file if necessary.""" - if self.remote_uploader is None: - return - # Wait remote uploader futures and start to upload the latest symlink file if necessary - if self.rank_saves_remote_symlinks and len(self.remote_uploader_futures) != len(self.symlink_file_tasks): - raise RuntimeError( - f'Expect len(remote_uploader_futures) == len(symlink_file_tasks), but got {len(self.remote_uploader_futures)} != {len(self.symlink_file_tasks)}', - ) - log.debug('Waiting for previous checkpoint files upload finish') - for i in range(len(self.remote_uploader_futures)): - for future in self.remote_uploader_futures[i]: - future.result() - log.debug(f'Current rank finished existing uploading tasks') - self.remote_uploader_futures = [] - - t = dist.get_device(None).tensor_to_device(torch.tensor(1)) - dist.all_reduce(t) - if t.item() != dist.get_world_size(): - raise RuntimeError(f'Some rank failed to upload checkpoint files') - log.debug('All ranks finished existing checkpoint uploading tasks, starting symlink file upload if necessary') - if self.rank_saves_remote_symlinks and len(self.symlink_file_tasks) > 0: - # Only upload the last symlink file - symlink_local_filename, symlink_remote_filename = self.symlink_file_tasks[-1] - self.remote_uploader.upload_file_async( - remote_file_name=symlink_remote_filename, - file_path=pathlib.Path(symlink_local_filename), - overwrite=True, - ) - self.symlink_file_tasks = [] - def _rotate_checkpoints(self, 
sharding_enabled: bool = False): while len(self.saved_checkpoints) > self.num_checkpoints_to_keep: prefix_dir = None @@ -655,12 +689,12 @@ def batch_end(self, state: State, logger: Logger) -> None: def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is not None: - self.wait() self.remote_uploader.wait() + for f in self.symlink_upload_futures: + f.result() def post_close(self): if self.remote_uploader is not None: - # Wait the uploading tasks to finish and start symlink file uploading - self.wait() # Wait the symlink file upload to finish and close remote uploader self.remote_uploader.wait_and_close() + self.symlink_upload_executor.shutdown(wait=True) diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index c33971329a..da51123467 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -652,38 +652,39 @@ def upload_object(self, object_name, filename, callback=None): with patch('composer.utils.object_store.utils.S3ObjectStore', MockObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): - train_dataset = RandomClassificationDataset(size=10) - train_dataloader = DataLoader( - dataset=train_dataset, - batch_size=2, - sampler=dist.get_sampler(train_dataset), - ) - - trainer = Trainer( - model=SimpleModel(), - train_dataloader=train_dataloader, - save_interval='1ba', - max_duration='1ba', - save_folder='S3://whatever/', - ) - symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') - if upload_success: - trainer.fit() - with open(symlink_filepath, 'r') as f: - assert f.read() == 'ep0-ba1-rank0.pt' - else: - with pytest.raises(RuntimeError, match='Raise Error intentionally'): - trainer.fit() - assert os.path.exists(symlink_filepath) == False - - def post_close(self): - return + with patch('composer.callbacks.checkpoint_saver.multiprocessing.get_context', lambda _: fork_context): + train_dataset = RandomClassificationDataset(size=10) + train_dataloader = DataLoader( + dataset=train_dataset, + batch_size=2, + sampler=dist.get_sampler(train_dataset), + ) - assert trainer._checkpoint_saver is not None - trainer._checkpoint_saver.post_close = post_close.__get__( - trainer._checkpoint_saver, - CheckpointSaver, + trainer = Trainer( + model=SimpleModel(), + train_dataloader=train_dataloader, + save_interval='1ba', + max_duration='1ba', + save_folder='S3://whatever/', ) + symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') + if upload_success: + trainer.fit() + with open(symlink_filepath, 'r') as f: + assert f.read() == 'ep0-ba1-rank0.pt' + else: + with pytest.raises(RuntimeError, match='Raise Error intentionally'): + trainer.fit() + assert os.path.exists(symlink_filepath) == False + + def post_close(self): + return + + assert trainer._checkpoint_saver is not None + trainer._checkpoint_saver.post_close = post_close.__get__( + trainer._checkpoint_saver, + CheckpointSaver, + ) class TestCheckpointLoading: From a0a3e92831775f4cc7dab1eb3295348748036090 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 18 Jun 2024 21:18:00 +0000 Subject: [PATCH 32/57] a --- tests/trainer/test_checkpoint.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index da51123467..a4e9d8ec65 100644 --- a/tests/trainer/test_checkpoint.py +++ 
b/tests/trainer/test_checkpoint.py @@ -749,6 +749,7 @@ def get_trainer( **kwargs, ) + """ @world_size(1, 2) @device('cpu', 'gpu') @pytest.mark.parametrize('file_extension', ['.pt', '.tar.gz', '.pt.lz4']) @@ -756,6 +757,14 @@ def get_trainer( @pytest.mark.parametrize('delete_local', [True, False]) @pytest.mark.parametrize('test_slashed', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) + """ + @world_size(1) + @device('cpu') + @pytest.mark.parametrize('file_extension', ['.pt']) + @pytest.mark.parametrize('use_object_store', [True]) + @pytest.mark.parametrize('delete_local', [True]) + @pytest.mark.parametrize('test_slashed', [True]) + @pytest.mark.parametrize('save_metrics', [True]) def test_autoresume( self, device: str, @@ -835,6 +844,7 @@ def _get_tmp_dir(self): ), 'Original metrics do not equal metrics from loaded checkpoint.' assert trainer_1.state.run_name == trainer_2.state.run_name + raise 1 == 0 @pytest.mark.parametrize(('save_folder'), [None, 'first']) def test_autoresume_from_callback( From 301dd67940610cc019206c5ef79acbf59f475769 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 18 Jun 2024 21:21:12 +0000 Subject: [PATCH 33/57] a --- tests/trainer/test_checkpoint.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index a4e9d8ec65..a6e5dcdc10 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -802,6 +802,7 @@ def _get_tmp_dir(self): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): + time_1 = time.time() trainer_1 = self.get_trainer( latest_filename=latest_filename, file_extension=file_extension, @@ -811,10 +812,13 @@ def _get_tmp_dir(self): autoresume=True, save_metrics=save_metrics, ) + time_2 = time.time() # trains the model, saving the checkpoint files trainer_1.fit() + time_3 = time.time() trainer_1.close() + time_4 = time.time() if delete_local: # delete files locally, forcing trainer to look in object store @@ -829,6 +833,7 @@ def _get_tmp_dir(self): load_path='ignore_me.pt', # this should be ignored load_ignore_keys=['*'], # this should be ignored ) + time_5 = time.time() self._assert_weights_equivalent( trainer_1.state.model, @@ -844,6 +849,7 @@ def _get_tmp_dir(self): ), 'Original metrics do not equal metrics from loaded checkpoint.' 
assert trainer_1.state.run_name == trainer_2.state.run_name + print(f"bigning debug traine 1 init {time_2-time_1}, fit {time_3-time_2}, close {time_4 - time_3}, trainer 2 init: {time_5-time_4}") raise 1 == 0 @pytest.mark.parametrize(('save_folder'), [None, 'first']) From c4c094b777b7504d27ae5af008188dae6a1150e8 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 18 Jun 2024 23:28:19 +0000 Subject: [PATCH 34/57] a --- composer/callbacks/checkpoint_saver.py | 60 ++++++++++++++++++-------- tests/trainer/test_checkpoint.py | 31 ++++++------- 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index ae7d94738f..750f28685e 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -12,21 +12,20 @@ import shutil import tempfile import textwrap -from concurrent.futures import Future +import time +from concurrent.futures import ProcessPoolExecutor from pathlib import Path from typing import Any, Callable, Optional, Union -from concurrent.futures import Future, ProcessPoolExecutor -import time - -import torch from composer.core import Callback, Event, State, Time, Timestamp from composer.loggers import Logger, MLFlowLogger from composer.utils import ( FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, FORMAT_NAME_WITH_DIST_TABLE, + ObjectStoreTransientError, PartialFilePath, RemoteUploader, + build_remote_backend, checkpoint, create_interval_scheduler, create_symlink_file, @@ -37,8 +36,6 @@ is_model_deepspeed, parse_uri, partial_format, - build_remote_backend, - ObjectStoreTransientError, retry, ) from composer.utils.compression import get_compressor, is_compressed_pt @@ -50,40 +47,50 @@ _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME = '.metadata' + def _upload_symlink_file( remote_backend_name: str, - backend_kwargs: dist[str, Any], + backend_kwargs: dict[str, Any], remote_symlink_file_name: str, local_symlink_file_name: str, num_attempts: int, remote_checkpoint_file_names: list[str], + main_process_pid: int, max_wait_time_in_seconds: int = 3600, + wait_before_next_try_in_seconds: float = 30, ): """Wait the checkpoint file uploading to finish and start symlink file uploading.""" - start_time = time.time() object_store = build_remote_backend(remote_backend_name, backend_kwargs) - + for remote_file_name in remote_checkpoint_file_names: while True: + # Return if parent process exits + try: + os.kill(main_process_pid, 0) + except OSError: + return try: object_store.get_object_size(remote_file_name) break except Exception as e: if not isinstance(e, FileNotFoundError): log.debug(f'Got exception {type(e)}: {str(e)} when accessing remote file {remote_file_name}') - time.sleep(30) + time.sleep(wait_before_next_try_in_seconds) if time.time() - start_time > max_wait_time_in_seconds: - raise RuntimeError(f'Checkpoint file {remote_file_name} uploading not finished after {max_wait_time_in_seconds} seconds') + raise RuntimeError( + f'Checkpoint file {remote_file_name} uploading not finished after {max_wait_time_in_seconds} seconds', + ) log.info(f'Uploading symlink file {remote_symlink_file_name}') @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(): object_store.upload_object( - object_name = remote_symlink_file_name, + object_name=remote_symlink_file_name, filename=local_symlink_file_name, ) + upload_file() log.info(f'Finished uploading symlink file {remote_symlink_file_name}') return 0 @@ -382,6 +389,9 @@ def __init__( self.symlink_upload_executor = None 
self.symlink_upload_futures = [] self.upload_timeout_in_seconds = upload_timeout_in_seconds + # Allow unit test to override this to make it faster + self._symlink_upload_wait_before_next_try_in_seconds = 30.0 + self.pid = os.getpid() if backend != '': if backend == 'wandb': @@ -522,8 +532,6 @@ def _save_checkpoint(self, state: State, logger: Logger): local_remote_filenames = [] all_remote_filenames = [] - need_all_gather_remote_filenames = self.remote_file_name is not None and self.remote_uploader is not None - if not saved_path: # not all ranks save # todo: all gather if self.remote_file_name is not None and self.remote_uploader is not None: @@ -619,7 +627,6 @@ def _save_checkpoint(self, state: State, logger: Logger): if self.remote_uploader is not None: all_remote_filenames = dist.all_gather_object(local_remote_filenames) - # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: symlink_name = self.latest_remote_file_name.format( @@ -646,6 +653,7 @@ def _save_checkpoint(self, state: State, logger: Logger): remote_checkpoint_file_names = [] for file_names in all_remote_filenames: remote_checkpoint_file_names += file_names + assert self.symlink_upload_executor is not None self.symlink_upload_futures.append( self.symlink_upload_executor.submit( _upload_symlink_file, @@ -654,8 +662,10 @@ def _save_checkpoint(self, state: State, logger: Logger): remote_symlink_file_name=symlink_name, local_symlink_file_name=symlink_filename, num_attempts=3, + main_process_pid=self.pid, remote_checkpoint_file_names=remote_checkpoint_file_names, max_wait_time_in_seconds=self.upload_timeout_in_seconds, + wait_before_next_try_in_seconds=self._symlink_upload_wait_before_next_try_in_seconds, ), ) else: @@ -681,10 +691,25 @@ def _rotate_checkpoints(self, sharding_enabled: bool = False): if dist.get_global_rank() == 0: shutil.rmtree(prefix_dir) + def check_symlink_upload_workers(self): + if self.remote_uploader is None: + return + done_futures = [] + for future in self.symlink_upload_futures: + if future.done(): + exception_or_none = future.exception() + if exception_or_none is not None: + raise exception_or_none + else: + done_futures.append(future) + for future in done_futures: + self.symlink_upload_futures.remove(future) + def batch_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is not None: self.remote_uploader.check_workers() + self.check_symlink_upload_workers() def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused @@ -697,4 +722,5 @@ def post_close(self): if self.remote_uploader is not None: # Wait the symlink file upload to finish and close remote uploader self.remote_uploader.wait_and_close() - self.symlink_upload_executor.shutdown(wait=True) + if len(self.symlink_upload_futures) > 1: + self.symlink_upload_futures[-1].result(timeout=60) diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index a6e5dcdc10..f0cb750697 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -652,7 +652,10 @@ def upload_object(self, object_name, filename, callback=None): with patch('composer.utils.object_store.utils.S3ObjectStore', MockObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): - with patch('composer.callbacks.checkpoint_saver.multiprocessing.get_context', lambda _: fork_context): + with 
patch( + 'composer.callbacks.checkpoint_saver.multiprocessing.get_context', + lambda _: fork_context, + ): train_dataset = RandomClassificationDataset(size=10) train_dataloader = DataLoader( dataset=train_dataset, @@ -673,11 +676,16 @@ def upload_object(self, object_name, filename, callback=None): with open(symlink_filepath, 'r') as f: assert f.read() == 'ep0-ba1-rank0.pt' else: + assert trainer._checkpoint_saver is not None + trainer._checkpoint_saver._symlink_upload_wait_before_next_try_in_seconds = 0.01 + trainer._checkpoint_saver.upload_timeout_in_seconds = 1 with pytest.raises(RuntimeError, match='Raise Error intentionally'): trainer.fit() assert os.path.exists(symlink_filepath) == False def post_close(self): + #raise RuntimeError("haha") + #self.symlink_upload_executor.shutdown(wait=False, ca) return assert trainer._checkpoint_saver is not None @@ -749,7 +757,6 @@ def get_trainer( **kwargs, ) - """ @world_size(1, 2) @device('cpu', 'gpu') @pytest.mark.parametrize('file_extension', ['.pt', '.tar.gz', '.pt.lz4']) @@ -757,14 +764,6 @@ def get_trainer( @pytest.mark.parametrize('delete_local', [True, False]) @pytest.mark.parametrize('test_slashed', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) - """ - @world_size(1) - @device('cpu') - @pytest.mark.parametrize('file_extension', ['.pt']) - @pytest.mark.parametrize('use_object_store', [True]) - @pytest.mark.parametrize('delete_local', [True]) - @pytest.mark.parametrize('test_slashed', [True]) - @pytest.mark.parametrize('save_metrics', [True]) def test_autoresume( self, device: str, @@ -802,7 +801,6 @@ def _get_tmp_dir(self): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): - time_1 = time.time() trainer_1 = self.get_trainer( latest_filename=latest_filename, file_extension=file_extension, @@ -812,13 +810,13 @@ def _get_tmp_dir(self): autoresume=True, save_metrics=save_metrics, ) - time_2 = time.time() + if use_object_store: + assert trainer_1._checkpoint_saver is not None + trainer_1._checkpoint_saver._symlink_upload_wait_before_next_try_in_seconds = 0.01 # trains the model, saving the checkpoint files trainer_1.fit() - time_3 = time.time() trainer_1.close() - time_4 = time.time() if delete_local: # delete files locally, forcing trainer to look in object store @@ -833,7 +831,6 @@ def _get_tmp_dir(self): load_path='ignore_me.pt', # this should be ignored load_ignore_keys=['*'], # this should be ignored ) - time_5 = time.time() self._assert_weights_equivalent( trainer_1.state.model, @@ -849,8 +846,6 @@ def _get_tmp_dir(self): ), 'Original metrics do not equal metrics from loaded checkpoint.' 
assert trainer_1.state.run_name == trainer_2.state.run_name - print(f"bigning debug traine 1 init {time_2-time_1}, fit {time_3-time_2}, close {time_4 - time_3}, trainer 2 init: {time_5-time_4}") - raise 1 == 0 @pytest.mark.parametrize(('save_folder'), [None, 'first']) def test_autoresume_from_callback( @@ -1250,6 +1245,8 @@ def _get_tmp_dir(self): save_folder=save_folder, run_name='electric-zebra', ) + assert trainer_1._checkpoint_saver is not None + trainer_1._checkpoint_saver._symlink_upload_wait_before_next_try_in_seconds = 0.01 trainer_1.fit() trainer_1.close() From 5ec3e2878d757b961059036626391877fc5e4f19 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 16:03:28 +0000 Subject: [PATCH 35/57] a --- composer/callbacks/checkpoint_saver.py | 4 ++-- tests/fixtures/autouse_fixtures.py | 2 +- tests/trainer/test_checkpoint.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 750f28685e..5f82cde3a4 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -82,7 +82,7 @@ def _upload_symlink_file( f'Checkpoint file {remote_file_name} uploading not finished after {max_wait_time_in_seconds} seconds', ) - log.info(f'Uploading symlink file {remote_symlink_file_name}') + log.debug(f'Uploading symlink file {remote_symlink_file_name}') @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(): @@ -92,7 +92,7 @@ def upload_file(): ) upload_file() - log.info(f'Finished uploading symlink file {remote_symlink_file_name}') + log.debug(f'Finished uploading symlink file {remote_symlink_file_name}') return 0 diff --git a/tests/fixtures/autouse_fixtures.py b/tests/fixtures/autouse_fixtures.py index 03c3319048..c881157353 100644 --- a/tests/fixtures/autouse_fixtures.py +++ b/tests/fixtures/autouse_fixtures.py @@ -84,7 +84,7 @@ def configure_dist(request: pytest.FixtureRequest): assert device is not None if not dist.is_initialized(): - dist.initialize_dist(device, timeout=10.0) + dist.initialize_dist(device, timeout=300.0) # Hold PyTest until all ranks have reached this barrier. Ensure that no rank starts # any test before other ranks are ready to start it, which could be a cause of random timeouts # (e.g. rank 1 starts the next test while rank 0 is finishing up the previous test). 
diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index f0cb750697..93681102c7 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -1009,7 +1009,7 @@ def test_strict_errors(self, missing_key: bool, unexpected_key: bool): @device('cpu', 'gpu') @pytest.mark.parametrize('load_weights_only', [True, False]) @pytest.mark.parametrize('save_metrics', [True, False]) - def _test_load_weights(self, device, load_weights_only, save_metrics): + def test_load_weights(self, device, load_weights_only, save_metrics): trainer_1 = self.get_trainer(save_folder='first', device=device, save_metrics=save_metrics) trainer_1.fit() From 98138163b84569c370d0258954a9148bda01c794 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 20:07:23 +0000 Subject: [PATCH 36/57] a --- composer/callbacks/checkpoint_saver.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 5f82cde3a4..f2841cc281 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -56,6 +56,7 @@ def _upload_symlink_file( num_attempts: int, remote_checkpoint_file_names: list[str], main_process_pid: int, + is_remote_upload_failed: multiprocessing.Event, # pyright: ignore[reportGeneralTypeIssues] max_wait_time_in_seconds: int = 3600, wait_before_next_try_in_seconds: float = 30, ): @@ -65,6 +66,9 @@ def _upload_symlink_file( for remote_file_name in remote_checkpoint_file_names: while True: + if is_remote_upload_failed.is_set(): + log.debug(f'Stop symlink uploading since the checkpoint files uploading failed') + return # Return if parent process exits try: os.kill(main_process_pid, 0) @@ -387,6 +391,7 @@ def __init__( self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads self.symlink_upload_executor = None + self.is_remote_upload_failed = None self.symlink_upload_futures = [] self.upload_timeout_in_seconds = upload_timeout_in_seconds # Allow unit test to override this to make it faster @@ -408,10 +413,12 @@ def __init__( remote_folder=save_folder, num_concurrent_uploads=self.num_concurrent_uploads, ) + mp_context = multiprocessing.get_context('spawn') self.symlink_upload_executor = ProcessPoolExecutor( max_workers=1, - mp_context=multiprocessing.get_context('spawn'), + mp_context=mp_context, ) + self.is_remote_upload_failed = mp_context.Manager().Event() self.count = 0 @@ -654,6 +661,7 @@ def _save_checkpoint(self, state: State, logger: Logger): for file_names in all_remote_filenames: remote_checkpoint_file_names += file_names assert self.symlink_upload_executor is not None + assert self.is_remote_upload_failed is not None self.symlink_upload_futures.append( self.symlink_upload_executor.submit( _upload_symlink_file, @@ -664,6 +672,7 @@ def _save_checkpoint(self, state: State, logger: Logger): num_attempts=3, main_process_pid=self.pid, remote_checkpoint_file_names=remote_checkpoint_file_names, + is_remote_upload_failed=self.is_remote_upload_failed, max_wait_time_in_seconds=self.upload_timeout_in_seconds, wait_before_next_try_in_seconds=self._symlink_upload_wait_before_next_try_in_seconds, ), @@ -708,8 +717,13 @@ def check_symlink_upload_workers(self): def batch_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is not None: - self.remote_uploader.check_workers() - self.check_symlink_upload_workers() + try: + 
self.remote_uploader.check_workers() + self.check_symlink_upload_workers() + except Exception as e: + assert self.is_remote_upload_failed is not None + self.is_remote_upload_failed.set() + raise e def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused From 8c3c5cc9848621a4e06c23fafc8d8bf729e8c045 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 20:32:09 +0000 Subject: [PATCH 37/57] address comments --- composer/callbacks/checkpoint_saver.py | 7 +++---- composer/utils/remote_uploader.py | 10 ++++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index f2841cc281..3a40fc9ea6 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -397,6 +397,7 @@ def __init__( # Allow unit test to override this to make it faster self._symlink_upload_wait_before_next_try_in_seconds = 30.0 self.pid = os.getpid() + self.symlink_count = 0 if backend != '': if backend == 'wandb': @@ -420,8 +421,6 @@ def __init__( ) self.is_remote_upload_failed = mp_context.Manager().Event() - self.count = 0 - def init(self, state: State, logger: Logger) -> None: # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. # Assumes that MLFlowLogger comes before CheckpointSaver in the list of loggers. @@ -535,7 +534,7 @@ def _save_checkpoint(self, state: State, logger: Logger): ) log.debug(f'Checkpoint locally saved to {saved_path}') - self.count += 1 + self.symlink_count += 1 local_remote_filenames = [] all_remote_filenames = [] @@ -644,7 +643,7 @@ def _save_checkpoint(self, state: State, logger: Logger): # create and upload a symlink file symlink_filename = os.path.join( self.tmp_dir_for_symlink.name, - f'latest.{self.count}.symlink', + f'latest.{self.symlink_count}.symlink', ) # Sharded checkpoints for torch >2.0 use directories not files for load_paths if state.fsdp_sharded_state_dict_enabled: diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 3532ba1450..09b625db3a 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -131,10 +131,12 @@ def init(self): # We need to fill out the experiment_id and run_id if get_global_rank() == 0: - retry( - ObjectStoreTransientError, - self.num_attempts, - )(lambda: validate_credentials(self.remote_backend, '.credentials_validated_successfully'))() + + @retry(ObjectStoreTransientError, num_attempts=self.num_attempts) + def _validate_credential_with_retry(): + validate_credentials(self.remote_backend, '.credentials_validated_successfully') + + _validate_credential_with_retry() if self.path.startswith(MLFLOW_DBFS_PATH_PREFIX): if get_global_rank() == 0: assert isinstance(self.remote_backend, MLFlowObjectStore) From c81cc2f88fd0ab4f56129396c7a4b1476b9fe68e Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 20:47:03 +0000 Subject: [PATCH 38/57] a --- composer/callbacks/checkpoint_saver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 3a40fc9ea6..c8d777ba2f 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -719,7 +719,9 @@ def batch_end(self, state: State, logger: Logger) -> None: try: self.remote_uploader.check_workers() self.check_symlink_upload_workers() + log.debug(f"bigning debug workers are good") except Exception as e: + log.debug(f"bigning debug 
workers are bad") assert self.is_remote_upload_failed is not None self.is_remote_upload_failed.set() raise e From c1174d4ce78b3d2392d59af4a8bbf4d617e08cf1 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 20:53:38 +0000 Subject: [PATCH 39/57] a --- composer/callbacks/checkpoint_saver.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index c8d777ba2f..3a40fc9ea6 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -719,9 +719,7 @@ def batch_end(self, state: State, logger: Logger) -> None: try: self.remote_uploader.check_workers() self.check_symlink_upload_workers() - log.debug(f"bigning debug workers are good") except Exception as e: - log.debug(f"bigning debug workers are bad") assert self.is_remote_upload_failed is not None self.is_remote_upload_failed.set() raise e From df601d2cbee697f83c7d505baea51158e5ba4b9c Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 22:15:33 +0000 Subject: [PATCH 40/57] a --- composer/callbacks/checkpoint_saver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 3a40fc9ea6..addaf9c41d 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -734,6 +734,10 @@ def fit_end(self, state: State, logger: Logger) -> None: def post_close(self): if self.remote_uploader is not None: # Wait the symlink file upload to finish and close remote uploader - self.remote_uploader.wait_and_close() + try: + self.remote_uploader.wait_and_close() + except: + assert self.is_remote_upload_failed is not None + self.is_remote_upload_failed.set() if len(self.symlink_upload_futures) > 1: self.symlink_upload_futures[-1].result(timeout=60) From a41f427a70ac3b07e6ebe93d6d8abf2c5d96ad0c Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 23:08:15 +0000 Subject: [PATCH 41/57] a --- composer/utils/remote_uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 09b625db3a..f32dd66b3f 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -40,7 +40,7 @@ def _upload_file_to_object_store( overwrite: bool, num_attempts: int, ) -> int: - object_store = build_remote_backend(remote_backend_name, backend_kwargs) + object_store = build_remote_backend(remote_backend_name, backend_kwargs) @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(retry_index: int = 0): From bc06a7b55d720edeeac53e7011c278134f9664f9 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Thu, 20 Jun 2024 23:08:37 +0000 Subject: [PATCH 42/57] rerun test --- composer/utils/remote_uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index f32dd66b3f..09b625db3a 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -40,7 +40,7 @@ def _upload_file_to_object_store( overwrite: bool, num_attempts: int, ) -> int: - object_store = build_remote_backend(remote_backend_name, backend_kwargs) + object_store = build_remote_backend(remote_backend_name, backend_kwargs) @retry(ObjectStoreTransientError, num_attempts=num_attempts) def upload_file(retry_index: int = 0): From c87f36c1c46554725c5213adcab543833600e0b1 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: 
Fri, 21 Jun 2024 16:00:27 +0000 Subject: [PATCH 43/57] add logging --- composer/callbacks/checkpoint_saver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index addaf9c41d..a1f6b075b0 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -727,9 +727,11 @@ def batch_end(self, state: State, logger: Logger) -> None: def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is not None: + log.info('Waiting checkpoint uploading finish') self.remote_uploader.wait() - for f in self.symlink_upload_futures: - f.result() + for f in self.symlink_upload_futures: + f.result() + log.info('Checkpoint uploading finished!') def post_close(self): if self.remote_uploader is not None: From 0e8ae23454cf95f403d718fecc254b5c1d859b73 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 21 Jun 2024 17:52:50 +0000 Subject: [PATCH 44/57] remove debug comments --- composer/callbacks/checkpoint_saver.py | 8 ++++++-- tests/trainer/test_checkpoint.py | 2 -- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 0222ed3105..98df51bd32 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -538,7 +538,6 @@ def _save_checkpoint(self, state: State, logger: Logger): all_remote_filenames = [] if not saved_path: # not all ranks save - # todo: all gather if self.remote_file_name is not None and self.remote_uploader is not None: all_remote_filenames = dist.all_gather_object(local_remote_filenames) return @@ -727,7 +726,12 @@ def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is not None: log.info('Waiting checkpoint uploading finish') - self.remote_uploader.wait() + try: + self.remote_uploader.wait() + except Exception as e: + assert self.is_remote_upload_failed is not None + self.is_remote_upload_failed.set() + raise e for f in self.symlink_upload_futures: f.result() log.info('Checkpoint uploading finished!') diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 93681102c7..8dc0f0af36 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -684,8 +684,6 @@ def upload_object(self, object_name, filename, callback=None): assert os.path.exists(symlink_filepath) == False def post_close(self): - #raise RuntimeError("haha") - #self.symlink_upload_executor.shutdown(wait=False, ca) return assert trainer._checkpoint_saver is not None From c7541c4dc51a166dfdad128bdca9a0e8516901d0 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 21 Jun 2024 21:37:51 +0000 Subject: [PATCH 45/57] comments --- composer/callbacks/checkpoint_saver.py | 67 ++++++++++++++------------ composer/utils/file_helpers.py | 2 +- composer/utils/object_store/utils.py | 2 +- tests/utils/test_remote_uploader.py | 6 +-- 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 98df51bd32..1ab00661ed 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -407,7 +407,7 @@ def __init__( elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: raise NotImplementedError( f'There is no implementation for the cloud backend {backend} via URI. 
Please use ' - 'one of the supported object stores.', + 'one of the supported object stores (s3, oci, gs, azure, dbfs).', ) self.remote_uploader = RemoteUploader( remote_folder=save_folder, @@ -511,6 +511,27 @@ def load_state_dict(self, state: dict[str, Any]): load_timestamp.load_state_dict(timestamp_state) self.all_saved_checkpoints_to_timestamp[save_filename] = load_timestamp + def _upload_checkpoint( + self, + remote_file_name: str, + local_file_name: str, + local_remote_file_names: list[str], + logger: Logger, + ): + if self.remote_uploader is not None: + self.remote_uploader.upload_file_async( + remote_file_name=remote_file_name, + file_path=pathlib.Path(local_file_name), + overwrite=self.overwrite, + ) + local_remote_file_names.append(remote_file_name) + else: + logger.upload_file( + remote_file_name=remote_file_name, + file_path=local_file_name, + overwrite=self.overwrite, + ) + def _save_checkpoint(self, state: State, logger: Logger): self.last_checkpoint_batch = state.timestamp.batch @@ -534,12 +555,12 @@ def _save_checkpoint(self, state: State, logger: Logger): log.debug(f'Checkpoint locally saved to {saved_path}') self.symlink_count += 1 - local_remote_filenames = [] + local_remote_file_names = [] all_remote_filenames = [] if not saved_path: # not all ranks save if self.remote_file_name is not None and self.remote_uploader is not None: - all_remote_filenames = dist.all_gather_object(local_remote_filenames) + all_remote_filenames = dist.all_gather_object(local_remote_file_names) return metadata_local_file_path = None @@ -589,19 +610,12 @@ def _save_checkpoint(self, state: State, logger: Logger): state.timestamp, ) assert metadata_local_file_path is not None - if self.remote_uploader is not None: - self.remote_uploader.upload_file_async( - remote_file_name=metadata_remote_file_name, - file_path=pathlib.Path(metadata_local_file_path), - overwrite=self.overwrite, - ) - local_remote_filenames.append(metadata_remote_file_name) - else: - logger.upload_file( - remote_file_name=metadata_remote_file_name, - file_path=metadata_local_file_path, - overwrite=self.overwrite, - ) + self._upload_checkpoint( + remote_file_name=metadata_remote_file_name, + local_file_name=metadata_local_file_path, + local_remote_file_names=local_remote_file_names, + logger=logger, + ) else: remote_file_name = self.remote_file_name.format( state, @@ -610,26 +624,19 @@ def _save_checkpoint(self, state: State, logger: Logger): log.debug(f'Uploading checkpoint to {remote_file_name}') try: - if self.remote_uploader is not None: - self.remote_uploader.upload_file_async( - remote_file_name=remote_file_name, - file_path=pathlib.Path(saved_path), - overwrite=self.overwrite, - ) - local_remote_filenames.append(remote_file_name) - else: - logger.upload_file( - remote_file_name=remote_file_name, - file_path=saved_path, - overwrite=self.overwrite, - ) + self._upload_checkpoint( + remote_file_name=remote_file_name, + local_file_name=saved_path, + local_remote_file_names=local_remote_file_names, + logger=logger, + ) except FileExistsError as e: raise FileExistsError( f'Uploading checkpoint failed with error: {e}. overwrite was set to {self.overwrite}. 
To overwrite checkpoints with Trainer, set save_overwrite to True.', ) from e if self.remote_uploader is not None: - all_remote_filenames = dist.all_gather_object(local_remote_filenames) + all_remote_filenames = dist.all_gather_object(local_remote_file_names) # symlinks stay the same with sharded checkpointing if self.latest_remote_file_name is not None: diff --git a/composer/utils/file_helpers.py b/composer/utils/file_helpers.py index 18d67a33ff..11d10328ea 100644 --- a/composer/utils/file_helpers.py +++ b/composer/utils/file_helpers.py @@ -743,7 +743,7 @@ def create_symlink_file( def validate_credentials( remote_backend: ObjectStore, remote_file_name_to_test: str, -) -> None: +): """Upload a tiny text file to test if the credentials are setup correctly.""" # Validates the credentials by attempting to touch a file in the bucket # raises an error if there was a credentials failure. diff --git a/composer/utils/object_store/utils.py b/composer/utils/object_store/utils.py index 4ca8c5cf93..0d33774bc7 100644 --- a/composer/utils/object_store/utils.py +++ b/composer/utils/object_store/utils.py @@ -1,4 +1,4 @@ -# Copyright 2022 MosaicML Composer authors +# Copyright 2024 MosaicML Composer authors # SPDX-License-Identifier: Apache-2.0 """Helpers for working with object stores.""" diff --git a/tests/utils/test_remote_uploader.py b/tests/utils/test_remote_uploader.py index a375250c12..2e41e91d18 100644 --- a/tests/utils/test_remote_uploader.py +++ b/tests/utils/test_remote_uploader.py @@ -25,12 +25,12 @@ def __init__(self, **kwargs: Dict[str, Any]) -> None: self.sleep_sec = 0 self.dest_filename = '' - def get_tmp_dir(self): - return tempfile.TemporaryDirectory() - def raise_error(self): return False + def get_tmp_dir(self): + return tempfile.TemporaryDirectory() + def upload_object( self, object_name: str, From a9081c2d0479d1942d6ddcac532a09763f2a4cae Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 25 Jun 2024 23:53:49 +0000 Subject: [PATCH 46/57] a --- composer/callbacks/checkpoint_saver.py | 166 ++++++++----------------- composer/utils/__init__.py | 3 +- composer/utils/remote_uploader.py | 76 ++++++++++- 3 files changed, 128 insertions(+), 117 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 1ab00661ed..b44ede4e49 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -12,8 +12,6 @@ import shutil import tempfile import textwrap -import time -from concurrent.futures import ProcessPoolExecutor from pathlib import Path from typing import Any, Callable, Optional, Union @@ -22,10 +20,9 @@ from composer.utils import ( FORMAT_NAME_WITH_DIST_AND_TIME_TABLE, FORMAT_NAME_WITH_DIST_TABLE, - ObjectStoreTransientError, PartialFilePath, + RemoteFilesExistingCheckStatus, RemoteUploader, - build_remote_backend, checkpoint, create_interval_scheduler, create_symlink_file, @@ -36,7 +33,6 @@ is_model_deepspeed, parse_uri, partial_format, - retry, ) from composer.utils.checkpoint import _TORCH_DISTRIBUTED_CHECKPOINTS_METADATA_FILENAME from composer.utils.compression import get_compressor, is_compressed_pt @@ -47,58 +43,6 @@ __all__ = ['CheckpointSaver'] -def _upload_symlink_file( - remote_backend_name: str, - backend_kwargs: dict[str, Any], - remote_symlink_file_name: str, - local_symlink_file_name: str, - num_attempts: int, - remote_checkpoint_file_names: list[str], - main_process_pid: int, - is_remote_upload_failed: multiprocessing.Event, # pyright: ignore[reportGeneralTypeIssues] - 
max_wait_time_in_seconds: int = 3600, - wait_before_next_try_in_seconds: float = 30, -): - """Wait the checkpoint file uploading to finish and start symlink file uploading.""" - start_time = time.time() - object_store = build_remote_backend(remote_backend_name, backend_kwargs) - - for remote_file_name in remote_checkpoint_file_names: - while True: - if is_remote_upload_failed.is_set(): - log.debug(f'Stop symlink uploading since the checkpoint files uploading failed') - return - # Return if parent process exits - try: - os.kill(main_process_pid, 0) - except OSError: - return - try: - object_store.get_object_size(remote_file_name) - break - except Exception as e: - if not isinstance(e, FileNotFoundError): - log.debug(f'Got exception {type(e)}: {str(e)} when accessing remote file {remote_file_name}') - time.sleep(wait_before_next_try_in_seconds) - if time.time() - start_time > max_wait_time_in_seconds: - raise RuntimeError( - f'Checkpoint file {remote_file_name} uploading not finished after {max_wait_time_in_seconds} seconds', - ) - - log.debug(f'Uploading symlink file {remote_symlink_file_name}') - - @retry(ObjectStoreTransientError, num_attempts=num_attempts) - def upload_file(): - object_store.upload_object( - object_name=remote_symlink_file_name, - filename=local_symlink_file_name, - ) - - upload_file() - log.debug(f'Finished uploading symlink file {remote_symlink_file_name}') - return 0 - - class CheckpointSaver(Callback): # noqa: D101 __doc__ = f"""Callback to save checkpoints. @@ -389,14 +333,13 @@ def __init__( self.rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads - self.symlink_upload_executor = None self.is_remote_upload_failed = None - self.symlink_upload_futures = [] self.upload_timeout_in_seconds = upload_timeout_in_seconds # Allow unit test to override this to make it faster self._symlink_upload_wait_before_next_try_in_seconds = 30.0 self.pid = os.getpid() self.symlink_count = 0 + self.symlink_upload_tasks = [] if backend != '': if backend == 'wandb': @@ -414,10 +357,6 @@ def __init__( num_concurrent_uploads=self.num_concurrent_uploads, ) mp_context = multiprocessing.get_context('spawn') - self.symlink_upload_executor = ProcessPoolExecutor( - max_workers=1, - mp_context=mp_context, - ) self.is_remote_upload_failed = mp_context.Manager().Event() def init(self, state: State, logger: Logger) -> None: @@ -664,22 +603,13 @@ def _save_checkpoint(self, state: State, logger: Logger): remote_checkpoint_file_names = [] for file_names in all_remote_filenames: remote_checkpoint_file_names += file_names - assert self.symlink_upload_executor is not None - assert self.is_remote_upload_failed is not None - self.symlink_upload_futures.append( - self.symlink_upload_executor.submit( - _upload_symlink_file, - remote_backend_name=self.remote_uploader.remote_backend_name, - backend_kwargs=self.remote_uploader.backend_kwargs, - remote_symlink_file_name=symlink_name, - local_symlink_file_name=symlink_filename, - num_attempts=3, - main_process_pid=self.pid, - remote_checkpoint_file_names=remote_checkpoint_file_names, - is_remote_upload_failed=self.is_remote_upload_failed, - max_wait_time_in_seconds=self.upload_timeout_in_seconds, - wait_before_next_try_in_seconds=self._symlink_upload_wait_before_next_try_in_seconds, - ), + check_remote_files_exist_future = self.remote_uploader.check_remote_files_exist_async( + remote_checkpoint_file_names=remote_checkpoint_file_names, + 
max_wait_time_in_seconds=self.upload_timeout_in_seconds, + wait_before_next_try_in_seconds=self._symlink_upload_wait_before_next_try_in_seconds, + ) + self.symlink_upload_tasks.append( + (check_remote_files_exist_future, symlink_filename, symlink_name), ) else: logger.upload_file( @@ -704,44 +634,56 @@ def _rotate_checkpoints(self, sharding_enabled: bool = False): if dist.get_global_rank() == 0: shutil.rmtree(prefix_dir) - def check_symlink_upload_workers(self): + def batch_end(self, state: State, logger: Logger) -> None: + del state, logger # unused if self.remote_uploader is None: return - done_futures = [] - for future in self.symlink_upload_futures: - if future.done(): - exception_or_none = future.exception() - if exception_or_none is not None: - raise exception_or_none + self.remote_uploader.check_workers() + if not self.rank_saves_remote_symlinks: + return + undone_symlink_upload_tasks = [] + for (check_remote_files_exist_future, local_symlink_file, + remote_symlink_file) in reversed(self.symlink_upload_tasks): + if not check_remote_files_exist_future.done(): + undone_symlink_upload_tasks.insert( + 0, + (check_remote_files_exist_future, local_symlink_file, remote_symlink_file), + ) + continue + if check_remote_files_exist_future.done(): + result = check_remote_files_exist_future.result() + if result == RemoteFilesExistingCheckStatus.EXIST: + self.remote_uploader.upload_file_async( + remote_file_name=remote_symlink_file, + file_path=local_symlink_file, + overwrite=True, + ) + break else: - done_futures.append(future) - for future in done_futures: - self.symlink_upload_futures.remove(future) - - def batch_end(self, state: State, logger: Logger) -> None: - del state, logger # unused - if self.remote_uploader is not None: - try: - self.remote_uploader.check_workers() - self.check_symlink_upload_workers() - except Exception as e: - assert self.is_remote_upload_failed is not None - self.is_remote_upload_failed.set() - raise e + raise RuntimeError(f'Failed to check if checkpoint files upload finish: {result}') + self.symlink_upload_tasks = undone_symlink_upload_tasks def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused - if self.remote_uploader is not None: - log.info('Waiting checkpoint uploading finish') - try: - self.remote_uploader.wait() - except Exception as e: - assert self.is_remote_upload_failed is not None - self.is_remote_upload_failed.set() - raise e - for f in self.symlink_upload_futures: - f.result() - log.info('Checkpoint uploading finished!') + if self.remote_uploader is None: + return + log.info('Waiting checkpoint uploading finish') + self.remote_uploader.wait() + if self.rank_saves_remote_symlinks and len(self.symlink_upload_tasks) > 0: + log.debug('Uploading the last symlink file') + check_remote_files_exist_future, local_symlink_file, remote_symlink_file = self.symlink_upload_tasks[-1] + result = check_remote_files_exist_future.result() + if result == RemoteFilesExistingCheckStatus.EXIST: + symlink_upload_future = self.remote_uploader.upload_file_async( + remote_file_name=remote_symlink_file, + file_path=local_symlink_file, + overwrite=True, + ) + symlink_upload_future.result() + else: + raise RuntimeError(f'Failed to check if checkpoint files upload finish: {result}') + + log.info('Checkpoint uploading finished!') def post_close(self): if self.remote_uploader is not None: @@ -751,5 +693,3 @@ def post_close(self): except: assert self.is_remote_upload_failed is not None self.is_remote_upload_failed.set() - if 
len(self.symlink_upload_futures) > 1: - self.symlink_upload_futures[-1].result(timeout=60) diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py index 13e8ee5655..691c341a70 100644 --- a/composer/utils/__init__.py +++ b/composer/utils/__init__.py @@ -76,7 +76,7 @@ build_remote_backend, ) from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config -from composer.utils.remote_uploader import RemoteUploader +from composer.utils.remote_uploader import RemoteFilesExistingCheckStatus, RemoteUploader from composer.utils.retrying import retry from composer.utils.string_enum import StringEnum from composer.utils.warnings import VersionedDeprecationWarning @@ -162,4 +162,5 @@ 'RemoteUploader', 'validate_credentials', 'build_remote_backend', + 'RemoteFilesExistingCheckStatus', ] diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 09b625db3a..0716649191 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -12,6 +12,7 @@ import time import uuid from concurrent.futures import Future, ProcessPoolExecutor +from enum import Enum from typing import Any, Optional from composer.utils.dist import broadcast_object_list, get_global_rank, get_local_rank @@ -32,6 +33,48 @@ __all__ = ['RemoteUploader'] +class RemoteFilesExistingCheckStatus(Enum): + EXIST = 1 + TIMEOUT = 2 + ERROR = 3 + + +def _check_remote_files_exists( + remote_backend_name: str, + backend_kwargs: dict[str, Any], + remote_checkpoint_file_names: list[str], + main_process_pid: int, + #is_remote_upload_failed: multiprocessing.Event, + max_wait_time_in_seconds: int = 3600, + wait_before_next_try_in_seconds: float = 30, +): + start_time = time.time() + object_store = build_remote_backend(remote_backend_name, backend_kwargs) + + for remote_file_name in remote_checkpoint_file_names: + while True: + """ + if is_remote_upload_failed.is_set(): + log.debug(f'Stop symlink uploading since the checkpoint files uploading failed') + return RemoteFilesExistingCheckStatus.ERROR + """ + # Return if parent process exits + try: + os.kill(main_process_pid, 0) + except OSError: + return RemoteFilesExistingCheckStatus.ERROR + try: + object_store.get_object_size(remote_file_name) + break + except Exception as e: + if not isinstance(e, FileNotFoundError): + log.debug(f'Got exception {type(e)}: {str(e)} when accessing remote file {remote_file_name}') + time.sleep(wait_before_next_try_in_seconds) + if time.time() - start_time > max_wait_time_in_seconds: + return RemoteFilesExistingCheckStatus.TIMEOUT + return RemoteFilesExistingCheckStatus.EXIST + + def _upload_file_to_object_store( remote_backend_name: str, backend_kwargs: dict[str, Any], @@ -110,16 +153,22 @@ def __init__( self.num_attempts = num_attempts self._remote_backend: Optional[ObjectStore] = None - self.executor = ProcessPoolExecutor( + self.upload_executor = ProcessPoolExecutor( max_workers=num_concurrent_uploads, mp_context=multiprocessing.get_context('spawn'), ) + self.check_remote_files_exist_executor = ProcessPoolExecutor( + max_workers=2, + mp_context=multiprocessing.get_context('spawn'), + ) # Used internally to track the future status. 
# If a future completed successfully, we'll remove it from this list # when check_workers() or wait() is called self.futures: list[Future] = [] + self.pid = os.getpid() + @property def remote_backend(self) -> ObjectStore: if self._remote_backend is None: @@ -163,7 +212,7 @@ def upload_file_async( shutil.copy2(file_path, copied_path) # Async upload file - future = self.executor.submit( + future = self.upload_executor.submit( _upload_file_to_object_store, remote_backend_name=self.remote_backend_name, backend_kwargs=self.backend_kwargs, @@ -215,5 +264,26 @@ def wait_and_close(self): """ # make sure all workers are either running, or completed successfully self.wait() - self.executor.shutdown(wait=True) + self.upload_executor.shutdown(wait=True) + self.check_remote_files_exist_executor.shutdown(wait=True) log.debug('Finished all uploading tasks, closing RemoteUploader') + + def check_remote_files_exist_async( + self, + remote_checkpoint_file_names: list[str], + #is_remote_upload_failed: multiprocessing.Event, + max_wait_time_in_seconds: int = 3600, + wait_before_next_try_in_seconds: float = 30, + ): + future = self.check_remote_files_exist_executor.submit( + _check_remote_files_exists, + remote_backend_name=self.remote_backend_name, + backend_kwargs=self.backend_kwargs, + remote_checkpoint_file_names=remote_checkpoint_file_names, + main_process_pid=self.pid, + #is_remote_upload_failed: multiprocessing.Event, + max_wait_time_in_seconds=max_wait_time_in_seconds, + wait_before_next_try_in_seconds=wait_before_next_try_in_seconds, + ) + self.futures.append(future) + return future From b98ad3383d2dd28225014c23f6d3d4eb8accaf1a Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 26 Jun 2024 00:06:03 +0000 Subject: [PATCH 47/57] cleanup --- composer/callbacks/checkpoint_saver.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index b44ede4e49..52992442b5 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -329,11 +329,9 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) - self.symlink_file_tasks: list[tuple[str, str]] = [] self.rank_saves_remote_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads - self.is_remote_upload_failed = None self.upload_timeout_in_seconds = upload_timeout_in_seconds # Allow unit test to override this to make it faster self._symlink_upload_wait_before_next_try_in_seconds = 30.0 @@ -356,8 +354,6 @@ def __init__( remote_folder=save_folder, num_concurrent_uploads=self.num_concurrent_uploads, ) - mp_context = multiprocessing.get_context('spawn') - self.is_remote_upload_failed = mp_context.Manager().Event() def init(self, state: State, logger: Logger) -> None: # If MLFlowLogger is being used, format MLFlow-specific placeholders in the save folder and paths. 
@@ -682,7 +678,6 @@ def fit_end(self, state: State, logger: Logger) -> None: symlink_upload_future.result() else: raise RuntimeError(f'Failed to check if checkpoint files upload finish: {result}') - log.info('Checkpoint uploading finished!') def post_close(self): @@ -690,6 +685,5 @@ def post_close(self): # Wait the symlink file upload to finish and close remote uploader try: self.remote_uploader.wait_and_close() - except: - assert self.is_remote_upload_failed is not None - self.is_remote_upload_failed.set() + except Exception as e: + log.error(f'RemoteUploader run into exception {e}') From 8a6f5d1ce051e3adf249b79085d09a57761c512f Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 26 Jun 2024 00:18:40 +0000 Subject: [PATCH 48/57] a --- composer/utils/remote_uploader.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 0716649191..9af5472af6 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -44,7 +44,7 @@ def _check_remote_files_exists( backend_kwargs: dict[str, Any], remote_checkpoint_file_names: list[str], main_process_pid: int, - #is_remote_upload_failed: multiprocessing.Event, + is_remote_upload_failed: multiprocessing.Event, max_wait_time_in_seconds: int = 3600, wait_before_next_try_in_seconds: float = 30, ): @@ -53,11 +53,9 @@ def _check_remote_files_exists( for remote_file_name in remote_checkpoint_file_names: while True: - """ if is_remote_upload_failed.is_set(): log.debug(f'Stop symlink uploading since the checkpoint files uploading failed') return RemoteFilesExistingCheckStatus.ERROR - """ # Return if parent process exits try: os.kill(main_process_pid, 0) @@ -153,14 +151,16 @@ def __init__( self.num_attempts = num_attempts self._remote_backend: Optional[ObjectStore] = None + mp_context = multiprocessing.get_context('spawn') self.upload_executor = ProcessPoolExecutor( max_workers=num_concurrent_uploads, - mp_context=multiprocessing.get_context('spawn'), + mp_context=mp_context, ) self.check_remote_files_exist_executor = ProcessPoolExecutor( max_workers=2, - mp_context=multiprocessing.get_context('spawn'), + mp_context=mp_context, ) + self.is_remote_upload_failed = mp_context.Manager().Event() # Used internally to track the future status. 
# If a future completed successfully, we'll remove it from this list @@ -237,6 +237,7 @@ def check_workers(self): # future.exception is a blocking call exception_or_none = future.exception() if exception_or_none is not None: + self.is_remote_upload_failed.set() raise exception_or_none else: done_futures.append(future) @@ -252,6 +253,7 @@ def wait(self): for future in self.futures: exception_or_none = future.exception() if exception_or_none is not None: + self.is_remote_upload_failed.set() raise exception_or_none self.futures = [] @@ -281,7 +283,7 @@ def check_remote_files_exist_async( backend_kwargs=self.backend_kwargs, remote_checkpoint_file_names=remote_checkpoint_file_names, main_process_pid=self.pid, - #is_remote_upload_failed: multiprocessing.Event, + is_remote_upload_failed=self.is_remote_upload_failed, max_wait_time_in_seconds=max_wait_time_in_seconds, wait_before_next_try_in_seconds=wait_before_next_try_in_seconds, ) From ebbcc463d5eede0267ebc93f1a5d007634496214 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 26 Jun 2024 04:39:20 +0000 Subject: [PATCH 49/57] linter --- composer/callbacks/checkpoint_saver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 52992442b5..8d9a25f41e 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -6,7 +6,6 @@ from __future__ import annotations import logging -import multiprocessing import os import pathlib import shutil From 3575d1ebd87b41bbc5ed1ed784bcd070218a7d0f Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 26 Jun 2024 05:04:41 +0000 Subject: [PATCH 50/57] lint --- composer/utils/remote_uploader.py | 3 +- tests/trainer/test_checkpoint.py | 70 +++++++++++++++---------------- 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 9af5472af6..4bdb4035f4 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -44,7 +44,7 @@ def _check_remote_files_exists( backend_kwargs: dict[str, Any], remote_checkpoint_file_names: list[str], main_process_pid: int, - is_remote_upload_failed: multiprocessing.Event, + is_remote_upload_failed: multiprocessing.Event, # pyright: ignore[reportGeneralTypeIssues] max_wait_time_in_seconds: int = 3600, wait_before_next_try_in_seconds: float = 30, ): @@ -273,7 +273,6 @@ def wait_and_close(self): def check_remote_files_exist_async( self, remote_checkpoint_file_names: list[str], - #is_remote_upload_failed: multiprocessing.Event, max_wait_time_in_seconds: int = 3600, wait_before_next_try_in_seconds: float = 30, ): diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 8dc0f0af36..6718aeddbc 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -652,45 +652,41 @@ def upload_object(self, object_name, filename, callback=None): with patch('composer.utils.object_store.utils.S3ObjectStore', MockObjectStore): with patch('tests.utils.test_remote_uploader.DummyObjectStore.get_tmp_dir', _get_tmp_dir): with patch('composer.utils.remote_uploader.multiprocessing.get_context', lambda _: fork_context): - with patch( - 'composer.callbacks.checkpoint_saver.multiprocessing.get_context', - lambda _: fork_context, - ): - train_dataset = RandomClassificationDataset(size=10) - train_dataloader = DataLoader( - dataset=train_dataset, - batch_size=2, - sampler=dist.get_sampler(train_dataset), - ) + train_dataset = 
RandomClassificationDataset(size=10) + train_dataloader = DataLoader( + dataset=train_dataset, + batch_size=2, + sampler=dist.get_sampler(train_dataset), + ) - trainer = Trainer( - model=SimpleModel(), - train_dataloader=train_dataloader, - save_interval='1ba', - max_duration='1ba', - save_folder='S3://whatever/', - ) - symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') - if upload_success: + trainer = Trainer( + model=SimpleModel(), + train_dataloader=train_dataloader, + save_interval='1ba', + max_duration='1ba', + save_folder='S3://whatever/', + ) + symlink_filepath = os.path.join(tmp_dir.name, 'latest-rank0.pt.symlink') + if upload_success: + trainer.fit() + with open(symlink_filepath, 'r') as f: + assert f.read() == 'ep0-ba1-rank0.pt' + else: + assert trainer._checkpoint_saver is not None + trainer._checkpoint_saver._symlink_upload_wait_before_next_try_in_seconds = 0.01 + trainer._checkpoint_saver.upload_timeout_in_seconds = 1 + with pytest.raises(RuntimeError, match='Raise Error intentionally'): trainer.fit() - with open(symlink_filepath, 'r') as f: - assert f.read() == 'ep0-ba1-rank0.pt' - else: - assert trainer._checkpoint_saver is not None - trainer._checkpoint_saver._symlink_upload_wait_before_next_try_in_seconds = 0.01 - trainer._checkpoint_saver.upload_timeout_in_seconds = 1 - with pytest.raises(RuntimeError, match='Raise Error intentionally'): - trainer.fit() - assert os.path.exists(symlink_filepath) == False - - def post_close(self): - return - - assert trainer._checkpoint_saver is not None - trainer._checkpoint_saver.post_close = post_close.__get__( - trainer._checkpoint_saver, - CheckpointSaver, - ) + assert os.path.exists(symlink_filepath) == False + + def post_close(self): + return + + assert trainer._checkpoint_saver is not None + trainer._checkpoint_saver.post_close = post_close.__get__( + trainer._checkpoint_saver, + CheckpointSaver, + ) class TestCheckpointLoading: From fb8dbbad819131059f734e9658c0a56f79d06aad Mon Sep 17 00:00:00 2001 From: bigning Date: Fri, 28 Jun 2024 10:35:45 -0700 Subject: [PATCH 51/57] Update composer/callbacks/checkpoint_saver.py Co-authored-by: Evan Racah --- composer/callbacks/checkpoint_saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 8d9a25f41e..91d7cbdf01 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -662,7 +662,7 @@ def fit_end(self, state: State, logger: Logger) -> None: del state, logger # unused if self.remote_uploader is None: return - log.info('Waiting checkpoint uploading finish') + log.info('Waiting for checkpoint uploading to finish') self.remote_uploader.wait() if self.rank_saves_remote_symlinks and len(self.symlink_upload_tasks) > 0: log.debug('Uploading the last symlink file') From df4f59a962852307a172a011d880d4ad6ee6292d Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 28 Jun 2024 18:09:17 +0000 Subject: [PATCH 52/57] commenst --- composer/callbacks/checkpoint_saver.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 91d7cbdf01..9108a00b59 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -328,7 +328,7 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) - self.rank_saves_remote_symlinks: bool = False + self.rank_saves_symlinks: bool = 
dist.get_global_rank() == 0 self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads self.upload_timeout_in_seconds = upload_timeout_in_seconds @@ -493,6 +493,8 @@ def _save_checkpoint(self, state: State, logger: Logger): all_remote_filenames = [] if not saved_path: # not all ranks save + if dist.get_global_rank() == 0: + raise RuntimeError('Global rank 0 save path should not be None.') if self.remote_file_name is not None and self.remote_uploader is not None: all_remote_filenames = dist.all_gather_object(local_remote_file_names) return @@ -517,8 +519,7 @@ def _save_checkpoint(self, state: State, logger: Logger): src_path = str(pathlib.Path(saved_path).parent) else: src_path = saved_path - rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if rank_saves_symlinks: + if self.rank_saves_symlinks: os.symlink(os.path.relpath(src_path, os.path.dirname(symlink)), symlink) # if remote file name provided, upload the checkpoint @@ -590,9 +591,7 @@ def _save_checkpoint(self, state: State, logger: Logger): else: src_path = remote_file_name log.debug(f'Creating symlink file {symlink_filename} -> {src_path}') - rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled - if rank_saves_symlinks: - self.rank_saves_remote_symlinks = True + if self.rank_saves_symlinks: create_symlink_file(src_path, symlink_filename) if self.remote_uploader is not None: remote_checkpoint_file_names = [] @@ -634,7 +633,7 @@ def batch_end(self, state: State, logger: Logger) -> None: if self.remote_uploader is None: return self.remote_uploader.check_workers() - if not self.rank_saves_remote_symlinks: + if not self.rank_saves_symlinks: return undone_symlink_upload_tasks = [] for (check_remote_files_exist_future, local_symlink_file, @@ -664,7 +663,7 @@ def fit_end(self, state: State, logger: Logger) -> None: return log.info('Waiting for checkpoint uploading to finish') self.remote_uploader.wait() - if self.rank_saves_remote_symlinks and len(self.symlink_upload_tasks) > 0: + if self.rank_saves_symlinks and len(self.symlink_upload_tasks) > 0: log.debug('Uploading the last symlink file') check_remote_files_exist_future, local_symlink_file, remote_symlink_file = self.symlink_upload_tasks[-1] result = check_remote_files_exist_future.result() From 4971526040088e6845dbc82b9f0b3830bbd2ac80 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 28 Jun 2024 18:12:03 +0000 Subject: [PATCH 53/57] a --- composer/callbacks/checkpoint_saver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 9108a00b59..52ba56083c 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -664,7 +664,8 @@ def fit_end(self, state: State, logger: Logger) -> None: log.info('Waiting for checkpoint uploading to finish') self.remote_uploader.wait() if self.rank_saves_symlinks and len(self.symlink_upload_tasks) > 0: - log.debug('Uploading the last symlink file') + log.debug('Uploading symlink to the latest checkpoint') + # We only need to upload the latest symlinke file, ignoring the old ones check_remote_files_exist_future, local_symlink_file, remote_symlink_file = self.symlink_upload_tasks[-1] result = check_remote_files_exist_future.result() if result == RemoteFilesExistingCheckStatus.EXIST: From ebbbf56aed595282ef9fa7058a3ae2ff6b0ba927 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 28 
Jun 2024 19:25:57 +0000 Subject: [PATCH 54/57] fix test --- composer/callbacks/checkpoint_saver.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 52ba56083c..ca259b3394 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -328,7 +328,7 @@ def __init__( self.remote_uploader = None backend, _, _ = parse_uri(save_folder) - self.rank_saves_symlinks: bool = dist.get_global_rank() == 0 + self.rank_saves_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads self.upload_timeout_in_seconds = upload_timeout_in_seconds @@ -493,8 +493,6 @@ def _save_checkpoint(self, state: State, logger: Logger): all_remote_filenames = [] if not saved_path: # not all ranks save - if dist.get_global_rank() == 0: - raise RuntimeError('Global rank 0 save path should not be None.') if self.remote_file_name is not None and self.remote_uploader is not None: all_remote_filenames = dist.all_gather_object(local_remote_file_names) return @@ -507,6 +505,7 @@ def _save_checkpoint(self, state: State, logger: Logger): state.timestamp, ) + self.rank_saves_symlinks = dist.get_global_rank() == 0 or state.fsdp_sharded_state_dict_enabled if self.latest_filename is not None and self.num_checkpoints_to_keep != 0: symlink = self.latest_filename.format(state, is_deepspeed) os.makedirs(os.path.dirname(symlink), exist_ok=True) From 3bb10c900801f82b172ec140ff99f8a5c2e78fd4 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Fri, 28 Jun 2024 20:35:51 +0000 Subject: [PATCH 55/57] fix test --- composer/callbacks/checkpoint_saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index ca259b3394..65fac46f20 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -505,7 +505,7 @@ def _save_checkpoint(self, state: State, logger: Logger): state.timestamp, ) - self.rank_saves_symlinks = dist.get_global_rank() == 0 or state.fsdp_sharded_state_dict_enabled + self.rank_saves_symlinks = dist.get_global_rank() == 0 or not state.fsdp_sharded_state_dict_enabled if self.latest_filename is not None and self.num_checkpoints_to_keep != 0: symlink = self.latest_filename.format(state, is_deepspeed) os.makedirs(os.path.dirname(symlink), exist_ok=True) From 0d4c7af929515909febd559fe88d19132d86a70d Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 2 Jul 2024 05:16:41 +0000 Subject: [PATCH 56/57] comments --- composer/callbacks/checkpoint_saver.py | 28 +++++++++----------------- composer/trainer/trainer.py | 5 +---- composer/utils/remote_uploader.py | 10 +++++++++ 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/composer/callbacks/checkpoint_saver.py b/composer/callbacks/checkpoint_saver.py index 65fac46f20..29468e66c3 100644 --- a/composer/callbacks/checkpoint_saver.py +++ b/composer/callbacks/checkpoint_saver.py @@ -290,11 +290,13 @@ def __init__( num_checkpoints_to_keep: int = -1, weights_only: bool = False, ignore_keys: Optional[Union[list[str], Callable[[dict], None]]] = None, - save_folder: str = '', num_concurrent_uploads: int = 1, upload_timeout_in_seconds: int = 3600, ): - folder = str(folder) + backend, _, local_folder = parse_uri(str(folder)) + if local_folder == '': + local_folder = '.' 
+ filename = str(filename) remote_file_name = str(remote_file_name) if remote_file_name is not None else None latest_filename = str(latest_filename) if latest_filename is not None else None @@ -310,10 +312,10 @@ def __init__( self.save_interval = save_interval self.last_checkpoint_batch: Optional[Time] = None - self.folder = folder + self.folder = local_folder - self.filename = PartialFilePath(filename.lstrip('/'), folder) - self.latest_filename = PartialFilePath(latest_filename.lstrip('/'), folder) if latest_filename else None + self.filename = PartialFilePath(filename.lstrip('/'), local_folder) + self.latest_filename = PartialFilePath(latest_filename.lstrip('/'), local_folder) if latest_filename else None self.remote_file_name = PartialFilePath(remote_file_name) if remote_file_name else None self.latest_remote_file_name = PartialFilePath(latest_remote_file_name) if latest_remote_file_name else None @@ -327,7 +329,6 @@ def __init__( self.start_batch = None self.remote_uploader = None - backend, _, _ = parse_uri(save_folder) self.rank_saves_symlinks: bool = False self.tmp_dir_for_symlink = tempfile.TemporaryDirectory() self.num_concurrent_uploads = num_concurrent_uploads @@ -339,18 +340,8 @@ def __init__( self.symlink_upload_tasks = [] if backend != '': - if backend == 'wandb': - raise NotImplementedError( - f'There is no implementation for WandB via URI. Please use ' - 'WandBLogger with log_artifacts set to True.', - ) - elif backend not in ['s3', 'oci', 'gs', 'azure', 'dbfs']: - raise NotImplementedError( - f'There is no implementation for the cloud backend {backend} via URI. Please use ' - 'one of the supported object stores (s3, oci, gs, azure, dbfs).', - ) self.remote_uploader = RemoteUploader( - remote_folder=save_folder, + remote_folder=str(folder), num_concurrent_uploads=self.num_concurrent_uploads, ) @@ -489,6 +480,7 @@ def _save_checkpoint(self, state: State, logger: Logger): log.debug(f'Checkpoint locally saved to {saved_path}') self.symlink_count += 1 + # Remote checkpoint file names on this rank local_remote_file_names = [] all_remote_filenames = [] @@ -664,7 +656,7 @@ def fit_end(self, state: State, logger: Logger) -> None: self.remote_uploader.wait() if self.rank_saves_symlinks and len(self.symlink_upload_tasks) > 0: log.debug('Uploading symlink to the latest checkpoint') - # We only need to upload the latest symlinke file, ignoring the old ones + # We only need to upload a symlink pointing to the latest checkpoint files, so we can ignore successful uploads of older checkpoints. check_remote_files_exist_future, local_symlink_file, remote_symlink_file = self.symlink_upload_tasks[-1] result = check_remote_files_exist_future.result() if result == RemoteFilesExistingCheckStatus.EXIST: diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index 8967e5e33c..a0bfbaa53b 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -1441,14 +1441,12 @@ def __init__( # path then we assume they just want their checkpoints saved directly in their # bucket. if parsed_save_folder == '': - folder = '.' remote_file_name = save_filename latest_remote_file_name = save_latest_filename # If they actually specify a path, then we use that for their local save path # and we prefix save_filename with that path for remote_file_name. 
else: - folder = parsed_save_folder remote_file_name = str(Path(parsed_save_folder) / Path(save_filename)) if save_latest_filename is not None: latest_remote_file_name = str(Path(parsed_save_folder) / Path(save_latest_filename)) @@ -1456,7 +1454,7 @@ def __init__( latest_remote_file_name = None self._checkpoint_saver = CheckpointSaver( - folder=folder, + folder=save_folder, filename=save_filename, remote_file_name=remote_file_name, latest_filename=save_latest_filename, @@ -1466,7 +1464,6 @@ def __init__( ignore_keys=save_ignore_keys, save_interval=save_interval, num_checkpoints_to_keep=save_num_checkpoints_to_keep, - save_folder=save_folder, ) self.state.callbacks.append(self._checkpoint_saver) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 4bdb4035f4..3cdbaac5c3 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -148,6 +148,16 @@ def __init__( } elif self.remote_backend_name == 'dbfs': self.backend_kwargs['path'] = self.path + elif self.remote_backend_name == 'wandb': + raise NotImplementedError( + f'There is no implementation for WandB via URI. Please use ' + 'WandBLogger with log_artifacts set to True.', + ) + else: + raise NotImplementedError( + f'There is no implementation for the cloud backend {self.remote_backend_name} via URI. Please use ' + 'one of the supported object stores (s3, oci, gs, azure, dbfs).', + ) self.num_attempts = num_attempts self._remote_backend: Optional[ObjectStore] = None From 6ed9aa7b0146631a8782e33de17568184cc81245 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 3 Jul 2024 23:58:17 +0000 Subject: [PATCH 57/57] a --- composer/utils/remote_uploader.py | 5 +++-- tests/trainer/test_checkpoint.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/composer/utils/remote_uploader.py b/composer/utils/remote_uploader.py index 3cdbaac5c3..33793e7c91 100644 --- a/composer/utils/remote_uploader.py +++ b/composer/utils/remote_uploader.py @@ -136,8 +136,9 @@ def __init__( self.backend_kwargs: dict[str, Any] = backend_kwargs if backend_kwargs is not None else {} if self.remote_backend_name in ['s3', 'oci', 'gs'] and 'bucket' not in self.backend_kwargs: self.backend_kwargs['bucket'] = self.remote_bucket_name - elif self.remote_backend_name == 'libcloud' and 'container' not in self.backend_kwargs: - self.backend_kwargs['container'] = self.remote_bucket_name + elif self.remote_backend_name == 'libcloud': + if 'container' not in self.backend_kwargs: + self.backend_kwargs['container'] = self.remote_bucket_name elif self.remote_backend_name == 'azure': self.remote_backend_name = 'libcloud' self.backend_kwargs = { diff --git a/tests/trainer/test_checkpoint.py b/tests/trainer/test_checkpoint.py index 621ee73de8..ede864d13b 100644 --- a/tests/trainer/test_checkpoint.py +++ b/tests/trainer/test_checkpoint.py @@ -851,7 +851,6 @@ def _get_tmp_dir(self): assert trainer_1.state.run_name == trainer_2.state.run_name - @pytest.mark.parametrize(('save_folder'), [None, 'first']) def test_autoresume_from_callback( self,