From 37dc470e153cb9bd1da3dc826dd6de48249b6259 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Thu, 3 Jun 2021 18:01:39 +0100
Subject: [PATCH 01/10] add pytorch profiler

---
 docs/source/api_references.rst          | 9 ++++++++-
 pytorch_lightning/profiler/profilers.py | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst
index f73a8954f8764..ff616f6e56f6c 100644
--- a/docs/source/api_references.rst
+++ b/docs/source/api_references.rst
@@ -137,8 +137,15 @@ Profiler API
 .. autosummary::
     :toctree: api
     :nosignatures:
+    :template: classtemplate.rst
+
+    AbstractProfiler
+    AdvancedProfiler
+    BaseProfiler
+    PassThroughProfiler
+    PyTorchProfiler
+    SimpleProfiler
 
-    profilers
 
 Trainer API
 -----------
diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
index 78327fa0a91d8..57f2de5579251 100644
--- a/pytorch_lightning/profiler/profilers.py
+++ b/pytorch_lightning/profiler/profilers.py
@@ -26,6 +26,7 @@
 
 import numpy as np
 
+from pytorch_lightning.profiler.pytorch import PyTorchProfiler  # noqa F401
 from pytorch_lightning.utilities import rank_zero_warn
 from pytorch_lightning.utilities.cloud_io import get_filesystem

From edb24f0daa8f4efd0cbb95c02e0529afa23edc45 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Thu, 3 Jun 2021 18:19:15 +0100
Subject: [PATCH 02/10] remove cyclic import

---
 pytorch_lightning/profiler/profilers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
index 57f2de5579251..78327fa0a91d8 100644
--- a/pytorch_lightning/profiler/profilers.py
+++ b/pytorch_lightning/profiler/profilers.py
@@ -26,7 +26,6 @@
 
 import numpy as np
 
-from pytorch_lightning.profiler.pytorch import PyTorchProfiler  # noqa F401
 from pytorch_lightning.utilities import rank_zero_warn
 from pytorch_lightning.utilities.cloud_io import get_filesystem

From ffb6a4c9a7a1a895219cd5f546a08c540afb00a7 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 08:38:26 +0100
Subject: [PATCH 03/10] move each profiler to its own file

---
 docs/source/api_references.rst          |   1 -
 pytorch_lightning/profiler/__init__.py  |   7 +-
 pytorch_lightning/profiler/advanced.py  |  92 ++++++
 pytorch_lightning/profiler/base.py      | 215 +++++++++++++
 pytorch_lightning/profiler/profilers.py | 387 ------------------------
 pytorch_lightning/profiler/pytorch.py   |   2 +-
 pytorch_lightning/profiler/simple.py    | 123 ++++++++
 7 files changed, 435 insertions(+), 392 deletions(-)
 create mode 100644 pytorch_lightning/profiler/advanced.py
 create mode 100644 pytorch_lightning/profiler/base.py
 delete mode 100644 pytorch_lightning/profiler/profilers.py
 create mode 100644 pytorch_lightning/profiler/simple.py

diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst
index ff616f6e56f6c..05e52ee542557 100644
--- a/docs/source/api_references.rst
+++ b/docs/source/api_references.rst
@@ -139,7 +139,6 @@ Profiler API
     :nosignatures:
     :template: classtemplate.rst
 
-    AbstractProfiler
     AdvancedProfiler
     BaseProfiler
     PassThroughProfiler
diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py
index 6ac6e16c18529..5bd2ff41aeefe 100644
--- a/pytorch_lightning/profiler/__init__.py
+++ b/pytorch_lightning/profiler/__init__.py
@@ -194,14 +194,15 @@ def custom_processing_step(self, data):
     python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))'
 
 """
-
-from pytorch_lightning.profiler.profilers import AdvancedProfiler, BaseProfiler, PassThroughProfiler, SimpleProfiler
+from pytorch_lightning.profiler.advanced import AdvancedProfiler
+from pytorch_lightning.profiler.base import BaseProfiler, PassThroughProfiler
 from pytorch_lightning.profiler.pytorch import PyTorchProfiler
+from pytorch_lightning.profiler.simple import SimpleProfiler
 
 __all__ = [
     'BaseProfiler',
     'SimpleProfiler',
     'AdvancedProfiler',
     'PassThroughProfiler',
-    "PyTorchProfiler",
+    'PyTorchProfiler',
 ]
diff --git a/pytorch_lightning/profiler/advanced.py b/pytorch_lightning/profiler/advanced.py
new file mode 100644
index 0000000000000..3a017d72ff5e0
--- /dev/null
+++ b/pytorch_lightning/profiler/advanced.py
@@ -0,0 +1,92 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Profiler to check if there are any bottlenecks in your code."""
+import cProfile
+import io
+import logging
+import pstats
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+from pytorch_lightning.profiler.base import BaseProfiler
+
+log = logging.getLogger(__name__)
+
+
+class AdvancedProfiler(BaseProfiler):
+    """
+    This profiler uses Python's cProfile to record more detailed information about
+    time spent in each function call recorded during a given action. The output is quite
+    verbose and you should only use this if you want very detailed reports.
+    """
+
+    def __init__(
+        self,
+        dirpath: Optional[Union[str, Path]] = None,
+        filename: Optional[str] = None,
+        line_count_restriction: float = 1.0,
+        output_filename: Optional[str] = None,
+    ) -> None:
+        """
+        Args:
+            dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the
+                ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`)
+                will be used.
+
+            filename: If present, filename where the profiler results will be saved instead of printing to stdout.
+                The ``.txt`` extension will be used automatically.
+
+            line_count_restriction: this can be used to limit the number of functions
+                reported for each action. either an integer (to select a count of lines),
+                or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines)
+
+        Raises:
+            ValueError:
+                If you attempt to stop recording an action which was never started.
+ """ + super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) + self.profiled_actions: Dict[str, cProfile.Profile] = {} + self.line_count_restriction = line_count_restriction + + def start(self, action_name: str) -> None: + if action_name not in self.profiled_actions: + self.profiled_actions[action_name] = cProfile.Profile() + self.profiled_actions[action_name].enable() + + def stop(self, action_name: str) -> None: + pr = self.profiled_actions.get(action_name) + if pr is None: + raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") + pr.disable() + + def summary(self) -> str: + recorded_stats = {} + for action_name, pr in self.profiled_actions.items(): + s = io.StringIO() + ps = pstats.Stats(pr, stream=s).strip_dirs().sort_stats('cumulative') + ps.print_stats(self.line_count_restriction) + recorded_stats[action_name] = s.getvalue() + return self._stats_to_str(recorded_stats) + + def teardown(self, stage: Optional[str] = None) -> None: + super().teardown(stage=stage) + self.profiled_actions = {} + + def __reduce__(self): + # avoids `TypeError: cannot pickle 'cProfile.Profile' object` + return ( + self.__class__, + tuple(), + dict(dirpath=self.dirpath, filename=self.filename, line_count_restriction=self.line_count_restriction), + ) diff --git a/pytorch_lightning/profiler/base.py b/pytorch_lightning/profiler/base.py new file mode 100644 index 0000000000000..2a064085e8da7 --- /dev/null +++ b/pytorch_lightning/profiler/base.py @@ -0,0 +1,215 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Profiler to check if there are any bottlenecks in your code.""" +import logging +import os +from abc import ABC, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Any, Callable, Dict, Optional, TextIO, Union + +from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities.cloud_io import get_filesystem + +log = logging.getLogger(__name__) + + +class AbstractProfiler(ABC): + """Specification of a profiler.""" + + @abstractmethod + def start(self, action_name: str) -> None: + """Defines how to start recording an action.""" + + @abstractmethod + def stop(self, action_name: str) -> None: + """Defines how to record the duration once an action is complete.""" + + @abstractmethod + def summary(self) -> str: + """Create profiler summary in text format.""" + + @abstractmethod + def setup(self, **kwargs: Any) -> None: + """Execute arbitrary pre-profiling set-up steps as defined by subclass.""" + + @abstractmethod + def teardown(self, **kwargs: Any) -> None: + """Execute arbitrary post-profiling tear-down steps as defined by subclass.""" + + +class BaseProfiler(AbstractProfiler): + """ + If you wish to write a custom profiler, you should inherit from this class. 
+ """ + + def __init__( + self, + dirpath: Optional[Union[str, Path]] = None, + filename: Optional[str] = None, + output_filename: Optional[str] = None, + ) -> None: + self.dirpath = dirpath + self.filename = filename + if output_filename is not None: + rank_zero_warn( + "`Profiler` signature has changed in v1.3. The `output_filename` parameter has been removed in" + " favor of `dirpath` and `filename`. Support for the old signature will be removed in v1.5", + DeprecationWarning + ) + filepath = Path(output_filename) + self.dirpath = filepath.parent + self.filename = filepath.stem + + self._output_file: Optional[TextIO] = None + self._write_stream: Optional[Callable] = None + self._local_rank: Optional[int] = None + self._log_dir: Optional[str] = None + self._stage: Optional[str] = None + + @contextmanager + def profile(self, action_name: str) -> None: + """ + Yields a context manager to encapsulate the scope of a profiled action. + + Example:: + + with self.profile('load training data'): + # load training data code + + The profiler will start once you've entered the context and will automatically + stop once you exit the code block. + """ + try: + self.start(action_name) + yield action_name + finally: + self.stop(action_name) + + def profile_iterable(self, iterable, action_name: str) -> None: + iterator = iter(iterable) + while True: + try: + self.start(action_name) + value = next(iterator) + self.stop(action_name) + yield value + except StopIteration: + self.stop(action_name) + break + + def _rank_zero_info(self, *args, **kwargs) -> None: + if self._local_rank in (None, 0): + log.info(*args, **kwargs) + + def _prepare_filename(self, extension: str = ".txt") -> str: + filename = "" + if self._stage is not None: + filename += f"{self._stage}-" + filename += str(self.filename) + if self._local_rank is not None: + filename += f"-{self._local_rank}" + filename += extension + return filename + + def _prepare_streams(self) -> None: + if self._write_stream is not None: + return + if self.filename: + filepath = os.path.join(self.dirpath, self._prepare_filename()) + fs = get_filesystem(filepath) + file = fs.open(filepath, "a") + self._output_file = file + self._write_stream = file.write + else: + self._write_stream = self._rank_zero_info + + def describe(self) -> None: + """Logs a profile report after the conclusion of run.""" + # there are pickling issues with open file handles in Python 3.6 + # so to avoid them, we open and close the files within this function + # by calling `_prepare_streams` and `teardown` + self._prepare_streams() + summary = self.summary() + if summary: + self._write_stream(summary) + if self._output_file is not None: + self._output_file.flush() + self.teardown(stage=self._stage) + + def _stats_to_str(self, stats: Dict[str, str]) -> str: + stage = f"{self._stage.upper()} " if self._stage is not None else "" + output = [stage + "Profiler Report"] + for action, value in stats.items(): + header = f"Profile stats for: {action}" + if self._local_rank is not None: + header += f" rank: {self._local_rank}" + output.append(header) + output.append(value) + return os.linesep.join(output) + + def setup( + self, + stage: Optional[str] = None, + local_rank: Optional[int] = None, + log_dir: Optional[str] = None, + ) -> None: + """Execute arbitrary pre-profiling set-up steps.""" + self._stage = stage + self._local_rank = local_rank + self._log_dir = log_dir + self.dirpath = self.dirpath or log_dir + + def teardown(self, stage: Optional[str] = None) -> None: + """ + Execute arbitrary 
+
+        Closes the currently open file and stream.
+        """
+        self._write_stream = None
+        if self._output_file is not None:
+            self._output_file.close()
+            self._output_file = None  # can't pickle TextIOWrapper
+
+    def __del__(self) -> None:
+        self.teardown(stage=self._stage)
+
+    def start(self, action_name: str) -> None:
+        raise NotImplementedError
+
+    def stop(self, action_name: str) -> None:
+        raise NotImplementedError
+
+    def summary(self) -> str:
+        raise NotImplementedError
+
+    @property
+    def local_rank(self) -> int:
+        return 0 if self._local_rank is None else self._local_rank
+
+
+class PassThroughProfiler(BaseProfiler):
+    """
+    This class should be used when you don't want the (small) overhead of profiling.
+    The Trainer uses this class by default.
+    """
+
+    def start(self, action_name: str) -> None:
+        pass
+
+    def stop(self, action_name: str) -> None:
+        pass
+
+    def summary(self) -> str:
+        return ""
diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
deleted file mode 100644
index 78327fa0a91d8..0000000000000
--- a/pytorch_lightning/profiler/profilers.py
+++ /dev/null
@@ -1,387 +0,0 @@
-# Copyright The PyTorch Lightning team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Profiler to check if there are any bottlenecks in your code."""
-import cProfile
-import io
-import logging
-import os
-import pstats
-import time
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from contextlib import contextmanager
-from pathlib import Path
-from typing import Any, Callable, Dict, Optional, TextIO, Tuple, Union
-
-import numpy as np
-
-from pytorch_lightning.utilities import rank_zero_warn
-from pytorch_lightning.utilities.cloud_io import get_filesystem
-
-log = logging.getLogger(__name__)
-
-
-class AbstractProfiler(ABC):
-    """Specification of a profiler."""
-
-    @abstractmethod
-    def start(self, action_name: str) -> None:
-        """Defines how to start recording an action."""
-
-    @abstractmethod
-    def stop(self, action_name: str) -> None:
-        """Defines how to record the duration once an action is complete."""
-
-    @abstractmethod
-    def summary(self) -> str:
-        """Create profiler summary in text format."""
-
-    @abstractmethod
-    def setup(self, **kwargs: Any) -> None:
-        """Execute arbitrary pre-profiling set-up steps as defined by subclass."""
-
-    @abstractmethod
-    def teardown(self, **kwargs: Any) -> None:
-        """Execute arbitrary post-profiling tear-down steps as defined by subclass."""
-
-
-class BaseProfiler(AbstractProfiler):
-    """
-    If you wish to write a custom profiler, you should inherit from this class.
-    """
-
-    def __init__(
-        self,
-        dirpath: Optional[Union[str, Path]] = None,
-        filename: Optional[str] = None,
-        output_filename: Optional[str] = None,
-    ) -> None:
-        self.dirpath = dirpath
-        self.filename = filename
-        if output_filename is not None:
-            rank_zero_warn(
-                "`Profiler` signature has changed in v1.3. The `output_filename` parameter has been removed in"
-                " favor of `dirpath` and `filename`. Support for the old signature will be removed in v1.5",
-                DeprecationWarning
-            )
-            filepath = Path(output_filename)
-            self.dirpath = filepath.parent
-            self.filename = filepath.stem
-
-        self._output_file: Optional[TextIO] = None
-        self._write_stream: Optional[Callable] = None
-        self._local_rank: Optional[int] = None
-        self._log_dir: Optional[str] = None
-        self._stage: Optional[str] = None
-
-    @contextmanager
-    def profile(self, action_name: str) -> None:
-        """
-        Yields a context manager to encapsulate the scope of a profiled action.
-
-        Example::
-
-            with self.profile('load training data'):
-                # load training data code
-
-        The profiler will start once you've entered the context and will automatically
-        stop once you exit the code block.
-        """
-        try:
-            self.start(action_name)
-            yield action_name
-        finally:
-            self.stop(action_name)
-
-    def profile_iterable(self, iterable, action_name: str) -> None:
-        iterator = iter(iterable)
-        while True:
-            try:
-                self.start(action_name)
-                value = next(iterator)
-                self.stop(action_name)
-                yield value
-            except StopIteration:
-                self.stop(action_name)
-                break
-
-    def _rank_zero_info(self, *args, **kwargs) -> None:
-        if self._local_rank in (None, 0):
-            log.info(*args, **kwargs)
-
-    def _prepare_filename(self, extension: str = ".txt") -> str:
-        filename = ""
-        if self._stage is not None:
-            filename += f"{self._stage}-"
-        filename += str(self.filename)
-        if self._local_rank is not None:
-            filename += f"-{self._local_rank}"
-        filename += extension
-        return filename
-
-    def _prepare_streams(self) -> None:
-        if self._write_stream is not None:
-            return
-        if self.filename:
-            filepath = os.path.join(self.dirpath, self._prepare_filename())
-            fs = get_filesystem(filepath)
-            file = fs.open(filepath, "a")
-            self._output_file = file
-            self._write_stream = file.write
-        else:
-            self._write_stream = self._rank_zero_info
-
-    def describe(self) -> None:
-        """Logs a profile report after the conclusion of the run."""
-        # there are pickling issues with open file handles in Python 3.6
-        # so to avoid them, we open and close the files within this function
-        # by calling `_prepare_streams` and `teardown`
-        self._prepare_streams()
-        summary = self.summary()
-        if summary:
-            self._write_stream(summary)
-        if self._output_file is not None:
-            self._output_file.flush()
-        self.teardown(stage=self._stage)
-
-    def _stats_to_str(self, stats: Dict[str, str]) -> str:
-        stage = f"{self._stage.upper()} " if self._stage is not None else ""
-        output = [stage + "Profiler Report"]
-        for action, value in stats.items():
-            header = f"Profile stats for: {action}"
-            if self._local_rank is not None:
-                header += f" rank: {self._local_rank}"
-            output.append(header)
-            output.append(value)
-        return os.linesep.join(output)
-
-    def setup(
-        self,
-        stage: Optional[str] = None,
-        local_rank: Optional[int] = None,
-        log_dir: Optional[str] = None,
-    ) -> None:
-        """Execute arbitrary pre-profiling set-up steps."""
-        self._stage = stage
-        self._local_rank = local_rank
-        self._log_dir = log_dir
-        self.dirpath = self.dirpath or log_dir
-
-    def teardown(self, stage: Optional[str] = None) -> None:
-        """
-        Execute arbitrary post-profiling tear-down steps.
-
-        Closes the currently open file and stream.
- """ - self._write_stream = None - if self._output_file is not None: - self._output_file.close() - self._output_file = None # can't pickle TextIOWrapper - - def __del__(self) -> None: - self.teardown(stage=self._stage) - - def start(self, action_name: str) -> None: - raise NotImplementedError - - def stop(self, action_name: str) -> None: - raise NotImplementedError - - def summary(self) -> str: - raise NotImplementedError - - @property - def local_rank(self) -> int: - return 0 if self._local_rank is None else self._local_rank - - -class PassThroughProfiler(BaseProfiler): - """ - This class should be used when you don't want the (small) overhead of profiling. - The Trainer uses this class by default. - """ - - def start(self, action_name: str) -> None: - pass - - def stop(self, action_name: str) -> None: - pass - - def summary(self) -> str: - return "" - - -class SimpleProfiler(BaseProfiler): - """ - This profiler simply records the duration of actions (in seconds) and reports - the mean duration of each action and the total time spent over the entire training run. - """ - - def __init__( - self, - dirpath: Optional[Union[str, Path]] = None, - filename: Optional[str] = None, - extended: bool = True, - output_filename: Optional[str] = None, - ) -> None: - """ - Args: - dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the - ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`) - will be used. - - filename: If present, filename where the profiler results will be saved instead of printing to stdout. - The ``.txt`` extension will be used automatically. - - Raises: - ValueError: - If you attempt to start an action which has already started, or - if you attempt to stop recording an action which was never started. - """ - super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) - self.current_actions: Dict[str, float] = {} - self.recorded_durations = defaultdict(list) - self.extended = extended - self.start_time = time.monotonic() - - def start(self, action_name: str) -> None: - if action_name in self.current_actions: - raise ValueError(f"Attempted to start {action_name} which has already started.") - self.current_actions[action_name] = time.monotonic() - - def stop(self, action_name: str) -> None: - end_time = time.monotonic() - if action_name not in self.current_actions: - raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") - start_time = self.current_actions.pop(action_name) - duration = end_time - start_time - self.recorded_durations[action_name].append(duration) - - def _make_report(self) -> Tuple[list, float]: - total_duration = time.monotonic() - self.start_time - report = [[a, d, 100. 
-        report.sort(key=lambda x: x[2], reverse=True)
-        return report, total_duration
-
-    def summary(self) -> str:
-        sep = os.linesep
-        output_string = ""
-        if self._stage is not None:
-            output_string += f"{self._stage.upper()} "
-        output_string += f"Profiler Report{sep}"
-
-        if self.extended:
-
-            if len(self.recorded_durations) > 0:
-                max_key = np.max([len(k) for k in self.recorded_durations.keys()])
-
-                def log_row(action, mean, num_calls, total, per):
-                    row = f"{sep}{action:<{max_key}s}\t|  {mean:<15}\t|"
-                    row += f"{num_calls:<15}\t|  {total:<15}\t|  {per:<15}\t|"
-                    return row
-
-                output_string += log_row("Action", "Mean duration (s)", "Num calls", "Total time (s)", "Percentage %")
-                output_string_len = len(output_string)
-                output_string += f"{sep}{'-' * output_string_len}"
-                report, total_duration = self._make_report()
-                output_string += log_row("Total", "-", "_", f"{total_duration:.5}", "100 %")
-                output_string += f"{sep}{'-' * output_string_len}"
-                for action, durations, duration_per in report:
-                    output_string += log_row(
-                        action,
-                        f"{np.mean(durations):.5}",
-                        f"{len(durations):}",
-                        f"{np.sum(durations):.5}",
-                        f"{duration_per:.5}",
-                    )
-        else:
-
-            def log_row(action, mean, total):
-                return f"{sep}{action:<20s}\t|  {mean:<15}\t|  {total:<15}"
-
-            output_string += log_row("Action", "Mean duration (s)", "Total time (s)")
-            output_string += f"{sep}{'-' * 65}"
-
-            for action, durations in self.recorded_durations.items():
-                output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}")
-        output_string += sep
-        return output_string
-
-
-class AdvancedProfiler(BaseProfiler):
-    """
-    This profiler uses Python's cProfile to record more detailed information about
-    time spent in each function call recorded during a given action. The output is quite
-    verbose and you should only use this if you want very detailed reports.
-    """
-
-    def __init__(
-        self,
-        dirpath: Optional[Union[str, Path]] = None,
-        filename: Optional[str] = None,
-        line_count_restriction: float = 1.0,
-        output_filename: Optional[str] = None,
-    ) -> None:
-        """
-        Args:
-            dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the
-                ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`)
-                will be used.
-
-            filename: If present, filename where the profiler results will be saved instead of printing to stdout.
-                The ``.txt`` extension will be used automatically.
-
-            line_count_restriction: this can be used to limit the number of functions
-                reported for each action. either an integer (to select a count of lines),
-                or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines)
-
-        Raises:
-            ValueError:
-                If you attempt to stop recording an action which was never started.
- """ - super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) - self.profiled_actions: Dict[str, cProfile.Profile] = {} - self.line_count_restriction = line_count_restriction - - def start(self, action_name: str) -> None: - if action_name not in self.profiled_actions: - self.profiled_actions[action_name] = cProfile.Profile() - self.profiled_actions[action_name].enable() - - def stop(self, action_name: str) -> None: - pr = self.profiled_actions.get(action_name) - if pr is None: - raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") - pr.disable() - - def summary(self) -> str: - recorded_stats = {} - for action_name, pr in self.profiled_actions.items(): - s = io.StringIO() - ps = pstats.Stats(pr, stream=s).strip_dirs().sort_stats('cumulative') - ps.print_stats(self.line_count_restriction) - recorded_stats[action_name] = s.getvalue() - return self._stats_to_str(recorded_stats) - - def teardown(self, stage: Optional[str] = None) -> None: - super().teardown(stage=stage) - self.profiled_actions = {} - - def __reduce__(self): - # avoids `TypeError: cannot pickle 'cProfile.Profile' object` - return ( - self.__class__, - tuple(), - dict(dirpath=self.dirpath, filename=self.filename, line_count_restriction=self.line_count_restriction), - ) diff --git a/pytorch_lightning/profiler/pytorch.py b/pytorch_lightning/profiler/pytorch.py index 5b3a5a105cf3a..b78922d7f4a47 100644 --- a/pytorch_lightning/profiler/pytorch.py +++ b/pytorch_lightning/profiler/pytorch.py @@ -23,7 +23,7 @@ from torch import nn, Tensor from torch.autograd.profiler import record_function -from pytorch_lightning.profiler.profilers import BaseProfiler +from pytorch_lightning.profiler.base import BaseProfiler from pytorch_lightning.utilities.distributed import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE diff --git a/pytorch_lightning/profiler/simple.py b/pytorch_lightning/profiler/simple.py new file mode 100644 index 0000000000000..7fb8ac5be0c92 --- /dev/null +++ b/pytorch_lightning/profiler/simple.py @@ -0,0 +1,123 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Profiler to check if there are any bottlenecks in your code.""" +import logging +import os +import time +from collections import defaultdict +from pathlib import Path +from typing import Dict, Optional, Tuple, Union + +import numpy as np + +from pytorch_lightning.profiler.base import BaseProfiler + +log = logging.getLogger(__name__) + + +class SimpleProfiler(BaseProfiler): + """ + This profiler simply records the duration of actions (in seconds) and reports + the mean duration of each action and the total time spent over the entire training run. 
+ """ + + def __init__( + self, + dirpath: Optional[Union[str, Path]] = None, + filename: Optional[str] = None, + extended: bool = True, + output_filename: Optional[str] = None, + ) -> None: + """ + Args: + dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the + ``trainer.log_dir`` (from :class:`~pytorch_lightning.loggers.tensorboard.TensorBoardLogger`) + will be used. + + filename: If present, filename where the profiler results will be saved instead of printing to stdout. + The ``.txt`` extension will be used automatically. + + Raises: + ValueError: + If you attempt to start an action which has already started, or + if you attempt to stop recording an action which was never started. + """ + super().__init__(dirpath=dirpath, filename=filename, output_filename=output_filename) + self.current_actions: Dict[str, float] = {} + self.recorded_durations = defaultdict(list) + self.extended = extended + self.start_time = time.monotonic() + + def start(self, action_name: str) -> None: + if action_name in self.current_actions: + raise ValueError(f"Attempted to start {action_name} which has already started.") + self.current_actions[action_name] = time.monotonic() + + def stop(self, action_name: str) -> None: + end_time = time.monotonic() + if action_name not in self.current_actions: + raise ValueError(f"Attempting to stop recording an action ({action_name}) which was never started.") + start_time = self.current_actions.pop(action_name) + duration = end_time - start_time + self.recorded_durations[action_name].append(duration) + + def _make_report(self) -> Tuple[list, float]: + total_duration = time.monotonic() - self.start_time + report = [[a, d, 100. * np.sum(d) / total_duration] for a, d in self.recorded_durations.items()] + report.sort(key=lambda x: x[2], reverse=True) + return report, total_duration + + def summary(self) -> str: + sep = os.linesep + output_string = "" + if self._stage is not None: + output_string += f"{self._stage.upper()} " + output_string += f"Profiler Report{sep}" + + if self.extended: + + if len(self.recorded_durations) > 0: + max_key = np.max([len(k) for k in self.recorded_durations.keys()]) + + def log_row(action, mean, num_calls, total, per): + row = f"{sep}{action:<{max_key}s}\t| {mean:<15}\t|" + row += f"{num_calls:<15}\t| {total:<15}\t| {per:<15}\t|" + return row + + output_string += log_row("Action", "Mean duration (s)", "Num calls", "Total time (s)", "Percentage %") + output_string_len = len(output_string) + output_string += f"{sep}{'-' * output_string_len}" + report, total_duration = self._make_report() + output_string += log_row("Total", "-", "_", f"{total_duration:.5}", "100 %") + output_string += f"{sep}{'-' * output_string_len}" + for action, durations, duration_per in report: + output_string += log_row( + action, + f"{np.mean(durations):.5}", + f"{len(durations):}", + f"{np.sum(durations):.5}", + f"{duration_per:.5}", + ) + else: + + def log_row(action, mean, total): + return f"{sep}{action:<20s}\t| {mean:<15}\t| {total:<15}" + + output_string += log_row("Action", "Mean duration (s)", "Total time (s)") + output_string += f"{sep}{'-' * 65}" + + for action, durations in self.recorded_durations.items(): + output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}") + output_string += sep + return output_string From 3e6ec6000a400bb007fd2d1c2f0765440eb400b6 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Jun 2021 08:41:44 +0100 Subject: [PATCH 04/10] add changelog --- CHANGELOG.md | 3 
From 3e6ec6000a400bb007fd2d1c2f0765440eb400b6 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 08:41:44 +0100
Subject: [PATCH 04/10] add changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a30ddc6530790..c0e743c5334d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -122,6 +122,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - `Trainer.fit` now raises an error when using manual optimization with unsupported features such as `gradient_clip_val` or `accumulate_grad_batches` ([#7788](https://github.com/PyTorchLightning/pytorch-lightning/pull/7788))
 
 
+- Moved profilers to their own files ([#7822](https://github.com/PyTorchLightning/pytorch-lightning/pull/7822))
+
+
 ### Deprecated
 
 

From fdcdc80e19d9d08d95934a8e742f7f03aa83589b Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 09:22:01 +0100
Subject: [PATCH 05/10] add deprecated_call

---
 pytorch_lightning/profiler/profilers.py | 17 +++++++++++++++++
 tests/deprecated_api/test_remove_1-6.py |  6 ++++++
 2 files changed, 23 insertions(+)
 create mode 100644 pytorch_lightning/profiler/profilers.py

diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
new file mode 100644
index 0000000000000..b29e1e4159f45
--- /dev/null
+++ b/pytorch_lightning/profiler/profilers.py
@@ -0,0 +1,17 @@
+from pytorch_lightning.utilities.distributed import rank_zero_deprecation
+
+rank_zero_deprecation(
+    "Using ``import pytorch_lightning.profiler.profilers`` is deprecated in v1.4, and will be removed in v1.6. "
+    "HINT: Use ``import pytorch_lightning.profiler`` directly."
+)
+
+from pytorch_lightning.profiler.advanced import AdvancedProfiler  # noqa E402
+from pytorch_lightning.profiler.base import BaseProfiler, PassThroughProfiler  # noqa E402
+from pytorch_lightning.profiler.simple import SimpleProfiler  # noqa E402
+
+__all__ = [
+    'BaseProfiler',
+    'SimpleProfiler',
+    'AdvancedProfiler',
+    'PassThroughProfiler',
+]
diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py
index 63b1c60fe7c62..455e2e7489249 100644
--- a/tests/deprecated_api/test_remove_1-6.py
+++ b/tests/deprecated_api/test_remove_1-6.py
@@ -87,3 +87,9 @@ def training_step(self, *args):
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
     with pytest.deprecated_call(match=r"tbptt_pad_token=...\)` is no longer supported"):
         trainer.fit(TestModel())
+
+
+def test_v1_6_0_profilers_import(tmpdir):
+
+    with pytest.deprecated_call(match=r"deprecated in v1.4, and will be removed in v1.6"):
+        import pytorch_lightning.profiler.profilers  # noqa F401

From f01f4ca82c6503d60c11fd3088c41055cf12aa0a Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 10:49:41 +0100
Subject: [PATCH 06/10] update

---
 tests/deprecated_api/test_remove_1-6.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py
index 455e2e7489249..4245288825621 100644
--- a/tests/deprecated_api/test_remove_1-6.py
+++ b/tests/deprecated_api/test_remove_1-6.py
@@ -92,4 +92,4 @@ def training_step(self, *args):
 def test_v1_6_0_profilers_import(tmpdir):
 
     with pytest.deprecated_call(match=r"deprecated in v1.4, and will be removed in v1.6"):
-        import pytorch_lightning.profiler.profilers  # noqa F401
+        from pytorch_lightning.profiler.profilers import BaseProfiler  # noqa F401
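
Patches 05 and 06 recreate ``pytorch_lightning/profiler/profilers.py`` as a thin compatibility shim: the old import path keeps working, but emits a ``DeprecationWarning`` once, at import time. Roughly what downstream code observes (run in a fresh interpreter, since a module-level warning only fires on first import, and note that ``DeprecationWarning`` is hidden by default warning filters)::

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")  # surface DeprecationWarning, which is normally ignored
        from pytorch_lightning.profiler.profilers import SimpleProfiler  # old path, still importable

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
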
From a5ce896ba0a5da33cc04a69f28cfa6ee0256ea43 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 15:00:50 +0100
Subject: [PATCH 07/10] update

---
 docs/source/api_references.rst          |  1 +
 pytorch_lightning/profiler/__init__.py  |  5 +++--
 pytorch_lightning/profiler/profilers.py | 13 ++++++++-----
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst
index 05e52ee542557..ff616f6e56f6c 100644
--- a/docs/source/api_references.rst
+++ b/docs/source/api_references.rst
@@ -139,6 +139,7 @@ Profiler API
     :nosignatures:
     :template: classtemplate.rst
 
+    AbstractProfiler
     AdvancedProfiler
     BaseProfiler
     PassThroughProfiler
diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py
index 5bd2ff41aeefe..90fe55e0c19f1 100644
--- a/pytorch_lightning/profiler/__init__.py
+++ b/pytorch_lightning/profiler/__init__.py
@@ -195,14 +195,15 @@ def custom_processing_step(self, data):
 """
 from pytorch_lightning.profiler.advanced import AdvancedProfiler
-from pytorch_lightning.profiler.base import BaseProfiler, PassThroughProfiler
+from pytorch_lightning.profiler.base import AbstractProfiler, BaseProfiler, PassThroughProfiler
 from pytorch_lightning.profiler.pytorch import PyTorchProfiler
 from pytorch_lightning.profiler.simple import SimpleProfiler
 
 __all__ = [
+    'AbstractProfiler',
     'BaseProfiler',
-    'SimpleProfiler',
     'AdvancedProfiler',
     'PassThroughProfiler',
     'PyTorchProfiler',
+    'SimpleProfiler',
 ]
 
diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
index b29e1e4159f45..2ffbf185ec28d 100644
--- a/pytorch_lightning/profiler/profilers.py
+++ b/pytorch_lightning/profiler/profilers.py
@@ -1,17 +1,20 @@
-from pytorch_lightning.utilities.distributed import rank_zero_deprecation
+from pytorch_lightning.utilities.distributed import rank_zero_warn
 
-rank_zero_deprecation(
+rank_zero_warn(
     "Using ``import pytorch_lightning.profiler.profilers`` is deprecated in v1.4, and will be removed in v1.6. "
-    "HINT: Use ``import pytorch_lightning.profiler`` directly."
+    "HINT: Use ``import pytorch_lightning.profiler`` directly.", DeprecationWarning
 )
 
 from pytorch_lightning.profiler.advanced import AdvancedProfiler  # noqa E402
-from pytorch_lightning.profiler.base import BaseProfiler, PassThroughProfiler  # noqa E402
+from pytorch_lightning.profiler.base import AbstractProfiler, BaseProfiler, PassThroughProfiler  # noqa E402
+from pytorch_lightning.profiler.pytorch import PyTorchProfiler  # noqa E402
 from pytorch_lightning.profiler.simple import SimpleProfiler  # noqa E402
 
 __all__ = [
+    'AbstractProfiler',
     'BaseProfiler',
-    'SimpleProfiler',
     'AdvancedProfiler',
     'PassThroughProfiler',
+    'PyTorchProfiler',
+    'SimpleProfiler',
 ]
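
Patches 07 and 08 flip between ``rank_zero_warn(..., DeprecationWarning)`` and the dedicated ``rank_zero_deprecation`` helper; either way, the end result is a ``warnings.warn`` with a ``DeprecationWarning`` category emitted from the rank-zero process only. A simplified sketch of that behaviour (an assumption about the helper's intent, not the library's actual implementation)::

    import os
    import warnings


    def rank_zero_deprecation_sketch(message: str) -> None:
        # only the rank-0 process warns, so multi-process (DDP) runs do not repeat the message
        if int(os.environ.get("LOCAL_RANK", "0")) == 0:
            warnings.warn(message, DeprecationWarning)
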
From 1992271a821adfb7968ac7c7a418056f2d566a57 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 15:49:06 +0100
Subject: [PATCH 08/10] update

---
 pytorch_lightning/profiler/profilers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py
index 2ffbf185ec28d..c97dab0c8968b 100644
--- a/pytorch_lightning/profiler/profilers.py
+++ b/pytorch_lightning/profiler/profilers.py
@@ -1,8 +1,8 @@
-from pytorch_lightning.utilities.distributed import rank_zero_warn
+from pytorch_lightning.utilities.distributed import rank_zero_deprecation
 
-rank_zero_warn(
+rank_zero_deprecation(
     "Using ``import pytorch_lightning.profiler.profilers`` is deprecated in v1.4, and will be removed in v1.6. "
-    "HINT: Use ``import pytorch_lightning.profiler`` directly.", DeprecationWarning
+    "HINT: Use ``import pytorch_lightning.profiler`` directly."
 )
 
 from pytorch_lightning.profiler.advanced import AdvancedProfiler  # noqa E402

From aea7ba506ae13eaa4ef3a5c3deef985088a1e072 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 15:50:18 +0100
Subject: [PATCH 09/10] update

---
 tests/deprecated_api/test_remove_1-6.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py
index 4245288825621..d3f57dcbc903b 100644
--- a/tests/deprecated_api/test_remove_1-6.py
+++ b/tests/deprecated_api/test_remove_1-6.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 """ Test deprecated functionality which will be removed in v1.6.0 """
 
+import os
+from unittest import mock
+
 import pytest
 
 from pytorch_lightning import Trainer
@@ -89,6 +92,7 @@ def training_step(self, *args):
     trainer.fit(TestModel())
 
 
+@mock.patch.dict(os.environ, {"PYTHONWARNINGS": "default"})
 def test_v1_6_0_profilers_import(tmpdir):
 
     with pytest.deprecated_call(match=r"deprecated in v1.4, and will be removed in v1.6"):

From 8d4a2995ce57239492131f55aa7eabac838a9783 Mon Sep 17 00:00:00 2001
From: tchaton
Date: Fri, 4 Jun 2021 16:03:09 +0100
Subject: [PATCH 10/10] remove test

---
 tests/deprecated_api/test_remove_1-6.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py
index d3f57dcbc903b..7ca0939fd60d2 100644
--- a/tests/deprecated_api/test_remove_1-6.py
+++ b/tests/deprecated_api/test_remove_1-6.py
@@ -12,10 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Test deprecated functionality which will be removed in v1.6.0 """
-
-import os
-from unittest import mock
-
 import pytest
 
 from pytorch_lightning import Trainer
@@ -90,6 +86,3 @@ def training_step(self, *args):
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
     with pytest.deprecated_call(match=r"tbptt_pad_token=...\)` is no longer supported"):
         trainer.fit(TestModel())
-
-
-@mock.patch.dict(os.environ, {"PYTHONWARNINGS": "default"})
-def test_v1_6_0_profilers_import(tmpdir):
-
-    with pytest.deprecated_call(match=r"deprecated in v1.4, and will be removed in v1.6"):
-        from pytorch_lightning.profiler.profilers import BaseProfiler  # noqa F401
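
After the series, the canonical imports live in ``pytorch_lightning.profiler``, with each concrete profiler in its own module, and behaviour is unchanged. A quick usage sketch (the ``filename`` and action name below are placeholders)::

    from pytorch_lightning import Trainer
    from pytorch_lightning.profiler import SimpleProfiler

    # attached to a Trainer: with ``dirpath`` omitted, the report is written
    # under ``trainer.log_dir`` as a .txt file
    trainer = Trainer(profiler=SimpleProfiler(filename="perf-report"))

    # standalone: with no ``filename``, ``describe()`` logs the report instead
    profiler = SimpleProfiler()
    with profiler.profile("data-preparation"):
        pass  # ... expensive work ...
    profiler.describe()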