Merge pull request #195 from WenjieDu/lr_scheduler
Add learning-rate schedulers
Showing 18 changed files with 1,053 additions and 13 deletions.
@@ -0,0 +1,29 @@
""" | ||
Learning rate schedulers available in PyPOTS. Their functionalities are the same with those in PyTorch, | ||
the only difference that is also why we implement them is that you don't have to pass according optimizers | ||
into them immediately while initializing them. Instead, you can pass them into pypots.optim.Optimizer | ||
after initialization and call their `init_scheduler()` method in pypots.optim.Optimizer.init_optimizer() to initialize | ||
schedulers together with optimizers. | ||
""" | ||
|
||
# Created by Wenjie Du <wenjay.du@gmail.com> | ||
# License: GLP-v3 | ||
|
||
from .lambda_lrs import LambdaLR | ||
from .multiplicative_lrs import MultiplicativeLR | ||
from .step_lrs import StepLR | ||
from .multistep_lrs import MultiStepLR | ||
from .constant_lrs import ConstantLR | ||
from .exponential_lrs import ExponentialLR | ||
from .linear_lrs import LinearLR | ||
|
||
|
||
__all__ = [ | ||
"LambdaLR", | ||
"MultiplicativeLR", | ||
"StepLR", | ||
"MultiStepLR", | ||
"ConstantLR", | ||
"ExponentialLR", | ||
"LinearLR", | ||
] |
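The package docstring above describes the workflow these schedulers are designed for: create a scheduler without an optimizer, then hand it to a PyPOTS optimizer, which binds the two via `init_scheduler()` inside `init_optimizer()`. Below is a minimal usage sketch of that workflow; the keyword argument name `lr_scheduler` on the PyPOTS optimizer and the `gamma=0.9` value are illustrative assumptions, not details taken from this diff.

# A minimal sketch of the decoupled initialization described in the docstring above.
# Assumption: pypots.optim.Adam accepts the scheduler via an ``lr_scheduler`` argument
# and calls its ``init_scheduler()`` internally once the model's parameters are known.
from pypots.optim import Adam
from pypots.optim.lr_scheduler import ExponentialLR

scheduler = ExponentialLR(gamma=0.9)  # no optimizer passed here, unlike torch.optim.lr_scheduler
optimizer = Adam(lr=1e-3, lr_scheduler=scheduler)
# Pass ``optimizer`` to any PyPOTS model as usual; the learning rate is then
# multiplied by gamma on each call to the optimizer's step().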
@@ -0,0 +1,162 @@
""" | ||
The base class for learning rate schedulers. This class is adapted from PyTorch, | ||
please refer to torch.optim.lr_scheduler for more details. | ||
""" | ||
|
||
# Created by Wenjie Du <wenjay.du@gmail.com> | ||
# License: GLP-v3 | ||
|
||
import weakref | ||
from abc import ABC, abstractmethod | ||
from functools import wraps | ||
|
||
from torch.optim import Optimizer | ||
|
||
from ...utils.logging import logger | ||
|
||
|
||
class LRScheduler(ABC):
    """Base class for PyPOTS learning rate schedulers.

    Parameters
    ----------
    last_epoch: int
        The index of the last epoch. Default: -1.

    verbose: bool
        If ``True``, prints a message to stdout for each update. Default: ``False``.
    """

    def __init__(self, last_epoch=-1, verbose=False):
        self.last_epoch = last_epoch
        self.verbose = verbose
        self.optimizer = None
        self.base_lrs = None
        self._last_lr = None
        self._step_count = 0

    def init_scheduler(self, optimizer):
        """Initialize the scheduler. This method should be called in pypots.optim.Optimizer.init_optimizer()
        to initialize the scheduler together with the optimizer.

        Parameters
        ----------
        optimizer: torch.optim.Optimizer,
            The optimizer to be scheduled.
        """
        # Attach optimizer
        if not isinstance(optimizer, Optimizer):
            raise TypeError("{} is not an Optimizer".format(type(optimizer).__name__))
        self.optimizer = optimizer

        # Initialize epoch and base learning rates
        if self.last_epoch == -1:
            for group in optimizer.param_groups:
                group.setdefault("initial_lr", group["lr"])
        else:
            for i, group in enumerate(optimizer.param_groups):
                if "initial_lr" not in group:
                    raise KeyError(
                        "param 'initial_lr' is not specified "
                        "in param_groups[{}] when resuming an optimizer".format(i)
                    )
        self.base_lrs = [group["initial_lr"] for group in optimizer.param_groups]

        # Following https://github.com/pytorch/pytorch/issues/20124,
        # we would like to ensure that `lr_scheduler.step()` is called after
        # `optimizer.step()`
        def with_counter(method):
            if getattr(method, "_with_counter", False):
                # `optimizer.step()` has already been replaced, return.
                return method

            # Keep a weak reference to the optimizer instance to prevent
            # cyclic references.
            instance_ref = weakref.ref(method.__self__)
            # Get the unbound method for the same purpose.
            func = method.__func__
            cls = instance_ref().__class__
            del method

            @wraps(func)
            def wrapper(*args, **kwargs):
                instance = instance_ref()
                instance._step_count += 1
                wrapped = func.__get__(instance, cls)
                return wrapped(*args, **kwargs)

            # Note that the returned function here is no longer a bound method,
            # so attributes like `__func__` and `__self__` no longer exist.
            wrapper._with_counter = True
            return wrapper

        self.optimizer.step = with_counter(self.optimizer.step)
        self.optimizer._step_count = 0

    @abstractmethod
    def get_lr(self):
        """Compute learning rate."""
        # Compute learning rate using chainable form of the scheduler
        raise NotImplementedError

    def get_last_lr(self):
        """Return last computed learning rate by current scheduler."""
        return self._last_lr

    @staticmethod
    def print_lr(is_verbose, group, lr):
        """Display the current learning rate."""
        if is_verbose:
            logger.info(f"Adjusting learning rate of group {group} to {lr:.4e}.")

    def step(self):
        """Step could be called after every batch update. This should be called in ``pypots.optim.Optimizer.step()``
        after ``pypots.optim.Optimizer.torch_optimizer.step()``.
        """
        # Raise a warning if the old pattern is detected
        # https://github.com/pytorch/pytorch/issues/20124
        if self._step_count == 1:
            if not hasattr(self.optimizer.step, "_with_counter"):
                logger.warning(
                    "Seems like `optimizer.step()` has been overridden after learning rate scheduler "
                    "initialization. Please, make sure to call `optimizer.step()` before "
                    "`lr_scheduler.step()`. See more details at "
                    "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate",
                )

            # Just check if there were two first lr_scheduler.step() calls before optimizer.step()
            elif self.optimizer._step_count < 1:
                logger.warning(
                    "Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
                    "In PyTorch 1.1.0 and later, you should call them in the opposite order: "
                    "`optimizer.step()` before `lr_scheduler.step()`. Failure to do this "
                    "will result in PyTorch skipping the first value of the learning rate schedule. "
                    "See more details at "
                    "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate",
                )
        self._step_count += 1

        class _enable_get_lr_call:
            def __init__(self, o):
                self.o = o

            def __enter__(self):
                self.o._get_lr_called_within_step = True
                return self

            def __exit__(self, type, value, traceback):
                self.o._get_lr_called_within_step = False

        with _enable_get_lr_call(self):
            self.last_epoch += 1
            values = self.get_lr()

        for i, data in enumerate(zip(self.optimizer.param_groups, values)):
            param_group, lr = data
            param_group["lr"] = lr
            self.print_lr(self.verbose, i, lr)

        self._last_lr = [group["lr"] for group in self.optimizer.param_groups]
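To make the base-class contract concrete: a subclass only needs to implement `get_lr()` and return one learning rate per parameter group; `step()` above then advances `last_epoch`, writes the new values into `optimizer.param_groups`, and optionally logs them. The snippet below is a hypothetical exponential-decay subclass written purely to illustrate that contract; it is not the `ExponentialLR` shipped in this PR, whose details may differ.

# Hypothetical example subclass, shown only to illustrate the get_lr() contract.
class ToyExponentialLR(LRScheduler):
    def __init__(self, gamma, last_epoch=-1, verbose=False):
        super().__init__(last_epoch, verbose)
        self.gamma = gamma  # multiplicative decay factor applied on every step

    def get_lr(self):
        # self.optimizer is available only after init_scheduler() has been called.
        if self.last_epoch == 0:
            # First step after init_scheduler(): keep the initial learning rates.
            return [group["lr"] for group in self.optimizer.param_groups]
        # Chainable form: derive each new lr from the current lr of its param group.
        return [group["lr"] * self.gamma for group in self.optimizer.param_groups]

Because `get_lr()` derives each new value from the current entries in `param_groups` (the "chainable form" noted in the base class), such a schedule composes naturally with other chainable schedulers applied to the same optimizer.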