This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Default learning rate to None (default lr for the optimizer) (#1172)
ethanwharris authored Feb 15, 2022
1 parent 1b1b939 commit baf0b0d
Showing 26 changed files with 70 additions and 37 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -34,6 +34,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Renamed `ClassificationInput` to `ClassificationInputMixin` ([#1116](https://github.com/PyTorchLightning/lightning-flash/pull/1116))
 
+- Changed the default `learning_rate` for all tasks to be `None`, corresponding to the default for your chosen optimizer ([#1172](https://github.com/PyTorchLightning/lightning-flash/pull/1172))
+
 ### Deprecated
 
 ### Fixed
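In user code, the practical effect is that omitting `learning_rate` now defers to the optimizer's own default rather than a per-task constant. A minimal usage sketch (the `ImageClassifier` task is one of the files touched below; `num_classes=10` is just an illustrative value):

```python
from flash.image import ImageClassifier

# New behaviour: learning_rate defaults to None, so torch.optim.Adam's own
# default (1e-3) is used when the optimizer is built.
model = ImageClassifier(num_classes=10, optimizer="Adam")

# Passing a value still overrides the optimizer's default, as before.
model_custom = ImageClassifier(num_classes=10, optimizer="Adam", learning_rate=3e-4)
```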
2 changes: 1 addition & 1 deletion flash/audio/speech_recognition/model.py
@@ -57,7 +57,7 @@ def __init__(
         processor_backbone: str = None,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 1e-5,
+        learning_rate: Optional[float] = None,
     ):
         os.environ["TOKENIZERS_PARALLELISM"] = "TRUE"
         # disable HF thousand warnings
12 changes: 7 additions & 5 deletions flash/core/model.py
@@ -283,7 +283,8 @@ class Task(DatasetProcessor, ModuleWrapperBase, LightningModule, FineTuningHooks
     Args:
         model: Model to use for the task.
         loss_fn: Loss function for training.
-        learning_rate: Learning rate to use for training, defaults to ``5e-5``.
+        learning_rate: Learning rate to use for training. If ``None`` (the default) then the default LR for your chosen
+            optimizer will be used.
         optimizer: Optimizer to use for training.
         lr_scheduler: The LR scheduler to use during training.
         metrics: Metrics to compute for training and evaluation. Can either be an metric from the `torchmetrics`
@@ -305,7 +306,7 @@ def __init__(
         self,
         model: MODEL_TYPE = None,
         loss_fn: LOSS_FN_TYPE = None,
-        learning_rate: float = 5e-5,
+        learning_rate: Optional[float] = None,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
@@ -458,12 +459,11 @@ def _get_optimizer_class_from_registry(self, optimizer_key: str) -> Optimizer:
 
     def configure_optimizers(self) -> Union[Optimizer, Tuple[List[Optimizer], List[_LRScheduler]]]:
         """Implement how optimizer and optionally learning rate schedulers should be configured."""
+        optimizers_kwargs: Dict[str, Any] = {}
         if isinstance(self.optimizer, str):
             optimizer_fn = self._get_optimizer_class_from_registry(self.optimizer.lower())
-            optimizers_kwargs: Dict[str, Any] = {"lr": self.learning_rate}
         elif isinstance(self.optimizer, Callable):
             optimizer_fn = self.optimizer
-            optimizers_kwargs: Dict[str, Any] = {"lr": self.learning_rate}
         elif isinstance(self.optimizer, Tuple):
             if len(self.optimizer) != 2:
                 raise MisconfigurationException(
@@ -485,13 +485,15 @@ def configure_optimizers(self) -> Union[Optimizer, Tuple[List[Optimizer], List[_
 
             optimizer_fn: Callable = self._get_optimizer_class_from_registry(self.optimizer[0])
             optimizers_kwargs: Dict[str, Any] = self.optimizer[1]
-            optimizers_kwargs["lr"] = self.learning_rate
         else:
             raise TypeError(
                 f"""Optimizer should be of type string or callable or tuple(string, dictionary)
                 but got {type(self.optimizer)}."""
             )
 
+        if self.learning_rate is not None:
+            optimizers_kwargs["lr"] = self.learning_rate
+
         model_parameters = filter(lambda p: p.requires_grad, self.parameters())
         optimizer: Optimizer = optimizer_fn(model_parameters, **optimizers_kwargs)
         if self.lr_scheduler is not None:
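The net effect of the `configure_optimizers` change is easiest to see in isolation. Below is a simplified, self-contained restatement of the new behaviour, not the actual Flash code: `build_optimizer` and the small `OPTIMIZERS` dict stand in for the method and for `_OPTIMIZERS_REGISTRY`.

```python
from typing import Any, Callable, Dict, Optional

import torch
from torch import nn

# Stand-in for _OPTIMIZERS_REGISTRY, which Flash populates from torch.optim.
OPTIMIZERS: Dict[str, Callable] = {"adam": torch.optim.Adam, "adamw": torch.optim.AdamW}


def build_optimizer(model: nn.Module, optimizer: str, learning_rate: Optional[float] = None):
    """Mirror of the new logic: only pass `lr` when the user actually set one."""
    optimizer_fn = OPTIMIZERS[optimizer.lower()]
    optimizers_kwargs: Dict[str, Any] = {}
    if learning_rate is not None:
        optimizers_kwargs["lr"] = learning_rate
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    return optimizer_fn(model_parameters, **optimizers_kwargs)


model = nn.Linear(4, 2)
print(build_optimizer(model, "Adam").defaults["lr"])        # 0.001 -> Adam's own default
print(build_optimizer(model, "Adam", 3e-4).defaults["lr"])  # 0.0003 -> explicit value wins
```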
12 changes: 11 additions & 1 deletion flash/core/optimizers/optimizers.py
@@ -1,3 +1,4 @@
+from functools import partial
 from inspect import isclass
 from typing import Callable, List
 
@@ -17,7 +18,16 @@
     _optimizers.append(_optimizer)
 
 for fn in _optimizers:
-    _OPTIMIZERS_REGISTRY(fn, name=fn.__name__.lower())
+    name = fn.__name__.lower()
+    if name == "sgd":
+
+        def wrapper(fn, parameters, lr=None, **kwargs):
+            if lr is None:
+                raise TypeError("The `learning_rate` argument is required when the optimizer is SGD.")
+            return fn(parameters, lr, **kwargs)
+
+        fn = partial(wrapper, fn)
+    _OPTIMIZERS_REGISTRY(fn, name=name)
 
 
 if _TORCH_OPTIMIZER_AVAILABLE:
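SGD gets a wrapper because, at the time of this change, `torch.optim.SGD` had no default `lr`, so falling back to the optimizer default is not possible; the wrapper raises a clear error instead. The user-facing behaviour (mirroring the new test further down) looks roughly like this:

```python
from torch import nn
from flash.core.classification import ClassificationTask

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))

# Fine: Adam falls back to its own default learning rate.
ClassificationTask(model, optimizer="adam").configure_optimizers()

# Raises TypeError: "The `learning_rate` argument is required when the optimizer is SGD."
try:
    ClassificationTask(model, optimizer="sgd").configure_optimizers()
except TypeError as err:
    print(err)

# Fine: an explicit learning_rate satisfies the SGD wrapper.
ClassificationTask(model, optimizer="sgd", learning_rate=0.01).configure_optimizers()
```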
2 changes: 1 addition & 1 deletion flash/graph/classification/model.py
@@ -66,7 +66,7 @@ def __init__(
         pooling_fn: Optional[Union[str, Callable]] = "mean",
         head: Optional[Union[Callable, nn.Module]] = None,
         loss_fn: LOSS_FN_TYPE = F.cross_entropy,
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
2 changes: 1 addition & 1 deletion flash/image/classification/model.py
@@ -97,7 +97,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
         multi_label: bool = False,
         training_strategy: Optional[str] = "default",
         training_strategy_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion flash/image/detection/model.py
@@ -52,7 +52,7 @@ def __init__(
         pretrained: bool = True,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         predict_kwargs: Dict = None,
         **kwargs: Any,
     ):
2 changes: 1 addition & 1 deletion flash/image/embedding/model.py
@@ -74,7 +74,7 @@ def __init__(
         pretrained: bool = False,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
         backbone_kwargs: Optional[Dict[str, Any]] = None,
         training_strategy_kwargs: Optional[Dict[str, Any]] = None,
         pretraining_transform_kwargs: Optional[Dict[str, Any]] = None,
4 changes: 2 additions & 2 deletions flash/image/face_detection/model.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Iterable, List, Union
+from typing import Any, Iterable, List, Optional, Union
 
 import torch
 from torch.nn import Module
@@ -54,7 +54,7 @@ def __init__(
         metrics: METRICS_TYPE = None,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 1e-4,
+        learning_rate: Optional[float] = None,
         **kwargs: Any,
     ):
         self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/image/instance_segmentation/model.py
@@ -48,7 +48,7 @@ def __init__(
         pretrained: bool = True,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 5e-4,
+        learning_rate: Optional[float] = None,
         output_transform: OUTPUT_TRANSFORM_TYPE = InstanceSegmentationOutputTransform(),
         predict_kwargs: Dict = None,
         **kwargs: Any,
2 changes: 1 addition & 1 deletion flash/image/keypoint_detection/model.py
@@ -49,7 +49,7 @@ def __init__(
         pretrained: bool = True,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 5e-4,
+        learning_rate: Optional[float] = None,
         predict_kwargs: Dict = None,
         **kwargs: Any,
     ):
5 changes: 3 additions & 2 deletions flash/image/segmentation/model.py
@@ -75,7 +75,8 @@ class SemanticSegmentation(ClassificationTask):
             package, a custom metric inherenting from `torchmetrics.Metric`, a callable function or a list/dict
             containing a combination of the aforementioned. In all cases, each metric needs to have the signature
             `metric(preds,target)` and return a single scalar tensor. Defaults to :class:`torchmetrics.IOU`.
-        learning_rate: Learning rate to use for training.
+        learning_rate: Learning rate to use for training. If ``None`` (the default) then the default LR for your chosen
+            optimizer will be used.
         multi_label: Whether the targets are multi-label or not.
         output: The :class:`~flash.core.data.io.output.Output` to use when formatting prediction outputs.
         output_transform: :class:`~flash.core.data.io.output_transform.OutputTransform` use for post processing samples.
@@ -101,7 +102,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
         multi_label: bool = False,
         output_transform: OUTPUT_TRANSFORM_TYPE = None,
     ) -> None:
2 changes: 1 addition & 1 deletion flash/image/style_transfer/model.py
@@ -78,7 +78,7 @@ def __init__(
         style_weight: float = 1e10,
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
     ):
         self.save_hyperparameters(ignore="style_image")
 
2 changes: 1 addition & 1 deletion flash/pointcloud/detection/model.py
@@ -60,7 +60,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         lambda_loss_cls: float = 1.0,
         lambda_loss_bbox: float = 1.0,
         lambda_loss_dir: float = 1.0,
2 changes: 1 addition & 1 deletion flash/pointcloud/segmentation/model.py
@@ -69,7 +69,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         multi_label: bool = False,
     ):
         import flash
2 changes: 1 addition & 1 deletion flash/tabular/classification/model.py
@@ -67,7 +67,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 5e-4,
+        learning_rate: Optional[float] = None,
         **backbone_kwargs,
     ):
         self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/tabular/forecasting/model.py
@@ -35,7 +35,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: Union[torchmetrics.Metric, List[torchmetrics.Metric]] = None,
-        learning_rate: float = 4e-3,
+        learning_rate: Optional[float] = None,
     ):
         self.save_hyperparameters()
 
2 changes: 1 addition & 1 deletion flash/tabular/regression/model.py
@@ -64,7 +64,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         **backbone_kwargs
     ):
         self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/template/classification/model.py
@@ -55,7 +55,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         multi_label: bool = False,
     ):
         self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/text/classification/model.py
@@ -75,7 +75,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-2,
+        learning_rate: Optional[float] = None,
         multi_label: bool = False,
         enable_ort: bool = False,
     ):
2 changes: 1 addition & 1 deletion flash/text/question_answering/model.py
@@ -104,7 +104,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 5e-5,
+        learning_rate: Optional[float] = None,
         enable_ort: bool = False,
         n_best_size: int = 20,
         version_2_with_negative: bool = True,
2 changes: 1 addition & 1 deletion flash/text/seq2seq/core/model.py
@@ -100,7 +100,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 5e-5,
+        learning_rate: Optional[float] = None,
         num_beams: Optional[int] = None,
         enable_ort: bool = False,
         output_transform: Optional[OutputTransform] = None,
2 changes: 1 addition & 1 deletion flash/text/seq2seq/summarization/model.py
@@ -56,7 +56,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-5,
+        learning_rate: Optional[float] = None,
         num_beams: Optional[int] = 4,
         use_stemmer: bool = True,
         enable_ort: bool = False,
9 changes: 2 additions & 7 deletions flash/text/seq2seq/translation/model.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, Optional, Union
 
 from torchmetrics import BLEUScore
 
@@ -56,7 +56,7 @@ def __init__(
         optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = None,
-        learning_rate: float = 1e-5,
+        learning_rate: Optional[float] = None,
         num_beams: Optional[int] = 4,
         n_gram: bool = 4,
         smooth: bool = True,
@@ -99,8 +99,3 @@ def compute_metrics(self, generated_tokens, batch, prefix):
         else:
             result = self.bleu(reference_corpus, translate_corpus)
         self.log(f"{prefix}_bleu_score", result, on_step=False, on_epoch=True, prog_bar=True)
-
-    @staticmethod
-    def _ci_benchmark_fn(history: List[Dict[str, Any]]):
-        """This function is used only for debugging usage with CI."""
-        assert history[-1]["val_bleu_score"] > 0.6, history[-1]["val_bleu_score"]
4 changes: 2 additions & 2 deletions flash/video/classification/model.py
@@ -77,10 +77,10 @@ def __init__(
         backbone_kwargs: Optional[Dict] = None,
         pretrained: bool = True,
         loss_fn: LOSS_FN_TYPE = F.cross_entropy,
-        optimizer: OPTIMIZER_TYPE = "SGD",
+        optimizer: OPTIMIZER_TYPE = "Adam",
         lr_scheduler: LR_SCHEDULER_TYPE = None,
         metrics: METRICS_TYPE = Accuracy(),
-        learning_rate: float = 1e-3,
+        learning_rate: Optional[float] = None,
         head: Optional[Union[FunctionType, nn.Module]] = None,
     ):
         self.save_hyperparameters()
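Note that `VideoClassifier` also switches its default optimizer from SGD to Adam here, which follows from the SGD wrapper above: a default of SGD with `learning_rate=None` would fail. A hedged sketch of keeping the previous behaviour (the `backbone` and `num_classes` values are illustrative only):

```python
from flash.video import VideoClassifier

# New default: Adam, using Adam's own default learning rate.
model = VideoClassifier(backbone="x3d_xs", num_classes=5)

# Previous default (SGD at lr=1e-3) must now be requested explicitly.
model_old_default = VideoClassifier(
    backbone="x3d_xs", num_classes=5, optimizer="SGD", learning_rate=1e-3
)
```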
23 changes: 23 additions & 0 deletions tests/core/test_model.py
@@ -17,6 +17,8 @@
 from itertools import chain
 from numbers import Number
 from typing import Any, Tuple
+from unittest import mock
+from unittest.mock import MagicMock
 
 import pytest
 import pytorch_lightning as pl
@@ -29,6 +31,7 @@
 from torchmetrics import Accuracy
 
 import flash
+from flash import Task
 from flash.audio import SpeechRecognition
 from flash.core.adapter import Adapter
 from flash.core.classification import ClassificationTask
@@ -357,6 +360,26 @@ def test_optimizers_and_schedulers(tmpdir, optim, sched, interval):
     trainer.fit(task, train_dl)
 
 
+def test_optimizer_learning_rate():
+    mock_optimizer = MagicMock()
+    Task.optimizers(mock_optimizer, "test")
+
+    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
+
+    ClassificationTask(model, optimizer="test").configure_optimizers()
+    mock_optimizer.assert_called_once_with(mock.ANY)
+
+    mock_optimizer.reset_mock()
+
+    ClassificationTask(model, optimizer="test", learning_rate=10).configure_optimizers()
+    mock_optimizer.assert_called_once_with(mock.ANY, lr=10)
+
+    mock_optimizer.reset_mock()
+
+    with pytest.raises(TypeError, match="The `learning_rate` argument is required"):
+        ClassificationTask(model, optimizer="sgd").configure_optimizers()
+
+
 @pytest.mark.skipif(not _TORCH_OPTIMIZER_AVAILABLE, reason="torch_optimizer isn't installed.")
 @pytest.mark.parametrize("optim", ["Yogi"])
 def test_external_optimizers_torch_optimizer(tmpdir, optim):
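As the new test suggests, `Task.optimizers(fn, name)` can register user-supplied optimizer factories, and those factories can now carry their own default learning rate. A speculative sketch, not part of the commit: the `functools.partial` trick and the `"adam_3e4"` name are purely illustrative.

```python
from functools import partial

import torch
from torch import nn

from flash import Task
from flash.core.classification import ClassificationTask

# Register an Adam factory whose default lr is 3e-4 under a new registry name.
Task.optimizers(partial(torch.optim.Adam, lr=3e-4), "adam_3e4")

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))

# learning_rate is None (the new default), so the factory's lr=3e-4 is used.
optimizer = ClassificationTask(model, optimizer="adam_3e4").configure_optimizers()
print(optimizer.defaults["lr"])  # 0.0003

# An explicit learning_rate still wins: call-time keywords override the partial's.
optimizer = ClassificationTask(model, optimizer="adam_3e4", learning_rate=1e-2).configure_optimizers()
print(optimizer.defaults["lr"])  # 0.01
```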
