This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Default learning rate to None (default lr for the optimizer) #1172

Merged 5 commits on Feb 15, 2022
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -32,6 +32,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Renamed `ClassificationInput` to `ClassificationInputMixin` ([#1116](https://github.com/PyTorchLightning/lightning-flash/pull/1116))

+ - Changed the default `learning_rate` for all tasks to be `None`, corresponding to the default for your chosen optimizer ([#1172](https://github.com/PyTorchLightning/lightning-flash/pull/1172))

### Deprecated

### Fixed
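To illustrate the user-facing effect of this change, here is a minimal sketch (not part of the diff) using the image classification task; the `num_classes` value is arbitrary and the learning rates quoted are simply the PyTorch optimizer defaults:

```python
from flash.image import ImageClassifier

# learning_rate now defaults to None, so the optimizer's own default is used
# (torch.optim.Adam defaults to lr=1e-3).
model = ImageClassifier(num_classes=10, optimizer="Adam")

# Passing learning_rate explicitly still overrides the optimizer default, as before.
model = ImageClassifier(num_classes=10, optimizer="Adam", learning_rate=1e-4)
```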
2 changes: 1 addition & 1 deletion flash/audio/speech_recognition/model.py
@@ -57,7 +57,7 @@ def __init__(
processor_backbone: str = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 1e-5,
+ learning_rate: Optional[float] = None,
):
os.environ["TOKENIZERS_PARALLELISM"] = "TRUE"
# disable HF thousand warnings
12 changes: 7 additions & 5 deletions flash/core/model.py
@@ -283,7 +283,8 @@ class Task(DatasetProcessor, ModuleWrapperBase, LightningModule, FineTuningHooks
Args:
model: Model to use for the task.
loss_fn: Loss function for training.
- learning_rate: Learning rate to use for training, defaults to ``5e-5``.
+ learning_rate: Learning rate to use for training. If ``None`` (the default) then the default LR for your chosen
+     optimizer will be used.
optimizer: Optimizer to use for training.
lr_scheduler: The LR scheduler to use during training.
metrics: Metrics to compute for training and evaluation. Can either be an metric from the `torchmetrics`
@@ -305,7 +306,7 @@ def __init__(
self,
model: MODEL_TYPE = None,
loss_fn: LOSS_FN_TYPE = None,
- learning_rate: float = 5e-5,
+ learning_rate: Optional[float] = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
@@ -458,12 +459,11 @@ def _get_optimizer_class_from_registry(self, optimizer_key: str) -> Optimizer:

    def configure_optimizers(self) -> Union[Optimizer, Tuple[List[Optimizer], List[_LRScheduler]]]:
        """Implement how optimizer and optionally learning rate schedulers should be configured."""
+       optimizers_kwargs: Dict[str, Any] = {}
        if isinstance(self.optimizer, str):
            optimizer_fn = self._get_optimizer_class_from_registry(self.optimizer.lower())
-           optimizers_kwargs: Dict[str, Any] = {"lr": self.learning_rate}
        elif isinstance(self.optimizer, Callable):
            optimizer_fn = self.optimizer
-           optimizers_kwargs: Dict[str, Any] = {"lr": self.learning_rate}
        elif isinstance(self.optimizer, Tuple):
            if len(self.optimizer) != 2:
                raise MisconfigurationException(
@@ -485,13 +485,15 @@ def configure_optimizers(self) -> Union[Optimizer, Tuple[List[Optimizer], List[_LRScheduler]]]:

            optimizer_fn: Callable = self._get_optimizer_class_from_registry(self.optimizer[0])
            optimizers_kwargs: Dict[str, Any] = self.optimizer[1]
-           optimizers_kwargs["lr"] = self.learning_rate
        else:
            raise TypeError(
                f"""Optimizer should be of type string or callable or tuple(string, dictionary)
                    but got {type(self.optimizer)}."""
            )

+       if self.learning_rate is not None:
+           optimizers_kwargs["lr"] = self.learning_rate
+
        model_parameters = filter(lambda p: p.requires_grad, self.parameters())
        optimizer: Optimizer = optimizer_fn(model_parameters, **optimizers_kwargs)
        if self.lr_scheduler is not None:
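The key behavioural change is in `configure_optimizers`: the `lr` kwarg is only forwarded to the optimizer when the user actually supplied a `learning_rate`. Below is a standalone sketch of that pattern using a hypothetical `build_optimizer` helper (not the Flash API); only the `torch.optim` defaults are relied on:

```python
from typing import Any, Dict, Optional

import torch


def build_optimizer(parameters, optimizer_cls, learning_rate: Optional[float] = None, **kwargs):
    """Only pass `lr` when one was supplied, so the optimizer's own default applies otherwise."""
    optimizer_kwargs: Dict[str, Any] = dict(kwargs)
    if learning_rate is not None:
        optimizer_kwargs["lr"] = learning_rate
    return optimizer_cls(parameters, **optimizer_kwargs)


params = [torch.nn.Parameter(torch.zeros(1))]

# No learning_rate given: Adam falls back to its own default (lr=1e-3).
opt = build_optimizer(params, torch.optim.Adam)
assert opt.defaults["lr"] == 1e-3

# learning_rate given: it overrides the optimizer default.
opt = build_optimizer(params, torch.optim.Adam, learning_rate=1e-4)
assert opt.defaults["lr"] == 1e-4
```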
12 changes: 11 additions & 1 deletion flash/core/optimizers/optimizers.py
@@ -1,3 +1,4 @@
+ from functools import partial
from inspect import isclass
from typing import Callable, List

@@ -17,7 +18,16 @@
        _optimizers.append(_optimizer)

for fn in _optimizers:
-   _OPTIMIZERS_REGISTRY(fn, name=fn.__name__.lower())
+   name = fn.__name__.lower()
+   if name == "sgd":
+
+       def wrapper(fn, parameters, lr=None, **kwargs):
+           if lr is None:
+               raise TypeError("The `learning_rate` argument is required when the optimizer is SGD.")
+           return fn(parameters, lr, **kwargs)
+
+       fn = partial(wrapper, fn)
+   _OPTIMIZERS_REGISTRY(fn, name=name)


if _TORCH_OPTIMIZER_AVAILABLE:
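Because `torch.optim.SGD` has no default learning rate, the registry wraps it so that the failure is explicit rather than a bare missing-argument error from PyTorch. A standalone sketch of the same idea follows; the `_require_lr` and `sgd_factory` names are illustrative, not Flash API:

```python
from functools import partial

import torch


def _require_lr(fn, parameters, lr=None, **kwargs):
    # Fail early with a clear message instead of letting torch.optim.SGD
    # complain about a missing positional argument.
    if lr is None:
        raise TypeError("The `learning_rate` argument is required when the optimizer is SGD.")
    return fn(parameters, lr, **kwargs)


sgd_factory = partial(_require_lr, torch.optim.SGD)

params = [torch.nn.Parameter(torch.zeros(1))]
sgd_factory(params, lr=0.01)  # works: lr supplied
# sgd_factory(params)         # raises TypeError("The `learning_rate` argument is required ...")
```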
2 changes: 1 addition & 1 deletion flash/graph/classification/model.py
@@ -66,7 +66,7 @@ def __init__(
pooling_fn: Optional[Union[str, Callable]] = "mean",
head: Optional[Union[Callable, nn.Module]] = None,
loss_fn: LOSS_FN_TYPE = F.cross_entropy,
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
2 changes: 1 addition & 1 deletion flash/image/classification/model.py
@@ -97,7 +97,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
multi_label: bool = False,
training_strategy: Optional[str] = "default",
training_strategy_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion flash/image/detection/model.py
@@ -52,7 +52,7 @@ def __init__(
pretrained: bool = True,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
predict_kwargs: Dict = None,
**kwargs: Any,
):
2 changes: 1 addition & 1 deletion flash/image/embedding/model.py
@@ -74,7 +74,7 @@ def __init__(
pretrained: bool = False,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
backbone_kwargs: Optional[Dict[str, Any]] = None,
training_strategy_kwargs: Optional[Dict[str, Any]] = None,
pretraining_transform_kwargs: Optional[Dict[str, Any]] = None,
4 changes: 2 additions & 2 deletions flash/image/face_detection/model.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- from typing import Any, Iterable, List, Union
+ from typing import Any, Iterable, List, Optional, Union

import torch
from torch.nn import Module
@@ -54,7 +54,7 @@ def __init__(
metrics: METRICS_TYPE = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 1e-4,
+ learning_rate: Optional[float] = None,
**kwargs: Any,
):
self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/image/instance_segmentation/model.py
@@ -48,7 +48,7 @@ def __init__(
pretrained: bool = True,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 5e-4,
+ learning_rate: Optional[float] = None,
output_transform: OUTPUT_TRANSFORM_TYPE = InstanceSegmentationOutputTransform(),
predict_kwargs: Dict = None,
**kwargs: Any,
2 changes: 1 addition & 1 deletion flash/image/keypoint_detection/model.py
@@ -49,7 +49,7 @@ def __init__(
pretrained: bool = True,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 5e-4,
+ learning_rate: Optional[float] = None,
predict_kwargs: Dict = None,
**kwargs: Any,
):
5 changes: 3 additions & 2 deletions flash/image/segmentation/model.py
@@ -75,7 +75,8 @@ class SemanticSegmentation(ClassificationTask):
package, a custom metric inheriting from `torchmetrics.Metric`, a callable function or a list/dict
containing a combination of the aforementioned. In all cases, each metric needs to have the signature
`metric(preds,target)` and return a single scalar tensor. Defaults to :class:`torchmetrics.IOU`.
- learning_rate: Learning rate to use for training.
+ learning_rate: Learning rate to use for training. If ``None`` (the default) then the default LR for your chosen
+     optimizer will be used.
multi_label: Whether the targets are multi-label or not.
output: The :class:`~flash.core.data.io.output.Output` to use when formatting prediction outputs.
output_transform: :class:`~flash.core.data.io.output_transform.OutputTransform` use for post processing samples.
@@ -101,7 +102,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
multi_label: bool = False,
output_transform: OUTPUT_TRANSFORM_TYPE = None,
) -> None:
2 changes: 1 addition & 1 deletion flash/image/style_transfer/model.py
@@ -78,7 +78,7 @@ def __init__(
style_weight: float = 1e10,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
):
self.save_hyperparameters(ignore="style_image")

2 changes: 1 addition & 1 deletion flash/pointcloud/detection/model.py
@@ -60,7 +60,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
lambda_loss_cls: float = 1.0,
lambda_loss_bbox: float = 1.0,
lambda_loss_dir: float = 1.0,
2 changes: 1 addition & 1 deletion flash/pointcloud/segmentation/model.py
@@ -69,7 +69,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
multi_label: bool = False,
):
import flash
2 changes: 1 addition & 1 deletion flash/tabular/classification/model.py
@@ -67,7 +67,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 5e-4,
+ learning_rate: Optional[float] = None,
**backbone_kwargs,
):
self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/tabular/forecasting/model.py
@@ -35,7 +35,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: Union[torchmetrics.Metric, List[torchmetrics.Metric]] = None,
- learning_rate: float = 4e-3,
+ learning_rate: Optional[float] = None,
):
self.save_hyperparameters()

2 changes: 1 addition & 1 deletion flash/tabular/regression/model.py
@@ -64,7 +64,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
**backbone_kwargs
):
self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/template/classification/model.py
@@ -55,7 +55,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
multi_label: bool = False,
):
self.save_hyperparameters()
2 changes: 1 addition & 1 deletion flash/text/classification/model.py
@@ -75,7 +75,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-2,
+ learning_rate: Optional[float] = None,
multi_label: bool = False,
enable_ort: bool = False,
):
2 changes: 1 addition & 1 deletion flash/text/question_answering/model.py
@@ -104,7 +104,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 5e-5,
+ learning_rate: Optional[float] = None,
enable_ort: bool = False,
n_best_size: int = 20,
version_2_with_negative: bool = True,
2 changes: 1 addition & 1 deletion flash/text/seq2seq/core/model.py
@@ -100,7 +100,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 5e-5,
+ learning_rate: Optional[float] = None,
num_beams: Optional[int] = None,
enable_ort: bool = False,
output_transform: Optional[OutputTransform] = None,
2 changes: 1 addition & 1 deletion flash/text/seq2seq/summarization/model.py
@@ -56,7 +56,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-5,
+ learning_rate: Optional[float] = None,
num_beams: Optional[int] = 4,
use_stemmer: bool = True,
enable_ort: bool = False,
9 changes: 2 additions & 7 deletions flash/text/seq2seq/translation/model.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, Optional, Union

from torchmetrics import BLEUScore

@@ -56,7 +56,7 @@ def __init__(
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = None,
- learning_rate: float = 1e-5,
+ learning_rate: Optional[float] = None,
num_beams: Optional[int] = 4,
n_gram: bool = 4,
smooth: bool = True,
@@ -99,8 +99,3 @@ def compute_metrics(self, generated_tokens, batch, prefix):
        else:
            result = self.bleu(reference_corpus, translate_corpus)
        self.log(f"{prefix}_bleu_score", result, on_step=False, on_epoch=True, prog_bar=True)
-
-   @staticmethod
-   def _ci_benchmark_fn(history: List[Dict[str, Any]]):
-       """This function is used only for debugging usage with CI."""
-       assert history[-1]["val_bleu_score"] > 0.6, history[-1]["val_bleu_score"]
4 changes: 2 additions & 2 deletions flash/video/classification/model.py
@@ -77,10 +77,10 @@ def __init__(
backbone_kwargs: Optional[Dict] = None,
pretrained: bool = True,
loss_fn: LOSS_FN_TYPE = F.cross_entropy,
optimizer: OPTIMIZER_TYPE = "SGD",
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
metrics: METRICS_TYPE = Accuracy(),
- learning_rate: float = 1e-3,
+ learning_rate: Optional[float] = None,
head: Optional[Union[FunctionType, nn.Module]] = None,
):
self.save_hyperparameters()
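Since SGD now requires an explicit learning rate, the video task's default optimizer switches from `"SGD"` to `"Adam"`. A rough usage sketch (the `backbone` name is only an example value, not something introduced by this PR):

```python
from flash.video import VideoClassifier

# New default: Adam with its own default learning rate.
model = VideoClassifier(num_classes=5, backbone="x3d_xs")

# SGD is still available, but an explicit learning_rate must now be passed.
model = VideoClassifier(num_classes=5, backbone="x3d_xs", optimizer="SGD", learning_rate=1e-3)
```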
23 changes: 23 additions & 0 deletions tests/core/test_model.py
@@ -17,6 +17,8 @@
from itertools import chain
from numbers import Number
from typing import Any, Tuple
+ from unittest import mock
+ from unittest.mock import MagicMock

import pytest
import pytorch_lightning as pl
@@ -29,6 +31,7 @@
from torchmetrics import Accuracy

import flash
+ from flash import Task
from flash.audio import SpeechRecognition
from flash.core.adapter import Adapter
from flash.core.classification import ClassificationTask
@@ -357,6 +360,26 @@ def test_optimizers_and_schedulers(tmpdir, optim, sched, interval):
    trainer.fit(task, train_dl)


+def test_optimizer_learning_rate():
+   mock_optimizer = MagicMock()
+   Task.optimizers(mock_optimizer, "test")
+
+   model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
+
+   ClassificationTask(model, optimizer="test").configure_optimizers()
+   mock_optimizer.assert_called_once_with(mock.ANY)
+
+   mock_optimizer.reset_mock()
+
+   ClassificationTask(model, optimizer="test", learning_rate=10).configure_optimizers()
+   mock_optimizer.assert_called_once_with(mock.ANY, lr=10)
+
+   mock_optimizer.reset_mock()
+
+   with pytest.raises(TypeError, match="The `learning_rate` argument is required"):
+       ClassificationTask(model, optimizer="sgd").configure_optimizers()
+
+
@pytest.mark.skipif(not _TORCH_OPTIMIZER_AVAILABLE, reason="torch_optimizer isn't installed.")
@pytest.mark.parametrize("optim", ["Yogi"])
def test_external_optimizers_torch_optimizer(tmpdir, optim):