Skip to content

Commit

Permalink
Updated behavior of saving models to handle new frequency in fastai l… (
Browse files Browse the repository at this point in the history
  • Loading branch information
Raalsky authored Jun 30, 2021
1 parent 0b255f8 commit c7bbe22
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 108 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
## neptune-fastai 0.10.0

### Breaking changes
- Changed the behavior of uploading models and raised the minimum required fastai version to 2.4 ([#16](https://github.com/neptune-ai/neptune-fastai/pull/16))

## neptune-fastai 0.9.6

### Fixes
- Warning instead of an error when calling callback from method without SaveModelCallback ([#15](https://github.com/neptune-ai/neptune-fastai/pull/15))
- Warning instead of an error when calling callback from method without SaveModelCallback ([#15](https://github.com/neptune-ai/neptune-fastai/pull/15))
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ learn = tabular_learner(dls,
learn.fit_one_cycle(10,
cbs=[
NeptuneCallback(run=neptune_run,
base_namespace='experiment',
save_best_model=True,
save_model_freq=4),
SaveModelCallback(monitor='accuracy', every_epoch=True)
base_namespace='experiment'),
SaveModelCallback(monitor='accuracy')
])
```
10 changes: 4 additions & 6 deletions examples/tabular_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,10 @@ def main():
metrics=accuracy)
learn.fit_one_cycle(10,
cbs=[
NeptuneCallback(run=neptune_run,
base_namespace='experiment',
save_best_model=True,
save_model_freq=4),
SaveModelCallback(monitor='accuracy', every_epoch=True)
])
NeptuneCallback(run=neptune_run,
base_namespace='experiment'),
SaveModelCallback(monitor='accuracy')
])


if __name__ == '__main__':
Expand Down
82 changes: 24 additions & 58 deletions neptune_fastai/impl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastai.callback.hook import total_params
from fastai.basics import Callback, store_attr, join_path_file
from fastai.torch_core import trainable_params, default_device
from fastai.callback.tracker import SaveModelCallback, TrackerCallback
from fastai.callback.tracker import SaveModelCallback

try:
# neptune-client=0.9.0 package structure
Expand All @@ -52,24 +52,22 @@ class NeptuneCallback(Callback):
def __init__(self,
run: neptune.Run,
base_namespace: str = '',
save_best_model: bool = True,
save_model_freq: int = 0,
upload_saved_models: Optional[str] = 'all',
**kwargs):
super().__init__(**kwargs)

verify_type('run', run, neptune.Run)
verify_type('base_namespace', base_namespace, str)
verify_type('save_best_model', save_best_model, bool)
verify_type('save_model_freq', save_model_freq, int)
verify_type('upload_saved_models', upload_saved_models, (str, type(None)))

assert upload_saved_models is None or upload_saved_models in ('all', 'last')

self.neptune_run = run
self.best_model_epoch = 0
self.save_model_freq = save_model_freq
self.fit_index = retrieve_fit_index(run, f'{base_namespace}/metrics/')

run[INTEGRATION_VERSION_KEY] = __version__

store_attr('base_namespace,save_best_model,save_model_freq')
store_attr('base_namespace,upload_saved_models')

@property
def name(self) -> str:
Expand Down Expand Up @@ -154,39 +152,14 @@ def _log_model_configuration(self):
}
}

def _check_for_save_model(self):
every_epoch = self.save_model_freq > 0

if not hasattr(self, 'save_model'):
if every_epoch or self.save_best_model:
save_model_cb = SaveModelCallback(every_epoch=every_epoch)
self.learn.add_cb(save_model_cb)

def after_create(self):
self._check_for_save_model()

def before_fit(self):
every_epoch = self.save_model_freq > 0

if hasattr(self, 'save_model') and every_epoch and not self.save_model.every_epoch:
warnings.warn(
'NeptuneCallback: SaveModelCallback is required to have every_epoch set to True when using '
'save_model_freq. Model checkpoints will not be uploaded.'
)
self.save_model_freq = 0

if not hasattr(self, 'save_model'):
if every_epoch or self.save_best_model:
if self.upload_saved_models:
warnings.warn(
'NeptuneCallback: SaveModelCallback is necessary for uploading model checkpoints.'
)

if every_epoch:
self.save_model_freq = 0

if self.save_best_model:
self.save_model_freq = False

def before_fit(self):
self._log_model_configuration()

_log_model_architecture(self.neptune_run, self.base_namespace, self.learn)
Expand Down Expand Up @@ -244,32 +217,25 @@ def after_validate(self):
)

def after_epoch(self):
if hasattr(self, 'save_model') and hasattr(self.save_model, 'every_epoch') and self.save_model.every_epoch:
if self.save_model_freq > 0:
if self.epoch % self.save_model_freq == 0:
path = join_path_file(f'{self.learn.save_model.fname}_{self.learn.save_model.epoch}',
self.learn.path / self.learn.model_dir,
ext='.pth')
prefix = f'{self.base_namespace}/io_files/artifacts/model_checkpoints/fit_{self.fit_index}/' \
f'epoch_{self.learn.epoch}'
self.neptune_run[prefix].upload(str(path))

if self.save_best_model:
# Enforce tracker to check for new best model
TrackerCallback.after_epoch(self.save_model)

if hasattr(self.save_model, 'new_best') and self.save_model.new_best:
self.best_model_epoch = self.epoch
if self.upload_saved_models == 'all'\
and hasattr(self, 'save_model')\
and hasattr(self.save_model, 'every_epoch')\
and self.save_model.every_epoch\
and self.epoch % self.save_model.every_epoch == 0:
filename = f'{self.learn.save_model.fname}_{self.learn.save_model.epoch}'
path = join_path_file(filename, self.learn.path / self.learn.model_dir, ext='.pth')
prefix = f'{self.base_namespace}/io_files/artifacts/model_checkpoints/fit_{self.fit_index}/' \
f'epoch_{self.learn.save_model.epoch}'
self.neptune_run[prefix].upload(str(path))

def after_fit(self):
if self.save_best_model and hasattr(self, 'save_model'):
if hasattr(self.save_model, 'every_epoch') and self.save_model.every_epoch:
filename = f'{self.learn.save_model.fname}_{self.best_model_epoch}'
else:
filename = self.learn.save_model.fname

if self.upload_saved_models\
and hasattr(self, 'save_model')\
and hasattr(self.save_model, 'every_epoch')\
and not self.save_model.every_epoch:
filename = self.learn.save_model.fname
path = join_path_file(filename, self.learn.path / self.learn.model_dir, ext='.pth')
prefix = f'{self.base_namespace}/io_files/artifacts/model_checkpoints/fit_{self.fit_index}/best'
prefix = f'{self.base_namespace}/io_files/artifacts/model_checkpoints/fit_{self.fit_index}/{filename}'

self.neptune_run[prefix].upload(str(path))

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def main():
'tox==3.6.1',
]

base_libs = ['neptune-client>=0.9.16', 'fastai>=2.0.0']
base_libs = ['neptune-client>=0.9.16', 'fastai>=2.4']

version = None
if os.path.exists('PKG-INFO'):
Expand Down
55 changes: 17 additions & 38 deletions tests/neptune_fastai/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class TestBase:
def test_basename(self, run, dataset):
neptune_callback = NeptuneCallback(run=run,
base_namespace='experiment',
save_best_model=False)
upload_saved_models=None)

learn = tabular_learner(dataset, metrics=accuracy, cbs=[neptune_callback])
learn.fit_one_cycle(1)
Expand All @@ -55,7 +55,7 @@ def test_basename(self, run, dataset):
def test_basename_fit_callback(self, run, dataset):
neptune_callback = NeptuneCallback(run=run,
base_namespace='experiment',
save_best_model=False)
upload_saved_models=None)

learn = tabular_learner(dataset, layers=[10, 10], metrics=accuracy)
learn.fit_one_cycle(1, cbs=[neptune_callback])
Expand All @@ -75,7 +75,7 @@ def test_basename_fit_callback(self, run, dataset):
def test_multiple_fits(self, run, dataset):
neptune_callback = NeptuneCallback(run=run,
base_namespace='experiment',
save_best_model=False)
upload_saved_models=None)

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10], cbs=[neptune_callback])
learn.fit_one_cycle(1)
Expand All @@ -96,7 +96,7 @@ def test_multiple_fits(self, run, dataset):

def test_frozen_fits(self, run, dataset):
neptune_callback = NeptuneCallback(run=run,
save_best_model=False)
upload_saved_models=None)

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10], cbs=[neptune_callback])
learn.fit_one_cycle(1)
Expand All @@ -120,7 +120,7 @@ def test_frozen_fits(self, run, dataset):
assert 'frozen_level' in structure['metrics']['fit_1']

def test_optimizer_hyperparams(self, run, dataset):
neptune_callback = NeptuneCallback(run=run, save_best_model=False)
neptune_callback = NeptuneCallback(run=run, upload_saved_models=None)

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10], cbs=[neptune_callback])
learn.fit_one_cycle(1)
Expand All @@ -143,15 +143,11 @@ def test_optimizer_hyperparams(self, run, dataset):

def test_saving_from_constructor(self, run, dataset):
learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10],
cbs=[NeptuneCallback(run=run, save_best_model=True)])
cbs=[SaveModelCallback(), NeptuneCallback(run=run)])
learn.fit_one_cycle(1)

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10],
cbs=[NeptuneCallback(run=run, save_best_model=False, save_model_freq=2)])
learn.fit_one_cycle(2)

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10],
cbs=[NeptuneCallback(run=run, save_best_model=True, save_model_freq=2)])
cbs=[SaveModelCallback(every_epoch=2), NeptuneCallback(run=run)])
learn.fit_one_cycle(2)

run.sync()
Expand All @@ -164,37 +160,26 @@ def test_saving_from_constructor(self, run, dataset):

assert 'fit_0' in structure['metrics']
assert 'fit_1' in structure['metrics']
assert 'fit_2' in structure['metrics']
assert len(structure['metrics']) == 3
assert len(structure['metrics']) == 2

assert 'artifacts' in structure['io_files']
assert 'model_checkpoints' in structure['io_files']['artifacts']
assert 'fit_0' in structure['io_files']['artifacts']['model_checkpoints']
assert 'fit_1' in structure['io_files']['artifacts']['model_checkpoints']
assert 'fit_2' in structure['io_files']['artifacts']['model_checkpoints']
assert len(structure['io_files']['artifacts']['model_checkpoints']) == 3
assert len(structure['io_files']['artifacts']['model_checkpoints']) == 2

assert 'best' in structure['io_files']['artifacts']['model_checkpoints']['fit_0']
assert 'model' in structure['io_files']['artifacts']['model_checkpoints']['fit_0']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_0']) == 1

assert 'epoch_0' in structure['io_files']['artifacts']['model_checkpoints']['fit_1']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_1']) == 1

assert 'best' in structure['io_files']['artifacts']['model_checkpoints']['fit_2']
assert 'epoch_0' in structure['io_files']['artifacts']['model_checkpoints']['fit_2']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_2']) == 2

def test_saving_from_method(self, run, dataset):
learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(1, cbs=[SaveModelCallback(), NeptuneCallback(run=run, save_best_model=True)])
learn.fit_one_cycle(1, cbs=[SaveModelCallback(), NeptuneCallback(run=run)])

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(2, cbs=[SaveModelCallback(every_epoch=True),
NeptuneCallback(run=run, save_best_model=False, save_model_freq=2)])

learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(2, cbs=[SaveModelCallback(every_epoch=True),
NeptuneCallback(run=run, save_best_model=True, save_model_freq=2)])
learn.fit_one_cycle(2, cbs=[SaveModelCallback(every_epoch=2), NeptuneCallback(run=run)])

run.sync()

Expand All @@ -206,39 +191,33 @@ def test_saving_from_method(self, run, dataset):

assert 'fit_0' in structure['metrics']
assert 'fit_1' in structure['metrics']
assert 'fit_2' in structure['metrics']
assert len(structure['metrics']) == 3
assert len(structure['metrics']) == 2

assert 'artifacts' in structure['io_files']
assert 'model_checkpoints' in structure['io_files']['artifacts']
assert 'fit_0' in structure['io_files']['artifacts']['model_checkpoints']
assert 'fit_1' in structure['io_files']['artifacts']['model_checkpoints']
assert 'fit_2' in structure['io_files']['artifacts']['model_checkpoints']
assert len(structure['io_files']['artifacts']['model_checkpoints']) == 3
assert len(structure['io_files']['artifacts']['model_checkpoints']) == 2

assert 'best' in structure['io_files']['artifacts']['model_checkpoints']['fit_0']
assert 'model' in structure['io_files']['artifacts']['model_checkpoints']['fit_0']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_0']) == 1

assert 'epoch_0' in structure['io_files']['artifacts']['model_checkpoints']['fit_1']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_1']) == 1

assert 'best' in structure['io_files']['artifacts']['model_checkpoints']['fit_2']
assert 'epoch_0' in structure['io_files']['artifacts']['model_checkpoints']['fit_2']
assert len(structure['io_files']['artifacts']['model_checkpoints']['fit_2']) == 2

def test_without_save_model_constr(self, run, dataset):
try:
learn = tabular_learner(dataset,
metrics=accuracy,
layers=[10, 10],
cbs=[NeptuneCallback(run=run, save_best_model=True)])
cbs=[NeptuneCallback(run=run), SaveModelCallback()])
learn.fit_one_cycle(1)
except AttributeError as exception:
fail(exception)

def test_without_save_model_method(self, run, dataset):
try:
learn = tabular_learner(dataset, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(1, cbs=[NeptuneCallback(run=run, save_best_model=True)])
learn.fit_one_cycle(1, cbs=[NeptuneCallback(run=run), SaveModelCallback()])
except AttributeError as exception:
fail(exception)

0 comments on commit c7bbe22

Please sign in to comment.