From 0c2a837bb9fd0b1546de85945413e6989a7131fd Mon Sep 17 00:00:00 2001 From: Spencer Wong <88933912+blimlim@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:52:56 +1100 Subject: [PATCH] Check CICE4 restart file dates (#539) * Add CICE4 restart date checks in access.py driver --- payu/models/access.py | 7 ++ payu/models/cice.py | 150 ++++++++++++++++++++++---- payu/models/cice5.py | 25 +++++ test/models/test_access.py | 44 ++++++-- test/models/test_cice.py | 210 +++++++++++++++++++++++++++++++++++-- 5 files changed, 398 insertions(+), 38 deletions(-) diff --git a/payu/models/access.py b/payu/models/access.py index a2946275..41ae73b3 100644 --- a/payu/models/access.py +++ b/payu/models/access.py @@ -218,6 +218,13 @@ def setup(self): f90nml.write(cpl_nml, nml_work_path + '~') shutil.move(nml_work_path + '~', nml_work_path) + if model.model_type == 'cice': + if model.prior_restart_path and not self.expt.repeat_run: + # Set up and check the cice restart files. + model.overwrite_restart_ptr(run_start_date, + previous_runtime, + start_date_fpath) + # Now change the oasis runtime. This needs to be done after the others. 
for model in self.expt.models: if model.model_type == 'oasis': diff --git a/payu/models/cice.py b/payu/models/cice.py index 3a3233cb..70c9b090 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -17,6 +17,7 @@ import sys import shutil import datetime +import struct import re import tarfile @@ -173,27 +174,7 @@ def setup(self): setup_nml = self.ice_in['setup_nml'] if self.prior_restart_path: - # Generate ice.restart_file - # TODO: better check of restart filename - iced_restart_file = None - iced_restart_files = [f for f in self.get_prior_restart_files() - if f.startswith('iced.')] - - if len(iced_restart_files) > 0: - iced_restart_file = sorted(iced_restart_files)[-1] - - if iced_restart_file is None: - raise FileNotFoundError( - f'No iced restart file found in {self.prior_restart_path}') - - res_ptr_path = os.path.join(self.work_init_path, - 'ice.restart_file') - if os.path.islink(res_ptr_path): - # If we've linked in a previous pointer it should be deleted - os.remove(res_ptr_path) - with open(res_ptr_path, 'w') as res_ptr: - res_dir = self.get_ptr_restart_dir() - print(os.path.join(res_dir, iced_restart_file), file=res_ptr) + self._make_restart_ptr() # Update input namelist setup_nml['runtype'] = 'continue' @@ -394,3 +375,130 @@ def link_restart(self, fpath): ) make_symlink(input_path, input_work_path) + + def _make_restart_ptr(self): + """ + CICE4 restart pointers are created in the access driver, where + the correct run start dates are available. + """ + pass + + def overwrite_restart_ptr(self, + run_start_date, + previous_runtime, + calendar_file): + """ + Generate restart pointer file 'ice.restart_file' pointing to + 'iced.YYYYMMDD' with the correct start date. + Additionally check that the `iced.YYYYMMDD` restart file's header + has the correct previous runtime. + Typically called from the access driver, which provides the + correct date and runtime. 
+ + Parameters + ---------- + run_start_date: datetime.date + Start date of the new simulation + previous_runtime: int + Seconds between experiment initialisation date and start date + calendar_file: str + Calendar restart file used to calculate timing information + """ + # Expected iced restart file name + iced_restart_file = self.find_matching_iced(self.prior_restart_path, + run_start_date) + + res_ptr_path = os.path.join(self.work_init_path, + 'ice.restart_file') + if os.path.islink(res_ptr_path): + # If we've linked in a previous pointer it should be deleted + os.remove(res_ptr_path) + + iced_path = os.path.join(self.prior_restart_path, + iced_restart_file) + + # Check binary restart has correct time + self._cice4_check_date_consistency(iced_path, + previous_runtime, + calendar_file) + + with open(res_ptr_path, 'w') as res_ptr: + res_dir = self.get_ptr_restart_dir() + res_ptr.write(os.path.join(res_dir, iced_restart_file)) + + def _cice4_check_date_consistency(self, + iced_path, + previous_runtime, + calendar_file): + """ + Check that the previous runtime in iced restart file header + matches the runtime calculated from the calendar restart file. + + Parameters + ---------- + iced_path: str or Path + Path to iced restart file + previous_runtime: int + Seconds between experiment initialisation date and start date + calendar_file: str or Path + Calendar restart file used to calculate timing information + """ + _, _, cice_iced_runtime, _ = read_binary_iced_header(iced_path) + if previous_runtime != cice_iced_runtime: + msg = (f"Previous runtime from calendar file " + f"{calendar_file}: {previous_runtime} " + "does not match previous runtime in restart" + f"file {iced_path}: {cice_iced_runtime}.") + raise RuntimeError(msg) + + def find_matching_iced(self, dir_path, date): + """ + Check a directory for an iced.YYYYMMDD restart file matching a + specified date. + Raises an error if the expected file is not found. 
+ + Parameters + ---------- + dir_path: str or Path + Path to directory containing iced restart files + date: datetime.date + Date for matching iced file names + + Returns + ------- + iced_file_name: str + Name of iced restart file found in dir_path matching + the specified date + """ + # Expected iced restart file name + date_int = cal.date_to_int(date) + iced_restart_file = f"iced.{date_int:08d}" + + dir_files = [f for f in os.listdir(dir_path) + if os.path.isfile(os.path.join(dir_path, f))] + + if iced_restart_file not in dir_files: + msg = (f"CICE restart file not found in {dir_path}. Expected " + f"{iced_restart_file} to exist. Is 'dumpfreq' set " + f"in {self.ice_nml_fname} consistently with the run-length?" + ) + raise FileNotFoundError(msg) + + return iced_restart_file + + +CICE4_RESTART_HEADER_SIZE = 24 +CICE4_RESTART_HEADER_FORMAT = '>iidd' + + +def read_binary_iced_header(iced_path): + """ + Read header information from a CICE4 binary restart file. + """ + with open(iced_path, 'rb') as iced_file: + header = iced_file.read(CICE4_RESTART_HEADER_SIZE) + bint, istep0, time, time_forc = struct.unpack( + CICE4_RESTART_HEADER_FORMAT, + header) + + return (bint, istep0, time, time_forc) diff --git a/payu/models/cice5.py b/payu/models/cice5.py index 1c8fcfac..c20a2205 100644 --- a/payu/models/cice5.py +++ b/payu/models/cice5.py @@ -81,3 +81,28 @@ def _calc_runtime(self): the timing information in the cice_in.nml namelist. """ pass + + def _make_restart_ptr(self): + """ + Generate restart pointer which points to the latest iced.YYYYMMDD + restart file. 
+ """ + iced_restart_file = None + iced_restart_files = [f for f in self.get_prior_restart_files() + if f.startswith('iced.')] + + if len(iced_restart_files) > 0: + iced_restart_file = sorted(iced_restart_files)[-1] + + if iced_restart_file is None: + raise FileNotFoundError( + f'No iced restart file found in {self.prior_restart_path}') + + res_ptr_path = os.path.join(self.work_init_path, + 'ice.restart_file') + if os.path.islink(res_ptr_path): + # If we've linked in a previous pointer it should be deleted + os.remove(res_ptr_path) + with open(res_ptr_path, 'w') as res_ptr: + res_dir = self.get_ptr_restart_dir() + res_ptr.write(os.path.join(res_dir, iced_restart_file)) diff --git a/test/models/test_access.py b/test/models/test_access.py index 4b15f497..362d7d45 100644 --- a/test/models/test_access.py +++ b/test/models/test_access.py @@ -4,10 +4,12 @@ import pytest import cftime +import f90nml +from unittest.mock import patch import payu -from test.common import cd +from test.common import cd, expt_workdir from test.common import tmpdir, ctrldir, labdir, workdir, archive_dir from test.common import config as config_orig from test.common import write_config @@ -16,7 +18,6 @@ from test.common import make_expt_archive_dir, remove_expt_archive_dirs from test.common import config_path from payu.calendar import GREGORIAN, NOLEAP -import f90nml verbose = True @@ -44,7 +45,6 @@ def setup_module(module): tmpdir.mkdir() labdir.mkdir() ctrldir.mkdir() - workdir.mkdir() archive_dir.mkdir() make_all_files() except Exception as e: @@ -65,6 +65,23 @@ def teardown_module(module): print(e) +@pytest.fixture(autouse=True) +def empty_workdir(): + """ + Model setup tests require a clean work directory and symlink from + the control directory. 
+ """ + expt_workdir.mkdir(parents=True) + # Symlink must exist for setup to use correct locations + workdir.symlink_to(expt_workdir) + + yield expt_workdir + try: + shutil.rmtree(expt_workdir) + except FileNotFoundError: + pass + workdir.unlink() + @pytest.fixture def access_1year_config(): # Write an access model config file with 1 year runtime @@ -229,7 +246,15 @@ def test_access_cice_calendar_cycling_500( # which we are trying to bypass. shutil.copy(default_input_ice, cice_model.work_path) - access_model.setup() + # Skip writing restart pointer as it requires iced file + # with valid header. Restart pointer functionality is tested + # in test_cice.py. + with patch( + 'payu.models.cice.Cice.overwrite_restart_ptr', + return_value=None + ): + access_model.setup() + access_model.archive() end_date_fpath = os.path.join( @@ -269,7 +294,7 @@ def test_access_cice_1year_runtimes( expected_runtime ): """ - The large setup/archive cycling test won't pick up situations + The large setup/archive cycling test won't pick up situations where the calculations during setup and archive are simultaneously wrong, e.g. if they both used the wrong calendar. Hence test seperately that the correct runtimes for cice are @@ -331,7 +356,14 @@ def test_access_cice_1year_runtimes( # which we are trying to bypass. shutil.copy(ctrl_input_ice_path, cice_model.work_path) - access_model.setup() + # Skip writing restart pointer as it requires iced file + # with valid header. Restart pointer functionality is tested + # in test_cice.py + with patch( + 'payu.models.cice.Cice.overwrite_restart_ptr', + return_value=None + ): + access_model.setup() # Check that the correct runtime is written to the work directory's # input ice namelist. 
diff --git a/test/models/test_cice.py b/test/models/test_cice.py index 8f9453a7..8afcc92e 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -1,5 +1,7 @@ +import datetime import os import shutil +import struct import pytest import f90nml @@ -13,6 +15,7 @@ from test.common import workdir, expt_archive_dir, ctrldir_basename from test.common import write_config, config_path, write_metadata from test.common import make_exe +from payu.models.cice import CICE4_RESTART_HEADER_FORMAT verbose = True @@ -203,6 +206,18 @@ def test_setup(config, cice_nml, cice_history_nml): input_nml["setup_nml"]["dump_last"] +@pytest.fixture +def prior_restart_dir(): + """Create prior restart directory""" + restart_path = RESTART_PATH + os.mkdir(restart_path) + + yield restart_path + + # Cleanup + shutil.rmtree(restart_path) + + @pytest.fixture( # prior_istep0, prior_npt, runtime, expected_npt params=[ @@ -217,14 +232,12 @@ def run_timing_params(request): @pytest.fixture -def prior_restart_cice4(run_timing_params): +def prior_restart_cice4(run_timing_params, prior_restart_dir): """ Create fake prior restart files required by CICE4's setup. This differs from CICE5, which doesn't require a cice_in.nml file in the restart directory. 
""" - prior_restart_path = RESTART_PATH - os.mkdir(prior_restart_path) prior_istep0, prior_npt, _, _ = run_timing_params # Previous cice_in namelist with time information @@ -233,21 +246,20 @@ def prior_restart_cice4(run_timing_params): "npt": prior_npt, "dt": DEFAULT_DT }} - f90nml.write(restart_cice_in, prior_restart_path/CICE_NML_NAME) + f90nml.write(restart_cice_in, prior_restart_dir/CICE_NML_NAME) # Additional restart files required by CICE4 setup - (prior_restart_path/ICED_RESTART_NAME).touch() - (prior_restart_path/RESTART_POINTER_NAME).touch() + (prior_restart_dir/ICED_RESTART_NAME).touch() + (prior_restart_dir/RESTART_POINTER_NAME).touch() - yield prior_restart_path + yield prior_restart_dir - # Teardown - shutil.rmtree(prior_restart_path) + # Teardown handled by prior restart dir fixture @pytest.mark.parametrize("config", [CONFIG_WITH_RESTART], indirect=True) -def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4, +def test_restart_setup(config, cice_nml, prior_restart_cice4, run_timing_params): """ Test that seting up an experiment from a cloned control directory @@ -289,7 +301,7 @@ def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4, @pytest.mark.parametrize("config", [DEFAULT_CONFIG], indirect=True) -def test_no_restart_ptr(config, cice_nml, cice_history_nml): +def test_no_restart_ptr(config, cice_nml): """ Test that payu raises an error if no prior restart path is specified, restart is `true` in cice_in.nml, and the restart pointer is missing. @@ -311,6 +323,182 @@ def test_no_restart_ptr(config, cice_nml, cice_history_nml): model.setup() +def write_iced_header(iced_path, bint, istep0, time, time_forc): + """ + Write a fake binary CICE4 iced restart file containing + only a header. 
+ """ + with open(iced_path, 'wb') as iced_file: + header = struct.pack( + CICE4_RESTART_HEADER_FORMAT, bint, istep0, time, time_forc + ) + iced_file.write(header) + + +@pytest.mark.parametrize("config", [DEFAULT_CONFIG], + indirect=["config"]) +@pytest.mark.parametrize( + 'run_start_date, previous_runtime', + [ + (datetime.datetime(1, 1, 1), 1), + (datetime.datetime(9999, 12, 31), 315537811200) + ] +) +def test_overwrite_restart_ptr(config, + cice_nml, + run_start_date, + previous_runtime, + prior_restart_dir, + empty_workdir + ): + """ + CICE4 in ACCESS-ESM1.5 finds the iced restart file based on the + run start date. Check that: + 1. payu identifies the correct iced restart from given start date + 2. payu writes the correct filename to the restart pointer file. + """ + # Initialize the experiment + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + cice_model = expt.models[0] + + # Create iced restart with the specified date and runtime + run_start_date_int = payu.calendar.date_to_int(run_start_date) + iced_name = f"iced.{run_start_date_int:08d}" + iced_path = prior_restart_dir / iced_name + write_iced_header(iced_path, + bint=0, + istep0=0, + time=previous_runtime, + time_forc=0) + + # Create an iced restart with different date, to check + # that payu ignores it + wrong_iced_name = "iced.01010101" + wrong_runtime = 1000 + wrong_iced_path = prior_restart_dir / wrong_iced_name + write_iced_header(wrong_iced_path, + bint=0, + istep0=0, + time=wrong_runtime, + time_forc=0) + + # Check test set up correctly + if iced_name == wrong_iced_name: + msg = (f"Correct and incorrect iced files have the " + f"same name: '{iced_name}'. 
These should not match.") + raise ValueError(msg) + + # Set model paths + cice_model.prior_restart_path = prior_restart_dir + cice_model.work_init_path = empty_workdir + + cice_model.overwrite_restart_ptr(run_start_date, + previous_runtime, + "fake_file") + + # Check correct iced filename written to pointer + res_ptr_path = os.path.join(cice_model.work_init_path, + "ice.restart_file") + + with open(res_ptr_path, 'r') as res_ptr: + ptr_iced = res_ptr.read() + + assert ptr_iced == f"./{iced_name}" + + +@pytest.mark.parametrize("config", [DEFAULT_CONFIG], + indirect=["config"]) +def test_overwrite_restart_ptr_missing_iced(config, + cice_nml, + prior_restart_dir, + empty_workdir + ): + """ + Check that cice raises error when an iced restart file matching + the run start date is not found. + """ + # Initialize the experiment + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + cice_model = expt.models[0] + + # Run timing information + previous_runtime = 500000 + run_start_date = datetime.date(500, 12, 3) + # Expected iced file + run_start_date_int = payu.calendar.date_to_int(run_start_date) + iced_name = f"iced.{run_start_date_int:08d}" + + # Create iced restart files with wrong dates in their name + wrong_iced_dates = [run_start_date - datetime.timedelta(days=1), + run_start_date + datetime.timedelta(days=1)] + wrong_iced_names = [f"iced.{payu.calendar.date_to_int(date)}" + for date in wrong_iced_dates] + wrong_runtime = 1000 + for wrong_iced_file in wrong_iced_names: + write_iced_header(prior_restart_dir / wrong_iced_file, + bint=0, + istep0=0, + time=wrong_runtime, + time_forc=0) + + # Set model paths + cice_model.prior_restart_path = prior_restart_dir + cice_model.work_init_path = empty_workdir + + with pytest.raises(FileNotFoundError, match=iced_name): + cice_model.overwrite_restart_ptr(run_start_date, + previous_runtime, + "fake_file") + + +@pytest.mark.parametrize("config", 
[DEFAULT_CONFIG], + indirect=["config"]) +def test_check_date_consistency(config, + cice_nml, + prior_restart_dir, + ): + """ + CICE4 in ACCESS-ESM1.5 reads the binary restart header to check that + its runtime matches a given prior runtime. + Check that an error is raised when the two do not match. + """ + # Initialize the experiment + with cd(ctrldir): + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + cice_model = expt.models[0] + + # Experiment timing information + previous_runtime = 500000 + + # Create an iced file with a different runtime + iced_name = "iced.YYYYMMDD" + wrong_runtime = 1000 + iced_path = prior_restart_dir / iced_name + write_iced_header(iced_path, + bint=0, + istep0=0, + time=wrong_runtime, + time_forc=0) + + # Sanity check + if wrong_runtime == previous_runtime: + msg = ("Correct runtime 'previous_runtime' and incorrect " + "runtime 'wrong_runtime' have the same value:" + f" {previous_runtime}. These should not match.") + raise ValueError(msg) + + with pytest.raises(RuntimeError, match=iced_name): + cice_model._cice4_check_date_consistency( + iced_path, + previous_runtime, + "fake_file") + + CONFIG_WITH_COMPRESSION = { "laboratory": "lab", "jobname": "testrun",