Skip to content

Commit

Permalink
Add standardise_mom6_filenames.sh script (#32) (#35)
Browse files Browse the repository at this point in the history
* Add standardise_mom6_filenames.sh script (#32)

* mom6 filenames test
---------

Co-authored-by: Anton Steketee <79179784+anton-seaice@users.noreply.github.com>
  • Loading branch information
minghangli-uni and anton-seaice authored Sep 10, 2024
1 parent 1a1cd0a commit b4048fd
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 0 deletions.
1 change: 1 addition & 0 deletions payu_config/archive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

# Run the archive-stage steps in order, locating each one relative to the
# directory that holds this script. The command substitutions are quoted so
# that a script path containing whitespace is not split into several words.
# NOTE(review): the shell steps are sourced, so they run inside this shell;
# an 'exit' in any of them also ends this script - confirm that is intended.
source "$(dirname "$0")/archive_scripts/archive_cice_restarts.sh"
source "$(dirname "$0")/archive_scripts/concat_ice_daily.sh"
source "$(dirname "$0")/archive_scripts/standardise_mom6_filenames.sh"
python3 "$(dirname "$0")/archive_scripts/build_intake_ds.py"
53 changes: 53 additions & 0 deletions payu_config/archive_scripts/standardise_mom6_filenames.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/bash
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0.
#
# Standardise file naming for MOM6 output files in access-om3 by removing the underscore before the four-digit year, i.e., replacing '_YYYY' with 'YYYY'
# This was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively a path to an "archive" directory can be provided.
# For more details, see https://github.com/COSIMA/om3-scripts/issues/32

# Print the usage message for this script to stdout.
# Takes no arguments.
Help()
{
  # printf is used instead of 'echo -e' so that escape handling does not
  # depend on how the shell's echo builtin is configured.
  printf 'Standardise file naming for MOM6 output files.\n\n'
  printf 'Syntax: standardise_mom6_filenames.sh [-h] [-d DIRECTORY]\n'
  printf 'options:\n'
  printf 'h Print this help message.\n'
  printf "d Process files in the specified 'DIRECTORY'.\n"
}

# Parse the command-line options.
#   -h            print the help text and stop
#   -d DIRECTORY  process only DIRECTORY (stored in 'out_dir')
# The leading ':' in the option string selects silent error reporting: a
# missing option argument is delivered as ':' and an unknown option as '?'.
# Every error path writes to stderr and exits with a non-zero status.
while getopts ":hd:" option; do
  case "$option" in
    h) # display help
      Help
      exit 0;;
    d) # Enter a directory
      out_dir=$OPTARG
      # quoted so that a path containing whitespace is tested as one word
      if [ ! -d "$out_dir" ]; then
        echo "$out_dir Does not exist" >&2
        exit 1
      fi;;
    :) # option given without its required argument
      echo "Error: Option -$OPTARG requires an argument" >&2
      exit 1;;
    \?) # Invalid option
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done

# Collect the directories to process. If no directory was specified, use every
# 'archive/output*' entry whose name ends in a digit, relative to the current
# working directory. The glob is expanded straight into an array instead of
# parsing 'ls' output, so paths containing whitespace stay intact.
if [ -z "${out_dir:-}" ]; then
  out_dirs=(archive/output*[0-9])
else
  out_dirs=("$out_dir")
fi

# process each output directory
for dir in "${out_dirs[@]}"; do
  # an unmatched glob is left as the literal pattern; skip anything that is
  # not a directory
  [ -d "$dir" ] || continue
  # process each mom6 file
  for current_file in "$dir"/access-om3.mom6.*.nc; do
    if [ -f "$current_file" ]; then
      # Apply the substitution to the file name only; running it on the whole
      # path would rewrite a '_NNNN' that occurs in a parent directory name.
      file_dir=${current_file%/*}
      base_name=${current_file##*/}
      new_basename=$(printf '%s\n' "$base_name" | sed -E 's/_([0-9]{4})/\1/')
      new_filename="$file_dir/$new_basename"
      # nothing to do for names that are already standardised
      if [ "$new_filename" = "$current_file" ]; then
        continue
      fi
      # rename the file without overwriting existing files
      if [ -e "$new_filename" ]; then
        echo "Skipping $current_file: $new_filename already exists" >&2
        continue
      fi
      mv -n -- "$current_file" "$new_filename"
    fi
  done
done
159 changes: 159 additions & 0 deletions test/test_payu_conf/test_mom6_filenames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import pytest
import pandas as pd

from os import makedirs, chdir
from subprocess import run
from pathlib import Path

# Directory two levels above the directory that contains this test file.
scripts_base = Path(__file__).parents[2]
# Path of the shell script exercised by the tests below.
run_str = f"{scripts_base}/payu_config/archive_scripts/standardise_mom6_filenames.sh"

# Common prefix of every file name the tests create and check.
DIAG_BASE = "access-om3.mom6.h.test"


def assert_file_exists(p):
    """Raise AssertionError unless ``p`` resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if resolved.is_file():
        return
    raise AssertionError("File does not exist: %s" % str(p))


def assert_f_not_exists(p):
    """Raise AssertionError if ``p`` resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if not resolved.is_file():
        return
    raise AssertionError("File exists and should not: %s" % str(p))


def yearly_files(dir_name, n, tmp_path):
    """
    Create ``n`` empty files, one per year starting with 2010, inside
    ``tmp_path/dir_name`` and return their paths as a list of strings.

    Each file is named ``<DIAG_BASE>._<YYYY>.nc``, i.e. with the underscore
    before the year. The directory is created here.
    """
    stamps = pd.date_range("2010-01-01", freq="YE", periods=n)
    out_dir = str(tmp_path) + "/" + dir_name + "/"

    paths = []
    for stamp in stamps:
        year = str(stamp)[0:4]
        paths.append(f"{out_dir}{DIAG_BASE}._{year}.nc")

    makedirs(out_dir)

    # opening in write mode is enough to create an empty file
    for file_path in paths:
        with open(file_path, "w"):
            pass

    # confirm every file was created before handing the paths back
    for file_path in paths:
        assert_file_exists(file_path)

    return paths


@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
        ("archive/output999", False, 1),
        ("archive/output9999", False, 1),
        ("archive/output574", True, 12),
    ],
)  # run this test with several folder names and lengths, sometimes providing the directory as an argument
def test_true_case(hist_dir, use_dir, n, tmp_path):
    """
    Create ``n`` yearly files named with an underscore before the year, run
    the script, and check that each file was renamed to the name without the
    underscore and that no file for an extra year was created.
    """

    yearly_paths = yearly_files(hist_dir, n, tmp_path)
    chdir(tmp_path)
    output_dir = Path(yearly_paths[0]).parents[0]

    if not use_dir:  # default path
        run([run_str])
    else:  # provide path
        run(
            [
                run_str,
                "-d",
                output_dir,
            ],
        )

    # one year more than was generated, so the last entry must not exist
    expected_years = pd.date_range("2010-01-01", freq="YE", periods=n + 1)

    # valid output filenames
    expected_paths = [
        f"{output_dir}/{DIAG_BASE}.{str(t)[0:4]}.nc" for t in expected_years
    ]

    for p in expected_paths[0:n]:
        assert_file_exists(p)

    # expected_paths[n] is a single path string; looping over it would check
    # its individual characters instead of the path itself
    assert_f_not_exists(expected_paths[n])

    # the original, underscore-prefixed names must be gone
    for p in yearly_paths:
        assert_f_not_exists(p)


@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
    ],
)
def test_dont_override(hist_dir, use_dir, n, tmp_path):
    """
    Create the underscore-named files and also the files they would be
    renamed to, run the script, and confirm that both sets of files still
    exist afterwards.
    """

    yearly_paths = yearly_files(hist_dir, n, tmp_path)
    chdir(tmp_path)
    output_dir = Path(yearly_paths[0]).parents[0]

    # pre-create every rename target so the script finds it already present
    target_paths = []
    for stamp in pd.date_range("2010-01-01", freq="YE", periods=n):
        target_paths.append(f"{output_dir}/{DIAG_BASE}.{str(stamp)[0:4]}.nc")

    for target in target_paths:
        with open(target, "w"):
            pass

    # default path unless use_dir asks for the directory to be passed
    command = [run_str, "-d", output_dir] if use_dir else [run_str]
    run(command)

    # targets first, then the original files
    for kept in target_paths + yearly_paths:
        assert_file_exists(kept)


# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 31), ("Default", 27)])
# def test_no_override(hist_dir, ndays, hist_base, tmp_path):
# """
# Run the script to convert the daily data into monthly files, but the output filename already exists, and check nothing happens.
# """

# daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path)

# chdir(tmp_path)
# output_dir = Path(daily_paths[0]).parents[0]

# expected_months = pd.date_range("2010-01-01", freq="ME", periods=1)

# monthly_paths = [
# f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months
# ]
# for p in monthly_paths:
# Path(p).touch()

# run([run_str])

# for p in daily_paths:
# assert_file_exists(p)

# for p in monthly_paths:
# assert_file_exists(p)

0 comments on commit b4048fd

Please sign in to comment.