Skip to content

Commit

Permalink
Allow missing/empty cycle time files in timeseries (JCSDA-internal#211)
Browse files Browse the repository at this point in the history
## Description

- Added support for missing/empty cycle time files when running in
timeseries mode
- Added requirements files that work on Discover Milan nodes

## Demonstration
Comparing two time series of different lengths:

![time_series_omb](https://github.com/user-attachments/assets/0b91a062-28a3-4bb2-ae0d-6aa8d919abfe)


Timeseries with a missing cycle time:

![time_series_omb](https://github.com/user-attachments/assets/d97dd883-7f62-410d-a8f7-31e30edc4eb5)
  • Loading branch information
asewnath authored Nov 21, 2024
1 parent a21fce8 commit 474199a
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 26 deletions.
25 changes: 10 additions & 15 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4>=1.5.3
matplotlib>=3.9.0
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy>=2.0.0

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj==3.1.0
importlib-metadata==4.8.2
setuptools
pyyaml
pycodestyle
netCDF4
matplotlib
cartopy
scipy
xarray
pandas
numpy

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
Expand Down
26 changes: 26 additions & 0 deletions requirements_gmao.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4
matplotlib
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy==1.22.3

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj
importlib-metadata==4.8.2
contourpy==1.0.7

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
scikit-learn
seaborn
hvplot
nbconvert
bokeh
geopandas
geoviews
28 changes: 28 additions & 0 deletions requirements_sles15.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4
matplotlib
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy==1.22.3
attrs==21.4.0

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj
importlib_metadata==7.1.0
contourpy==1.0.7
msgpack>=1.0.0

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
scikit-learn
seaborn
hvplot
nbconvert
bokeh
geopandas
geoviews
29 changes: 23 additions & 6 deletions src/eva/eva_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@
import argparse
import os
from collections import defaultdict
import xarray as xr
import numpy as np

from eva.utilities.config import get
from eva.utilities.logger import Logger
from eva.utilities.timing import Timing
from eva.data.data_driver import data_driver
from eva.time_series.time_series import add_empty_to_timeseries
from eva.time_series.time_series import collapse_collection_to_time_series
from eva.time_series.time_series_utils import create_empty_data, get_filename, check_file
from eva.transforms.transform_driver import transform_driver
from eva.plotting.batch.base.plot_tools.figure_driver import figure_driver
from eva.data.data_collections import DataCollections
from eva.utilities.duration import iso_duration_to_timedelta
from eva.utilities.utils import load_yaml_file


# --------------------------------------------------------------------------------------------------


Expand Down Expand Up @@ -160,14 +163,29 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections):
if name == time_series_config['collection']:
transform_dict['transforms'].append(transform)

# Assert that datasets_config is the same length as dates
logger.assert_abort(len(datasets_config) == len(dates), 'When running in time ' +
'series mode the number of datasets must be the same as the ' +
'number of dates.')
# Check if first file is empty. If it is, abort.
empty_dataset_config = datasets_config[0]
filename = get_filename(empty_dataset_config, logger)
check_file(filename, logger)

# Loop over datasets reading each one in turn, internally appending the data_collections
for ind, dataset_config in enumerate(datasets_config):

# Pull out information to check for missing date
date = dates[ind]

# Check if file exists, if not add empty and continue
filename = get_filename(dataset_config, logger)
if not os.path.isfile(filename):
add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
empty_dataset_config, data_collections)
continue
# Check if file exists but is size zero, add empty and continue
elif os.stat(filename).st_size == 0:
add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
empty_dataset_config, data_collections)
continue

# Create a temporary collection for this time step
data_collections_tmp = DataCollections()

Expand All @@ -185,7 +203,6 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections):
timing.stop('TransformDriverExecute')

# Collapse data into time series
date = dates[ind]
collapse_collection_to_time_series(logger, ind, date, time_series_config,
data_collections, data_collections_tmp)

Expand Down
11 changes: 10 additions & 1 deletion src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ datasets:
- name: ObsValue
variables: &variables [brightnessTemperature]
- name: hofx
#Empty
- name: experiment
type: IodaObsSpace
filenames:
- ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T000000Z.nc4
channels: *channels
groups:
- name: ObsValue
- name: hofx
- name: experiment
type: IodaObsSpace
filenames:
Expand All @@ -31,7 +40,7 @@ transforms:
time_series:

- begin_date: '2020-12-14T21:00:00'
final_date: '2020-12-15T03:00:00'
final_date: '2020-12-15T09:00:00'
interval: 'PT6H'

collection: experiment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@ datasets:
bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias
lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse

# Empty
- name: experiment
type: JediVariationalBiasCorrection
bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.satbias
lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.tlapse

time_series:

- begin_date: '2020-12-15T00:00:00'
final_date: '2020-12-15T06:00:00'
- begin_date: '2021-12-11T15:00:00'
final_date: '2021-12-12T03:00:00'
interval: 'PT6H'

collection: experiment
Expand Down
14 changes: 13 additions & 1 deletion src/eva/time_series/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

import numpy as np
import xarray as xr

from eva.data.data_collections import DataCollections
from eva.time_series.time_series_utils import create_empty_data

# --------------------------------------------------------------------------------------------------

Expand All @@ -23,6 +24,17 @@
}


# --------------------------------------------------------------------------------------------------

def add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
                            dataset_config, data_collections):
    """
    Insert a placeholder entry into the time series for a cycle whose file is unavailable.

    A NaN-filled stand-in collection is built from ``dataset_config`` via
    ``create_empty_data`` and then collapsed into ``data_collections`` at
    position ``ind`` / ``date`` exactly as a real time step would be.

    Args:
        logger: Logger handed through to the helpers.
        date: Cycle time the placeholder stands in for.
        ind: Index of this cycle within the time series.
        timing: Timing object handed through to ``create_empty_data``.
        time_series_config (dict): Time series configuration.
        dataset_config (dict): Dataset configuration used as the template for the placeholder.
        data_collections: Collections object that accumulates the time series.

    Returns:
        None
    """

    # Build the stand-in collection for the missing cycle ...
    placeholder_collections = create_empty_data(time_series_config, dataset_config,
                                                timing, logger)

    # ... and fold it into the accumulated time series like any other time step.
    collapse_collection_to_time_series(logger, ind, date, time_series_config,
                                       data_collections, placeholder_collections)


# --------------------------------------------------------------------------------------------------


Expand Down
43 changes: 43 additions & 0 deletions src/eva/time_series/time_series_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import numpy as np
import xarray as xr
from eva.data.data_driver import data_driver
from eva.data.data_collections import DataCollections


def _ioda_obs_space_filename(dataset_config):
    # Only the first entry of the "filenames" list is used.
    return dataset_config["filenames"][0]


def _jedi_bias_correction_filename(dataset_config):
    # The bias file is the one reported; "lapse_file" is not consulted here.
    return dataset_config["bias_file"]


# Dispatch table: dataset "type" string -> callable that takes the dataset
# configuration and returns the filename used to represent that dataset.
filename_retrieval = {
    "IodaObsSpace": _ioda_obs_space_filename,
    "JediVariationalBiasCorrection": _jedi_bias_correction_filename,
}


def get_filename(dataset_config, logger):
    """
    Look up the filename associated with a dataset configuration.

    The dataset's ``"type"`` entry selects a retrieval callable from the
    module-level ``filename_retrieval`` table, which is then applied to the
    configuration.

    Args:
        dataset_config (dict): Dataset configuration; must contain ``"type"``.
        logger: Logger whose ``assert_abort`` is called with the membership
            check and an error message for unknown types.

    Returns:
        The value produced by the retrieval callable for this dataset type.
    """

    dataset_type = dataset_config["type"]

    # Unsupported types are reported through the logger before the lookup.
    type_is_supported = dataset_type in filename_retrieval
    logger.assert_abort(type_is_supported, f'Unknown dataset_type {dataset_type}')

    retrieve = filename_retrieval[dataset_type]
    return retrieve(dataset_config)


def check_file(filename, logger):
    """
    Verify that the first time series file is present and not empty.

    Args:
        filename (str): Path of the file to check.
        logger: Logger whose ``abort`` is called with a message when the file
            is missing or has size zero.

    Returns:
        None
    """

    # Missing file: report it and do not go on to the size check.
    if not os.path.isfile(filename):
        logger.abort('First file provided to timeseries must exist.')
        return

    # File is present but holds no bytes.
    if os.path.getsize(filename) == 0:
        logger.abort('First file provided to timeseries must be nonzero.')


def create_empty_data(timeseries_config, dataset_config, timing, logger):
    """
    Build a NaN-filled data collection to stand in for a missing cycle time.

    ``dataset_config`` is read through ``data_driver`` into a scratch
    collection; the dataset named by ``timeseries_config["collection"]`` is
    then used as a template for ``xr.full_like(..., np.nan)``.

    Args:
        timeseries_config (dict): Time series configuration; ``"collection"`` names the
            collection to use as the template.
        dataset_config (dict): Configuration of the dataset that is read as the template.
        timing: Timing object passed to ``data_driver``.
        logger: Logger passed to ``data_driver``.

    Returns:
        DataCollections: A new collections object holding only the NaN-filled dataset,
        stored under the same collection name.
    """

    # Scratch collections that receive the template dataset.
    template_collections = DataCollections()
    collection_name = timeseries_config["collection"]
    data_driver(dataset_config, template_collections, timing, logger)
    template_dataset = template_collections.get_data_collection(collection_name)

    # NOTE(review): full_like keeps the template's dtypes unless told otherwise;
    # confirm the NaN fill behaves as intended if any variable is integer-typed.
    nan_filled_dataset = xr.full_like(template_dataset, np.nan)

    placeholder_collections = DataCollections()
    placeholder_collections.create_or_add_to_collection(collection_name, nan_filled_dataset)
    return placeholder_collections
1 change: 0 additions & 1 deletion src/eva/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

# --------------------------------------------------------------------------------------------------


import re
import string
import yaml
Expand Down

0 comments on commit 474199a

Please sign in to comment.