Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify llc read methods #32

Merged
merged 3 commits into from
Jan 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 0 additions & 157 deletions xmitgcm/llc_utils.py

This file was deleted.

30 changes: 20 additions & 10 deletions xmitgcm/mds_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def open_mdsdataset(data_dir, grid_dir=None,
grid_vars_to_coords=True, swap_dims=None,
endian=">", chunks=None,
ignore_unknown_vars=False, default_dtype=None,
nx=None, ny=None, nz=None):
nx=None, ny=None, nz=None,
llc_method="smallchunks"):
"""Open MITgcm-style mds (.data / .meta) file output as xarray datset.

Parameters
Expand Down Expand Up @@ -80,6 +81,15 @@ def open_mdsdataset(data_dir, grid_dir=None,
The numerical dimensions of the model. These will be inferred from
``XC.meta`` and ``RC.meta`` if they are not specified. If
``geometry==llc``, ``ny`` does not have to be specified.
llc_method : {"smallchunks", "bigchunks"}, optional
Which routine to use for reading LLC data. "smallchunks" splits the file
into an individual dask chunk of size (nx x nx) for each face of each
level (i.e. the total number of chunks is 13 * nz). "bigchunks" loads
the whole raw data file (either into memory or as a numpy.memmap),
splits it into faces, and concatenates those faces together using
``dask.array.concatenate``. The different methods will have different
memory and i/o performance depending on the details of the system
configuration.

Returns
-------
Expand Down Expand Up @@ -150,9 +160,8 @@ def open_mdsdataset(data_dir, grid_dir=None,
endian=endian, chunks=chunks,
ignore_unknown_vars=ignore_unknown_vars,
default_dtype=default_dtype,
nx=nx, ny=ny, nz=nz)
nx=nx, ny=ny, nz=nz, llc_method=llc_method)
datasets = [open_mdsdataset(

data_dir, iters=iternum, read_grid=False, **kwargs)
for iternum in iters]
# now add the grid
Expand All @@ -177,9 +186,9 @@ def open_mdsdataset(data_dir, grid_dir=None,
geometry, endian,
ignore_unknown_vars=ignore_unknown_vars,
default_dtype=default_dtype,
nx=nx, ny=ny, nz=nz)
nx=nx, ny=ny, nz=nz, llc_method=llc_method)
ds = xr.Dataset.load_store(store)

if swap_dims:
ds = _swap_dimensions(ds, geometry)
if grid_vars_to_coords:
Expand Down Expand Up @@ -255,7 +264,7 @@ def __init__(self, data_dir, grid_dir=None,
geometry='sphericalpolar',
endian='>', ignore_unknown_vars=False,
default_dtype=np.dtype('f4'),
nx=None, ny=None, nz=None):
nx=None, ny=None, nz=None, llc_method="smallchunks"):
"""
This is not a user-facing class. See open_mdsdataset for argument
documentation. The only ones which are distinct are:
Expand Down Expand Up @@ -322,6 +331,7 @@ def __init__(self, data_dir, grid_dir=None,
nyraw = self.ny
self.default_shape_3D = (self.nz, nyraw, self.nx)
self.default_shape_2D = (nyraw, self.nx)
self.llc_method=llc_method

# Now set up the corresponding coordinates.
# Rather than assuming the dimension names, we use Comodo conventions
Expand Down Expand Up @@ -408,21 +418,21 @@ def __init__(self, data_dir, grid_dir=None,
prefixes = []
if read_grid:
prefixes = prefixes + list(self._all_grid_variables.keys())

# add data files
prefixes = (prefixes +
_get_all_matching_prefixes(
data_dir,
iternum,
file_prefixes))

for p in prefixes:
# use a generator to loop through the variables in each file
for (vname, dims, data, attrs) in self.load_from_prefix(p, iternum):
# print(vname, dims, data.shape)
#Sizes of grid variables can vary between mitgcm versions. Check for
#such inconsistency and correct if so
(vname, dims, data, attrs) = self.fix_inconsistent_variables(vname, dims, data, attrs)
(vname, dims, data, attrs) = self.fix_inconsistent_variables(vname, dims, data, attrs)

thisvar = xr.Variable(dims, data, attrs)
self._variables[vname] = thisvar
Expand Down Expand Up @@ -479,7 +489,7 @@ def load_from_prefix(self, prefix, iternum=None):
basename = os.path.join(ddir, fname_base)
try:
vardata = read_mds(basename, iternum, endian=self.endian,
llc=self.llc)
llc=self.llc, llc_method=self.llc_method)
except IOError as ioe:
# that might have failed because there was no meta file present
# we can try to get around this by specifying the shape and dtype
Expand Down
34 changes: 23 additions & 11 deletions xmitgcm/test/test_mds_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,29 +294,34 @@ def test_read_mds_no_meta(all_mds_datadirs):
assert isinstance(res[prefix], dask.array.core.Array)
assert res[prefix].shape == shape


def test_read_raw_data_llc(llc_mds_datadirs):
@pytest.mark.parametrize("method", ["smallchunks", "bigchunks"])
def test_read_raw_data_llc(llc_mds_datadirs, method):
dirname, expected = llc_mds_datadirs

from xmitgcm.llc_utils import read_3d_llc_data
from xmitgcm.utils import read_3d_llc_data

shape = expected['shape']
nz, nface, ny, nx = shape
# the function will also return a nrecs dimension
nrecs = 1
shape = (nrecs,) + shape

dtype = expected['dtype'].newbyteorder('>')

# if we use memmap=True, we open too many files
kwargs = dict(method=method, dtype=dtype, memmap=False)

fname = os.path.join(dirname, 'T.%010d.data' % expected['test_iternum'])
data = read_3d_llc_data(fname, nz, nx, dtype=dtype, memmap=False)
data = read_3d_llc_data(fname, nz, nx, **kwargs)
assert data.shape == shape
assert data.compute().shape == shape

fname = os.path.join(dirname, 'XC.data')
data = read_3d_llc_data(fname, 1, nx, dtype=dtype, memmap=False)
# make sure the first dimension is squeezed off
assert data.shape == shape[1:]
assert data.compute().shape == shape[1:]
data = read_3d_llc_data(fname, 1, nx, **kwargs)
# the z dimension is squeezed out by MDS, so the function matches that behavior
shape_2d = (shape[0],) + shape[2:]
assert data.shape == shape_2d
assert data.compute().shape == shape_2d

#########################################################
### Below are all tests that actually create datasets ###
Expand Down Expand Up @@ -490,7 +495,7 @@ def test_swap_dims(all_mds_datadirs):
print(ds)
assert 'XC' in ds['S'].dims
assert 'YC' in ds['S'].dims



def test_prefixes(all_mds_datadirs):
Expand Down Expand Up @@ -652,17 +657,24 @@ def test_layers_diagnostics(layers_mds_datadirs):
assert var in ds
assert ds[var].dims == dims

def test_llc_dims(llc_mds_datadirs):
@pytest.mark.parametrize("method", ["smallchunks", "bigchunks"])
def test_llc_dims(llc_mds_datadirs, method):
"""Check that the LLC file dimensions are correct."""
dirname, expected = llc_mds_datadirs
ds = xmitgcm.open_mdsdataset(dirname,
iters=expected['test_iternum'],
geometry=expected['geometry'])
geometry=expected['geometry'], llc_method=method)

nz, nface, ny, nx = expected['shape']
nt = 1

assert ds.dims['face'] == 13
assert ds.rA.dims == ('face', 'j', 'i')
assert ds.rA.values.shape == (nface, ny, nx)
assert ds.U.dims == ('time', 'k', 'face', 'j', 'i_g')
assert ds.U.values.shape == (nt, nz, nface, ny, nx)
assert ds.V.dims == ('time', 'k', 'face', 'j_g', 'i')
assert ds.V.values.shape == (nt, nz, nface, ny, nx)

def test_drc_length(all_mds_datadirs):
"""Test that open_mdsdataset is adding an extra level to drC if it has length nr"""
Expand Down
Loading