Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify llc read methods #32

Merged
merged 3 commits into from
Jan 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 0 additions & 157 deletions xmitgcm/llc_utils.py

This file was deleted.

30 changes: 20 additions & 10 deletions xmitgcm/mds_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def open_mdsdataset(data_dir, grid_dir=None,
grid_vars_to_coords=True, swap_dims=None,
endian=">", chunks=None,
ignore_unknown_vars=False, default_dtype=None,
nx=None, ny=None, nz=None):
nx=None, ny=None, nz=None,
llc_method="smallchunks"):
"""Open MITgcm-style mds (.data / .meta) file output as xarray datset.

Parameters
Expand Down Expand Up @@ -80,6 +81,15 @@ def open_mdsdataset(data_dir, grid_dir=None,
The numerical dimensions of the model. These will be inferred from
``XC.meta`` and ``RC.meta`` if they are not specified. If
``geometry==llc``, ``ny`` does not have to be specified.
llc_method : {"smallchunks", "bigchunks"}, optional
Which routine to use for reading LLC data. "smallchunks" splits the file
into an individual dask chunk of size (nx x nx) for each face of each
level (i.e. the total number of chunks is 13 * nz). "bigchunks" loads
the whole raw data file (either into memory or as a numpy.memmap),
splits it into faces, and concatenates those faces together using
``dask.array.concatenate``. The different methods will have different
memory and i/o performance depending on the details of the system
configuration.

Returns
-------
Expand Down Expand Up @@ -150,9 +160,8 @@ def open_mdsdataset(data_dir, grid_dir=None,
endian=endian, chunks=chunks,
ignore_unknown_vars=ignore_unknown_vars,
default_dtype=default_dtype,
nx=nx, ny=ny, nz=nz)
nx=nx, ny=ny, nz=nz, llc_method=llc_method)
datasets = [open_mdsdataset(

data_dir, iters=iternum, read_grid=False, **kwargs)
for iternum in iters]
# now add the grid
Expand All @@ -177,9 +186,9 @@ def open_mdsdataset(data_dir, grid_dir=None,
geometry, endian,
ignore_unknown_vars=ignore_unknown_vars,
default_dtype=default_dtype,
nx=nx, ny=ny, nz=nz)
nx=nx, ny=ny, nz=nz, llc_method=llc_method)
ds = xr.Dataset.load_store(store)

if swap_dims:
ds = _swap_dimensions(ds, geometry)
if grid_vars_to_coords:
Expand Down Expand Up @@ -255,7 +264,7 @@ def __init__(self, data_dir, grid_dir=None,
geometry='sphericalpolar',
endian='>', ignore_unknown_vars=False,
default_dtype=np.dtype('f4'),
nx=None, ny=None, nz=None):
nx=None, ny=None, nz=None, llc_method="smallchunks"):
"""
This is not a user-facing class. See open_mdsdataset for argument
documentation. The only ones which are distinct are:
Expand Down Expand Up @@ -322,6 +331,7 @@ def __init__(self, data_dir, grid_dir=None,
nyraw = self.ny
self.default_shape_3D = (self.nz, nyraw, self.nx)
self.default_shape_2D = (nyraw, self.nx)
self.llc_method=llc_method

# Now set up the corresponding coordinates.
# Rather than assuming the dimension names, we use Comodo conventions
Expand Down Expand Up @@ -408,21 +418,21 @@ def __init__(self, data_dir, grid_dir=None,
prefixes = []
if read_grid:
prefixes = prefixes + list(self._all_grid_variables.keys())

# add data files
prefixes = (prefixes +
_get_all_matching_prefixes(
data_dir,
iternum,
file_prefixes))

for p in prefixes:
# use a generator to loop through the variables in each file
for (vname, dims, data, attrs) in self.load_from_prefix(p, iternum):
# print(vname, dims, data.shape)
#Sizes of grid variables can vary between mitgcm versions. Check for
#such inconsistency and correct if so
(vname, dims, data, attrs) = self.fix_inconsistent_variables(vname, dims, data, attrs)
(vname, dims, data, attrs) = self.fix_inconsistent_variables(vname, dims, data, attrs)

thisvar = xr.Variable(dims, data, attrs)
self._variables[vname] = thisvar
Expand Down Expand Up @@ -479,7 +489,7 @@ def load_from_prefix(self, prefix, iternum=None):
basename = os.path.join(ddir, fname_base)
try:
vardata = read_mds(basename, iternum, endian=self.endian,
llc=self.llc)
llc=self.llc, llc_method=self.llc_method)
except IOError as ioe:
# that might have failed because there was no meta file present
# we can try to get around this by specifying the shape and dtype
Expand Down
34 changes: 23 additions & 11 deletions xmitgcm/test/test_mds_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,29 +294,34 @@ def test_read_mds_no_meta(all_mds_datadirs):
assert isinstance(res[prefix], dask.array.core.Array)
assert res[prefix].shape == shape


def test_read_raw_data_llc(llc_mds_datadirs):
@pytest.mark.parametrize("method", ["smallchunks", "bigchunks"])
def test_read_raw_data_llc(llc_mds_datadirs, method):
dirname, expected = llc_mds_datadirs

from xmitgcm.llc_utils import read_3d_llc_data
from xmitgcm.utils import read_3d_llc_data

shape = expected['shape']
nz, nface, ny, nx = shape
# the function will also return a nrecs dimension
nrecs = 1
shape = (nrecs,) + shape

dtype = expected['dtype'].newbyteorder('>')

# if we use memmap=True, we open too many files
kwargs = dict(method=method, dtype=dtype, memmap=False)

fname = os.path.join(dirname, 'T.%010d.data' % expected['test_iternum'])
data = read_3d_llc_data(fname, nz, nx, dtype=dtype, memmap=False)
data = read_3d_llc_data(fname, nz, nx, **kwargs)
assert data.shape == shape
assert data.compute().shape == shape

fname = os.path.join(dirname, 'XC.data')
data = read_3d_llc_data(fname, 1, nx, dtype=dtype, memmap=False)
# make sure the first dimension is squeezed off
assert data.shape == shape[1:]
assert data.compute().shape == shape[1:]
data = read_3d_llc_data(fname, 1, nx, **kwargs)
# the z dimension is squeezed out by MDS, so the function matches that behavior
shape_2d = (shape[0],) + shape[2:]
assert data.shape == shape_2d
assert data.compute().shape == shape_2d

#########################################################
### Below are all tests that actually create datasets ###
Expand Down Expand Up @@ -490,7 +495,7 @@ def test_swap_dims(all_mds_datadirs):
print(ds)
assert 'XC' in ds['S'].dims
assert 'YC' in ds['S'].dims



def test_prefixes(all_mds_datadirs):
Expand Down Expand Up @@ -652,17 +657,24 @@ def test_layers_diagnostics(layers_mds_datadirs):
assert var in ds
assert ds[var].dims == dims

def test_llc_dims(llc_mds_datadirs):
@pytest.mark.parametrize("method", ["smallchunks", "bigchunks"])
def test_llc_dims(llc_mds_datadirs, method):
"""Check that the LLC file dimensions are correct."""
dirname, expected = llc_mds_datadirs
ds = xmitgcm.open_mdsdataset(dirname,
iters=expected['test_iternum'],
geometry=expected['geometry'])
geometry=expected['geometry'], llc_method=method)

nz, nface, ny, nx = expected['shape']
nt = 1

assert ds.dims['face'] == 13
assert ds.rA.dims == ('face', 'j', 'i')
assert ds.rA.values.shape == (nface, ny, nx)
assert ds.U.dims == ('time', 'k', 'face', 'j', 'i_g')
assert ds.U.values.shape == (nt, nz, nface, ny, nx)
assert ds.V.dims == ('time', 'k', 'face', 'j_g', 'i')
assert ds.V.values.shape == (nt, nz, nface, ny, nx)

def test_drc_length(all_mds_datadirs):
"""Test that open_mdsdataset is adding an extra level to drC if it has length nr"""
Expand Down
Loading