From b14e19db11f925b72c3ec6956d046b47c899b21c Mon Sep 17 00:00:00 2001 From: Ed Safford <62339196+EdwardSafford-NOAA@users.noreply.github.com> Date: Tue, 2 Jan 2024 14:36:13 -0500 Subject: [PATCH] Extend eva to support legacy ConMon time/vert data (#169) * Ref #167 Add support for batch processing on DataType/dtype. * Ref #167 Fix datatype use with station data. * Ref #167 Add update for conmon vert plots. * Ref #167 Fix typo. --- src/eva/data/data_collections.py | 31 +++- src/eva/data/mon_data_space.py | 155 +++++++++++++----- .../batch/base/diagnostics/line_plot.py | 13 +- .../batch/base/plot_tools/dynamic_config.py | 5 +- .../batch/base/plot_tools/figure_driver.py | 9 +- .../batch/emcpy/diagnostics/line_plot.py | 13 +- 6 files changed, 165 insertions(+), 61 deletions(-) diff --git a/src/eva/data/data_collections.py b/src/eva/data/data_collections.py index 28326ef5..514f9e77 100644 --- a/src/eva/data/data_collections.py +++ b/src/eva/data/data_collections.py @@ -170,7 +170,7 @@ def add_variable_to_collection(self, collection_name, group_name, variable_name, # ---------------------------------------------------------------------------------------------- def get_variable_data_array(self, collection_name, group_name, variable_name, - channels=None, levels=None): + channels=None, levels=None, datatypes=None): """ Retrieve a specific variable (as a DataArray) from a collection. @@ -181,18 +181,21 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, variable_name (str): Name of the variable. channels (int or list[int]): Indices of channels to select (optional). levels (int or list[int]): Indices of levels to select (optional). + datatypes (str or list[str]): Indices of data types to select (optional). Returns: DataArray: The selected variable as an xarray DataArray. Raises: - ValueError: If channels are provided but the 'Channel' dimension is missing. + ValueError: If channels, levels, or datatypes are provided but the + corresponding 'Channel', 'Level', or 'DataType' dimension + is missing. 
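+
+        Example (illustrative; the collection, group, variable, and data type names
+        below are hypothetical):
+
+            da = data_collections.get_variable_data_array('conmon_time', 'ObsGroup',
+                                                          'count', datatypes='t120_00')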
""" group_variable_name = group_name + '::' + variable_name data_array = self._collections[collection_name][group_variable_name] - if channels is None and levels is None: + if channels is None and levels is None and datatypes is None: return data_array if channels is not None: @@ -201,6 +204,7 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, if 'Channel' not in list(self._collections[collection_name].dims): self.logger.abort(f'In get_variable_data_array channels is provided but ' + f'Channel is not a dimension in Dataset') + # Make sure it is a list channels_sel = [] channels_sel.append(channels) @@ -229,10 +233,26 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, self.logger.abort('In get_variable_data_array levels is neither none ' + 'nor a list of integers') + elif datatypes is not None: + if isinstance(datatypes, str) or not any(not isinstance(dt, str) for dt in datatypes): + # DataType must be a dimension if it will be used for selection + if 'DataType' not in list(self._collections[collection_name].dims): + self.logger.abort(f'In get_variable_data_array levels is provided but ' + + f'DataType is not a dimension in Dataset') + datatypes_sel = [] + datatypes_sel.append(datatypes) + + # Create a new DataArray with the requested datatypes + data_array_dtypes = data_array.sel(DataType=datatypes_sel) + return data_array_dtypes + else: + self.logger.abort('In get_variable_data_array datatype is neither none ' + + 'nor a list of strings') + # ---------------------------------------------------------------------------------------------- def get_variable_data(self, collection_name, group_name, variable_name, - channels=None, levels=None): + channels=None, levels=None, datatypes=None): """ Retrieve the data of a specific variable from a collection. @@ -243,13 +263,14 @@ def get_variable_data(self, collection_name, group_name, variable_name, variable_name (str): Name of the variable. channels (int or list[int]): Indices of channels to select (optional). levels (int or list[int]): Indices of levels to select (optional). + datatypes (str or list[str]): Indices of data types to select (optional). Returns: ndarray: The selected variable data as a NumPy array. """ variable_array = self.get_variable_data_array(collection_name, group_name, variable_name, - channels, levels) + channels, levels, datatypes) # Extract the actual data array variable_data = variable_array.data diff --git a/src/eva/data/mon_data_space.py b/src/eva/data/mon_data_space.py index 3a393b61..51f1ffad 100644 --- a/src/eva/data/mon_data_space.py +++ b/src/eva/data/mon_data_space.py @@ -30,6 +30,15 @@ class MonDataSpace(EvaDatasetBase): A class for handling MonDataSpace dataset configuration and processing. 
""" + # index values for specific control files + level_iuse_ozn = 7 + channel_iuse_rad = 7 + channel_num_rad = 4 + type_con = 1 + datatype_con = 5 + subtype_con = 7 + regions_rad_ang = 5 + def execute(self, dataset_config, data_collections, timing): """ @@ -57,13 +66,13 @@ def execute(self, dataset_config, data_collections, timing): dims_arr = [] if self.is_stn_data(control_file[0]): - coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict = ( - self.get_stn_ctl_dict(control_file[0])) + coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict, datatype_dict = ( + self.get_stn_ctl_dict(control_file[0])) ndims_used = 2 dims_arr = ['xdef', 'ydef', 'zdef'] stn_data = True else: - coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict = ( + coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict, datatype_dict = ( self.get_ctl_dict(control_file[0])) ndims_used, dims_arr = self.get_ndims_used(dims) @@ -90,7 +99,8 @@ def execute(self, dataset_config, data_collections, timing): # Set coordinate ranges # --------------------- channo = chans_dict["chan_nums"] if chans_dict is not None else None - x_range, y_range, z_range = self.get_dim_ranges(coords, dims, channo) + datatypes = datatype_dict["datatype"] if datatype_dict is not None else None + x_range, y_range, z_range = self.get_dim_ranges(coords, dims, channo, datatypes) # Get missing value threshold # --------------------------- @@ -121,7 +131,7 @@ def execute(self, dataset_config, data_collections, timing): # create dataset from file contents timestep_ds = None timestep_ds = self.load_dset(vars, nvars, coords, darr, dims, ndims_used, - dims_arr, x_range, y_range, z_range, channo, cyc_darr) + dims_arr, x_range, y_range, z_range, cyc_darr) if attribs['sat']: timestep_ds.attrs['satellite'] = attribs['sat'] @@ -165,7 +175,8 @@ def execute(self, dataset_config, data_collections, timing): # Conditionally add channel, level, scan, and iteration related variables # ----------------------------------------------------------------------- iterations = x_range if 'Iteration' in coords.values() else None - ds = self.loadConditionalItems(ds, chans_dict, levs_dict, scanpo, iterations) + ds = self.loadConditionalItems(ds, chans_dict, levs_dict, + datatype_dict, scanpo, iterations) # Rename variables with group rename_dict = {} @@ -299,13 +310,14 @@ def get_ctl_dict(self, control_file): list: List of scan positions. dict: Dictionary containing channel information. dict: Dictionary containing level information. + dict: Dictionary containing datatype information. """ coords = {'xdef': None, 'ydef': None, 'zdef': None} coord_list = [] dims = {'xdef': 0, 'ydef': 0, 'zdef': 0} dim_list = [] - attribs = {'sensor': None, 'sat': None, 'dtype': None} + attribs = {'sensor': None, 'sat': None, 'datatype': None} vars = [] nvars = 0 chans_dict = None @@ -318,13 +330,18 @@ def get_ctl_dict(self, control_file): levs = [] level_assim = [] level_nassim = [] + datatype_dict = None + datatype = [] + datatype_assim = [] coord_dict = { 'channel': 'Channel', 'scan': 'Scan', 'pressure': 'Level', + 'vertical': 'Level', 'region': 'Region', - 'iter': 'Iteration' + 'iter': 'Iteration', + 'data type': 'DataType' } with open(control_file, 'r') as fp: @@ -334,6 +351,7 @@ def get_ctl_dict(self, control_file): # Locate the coordinates using coord_dict. There will be 1-3 # coordinates specified as XDEF, YDEF, and ZDEF. 
                 for item in list(coord_dict.keys()):
+
                     if 'DEF' in line and item in line:
                         coord_list.append(coord_dict[item])
@@ -375,8 +393,14 @@ def get_ctl_dict(self, control_file):
                     attribs['sensor'] = strs[1]
                     attribs['sat'] = strs[2]
 
                 if line.find('dtype station') != -1 or line.find('DTYPE station') != -1:
-                    attribs['dtype'] = 'station'
+                    attribs['datatype'] = 'station'
+
+                if line.find('subtype') != -1:
+                    strs = line.split()
+                    datatype.append(strs[self.type_con] + strs[self.datatype_con] + '_' +
+                                    strs[self.subtype_con])
+                    datatype_assim.append(strs[9])
 
                 # Note we need to extract the actual channel numbers.  We have the
                 # number of channels via the xdef line, but they are not necessarily
@@ -388,11 +412,11 @@ def get_ctl_dict(self, control_file):
                 if line.find('channel=') != -1:
 
                     strs = line.split()
                     if strs[4].isdigit():
-                        channo.append(int(strs[4]))
-                        if strs[7] == '1':
-                            chan_assim.append(int(strs[4]))
-                        if strs[7] == '-1':
-                            chan_nassim.append(int(strs[4]))
+                        channo.append(int(strs[self.channel_num_rad]))
+                        if strs[self.channel_iuse_rad] == '1':
+                            chan_assim.append(int(strs[self.channel_num_rad]))
+                        if strs[self.channel_iuse_rad] == '-1':
+                            chan_nassim.append(int(strs[self.channel_num_rad]))
 
                 if line.find('level=') != -1:
@@ -400,17 +424,23 @@ def get_ctl_dict(self, control_file):
                     strs = line.split()
                     tlev = strs[2].replace(',', '')
                     if tlev.isdigit():
                         levs.append(int(tlev))
-                    if strs[7] == '1':
-                        level_assim.append(int(tlev))
-                    if strs[7] == '-1':
-                        level_nassim.append(int(tlev))
+
+                    # Ozn control files carry the assim flag on the Level definition
+                    # lines.  Con control files also define levels, but their assim
+                    # flag appears on the datatype lines, not the Level lines.
+                    #
+                    if len(strs) > self.level_iuse_ozn:
+                        if strs[self.level_iuse_ozn] == '1':
+                            level_assim.append(int(tlev))
+                        if strs[self.level_iuse_ozn] == '-1':
+                            level_nassim.append(int(tlev))
             # The list of variables is at the end of the file between the lines
             # "vars" and "end vars".
             start = len(lines) - (nvars + 1)
             for x in range(start, start + nvars):
                 strs = lines[x].split()
-                vars.append(strs[-1])
+                vars.append(strs[0])
 
             # Ignore any coordinates in the control file that have a value of 1.
             used = 0
@@ -442,15 +472,22 @@ def get_ctl_dict(self, control_file):
                           'chans_nassim': chan_nassim}
 
         if 'Level' in coords.values():
-            for x in range(len(level_assim), len(levs)):
-                level_assim.append(0)
-            for x in range(len(level_nassim), len(levs)):
-                level_nassim.append(0)
-            levs_dict = {'levels': levs,
-                         'levels_assim': level_assim,
-                         'levels_nassim': level_nassim}
+            levs_dict = {'levels': levs}
+
+            if len(level_assim) > 0 or len(level_nassim) > 0:
+                for x in range(len(level_assim), len(levs)):
+                    level_assim.append(0)
+                for x in range(len(level_nassim), len(levs)):
+                    level_nassim.append(0)
+                levs_dict['levels_assim'] = level_assim
+                levs_dict['levels_nassim'] = level_nassim
+
+        if 'DataType' in coords.values():
+            datatype_dict = {'datatype': datatype,
+                             'assim': datatype_assim}
+
 
         fp.close()
-        return coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict
+        return coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict, datatype_dict
 
     # ----------------------------------------------------------------------------------------------
 
@@ -471,12 +508,13 @@ def get_stn_ctl_dict(self, control_file):
             list: List of scan positions.
            dict: Dictionary containing channel information.
            dict: Dictionary containing level information.
+            dict: Dictionary containing datatype information.
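+
+        Note:
+            Station control files are not expected to declare a 'data type' axis, so
+            the datatype dictionary is normally None; it is returned to keep the
+            signature consistent with get_ctl_dict.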
""" coords = {'xdef': 'Level', 'ydef': 'Nobs', 'zdef': None} dims = {'xdef': 0, 'ydef': 0, 'zdef': 0} dim_list = [] - attribs = {'sensor': None, 'sat': None, 'dtype': 'station'} + attribs = {'sensor': None, 'sat': None, 'datatype': 'station'} vars = [] nvars = 0 chans_dict = None @@ -486,13 +524,17 @@ def get_stn_ctl_dict(self, control_file): lev_vals = [] level_assim = [] level_nassim = [] + datatype_dict = None + datatype = [] + datatype_assim = [] coord_dict = { 'channel': 'Channel', 'scan': 'Scan', 'pressure': 'Level', 'region': 'Region', - 'iter': 'Iteration' + 'iter': 'Iteration', + 'data type': 'DataType' } with open(control_file, 'r') as fp: @@ -554,8 +596,12 @@ def get_stn_ctl_dict(self, control_file): 'levels_nassim': level_nassim} dims['xdef'] = len(levs) + if 'DataType' in coords.values(): + datatype_dict = {'datatype': datatype, + 'assim': datatype_assim} + fp.close() - return coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict + return coords, dims, attribs, nvars, vars, scanpo, levs_dict, chans_dict, datatype_dict # ---------------------------------------------------------------------------------------------- @@ -598,7 +644,7 @@ def read_ieee(self, file_name, coords, dims, ndims_used, dims_arr, nvars, vars, load_data = False if ndims_used == 1: # MinMon - rtn_array = np.empty((0, dims[dims_arr[0]]), dtype=float) + rtn_array = np.empty((0, dims[dims_arr[0]]), datatype=float) if not load_data: zarray = np.zeros((dims[dims_arr[0]]), float) dimensions = [dims[dims_arr[0]]] @@ -609,7 +655,7 @@ def read_ieee(self, file_name, coords, dims, ndims_used, dims_arr, nvars, vars, zarray = np.zeros((dims[dims_arr[0]], dims[dims_arr[1]]), float) dimensions = [dims[dims_arr[1]], dims[dims_arr[0]]] - if ndims_used == 3: # RadMon angle + if ndims_used == 3: # RadMon angle, ConMon time/vert rtn_array = np.empty((0, dims[dims_arr[0]], dims[dims_arr[1]], dims[dims_arr[2]]), float) zarray = np.zeros((dims[dims_arr[0]], dims[dims_arr[1]], @@ -627,7 +673,7 @@ def read_ieee(self, file_name, coords, dims, ndims_used, dims_arr, nvars, vars, tarr = zarray else: mylist = [] - for z in range(5): + for z in range(dims['zdef']): arr = f.read_reals(dtype=np.dtype('>f4')).reshape(dims['ydef'], dims['xdef']) mylist.append(np.transpose(arr)) @@ -635,6 +681,7 @@ def read_ieee(self, file_name, coords, dims, ndims_used, dims_arr, nvars, vars, else: tarr = zarray + rtn_array = np.append(rtn_array, [tarr], axis=0) else: # ndims_used == 1|2 @@ -768,7 +815,7 @@ def var_to_np_array(self, dims, ndims_used, dims_arr, var): # ---------------------------------------------------------------------------------------------- - def get_dim_ranges(self, coords, dims, channo): + def get_dim_ranges(self, coords, dims, channo, datatypes): """ Get the valid ranges for each dimension based on the specified coordinates and channel @@ -778,6 +825,7 @@ def get_dim_ranges(self, coords, dims, channo): coords (dict): Dictionary of coordinates. dims (dict): Dictionary of dimension sizes. channo (list): List of channel numbers. + datatypes (list): List of data types. Returns: numpy.ndarray or None: Valid x coordinate range or None. @@ -795,7 +843,12 @@ def get_dim_ranges(self, coords, dims, channo): # - The z coordinate is never used for channel. 
if dims['xdef'] > 1: - x_range = channo if coords['xdef'] == 'Channel' else np.arange(1, dims['xdef']+1) + if coords['xdef'] == 'Channel': + x_range = channo + elif coords['xdef'] == 'DataType': + x_range = datatypes + else: + x_range = np.arange(1, dims['xdef']+1) if dims['ydef'] > 1: y_range = channo if coords['ydef'] == 'Channel' else np.arange(1, dims['ydef']+1) @@ -842,7 +895,7 @@ def get_ndims_used(self, dims): # ---------------------------------------------------------------------------------------------- def load_dset(self, vars, nvars, coords, darr, dims, ndims_used, - dims_arr, x_range, y_range, z_range, channo, cyc_darr=None): + dims_arr, x_range, y_range, z_range, cyc_darr=None): """ Create a dataset from various components. @@ -859,7 +912,6 @@ def load_dset(self, vars, nvars, coords, darr, dims, ndims_used, y_range (numpy.ndarray or None): Valid y coordinate range. z_range (numpy.ndarray or None): Valid z coordinate range. cyc_darr (numpy.ndarray): Numpy array of cycle data. - channo (list): List of channel numbers. Returns: xarray.Dataset: Created dataset. @@ -904,9 +956,6 @@ def load_dset(self, vars, nvars, coords, darr, dims, ndims_used, coords[dims_arr[2]]: z_range } - if 'Channel' in coords.values(): - d.update({"Channel": {"dims": ("Channel"), "data": channo}}) - new_ds = Dataset.from_dict(d) rtn_ds = new_ds if rtn_ds is None else rtn_ds.merge(new_ds) @@ -948,7 +997,8 @@ def load_dset(self, vars, nvars, coords, darr, dims, ndims_used, # ---------------------------------------------------------------------------------------------- - def loadConditionalItems(self, dataset, chans_dict, levs_dict, scanpo, iterations=None): + def loadConditionalItems(self, dataset, chans_dict, levs_dict, datatype_dict, + scanpo, iterations=None): """ Add channel, level, and scan related variables to the dataset. @@ -973,13 +1023,28 @@ def loadConditionalItems(self, dataset, chans_dict, levs_dict, scanpo, iteration dataset['chan_nassim'] = (['Channel'], chans_dict["chans_nassim"]) if levs_dict is not None: - dataset['level'] = (['Level'], levs_dict["levels"]) + + # If datatype_dict is available then level needs to include that dimension + # for potential batch processing by DataType (conmon vert plots). 
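+            # Each data type row repeats the same list of level values, so 'level'
+            # becomes a (DataType, Level) shaped array.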
+ if datatype_dict is not None: + combined_list = [] + for x in datatype_dict['datatype']: + combined_list.append(levs_dict['levels']) + dataset['level'] = (['DataType', 'Level'], combined_list) + else: + dataset['level'] = (['Level'], levs_dict["levels"]) dataset['level_yaxis_z'] = (['Level'], [0.0]*len(levs_dict["levels"])) - if len(levs_dict["levels_assim"]) > 0: + if 'levels_assim' in levs_dict: dataset['level_assim'] = (['Level'], levs_dict["levels_assim"]) - if len(levs_dict["levels_nassim"]) > 0: + if 'levels_nassim' in levs_dict: dataset['level_nassim'] = (['Level'], levs_dict["levels_nassim"]) + if 'levels_value' in levs_dict: + dataset['level_value'] = (['Level'], levs_dict["levels_value"]) + + if datatype_dict is not None: + dataset['datatype'] = (['DataType'], datatype_dict['datatype']) + dataset['datatype_assim'] = (['DataType'], datatype_dict['assim']) if scanpo is not None: nscan = dataset.dims.get('Scan') diff --git a/src/eva/plotting/batch/base/diagnostics/line_plot.py b/src/eva/plotting/batch/base/diagnostics/line_plot.py index 8b8c2d14..2d733783 100644 --- a/src/eva/plotting/batch/base/diagnostics/line_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/line_plot.py @@ -58,16 +58,21 @@ def __init__(self, config, logger, dataobj): logger.abort('In Scatter comparison the first variable \'var1\' does not appear to ' + 'be in the required format of collection::group::variable.') - # Optionally get the channel|level to plot + # Optionally get the channel|level|datatype to plot channel = None if 'channel' in config: channel = config.get('channel') level = None if 'level' in config: level = config.get('level') - - xdata = dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], var0_cgv[2], channel, level) - ydata = dataobj.get_variable_data(var1_cgv[0], var1_cgv[1], var1_cgv[2], channel, level) + datatype = None + if 'datatype' in config: + datatype = config.get('datatype') + + xdata = dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], + var0_cgv[2], channel, level, datatype) + ydata = dataobj.get_variable_data(var1_cgv[0], var1_cgv[1], + var1_cgv[2], channel, level, datatype) # see if we need to slice data xdata = slice_var_from_str(config['x'], xdata, logger) diff --git a/src/eva/plotting/batch/base/plot_tools/dynamic_config.py b/src/eva/plotting/batch/base/plot_tools/dynamic_config.py index ffe24508..4df75fd2 100644 --- a/src/eva/plotting/batch/base/plot_tools/dynamic_config.py +++ b/src/eva/plotting/batch/base/plot_tools/dynamic_config.py @@ -62,15 +62,16 @@ def vminvmaxcmap(logger, option_dict, plots_dict, data_collections): # is kept percentage_capture = option_dict.get('percentage capture', 100) - # Optionally the data might have a channel or level. + # Optionally the data might have a channel, level, or datatype. 
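+        # ('datatype' selects along the DataType dimension used for ConMon data; its
+        # value is one of the data type labels read from the control file.)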
channel = option_dict.get('channel', None) level = option_dict.get('level', None) + datatype = option_dict.get('datatype', None) # Get the data variable to use for determining cbar limits varname = option_dict.get('data variable') varname_cgv = varname.split('::') datavar = data_collections.get_variable_data(varname_cgv[0], varname_cgv[1], - varname_cgv[2], channel, level) + varname_cgv[2], channel, level, datatype) # Reorder the data array datavar = np.sort(datavar) diff --git a/src/eva/plotting/batch/base/plot_tools/figure_driver.py b/src/eva/plotting/batch/base/plot_tools/figure_driver.py index 28464d38..5a7d1bfc 100644 --- a/src/eva/plotting/batch/base/plot_tools/figure_driver.py +++ b/src/eva/plotting/batch/base/plot_tools/figure_driver.py @@ -98,7 +98,7 @@ def figure_driver(config, data_collections, timing, logger): step_var_name = 'channel' title_fill = ' Ch. ' - # Get list of levels, conditionally override step variables + # Get list of levels, load step variables levels_str_or_list = batch_conf.get('levels', []) levels = parse_channel_list(levels_str_or_list, logger) if levels: @@ -106,6 +106,13 @@ def figure_driver(config, data_collections, timing, logger): step_var_name = 'level' title_fill = ' Lev. ' + # Get list of datatypes and load step variables + datatypes = batch_conf.get('datatypes', []) + if datatypes: + step_vars = datatypes + step_var_name = 'datatype' + title_fill = ' Dtype. ' + # Set some fake values to ensure the loops are entered if not variables: logger.abort("Batch Figure must provide variables, even if with channels") diff --git a/src/eva/plotting/batch/emcpy/diagnostics/line_plot.py b/src/eva/plotting/batch/emcpy/diagnostics/line_plot.py index 1738cf8b..8c2f5175 100644 --- a/src/eva/plotting/batch/emcpy/diagnostics/line_plot.py +++ b/src/eva/plotting/batch/emcpy/diagnostics/line_plot.py @@ -58,16 +58,21 @@ def __init__(self, config, logger, dataobj): logger.abort('In Scatter comparison the first variable \'var1\' does not appear to ' + 'be in the required format of collection::group::variable.') - # Optionally get the channel|level to plot + # Optionally get the channel|level|datatype to plot channel = None if 'channel' in config: channel = config.get('channel') level = None if 'level' in config: level = config.get('level') + datatype = None + if 'datatype' in config: + datatype = config.get('datatype') - xdata = dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], var0_cgv[2], channel, level) - ydata = dataobj.get_variable_data(var1_cgv[0], var1_cgv[1], var1_cgv[2], channel, level) + xdata = dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], var0_cgv[2], + channel, level, datatype) + ydata = dataobj.get_variable_data(var1_cgv[0], var1_cgv[1], var1_cgv[2], + channel, level, datatype) # see if we need to slice data xdata = slice_var_from_str(config['x'], xdata, logger) @@ -97,7 +102,7 @@ def __init__(self, config, logger, dataobj): 'batch', 'emcpy', 'defaults', 'line_plot.yaml')) config = get_schema(layer_schema, config, logger) - delvars = ['x', 'y', 'type', 'schema', 'channel', 'level'] + delvars = ['x', 'y', 'type', 'schema', 'channel', 'level', 'datatype'] for d in delvars: config.pop(d, None) self.plotobj = update_object(self.plotobj, config, logger)