update read-in module for ATL11 #398

Merged: 25 commits, Feb 17, 2023

Commits
1e20ed6  remove sc_orient from atl11 required vars list (JessicaS11, Nov 22, 2022)
a452ef9  introduce new function to determine if data product uses ground track… (JessicaS11, Nov 22, 2022)
8524148  add some custom treatments for 2d delta_times and cases when there is… (JessicaS11, Nov 28, 2022)
941f69a  add atl11 path parsing and docs (JessicaS11, Nov 29, 2022)
9cbfd57  handle merging given non-unique ref_pt coordinates (JessicaS11, Dec 2, 2022)
33e319d  add var to coordinate conversion for cycles prior to merge (JessicaS11, Jan 9, 2023)
76576ae  hopefully get ATL11 read in working (JessicaS11, Jan 18, 2023)
31594a9  Merge branch 'development' into atl11 (JessicaS11, Jan 18, 2023)
79c4b7c  Merge branch 'development' into atl11 (JessicaS11, Jan 27, 2023)
4e5f533  add test for new track type function (JessicaS11, Feb 6, 2023)
4a02879  add note about function that needs testing (JessicaS11, Feb 6, 2023)
9226737  clean up and apply some of review suggestions (JessicaS11, Feb 6, 2023)
35088ba  Merge branch 'development' into atl11 (JessicaS11, Feb 8, 2023)
191851f  Apply suggestions from code review (JessicaS11, Feb 9, 2023)
0a31021  change path to pair_track (JessicaS11, Feb 9, 2023)
4bb4cf9  specify typeerror in try/except (JessicaS11, Feb 9, 2023)
5b2da3a  minor debugging and PR updates (JessicaS11, Feb 9, 2023)
1b26978  remove comment (JessicaS11, Feb 9, 2023)
8568fae  update failing test to match updated var name (JessicaS11, Feb 9, 2023)
45540ce  add note for creating a test and return atl11 granule names correctly… (JessicaS11, Feb 14, 2023)
03befa0  Merge branch 'development' into atl11 (JessicaS11, Feb 15, 2023)
dfaf2e9  address pr review: gt to pt, cycle_number dtype, atl11 filename pattern (JessicaS11, Feb 16, 2023)
775dba1  Update icepyx/core/read.py (JessicaS11, Feb 17, 2023)
2b7a348  update test for updated function (JessicaS11, Feb 17, 2023)
bf6e632  update test for updated function (JessicaS11, Feb 17, 2023)
Files changed
icepyx/core/read.py (165 additions, 16 deletions)

@@ -54,6 +54,48 @@ def _make_np_datetime(df, keyword):
    return df


+# TODO: add tests, round out docs, and test for atl09 and atl06, for this new function!!
+def _get_track_type_str(grp_path):
+    """
+    Determine whether the product contains ground tracks, paths, or profiles and
+    parse the string/label the dimension accordingly.
+
+    Parameters
+    ----------
+    grp_path : str
+        The group path for the ground track, path, or profile.
+
+    Returns
+    -------
+    track_str : str
+        The string for the ground track, path, or profile of this group
+    spot_dim_name : str
+        What the dimension should be named in the dataset
+    """
+
+    import re
+
+    # TODO: This won't work for profile (e.g. atmos) data --> needs to be generalized!
+    if re.match(r"gt[1-3]['r','l']", grp_path):
+        track_str = re.match(r"gt[1-3]['r','l']", grp_path).group()
+        # spot = is2ref.gt2spot(track_str, is2ds.sc_orient.values[0])
+        # FIX THIS (line above)!!
+        spot_dim_name = "spot"
+        # add a test for the gt2spot function (called here)!
+
+    elif re.match(r"profile_[1-3]", grp_path):
+        track_str = re.match(r"profile_[1-3]", grp_path).group()
+        spot = int(track_str[-1])
+        spot_dim_name = "profile"
+
+    elif re.match(r"pt[1-3]", grp_path):
+        track_str = re.match(r"pt[1-3]", grp_path).group()
+        spot = int(track_str[-1])
+        spot_dim_name = "path"
+
+    return track_str, spot_dim_name


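A note on the regexes in this new function: ['r','l'] is a regex character class, so it matches any single one of the characters ', r, comma, or l rather than the alternation it suggests; gt[1-3][rl] is a stricter equivalent. Also, spot = int(track_str[-1]) in the profile and path branches is assigned but never returned. A minimal self-contained sketch of the same dispatch logic (hypothetical helper name and group paths, not the PR's code):

import re

def track_type(grp_path):
    # Try each track naming scheme in turn; the first match wins.
    for pattern, dim in (
        (r"gt[1-3][rl]", "spot"),       # beam groups, e.g. ATL03/ATL06
        (r"profile_[1-3]", "profile"),  # ATL09 profiles
        (r"pt[1-3]", "path"),           # ATL11 pair tracks
    ):
        match = re.match(pattern, grp_path)
        if match:
            return match.group(), dim
    raise ValueError(f"unrecognized group path: {grp_path}")

assert track_type("gt2r/land_ice_segments") == ("gt2r", "spot")
assert track_type("pt3/cycle_stats") == ("pt3", "path")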
# Dev note: function fully tested (except else, which don't know how to get to)
def _check_datasource(filepath):
    """
@@ -395,35 +437,65 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
                except NameError:
                    import random

-                   is2ds["gran_idx"] = [random.randint(900000, 999999)]
+                   is2ds["gran_idx"] = [random.randint(800000, 899998)]
                    warnings.warn("Your granule index is made up of random values.")
            # You must include the orbit/cycle_number and orbit/rgt variables to generate
            except KeyError:
-               pass
+               # Added this when dealing with ATL11 - need to see if it breaks with other datasets
+               is2ds["gran_idx"] = [np.nanmax(is2ds["gran_idx"]) - 1]
+               # pass

            if hasattr(is2ds, "data_start_utc"):
                is2ds = _make_np_datetime(is2ds, "data_start_utc")
                is2ds = _make_np_datetime(is2ds, "data_end_utc")

        else:
-           import re
+           track_str, spot_dim_name = _get_track_type_str(grp_path)

-           gt_str = re.match(r"gt[1-3]['r','l']", grp_path).group()
-           spot = is2ref.gt2spot(gt_str, is2ds.sc_orient.values[0])
-           # add a test for the new function (called here)!
+           # get the spot number if relevant
+           if spot_dim_name == "spot":
+               spot = is2ref.gt2spot(track_str, is2ds.sc_orient.values[0])
+           else:
+               spot = track_str

            grp_spec_vars = [
                k
                for k, v in wanted_dict.items()
                if any(f"{grp_path}/{k}" in x for x in v)
            ]

+           # NEXT TODO: handle the case where it's the second time through a 2d delta_time...
+
+           # handle delta_times with 1 or more dimensions
+           idx_range = range(0, len(ds.delta_time.data))
+           # if hasattr(is2ds, "photon_idx"):
+
+           #     # if is2ds already has a 2d photon idx/delta time AND the current delta time does too
+           #     if np.ndim(ds.delta_time.data) > 1:  # and np.ndim(is2ds.photon_idx) > 1:
+           #         # repeat the range the number of times needed, then transpose to match the shape of the existing photon_idx
+           #         photon_ids = (
+           #             np.broadcast_to([*idx_range], (np.shape(ds.delta_time)[1], np.shape(ds.delta_time)[0])).transpose()
+           #             + np.full_like(
+           #                 ds.delta_time, np.max(is2ds.photon_idx), dtype="int64"
+           #             )
+           #             + 1
+           #         )
+           #     # the original case, where delta_time is 2d but the existing photon_idx is 1d
+           #     else:
+           #         photon_ids = (
+           #             range(0, len(ds.delta_time.data))
+           #             + np.full_like(
+           #                 ds.delta_time, np.max(is2ds.photon_idx), dtype="int64"
+           #             )
+           #             + 1
+           #         )
+           # else:
+           #     photon_ids = range(0, len(ds.delta_time.data))

            try:
                photon_ids = (
-                   range(0, len(ds.delta_time.data))
-                   + np.full_like(
-                       ds.delta_time, np.max(is2ds.photon_idx), dtype="int64"
-                   )
+                   idx_range
+                   + np.full_like(idx_range, np.max(is2ds.photon_idx), dtype="int64")
                    + 1
                )
            except AttributeError:
@@ -432,16 +504,52 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
            hold_delta_times = ds.delta_time.data
            ds = (
                ds.reset_coords(drop=False)
-               .expand_dims(dim=["spot", "gran_idx"])
+               .expand_dims(dim=[spot_dim_name, "gran_idx"])
                .assign_coords(
-                   spot=("spot", [spot]), delta_time=("delta_time", photon_ids)
+                   {
+                       spot_dim_name: (spot_dim_name, [spot]),
+                       "delta_time": ("delta_time", photon_ids),
+                   }
                )
-               .assign(gt=(("gran_idx", "spot"), [[gt_str]]))
+               .assign(gt=(("gran_idx", spot_dim_name), [[track_str]]))
                .rename_dims({"delta_time": "photon_idx"})
                .rename({"delta_time": "photon_idx"})
-               .assign_coords(delta_time=("photon_idx", hold_delta_times))
                # .set_index("photon_idx")
            )

+           # handle cases where the delta time is 2d due to multiple cycles in that group
+           if spot_dim_name == "path" and np.ndim(hold_delta_times) > 1:
+               ds = ds.assign_coords(
+                   {"delta_time": (("photon_idx", "cycle_number"), hold_delta_times)}
+               )
+           else:
+               ds = ds.assign_coords({"delta_time": ("photon_idx", hold_delta_times)})
+
+           # for ATL11
+           if "ref_pt" in ds.coords:
+               ds = (
+                   ds.drop_indexes(["ref_pt", "photon_idx"])
+                   .drop(["ref_pt", "photon_idx"])
+                   .swap_dims({"ref_pt": "photon_idx"})
+                   .assign_coords(
+                       ref_pt=("photon_idx", ds.ref_pt.data),
+                       photon_idx=ds.photon_idx.data,
+                   )
+               )
+
+           # for the subgroups where there is 1d delta time data, make sure that the cycle number is still a coordinate for merging
+           try:
+               ds = ds.assign_coords(
+                   {
+                       "cycle_number": (
+                           "photon_idx",
+                           ds.cycle_number["photon_idx"].data,
+                       )
+                   }
+               )
+           except KeyError:
+               pass
+
            grp_spec_vars.extend(["gt", "photon_idx"])

            is2ds = is2ds.merge(
@@ -450,7 +558,10 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):

        # re-cast some dtypes to make array smaller
        is2ds["gt"] = is2ds.gt.astype(str)
-       is2ds["spot"] = is2ds.spot.astype(np.uint8)
+       try:
+           is2ds[spot_dim_name] = is2ds[spot_dim_name].astype(np.uint8)
+       except:
+           pass

        return is2ds, ds[grp_spec_vars]

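The hunk above does two things for ATL11 that a toy sketch can make concrete: segment numbering continues after the maximum photon_idx already in the merged dataset, and a two-dimensional delta_time (reference point x cycle) is attached as a coordinate rather than flattened. All shapes and values below are made up for illustration:

import numpy as np
import xarray as xr

# Continue numbering after the existing maximum, mirroring
# idx_range + np.full_like(idx_range, np.max(is2ds.photon_idx), dtype="int64") + 1
existing_max = 41
idx_range = np.arange(4)
photon_ids = idx_range + np.full_like(idx_range, existing_max, dtype="int64") + 1
print(photon_ids)  # [42 43 44 45]

# Attach delta_time as a 2-D coordinate over (photon_idx, cycle_number),
# as in the spot_dim_name == "path" branch above.
ds = xr.Dataset(
    {"h_corr": (("photon_idx", "cycle_number"), np.zeros((4, 3)))},
    coords={"photon_idx": photon_ids, "cycle_number": [1, 2, 3]},
)
delta_time_2d = np.arange(12, dtype="float64").reshape(4, 3)
ds = ds.assign_coords(delta_time=(("photon_idx", "cycle_number"), delta_time_2d))
print(ds.delta_time.dims)  # ('photon_idx', 'cycle_number')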
@@ -642,8 +753,10 @@ def _build_single_file_dataset(self, file, groups_list):
        # with h5py.File(filepath,'r') as h5pt:
        #     prod_id = h5pt.attrs["identifier_product_type"]

-       # DEVNOTE: does not actually apply wanted variable list, and has not been tested for merging multiple files into one ds
+       # DEVNOTE: if and elif does not actually apply wanted variable list, and has not been tested for merging multiple files into one ds
        # if a gridded product
+       # TODO: all products need to be tested, and quicklook products added or explicitly excluded
+       # Level 3b, gridded (netcdf): ATL14, 15, 16, 17, 18, 19, 20, 21
        if self._prod in [
            "ATL14",
            "ATL15",
@@ -656,6 +769,39 @@ def _build_single_file_dataset(self, file, groups_list):
        ]:
            is2ds = xr.open_dataset(file)

+       # Level 3b, hdf5: ATL11
+       elif self._prod in ["ATL11"]:
+           is2ds = self._build_dataset_template(file)
+
+           # returns the wanted groups as a single list of full group path strings
+           wanted_dict, wanted_groups = Variables.parse_var_list(
+               groups_list, tiered=False
+           )
+           wanted_groups_set = set(wanted_groups)
+
+           # orbit_info is used automatically as the first group path so the info is available for the rest of the groups
+           # wanted_groups_set.remove("orbit_info")
+           wanted_groups_set.remove("ancillary_data")
+           # Note: the sorting is critical for datasets with highly nested groups
+           wanted_groups_list = ["ancillary_data"] + sorted(wanted_groups_set)
+
+           # returns the wanted groups as a list of lists with group path string elements separated
+           _, wanted_groups_tiered = Variables.parse_var_list(
+               groups_list, tiered=True, tiered_vars=True
+           )
+
+           while wanted_groups_list:
+               # print(wanted_groups_list)
+               grp_path = wanted_groups_list[0]
+               wanted_groups_list = wanted_groups_list[1:]
+               ds = self._read_single_grp(file, grp_path)
+               is2ds, ds = Read._add_vars_to_ds(
+                   is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
+               )
Comment on lines +772 to +779

[Member] Won't this while loop code break on L796 once the wanted_groups_list variable is assigned a value? Or does it run to the end of the while-block first before breaking? Maybe I'm confusing Python with some other programming language...

[Member Author] No, because it will either get to this version of the while loop from the elif on L743 (which ends with a return) or the L796 version of the while loop via the else on L775.
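For readers with the same question: a while condition is only re-evaluated at the top of each iteration, so reassigning wanted_groups_list inside the body never exits the loop mid-iteration; the loop ends once the list is empty. A standalone demonstration with made-up group paths:

wanted_groups_list = ["ancillary_data", "pt1/cycle_stats", "pt1/ref_surf"]
while wanted_groups_list:
    grp_path = wanted_groups_list[0]
    wanted_groups_list = wanted_groups_list[1:]  # shrinks the list; the loop keeps going
    print(grp_path)
# prints all three paths, then exits once the list is empty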


+           return is2ds

+       # Level 2 and 3a Products: ATL03, 06, 07, 08, 09, 10, 12, 13
        else:
            is2ds = self._build_dataset_template(file)

@@ -677,6 +823,7 @@ def _build_single_file_dataset(self, file, groups_list):
            )

            while wanted_groups_list:
+               # print(wanted_groups_list)
                grp_path = wanted_groups_list[0]
                wanted_groups_list = wanted_groups_list[1:]
                ds = self._read_single_grp(file, grp_path)
@@ -685,7 +832,9 @@ def _build_single_file_dataset(self, file, groups_list):
                )

                # if there are any deeper nested variables, get those so they have actual coordinates and add them
+               # this may apply to (at a minimum): ATL08
                if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list):
+                   print("nested var")
                    for grp_path2 in wanted_groups_list:
                        if grp_path in grp_path2:
                            sub_ds = self._read_single_grp(file, grp_path2)
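The nested-variable pass at the end of the hunk above hinges on a plain substring test between group paths. In isolation (hypothetical ATL08-style paths, per the diff's note):

# Any remaining wanted path that contains the current group path is treated
# as a deeper nested group and is read in a second pass.
grp_path = "gt1l/land_segments"
wanted_groups_list = ["gt1l/land_segments/canopy", "gt2r/land_segments"]
nested = [g for g in wanted_groups_list if grp_path in g]
print(nested)  # ['gt1l/land_segments/canopy']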
icepyx/core/variables.py (8 additions, 0 deletions)

@@ -289,6 +289,8 @@ def _check_valid_lists(
        # check if the list of beams, if specified, are available in the product
        if self.product == "ATL09":
            beam_avail = ["profile_" + str(i + 1) for i in range(3)]
+       elif self.product == "ATL11":
+           beam_avail = ["pt" + str(i + 1) for i in range(3)]
        else:
            beam_avail = ["gt" + str(i + 1) + "l" for i in range(3)]
            beam_avail = beam_avail + ["gt" + str(i + 1) + "r" for i in range(3)]
@@ -403,6 +405,7 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=None):
        beam_list : list of strings, default None
            A list of beam strings, if only selected beams are wanted (the default value of None will automatically
            include all beams). For ATL09, acceptable values are ['profile_1', 'profile_2', 'profile_3'].
+           For ATL11, acceptable values are ['pt1','pt2','pt3'].
            For all other products, acceptable values are ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r'].

        keyword_list : list of strings, default None
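Based on the docstring above, a hedged usage sketch for requesting ATL11 variables on specific pair tracks; the bounding box, dates, and the variable name h_corr are illustrative, not from this PR:

import icepyx as ipx

# Assumed setup: an ATL11 query over an arbitrary region and date range.
region = ipx.Query("ATL11", [-55, 68, -48, 71], ["2019-02-20", "2019-02-28"])
# Restrict the wanted-variables list to pair tracks 1 and 3.
region.order_vars.append(beam_list=["pt1", "pt3"], var_list=["h_corr"])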
@@ -479,6 +482,10 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=None):
"data_end_utc",
]

# Adjust the nec_varlist for individual products
if self.product == "ATL11":
nec_varlist.remove("sc_orient")

try:
self._check_valid_lists(vgrp, allpaths, var_list=nec_varlist)
except ValueError:
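In isolation, the product-specific adjustment above looks like the sketch below (abbreviated list; sc_orient is only needed to map ground-track beams to spots, which does not apply to ATL11's pair tracks):

# Abbreviated stand-in for the full required-variables list defined above.
nec_varlist = ["sc_orient", "cycle_number", "rgt", "data_start_utc", "data_end_utc"]
product = "ATL11"
if product == "ATL11":
    nec_varlist.remove("sc_orient")
print(nec_varlist)  # ['cycle_number', 'rgt', 'data_start_utc', 'data_end_utc']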
@@ -533,6 +540,7 @@ def remove(self, all=False, var_list=None, beam_list=None, keyword_list=None):
        beam_list : list of strings, default None
            A list of beam strings, if only selected beams are wanted (the default value of None will automatically
            include all beams). For ATL09, acceptable values are ['profile_1', 'profile_2', 'profile_3'].
+           For ATL11, acceptable values are ['pt1','pt2','pt3'].
            For all other products, acceptable values are ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r'].

        keyword_list : list of strings, default None