From f7385437c96b8b162fb1789b147b8c5f116d680d Mon Sep 17 00:00:00 2001 From: SammyAgrawal Date: Wed, 26 Jun 2024 21:46:52 -0400 Subject: [PATCH] debugging local console error --- feedstock/eNATL60.py | 23 ++++++++++++++++++++--- feedstock/requirements.txt | 1 + 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/feedstock/eNATL60.py b/feedstock/eNATL60.py index fb9d11c..124d093 100644 --- a/feedstock/eNATL60.py +++ b/feedstock/eNATL60.py @@ -1,6 +1,8 @@ """ ... """ +import logging +import xarray as xr import apache_beam as beam from pangeo_forge_recipes.patterns import pattern_from_file_sequence from pangeo_forge_recipes.transforms import ( @@ -11,6 +13,8 @@ T, ) +logger = logging.getLogger(__name__) + # Common Parameters days = range(1, 32) dataset_url = 'https://zenodo.org/records/10513552/files' @@ -25,11 +29,24 @@ class Preprocess(beam.PTransform): @staticmethod def _set_coords(item: Indexed[T]) -> Indexed[T]: index, ds = item - ds = ds.set_coords(['deptht', 'depthw', 'nav_lon', 'nav_lat', 'time_counter', 'tmask']) + logger.info(f"Index is {index=}") + logger.info(f"Dataset before processing {ds=}") + logger.info(f"Time counter data : {ds.time_counter.data}") + # could try using cftime to force + # create t_new as variable + t_new = xr.DataArray(ds.time_counter.data, dims=['time']) + logger.info(f"New Time Dimension {t_new=}") + ds = ds.assign_coords(time=t_new) + ds = ds.drop(['time_counter']) + ds = ds.set_coords(['deptht', 'depthw', 'nav_lon', 'nav_lat', 'tmask']) + + return index, ds + + # ds = ds.set_coords(['deptht', 'depthw', 'nav_lon', 'nav_lat', 'time_counter', 'tmask']) # ds = ds.assign_coords( # tmask=ds.coords['tmask'].squeeze(), deptht=ds.coords['deptht'].squeeze() # ) - return index, ds + def expand(self, pcoll: beam.PCollection) -> beam.PCollection: return pcoll | 'Set coordinates' >> beam.Map(self._set_coords) @@ -38,7 +55,7 @@ def expand(self, pcoll: beam.PCollection) -> beam.PCollection: eNATL60_BLBT02 = ( 
beam.Create(pattern.items()) | OpenURLWithFSSpec() - | OpenWithXarray() + | OpenWithXarray(xarray_open_kwargs = {'use_cftime':True}) | Preprocess() | StoreToZarr( store_name='eNATL60_BLBT02.zarr', diff --git a/feedstock/requirements.txt b/feedstock/requirements.txt index d234810..2250fc2 100644 --- a/feedstock/requirements.txt +++ b/feedstock/requirements.txt @@ -2,3 +2,4 @@ pangeo-forge-recipes==0.10.4 gcsfs apache-beam[gcp] leap-data-management-utils==0.0.12 +xarray==2024.05.0 \ No newline at end of file