Skip to content

Commit

Permalink
ENH: adds "open_dataarray" method to units module.
Browse files Browse the repository at this point in the history
Changes "create_data_array" to "create_dataarray" to be consistent with xarray naming convention in methods.

This feature adds the ability to go symmetrically from UnitsDataArray -> netcdf -> UnitsDataArray. The goal is to move away from pickle in the lambda function interface and, more generally, to provide users a way to share binary outputs and load them back into PODPAC.
  • Loading branch information
mlshapiro committed Nov 11, 2019
1 parent 5f64ab2 commit f77e0c6
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 23 deletions.
2 changes: 1 addition & 1 deletion podpac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def makedirs(name, mode=511, exist_ok=False):
from podpac.core.node import Node, NodeException
import podpac.core.authentication as authentication
from podpac.core.utils import NodeTrait
from podpac.core.units import ureg as units
from podpac.core.units import ureg as units, open_dataarray, UnitsDataArray

# Organized submodules
# These files are simply wrappers to create a curated namespace of podpac modules
Expand Down
4 changes: 2 additions & 2 deletions podpac/core/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import traitlets as tl

from podpac.core.settings import settings
from podpac.core.units import ureg, UnitsDataArray, create_data_array
from podpac.core.units import ureg, UnitsDataArray, create_dataarray
from podpac.core.utils import common_doc
from podpac.core.utils import JSONEncoder, is_json_serializable
from podpac.core.utils import _get_query_params_from_url, _get_from_url, _get_param
Expand Down Expand Up @@ -252,7 +252,7 @@ def create_output_array(self, coords, data=np.nan, **kwargs):
if self.units is not None:
attrs["units"] = ureg.Unit(self.units)

return create_data_array(coords, data=data, dtype=self.dtype, attrs=attrs, **kwargs)
return create_dataarray(coords, data=data, dtype=self.dtype, attrs=attrs, **kwargs)

# -----------------------------------------------------------------------------------------------------------------
# Serialization
Expand Down
77 changes: 64 additions & 13 deletions podpac/core/test/test_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@

from podpac.core.units import ureg
from podpac.core.units import UnitsDataArray
from podpac.core.units import create_data_array
from podpac.core.units import create_dataarray
from podpac.core.units import open_dataarray
from podpac.core.units import get_image

from podpac.data import Array


class TestUnitDataArray(object):
def test_no_units_to_base_units_has_no_units(self):
Expand Down Expand Up @@ -331,77 +334,125 @@ def setup_class(cls):
cls.coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"])

def test_default(self):
a = create_data_array(self.coords)
a = create_dataarray(self.coords)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert np.all(np.isnan(a))

def test_empty(self):
a = create_data_array(self.coords, data=None)
a = create_dataarray(self.coords, data=None)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float

a = create_data_array(self.coords, data=None, dtype=bool)
a = create_dataarray(self.coords, data=None, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool

def test_zeros(self):
a = create_data_array(self.coords, data=0)
a = create_dataarray(self.coords, data=0)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 0.0)

a = create_data_array(self.coords, data=0, dtype=bool)
a = create_dataarray(self.coords, data=0, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool
assert np.all(~a)

def test_ones(self):
a = create_data_array(self.coords, data=1)
a = create_dataarray(self.coords, data=1)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 1.0)

a = create_data_array(self.coords, data=1, dtype=bool)
a = create_dataarray(self.coords, data=1, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool
assert np.all(a)

def test_full(self):
a = create_data_array(self.coords, data=10)
a = create_dataarray(self.coords, data=10)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 10)

a = create_data_array(self.coords, data=10, dtype=int)
a = create_dataarray(self.coords, data=10, dtype=int)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == int
assert np.all(a == 10)

def test_array(self):
data = np.random.random(self.coords.shape)
a = create_data_array(self.coords, data=data)
a = create_dataarray(self.coords, data=data)
assert isinstance(a, UnitsDataArray)
assert a.dtype == float
np.testing.assert_equal(a.data, data)

data = np.round(10 * np.random.random(self.coords.shape))
a = create_data_array(self.coords, data=data, dtype=int)
a = create_dataarray(self.coords, data=data, dtype=int)
assert isinstance(a, UnitsDataArray)
assert a.dtype == int
np.testing.assert_equal(a.data, data.astype(int))

def test_invalid_coords(self):
with pytest.raises(TypeError):
create_data_array((3, 4))
create_dataarray((3, 4))


class TestOpenDataArray(object):
    """Round-trip checks: UnitsDataArray -> ``to_netcdf()`` -> ``open_dataarray`` -> UnitsDataArray."""

    def test_open_after_create(self):
        """A freshly created array comes back as a UnitsDataArray with the same values."""
        coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"])
        original = create_dataarray(coords, data=np.random.rand(3, 4))

        serialized = original.to_netcdf()
        restored = open_dataarray(serialized)

        assert isinstance(restored, UnitsDataArray)
        assert np.all(restored.data == original.data)

    def test_open_after_create_with_attrs(self):
        """A user-supplied attribute and the ``crs`` attribute are both present and equal after reopening."""
        coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"], crs="EPSG:4193")
        original = create_dataarray(coords, data=np.random.rand(3, 4), attrs={"some_attr": 5})

        serialized = original.to_netcdf()
        restored = open_dataarray(serialized)

        assert isinstance(restored, UnitsDataArray)
        assert np.all(restored.data == original.data)

        # the custom attribute passed through `attrs`
        assert "some_attr" in restored.attrs
        assert restored.attrs.get("some_attr") == original.attrs.get("some_attr")

        # the crs attribute
        assert "crs" in restored.attrs
        assert restored.attrs.get("crs") == original.attrs.get("crs")

    def test_open_after_eval(self):
        """The output of a node evaluation keeps its values, ``layer_style`` and ``crs`` after reopening."""

        # mock node: a 5x5 random grid evaluated at its own native coordinates
        lat = np.linspace(-10, 10, 5)
        lon = np.linspace(-10, 10, 5)
        data = np.random.rand(5, 5)
        native_coords = Coordinates([lat, lon], ["lat", "lon"])
        node = Array(source=data, native_coordinates=native_coords)
        evaluated = node.eval(node.native_coordinates)

        serialized = evaluated.to_netcdf()
        restored = open_dataarray(serialized)

        assert isinstance(restored, UnitsDataArray)
        assert np.all(restored.data == evaluated.data)

        # layer_style is compared through its `json` attribute
        assert "layer_style" in restored.attrs
        assert restored.attrs.get("layer_style").json == evaluated.attrs.get("layer_style").json

        assert "crs" in restored.attrs
        assert restored.attrs.get("crs") == evaluated.attrs.get("crs")


class TestGetImage(object):
Expand Down
44 changes: 42 additions & 2 deletions podpac/core/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ureg = UnitRegistry()

import podpac
from podpac import Coordinates
from podpac.core.settings import settings
from podpac.core.utils import JSONEncoder
from podpac.core.style import Style
Expand Down Expand Up @@ -351,9 +352,48 @@ def func(self, *args, **kwargs):
# ---------------------------------------------------------------------------------------------------------------------


def create_data_array(coords, data=np.nan, dtype=float, **kwargs):
def open_dataarray(*args, **kwargs):
    """
    Open a :class:`podpac.UnitsDataArray` from a file or file-like object containing a single data variable.

    This is a wrapper around :func:`xarray.open_dataarray`.
    The inputs to this function are passed directly to :func:`xarray.open_dataarray`.
    See http://xarray.pydata.org/en/stable/generated/xarray.open_dataarray.html#xarray.open_dataarray.

    The DataArray passed back from :func:`xarray.open_dataarray` is used to create a units data array using
    :func:`create_dataarray`.

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to :func:`xarray.open_dataarray`.
    **kwargs
        Keyword arguments forwarded unchanged to :func:`xarray.open_dataarray`.

    Returns
    -------
    :class:`podpac.UnitsDataArray`
    """
    da = xr.open_dataarray(*args, **kwargs)

    # rebuild PODPAC coordinates from the xarray coords; the crs comes from the "crs" attribute (None if absent)
    coords = Coordinates.from_xarray(da.coords, crs=da.attrs.get("crs"))

    # carry all attributes of the opened array over to the new UnitsDataArray
    # NOTE(review): dtype is not forwarded, so create_dataarray's default dtype applies -- confirm for non-float data
    # NOTE(review): the opened xarray object is never explicitly closed here -- confirm whether that matters
    return create_dataarray(coords, data=da.data, attrs=da.attrs)


def create_dataarray(coords, data=np.nan, dtype=float, **kwargs):
"""Shortcut to create :class:`podpac.UnitsDataArray`
Parameters
----------
coords : :class:`podpac.Coordinates`
PODPAC Coordinates
data : np.ndarray, optional
Data to fill in. Defaults to np.nan.
dtype : type, optional
Data type. Defaults to float.
**kwargs
keyword arguments to pass to :class:`podpac.UnitsDataArray` constructor
Returns
-------
:class:`podpac.UnitsDataArray`
"""
if not isinstance(coords, podpac.Coordinates):
raise TypeError("create_data_array expected Coordinates object, not '%s'" % type(coords))
raise TypeError("create_dataarray expected Coordinates object, not '%s'" % type(coords))

if data is None:
data = np.empty(coords.shape, dtype=dtype)
Expand Down
4 changes: 2 additions & 2 deletions podpac/datalib/egi.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from podpac.data import DataSource
from podpac import authentication
from podpac import settings
from podpac.core.units import UnitsDataArray, create_data_array
from podpac.core.units import UnitsDataArray, create_dataarray
from podpac.core.node import node_eval

# Set up logging
Expand Down Expand Up @@ -225,7 +225,7 @@ def eval(self, coordinates, output=None):
)
raise e
# Force update on native_coordinates (in case of multiple evals)
self.set_trait('native_coordinates', self.get_native_coordinates())
self.set_trait("native_coordinates", self.get_native_coordinates())

# run normal eval once self.data is prepared
return super(EGI, self).eval(coordinates, output)
Expand Down
5 changes: 2 additions & 3 deletions podpac/datalib/smap_egi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def isnat(a):
import podpac.datalib
from podpac.core.coordinates import Coordinates
from podpac.datalib import EGI
from podpac.core.units import create_data_array
from podpac.core.units import create_dataarray

SMAP_PRODUCT_DICT = {
#'shortname': ['lat_key', 'lon_key', 'data_key', 'quality_flag', 'default_version']
Expand Down Expand Up @@ -143,7 +143,6 @@ def coverage(self):
else:
return (self.data_key, self.lat_key, self.lon_key)


@tl.default("version")
def _version_default(self):
return SMAP_PRODUCT_DICT[self.product][4]
Expand Down Expand Up @@ -219,7 +218,7 @@ def read_file(self, filelike):
c = Coordinates([time, lon, lat], dims=["time", "lon", "lat"], crs="epsg:6933")

# make units data array with coordinates and data
return create_data_array(c, data=data)
return create_dataarray(c, data=data)

def append_file(self, all_data, data):
"""Append data
Expand Down

0 comments on commit f77e0c6

Please sign in to comment.