Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ValueError when saving multiindexed datasets in netcdf #1547

Closed
philastrophist opened this issue Sep 1, 2017 · 1 comment
Closed

ValueError when saving multiindexed datasets in netcdf #1547

philastrophist opened this issue Sep 1, 2017 · 1 comment

Comments

@philastrophist
Copy link

Given the test code below (it reconstructs how I receive my datasets, so ignore the complicated part at the beginning and just look at the variable concat), a ValueError is thrown when the dataset is saved to netCDF.

import xarray as xr
import numpy as np
import pandas as pd
import seaborn as sns # pandas aware plotting library
from copy import deepcopy
import time

# Seed NumPy's global RNG so every random draw below is repeatable between runs.
np.random.seed(123)

def strTimeProp(start, end, format, prop):
    """Return the time lying a given fraction of the way between two times.

    ``start`` and ``end`` are strings written in ``format`` (a
    strftime-style pattern); together they bound the interval
    [start, end].  ``prop`` is the fraction of that interval to advance
    past ``start``.  The result is rendered with the same ``format``.
    """
    # Parse both endpoints into seconds since the epoch (local time),
    # start first and end second.
    lower, upper = (
        time.mktime(time.strptime(bound, format)) for bound in (start, end)
    )

    # Linear interpolation between the two timestamps.
    picked = lower + prop * (upper - lower)

    return time.strftime(format, time.localtime(picked))


def randomDate(start, end):
    """Return a randomly chosen time between ``start`` and ``end``.

    Both bounds, and the returned string, use the fixed format
    ``'%d/%m/%Y %I:%M %p'`` (e.g. ``'1/1/2015 1:00 AM'``).
    """
    # One draw from NumPy's global RNG decides how far into the interval to go.
    fraction = np.random.random()
    return strTimeProp(start, end, '%d/%m/%Y %I:%M %p', fraction)


# Configuration for the synthetic survey used to reproduce the problem.
# Star ids are simply the integers 0 .. n_stars - 1.
n_stars = 100
stars = list(range(n_stars))
# pointing id -> list of target star ids for that pointing
pointings_targets = {0: [0], 1: [1], 2: [2, 3]}

# pointing id -> {band name: number of exposures taken in that band}
pointing_exposures = {0: {'iband': 30, 'ha1':34}, 1: {'iband': 29, 'ha1':31}, 2: {'iband': 24, 'ha2': 33}}
# Fraction of a pointing's stars that is sampled as "detected" in each exposure.
detection_efficiency = 0.95

# Column names of every generated per-exposure table ('a' .. 'i').
columns = list('abcdefghi')




# Pointing ids, and the flattened list of all target star ids.
pointings = list(pointings_targets.keys())
targets = [j for i in pointings_targets.values() for j in i]
# NOTE(review): these two counts are not referenced again in the visible snippet.
n_pointings = len(pointings)
n_targets = len(targets)


# Decide which stars fall in each pointing: its targets plus a random
# selection of the remaining stars.
pointing_stars = {}
for pointing in pointings:
	# Start from the targets.  NOTE: this binds the *same* list object as
	# pointings_targets[pointing], so the in-place += below extends that list too.
	pointing_stars[pointing] = pointings_targets[pointing]  # contains the targets
	# Every star that is not already in this pointing is a candidate.
	stars_to_choose_from = list(set(stars) - set(pointing_stars[pointing]))  # other stars to choose
	# Draw a random number of candidates (at least 5, fewer than all of them),
	# without repeats.
	chosen_stars = np.random.choice(stars_to_choose_from, np.random.randint(5, len(stars_to_choose_from)), replace=False).tolist()
	# Append the extra stars after the targets (targets stay first in the list).
	pointing_stars[pointing] += chosen_stars # list(set(chosen_stars) | set(pointing_stars[pointing]))  # stars in a pointing

	
# Build one small table per exposure and remember its (pointing, band, date) key.
data = []
indices = []
for pointing, bands in pointing_exposures.items():
	for band_name, n_exps in bands.items():
		# One random timestamp per exposure.  The values are formatted strings,
		# so sorted() orders them lexicographically, not necessarily chronologically.
		dates = sorted([randomDate('1/1/2015 1:00 AM', '30/1/2015 1:00 AM') for _ in range(n_exps)])
		for i, date in enumerate(dates):
			# Sample, without repeats, the detection_efficiency fraction of
			# this pointing's stars as the ones detected in this exposure.
			stars_detected = np.random.choice(pointing_stars[pointing], int(detection_efficiency*len(pointing_stars[pointing])), replace=False)
			# Random measurements: one row per detected star, one column per name.
			values = np.random.randn(len(stars_detected), len(columns))
			table = pd.DataFrame(values, columns=columns, index=stars_detected)  # as if from file
			table.index.name = 'objid'
			# DataFrame -> Dataset -> DataArray (columns become one array dimension).
			arr = table.to_xarray().to_array()
			indices.append((pointing, band_name, date))
			data.append(arr)
# Stack every per-exposure array along a new dimension labelled by a
# (pointing, band, date) MultiIndex, then split the 'variable' dimension back
# into separate data variables.  This MultiIndex is what the report is about.
concat = xr.concat(data, pd.MultiIndex.from_tuples(indices, names=['pointing', 'band', 'date'])).to_dataset('variable')


# Attach per-object columns ('ra', 'dec'), filled with random normal values,
# to the dataset.
o = concat.coords['objid'].values
other = pd.DataFrame(np.random.normal(0, 1, size=(len(o), 2)), index=o, columns=['ra', 'dec'])
other.index.name = 'objid'
concat = concat.merge(other.to_xarray())  # dataframe converted to a Dataset, merged on 'objid'
# Writing the MultiIndexed dataset to netCDF is the step reported to raise
# "ValueError: setting an array element with a sequence".
concat.to_netcdf('test')
ValueError                                Traceback (most recent call last)
<ipython-input-3-bd4eb2732e50> in <module>()
----> 1 concat.to_netcdf('test2.cdf')

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\core\dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims)
    975         return to_netcdf(self, path, mode, format=format, group=group,
    976                          engine=engine, encoding=encoding,
--> 977                          unlimited_dims=unlimited_dims)
    978
    979     def __unicode__(self):

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\backends\api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, writer, encoding, unlimited_dims)
    571     try:
    572         dataset.dump_to_store(store, sync=sync, encoding=encoding,
--> 573                               unlimited_dims=unlimited_dims)
    574         if path_or_file is None:
    575             return target.getvalue()

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\core\dataset.py in dump_to_store(self, store, encoder, sync, encoding, unlimited_dims)
    914
    915         store.store(variables, attrs, check_encoding,
--> 916                     unlimited_dims=unlimited_dims)
    917         if sync:
    918             store.sync()

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\backends\common.py in store(self, variables, attributes, *args, **kwargs)
    242         # All NetCDF files get CF encoded by default, without this attempting
    243         # to write times, for example, would fail.
--> 244         cf_variables, cf_attrs = cf_encoder(variables, attributes)
    245         AbstractWritableDataStore.store(self, cf_variables, cf_attrs,
    246                                         *args, **kwargs)

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\conventions.py in cf_encoder(variables, attributes)
   1087     """
   1088     new_vars = OrderedDict((k, encode_cf_variable(v, name=k))
-> 1089                            for k, v in iteritems(variables))
   1090     return new_vars, attributes

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\conventions.py in <genexpr>(.0)
   1087     """
   1088     new_vars = OrderedDict((k, encode_cf_variable(v, name=k))
-> 1089                            for k, v in iteritems(variables))
   1090     return new_vars, attributes

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\conventions.py in encode_cf_variable(var, needs_copy, name)
    739     var = maybe_default_fill_value(var)
    740     var = maybe_encode_bools(var)
--> 741     var = ensure_dtype_not_object(var)
    742     return var
    743

C:\Anaconda3\envs\test_xarray\lib\site-packages\xarray\conventions.py in ensure_dtype_not_object(var)
    707             data[missing] = fill_value
    708         else:
--> 709             data = data.astype(dtype=_infer_dtype(data))
    710         var = Variable(dims, data, attrs, encoding)
    711     return var

ValueError: setting an array element with a sequence
@shoyer
Copy link
Member

shoyer commented Sep 2, 2017

Unfortunately, we don't support saving objects with a MultiIndex to netCDF files yet (#1077). But we should definitely give a better error message here, to make this clear! (Also it should be documented, if it isn't already.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants