REF: dont alter state in pytables read_axes (pandas-dev#30184)
jbrockmendel authored and proost committed Dec 19, 2019
1 parent 1b6e867 commit 1e28057
Showing 1 changed file with 83 additions and 51 deletions.
134 changes: 83 additions & 51 deletions pandas/io/pytables.py
@@ -1965,7 +1965,9 @@ def is_indexed(self) -> bool:
return getattr(self.table.cols, self.cname).is_indexed # type: ignore

def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
""" set the values from this selection: take = take ownership """
"""
Convert the data from this selection to the appropriate pandas type.
"""
assert isinstance(values, np.ndarray), type(values)

# values is a recarray
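
The comment above refers to the structured ("record") array that PyTables hands back, in which each stored column is addressed by field name. A minimal standalone sketch of that kind of field lookup, with made-up field names rather than the real stored block names:

    import numpy as np

    # a structured array roughly shaped like what a PyTables table read returns
    recs = np.array(
        [(0, 1.5), (1, 2.5)],
        dtype=[("index", "i8"), ("values_block_0", "f8")],
    )
    col = recs["values_block_0"]  # one column selected by field name -> array([1.5, 2.5])
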
@@ -1991,7 +1993,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
new_pd_index = Index(values, **kwargs)

new_pd_index = _set_tz(new_pd_index, self.tz)
self.values = new_pd_index
return new_pd_index, new_pd_index

def take_data(self):
""" return the values"""
@@ -2144,7 +2146,7 @@ def is_indexed(self) -> bool:

def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
"""
Set the values from this selection.
Convert the data from this selection to the appropriate pandas type.
Parameters
----------
Expand All @@ -2154,7 +2156,9 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
errors : str
"""
assert isinstance(values, np.ndarray), type(values)
self.values = Int64Index(np.arange(len(values)))

values = Int64Index(np.arange(len(values)))
return values, values

def set_attr(self):
pass
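
For GenericIndexCol the converted index depends only on the number of rows read, not on the stored values. A small sketch of that contract; it uses a plain pd.Index so it stays runnable on current pandas, whereas the code above builds an Int64Index:

    import numpy as np
    import pandas as pd

    def generic_convert(values: np.ndarray):
        # positional integer index matching the number of rows read
        idx = pd.Index(np.arange(len(values)))
        return idx, idx

    index, data = generic_convert(np.array([1.5, 2.5, 3.5]))
    assert index.equals(data)
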
@@ -2338,8 +2342,20 @@ def validate_attr(self, append):
)

def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
"""set the data from this selection (and convert to the correct dtype
if we can)
"""
Convert the data from this selection to the appropriate pandas type.
Parameters
----------
values : np.ndarray
nan_rep :
encoding : str
errors : str
Returns
-------
index : listlike to become an Index
data : ndarraylike to become a column
"""
assert isinstance(values, np.ndarray), type(values)

@@ -2349,44 +2365,50 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):

assert self.typ is not None
if self.dtype is None:
self.set_data(values)
# Note: in tests we never have timedelta64 or datetime64,
# so the _get_data_and_dtype_name may be unnecessary
converted, dtype_name = _get_data_and_dtype_name(values)
kind = _dtype_to_kind(dtype_name)
else:
self.data = values
converted = values
dtype_name = self.dtype
kind = self.kind

own_data = self.data
assert isinstance(own_data, np.ndarray) # for mypy
assert isinstance(converted, np.ndarray) # for mypy

# use the meta if needed
meta = _ensure_decoded(self.meta)
metadata = self.metadata
ordered = self.ordered
tz = self.tz

assert self.dtype is not None

assert dtype_name is not None
# convert to the correct dtype
dtype = _ensure_decoded(self.dtype)
dtype = _ensure_decoded(dtype_name)

# reverse converts
if dtype == "datetime64":

# recreate with tz if indicated
own_data = _set_tz(own_data, self.tz, coerce=True)
converted = _set_tz(converted, tz, coerce=True)

elif dtype == "timedelta64":
own_data = np.asarray(own_data, dtype="m8[ns]")
converted = np.asarray(converted, dtype="m8[ns]")
elif dtype == "date":
try:
own_data = np.asarray(
[date.fromordinal(v) for v in own_data], dtype=object
converted = np.asarray(
[date.fromordinal(v) for v in converted], dtype=object
)
except ValueError:
own_data = np.asarray(
[date.fromtimestamp(v) for v in own_data], dtype=object
converted = np.asarray(
[date.fromtimestamp(v) for v in converted], dtype=object
)

elif meta == "category":

# we have a categorical
categories = self.metadata
codes = own_data.ravel()
categories = metadata
codes = converted.ravel()

# if we have stored a NaN in the categories
# then strip it; in theory we could have BOTH
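
The block above re-derives the target dtype from dtype_name rather than from instance state and then "reverse converts" the raw storage. A hedged, self-contained sketch of that dispatch, simplified to skip timezone handling and with illustrative names only:

    from datetime import date

    import numpy as np

    def reverse_convert(raw: np.ndarray, dtype_name: str) -> np.ndarray:
        # reinterpret the stored integers according to the recorded dtype name
        if dtype_name == "datetime64":
            return np.asarray(raw, dtype="M8[ns]")
        if dtype_name == "timedelta64":
            return np.asarray(raw, dtype="m8[ns]")
        if dtype_name == "date":
            return np.asarray([date.fromordinal(v) for v in raw], dtype=object)
        return raw

    print(reverse_convert(np.array([0, 86_400 * 10**9]), "datetime64"))
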
@@ -2403,24 +2425,24 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
categories = categories[~mask]
codes[codes != -1] -= mask.astype(int).cumsum().values

own_data = Categorical.from_codes(
codes, categories=categories, ordered=self.ordered
converted = Categorical.from_codes(
codes, categories=categories, ordered=ordered
)

else:

try:
own_data = own_data.astype(dtype, copy=False)
converted = converted.astype(dtype, copy=False)
except TypeError:
own_data = own_data.astype("O", copy=False)
converted = converted.astype("O", copy=False)

# convert nans / decode
if _ensure_decoded(self.kind) == "string":
own_data = _unconvert_string_array(
own_data, nan_rep=nan_rep, encoding=encoding, errors=errors
if _ensure_decoded(kind) == "string":
converted = _unconvert_string_array(
converted, nan_rep=nan_rep, encoding=encoding, errors=errors
)

self.data = own_data
return self.values, converted

def set_attr(self):
""" set the data for this column """
@@ -3552,9 +3574,9 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
)
v.create_index(**kw)

def read_axes(
def _read_axes(
self, where, start: Optional[int] = None, stop: Optional[int] = None
) -> bool:
) -> List[Tuple[ArrayLike, ArrayLike]]:
"""
Create the axes sniffed from the table.
@@ -3566,32 +3588,26 @@ def read_axes(
Returns
-------
bool
Indicates success.
List[Tuple[index_values, column_values]]
"""

# validate the version
self.validate_version(where)

# infer the data kind
if not self.infer_axes():
return False

# create the selection
selection = Selection(self, where=where, start=start, stop=stop)
values = selection.select()

results = []
# convert the data
for a in self.axes:
a.set_info(self.info)
a.convert(
res = a.convert(
values,
nan_rep=self.nan_rep,
encoding=self.encoding,
errors=self.errors,
)
results.append(res)

return True
return results

def get_object(self, obj, transposed: bool):
""" return the data for this obj """
@@ -4038,13 +4054,13 @@ def read_column(
# column must be an indexable or a data column
c = getattr(self.table.cols, column)
a.set_info(self.info)
a.convert(
col_values = a.convert(
c[start:stop],
nan_rep=self.nan_rep,
encoding=self.encoding,
errors=self.errors,
)
return Series(_set_tz(a.take_data(), a.tz), name=column)
return Series(_set_tz(col_values[1], a.tz), name=column)

raise KeyError(f"column [{column}] not found in the table")

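read_column follows the same pattern: convert() now hands back the (index, data) tuple, so the Series is built from element [1] of that tuple rather than via take_data(). A small sketch of the new consumption, with toy values and the timezone handling omitted:

    import numpy as np
    from pandas import Series

    index_values = np.arange(4)
    column_values = np.array([1.0, 2.0, 3.0, 4.0])
    col_values = (index_values, column_values)   # shape of the new return value

    s = Series(col_values[1], name="my_column")  # hypothetical column name
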
@@ -4328,34 +4344,50 @@ def read(
stop: Optional[int] = None,
):

if not self.read_axes(where=where, start=start, stop=stop):
# validate the version
self.validate_version(where)

# infer the data kind
if not self.infer_axes():
return None

result = self._read_axes(where=where, start=start, stop=stop)

info = (
self.info.get(self.non_index_axes[0][0], dict())
if len(self.non_index_axes)
else dict()
)
index = self.index_axes[0].values

inds = [i for i, ax in enumerate(self.axes) if ax is self.index_axes[0]]
assert len(inds) == 1
ind = inds[0]

index = result[ind][0]

frames = []
for a in self.values_axes:
for i, a in enumerate(self.axes):
if a not in self.values_axes:
continue
index_vals, cvalues = result[i]

# we could have a multi-index constructor here
# ensure_index doesn't recognized our list-of-tuples here
if info.get("type") == "MultiIndex":
cols = MultiIndex.from_tuples(a.values)
cols = MultiIndex.from_tuples(index_vals)
else:
cols = Index(a.values)
cols = Index(index_vals)

names = info.get("names")
if names is not None:
cols.set_names(names, inplace=True)

if self.is_transposed:
values = a.cvalues
values = cvalues
index_ = cols
cols_ = Index(index, name=getattr(index, "name", None))
else:
values = a.cvalues.T
values = cvalues.T
index_ = Index(index, name=getattr(index, "name", None))
cols_ = cols

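The reader now finds the index axis by identity within self.axes, takes its converted values from the result list, and builds each block's column labels from the values axis. A hedged sketch of the column-label branch above, with assumed values for info.get("type") and the stored label tuples:

    from pandas import Index, MultiIndex

    stored_labels = [("bar", "one"), ("bar", "two")]
    info_type = "MultiIndex"  # assumed value of info.get("type")

    if info_type == "MultiIndex":
        cols = MultiIndex.from_tuples(stored_labels)
    else:
        cols = Index(stored_labels)

    print(cols)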