Skip to content

Commit

Permalink
REF: use public pandas API in dataframe.empty (#571)
Browse files: browse the repository at this point in the history
* REF: use public pandas API in dataframe.empty

* Address failing tests

* REF/PERF: faster implementation, equally kludgy, somewhat more future-proof
  • Loading branch information
jbrockmendel committed Mar 16, 2021
1 parent e037540 commit 0597805
Showing 1 changed file with 22 additions and 20 deletions.
42 changes: 22 additions & 20 deletions fastparquet/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,31 +163,33 @@ def set_cats(values, i=i, col=col, **kwargs):

axes = [df._data.axes[0], index]

# allocate and create blocks
blocks = []
for block in df._data.blocks:
if block.is_categorical:
categories = block.values.categories
code = np.zeros(shape=size, dtype=block.values.codes.dtype)
values = Categorical(values=code, categories=categories,
# Patch our blocks with desired-length arrays. Kids: don't try this at home.
mgr = df._data
for block in mgr.blocks:
bvalues = block.values
shape = list(bvalues.shape)
shape[-1] = size

if isinstance(bvalues, Categorical):
categories = bvalues.categories
code = np.zeros(shape=shape, dtype=bvalues.codes.dtype)

values = Categorical(values=code, dtype=bvalues.dtype,
fastpath=True)
new_block = block.make_block_same_class(values=values)
elif getattr(block.dtype, 'tz', None):
new_shape = (size, )
values = np.empty(shape=new_shape, dtype='M8[ns]')
new_block = block.make_block_same_class(
type(block.values)(values, dtype=block.values.dtype)
)

elif getattr(bvalues.dtype, 'tz', None):
values = np.empty(shape=shape, dtype='M8[ns]')
values = type(bvalues)(values, dtype=bvalues.dtype)
else:
new_shape = (block.values.shape[0], size)
values = np.empty(shape=new_shape, dtype=block.values.dtype)
new_block = block.make_block_same_class(values=values)
# Note: this will break on any ExtensionDtype other than
# Categorical and DatetimeTZ
values = np.empty(shape=shape, dtype=bvalues.dtype)

blocks.append(new_block)
block.values = values

# create block manager
df = DataFrame(BlockManager(blocks, axes))
mgr.axes[-1] = index

# create block manager
# create views
for block in df._data.blocks:
dtype = block.dtype
Expand Down

0 comments on commit 0597805

Please sign in to comment.