Skip to content

Commit

Permalink
API: the fmt keyword now replaces the table keyword; allowed valu…
Browse files Browse the repository at this point in the history
…es are ``s|t``

     the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies 's' (Storer) format
     and ``append`` implies 't' (Table) format
  • Loading branch information
jreback committed Aug 24, 2013
1 parent a3abf80 commit 952a342
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 42 deletions.
5 changes: 3 additions & 2 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1803,6 +1803,7 @@ The examples above show storing using ``put``, which write the HDF5 to ``PyTable
the ``storer`` format. These types of stores are **not** appendable once written (though you can simply
remove them and rewrite). Nor are they **queryable**; they must be
retrieved in their entirety. These offer very fast writing and slightly faster reading than ``table`` stores.
This format is specified by default when using ``put`` or by ``fmt='s'``
.. warning::
Expand All @@ -1826,7 +1827,7 @@ Table Format
format. Conceptually a ``table`` is shaped very much like a DataFrame,
with rows and columns. A ``table`` may be appended to in the same or
other sessions. In addition, delete & query type operations are
supported.
supported. This format is specified by ``fmt='t'`` to ``append`` or ``put``.
.. ipython:: python
:suppress:
Expand All @@ -1853,7 +1854,7 @@ supported.
.. note::
You can also create a ``table`` by passing ``table=True`` to a ``put`` operation.
You can also create a ``table`` by passing ``fmt='t'`` to a ``put`` operation.
.. _io.hdf5-keys:
Expand Down
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ pandas 0.13
- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will
be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`)
- allow a passed locations array or mask as a ``where`` condition (:issue:`4467`)
- the ``fmt`` keyword now replaces the ``table`` keyword; allowed values are ``s|t``
- ``JSON``

- added ``date_unit`` parameter to specify resolution of timestamps. Options
Expand Down
20 changes: 20 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,33 @@ API changes
store2.close()
store2

.. ipython:: python
:suppress:

import os
os.remove(path)

- removed the ``_quiet`` attribute, replaced by a ``DuplicateWarning`` if retrieving
duplicate rows from a table (:issue:`4367`)
- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will
be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`)
- allow a passed locations array or mask as a ``where`` condition (:issue:`4467`).
See :ref:`here<io.hdf5-where_mask>` for an example.

- the ``fmt`` keyword now replaces the ``table`` keyword; allowed values are ``s|t``
the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies 's' (Storer) format
and ``append`` implies 't' (Table) format

.. ipython:: python

path = 'test.h5'
df = DataFrame(randn(10,2))
df.to_hdf(path,'df_table',fmt='t')
df.to_hdf(path,'df_table2',append=True)
df.to_hdf(path,'df_storer')
with get_store(path) as store:
print store

.. ipython:: python
:suppress:

Expand Down
86 changes: 69 additions & 17 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ class DuplicateWarning(Warning):
map directly to c-types [inferred_type->%s,key->%s] [items->%s]
"""

# formats
_FORMAT_MAP = {
u('s') : 's',
u('storer') : 's',
u('t') : 't',
u('table') : 't',
}

fmt_deprecate_doc = """
the table keyword has been deprecated
use the fmt='s|t' keyword instead
s : specifies the Storer format
and is the default for put operations
t : specifies the Table format
and is the default for append operations
"""

# map object types
_TYPE_MAP = {

Expand Down Expand Up @@ -545,7 +562,7 @@ def select_as_coordinates(self, key, where=None, start=None, stop=None, **kwargs

def unique(self, key, column, **kwargs):
warnings.warn("unique(key,column) is deprecated\n"
"use select_column(key,column).unique() instead")
"use select_column(key,column).unique() instead",FutureWarning)
return self.get_storer(key).read_column(column=column, **kwargs).unique()

def select_column(self, key, column, **kwargs):
Expand Down Expand Up @@ -641,24 +658,28 @@ def func(_start, _stop):

return TableIterator(self, func, nrows=nrows, start=start, stop=stop, auto_close=auto_close).get_values()

def put(self, key, value, table=None, append=False, **kwargs):
def put(self, key, value, fmt=None, append=False, **kwargs):
"""
Store object in HDFStore
Parameters
----------
key : object
value : {Series, DataFrame, Panel}
table : boolean, default False
Write as a PyTables Table structure which may perform worse but
allow more flexible operations like searching / selecting subsets
of the data
fmt : 's|t', default is 's' for storer format
s : storer format
Fast writing/reading. Not-appendable, nor searchable
t : table format
Write as a PyTables Table structure which may perform worse but
allow more flexible operations like searching / selecting subsets
of the data
append : boolean, default False
For table data structures, append the input data to the existing
table
encoding : default None, provide an encoding for strings
"""
self._write_to_group(key, value, table=table, append=append, **kwargs)
kwargs = self._validate_format(fmt or 's', kwargs)
self._write_to_group(key, value, append=append, **kwargs)

def remove(self, key, where=None, start=None, stop=None):
"""
Expand Down Expand Up @@ -709,7 +730,7 @@ def remove(self, key, where=None, start=None, stop=None):
'can only remove with where on objects written as tables')
return s.delete(where=where, start=start, stop=stop)

def append(self, key, value, columns=None, append=True, **kwargs):
def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
"""
Append to Table in file. Node must already exist and be Table
format.
Expand All @@ -718,6 +739,11 @@ def append(self, key, value, columns=None, append=True, **kwargs):
----------
key : object
value : {Series, DataFrame, Panel, Panel4D}
fmt : 't', default is 't' for table format
t : table format
Write as a PyTables Table structure which may perform worse but
allow more flexible operations like searching / selecting subsets
of the data
append : boolean, default True, append the input data to the existing
data_columns : list of columns to create as data columns, or True to use all columns
min_itemsize : dict of columns that specify minimum string sizes
Expand All @@ -735,7 +761,7 @@ def append(self, key, value, columns=None, append=True, **kwargs):
raise Exception(
"columns is not a supported keyword in append, try data_columns")

kwargs['table'] = True
kwargs = self._validate_format(fmt or 't', kwargs)
self._write_to_group(key, value, append=append, **kwargs)

def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, **kwargs):
Expand Down Expand Up @@ -901,13 +927,39 @@ def _check_if_open(self):
if not self.is_open:
raise ClosedFileError("{0} file is not open!".format(self._path))

def _create_storer(self, group, value=None, table=False, append=False, **kwargs):
def _validate_format(self, fmt, kwargs):
    """ validate / deprecate formats; return the new kwargs

    Parameters
    ----------
    fmt : string, the requested format; looked up case-insensitively
        in _FORMAT_MAP
    kwargs : dict of keyword arguments; it is copied, the passed dict
        is not modified

    Returns
    -------
    a copy of kwargs with any 'table' entry removed and 'fmt' set to
    the normalized format code taken from _FORMAT_MAP

    Raises
    ------
    TypeError if 'format' is passed in kwargs, or if fmt is not a
    recognized format
    """
    kwargs = kwargs.copy()

    if 'format' in kwargs:
        raise TypeError("pls specify an object format with the 'fmt' keyword")

    # the deprecated table keyword, when given, overrides the passed fmt
    table = kwargs.pop('table', None)

    if table is not None:
        warnings.warn(fmt_deprecate_doc, FutureWarning)
        fmt = 't' if table else 's'

    # validate; only trap the failures the lookup itself can produce:
    # AttributeError -> fmt has no .lower() (e.g. not a string)
    # KeyError -> fmt is not a known format
    # a bare except would also swallow KeyboardInterrupt / SystemExit
    try:
        kwargs['fmt'] = _FORMAT_MAP[fmt.lower()]
    except (AttributeError, KeyError):
        raise TypeError("invalid HDFStore format specified [{0}]".format(fmt))

    return kwargs

def _create_storer(self, group, fmt=None, value=None, append=False, **kwargs):
""" return a suitable Storer class to operate """

def error(t):
raise TypeError(
"cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
(t, group, type(value), table, append, kwargs))
"cannot properly create the storer for: [%s] [group->%s,value->%s,fmt->%s,append->%s,kwargs->%s]" %
(t, group, type(value), fmt, append, kwargs))

pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None))
tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None))
Expand All @@ -931,7 +983,7 @@ def error(t):
error('_TYPE_MAP')

# we are actually a table
if table or append:
if fmt == 't':
pt += u('_table')

# a storer node
Expand Down Expand Up @@ -983,7 +1035,7 @@ def error(t):
error('_TABLE_MAP')

def _write_to_group(
self, key, value, index=True, table=False, append=False,
self, key, value, fmt, index=True, append=False,
complib=None, encoding=None, **kwargs):
group = self.get_node(key)

Expand All @@ -994,7 +1046,7 @@ def _write_to_group(

# we don't want to store a table node at all if are object is 0-len
# as there are not dtypes
if getattr(value,'empty',None) and (table or append):
if getattr(value,'empty',None) and (fmt == 't' or append):
return

if group is None:
Expand All @@ -1014,12 +1066,12 @@ def _write_to_group(
group = self._handle.createGroup(path, p)
path = new_path

s = self._create_storer(group, value, table=table, append=append,
s = self._create_storer(group, fmt, value, append=append,
encoding=encoding, **kwargs)
if append:
# raise if we are trying to append to a non-table,
# or a table that exists (and we are putting)
if not s.is_table or (s.is_table and table is None and s.is_exists):
if not s.is_table or (s.is_table and fmt == 's' and s.is_exists):
raise ValueError('Can only append to Tables')
if not s.is_exists:
s.set_object_info()
Expand Down
Loading

0 comments on commit 952a342

Please sign in to comment.