Merge pull request #3804 from jreback/ujson
ENH: add ujson support in pandas.io.json
jreback committed Jun 11, 2013
2 parents e958833 + 8e4314d commit a7f37d4
Showing 25 changed files with 7,083 additions and 200 deletions.
34 changes: 34 additions & 0 deletions LICENSES/ULTRAJSON_LICENSE
@@ -0,0 +1,34 @@
Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the ESN Social Software AB nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
http://code.google.com/p/stringencoders/
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.

Numeric decoder derived from from TCL library
http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
* Copyright (c) 1988-1993 The Regents of the University of California.
* Copyright (c) 1994 Sun Microsystems, Inc.
11 changes: 11 additions & 0 deletions doc/source/api.rst
@@ -45,6 +45,16 @@ Excel
read_excel
ExcelFile.parse

JSON
~~~~

.. currentmodule:: pandas.io.json

.. autosummary::
:toctree: generated/

read_json

HTML
~~~~

@@ -597,6 +607,7 @@ Serialization / IO / Conversion
DataFrame.to_hdf
DataFrame.to_dict
DataFrame.to_excel
DataFrame.to_json
DataFrame.to_html
DataFrame.to_stata
DataFrame.to_records
101 changes: 100 additions & 1 deletion doc/source/io.rst
@@ -35,6 +35,7 @@ object.
* ``read_excel``
* ``read_hdf``
* ``read_sql``
* ``read_json``
* ``read_html``
* ``read_stata``
* ``read_clipboard``
@@ -45,6 +46,7 @@ The corresponding ``writer`` functions are object methods that are accessed like
* ``to_excel``
* ``to_hdf``
* ``to_sql``
* ``to_json``
* ``to_html``
* ``to_stata``
* ``to_clipboard``
@@ -937,6 +939,104 @@ The Series object also has a ``to_string`` method, but with only the ``buf``,
which, if set to ``True``, will additionally output the length of the Series.


JSON
----

Read and write ``JSON`` format files.

.. _io.json:

Writing JSON
~~~~~~~~~~~~

A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
with optional parameters:

- path_or_buf : the pathname or buffer to write the output
This can be ``None`` in which case a JSON string is returned
- orient : The format of the JSON string, default is ``index`` for ``Series``, ``columns`` for ``DataFrame``

* split : dict like {index -> [index], columns -> [columns], data -> [values]}
* records : list like [{column -> value}, ... , {column -> value}]
* index : dict like {index -> {column -> value}}
* columns : dict like {column -> {index -> value}}
* values : just the values array

- date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601), default is epoch
- double_precision : The number of decimal places to use when encoding floating point values, default 10.
- force_ascii : force encoded string to be ASCII, default True.

Note that ``NaN``'s and ``None`` will be converted to ``null``, and datetime objects will be converted based on the ``date_format`` parameter.
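As a quick illustration of the ``orient`` layouts listed above, here is a sketch against the public ``to_json`` API (a plain code block rather than the ipython directives used elsewhere in this file; the tiny frame is illustrative only):

```python
import pandas as pd

# A tiny frame to compare the orient layouts side by side
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['x', 'y'])

print(df.to_json(orient='columns'))  # default for DataFrame
print(df.to_json(orient='records'))
print(df.to_json(orient='split'))
print(df.to_json(orient='index'))
print(df.to_json(orient='values'))
```

The same data round-trips through each layout; only the nesting of index, columns, and values changes.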

.. ipython:: python

   dfj = DataFrame(randn(5, 2), columns=list('AB'))
   json = dfj.to_json()
   json

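Beyond the default output, ``double_precision`` and ``force_ascii`` can be tuned as described above; a minimal sketch (the frames here are hypothetical examples, not part of this changeset):

```python
import pandas as pd

df = pd.DataFrame({'x': [1.0 / 3.0]})

# double_precision trims the number of decimal places emitted
print(df.to_json(double_precision=3))

# force_ascii=False leaves non-ASCII characters unescaped
names = pd.DataFrame({'c': ['café']})
print(names.to_json(force_ascii=False))
```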
Writing in ISO date format:

.. ipython:: python

   dfd = DataFrame(randn(5, 2), columns=list('AB'))
   dfd['date'] = Timestamp('20130101')
   json = dfd.to_json(date_format='iso')
   json

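The two ``date_format`` modes can be compared directly; a small sketch assuming a single midnight timestamp (epoch output is milliseconds since the UNIX epoch, ISO output is an ISO8601 string):

```python
import pandas as pd

df = pd.DataFrame({'date': [pd.Timestamp('2013-01-01')]})

# 'epoch' encodes dates as milliseconds since the UNIX epoch
print(df.to_json(date_format='epoch'))

# 'iso' encodes them as ISO8601 strings instead
print(df.to_json(date_format='iso'))
```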
Writing to a file, with a date index and a date column:

.. ipython:: python

   dfj2 = dfj.copy()
   dfj2['date'] = Timestamp('20130101')
   dfj2.index = date_range('20130101', periods=5)
   dfj2.to_json('test.json')
   open('test.json').read()

Reading JSON
~~~~~~~~~~~~

Reading a JSON string into a pandas object can take a number of parameters.
The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or
is ``None``. To explicitly force ``Series`` parsing, pass ``typ='series'``.

- filepath_or_buffer : a **VALID** JSON string or file handle / StringIO. The string could be
  a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
  is expected. For instance, a local file could be
  ``file://localhost/path/to/table.json``
- typ : type of object to recover (series or frame), default 'frame'
- orient : The format of the JSON string, one of the following

* split : dict like {index -> [index], name -> name, data -> [values]}
* records : list like [value, ... , value]
* index : dict like {index -> value}

- dtype : dtype of the resulting object
- numpy : direct decoding to numpy arrays. Default is True, but falls back to standard decoding if a problem occurs.
- parse_dates : a list of columns to parse for dates; if True, then try to parse datelike columns, default is False
- keep_default_dates : boolean, default True. If parsing dates, then parse the default datelike columns

The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is
not parsable.
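A minimal round trip through ``read_json`` with ``typ='series'`` might look like the following sketch (wrapping the string in ``StringIO`` is a present-day convenience so newer pandas versions do not treat it as a file path; it is not part of the original changeset):

```python
import pandas as pd
from io import StringIO

s = pd.Series([10, 20], index=['a', 'b'])
payload = s.to_json()          # Series default orient is 'index'
print(payload)

# Explicitly force Series parsing with typ='series'
restored = pd.read_json(StringIO(payload), typ='series')
print(restored)
```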

Reading from a JSON string:

.. ipython:: python

   pd.read_json(json)

Reading from a file, parsing dates:

.. ipython:: python

   pd.read_json('test.json', parse_dates=True)

.. ipython:: python
   :suppress:

   import os
   os.remove('test.json')

HTML
----

@@ -2193,7 +2293,6 @@ into a .dta file. The format version of this file is always the latest one, 115.
.. ipython:: python

   from pandas.io.stata import StataWriter
   df = DataFrame(randn(10, 2), columns=list('AB'))
   df.to_stata('stata.dta')
6 changes: 6 additions & 0 deletions doc/source/v0.11.1.txt
@@ -16,6 +16,7 @@ API changes
* ``read_excel``
* ``read_hdf``
* ``read_sql``
* ``read_json``
* ``read_html``
* ``read_stata``
* ``read_clipboard``
@@ -26,6 +27,7 @@ API changes
* ``to_excel``
* ``to_hdf``
* ``to_sql``
* ``to_json``
* ``to_html``
* ``to_stata``
* ``to_clipboard``
@@ -175,6 +177,10 @@ Enhancements
accessible via ``read_stata`` top-level function for reading,
and ``to_stata`` DataFrame method for writing, :ref:`See the docs<io.stata>`

- Added module for reading and writing JSON format files: ``pandas.io.json``
accessible via ``read_json`` top-level function for reading,
and ``to_json`` DataFrame method for writing, :ref:`See the docs<io.json>`

- ``DataFrame.replace()`` now allows regular expressions on contained
``Series`` with object dtype. See the examples section in the regular docs
:ref:`Replacing via String Expression <missing_data.replace_expression>`
39 changes: 39 additions & 0 deletions pandas/core/generic.py
@@ -495,6 +495,45 @@ def to_clipboard(self):
from pandas.io import clipboard
clipboard.to_clipboard(self)

def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
            double_precision=10, force_ascii=True):
    """
    Convert the object to a JSON string.

    Note NaN's and None will be converted to null and datetime objects
    will be converted to UNIX timestamps.

    Parameters
    ----------
    path_or_buf : the path or buffer to write the result string
        if this is None, return a StringIO of the converted string
    orient : {'split', 'records', 'index', 'columns', 'values'},
        default is 'index' for Series, 'columns' for DataFrame
        The format of the JSON string

        split : dict like
            {index -> [index], columns -> [columns], data -> [values]}
        records : list like [{column -> value}, ... , {column -> value}]
        index : dict like {index -> {column -> value}}
        columns : dict like {column -> {index -> value}}
        values : just the values array
    date_format : type of date conversion (epoch = epoch milliseconds,
        iso = ISO8601), default is epoch
    double_precision : The number of decimal places to use when encoding
        floating point values, default 10.
    force_ascii : force encoded string to be ASCII, default True.

    Returns
    -------
    result : a JSON compatible string written to the path_or_buf;
        if the path_or_buf is None, return a StringIO of the result
    """

    from pandas.io import json
    return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient,
                        date_format=date_format,
                        double_precision=double_precision,
                        force_ascii=force_ascii)

# install the indexers
for _name, _indexer in indexing.get_indexers_list():
    PandasObject._create_indexer(_name, _indexer)
1 change: 1 addition & 0 deletions pandas/io/api.py
@@ -6,6 +6,7 @@
from pandas.io.clipboard import read_clipboard
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.pytables import HDFStore, Term, get_store, read_hdf
from pandas.io.json import read_json
from pandas.io.html import read_html
from pandas.io.sql import read_sql
from pandas.io.stata import read_stata
1 change: 1 addition & 0 deletions pandas/io/common.py
@@ -2,6 +2,7 @@

import urlparse
from pandas.util import py3compat
from StringIO import StringIO

_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
urlparse.uses_params)
2 changes: 1 addition & 1 deletion pandas/io/excel.py
@@ -11,7 +11,7 @@

from pandas.io.parsers import TextParser
from pandas.tseries.period import Period
import json
from pandas import json

def read_excel(path_or_buf, sheetname, kind=None, **kwds):
"""Read an Excel table into a pandas DataFrame