-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Enable ExcelWriter to construct in-memory sheets #10376
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ pytz | |
openpyxl | ||
xlsxwriter | ||
xlrd | ||
xlwt | ||
html5lib | ||
patsy | ||
beautiful-soup | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ pytz | |
openpyxl | ||
xlsxwriter | ||
xlrd | ||
xlwt | ||
html5lib | ||
patsy | ||
beautiful-soup | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2130,7 +2130,9 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`. | |
df1.to_excel(writer, sheet_name='Sheet1') | ||
df2.to_excel(writer, sheet_name='Sheet2') | ||
|
||
.. note:: Wringing a little more performance out of ``read_excel`` | ||
.. note:: | ||
|
||
Wringing a little more performance out of ``read_excel`` | ||
Internally, Excel stores all numeric data as floats. Because this can | ||
produce unexpected behavior when reading in data, pandas defaults to trying | ||
to convert floats to integers if it doesn't lose information (``1.0 --> | ||
|
@@ -2182,6 +2184,45 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: | |
|
||
df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a reference here (so we can link to it)? |
||
Writing Excel Files to Memory | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
.. versionadded:: 0.17 | ||
|
||
.. _io.excel_writing_buffer: | ||
|
||
Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` or | ||
``BytesIO`` using :class:`~pandas.io.excel.ExcelWriter`. | ||
|
||
.. code-block:: python | ||
|
||
# Safe import for either Python 2.x or 3.x | ||
try: | ||
from io import BytesIO | ||
except ImportError: | ||
from cStringIO import StringIO as BytesIO | ||
|
||
bio = BytesIO() | ||
|
||
# By setting the 'engine' in the ExcelWriter constructor. | ||
writer = ExcelWriter(bio, engine='xlsxwriter') | ||
df.to_excel(writer, sheet_name='Sheet1') | ||
|
||
# Save the workbook | ||
writer.save() | ||
|
||
# Seek to the beginning and read to copy the workbook to a variable in memory | ||
bio.seek(0) | ||
workbook = bio.read() | ||
|
||
.. note:: | ||
|
||
``engine`` is optional but recommended. Setting the engine determines | ||
the version of workbook produced. Setting ``engine='xlwt'`` will produce an | ||
Excel 2003-format workbook (xls). Using either ``'openpyxl'`` or | ||
``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If | ||
omitted, an Excel 2007-formatted workbook is produced. | ||
|
||
.. _io.clipboard: | ||
|
||
Clipboard | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,13 @@ | |
import abc | ||
import numpy as np | ||
|
||
from pandas.core.frame import DataFrame | ||
from pandas.io.parsers import TextParser | ||
from pandas.io.common import _is_url, _urlopen | ||
from pandas.tseries.period import Period | ||
from pandas import json | ||
from pandas.compat import map, zip, reduce, range, lrange, u, add_metaclass | ||
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, | ||
BytesIO, string_types) | ||
from pandas.core import config | ||
from pandas.core.common import pprint_thing | ||
import pandas.compat as compat | ||
|
@@ -417,10 +419,13 @@ def _parse_cell(cell_contents,cell_typ): | |
if parse_cols is None or should_parse[j]: | ||
row.append(_parse_cell(value,typ)) | ||
data.append(row) | ||
|
||
|
||
if sheet.nrows == 0: | ||
return DataFrame() | ||
|
||
if header is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. what about There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I added a test for this case and it works correctly already. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yep, saw that below. thanks. |
||
data[header] = _trim_excel_header(data[header]) | ||
|
||
parser = TextParser(data, header=header, index_col=index_col, | ||
has_index_names=has_index_names, | ||
na_values=na_values, | ||
|
@@ -474,6 +479,8 @@ def _conv_value(val): | |
val = bool(val) | ||
elif isinstance(val, Period): | ||
val = "%s" % val | ||
elif com.is_list_like(val): | ||
val = str(val) | ||
|
||
return val | ||
|
||
|
@@ -497,6 +504,11 @@ class ExcelWriter(object): | |
datetime_format : string, default None | ||
Format string for datetime objects written into Excel files | ||
(e.g. 'YYYY-MM-DD HH:MM:SS') | ||
|
||
Notes | ||
----- | ||
For compatibility with CSV writers, ExcelWriter serializes lists | ||
and dicts to strings before writing. | ||
""" | ||
# Defining an ExcelWriter implementation (see abstract methods for more...) | ||
|
||
|
@@ -521,9 +533,13 @@ class ExcelWriter(object): | |
# ExcelWriter. | ||
def __new__(cls, path, engine=None, **kwargs): | ||
# only switch class if generic(ExcelWriter) | ||
if cls == ExcelWriter: | ||
if issubclass(cls, ExcelWriter): | ||
if engine is None: | ||
ext = os.path.splitext(path)[-1][1:] | ||
if isinstance(path, string_types): | ||
ext = os.path.splitext(path)[-1][1:] | ||
else: | ||
ext = 'xlsx' | ||
|
||
try: | ||
engine = config.get_option('io.excel.%s.writer' % ext) | ||
except KeyError: | ||
|
@@ -574,7 +590,11 @@ def save(self): | |
def __init__(self, path, engine=None, | ||
date_format=None, datetime_format=None, **engine_kwargs): | ||
# validate that this engine can handle the extension | ||
ext = os.path.splitext(path)[-1] | ||
if isinstance(path, string_types): | ||
ext = os.path.splitext(path)[-1] | ||
else: | ||
ext = 'xls' if engine == 'xlwt' else 'xlsx' | ||
|
||
self.check_extension(ext) | ||
|
||
self.path = path | ||
|
@@ -1159,7 +1179,7 @@ class _XlwtWriter(ExcelWriter): | |
def __init__(self, path, engine=None, encoding=None, **engine_kwargs): | ||
# Use the xlwt module as the Excel writer. | ||
import xlwt | ||
|
||
engine_kwargs['engine'] = engine | ||
super(_XlwtWriter, self).__init__(path, **engine_kwargs) | ||
|
||
if encoding is None: | ||
|
@@ -1311,6 +1331,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): | |
style_dict = {} | ||
|
||
for cell in cells: | ||
val = _conv_value(cell.val) | ||
|
||
num_format_str = None | ||
if isinstance(cell.val, datetime.datetime): | ||
num_format_str = self.datetime_format | ||
|
@@ -1336,7 +1358,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): | |
else: | ||
wks.write(startrow + cell.row, | ||
startcol + cell.col, | ||
cell.val, style) | ||
val, style) | ||
|
||
def _convert_to_style(self, style_dict, num_format_str=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# pylint: disable=E1101 | ||
|
||
from pandas.compat import u, range, map, openpyxl_compat | ||
from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems | ||
from datetime import datetime, date, time | ||
import sys | ||
import os | ||
|
@@ -455,7 +455,7 @@ def test_reading_multiple_specific_sheets(self): | |
def test_creating_and_reading_multiple_sheets(self): | ||
# Test reading multiple sheets, from a runtime created excel file | ||
# with multiple sheets. | ||
# See PR #9450 | ||
# See PR #9450 | ||
|
||
_skip_if_no_xlrd() | ||
_skip_if_no_xlwt() | ||
|
@@ -471,7 +471,7 @@ def tdf(sheetname): | |
|
||
with ensure_clean('.xlsx') as pth: | ||
with ExcelWriter(pth) as ew: | ||
for sheetname, df in dfs.iteritems(): | ||
for sheetname, df in iteritems(dfs): | ||
df.to_excel(ew,sheetname) | ||
dfs_returned = pd.read_excel(pth,sheetname=sheets) | ||
for s in sheets: | ||
|
@@ -520,6 +520,29 @@ def test_reader_seconds(self): | |
actual = read_excel(epoch_1904, 'Sheet1') | ||
tm.assert_frame_equal(actual, expected) | ||
|
||
# GH6403 | ||
def test_read_excel_blank(self): | ||
_skip_if_no_xlrd() | ||
|
||
blank = os.path.join(self.dirpath, 'blank.xls') | ||
actual = read_excel(blank, 'Sheet1') | ||
tm.assert_frame_equal(actual, DataFrame()) | ||
|
||
blank = os.path.join(self.dirpath, 'blank.xlsx') | ||
actual = read_excel(blank, 'Sheet1') | ||
tm.assert_frame_equal(actual, DataFrame()) | ||
|
||
def test_read_excel_blank_with_header(self): | ||
_skip_if_no_xlrd() | ||
|
||
expected = DataFrame(columns=['col_1', 'col_2']) | ||
blank = os.path.join(self.dirpath, 'blank_with_header.xls') | ||
actual = read_excel(blank, 'Sheet1') | ||
tm.assert_frame_equal(actual, expected) | ||
|
||
blank = os.path.join(self.dirpath, 'blank_with_header.xlsx') | ||
actual = read_excel(blank, 'Sheet1') | ||
tm.assert_frame_equal(actual, expected) | ||
|
||
class ExcelWriterBase(SharedItems): | ||
# Base class for test cases to run with different Excel writers. | ||
|
@@ -1218,6 +1241,30 @@ def test_datetimes(self): | |
|
||
tm.assert_series_equal(write_frame['A'], read_frame['A']) | ||
|
||
# GH7074 | ||
def test_bytes_io(self): | ||
bio = BytesIO() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. awesome. can you add a small section to the excel docs (io.rst) showing writing to bytes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added something |
||
df = DataFrame(np.random.randn(10, 2)) | ||
writer = ExcelWriter(bio) | ||
df.to_excel(writer) | ||
writer.save() | ||
bio.seek(0) | ||
reread_df = pd.read_excel(bio) | ||
tm.assert_frame_equal(df, reread_df) | ||
|
||
# GH8188 | ||
def test_write_lists_dict(self): | ||
df = pd.DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], | ||
'numeric': [1, 2, 3.0], | ||
'str': ['apple', 'banana', 'cherry']}) | ||
expected = df.copy() | ||
expected.mixed = expected.mixed.apply(str) | ||
expected.numeric = expected.numeric.astype('int64') | ||
with ensure_clean(self.ext) as path: | ||
df.to_excel(path, 'Sheet1') | ||
read = read_excel(path, 'Sheet1', header=0) | ||
tm.assert_frame_equal(read, expected) | ||
|
||
def raise_wrapper(major_ver): | ||
def versioned_raise_wrapper(orig_method): | ||
@functools.wraps(orig_method) | ||
|
@@ -1512,6 +1559,7 @@ class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase): | |
|
||
|
||
class ExcelWriterEngineTests(tm.TestCase): | ||
|
||
def test_ExcelWriter_dispatch(self): | ||
with tm.assertRaisesRegexp(ValueError, 'No engine'): | ||
ExcelWriter('nothing') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did you mean to take this out?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes - both 1.x and 2.x are clearly supported and tested in excel.py and test_excel.py. I suppose the person who added the OpenPyXL 2.x path forgot to change this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm, IIRC there were some issues with >=2.0.0 and < 2.0.3, something like that (e.g. some style sheet issues). But ok, past that now, so ok.