From 92203096ceba0cfea72bb28d434f34f58e68bb0a Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 16 Jun 2015 18:05:53 -0400 Subject: [PATCH] ENH: Enable ExcelWriter to construct in-memory sheets Add support for StringIO/BytesIO to ExcelWriter Add vbench support for writing excel files Add support for serializing lists/dicts to strings Fix bug when reading blank excel sheets Added xlwt to Python 3.4 builds closes #8188 closes #7074 closes #6403 closes #7171 closes #6947 --- ci/requirements-3.4.txt | 1 + ci/requirements-3.4_SLOW.txt | 1 + doc/source/install.rst | 3 +- doc/source/io.rst | 43 +++++++++++++++- doc/source/whatsnew/v0.17.0.txt | 6 +++ pandas/core/frame.py | 3 ++ pandas/io/excel.py | 38 +++++++++++--- pandas/io/tests/data/blank.xls | Bin 0 -> 23040 bytes pandas/io/tests/data/blank.xlsx | Bin 0 -> 8379 bytes pandas/io/tests/data/blank_with_header.xls | Bin 0 -> 23040 bytes pandas/io/tests/data/blank_with_header.xlsx | Bin 0 -> 8773 bytes pandas/io/tests/test_excel.py | 54 ++++++++++++++++++-- vb_suite/packers.py | 44 ++++++++++++++++ 13 files changed, 179 insertions(+), 14 deletions(-) create mode 100755 pandas/io/tests/data/blank.xls create mode 100755 pandas/io/tests/data/blank.xlsx create mode 100755 pandas/io/tests/data/blank_with_header.xls create mode 100755 pandas/io/tests/data/blank_with_header.xlsx diff --git a/ci/requirements-3.4.txt b/ci/requirements-3.4.txt index 24af93fb16194..fd0a5bc53dd7e 100644 --- a/ci/requirements-3.4.txt +++ b/ci/requirements-3.4.txt @@ -3,6 +3,7 @@ pytz openpyxl xlsxwriter xlrd +xlwt html5lib patsy beautiful-soup diff --git a/ci/requirements-3.4_SLOW.txt b/ci/requirements-3.4_SLOW.txt index 6372d9b4f6068..ecc31dad78d07 100644 --- a/ci/requirements-3.4_SLOW.txt +++ b/ci/requirements-3.4_SLOW.txt @@ -3,6 +3,7 @@ pytz openpyxl xlsxwriter xlrd +xlwt html5lib patsy beautiful-soup diff --git a/doc/source/install.rst b/doc/source/install.rst index b3f86db5e3e59..1ba82bf60f128 100644 --- a/doc/source/install.rst +++ 
b/doc/source/install.rst @@ -249,10 +249,9 @@ Optional Dependencies * `statsmodels `__ * Needed for parts of :mod:`pandas.stats` * `openpyxl `__, `xlrd/xlwt `__ - * openpyxl version 1.6.1 or higher, but lower than 2.0.0 * Needed for Excel I/O * `XlsxWriter `__ - * Alternative Excel writer. + * Alternative Excel writer * `boto `__: necessary for Amazon S3 access. * `blosc `__: for msgpack compression using ``blosc`` diff --git a/doc/source/io.rst b/doc/source/io.rst index 73a2f2f1d3531..9852822c556dc 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2130,7 +2130,9 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`. df1.to_excel(writer, sheet_name='Sheet1') df2.to_excel(writer, sheet_name='Sheet2') -.. note:: Wringing a little more performance out of ``read_excel`` +.. note:: + + Wringing a little more performance out of ``read_excel`` Internally, Excel stores all numeric data as floats. Because this can produce unexpected behavior when reading in data, pandas defaults to trying to convert integers to floats if it doesn't lose information (``1.0 --> @@ -2182,6 +2184,45 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') +Writing Excel Files to Memory +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.17 + +.. _io.excel_writing_buffer: + +Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` or +``BytesIO`` using :class:`~pandas.io.excel.ExcelWriter`. + +.. code-block:: python + + # Safe import for either Python 2.x or 3.x + try: + from io import BytesIO + except ImportError: + from cStringIO import StringIO as BytesIO + + bio = BytesIO() + + # By setting the 'engine' in the ExcelWriter constructor. 
+ writer = ExcelWriter(bio, engine='xlsxwriter') + df.to_excel(writer, sheet_name='Sheet1') + + # Save the workbook + writer.save() + + # Seek to the beginning and read to copy the workbook to a variable in memory + bio.seek(0) + workbook = bio.read() + +.. note:: + + ``engine`` is optional but recommended. Setting the engine determines + the version of workbook produced. Setting ``engine='xlwt'`` will produce an + Excel 2003-format workbook (xls). Using either ``'openpyxl'`` or + ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If + omitted, an Excel 2007-formatted workbook is produced. + .. _io.clipboard: Clipboard diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 6f7e9bce0a3a6..fc2e6b1cb936f 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -38,6 +38,8 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ +- Enable writing Excel files in :ref:`memory <io.excel_writing_buffer>` using StringIO/BytesIO (:issue:`7074`) +- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`) .. _whatsnew_0170.deprecations: @@ -53,11 +55,15 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Added vbench benchmarks for alternative ExcelWriter engines and reading Excel files (:issue:`7171`) .. _whatsnew_0170.bug_fixes: Bug Fixes ~~~~~~~~~ - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`) + + - Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`) - Bug in ``DataFrame.reset_index`` when index contains `NaT`. 
(:issue:`10388`) +- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf7f1fa033f6e..a4e4cf612ca85 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1246,6 +1246,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', >>> df1.to_excel(writer,'Sheet1') >>> df2.to_excel(writer,'Sheet2') >>> writer.save() + + For compatibility with to_csv, to_excel serializes lists and dicts to + strings before writing. """ from pandas.io.excel import ExcelWriter if self.columns.nlevels > 1: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index cab342dc339f4..d58d6590b96c0 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -9,11 +9,13 @@ import abc import numpy as np +from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser from pandas.io.common import _is_url, _urlopen from pandas.tseries.period import Period from pandas import json -from pandas.compat import map, zip, reduce, range, lrange, u, add_metaclass +from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, + BytesIO, string_types) from pandas.core import config from pandas.core.common import pprint_thing import pandas.compat as compat @@ -417,10 +419,13 @@ def _parse_cell(cell_contents,cell_typ): if parse_cols is None or should_parse[j]: row.append(_parse_cell(value,typ)) data.append(row) - + + if sheet.nrows == 0: + return DataFrame() + if header is not None: data[header] = _trim_excel_header(data[header]) - + parser = TextParser(data, header=header, index_col=index_col, has_index_names=has_index_names, na_values=na_values, @@ -474,6 +479,8 @@ def _conv_value(val): val = bool(val) elif isinstance(val, Period): val = "%s" % val + elif com.is_list_like(val): + val = str(val) return val @@ -497,6 +504,11 @@ class ExcelWriter(object): datetime_format : string, default None Format string for datetime objects written into Excel files (e.g. 
'YYYY-MM-DD HH:MM:SS') + + Notes + ----- + For compatibility with CSV writers, ExcelWriter serializes lists + and dicts to strings before writing. """ # Defining an ExcelWriter implementation (see abstract methods for more...) @@ -521,9 +533,13 @@ class ExcelWriter(object): # ExcelWriter. def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) - if cls == ExcelWriter: + if issubclass(cls, ExcelWriter): if engine is None: - ext = os.path.splitext(path)[-1][1:] + if isinstance(path, string_types): + ext = os.path.splitext(path)[-1][1:] + else: + ext = 'xlsx' + try: engine = config.get_option('io.excel.%s.writer' % ext) except KeyError: @@ -574,7 +590,11 @@ def save(self): def __init__(self, path, engine=None, date_format=None, datetime_format=None, **engine_kwargs): # validate that this engine can handle the extension - ext = os.path.splitext(path)[-1] + if isinstance(path, string_types): + ext = os.path.splitext(path)[-1] + else: + ext = 'xls' if engine == 'xlwt' else 'xlsx' + self.check_extension(ext) self.path = path @@ -1159,7 +1179,7 @@ class _XlwtWriter(ExcelWriter): def __init__(self, path, engine=None, encoding=None, **engine_kwargs): # Use the xlwt module as the Excel writer. 
import xlwt - + engine_kwargs['engine'] = engine super(_XlwtWriter, self).__init__(path, **engine_kwargs) if encoding is None: @@ -1311,6 +1331,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): style_dict = {} for cell in cells: + val = _conv_value(cell.val) + num_format_str = None if isinstance(cell.val, datetime.datetime): num_format_str = self.datetime_format @@ -1336,7 +1358,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): else: wks.write(startrow + cell.row, startcol + cell.col, - cell.val, style) + val, style) def _convert_to_style(self, style_dict, num_format_str=None): """ diff --git a/pandas/io/tests/data/blank.xls b/pandas/io/tests/data/blank.xls new file mode 100755 index 0000000000000000000000000000000000000000..952c76f045e8af5f100b8c2690ecf8d604770a64 GIT binary patch literal 23040 zcmeHPeQ;FO6+dsYn`{Ul2_NB8cmx7T2oZJxQ9hOrMI4ro5gc^H!6n&*3?X5bjSVw0 z#Mb^(snvwyRMJ|jPPJOU6+5*BoR0R7wRKt;D`hNpsAI=YJJNPq9d-LV_wC!g@4k=h zR_&Dbz07_4?s@nA&bjBFd)~e0-Iq6hS#$ouXQsSE%CVBl=+kT^l}mID*YnJLHQ|0X z%UScc3D+R`$0ekJ$}l>zJVYIl*DFr-YgCR7e}@9}bnq0;M88McLHlS7(bnC`eftur zwq|`p^0v-y{R=(T2pJL@%Wx8gw?Zo?4YV>E!nuYf3+cv+^Eh#SN|b$BoLA6TI53k*NO9JoHaTw>YNpng*2Vcp?sL`VRH}A*$QnfC8&!!X&0q% z{%`=nmmf)}@&Ot{i7b^y9tY%n^jUTqe@?lG?9HxXu zF-0G$q#7+6rUapcckgRdIV{GAVK4$TahMVstkBxGYFh|$xJf!aIE_N!L3Ok1W;M^6 zwP0uCvK=GQ^)oeisYf=dfE;ty5jdQG}qsY$ogh*2|LWo2fx zz{*dECb5jg3QU*anYU14YSz#5H7g6&g_`GSJVpi4-?O^-HBKjWrn74i^uL+6yy-l^ zoyxn>8P%!$|1w=8ottMI0Yjf5sjXKXjr0z=imc2{V5mv6E5^` zyU_1)p}WcBhX0(vnPAh;cUGTyE;-+b6?ecRTMlb%Gc!S?d>OKoR~quwN6uqCPWl;d z(obWGM=SGs7;Dm{cHJHxPq(vz^2bW**l;#;Fg%|QTIgILK%?O2%W%cMo*$BUyflr* z6FFe7z(q=`U`+T*;J|{E#35K}1$L<=ao|2FsRA2qNfo$LN~*wSUQz`fUnNywhf-1n z9vLN7;GtJi1s+Q!Rbb~=QU#6~lvJU))C%)UtUPb4;rh0^yuH;elz)hjrZ!f{FF;AlaBVNVFJ`NYo0VI>AK%DRjE6Id;((QLUA%bFCR8jI zvk4{BOre;TlZi~BWSU(ltmq`6K04^4ZYQB+t$a33siScTgfd1WYu_f6$t>Hi)oW8QJlTw~?`TvI*kuw|HV)0I^BOIxeGRyDXhulG zka22`>ipHwGpiU7!erR%S;mX0B6Q*ng80|)Aep!~2;yH72r|w(@dlY)3!Assv>ma4KlSD 
zkWAbgWTpiK8E2iyAcKLiRE@_MMHbZS^YzuOt;z26!h!Hc8erAs4lOu6I||YD1yoNu z%|}{It@uk)H>Ej>4Fza4k}879_H*xkkPY!QKW#7zsOQ)Ov#C+pMC@$pJlGg(5Wyyt zY`=K!x6W+Fs%)ZmHgi1K7^@V)raamH{s(V6vl*wdS!8F^&c;$d;Cg|K`HW&TJ;D zY#c0Yo(CJ*(lYVP$+OOErl@QjEbVd+HnOE<;>X{9&za3sm5qa?MLpQamX?WseCBy) zHq)5R5^qb>!DdMjmKKSA^SL(-Htf)FfZ_W~yu) zENzho8`;t#(T`4i!->sEyq?CqEC1YFtvoj`%T~BPD~UGW@e^mzSp`s7lnOdi0hO%? zuRgYHbOxPW0EPXip!Eu!N3^os=(WbwY!t8d-S!* zuC9UbAw{l<*%6RyHztFvvHDrx*LRQey7gN(b^aTKX$cqeZDvksWEPVdXV}F2@Xv13?Zz=i4AoAMAmHS7z^XXXnhT5^Y0x=GvYf80s=B zt~Mnbq-&wH9>r8SB5q=VnTa(T^L?x!)+ZBfo!uQe)(VQCW!2E^dI-d}-w8oGAfT~E z5GH^DGvm601{|AW1tc>Z;b34i78w#ae4h$l;{a~>KNs+9?E5Ep4$t6G85}-P1t04G zZumhLaARr^5a5esaQH&SA(V}C0B3(_^jL+gmchUXTFhar>$>;#!W4R~T^1OR_FM|I~#XGzAB-?W1e}f{-B>1Jc=;%G@^frqJD9c-8 z-WQqPheO=uP~1qm0`k+QWN$i^fO}CKvo;Nmc@u!Sr774HI~HR_Bm5~|mYD4w!W)&G z!Na8b3BxgJl~p6K+yqNc2?xPYIqhq>EG$+lu`dx2n|jj8f$%OXMqmWS3uB}q6vrA( z&&z>o)1`5cx!`Dxoaxexoaxex++d)JuH+cq=H9gE@h>S%Cjff0(1E?rWtYz$($eQl zC`e-DOi7HKDT$F|hj~?h2)dhX6(JNtM{GfxQ~8BItBg%`8Jp_1&U6>Fwa{9tYz(?* z8>}OZj$?ht(_}d1MU81#HA@Tun zwGnM+XV?q7i$IH=bepiCLJ1y09e9dzIAsalDz*-P6|H)aUn#>fuuD3bUdWW?{6^EH zM40jldsmYZy+1sO?w8~;OxxHn@({LVKP27bdWT`^8vdTtZpOgE`(;gAyCv1rkxJ~< z*L8L!_v;OPOP4g3ZQT?{zB|#~+L`F$Twm^dedn%SJ>4yRU5UN0U_(&zB4S=dZ{iV! 
z!}!&5zX7lf@^}tTMCg&l;Nz4QMj|@1G(aDVkfb2vtx^EF)^IJ7LE&?4?lM3}(Y&M5 zoi%^Z=oo0nBpP!#4BBCd#sIwM!q~N5ZP~S672uaBh2!UVMKlU>@URb{Q^$zw8k`Bw z@i2Y-*ayGbxNA#nr=T|p`aE&|vY_-!?WxCLBWl*}z@1aU@wlFV^c7M1PH~=t((U4S zH^!S`&Fh9d%6qX!>H0z5E(=7~qy~+%2BPBlb7eW8n+x1*uyvsmo9Cm@F3$F{{|@0?vQ+ zfr}q*-d+9pye=7Zp4PAFYfW|u8TwDW5Y9gOhPDcLbfor`vd7-g za-NJS{^4uD*MP49Ujx1dd=2;-@HOCTz}JAU0bc{Y2LA6g;I;k_oO%7sP|MWn!TYfO zZ~pK}UjOq4th_Ig*Z*6P_+zjek$9i=ok;wF+hHVL+aE#Vbw98FIrjT762}9cK;rno z86-I#FdO-8yOa72En8dktCRR&Bk8@^<*4_x>uHd;rILyLy{QAaDCKCu3sqcyr$Y#i z18}^;h;|n)BZbQ-jYL5fgo6^?fbBi0J^MM1zhBh%V+qiSfA|{kHQ;N&*MP49Ujx1d zd=2;-@HOCTz}LVftpQ%g^U{~sxOgqkyZd;1qgWmDJAGd3^NuTC7W0~&*YCW?h}ZqR z#;-x*cmBN2H}^&HTA$+q{GBm>SIhq&mcN_iPs{mJYF=i~K+=(BBJurM_~rF~J<=Sc zxkwF2jYxd|6D0oikIz>F*JFDl@7UC_Wu3nfMwj({vXQGqhOzVsv z>zVt}q-+QG6h24TCFIy+l8gSue}K*NXBmpExIVXYE<{_!W>5YOf!ofzwcCYEOeuW0 t;fC7){yc_w{1nTdzpEC52R|XY8r=9pk-gY@$~{n;JWKzL_SZ@L{{?{WA}{~| literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/blank.xlsx b/pandas/io/tests/data/blank.xlsx new file mode 100755 index 0000000000000000000000000000000000000000..73f6ba6d29af3c8764f2aa9bb3291a43d0650074 GIT binary patch literal 8379 zcmeHMXH-*bvko9#dX*|2K_C?AB2}6oy^ACy(n1eKs`M_Qw+I5^pdd(>ju8ZQtHom08jx4u+5d6o#0R>_QP-g5Hj>t9h5Od7lEXG5`PP_z(U9-N{|rtw3^> z1GUqT1p$qZw1jL!Rm3cs%Xxl}qIjYVJ8EQ|>@KpSPD~7PpXE`)z;CnbmX3u*vrEle zjmULm%(p>upDx2zl50HBGfP6_H*M$(3t6IvlEI~=V# z_-gm2m-TMD@fjI0>sBzc6?RjH#O-@4iPtW3!TD!!JRds4#T59hM6b*h!`_FRFz&3p znQN_62{1!-6cqE_3wB$mD4=>bfR7+zd^DCR8~2dEdg??DeHYC;2y84%a0*mEyu+Fw zI5)o^@2Bi@adq(Ez&H^SPz#7<#POb;a#FdQuyWQLysMVN^vUnI*`Q&)JBd8>*{seB z|M+uxl%R{;_OsQeYobI3Wag-5@goeYE-r8YI)5Q$wIPt>D+XzIFjt8&q`dD2g}Dm} zemnm|#{Xaj{`BgxiG3I{UT)t~YgGDtvg)KpkXz7jN3DU)faNvkCTn$EKBxS_eke|` zhV|sY{!-s9^Xbh+I|EiGu#k3ZJ+DH!DA;$@-n%5ni0mnt-Nuf(gW)j~$arcRcHbM7 zoCO{|c~%hfbWZ6 zBzG)(RPjno%h_9K(I1?q9V?$rCDY}(p!9(FdEex-OvNts5Sm@&2a5M6?AAEh+!Gde z_bzHAUD-9G`qe(|%FaZ)%df2k3Fg>7KWyriohuBT)q)&?&~M2M?qgp3?<{EuQl8kx zTv5dV02lxSSl%$9e-XvY+06m$?CkKZc>PHlEKCK%l)3-xqcO3^9+SaCx564j#(W!V 
zKBQ8zFX<80rS4&cHl4H8a8^ySw)lq;JK><4LM<-ktBA4|k8iYuTl(vK>NO=XO$=r? zuP>^M3x74=bxcSdQ=YQ&go}hjcs*u#YBR-$V~66^6C0X)*BxwCPH7XmQt?`&b|Ld9 zPY{q2YTKXn1;X$~R*O~m^y{O!ZhdAxl35)`d1h7Cq>BSjkExWs_0EH!7Z#DP@6u_1 zq*NEXk;^s!yf3Vsq(vEpCDi&^BK&=tRRv=!L=#9h8Vb5{GFEvMAF!u)dUj);t0pbr z15eZksh5Dh)%$)JMEo5ueZgNKU$gx9~x;6Ctq%_k= z@zF9ao9;wqlti3i$75JTRbXGct|kSO{; zJ^x*}L_r%q2-QC#uW|R&iv2_1O4}DQekQ(G)vWw9V#tf6tmXKhQa9W118d)x@TmYO@i;t4altr?7{@N2`&c7DLjo{^<xU!A7n7ao{4&$4IDDJpS*KWfV2hkZ2LHxIvW8xYoh9=sTshi`1VF0@Qf74w<* z8kKbGiwR1V%c|Ba&NfRk&?Aht{ChY~zy*%UcmO~l<2U8`Jsfabs3TP9`&sxKdDl(v zI!}sIw~&o_U4=V4^3*Wmdqj_wXO(C%wmC(x*kz=e=;z4~s{1oe%s;|vtp}xHHEaN3 z{QcMs@EmUQh!UJ*nte5m3i>Y1K|IEh;Rt2+K);Kbc@B@S6CshgO6;5qu9L<0N_`^R z@p0FG~ZR{KwCYruOCtMA71l^lC z9rRcU-huaQQ=@rp%+^NDjXm9t@m>#!l#Z@>_*Nscv8-nBwMwq5WPUxrU6M3#{n7fo z$c9t3Zb`!-_Ssji4D%=2g##MUa%GZtawBQmjKdH7eS6T1h_@1{+h2PLi2KeP?zMXH85 z>QT~sMBii9bNK1$t(S*n71 zywU>s52@TOl{dHTB-(WK!kPOeuY+$P^a*rh_v`pVwp=BY_GAvRI{2sj(h+&i6}^WtE?;J{xLRf!a>HeA!rL3Z!FGh3YTmI#RURd3amd=ITpFSa6o!2U0Y??KO>_UI8vj?V|Q%N&kIwLo!BkTz1Vd#}fPbjZx_^P`| zJ0BJUPq@63j*=Vn5juJH0Ga%@Gm+hljPdgEx_9(2(MPk)bw(GNn55|SRsJ#Pv@ewq zl%jE)Qb#7^@pVCNo|pmMNrxLF3@l$<2lu&M48l8jv0iFU-2v0S_D^~lN^+u~G!8j> zlT~s@Sc1^r z@`T^hxf09>fc@(CICGl`S3@ z+;X`QmhD+j6DyQx!G>$DBB>!S;qw`~e#!I+tO0D}Mj(j-uGi3F%hibEmL4h1R@4n7 z=P(ut;%OP{a~<*uGU$8dnQLwKE7MT*ieyC8v*22ha&Y`RHjvUioGc&b108}~ry_MT zyvv)A)4DA?yg2Ns&(ym;&ml+C4(^|FHvM2CA$m^C;v`xRE-cy_Z z?6u|f6xoM8D8EHnSCA8#n5UsP0r<#5W2@YcJXXG9UVLzhdecENQ`%sJ|Dn0Ce<%6-HCQF`9z@ho*41g+k%(Lf;pWZv}PGHFgRU!ly5-gt5&*wn?Hi3n4Cg8 zR`I2mWmL*lz3kLX@nuIF(xTXw-nw@m&`4Gi)eE5qO{2JT%|h#)u^s-!wX0L4mVuH+ zidAL;i9Dy3H?LmEbcG<>SlE)p*=-~Rs;714j9^8X!`MZ;(FwwQQ@ z{(G)`SADUGUEg*oDhFZbA^mcxNpe(kp^5}Oc{;}vU0=p%6U1(3iuKHRtt0f_am&Ps zxz`>%UhHDsYLjS#2vfTu7xd(TCgc6oUv{l;`XsX_kfwOjKSkiHxz*J+XKo2Q=@ZKb z6SC1%aIHSj;QA0edaacdyJd*JiCfS*Uyn|~xE0}LpSX0;>A?6o0T`18EGsPuqRD&` zA+M@-sW9qm;fUvo^6nxfa@4h9?KYl&N93TiW&{>rm_(%(6>xR1d@E0Y-#G*-yz686z0dES)EQc>t{Mv7-*wV?}v{c{&bF6eZb~E 
zyVFTK$^C`1XVO{s$i`c4>nfCZJ)U;CxLjiw#pbnM@Jg(hlwU2p5^_hp?u4p*5S1~p zf@cYb!PMR;+=Sm$qlizJofid-_Q?S&l-c(RixwPloNx3wp`KB!80^_!ocGPq{f;yJ z-D=5dn8>VO0su(QYsE|g$-}q8&hc{_I6Aay)XSO>5E9}^JvE{x zzMA#O;fRFKLv|~HTkkxPp*O3_=p4itZu;1Wxt)$_1Zj7hlo|#XCXsXe;dal(kn8Y7 zj+mC-RMk)+%;-AW=E08j@s1K~%Cu}MK{x`C3k#0#9p+br1waxfqDnJ@Q}h_sneS++ zz#QoXUd(XfM!ne%QtPDIB;?PyAXC=cofuLmg=#V1azRhkS%WxthusURa`CbBCRbt? z={zMKhNQ_#Oso!S0aW3pUyx0Pb)1^mC^5lmH`;}IT~d2o+rT%ghATOIyPDJEss-g| zVcG_dY@*c}mdtEJm<;ejJy=w@);PoswnJv+UZ4_V+_xS#=JPL-^GKl_e+lYSXkIy zH(8N?Hak2JuRXeb({|};r=5k1nVkx5Iw~R-50xa!=Gj~qknFWyqg<4b0q;`@&eig# z5ai|*Nx%6r$NOBM#HNKYX5vdWSsUZHS6;__Ld({rz^^YAxn?2jJW$ zE9JOZ!t=AL^Tv;IP~9?u(HN`!sGg7ozb2B!${wGFo>UOE)A>i)Dlgf}O)o_IcA1gU zYilv33Z>|VVj;9dl4ZbX{#C}(v&2i~^Ui!{#J_`_!je+oZA?ON#=wpO)4+i^TkE(v zySNKkJG()@!4A{x{HJe&;bLr}u4XHcI&22_Lg{^_V`iKq6x8+u4Dm3RyE!9_L1ubAx z<#DmK{5TEysAFmBU7dop##PIUsLKEN*>ilx26X>@ zAd?gv=7Hp4Tf`@k{{{T3@suC{CCzCWw@{ku0gFyAXO3aTWzC4eoG}d@eJ5e z6x@)i+i-{W;G*0Vwy26I4X8iAFw0rcc+ntHjS;gs@Lj6iaU7ZuovbrmJ1Q^DRVs4| zv$Y$3O~(>5tzv}^lpM9?5}wwRwOw#5qe;x+!6mVAPf)FA8X9m|SLqx-edo@_IM!yR z`<@$MlZMxVFC{ynZO?jX%VjD^aZSBHaK2LD+QU>VJY=lrpo1L{Oj5G~9El@dew*}F zC>QVbYg4g1J>^K>rRrvy;sNp)hk0^fkn$`;dHSRj(e*_D+U7bA;Y z>-RvfC}(90*SPx}R&rzWZ?0h;G@j#24|&D&0x9{2`;9c=B|gaJ$0i@SSNP(HMb}(C zk2vR8F|qoW3dX_~0Q|e*?w=9;bN$VzS4Z=&4*oib`qS{+nu4LtPh+WH4gcDk|Jkqs z)AIZGo%&z>{Mxhq;b{#szy76z`>XM<9mXHVK19FmH~#A2*WKL@2e&Bx=k9;o>;3P& z3{(Er%lEy>uU>vt&p)2Uf%>;zzH8}Uz5J^3et03I{qE(5=KIz3uhsg8Apqcq+4=sh ncK>Sr*L?f4`4PiU=Kr02bu@7?IQcgHBnEV1%x>^z(&$&C{yw&R#&H!sLz^WdpC50RjN2ue2gIoow+-^bLT$1 z3q>l`z1G~lbMDM{&YW}R+?ly^{pzpl-hSwrDesVWtfC6~G*?ZP3f;ih3ka&||HTA!)D-r{H)ijY={=tDqsA>u9o&Zmc+u6X&Ny*%!rmC5@#$#(hS| z&dAbbKp@n;F0@*dyHlJS#CbH%2AvRf&I-yxnoj3XK1z49yNBq{RmM6>QV(_0E=uG4 zzf@w6VbC&qk(E@CbYEW1q>!}LrG&jL;bBXhgN}4Ab_~{7-LM)PKWvE=!l;)(EXIgoFhVqOm=XrO&~973Ed)8dB%L0fMiKC!hS?3XT4&8# zu%mhT_9mIx-aI`#6VZK|?fwndq&jc37iyp?x~N#FZGjyJd4`$`Q5*D98d|>SQfi?X z(GC`+#gvlev)h{lVSDrPJfUHJ1LWS(w0Z})v{5tuP*tthVycx|Ok0fvwbG?_W>yRA 
z{Df!<%S58cbP1k$3l*laeu1y7ELaz7zD?uND~SG?GsUlUI%P7QLyMsQL*DYIa|d@T z??q=+xAOnbbc0N8o^^yQeMXQIdU!rh4?igM@1+0GgZ={#`jZ~?;~w;Wa{_0ALqFg4pj+}d%Ukj|>5q8eJmW!s-h=*g5Bk?V=s)qGmn+YY1Wt`Z zKbD_hG-UbJy>)f>*W6!2;bWqFU4h=!b#lhZ85BMt=n=#a;fMc50dYwGZ1$kHdeB>3 z>EnRu*55}+dcprKf3A}B*J(2@XJCvaOrY?B-=v;Qm!s1r8`>Rp6dc zRt4^QWmVw5R8|EJeq~kQHG{G$w3b_8ez_Ie%B_GwWO;2^ZmYokPYnXb;piGIYG90s zcp>|=ULh3ChZF9Y)_B-7ogf%k4-b+{_=Cht0g1)!AbJp71dyW99nEoyzM#OBqZ{IJ zPE}YajP~=5m;UyvoOk=1Y#>8a=?;>6*$-r?9VF!;N$&k2MUs0ob1nu*l`tnuok-l| zAxZX|1(2$I6OSA@k~8xMo1FnvTD3yMEpd!+p&gFuLS2Hgs!$1cp)Opw;1DX2NH~O2 zX;LVrKFu>LhXNVeS{1nXZ42r}+E@due*3JBKXQOw&(A&_XnA713|`JC;lLfrGRks@zV)UeoN4aKgi@#K(g)rAecWYk|5)*6Mv97 zrGW4Y3O}9j#I^*T_=8}2K0HXa-51ti<< z4>HpRf{eRPRFI>gu~du47sVDdn)A&y9UZCO%)-Iw1{!45kFunOq!3h zhC1+D4>zoCYVi~&L-w$)8NC#nu7>7kyO`( zbH8(EGgfC4ce0t|!^WDW2sV|euJ=E9%bm?Qoy{UAn-(87JdepMtt!=Z{`Y@zXER=B zv)IX|&4&$77xHYPsjjns{kuDx2|Al4PBt+gHrB9KD6Klx^~s;!bZ1k~Y!>-@+6=H+ zRD!2v+h53@cV{zEXXD~&vwYa7o|bKY_k|PgY$oY!Ts&>A4;$6fvhBY;|Drpa$vPVs zPn+k%M)kC8d-l{>cQ#XWHZGoau@4*7)3WU+-g?iS%~YL@i>JkX*r=YCZU6Yp^X_b> zF`Fg+o@Rp0k`g>E7QgSgH!Lb;jp}K!_~0w&+}X_3*|>Px zA|E!Yr^VtQo&1Iyn~``ujd54uxw%GrZeEnD@_bejZMx-W?x3@ZpztUibfyNXdJ$fI zY~J7wI=cu8|ItAkHBi-Oh>rjCdUw#qA}G8?2W{3sRSzK===`ob=$s-bd_xDFuYs!m zK(zjem)t?;7D3?&I_N?TRJA_Qzm8qu1{$R%JY8vRvU4zciQV$X)LaFv#d2e3;Rzh! 
zMG>qJ^5}@Kc^NSuNoukG0BiW`l06;$J;_XWU+-Y_7CT_5K37B5mZ1*n$Ht)~hF;y& zhanixWcb<3puC(V8SwIwU{FbTLxh{Wo?)P{nv2?U)wbNx&=~Bhz}tv*dr}>H&2_1s zp26r5O|FT#5s+&S27?{67em8!xaz^5Ditq9i9w+nK&4c?{#03z%F0Qptelj}${od` z?Yx1C++ftSixQ~-lXXzZKKv!Yiipd$4pgFSlUBA8yAX?IO}YV3f*T8U<`0BXXO31U zoQtAPKlW>-#n`ul4i##&4OF2#f@D@8JQ^B}NrnQB*r$Wnxqw^o&jUP{_})n#!!vkX1xF0j!N+!`8$1o$Eq9I;Rf2o>X8z&RdT7OPU#ax^r87V|RJ)xG=s;R^kBlZD2kJy%1s z`{9dh^kVnVVclOEjb9FY&hB4}0TSX;dw1{NRA;{buh)c`gt*i$480dd@3cjLih?&5 zVv&qKyu@7z#f_w;ke@cD`ZMVy!iyG|jcEwX8v)EMO~a=+uvjx15l`{5M7DPbZ&dOQ z50@GsbjP?|R)fHDBRoAV0t7?lwQu0EI9aX6zC=K5?8~GEqr2=Fp%LgWjFEv*yw)fq zFE3Qfkj9J5#XxK2WJt4eGNf6#qoEqQgxBac^=E{|zoIdn0O(CZ2Tq%-A)h0p&E`xf zN@C@tBvwvJV&ynsUOEtg?q=IXh(utB&1iGFF!5)Vv8yg;SKZp3>4CNu+G|yefqAyV zJ2EgF>&xlw21Be65Pcc;0B!A|VzPi>g%m6Yr-yQs0NXG8#tuVuVmD63#xCiaVQ4Y8}5)hG$@xbSkruDOLPNQ&M6~ zxs#)-q{JVHPND}Cxh&VVHjF%sZP^bi_qg62aCHNJr;R8Q(V1l-`j{rcQfw`;n%>dZ zvtuD_%)hZ51_V+1CEkrgr_>RxN5Vz^sL%g~=ook%Q|_$&qd~_(JFd`JwmU$(L!mJM zZ-B72kk{I_kk^F9^KEf(-Rtqb<{_j}kd*sj2wEN^uIq&CwUG4V$3FPYhFzNzI|RK& z(C3NsR|RE2ta)0PjqrY=R^t}jIUQzR6Og_pO5ZBZb5OcV9PcK0cdEQ@DxkcVXjZNt z;{B^YWDOh8G;1L)j@J_MH$y`hY7A*QK()fLf}cX-xE4~NV2HO@ST?n+6bk1 z3~r2}PlO)DzAD_Yat8Fn1>S(a2uGq-SeyGXucy;$KIMNO(r%>~%)k@mip%_LXh|wLO@{Y=dF;mxly;N97|MS}`$-*OlNeh;WLFo7uz-gZ ztTL9R2^RR=R(KGDsK5$eq!kpi{{LToipON7c!=5C${zdnZ4X}faMPaJ$M1>Kyt&W4 z$zkYeBz}Lz_g5ov7`_ULL(5L2u}BAz_>prr5?1#xeA?|bAkaXdfj|R+1_BKP z8VEEHXduu)pn*UGfd&E%WB%Xz;gdZ7=MPwUUn0-{HzV=K zVAmn>KI>bN_ye~)ka%u?FA~rDdH&DK&5t1QdcYG%ygqOSNnH<^jr`UD49Hv4spNtF^dWwek7Kmp#aGOe5B+yKgy6EgUSTcE7cXPQ%Q%fh zK^BA;B^YE|UwZEWUdKNm>IZ8H&`EFv8VEEHXduu)pn*UGfd&E%1R4l55NIIKz?WJB zJdfw8FVAuDTAp|JVZ)I%XXbbMJlE$iGPFR zzpH_3u)UFYY?|1z&fkdgHuZ9jdWwjRWGof?ks0MP(J67JE36;;ne8Yk+rT}I&k=SB zIrd6&$-npyum%1sL#Y+V@&2#LE--&Gm#6N literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/blank_with_header.xlsx b/pandas/io/tests/data/blank_with_header.xlsx new file mode 100755 index 0000000000000000000000000000000000000000..7b30edb13be2c90d7235fa7995455d39ab3004e1 GIT binary 
patch literal 8773 zcmeHMWmweP_8z3WL57wTq>+|Jx zVgSYsLm3xmu$42|RL95F%EOrRk&`1`A<7NbCx9D>_y1}C!z(ZVF;?#6CW+idy^`rG z=YPi>-P>Y(>h>IY9Y1<-6PV6S&yq9Oal(Ht33J9l_Z=TugPsJW@b9{|*{d{!2hY9> zJ@QGkGE*+8%O%}*I)g0M;1ty<0c{G*93%y3j7@M#9Pl$W8@od%dCo-XtI7(Jdvnp1 z`O>fx;tzq=uiv_J+R&l3FC?($CYV{!7~%>5#U+`@nXMDq&)VoQ&62E6a$ZlpFdOqL zQl^N}xl?|&mf}J!R^P+^+N^NAAouG#v$+##L*lwn$6GS~>A2?6X@s<#1BJ~vNUz``d^*x8KsAq8LR<3eP80rV35djQBcB3hY)T}aY_X>t0Y0qvBdGl9>_ZE51rE2H%0^bfgIf90-XG`e|uEblA=)qfjD*6-jL|!q~EI zzL|v{L$gzyzlC#gA$e>A-09LtJKLuHM5Ilwe|nS!5noqV$N-JMfV5tho8buYiwXjR z*a(oCx?4GVaB+Tr{sYecVMP8i_4t%wwN7rFH@gaLG9S-BIV)fk6t}7WG_AC@p&*jcmFp``+GsdcunN>%fL{HE<6mFMf>KLOpcu_NgL zKBh8%Fu&kv`UnQe0ZpGjtBH#Pwm>czK>XC|B-s1g?#0R6n*HXsqp1|ntlmRb??rad zXmE&4!tF88{m@Ir&&0mgH1eyuJmOjl0A;AfOXd-azV%v_vBrKrJohYH`mJIZ=>c?{ zu$?+r`}hLp8YDzUL%jRH-P%%z?SBMK zSbNxLfLn^Q6-1;xUaHdY1i-AhJklkQe$3(`kt)+7*~eKx9>#bPATd&Mxm2>-MUG~l ztDD4h8>6oHg~1%_sZhJPSQy<@!&q+ZGc(Ch8MmOupiVW93)!bfjD@jGsFCk>VX!f? z%G+)%BxsrWMrH;w9V{yC3do~I>58B869nbQjRIz{%QQ?l@S$srj zAR_43*ooJYLST%bjbR`Ct@kH>MVaT>I?}-#sPvk%wK0MTx;>8_GwM2QR!R#n@StAP zq7=jNe5O=DIdQm%9aycWD4J-bm{7~bR*sY@;UNE5X=qO-Oa5vZ^``-#IOwwr)*0 zy7ms`$TwM#lw0B#V-7?4I3Fg`GWEw8E_37yPjJ{xH<R_M!uK?<5;)h zJ#1>zC)Kng`}rFmZzNm>7Hr21<0#*qQ2_a#hMM=O`Mx ztn`Q4g_2{60hF^V!APAg<{3z>pSc~`M{cx&^O+5!%8}1Vj}??^ZoX9>L!+FUjFP<- zQ71IPJKQt)F+~r7|T=3RaKBz$2B}1*pRA?9}V{c~kt-Lsl7>;As6hWO0i?enxyq z)Xj7(=2?@;i?;r$Nvs7Ga-MpSlezKY#hB-2=stLGj|9$YW3V-C_`u8k4DHnfZ^iVM zr(ZoZ56Ns1UA6plvR0Q3@k}xab9*Pfms=w zI2B%wwcg~OLhR$dQB*Ao3K#=Q;j~0+nfoGnx2e*Ssg|QjaRZ58RH^jBjZWo^)YZ}&XcWpjCzr~G9zjfd*K_mFFJV>M=tT6Bc{kZJ1HPJA5S^l0#p;(x@V!b!BrmYBGBfe=i^5uLhFP!UyBx*-Por;!uQ2d~ z_}qfwmm)e+`8;W^E9PvWfgxMMPELHQWb6E!u^AvVn;srzX=u7(ZlON2Vbg7eW}LNh zI+_hVwpPuZi_%O;2c2%L`GthG+%AhNYPI+j&EAsNLo*m(@n^RJX*^w4%;$^bBSM+8 zNTW9%v~lA~p?XiObWi@VVvJ1qY80`8)oO zykT0-)B?Dw)>lSdZTB8-mwL79=Y4LMlRNAx94xibM;gE~lwwyspC(y?8kUzoR0dYw zurj;xDMdq$xiYS%@xiZo+zsj5cfN_2R3EGjSgn6tmII2`S6MiSHjGVtc$az` zOD1)j&VtI~C4^MMdVe&)^$^crAd`nll^b5d7E%yJlPMPbv5{(Uti0p5Nh}Z^yK08e 
z6kvp=xcO64c-UH5fjzi>biCg)S$qooyB3SwMQE}8cDLE`IbaGz>h_j7_nU^+71kI# zN7OmvxyJmIB;Ji;K}^N`piB1_@vQXA&;y{;n{~|riiZ)!4W4g0IvQ%phy0ToARGq0 zrxaqQ>nGlug!2o6fDl6XS-c=UIEKLFt{E(`3RyV~D>per>jCznaMCKbr9pM{3X3_c z!JmHr@+_c8RAMx~1=R*NN86GmJx*9SQjWKd)Sso>7J%?koTwRq`DZaX6F~SN@92 zx_ugT`6btOe|%5Ci>6QWcqTzYdeThzPN!Wflp8IW;29>n-3JQ&2WV}#b}@)K(lg`* zc@+x1Y9hTi?Vav96aZWDggqFTAG6}k9Muko-{D5#*Pa^j45_#&j}O(2Hdpek&$vpt zLahDYrNw`-D}4X#%K!JSxF@}~XhrN7H{t~MAF%R6^&zn0JoggrL_&0a26iZutY9guWWD_dZpJlo3{NjC|N%D!f%a79Ghqp({<(opI8;^;9e zb$KQ>XTd|TKjq|pF1ex@Ye6~uGqzUJV@5j~-k`xNtO zeJvnh&?W9m#fQS@YnK;OuJOa5bjx#MoM$N1xiPhNmmJH^_Wn=Qg$Y;Qa}yRTodHZ= zWo9orZG;9{<6+IiX@{Yz__4N*lNA;sC%HB> zi_I~805;^5pz$AX_t4^k3X#=}=-g$6X`CtGV^6>J@dk7+KRVB^@->=Ir5hMOPwTuiXKok&Uzl{O_;Qnh) zU>`@TACrMWRO9Bj2_knRzM&ViGW4i6C|0#6S>j^PX(?c}vo&l-PevtwoUq#r+U@m% zhTp5JB6r{i-t)ELcfXiX3Q-$)SfLwQl1jkv)ZN%sm+ACe9GmQ>zPzqfgx(#vjrG38 z*}jb9ynfYuGEWqsz%ev&Xp&vpG0-w)Hl`vgG);?Aky=Gn&e7>6N7*7HYRv1s5QToy z9ZdGDD|}h4gV_nG3M*A=30L@hvxPb1!K6oVT>(0h*4$?N8o8ICPgsVS;OwU{Re(HL z{|mHTx0zA-28^Gx-kofR#0&;r!}LQZBz-pqTI`H#MFl-qPr z#?5}o!|$Se=Uv#dlH9S25_D84CAp4+0k&;&HJV3mJq*#WnfesLRy*k1I+$)w;{5HsSe>_6?T&ZSo4qPMy;m0eGBq->tuMIXf~h8f z(?xWY6_+U})hjM+m%puBfbKUWl_7Z39>JRgh{~6xi-m@}i>n8hxvT4U2qB8Z|J1t> zP=uyRIjnFKMC@Xp(}wTVLt%H6lXTN9GiVwdU#a=&t-9T@SC-Dsd=zxqFJK3C^4oOc z-`zijQS7&B&n4mck4#yDlVOlx&4!7!q|t!=tu;|5q~hCF7PyvmQIfk|(_H@R<;aP^ z8!?2;mUZ{R4sSwB3)1LgYGRbE0@!(IE<@gxj$t+EQ5$mirrVt*z%k(vjfJLZNfD+B z(F;dgyUADNG;s@ZX7C`PX9!!)o zU1pkn%mAAVv<`G(u~{{HT9jSa`4H(X#gU+uT8`U3=}2hMc&{-BJ8lqe!zTAs0`^9C z>Je7~+N)Rk{3?UhP`~y1m!vO73E~`92)ILJmw?qHGm@v;pJ6@T(GNV9lg*cNUB865 z@T~hWQ5UcFjLC|0OrB~a?@AOVIOX!Uv)a?0GphHJ8Cg>6Ma{&TIg-;@3Z3QE)@EHz ztubvIn>P|IiIr?I4?AoY#24M$LJTyK;maUF>YmNuT33(#UdO=*!0s-!pHrC1CSy zxp{qMT&Ddksp1%9Hpn3&+yz0bguh0(g^Rn@KOv4-vwt;6il%xe9ZAHZ>IFXTa06s; zFrv=5(3-2I%aRYci1FU3OB(Y-%cY-P>ty!VWd^8+#85MR4y_KwXSj&|CRH+{9HFfV z=JQ5jUt_Liidd+PK~>FIQW1j?KUYO1uQO}qowk|&;3Fa<5`|e!XZN~;1&w&>#SjoD zCLVQ{suk(Ml5OJ*_qV==V0k-{ZY(k40lxPDi*e#n%-fF=`Ar)fkwPt4r9zJJQo}#c 
zR-(UnpO0M5>&mv7$P5|nBZg%HFmKo*TrI4xZYRe(6&VE6nuFL(gKs(IC29*<(Xx6SGb@0~`!AT)^Dq_*Bx9xZT)m$PWbPC|J#Z9zxQ%N^jj}KPH@+~Tt8#{ z97_U1f&Q!F|La(OoH(v~xh`CPdI==^;pJ~K_PU4bGV!MeEkrdOF%FM^i^l7we@*v4 u4FP~|3IO1G^1p8VS1SG0x`gT%>pzKALm3qjX#fDu_YWdi8NL%V;Qs(&aoDl| literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 768aa40696cbc..e898d699ff2fd 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,6 +1,6 @@ # pylint: disable=E1101 -from pandas.compat import u, range, map, openpyxl_compat +from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems from datetime import datetime, date, time import sys import os @@ -455,7 +455,7 @@ def test_reading_multiple_specific_sheets(self): def test_creating_and_reading_multiple_sheets(self): # Test reading multiple sheets, from a runtime created excel file # with multiple sheets. - # See PR #9450 + # See PR #9450 _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -471,7 +471,7 @@ def tdf(sheetname): with ensure_clean('.xlsx') as pth: with ExcelWriter(pth) as ew: - for sheetname, df in dfs.iteritems(): + for sheetname, df in iteritems(dfs): df.to_excel(ew,sheetname) dfs_returned = pd.read_excel(pth,sheetname=sheets) for s in sheets: @@ -520,6 +520,29 @@ def test_reader_seconds(self): actual = read_excel(epoch_1904, 'Sheet1') tm.assert_frame_equal(actual, expected) + # GH6403 + def test_read_excel_blank(self): + _skip_if_no_xlrd() + + blank = os.path.join(self.dirpath, 'blank.xls') + actual = read_excel(blank, 'Sheet1') + tm.assert_frame_equal(actual, DataFrame()) + + blank = os.path.join(self.dirpath, 'blank.xlsx') + actual = read_excel(blank, 'Sheet1') + tm.assert_frame_equal(actual, DataFrame()) + + def test_read_excel_blank_with_header(self): + _skip_if_no_xlrd() + + expected = DataFrame(columns=['col_1', 'col_2']) + blank = os.path.join(self.dirpath, 'blank_with_header.xls') + actual = read_excel(blank, 'Sheet1') + tm.assert_frame_equal(actual, 
expected) + + blank = os.path.join(self.dirpath, 'blank_with_header.xlsx') + actual = read_excel(blank, 'Sheet1') + tm.assert_frame_equal(actual, expected) class ExcelWriterBase(SharedItems): # Base class for test cases to run with different Excel writers. @@ -1218,6 +1241,30 @@ def test_datetimes(self): tm.assert_series_equal(write_frame['A'], read_frame['A']) + # GH7074 + def test_bytes_io(self): + bio = BytesIO() + df = DataFrame(np.random.randn(10, 2)) + writer = ExcelWriter(bio) + df.to_excel(writer) + writer.save() + bio.seek(0) + reread_df = pd.read_excel(bio) + tm.assert_frame_equal(df, reread_df) + + # GH8188 + def test_write_lists_dict(self): + df = pd.DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], + 'numeric': [1, 2, 3.0], + 'str': ['apple', 'banana', 'cherry']}) + expected = df.copy() + expected.mixed = expected.mixed.apply(str) + expected.numeric = expected.numeric.astype('int64') + with ensure_clean(self.ext) as path: + df.to_excel(path, 'Sheet1') + read = read_excel(path, 'Sheet1', header=0) + tm.assert_frame_equal(read, expected) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method): @functools.wraps(orig_method) @@ -1512,6 +1559,7 @@ class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase): class ExcelWriterEngineTests(tm.TestCase): + def test_ExcelWriter_dispatch(self): with tm.assertRaisesRegexp(ValueError, 'No engine'): ExcelWriter('nothing') diff --git a/vb_suite/packers.py b/vb_suite/packers.py index 6c7005cb03c4f..62e0e8fc33b58 100644 --- a/vb_suite/packers.py +++ b/vb_suite/packers.py @@ -7,6 +7,7 @@ import os import pandas as pd from pandas.core import common as com +from pandas.compat import BytesIO from random import randrange f = '__test__.msg' @@ -206,3 +207,46 @@ def remove(f): packers_read_stata_with_validation = Benchmark("pd.read_stata(f)", setup, start_date=start_date) packers_write_stata_with_validation = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", 
start_date=start_date) + +#---------------------------------------------------------------------- +# Excel - alternative writers +setup = common_setup + """ +bio = BytesIO() +""" + +excel_writer_bench = """ +bio.seek(0) +writer = pd.io.excel.ExcelWriter(bio, engine='{engine}') +df[:2000].to_excel(writer) +writer.save() +""" + +benchmark_xlsxwriter = excel_writer_bench.format(engine='xlsxwriter') + +packers_write_excel_xlsxwriter = Benchmark(benchmark_xlsxwriter, setup) + +benchmark_openpyxl = excel_writer_bench.format(engine='openpyxl') + +packers_write_excel_openpyxl = Benchmark(benchmark_openpyxl, setup) + +benchmark_xlwt = excel_writer_bench.format(engine='xlwt') + +packers_write_excel_xlwt = Benchmark(benchmark_xlwt, setup) + + +#---------------------------------------------------------------------- +# Excel - reader + +setup = common_setup + """ +bio = BytesIO() +writer = pd.io.excel.ExcelWriter(bio, engine='xlsxwriter') +df[:2000].to_excel(writer) +writer.save() +""" + +benchmark_read_excel=""" +bio.seek(0) +pd.read_excel(bio) +""" + +packers_read_excel = Benchmark(benchmark_read_excel, setup)