From e8f54811c7128d6ee849d072f27459e9b9322034 Mon Sep 17 00:00:00 2001 From: dmosberger Date: Fri, 4 Dec 2020 01:10:02 -0700 Subject: [PATCH] Expose 'read_only' parameter for 'import_set' and 'import_book' (#483) --- HISTORY.md | 1 + docs/formats.rst | 9 +++++++++ src/tablib/formats/_xlsx.py | 8 ++++---- tests/files/bad_dimensions.xlsx | Bin 0 -> 9220 bytes tests/test_tablib.py | 7 +++++++ 5 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tests/files/bad_dimensions.xlsx diff --git a/HISTORY.md b/HISTORY.md index 00b849b4..e2607845 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,6 +9,7 @@ ### Improvements - Added Python 3.9 support +- Added read_only option to xlsx file reader (#482). ### Bugfixes diff --git a/docs/formats.rst b/docs/formats.rst index 0c46733c..2357efe9 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -206,6 +206,15 @@ Import/export data in Excel 07+ Spreadsheet representation. This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to make the format available. +The ``import_set()`` and ``import_book()`` methods accept keyword +argument ``read_only``. If its value is ``True`` (the default), the +XLSX data source is read lazily. Lazy reading generally reduces time +and memory consumption, especially for large spreadsheets. However, +it relies on the XLSX data source declaring correct dimensions. Some +programs generate XLSX files with incorrect dimensions. Such files +may need to be loaded with this optimization turned off by passing +``read_only=False``. + .. note:: When reading an ``xlsx`` file containing formulas in its cells, Tablib will diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index e2a3fde7..34911e9b 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -59,12 +59,12 @@ def export_book(cls, databook, freeze_panes=True): return stream.getvalue() @classmethod - def import_set(cls, dset, in_stream, headers=True): + def import_set(cls, dset, in_stream, headers=True, read_only=True): """Returns databook from XLS stream.""" dset.wipe() - xls_book = load_workbook(in_stream, read_only=True, data_only=True) + xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) sheet = xls_book.active dset.title = sheet.title @@ -77,12 +77,12 @@ def import_set(cls, dset, in_stream, headers=True): dset.append(row_vals) @classmethod - def import_book(cls, dbook, in_stream, headers=True): + def import_book(cls, dbook, in_stream, headers=True, read_only=True): """Returns databook from XLS stream.""" dbook.wipe() - xls_book = load_workbook(in_stream, read_only=True, data_only=True) + xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) for sheet in xls_book.worksheets: data = tablib.Dataset() diff --git a/tests/files/bad_dimensions.xlsx b/tests/files/bad_dimensions.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8493760deb70d89847d3a3ee8631352065d8560d GIT binary patch literal 9220 zcmeHNcQo8vv{oX75D6keqDzo4LDXmw1kqcRQAe-QyJ#bXs8OOt52E+pA|Z?#Wx`^i51 zBPB2}qnQzT7kyible^)vV&5ZG^|#j*>a96l_dFE{Zjb1cPQQ4TQlnNLmeB0{pf=bc zMU-qYu;{y^Kn;OZ#P^11`Q>i&;g*40=Au|ia`*(M`To_@m|+XyU}2vB37AWm7H*i| zuye37va_?|aI>)vR~>iw3Ly1HYPxNcG}*HP5-#3%i%y(m$?7V%E-nqd~f`&dBe^zAv55@Pm%c zhnE_i^BjEIpA*sr^a?I3lkYV%)VnBMdQjeoI%HBySG<==0i2vrT;FFRd~fbIG+;e( zU@N}H9tORWEZrlK3abtXg)+T0%LZHpUJh={i!poaQ7B2gXD8?&w5q6B>sJ?VDEGRz z-k(hmTLly)0eFkdTkR3OwE)#^(Pe+SkwBO5W{yQm zJ28e^(d~2uI#2m$rC+W$u*q1~9cQz!a^a@HWrpM?(ZUp_-EueI=Ne$IG)4};^}~G5`f%$O7p3Yz8_CE;+z`r zR!-xS;Wx6cbASV+>CG zMifO)eb@j@%Y|CbQb!uj9aKlihqP^OwfE4Bk#Pm@%)|X<`a)L zR!XtCZqkpNaY5-<#w*w#b*Ye&^(e_KeFU$*&k}oR$$3{F2pbv8tB`PKeR1C)VuNnl z+4zt#*5y0oE%77qpx8G54iBt1-;Gq4b6d96%6Xh8?aCbecqiAg@ZV7bUArXnb6s?x z0X76#=jM~?J#>Nq5^6?cK_Z*dBj+=p&pfd(65nd82-SQ=uN)d*H)Qn0qI<&GY44n> zhw(@ku&U8hGp4Bg$Q`ocr{^Q(IaK)sf~8IY`!ErCyR=Py+Bk)a0H_jOeg(;-M^t2= zv0|bZoQYK#@^;$)frTXs3uUU6Snbacashe}BnGeC|431GM|&t!Z1^F*5ZMK})wcaN zcg`82iEhTMmsRDhfUrKtcqUgd5Jg*>a}H^l2`SX-q&1X^+!u5WUkN@?s2rN!d66h@ zq({o_VIyzEaxW%9riaNiEMaex-kWuCQamq_HuepaK2PAd&nWr(F3fhp^T5q_jn^jy z!H!(rlUpvfQJgwdPP0Bx9Y5@pshpLM*xv$d~F-7Ga#W;GkXl;i-h5GicLEpIie zdWqw>eXXr{p&@QJOs9xIJrEjIeyL{0B;>0X5`@L?a~Z3tUQ{qr6l<@~U*n-coV8%j zG8XqSg{ji2^>ZO_64!9aj5Kk40a~#&<)x7)!eci_rA4uSObA0&#u3aiilY48gkU^f zN*&EjOq?7!f9^Ox9`2pYkRN+bctkg08-P?2?IW4cm^c#jf(IcLXq)XL_9WM$)IIXi zKr`Q4A|ejmUhibM2Bf`bO@Usam+-bbcG!D|BT(gYM^|0s+dR$T5Yt2lrdKf$cU-N> z0XcK%7s}xv&|Gf}q<7jsiZx94zXKE+8||tKKqzx|Z^;ZLV?!uZ3w3ZSY}u9S9ovpej7Nz{^fNuv0G-4a zAPzU9xCc}sjHcN+x$w7b_U47>MHz04F@7!%NK@f;y)dkAdK2kEWOre_P|0WQkO{R_ zohVsYJPGr{IYCBoW6xs{Gj8VJZm#4%k#RIPbT9#_IyqR_n*CTQU-!3KVlX*BeURF= zG%0vDGt5XRc;Keoc^K|BX{(E+$y%~nQ$DuoYrNVppKTeO#NAurD`7##EJOt+xP+{8 zB7V8)mH+t~O60VhKv%qGG4x;{R3VF15DPcNM{>t zmM$SXN1DMGZS=r$o#72w=h3DDXW5#vr}2eYn?VgFkXJNP+CnybzBkd(RaNQ%c}Ze% zKE9Gow4oPqF55Pf==lpqH2MYBDvg!!N5<+4Iq@Zx3IK&5lcM6c4}6OVi_jnPMGoTZ zho)|RZ@@jG>NN zJ8~M^Ihg!7jj41iv;#;RP!v(f5ELE7Q+YP*l@Qs-Hh43pYc`gTX}VpKc9GAbS?>1Z zR;Q$Fu5}MV)7ct@%^58d({B?*;YSbab}~q;4o@^oq?W2VXpdasayl1ax(A76ZL=ii ztkZZ~h&3y~XhEP#^;Nro;3L>3J%h!baJf^yklpge7d(wv?*SZp8ZeozUv{!~sE&kw z_^TyQ7t}n@rEcvaf4kB{yus@)4yQQk7gL3QOaS|xNt(tzPi~2<+5#jp}6V6-udG*^+cA?Z*Q1hl8G1 zZXs0|WIEX$9Pkb>>^^qP-9-$M!Ns$BkrApHrwS!BSC6#h7w^i5mM5 zzVx7I4*V)#bv=y{77bvQ@whi=(62sHLW!8_9x7TQ7%zgX&ypn|q_6wEEG%K(UZy`p z(V~91hz%Ft9)xkv_-|)4=~)*U+S{LaC?yhtna!kuNN++$hsybD#)EY16|t!}h}OG} zRO;Hd*!Cr~@fQ&VRgrTO<0R!?_1xCgPgRrYSP($P5p{V&`$2XDiLg6$YBw?K6~f%W zO%e|W@FSPQSB+A%bQxeZkiVR;00a1Lw#?1eP&Jcr6SJ)W!XB=h@6uTA!n?Ym{t{om zI7=If+Y_o$afoe6Po!=%E6vaM`7V3&2lvccV^N2RO^4}yYRv=$RJDwvoq0K;g#!&L z$P<$V@LL8i;^)QIx7 z?ztz5ei#J_QZb};<&T3}PSs!=Ol>#8WM`BVT&gfB-gwr~Wuv&UJ^ zi|>u$lW@tb)6EtPN~5X0Y@IGEWQ?l*->m=_GfCqhJrAL zvfgJwJnJ)Nc$r&_*jhooZJt6i5HX=F61s&8h$u z@%e|V%!sN+NqMuxC*n&^>hytv#(qSaSl{xqc4XK&hA((8Rx{T$4O49{LVvO76nav?rQ@>P!F{`&#JHA%YL|ryF>rr)bkz5QEYa zJ(jcfeA?gYzvUvaZ%{MgteT)VkQ7n-Q?Q3)E;t`t5#ntt`W)S;imMjKi_>Wp%;m&K97xL6>~AyGrNb ztAeDXZ}NJZP&b&(Dwm>`UEK@mc?T$L-Hm6p=!m%wiE!I4D$?XM?clPDdy*^Nd_3ZN zI6GjglXlf^b=5~_eG<_`Uc9<5&-Vo)gdizsercP;CyB=>^R*Knk@Gyvs!2f$bQx4na|fR)tNa{uYvpP-!Azf z)80pDl-2K(MQEpq)_n3>1?r+-!*)GmMH?WQe2YUvUGmi}_3S=lK*NZl$Va5 zfa$b)n;Xj!2gzNAOI`L$x$yqJ_H=!)L^Db7cL5p?L7KX2BuBh79vBs&*&nrELc{xC z!uz}7{j}}rqQMfyV2R_KBu8fa+oJ|cGjRK*V0eEfynhznzfr*n&h@_c0PzxTp-)4L zVDgnAnIb>>I7N;ay(jGP86rIu3!@m+FP-#O&DmSbwCi4H6RxxL;JPnD!*MXp0pV+s zH{I7x*QpU9t?$0JV>@uK8IVMI7G=mIJjjm%*<yj*9=LmKrj&JV*qz$ zn127?zrd$fcsxXxWkEW~Ltt?|kcar;oAdPY0g*Z4|5g9QB$kI*_RV=_`GDvgahajF zSr$rzJhv^b|5yDJlh(+b|McG?5S`QdpZ-e(FsH-+^Xlz51c2(jHaVyM7Wn-vun1VE)%G$RMfQl^2$(YS%}!_=f8}U{|}P=j-k!p+hp0$}9?? z8{skv930&*Giu*EY?SHFL33`kf|-1dW4f!Muo+=w#ZC^jBJB7+@0#zHMq|-<10T9+ z7^VkxvJ;5ncMeY5G*=!)9NEDNd8XEPOeoj4awmNE%D(_Uz zLxjgpU0NQsmHtR^zPsnJ{^W6;XvLu?AEG=*f1sfW*xbN2+PCF1A7h>00F-oPF`Jc# zRmdw>?)AoXtU=m$4u{o;?w|TbnDpievD( zn|Ar-rOWFeUl3B-bSG!!CiXI~adn8|{?T{FZcjL*x>o5scG>(%1;c~pi)wREvGB!Ep+1Hy^&agUjO$x`i%%JCiR zJH|iB`gm1JQkP(|%2`A>82$zuhaBtlcEs810VlU3{w!ziO8naKEIWL1d*D>4FbSq# z?h^c^^BE@Xgg`zORSemT>3m8j|JwB|8FfOnoeBt()cSMU>Tij+Uk#q+a85X^Q_;o$ zVelUqtzYe&WkOE)j#F_V_&KIu`JZ1coTUf;Y{Biq4-0?cPkyy+OXpUUHlXS)B9@cy;;+2rC$-uG0VVSM~IS>Rs{oQ>0y9OS96U?S|#u0732 XD#_tt9FK)Xgn12Ovi3CeCr|$a?KAv7 literal 0 HcmV?d00001 diff --git a/tests/test_tablib.py b/tests/test_tablib.py index b13d17cd..ccb28e10 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1040,6 +1040,13 @@ def test_xlsx_cell_values(self): data = tablib.Dataset().load(fh) self.assertEqual(data.headers[0], 'Hello World') + def test_xlsx_bad_dimensions(self): + """Test loading file with bad dimension. Must be done with + read_only=False.""" + xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh, read_only=False) + self.assertEqual(data.height, 3) class JSONTests(BaseTestCase): def test_json_format_detect(self):