From 458f79ad9f4da504c68d73b48e83ad53b9634027 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Tue, 8 Oct 2024 06:18:48 -0400 Subject: [PATCH] `excel`: add `--header-row` option --- Cargo.lock | 2 +- Cargo.toml | 2 +- resources/test/excel-xlsx.xlsx | Bin 19493 -> 20511 bytes src/cmd/excel.rs | 134 ++++++++++++++++++++------------- tests/test_excel.rs | 26 ++++++- 5 files changed, 109 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0df269c97..e546f0575 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1138,7 +1138,7 @@ checksum = "ade8366b8bd5ba243f0a58f036cc0ca8a2f069cff1a2351ef1cac6b083e16fc0" [[package]] name = "calamine" version = "0.25.0" -source = "git+https://github.com/tafia/calamine?rev=06a1093#06a1093898d63f5a14a7c35f4e7aa9f5ddf1d315" +source = "git+https://github.com/tafia/calamine?rev=8efe95d#8efe95d681a4d3d50b0063924bba9c77cedd6e2c" dependencies = [ "byteorder", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 98b65b93d..93f37b2fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -291,7 +291,7 @@ dynfmt = { git = "https://github.com/jqnatividad/dynfmt", branch = "2021-clippy_ grex = { git = "https://github.com/pemistahl/grex", rev = "ff8533d" } # use calamine 0.25.0 with unreleased fixes and select clippy lints -calamine = { git = "https://github.com/tafia/calamine", rev = "06a1093" } +calamine = { git = "https://github.com/tafia/calamine", rev = "8efe95d" } # use modernized version of local_encoding local-encoding = { git = "https://github.com/slonopotamus/local-encoding-rs", branch = "travis-madness" } diff --git a/resources/test/excel-xlsx.xlsx b/resources/test/excel-xlsx.xlsx index 317833dbbcd5fd62767dd29fd3c5d1a8159d2e73..62f3f9b09d4ecd310dcd3d2b5ae7b4cd2ec25060 100644 GIT binary patch delta 5642 zcmZ8lcQhPa)8Ey5udA$RtM>?^MD!YMtr|pGL=X`~$?8N6tG6I(qK2?U^xkV&B4qUh z8@=;-o^!tEdEamT`rR{g=bSrp@7&)^aR%^RK9JEs2N#bHKmZ^D004IY-+kLX8-V~o zV=W^$4x+<-L5wUCi+fHHS*2D{ztCJ5jZ|q^Y@l~O0mOB8KXlM~J16q$V!{nRk#=|k=E+!fXmD}cixy|b3OqMVPz%w2_3Zlxnl@}(VwGAFQ4YaxHq$4uTEiu9 zFz?AiaQX702(*xw9|5Xd$r6yz3sinv?l0Qek9KRC2^sx5#eVv8@Ep_&?i zS#^dLV-_Nnl#+zvXnQVQte--Oi8?^|-TC_yTQ|DuubZSuKEo)l?ojXrZaSFy46!-JsHPq4_l{71bb(oE_L#+{=%P9TOZO1fh+ZGfF=5 zY(#jZt$3MHt&%SpcK*O@XIj=jOdc?Ai@!`fd%@_*5~TUvSyi5Gx#>gW7mbhzvE+^* zYb6u#Ny7Ky7P%*G3moz1!HvVuW)DS?!3^|uXSh7L#E(_?UGL%n04+!eAp{|ech@>1 z{JW_u44U=qL{W_*10%RxA+Gnq_2wc$OVimX_tRo5P25gq6x@lOvgx#5#A_!rCT7no zf8~YotTXdLcxoc<)o-^ar_H8XM!eh`JZzhy^^;20*Jph_F|noi8;?MMl$i%(MOFkv z(C6x;M!5$(!gi3gLJpb|3^f9(qpW4*m@>$zk>=uG#!)eSk0?Y2Kdh8Lb}OUrrLs;F zkXi~tvk7CiG!?d>euo32+A|T2&tTLEHmk`QNGC9vjgdk53pH`XvUWbUhgp)I`v2+T zwW>JQs(E=>7TOaElTY0M#7h??V)}SHPxVN?^~uVR4=9H!N8YoyFUQ#r=Mi&0PGpu0aNEl`wDi+yPRLGs2;K#2VwSamJdqIB8 zx03JqOuKDX*FeTBUlb8dU&T$)U)aneG3A;>>3#S)ba|pJXT3c_p)QFZ-SA*bOpkVt5k8sBl;&HWb ztQV!BO^HbS^Y0rRy{}~X0PtmT>)02{z08 z1ErnN3BYDiEqy@y*}fSjXx_Ph@p&vo!KwXQrfHn;yGj4Nr=Qz@i z4%8}JOWs!}{~nR}+yP}sr*0|*YED@4RAKYd%f?S0DN7xhKp9suswlzik?z*CA^wpl z?V6aXr`Aa+qmr9*Cc~*WR$HYjWaOS3ZftT8PZ$cY>Dgj>^+qAzI!TJzIUXo^PlEQ7!J@p_DA#>wGy@dIt^*|dHr>rseUW+ zM~8M+nJL}?cDRAZK-OenTJ+@bMvf)-6=RwGW!?svJiVI)&l=b1y`K7%YDWV2MCKan z18-mmX?+A!RyHVW@}$<~h~Bq3Z8p_SeN&JOFN*EOoa9Ayg7HuM2)2TXe9J!DLC-#v}~_L_1m}!^pX2|NV9&!$1gqJbv42l z*{vYmJ^`#H_C}&5Z1aVG;oj=CYS%=1HhSbu!z|B)+qIbNjGrHELR9GjW$DyUrMgYa z3!+2!hxhPdv+5;fsRL_|{wf|M>FoVGs<$5i6JebvsUQ%c-z7$+cA|Nyva0ZWN`s`# zg^{C3WbVOPaFV26A+OIWGy7RU1%Nn7po??Rt7`4O#_X|Q-9_2pGPy9lI)sx1&iSBH z*J4wMS~M!^<5Oy?F#Q545bXrVo5kb(CkNp?H4Cb*42_qqqtRUXCMsrLAES)k9B zvLH*~mq{nYCxVxuWNU%avBg(AcN%cVY~S&A?i$bDj89h`Ed5yb6QcXUa%54kE-B1? zU&K4oR&rIfel49Tg2JT#fGk#=`A58hVM4!x1|-Q1#-NyS)GhWDtSbdZswJg`XXTx8UbORNCWV1XXqYXg5=`C`x8RU{sC~6`#)PZQY!b65I(A8nc$LUCj z_N+-Ex8?YN9BIeSjF1-bd*+rirQ`8lj0&)Pp?v`sT6z=m);;Xuhlj?`+&|7V1o#Y7 zRh)LrkLo~gE+|yB1lMSw7(2JU5i)isO2W#1ik$RsbW+ScDZ$hYX-?MT=}Kc03OtIQ z#xESn=?;nV6o=ao**ldBrwklabaA52EDzHq*VRa~E|PZIml4|bx$8jiV%FIxusSNe z{)r3glK4n*0pyNvBU6J*Q7}ZX0i-K)hk}kWqs_x{V=RlI(KGAINJDJ+3>%GLxq}P^ zACJZA(C6BOYPxShni3-Y9Y2#WvWij6>(!ht{5eMr#9S$8{*B-JrEY^Ll;-TwQa3mA ziyGt=!E1x9S%i@`FQed9d#_FRu87%9lT~Z#OXa0OR|`{@EY+!tJ^OmTItc;67NHL) zt2hYM%2DaZR-mcQ_n)l-n0)?C`()9nxKC$p&iOFnfEyRx^Ve49U$z+2m>}w%VK@`M z1V8;2l<~Fc>$PhKuG*A$`ZPbqp1le@eiy}lz1ANSP{v>;fa3&rRn zvG>q7~|(PgSRWeyTA8qs6FjVLDPj7lnP(A$V5Ty;2&$1wL>R*Id2y!2L~JbjBK zTr-GX_LMW75l*F8s#~M%)zN`$h9O4YL$FGhR8}GE{iC638wL$~-zwlAJmShBmzAJ& z@pV<)tS?Dh`8|kxpKG%M0_!c;?f%z}BQI{nrQ6fdGh-W(dpd_S(V5i3*tym=H3-Lg1( zZPij8%dA06c+y%G^a>j!TlFiVJl6mh#*!DWi$6C_BmRc|?0PK5>B%#mk0V&yQ<;n) zqSwk*cb*9}Y>KeXvUR9Q{`8@lRS--^T~L{!w?LD3`-*daqIRgQq-aHUV2TjNDJQE5 zn|3|%h<2^-cZ;%Hq}SW#cM_HhGd*n_zF7CtU*XqLhR7&W&#Vd7%NQetWiwcCK*-g4xG1SvfO1soI(Q{K$m=*tp8ewKN-&u`1D)^d+O>L(HvxWid^&6lWFK!Em670~i+1s2MLZLB75%Pgw5$?I=zWKYPg5oyQ(-wv2gRsWF+yri@->R~ zwaFG$8PBsu$GMT5doOcJ-yrhk?p(dEQ8meV`1BW_uC%2x)sq2BcGQP(^IRm8AfBfZ z%-%RpvtVK~-8Y=I9W>R1G11jcE2J0>bfEI`<(oHe1h5PY443cIae@$9{q54nTBHCBhz-9!H);Y6vi7FV}(kIH7kO|`0@1#f;1gYgtk({ z^6U%cT-q0C9(^W2c^pBzNqr~40Bz2L9yiz@9KNaR%eF~6F<)c-NGYN-xwOUN>dxA!%pkcGF131;1jmXfOQ6Gc3@bp&^&Xz9=B#GF`T)pIM)DWjd~lus z<%M3d{jvC%&h*xn#^(#^wYEmBExx}Eqgw)a9+0fWnxeQvYPh;`bV$_5S`98q>kTtj zlr;udd)`9i@(`cgJ$pvKt&jFjEml|W%Fp>f-x=6Fk6%23kF^D%4wm;uPkgVhF3%6Z z9&LX9Q?rI&$q77@yv6k-QQVTOe!V6LjL zpb@MLW039QuF7MZ>*A0#BHCOo>)ZqKb+FXV5)YJ%Azr>4k7AuZzRKaDAu!@6W;Q)O z28T$)tDC*L&`Y^jJ?OTap!IDPIQGlF4jl1vty$2c$!p5|axbFp#Ol=|8k=*~fnLhF zLZ{AJz3M@aS-t8)_loG zLtMiRNT#~GHpSDiki<$k`bVC;kN<9nP%k48u1u)KM>#erLmn7s=pS#3dqZ4*gZno- zo4dzEOkB2g$ZvQ{dY$Ud9&iB>I$dIP5li?tDgy!8MaJa?I}aSAWvDCoS|e87NgG_W zFwIv1Q$8QrViE^rPljaEiw>-z1eRi^Gjb|YoO}Rb*YZO-mFgZ&T?1RT+2r)kkz_j^ zg|A+}PK$|rGv8iU&LXBCLN2dm$(u#?Eaxdxuc=!;MtYxOrw95>VE1wYQSrR12V#Sp zK-|CgDi=7Dt7c-We_)&v(5iCzg8kA|J*(s@)W{a-R*O zk2WI1Z2mC63tDK1pEn10ZIqq{ot;j?$WwQHcmQkVbh=E-RnliFYD)bRLbV}{MM7m zMDq{Hdq&TsrcuelpJ-ekrpilC(?6{7An!FUIc}MI@3g@s(bnBwCvy%ASDusj5q1w@ zYmP@?(YE=kndcboyMZlx=wS4Q>f8yI(6ln2 zvOU5SeNj%@^b@r8w25Ghwxk7|J*o9ucnj(tihodxAI6Nw<1Q%9di8D4t|6`)IgE4a zNXJQGmw9Q{Y54wR^ifxC`;QbQUnwGl5D%W)+Ln|s`0ItNtYS39FGz<~`)*$IQ-6kV z5lhU84MnxpRHy=pn8~~NukW*^ebb-xsJOqAo+-gej8F~gKN>fHJxQ$YdJCneF_!nt z!~OYn@mMLO&F1B>PEmn$Srkt zmVbs-Bme;A?Z$tF9?7Y}$?|Vg3IKrqRo)_Bq>Y9K&ILL04><@ar9p+9)BxdR(A+NR zk+_p$fR2LPb{ qZ_j{mRb;CU7wbPS?v~9_{f_~e2J%V=M8N=H0LTXL5^t?n5*$|2Umn*OwT>k9J@z&dR1dXU#3@66#=h>J;_ z#r!kD#M>?+hvAF9UT(y)7y7Qr94Y;wc8_rWm#G5>JXh{!hCXQJx|Jx%q)a!ZR`ZS- z*Tl(p2GzI%s*Emkc&=y0dZhyXoJeW44pJ@5J)QyVdSdeX0r%*yW>ocjZnjxqQsuOG zAwVqS%I_vrEC+WLIU0)+A1xmTId~!=R9&1D?}~r+t}}>&vP^zgU>{Q|ahX@}EGj-A zn?~~dH%7TAR7)l#dGoEp{p114l(3bB&qW+!xe2&Jow-x|P9b*{xEmeJ&#lD}yD}Z+ zmyclqqowt7i;B*XczEBFGeO?u}xiupZJz)pEiugbY6q@FSQi; zY?yi)w)2Fmx_Z^|aUM|m=GLjLoa54%*$96?A|Oa1g!qSx8~}*KNrA;MwUB!)(SXLx zIb`PD`8M-35Mr0jZz@L-`Q|*LPgq#K8ZrArA0l%&)N}RfD6PtQy#&qHqolmud*v(E z*3<5pKU0tlLTBIQ6%(L6nCV15Pi|Fhxx0zK{1 zj?{P^`&!#@qXqEzCut?q4rRwz(F?T|FVepLq!C9}P&-H(Nw1_B=cVpnZTdR8KOF6z z<=jI()4R$TrrI(y@Y}%sk^{C9Yt85nW{59Ts=wt1>87GK*4G#2*)xfzS3;k?T(#ds zkLY}!i;eWnDIU9CjFCxXdn|m=-`Xw!o947k3EY1Zzck*F&)ylQ@jB(sH(yaLrGWjm z$Y`67txV#zX|X$&=5=3QsYL3$7Djq#V-pmn4-O&`5Ai!{$OWU4t^>C~ip*vaa|QH`3bc-(1E zdl=2TPXFP=>3d#H3y;?^-C(f%?cw1*i?uf0#v@&dns^dV+2{d6YQy#>i6F5u>a8b3 zdgFO5d~}PbF?dIFVk?Sp^8`MtOYbF}@=?&ZBs|b$RfvS#m+nQEchCbjDV(`EL04bX z^%2xv6VYBx7&JH0R-OJ9YRq3(!++}`Mf%o_7juQilV}+Debj{H{1C-H#gO0G{7f3;L`qVk=E2mgzdD6(&B4= z<3|tQc^kipc)+MNwm@Vk^Y$biYHdtVl13%+s-1zxm`#$mRkeRfda2{4`)LqNb9cmY z^q0n{c&NmT*R|*19p7XZamT@Z^JSL7*_dx2V0H$pc#2riJyYd(l?pb^C6f1%gNL-Z zs3^Zzu9T?NF}b0cU^ni-AcGUY%dL$zJwIi|I9j_hm5IfQjEpGt*I zI3gBGEsRRqj+*rP(y(XBo|3 z$I#-br_8gRem2%`+KxWT&DQvEaD0z5n_V0-#4uO4`nH}A?y#52d`lztlhWc>WTq#- z3whTquWxFqOer_fmo0^!IKd!)a*2yAhQ%Q)v@KVfR2yLW1@J48KpdKem3jq0KC6B~ z@kx|~Cp3~|7@rtfM*XAAV}|!9F8q*)%UJyvCN}~nCm5i)pGzL5Zm!34)S1p}m%k_R zD`wr58r5iT=$j{@=0mrinbqFuzNY2GYf|#lJ_`~y%v$3)HgG6^_qfyr(Kp&v5eXC;@Lp5&C$s9xXN zi(QZKIUuwX2F|B#^(|?_EtYHbjZwl)CGD@dc3@*7R^5>40vmh7h+-JeI-rn@b|QqOgBgEKQGc!6T0NM+GB8_fj7ke6q6`+->K! zzLGTDEuZC;pEa|f=qmX;vd`QI3+i$=J$Z_bs^JKB+lp>3m}VWz>A*HD_SFyD;`aGy zwSMbN$owDKLg>d>sg-|ey2@UVEr=fXiun?56i$K3PrvC~Cd(MNXg7E6^t%9RG~sPw zgmiyCJbzDSiy)`_YMI7UO{w;YB$6k-A@*2~nuuc>U3-8?vmu`*82XHuw*Ga+y7U z)rYVp=p2-gXd-dxF(`vs8vk+bdY@!mJ41ggjlW-LMd*`JnC`iWvFYcUMl+~DZo%F5 z1})dC#NN2Tt#eU0%$YtIuhki1bKIPkz8;6kZ0{i!#gZJH&bIw$ZKqVu>@oXsvD9UZC(y^)AHpNIw>RZ|^^5z)ki~)~amv3i_Iu&)6(GK#OQ>eBzfk|mC z?&_w5iRX4V64>>btaW=DB~I&4w)v;MJAbgY9ewWKSYZi`s_u0*5FcnIBa{kf0C5s# zjD+_R#Wav#=2R~iF0NA^hgRgq*e)is8Zt(5obqj~>QnvrF?2sot$AL^L@!#j7!{te zklS7`Rrure+rBoESQu7#RUMHkxSE}4Ja#JUL`5C-rAw<~SKjvzq{6U$!p%J#JtHYggsllLfz+Xmd>(0-os0zkfe_`|L)@c%V72R?CbWsTZ-QBFE!$_uG8K4Q%=w!x||!aXRt2 z%pXg>0)=KSe12G$PvKOC80gxZqq+_`x2;||ylTSnR2!R*UoLD@moY1XF3ZuJz`fLagzZd&{oZj|9(qp-;ekMreJnEd`_!z3tr4>FTL z@d`P!s|@J^D8|UezD!Tza(_cyn)(%~((`+=OuV{*0U>^zJCu*UqM+dP8{F1Rcptu-rKEj;D%Y#0 zL0mDFe-5JjQ;=TpU>l(S`6-bTGH|0`XRN_e=TEz_<*zA@Mx~c0itO+Kj>b9?Lt5h% zawF&PNr`M>*~2@}nrp1yEz$I9X>-MKGN@V!jK-?VVRkbPVuTueynMXX9~>U^*1lCL zbbRp3>ui8<&}ZJ!));iuHFczPes=nElCMq+6Zr9Hyp_(tFMi*$k|IBof?nM|DA`fm|0mDv^1SUXu6}1wNF0e`o`Ip#0n~9ALkr zw;6QJA0q*u7uy#AAACXd!yrP{8L>dglYYJ^C6r9dgPz&Ng0tS)j{JztO%2p`)3GTE z6Liq>qOSSDjfaA=7*uO(<$UP9iYm>wlPxFqNJ5)!fY%dZl@-iGp~btp_e=1>yhT-N0C&E`-f} zTeiUBbmhQF@@P%FDkVSj=ZR{d>N_4n#kbzBwt#!Gie^_mDjia=MaxHDS4`~`Y+Fc~ zfQ%|DS?8!Q*F7-EZU0@{oJ7A{lA+aGV!B!fEtjJ>w(*4Js9Im>)mw=(5Puot;{)X5 zf%*L(?bnOd1&o<-$vL7FB`H=}pTg_OB8MBDPm(tnS1>M}X*omG{{7!l#_#B<1Anp> zg=0^-{&p9lBY>1&zG$o?LCEl;vGcODZ45J7&~~NZ7C|Vmw-@X4&6yM}vh|ouZ1T0m zO_-jd(cwYa<`mklOgpUXJ?iJ~djG~x3%hY%jY}ww-N1Arll_*zYe@31;JvNQ0B)w3 z;Szs#j$J14_94wk3(0NHeJ`w_hqDJ$_Rz4pu~GJQkXgA`g_W`+XmETR!)(WDu#!2+ zpkR*}nGDQYiW*#XIa2o3id8(6IP=VpVmB|1KWjYvwRHIgp{>XT;*|>LPBM7V3132L65@wveuHbLbV3&|i z)8D}#atu2M`L8^gODA(gUTYb)wfTqHohi z0~;%-9!Gk*0I#Xi0FEi%>t3XDt)p@-~Y5;)YV)pW{G2*671iAi|=>P!dzlRs-I*!HE5cq*E-HZ)~HkBbm<4R2R zfeGw?{Y7>P+y^)#jv3Ag+_?02M;9&wH0J$_9>LkT|COr%0Q7&dmJ9xk8HKZ5h#^iO zwHVI8ObeKO1(#sP$Ni5gq6J)(_TSb5!*!c!0|%6Fo$6dTK7>3FsB!^Ca1Rg?z;hK` zB|;52p@ze1iQ{O^C4dAC99oARhq!>BwQvv21-btr9v}d~^xv5Q%i6dmb0NKd9?*p< tWBi{0H(dbWj<2hQzpr0_jI*ELl~8Xl3sVZpzd>vlE1d!Wh%or;{Res##pM71 diff --git a/src/cmd/excel.rs b/src/cmd/excel.rs index b826581fe..e60001b28 100644 --- a/src/cmd/excel.rs +++ b/src/cmd/excel.rs @@ -1,6 +1,6 @@ static USAGE: &str = r#" Exports a specified Excel/ODS sheet to a CSV file. -The first row of a sheet is assumed to be the header row. +The first non-empty row of a sheet is assumed to be the header row. Examples: @@ -76,6 +76,7 @@ Excel options: Negative indices start from the end (-1 = last sheet). If the sheet cannot be found, qsv will read the first sheet. [default: 0] + --header-row The header row. Set if other than the first non-empty row of the sheet. --metadata Outputs workbook metadata in CSV or JSON format: index, sheet_name, headers, type, visible, column_count, row_count, safe_headers, safe_headers_count, unsafe_headers, unsafe_headers_count @@ -146,7 +147,9 @@ Common options: use std::{cmp, fmt::Write, io::Read, path::PathBuf}; -use calamine::{open_workbook, open_workbook_auto, Data, Error, Range, Reader, SheetType, Sheets}; +use calamine::{ + open_workbook, open_workbook_auto, Data, Error, HeaderRow, Range, Reader, SheetType, Sheets, +}; use file_format::FileFormat; use indicatif::HumanCount; use log::info; @@ -162,6 +165,7 @@ use crate::{ struct Args { arg_input: String, flag_sheet: String, + flag_header_row: Option, flag_metadata: String, flag_error_format: String, flag_table: Option, @@ -276,10 +280,9 @@ struct RequestedRange { } impl RequestedRange { + /// takes a string like C3 or $C$3 and returns a 0 indexed column number, 2 + /// returns 0 on missing. fn parse_col(col: &str) -> Option { - // takes a string like C3 or $C$3 and returns a 0 indexed column number, 2 - // returns 0 on missing. - let mut col: String = col.replace('$', ""); col.make_ascii_lowercase(); col.chars() @@ -289,10 +292,9 @@ impl RequestedRange { .map(|r| r - 1) } + /// takes a string like R32 or $R$32 and returns 0 indexed row number, 31. + /// returns 0 on missing fn parse_row(row: &str) -> Option { - // takes a string like R32 or $R$32 and returns 0 indexed row number, 31. - // returns 0 on missing - let mut row = row.replace('$', ""); row.make_ascii_lowercase(); row.chars() @@ -303,9 +305,8 @@ impl RequestedRange { .map(|r| r - 1) } - pub fn from_string(range: &str, worksheet_size: (usize, usize)) -> CliResult { - // worksheet_size is from range.getsize, height,width. 1 indexed. - + /// worksheet_size is from range.getsize, height,width. 1 indexed. + fn from_string(range: &str, worksheet_size: (usize, usize)) -> CliResult { let Some((start, end)) = range.split_once(':') else { return fail_clierror!("Unable to parse range string"); }; @@ -325,6 +326,68 @@ impl RequestedRange { } } +/// Parses and validates the requested range for a specific sheet in an Excel workbook. +/// +/// # Arguments +/// +/// * `requested_range` - A string in the format "SheetName!Range" (e.g., "Sheet1!A1:B10"). +/// * `sheet` - A mutable reference to a String that will be updated with the sheet name. +/// * `sheet_names` - A slice of available sheet names in the workbook. +/// * `sheet_range` - A mutable reference to a Range that will be updated with the worksheet +/// range. +/// * `sheets` - A mutable reference to the Sheets struct containing the workbook data. +/// +/// # Returns +/// +/// * `Ok(String)` - The range part of the requested_range if successful. +/// * `Err(CliError)` - If there's an error in parsing or finding the requested sheet/range. +/// +/// # Errors +/// +/// This function will return an error if: +/// * The requested_range format is invalid (doesn't contain a '!' separator). +/// * The specified sheet name is not found in the workbook. +/// * The worksheet range cannot be retrieved for the specified sheet. +fn get_requested_range( + requested_range: &str, + sheet: &mut String, + sheet_names: &[String], + sheet_range: &mut Range, + sheets: &mut Sheets>, +) -> Result { + let split_range: Vec<&str> = requested_range.split('!').collect(); + + // Ensure that both sheet name and range are provided + if split_range.len() != 2 { + return fail_clierror!("Invalid range format. Expected format: 'SheetName!Range'."); + } + + let sheet_name = split_range[0].to_lowercase(); + sheet.clone_from(&sheet_name); + let range_str = split_range[1].to_string(); + + // Find the sheet index + let sheet_index = sheet_names + .iter() + .position(|s| s.to_lowercase() == sheet_name) + .unwrap_or(usize::MAX); + + if sheet_index == usize::MAX { + return fail_clierror!("Sheet \"{sheet}\" not found in available sheets: {sheet_names:?}."); + } + + // Get the worksheet range + *sheet_range = if let Some(result) = sheets.worksheet_range_at(sheet_index) { + result? + } else { + return fail_clierror!( + "Cannot get sheet: \"{sheet}\". Available sheets are: {sheet_names:?}" + ); + }; + + Ok(range_str) +} + pub fn run(argv: &[&str]) -> CliResult<()> { let args: Args = util::get_args(USAGE, argv)?; @@ -736,6 +799,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { return fail_clierror!("Cannot get sheet index for {sheet}"); }; + let header_row: HeaderRow = if let Some(hr) = args.flag_header_row { + HeaderRow::Row(hr) + } else { + HeaderRow::FirstNonEmptyRow + }; + sheets.with_header_row(header_row); + let export_mode: ExportMode; let table_headers; let range: Range = if let Some(table) = table { @@ -856,7 +926,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let mut trimmed_record = csv::StringRecord::with_capacity(500, col_count); // get headers - info!("exporting sheet ({sheet})... processing first row as header..."); + info!("exporting sheet ({sheet})... processing first non-empty row as header..."); let headers = if export_mode == ExportMode::Table { table_headers } else { @@ -1082,43 +1152,3 @@ pub fn run(argv: &[&str]) -> CliResult<()> { Ok(()) } - -fn get_requested_range( - requested_range: &str, - sheet: &mut String, - sheet_names: &[String], - sheet_range: &mut Range, - sheets: &mut Sheets>, -) -> Result { - let split_range: Vec<&str> = requested_range.split('!').collect(); - - // Ensure that both sheet name and range are provided - if split_range.len() != 2 { - return fail_clierror!("Invalid range format. Expected format: 'SheetName!Range'."); - } - - let sheet_name = split_range[0].to_lowercase(); - sheet.clone_from(&sheet_name); - let range_str = split_range[1].to_string(); - - // Find the sheet index - let sheet_index = sheet_names - .iter() - .position(|s| s.to_lowercase() == sheet_name) - .unwrap_or(usize::MAX); - - if sheet_index == usize::MAX { - return fail_clierror!("Sheet \"{sheet}\" not found in available sheets: {sheet_names:?}."); - } - - // Get the worksheet range - *sheet_range = if let Some(result) = sheets.worksheet_range_at(sheet_index) { - result? - } else { - return fail_clierror!( - "Cannot get sheet: \"{sheet}\". Available sheets are: {sheet_names:?}" - ); - }; - - Ok(range_str) -} diff --git a/tests/test_excel.rs b/tests/test_excel.rs index 00e152589..57e6d8824 100644 --- a/tests/test_excel.rs +++ b/tests/test_excel.rs @@ -904,7 +904,7 @@ fn excel_metadata_xlsx_ranges_tables_pretty_json() { let expected = r#"excel-xlsx.xlsx", "format": "Excel: xlsx", - "sheet_count": 6, + "sheet_count": 7, "sheet": [ { "index": 0, @@ -1069,6 +1069,30 @@ fn excel_metadata_xlsx_ranges_tables_pretty_json() { ], "unsafe_headers_count": 6, "duplicate_headers_count": 5 + }, + { + "index": 6, + "name": "firstnonemptyrow", + "typ": "WorkSheet", + "visible": "Visible", + "headers": [ + "col1", + "col2", + "col3", + "col4" + ], + "column_count": 4, + "row_count": 6, + "safe_headers": [ + "col1", + "col2", + "col3", + "col4" + ], + "safe_headers_count": 4, + "unsafe_headers": [], + "unsafe_headers_count": 0, + "duplicate_headers_count": 0 } ], "names": [