From 84f915b407fba728a15a278687fcc8b8ab94f55c Mon Sep 17 00:00:00 2001 From: huan_cheng Date: Fri, 28 May 2021 20:01:48 +0800 Subject: [PATCH 01/14] add support for Identity-H/V cmap fonts --- pdfminer/cmapdb.py | 4 ++++ pdfminer/pdffont.py | 13 ++++++++++--- samples/contrib/issue-625-identity-cmap.pdf | Bin 0 -> 32829 bytes tests/test_highlevel_extracttext.py | 9 ++++++++- 4 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 samples/contrib/issue-625-identity-cmap.pdf diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 7c0b7721..da85febd 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -111,6 +111,10 @@ def dump(self, out=sys.stdout, code2cid=None, code=None): class IdentityCMap(CMapBase): + def get_unichr(self, cid): + log.debug('get_unichr: %r, %r', self, cid) + return chr(cid) + def decode(self, code): n = len(code)//2 if n: diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index c0da1b6d..7c3fb1e0 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -684,9 +684,16 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT): BytesIO(self.fontfile.get_data())) self.unicode_map = None if 'ToUnicode' in spec: - strm = stream_value(spec['ToUnicode']) - self.unicode_map = FileUnicodeMap() - CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() + if type(spec['ToUnicode']) is PDFStream: + strm = stream_value(spec['ToUnicode']) + self.unicode_map = FileUnicodeMap() + CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() + else: + cmap_name = literal_name(spec['ToUnicode']) + encoding = literal_name(spec['Encoding']) + if cid_ordering.find('Identity') > -1 or cmap_name.find('Identity') > -1 \ + or encoding.find('Identity') > -1: + self.unicode_map = self.cmap elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: try: diff --git a/samples/contrib/issue-625-identity-cmap.pdf b/samples/contrib/issue-625-identity-cmap.pdf new file mode 100644 index 0000000000000000000000000000000000000000..eb0980ab556a69387288ebbb81bcc7483f880844 GIT binary patch literal 32829 zcmeFZ2{e^m+c+*EM9Ns^A@g+RAyQ_NIU?h6&M{>kBT7^|hPbM!wpoC$A2lGz zW2K=Et+}S9xE<|An-oNK^9=|CkE&(?9zH?h4ggpP72K>m0}%Rwpc+GiiKV3u5H=k4 z*Y4IH)X-mg(;ok}GZ@8GPY-}y1*mFj^4lOZ@PFG?)zpvbqe%|(@bv*ba5RKe^*n+M z#C?LjydVZ$6f6D2}HC*%Rgmiol#m zUcPSPs&EF>0J8w9D>Z=XLtZ7IM*x*Rxaw{cjmR1r>{K5LToV+4v${6g0aOyiQ3p_1 zT%Ce-!Bep~D!~;`AyAN10s@a95=l503KoGPAzVQb8cB4aP?2aN1ql|8*4;NiTpj0% zM^W%7P=_M9P%vm06dFS!xKare1OZ7v;$2B3G|I&l2l{}y{WnJgOhO4JQ-2!{jd!7- z31B!^FdUUcpyCM>3!gqO2MNESSpr+ zCjlCe5F{)SLB-)nC<=v$z!I<&92rlSy32p2p8hruFI8h|BNmjSZ> z>N3zp1TZ@iML^*Ja3UUyCm?ZDstbjHMq$V(0@j5D=88t32_z)h73GQ~qFoUr9F~Bg z5C|j$5rM-J(FC$9o=m3T&`8jcOvYmo1Q)PQ1RCf3cYp&vfWQG6iD(x9oJyu3QAjEg zK_+8e5CkF>k0v5rToF_vz?+OAku=1Kw85Zstj-h)<*zdpS0^Gd1Ps=dh{K`K2owQN zp^#B142}%ggU;SSoFV^1fq)WB7y94sEQ27sLScm3gC@hU4&JO*0kH*jwBITkP>BDC z1_FyFlQ9@H28F;;DFi$jOyEC4fj-V(<3L02Ur>O0U=;;zU!NeL!JyVcB4{*2Lak{b zj)Veg0ab)CmEu9t@;v~Wz!q2{Rs}^6Ct#2&1R`C90Fe(`RSXu?K;PhiRSlqfAV{B1 z3_u!4COt47jhK2!aU@;$s_LP{k#v=*s)rUw($ySn3`C7=Neu!6s{+LiNJ2dT)Rdt& zSP-q1Tze%qq+#-}`rA|+xuS}Jf<&#TPYBn>)Ws7hCK?`w0FM>LXbA9FQExUL2yrBx zVO4EBKoj~sjf|^HLsOK$^B3&-J5__hG*RNJMjjMMTKWLURi<27TL>T!P-T@h;25hA zboJ;|5=YTjg+@DZbr%8<0^biv?BWP~e`U1cCyT2mwhYqlm6Jz|$Bs3Pr|J$Y?woC}SLn0(7(s$`$Q`pdx{M zqsb&Jl7s`AkmT|!S~Rv?#S7T%AfS3x^}K-W=}^fu3$>f-;}+x&`4`YKFbEhEZ}#cJ zx#}Bbn`IBb1NLU6Y{l}>82sM?NW}aKGGemG2L=Gx0WmCM2%KK0L$&(0osPbgr#JG?{QX`<`G#HN~2 zvWYn%i{p_}jr~TRo|ns-5y@|d56N@O5!a2d*RZ8Gm!$07vbgWLoI}Lk6uXupNxVwF zf9_%b{+{Jf&9lkjr(Sb4T7PXR@7-FOb>QYlc27%DDtDD{XvK39ho=?d5LLqevC$+7 z=f35JeJ7LoJ_I>vME0op-R9hVj*q2tk8AapuaP~YWPm2=4Mn$mgfj%(JS=qYfwbd> z@5A>mJ&8|auG#1I0sDj_#N4MY%wB9JYnSf@BhF_t<}PeDoc(#4pXJ5UBGY+eLrybx z8=5j^818Y|6Vy_>=T>#vOpKM{TWlzo*%z#m(M1c=?lD8L@}yq!4TneC7$w}=LMFcZ zMz-+`T3T~OcT}5fP(2?c?0If%4bkcOaB$yj%%xf9^5!XG{F|@>l_E33o{iJJ?P`Um zCa-!`@l2`NlnCFtH+#d|_$0m>8Ql6ad1_8Vn9rz(y1-RunXOR7y8-P-2`Q4&J#@-A zL3Z4)^`6u-j&=7AHkJt8RxygL{#=D=a zWqg5s_O56B`B+`1N{&v)=^S}0;>>DiY$+plYFG@{;DpkfMics$%DD*JBSKPh?nrrAUt zI-Gr7Ba0^s(%0vs_7AOGO7^KecIW!{Q@(6FSHThfw(ZkXv>kL11J5+8?s|T;J}9kI{2ZBL zo%;FOE^{>mFTc=IN!9%h+@eI5uJ*`c1>0zlZoHCf*1_O}lOqfd4Q1_Ji2QY9jxw&d zYz<0}GT+Y(u&-db(ES4cqxVP3tS|S0-MW2Kj4Y|1oYL6QVe9MtJvC{{o@zJ41Vii$ zCOMalZ*{AEne+K<=A)8-)cNTXdG!|>k%|L*b&cfTT#s}(`nl<_dZkuUz10-=^>OUm z1;+X+nVRbU{h12Ss_=^o>FG6nCy?i?;%2r+-K^ZcuXy`K)9qf&f{cU*@*fyO^0IV& zyQ_}|9=tpMn2Dk3y659FV+@NAoqYoy9X%Z1rZ{v@!qVvG_GZtuMn?Bvo^Mc!jNH06 z7eUbYxVM!-)@k?HF~L_!@AblVHofJ2-x?P5eWTQa{<#M{4c>e1X4-FDh$eTwlI&xn zUM09WYB))R`HS?j&$6^vyqAy=eO`VcJ3Fb!xo>3dr@GidzoDg9qDt)>42cZvQ$egD z{EIDb?B2y@{S1tnNPAPuARQL_l_|Qp=RTX?J3A)_F{6UdZT(Hyz*~cBnT%8YZhz}E z8Q;}uvY*(&>{nrVuZAna%c$V4p3Z5NTji%-Nm=^&YT0XQZB0IIdbK6mZd2FInh&}h zMoz&JPetzf9|lfzvH)?ftEdw@mEzK#XCWA;PEe9UKx^lquf4~uMqyG|xD zEg*XFW~k7xqL7RvzJ~z{bv$U7C4*-M!^{nB9ocB+eFsXs^!v2O=70L_+Rkxo_8BJ4 zP!Y}TkkGovR;Sw|Ew9Sv$=a4nkwT;sB5A5iOcp|jbkX?>!zVgrHp%vW((`!xHoL>U zaOc2j>k(lFJuS=oxr-VTB=gF&(|n9g<(3XpwI6<58GnsN3oKA}FCG`Hl2s4CZNG`b za^bVTEaN5a^fJ#Imoj^vx@?jNKr6?@K6!YceVb3>x9#~wE}_x_M^YILMEX+WNFE>3 z)#ime+r~G~Zz>6OBs)Are?BfO8l-LZ>TK`_R?o3~b1`f8$)w;2|D%b{EOTj+Nf8p_ zuKZ52R~WGBb6b(dHh0RIL#|%)xl*4IWPCf^>FTxTJbETkALNh&kuhQKO3&Nz2_KE* zbrRpm74qbBlH@X1W!;Cd=AX}9U%X{0M`tE_^AV0aSzM3yJup9Yd{H>V?8}oS@pEh% z<|B(&tH{NY%;xa6Jkz0_+? zb>{hf%}tF2^|D(b+Tl(7nlrKS$KJ6DhBmF6-G7APL%y`=wC1J8*9&dRgha*(2E$TE z;<3WLC%8F|JnAPOi~DkC`hfGxHv<*NTc@RGT6_mW*<>rdlsh>H#|OQX$9i6DGE?8} z`X(pu&S+JLOfj_EQ+^}ZuS2fT3jKeH zTP911 zchNI-Pfn`ADbZlXxj{#j?Qi)XJd^sMxaE=LR@dhi&7lXs=Qsva)*d7`_jd3Zbbpha zx~Y@=^LU7U!MTswyF*M`j}*3Z-AkICYj~RA6YhV!_QuTA0p?{XmbtF6wj3_Br-;84 ze@muEg??PMbi(10tcPU^Yo6_O+rRX>kHbU6Zbyh+;uLAt;luU#S6&7Qoce7$H(Z{r zmg-~R)3-(FJ=Me3f3bfiU+^VwhwWu6Ymz8-f#sX=R^+D*RR;?6D`LJ&HE9^Hc`$dU zxJ-!r-kU^AL{EcdRcmLu~VwQvA7F}l7`dT&~x>C4=oxR|5C-;ik?$)}V zmq!)1G~Rx6%QtsnQuX@4klDwE-90Q)&vw<;+4OxyV*C~R#p{f3NPlj27whIjOE&Wi zKH%GgJ`+-s74L)od~$B*rLc2Z7Y{s5*ZyvGkKS4k{9#ZAwpg z8M(#6A(LykrD3h7z|MG`nlIO8ZfP*uXGfeq9JB8}kMWqjis!CtN0;Z{c(qX6Gir*i zUwQG#{#^KRb!q<1mrWCS#h7k|)ZuD1BbH)?`+fYI6S8j>Jm+@5yfvpdB*aChyNPfh*Ty~Vb_7wn^4vp z|K90PvnKXM#oh5o4u&aC*&f2=DYfl$9&>*T{~U-qq~THol~Z6 z&GrHpzn$r-N?^2j&8D(#)Yr*~Brk&Q8ctw+@4|tp=}BWgxTxB4C-LWOr-TUOnLR#r zxW)0=!yIaCd#|(*&F4n^6UMW5&H1(wvdgFO0>ay$Z+LN5i6Hi!p*DFTJis$y@pY5a z+J1cjp$b!jE(3?V7HwSN8!K)FPsK?v2snNdI8;}i>|uH;d~)*3^yjKgnXNh3cm?9i z;%u^tFP+kuyl|QjILKTzytpJlL|e!infRDUZn~_$zFI-gD)GaS#z_aAs<9JO1D;{$ zg1#vvRVldl@v6sqYFDWGEIGP_%vW=EUunI_m1!u-bNy;xa?d1dYwW8CF^LN%CuSoo z54wN4(4BR~_|gv^&cO?%J2Di>HrZ<*Rrc-FR^kY&dg6Fb;%Zj&HinLQ=f`?C6pi&7 zHt8deaPcIcO}D3F^~=OJ^r9bAr`#*OZZ5JP9Nc(GxvUb~uJpoF)V{9$o#vnI{I5GZ zf2;ie#c){YgZ-)hz-|E3_M6>6;K9>>WH&JQf7fi#V6T`B49y~~?w3gJ7&@-Bj&Jd# z31%pLDMGrVW)sDe&K}z`Pehd5@txfyzqu(Vy8|y8E^Tm;Tf;y3Jy`F4#38n{ zZzAEl%%9p$D=Zrd?vxTK3w1eg&)D43JJDTeO^)~;4kCLJvcH-zXqEG6r1{g~-9)|l z=X-6W-aVJ=QT^GDN~lB@V8;;+N$qYHrx{Ecf^XLx$a(7Y^<%n|Q&B3GXnshLH4OWT zx^P*3p8zG~RBflF=PThmr#@acFY~L9+wH;oP3^}7=F>NC!Jr)e+6OO9Z;YB9dfFsf zHC4@1A;dEt_PI^Nj9~Tf*x>deC*Lm)ZCv{p^<@f6F0>V~SMmrFH~KmYeYhx{Ca#pw z>ilNDlTEHTJgmuK>JX})%Dr{hEsD&5=j>Mpyn|-lo|>d6H^!Dzn0q@1rElD*mQ=|8 zGgJP$S*Z07yR(1k1MB$)c+(s?;IJ5zf&x6?<}Tdn1G}EI6!>EQ!A35MzIzM(oBsz} zwkY}rFm2BkMc+AvkXKzG`bIJ=`ORa6)A^I9w8G{%{4c)ID~=i8?ZmR3ecw*l4&Dsj zVyQi<-+JWCwV&tSZtlA@Q!t;C%|8CHS-)K|>9*XV)>q`z;-}-e7v3MO;oP_eFIsEZiJAsoUiJ zaqL^b^0)GtnkdmD%Z+A>=7szrfjfTo+-hX|@!0opKkwrtUH^hmh6`hQYn_Vfv#V{d zT$kB1F>+$?lJ5^!5)Ye8s?40kEh5tQ+DwpEvre`T;UG&4tM~eeiR#5C8(g=Zi{p>9 zj*d7xqb)z*t-a5XM)4jX3 z^^Ts*TNWQr?Bv@Q8V=-RKipmuoVRy$m*MQ{PG2(~>`E7_W0bnksKJ-dY~XR`x$z*6 zQeeZUn8*s(NJoL;HP;?+m%!)(S9jnQ4u}|nJE7ksX;m2TT)Aojvo}DFC zAv_M(&ZwlGZtQs(dSgB2a9iYQNzTlqW?|o-k23ij2Qu>bkL_f7d38NnzoO%llaR;Y zT-xcK&z`aIonBmYJ{-e|^&igE*44aVrO?0LzV3Ye#aN<<$Y|RR_iOu@vl&uaTc0*$ zR+Z*8oZ*f*8{5U8dZvxIPWqAFTI0cr62#TZcicD*yYQVi*zG2olVe?jW)`IQsh8f` zz$iTCpU8m@u2Aq#RoNLTH+RZp@M&dZCQF|96}KdguG@R}hiK^&+dXY`Cz{)*B|RTT z7C+%<-c^0;XlCd+X9AnO1x1WEVXA>?=fF|N_K4nX_rJbP-PnCA8xH8rURc0-LS7RhTkRS zpj&Mu+@IfRDL$#elz!ya!^sBgTZ}0-@$dHgtl@5PO(<8qzGvOm%8$9ZSq7+#E8Few z+-qPAKUy%T=5(Z!-QDc1+3ClFrSsgzR|1Ug?~o`1Og)dap7BSl+4(el?c=xA?<2al zvBqU@T_{ozD|os7{prJ1mfCrf)j?ZJ55_jTO*w8dslUSYfLmPkq`X>O{KxRAj<%0o zFH%txxNkdJ!wz!Q^w%oKY}hOnbD%@?81pmdu8duUuc({u7WsWS-h*je`=w}NA*X8c zdG8vGpAoNkKmuDk!uaD9)}=Qt-Fi00eILf4@qWj`Nh3x*&XZ{y8^0fV6TwnH+?TxL zMC3rbm-1AB01Km?McLaeKe!Bz8Rpd{l%N>*uYJlxDt0n8&cc|Ce0^~`32ky)^?dQI zqZwTTI~x)ju-SupOs_wmvv~LH9G|?yXSGR8qMC%L2=eOL2j4}nQ3QY5cC-Ch6jx;V zV3Hd*<8-6w+8c$V`-MC|qEq9%XA(Bndtc3Xan3lzMZnV2GAT4@A4MdbU#MW?lPlHp zww-M9UM_-$tvtS5`W+WL7Iw9sjhI=7OqSLc*80RUS0;Da{{HRs;cGqXgwG{yxEb7x zkub2R++n&sMn0NT_?#xmzs-dIzM8}K=Ux?4*0DsJcdrtr4>ycwvqil>JM#p?x|Q+m zLi9b()JhW}#e*NTCElK8sJpqQ%k0M4P3x6wViYE^DVSURS=sMUSK{B4v1TcEni!aGII(V3eXjzSBcW?AM*t$muYDaWg3R8X%dSC6+{MwIy;RxH(^ zFkH1mg`nk!yIq~{tKM?`=e@w5A?3=`lsnnzFPm;>4g1{ma&BMeiM!KxCuig1V1I$U zOx!4ASLx#%i4)C>Ubp#d0__&Q1aGW8#oyfG9)o&))$@amj|Zw)>pS9U(V)vsF_oYjh(StUo@J{*&=XL3iojvQqT-sQ+Z8 zh}d6UmB6nemglegrD+_=wP7)fd%P}-Vkh&CNzTG9w4`?vVo&^CePu?usGFARMvuI* zsl1qyne{SV`F>Xe6#67mqD<1OykpbbJ{*nf)rlxtgFv^|UOtf}rr{!`?x`!y_rN#j z^^952x5;5qLYu5*O4)~FGMAo+ysJ=?Px{(%;EsCjg#nLr?!MNGo=ipCB)|DRi%_}H zrh0o|sC3;op}0pP9Jn82bO?SXUYjULD%Dj`VR^X4{^P9*JkHgmYrA?sr*2 z@jt81ykEanHD6%!#W&uU=JGiopI&1r%P!q=&g#?@r|TW*BmvdynIWT9PWyC2UNZ4o zp#-MF^R_?n4Il})QoRxwJcS10o~&d4#Gt;0cwRg#QwvpdN%i*3Mf;vuktyEQ5A`KOlh4EvH$pK`*`It)zLg? zcl4WbH|;Qc8{YYhnbMzZJKU-@4?W@V(-0;WCo1DxwhIralGczLjtezb6tU$&Ch{n5fFg+=%_ zks_kICPv>F^j90$(pfc7{v7z9=#5n#Il zLWLI|yw!l0gE)=|>S@K`R0kd)1B)SWESMlHhkXfPKw1}2PV2tX?zeuR54)yZZy2%yuom*_q5?*8Msgo9dN;Vm_t+?#F5~r z3KE=tf-R8X)eayggaz9`)K}oqXNUxW5OM_q5(UYDMguwm62TnO2*AU^LripPBLU^W zK{+~X91gUHZ6QoJ1|E$UN8?v;rT4|d@nJa@ixS7cd0^nNE+oJNNr0n(BUuC_XoJOr zIe~T%M_5X`Rz?Et37|b77y)1fR~}xFcu4RX1WBZ?8aF7Tv2z_!WIAsKyMs*Yd zObbB=k3s;pLJ;Yz35bOPM1re}0!#o90f@sV2k;Eq0|L=j9oXU(UI6O{hX(avX9AqC zq)i{3@dNdcIU%7z9bjM-nocMbcpHd9)0qzi4*h^rfb>3KYYmJC2(khX+@S!OXiJNw z@g^kL%7Q`Of%kvRA$D-gf68%SX0-ah=>JD}tIv=IaJ;|G;kdAVB?ogR0yaR2w>emJI1#v5XgtstE6;yK=D&Jg#o^a+AS_6TKjx4aK#KvQ!V69h zi-1x`>kFP}lY(3aR|$4@5XS?K#zPdLKms)YFZy)h8qn{FU?{p;fHe>pZG`~m6VO2X zXrfC5QUWhL5Md&K3Ah`mVLaeZh!pHYM*;{Uj;{IO_;B(N7aE~xZNOnphzab21(-oj zSm_V6Aetss;QS-(3*q5`5W}1RxCIZi_bME4R2Xp2?`jVVWCxaE!9vhE2v4gAXQSz4 zTHzR49po^mUONK)=&L}0Itl|O4bERe#391)f`cIu!1w^$0`wLZYEzna2B({0S2(|wE;xt} z=nBE$@N~Ko0XNgQ8nl7!U}xBFm6kL*uJotT6VeaDgw_qx5$GA16T#fTnQ1^*U>*p7 zX#orfkA@ZONShdVDS!n_frfJd1JMWsiGzj2f(j1Y10XacfF_2zoKBEco&fCb91kVOGAzzG9l0x|)L&;SDLXh?tt5D26`ji4Y0r@zGs zD`pKM13Lo70UhCJ03%ocI1!joAP*Qo4^Z zM>;Yz*`g)1Vl1o!^Z_G*8-NQ6kP#vXPQk)ygF7fp;0Bt!-(~}*3A+RHKuhQX3#|YF zCaaGS325WcfKOm^U@CX3|VZj6q7LI@c7M+$L;R_xDlo(n<#~pkG z03QKY0ooI=z{~+&2Q7f1Cctw9D;61|PV4Y5H(;v3@uVlT$$@VO#0fCLKpdeeqa`?S zUL4#2*t7&zm9T9_vEC&)i zRHYcGM_~dbfe5}0peOV?u=Jn^h$*clfI`H8WaSA5p8$bj9i$R1S$U!rt@OhJQW9Z8 zuLFEY#DawW1pE;q79{j1z)VCe@V{tJ&|N3OWaSC;qp2P^9&R?&CP4IC@ zXpmq^mtJ@WJqMUV`Tg}162R5J@Amv%6t{BB>7PP55)KC@2)Yw8|SPPis*LV(k65!J!Kw6*!0n5;=Vu%VW(eBE6s$0(P?!+5w5y6G?V(r zKn~as(0vtAm_y{@1ZnpmhxPDzHK(Bs%HT8fCZPRWog{1r?;&Am#u1n`FiFsIC76T6 zS%JL@4|)TzjCKL$LxVOb7+$Rq62STAAP%%$h53IS#I4{D=SaJtQvu2V4nQ#fDTsr_ z{{K9PqgiT5J|OM?6u?2!qo4r+7qkl~=&S;bLMx{SZ(!2DEfAO*Bm$h`f0{#swaTNA z|6u?BDu)aK$Nt-Vh38k{t_E*dy3EmH3_NfJ;8^g247rLsZG1TUl`&Rx@Xa7?ObpP* zG%Bq0q2cj|@u0zYrh7 zhHCzgIV8qk1#GLtqdQG-GAq>kPXjg#5GNWTz(jz%MRSXxJ_W`Hnrd(X=Wr!pBLZUu z1G?4KqfHnAxB_q#oD7ZnfT$3c)i$tvRTJW1v<&KK{XjVw1DJKXx}&uNW|WqL%@pY8 z(>Q?!7lis)x_X0USXz(;xr44|aL~a6DjEY-HzXvmk+gQScmqleT}tS-65tDVnxJQa z1&%fjs8(7a2w>u&KMou)Jl%Vx!=Q}?VA9-FXa;@@)&P=_r2&F4K8A?HK~XR=1T0Q- zi;)PhCrF?L_t2grSAsQ2qZKJ!&FNr(yaO;q;D+G9VG>#$q%D{ybU9Y(NTcBj&1tlR z!vmNwDun%^*NO)lCV)AxAOS6b4-8$96^PJT0qO{XrwbnxZa_ZZyug?bmVu9$fky)BC!68?2*d7DpUy#GO(qV!e#I_J+0&SB7AOX>Xuz*_oC4l`sT!TO% z607kbjhrxO24p3I_z{L|wDkqZ0Aj9$YhYA}2`pF%*I?)lx$D<(4T4x_x|ABCgrZN91T!Zn^>dX1x z!?pjB4|IS<|9vdl2P^7$u*CuYECG!oAdx648Ug>$0oX7Q*3sXk{ZIV80PVo!>YV9# z;J_E#;9ncSC)?o2FTsUGrK0}Rzc~2M{$GH0@^KZ1zdX9PdM@wR&#UkM3<;g|GD6uO z`}5n=Uk|(fBS5%a{rgAYFE!F1huw}pyt8I_Xdt($XEu9J`ZTM3mel2tD{mBKcJVw3 znEd45M77)8WNX!%7c(pLZ1(4VVmhpE3AmK5MNNYL}zG#G&Js*tO8NpXqBn zuYM0{oj~(dkHe2kJX((_-K*^Pyb$TQ&h;7bb>9`eP87!>-)gbTd7C1R$0%IZPTz38 z?UD}11@SrSO^S%2X_*2hhNRnuk0&$Qv-aZRUM}Ev@cW6E`jRasKYtDJ+^qEyKlssD zBR0a)juNBDSS^#;_B}!NP-K5v1sZue=6m_&nTo^A7pyl_76 zeVmN7V|?C&Q!jts?(02O{aCBeK*M5kIdkWaSr^XhNAGJ#GMoCJmMZAp!zTZ>PT8x+ z&TrGZQ>3<2+xa3riYae)m#yERu%R$WWJqk_s$S>(o_uwzh5Uh%Szo2g-{PL0aQ>uV zbW!xFwMy;j=yGXhx$#dtk5$M_3l2%?WZAR5#<5jo)-Sv{oX_;`${vy&I6GByHvPy! zo-|G>u8kuHdC@IL*u(MQKwl=;PNVqIwWWIUyy@BPcI1?WD-Rz`?2>1kV(rRJ_oGfF z=HoR5_Fc3!QNjh}dObWGD_?VLyo*6cZuwS9;@N_)n}1Ae3tW&E+M&CKC@-38zueYNTpkVksl|l3foR8NZy47o zv=?C+wIFX9RZLa9=~%HGqpEn3#B5@2pGGcDm>FHVBxvD!Mnb8!(XpRWG7(t(!6Q^(R!sXHB4+dVlN0cgw>7kTg+6^Elczf#>wcTy5!j2qygMGbr*nAJIQ@9pfyj)} zu5*J4lU|rNoU>wO4?l44CP&KhNIj4lxz$y3zP)fA=bpspirf?Hj><@0OPIPf(tDzN zI$!kEL+X{-eB17*u}fVZ`8ln}5}c3MXq8JvzBzeJa=qX+gY6s(+VWL9NQbuOgf%#~ z7k=!L6H2bjkiy+=wI)ZI`m{thP6^&_R0R6g}i5Oe7WjyVBt)l%&cO; z+`FqEd32wK48Cc2cJ7?d7Hq=8w>43UxX1v77F3~Tf{e#LX-%bKjo9PP`k%%`>LL`k zg%({-j4k#N>K(!({4`I8cZVkDZT+Zt<=c}?ucPmZdIp!?o^1?FsO4`y(Bq9AKF4_u zGwN_=ADi)$=T@iZJVjE{ww|3b9PAMu^L^E=Njb3J$H2ztbyXJW%F-5dK328EDQW$F z-kvAI&DW^cN~Nd96Ph%ZH5sdHSzmp=ey3BQ!772PfcT~U*t?tGW6So&BZPHzTi3Ab zlwRW7SnF!jt}v!`W&7|Ysl8Ix7707A*?v*Jo#*~JpKtCJRnA9~BUyR2(s3?w{qW{KmWzBZpQ;V2M zifKAhs)b*D8@FKk`{xaFx-w6`n&~Iv(|6?Bdlr7L@Khb|K(pZ6qRoidmu1JDllU4dc-qr8+2t_H z2?*Z)X)~d-&Hjy&=(%?uNgIVT=Y<1b%fu6}%bRb!xlwR>pZmcIkHGo9LMwiG>#(oK z1G&2l`Gky5e<-Ltk~}8u#qF1xjqGdS2;F}?`c1V!n#bo8{i4iW!ipaU_c!MK=r)-# z;LU&R<~M7nODzqRjXQB)5C6W|6@QETzP^|t)WTB4-Fn0->ie9qeY4s_rpAp|3;82G zC&YqpB>HNt{ju&`VK86RytjybjR*Kgi0=3P0)ZMrje6sa@9^awm8pA8QqtQA$L7-y7S3&| zw$w~cwM6Q+DeTU=Vy+=UVP2b=^Xfe|`!2d3u3}WdO1fBOZl6DOaI3*($w17N&}GD| z+7XU|?UxU&wH9P=N${Hq+kc#(^=F&@>t_@HWYhoUL+$8udzH8a?PL&XHvbdY)sEr6u{T($=j%7GqnbERNd# z$n+h;m1Vf?#^+jeOl-i0p+~iLu37U+X)!)%8uNtfF2k|id(U&Pxu+HvSagA%4YSEs zBGfYDo5Af(3b9<7 zP3y;Y4F#&^u`Q~z8TQXV$l4%$rpQ!!nrGd``2dCP4@+O3xy~-#T4oh|6fNfoKiDLV|;BfWP1eI6TSy>EfV$RwZ1}P>p-c!Q{&5iS8(uudj8IE9dK~{4ZxX z@76Fv@m2|q$LS|FzM%%c(pKqz{2zbYcmbL91sI$?%U_OZ{=!Ya{-eP^a1+>^1pB$*1?uYeP*}Ln z_q&>Us@nO1tuv-wt|Rh#YWNiu_M15xpC!dfTGS3LlqlbKPEL)4BWp_Ceyo{Bdbr3QPs;jGd8plW=m*X@5TnW!5@=Vu zy+1{1e3Ha^`%OTAed?8@gqM874>*tRTXN-@N_m4Cs6cRYFVF5Pa&gEtE}gCV9Z@S54NQzUuxZqDBX7N*q6Jl`e$Mt zd~geoTO)9v289IO+mG#@pXJ7+qz|`j3tGR8=LGrjPM!YnQ=`@TTcgBTT6j$E780ba z#cp@{+~9PQHOomAEk0_C_sKuDjg%rI8WeLvRkUgOKEaeT`a|iA>qzF>m`gvDWbvGa zL+4-MdW5GX^4Hz;W3oBBbhwbprL#-*W=)W?SWQg{?DX^a~>E7Y~{!%=N ztv4@W+g^wKPwonmEKa(I=l3~$i=-U?w)S&wSVs4~*SCajeM|V-ZDXmFqqwoxhO6}X zNVAyy`LZy!okx=&jJz4$>#eg+=x)c^#kG0u-YjwUEh=$B1(~+rHnr;xH3*dS@tFsP zFMX}}o>$1VkDXoVs#5h1<(VI-s8=c~Qwb-Y@eZaWc7G_6tUurUwpe?k?zOJQS50Y8 zcPk8Qo)G?cuwj;m-;s`6_0dMxzE~fa54dNv%O$&^Pk(YJziNMNakSep zmXl2$J{To`5v5W8&(k=Td$t1_CokWZ9~6^|**IW!!Qt+3Aw^e9!CNY)K1STQ!7twv znI0J=;77hv{k_d~Z+Cb}=YxXdCmfQ5Hb{=<_Juxj4zluleEq$YGx0{>^^uPsO*AA>vvB^~CUyo|kbf1*35-k6c}iLWdAP3_L7uQP^YZ$7(;oTSPz zjTwnt(|!DDdMe`pvYc4qcrU~v+dO%GD4yUk@44+gJJ&N0*?~!k?H7L>QtlmVWgQL` z%24V%8vU~U+m;+tv;2vM8+_aL=bempzVUfNz{+Oh^XA||W7fp#>?-`u>QchP=E%$K z=~O>qLciSk0yoF1vOr?{^PD!L1wn=p3d4)j-~BcQus;~jae6UON*Edwoi|a>pNTJ_ zG@WADMcU3$cKcfS)`6g?lY>>-bLoa^^ywh}ZtV?RLysa;7Lt`kv>y;e{yN6CL z1D%T=FLE}w9M#!5c-v}*n0qF%wg6S^KN+3Kh&{!zEj%|L^MPIM!bi@Qf|-yLw+q`I zJUdyGs1J-9VqQzcGhSCN*W~4EX*yb(f**<0 z_htPTRoT9$Ovw;j9v-b?R4 zW~p48coNTbrkv90cyF6gj186Ffo<-YK}f*TrI(!G53h>Y9~-Hk&1|;Zn=8ncq_g|h zo@d^D+y!M7DpyBqgO|T6`WPr5U7X~7biZ&D2BGeG;h56JbBZrrcFp-ePUlIs?|8dk zD%$0XpVH&zx61k-`>lt1rfwOOY`s`ACZ%TK63JH2e*U}rmw|6u#*}!)3)|icN14w? ze7(MLNb^O=1#M=NSVMDuwGtog@wIyyBF?;3*TgD}$G!MmbU-+DF9VO(hOag;N;6s< zj?wZLuI{854*Zb0DQ&Dj8$VXHi&eOPuyJ5Jb7K#mV}s|L(MWsI#Y@Rj7Sp`BW(lrb z-1t1nt&a#>xn2a6id~&iF*5c@);8)*d0`{J_RakSlb=4`c0#zb67t*qkL$Drm^|0I zXYFxV((|PHfH2O#Nq=$AdKOCTcV5nscYNZmPM0bIZmZQh@7SBnvT2;l{ z2X)A(`mGZAGCiJoaY#F_sCzBK46Khv?-YN3JKfwpIbreE-EJGVAMc?Ii}}{s@5S6) zDO6*7tME*2Pxm5|v^88i{f%CCdwzVh$#|Y&x+ z9*5-4Kk(SL9H1>XFe=YveNEJ$!}Zm2Y8$#mD5G1_;KYGwY(_)Ux_X;3F8v>)E+QFH zxq_mu=K>L%r;&D98=_F{ zC*0y`v1fJ+@p`8bF>z=s)G^F!xtSgDjgz<(%Oa(w z>@@srSEr!_#p6}^G7F<}$BewVhpIasmYtA)vTHS5K*M_JZTQ{h3nsjrA%bhcP za{)h@f>3cL>y)#4^NU@DWvsAsV)7e#4&C%(;CK=v7KvMOeIsgBzu7{m?e%@_XPl_( zbJ*}PqaDY&$+B|No@zO6^ZC)lCn0i)tV&L=@O}-0GB=aNO*Y)jRd7+~8hdUmd_zla zigj*yV$D^-6RuSc^+YrHTTjP74Z!&v+Y{rGcx4WoFtTydLu|wAV&jV=br1S=N@eAv zjGpR>Zp4Tl;1edVTNgA+S~&H*Nmky?wNUP%$JV%LmBDvAWS9*J1{l-i?@XSJ z)vPjp%+|}K1*MnE;x?TQpQAKH6EZSTwU5UR~ODT7Mt`s5ugmRBx4 zD-Y|>JN?GMJ?BwS8(+gL!K6S%HrEF1; zIQNNBd3FB%ZcI^b21fVAF7NA6a@$#1bQRDt=BK!oud>@-kd$aUxZsEzc2xJd>x#Lg zJf2Q)|0(jifR)uO$d5_Rrlan{Rb>s*0S50$x0#uJ3{735*;i0H<6=Sk3^h4NqbB)g z`ObFRo28z8eSW;?GQ(z6#+f^NnYBL+f1l=*OKQEfMd1^XH+909E7P1oNq)^CCaDjB z>k7%DWtH4@JOy&!n-bS&?Vbg}3R3?e`M{BF>S6w@ABMzk_eRYMl@~j& z*($E%ay#wT6#?P44#A@xwDa>xwKrRm{QGnF65e^h~q_u+J02h zt&YB}x~69`{JpL`Opr7hU-$jK^?9@4>O=PnjGK>?j!Qdyt5M|c^(gCNXpY#HRyBQ1 zH*|QCqT#vDG__8SOO@SL?0jV)qpCw>LvOCAZhNofHdcj7^x)yxllO)X=93tY$)HLY z^7J2-8CN(8UAoQm#-9V(nWIJ;+}D=Dm7mP^+G-(b5p(cIm%%1yjeE8un;+;eEmm@E zo9rW4bA9gaQL6Bh@Rw+@uQfhj`KrCnU-4yo1&6Xxhw!!foz^lVT+%U$=Xfq&=HBM? zt^CXU=h+Xpr!%8uuIKMN`9iw1il|Z2TQ{~a@T0LmHO0N|9eOV0Vm{xFevSO`gjBJj zKL15kqm(tM@YKzGl4mkS>#GckI#^!a_mCYnynQ)n>Qh~c_49`|6IQ`D{QS@Hd43pt zy7mEQc~YAnK}?9!`%^X}7h9ZnbjeS`_(;%GPAZGL@7b0C zMsqoy7nieov~z5ywXCvt+8S$kF4x4SseJBiNU{#=uR0_5thId2=rQNXJfHT417_A{ zc@F78{?&bkiC*630g5ikH{BL5qVd6f%U>$!7kS@U~jD;=*&TFhB?EcKSW zBo1Yd99cJ&FQIlWjya{fVf3`eh6nO--J( zxoMt7G?#6x($OtG-vh+gZF!zeVYfNI-}(`Fm^? z*`D8bE<04jfvDNFabcwEy>WLR?_yW~Yqrgsr>nN6jO$E~&ttl4PBh)M_fs^yF?2Sw zDvhc?g*6O}`?leZm<7cUvt^SQahQpby33#caet2QrM@rL@vQL@fpQMc8M3!k8>23m zo?}Do5{`{a$l*9E&(I(G6(>*&}ry7?Jc^GNASH!aET zF7;Ciw7lT8pTe@8Df87{GJho2uWM0UP_j0EK#VJ1u$0HA^vDKk*oOFpDoWy9p-BLC zzq4ei(2m_z{c?f+Cw2ufUZv_^e$gA|dnz}0L`(y*&L&;E_0qsc(#eA_IC;> zogP%3UufZ|Hz0e6S=ml}lDEU^jH&Ird#&IS%e0-{=JCT4wXcGQLLb`S*Zj_68>f6* z-)2t`zeuB=_U(HD#F2~NWetvJu!f%(LYgd{;CLJGQmTfx{)?LCZ>n{%Mtc?lSZy9CvRf==`pa=gja=TFX=t;?9=gnoWRnPm~^0NM_ z^TlVwJ=dX@tg20>?-E{-JjSE8IKCx>o(!JJTJKd5>)liExa8h?3*<*L8AE12&31>1 zK3{pi6WmA04v)UHS=msc;_?wsN*C%soG_Pu<)(GQ^J)PMDe zdAYj=m7tzQ>ESxxip<~?>o2^u#=2H%RAeMv&pbq)LB-U&-S$&6_aoNRc2PI=?O*&j z>nY;Q@!WEIiX+xE@6Ms8>lQY=TYqnxx$RWEQ+9qa%)`ND5ms4*iw^1gaWR}mAVl;v|T zcJl}C?VK?%lWFq*KZTuTR2ffXf&>i?!QI{6o#2f-!QEYgyCt|=aCZn60t9!0 zy9JvfGjqRt$*eW!SFPReIaSrQ`bVF7_kP$6dahAo^lBlkUsJ;^vY~9R)M98|HcW)= z|8BNiKpA!d!^r;eg`-L`oA7ukOVtgvT4&17PjAsK87&DBHLrHE9%D2@&FNvyd&B2Z z{+_I7H;6lpBSFfo-~zq>49`4r^OnLlhqR?lzD|9NI;wbuV*V0|fxg>;Mrl86kOTzX z-IOcLy4E&%ScQ}3BZ2Il{63W3K?&9A2LS?EfQP{Fw$W}#UVW?VUSLdG@pb>u7mX5D zM&FbzObrnu`ghCz2SQt;$v+gg{cOOwq#>{{m>DKMHi3(ZGROyVtKDr}P@or!lc6{0 z*~Vn6pCZ7vCN`?0{P*Aw|}bKwXbDzs{Rb+;KdG&;kQ z-Nwe$m-ddC227V8br%E7u{bjQ&bf&+Bmt~T@gMe-Uy(~wPc9A%380BO`|oZiZy-@0 zEg2Kh@bK{>xL#L{lEO9HJr4iGAY3Fd9THBIeMQcCP(_Q!ekD@V^x#WE%Hsy6CYc`3 zL~XZ;cf+YjgKy#Hji*B=>Cll`wPkKM`i>v~kEkI$aVE~5ZTBX1OLr804yFI$Rc(DF zlN>=w_1n~Zq>xg$%#raqeATk({o!-dq%tgAYxo=fO@$+JIx~_C@KA21WM|Rg00Bb6 zu+Gj22AfZF1~>jLtiPe`(cfs{Gj=GopI0X1Yc>rHyJIpG+Y{M(?w{L@is>SjRaR-_ z&aeKcb_>>;v3-L92fX{wg@Rt*lcJn5N)TDqXE3dgvfQWalPfH3?dl{@dXZPA(h%qcE07oYHb}Hw`$LpFd9bHuH}t+TgHK_Y~K^4y{OyP5Y%9OFNKx{?1Sv z2Qh-|-T~GRPN5cM+a_L}460(-VSSG#V1QDtOuCHMSrm#ruZL^bPl*uX^)f2&Pt}Hl zp=%5`@>ui|x#eQ{Upn8u5lr+~ zAefjZmc=H?!aCDltg7s-@W3=KI*bd3^`~jHnbPBLabH8inXLk;bWq2-5VEwt)h%dK zQ(py&uV=ea_qz$Vv`tU(d8O>APNdPP9qzR*78PZ?E}?N`lz*e@tBk8M-HoFH5BCtL zryq5Z@K75fsqOmu*1|9my>T7;tYmYNWx4dUNi!Ra>d5>4QUw}EdW3K{n|s(NMh+u^ zNX)a{nN`Wr{~Wi$eRz!K%d>|{)oh>{N4zt&X+=4%Q3;lUHqA`p9em-SvSdzC|$BSH2D2Kn@L zXj9@sP{#*mVZE7_`Zqtz>!N6jP^rxj&Ip{?cgL`1NE2ciBaTz%p(nzGCb@STEXPzRjNFtaLS>5pGi=UdEh{S7yEe)sqXZ?(>L)W1NnpIyXZB3M)=Bf?& zzYdopa0Zn{GkBy??dF*rf0Laj9kJCIhp%WdPpFMraZdkB}6}c zSmWzs!i=1qgi#Aw(L$3RTq{(j+uuMzF~c7p5L(Q(W^x9${r+0joi8g$ zt|m)poVo%MW+g|jK$iipn8(ja1Y@8&&F&W+RXCLEFGF*upzC6+OXe@6GLwqrU1#a6 z`5&}OUpG=Vg2{xUXuiL66f%&1_;z6heK#_xNZWShjBV$lQSK>^HpfOx`F@0rj0Gc> zyhDr6)d8Ibi$v3hRON~yk50!{t7a}nlT)>F-58X5rY&2WSr`p}Ml^C}yVO1E82iF(-HI7E z9?>|_j|9K3SIr~e2iZ0W2cM+Ti-Mq9DHcZ3?N$$q$N5csD?hv=(Vx{tG9q^4OI9b} zj5T{8DEr<SOCKm)3b*I@NILBg_5!gUZFOz$Hl! zSWjTAxdqiAB;qV$#k8ssvC0Dsm_JpLfy1sSL&Q;8 zq1~&0_m$%J|JE2_4F_@F7okrnymcay%=rqezuu#U| zYck;l0!#&+SHnVid|G#{CwJz&1327R;k~Ek{e$ahyj~NyL>)0Tp>JEA$gYi16H^yX zz)_A+lw50gYHCvu=mZgB58|y0gy0l~1`ygDXCIyi?`~!<$VhAZ;bZdR988Ca0MrTp z5SNTRPKGT$?%G3XEGmXyzgRGE_+kn}_8#7YYcrkMxYHxAUg}-{(l|=?!;cjQ;17iR zUFD6mB)pt!1~AU$PaG>S1?s;c6iR3i5uickwHiT-h4K30$l0`?uZ0$UGp~9Hk&iXx z@JiWf)sABqWlvRaSRn#4O-o!@tclFdVM1JnCV zym+tAENtPD7iL#nCmdxq!O^}A8cdOaU-?mQ6CkI4N_771EfRyFRfz9k@{09Hd%}ZK zv5Qt!?cAD6;z1mvi#R-3T)$_np}=npp6894>n$DrQKKV7)Ax3Hwh77-%K_T%>C$>t zgHk6msZYs6p9n=Hd|})3w@5hJ>>xpf2wuY*_-RFQx_WAX!m~VZZX(l)$9}#KLvK99 zDCa68ru&g?PyvG^9`1)%=|{M;Ar~i;QKc~B&5-$e=w9iBbF=0#udGJREL?|u@l>E_ zds01cXn!%RAdyUkb=^9D;jn|DYqMK>+kwT8mCdAwNl2-m-cJ0(pZ;)?Igul`FKmWw zD|h@eG%FHu_na;9{mA(>>v*aH6}F=&GQ7a!isWEU}_n{_R zP=Sxs#v0keLO9%pS<+k!7KyGGYJje$T$_YvC(o#lOp@ABXIGjOjl)W%LLv1aZV823 z6FD5Wt9N#la+uL1M-`>7RR39tpMCiQ5i7-5gG-HE98EMqQ7u$fr4nk(-ok9!G&zU5 zvQG&+_AWhpPbjS|rF65(0NUl0#!k|bBIKs-dsU&BBbRfKmA)iak73W5U^^pi!UBEe zyi%<^PDA}G4rX6gx~Q(w#nV*YFelzIu=op)fHo~(?lL`O|2cu((-TyzqpxlnDg0hK z`fm~A-G3DlJ!r@1os{Dyc?c8}*UzRW=8Ybvmn z3GKe}Rwz4Omx0+5u|9V?{(Qj{Ax98bB6^H10wckt ziwSfE6*;2QdIaV18IQtLr+tDkOcRs0a!;V_TX{2==tnQ5WG{YW?+#-Dhr1=2n~L}A zClvUy`C59$*2*BZ6b3g3l5rc%iE#(g3_+K4*a@56gx7F_vJ~&GzOu>iyCTp&qO>0_ zTrUXzObr?&^=El_k+SYu%lUpmt|83$m+fO=D(de9gyU)w7Y8E7oM#tc1G;A zsK>Sz^<%ldR3crsoQK$-$s+=yN?H3}xDr^ajtS6c*Nqz4~H|h6+LOs+`Gq233r{IqIgme~d8B_{W z|M$`%IIpRDAP4u9Bt*s4eobHdE;(=seBy^H=&7E|rFuu#ENfgaiyIq7lp&MlSE@K) zzSJcAS-vah$ugEuYBEXWi&@(*f=Dr~J$lbG%*(HauO{wms!-oTjyHzedLNZK7U<*S z&oVfy@B7HsP^NY*r<=!W9re8_KWu<7%&M5+k&Hy`y(bneF68F`lk7&}jlc{yrPf~M zP3?xGS|UAYxcg3#YZPG-DqrB+3Sxr_A0^`x=_p}f+x|w?1ypO+0{E4H9~u_UVSB-@VZF8ppyE$P{U>AHgESvN9gDMKreZoV}$^&T)=eQBhi<{PX2#ROd}7G^OG;78a2mL)9<^ z?SslU7#1sB(@Aji3;U=O=oxQ)i2`YJbl2u7E^$G_c@$%h5BfZwSZ8{_4?pa?DoJS- zT-@CcjSY+OO&7KNvXO+CUtyKAgh8HT4?thYdl%svD%lT-8hk)1cQcb)&!IjJ-KObvx2Yvmf5*F9MJMW%e*ts4G(l8aCfn1o z`*|iFYY3s{oSCNgU|!c|lFnnSXYE(I{X6&4VB(q+&LE5alYj=KRmvP))Kr6;oH!yR zWYi6D)UUtNk0lCZ*^}bFWrBTwmpbz)CuK^Wdp0U3QQKZ5nH!9@{oO|kP3@{MXE&6P z19x>RoUM9uKHlt)dW4+SJr>ivc4@okMVP))@S59Qdp~zP_E3N z5AXh!E8w$)t+@u>Iqk6#pjAb?qZ;9zfG8)#1-8Yz&tbk#**C!j0%vOqepkBlr~4`)hMkSJfsx-$5P{c2fx_fd*-0Khtsl=DRCub$b2S#RN8TKU z1H#eYW5nX9N%6`RWEr5=W2Vz`5qN&CbE6S?>mLQJN91zK{-*<}#aGgWOH!ZOV5@M7 z&ND34_e0%u@4Mn#k>o2A<6ZVPWmDE|KYn^8CIWf8n?1`_2Fj7`a+h+5he4_elk6~- z?ATeSG1jMsKHaHBvX(5v?vu9t{F>U8?3?tRii_VcNI-#KmmG7>t!}w^cIeUoI*o}2 zJ+<*}qvMygX=7*H31lVb+Ze|C9KIeY0Y)w8S1zeMW@lZmJOEV2f}lPDOS6 zig5^yzTXghP6zqnoQ0JiDS^p~T@_Q!ED~kH4D?Ac2p{(1qj<`d!9ChDw^hrx(j%FM z-s%lV!)1~f#s}^k7p_`!tQCc(6+;qIiOf62-Q${ zXrz?K9|!)*Rxs*}aExv}ae{IBWd1IZIN<$+j{b7cE3e(QMB6M9SUv=q+2V~OCs(Bd z+Dl2D&;1LoWz&02QV|NNaOsqMhZiP}62fF{q10 zaJ!9~A;~qqA0M!5aulY|W($N>C^iQ+O~w^1DAZN-F!DA!bM}eHw+1l+3Aq`-oh)aV zF~dvO%-tcAcqEjKIm0(w!~?yzKY>VW4p(3!7iD>D$MsJ7pnkvgr(G%c9w!~+YvD`@ zkBi+KY_1DJwzM^X!5fKkLLtQmUab8Ld6Um#Os#QfCw#YLM@62T3p%pglYwn2=h<{s zHq&cm6`#zNz8yy1+5bW=h~N5{xbMd1vRr;xaAO+B8lAz9&8$Y=FTf3#;1BVcbj z@q@fR4FwJe)M_>49ppr_Efn9qD~g`fSgl9Q>K8#QcEA!37VU1@=a}kZj`RBjv*_Nf zZ_`!TxLB3nl9?5NROlohAEQ7i6E3ohjXKYXg1Zw6vGJv9W*A0NzmdEsb~2`C?Iw903N?PUwPhkA&Chiq+dA|6j zfXC6vZO)_gHk#qaNZh`A&R2mJ<;3_%)8pn)oMLS+N}{? z{75i75q^KxdT%-Mne*)$MzG!J*g{6$ov=JM(FTduxmTvhEioM*KbOSnfJ5aDB0i1U z96Z~*Cs)N>oY3e;%v%R~T-6y`lxzB3TfUp$sWTB7z4Mi9Wgf&^A{i$syMdWqHTk%? zj9As?C<}z8A--O}ui*Lhjw~)6pudF)s;_V0(&epFthkzZwfFwL;rM^qy8vn|0J3rb zfKmTz76X#>>E(%djM$j~gsBmLgf`*?IMIwohJeS;$bgB>gn`2VWM~5Tra4SNfWl@3 z;7Qp5FfkiIV+H|aT{bpmfFcao?-Yn=ft^ZvB>?_SuL|IfSpY^etDzBqRb~bma+)wO zGZ>i|KU+3g*x3z$EFU0$k%bk2t}+7VIaWph)633mz-DX+92o(yUKU_QH30VdPh+Ml zz`;fUvOPEvfT+X&89>$vE8zA95&cU8yuW`wBeMCI2CV#-29T>?YJY-pUd#nrI{%u> z2!K}qn)@8t^Kvfxvu*Hs{Btdg&(Q@hG$5`KXkh-&Iv}d?ulrzO0NUVR)B$Yczw4NR z+?1Df&z<)#HMZv_?dS2&^#SgDz(Mdr;{ZJKFEn;0pl|o3#`gF34CE%fTo35+1%jgf zb1uLf2R!~SGn}7;4j}gJrS=@Q z^q(5=h@Ml5p2xq=3uu59|E0$A*E0v`(=30j_a}z##atkV^shQL79bk*KkJx*#It{^ zb8;{M8twsZIsnQ3lW+TMssI}Am27RD{$NLd<3H{UAfb$2QiMpG2QbW Date: Fri, 28 May 2021 20:31:43 +0800 Subject: [PATCH 02/14] format code to pass flake8 check --- pdfminer/pdffont.py | 163 ++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 81 deletions(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 7c3fb1e0..277cd453 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -180,7 +180,7 @@ def getdict(data): value = b1 << 8 | b2 else: value = b1 << 24 | b2 << 16 | \ - struct.unpack('>H', fp.read(2))[0] + struct.unpack('>H', fp.read(2))[0] stack.append(value) return d @@ -188,84 +188,84 @@ def getdict(data): class CFFFont: STANDARD_STRINGS = ( - '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', - 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', - 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', - 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', - 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', - 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', - 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', - 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown', - 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', - 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', - 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', - 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', - 'quotesinglbase', 'quotedblbase', 'quotedblright', - 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', - 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', - 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', - 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', - 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', - 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', - 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', - 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', - 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', - 'multiply', 'threesuperior', 'copyright', 'Aacute', - 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', - 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', - 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', - 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', - 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', - 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', - 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', - 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', - 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', - 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', - 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', - 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', - 'dollarsuperior', 'ampersandsmall', 'Acutesmall', - 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', - 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', - 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', - 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', - 'commasuperior', 'threequartersemdash', 'periodsuperior', - 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', - 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', - 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', - 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', - 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', - 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', - 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', - 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', - 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', - 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', - 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', - 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', - 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', - 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', - 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', - 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', - 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', - 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', - 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', - 'seveninferior', 'eightinferior', 'nineinferior', - 'centinferior', 'dollarinferior', 'periodinferior', - 'commainferior', 'Agravesmall', 'Aacutesmall', - 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', - 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', - 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', - 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', - 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', - 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', - 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', - 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', - 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', - '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', - 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', + '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', + 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', + 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', + 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', + 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', + 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', + 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown', + 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', + 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', + 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', + 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', + 'quotesinglbase', 'quotedblbase', 'quotedblright', + 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', + 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', + 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', + 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', + 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', + 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', + 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', + 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', + 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', + 'multiply', 'threesuperior', 'copyright', 'Aacute', + 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', + 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', + 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', + 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', + 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', + 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', + 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', + 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', + 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', + 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', + 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', + 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', + 'dollarsuperior', 'ampersandsmall', 'Acutesmall', + 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', + 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', + 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', + 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', + 'commasuperior', 'threequartersemdash', 'periodsuperior', + 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', + 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', + 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', + 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', + 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', + 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', + 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', + 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', + 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', + 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', + 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', + 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', + 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', + 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', + 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', + 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', + 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', + 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', + 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', + 'seveninferior', 'eightinferior', 'nineinferior', + 'centinferior', 'dollarinferior', 'periodinferior', + 'commainferior', 'Agravesmall', 'Aacutesmall', + 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', + 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', + 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', + 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', + 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', + 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', + 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', + 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', + 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', + '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', + 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', ) class INDEX: @@ -691,8 +691,9 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT): else: cmap_name = literal_name(spec['ToUnicode']) encoding = literal_name(spec['Encoding']) - if cid_ordering.find('Identity') > -1 or cmap_name.find('Identity') > -1 \ - or encoding.find('Identity') > -1: + if any([cid_ordering.find('Identity') > -1, + cmap_name.find('Identity') > -1, + encoding.find('Identity') > -1]): self.unicode_map = self.cmap elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: From 2a97bde50c44bf72200efba4aad77291496b90e9 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 20:21:27 +0200 Subject: [PATCH 03/14] Remove indent --- pdfminer/pdffont.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 277cd453..3679ae49 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -180,7 +180,7 @@ def getdict(data): value = b1 << 8 | b2 else: value = b1 << 24 | b2 << 16 | \ - struct.unpack('>H', fp.read(2))[0] + struct.unpack('>H', fp.read(2))[0] stack.append(value) return d From 9f7763cbeef00c18bf42f58c7adf665589b62858 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 20:22:49 +0200 Subject: [PATCH 04/14] Remove indent --- pdfminer/pdffont.py | 156 ++++++++++++++++++++++---------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 3679ae49..fd543c74 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -188,84 +188,84 @@ def getdict(data): class CFFFont: STANDARD_STRINGS = ( - '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', - 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', - 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', - 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', - 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', - 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', - 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', - 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown', - 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', - 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', - 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', - 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', - 'quotesinglbase', 'quotedblbase', 'quotedblright', - 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', - 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', - 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', - 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', - 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', - 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', - 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', - 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', - 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', - 'multiply', 'threesuperior', 'copyright', 'Aacute', - 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', - 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', - 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', - 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', - 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', - 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', - 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', - 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', - 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', - 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', - 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', - 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', - 'dollarsuperior', 'ampersandsmall', 'Acutesmall', - 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', - 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', - 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', - 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', - 'commasuperior', 'threequartersemdash', 'periodsuperior', - 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', - 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', - 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', - 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', - 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', - 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', - 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', - 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', - 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', - 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', - 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', - 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', - 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', - 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', - 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', - 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', - 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', - 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', - 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', - 'seveninferior', 'eightinferior', 'nineinferior', - 'centinferior', 'dollarinferior', 'periodinferior', - 'commainferior', 'Agravesmall', 'Aacutesmall', - 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', - 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', - 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', - 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', - 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', - 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', - 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', - 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', - 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', - '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', - 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', + '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', + 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft', + 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', + 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', + 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal', + 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', + 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown', + 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', + 'currency', 'quotesingle', 'quotedblleft', 'guillemotleft', + 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash', + 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet', + 'quotesinglbase', 'quotedblbase', 'quotedblright', + 'guillemotright', 'ellipsis', 'perthousand', 'questiondown', + 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', + 'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut', + 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash', + 'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', + 'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu', + 'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn', + 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn', + 'threequarters', 'twosuperior', 'registered', 'minus', 'eth', + 'multiply', 'threesuperior', 'copyright', 'Aacute', + 'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde', + 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave', + 'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', + 'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', + 'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave', + 'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex', + 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute', + 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex', + 'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', + 'odieresis', 'ograve', 'otilde', 'scaron', 'uacute', + 'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis', + 'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle', + 'dollarsuperior', 'ampersandsmall', 'Acutesmall', + 'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', + 'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle', + 'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle', + 'sevenoldstyle', 'eightoldstyle', 'nineoldstyle', + 'commasuperior', 'threequartersemdash', 'periodsuperior', + 'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', + 'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior', + 'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior', + 'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior', + 'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall', + 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', + 'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', + 'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall', + 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall', + 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall', + 'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', + 'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall', + 'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior', + 'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall', + 'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths', + 'onethird', 'twothirds', 'zerosuperior', 'foursuperior', + 'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior', + 'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', + 'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior', + 'seveninferior', 'eightinferior', 'nineinferior', + 'centinferior', 'dollarinferior', 'periodinferior', + 'commainferior', 'Agravesmall', 'Aacutesmall', + 'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', + 'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall', + 'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall', + 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall', + 'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', + 'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall', + 'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall', + 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall', + 'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', + '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book', + 'Light', 'Medium', 'Regular', 'Roman', 'Semibold', ) class INDEX: From 36e90cef51ebcee3a192e73c6ca88efad60bcff8 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 20:24:40 +0200 Subject: [PATCH 05/14] Use isinstance instead of type check --- pdfminer/pdffont.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index fd543c74..f0394f87 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -684,7 +684,7 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT): BytesIO(self.fontfile.get_data())) self.unicode_map = None if 'ToUnicode' in spec: - if type(spec['ToUnicode']) is PDFStream: + if isinstance(spec['ToUnicode'], PDFStream): strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, BytesIO(strm.get_data())).run() From dd68ae6d766a8827d0339f0b19641883d4a0746c Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 20:26:43 +0200 Subject: [PATCH 06/14] Use or instead of any --- pdfminer/pdffont.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index f0394f87..a4dc9511 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -691,9 +691,9 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT): else: cmap_name = literal_name(spec['ToUnicode']) encoding = literal_name(spec['Encoding']) - if any([cid_ordering.find('Identity') > -1, - cmap_name.find('Identity') > -1, - encoding.find('Identity') > -1]): + if cid_ordering.find('Identity') > -1 \ + or cmap_name.find('Identity') > -1 \ + or encoding.find('Identity') > -1: self.unicode_map = self.cmap elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: From a59a4751394b3413a93a439cd7363d1c4d8a2859 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 20:28:32 +0200 Subject: [PATCH 07/14] Use str in variable, instead of str.find() --- pdfminer/pdffont.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index a4dc9511..36f7d50a 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -691,9 +691,9 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT): else: cmap_name = literal_name(spec['ToUnicode']) encoding = literal_name(spec['Encoding']) - if cid_ordering.find('Identity') > -1 \ - or cmap_name.find('Identity') > -1 \ - or encoding.find('Identity') > -1: + if 'Identity' in cid_ordering \ + or 'Identity' in cmap_name \ + or 'Identity' in encoding: self.unicode_map = self.cmap elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: From 9795edb4f66debec1fa87e270b5a8085c0b3d327 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:12:57 +0200 Subject: [PATCH 08/14] Fix mypy error: add typing annotations to get_unichr() --- pdfminer/cmapdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index eff20046..3dc4cdeb 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -122,7 +122,7 @@ def dump(self, out: TextIO = sys.stdout, class IdentityCMap(CMapBase): - def get_unichr(self, cid): + def get_unichr(self, cid: int) -> str: log.debug('get_unichr: %r, %r', self, cid) return chr(cid) From 05ca3a28691cd3765ebc62fa8ddd2737dab0b60f Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:23:14 +0200 Subject: [PATCH 09/14] Fix type of PDFCIDFont. Can be any type of CMapBase. This is a quick fix, the entire cmap structure does not have proper inheritance. --- pdfminer/cmapdb.py | 6 +++++- pdfminer/pdffont.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 3dc4cdeb..1ce012c8 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -16,6 +16,7 @@ import pickle as pickle import struct import logging +from abc import ABC, abstractmethod from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, MutableMapping, Optional, TextIO, Tuple, Union, cast) from .psparser import PSStackParser @@ -37,7 +38,7 @@ class CMapError(Exception): pass -class CMapBase: +class CMapBase(ABC): debug = 0 @@ -59,6 +60,9 @@ def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] ) -> None: return + def get_unichr(self, cid: int) -> str: + return f'(cid:{cid})' + def use_cmap(self, cmap: "CMapBase") -> None: return diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 9568b54a..153c97d4 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -761,7 +761,7 @@ def __init__( self.fontfile = stream_value(descriptor.get('FontFile2')) ttf = TrueTypeFont(self.basefont, BytesIO(self.fontfile.get_data())) - self.unicode_map: Optional[UnicodeMap] = None + self.unicode_map: Optional[CMapBase] = None if 'ToUnicode' in spec: if isinstance(spec['ToUnicode'], PDFStream): strm = stream_value(spec['ToUnicode']) From 62a5791ae8cd66f7283c319fae173bf36e18b80c Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:41:32 +0200 Subject: [PATCH 10/14] Added line to CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0b8c6a8..a580bca6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes in pdfminer.six will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [Unreleased] + +### Added +- Support for identity cmap's ([#626](https://github.com/pdfminer/pdfminer.six/pull/626)) + ## [20211012] ### Added From 9f26d81cb254fc95b3eedc3b77002473eb25d123 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:41:56 +0200 Subject: [PATCH 11/14] Add separate class for IdentityUnicodeMap --- pdfminer/cmapdb.py | 11 +++++++---- pdfminer/pdffont.py | 5 +++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 1ce012c8..d9dfaba4 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -126,10 +126,6 @@ def dump(self, out: TextIO = sys.stdout, class IdentityCMap(CMapBase): - def get_unichr(self, cid: int) -> str: - log.debug('get_unichr: %r, %r', self, cid) - return chr(cid) - def decode(self, code: bytes) -> Tuple[int, ...]: n = len(code)//2 if n: @@ -168,6 +164,13 @@ def dump(self, out: TextIO = sys.stdout) -> None: return +class IdentityUnicodeMap(UnicodeMap): + def get_unichr(self, cid: int) -> str: + log.debug('get_unichr: %r, %r', self, cid) + return chr(cid) + + + class FileCMap(CMap): def add_code2cid(self, code: str, cid: int) -> None: diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 153c97d4..00e325ea 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -7,6 +7,7 @@ from . import settings from .cmapdb import CMap +from .cmapdb import IdentityUnicodeMap from .cmapdb import CMapBase from .cmapdb import CMapDB from .cmapdb import CMapParser @@ -761,7 +762,7 @@ def __init__( self.fontfile = stream_value(descriptor.get('FontFile2')) ttf = TrueTypeFont(self.basefont, BytesIO(self.fontfile.get_data())) - self.unicode_map: Optional[CMapBase] = None + self.unicode_map: Optional[UnicodeMap] = None if 'ToUnicode' in spec: if isinstance(spec['ToUnicode'], PDFStream): strm = stream_value(spec['ToUnicode']) @@ -773,7 +774,7 @@ def __init__( if 'Identity' in cid_ordering \ or 'Identity' in cmap_name \ or 'Identity' in encoding: - self.unicode_map = self.cmap + self.unicode_map = IdentityUnicodeMap() elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'): if ttf: try: From e4745981009a84e94dff5ad909078832bd695aac Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:45:51 +0200 Subject: [PATCH 12/14] Remove ABC from CmapBase --- pdfminer/cmapdb.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index d9dfaba4..3b25b9cf 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -38,7 +38,7 @@ class CMapError(Exception): pass -class CMapBase(ABC): +class CMapBase: debug = 0 @@ -60,9 +60,6 @@ def add_cid2unichr(self, cid: int, code: Union[PSLiteral, bytes, int] ) -> None: return - def get_unichr(self, cid: int) -> str: - return f'(cid:{cid})' - def use_cmap(self, cmap: "CMapBase") -> None: return From c0d91efc4b9b37da8f3b671261fc33a09dbae17d Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:46:17 +0200 Subject: [PATCH 13/14] Remove ABC from CmapBase --- pdfminer/cmapdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 3b25b9cf..7bff03d5 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -16,7 +16,6 @@ import pickle as pickle import struct import logging -from abc import ABC, abstractmethod from typing import (Any, BinaryIO, Dict, Iterable, Iterator, List, MutableMapping, Optional, TextIO, Tuple, Union, cast) from .psparser import PSStackParser @@ -163,6 +162,7 @@ def dump(self, out: TextIO = sys.stdout) -> None: class IdentityUnicodeMap(UnicodeMap): def get_unichr(self, cid: int) -> str: + """Interpret character id as unicode codepoint""" log.debug('get_unichr: %r, %r', self, cid) return chr(cid) From 0397729853ca059c0fa0889c73b05e5bcd7ee809 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Wed, 13 Oct 2021 21:49:07 +0200 Subject: [PATCH 14/14] Remove blank line --- pdfminer/cmapdb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index 7bff03d5..6974c1c3 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -167,7 +167,6 @@ def get_unichr(self, cid: int) -> str: return chr(cid) - class FileCMap(CMap): def add_code2cid(self, code: str, cid: int) -> None: