From d461d418e10c529e64cd1fbb33134466cff48a23 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 6 May 2013 09:17:33 -0400 Subject: [PATCH 1/2] ENH: HDFStore will retain index attributes (freq,tz,name) on recreation (GH3499_) TST: added legacy_table_0.11 table and tests --- RELEASE.rst | 1 + pandas/core/index.py | 10 +-- pandas/io/pytables.py | 80 +++++++++++++++--- .../data/legacy_hdf/legacy_table_0.11.h5 | Bin 0 -> 293877 bytes pandas/io/tests/test_pytables.py | 43 +++++++++- 5 files changed, 113 insertions(+), 21 deletions(-) create mode 100644 pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/RELEASE.rst b/RELEASE.rst index 69cfd1eb99d7e..7caf9c7fd50a5 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -38,6 +38,7 @@ pandas 0.11.1 - Fixed various issues with internal pprinting code, the repr() for various objects including TimeStamp and *Index now produces valid python code strings and can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_) + - ``HDFStore`` will retain index attributes (freq,tz,name) on recreation (GH3499_) **API Changes** diff --git a/pandas/core/index.py b/pandas/core/index.py index 101b69ffc3c7e..4a7981e57c622 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -83,12 +83,12 @@ class Index(np.ndarray): _engine_type = _index.ObjectEngine - def __new__(cls, data, dtype=None, copy=False, name=None): + def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs): from pandas.tseries.period import PeriodIndex if isinstance(data, np.ndarray): if issubclass(data.dtype.type, np.datetime64): from pandas.tseries.index import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name) + result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: return Index(result.to_pydatetime(), dtype=_o_dtype) else: @@ -102,7 +102,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None): except TypeError: pass elif isinstance(data, PeriodIndex): - return PeriodIndex(data, copy=copy, name=name) + return PeriodIndex(data, copy=copy, name=name, **kwargs) if issubclass(data.dtype.type, np.integer): return Int64Index(data, copy=copy, dtype=dtype, name=name) @@ -123,10 +123,10 @@ def __new__(cls, data, dtype=None, copy=False, name=None): if (inferred.startswith('datetime') or tslib.is_timestamp_array(subarr)): from pandas.tseries.index import DatetimeIndex - return DatetimeIndex(subarr, copy=copy, name=name) + return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) elif inferred == 'period': - return PeriodIndex(subarr, name=name) + return PeriodIndex(subarr, name=name, **kwargs) subarr = subarr.view(cls) subarr.name = name diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 06ae9a7f7f11f..9c6f5f85eee14 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -943,7 +943,8 @@ class IndexCol(object): is_searchable = False def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None, - name=None, axis=None, kind_attr=None, pos=None, **kwargs): + name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None, + index_name=None, **kwargs): self.values = values self.kind = kind self.typ = typ @@ -953,6 +954,9 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None, self.kind_attr = kind_attr self.axis = axis self.pos = pos + self.freq = freq + self.tz = tz + self.index_name = None self.table = None if name is not None: @@ -1023,7 +1027,22 @@ def convert(self, values, nan_rep): values = values[self.cname] except: pass - self.values = Index(_maybe_convert(values, self.kind)) + + kwargs = dict() + if self.freq is not None: + kwargs['freq'] = self.freq + if self.tz is not None: + kwargs['tz'] = self.tz + if self.name is not None: + kwargs['name'] = self.index_name + try: + self.values = Index(_maybe_convert(values, self.kind), **kwargs) + except: + + # if the output freq is different that what we recorded, then infer it + if 'freq' in kwargs: + kwargs['freq'] = 'infer' + self.values = Index(_maybe_convert(values, self.kind), **kwargs) return self def take_data(self): @@ -1098,6 +1117,30 @@ def validate_attr(self, append): raise TypeError("incompatible kind in col [%s - %s]" % (existing_kind, self.kind)) + def update_info(self, info): + """ set/update the info for this indexable with the key/value + if validate is True, then raise if an existing value does not match the value """ + + for key in ['freq','tz','name']: + + value = getattr(self,key,None) + + try: + idx = info[self.name] + except: + idx = info[self.name] = dict() + + existing_value = idx.get(key) + if key in idx and existing_value != value: + raise ValueError("invalid info for [%s] for [%s]""" + ", existing_value [%s] conflicts with new value [%s]" % (self.name, + key,existing_value,value)) + + if value is not None or existing_value is not None: + idx[key] = value + + return self + def get_attr(self): """ set the kind for this colummn """ self.kind = getattr(self.attrs, self.kind_attr, None) @@ -2060,6 +2103,7 @@ def __init__(self, *args, **kwargs): self.non_index_axes = [] self.values_axes = [] self.data_columns = [] + self.info = dict() self.nan_rep = None self.selection = None @@ -2173,18 +2217,20 @@ def values_cols(self): def set_attrs(self): """ set our table type & indexables """ - self.attrs.table_type = self.table_type - self.attrs.index_cols = self.index_cols() - self.attrs.values_cols = self.values_cols() + self.attrs.table_type = self.table_type + self.attrs.index_cols = self.index_cols() + self.attrs.values_cols = self.values_cols() self.attrs.non_index_axes = self.non_index_axes self.attrs.data_columns = self.data_columns - self.attrs.nan_rep = self.nan_rep - self.attrs.levels = self.levels + self.attrs.info = self.info + self.attrs.nan_rep = self.nan_rep + self.attrs.levels = self.levels def get_attrs(self): """ retrieve our attributes """ self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] self.data_columns = getattr(self.attrs,'data_columns',None) or [] + self.info = getattr(self.attrs,'info',None) or dict() self.nan_rep = getattr(self.attrs,'nan_rep',None) self.levels = getattr(self.attrs,'levels',None) or [] t = self.table @@ -2221,8 +2267,17 @@ def indexables(self): d = self.description self._indexables = [] + # info + info = getattr(self.attrs,'info',None) or dict() + # index columns - self._indexables.extend([IndexCol(name=name, axis=axis, pos=i) for i, (axis, name) in enumerate(self.attrs.index_cols)]) + def create_index(i, axis, name): + kwargs = dict( name=name, axis=axis, pos=i ) + i = info.get(name) + if i is not None and len(i): + kwargs.update(i) + return IndexCol(**kwargs) + self._indexables.extend([ create_index(i,axis,name) for i, (axis, name) in enumerate(self.attrs.index_cols)]) # values columns dc = set(self.data_columns) @@ -2379,7 +2434,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, existing_table.infer_axes() axes = [ a.axis for a in existing_table.index_axes] data_columns = existing_table.data_columns - nan_rep = existing_table.nan_rep + nan_rep = existing_table.nan_rep + self.info = existing_table.info else: existing_table = None @@ -2421,7 +2477,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) - self.index_axes = [index_axes_map[a].set_pos(j) for j, + self.index_axes = [index_axes_map[a].set_pos(j).update_info(self.info) for j, a in enumerate(axes)] j = len(self.index_axes) @@ -3042,10 +3098,10 @@ class AppendableNDimTable(AppendablePanelTable): def _convert_index(index): if isinstance(index, DatetimeIndex): converted = index.asi8 - return IndexCol(converted, 'datetime64', _tables().Int64Col()) + return IndexCol(converted, 'datetime64', _tables().Int64Col(), freq=getattr(index,'freq',None), tz=getattr(index,'tz',None)) elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() - return IndexCol(index.values, 'integer', atom) + return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None)) if isinstance(index, MultiIndex): raise Exception('MultiIndex not supported here!') diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 new file mode 100644 index 0000000000000000000000000000000000000000..958effc2ce6f83dcc62caa866b7a7de7c98c4a30 GIT binary patch literal 293877 zcmeI531C#!_5bfo0t_L6fT+QxPTh!0LlVMft1}4+CK{5M37f6IFi9qmGMNmM2?Vqi zTq?C{u|@1tqWL1Y^h)s0n1aaY_ULW<`RVc$9nW*Me{~ggmE3^YA^bijk~RF-KSr`Zl!JY#yi?roepGnG7L)47p+LkJ`q zr$Oz%2}9W1;P>%^`j&dym6yn_dpk}&Rn?_7Zlq+t`5P>bfhyK1*{4I@)+|?zL%QFT zms87(YyEML{9V50Qls@|_@l3zzsr>VF6a$3dE4v5i-N+clwTdfQ_@;<$lK~OnOY`O ze$&Q=ne#93h1y%%0xG}gI_1yLE8zdles+k=Cym#rR{CdQGnCI84ElJ>xUtvECS`&bf1KvP= z$QO*cAM*1&w~#0O4&wU`=Jze|3C7}nrpCTo%8{OobcE8KJ~4nxi(Pw~I-SP(h~pEv zdpVh>l8r1$u@MvL?r<3s=Y*`2|A@Tga)vuy(Y6!~VMTlRJ(lLjbF;W0-ld{T2*cJ3=gkAkzXS_6Z3W( zr$hEe>9#$xn=v0bG?CI*gtHM_)ZwT@e1ZT7fB*=900{IW0(G6GfsP3^^TsZ`*HiKI zdn@eje=Q1+>Ad(O*S2Gx3*C3&eIZ4-{7h>8(dSbzxKI9TmR+yQ{}Bk zj4Iw0`Pui+sXA=ov#vQ!XV)~$Zw!oIbowE`{MQZhUvh2ufBAo1F!{5{s0Z)u8oTGI z$ocbzKC<|`yCU=3&YWLRd-kNNf|WyVx+Z)1mdM4MR{W#r$<3~t9%(%5{b7>C355y&)q$G`v%v5CCj%iKkrl5{Icc}Z&UtQ-=z6n&%X1kXa3{* z`DyKImU~}tP5Z~?K3{2wY~Z-92yO)Uq?1L(^w2?#L>*BmMDrUE@cr`@H;; zk6ds5A^nNZ@?MEFJhd`??c*Cdgi(-$6UP2wY6o_Nn7u}&t?1W`R|u)W?g@OZOXdi zs<*l7YR@StYVjhFVIF|3TKKPhGY@O@H9MMgNIB?6~^yoty58JQK|Pz`1Kvq-tz&<-CfL z(7drVlm0mGqUG1#!olA5-GUt;f;r%^|GsEj{n@b^QRWg?|lEl z&`)m~yC?F4A59tC^oxgF6S5z<(*3~)5%0Q*S30&0?3&d!XUe?NV()^65zm})>B%oY z6S?i_KVN(4celIVeEknsz54t+k-uI(ebTzKw?%?;&pmTdVe5=eU;Pz3cQ}9XTh|3w zef;Iz4cj7VPi%MW9QKmy?kDD#t-17VS73a@-02Gn&#hVX`^#^+z3ZBPyWDsA{4d?K z!*$dN8?PUG&4-btd28E#6517MuN^HLdB?~dD>Ei`ZjYQ^+`g#em!Cy$`dv=NgKaNH zp5A@M^&egJNu;xCMs>bt!mQ$slOEZ%>W*b^xn4Z%#~W{rL|muca@EU|8@5NL-+W?q zY1$UoxlmLFJ9ak8MeOgvn}7+?V5brYv14Y_G6Kw17lu2>5wlX zp0g(u&L7(_rMTdWOGp0Y`PIz*RNGNG?*DFd{paSNZobj6HS+kTJ5PFdM_SkX&gyB! zK2KS5@vS$Uxpl?2wz$q(^Tp3M4f)j7^vh>{SI=&fx>{;ct#NZ_n-RV8PZH&2{3 zXU^j9|L)a2uH!ov3_Nnn3z6rJ$h~0qm=7Y?|MtnV&;Q{^u72dU(G?H?0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!XNErbe z8^CP5v>aVxHp`ZKYmX3T8Cxqq8LO);D`Q-8k7d)BS=6EQ zBLAWKD@1#I?as!wK-k+7@P(YsZ6Rma+u-*($22t;jFG#HWwKn^LMm@^WwFPj%NuAa zudbr1w2VVArXSu7v$yZa)-tphbxa!Ee+bGm>!4rgyS&hI3@2^ z;%rHQI-aPHk@Rl@G!9qmZqC(dO_8z#^6zY%I;QcO%wE%Z8A_TlvR5lbPgQlPjZ2aL zUw$Ozuu9@M83EEyaEjt5ohr|e$e$C&}8Zi4KU^5dh-0LP~K+J>tNabwqV%rTj2A{%={w}KXK0yn~`6&KjdrbXbiWsah1z0 zQGV)&jh7(z^@#zTme{qYsncotSll-=_>Yb5gLW!GRwHuA9hx4}@etlWA!*b1FS{(y z#@gq0G&lQwvP_Xtcn;w|ntaWEZ`ddEi1RcaW*@KW>k=pVHiFBGO`N^q+RM%{>@U{7 zhiEw=FK^GqwY9~w#F5xkIclFV<;MnSYi6wL6l*_8Ql+{2Fz4jta>5S9<~$*}9QkAS z*yM6WH&*?c{ohcN zyr(S`_BA=Dd;J~0c3BVkr9pChmCdXxt(dCDRs3?8vd>KdL!Pkddo8D zt=1cu<1>a?pY^n)$9l?#WCtZ}9f2lqXb}ya zzWO(e8+HFBzCD>QnEjY1>M?=J|P9pvj~S=!51@`%@or z?kmv&vhV&XOZmVa-skDLZ>C4xM zauc;&&3BT^b!+T?Jm~#=$}L+*Eu_amzCc3u(P&pp|f2*P%e-Ul+IJgIoX(6 zucr*w=R?x}&ZAX{#M`Eyi;ej!JyEE1Z}#1j=Rr{;w{Q0ux-&cvW$QM7?TdX|*G(i; z{-JO&##bN3LK>q9X{+8LN8;5xi*?$nQos={~IE0^iW|3qHjc2527o7_%je=}p%F(3c<(g%w^`1p=ruUUBB%)dYTz|H^M z?0fv^w;w$EvVv^3wBm^));yMV_(kizPyMRl!w+38Gw<7R?Tb%M^Vo(vmMmFgFTefD zamPJ*O4i9YZq9jg+Uj+8?%sa%qYIbcwsq2tOPOQYyc6y{b@+qd{&4HLukHGx^qCWH zxO(m5o?{&jer@w9PS2=sf6?{vGmqXf?K$71d$Jave%Q3b#-4cHnh%2m?ary$ z@ARqdhy9}9k?_--HdZZs{&yqlw%IRnv-Hmn&3$9{;{z9NSv=>SKTIA|)iUajCq6Xc z*~-qxx1WC5SiU?o{#O$s?`}Bl+ix7T?$;ed55H^7Mfd-9MehH)9qDd&{nF3xT0iQs zUo}q{a>lNPaX)_M_GO=3amkpiZ$EgGs(Ra?vOTSZ0IP&|q+e%!L-Y^AZ8BG{R4d;9%?V6~FxYRC2sBtUV?Gj-pcI$eEM z_mQu|@mYOKJ?%N6oS{OmAu{sq2J zy9^c;`OUw_ke^q;|BH6I`4!RMAu^wYZd5DlJyc$oV$2&1`gqIuuTRvscKE|BvK{=A z@%tF692uQl&!dy|!ndgZC{sDP{$N2yZbg2EXS5U@oe?a|2nWYzc=KdMnQXR9Mf|GZ zq2A9J1&Z><@*mQZEYCAqi^(YA`g~ca*E71o8{)gXY|rTCwl=<7MAh)p=jwVCw?JE< zUbWrJe{Do@KiyhSZUt|nul-14x-aviG7mWEm6n`~(OkO{c-{f<7y^^VHLHBnc-4&+ZI-Shra`=xZTHjvSaHos6S8Q%p zG=vrH;TL#|V2=|7yI=k09$DY7(fcNP zUuD>PY&@RXztt6&$lnJum}|7fhl&o?$N|Ah<1qHM(wI-bbFNZ=l20>h@pp!XgSf!bveSv zQ9145P)lG=QNCdZs(x-?J*hu>A4*ieC};2W7Zw@qR?nkVJgCRR+nLCI$^Rr;58t+j zXO6QiIgfg-E{dCq*rGQUb%;+8009sH0T2Lzen{ZTVdF9aXKamJedxeZcWwT+Ye(n* z4SN5%ovyY3U)SN27Mdj-dCiEQ{dnQ)k=AAPPoH(lGm+Ih{&CTgs^3S#3;FXbm!qf2 z6{mlBL*193MNXOOob%+CXI*ng9R2qJ?w4HQxgnpA%hB`h?`>!2JTY*CYx}(O|9NZC zhmotF`u)$ZF5jl=<+C~c_{%~o-dQ(f)aFR-_Q_eB7w?F?_Rss)Z1}+%m#mk|iDzoD zy&B%QF6Z)hB2V49xU21hZIPxsKYrYH&kmAXtfx2Z%oz<8-~ZG4$c1;LUvlW|HzObY zW<&mW#=RAh?Q8HZ7IRXu-8cD@dz*jziRHRgWixI;cITmPYI-~5hFdOr5v zyYV;dxEEbl&wBUnm%2W5jk@#Fzpb;srt0N#gH>_tq9ZpvebTPouJhgJ{demHpGGFU z^4k9mulZcn%jL8f5)c3Z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5ctXo*w_GOSBm+4x4zOJ^c zjB&|5mQ7z~4>u}#2c1CrjUrH9R$L=GJX81Dsna*=m0jpV))Hv)b@GD-Z%CeMZfldN zutBj+R7sCNhiiLXd7i50VO?J)E0<;1*g)ps|LQ90D!Bo(L-@}L{D(_rDYs~ zAunoou<2#Bo{H)!DWAsWsQ!Yye3{S2hRSm}@`&+Xsd31Bs$W5kQH?`;!g07{t`c-p zISbWsZ@KF1NDeXrG!9qmcF)!6&PB=&$gb{Ktd6;<$}O3_rgI6az3Oby#8XvWYGY|+ zwQTkks{E*cWrys%{GsG>bU)dt??;wm`rRSvCtLHAPL=mEQ+r18KW2Y+7{5;Xk^W5j zFzIQzU-Ngg2F(5_)_)B!=U48l$NsA@uR!V`laU{D@VpK-rJ}NqyF8Z76{vLRmU7fi zn@#N*{Te3owRZ%4p$30j0`x z)zSQ_CZ{~6_A?egf8*SafNpy7{HjpiX62b}Y_M#9TQKbRE%5neX8w_gpSb6U&B(9X zAM!PIG=^K+xXR_0C_nYX#)c@{(I*CQZHZlbnmSGPxVA<0_hDVGjqZnbDoJ0^Txgm| zoQCjz3Q3zD?si#^jkVA1Xm0lVw2cTm7WLa$ldswD4f|vsQL;nNP}A&bRJ5#^W0dAK zLYE*;{^i=s&N1vU*4~GRc8EgxCU4ipwY9~w)Uono1GF{s`N(|TV0Ag>GgqHk)|OmO z*rC{#{4lv3`Qy!<$>rR#G{)wJlgnwb{ho+BfnaR4zuw+wta>#2y`j8((xay>6!tYa zr+fV!zIJ}z$TvuitFoDOr4>_Ed*Y90g~-M-WSOgV!PfB*?H3pCUXHQ^TZ+bmb`QKQ zy&exTYpTXW@Ac)DAmiPqM)518ud0sL20qztYj#SH>1+w8Nw>_*Kl-Y_ReRGP_Zrf1 zFt@BSkw2r+MNWr~zXE?I?scMg*{prl-{j)Sd=|p@gL{;a1fU#Zls%3>c#8_r}~;& zya8uOs3#SEFjeE|Le4;J1e1Ac70#W?eb{vVh0NF!HTSuVD8(Ks|sjq$yZrSvB&o@qfh3+?%-uAGw zwr!!aT|JmCkPoKLQ^do|L7)y`>sh0?v*xqLX3?M(b!sqK8j#afj- z4`utx>|8?S9|{)}?}@}h9HWVBtKK08u6q2X{oN)nymlS;vckSslHmJaSQ?=00JNY0wB;I2qc;> zoRpw*BR`8jUx-df6Xe5b5C8!X009sHfrFO-#rHbBY98R$>tV+F@<18iQCd-3Cqj6{ zI*56FgVsTa0Ij>xIw`H=IDf9n?`fRvs$b~#=z<-$sAF0OrTUQePXdYJY~Q#|*#X(r zvfI`1zOFZ0=~1~mA5`VmXaQp#FZR9WNG?O;phCpE%lJ?pfgg+glqt4Ead0#qHvdwMhb}4IyZ3SC z_siCJP+Eo8#YW%9skeKKmEWN1rR&yI(O_)Vb;;$jb;dhZB$p#Uvi8>Ga&BGT++Qb` zv$l7C9VdzBl`1#&q=0NcS7FzV>IF=*m9& zH5y&?-iwa+_8xX`el3pPuH#%p@iOba>g`~S4_(q@J*k;J#et|%V8k7!PA}28(6M8+ zPDEX?dPzL5IsoxG^g6xI@oaq^Hx&O#sZZ+uweKC@TS%+ioC@2Oc2|9!w~ zlgl}DrsY>Bmn+wDJ8w=dH&M&Iabt2hb6fZ4ekz|!oV?+5ABo2A^)bG4?Ny2!@!pT| zoonyaD&zL8mdttoV&|{>X5ZFz6Vmy?7T@XCPGF|)K(ck#p2T-r^-J;Tlp5u$^~SGz zoM#Ow){>r{AZ{b-NG1OX5L0T2KI5IFb=^fu1($^`R;=*0BkM}i)J00@8p2!H?x zSO`#@=Q91hhj-|8u~f%-I{r^Bdi6BUbJ?Rx-mTla?OAn9>z{50&m`X?s`wgQ5$7pvFrKJ@`IQ;w+a? zdh=c{**NLFzLbR?3~`>x{2GleidUoKy~P{s&9B87A9tOs;yh*6ebrlkj`LJ{%8?#y zU*kM`u9s+>=d@|sXNihq^+NN#uf_MU?bkR@rH}aY0gCISaYM&n!MG9rP89#O9XI0M zPw^uMBF+=zru*N4F~?`6`uje65?2U)bk~P@-f7bVXUSrxtv4i>3k%)T)CFp@R#Ir6SZ8|)5+ybP2I1%seCSR@|M$mI#qF=s~=X} zh_^KkkiNf@(!JR?y%$fkULu|=w0*CBU8{`ep=>F$@BR|!nX8?^Ol`HsdSL82>o+mZ zb8>>w{7{0p4dJ`7fdB}A00@8p2=q?^y^ZtSm0-RQotXAdOz0H|fB*=900?}e2~eD8 zQ2*WU<$7H#)p4HN-qVX-dLcs0Q^cwsy@#Zfl;QYL$Ta(j zJJQEtZv+2p)r{4*)YGoKLhHq}POioGn$UQdxk1@qr>@Uf&yHPx6m?O3RDo6A{C-0G z&Na!OSzA4=h94X9vGJ*5$dJ78eL1RUHu-J44i@zCKU>*eA6^s`G}QM$n&00whrF#m z6Cz`L598k$8O^`I7iyP(tXzD5r1+cWRBwJ>0sk-BN%anq`6P6sTKT<`=J)E}V9>`~ z#+6hrn~=%Wio5tb=b~Qz=azh(^^I+Q{fCo8y+@hq&GiQhGIA^OGd!cE=;(}KVMaJO zHp838r9^w&G9?b`?|K;pit@w{^+T5D8C^0u!|NGc%s*8zyd0qgA*Xl*+5+{eZQf4t zhg(G%-f6l1zwylsBoAXStq*3>vIXS6R%_P5C!_KMbZv<7&os@$HnFIR~3_9^=a zcmwqzUr@HoY#;o*lr!|J{@$PbBlf(;M1Plx_}JKCl9fZBh>bg4C9faGD}9;QBpJyX zlYf}&EzzRn+Y{v!TtwfBwSchaWSfv*s|DyiWzA+aj%!3gd^5d=S@kb*5%>JmZxTKO# zAo%#|J}#!0#s7P=g?Zy8b3?wTew^MjtMNOfqO7vCKEJ-Ic&eygjz4p|XgnC>&z!HI zTRveKH10}d>qLLz)mNA>AB_#eFK6*G9E{pIrJ}M@6m1@V;+X0c`(lenRwYuhebom-_zvD*i`E zJzPGK-e}w;TJO7P56>KDJ9c|_eKxe$Dy^&e-(khtp5a$)DY|#FjQNGvQAC(C@wTCYvD2u!+gmpI7auZXGsg zc&;VO(j7}}*?g|U1z&k_Fw1wOy_k2(PM4KV$aM?VI*Q z9{IyYH(Pq1eOURQhW$^E>mnoJf4_zQYA+of)G19;`%#x@*4vM|L<Rkpz(TYXaqE#WkXXGj%VWI?dE} zqz_q3pvl+C4@&fjVx0;b6kBiYWw^H2=RZ^Sx8-w{X0mcwo{bG;4*svMqOOt~Fgt|* zoWOsmz6#MEU%RuhEfDs$1biW9b6dz6_BQx^&M{5RW8@x}&t$p$Q{+?SO|C5Vc=)j> zZ=gKSKloNxQB_*TAsF(ab_bhYR_m##u9EVig;ak*UcSs{V?*V+9C^feuhcj!+O7H% zRKtm>acECC4jTJ=(Q&nARV`VL@B)X^AjY2zxFTcZ5b4;vezY)79Mz_lfI?P=;X+2d}_`(a(K zjqZnbDoJ0^Txgm|oQ81w6_Pf6AG6DPY^;55M{~2^r)@;ov8dn1ntaWEZ`ddEh>{)J zv!vP6sAyR+$0*Hfgf2mx{L8hMonzQzti2Br?GT0XP2R4HYio;Vsbl5G254($%sbM=`=2PT&jb||(7Y{})wA0L&ST+S^^W9-Hu$>p@zeoxZ)$Q6vO_Sf6{j8%_j zzc-YZPkQvUg~Glj=X9^X!`IHw8~FywaaA_6uC!vRYL6o3}jQgUp($@z8sHxh2YY_o-2mue{8vI$9g}WV@}|DLtmMC7>qV z>T2Khw`y?gGjXpI#mi>xtNtbzPv%oV!=LqDfBBk6 z>~m8Vze3}!)*G1PGlpBA$J?@A>nlgKQ_myh0so+zL+bQnry`k?OorDJs~+T9rN*otxYqeMJ&#^7OwEv&i^0h^n|?l|b%mw+Ih4{= z+-F3S((Ly4N9f5pPt+8WpO2jnbe~6=*Ne^bD87tbS6e;9G-HbvDZ0hzdDJP77(0G| zu3R5m6E!PhES#2Hj%vC6%jD~7<+_5RtmNfQ)N<(=$>pr=-CxIKH$_@`^qg&spV)PJ z(UaKwhg&u|-t&#qBwy)m4?Any7CPJ2gXseKVCp=DoENS4Qw53ae1(3mMtZQ?Ijyfy zx;Hx~?Ie4y)OJ4dNG;40%J!AnxrE9;6fP#-6N!a5Mibdqy+aPht9KUbv{k1ne`PI| zr^|_+Abk=~lIv|s_H;#zJ*klp&pw9pN~r#*eNabr1ET8-e1P)*d~?fB>pEY+=l8Yx z0%18h;S$z$7OE#h=H`Bj)?@gwQl?X0>1(;-eUw@KZ?P~U0Yjn+wN9jER@m8++5wy&I|t@NneQ+0f8jTSK0@nYX=j^Hvh4k|>vyNnMl ziN=PCB8+wL*!M_?4;B1WeC0PUJ~UI=jy53cdx|u^N-ZuO!hgh@q(pI4^nQZYhv|54 z?&Z33kp@eblsZf>JzWz zCzs3Ca&PH=Sj(Yf@*}$^B`?pdGu${Xxg3?dztbe~yi(q*V* zDGr4E8tEr>dWpt`PMfA~eX^=py{J0?*||9m4lC{r=OU3 z(68;d5%+#=-mE0W@t_ztYCVAX9_Tnw=%e@gFwf6Z9sdb^^j;rve&0YxU-cy(H+kHr z^*NKC`$m3GJ>SgMH~LiEKZLK~ah}iXqQ!b5J%;w`xiVRtC$;|){T$b++fRu#?z41E z^5-};>*2o-_?_f(6le8hZgRPD-H=l!B$u10HT1f?+j}vFjIFR**a@a;ybPS zrTBD8jq=rcQVA;t)}A})G@7l z>eER2DS**dP_j7BwfuUj<2*%~V%uAsXZkcX&bp-Z{)_W$`hlufR~wV4 z(azYT=O&jEb||(dbwAWm>?iq=_ZyR!=hj&-sZTB!Tf_c26DN*M4n5vlsq)^xpHpuf zD4q&&o=T5NI7`Fm&3ip_yoEI$y7&5W%Lc~N*Vir1GnrqbxK4^!qvO5B8|=-m#nIbs z9GECxX5Cl4_2)QGrKcQaSNr(=pFP)0G|ux(?zkvhoH&r&Zgy^dPu}0-Je59l|1IBt zOmUqwZs_a=DdTz&zprGDUrF`%eJ;_8-upZn z`XGIn=N$(k&J*LK``<}A&~ctZAG8j#Td%Lo=I6wgiu;EcqCC#?foA>aM9(Cm{NCI@ z%zniBeV){QzkZJE6os2MYn3ikMHyBpN4>5fdB}A00@9U5&{(GdAt7p!YB2**uKVjcG~{EzQD%v9Z$ONx#*HF zjydh3%OVrkzTiK>o_=fk7bC|7u4PLXk3K5h(KPsiTQ?2;k&w5E1=~Nb+ELv)Y|!xB z%$)q$T}y0f1J%yb9ZPNeMV*ZczFfdpdK_s5uiXFMg^lT1%NMSHYg~KAaObwOR!#ri zPmVl$iTj@a9zN8b?#On!m+-%Aj2%FLC4A+hVCnhxtZxq)R?wTFynb*Nb7rQm{>Pjz mvu?Zi_G{KZaK@%LKb(GY_pu*!0;Zk%q2GJwO*Wy&mHmG_5(N4H literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index d7f497648236a..6bf52c58ad71b 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -7,6 +7,7 @@ import datetime import numpy as np +import pandas from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, Index) from pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning, PerformanceWarning @@ -2041,6 +2042,22 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(expected, result) + def test_retain_index_attributes(self): + + # GH 3499, losing frequency info on index recreation + df = DataFrame(dict(A = Series(xrange(3), + index=date_range('2000-1-1',periods=3,freq='H')))) + + with ensure_clean(self.path) as store: + store.put('data', df, table=True) + + result = store.get('data') + tm.assert_frame_equal(df,result) + + for attr in ['freq','tz']: + for idx in ['index','columns']: + self.assert_(getattr(getattr(df,idx),attr,None) == getattr(getattr(result,idx),attr,None)) + def test_panel_select(self): wp = tm.makePanel() @@ -2437,6 +2454,16 @@ def test_legacy_0_10_read(self): finally: safe_close(store) + def test_legacy_0_11_read(self): + # legacy from 0.11 + try: + store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_0.11.h5'), 'r') + df = store.select('df') + df1 = store.select('df1') + mi = store.select('mi') + finally: + safe_close(store) + def test_copy(self): def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): @@ -2497,14 +2524,22 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): def test_legacy_table_write(self): raise nose.SkipTest - # legacy table types + store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a') + df = tm.makeDataFrame() wp = tm.makePanel() - store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table.h5'), 'a') + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['foo', 'bar']) + df = DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + store.append('mi', df) - self.assertRaises(Exception, store.append, 'df1', df) - self.assertRaises(Exception, store.append, 'wp1', wp) + df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10)) + store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 }) store.close() From 9120b05c99de5b02ef885d2cfa797e5fdea21c39 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 6 May 2013 11:28:20 -0400 Subject: [PATCH 2/2] ENH: support timezone data_columns in HDFStore (GH2852) DOC: update release notes/whatsnew, added whatsnew 0.11.1 to index.rst ENH: warn a FrequencyWarning if appending with a different frequency that existing --- RELEASE.rst | 10 ++- doc/source/v0.11.1.txt | 14 ++++- doc/source/whatsnew.rst | 2 + pandas/io/pytables.py | 102 +++++++++++++++++++++++-------- pandas/io/tests/test_pytables.py | 75 +++++++++++++++++++++-- 5 files changed, 167 insertions(+), 36 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 7caf9c7fd50a5..f3f4d7c895931 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -38,7 +38,12 @@ pandas 0.11.1 - Fixed various issues with internal pprinting code, the repr() for various objects including TimeStamp and *Index now produces valid python code strings and can be used to recreate the object, (GH3038_), (GH3379_), (GH3251_) - - ``HDFStore`` will retain index attributes (freq,tz,name) on recreation (GH3499_) + - ``HDFStore`` + + - will retain index attributes (freq,tz,name) on recreation (GH3499_) + - will warn with a FrequencyWarning if you are attempting to append + an index with a different frequency than the existing + - support datelike columns with a timezone as data_columns (GH2852_) **API Changes** @@ -88,6 +93,7 @@ pandas 0.11.1 .. _GH3251: https://github.com/pydata/pandas/issues/3251 .. _GH3379: https://github.com/pydata/pandas/issues/3379 .. _GH3480: https://github.com/pydata/pandas/issues/3480 +.. _GH2852: https://github.com/pydata/pandas/issues/2852 .. _GH3454: https://github.com/pydata/pandas/issues/3454 .. _GH3457: https://github.com/pydata/pandas/issues/3457 .. _GH3491: https://github.com/pydata/pandas/issues/3491 @@ -103,7 +109,7 @@ pandas 0.11.1 .. _GH3461: https://github.com/pydata/pandas/issues/3461 .. _GH3468: https://github.com/pydata/pandas/issues/3468 .. _GH3448: https://github.com/pydata/pandas/issues/3448 -.. _GH3449: https://github.com/pydata/pandas/issues/3449 +.. _GH3499: https://github.com/pydata/pandas/issues/3499 .. _GH3495: https://github.com/pydata/pandas/issues/3495 .. _GH3492: https://github.com/pydata/pandas/issues/3492 .. _GH3493: https://github.com/pydata/pandas/issues/3493 diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index 4be34cdbf84eb..2e3a67ead65e0 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -1,6 +1,6 @@ -.. _whatsnew_0120: +.. _whatsnew_0111: -v0.12.0 (??) +v0.11.1 (??) ------------------------ This is a major release from 0.11.0 and includes many new features and @@ -12,13 +12,21 @@ API changes Enhancements ~~~~~~~~~~~~ - - pd.read_html() can now parse HTML string, files or urls and return dataframes + - ``pd.read_html()`` can now parse HTML string, files or urls and return dataframes courtesy of @cpcloud. (GH3477_) + - ``HDFStore`` + + - will retain index attributes (freq,tz,name) on recreation (GH3499_) + - will warn with a FrequencyWarning if you are attempting to append + an index with a different frequency than the existing + - support datelike columns with a timezone as data_columns (GH2852_) See the `full release notes `__ or issue tracker on GitHub for a complete list. .. _GH2437: https://github.com/pydata/pandas/issues/2437 +.. _GH2852: https://github.com/pydata/pandas/issues/2852 .. _GH3477: https://github.com/pydata/pandas/issues/3477 .. _GH3492: https://github.com/pydata/pandas/issues/3492 +.. _GH3499: https://github.com/pydata/pandas/issues/3499 diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 81bd39dd0e70f..a02e41176ced1 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,8 @@ These are new features and improvements of note in each release. .. include:: v0.12.0.txt +.. include:: v0.11.1.txt + .. include:: v0.11.0.txt .. include:: v0.10.1.txt diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9c6f5f85eee14..1661080b11799 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -42,6 +42,11 @@ class IncompatibilityWarning(Warning): pass where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with the copy_to method) """ +class FrequencyWarning(Warning): pass +frequency_doc = """ +the frequency of the existing index is [%s] which conflicts with the new freq [%s], +resetting the frequency to None +""" class PerformanceWarning(Warning): pass performance_doc = """ your performance may suffer as PyTables will pickle object types that it cannot map @@ -149,9 +154,12 @@ def get_store(path, mode='a', complevel=None, complib=None, ### interface to/from ### -def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, **kwargs): +def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, append=None, **kwargs): """ store this object, close it if we opened it """ - f = lambda store: store.put(key, value, **kwargs) + if append: + f = lambda store: store.append(key, value, **kwargs) + else: + f = lambda store: store.put(key, value, **kwargs) if isinstance(path_or_buf, basestring): with get_store(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: @@ -941,6 +949,7 @@ class IndexCol(object): is_an_indexable = True is_data_indexable = True is_searchable = False + _info_fields = ['freq','tz','name'] def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None, name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None, @@ -1121,7 +1130,7 @@ def update_info(self, info): """ set/update the info for this indexable with the key/value if validate is True, then raise if an existing value does not match the value """ - for key in ['freq','tz','name']: + for key in self._info_fields: value = getattr(self,key,None) @@ -1132,15 +1141,31 @@ def update_info(self, info): existing_value = idx.get(key) if key in idx and existing_value != value: - raise ValueError("invalid info for [%s] for [%s]""" - ", existing_value [%s] conflicts with new value [%s]" % (self.name, - key,existing_value,value)) - if value is not None or existing_value is not None: - idx[key] = value + # frequency just warn + if key == 'freq': + ws = frequency_doc % (existing_value,value) + warnings.warn(ws, FrequencyWarning) + + # reset + idx[key] = None + + else: + raise ValueError("invalid info for [%s] for [%s]""" + ", existing_value [%s] conflicts with new value [%s]" % (self.name, + key,existing_value,value)) + else: + if value is not None or existing_value is not None: + idx[key] = value return self + def set_info(self, info): + """ set my state from the passed info """ + idx = info.get(self.name) + if idx is not None: + self.__dict__.update(idx) + def get_attr(self): """ set the kind for this colummn """ self.kind = getattr(self.attrs, self.kind_attr, None) @@ -1180,6 +1205,7 @@ class DataCol(IndexCol): is_an_indexable = False is_data_indexable = False is_searchable = False + _info_fields = ['tz'] @classmethod def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs): @@ -1249,7 +1275,7 @@ def set_kind(self): if self.typ is None: self.typ = getattr(self.description,self.cname,None) - def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs): + def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, **kwargs): """ create and setup my atom from the block b """ self.values = list(block.items) @@ -1264,10 +1290,27 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, **kwargs): "[date] is not implemented as a table column") elif inferred_type == 'datetime': if getattr(rvalues[0],'tzinfo',None) is not None: + + # if this block has more than one timezone, raise + if len(set([r.tzinfo for r in rvalues])) != 1: + raise TypeError( + "too many timezones in this block, create separate data columns") + + # convert this column to datetime64[ns] utc, and save the tz + index = DatetimeIndex(rvalues) + tz = getattr(index,'tz',None) + if tz is None: + raise TypeError( + "invalid timezone specification") + + values = index.tz_convert('UTC').values.view('i8') + self.tz = tz + self.update_info(info) + self.set_atom_datetime64(block, values.reshape(block.values.shape)) + + else: raise TypeError( - "timezone support on datetimes is not yet implemented as a table column") - raise TypeError( - "[datetime] is not implemented as a table column") + "[datetime] is not implemented as a table column") elif inferred_type == 'unicode': raise TypeError( "[unicode] is not implemented as a table column") @@ -1347,10 +1390,12 @@ def set_atom_data(self, block): def get_atom_datetime64(self, block): return _tables().Int64Col(shape=block.shape[0]) - def set_atom_datetime64(self, block): + def set_atom_datetime64(self, block, values = None): self.kind = 'datetime64' self.typ = self.get_atom_datetime64(block) - self.set_data(block.values.view('i8'), 'datetime64') + if values is None: + values = block.values.view('i8') + self.set_data(values, 'datetime64') @property def shape(self): @@ -1389,7 +1434,18 @@ def convert(self, values, nan_rep): # reverse converts if self.dtype == 'datetime64': - self.data = np.asarray(self.data, dtype='M8[ns]') + # recreate the timezone + if self.tz is not None: + + # data should be 2-dim here + # we stored as utc, so just set the tz + + index = DatetimeIndex(self.data.ravel(),tz='UTC').tz_convert(self.tz) + self.data = np.array(index.tolist(),dtype=object).reshape(self.data.shape) + + else: + self.data = np.asarray(self.data, dtype='M8[ns]') + elif self.dtype == 'date': self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) @@ -2267,17 +2323,8 @@ def indexables(self): d = self.description self._indexables = [] - # info - info = getattr(self.attrs,'info',None) or dict() - # index columns - def create_index(i, axis, name): - kwargs = dict( name=name, axis=axis, pos=i ) - i = info.get(name) - if i is not None and len(i): - kwargs.update(i) - return IndexCol(**kwargs) - self._indexables.extend([ create_index(i,axis,name) for i, (axis, name) in enumerate(self.attrs.index_cols)]) + self._indexables.extend([ IndexCol(name=name,axis=axis,pos=i) for i, (axis, name) in enumerate(self.attrs.index_cols)]) # values columns dc = set(self.data_columns) @@ -2370,6 +2417,7 @@ def read_axes(self, where, **kwargs): # convert the data for a in self.axes: + a.set_info(self.info) a.convert(values, nan_rep=self.nan_rep) return True @@ -2535,6 +2583,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, existing_col=existing_col, min_itemsize=min_itemsize, nan_rep=nan_rep, + info=self.info, **kwargs) col.set_pos(j) @@ -2654,6 +2703,7 @@ def read_column(self, column, where = None, **kwargs): # column must be an indexable or a data column c = getattr(self.table.cols, column) + a.set_info(self.info) return Series(a.convert(c[:], nan_rep=self.nan_rep).take_data()) raise KeyError("column [%s] not found in the table" % column) @@ -3365,6 +3415,8 @@ def convert_value(self, v): if self.kind == 'datetime64' or self.kind == 'datetime' : v = lib.Timestamp(v) + if v.tz is not None: + v = v.tz_convert('UTC') return [v.value, v] elif isinstance(v, datetime) or hasattr(v, 'timetuple') or self.kind == 'date': v = time.mktime(v.timetuple()) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 6bf52c58ad71b..3daa08a0d591a 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -10,7 +10,9 @@ import pandas from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, Index) -from pandas.io.pytables import HDFStore, get_store, Term, IncompatibilityWarning, PerformanceWarning +from pandas.io.pytables import (HDFStore, get_store, Term, + IncompatibilityWarning, PerformanceWarning, + FrequencyWarning) import pandas.util.testing as tm from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal @@ -1260,16 +1262,48 @@ def test_unimplemented_dtypes_table_columns(self): self.assertRaises(TypeError, store.append, 'df_unimplemented', df) def test_table_append_with_timezones(self): - # not implemented yet with ensure_clean(self.path) as store: - # check with mixed dtypes - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern')),index=range(5)) - - # timezones not yet supported + def compare(a,b): + tm.assert_frame_equal(a,b) + + # compare the zones on each element + for c in a.columns: + for i in a.index: + a_e = a[c][i] + b_e = b[c][i] + if not (a_e == b_e and a_e.tz == b_e.tz): + raise AssertionError("invalid tz comparsion [%s] [%s]" % (a_e,b_e)) + + from datetime import timedelta + + _maybe_remove(store, 'df_tz') + df = DataFrame(dict(A = [ Timestamp('20130102 2:00:00',tz='US/Eastern') + timedelta(hours=1)*i for i in range(5) ])) + store.append('df_tz',df,data_columns=['A']) + compare(store['df_tz'],df) + + # select with tz aware + compare(store.select('df_tz',where=Term('A','>=',df.A[3])),df[df.A>=df.A[3]]) + + _maybe_remove(store, 'df_tz') + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=range(5)) + store.append('df_tz',df) + compare(store['df_tz'],df) + + _maybe_remove(store, 'df_tz') + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=range(5)) self.assertRaises(TypeError, store.append, 'df_tz', df) + # this is ok + _maybe_remove(store, 'df_tz') + store.append('df_tz',df,data_columns=['A','B']) + compare(store['df_tz'],df) + + # can't append with diff timezone + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=range(5)) + self.assertRaises(ValueError, store.append, 'df_tz', df) + def test_remove(self): with ensure_clean(self.path) as store: @@ -2049,6 +2083,7 @@ def test_retain_index_attributes(self): index=date_range('2000-1-1',periods=3,freq='H')))) with ensure_clean(self.path) as store: + _maybe_remove(store,'data') store.put('data', df, table=True) result = store.get('data') @@ -2058,6 +2093,34 @@ def test_retain_index_attributes(self): for idx in ['index','columns']: self.assert_(getattr(getattr(df,idx),attr,None) == getattr(getattr(result,idx),attr,None)) + + # try to append a table with a different frequency + warnings.filterwarnings('ignore', category=FrequencyWarning) + df2 = DataFrame(dict(A = Series(xrange(3), + index=date_range('2002-1-1',periods=3,freq='D')))) + store.append('data',df2) + warnings.filterwarnings('always', category=FrequencyWarning) + + self.assert_(store.get_storer('data').info['index']['freq'] is None) + + # this is ok + _maybe_remove(store,'df2') + df2 = DataFrame(dict(A = Series(xrange(3), + index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')]))) + store.append('df2',df2) + df3 = DataFrame(dict(A = Series(xrange(3),index=date_range('2002-1-1',periods=3,freq='D')))) + store.append('df2',df3) + + def test_retain_index_attributes2(self): + + with tm.ensure_clean(self.path) as path: + warnings.filterwarnings('ignore', category=FrequencyWarning) + df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) + df.to_hdf(path,'data',mode='w',append=True) + df2 = DataFrame(dict(A = Series(xrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) + df2.to_hdf(path,'data',append=True) + warnings.filterwarnings('always', category=FrequencyWarning) + def test_panel_select(self): wp = tm.makePanel()