From 866217640ca7b2feb90d846ceb1afb5ec50f49da Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 7 Nov 2012 17:28:00 -0500 Subject: [PATCH 01/52] BUG: DataFrame constructor list of lists with duplicated columns --- pandas/core/frame.py | 1 + pandas/core/internals.py | 3 ++- pandas/tests/test_frame.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f39c8b0f0028b..5a000485d85a4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3318,6 +3318,7 @@ def _arith_op(left, right): if this._is_mixed_type or other._is_mixed_type: # XXX no good for duplicate columns + # but cannot outer join in align if dups anyways? result = {} for col in this: result[col] = _arith_op(this[col].values, other[col].values) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 10a85c5592514..76550a0b5fe7f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1325,7 +1325,8 @@ def _shape_compat(x): names, arrays = zip(*tuples) # index may box values - items = ref_items[ref_items.isin(names)] + names = Index(names) + items = names[names.isin(ref_items)] first = arrays[0] shape = (len(arrays),) + _shape_compat(first) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5c5fd1902c4cc..d728511c26227 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2319,6 +2319,13 @@ def test_constructor_column_duplicates(self): [('a',[8]),('a',[5]), ('b', [6])], columns=['b', 'a','a']) + #additional test for #2079 + vals = [[1, -1, 2.], [2, -2, 3.]] + rs = DataFrame(vals, columns=['A', 'A', 'B']) + xp = DataFrame(vals) + xp.columns = ['A', 'A', 'B'] + assert_frame_equal(rs, xp) + def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) df2 = DataFrame(randn(0, 3)) From 5a152bdfdd044c6f53c76b361065b1784dc73b61 Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 7 Nov 2012 18:03:38 -0500 Subject: [PATCH 02/52] Revert 866217640c false alarm 
--- pandas/core/internals.py | 3 +-- pandas/tests/test_frame.py | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 76550a0b5fe7f..10a85c5592514 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1325,8 +1325,7 @@ def _shape_compat(x): names, arrays = zip(*tuples) # index may box values - names = Index(names) - items = names[names.isin(ref_items)] + items = ref_items[ref_items.isin(names)] first = arrays[0] shape = (len(arrays),) + _shape_compat(first) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index d728511c26227..5c5fd1902c4cc 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2319,13 +2319,6 @@ def test_constructor_column_duplicates(self): [('a',[8]),('a',[5]), ('b', [6])], columns=['b', 'a','a']) - #additional test for #2079 - vals = [[1, -1, 2.], [2, -2, 3.]] - rs = DataFrame(vals, columns=['A', 'A', 'B']) - xp = DataFrame(vals) - xp.columns = ['A', 'A', 'B'] - assert_frame_equal(rs, xp) - def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) df2 = DataFrame(randn(0, 3)) From 4d8aa2b16b9b6f9597b7f74fbf2260a6beb0de2c Mon Sep 17 00:00:00 2001 From: "K.-Michael Aye" Date: Wed, 7 Nov 2012 20:29:03 -0800 Subject: [PATCH 03/52] Updating help text for plot_series description for parameter 'kind' was given twice. 
--- pandas/tools/plotting.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 00724a2dc35a0..98cf676c60a4d 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1301,7 +1301,9 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, Parameters ---------- label : label argument to provide to plot - kind : {'line', 'bar'} + kind : {'line', 'bar', 'barh'} + bar : vertical bar plot + barh : horizontal bar plot rot : int, default 30 Rotation for tick labels use_index : boolean, default True @@ -1312,9 +1314,6 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, matplotlib line style to use ax : matplotlib axis object If not passed, uses gca() - kind : {'line', 'bar', 'barh'} - bar : vertical bar plot - barh : horizontal bar plot logy : boolean, default False For line plots, use log scaling on y axis xticks : sequence From 6b13318f12edbabf13f43b372f3a608bddb0d5e0 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 20:55:36 +0200 Subject: [PATCH 04/52] TST: printing df.columns should not raise exception when labels are unicode --- pandas/tests/test_index.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index c1d0894f9bfef..2b83e7108a094 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -847,6 +847,10 @@ def test_int_name_format(self): repr(s) repr(df) + def test_print_unicode_columns(self): + df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]}) + print(df.columns) # should not raise UnicodeDecodeError + class TestMultiIndex(unittest.TestCase): def setUp(self): From 9cf39ac8e274c9351ea58639beb28be444a8a4a2 Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 8 Nov 2012 13:55:12 -0500 Subject: [PATCH 05/52] DOC: build docs for previous releases --- doc/make.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 
insertions(+), 1 deletion(-) diff --git a/doc/make.py b/doc/make.py index bc4f4a2f03598..5affd4b2414ed 100755 --- a/doc/make.py +++ b/doc/make.py @@ -49,6 +49,33 @@ def upload_stable_pdf(): ':/usr/share/nginx/pandas/pandas-docs/stable/'): raise SystemExit('PDF upload to stable failed') +def upload_prev(ver, doc_root='./'): + 'push a copy of older release to appropriate version directory' + local_dir = doc_root + 'build/html' + remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver + cmd = 'cd %s; rsync -avz . pandas@pandas.pydata.org:%s -essh' + cmd = cmd % (local_dir, remote_dir) + print cmd + if os.system(cmd): + raise SystemExit('Upload to %s from %s failed' % (remote_dir, local_dir)) + + local_dir = doc_root + 'build/latex' + pdf_cmd = 'cd %s; scp pandas.pdf pandas@pandas.pydata.org:%s' + pdf_cmd = pdf_cmd % (local_dir, remote_dir) + if os.system(pdf_cmd): + raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root)) + +def build_prev(ver): + if os.system('git checkout v%s' % ver) != 1: + os.chdir('..') + os.system('python setup.py clean') + os.system('python setup.py build_ext --inplace') + os.chdir('doc') + os.system('python make.py clean') + os.system('python make.py html') + os.system('python make.py latex') + os.system('git checkout master') + def clean(): if os.path.exists('build'): shutil.rmtree('build') @@ -201,7 +228,15 @@ def _get_config(): # current_dir = os.getcwd() # os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) -if len(sys.argv)>1: +if len(sys.argv) > 2: + ftype = sys.argv[1] + ver = sys.argv[2] + + if ftype == 'build_previous': + build_prev(ver) + if ftype == 'upload_previous': + upload_prev(ver) +elif len(sys.argv) > 1: for arg in sys.argv[1:]: func = funcd.get(arg) if func is None: From 07921941ca9b2bce86eb91c683dfae3c3665b59a Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 23:33:40 +0200 Subject: [PATCH 06/52] BUG: printing df.columns should not raise exception when labels are unicode --- 
pandas/core/format.py | 3 ++- pandas/core/index.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index aae911ba807ef..4505e6153a9a3 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -268,7 +268,8 @@ def to_string(self, force_unicode=False): if len(frame.columns) == 0 or len(frame.index) == 0: info_line = (u'Empty %s\nColumns: %s\nIndex: %s' % (type(self.frame).__name__, - frame.columns, frame.index)) + com.pprint_thing(frame.columns), + com.pprint_thing(frame.index))) text = info_line else: strcols = self._to_str_columns(force_unicode) diff --git a/pandas/core/index.py b/pandas/core/index.py index 1ba78c698a1b5..65b60941fbbfc 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -209,9 +209,10 @@ def __str__(self): try: return np.array_repr(self.values) except UnicodeError: - converted = u','.join(unicode(x) for x in self.values) - return u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted, + converted = u','.join(com.pprint_thing(x) for x in self.values) + result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted, str(self.values.dtype)) + return com.console_encode(result) def _mpl_repr(self): # how to represent ourselves to matplotlib From 0230bbf2388acabd751f3e67e5686e918cf18d0a Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 8 Nov 2012 17:54:23 -0500 Subject: [PATCH 07/52] BUG: allow full color name as string when plotting single column frame --- pandas/tests/test_graphics.py | 9 +++++++++ pandas/tools/plotting.py | 25 +++++++++++++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 4ea1fb0271aff..5050203424436 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -87,6 +87,9 @@ def test_bar_colors(self): rs = rect.get_facecolor() self.assert_(xp == rs) + plt.close('all') + df.ix[:, [0]].plot(kind='bar', color='DodgerBlue') 
+ @slow def test_bar_linewidth(self): df = DataFrame(np.random.randn(5, 5)) @@ -493,6 +496,12 @@ def test_line_colors(self): finally: sys.stderr = tmp + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + plt.close('all') + df.ix[:, [0]].plot(color='DodgerBlue') + + class TestDataFrameGroupByPlots(unittest.TestCase): @classmethod diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 98cf676c60a4d..d2ab5f3038b00 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -553,6 +553,23 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=True, self.kwds = kwds + self._validate_color_args() + + def _validate_color_args(self): + from pandas import DataFrame + if 'color' not in self.kwds and 'colors' in self.kwds: + warnings.warn(("'colors' is being deprecated. Please use 'color'" + "instead of 'colors'")) + colors = self.kwds.pop('colors') + self.kwds['color'] = colors + + if ('color' in self.kwds and + (isinstance(self.data, Series) or + isinstance(self.data, DataFrame) and len(self.data.columns) ==1 )): + #support series.plot(color='green') + self.kwds['color'] = [self.kwds['color']] + + def _iter_data(self): from pandas.core.frame import DataFrame if isinstance(self.data, (Series, np.ndarray)): @@ -858,14 +875,6 @@ class LinePlot(MPLPlot): def __init__(self, data, **kwargs): self.mark_right = kwargs.pop('mark_right', True) MPLPlot.__init__(self, data, **kwargs) - if 'color' not in self.kwds and 'colors' in self.kwds: - warnings.warn(("'colors' is being deprecated. 
Please use 'color'" - "instead of 'colors'")) - colors = self.kwds.pop('colors') - self.kwds['color'] = colors - if 'color' in self.kwds and isinstance(self.data, Series): - #support series.plot(color='green') - self.kwds['color'] = [self.kwds['color']] def _index_freq(self): from pandas.core.frame import DataFrame From 83c377ee763f1173eb4c7d9fe38a27d0ec623c80 Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 01:33:26 +0200 Subject: [PATCH 08/52] BUG: pprint_thing() should not realize lazy things The semantics of str()/unicode() do not realize iterators, and neither should pprint_thing. --- pandas/core/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/common.py b/pandas/core/common.py index 7bbbaab49e864..46c28e8af52ac 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1140,6 +1140,9 @@ def pprint_thing(thing, _nest_lvl=0): from pandas.core.format import print_config if thing is None: result = '' + elif (py3compat.PY3 and hasattr(thing,'__next__')) or \ + hasattr(thing,'next'): + return unicode(thing) elif (isinstance(thing, dict) and _nest_lvl < print_config.pprint_nest_depth): result = _pprint_dict(thing, _nest_lvl) From f6ca3e3fe11b0c5679ab2a3bb8764280ac084651 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:08:30 +0200 Subject: [PATCH 09/52] TST: df.ix[:,unicode] should not die with UnicodeEncodeError --- pandas/tests/test_internals.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 57ccfff23e5de..e9c0b2ae980d6 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -408,6 +408,13 @@ def test_get_numeric_data(self): self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool']) + def test_missing_unicode_key(self): + df=DataFrame({"a":[1]}) + try: + df.ix[:,u"\u05d0"] # should not raise UnicodeEncodeError + except KeyError: + pass # this is the expected exception + if __name__ == '__main__': # 
unittest.main() import nose From 51877c8ffb9625e4132e332e2ff943ac787e00ad Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:09:35 +0200 Subject: [PATCH 10/52] BUG: df.ix[:,unicode] should not die with UnicodeEncodeError --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 10a85c5592514..24e88fd43ab0b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -935,7 +935,7 @@ def _find_block(self, item): def _check_have(self, item): if item not in self.items: - raise KeyError('no item named %s' % str(item)) + raise KeyError('no item named %s' % com.pprint_thing(item)) def reindex_axis(self, new_axis, method=None, axis=0, copy=True): new_axis = _ensure_index(new_axis) From 209fb0b67a1b18c31735e42751529d6d52e729ac Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:28:24 +0200 Subject: [PATCH 11/52] TST: MultiIndex repr should properly encode unicode labels --- pandas/tests/test_index.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 2b83e7108a094..b94840d0dfd85 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1675,6 +1675,10 @@ def test_tolist(self): exp = list(self.index.values) self.assertEqual(result, exp) + def test_repr_with_unicode_data(self): + d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]} + index=pd.DataFrame(d).set_index(["a","b"]).index + self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_get_combined_index(): from pandas.core.index import _get_combined_index From 46ef8a606456044ecf048b2374defa047fcc9cdb Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 01:33:13 +0200 Subject: [PATCH 12/52] BUG: MultiIndex repr should properly encode unicode labels --- pandas/core/index.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 
65b60941fbbfc..291502c406018 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1321,11 +1321,15 @@ def __repr__(self): self[-50:].values]) else: values = self.values - summary = np.array2string(values, max_line_width=70) + + summary = com.pprint_thing(values) np.set_printoptions(threshold=options['threshold']) - return output % summary + if py3compat.PY3: + return output % summary + else: + return com.console_encode(output % summary) def __len__(self): return len(self.labels[0]) From 4cc097080da0b3197a10605d013e8f38eba5af71 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 22:55:33 +0200 Subject: [PATCH 13/52] TST: nested exceptions clobber the exception context, must reraise with named arg --- pandas/tests/test_frame.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5c5fd1902c4cc..57799c6455fee 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1295,6 +1295,21 @@ def test_iget_value(self): expected = self.frame.get_value(row, col) assert_almost_equal(result, expected) + def test_nested_exception(self): + # Ignore the strange way of triggering the problem + # (which may get fixed), it's just a way to trigger + # the issue or reraising an outer exception without + # a named argument + df=DataFrame({"a":[1,2,3],"b":[4,5,6],"c":[7,8,9]}).set_index(["a","b"]) + l=list(df.index) + l[0]=["a","b"] + df.index=l + + try: + print df + except Exception,e: + self.assertNotEqual(type(e),UnboundLocalError) + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() From f2db4c170850741314df93b82b2395011eaa6453 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 22:20:02 +0200 Subject: [PATCH 14/52] BUG: nested exceptions clobber the exception context, must reraise with named arg --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
5a000485d85a4..4889285d5879c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3919,7 +3919,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): except (NameError, UnboundLocalError): # pragma: no cover # no k defined yet pass - raise + raise e if len(results) > 0 and _is_sequence(results[0]): if not isinstance(results[0], Series): From 2d755914b18980c676bfe023c8cb6efe4ee5cc20 Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 00:48:37 +0200 Subject: [PATCH 15/52] BUG: use pprint_thing() rather then str() --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4889285d5879c..2c3bc9a31c9b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3915,7 +3915,8 @@ def _apply_standard(self, func, axis, ignore_failures=False): try: if hasattr(e, 'args'): k = res_index[i] - e.args = e.args + ('occurred at index %s' % str(k),) + e.args = e.args + ('occurred at index %s' % + com.pprint_thing(k),) except (NameError, UnboundLocalError): # pragma: no cover # no k defined yet pass From acfa4aba4be4d4b81656484664645e9708a4c2bf Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 00:49:59 +0200 Subject: [PATCH 16/52] BUG: use pprint_thing() rather then str() in Block.repr() --- pandas/core/internals.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 24e88fd43ab0b..cd1ca8838d65d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -49,9 +49,10 @@ def set_ref_items(self, ref_items, maybe_rename=True): self.ref_items = ref_items def __repr__(self): - shape = ' x '.join([str(s) for s in self.shape]) + shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ - return '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + result = '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + return 
com.console_encode(result) # repr must return byte-string def __contains__(self, item): return item in self.items From 2a22b5b277535716b57cd4bdc8f8e871bb118a66 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 9 Nov 2012 11:07:49 -0500 Subject: [PATCH 17/52] BUG: remove inlining for some functions to maintain clang compatibility #2188 --- pandas/src/numpy_helper.h | 18 +++++------------- pandas/src/util.pxd | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h index 0678e2c52d2f0..50c3ffa80dd63 100644 --- a/pandas/src/numpy_helper.h +++ b/pandas/src/numpy_helper.h @@ -136,8 +136,7 @@ char_to_string(char* data) { #include #include -PANDAS_INLINE double -xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing); +double xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing); int to_double(char *item, double *p_value, char sci, char decimal) { @@ -153,7 +152,7 @@ int to_double(char *item, double *p_value, char sci, char decimal) #define PyBytes_AS_STRING PyString_AS_STRING #endif -PANDAS_INLINE int floatify(PyObject* str, double *result) { +int floatify(PyObject* str, double *result) { int status; char *data; PyObject* tmp = NULL; @@ -242,17 +241,11 @@ PANDAS_INLINE int floatify(PyObject* str, double *result) { // * Commented out the other functions. 
// -PANDAS_INLINE void lowercase(char *p) { - for ( ; *p; ++p) *p = tolower(*p); -} - -PANDAS_INLINE void uppercase(char *p) { - for ( ; *p; ++p) *p = toupper(*p); -} -PANDAS_INLINE double xstrtod(const char *str, char **endptr, char decimal, - char sci, int skip_trailing) +double +xstrtod(const char *str, char **endptr, char decimal, + char sci, int skip_trailing) { double number; int exponent; @@ -401,4 +394,3 @@ void set_array_owndata(PyArrayObject *ao) { // } // return ap; // } - diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index 6f24cd53b7275..7a6757da7ad73 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -14,8 +14,8 @@ cdef extern from "numpy_helper.h": inline int assign_value_1d(ndarray, Py_ssize_t, object) except -1 inline cnp.int64_t get_nat() inline object get_value_1d(ndarray, Py_ssize_t) + inline int floatify(object, double*) except -1 inline char *get_c_string(object) - inline int floatify(object, double *result) except -1 inline object char_to_string(char*) cdef inline object get_value_at(ndarray arr, object loc): From cb1133f9449e9376f5d29a61964739ae753ab5ac Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 8 Nov 2012 10:48:41 -0500 Subject: [PATCH 18/52] BUG: join_non_unique doesn't sort properly for DatetimeIndex #2196 --- pandas/core/common.py | 7 +++++++ pandas/tools/merge.py | 2 +- pandas/tseries/tests/test_timeseries.py | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 7bbbaab49e864..60a0c30a49d78 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -852,6 +852,13 @@ def is_integer_dtype(arr_or_dtype): (issubclass(tipo, np.datetime64) or issubclass(tipo, np.timedelta64))) +def _is_int_or_datetime_dtype(arr_or_dtype): + # also timedelta64 + if isinstance(arr_or_dtype, np.dtype): + tipo = arr_or_dtype.type + else: + tipo = arr_or_dtype.dtype.type + return issubclass(tipo, np.integer) def is_datetime64_dtype(arr_or_dtype): if 
isinstance(arr_or_dtype, np.dtype): diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d92ed1cb01c42..62529201b287c 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -555,7 +555,7 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): - if com.is_integer_dtype(lk) and com.is_integer_dtype(rk): + if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk): klass = lib.Int64Factorizer lk = com._ensure_int64(lk) rk = com._ensure_int64(rk) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index eabacc2222ebf..daaa86f681ee1 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1645,6 +1645,14 @@ def _check_join(left, right, how='inner'): _check_join(index[:15], obj_index[5:], how='right') _check_join(index[:15], obj_index[5:], how='left') + def test_join_nonunique(self): + idx1 = to_datetime(['2012-11-06 16:00:11.477563', + '2012-11-06 16:00:11.477563']) + idx2 = to_datetime(['2012-11-06 15:11:09.006507', + '2012-11-06 15:11:09.006507']) + rs = idx1.join(idx2, how='outer') + self.assert_(rs.is_monotonic) + def test_unpickle_daterange(self): pth, _ = os.path.split(os.path.abspath(__file__)) filepath = os.path.join(pth, 'data', 'daterange_073.pickle') From 68bde2528825015a4a21a807b52869d1fa1ffb43 Mon Sep 17 00:00:00 2001 From: timmie Date: Fri, 9 Nov 2012 13:37:43 +0100 Subject: [PATCH 19/52] set link target for faq site --- doc/source/faq.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/faq.rst b/doc/source/faq.rst index 0f676ba6066de..2a3620f8ae50c 100644 --- a/doc/source/faq.rst +++ b/doc/source/faq.rst @@ -21,6 +21,7 @@ Frequently Asked Questions (FAQ) import matplotlib.pyplot as plt plt.close('all') +.. 
_ref-scikits-migration: Migrating from scikits.timeseries to pandas >= 0.8.0 ---------------------------------------------------- From d5d31f19fa3831cce263ad804c4718f0052900d2 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 9 Nov 2012 12:00:25 -0500 Subject: [PATCH 20/52] BUG: don't coerce False/True to 0/1 in engines. close #2199 --- RELEASE.rst | 3 ++- pandas/core/frame.py | 4 +--- pandas/src/engines.pyx | 10 ++++++++-- pandas/tests/test_frame.py | 10 ++++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 2bc47c5d0fdf9..28f8e7ea0ae47 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -92,7 +92,8 @@ pandas 0.9.1 - Fix Series and DataFrame.diff for integer dtypes (#2087, #2174) - Fix bug when taking intersection of DatetimeIndex with empty index (#2129) - Pass through timezone information when calling DataFrame.align (#2127) - + - Properly sort when joining on datetime64 values (#2196) + - Fix indexing bug in which False/True were being coerced to 0/1 (#2199) pandas 0.9.0 ============ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5a000485d85a4..7ca84e3bad7a4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2099,12 +2099,10 @@ def xs(self, key, axis=0, level=None, copy=True): new_values = self._data.fast_2d_xs(loc, copy=copy) return Series(new_values, index=self.columns, name=self.index[loc]) - else: # isinstance(loc, slice) or loc.dtype == np.bool_: + else: result = self[loc] result.index = new_index return result - # else: - # return self.take(loc) def lookup(self, row_labels, col_labels): """ diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index a564cbafae7ab..9f2ec4edbd1fc 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -127,10 +127,11 @@ cdef class IndexEngine: if not self.unique: return self._get_loc_duplicates(val) + self._check_type(val) + try: return self.mapping.get_item(val) except TypeError: - self._check_type(val) raise KeyError(val) 
cdef inline _get_loc_duplicates(self, object val): @@ -222,7 +223,7 @@ cdef class IndexEngine: cdef _make_hash_table(self, n): raise NotImplementedError - cdef inline _check_type(self, object val): + cdef _check_type(self, object val): hash(val) cdef inline _ensure_mapping_populated(self): @@ -280,6 +281,11 @@ cdef class Int64Engine(IndexEngine): return _algos.backfill_int64(self._get_index_values(), other, limit=limit) + cdef _check_type(self, object val): + hash(val) + if util.is_bool_object(val): + raise KeyError(val) + cdef _maybe_get_bool_indexer(self, object val): cdef: ndarray[uint8_t, cast=True] indexer diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5c5fd1902c4cc..43bd71c9c2802 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1108,6 +1108,16 @@ def test_getitem_ix_boolean_duplicates_multiple(self): exp = df[df[0] > 0] assert_frame_equal(result, exp) + def test_getitem_setitem_ix_bool_keyerror(self): + # #2199 + df = DataFrame({'a': [1, 2, 3]}) + + self.assertRaises(KeyError, df.ix.__getitem__, False) + self.assertRaises(KeyError, df.ix.__getitem__, True) + + self.assertRaises(KeyError, df.ix.__setitem__, False, 0) + self.assertRaises(KeyError, df.ix.__setitem__, True, 0) + def test_getitem_list_duplicates(self): # #1943 df = DataFrame(np.random.randn(4,4), columns=list('AABC')) From 02b6f7960ed655ca71bcdbd009125346806314ec Mon Sep 17 00:00:00 2001 From: timmie Date: Fri, 9 Nov 2012 13:37:12 +0100 Subject: [PATCH 21/52] link the info on scikits.timeseries link to the FAQ, devel pages and issue list. --- doc/source/related.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/related.rst b/doc/source/related.rst index e613d34c2a29f..33dad8115e5b1 100644 --- a/doc/source/related.rst +++ b/doc/source/related.rst @@ -45,3 +45,13 @@ customizable by the user (so 5-minutely data is easier to do with pandas for example). We are aiming to merge these libraries together in the near future. 
+ +Progress: + + - It has a collection of moving window statistics implemented in + `Bottleneck `__ + - `Outstanding issues `__ + +Summarising, Pandas offers superior functionality due to its combination with the :py:class:`pandas.DataFrame`. + +An introduction for former users of :mod:`scikits.timeseries` is provided in the :ref:`migration guide `. \ No newline at end of file From 27e34a4daab82e51312714794db78d2b860327f8 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 9 Nov 2012 12:40:00 -0500 Subject: [PATCH 22/52] BUG: improper MultiIndex conversion issue. close #2200 --- RELEASE.rst | 3 +++ pandas/core/index.py | 9 ++++++++- pandas/core/internals.py | 4 +++- pandas/tests/test_multilevel.py | 16 ++++++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 28f8e7ea0ae47..bd3af90cf68f1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -94,6 +94,9 @@ pandas 0.9.1 - Pass through timezone information when calling DataFrame.align (#2127) - Properly sort when joining on datetime64 values (#2196) - Fix indexing bug in which False/True were being coerced to 0/1 (#2199) + - Many unicode formatting fixes (#2201) + - Fix improper MultiIndex conversion issue when assigning + e.g. DataFrame.index (#2200) pandas 0.9.0 ============ diff --git a/pandas/core/index.py b/pandas/core/index.py index 291502c406018..1e4d6347aaeec 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2438,8 +2438,15 @@ def _ensure_index(index_like): return Index(index_like, name=index_like.name) if isinstance(index_like, list): - if len(index_like) and isinstance(index_like[0], (list, np.ndarray)): + klasses = (list, np.ndarray) + all_arrays = all(isinstance(x, klasses) for x in index_like) + + if len(index_like) > 0 and all_arrays: return MultiIndex.from_arrays(index_like) + else: + # #2200 ? 
+ index_like = [tuple(x) if isinstance(x, klasses) else x + for x in index_like] return Index(index_like) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index cd1ca8838d65d..ddbf4b85fcb68 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -506,10 +506,12 @@ def is_mixed_dtype(self): def set_axis(self, axis, value): cur_axis = self.axes[axis] + value = _ensure_index(value) + if len(value) != len(cur_axis): raise Exception('Length mismatch (%d vs %d)' % (len(value), len(cur_axis))) - self.axes[axis] = _ensure_index(value) + self.axes[axis] = value if axis == 0: for block in self.blocks: diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9a279990e9939..d2502ac1844c7 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1578,6 +1578,22 @@ def test_multiindex_na_repr(self): idf = df3.set_index(['A' * 30, 'C' * 30]) repr(idf) + def test_assign_index_sequences(self): + # #2200 + df = DataFrame({"a":[1,2,3], + "b":[4,5,6], + "c":[7,8,9]}).set_index(["a","b"]) + l = list(df.index) + l[0]=("faz","boo") + df.index = l + repr(df) + + # this travels an improper code path + l[0] = ["faz","boo"] + df.index = l + repr(df) + + if __name__ == '__main__': # unittest.main() From 6b0939009c2d469fbdd785d861447a70edfb0ac9 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 9 Nov 2012 13:05:32 -0500 Subject: [PATCH 23/52] ENH: add pow/rpow to DataFrame. 
close #2190 --- RELEASE.rst | 1 + pandas/core/frame.py | 2 ++ pandas/tests/test_frame.py | 30 ++++++++++++++++++++---------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index bd3af90cf68f1..fb5b1173fd512 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -33,6 +33,7 @@ pandas 0.9.1 - New `top` and `bottom` options for handling NAs in rank (#1508, #2159) - Add `where` and `mask` functions to DataFrame (#2109, #2151) - Add `at_time` and `between_time` functions to DataFrame (#2149) + - Add flexible `pow` and `rpow` methods to DataFrame (#2190) **API Changes** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9f36de1fcc3c7..31c1a09f409c3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -676,11 +676,13 @@ def __contains__(self, key): mul = _arith_method(operator.mul, 'multiply') sub = _arith_method(operator.sub, 'subtract') div = divide = _arith_method(lambda x, y: x / y, 'divide') + pow = _arith_method(operator.pow, 'pow') radd = _arith_method(_radd_compat, 'radd') rmul = _arith_method(operator.mul, 'rmultiply') rsub = _arith_method(lambda x, y: y - x, 'rsubtract') rdiv = _arith_method(lambda x, y: y / x, 'rdivide') + rpow = _arith_method(lambda x, y: y ** x, 'rpow') __add__ = _arith_method(operator.add, '__add__', default_axis=None) __sub__ = _arith_method(operator.sub, '__sub__', default_axis=None) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8af7e991d9f66..0b36e8d39a00a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1316,7 +1316,7 @@ def test_nested_exception(self): df.index=l try: - print df + repr(df) except Exception,e: self.assertNotEqual(type(e),UnboundLocalError) @@ -3074,15 +3074,25 @@ def test_first_last_valid(self): self.assert_(index == frame.index[-6]) def test_arith_flex_frame(self): - res_add = self.frame.add(self.frame) - res_sub = self.frame.sub(self.frame) - res_mul = self.frame.mul(self.frame) - res_div = self.frame.div(2 
* self.frame) - - assert_frame_equal(res_add, self.frame + self.frame) - assert_frame_equal(res_sub, self.frame - self.frame) - assert_frame_equal(res_mul, self.frame * self.frame) - assert_frame_equal(res_div, self.frame / (2 * self.frame)) + ops = ['add', 'sub', 'mul', 'div', 'pow'] + aliases = {'div': 'truediv'} + + for op in ops: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(self.frame, op)(2 * self.frame) + exp = f(self.frame, 2 * self.frame) + assert_frame_equal(result, exp) + + # res_add = self.frame.add(self.frame) + # res_sub = self.frame.sub(self.frame) + # res_mul = self.frame.mul(self.frame) + # res_div = self.frame.div(2 * self.frame) + + # assert_frame_equal(res_add, self.frame + self.frame) + # assert_frame_equal(res_sub, self.frame - self.frame) + # assert_frame_equal(res_mul, self.frame * self.frame) + # assert_frame_equal(res_div, self.frame / (2 * self.frame)) const_add = self.frame.add(1) assert_frame_equal(const_add, self.frame + 1) From 5d5bda6a406c6e25aaaec14ad79ca93b24ecc42e Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 19:45:08 +0200 Subject: [PATCH 24/52] ENH: eliminate _str() in favor of pprint_thing --- pandas/core/format.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 4505e6153a9a3..d87d2006785db 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -406,13 +406,6 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names - -def _str(x): - if not isinstance(x, basestring): - return str(x) - return x - - class HTMLFormatter(object): indent_delta = 2 @@ -437,7 +430,7 @@ def _maybe_bold_row(x): self._maybe_bold_row = _maybe_bold_row def write(self, s, indent=0): - self.elements.append(' ' * indent + _str(s)) + self.elements.append(' ' * indent + com.pprint_thing(s)) def write_th(self, s, indent=0, tags=None): return self._write_cell(s, kind='th', 
indent=indent, tags=tags) @@ -450,7 +443,7 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): start_tag = '<%s %s>' % (kind, tags) else: start_tag = '<%s>' % kind - self.write('%s%s' % (start_tag, _str(s), kind), indent) + self.write('%s%s' % (start_tag, com.pprint_thing(s), kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, align=None, tags=None): From 8d45dc4b766629a99d5c766448c433bc2c067b8e Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 9 Nov 2012 14:39:25 -0500 Subject: [PATCH 25/52] ENH: let users disable autoconversion to PeriodIndex in plotting so an externally setup twinx can work with irregular + regular freq timeseries #2205 --- RELEASE.rst | 1 + doc/source/v0.9.1.txt | 5 ++++- pandas/__init__.py | 2 +- pandas/tests/test_graphics.py | 22 ++++++++++++++++++ pandas/tools/plotting.py | 42 ++++++++++++++++++++++++++++++++++- 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index fb5b1173fd512..a64cfdc44c51f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -56,6 +56,7 @@ pandas 0.9.1 - Make .drop(...) work with non-unique indexes (#2101) - Improve performance of Series/DataFrame.diff (re: #2087) - Support unary ~ (__invert__) in DataFrame (#2110) + - Turn off pandas-style tick locators and formatters (#2205) **Bug fixes** diff --git a/doc/source/v0.9.1.txt b/doc/source/v0.9.1.txt index 13ad23eecb017..6cf6bb0ed7274 100644 --- a/doc/source/v0.9.1.txt +++ b/doc/source/v0.9.1.txt @@ -60,6 +60,9 @@ New features parse_cols='A:D') + - Added option to disable pandas-style tick locators and formatters + using `series.plot(x_compat=True)` or `pandas.plot_params['x_compat'] = + True` (GH2205_) - Existing TimeSeries methods `at_time` and `between_time` were added to DataFrame (GH2149_) - DataFrame.dot can now accept ndarrays (GH2042_) @@ -122,6 +125,7 @@ on GitHub for a complete list. .. _GH2124: https://github.com/pydata/pandas/issues/2124 .. 
_GH2110: https://github.com/pydata/pandas/issues/2110 .. _GH2184: https://github.com/pydata/pandas/issues/2184 +.. _GH2205: https://github.com/pydata/pandas/issues/2205 .. _GH2181: https://github.com/pydata/pandas/issues/2181 .. _GH2180: https://github.com/pydata/pandas/issues/2180 @@ -197,4 +201,3 @@ on GitHub for a complete list. .. _GH1959: https://github.com/pydata/pandas/issues/1959 .. _GH1890: https://github.com/pydata/pandas/issues/1890 .. _GH1555: https://github.com/pydata/pandas/issues/1555 - diff --git a/pandas/__init__.py b/pandas/__init__.py index 1119f72b5d009..3760e3fbc434b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -36,5 +36,5 @@ from pandas.tools.describe import value_range from pandas.tools.merge import merge, concat, ordered_merge from pandas.tools.pivot import pivot_table, crosstab -from pandas.tools.plotting import scatter_matrix +from pandas.tools.plotting import scatter_matrix, plot_params from pandas.tools.tile import cut, qcut diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 5050203424436..5d536dcbb3f79 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -233,6 +233,28 @@ def test_plot_xy(self): # columns.inferred_type == 'mixed' # TODO add MultiIndex test + @slow + def test_xcompat(self): + import pandas as pd + import matplotlib.pyplot as plt + + df = tm.makeTimeDataFrame() + ax = df.plot(x_compat=True) + lines = ax.get_lines() + self.assert_(not isinstance(lines[0].get_xdata(), PeriodIndex)) + + plt.close('all') + pd.plot_params['xaxis.compat'] = True + ax = df.plot() + lines = ax.get_lines() + self.assert_(not isinstance(lines[0].get_xdata(), PeriodIndex)) + + plt.close('all') + pd.plot_params['x_compat'] = False + ax = df.plot() + lines = ax.get_lines() + self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + def _check_data(self, xp, rs): xp_lines = xp.get_lines() rs_lines = rs.get_lines() diff --git a/pandas/tools/plotting.py 
b/pandas/tools/plotting.py index d2ab5f3038b00..4cfd32d58399d 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -26,6 +26,43 @@ def _get_standard_kind(kind): return {'density': 'kde'}.get(kind, kind) +class _Options(dict): + + #alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat' : 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self): + self['xaxis.compat'] = False + + def __getitem__(self, key): + key = self._get_canonical_key(key) + if key not in self: + raise ValueError('%s is not a valid pandas plotting option' % key) + return super(_Options, self).__getitem__(key) + + def __setitem__(self, key, value): + key = self._get_canonical_key(key) + return super(_Options, self).__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError('Cannot remove default parameter %s' % key) + return super(_Options, self).__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super(_Options, self).__contains__(key) + + def reset(self): + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + +plot_params = _Options() + def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', **kwds): """ @@ -875,6 +912,9 @@ class LinePlot(MPLPlot): def __init__(self, data, **kwargs): self.mark_right = kwargs.pop('mark_right', True) MPLPlot.__init__(self, data, **kwargs) + self.x_compat = plot_params['x_compat'] + if 'x_compat' in self.kwds: + self.x_compat = bool(self.kwds.pop('x_compat')) def _index_freq(self): from pandas.core.frame import DataFrame @@ -923,7 +963,7 @@ def _maybe_add_color(self, colors, kwds, style, i): def _make_plot(self): import pandas.tseries.plotting as tsplot # this is slightly deceptive - if self.use_index and self._use_dynamic_x(): + if not self.x_compat and self.use_index and 
self._use_dynamic_x(): data = self._maybe_convert_index(self.data) self._make_ts_plot(data, **self.kwds) else: From aa5d9b52eb76f438ea7e26bb9a600b532802094f Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 9 Nov 2012 16:42:21 -0500 Subject: [PATCH 26/52] ENH: add pd.plot_params.use as contextmanager to temporarily set a plot parameter #2205 --- pandas/tests/test_graphics.py | 12 ++++++++++++ pandas/tools/plotting.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 5d536dcbb3f79..6a41d07708335 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -255,6 +255,18 @@ def test_xcompat(self): lines = ax.get_lines() self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + plt.close('all') + #useful if you're plotting a bunch together + with pd.plot_params.use('x_compat', True): + ax = df.plot() + lines = ax.get_lines() + self.assert_(not isinstance(lines[0].get_xdata(), PeriodIndex)) + + plt.close('all') + ax = df.plot() + lines = ax.get_lines() + self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + def _check_data(self, xp, rs): xp_lines = xp.get_lines() rs_lines = rs.get_lines() diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 4cfd32d58399d..f12e2fcdf8d87 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -4,6 +4,7 @@ import datetime import warnings import re +from contextlib import contextmanager import numpy as np @@ -27,6 +28,12 @@ def _get_standard_kind(kind): class _Options(dict): + """ + Stores pandas plotting options. 
+ Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later + """ #alias so the names are same as plotting method parameter names _ALIASES = {'x_compat' : 'xaxis.compat'} @@ -56,11 +63,32 @@ def __contains__(self, key): return super(_Options, self).__contains__(key) def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ self.__init__() def _get_canonical_key(self, key): return self._ALIASES.get(key, key) + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + plot_params = _Options() def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, From e7f6eda0563ee05f9581d6d3a88cfeda7acb296b Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 9 Nov 2012 16:42:54 -0500 Subject: [PATCH 27/52] DOC: describe suppression of auto tick locator adjustment #2205 --- doc/source/visualization.rst | 48 ++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index f4cbbe7a074a7..5804cdcee53be 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -123,7 +123,7 @@ To plot data on a secondary y-axis, use the ``secondary_y`` keyword: Selective Plotting on Secondary Y-axis ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To plot some columns in a DataFrame, give the column names to the `secondary_y` +To plot some columns in a DataFrame, give the column names to the ``secondary_y`` keyword: .. ipython:: python @@ -135,7 +135,7 @@ keyword: Note that the columns plotted on the secondary y-axis is automatically marked with "(right)" in the legend. 
To turn off the automatic marking, use the -`mark_right=False` keyword: +``mark_right=False`` keyword: .. ipython:: python @@ -145,6 +145,50 @@ with "(right)" in the legend. To turn off the automatic marking, use the df.plot(secondary_y=['A', 'B'], mark_right=False) +Suppressing tick resolution adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandas includes automatic tick resolution adjustment for regular frequency +time-series data. For limited cases where pandas cannot infer the frequency +information (e.g., in an externally created ``twinx``), you can choose to +suppress this behavior for alignment purposes. + +Here is the default behavior, notice how the x-axis tick labelling is performed: + +.. ipython:: python + + plt.figure() + + @savefig ser_plot_suppress.png width=4.5in + df.A.plot() + + +Using the ``x_compat`` parameter, you can suppress this behavior: + +.. ipython:: python + + plt.figure() + + @savefig ser_plot_suppress_parm.png width=4.5in + df.A.plot(x_compat=True) + + +If you have more than one plot that needs to be suppressed, the ``use`` method +in ``pandas.plot_params`` can be used in a `with statement`: + +..
ipython:: python + + import pandas as pd + + plt.figure() + + @savefig ser_plot_suppress_context.png width=4.5in + with pd.plot_params.use('x_compat', True): + df.A.plot(color='r') + df.B.plot(color='g') + df.C.plot(color='b') + + Targeting different subplots ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From a5075d124f89cbca91eb5caeaeda9ecb3452b7f9 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 9 Nov 2012 17:00:28 -0500 Subject: [PATCH 28/52] TST: add from future import for py2.5 compat --- pandas/tests/test_graphics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 6a41d07708335..311295a9e2b88 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1,3 +1,5 @@ +from __future__ import with_statement + import nose import os import string From 2d576ee9b9337210a8c85f11e48177db42ecd57f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 9 Nov 2012 18:58:39 -0500 Subject: [PATCH 29/52] RLS: mark 0.9.1 RC 1 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2da0f3f67c598..8f6084ff49dbb 100755 --- a/setup.py +++ b/setup.py @@ -205,9 +205,9 @@ MAJOR = 0 MINOR = 9 MICRO = 1 -ISRELEASED = False +ISRELEASED = True VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) -QUALIFIER = '' +QUALIFIER = 'rc1' FULLVERSION = VERSION if not ISRELEASED: From 0ba00ee3a7e1562d3f2c9b453ca213d0c555c926 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 01:10:16 +0200 Subject: [PATCH 30/52] CLN: use com._is_sequence instead of duplicating code --- pandas/core/indexing.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1ab2c3b7f8460..0cfb4004708fa 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,20 +10,9 @@ # "null slice" _NS = slice(None, None) - -def _is_sequence(x): - try: - iter(x) - assert(not isinstance(x, basestring)) - return 
True - except Exception: - return False - - class IndexingError(Exception): pass - class _NDFrameIndexer(object): def __init__(self, obj): @@ -149,7 +138,7 @@ def _align_series(self, indexer, ser): if isinstance(indexer, tuple): for i, idx in enumerate(indexer): ax = self.obj.axes[i] - if _is_sequence(idx) or isinstance(idx, slice): + if com._is_sequence(idx) or isinstance(idx, slice): new_ix = ax[idx] if ser.index.equals(new_ix): return ser.values.copy() @@ -174,7 +163,7 @@ def _align_frame(self, indexer, df): idx, cols = None, None for i, ix in enumerate(indexer): ax = self.obj.axes[i] - if _is_sequence(ix) or isinstance(ix, slice): + if com._is_sequence(ix) or isinstance(ix, slice): if idx is None: idx = ax[ix] elif cols is None: From c8a45cb67bc20672e5d6225ed69a435d14de531f Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:07:13 +0200 Subject: [PATCH 31/52] TST: df with dupe cols should raise KeyError on accessing non-existent col via list --- pandas/tests/test_frame.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0b36e8d39a00a..c30dde8cc3490 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -62,6 +62,15 @@ def test_getitem(self): self.assert_('random' not in self.frame) self.assertRaises(Exception, self.frame.__getitem__, 'random') + def test_getitem_dupe_cols(self): + df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b']) + try: + df[['baf']] + except KeyError: + pass + else: + self.fail("Dataframe failed to raise KeyError") + def test_get(self): b = self.frame.get('B') assert_series_equal(b, self.frame['B']) From 65e716af386e309a1d54e45d426966181c89bd86 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:25:35 +0200 Subject: [PATCH 32/52] BUG: df with dupe cols should raise KeyError on accessing non-existent col via list #2218 --- pandas/core/frame.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/frame.py b/pandas/core/frame.py index 31c1a09f409c3..2bd4655371b70 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1796,13 +1796,18 @@ def _getitem_array(self, key): indexer = self.columns.get_indexer(key) mask = indexer == -1 if mask.any(): - raise KeyError("No column(s) named: %s" % str(key[mask])) + raise KeyError("No column(s) named: %s" % + com.pprint_thing(key[mask])) result = self.reindex(columns=key) if result.columns.name is None: result.columns.name = self.columns.name return result else: mask = self.columns.isin(key) + for k in key: + if k not in self.columns: + raise KeyError("No column(s) named: %s" % + com.pprint_thing(k)) return self.take(mask.nonzero()[0], axis=1) def _slice(self, slobj, axis=0): From 611cd0f53d3e1ddcbe38d026f047f20ac59ec4b0 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 01:44:16 +0200 Subject: [PATCH 33/52] TST: df.iteritems() should yield Series even with non-unique column labels --- pandas/tests/test_frame.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c30dde8cc3490..01b5d6ae46fd4 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1145,6 +1145,11 @@ def test_get_value(self): expected = self.frame[col][idx] assert_almost_equal(result, expected) + def test_iteritems(self): + df=DataFrame([[1,2,3],[4,5,6]],columns=['a','a','b']) + for k,v in df.iteritems(): + self.assertEqual(type(v),Series) + def test_lookup(self): def alt(df, rows, cols): result = [] @@ -7458,6 +7463,7 @@ def __nonzero__(self): self.assert_(r0.all()) self.assert_(r1.all()) + if __name__ == '__main__': # unittest.main() import nose From 4a5b75b44b00483df9ff4816b6a39b5043f9e2d3 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 01:31:09 +0200 Subject: [PATCH 34/52] BUG: modify df.iteritems to support duplicate column labels #2219 --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/frame.py b/pandas/core/frame.py index 2bd4655371b70..e4e9705e562d0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -633,7 +633,8 @@ def keys(self): def iteritems(self): """Iterator over (column, series) pairs""" - return ((k, self[k]) for k in self.columns) + for i, k in enumerate(self.columns): + yield (k,self.take([i],axis=1)[k]) def iterrows(self): """ From bd45d391bce7fa2310c7dbe4646d4b1666783332 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 02:43:59 +0200 Subject: [PATCH 35/52] ENH: warn user when invoking to_dict() on df with non-unique columns --- pandas/core/frame.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4e9705e562d0..05d3713375481 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -837,6 +837,10 @@ def to_dict(self, outtype='dict'): ------- result : dict like {column -> {index -> value}} """ + import warnings + if not self.columns.is_unique: + warnings.warn("DataFrame columns are not unique, some " + "columns will be omitted.",UserWarning) if outtype.lower().startswith('d'): return dict((k, v.to_dict()) for k, v in self.iteritems()) elif outtype.lower().startswith('l'): From 804eaaedc86a1e5989ae7cc3476b729ae9ad8cf4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 16:09:46 -0500 Subject: [PATCH 36/52] in core/frame.py changed method __getitem__ to use .mask directly (e.g. 
df.mask(df > 0) is equivalent semantically to df[df>0]) added inplace keyword to where method (to update the dataframe in place, default is NOT to use inplace, and return a new dataframe) changed method _boolean_set_ to use where and inplace=True (this allows alignment of the passed values and is slightly less strict than the current method) all tests pass (as well as an added test in boolean frame indexing) --- pandas/core/frame.py | 19 +++++++++---------- pandas/tests/test_frame.py | 6 ++++++ 2 files changed, 15 insertions(+), 10 deletions(-) mode change 100644 => 100755 pandas/core/frame.py mode change 100644 => 100755 pandas/tests/test_frame.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py old mode 100644 new mode 100755 index 31c1a09f409c3..cada5f23cd87c --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1775,9 +1775,8 @@ def __getitem__(self, key): elif isinstance(self.columns, MultiIndex): return self._getitem_multilevel(key) elif isinstance(key, DataFrame): - values = key.values - if values.dtype == bool: - return self.values[values] + if key.values.dtype == bool: + return self.mask(key) else: raise ValueError('Cannot index using non-boolean DataFrame') else: @@ -1891,11 +1890,7 @@ def _boolean_set(self, key, value): if self._is_mixed_type: raise ValueError('Cannot do boolean setting on mixed-type frame') - if isinstance(value, DataFrame): - assert(value._indexed_same(self)) - np.putmask(self.values, mask, value.values) - else: - self.values[mask] = value + self.where(key, value, inplace=True) def _set_item_multiple(self, keys, value): if isinstance(value, DataFrame): @@ -4878,7 +4873,7 @@ def combineMult(self, other): """ return self.mul(other, fill_value=1.) - def where(self, cond, other): + def where(self, cond, other, inplace=False): """ Return a DataFrame with the same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. 
@@ -4905,9 +4900,13 @@ def where(self, cond, other): if isinstance(other, DataFrame): _, other = self.align(other, join='left', fill_value=NA) + if inplace: + np.putmask(self.values, cond, other) + return self + rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) - + def mask(self, cond): """ Returns copy of self whose values are replaced with nan if the diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py old mode 100644 new mode 100755 index 0b36e8d39a00a..69b744a84beb9 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -141,6 +141,12 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) + # test df[df >0] works + bif = self.tsframe[self.tsframe > 0] + bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) + self.assert_(isinstance(bif,DataFrame)) + self.assert_(bif.shape == self.tsframe.shape) + assert_frame_equal(bif,bifw) def test_getitem_boolean_list(self): df = DataFrame(np.arange(12).reshape(3,4)) From 540fafd32f1a4a93c79ab3cec190c47e8ebcfcd5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 16:44:27 -0500 Subject: [PATCH 37/52] relaxed __setitem__ restriction on boolean indexing a frame on an equal sized frame thus we now allow: df[df[:-1]<0] = 2 (essentially partial boolean indexing) all tests continue to pass (added new test to test partial boolean indexing, removed test requiring an equal indexed frame) --- pandas/core/frame.py | 5 ----- pandas/tests/test_frame.py | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cada5f23cd87c..b83961309253a 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1870,11 +1870,6 @@ def __setitem__(self, key, value): # support boolean setting with DataFrame input, e.g. 
# df[df > df2] = 0 if isinstance(key, DataFrame): - if not (key.index.equals(self.index) and - key.columns.equals(self.columns)): - raise PandasError('Can only index with like-indexed ' - 'DataFrame objects') - self._boolean_set(key, value) elif isinstance(key, (np.ndarray, list)): return self._set_item_multiple(key, value) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 69b744a84beb9..824b668bed977 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -284,7 +284,11 @@ def test_setitem_boolean(self): values[values == 5] = 0 assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df[:-1] > 0, 2) + # a df that needs alignment first + df[df[:-1]<0] = 2 + np.putmask(values[:-1],values[:-1]<0,2) + assert_almost_equal(df.values, values) + self.assertRaises(Exception, df.__setitem__, df * 0, 2) # index with DataFrame From 030bc669b0a338deb35773612472a76dbf2ae9dd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Nov 2012 17:31:33 -0500 Subject: [PATCH 38/52] fixed file modes for core/frame.py, test/test_frame.py --- pandas/core/frame.py | 3 +-- pandas/tests/test_frame.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b83961309253a..5e41bc6ec9481 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1878,8 +1878,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _boolean_set(self, key, value): - mask = key.values - if mask.dtype != np.bool_: + if key.values.dtype != np.bool_: raise ValueError('Must pass DataFrame with boolean values only') if self._is_mixed_type: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 824b668bed977..ef4e557b8e3a8 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -142,6 +142,7 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) # test df[df >0] works + bif = 
self.tsframe[self.tsframe > 0] bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) self.assert_(isinstance(bif,DataFrame)) From a72d886ed1d9b715844c2742a7b06b47abdf9e21 Mon Sep 17 00:00:00 2001 From: Justin C Johnson Date: Sun, 11 Nov 2012 18:35:03 -0600 Subject: [PATCH 39/52] BUG: Incorrect error message due to zero based levels. close #2226 --- pandas/core/index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 1e4d6347aaeec..9638da8f418cf 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1364,9 +1364,10 @@ def _get_level_number(self, level): raise Exception('Level %s not found' % str(level)) elif level < 0: level += self.nlevels + # Note: levels are zero-based elif level >= self.nlevels: raise ValueError('Index has only %d levels, not %d' - % (self.nlevels, level)) + % (self.nlevels, level + 1)) return level _tuples = None From 4dae1b5320444ad4c7c0dcc21416e6de04d3b7b3 Mon Sep 17 00:00:00 2001 From: y-p Date: Mon, 12 Nov 2012 16:02:21 +0200 Subject: [PATCH 40/52] TST: add dual ( x op y <-> y op x ) tests for arith operators The previous tests tested 1 ** panel, but not panel**1 for example. 
--- pandas/tests/test_panel.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 202ed0ed3adb3..82c6ea65d133a 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -229,6 +229,12 @@ def test_arith(self): self._test_op(self.panel, lambda x, y: y / x) self._test_op(self.panel, lambda x, y: y ** x) + self._test_op(self.panel, lambda x, y: x + y) # panel + 1 + self._test_op(self.panel, lambda x, y: x - y) # panel - 1 + self._test_op(self.panel, lambda x, y: x * y) # panel * 1 + self._test_op(self.panel, lambda x, y: x / y) # panel / 1 + self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 + self.assertRaises(Exception, self.panel.__add__, self.panel['ItemA']) @staticmethod From 3001af27fec931116cd14796ad83d24ff96d8640 Mon Sep 17 00:00:00 2001 From: y-p Date: Sun, 11 Nov 2012 13:33:00 +0200 Subject: [PATCH 41/52] TST: getting column from and applying op to a df should commute analogue to test_panel.test_arith which does the same for panel/frame as this does for frame/series. 
--- pandas/tests/test_frame.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0b36e8d39a00a..8ef2df02d6447 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3114,6 +3114,39 @@ def test_arith_mixed(self): 'B': [2, 4, 6]}) assert_frame_equal(result, expected) + + def test_arith_getitem_commute(self): + df = DataFrame({'A' : [1.1,3.3],'B' : [2.5,-3.9]}) + + self._test_op(df, operator.add) + self._test_op(df, operator.sub) + self._test_op(df, operator.mul) + self._test_op(df, operator.truediv) + self._test_op(df, operator.floordiv) + self._test_op(df, operator.pow) + + self._test_op(df, lambda x, y: y + x) + self._test_op(df, lambda x, y: y - x) + self._test_op(df, lambda x, y: y * x) + self._test_op(df, lambda x, y: y / x) + self._test_op(df, lambda x, y: y ** x) + + self._test_op(df, lambda x, y: x + y) + self._test_op(df, lambda x, y: x - y) + self._test_op(df, lambda x, y: x * y) + self._test_op(df, lambda x, y: x / y) + self._test_op(df, lambda x, y: x ** y) + + @staticmethod + def _test_op(df, op): + result = op(df, 1) + + if not df.columns.is_unique: + raise ValueError("Only unique columns supported by this test") + + for col in result.columns: + assert_series_equal(result[col], op(df[col], 1)) + def test_bool_flex_frame(self): data = np.random.randn(5, 3) other_data = np.random.randn(5, 3) From 8034116f41d642b8c16232ca0c971ae110072e42 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 13 Nov 2012 12:44:01 -0500 Subject: [PATCH 42/52] in core/frame.py removed mask method made other optional kw parm in where changed __setitem__ to use where (rather than mask) --- pandas/core/frame.py | 19 ++----------------- pandas/tests/test_frame.py | 3 --- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e41bc6ec9481..fe7ca6fa5c9b1 100755 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -1776,7 +1776,7 @@ def __getitem__(self, key): return self._getitem_multilevel(key) elif isinstance(key, DataFrame): if key.values.dtype == bool: - return self.mask(key) + return self.where(key) else: raise ValueError('Cannot index using non-boolean DataFrame') else: @@ -4867,7 +4867,7 @@ def combineMult(self, other): """ return self.mul(other, fill_value=1.) - def where(self, cond, other, inplace=False): + def where(self, cond, other=NA, inplace=False): """ Return a DataFrame with the same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. @@ -4901,21 +4901,6 @@ def where(self, cond, other, inplace=False): rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) - def mask(self, cond): - """ - Returns copy of self whose values are replaced with nan if the - corresponding entry in cond is False - - Parameters - ---------- - cond: boolean DataFrame or array - - Returns - ------- - wh: DataFrame - """ - return self.where(cond, NA) - _EMPTY_SERIES = Series([]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ef4e557b8e3a8..aec7ddffd84e4 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -142,7 +142,6 @@ def test_getitem_boolean(self): self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) # test df[df >0] works - bif = self.tsframe[self.tsframe > 0] bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) self.assert_(isinstance(bif,DataFrame)) @@ -5215,8 +5214,6 @@ def test_where(self): for k, v in rs.iteritems(): assert_series_equal(v, np.where(cond[k], df[k], other5)) - assert_frame_equal(rs, df.mask(cond)) - err1 = (df + 1).values[0:2, :] self.assertRaises(ValueError, df.where, cond, err1) From a4143469a961ccc9740a292c84b27c00d9674425 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 13 Nov 2012 12:56:09 -0500 Subject: 
[PATCH 43/52] added back mask method that does condition inversion added condition testing to where that raised ValueError on an invalid condition (e.g. not an ndarray like object) added tests for same --- pandas/core/frame.py | 18 ++++++++++++++++++ pandas/tests/test_frame.py | 12 ++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe7ca6fa5c9b1..c9184f148e5a9 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4882,6 +4882,9 @@ def where(self, cond, other=NA, inplace=False): ------- wh: DataFrame """ + if not hasattr(cond,'shape'): + raise ValueError('where requires an ndarray like object for its condition') + if isinstance(cond, np.ndarray): if cond.shape != self.shape: raise ValueError('Array onditional must be same shape as self') @@ -4901,6 +4904,21 @@ def where(self, cond, other=NA, inplace=False): rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) + def mask(self, cond): + """ + Returns copy of self whose values are replaced with nan if the + inverted condition is True + + Parameters + ---------- + cond: boolean DataFrame or array + + Returns + ------- + wh: DataFrame + """ + return self.where(~cond, NA) + _EMPTY_SERIES = Series([]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index aec7ddffd84e4..dcc7bcb909cd4 100755 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5220,6 +5220,18 @@ def test_where(self): err2 = cond.ix[:2, :].values self.assertRaises(ValueError, df.where, err2, other1) + # invalid conditions + self.assertRaises(ValueError, df.mask, True) + self.assertRaises(ValueError, df.mask, 0) + + def test_mask(self): + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rs = df.where(cond, np.nan) + assert_frame_equal(rs, df.mask(df <= 0)) + assert_frame_equal(rs, df.mask(~cond)) + #---------------------------------------------------------------------- # Transposing From 
d56d0e691de8e3123575ebe80c2ad4d63328b7cb Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 13 Nov 2012 16:53:58 -0500 Subject: [PATCH 44/52] BUG: fix internal error in constructing DataFrame.values with duplicate column names. close #2236 --- RELEASE.rst | 1 + pandas/core/internals.py | 20 +++++++++++++++----- pandas/tests/test_frame.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index a64cfdc44c51f..7db939a0699fb 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -99,6 +99,7 @@ pandas 0.9.1 - Many unicode formatting fixes (#2201) - Fix improper MultiIndex conversion issue when assigning e.g. DataFrame.index (#2200) + - Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236) pandas 0.9.0 ============ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ddbf4b85fcb68..7275a54a4faae 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -732,12 +732,22 @@ def _interleave(self, items): # By construction, all of the item should be covered by one of the # blocks - for block in self.blocks: - indexer = items.get_indexer(block.items) - assert((indexer != -1).all()) - result[indexer] = block.get_values(dtype) - itemmask[indexer] = 1 + if items.is_unique: + for block in self.blocks: + indexer = items.get_indexer(block.items) + assert((indexer != -1).all()) + result[indexer] = block.get_values(dtype) + itemmask[indexer] = 1 + else: + for block in self.blocks: + mask = items.isin(block.items) + indexer = mask.nonzero()[0] + assert(len(indexer) == len(block.items)) + result[indexer] = block.get_values(dtype) + itemmask[indexer] = 1 + assert(itemmask.all()) + return result def xs(self, key, axis=1, copy=True): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8ef2df02d6447..b3007e886812d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4078,6 +4078,17 @@ def test_as_matrix(self): expected = 
self.frame.reindex(columns=['A', 'B']).values assert_almost_equal(mat, expected) + def test_as_matrix_duplicates(self): + df = DataFrame([[1, 2, 'a', 'b'], + [1, 2, 'a', 'b']], + columns=['one', 'one', 'two', 'two']) + + result = df.values + expected = np.array([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']], + dtype=object) + + self.assertTrue(np.array_equal(result, expected)) + def test_values(self): self.frame.values[:, 0] = 5. self.assert_((self.frame.values[:, 0] == 5).all()) From ca8a6ba0931e272729a4cd2fd42d4a42b838c9e4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 13 Nov 2012 18:22:42 -0500 Subject: [PATCH 45/52] BUG: make inplace semantics of DataFrame.where consistent. #2230 --- RELEASE.rst | 2 ++ pandas/core/frame.py | 19 +++++++++++++------ pandas/tests/test_frame.py | 30 ++++++++++++++++++++---------- 3 files changed, 35 insertions(+), 16 deletions(-) mode change 100755 => 100644 pandas/core/frame.py mode change 100755 => 100644 pandas/tests/test_frame.py diff --git a/RELEASE.rst b/RELEASE.rst index 7db939a0699fb..7d6f2fbd31aa2 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -57,6 +57,7 @@ pandas 0.9.1 - Improve performance of Series/DataFrame.diff (re: #2087) - Support unary ~ (__invert__) in DataFrame (#2110) - Turn off pandas-style tick locators and formatters (#2205) + - DataFrame[DataFrame] uses DataFrame.where to compute masked frame (#2230) **Bug fixes** @@ -100,6 +101,7 @@ pandas 0.9.1 - Fix improper MultiIndex conversion issue when assigning e.g. 
DataFrame.index (#2200) - Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236) + - Fix duplicate columns issue (#2218, #2219) pandas 0.9.0 ============ diff --git a/pandas/core/frame.py b/pandas/core/frame.py old mode 100755 new mode 100644 index c9184f148e5a9..25abc10eae4e7 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1884,7 +1884,7 @@ def _boolean_set(self, key, value): if self._is_mixed_type: raise ValueError('Cannot do boolean setting on mixed-type frame') - self.where(key, value, inplace=True) + self.where(-key, value, inplace=True) def _set_item_multiple(self, keys, value): if isinstance(value, DataFrame): @@ -4872,7 +4872,6 @@ def where(self, cond, other=NA, inplace=False): Return a DataFrame with the same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. - Parameters ---------- cond: boolean DataFrame or array @@ -4882,17 +4881,25 @@ def where(self, cond, other=NA, inplace=False): ------- wh: DataFrame """ - if not hasattr(cond,'shape'): - raise ValueError('where requires an ndarray like object for its condition') + if not hasattr(cond, 'shape'): + raise ValueError('where requires an ndarray like object for its ' + 'condition') if isinstance(cond, np.ndarray): if cond.shape != self.shape: raise ValueError('Array onditional must be same shape as self') cond = self._constructor(cond, index=self.index, columns=self.columns) + if cond.shape != self.shape: cond = cond.reindex(self.index, columns=self.columns) - cond = cond.fillna(False) + + if inplace: + cond = -(cond.fillna(True).astype(bool)) + else: + cond = cond.fillna(False).astype(bool) + elif inplace: + cond = -cond if isinstance(other, DataFrame): _, other = self.align(other, join='left', fill_value=NA) @@ -4903,7 +4910,7 @@ def where(self, cond, other=NA, inplace=False): rs = np.where(cond, self, other) return self._constructor(rs, self.index, self.columns) - + def mask(self, cond): """ Returns copy of self 
whose values are replaced with nan if the diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py old mode 100755 new mode 100644 index a0df3f918e0d8..5c6a8270c9441 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -143,7 +143,8 @@ def test_getitem_boolean(self): # test df[df >0] works bif = self.tsframe[self.tsframe > 0] - bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns) + bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan), + index=self.tsframe.index,columns=self.tsframe.columns) self.assert_(isinstance(bif,DataFrame)) self.assert_(bif.shape == self.tsframe.shape) assert_frame_equal(bif,bifw) @@ -285,8 +286,8 @@ def test_setitem_boolean(self): assert_almost_equal(df.values, values) # a df that needs alignment first - df[df[:-1]<0] = 2 - np.putmask(values[:-1],values[:-1]<0,2) + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) assert_almost_equal(df.values, values) self.assertRaises(Exception, df.__setitem__, df * 0, 2) @@ -5268,6 +5269,13 @@ def test_where(self): self.assertRaises(ValueError, df.mask, True) self.assertRaises(ValueError, df.mask, 0) + # where inplace + df = DataFrame(np.random.randn(5, 3)) + + expected = df.mask(df < 0) + df.where(df >= 0, np.nan, inplace=True) + assert_frame_equal(df, expected) + def test_mask(self): df = DataFrame(np.random.randn(5, 3)) cond = df > 0 @@ -7232,13 +7240,15 @@ def test_xs_view(self): def test_boolean_indexing(self): idx = range(3) cols = range(3) - df1 = DataFrame(index=idx, columns=cols, \ - data=np.array([[0.0, 0.5, 1.0], - [1.5, 2.0, 2.5], - [3.0, 3.5, 4.0]], dtype=float)) - df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols)))) - - expected = DataFrame(index=idx, columns=cols, \ + df1 = DataFrame(index=idx, columns=cols, + data=np.array([[0.0, 0.5, 1.0], + [1.5, 2.0, 2.5], + [3.0, 3.5, 4.0]], + dtype=float)) + df2 = DataFrame(index=idx, columns=cols, + 
data=np.ones((len(idx), len(cols)))) + + expected = DataFrame(index=idx, columns=cols, data=np.array([[0.0, 0.5, 1.0], [1.5, 2.0, -1], [-1, -1, -1]], dtype=float)) From cda2084b50b91413fc96c6a2749561ead284f474 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 13 Nov 2012 18:51:09 -0500 Subject: [PATCH 46/52] BUG: 1 ** NA issue in computing new fill value in SparseSeries. close #2220 --- RELEASE.rst | 1 + pandas/sparse/series.py | 7 +++++-- pandas/sparse/tests/test_sparse.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 7d6f2fbd31aa2..ac9e4a6339989 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -102,6 +102,7 @@ pandas 0.9.1 e.g. DataFrame.index (#2200) - Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236) - Fix duplicate columns issue (#2218, #2219) + - Fix SparseSeries.__pow__ issue with NA input (#2220) pandas 0.9.0 ============ diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 70d35607573c2..73f9ca30ebc24 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -42,8 +42,11 @@ def wrapper(self, other): elif isinstance(other, DataFrame): return NotImplemented elif np.isscalar(other): - new_fill_value = op(np.float64(self.fill_value), - np.float64(other)) + if isnull(other) or isnull(self.fill_value): + new_fill_value = np.nan + else: + new_fill_value = op(np.float64(self.fill_value), + np.float64(other)) return SparseSeries(op(self.sp_values, other), index=self.index, diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index c3df935d79792..fb9b3a37c9968 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1297,6 +1297,22 @@ def test_isin(self): rs = sparse_df[sparse_df.flag.isin([1.])] assert_frame_equal(xp, rs) + def test_sparse_pow_issue(self): + # #2220 + df = SparseDataFrame({'A' : [1.1,3.3],'B' : [2.5,-3.9]}) + + # note : no error without nan + df = 
SparseDataFrame({'A' : [nan, 0, 1] }) + + # note that 2 ** df works fine, also df ** 1 + result = 1 ** df + + r1 = result.take([0],1)['A'] + r2 = result['A'] + + self.assertEqual(len(r2.sp_values), len(r1.sp_values)) + + def _dense_series_compare(s, f): result = f(s) assert(isinstance(result, SparseSeries)) From 5fdec0d98647d59d1cd38b6995dd17152ebbf675 Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 10 Nov 2012 15:26:20 +0200 Subject: [PATCH 47/52] ENH: Use __file__ to determine REPO_PATH in vb_suite/suite.py --- vb_suite/suite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vb_suite/suite.py b/vb_suite/suite.py index 0a7c4eb9945f7..4d38388548984 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -56,9 +56,9 @@ DB_PATH = config.get('setup', 'db_path') TMP_DIR = config.get('setup', 'tmp_dir') except: - REPO_PATH = os.path.join(HOME, 'code/pandas') + REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__),"../")) REPO_URL = 'git@github.com:pydata/pandas.git' - DB_PATH = os.path.join(HOME, 'code/pandas/vb_suite/benchmarks.db') + DB_PATH = os.path.join(REPO_PATH, 'vb_suite/benchmarks.db') TMP_DIR = os.path.join(HOME, 'tmp/vb_pandas') PREPARE = """ From 48df7c33bd657ea629902f0d145e17b1164eef00 Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 10 Nov 2012 15:34:07 +0200 Subject: [PATCH 48/52] ENH: make vbench_suite/run_suite executable --- vb_suite/run_suite.py | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 vb_suite/run_suite.py diff --git a/vb_suite/run_suite.py b/vb_suite/run_suite.py old mode 100644 new mode 100755 index febd9d1fa6cad..0c03d17607f4e --- a/vb_suite/run_suite.py +++ b/vb_suite/run_suite.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python from vbench.api import BenchmarkRunner from suite import * From 930a26779165628f54a788b996142808c218a204 Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 10 Nov 2012 01:25:39 +0200 Subject: [PATCH 49/52] BUG: coerce ndarray dtype to object when comparing series fixes #1926 
(partially at least) --- pandas/core/series.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4194cbd4e4156..b6e1448514112 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -110,7 +110,10 @@ def na_op(x, y): y = lib.list_to_object_array(y) if isinstance(y, np.ndarray): - result = lib.vec_compare(x, y, op) + if y.dtype != np.object_: + result = lib.vec_compare(x, y.astype(np.object_), op) + else: + result = lib.vec_compare(x, y, op) else: result = lib.scalar_compare(x, y, op) else: From 4f982ac830c44d8ef0bb996c4480d559ff7688ee Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 13 Nov 2012 19:02:20 -0500 Subject: [PATCH 50/52] TST: unit test for #2214 --- pandas/tests/test_series.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 03bfccba83e72..7422c925fd657 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -184,6 +184,13 @@ def test_comparisons(self): assert_almost_equal(result, expected) + s = Series(['a', 'b', 'c']) + s2 = Series([False, True, False]) + + # it works! + s == s2 + s2 == s + def test_none_comparison(self): # bug brought up by #1079 s = Series(np.random.randn(10), index=range(0, 20, 2)) From bf538a960379358615f64c67ddf02544b86148e9 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 13 Nov 2012 19:10:32 -0500 Subject: [PATCH 51/52] BUG: icol duplicate columns with integer sequence failure. 
close #2228 --- pandas/core/frame.py | 6 +++++- pandas/tests/test_frame.py | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a537fc5b678a..4b826b98f4d18 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1726,7 +1726,11 @@ def icol(self, i): else: label = self.columns[i] if isinstance(label, Index): - return self.reindex(columns=label) + if self.columns.inferred_type == 'integer': + # XXX re: #2228 + return self.reindex(columns=label) + else: + return self.ix[:, i] values = self._data.iget(i) return Series.from_array(values, index=self.index, name=label) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 49173d2ae209f..7787a5c6e4fc7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1323,6 +1323,10 @@ def test_irow_icol_duplicates(self): xp = df.T.ix[0] assert_series_equal(rs, xp) + rs = df.icol([0]) + xp = df.ix[:, [0]] + assert_frame_equal(rs, xp) + def test_iget_value(self): for i, row in enumerate(self.frame.index): for j, col in enumerate(self.frame.columns): From e374f0ffda19d52ec5611ef49fdb84f3133517ed Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 14 Nov 2012 01:04:34 -0500 Subject: [PATCH 52/52] BUG: axes.color_cycle from mpl rcParams should not be joined as single string --- pandas/tests/test_graphics.py | 15 +++++++++++++++ pandas/tools/plotting.py | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 311295a9e2b88..0c5f1a6c3daa2 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -537,6 +537,21 @@ def test_line_colors(self): plt.close('all') df.ix[:, [0]].plot(color='DodgerBlue') + @slow + def test_default_color_cycle(self): + import matplotlib.pyplot as plt + plt.rcParams['axes.color_cycle'] = list('rgbk') + + plt.close('all') + df = DataFrame(np.random.randn(5, 3)) + ax = df.plot() + + lines = 
ax.get_lines() + for i, l in enumerate(lines): + xp = plt.rcParams['axes.color_cycle'][i] + rs = l.get_color() + self.assert_(xp == rs) + class TestDataFrameGroupByPlots(unittest.TestCase): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index f12e2fcdf8d87..e61d1be9c824a 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -979,7 +979,9 @@ def _use_dynamic_x(self): def _get_colors(self): import matplotlib.pyplot as plt - cycle = ''.join(plt.rcParams.get('axes.color_cycle', list('bgrcmyk'))) + cycle = plt.rcParams.get('axes.color_cycle', list('bgrcmyk')) + if isinstance(cycle, basestring): + cycle = list(cycle) has_colors = 'color' in self.kwds colors = self.kwds.get('color', cycle) return colors