diff --git a/RELEASE.rst b/RELEASE.rst
index b88580bdf2aa1..bef84c4b02150 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -25,7 +25,7 @@ Where to get it
 pandas 0.9.0
 ============
 
-**Release date:** NOT YET RELEASED
+**Release date:** 10/7/2012
 
 **New features**
 
@@ -36,9 +36,11 @@ pandas 0.9.0
     Finance (#1748, #1739)
   - Recognize and convert more boolean values in file parsing (Yes, No, TRUE,
     FALSE, variants thereof) (#1691, #1295)
+  - Add Panel.update method, analogous to DataFrame.update (#1999, #1988)
 
 **Improvements to existing features**
 
+  - Proper handling of NA values in merge operations (#1990)
   - Add ``flags`` option for ``re.compile`` in some Series.str methods (#1659)
   - Parsing of UTC date strings in read_* functions (#1693)
   - Handle generator input to Series (#1679)
@@ -62,6 +64,8 @@ pandas 0.9.0
 
 **API Changes**
 
+  - Change default header names in read_* functions to more Pythonic X0, X1,
+    etc. instead of X.1, X.2. (#2000)
   - Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear``
     (#1723)
   - Don't modify NumPy suppress printoption at import time
@@ -240,6 +244,9 @@ pandas 0.9.0
   - Fix BlockManager.iget bug when dealing with non-unique MultiIndex as columns
     (#1970)
   - Fix reset_index bug if both drop and level are specified (#1957)
+  - Work around unsafe NumPy object->int casting with Cython function (#1987)
+  - Fix datetime64 formatting bug in DataFrame.to_csv (#1993)
+  - Default start date in pandas.io.data to 1/1/2000 as the docs say (#2011)
 
 pandas 0.8.1
 
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index 40114415c6fa7..23bd05b156409 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -397,8 +397,8 @@ available:
    :widths: 20, 80
 
    ``ewma``, EW moving average
-   ``ewvar``, EW moving variance
-   ``ewstd``, EW moving standard deviation
+   ``ewmvar``, EW moving variance
+   ``ewmstd``, EW moving standard deviation
    ``ewmcorr``, EW moving correlation
    ``ewmcov``, EW moving covariance
 
diff --git a/doc/source/v0.9.0.txt b/doc/source/v0.9.0.txt
index 0cac6041e6a21..0a670193554f7 100644
--- a/doc/source/v0.9.0.txt
+++ b/doc/source/v0.9.0.txt
@@ -1,7 +1,7 @@
 .. _whatsnew_0900:
 
-v0.9.0 (September 25, 2012)
----------------------------
+v0.9.0 (October 7, 2012)
+------------------------
 
 This is a major release from 0.8.1 and includes several new features and
 enhancements along with a large number of bug fixes. New features include
@@ -30,31 +30,62 @@ New features
 
 API changes
 ~~~~~~~~~~~
 
-  - Creating a Series from another Series, passing an index, will cause
-    reindexing to happen inside rather than treating the Series like an
-    ndarray. Technically improper usages like Series(df[col1], index=df[col2])
-    that worked before "by accident" (this was never intended) will lead to all
-    NA Series in some cases.
-  - Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear``
-    (GH1723_)
-  - Don't modify NumPy suppress printoption to True at import time
-  - The internal HDF5 data arrangement for DataFrames has been transposed.
-    Legacy files will still be readable by HDFStore (GH1834_, GH1824_)
-  - Legacy cruft removed: pandas.stats.misc.quantileTS
-  - Use ISO8601 format for Period repr: monthly, daily, and on down (GH1776_)
-  - Empty DataFrame columns are now created as object dtype. This will prevent
-    a class of TypeErrors that was occurring in code where the dtype of a
-    column would depend on the presence of data or not (e.g. a SQL query having
-    results) (GH1783_)
-  - Setting parts of DataFrame/Panel using ix now aligns input Series/DataFrame
-    (GH1630_)
-  - ``first`` and ``last`` methods in ``GroupBy`` no longer drop non-numeric
-    columns (GH1809_)
-  - Resolved inconsistencies in specifying custom NA values in text parser.
-    `na_values` of type dict no longer override default NAs unless
-    `keep_default_na` is set to false explicitly (GH1657_)
-  - DataFrame.dot will not do data alignment, and also work with Series
-    (GH1915_)
+  - The default column names when ``header=None`` and no column names are
+    passed to functions like ``read_csv`` have changed to be more Pythonic and
+    amenable to attribute access:
+
+.. ipython:: python
+
+   from StringIO import StringIO
+
+   data = '0,0,1\n1,1,0\n0,1,0'
+   df = read_csv(StringIO(data), header=None)
+   df
+
+
+- Creating a Series from another Series, passing an index, will cause reindexing
+  to happen inside rather than treating the Series like an ndarray. Technically
+  improper usages like ``Series(df[col1], index=df[col2])`` that worked before
+  "by accident" (this was never intended) will lead to all NA Series in some
+  cases. To be perfectly clear:
+
+.. ipython:: python
+
+   s1 = Series([1, 2, 3])
+   s1
+
+   s2 = Series(s1, index=['foo', 'bar', 'baz'])
+   s2
+
+- Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear``
+  (GH1723_)
+
+- Don't modify NumPy suppress printoption to True at import time
+
+- The internal HDF5 data arrangement for DataFrames has been transposed. Legacy
+  files will still be readable by HDFStore (GH1834_, GH1824_)
+
+- Legacy cruft removed: pandas.stats.misc.quantileTS
+
+- Use ISO8601 format for Period repr: monthly, daily, and on down (GH1776_)
+
+- Empty DataFrame columns are now created as object dtype. This will prevent a
+  class of TypeErrors that was occurring in code where the dtype of a column
+  would depend on the presence of data or not (e.g. a SQL query having results)
+  (GH1783_)
+
+- Setting parts of DataFrame/Panel using ix now aligns input Series/DataFrame
+  (GH1630_)
+
+- ``first`` and ``last`` methods in ``GroupBy`` no longer drop non-numeric
+  columns (GH1809_)
+
+- Resolved inconsistencies in specifying custom NA values in text parser.
+  ``na_values`` of type dict no longer override default NAs unless
+  ``keep_default_na`` is set to false explicitly (GH1657_)
+
+- ``DataFrame.dot`` will not do data alignment, and also work with Series
+  (GH1915_)
 
 See the `full release notes
diff --git a/pandas/core/common.py b/pandas/core/common.py
index e3c0efbf34731..668017c29c6ab 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -786,7 +786,7 @@ def is_list_like(arg):
 
 
 def _astype_nansafe(arr, dtype):
-    if isinstance(dtype, basestring):
+    if not isinstance(dtype, np.dtype):
        dtype = np.dtype(dtype)
 
     if issubclass(arr.dtype.type, np.datetime64):
@@ -797,6 +797,9 @@ def _astype_nansafe(arr, dtype):
 
         if np.isnan(arr).any():
             raise ValueError('Cannot convert NA to integer')
+    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
+        # work around NumPy brokenness, #1987
+        return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
 
     return arr.astype(dtype)
 
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 0597a1ef0eb95..dca1976be838f 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -298,7 +298,7 @@ def to_latex(self, force_unicode=False, column_format=None):
         if column_format is None:
             column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
         else:
-            assert isinstance(column_format, str)
+            assert isinstance(column_format, basestring)
 
         self.buf.write('\\begin{tabular}{%s}\n' % column_format)
         self.buf.write('\\hline\n')
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 66524e571497c..91005ead01a24 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1106,8 +1106,11 @@ def _helper_csvexcel(self, writer, na_rep=None, cols=None,
                 val = series[col][j]
                 if lib.checknull(val):
                     val = na_rep
+
                 if float_format is not None and com.is_float(val):
                     val = float_format % val
+                elif isinstance(val, np.datetime64):
+                    val = lib.Timestamp(val)._repr_base
 
                 row_fields.append(val)
 
@@ -1366,7 +1369,7 @@ def info(self, verbose=True, buf=None):
         counts = self.count()
         assert(len(cols) == len(counts))
         for col, count in counts.iteritems():
-            if not isinstance(col, (unicode, str)):
+            if not isinstance(col, basestring):
                 col = str(col)
             lines.append(_put_str(col, space) +
                          '%d non-null values' % count)
@@ -2458,7 +2461,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         frame.index = index
         return frame
 
-    def reset_index(self, level=None, drop=False, inplace=False):
+    def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
+                    col_fill=''):
         """
         For DataFrame with multi-level index, return new DataFrame with
         labeling information in the columns under the index names, defaulting
@@ -2476,6 +2480,13 @@ def reset_index(self, level=None, drop=False, inplace=False):
             the index to the default integer index.
         inplace : boolean, default False
             Modify the DataFrame in place (do not create a new object)
+        col_level : int or str, default 0
+            If the columns have multiple levels, determines which level the
+            labels are inserted into. By default it is inserted into the first
+            level.
+        col_fill : object, default ''
+            If the columns have multiple levels, determines how the other levels
+            are named. If None then the index name is repeated.
 
         Returns
         -------
@@ -2504,11 +2515,22 @@ def _maybe_cast(values):
             names = self.index.names
             zipped = zip(self.index.levels, self.index.labels)
 
+            multi_col = isinstance(self.columns, MultiIndex)
             for i, (lev, lab) in reversed(list(enumerate(zipped))):
                 col_name = names[i]
                 if col_name is None:
                     col_name = 'level_%d' % i
 
+                if multi_col:
+                    if col_fill is None:
+                        col_name = tuple([col_name] *
+                                         self.columns.nlevels)
+                    else:
+                        name_lst = [col_fill] * self.columns.nlevels
+                        lev_num = self.columns._get_level_number(col_level)
+                        name_lst[lev_num] = col_name
+                        col_name = tuple(name_lst)
+
                 # to ndarray and maybe infer different dtype
                 level_values = _maybe_cast(lev.values)
                 if level is None or i in level:
@@ -2518,6 +2540,14 @@ def _maybe_cast(values):
             name = self.index.name
             if name is None or name == 'index':
                 name = 'index' if 'index' not in self else 'level_0'
+            if isinstance(self.columns, MultiIndex):
+                if col_fill is None:
+                    name = tuple([name] * self.columns.nlevels)
+                else:
+                    name_lst = [col_fill] * self.columns.nlevels
+                    lev_num = self.columns._get_level_number(col_level)
+                    name_lst[lev_num] = name
+                    name = tuple(name_lst)
             new_obj.insert(0, name, _maybe_cast(self.index.values))
 
         new_obj.index = new_index
@@ -2714,7 +2744,13 @@ def _m8_to_i8(x):
             values = list(_m8_to_i8(self.values.T))
         else:
             if np.iterable(cols) and not isinstance(cols, basestring):
-                values = [_m8_to_i8(self[x].values) for x in cols]
+                if isinstance(cols, tuple):
+                    if cols in self.columns:
+                        values = [self[cols]]
+                    else:
+                        values = [_m8_to_i8(self[x].values) for x in cols]
+                else:
+                    values = [_m8_to_i8(self[x].values) for x in cols]
             else:
                 values = [self[cols]]
 
@@ -3359,7 +3395,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
 
         Parameters
         ----------
-        other : DataFrame
+        other : DataFrame, or object coercible into a DataFrame
         join : {'left', 'right', 'outer', 'inner'}, default 'left'
         overwrite : boolean, default True
             If True then overwrite values for common keys in the calling frame
@@ -3373,7 +3409,11 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
         if join != 'left':
             raise NotImplementedError
 
+        if not isinstance(other, DataFrame):
+            other = DataFrame(other)
+
         other = other.reindex_like(self)
+
         for col in self.columns:
             this = self[col].values
             that = other[col].values
@@ -4385,7 +4425,7 @@ def var(self, axis=0, skipna=True, level=None, ddof=1):
 
     @Substitution(name='standard deviation', shortname='std',
                   na_action=_doc_exclude_na, extras='')
-    @Appender(_stat_doc + 
+    @Appender(_stat_doc +
               """
         Normalized by N-1 (unbiased estimator).
""") diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index bc12122434d1f..f0f6f7b2a8c63 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -303,7 +303,7 @@ def mean(self): def median(self): """ - Compute mean of groups, excluding missing values + Compute median of groups, excluding missing values For multiple groupings, the result index will be a MultiIndex """ diff --git a/pandas/core/index.py b/pandas/core/index.py index 7447fdd59368a..6443bbf01a4f2 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -196,7 +196,7 @@ def _has_complex_internals(self): def summary(self, name=None): if len(self) > 0: - index_summary = ', %s to %s' % (str(self[0]), str(self[-1])) + index_summary = ', %s to %s' % (unicode(self[0]), unicode(self[-1])) else: index_summary = '' diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c19f8d419e353..211434ab07154 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1318,6 +1318,36 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): return concat([self] + list(other), axis=0, join=how, join_axes=join_axes, verify_integrity=True) + def update(self, other, join='left', overwrite=True, filter_func=None, + raise_conflict=False): + """ + Modify Panel in place using non-NA values from passed + Panel, or object coercible to Panel. Aligns on items + + Parameters + ---------- + other : Panel, or object coercible to Panel + join : How to join individual DataFrames + {'left', 'right', 'outer', 'inner'}, default 'left' + overwrite : boolean, default True + If True then overwrite values for common keys in the calling panel + filter_func : callable(1d-array) -> 1d-array, default None + Can choose to replace values other than NA. Return True for values + that should be updated + raise_conflict : bool + If True, will raise an error if a DataFrame and other both + contain data in the same place. + """ + + if not isinstance(other, Panel): + other = Panel(other) + + other = other.reindex(items=self.items) + + for frame in self.items: + self[frame].update(other[frame], join, overwrite, filter_func, + raise_conflict) + def _get_join_index(self, other, how): if how == 'left': join_major, join_minor = self.major_axis, self.minor_axis diff --git a/pandas/io/data.py b/pandas/io/data.py index ab4a95d37a4ce..8753d1dabfba2 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -67,7 +67,7 @@ def _sanitize_dates(start, end): start = to_datetime(start) end = to_datetime(end) if start is None: - start = dt.datetime.today() - dt.timedelta(365) + start = dt.datetime(2010, 1, 1) if end is None: end = dt.datetime.today() return start, end @@ -178,7 +178,8 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1), url = fred_URL + '%s' % name + \ '/downloaddata/%s' % name + '.csv' - data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True) + data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True, header=None, + skiprows=1, names=["DATE", name]) return data.truncate(start, end) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f01d769824734..d1ede95527029 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -43,7 +43,8 @@ class DateConversionError(Exception): If None defaults to Excel dialect. Ignored if sep longer than 1 char See csv.Dialect documentation for more details header : int, default 0 - Row to use for the column labels of the parsed DataFrame + Row to use for the column labels of the parsed DataFrame. Specify None if + there is no header row. 
 skiprows : list-like or integer
     Row numbers to skip (0-indexed) or number of rows to skip (int)
     at the start of the file
@@ -51,7 +52,8 @@ class DateConversionError(Exception):
     Column to use as the row labels of the DataFrame. If a sequence is
     given, a MultiIndex is used.
 names : array-like
-    List of column names
+    List of column names to use. If passed, header will be implicitly set to
+    None.
 na_values : list-like or dict, default None
     Additional strings to recognize as NA/NaN. If dict passed, specific
     per-column NA values
@@ -156,7 +158,7 @@ def _read(cls, filepath_or_buffer, kwds):
     if skipfooter is not None:
         kwds['skip_footer'] = skipfooter
 
-    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
+    if isinstance(filepath_or_buffer, basestring) and _is_url(filepath_or_buffer):
         from urllib2 import urlopen
         filepath_or_buffer = urlopen(filepath_or_buffer)
         if py3compat.PY3:  # pragma: no cover
@@ -613,7 +615,7 @@ def _infer_columns(self):
                 ncols = len(line)
 
                 if not names:
-                    columns = ['X.%d' % (i + 1) for i in range(ncols)]
+                    columns = ['X%d' % i for i in range(ncols)]
                 else:
                     columns = names
 
@@ -747,7 +749,7 @@ def _explicit_index_names(self, columns):
             else:
                 index_name = columns[self.index_col]
 
-            if index_name is not None and 'Unnamed' in index_name:
+            if index_name is not None and 'Unnamed' in str(index_name):
                 index_name = None
 
         elif self.index_col is not None:
@@ -833,6 +835,9 @@ def get_chunk(self, rows=None):
         alldata = self._rows_to_cols(content)
         data = self._exclude_implicit_index(alldata)
 
+        if self.parse_dates is not None:
+            data, columns = self._process_date_conversion(data)
+
         # apply converters
         for col, f in self.converters.iteritems():
             if isinstance(col, int) and col not in self.orig_columns:
@@ -841,9 +846,6 @@ def get_chunk(self, rows=None):
 
         data = _convert_to_ndarrays(data, self.na_values, self.verbose)
 
-        if self.parse_dates is not None:
-            data, columns = self._process_date_conversion(data)
-
         if self.index_col is None:
             numrows = len(content)
             index = Index(np.arange(numrows))
@@ -1160,19 +1162,9 @@ def _convert_types(values, na_values):
 
     return result, na_count
 
-def _get_col_names(colspec, columns):
-    colset = set(columns)
-    colnames = []
-    for c in colspec:
-        if c in colset:
-            colnames.append(str(c))
-        elif isinstance(c, int):
-            colnames.append(str(columns[c]))
-    return colnames
-
 def _try_convert_dates(parser, colspec, data_dict, columns):
     colspec = _get_col_names(colspec, columns)
-    new_name = '_'.join(colspec)
+    new_name = '_'.join([str(x) for x in colspec])
     to_parse = [data_dict[c] for c in colspec if c in data_dict]
 
     try:
@@ -1181,6 +1173,17 @@ def _try_convert_dates(parser, colspec, data_dict, columns):
         new_col = parser(_concat_date_cols(to_parse))
     return new_name, new_col, colspec
 
+def _get_col_names(colspec, columns):
+    colset = set(columns)
+    colnames = []
+    for c in colspec:
+        if c in colset:
+            colnames.append(c)
+        elif isinstance(c, int):
+            colnames.append(columns[c])
+    return colnames
+
+
 def _concat_date_cols(date_cols):
     if len(date_cols) == 1:
         return np.array([str(x) for x in date_cols[0]], dtype=object)
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 8df1270679364..ebd1b9489c6ea 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -11,7 +11,7 @@
 from numpy import nan
 import numpy as np
 
-from pandas import DataFrame, Series, Index, isnull, MultiIndex
+from pandas import DataFrame, Series, Index, isnull, MultiIndex, DatetimeIndex
 import pandas.io.parsers as parsers
 from pandas.io.parsers import (read_csv, read_table, read_fwf, ExcelFile,
                                TextParser)
 
@@ -108,7 +108,12 @@ def test_empty_string(self):
 
     def test_read_csv(self):
-        pass
+        if not py3compat.PY3:
+            fname=u"file:///"+unicode(self.csv1)
+            try:
+                df1 = read_csv(fname, index_col=0, parse_dates=True)
+            except IOError:
+                assert(False), "read_csv should accept unicode objects as urls"
 
     def test_dialect(self):
         data = """\
@@ -210,10 +215,10 @@ def func(*date_cols):
                                      'actual' : [1,3]})
         self.assert_('nominal' in df)
         self.assert_('actual' in df)
-        self.assert_('X.2' not in df)
-        self.assert_('X.3' not in df)
-        self.assert_('X.4' not in df)
-        from datetime import datetime
+        self.assert_('X1' not in df)
+        self.assert_('X2' not in df)
+        self.assert_('X3' not in df)
+
         d = datetime(1999, 1, 27, 19, 0)
         self.assert_(df.ix[0, 'nominal'] == d)
 
@@ -224,9 +229,10 @@ def func(*date_cols):
                         keep_date_col=True)
         self.assert_('nominal' in df)
         self.assert_('actual' in df)
-        self.assert_('X.2' in df)
-        self.assert_('X.3' in df)
-        self.assert_('X.4' in df)
+
+        self.assert_('X1' in df)
+        self.assert_('X2' in df)
+        self.assert_('X3' in df)
 
         data = """\
 KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
@@ -238,22 +244,24 @@ def func(*date_cols):
 """
         df = read_csv(StringIO(data), header=None,
                       parse_dates=[[1, 2], [1,3]])
-        self.assert_('X.2_X.3' in df)
-        self.assert_('X.2_X.4' in df)
-        self.assert_('X.2' not in df)
-        self.assert_('X.3' not in df)
-        self.assert_('X.4' not in df)
-        from datetime import datetime
+
+        self.assert_('X1_X2' in df)
+        self.assert_('X1_X3' in df)
+        self.assert_('X1' not in df)
+        self.assert_('X2' not in df)
+        self.assert_('X3' not in df)
+
         d = datetime(1999, 1, 27, 19, 0)
-        self.assert_(df.ix[0, 'X.2_X.3'] == d)
+        self.assert_(df.ix[0, 'X1_X2'] == d)
 
         df = read_csv(StringIO(data), header=None,
                       parse_dates=[[1, 2], [1,3]], keep_date_col=True)
-        self.assert_('X.2_X.3' in df)
-        self.assert_('X.2_X.4' in df)
-        self.assert_('X.2' in df)
-        self.assert_('X.3' in df)
-        self.assert_('X.4' in df)
+
+        self.assert_('X1_X2' in df)
+        self.assert_('X1_X3' in df)
+        self.assert_('X1' in df)
+        self.assert_('X2' in df)
+        self.assert_('X3' in df)
 
         data = '''\
 KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
@@ -264,7 +272,6 @@ def func(*date_cols):
 '''
         df = read_csv(StringIO(data), sep=',', header=None,
                       parse_dates=[1], index_col=1)
-        from datetime import datetime
         d = datetime(1999, 1, 27, 19, 0)
         self.assert_(df.index[0] == d)
 
@@ -556,7 +563,7 @@ def test_skiprows_bug(self):
                            index_col=0, parse_dates=True)
 
         expected = DataFrame(np.arange(1., 10.).reshape((3,3)),
-                             columns=['X.2', 'X.3', 'X.4'],
+                             columns=['X1', 'X2', 'X3'],
                              index=[datetime(2000, 1, 1),
                                     datetime(2000, 1, 2),
                                     datetime(2000, 1, 3)])
         assert_frame_equal(data, expected)
@@ -660,6 +667,20 @@ def test_parse_dates_string(self):
                         'C': [2, 4, 5]}, idx)
         assert_frame_equal(rs, xp)
 
+    def test_yy_format(self):
+        data = """date,time,B,C
+090131,0010,1,2
+090228,1020,3,4
+090331,0830,5,6
+"""
+        rs = read_csv(StringIO(data), index_col=0,
+                      parse_dates=[['date', 'time']])
+        idx = DatetimeIndex([datetime(2009,1,31,0,10,0),
+                             datetime(2009,2,28,10,20,0),
+                             datetime(2009,3,31,8,30,0)]).asobject
+        idx.name = 'date'
+        xp = DataFrame({'B': [1, 3, 5], 'C': [2, 4, 6]}, idx)
+        assert_frame_equal(rs, xp)
 
     def test_parse_dates_column_list(self):
         from pandas.core.datetools import to_datetime
@@ -701,7 +722,7 @@ def test_no_header(self):
         assert_almost_equal(df.values, expected)
         assert_almost_equal(df.values, df2.values)
         self.assert_(np.array_equal(df.columns,
-                                    ['X.1', 'X.2', 'X.3', 'X.4', 'X.5']))
+                                    ['X0', 'X1', 'X2', 'X3', 'X4']))
         self.assert_(np.array_equal(df2.columns, names))
 
     def test_header_with_index_col(self):
@@ -843,7 +864,7 @@ def test_parse_cols_list(self):
 
     def test_read_table_unicode(self):
         fin = StringIO('\u0141aski, Jan;1')
         df1 = read_table(fin, sep=";", encoding="utf-8", header=None)
-        self.assert_(isinstance(df1['X.1'].values[0], unicode))
+        self.assert_(isinstance(df1['X0'].values[0], unicode))
 
     def test_read_table_wrong_num_columns(self):
         data = """A,B,C,D,E,F
@@ -1281,8 +1302,8 @@ def test_read_csv_parse_simple_list(self):
 foo
 bar"""
         df = read_csv(StringIO(text), header=None)
-        expected = DataFrame({'X.1' : ['foo', 'bar baz', 'qux foo',
-                                       'foo', 'bar']})
+        expected = DataFrame({'X0' : ['foo', 'bar baz', 'qux foo',
+                                      'foo', 'bar']})
         assert_frame_equal(df, expected)
 
     def test_parse_dates_custom_euroformat(self):
diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx
index dc733a8306370..33e20d44e077d 100644
--- a/pandas/src/datetime.pyx
+++ b/pandas/src/datetime.pyx
@@ -178,7 +178,7 @@ class Timestamp(_Timestamp):
 
     @property
     def dayofyear(self):
-        return self.day
+        return self._get_field('doy')
 
     @property
     def week(self):
diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx
index 2a4302f906220..54641a78a08d9 100644
--- a/pandas/src/tseries.pyx
+++ b/pandas/src/tseries.pyx
@@ -692,6 +692,17 @@ def value_count_int64(ndarray[int64_t] values):
 
     return result_keys, result_counts
 
+def astype_intsafe(ndarray[object] arr, new_dtype):
+    cdef:
+        Py_ssize_t i, n = len(arr)
+        ndarray result
+
+    result = np.empty(n, dtype=new_dtype)
+    for i in range(n):
+        util.set_value_at(result, i, arr[i])
+
+    return result
+
 include "hashtable.pyx"
 include "datetime.pyx"
 include "skiplist.pyx"
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index cb03d7ba6d4e6..e2b0b918f0142 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 from datetime import datetime
 import sys
 
@@ -309,25 +310,19 @@ def test_2d_float32(self):
         tm.assert_almost_equal(result, expected)
 
     def test_console_encode(self):
-        import sys
-
-        if py3compat.PY3 or sys.stdin.encoding is None:
+        """
+        On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend)
+        common.console_encode should encode things as utf-8.
+        """
+        if py3compat.PY3:
             raise nose.SkipTest
 
-        # stub test
-        # need to mock-out sys.stdin.encoding=None for real test
-        result = com.console_encode(u"\u05d0")
-        try:
-            expected = u"\u05d0".encode(sys.stdin.encoding)
-
-            # lot of console encodings, ISO-8869-1, cp850, etc. won't encode
-            # this character
+        with tm.stdin_encoding(encoding=None):
+            result = com.console_encode(u"\u05d0")
+            expected = u"\u05d0".encode('utf-8')
             self.assertEqual(result, expected)
-        except UnicodeEncodeError:
-            pass
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
                    exit=False)
-
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index e8d7639c090b9..9d45273e78b9b 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -237,7 +237,7 @@ def test_string_repr_encoding(self):
         filepath = os.path.join(pth, 'data', 'unicode_series.csv')
         df = pandas.read_csv(filepath, header=None)
         repr(df)
-        repr(df['X.2'])
+        repr(df['X1'])
 
     def test_repr_corner(self):
         # representing infs poses no problems
@@ -246,7 +246,7 @@ def test_repr_corner(self):
 
     def test_frame_info_encoding(self):
         index = ['\'Til There Was You (1997)',
-                 '\xc1 k\xf6ldum klaka (Cold Fever) (1994)']
+                 'ldum klaka (Cold Fever) (1994)']
         fmt.set_printoptions(max_rows=1)
         df = DataFrame(columns=['a', 'b', 'c'], index=index)
         repr(df)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 236549cf6071f..cf37de4294f3e 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -4169,6 +4169,28 @@ def test_drop_duplicates(self):
         expected = df2.drop_duplicates(['AAA', 'B'], take_last=True)
         assert_frame_equal(result, expected)
 
+    def test_drop_duplicates_tuple(self):
+        df = DataFrame({('AA', 'AB') : ['foo', 'bar', 'foo', 'bar',
+                                        'foo', 'bar', 'bar', 'foo'],
+                        'B' : ['one', 'one', 'two', 'two',
+                               'two', 'two', 'one', 'two'],
+                        'C' : [1, 1, 2, 2, 2, 2, 1, 2],
+                        'D' : range(8)})
+
+        # single column
+        result = df.drop_duplicates(('AA', 'AB'))
+        expected = df[:2]
+        assert_frame_equal(result, expected)
+
+        result = df.drop_duplicates(('AA', 'AB'), take_last=True)
+        expected = df.ix[[6, 7]]
+        assert_frame_equal(result, expected)
+
+        # multi column
+        expected = df.ix[[0, 1, 2, 3]]
+        result = df.drop_duplicates((('AA', 'AB'), 'B'))
+        assert_frame_equal(result, expected)
+
     def test_drop_duplicates_NA(self):
         # none
         df = DataFrame({'A' : [None, None, 'foo', 'bar',
@@ -5808,6 +5830,27 @@ def test_update_raise(self):
         np.testing.assert_raises(Exception, df.update, *(other,),
                                  **{'raise_conflict' : True})
 
+    def test_update_from_non_df(self):
+        d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])}
+        df = DataFrame(d)
+
+        d['a'] = Series([5, 6, 7, 8])
+        df.update(d)
+
+        expected = DataFrame(d)
+
+        assert_frame_equal(df, expected)
+
+        d = {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}
+        df = DataFrame(d)
+
+        d['a'] = [5, 6, 7, 8]
+        df.update(d)
+
+        expected = DataFrame(d)
+
+        assert_frame_equal(df, expected)
+
     def test_combineAdd(self):
         # trivial
         comb = self.frame.combineAdd(self.frame)
@@ -6654,6 +6697,50 @@ def test_reset_index_right_dtype(self):
         resetted = df.reset_index()
         self.assert_(resetted['time'].dtype == np.float64)
 
+    def test_reset_index_multiindex_col(self):
+        vals = np.random.randn(3, 3).astype(object)
+        idx = ['x', 'y', 'z']
+        full = np.hstack(([[x] for x in idx], vals))
+        df = DataFrame(vals, Index(idx, name='a'),
+                       columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
+        rs = df.reset_index()
+        xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
+                                      ['', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+        rs = df.reset_index(col_fill=None)
+        xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
+                                      ['a', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+        rs = df.reset_index(col_level=1, col_fill='blah')
+        xp = DataFrame(full, columns=[['blah', 'b', 'b', 'c'],
+                                      ['a', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+        df = DataFrame(vals,
+                       MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']],
+                                              names=['d', 'a']),
+                       columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
+        rs = df.reset_index('a', )
+        xp = DataFrame(full, Index([0, 1, 2], name='d'),
+                       columns=[['a', 'b', 'b', 'c'],
+                                ['', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+        rs = df.reset_index('a', col_fill=None)
+        xp = DataFrame(full, Index(range(3), name='d'),
+                       columns=[['a', 'b', 'b', 'c'],
+                                ['a', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+        rs = df.reset_index('a', col_fill='blah', col_level=1)
+        xp = DataFrame(full, Index(range(3), name='d'),
+                       columns=[['blah', 'b', 'b', 'c'],
+                                ['a', 'mean', 'median', 'mean']])
+        assert_frame_equal(rs, xp)
+
+
     #----------------------------------------------------------------------
     # Tests to cope with refactored internals
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 4d10a22aa8615..3a2005fff0ba5 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -27,6 +27,7 @@ class TestIndex(unittest.TestCase):
 
     def setUp(self):
+        self.unicodeIndex = tm.makeUnicodeIndex(100)
         self.strIndex = tm.makeStringIndex(100)
         self.dateIndex = tm.makeDateIndex(100)
         self.intIndex = tm.makeIntIndex(100)
@@ -374,6 +375,7 @@ def test_take(self):
     def _check_method_works(self, method):
         method(self.empty)
         method(self.dateIndex)
+        method(self.unicodeIndex)
         method(self.strIndex)
         method(self.intIndex)
         method(self.tuples)
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 2c21c663b330f..36e667322fa9d 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1187,6 +1187,122 @@ def test_dropna(self):
         exp = p.ix[['a', 'c', 'd']]
         assert_panel_equal(result, exp)
 
+    def test_update(self):
+        pan = Panel([[[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]],
+                     [[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]]])
+
+        other = Panel([[[3.6, 2., np.nan],
+                        [np.nan, np.nan, 7]]], items=[1])
+
+        pan.update(other)
+
+        expected = Panel([[[1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[3.6, 2., 3],
+                           [1.5, np.nan, 7],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]]])
+
+        assert_panel_equal(pan, expected)
+
+    def test_update_from_dict(self):
+        pan = Panel({'one': DataFrame([[1.5, np.nan, 3],
+                                       [1.5, np.nan, 3],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.]]),
+                     'two': DataFrame([[1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.],
+                                       [1.5, np.nan, 3.]])})
+
+        other = {'two': DataFrame([[3.6, 2., np.nan],
+                                   [np.nan, np.nan, 7]])}
+
+        pan.update(other)
+
+        expected = Panel({'two': DataFrame([[3.6, 2., 3],
+                                            [1.5, np.nan, 7],
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.]]),
+                          'one': DataFrame([[1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.],
+                                            [1.5, np.nan, 3.]])})
+
+        assert_panel_equal(pan, expected)
+
+    def test_update_nooverwrite(self):
+        pan = Panel([[[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]],
+                     [[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]]])
+
+        other = Panel([[[3.6, 2., np.nan],
+                        [np.nan, np.nan, 7]]], items=[1])
+
+        pan.update(other, overwrite=False)
+
+        expected = Panel([[[1.5, np.nan, 3],
+                           [1.5, np.nan, 3],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[1.5, 2., 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]]])
+
+        assert_panel_equal(pan, expected)
+
+    def test_update_filtered(self):
+        pan = Panel([[[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]],
+                     [[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]]])
+
+        other = Panel([[[3.6, 2., np.nan],
+                        [np.nan, np.nan, 7]]], items=[1])
+
+        pan.update(other, filter_func=lambda x: x > 2)
+
+        expected = Panel([[[1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]],
+                          [[1.5, np.nan, 3],
+                           [1.5, np.nan, 7],
+                           [1.5, np.nan, 3.],
+                           [1.5, np.nan, 3.]]])
+
+        assert_panel_equal(pan, expected)
+
+    def test_update_raise(self):
+        pan = Panel([[[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]],
+                     [[1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.],
+                      [1.5, np.nan, 3.]]])
+
+        np.testing.assert_raises(Exception, pan.update, *(pan,),
+                                 **{'raise_conflict': True})
 
 class TestLongPanel(unittest.TestCase):
     """
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 9474f6061f936..3a28401fb4f15 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -2341,6 +2341,17 @@ def test_astype_cast_nan_int(self):
         df = Series([1.0, 2.0, 3.0, np.nan])
         self.assertRaises(ValueError, df.astype, np.int64)
 
+    def test_astype_cast_object_int(self):
+        arr = Series(["car", "house", "tree","1"])
+
+        self.assertRaises(ValueError, arr.astype, int)
+        self.assertRaises(ValueError, arr.astype, np.int64)
+        self.assertRaises(ValueError, arr.astype, np.int8)
+
+        arr = Series(['1', '2', '3', '4'], dtype=object)
+        result = arr.astype(int)
+        self.assert_(np.array_equal(result, np.arange(1, 5)))
+
     def test_map(self):
         index, data = tm.getMixedTypeDict()
 
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index d64d79913b10f..4a50016c39927 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -142,7 +142,6 @@ def _merger(x, y):
 
 
-# TODO: NA group handling
 # TODO: transformations??
 # TODO: only copy DataFrames when modification necessary
 
 
@@ -572,7 +571,16 @@ def _factorize_keys(lk, rk, sort=True):
     if sort:
         llab, rlab = _sort_labels(rizer.uniques, llab, rlab)
 
-    # TODO: na handling
+    # NA group
+    lmask = llab == -1; lany = lmask.any()
+    rmask = rlab == -1; rany = rmask.any()
+
+    if lany or rany:
+        if lany:
+            np.putmask(llab, lmask, count)
+        if rany:
+            np.putmask(rlab, rmask, count)
+        count += 1
 
     return llab, rlab, count
 
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 829471deb9e6d..d7ad584e8f62f 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -777,6 +777,34 @@ def test_left_merge_na_buglet(self):
         expected = left.join(rdf)
         tm.assert_frame_equal(merged, expected)
 
+    def test_merge_na_keys(self):
+        data = [[1950, "A", 1.5],
+                [1950, "B", 1.5],
+                [1955, "B", 1.5],
+                [1960, "B", np.nan],
+                [1970, "B", 4.],
+                [1950, "C", 4.],
+                [1960, "C", np.nan],
+                [1965, "C", 3.],
+                [1970, "C", 4.]]
+
+        frame = DataFrame(data, columns=["year", "panel", "data"])
+
+        other_data = [[1960, 'A', np.nan],
+                      [1970, 'A', np.nan],
+                      [1955, 'A', np.nan],
+                      [1965, 'A', np.nan],
+                      [1965, 'B', np.nan],
+                      [1955, 'C', np.nan]]
+        other = DataFrame(other_data, columns=['year', 'panel', 'data'])
+
+        result = frame.merge(other, how='outer')
+
+        expected = frame.fillna(-999).merge(other.fillna(-999), how='outer')
+        expected = expected.replace(-999, np.nan)
+
+        tm.assert_frame_equal(result, expected)
+
 
 def _check_join(left, right, result, join_col, how='left',
                 lsuffix='_x', rsuffix='_y'):
diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py
index fa046776285b9..3364dae48a3aa 100644
--- a/pandas/tseries/converter.py
+++ b/pandas/tseries/converter.py
@@ -33,7 +33,7 @@ def _to_ordinalf(tm):
     return tot_sec
 
 def time2num(d):
-    if isinstance(d, str):
+    if isinstance(d, basestring):
         parsed = tools.to_datetime(d)
         if not isinstance(parsed, datetime):
             raise ValueError('Could not parse time %s' % d)
@@ -150,11 +150,14 @@ def try_parse(values):
             return dates.date2num(values)
         elif (com.is_integer(values) or com.is_float(values)):
             return values
-        elif isinstance(values, str):
+        elif isinstance(values, basestring):
             return try_parse(values)
         elif isinstance(values, (list, tuple, np.ndarray)):
             if not isinstance(values, np.ndarray):
-                values = np.array(values, dtype='O')
+                values = com._asarray_tuplesafe(values)
+
+            if com.is_integer_dtype(values) or com.is_float_dtype(values):
+                return values
 
             try:
                 values = tools.to_datetime(values)
diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py
new file mode 100644
index 0000000000000..d661f0671b120
--- /dev/null
+++ b/pandas/tseries/tests/test_converter.py
@@ -0,0 +1,31 @@
+from datetime import datetime, time, timedelta
+import sys
+import os
+import unittest
+
+import nose
+
+import numpy as np
+
+try:
+    import pandas.tseries.converter as converter
+except ImportError:
+    raise nose.SkipTest
+
+def test_time2num_accepts_unicode():
+    assert(converter.time2num("00:01")==converter.time2num(u"00:01"))
+
+class TestDateTimeConverter(unittest.TestCase):
+
+    def setUp(self):
+        self.dtc = converter.DatetimeConverter()
+
+    def test_convert_accepts_unicode(self):
+        r1 = self.dtc.convert("12:22",None,None)
+        r2 = self.dtc.convert(u"12:22",None,None)
+        assert(r1==r2), "DatetimeConverter.convert should accept unicode"
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+                   exit=False)
diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py
index d710c09dda261..8d2cd17bd55d5 100644
--- a/pandas/tseries/tests/test_plotting.py
+++ b/pandas/tseries/tests/test_plotting.py
@@ -854,6 +854,28 @@ def test_ax_plot(self):
         lines = ax.plot(x, y, label='Y')
         assert_array_equal(DatetimeIndex(lines[0].get_xdata()), x)
 
+    @slow
+    def test_mpl_nopandas(self):
+        import matplotlib.pyplot as plt
+
+        dates = [date(2008, 12, 31), date(2009, 1, 31)]
+        values1 = np.arange(10.0, 11.0, 0.5)
+        values2 = np.arange(11.0, 12.0, 0.5)
+
+        kw = dict(fmt='-', lw=4)
+
+        plt.close('all')
+        fig = plt.figure()
+        ax = fig.add_subplot(111)
+        ax.plot_date([x.toordinal() for x in dates], values1, **kw)
+        ax.plot_date([x.toordinal() for x in dates], values2, **kw)
+
+        line1, line2 = ax.get_lines()
+        assert_array_equal(np.array([x.toordinal() for x in dates]),
+                           line1.get_xydata()[:, 0])
+        assert_array_equal(np.array([x.toordinal() for x in dates]),
+                           line2.get_xydata()[:, 0])
+
 PNG_PATH = 'tmp.png'
 
 def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
     import matplotlib.pyplot as plt
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 29a128e63a785..e116822b75ad3 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -23,6 +23,8 @@
 from pandas.util.testing import assert_series_equal, assert_almost_equal
 import pandas.util.testing as tm
 
+from pandas.util.py3compat import StringIO
+
 from pandas.lib import NaT, iNaT
 import pandas.lib as lib
 import cPickle as pickle
@@ -967,16 +969,16 @@ def test_frame_to_period(self):
 
     def test_timestamp_fields(self):
         # extra fields from DatetimeIndex like quarter and week
         from pandas.lib import Timestamp
-        idx = tm.makeDateIndex(10)
+        idx = tm.makeDateIndex(100)
 
         fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter']
         for f in fields:
-            expected = getattr(idx, f)[0]
-            result = getattr(Timestamp(idx[0]), f)
+            expected = getattr(idx, f)[-1]
+            result = getattr(Timestamp(idx[-1]), f)
             self.assertEqual(result, expected)
 
-        self.assertEqual(idx.freq, Timestamp(idx[0], idx.freq).freq)
-        self.assertEqual(idx.freqstr, Timestamp(idx[0], idx.freq).freqstr)
+        self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq)
+        self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr)
 
     def test_timestamp_date_out_of_range(self):
         self.assertRaises(ValueError, Timestamp, '1676-01-01')
@@ -1185,6 +1187,14 @@ def test_to_html_timestamp(self):
         result = df.to_html()
         self.assert_('2000-01-01' in result)
 
+    def test_to_csv_numpy_16_bug(self):
+        frame = DataFrame({'a': date_range('1/1/2000', periods=10)})
+
+        buf = StringIO()
+        frame.to_csv(buf)
+
+        result = buf.getvalue()
+        self.assert_('2000-01-01' in result)
 
 def _simple_ts(start, end, freq='D'):
     rng = date_range(start, end, freq=freq)
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index 36bfca3ec2a7c..36a9f32bd04c4 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -36,7 +36,7 @@ def _infer(a, b):
 
 
 def _maybe_get_tz(tz):
-    if isinstance(tz, (str, unicode)):
+    if isinstance(tz, basestring):
         import pytz
         tz = pytz.timezone(tz)
     if com.is_integer(tz):
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 01117f3e9b4c0..904426731738a 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -7,6 +7,8 @@
 import string
 import sys
 
+from contextlib import contextmanager  # contextlib is available since 2.5
+
 from distutils.version import LooseVersion
 
 from numpy.random import randn
@@ -23,7 +25,6 @@
 from pandas.tseries.period import PeriodIndex
 from pandas.tseries.interval import IntervalIndex
 
-
 Index = index.Index
 Series = series.Series
 DataFrame = frame.DataFrame
@@ -36,6 +37,10 @@ def rands(n):
     choices = string.ascii_letters + string.digits
     return ''.join([random.choice(choices) for _ in xrange(n)])
 
+def randu(n):
+    choices = u"".join(map(unichr,range(1488,1488+26))) + string.digits
+    return ''.join([random.choice(choices) for _ in xrange(n)])
+
 #-------------------------------------------------------------------------------
 # Console debugging tools
 
@@ -50,6 +55,10 @@ def debug(f, *args, **kwargs):
         pdb = Pdb(**kw)
     return pdb.runcall(f, *args, **kwargs)
 
+def pudebug(f, *args, **kwargs):
+    import pudb
+    return pudb.runcall(f, *args, **kwargs)
+
 def set_trace():
     from IPython.core.debugger import Pdb
     try:
@@ -183,6 +192,9 @@ def getCols(k):
 def makeStringIndex(k):
     return Index([rands(10) for _ in xrange(k)])
 
+def makeUnicodeIndex(k):
+    return Index([randu(10) for _ in xrange(k)])
+
 def makeIntIndex(k):
     return Index(range(k))
 
@@ -378,3 +390,52 @@ def test_network(self):
         t.network = True
         return t
 
+
+class SimpleMock(object):
+    """
+    Poor man's mocking object
+
+    Note: only works for new-style classes, assumes __getattribute__ exists.
+
+    >>> a = type("Duck",(),{})
+    >>> a.attr1,a.attr2 ="fizz","buzz"
+    >>> b = SimpleMock(a,"attr1","bar")
+    >>> b.attr1 == "bar" and b.attr2 == "buzz"
+    True
+    >>> a.attr1 == "fizz" and a.attr2 == "buzz"
+    True
+    """
+    def __init__(self, obj, *args, **kwds):
+        assert(len(args) % 2 == 0)
+        attrs = kwds.get("attrs", {})
+        for k, v in zip(args[::2], args[1::2]):
+            # dict comprehensions break 2.6
+            attrs[k]=v
+        self.attrs = attrs
+        self.obj = obj
+
+    def __getattribute__(self,name):
+        attrs = object.__getattribute__(self, "attrs")
+        obj = object.__getattribute__(self, "obj")
+        return attrs.get(name, type(obj).__getattribute__(obj,name))
+
+@contextmanager
+def stdin_encoding(encoding=None):
+    """
+    Context manager for running bits of code while emulating an arbitrary
+    stdin encoding.
+
+    >>> import sys
+    >>> _encoding = sys.stdin.encoding
+    >>> with stdin_encoding('AES'): sys.stdin.encoding
+    'AES'
+    >>> sys.stdin.encoding==_encoding
+    True
+
+    """
+    import sys
+    _stdin = sys.stdin
+    sys.stdin = SimpleMock(sys.stdin, "encoding", encoding)
+    yield
+    sys.stdin = _stdin
diff --git a/setup.py b/setup.py
index 999143ff2c3e7..d39fcd73ad17c 100755
--- a/setup.py
+++ b/setup.py
@@ -185,7 +185,7 @@
 MICRO = 0
 ISRELEASED = True
 VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
-QUALIFIER = 'rc2'
+QUALIFIER = ''
 
 FULLVERSION = VERSION
 if not ISRELEASED:
@@ -372,15 +372,14 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
 
 algos_ext = Extension('pandas._algos',
                       sources=[srcpath('generated', suffix=suffix)],
-                      include_dirs=[np.get_include()],
-                      )
+                      include_dirs=[np.get_include()])
 
 lib_depends = tseries_depends + ['pandas/src/numpy_helper.h',
                                  'pandas/src/datetime/np_datetime.h',
                                  'pandas/src/datetime/np_datetime_strings.h']
 
 # some linux distros require it
-libraries = ['m'] if 'win' not in sys.platform else []
+libraries = ['m'] if 'win32' not in sys.platform else []
 
 lib_ext = Extension('pandas.lib',
                     depends=lib_depends,
@@ -393,6 +392,11 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
                     # extra_compile_args=['-Wconversion']
                     )
 
+sparse_ext = Extension('pandas._sparse',
+                       sources=[srcpath('sparse', suffix=suffix)],
+                       include_dirs=[np.get_include()],
+                       libraries=libraries)
+
 period_ext = Extension('pandas._period',
                        depends=plib_depends + ['pandas/src/numpy_helper.h',
                                                'pandas/src/period.h'],
@@ -402,10 +406,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
                        include_dirs=[np.get_include()])
 
-sparse_ext = Extension('pandas._sparse',
-                       sources=[srcpath('sparse', suffix=suffix)],
-                       include_dirs=[np.get_include()])
-
 sandbox_ext = Extension('pandas._sandbox',
                         sources=[srcpath('sandbox', suffix=suffix)],
                         include_dirs=[np.get_include()])
diff --git a/tox.ini b/tox.ini
index 2f5f998b5aeff..9baf33cf8d2f9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,7 @@ envlist = py25, py26, py27, py31, py32
 commands =
     {envpython} setup.py clean build_ext install
     {envbindir}/nosetests tests
-    rm -rf {toxinidir}/build {toxinidir}/tests
+    /bin/rm -rf {toxinidir}/build {toxinidir}/tests
 deps =
     cython
     numpy >= 1.6.1
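For reviewers, a minimal usage sketch (not part of the patch) of two behaviors
this diff introduces; it assumes a Python 2 environment with this 0.9.0 code
installed, and the expected values follow from the parser and
DataFrame.update changes above:

    from StringIO import StringIO

    import numpy as np
    from pandas import DataFrame, read_csv

    # With header=None and no names, default column labels are now
    # X0, X1, ... instead of X.1, X.2, ... (#2000)
    df = read_csv(StringIO('1,2,3\n4,5,6'), header=None)
    print list(df.columns)  # ['X0', 'X1', 'X2']

    # DataFrame.update (and the new Panel.update) now coerce objects
    # like dicts to a DataFrame/Panel before updating (#1999, #1988)
    df2 = DataFrame({'a': [1., np.nan], 'b': [3., 4.]})
    df2.update({'a': [5., 6.]})  # dict is coerced to a DataFrame first
    print df2['a'].tolist()  # [5.0, 6.0]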