Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into ea-repr
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Nov 28, 2018
2 parents a35399e + db8d33e commit 740f9e5
Show file tree
Hide file tree
Showing 287 changed files with 16,063 additions and 14,141 deletions.
10 changes: 8 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ jobs:
name: build
command: |
./ci/circle/install_circle.sh
./ci/circle/show_circle.sh
export PATH="$MINICONDA_DIR/bin:$PATH"
source activate pandas-dev
python -c "import pandas; pandas.show_versions();"
- run:
name: test
command: ./ci/circle/run_circle.sh --skip-slow --skip-network
command: |
export PATH="$MINICONDA_DIR/bin:$PATH"
source activate pandas-dev
echo "pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas"
pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ before_script:

script:
- echo "script start"
- source activate pandas-dev
- ci/run_build_docs.sh
- ci/script_single.sh
- ci/script_multi.sh
Expand All @@ -115,7 +116,7 @@ after_success:

after_script:
- echo "after_script start"
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- if [ -e test-data-single.xml ]; then
ci/print_skipped.py test-data-single.xml;
fi
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ build: clean_pyc
python setup.py build_ext --inplace

lint-diff:
git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8
git diff upstream/master --name-only -- "*.py" | xargs flake8

develop: build
-python setup.py develop
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ pip install pandas
```

## Dependencies
- [NumPy](https://www.numpy.org): 1.9.0 or higher
- [NumPy](https://www.numpy.org): 1.12.0 or higher
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
- [pytz](https://pythonhosted.org/pytz): 2011k or higher

Expand Down
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def setup(self):
np.iinfo(np.int16).max,
size=(N, N)))

self.s = Series(np.random.randn(N))

# Division

def time_frame_float_div(self):
Expand All @@ -74,6 +76,17 @@ def time_frame_int_mod(self):
def time_frame_float_mod(self):
self.df % self.df2

# Dot product

def time_frame_dot(self):
self.df.dot(self.df2)

def time_series_dot(self):
self.s.dot(self.s)

def time_frame_series_dot(self):
self.df.dot(self.s)


class Timeseries(object):

Expand Down
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,36 @@ def time_reindex_upcast(self):
self.df2.reindex(np.random.permutation(range(1200)))


class Rename(object):
    """Benchmarks for ``DataFrame.rename`` on the index, columns, or both."""

    def setup(self):
        """Create the frames and the identity label mapping for the timings."""
        size = 10**3
        self.df = DataFrame(np.random.randn(size * 10, size))
        self.idx = np.arange(4 * size, 7 * size)
        # Identity mapping: every label in ``idx`` is renamed to itself.
        self.dict_idx = dict(zip(self.idx, self.idx))

        # ``df2`` gets one randomly chosen dtype per column.  All four
        # candidate arrays are generated before one is selected, so the
        # sequence of draws from ``np.random`` stays the same.  ``df2`` is
        # not referenced by the timing methods of this class.
        mixed_columns = {}
        for col in range(size):
            candidates = (
                np.random.randint(0, 2, size).astype(np.bool_),
                np.random.randint(0, size, size).astype(np.int16),
                np.random.randint(0, size, size).astype(np.int32),
                np.random.randint(0, size, size).astype(np.int64),
            )
            mixed_columns[col] = candidates[np.random.randint(0, 4)]
        self.df2 = DataFrame(mixed_columns)

    def time_rename_single(self):
        """Rename with a one-entry mapping passed positionally."""
        single_label = {0: 0}
        self.df.rename(single_label)

    def time_rename_axis0(self):
        """Rename with the full mapping passed positionally."""
        mapping = self.dict_idx
        self.df.rename(mapping)

    def time_rename_axis1(self):
        """Rename with the full mapping passed as ``columns``."""
        mapping = self.dict_idx
        self.df.rename(columns=mapping)

    def time_rename_both_axes(self):
        """Rename with the mapping passed as both ``index`` and ``columns``."""
        mapping = self.dict_idx
        self.df.rename(index=mapping, columns=mapping)

    def time_dict_rename_both_axes(self):
        """Issue the same call as ``time_rename_both_axes``."""
        mapping = self.dict_idx
        self.df.rename(index=mapping, columns=mapping)


class Iteration(object):

def setup(self):
Expand Down
53 changes: 42 additions & 11 deletions asv_bench/benchmarks/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,48 @@
matplotlib.use('Agg')


class Plotting(object):

def setup(self):
self.s = Series(np.random.randn(1000000))
self.df = DataFrame({'col': self.s})

def time_series_plot(self):
self.s.plot()

def time_frame_plot(self):
self.df.plot()
class SeriesPlotting(object):
    """Benchmark ``Series.plot`` once per plot kind listed in ``params``."""

    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
    param_names = ['kind']

    def setup(self, kind):
        """Create ``self.s`` with a length that depends on ``kind``."""
        # (kinds, length) pairs; any kind not listed uses the default length.
        length_table = (
            (('bar', 'barh', 'pie'), 100),
            (('kde',), 10000),
        )
        n_points = 1000000
        for kinds, length in length_table:
            if kind in kinds:
                n_points = length
                break

        values = Series(np.random.randn(n_points))
        # 'area' and 'pie' are given absolute values instead of raw draws.
        self.s = values.abs() if kind in ('area', 'pie') else values

    def time_series_plot(self, kind):
        """Plot the prepared series with the requested ``kind``."""
        series = self.s
        series.plot(kind=kind)


class FramePlotting(object):
    """Benchmark ``DataFrame.plot`` once per plot kind listed in ``params``."""

    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
               'hexbin']]
    param_names = ['kind']

    def setup(self, kind):
        """Create ``self.x``, ``self.y`` and the two-column frame ``self.df``."""
        # (kinds, length) pairs; any kind not listed uses the default length.
        length_table = (
            (('bar', 'barh', 'pie'), 100),
            (('kde', 'scatter', 'hexbin'), 10000),
        )
        n_points = 1000000
        for kinds, length in length_table:
            if kind in kinds:
                n_points = length
                break

        # ``x`` is drawn before ``y``.
        x_values = Series(np.random.randn(n_points))
        y_values = Series(np.random.randn(n_points))
        if kind in ('area', 'pie'):
            # 'area' and 'pie' are given absolute values instead of raw draws.
            x_values = x_values.abs()
            y_values = y_values.abs()
        self.x = x_values
        self.y = y_values
        self.df = DataFrame({'x': self.x, 'y': self.y})

    def time_frame_plot(self, kind):
        """Plot column 'y' against column 'x' with the requested ``kind``."""
        frame = self.df
        frame.plot(x='x', y='y', kind=kind)


class TimeseriesPlotting(object):
Expand Down
38 changes: 38 additions & 0 deletions asv_bench/benchmarks/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
pd.get_dummies(self.s, sparse=True)


class Cut(object):
    """Benchmark ``pd.cut`` and ``pd.qcut`` on series of several dtypes."""

    params = [[4, 10, 1000]]
    param_names = ['bins']

    def setup(self, bins):
        """Create the int, float, timedelta and datetime input series.

        ``bins`` is accepted because the benchmark is parametrized on it; the
        generated data does not depend on it.
        """
        n_rows = 10**5

        repeated_ints = np.arange(n_rows).repeat(5)
        self.int_series = pd.Series(repeated_ints)

        repeated_floats = np.random.randn(n_rows).repeat(5)
        self.float_series = pd.Series(repeated_floats)

        timedelta_values = np.random.randint(n_rows, size=n_rows)
        self.timedelta_series = pd.Series(timedelta_values,
                                          dtype='timedelta64[ns]')

        datetime_values = np.random.randint(n_rows, size=n_rows)
        self.datetime_series = pd.Series(datetime_values,
                                         dtype='datetime64[ns]')

    def time_cut_int(self, bins):
        """``pd.cut`` on the integer series."""
        pd.cut(x=self.int_series, bins=bins)

    def time_cut_float(self, bins):
        """``pd.cut`` on the float series."""
        pd.cut(x=self.float_series, bins=bins)

    def time_cut_timedelta(self, bins):
        """``pd.cut`` on the timedelta series."""
        pd.cut(x=self.timedelta_series, bins=bins)

    def time_cut_datetime(self, bins):
        """``pd.cut`` on the datetime series."""
        pd.cut(x=self.datetime_series, bins=bins)

    def time_qcut_int(self, bins):
        """``pd.qcut`` on the integer series, with ``bins`` quantiles."""
        pd.qcut(x=self.int_series, q=bins)

    def time_qcut_float(self, bins):
        """``pd.qcut`` on the float series, with ``bins`` quantiles."""
        pd.qcut(x=self.float_series, q=bins)

    def time_qcut_timedelta(self, bins):
        """``pd.qcut`` on the timedelta series, with ``bins`` quantiles."""
        pd.qcut(x=self.timedelta_series, q=bins)

    def time_qcut_datetime(self, bins):
        """``pd.qcut`` on the datetime series, with ``bins`` quantiles."""
        pd.qcut(x=self.datetime_series, q=bins)


from .pandas_vb_common import setup # noqa: F401
36 changes: 36 additions & 0 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
getattr(self.roll, method)()


class ExpandingMethods(object):
    """Benchmark aggregation methods on an expanding window.

    The benchmark is parametrized over the container type, the dtype of the
    underlying data and the name of the aggregation method to call.
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              ['int', 'float'],
              ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
               'sum'])
    # One name per entry in ``params``, in the same order as the arguments of
    # ``setup`` and ``time_expanding``.  There is no ``window`` parameter
    # here, so listing one would shift the labels of the following
    # parameters.
    param_names = ['constructor', 'dtype', 'method']

    def setup(self, constructor, dtype, method):
        """Build the expanding-window object for one parameter combination.

        Parameters
        ----------
        constructor : str
            Name of the pandas class to instantiate, looked up on ``pd``.
        dtype : str
            dtype the random input array is cast to.
        method : str
            Unused here; the method is looked up in ``time_expanding``.
        """
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        self.expanding = getattr(pd, constructor)(arr).expanding()

    def time_expanding(self, constructor, dtype, method):
        """Call the aggregation named ``method`` on the expanding object."""
        getattr(self.expanding, method)()


class EWMMethods(object):
    """Benchmark aggregation methods on an exponentially weighted window.

    The benchmark is parametrized over the container type, the ``window``
    value (passed to ``ewm`` as ``halflife``), the dtype of the underlying
    data and the name of the aggregation method to call.
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              [10, 1000],
              ['int', 'float'],
              ['mean', 'std'])
    # One name per entry in ``params``, spelled like the matching arguments
    # of ``setup`` and ``time_ewm``.
    param_names = ['constructor', 'window', 'dtype', 'method']

    def setup(self, constructor, window, dtype, method):
        """Build the ``ewm`` object for one parameter combination.

        Parameters
        ----------
        constructor : str
            Name of the pandas class to instantiate, looked up on ``pd``.
        window : int
            Value passed to ``ewm`` as ``halflife``.
        dtype : str
            dtype the random input array is cast to.
        method : str
            Unused here; the method is looked up in ``time_ewm``.
        """
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)

    def time_ewm(self, constructor, window, dtype, method):
        """Call the aggregation named ``method`` on the ``ewm`` object."""
        getattr(self.ewm, method)()


class VariableWindowMethods(Methods):
sample_time = 0.2
params = (['DataFrame', 'Series'],
Expand Down
36 changes: 32 additions & 4 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):

class Correlation(object):

params = ['spearman', 'kendall', 'pearson']
param_names = ['method']
params = [['spearman', 'kendall', 'pearson'], [True, False]]
param_names = ['method', 'use_bottleneck']

def setup(self, method):
def setup(self, method, use_bottleneck):
try:
pd.options.compute.use_bottleneck = use_bottleneck
except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.df = pd.DataFrame(np.random.randn(1000, 30))
self.s = pd.Series(np.random.randn(1000))
self.s2 = pd.Series(np.random.randn(1000))

def time_corr(self, method):
def time_corr(self, method, use_bottleneck):
self.df.corr(method=method)

def time_corr_series(self, method, use_bottleneck):
self.s.corr(self.s2, method=method)


class Covariance(object):
    """Benchmark ``Series.cov`` with the bottleneck switch on and off."""

    params = [[True, False]]
    param_names = ['use_bottleneck']

    def setup(self, use_bottleneck):
        """Apply the bottleneck setting, then create the two input series."""
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except TypeError:
            # If assigning the option raises TypeError, set the flag on the
            # nanops module directly instead.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck

        n_obs = 100000
        self.s = pd.Series(np.random.randn(n_obs))
        self.s2 = pd.Series(np.random.randn(n_obs))

    def time_cov_series(self, use_bottleneck):
        """Compute the covariance of ``self.s`` with ``self.s2``."""
        left, right = self.s, self.s2
        left.cov(right)


from .pandas_vb_common import setup # noqa: F401
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,42 @@ def time_extract(self):
def time_findall(self):
self.s.str.findall('[A-Z]+')

def time_find(self):
self.s.str.find('[A-Z]+')

def time_rfind(self):
self.s.str.rfind('[A-Z]+')

def time_get(self):
self.s.str.get(0)

def time_len(self):
self.s.str.len()

def time_join(self):
self.s.str.join(' ')

def time_match(self):
self.s.str.match('A')

def time_normalize(self):
self.s.str.normalize('NFC')

def time_pad(self):
self.s.str.pad(100, side='both')

def time_partition(self):
self.s.str.partition('A')

def time_rpartition(self):
self.s.str.rpartition('A')

def time_replace(self):
self.s.str.replace('A', '\x01\x01')

def time_translate(self):
# Benchmark ``Series.str.translate`` on ``self.s`` with a one-entry table.
# NOTE(review): Python 3 ``str.translate`` looks characters up by integer
# code point, so a table keyed by the string 'A' presumably never matches
# and nothing is replaced -- confirm whether a code-point key such as
# ``ord('A')`` was intended.
self.s.str.translate({'A': '\x01\x01'})

def time_slice(self):
self.s.str.slice(5, 15, 2)

Expand All @@ -65,6 +86,12 @@ def time_upper(self):
def time_lower(self):
self.s.str.lower()

def time_wrap(self):
self.s.str.wrap(10)

def time_zfill(self):
self.s.str.zfill(10)


class Repeat(object):

Expand Down Expand Up @@ -129,6 +156,9 @@ def setup(self, expand):
def time_split(self, expand):
self.s.str.split('--', expand=expand)

def time_rsplit(self, expand):
self.s.str.rsplit('--', expand=expand)


class Dummies(object):

Expand Down
Loading

0 comments on commit 740f9e5

Please sign in to comment.