Skip to content

Commit

Permalink
merge with master yet again
Browse files Browse the repository at this point in the history
  • Loading branch information
Dr-Irv committed Dec 3, 2018
2 parents 0e60770 + f06b969 commit 0835997
Show file tree
Hide file tree
Showing 295 changed files with 13,297 additions and 10,440 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ jobs:
command: |
export PATH="$MINICONDA_DIR/bin:$PATH"
source activate pandas-dev
echo "pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas"
pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas
# Escape the inner quotes so the echoed text is exactly the command run on the next line.
echo "pytest -m \"not slow and not network\" --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas"
pytest -m "not slow and not network" --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas
25 changes: 10 additions & 15 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,28 +34,28 @@ matrix:
include:
- dist: trusty
env:
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network"
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="not slow and not network"

- dist: trusty
env:
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" PATTERN="slow"
addons:
apt:
packages:
- language-pack-zh-hans
- dist: trusty
env:
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" TEST_ARGS="--skip-slow"
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="not slow"
addons:
apt:
packages:
- python-gtk2
- dist: trusty
env:
- JOB="3.6, lint, coverage" ENV_FILE="ci/deps/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="not slow and not network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
- dist: trusty
env:
- JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
- JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" PATTERN="not slow and not network" TEST_ARGS="-W error" PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
Expand All @@ -64,7 +64,7 @@ matrix:
# In allow_failures
- dist: trusty
env:
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"

# In allow_failures
- dist: trusty
Expand All @@ -73,7 +73,7 @@ matrix:
allow_failures:
- dist: trusty
env:
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
- dist: trusty
env:
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
Expand Down Expand Up @@ -107,20 +107,15 @@ script:
- echo "script start"
- source activate pandas-dev
- ci/run_build_docs.sh
- ci/script_single.sh
- ci/script_multi.sh
- ci/code_checks.sh

after_success:
- ci/upload_coverage.sh
- ci/run_tests.sh

after_script:
- echo "after_script start"
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- if [ -e test-data-single.xml ]; then
ci/print_skipped.py test-data-single.xml;
ci/print_skipped.py test-data-single.xml;
fi
- if [ -e test-data-multiple.xml ]; then
ci/print_skipped.py test-data-multiple.xml;
ci/print_skipped.py test-data-multiple.xml;
fi
- echo "after_script done"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ pip install pandas
```

## Dependencies
- [NumPy](https://www.numpy.org): 1.9.0 or higher
- [NumPy](https://www.numpy.org): 1.12.0 or higher
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
- [pytz](https://pythonhosted.org/pytz): 2011k or higher

Expand Down
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def setup(self):
np.iinfo(np.int16).max,
size=(N, N)))

self.s = Series(np.random.randn(N))

# Division

def time_frame_float_div(self):
Expand All @@ -74,6 +76,17 @@ def time_frame_int_mod(self):
def time_frame_float_mod(self):
self.df % self.df2

# Dot product

def time_frame_dot(self):
self.df.dot(self.df2)

def time_series_dot(self):
self.s.dot(self.s)

def time_frame_series_dot(self):
self.df.dot(self.s)


class Timeseries(object):

Expand Down
8 changes: 8 additions & 0 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def setup(self):
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
self.values_all_nan = [np.nan] * len(self.values)
self.values_all_int8 = np.ones(N, 'int8')
self.categorical = pd.Categorical(self.values, self.categories)
self.series = pd.Series(self.categorical)

def time_regular(self):
pd.Categorical(self.values, self.categories)
Expand All @@ -68,6 +70,12 @@ def time_all_nan(self):
def time_from_codes_all_int8(self):
pd.Categorical.from_codes(self.values_all_int8, self.categories)

def time_existing_categorical(self):
pd.Categorical(self.categorical)

def time_existing_series(self):
pd.Categorical(self.series)


class ValueCounts(object):

Expand Down
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,36 @@ def time_reindex_upcast(self):
self.df2.reindex(np.random.permutation(range(1200)))


class Rename(object):
    """Time ``DataFrame.rename`` with dict mappers on the index and columns."""

    def setup(self):
        """Create the frames and the identity mapping used by the timers."""
        N = 10**3
        self.df = DataFrame(np.random.randn(N * 10, N))
        self.idx = np.arange(4 * N, 7 * N)
        # Identity mapping: every label in ``idx`` maps to itself.
        self.dict_idx = dict(zip(self.idx, self.idx))

        # Each column gets one of four integer/bool dtypes, picked at random.
        columns = {}
        for col in range(N):
            candidates = {
                0: np.random.randint(0, 2, N).astype(np.bool_),
                1: np.random.randint(0, N, N).astype(np.int16),
                2: np.random.randint(0, N, N).astype(np.int32),
                3: np.random.randint(0, N, N).astype(np.int64),
            }
            columns[col] = candidates[np.random.randint(0, 4)]
        self.df2 = DataFrame(columns)

    def time_rename_single(self):
        """Rename with a one-entry mapper."""
        mapper = {0: 0}
        self.df.rename(mapper)

    def time_rename_axis0(self):
        """Rename with the full mapper passed positionally."""
        mapper = self.dict_idx
        self.df.rename(mapper)

    def time_rename_axis1(self):
        """Rename the columns with the full mapper."""
        mapper = self.dict_idx
        self.df.rename(columns=mapper)

    def time_rename_both_axes(self):
        """Rename index and columns in a single call."""
        mapper = self.dict_idx
        self.df.rename(index=mapper, columns=mapper)

    def time_dict_rename_both_axes(self):
        """Rename index and columns in a single call.

        NOTE(review): this does the same work as ``time_rename_both_axes``
        and ``self.df2`` is never used by any timer -- confirm whether this
        benchmark was meant to run against ``self.df2``.
        """
        mapper = self.dict_idx
        self.df.rename(index=mapper, columns=mapper)


class Iteration(object):

def setup(self):
Expand Down
53 changes: 42 additions & 11 deletions asv_bench/benchmarks/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,48 @@
matplotlib.use('Agg')


class Plotting(object):

def setup(self):
self.s = Series(np.random.randn(1000000))
self.df = DataFrame({'col': self.s})

def time_series_plot(self):
self.s.plot()

def time_frame_plot(self):
self.df.plot()
class SeriesPlotting(object):
    """Time ``Series.plot`` once per plot kind."""
    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
    param_names = ['kind']

    def setup(self, kind):
        """Build the random Series plotted by ``time_series_plot``."""
        # The number of points depends on the plot kind.
        size = 1000000
        if kind in ('bar', 'barh', 'pie'):
            size = 100
        elif kind in ('kde',):
            size = 10000

        data = Series(np.random.randn(size))
        # 'area' and 'pie' are fed absolute values only.
        self.s = data.abs() if kind in ('area', 'pie') else data

    def time_series_plot(self, kind):
        """Plot the prepared Series using the given kind."""
        self.s.plot(kind=kind)


class FramePlotting(object):
    """Time ``DataFrame.plot`` of column 'y' against 'x' once per plot kind."""
    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
               'hexbin']]
    param_names = ['kind']

    def setup(self, kind):
        """Build the two-column random frame plotted by ``time_frame_plot``."""
        # The number of rows depends on the plot kind.
        size = 1000000
        if kind in ('bar', 'barh', 'pie'):
            size = 100
        elif kind in ('kde', 'scatter', 'hexbin'):
            size = 10000

        x_values = Series(np.random.randn(size))
        y_values = Series(np.random.randn(size))
        # 'area' and 'pie' are fed absolute values only.
        if kind in ('area', 'pie'):
            x_values, y_values = x_values.abs(), y_values.abs()

        self.x = x_values
        self.y = y_values
        self.df = DataFrame({'x': self.x, 'y': self.y})

    def time_frame_plot(self, kind):
        """Plot the prepared frame using the given kind."""
        self.df.plot(x='x', y='y', kind=kind)


class TimeseriesPlotting(object):
Expand Down
38 changes: 38 additions & 0 deletions asv_bench/benchmarks/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
pd.get_dummies(self.s, sparse=True)


class Cut(object):
    """Time ``pd.cut`` and ``pd.qcut`` on four dtypes for each bin count."""
    params = [[4, 10, 1000]]
    param_names = ['bins']

    def setup(self, bins):
        """Create the int, float, timedelta and datetime input Series."""
        N = 10**5
        repeats = 5
        # Int and float inputs repeat every value ``repeats`` times.
        self.int_series = pd.Series(np.repeat(np.arange(N), repeats))
        self.float_series = pd.Series(np.repeat(np.random.randn(N), repeats))
        # Random integers in [0, N) interpreted as nanosecond values.
        timedelta_values = np.random.randint(N, size=N)
        self.timedelta_series = pd.Series(timedelta_values,
                                          dtype='timedelta64[ns]')
        datetime_values = np.random.randint(N, size=N)
        self.datetime_series = pd.Series(datetime_values,
                                         dtype='datetime64[ns]')

    def time_cut_int(self, bins):
        """``pd.cut`` on the integer Series."""
        data = self.int_series
        pd.cut(data, bins)

    def time_cut_float(self, bins):
        """``pd.cut`` on the float Series."""
        data = self.float_series
        pd.cut(data, bins)

    def time_cut_timedelta(self, bins):
        """``pd.cut`` on the timedelta Series."""
        data = self.timedelta_series
        pd.cut(data, bins)

    def time_cut_datetime(self, bins):
        """``pd.cut`` on the datetime Series."""
        data = self.datetime_series
        pd.cut(data, bins)

    def time_qcut_int(self, bins):
        """``pd.qcut`` on the integer Series."""
        data = self.int_series
        pd.qcut(data, bins)

    def time_qcut_float(self, bins):
        """``pd.qcut`` on the float Series."""
        data = self.float_series
        pd.qcut(data, bins)

    def time_qcut_timedelta(self, bins):
        """``pd.qcut`` on the timedelta Series."""
        data = self.timedelta_series
        pd.qcut(data, bins)

    def time_qcut_datetime(self, bins):
        """``pd.qcut`` on the datetime Series."""
        data = self.datetime_series
        pd.qcut(data, bins)


from .pandas_vb_common import setup # noqa: F401
36 changes: 36 additions & 0 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
getattr(self.roll, method)()


class ExpandingMethods(object):
    """Time expanding-window aggregations on Series and DataFrame.

    One benchmark is generated per combination of:

    constructor : name of the pandas container, looked up on ``pd``
    dtype : dtype the random input array is cast to
    method : name of the expanding aggregation that is called
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              ['int', 'float'],
              ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
               'sum'])
    # Must list exactly one name per entry in ``params``, in the same order;
    # an expanding window has no 'window' parameter.
    param_names = ['constructor', 'dtype', 'method']

    def setup(self, constructor, dtype, method):
        """Create the expanding object that ``time_expanding`` aggregates."""
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        self.expanding = getattr(pd, constructor)(arr).expanding()

    def time_expanding(self, constructor, dtype, method):
        """Call the aggregation named by ``method`` on the expanding object."""
        getattr(self.expanding, method)()


class EWMMethods(object):
    """Time exponentially weighted aggregations on Series and DataFrame.

    One benchmark is generated per combination of:

    constructor : name of the pandas container, looked up on ``pd``
    window : value passed to ``ewm`` as ``halflife``
    dtype : dtype the random input array is cast to
    method : name of the EWM aggregation that is called
    """

    sample_time = 0.2
    params = (['DataFrame', 'Series'],
              [10, 1000],
              ['int', 'float'],
              ['mean', 'std'])
    # Must list exactly one name per entry in ``params``, in the same order.
    param_names = ['constructor', 'window', 'dtype', 'method']

    def setup(self, constructor, window, dtype, method):
        """Create the EWM object that ``time_ewm`` aggregates."""
        N = 10**5
        arr = (100 * np.random.random(N)).astype(dtype)
        self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)

    def time_ewm(self, constructor, window, dtype, method):
        """Call the aggregation named by ``method`` on the EWM object."""
        getattr(self.ewm, method)()


class VariableWindowMethods(Methods):
sample_time = 0.2
params = (['DataFrame', 'Series'],
Expand Down
36 changes: 32 additions & 4 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):

class Correlation(object):

params = ['spearman', 'kendall', 'pearson']
param_names = ['method']
params = [['spearman', 'kendall', 'pearson'], [True, False]]
param_names = ['method', 'use_bottleneck']

def setup(self, method):
def setup(self, method, use_bottleneck):
try:
pd.options.compute.use_bottleneck = use_bottleneck
except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.df = pd.DataFrame(np.random.randn(1000, 30))
self.s = pd.Series(np.random.randn(1000))
self.s2 = pd.Series(np.random.randn(1000))

def time_corr(self, method):
def time_corr(self, method, use_bottleneck):
self.df.corr(method=method)

def time_corr_series(self, method, use_bottleneck):
self.s.corr(self.s2, method=method)


class Covariance(object):
    """Time ``Series.cov`` with the bottleneck option switched on and off."""

    params = [[True, False]]
    param_names = ['use_bottleneck']

    def setup(self, use_bottleneck):
        """Apply the bottleneck setting and create the two input Series."""
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except TypeError:
            # Fall back to setting the module-level flag in nanops directly.
            import pandas.core.nanops as nanops
            nanops._USE_BOTTLENECK = use_bottleneck

        size = 100000
        self.s = pd.Series(np.random.randn(size))
        self.s2 = pd.Series(np.random.randn(size))

    def time_cov_series(self, use_bottleneck):
        """Compute the covariance between the two prepared Series."""
        left, right = self.s, self.s2
        left.cov(right)


from .pandas_vb_common import setup # noqa: F401
Loading

0 comments on commit 0835997

Please sign in to comment.