Skip to content

Commit

Permalink
Delay import (#17710)
Browse files Browse the repository at this point in the history
closes #16764
  • Loading branch information
TomAugspurger authored and jreback committed Oct 2, 2017
1 parent a3d538a commit 2310faa
Show file tree
Hide file tree
Showing 21 changed files with 183 additions and 181 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ script:
- ci/script_single.sh
- ci/script_multi.sh
- ci/lint.sh
- echo "checking imports"
- source activate pandas && python ci/check_imports.py
- echo "script done"

after_success:
Expand Down
36 changes: 36 additions & 0 deletions ci/check_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Check that certain modules are not loaded by `import pandas`
"""
import sys

# Top-level import names of optional dependencies that a bare
# ``import pandas`` must not load. Entries are matched against the first
# dotted component of the keys of ``sys.modules``, so each one has to be the
# exact, case-sensitive import name of the package.
blacklist = {
    'bs4',
    'html5lib',
    'IPython',  # import name is capitalised; 'ipython' would never match
    'jinja2',  # the comma matters: without it this fuses into 'jinja2lxml'
    'lxml',
    'matplotlib',
    'numexpr',
    'openpyxl',
    'py',
    'pytest',
    's3fs',
    'scipy',
    'tables',
    'xlrd',
    'xlsxwriter',
    'xlwt',
}


def main():
    """Import pandas and exit with an error if a blacklisted module loaded.

    The first dotted component of every ``sys.modules`` key is compared
    with ``blacklist``; any overlap is reported through ``sys.exit``.
    """
    import pandas  # noqa

    top_level = {name.partition('.')[0] for name in sys.modules}
    imported = top_level.intersection(blacklist)
    if not imported:
        return
    sys.exit("Imported {}".format(imported))


if __name__ == '__main__':
main()
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ Other Enhancements
- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
- Improved the import time of pandas by about 2.25x (:issue:`16764`)


.. _whatsnew_0210.api_breaking:
Expand Down Expand Up @@ -559,6 +560,8 @@ Other API Changes
- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`)
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
- Pandas no longer registers matplotlib converters on import. The converters
will be registered and used when the first plot is drawn (:issue:`17710`)

.. _whatsnew_0210.deprecations:

Expand Down
23 changes: 0 additions & 23 deletions pandas/core/computation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +0,0 @@

import warnings
from distutils.version import LooseVersion

_NUMEXPR_INSTALLED = False
_MIN_NUMEXPR_VERSION = "2.4.6"

try:
import numexpr as ne
ver = ne.__version__
_NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)

if not _NUMEXPR_INSTALLED:
warnings.warn(
"The installed version of numexpr {ver} is not supported "
"in pandas and will be not be used\nThe minimum supported "
"version is {min_ver}\n".format(
ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)

except ImportError: # pragma: no cover
pass

__all__ = ['_NUMEXPR_INSTALLED']
22 changes: 22 additions & 0 deletions pandas/core/computation/check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import warnings
from distutils.version import LooseVersion

# True only when numexpr can be imported AND its version is at least
# _MIN_NUMEXPR_VERSION; stays False otherwise.
_NUMEXPR_INSTALLED = False
_MIN_NUMEXPR_VERSION = "2.4.6"

try:
    import numexpr as ne
except ImportError:  # pragma: no cover
    # numexpr is an optional dependency; keep _NUMEXPR_INSTALLED False.
    pass
else:
    ver = ne.__version__
    # Wrap both sides so the comparison is version-aware rather than
    # depending on a reflected str-vs-LooseVersion comparison.
    _NUMEXPR_INSTALLED = (LooseVersion(ver) >=
                          LooseVersion(_MIN_NUMEXPR_VERSION))

    if not _NUMEXPR_INSTALLED:
        warnings.warn(
            "The installed version of numexpr {ver} is not supported "
            "in pandas and will not be used\nThe minimum supported "
            "version is {min_ver}\n".format(
                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)

__all__ = ['_NUMEXPR_INSTALLED']
8 changes: 6 additions & 2 deletions pandas/core/computation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

import tokenize
from pandas.io.formats.printing import pprint_thing
from pandas.core.computation import _NUMEXPR_INSTALLED
from pandas.core.computation.expr import Expr, _parsers, tokenize_string
from pandas.core.computation.scope import _ensure_scope
from pandas.compat import string_types
from pandas.core.computation.engines import _engines
Expand All @@ -32,6 +30,7 @@ def _check_engine(engine):
string engine
"""
from pandas.core.computation.check import _NUMEXPR_INSTALLED

if engine is None:
if _NUMEXPR_INSTALLED:
Expand Down Expand Up @@ -69,6 +68,8 @@ def _check_parser(parser):
KeyError
* If an invalid parser is passed
"""
from pandas.core.computation.expr import _parsers

if parser not in _parsers:
raise KeyError('Invalid parser {parser!r} passed, valid parsers are'
' {valid}'.format(parser=parser, valid=_parsers.keys()))
Expand Down Expand Up @@ -129,6 +130,8 @@ def _convert_expression(expr):


def _check_for_locals(expr, stack_level, parser):
from pandas.core.computation.expr import tokenize_string

at_top_of_stack = stack_level == 0
not_pandas_parser = parser != 'pandas'

Expand Down Expand Up @@ -252,6 +255,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
pandas.DataFrame.query
pandas.DataFrame.eval
"""
from pandas.core.computation.expr import Expr

inplace = validate_bool_kwarg(inplace, "inplace")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import warnings
import numpy as np
from pandas.core.common import _values_from_object
from pandas.core.computation import _NUMEXPR_INSTALLED
from pandas.core.computation.check import _NUMEXPR_INSTALLED
from pandas.core.config import get_option

if _NUMEXPR_INSTALLED:
Expand Down
58 changes: 30 additions & 28 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,34 +437,36 @@ def use_inf_as_na_cb(key):
writer_engine_doc = """
: string
The default Excel writer engine for '{ext}' files. Available options:
'{default}' (the default){others}.
"""

with cf.config_prefix('io.excel'):
# going forward, will be additional writers
for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]:
default = options.pop(0)
if options:
options = " " + ", ".join(options)
else:
options = ""
doc = writer_engine_doc.format(ext=ext, default=default,
others=options)
cf.register_option(ext + '.writer', default, doc, validator=str)

def _register_xlsx(engine, other):
others = ", '{other}'".format(other=other)
doc = writer_engine_doc.format(ext='xlsx', default=engine,
others=others)
cf.register_option('xlsx.writer', engine, doc, validator=str)

try:
# better memory footprint
import xlsxwriter # noqa
_register_xlsx('xlsxwriter', 'openpyxl')
except ImportError:
# fallback
_register_xlsx('openpyxl', 'xlsxwriter')
auto, {others}.
"""

_xls_options = ['xlwt']
_xlsm_options = ['openpyxl']
_xlsx_options = ['openpyxl', 'xlsxwriter']


with cf.config_prefix("io.excel.xls"):
cf.register_option("writer", "auto",
writer_engine_doc.format(
ext='xls',
others=', '.join(_xls_options)),
validator=str)

with cf.config_prefix("io.excel.xlsm"):
cf.register_option("writer", "auto",
writer_engine_doc.format(
ext='xlsm',
others=', '.join(_xlsm_options)),
validator=str)


with cf.config_prefix("io.excel.xlsx"):
cf.register_option("writer", "auto",
writer_engine_doc.format(
ext='xlsx',
others=', '.join(_xlsx_options)),
validator=str)


# Set up the io.parquet specific configuration.
parquet_engine_doc = """
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@
create_block_manager_from_blocks)
from pandas.core.series import Series
from pandas.core.categorical import Categorical
import pandas.core.computation.expressions as expressions
import pandas.core.algorithms as algorithms
from pandas.core.computation.eval import eval as _eval
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
OrderedDict, raise_with_traceback)
from pandas import compat
Expand Down Expand Up @@ -2296,6 +2294,8 @@ def eval(self, expr, inplace=False, **kwargs):
>>> df.eval('a + b')
>>> df.eval('c = a + b')
"""
from pandas.core.computation.eval import eval as _eval

inplace = validate_bool_kwarg(inplace, 'inplace')
resolvers = kwargs.pop('resolvers', None)
kwargs['level'] = kwargs.pop('level', 0) + 1
Expand Down Expand Up @@ -3840,6 +3840,7 @@ def _combine_const(self, other, func, raise_on_error=True, try_cast=True):

def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):

import pandas.core.computation.expressions as expressions
# unique
if self.columns.is_unique:

Expand Down Expand Up @@ -3992,6 +3993,7 @@ def combine_first(self, other):
-------
combined : DataFrame
"""
import pandas.core.computation.expressions as expressions

def combiner(x, y, needs_i8_conversion=False):
x_values = x.values if hasattr(x, 'values') else x
Expand Down Expand Up @@ -4027,6 +4029,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
If True, will raise an error if the DataFrame and other both
contain data in the same place.
"""
import pandas.core.computation.expressions as expressions
# TODO: Support other joins
if join != 'left': # pragma: no cover
raise NotImplementedError("Only left join is supported")
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
from pandas._libs.tslib import Timedelta
from pandas._libs.lib import BlockPlacement

import pandas.core.computation.expressions as expressions
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg
from pandas import compat
Expand Down Expand Up @@ -1395,6 +1394,8 @@ def where(self, other, cond, align=True, raise_on_error=True,
-------
a new block(s), the result of the func
"""
import pandas.core.computation.expressions as expressions

values = self.values
orig_other = other
if transpose:
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

from pandas import compat
from pandas.util._decorators import Appender
import pandas.core.computation.expressions as expressions

from pandas.compat import bind_method
import pandas.core.missing as missing
Expand Down Expand Up @@ -668,8 +667,9 @@ def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""

def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1193,6 +1193,8 @@ def to_series(right):
def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns',
fill_zeros=None, **eval_kwargs):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1349,6 +1351,8 @@ def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None,
# copied from Series na_op above, but without unnecessary branch for
# non-scalar
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True, **eval_kwargs)
Expand Down Expand Up @@ -1378,6 +1382,8 @@ def f(self, other):

def _comp_method_PANEL(op, name, str_rep=None, masker=False):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
is_string_like, is_scalar)
from pandas.core.dtypes.missing import notna

import pandas.core.computation.expressions as expressions
import pandas.core.common as com
import pandas.core.ops as ops
import pandas.core.missing as missing
Expand Down Expand Up @@ -1500,6 +1499,8 @@ def _add_aggregate_operations(cls, use_numexpr=True):
def _panel_arith_method(op, name, str_rep=None, default_axis=None,
fill_zeros=None, **eval_kwargs):
def na_op(x, y):
import pandas.core.computation.expressions as expressions

try:
result = expressions.evaluate(op, str_rep, x, y,
raise_on_error=True,
Expand Down
Loading

1 comment on commit 2310faa

@gerritholl
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit appears to trigger pydata/xarray#1661

Please sign in to comment.