From efe717f32fb4678adffbf81bddb639d2eaf11a8c Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Tue, 25 Jan 2022 00:12:41 +0300 Subject: [PATCH] Implement "modin.pandas.show_versions()" and "python -m modin --versions" (#4007) Signed-off-by: Vasilij Litvinov --- modin/__main__.py | 39 ++++++++++++++ modin/config/envvars.py | 15 +++--- modin/pandas/__init__.py | 2 +- modin/test/test_utils.py | 12 +++++ modin/utils.py | 113 +++++++++++++++++++++++++++++++++++++++ setup.cfg | 2 + 6 files changed, 176 insertions(+), 7 deletions(-) create mode 100644 modin/__main__.py diff --git a/modin/__main__.py b/modin/__main__.py new file mode 100644 index 00000000000..10f2be17038 --- /dev/null +++ b/modin/__main__.py @@ -0,0 +1,39 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+
+"""Command-line interface piece, called when user issues "python -m modin --foo"."""
+
+import argparse
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        "python -m modin",
+        description="Drop-in pandas replacement; refer to https://modin.readthedocs.io/ for details.",
+    )
+    parser.add_argument(
+        "--versions",
+        action="store_true",
+        default=False,
+        help="Show versions of all known components",
+    )
+
+    args = parser.parse_args()
+    if args.versions:
+        from modin.utils import show_versions
+
+        show_versions()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modin/config/envvars.py b/modin/config/envvars.py
index 84dc8958055..de6de4fe6c1 100644
--- a/modin/config/envvars.py
+++ b/modin/config/envvars.py
@@ -81,6 +81,8 @@ def _get_default(cls):
         -------
         str
         """
+        from modin.utils import MIN_RAY_VERSION, MIN_DASK_VERSION  # deferred to call time; modin.utils itself imports modin.config
+
         if IsDebug.get():
             return "Python"
         try:
@@ -89,9 +91,9 @@ def _get_default(cls):
         except ImportError:
             pass
         else:
-            if version.parse(ray.__version__) < version.parse("1.4.0"):
+            if version.parse(ray.__version__) < MIN_RAY_VERSION:
                 raise ImportError(
-                    "Please `pip install modin[ray]` to install compatible Ray version."
+                    f"Please `pip install modin[ray]` to install compatible Ray version (>={MIN_RAY_VERSION})."
                 )
             return "Ray"
         try:
@@ -101,11 +103,12 @@ def _get_default(cls):
         except ImportError:
             pass
         else:
-            if version.parse(dask.__version__) < version.parse(
-                "2.22.0"
-            ) or version.parse(distributed.__version__) < version.parse("2.22.0"):
+            if (
+                version.parse(dask.__version__) < MIN_DASK_VERSION
+                or version.parse(distributed.__version__) < MIN_DASK_VERSION
+            ):
                 raise ImportError(
-                    "Please `pip install modin[dask]` to install compatible Dask version."
+                    f"Please `pip install modin[dask]` to install compatible Dask version (>={MIN_DASK_VERSION})."
                 )
             return "Dask"
         try:
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index d6be7f69cb7..74d2d7cbf35 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -76,7 +76,6 @@
     Grouper,
     array,
     Period,
-    show_versions,
     DateOffset,
     timedelta_range,
     infer_freq,
@@ -233,6 +232,7 @@ def init_remote_ray(partition):
     wide_to_long,
 )
 from .plotting import Plotting as plotting
+from modin.utils import show_versions
 
 __all__ = [
     "DataFrame",
diff --git a/modin/test/test_utils.py b/modin/test/test_utils.py
index ef305330545..d0d6414a2b7 100644
--- a/modin/test/test_utils.py
+++ b/modin/test/test_utils.py
@@ -13,6 +13,7 @@
 
 import pytest
 import modin.utils
+import json
 
 from textwrap import dedent, indent
 
@@ -251,3 +252,14 @@ def warns_that_defaulting_to_pandas():
         defaulting to Pandas.
     """
     return pytest.warns(UserWarning, match="defaulting to pandas")
+
+
+@pytest.mark.parametrize("as_json", [True, False])
+def test_show_versions(as_json, capsys):
+    modin.utils.show_versions(as_json=as_json)
+    versions = capsys.readouterr().out
+    assert modin.__version__ in versions
+
+    if as_json:
+        versions = json.loads(versions)
+        assert versions["modin dependencies"]["modin"] == modin.__version__
diff --git a/modin/utils.py b/modin/utils.py
index 0d4b0b06a8d..eefb0f38e3b 100644
--- a/modin/utils.py
+++ b/modin/utils.py
@@ -12,19 +12,31 @@
 # governing permissions and limitations under the License.
"""Collection of general utility functions, mostly for internal use.""" +from __future__ import annotations import importlib import types import re +import sys +import json +import codecs from textwrap import dedent, indent from typing import Union +from packaging import version import pandas import numpy as np from pandas.util._decorators import Appender +from pandas.util._print_versions import _get_sys_info, _get_dependency_info +from pandas._typing import JSONSerializable + from modin.config import Engine, StorageFormat, IsExperimental +from modin._version import get_versions + +MIN_RAY_VERSION = version.parse("1.4.0") +MIN_DASK_VERSION = version.parse("2.22.0") PANDAS_API_URL_TEMPLATE = f"https://pandas.pydata.org/pandas-docs/version/{pandas.__version__}/reference/api/{{}}.html" @@ -561,3 +573,104 @@ def import_optional_dependency(name, message): f"Missing optional dependency '{name}'. {message} " f"Use pip or conda to install {name}." ) from None + + +def _get_modin_deps_info() -> dict[str, JSONSerializable]: + """ + Return Modin-specific dependencies information as a JSON serializable dictionary. + + Returns + ------- + dict[str, JSONSerializable] + The dictionary of Modin dependencies and their versions. + """ + import modin # delayed import so modin.__init__ is fully initialized + + result = {"modin": modin.__version__} + + for pkg_name, pkg_version in [ + ("ray", MIN_RAY_VERSION), + ("dask", MIN_DASK_VERSION), + ("distributed", MIN_DASK_VERSION), + ]: + try: + pkg = importlib.import_module(pkg_name) + except ImportError: + result[pkg_name] = None + else: + result[pkg_name] = pkg.__version__ + ( + f" (outdated; >={pkg_version} required)" + if version.parse(pkg.__version__) < pkg_version + else "" + ) + + try: + # We import ``PyDbEngine`` from this module since correct import of ``PyDbEngine`` itself + # from Omnisci is located in it with all the necessary options for dlopen. 
+ from modin.experimental.core.execution.native.implementations.omnisci_on_native.utils import ( # noqa + PyDbEngine, + ) + + result["omniscidbe"] = "present" + except ImportError: + result["omniscidbe"] = None + + return result + + +# Disable flake8 checks for print() in this file +# flake8: noqa: T001 +def show_versions(as_json: str | bool = False) -> None: + """ + Provide useful information, important for bug reports. + + It comprises info about hosting operation system, pandas version, + and versions of other installed relative packages. + + Parameters + ---------- + as_json : str or bool, default: False + * If False, outputs info in a human readable form to the console. + * If str, it will be considered as a path to a file. + Info will be written to that file in JSON format. + * If True, outputs info in JSON format to the console. + + Notes + ----- + This is mostly a copy of pandas.show_versions() but adds separate listing + of Modin-specific dependencies. + """ + sys_info = _get_sys_info() + sys_info["commit"] = get_versions()["full-revisionid"] + modin_deps = _get_modin_deps_info() + deps = _get_dependency_info() + + if as_json: + j = { + "system": sys_info, + "modin dependencies": modin_deps, + "dependencies": deps, + } + + if as_json is True: + sys.stdout.writelines(json.dumps(j, indent=2)) + else: + assert isinstance(as_json, str) # needed for mypy + with codecs.open(as_json, "wb", encoding="utf8") as f: + json.dump(j, f, indent=2) + + else: + assert isinstance(sys_info["LOCALE"], dict) # needed for mypy + language_code = sys_info["LOCALE"]["language-code"] + encoding = sys_info["LOCALE"]["encoding"] + sys_info["LOCALE"] = f"{language_code}.{encoding}" + + maxlen = max(max(len(x) for x in d) for d in (deps, modin_deps)) + print("\nINSTALLED VERSIONS") + print("------------------") + for k, v in sys_info.items(): + print(f"{k:<{maxlen}}: {v}") + for name, d in (("Modin", modin_deps), ("pandas", deps)): + print(f"\n{name} dependencies\n{'-' * (len(name) + 
13)}") + for k, v in d.items(): + print(f"{k:<{maxlen}}: {v}") diff --git a/setup.cfg b/setup.cfg index 4b68525c140..76605da2173 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,6 +59,8 @@ omit = modin/core/execution/ray/implementations/cudf_on_ray/* modin/core/execution/ray/implementations/cudf_on_ray/frame/* modin/core/execution/ray/implementations/cudf_on_ray/series/* + # Skip CLI part + modin/__main__.py parallel = True [coverage:report]