Skip to content

Commit

Permalink
FEAT-#4035: Upgrade pandas support to 1.4 (#4036)
Browse files (browse the repository at this point in the history)
Co-authored-by: Igoshev, Yaroslav <yaroslav.igoshev@intel.com>
Co-authored-by: Alexey Prutskov <alexey.prutskov@intel.com>
Co-authored-by: Rehan Durrani <rehan@ponder.io>
Co-authored-by: ienkovich <ilya.enkovich@intel.com>
Co-authored-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Co-authored-by: Yaroslav Igoshev <Poolliver868@mail.ru>
Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
  • Loading branch information
7 people authored Jan 26, 2022
1 parent 95bb922 commit 39fbc57
Show file tree
Hide file tree
Showing 28 changed files with 427 additions and 315 deletions.
38 changes: 19 additions & 19 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- run: pip install black
- run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
Expand All @@ -43,7 +43,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- run: pip install -r docs/requirements-doc.txt
- run: cd docs && sphinx-build -T -E -b html . build
Expand All @@ -57,7 +57,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- run: pip install pytest pytest-cov pydocstyle numpydoc==1.1.0 xgboost
- run: pytest scripts/test
Expand Down Expand Up @@ -132,7 +132,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- run: pip install flake8 flake8-print
- run: flake8 --enable=T modin/ asv_bench/benchmarks scripts/doc_checker.py
Expand All @@ -152,7 +152,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand All @@ -179,7 +179,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand All @@ -202,7 +202,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- name: Clean install and run
run: |
Expand All @@ -223,7 +223,7 @@ jobs:
fetch-depth: 1
- uses: actions/setup-python@v2
with:
python-version: "3.7.x"
python-version: "3.8.x"
architecture: "x64"
- name: Clean install and run
run: |
Expand All @@ -246,7 +246,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand Down Expand Up @@ -276,7 +276,7 @@ jobs:
env:
MODIN_MEMORY: 1000000000
MODIN_TEST_DATASET_SIZE: "small"
name: Test ${{ matrix.execution }} execution, Python 3.7
name: Test ${{ matrix.execution }} execution, Python 3.8
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -285,7 +285,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand Down Expand Up @@ -331,7 +331,7 @@ jobs:
shell: bash -l {0}
env:
MODIN_STORAGE_FORMAT: "omnisci"
name: Test OmniSci storage format, Python 3.7
name: Test OmniSci storage format, Python 3.8
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -341,7 +341,7 @@ jobs:
with:
activate-environment: modin_on_omnisci
environment-file: requirements/env_omnisci.yml
python-version: 3.7
python-version: 3.8
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
run: |
Expand Down Expand Up @@ -433,7 +433,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
engine: ["python", "ray", "dask"]
env:
MODIN_ENGINE: ${{matrix.engine}}
Expand Down Expand Up @@ -514,7 +514,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand Down Expand Up @@ -554,7 +554,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand Down Expand Up @@ -586,7 +586,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
engine: ["ray", "dask"]
test-task:
- modin/pandas/test/dataframe/test_binary.py
Expand Down Expand Up @@ -642,7 +642,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
env:
MODIN_STORAGE_FORMAT: pyarrow
MODIN_EXPERIMENTAL: "True"
Expand Down Expand Up @@ -673,7 +673,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: [ "3.7", "3.8" ]
python-version: ["3.8" ]
engine: ["ray", "dask"]
env:
MODIN_EXPERIMENTAL: "True"
Expand Down
10 changes: 4 additions & 6 deletions .github/workflows/push-to-master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ jobs:
with:
activate-environment: modin
environment-file: requirements/requirements-no-engine.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: install Ray nightly build
run: pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl
run: pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl
- name: Conda environment
run: |
conda info
Expand Down Expand Up @@ -63,7 +63,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand All @@ -81,7 +81,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
test-task:
- modin/pandas/test/dataframe/test_binary.py
- modin/pandas/test/dataframe/test_default.py
Expand Down Expand Up @@ -118,8 +118,6 @@ jobs:
- run: pip install -r requirements-dev.txt --use-deprecated=legacy-resolver
# Use a ray master commit that includes the fix here: https://github.com/ray-project/ray/pull/16278
# Can be changed after a Ray version > 1.4 is released.
- run: pip install https://s3-us-west-2.amazonaws.com/ray-wheels/master/c8e3ed9eec30119092ef966ee7b8982c8954c333/ray-2.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl
if: matrix.python-version == '3.7'
- run: pip install https://s3-us-west-2.amazonaws.com/ray-wheels/master/c8e3ed9eec30119092ef966ee7b8982c8954c333/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl
if: matrix.python-version == '3.8'
- name: Install HDF5
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand All @@ -41,7 +41,7 @@ jobs:
env:
MODIN_MEMORY: 1000000000
MODIN_TEST_DATASET_SIZE: "small"
name: Test ${{ matrix.execution }} execution, Python 3.7
name: Test ${{ matrix.execution }} execution, Python 3.8
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -50,7 +50,7 @@ jobs:
with:
activate-environment: modin
environment-file: environment-dev.yml
python-version: 3.7
python-version: 3.8
channel-priority: strict
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
Expand Down Expand Up @@ -96,7 +96,7 @@ jobs:
MODIN_EXPERIMENTAL: "True"
MODIN_ENGINE: "native"
MODIN_STORAGE_FORMAT: "omnisci"
name: Test OmniSci storage format, Python 3.7
name: Test OmniSci storage format, Python 3.8
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -106,7 +106,7 @@ jobs:
with:
activate-environment: modin_on_omnisci
environment-file: requirements/env_omnisci.yml
python-version: 3.7
python-version: 3.8
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Conda environment
run: |
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
engine: ["python", "ray", "dask"]
env:
MODIN_ENGINE: ${{matrix.engine}}
Expand Down Expand Up @@ -202,7 +202,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
engine: ["ray", "dask"]
test-task:
- modin/pandas/test/dataframe/test_binary.py
Expand Down Expand Up @@ -257,7 +257,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.7", "3.8"]
python-version: ["3.8"]
env:
MODIN_STORAGE_FORMAT: pyarrow
MODIN_EXPERIMENTAL: "True"
Expand Down Expand Up @@ -287,7 +287,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: [ "3.7", "3.8" ]
python-version: ["3.8"]
engine: ["ray", "dask"]
env:
MODIN_EXPERIMENTAL: "True"
Expand Down
4 changes: 2 additions & 2 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name: modin
channels:
- conda-forge
dependencies:
- pandas==1.3.5
- numpy>=1.16.5
- pandas==1.4.0
- numpy>=1.18.5
- pyarrow>=4.0.1
- dask[complete]>=2.22.0
- distributed>=2.22.0
Expand Down
4 changes: 2 additions & 2 deletions modin/core/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import pandas
import pandas._libs.lib as lib
from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions
from pandas._typing import CompressionOptions, StorageOptions
from pandas.util._decorators import doc

from modin.db_conn import ModinDatabaseConnection
Expand Down Expand Up @@ -826,7 +826,7 @@ def to_sql(
def to_pickle(
cls,
obj: Any,
filepath_or_buffer: FilePathOrBuffer,
filepath_or_buffer,
compression: CompressionOptions = "infer",
protocol: int = pickle.HIGHEST_PROTOCOL,
storage_options: StorageOptions = None,
Expand Down
3 changes: 1 addition & 2 deletions modin/core/io/text/fwf_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"""Module houses `FWFDispatcher` class, that is used for reading of tables with fixed-width formatted lines."""

import pandas
from pandas._typing import FilePathOrBuffer

from modin.core.io.text.text_file_dispatcher import TextFileDispatcher

Expand All @@ -27,7 +26,7 @@ class FWFDispatcher(TextFileDispatcher):
@classmethod
def check_parameters_support(
cls,
filepath_or_buffer: FilePathOrBuffer,
filepath_or_buffer,
read_kwargs: dict,
):
"""
Expand Down
7 changes: 3 additions & 4 deletions modin/core/io/text/text_file_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import numpy as np
import pandas
import pandas._libs.lib as lib
from pandas._typing import FilePathOrBuffer
from pandas.core.dtypes.common import is_list_like

from modin.core.io.file_dispatcher import FileDispatcher, OpenFile
Expand All @@ -36,7 +35,7 @@
from modin.core.io.text.utils import CustomNewlineIterator
from modin.config import NPartitions

ColumnNamesTypes = Tuple[Union[pandas.Index, pandas.MultiIndex, pandas.Int64Index]]
ColumnNamesTypes = Tuple[Union[pandas.Index, pandas.MultiIndex]]
IndexColType = Union[int, str, bool, Sequence[int], Sequence[str], None]


Expand Down Expand Up @@ -614,7 +613,7 @@ def _launch_tasks(cls, splits: list, **partition_kwargs) -> Tuple[list, list, li
@classmethod
def check_parameters_support(
cls,
filepath_or_buffer: FilePathOrBuffer,
filepath_or_buffer,
read_kwargs: dict,
) -> bool:
"""
Expand Down Expand Up @@ -912,7 +911,7 @@ def _get_new_qc(
return new_query_compiler

@classmethod
def _read(cls, filepath_or_buffer: FilePathOrBuffer, **kwargs):
def _read(cls, filepath_or_buffer, **kwargs):
"""
Read data from `filepath_or_buffer` according to `kwargs` parameters.
Expand Down
3 changes: 3 additions & 0 deletions modin/core/storage_formats/pandas/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ def generic_parse(fname, **kwargs):

bio.seek(start)
to_read = header + bio.read(end - start)
if "memory_map" in kwargs:
kwargs = kwargs.copy()
del kwargs["memory_map"]
pandas_df = callback(BytesIO(to_read), **kwargs)
index = (
pandas_df.index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@

import pandas
import pandas._libs.lib as lib
from pandas._typing import FilePathOrBuffer
from pandas.io.common import is_url

ReadCsvKwargsType = Dict[
Expand All @@ -51,7 +50,6 @@
Sequence,
Callable,
Dialect,
FilePathOrBuffer,
None,
],
]
Expand Down
Loading

0 comments on commit 39fbc57

Please sign in to comment.