Skip to content

Commit

Permalink
refactor(dependencies): pandas and numpy are now optional for non-backend installs (#9564)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Aug 7, 2024
1 parent 524a2fa commit cff210a
Show file tree
Hide file tree
Showing 63 changed files with 794 additions and 621 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ibis-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ jobs:
- name: install ibis
run: poetry install --without dev --without docs --extras "visualization decompiler"

- name: install pyarrow
- name: install numpy/pandas/pyarrow
if: matrix.pyarrow
run: poetry run pip install pyarrow pyarrow-hotfix
run: poetry run pip install numpy pandas pyarrow pyarrow-hotfix

- name: check pyarrow import
- name: check imports
if: matrix.pyarrow
run: poetry run python -c 'import pyarrow, pyarrow_hotfix'
run: poetry run python -c 'import numpy, pandas, pyarrow, pyarrow_hotfix'

- uses: extractions/setup-just@v2
env:
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/nix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,19 @@ jobs:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: nix build and test
- name: nix build and test core
run: |
set -euo pipefail
version='${{ matrix.python-version }}'
nix build ".#ibis${version//./}" --fallback --keep-going --print-build-logs
nix build ".#ibisCore${version//./}" --fallback --keep-going --print-build-logs
- name: nix build and test local
run: |
set -euo pipefail
version='${{ matrix.python-version }}'
nix build ".#ibisLocal${version//./}" --fallback --keep-going --print-build-logs
- name: nix build devShell
run: |
Expand Down
4 changes: 2 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@
in
rec {
packages = {
inherit (pkgs) ibis310 ibis311 ibis312;
inherit (pkgs) ibisCore310 ibisCore311 ibisCore312 ibisLocal310 ibisLocal311 ibisLocal312;

default = pkgs.ibis312;
default = pkgs.ibisCore312;

inherit (pkgs) update-lock-files gen-examples check-release-notes-spelling;
};
Expand Down
19 changes: 12 additions & 7 deletions ibis/backends/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

from __future__ import annotations

import contextlib
import functools

import dateutil.parser
import google.cloud.bigquery as bq
import pandas as pd

import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -69,9 +70,9 @@ def bq_param_array(dtype: dt.Array, value, name):

@bigquery_param.register
def bq_param_timestamp(_: dt.Timestamp, value, name):
# TODO(phillipc): Not sure if this is the correct way to do this.
timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime()
return bq.ScalarQueryParameter(name, "TIMESTAMP", timestamp_value)
with contextlib.suppress(TypeError):
value = dateutil.parser.parse(value)
return bq.ScalarQueryParameter(name, "TIMESTAMP", value.isoformat())


@bigquery_param.register
Expand All @@ -96,9 +97,13 @@ def bq_param_boolean(_: dt.Boolean, value, name):

@bigquery_param.register
def bq_param_date(_: dt.Date, value, name):
return bq.ScalarQueryParameter(
name, "DATE", pd.Timestamp(value).to_pydatetime().date()
)
with contextlib.suppress(TypeError):
value = dateutil.parser.parse(value)

with contextlib.suppress(AttributeError):
value = value.date()

return bq.ScalarQueryParameter(name, "DATE", value.isoformat())


def rename_partitioned_column(table_expr, bq_table, partition_col):
Expand Down
51 changes: 4 additions & 47 deletions ibis/backends/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import TYPE_CHECKING, Any

import _pytest
import pandas as pd
import pytest
from packaging.requirements import Requirement
from packaging.version import parse as vparse
Expand Down Expand Up @@ -571,7 +570,7 @@ def geo_df(geo):


@pytest.fixture
def temp_table(con) -> str:
def temp_table(con):
"""Return a temporary table name.
Parameters
Expand All @@ -590,7 +589,7 @@ def temp_table(con) -> str:


@pytest.fixture
def temp_table2(con) -> str:
def temp_table2(con):
name = util.gen_name("temp_table2")
yield name
with contextlib.suppress(NotImplementedError):
Expand All @@ -606,7 +605,7 @@ def temp_table_orig(con, temp_table):


@pytest.fixture
def temp_view(ddl_con) -> str:
def temp_view(ddl_con):
"""Return a temporary view name.
Parameters
Expand All @@ -625,7 +624,7 @@ def temp_view(ddl_con) -> str:


@pytest.fixture
def alternate_current_database(ddl_con, ddl_backend) -> str:
def alternate_current_database(ddl_con, ddl_backend):
"""Create a temporary database and yield its name. Drops the created
database upon completion.
Expand All @@ -648,48 +647,6 @@ def alternate_current_database(ddl_con, ddl_backend) -> str:
ddl_con.drop_database(name, force=True)


@pytest.fixture
def test_employee_schema() -> ibis.schema:
sch = ibis.schema(
[
("first_name", "string"),
("last_name", "string"),
("department_name", "string"),
("salary", "float64"),
]
)

return sch


@pytest.fixture
def test_employee_data_1():
df = pd.DataFrame(
{
"first_name": ["A", "B", "C"],
"last_name": ["D", "E", "F"],
"department_name": ["AA", "BB", "CC"],
"salary": [100.0, 200.0, 300.0],
}
)

return df


@pytest.fixture
def test_employee_data_2():
df2 = pd.DataFrame(
{
"first_name": ["X", "Y", "Z"],
"last_name": ["A", "B", "C"],
"department_name": ["XX", "YY", "ZZ"],
"salary": [400.0, 500.0, 600.0],
}
)

return df2


@pytest.fixture
def assert_sql(con, snapshot):
def checker(expr, file_name="out.sql"):
Expand Down
4 changes: 1 addition & 3 deletions ibis/backends/duckdb/converter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from __future__ import annotations

import numpy as np

from ibis.formats.pandas import PandasData


class DuckDBPandasData(PandasData):
@staticmethod
def convert_Array(s, dtype, pandas_type):
return s.replace(np.nan, None)
return s.replace(float("nan"), None)
8 changes: 5 additions & 3 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,13 @@ def test_connect_duckdb(url, tmp_path):
@pytest.mark.parametrize(
"out_method, extension", [("to_csv", "csv"), ("to_parquet", "parquet")]
)
def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
def test_connect_local_file(out_method, extension, tmp_path):
df = pd.DataFrame({"a": [1, 2, 3]})
path = tmp_path / f"out.{extension}"
getattr(df, out_method)(path)
with pytest.warns(FutureWarning, match="v9.1"):
# ibis.connect uses con.register
con = ibis.connect(tmp_path / f"out.{extension}")
con = ibis.connect(path)
t = next(iter(con.tables.values()))
assert not t.head().execute().empty

Expand Down
3 changes: 1 addition & 2 deletions ibis/backends/mysql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import numpy as np
import pymysql
import sqlglot as sg
import sqlglot.expressions as sge
Expand Down Expand Up @@ -509,7 +508,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:

df = op.data.to_frame()
# nan can not be used with MySQL
df = df.replace(np.nan, None)
df = df.replace(float("nan"), None)

data = df.itertuples(index=False)
sql = self._build_insert_template(
Expand Down
3 changes: 1 addition & 2 deletions ibis/backends/oracle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import numpy as np
import oracledb
import sqlglot as sg
import sqlglot.expressions as sge
Expand Down Expand Up @@ -534,7 +533,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
properties=sge.Properties(expressions=[sge.TemporaryProperty()]),
).sql(self.name)

data = op.data.to_frame().replace({np.nan: None})
data = op.data.to_frame().replace(float("nan"), None)
insert_stmt = self._build_insert_template(
name, schema=schema, placeholder=":{i:d}"
)
Expand Down
8 changes: 3 additions & 5 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from functools import partial, reduce, singledispatch
from math import isnan

import numpy as np
import pandas as pd
import polars as pl

import ibis.common.exceptions as com
Expand Down Expand Up @@ -834,7 +832,7 @@ def count_star(op, **kw):

@translate.register(ops.TimestampNow)
def timestamp_now(op, **_):
return pl.lit(pd.Timestamp("now", tz="UTC").tz_localize(None))
return pl.lit(datetime.datetime.now())


@translate.register(ops.DateNow)
Expand Down Expand Up @@ -1175,12 +1173,12 @@ def elementwise_udf(op, **kw):

@translate.register(ops.E)
def execute_e(op, **_):
return pl.lit(np.e)
return pl.lit(math.e)


@translate.register(ops.Pi)
def execute_pi(op, **_):
return pl.lit(np.pi)
return pl.lit(math.pi)


@translate.register(ops.Time)
Expand Down
4 changes: 1 addition & 3 deletions ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import numpy as np
import pandas as pd
import sqlglot as sg
import sqlglot.expressions as sge
from pandas.api.types import is_float_dtype
Expand Down Expand Up @@ -139,7 +137,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
convert_df = df.convert_dtypes()
for col in convert_df.columns:
if not is_float_dtype(convert_df[col]):
df[col] = df[col].replace(np.nan, None)
df[col] = df[col].replace(float("nan"), None)

data = df.itertuples(index=False)
sql = self._build_insert_template(
Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/sqlite/converter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import pandas as pd
from packaging.version import parse as vparse

from ibis.formats.pandas import PandasData

# The "mixed" format was added in pandas 2
_DATETIME_FORMAT = "mixed" if pd.__version__ >= "2.0.0" else None
_DATETIME_FORMAT = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None


class SQLitePandasData(PandasData):
Expand Down
7 changes: 4 additions & 3 deletions ibis/backends/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

import numpy as np
import pandas as pd
import pandas.testing as tm
import pytest
from filelock import FileLock

Expand All @@ -22,6 +19,10 @@

PYTHON_SHORT_VERSION = f"{sys.version_info.major}{sys.version_info.minor}"

np = pytest.importorskip("numpy")
pd = pytest.importorskip("pandas")
tm = pytest.importorskip("pandas.testing")


class BackendTest(abc.ABC):
"""
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/tests/data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

import numpy as np
import pandas as pd
import pytest

np = pytest.importorskip("numpy")
pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")

array_types = pd.DataFrame(
Expand Down
5 changes: 3 additions & 2 deletions ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from datetime import date
from operator import methodcaller

import numpy as np
import pandas as pd
import pytest
from pytest import param

Expand Down Expand Up @@ -32,6 +30,9 @@
)
from ibis.legacy.udf.vectorized import reduction

np = pytest.importorskip("numpy")
pd = pytest.importorskip("pandas")

with pytest.warns(FutureWarning, match="v9.0"):

@reduction(input_type=[dt.double], output_type=dt.double)
Expand Down
7 changes: 4 additions & 3 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
from datetime import datetime
from functools import partial

import numpy as np
import pandas as pd
import pandas.testing as tm
import pytest
import pytz
import toolz
Expand All @@ -35,6 +32,10 @@
)
from ibis.common.collections import frozendict

np = pytest.importorskip("numpy")
pd = pytest.importorskip("pandas")
tm = pytest.importorskip("pandas.testing")

pytestmark = [
pytest.mark.never(
["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception
Expand Down
Loading

0 comments on commit cff210a

Please sign in to comment.