chore(python): Use Pathlib everywhere #9914

Merged · 5 commits · Jul 16, 2023
7 changes: 4 additions & 3 deletions py-polars/docs/source/conf.py
@@ -16,11 +16,12 @@
import re
import sys
import warnings
from pathlib import Path

import sphinx_autosummary_accessors

# add polars directory
sys.path.insert(0, os.path.abspath("../.."))
sys.path.insert(0, str(Path("../..").resolve()))

# -- Project information -----------------------------------------------------

@@ -200,8 +201,8 @@ def linkcode_resolve(domain, info):

linespec = f"#L{lineno}-L{lineno + len(source) - 1}" if lineno else ""

conf_dir_path = os.path.dirname(os.path.realpath(__file__))
polars_root = os.path.abspath(f"{conf_dir_path}/../../polars")
conf_dir_path = Path(__file__).absolute().parent
polars_root = (conf_dir_path.parent.parent / "polars").absolute()

fn = os.path.relpath(fn, start=polars_root)
return f"{github_root}/blob/main/py-polars/polars/{fn}{linespec}"
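For reference, a minimal standalone sketch of the path handling this hunk switches to; the file layout is hypothetical, and only the idioms mirror the diff above:

```python
import os.path
from pathlib import Path

# os.path style: everything stays a string.
conf_dir = os.path.dirname(os.path.realpath(__file__))
polars_root_str = os.path.abspath(os.path.join(conf_dir, "..", "..", "polars"))

# pathlib style, as in the new code above: keep a Path object until a string is needed.
conf_dir_path = Path(__file__).absolute().parent
polars_root = (conf_dir_path.parent.parent / "polars").absolute()

# Note: os.path.realpath also resolves symlinks; Path.resolve() is the closer
# analogue, while Path.absolute() only anchors the path without normalising it.
print(polars_root_str, polars_root)
```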
8 changes: 4 additions & 4 deletions py-polars/polars/config.py
@@ -156,7 +156,7 @@ def load(cls, cfg: Path | str) -> type[Config]:
"""
options = json.loads(
Path(normalise_filepath(cfg)).read_text()
if isinstance(cfg, Path) or os.path.exists(cfg)
if isinstance(cfg, Path) or Path(cfg).exists()
else cfg
)
os.environ.update(options.get("environment", {}))
@@ -221,9 +221,9 @@ def save(cls, file: Path | str | None = None) -> str:
separators=(",", ":"),
)
if isinstance(file, (str, Path)):
file = os.path.abspath(normalise_filepath(file))
Path(file).write_text(options)
return file
file = Path(normalise_filepath(file)).resolve()
file.write_text(options)
return str(file)

return options
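A quick usage sketch of the round trip touched above, assuming only the signatures visible in this hunk (`save` returning the written path as a string, `load` accepting a path or a raw JSON string); the file location is made up:

```python
from pathlib import Path

import polars as pl

cfg_file = Path("polars_cfg.json")  # hypothetical location in the working directory

pl.Config.set_tbl_rows(20)
saved = pl.Config.save(cfg_file)  # writes the options as JSON, returns str(resolved path)
assert Path(saved).is_file()

pl.Config.load(cfg_file)  # Path or str; a non-existent str is parsed as JSON itself
```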

2 changes: 1 addition & 1 deletion py-polars/polars/io/_utils.py
@@ -166,7 +166,7 @@ def managed_file(file: Any) -> Iterator[Any]:
if isinstance(file, str):
file = normalise_filepath(file, check_not_dir)
if has_non_utf8_non_utf8_lossy_encoding:
with open(file, encoding=encoding_str) as f:
with Path(file).open(encoding=encoding_str) as f:
return _check_empty(
BytesIO(f.read().encode("utf8")), context=f"{file!r}"
)
5 changes: 2 additions & 3 deletions py-polars/polars/lazyframe/frame.py
@@ -894,7 +894,7 @@ def show_graph(
*,
optimized: bool = True,
show: bool = True,
output_path: str | None = None,
output_path: str | Path | None = None,
raw_output: bool = False,
figsize: tuple[float, float] = (16.0, 12.0),
type_coercion: bool = True,
@@ -975,8 +975,7 @@ def show_graph(
raise ImportError("Graphviz dot binary should be on your PATH") from None

if output_path:
with Path(output_path).open(mode="wb") as file:
file.write(graph)
Path(output_path).write_bytes(graph)

if not show:
return None
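The pattern above recurs throughout the PR: a `with open(...)` block collapses into a single `Path.write_bytes`/`write_text` (or `read_bytes`/`read_text`) call. A small sketch with a made-up file name:

```python
from pathlib import Path

path = Path("graph.dot")  # hypothetical output file

# open()-based pattern being replaced:
with open(path, mode="wb") as f:
    f.write(b"digraph {}")

# pathlib one-liners; each call opens, writes/reads, and closes the file.
path.write_bytes(b"digraph {}")
path.write_text("digraph {}", encoding="utf8")
raw = path.read_bytes()
text = path.read_text(encoding="utf8")
```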
15 changes: 7 additions & 8 deletions py-polars/polars/utils/various.py
@@ -1,12 +1,12 @@
from __future__ import annotations

import inspect
import os
import re
import sys
import warnings
from collections.abc import MappingView, Sized
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Any, Generator, Iterable, Literal, Sequence, TypeVar

import polars as pl
@@ -25,7 +25,6 @@

if TYPE_CHECKING:
from collections.abc import Reversible
from pathlib import Path

from polars import DataFrame, Series
from polars.type_aliases import PolarsDataType, PolarsIntegerType, SizeUnit
@@ -183,10 +182,10 @@ def can_create_dicts_with_pyarrow(dtypes: Sequence[PolarsDataType]) -> bool:

def normalise_filepath(path: str | Path, check_not_directory: bool = True) -> str:
"""Create a string path, expanding the home directory if present."""
path = os.path.expanduser(path)
if check_not_directory and os.path.exists(path) and os.path.isdir(path):
path = Path(path).expanduser()
if check_not_directory and path.exists() and path.is_dir():
raise IsADirectoryError(f"Expected a file path; {path!r} is a directory")
return path
return str(path)


def parse_version(version: Sequence[str | int]) -> tuple[int, ...]:
@@ -358,15 +357,15 @@ def find_stacklevel() -> int:
Taken from:
https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
"""
pkg_dir = os.path.dirname(pl.__file__)
test_dir = os.path.join(pkg_dir, "tests")
pkg_dir = Path(pl.__file__).parent
test_dir = pkg_dir / "tests"

# https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
frame = inspect.currentframe()
n = 0
while frame:
fname = inspect.getfile(frame)
if fname.startswith(pkg_dir) and not fname.startswith(test_dir):
if fname.startswith(str(pkg_dir)) and not fname.startswith(str(test_dir)):
frame = frame.f_back
n += 1
else:
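As a runnable illustration of the updated helper, here is the new `normalise_filepath` body copied from the hunk above plus a couple of made-up example calls:

```python
from __future__ import annotations

from pathlib import Path


def normalise_filepath(path: str | Path, check_not_directory: bool = True) -> str:
    """Create a string path, expanding the home directory if present."""
    path = Path(path).expanduser()
    if check_not_directory and path.exists() and path.is_dir():
        raise IsADirectoryError(f"Expected a file path; {path!r} is a directory")
    return str(path)


print(normalise_filepath("~/data.csv"))        # "~" expanded; non-existent files are fine
print(normalise_filepath(Path("~/data.csv")))  # Path input is accepted as well
# normalise_filepath(".")                      # would raise IsADirectoryError
```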
1 change: 1 addition & 0 deletions py-polars/pyproject.toml
@@ -119,6 +119,7 @@ select = [
"UP", # pyupgrade
"PT", # flake8-pytest-style
"RUF", # Ruff-specific rules
"PTH", # flake8-use-pathlib
]

ignore = [
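The new `PTH` entry enables Ruff's flake8-use-pathlib rules, which flag `os.path` helpers and the builtin `open()` in favour of `pathlib` — that is what drives the rest of this diff. A hypothetical before/after of the kind the rules report:

```python
# Before: flagged by the PTH rules (os.path.join, os.path.exists, open, ...).
import os

cache = os.path.join(os.path.dirname(__file__), "cache.json")
if os.path.exists(cache):
    with open(cache) as f:
        data = f.read()

# After: the pathlib form the rules steer towards.
from pathlib import Path

cache_path = Path(__file__).parent / "cache.json"
if cache_path.exists():
    data = cache_path.read_text()
```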
6 changes: 3 additions & 3 deletions py-polars/scripts/check_stacklevels.py
@@ -7,6 +7,7 @@
import subprocess
import sys
from ast import NodeVisitor
from pathlib import Path

# Files in which it's OK to set the stacklevel manually.
# `git ls-files` lists files with forwards-slashes
@@ -38,10 +39,9 @@ def visit_Call(self, node: ast.Call) -> None:
for file in files:
if file in EXCLUDE:
continue
if not file.endswith(".py"):
if Path(file).suffix != ".py":
continue
with open(file) as fd:
content = fd.read()
content = Path(file).read_text()
tree = ast.parse(content)
stacklevel_checker = StackLevelChecker(file)
stacklevel_checker.visit(tree)
5 changes: 2 additions & 3 deletions py-polars/tests/benchmark/test_release.py
@@ -5,7 +5,6 @@

To run these tests: pytest -m benchmark
"""
import os
import time
from pathlib import Path
from typing import cast
@@ -21,12 +20,12 @@


@pytest.mark.skipif(
not (Path(os.path.dirname(__file__)) / "G1_1e7_1e2_5_0.csv").is_file(),
not (Path(__file__).parent / "G1_1e7_1e2_5_0.csv").is_file(),
reason="Dataset must be generated before running this test.",
)
def test_read_scan_large_csv() -> None:
filename = "G1_1e7_1e2_5_0.csv"
path = Path(os.path.dirname(__file__)) / filename
path = Path(__file__).parent / filename

predicate = pl.col("v2") < 5

4 changes: 1 addition & 3 deletions py-polars/tests/unit/io/conftest.py
@@ -1,12 +1,10 @@
from __future__ import annotations

import os
from pathlib import Path

import pytest


@pytest.fixture()
def io_files_path() -> Path:
current_dir = os.path.dirname(__file__)
return Path(current_dir) / "files"
return Path(__file__).parent / "files"
8 changes: 3 additions & 5 deletions py-polars/tests/unit/io/test_csv.py
@@ -379,8 +379,7 @@ def test_read_csv_encoding(tmp_path: Path) -> None:
)

file_path = tmp_path / "encoding.csv"
with open(file_path, "wb") as f:
f.write(bts)
file_path.write_bytes(bts)

file_str = str(file_path)
bytesio = io.BytesIO(bts)
@@ -487,9 +486,8 @@ def test_compressed_csv(io_files_path: Path) -> None:

def test_partial_decompression(foods_file_path: Path) -> None:
f_out = io.BytesIO()
with open(foods_file_path, "rb") as f_read: # noqa: SIM117
with gzip.GzipFile(fileobj=f_out, mode="w") as f:
f.write(f_read.read())
with gzip.GzipFile(fileobj=f_out, mode="w") as f:
f.write(foods_file_path.read_bytes())

csv_bytes = f_out.getvalue()
for n_rows in [1, 5, 26]:
7 changes: 2 additions & 5 deletions py-polars/tests/unit/io/test_database.py
@@ -1,8 +1,8 @@
from __future__ import annotations

import os
import sys
from datetime import date
from pathlib import Path
from typing import TYPE_CHECKING

import pytest
@@ -11,8 +11,6 @@
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
from pathlib import Path

from polars.type_aliases import (
DbReadEngine,
DbWriteEngine,
@@ -35,8 +33,7 @@ def create_temp_sqlite_db(test_db: str) -> None:
def create_temp_sqlite_db(test_db: str) -> None:
import sqlite3

if os.path.exists(test_db):
os.unlink(test_db)
Path(test_db).unlink(missing_ok=True)

# NOTE: at the time of writing adcb/connectorx have weak SQLite support (poor or
# no bool/date/datetime dtypes, for example) and there is a bug in connectorx that
12 changes: 5 additions & 7 deletions py-polars/tests/unit/io/test_lazy_csv.py
@@ -42,8 +42,7 @@ def test_invalid_utf8(tmp_path: Path) -> None:
bts = bytes(np.random.randint(0, 255, 200))

file_path = tmp_path / "nonutf8.csv"
with open(file_path, "wb") as f:
f.write(bts)
file_path.write_bytes(bts)

a = pl.read_csv(file_path, has_header=False, encoding="utf8-lossy")
b = pl.scan_csv(file_path, has_header=False, encoding="utf8-lossy").collect()
@@ -192,17 +191,16 @@ def test_glob_skip_rows(tmp_path: Path) -> None:

for i in range(2):
file_path = tmp_path / f"test_{i}.csv"
with open(file_path, "w") as f:
f.write(
f"""
file_path.write_text(
f"""
metadata goes here
file number {i}
foo,bar,baz
1,2,3
4,5,6
7,8,9
"""
)
)
file_path = tmp_path / "*.csv"
assert pl.read_csv(file_path, skip_rows=2).to_dict(False) == {
"foo": [1, 4, 7, 1, 4, 7],
@@ -227,7 +225,7 @@ def test_glob_n_rows(io_files_path: Path) -> None:
}


def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: str) -> None:
def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: Path) -> None:
df = (
pl.scan_csv(
foods_file_path,
4 changes: 2 additions & 2 deletions py-polars/tests/unit/io/test_lazy_json.py
@@ -57,8 +57,8 @@ def test_scan_with_projection(tmp_path: Path) -> None:
json_bytes = bytes(json, "utf-8")

file_path = tmp_path / "escape_chars.json"
with open(file_path, "wb") as f:
f.write(json_bytes)
file_path.write_bytes(json_bytes)

actual = pl.scan_ndjson(file_path).select(["id", "text"]).collect()

expected = pl.DataFrame(
12 changes: 6 additions & 6 deletions py-polars/tests/unit/io/test_other.py
@@ -1,7 +1,7 @@
from __future__ import annotations

import copy
import os.path
from pathlib import Path
from typing import cast

import polars as pl
@@ -51,8 +51,8 @@ def test_unit_io_subdir_has_no_init() -> None:
# --------------------------------------------------------------------------------
# TLDR: it can mask the builtin 'io' module, causing a fatal python error.
# --------------------------------------------------------------------------------
io_dir = os.path.dirname(__file__)
assert io_dir.endswith(f"unit{os.path.sep}io")
assert not os.path.exists(
f"{io_dir}{os.path.sep}__init__.py"
), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
io_dir = Path(__file__).parent
assert io_dir.parts[-2:] == ("unit", "io")
assert not (
io_dir / "__init__.py"
).exists(), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
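A tiny sketch of the `Path.parts` check used above, with a hypothetical directory; unlike the string comparison it does not depend on the OS path separator:

```python
import os.path
from pathlib import Path

io_dir = Path("/repo/py-polars/tests/unit/io")  # hypothetical location

# Old, string-based check (separator-dependent):
assert str(io_dir).endswith(f"unit{os.path.sep}io")

# New, structural check: compare the last two path components directly.
assert io_dir.parts[-2:] == ("unit", "io")
```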
6 changes: 2 additions & 4 deletions py-polars/tests/unit/io/test_parquet.py
@@ -1,8 +1,8 @@
from __future__ import annotations

import io
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
@@ -19,8 +19,6 @@
)

if TYPE_CHECKING:
from pathlib import Path

from polars.type_aliases import ParquetCompression


@@ -513,7 +511,7 @@ def test_parquet_string_cache() -> None:

def test_tz_aware_parquet_9586() -> None:
result = pl.read_parquet(
os.path.join("tests", "unit", "io", "files", "tz_aware.parquet")
Path("tests") / "unit" / "io" / "files" / "tz_aware.parquet"
)
expected = pl.DataFrame(
{"UTC_DATETIME_ID": [datetime(2023, 6, 26, 14, 15, 0, tzinfo=timezone.utc)]}
4 changes: 1 addition & 3 deletions py-polars/tests/unit/streaming/conftest.py
@@ -1,10 +1,8 @@
import os
from pathlib import Path

import pytest


@pytest.fixture()
def io_files_path() -> Path:
current_dir = os.path.dirname(__file__)
return Path(current_dir) / ".." / "io" / "files"
return Path(__file__).parent.parent / "io" / "files"
8 changes: 3 additions & 5 deletions py-polars/tests/unit/test_cfg.py
@@ -1,7 +1,8 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Iterator
from pathlib import Path
from typing import Iterator

import pytest

@@ -10,9 +11,6 @@
from polars.exceptions import StringCacheMismatchError
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
from pathlib import Path


@pytest.fixture(autouse=True)
def _environ() -> Iterator[None]:
@@ -531,7 +529,7 @@ def test_config_load_save(tmp_path: Path) -> None:

# ...load back from config...
if file is not None:
assert os.path.isfile(cfg)
assert Path(cfg).is_file()
pl.Config.load(cfg)

# ...and confirm the saved options were set.