chore(python): Use Pathlib everywhere (#9914)

zundertj authored Jul 16, 2023
1 parent cc0795f commit da4df4f

Showing 18 changed files with 51 additions and 68 deletions.
7 changes: 4 additions & 3 deletions py-polars/docs/source/conf.py
@@ -16,11 +16,12 @@
 import re
 import sys
 import warnings
+from pathlib import Path
 
 import sphinx_autosummary_accessors
 
 # add polars directory
-sys.path.insert(0, os.path.abspath("../.."))
+sys.path.insert(0, str(Path("../..").resolve()))
 
 # -- Project information -----------------------------------------------------
 
@@ -200,8 +201,8 @@ def linkcode_resolve(domain, info):
 
     linespec = f"#L{lineno}-L{lineno + len(source) - 1}" if lineno else ""
 
-    conf_dir_path = os.path.dirname(os.path.realpath(__file__))
-    polars_root = os.path.abspath(f"{conf_dir_path}/../../polars")
+    conf_dir_path = Path(__file__).absolute().parent
+    polars_root = (conf_dir_path.parent.parent / "polars").absolute()
 
     fn = os.path.relpath(fn, start=polars_root)
     return f"{github_root}/blob/main/py-polars/polars/{fn}{linespec}"
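For reference, a rough cheat sheet of the pathlib equivalences used above. Note that Path.absolute() anchors a path to the working directory without collapsing ".." components or resolving symlinks, so os.path.realpath maps onto Path.resolve() rather than Path.absolute():

from pathlib import Path

Path("../..").resolve()    # absolute, ".." collapsed, symlinks resolved (~ os.path.realpath)
Path(__file__).absolute()  # absolute, but ".." and symlinks are left untouched
Path(__file__).parent      # ~ os.path.dirname(__file__)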
8 changes: 4 additions & 4 deletions py-polars/polars/config.py
@@ -156,7 +156,7 @@ def load(cls, cfg: Path | str) -> type[Config]:
         """
         options = json.loads(
             Path(normalise_filepath(cfg)).read_text()
-            if isinstance(cfg, Path) or os.path.exists(cfg)
+            if isinstance(cfg, Path) or Path(cfg).exists()
             else cfg
         )
         os.environ.update(options.get("environment", {}))
@@ -221,9 +221,9 @@ def save(cls, file: Path | str | None = None) -> str:
             separators=(",", ":"),
         )
         if isinstance(file, (str, Path)):
-            file = os.path.abspath(normalise_filepath(file))
-            Path(file).write_text(options)
-            return file
+            file = Path(normalise_filepath(file)).resolve()
+            file.write_text(options)
+            return str(file)
 
         return options
 
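Config.load treats its argument as a file only when it is a Path or an existing path on disk; otherwise the string is parsed as the JSON options themselves, which is what the exists() check above distinguishes. A sketch of the round trip under that assumption (the file location is a placeholder):

from pathlib import Path

import polars as pl

cfg_path = Path("/tmp/polars_cfg.json")  # hypothetical location
pl.Config.set_tbl_rows(20)
saved = pl.Config.save(cfg_path)         # writes the options, returns the path as a str
pl.Config.load(saved)                    # accepts a Path, a str path, or raw JSON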
2 changes: 1 addition & 1 deletion py-polars/polars/io/_utils.py
@@ -166,7 +166,7 @@ def managed_file(file: Any) -> Iterator[Any]:
     if isinstance(file, str):
         file = normalise_filepath(file, check_not_dir)
         if has_non_utf8_non_utf8_lossy_encoding:
-            with open(file, encoding=encoding_str) as f:
+            with Path(file).open(encoding=encoding_str) as f:
                 return _check_empty(
                     BytesIO(f.read().encode("utf8")), context=f"{file!r}"
                 )
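Path.open takes the same arguments as the builtin open (mode, encoding, errors, newline), so the swap above is behavior-preserving; a minimal sketch with a placeholder file:

from pathlib import Path

p = Path("example.csv")              # placeholder file
p.write_text("a,b\n1,2\n")
with p.open(encoding="utf8") as f:   # same as open("example.csv", encoding="utf8")
    print(f.read())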
5 changes: 2 additions & 3 deletions py-polars/polars/lazyframe/frame.py
@@ -894,7 +894,7 @@ def show_graph(
         *,
         optimized: bool = True,
         show: bool = True,
-        output_path: str | None = None,
+        output_path: str | Path | None = None,
         raw_output: bool = False,
         figsize: tuple[float, float] = (16.0, 12.0),
         type_coercion: bool = True,
@@ -975,8 +975,7 @@ def show_graph(
             raise ImportError("Graphviz dot binary should be on your PATH") from None
 
         if output_path:
-            with Path(output_path).open(mode="wb") as file:
-                file.write(graph)
+            Path(output_path).write_bytes(graph)
 
         if not show:
             return None
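Path.write_bytes opens the file in binary write mode, writes the payload, and closes it in a single call, which is exactly what the two-line context manager did; a sketch with placeholder data:

from pathlib import Path

graph = b"digraph polars_query {}"   # placeholder payload
Path("plan.dot").write_bytes(graph)  # equivalent to open(..., "wb") + write + close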
15 changes: 7 additions & 8 deletions py-polars/polars/utils/various.py
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
 import inspect
-import os
 import re
 import sys
 import warnings
 from collections.abc import MappingView, Sized
 from enum import Enum
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Iterable, Literal, Sequence, TypeVar
 
 import polars as pl
@@ -25,7 +25,6 @@
 
 if TYPE_CHECKING:
     from collections.abc import Reversible
-    from pathlib import Path
 
     from polars import DataFrame, Series
     from polars.type_aliases import PolarsDataType, PolarsIntegerType, SizeUnit
@@ -183,10 +182,10 @@ def can_create_dicts_with_pyarrow(dtypes: Sequence[PolarsDataType]) -> bool:
 
 def normalise_filepath(path: str | Path, check_not_directory: bool = True) -> str:
     """Create a string path, expanding the home directory if present."""
-    path = os.path.expanduser(path)
-    if check_not_directory and os.path.exists(path) and os.path.isdir(path):
+    path = Path(path).expanduser()
+    if check_not_directory and path.exists() and path.is_dir():
         raise IsADirectoryError(f"Expected a file path; {path!r} is a directory")
-    return path
+    return str(path)
 
 
 def parse_version(version: Sequence[str | int]) -> tuple[int, ...]:
@@ -358,15 +357,15 @@ def find_stacklevel() -> int:
     Taken from:
     https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
     """
-    pkg_dir = os.path.dirname(pl.__file__)
-    test_dir = os.path.join(pkg_dir, "tests")
+    pkg_dir = Path(pl.__file__).parent
+    test_dir = pkg_dir / "tests"
 
     # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
     frame = inspect.currentframe()
     n = 0
     while frame:
         fname = inspect.getfile(frame)
-        if fname.startswith(pkg_dir) and not fname.startswith(test_dir):
+        if fname.startswith(str(pkg_dir)) and not fname.startswith(str(test_dir)):
             frame = frame.f_back
             n += 1
         else:
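The str() calls in find_stacklevel are needed because inspect.getfile returns a string and str.startswith does not accept a Path; a quick demonstration with hypothetical paths:

from pathlib import Path

pkg_dir = Path("/site-packages/polars")        # hypothetical install location
fname = "/site-packages/polars/functions.py"   # inspect.getfile() returns a str

# fname.startswith(pkg_dir)   # would raise TypeError: startswith wants str or tuple
print(fname.startswith(str(pkg_dir)))          # True -- hence the str() conversions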
1 change: 1 addition & 0 deletions py-polars/pyproject.toml
@@ -119,6 +119,7 @@ select = [
   "UP", # pyupgrade
   "PT", # flake8-pytest-style
   "RUF", # Ruff-specific rules
+  "PTH", # flake8-use-pathlib
 ]
 
 ignore = [
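Enabling PTH turns on ruff's flake8-use-pathlib rules, which flag remaining os.path usage so the codebase stays pathlib-based. As an illustration (not from this diff), a line like the following would be reported as PTH118:

import os.path

# ruff (PTH118): os.path.join() should be replaced by the Path "/" operator
config_file = os.path.join("etc", "polars", "config.json")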
6 changes: 3 additions & 3 deletions py-polars/scripts/check_stacklevels.py
@@ -7,6 +7,7 @@
 import subprocess
 import sys
 from ast import NodeVisitor
+from pathlib import Path
 
 # Files in which it's OK to set the stacklevel manually.
 # `git ls-files` lists files with forwards-slashes
@@ -38,10 +39,9 @@ def visit_Call(self, node: ast.Call) -> None:
 for file in files:
     if file in EXCLUDE:
         continue
-    if not file.endswith(".py"):
+    if Path(file).suffix != ".py":
         continue
-    with open(file) as fd:
-        content = fd.read()
+    content = Path(file).read_text()
     tree = ast.parse(content)
     stacklevel_checker = StackLevelChecker(file)
     stacklevel_checker.visit(tree)
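The suffix check matches endswith(".py") for ordinary file names, with one corner case: a bare dotfile named ".py" has an empty suffix, so the two tests differ only there. A quick demonstration:

from pathlib import Path

print(Path("script.py").suffix)   # ".py"
print(Path(".py").suffix)         # ""  -- dotfiles have no suffix
print(".py".endswith(".py"))      # True, so the old check would have kept this file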
5 changes: 2 additions & 3 deletions py-polars/tests/benchmark/test_release.py
@@ -5,7 +5,6 @@
 To run these tests: pytest -m benchmark
 """
-import os
 import time
 from pathlib import Path
 from typing import cast
 
@@ -21,12 +20,12 @@
 
 
 @pytest.mark.skipif(
-    not (Path(os.path.dirname(__file__)) / "G1_1e7_1e2_5_0.csv").is_file(),
+    not (Path(__file__).parent / "G1_1e7_1e2_5_0.csv").is_file(),
     reason="Dataset must be generated before running this test.",
 )
 def test_read_scan_large_csv() -> None:
     filename = "G1_1e7_1e2_5_0.csv"
-    path = Path(os.path.dirname(__file__)) / filename
+    path = Path(__file__).parent / filename
 
     predicate = pl.col("v2") < 5
 
4 changes: 1 addition & 3 deletions py-polars/tests/unit/io/conftest.py
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
-import os
 from pathlib import Path
 
 import pytest
 
 
 @pytest.fixture()
 def io_files_path() -> Path:
-    current_dir = os.path.dirname(__file__)
-    return Path(current_dir) / "files"
+    return Path(__file__).parent / "files"
8 changes: 3 additions & 5 deletions py-polars/tests/unit/io/test_csv.py
@@ -379,8 +379,7 @@ def test_read_csv_encoding(tmp_path: Path) -> None:
     )
 
     file_path = tmp_path / "encoding.csv"
-    with open(file_path, "wb") as f:
-        f.write(bts)
+    file_path.write_bytes(bts)
 
     file_str = str(file_path)
     bytesio = io.BytesIO(bts)
@@ -487,9 +486,8 @@ def test_compressed_csv(io_files_path: Path) -> None:
 
 def test_partial_decompression(foods_file_path: Path) -> None:
     f_out = io.BytesIO()
-    with open(foods_file_path, "rb") as f_read:  # noqa: SIM117
-        with gzip.GzipFile(fileobj=f_out, mode="w") as f:
-            f.write(f_read.read())
+    with gzip.GzipFile(fileobj=f_out, mode="w") as f:
+        f.write(foods_file_path.read_bytes())
 
     csv_bytes = f_out.getvalue()
     for n_rows in [1, 5, 26]:
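A self-contained sketch of the compression round trip the test builds, with read_bytes standing in for the nested open (file name and contents are placeholders):

import gzip
import io
from pathlib import Path

src = Path("foods.csv")                          # placeholder input
src.write_text("category,calories\nfruit,30\n")

f_out = io.BytesIO()
with gzip.GzipFile(fileobj=f_out, mode="w") as f:
    f.write(src.read_bytes())                    # replaces open(src, "rb").read()

csv_bytes = f_out.getvalue()                     # gzip-compressed CSV payload
print(gzip.decompress(csv_bytes) == src.read_bytes())  # True: lossless round trip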
7 changes: 2 additions & 5 deletions py-polars/tests/unit/io/test_database.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
-import os
 import sys
 from datetime import date
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 import pytest
@@ -11,8 +11,6 @@
 from polars.testing import assert_frame_equal
 
 if TYPE_CHECKING:
-    from pathlib import Path
-
     from polars.type_aliases import (
         DbReadEngine,
         DbWriteEngine,
@@ -35,8 +33,7 @@ def sample_df() -> pl.DataFrame:
 def create_temp_sqlite_db(test_db: str) -> None:
     import sqlite3
 
-    if os.path.exists(test_db):
-        os.unlink(test_db)
+    Path(test_db).unlink(missing_ok=True)
 
     # NOTE: at the time of writing adcb/connectorx have weak SQLite support (poor or
     # no bool/date/datetime dtypes, for example) and there is a bug in connectorx that
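Path.unlink(missing_ok=True), available since Python 3.8, folds the exists-then-delete pair into one call and removes the race window between the check and the deletion; a sketch with a placeholder file name:

from pathlib import Path

db = Path("test.db")          # placeholder database file
db.unlink(missing_ok=True)    # deletes if present, silently does nothing otherwise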
12 changes: 5 additions & 7 deletions py-polars/tests/unit/io/test_lazy_csv.py
@@ -42,8 +42,7 @@ def test_invalid_utf8(tmp_path: Path) -> None:
     bts = bytes(np.random.randint(0, 255, 200))
 
     file_path = tmp_path / "nonutf8.csv"
-    with open(file_path, "wb") as f:
-        f.write(bts)
+    file_path.write_bytes(bts)
 
     a = pl.read_csv(file_path, has_header=False, encoding="utf8-lossy")
     b = pl.scan_csv(file_path, has_header=False, encoding="utf8-lossy").collect()
@@ -192,17 +191,16 @@ def test_glob_skip_rows(tmp_path: Path) -> None:
 
     for i in range(2):
         file_path = tmp_path / f"test_{i}.csv"
-        with open(file_path, "w") as f:
-            f.write(
-                f"""
+        file_path.write_text(
+            f"""
 metadata goes here
 file number {i}
 foo,bar,baz
 1,2,3
 4,5,6
 7,8,9
 """
-            )
+        )
     file_path = tmp_path / "*.csv"
     assert pl.read_csv(file_path, skip_rows=2).to_dict(False) == {
         "foo": [1, 4, 7, 1, 4, 7],
@@ -227,7 +225,7 @@ def test_glob_n_rows(io_files_path: Path) -> None:
     }
 
 
-def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: str) -> None:
+def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: Path) -> None:
     df = (
         pl.scan_csv(
             foods_file_path,
4 changes: 2 additions & 2 deletions py-polars/tests/unit/io/test_lazy_json.py
@@ -57,8 +57,8 @@ def test_scan_with_projection(tmp_path: Path) -> None:
     json_bytes = bytes(json, "utf-8")
 
     file_path = tmp_path / "escape_chars.json"
-    with open(file_path, "wb") as f:
-        f.write(json_bytes)
+    file_path.write_bytes(json_bytes)
+
     actual = pl.scan_ndjson(file_path).select(["id", "text"]).collect()
 
     expected = pl.DataFrame(
12 changes: 6 additions & 6 deletions py-polars/tests/unit/io/test_other.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import copy
-import os.path
+from pathlib import Path
 from typing import cast
 
 import polars as pl
@@ -51,8 +51,8 @@ def test_unit_io_subdir_has_no_init() -> None:
     # --------------------------------------------------------------------------------
     # TLDR: it can mask the builtin 'io' module, causing a fatal python error.
     # --------------------------------------------------------------------------------
-    io_dir = os.path.dirname(__file__)
-    assert io_dir.endswith(f"unit{os.path.sep}io")
-    assert not os.path.exists(
-        f"{io_dir}{os.path.sep}__init__.py"
-    ), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
+    io_dir = Path(__file__).parent
+    assert io_dir.parts[-2:] == ("unit", "io")
+    assert not (
+        io_dir / "__init__.py"
+    ).exists(), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory"
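Path.parts splits on the platform separator, so the directory check no longer has to interpolate os.path.sep by hand; a minimal sketch:

from pathlib import Path

io_dir = Path("tests") / "unit" / "io"       # hypothetical layout
print(io_dir.parts)                          # ("tests", "unit", "io")
print(io_dir.parts[-2:] == ("unit", "io"))   # True on POSIX and Windows alike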
6 changes: 2 additions & 4 deletions py-polars/tests/unit/io/test_parquet.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import io
-import os
 from datetime import datetime, timezone
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -19,8 +19,6 @@
 )
 
 if TYPE_CHECKING:
-    from pathlib import Path
-
     from polars.type_aliases import ParquetCompression
 
 
@@ -513,7 +511,7 @@ def test_parquet_string_cache() -> None:
 
 def test_tz_aware_parquet_9586() -> None:
     result = pl.read_parquet(
-        os.path.join("tests", "unit", "io", "files", "tz_aware.parquet")
+        Path("tests") / "unit" / "io" / "files" / "tz_aware.parquet"
     )
     expected = pl.DataFrame(
         {"UTC_DATETIME_ID": [datetime(2023, 6, 26, 14, 15, 0, tzinfo=timezone.utc)]}
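The "/" operator joins path components with the correct separator for the platform, replacing os.path.join; a minimal sketch:

from pathlib import Path

p = Path("tests") / "unit" / "io" / "files" / "tz_aware.parquet"
print(p)   # tests/unit/io/files/tz_aware.parquet (backslashes on Windows)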
4 changes: 1 addition & 3 deletions py-polars/tests/unit/streaming/conftest.py
@@ -1,10 +1,8 @@
-import os
 from pathlib import Path
 
 import pytest
 
 
 @pytest.fixture()
 def io_files_path() -> Path:
-    current_dir = os.path.dirname(__file__)
-    return Path(current_dir) / ".." / "io" / "files"
+    return Path(__file__).parent.parent / "io" / "files"
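Stepping up with .parent is purely lexical, whereas the old version carried a literal ".." component until the OS resolved it; both forms reach the same directory, as this sketch with a hypothetical layout shows:

from pathlib import Path

here = Path("/repo/tests/unit/streaming")   # hypothetical test directory
print(here / ".." / "io" / "files")         # /repo/tests/unit/streaming/../io/files
print(here.parent / "io" / "files")         # /repo/tests/unit/io/files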
8 changes: 3 additions & 5 deletions py-polars/tests/unit/test_cfg.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-import os
-from typing import TYPE_CHECKING, Iterator
+from pathlib import Path
+from typing import Iterator
 
 import pytest
 
@@ -10,9 +11,6 @@
 from polars.exceptions import StringCacheMismatchError
 from polars.testing import assert_frame_equal
 
-if TYPE_CHECKING:
-    from pathlib import Path
-
 
 @pytest.fixture(autouse=True)
 def _environ() -> Iterator[None]:
@@ -531,7 +529,7 @@ def test_config_load_save(tmp_path: Path) -> None:
 
     # ...load back from config...
     if file is not None:
-        assert os.path.isfile(cfg)
+        assert Path(cfg).is_file()
         pl.Config.load(cfg)
 
     # ...and confirm the saved options were set.