Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(python): Rename type_aliases module to _typing #17282

Merged
merged 2 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/user-guide/expressions/plugins.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ from typing import TYPE_CHECKING

import polars as pl
from polars.plugins import register_plugin_function
from polars.type_aliases import IntoExpr
from polars._typing import IntoExpr


def pig_latinnify(expr: IntoExpr) -> pl.Expr:
Expand Down
278 changes: 278 additions & 0 deletions py-polars/polars/_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
from __future__ import annotations

from typing import (
TYPE_CHECKING,
Any,
Collection,
Iterable,
List,
Literal,
Mapping,
Protocol,
Sequence,
Tuple,
Type,
TypedDict,
TypeVar,
Union,
)

if TYPE_CHECKING:
import sys
from datetime import date, datetime, time, timedelta
from decimal import Decimal

from sqlalchemy.engine import Connection, Engine
from sqlalchemy.orm import Session

from polars import DataFrame, Expr, LazyFrame, Series
from polars.datatypes import DataType, DataTypeClass, IntegerType, TemporalType
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
from polars.dependencies import pyarrow as pa
from polars.selectors import _selector_proxy_

if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias

# Data types
# The polars dtype names are written as strings, i.e. forward references.
PolarsDataType: TypeAlias = Union["DataTypeClass", "DataType"]
# Temporal / integer dtypes: either the class itself (`Type[...]`) or an instance.
PolarsTemporalType: TypeAlias = Union[Type["TemporalType"], "TemporalType"]
PolarsIntegerType: TypeAlias = Union[Type["IntegerType"], "IntegerType"]
# A single polars dtype, or any iterable of them.
OneOrMoreDataTypes: TypeAlias = Union[PolarsDataType, Iterable[PolarsDataType]]
# Python classes, given as the type object itself (`Type[...]`) rather than a value.
PythonDataType: TypeAlias = Union[
    Type[int],
    Type[float],
    Type[bool],
    Type[str],
    Type["date"],
    Type["time"],
    Type["datetime"],
    Type["timedelta"],
    Type[List[Any]],
    Type[Tuple[Any, ...]],
    Type[bytes],
    Type["Decimal"],
    Type[None],
]

# A schema given either as a mapping keyed by `str`, or as a sequence whose items
# are a bare `str` or a `(str, dtype)` pair in which the dtype may be None.
# Dtypes may be polars dtypes or Python types.
SchemaDefinition: TypeAlias = Union[
    Mapping[str, Union[PolarsDataType, PythonDataType]],
    Sequence[Union[str, Tuple[str, Union[PolarsDataType, PythonDataType, None]]]],
]
# Mapping from `str` keys to polars dtypes only (Python types are not included).
SchemaDict: TypeAlias = Mapping[str, PolarsDataType]

# Scalar Python values, grouped by kind
NumericLiteral: TypeAlias = Union[int, float, "Decimal"]
TemporalLiteral: TypeAlias = Union["date", "time", "datetime", "timedelta"]
# Any numeric or temporal literal, plus str, bool and bytes
NonNestedLiteral: TypeAlias = Union[NumericLiteral, TemporalLiteral, str, bool, bytes]
# Python literal types (can convert into a `lit` expression)
# i.e. any non-nested literal, or a list
PythonLiteral: TypeAlias = Union[NonNestedLiteral, List[Any]]
# Inputs that can convert into a `col` expression
IntoExprColumn: TypeAlias = Union["Expr", "Series", str]
# Inputs that can convert into an expression
# (union of the two aliases above; None is accepted as well)
IntoExpr: TypeAlias = Union[PythonLiteral, IntoExprColumn, None]

# The six accepted comparison operator names
ComparisonOperator: TypeAlias = Literal["eq", "neq", "gt", "lt", "gt_eq", "lt_eq"]

# selector type, and related collection/sequence
# (`SelectorType` is a string forward reference to `_selector_proxy_`)
SelectorType: TypeAlias = "_selector_proxy_"
# Either a plain `str` or a selector
ColumnNameOrSelector: TypeAlias = Union[str, SelectorType]

# User-facing string literal types
# Each alias below is a closed set of accepted string values.
# The following all have an equivalent Rust enum with the same name
Ambiguous: TypeAlias = Literal["earliest", "latest", "raise", "null"]
AvroCompression: TypeAlias = Literal["uncompressed", "snappy", "deflate"]
CsvQuoteStyle: TypeAlias = Literal["necessary", "always", "non_numeric", "never"]
CategoricalOrdering: TypeAlias = Literal["physical", "lexical"]
CsvEncoding: TypeAlias = Literal["utf8", "utf8-lossy"]
FillNullStrategy: TypeAlias = Literal[
    "forward", "backward", "min", "max", "mean", "zero", "one"
]
FloatFmt: TypeAlias = Literal["full", "mixed"]
IndexOrder: TypeAlias = Literal["c", "fortran"]
IpcCompression: TypeAlias = Literal["uncompressed", "lz4", "zstd"]
# NOTE(review): `m` / `1` presumably denote "many" / "one" on each join side — confirm
JoinValidation: TypeAlias = Literal["m:m", "m:1", "1:m", "1:1"]
Label: TypeAlias = Literal["left", "right", "datapoint"]
NonExistent: TypeAlias = Literal["raise", "null"]
NullBehavior: TypeAlias = Literal["ignore", "drop"]
ParallelStrategy: TypeAlias = Literal["auto", "columns", "row_groups", "none"]
ParquetCompression: TypeAlias = Literal[
    "lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"
]
PivotAgg: TypeAlias = Literal[
    "min", "max", "first", "last", "sum", "mean", "median", "len"
]
RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal", "random"]
Roll: TypeAlias = Literal["raise", "forward", "backward"]
SerializationFormat: TypeAlias = Literal["binary", "json"]
# Each unit is accepted in both its abbreviated and its spelled-out form
SizeUnit: TypeAlias = Literal[
    "b",
    "kb",
    "mb",
    "gb",
    "tb",
    "bytes",
    "kilobytes",
    "megabytes",
    "gigabytes",
    "terabytes",
]
# Either "window", "datapoint", or the name of a weekday
StartBy: TypeAlias = Literal[
    "window",
    "datapoint",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
    "sunday",
]
TimeUnit: TypeAlias = Literal["ns", "us", "ms"]
UniqueKeepStrategy: TypeAlias = Literal["first", "last", "any", "none"]
UnstackDirection: TypeAlias = Literal["vertical", "horizontal"]
MapElementsStrategy: TypeAlias = Literal["thread_local", "threading"]

# The following have a Rust enum equivalent with a different name
# (the trailing comment on each alias gives the Rust-side name)
AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"]  # AsofStrategy
ClosedInterval: TypeAlias = Literal["left", "right", "both", "none"]  # ClosedWindow
# NOTE(review): no Rust-side name is recorded for this alias — confirm and add it
InterpolationMethod: TypeAlias = Literal["linear", "nearest"]
JoinStrategy: TypeAlias = Literal[
    "inner", "left", "full", "semi", "anti", "cross", "outer"
]  # JoinType
RollingInterpolationMethod: TypeAlias = Literal[
    "nearest", "higher", "lower", "midpoint", "linear"
]  # QuantileInterpolOptions
ToStructStrategy: TypeAlias = Literal[
    "first_non_null", "max_width"
]  # ListToStructWidthStrategy

# The following have no equivalent on the Rust side
ConcatMethod = Literal[
"vertical",
"vertical_relaxed",
"diagonal",
"diagonal_relaxed",
"horizontal",
"align",
]
CorrelationMethod: TypeAlias = Literal["pearson", "spearman"]
DbReadEngine: TypeAlias = Literal["adbc", "connectorx"]
DbWriteEngine: TypeAlias = Literal["sqlalchemy", "adbc"]
DbWriteMode: TypeAlias = Literal["replace", "append", "fail"]
EpochTimeUnit = Literal["ns", "us", "ms", "s", "d"]
JaxExportType: TypeAlias = Literal["array", "dict"]
Orientation: TypeAlias = Literal["col", "row"]
SearchSortedSide: TypeAlias = Literal["any", "left", "right"]
TorchExportType: TypeAlias = Literal["tensor", "dataset", "dict"]
TransferEncoding: TypeAlias = Literal["hex", "base64"]
WindowMappingStrategy: TypeAlias = Literal["group_to_rows", "join", "explode"]
ExplainFormat: TypeAlias = Literal["plain", "tree"]

# type signature for allowed frame init
# Accepts a `str`-keyed mapping (values: a sequence, a nested mapping of sequences,
# or a Series), any sequence, or a numpy / pyarrow / pandas object. The
# third-party types are string forward references.
FrameInitTypes: TypeAlias = Union[
    Mapping[str, Union[Sequence[object], Mapping[str, Sequence[object]], "Series"]],
    Sequence[Any],
    "np.ndarray[Any, Any]",
    "pa.Table",
    "pd.DataFrame",
]

# Excel IO
# Each alias below describes the accepted shape of one Excel-related option.
ColumnFormatDict: TypeAlias = Mapping[
    # dict of colname(s) or selector(s) to format string or dict
    Union[ColumnNameOrSelector, Tuple[ColumnNameOrSelector, ...]],
    Union[str, Mapping[str, str]],
]
ConditionalFormatDict: TypeAlias = Mapping[
    # dict of colname(s) to str, dict, or sequence of str/dict
    Union[ColumnNameOrSelector, Collection[str]],
    Union[str, Union[Mapping[str, Any], Sequence[Union[str, Mapping[str, Any]]]]],
]
ColumnTotalsDefinition: TypeAlias = Union[
    # dict of colname(s) to str, a collection of str, or a boolean
    Mapping[Union[str, Collection[str]], str],
    Sequence[str],
    bool,
]
# Either a mapping of colname/selector to a tuple of str or an int, or one int
ColumnWidthsDefinition: TypeAlias = Union[
    Mapping[ColumnNameOrSelector, Union[Tuple[str, ...], int]], int
]
RowTotalsDefinition: TypeAlias = Union[
    # dict of colname to str(s), a collection of str, or a boolean
    Mapping[str, Union[str, Collection[str]]],
    Collection[str],
    bool,
]

# standard/named hypothesis profiles used for parametric testing
ParametricProfileNames: TypeAlias = Literal["fast", "balanced", "expensive"]

# typevars for core polars types
# (constrained TypeVars: each one resolves to exactly one of the listed types)
PolarsType = TypeVar("PolarsType", "DataFrame", "LazyFrame", "Series", "Expr")
FrameType = TypeVar("FrameType", "DataFrame", "LazyFrame")
# A triple of ints
# NOTE(review): the meaning of each element is not shown here — confirm and document
BufferInfo: TypeAlias = Tuple[int, int, int]

# type alias for supported spreadsheet engines
ExcelSpreadsheetEngine: TypeAlias = Literal["xlsx2csv", "openpyxl", "calamine"]


class SeriesBuffers(TypedDict):
    """Underlying buffers of a Series."""

    # Always a Series.
    values: Series
    # A Series, or None.
    validity: Series | None
    # A Series, or None.
    offsets: Series | None


# minimal protocol definitions that can reasonably represent
# an executable connection, cursor, or equivalent object
class BasicConnection(Protocol):
    """Structural type matching any object that exposes a `cursor` method."""

    # The signature is deliberately permissive: any arguments, any return value.
    def cursor(self, *args: Any, **kwargs: Any) -> Any:
        """Return a cursor object."""


class BasicCursor(Protocol):
    """Structural type matching any object that exposes an `execute` method."""

    def execute(self, *args: Any, **kwargs: Any) -> Any:
        """Execute a query."""


class Cursor(BasicCursor, Protocol):
    """
    Structural type for a cursor that can execute a query and fetch its results.

    `Protocol` is listed explicitly as a base class: a class that only inherits
    from a protocol is an ordinary (nominal) class, so without it third-party
    cursor objects would not be matched structurally by type checkers.
    """

    def fetchall(self, *args: Any, **kwargs: Any) -> Any:
        """Fetch all results."""

    def fetchmany(self, *args: Any, **kwargs: Any) -> Any:
        """Fetch results in batches."""


# The three SQLAlchemy types are string forward references.
AlchemyConnection: TypeAlias = Union["Connection", "Engine", "Session"]
# Anything accepted as a connection: one of the protocol classes defined in this
# module, or a SQLAlchemy object.
ConnectionOrCursor: TypeAlias = Union[
    BasicConnection, BasicCursor, Cursor, AlchemyConnection
]


# Annotations for `__getitem__` methods
# "Single" selectors are one int / str; "multi" selectors are a slice, a
# sequence, a Series or a numpy array.
SingleIndexSelector: TypeAlias = int
MultiIndexSelector: TypeAlias = Union[
    slice,
    range,
    Sequence[int],
    "Series",
    "np.ndarray[Any, Any]",
]
SingleNameSelector: TypeAlias = str
# Same shape as `MultiIndexSelector` but with `str` items and no `range`
MultiNameSelector: TypeAlias = Union[
    slice,
    Sequence[str],
    "Series",
    "np.ndarray[Any, Any]",
]
BooleanMask: TypeAlias = Union[
    Sequence[bool],
    "Series",
    "np.ndarray[Any, Any]",
]
# Column selectors combine the index- and name-based forms (plus a boolean mask
# for the multi-column case)
SingleColSelector: TypeAlias = Union[SingleIndexSelector, SingleNameSelector]
MultiColSelector: TypeAlias = Union[MultiIndexSelector, MultiNameSelector, BooleanMask]
4 changes: 2 additions & 2 deletions py-polars/polars/_utils/construction/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@

if TYPE_CHECKING:
from polars import DataFrame, Series
from polars.polars import PySeries
from polars.type_aliases import (
from polars._typing import (
Orientation,
PolarsDataType,
SchemaDefinition,
SchemaDict,
)
from polars.polars import PySeries

_MIN_NUMPY_SIZE_FOR_MULTITHREADING = 1000

Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/construction/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@

if TYPE_CHECKING:
from polars import DataFrame, Series
from polars._typing import PolarsDataType
from polars.dependencies import pandas as pd
from polars.type_aliases import PolarsDataType


def sequence_to_pyseries(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from datetime import date, tzinfo
from decimal import Decimal

from polars.type_aliases import TimeUnit
from polars._typing import TimeUnit


@overload
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import sys
from typing import Mapping

from polars.type_aliases import Ambiguous
from polars._typing import Ambiguous

if sys.version_info >= (3, 10):
from typing import ParamSpec
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

if TYPE_CHECKING:
from polars import DataFrame, Series
from polars.type_aliases import (
from polars._typing import (
MultiColSelector,
MultiIndexSelector,
SingleColSelector,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/parse/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

if TYPE_CHECKING:
from polars import Expr
from polars._typing import IntoExpr, PolarsDataType
from polars.polars import PyExpr
from polars.type_aliases import IntoExpr, PolarsDataType


def parse_into_expression(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
if TYPE_CHECKING:
from io import IOBase

from polars.type_aliases import SerializationFormat
from polars._typing import SerializationFormat


@overload
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from collections.abc import Iterator, Reversible

from polars import DataFrame
from polars.type_aliases import PolarsDataType, SizeUnit
from polars._typing import PolarsDataType, SizeUnit

if sys.version_info >= (3, 13):
from typing import TypeIs
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
from types import TracebackType

from polars.type_aliases import FloatFmt
from polars._typing import FloatFmt

if sys.version_info >= (3, 10):
from typing import TypeAlias
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/convert/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@

if TYPE_CHECKING:
from polars import DataFrame, Series
from polars._typing import Orientation, SchemaDefinition, SchemaDict
from polars.dependencies import numpy as np
from polars.interchange.protocol import SupportsInterchange
from polars.type_aliases import Orientation, SchemaDefinition, SchemaDict


def from_dict(
Expand Down
Loading