Skip to content

Commit

Permalink
Addressed minor comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
soumya-ghosh committed Jul 10, 2024
1 parent 84ae86d commit e4eb226
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 9 deletions.
10 changes: 5 additions & 5 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2144,7 +2144,7 @@ def _dataframe_to_data_files(


@dataclass(frozen=True)
class TablePartition:
class _TablePartition:
partition_key: PartitionKey
arrow_table_partition: pa.Table

Expand All @@ -2154,7 +2154,7 @@ def _get_table_partitions(
partition_spec: PartitionSpec,
schema: Schema,
slice_instructions: list[dict[str, Any]],
) -> list[TablePartition]:
) -> list[_TablePartition]:
sorted_slice_instructions = sorted(slice_instructions, key=lambda x: x["offset"])

partition_fields = partition_spec.fields
Expand All @@ -2175,11 +2175,11 @@ def _get_table_partitions(
for partition_field in partition_fields
]
partition_key = PartitionKey(raw_partition_field_values=fieldvalues, partition_spec=partition_spec, schema=schema)
table_partitions.append(TablePartition(partition_key=partition_key, arrow_table_partition=partition_slice))
table_partitions.append(_TablePartition(partition_key=partition_key, arrow_table_partition=partition_slice))
return table_partitions


def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.Table) -> List[TablePartition]:
def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.Table) -> List[_TablePartition]:
"""Based on the iceberg table partition spec, slice the arrow table into partitions with their keys.
Example:
Expand Down Expand Up @@ -2235,6 +2235,6 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T
last = reversed_indices[ptr]
ptr = ptr + group_size

table_partitions: List[TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions)
table_partitions: List[_TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions)

return table_partitions
4 changes: 0 additions & 4 deletions tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1723,8 +1723,6 @@ def test_bin_pack_arrow_table(arrow_table_with_null: pa.Table) -> None:


def test_partition_for_demo() -> None:
import pyarrow as pa

test_pa_schema = pa.schema([("year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())])
test_schema = Schema(
NestedField(field_id=1, name="year", field_type=StringType(), required=False),
Expand Down Expand Up @@ -1757,8 +1755,6 @@ def test_partition_for_demo() -> None:


def test_identity_partition_on_multi_columns() -> None:
import pyarrow as pa

test_pa_schema = pa.schema([("born_year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())])
test_schema = Schema(
NestedField(field_id=1, name="born_year", field_type=StringType(), required=False),
Expand Down

0 comments on commit e4eb226

Please sign in to comment.