Skip to content

Commit

Permalink
Add logic to generate a new snapshot-id
Browse files Browse the repository at this point in the history
  • Loading branch information
Fokko committed Oct 3, 2023
1 parent 3471c5f commit bfa8143
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
21 changes: 21 additions & 0 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from __future__ import annotations

import itertools
import uuid
from abc import ABC, abstractmethod
from copy import copy
from dataclasses import dataclass
Expand Down Expand Up @@ -498,6 +499,14 @@ def location(self) -> str:
"""Return the table's base location."""
return self.metadata.location

def new_snapshot_id(self) -> int:
    """Return a freshly generated snapshot-id that this table does not use yet.

    Candidates come from ``_generate_snapshot_id``; a candidate is accepted
    as soon as ``snapshot_by_id`` reports no snapshot for it.
    """
    while True:
        candidate = _generate_snapshot_id()
        # Accept the first candidate that does not resolve to a snapshot.
        if self.snapshot_by_id(candidate) is None:
            return candidate

def current_snapshot(self) -> Optional[Snapshot]:
"""Get the current snapshot for this table, or None if there is no current snapshot."""
if snapshot_id := self.metadata.current_snapshot_id:
Expand Down Expand Up @@ -1566,3 +1575,15 @@ def _add_and_move_fields(
elif len(moves) > 0:
return _move_fields(fields, moves)
return None if len(adds) == 0 else tuple(*fields, *adds)


def _generate_snapshot_id() -> int:
"""Generate a new Snapshot ID from a UUID.
Right shifting the 64 bits removes the MAC address and time
leaving only the part that's based on the clock (and has the
highest entropy).
Returns: An 64 bit long
"""
return uuid.uuid4().int & (1 << 64) - 1
6 changes: 6 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
StaticTable,
Table,
UpdateSchema,
_generate_snapshot_id,
_match_deletes_to_datafile,
)
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER
Expand Down Expand Up @@ -506,3 +507,8 @@ def test_add_nested_list_type_column(table: Table) -> None:
element_required=False,
)
assert new_schema.highest_field_id == 7


def test_generate_snapshot_id(table: Table) -> None:
    """Check that both snapshot-id entry points produce an ``int``."""
    # Module-level generator.
    generated = _generate_snapshot_id()
    assert isinstance(generated, int)

    # Table-level generator, which is expected to skip ids already in use.
    from_table = table.new_snapshot_id()
    assert isinstance(from_table, int)

0 comments on commit bfa8143

Please sign in to comment.