Skip to content

Commit

Permalink
fix schema_id during AddSchemaUpdate
Browse files (browse the repository at this point in the history)
  • Loading branch information
kevinjqliu committed Jan 22, 2024
1 parent 6c87139 commit a56838d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
7 changes: 5 additions & 2 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,11 @@ def _(update: AddSchemaUpdate, base_metadata: TableMetadata, context: _TableMeta
if update.last_column_id < base_metadata.last_column_id:
raise ValueError(f"Invalid last column id {update.last_column_id}, must be >= {base_metadata.last_column_id}")

# PROBLEM: `update.schema_`'s `schema_id` starts with 0 but should be 1
# fresh_schema = assign_fresh_schema_ids(update.schema_)
# `update.schema_.schema_id` should be the last_schema_id + 1
last_schema_id = max(schema.schema_id for schema in base_metadata.schemas)
next_schema_id = last_schema_id + 1
new_schema = update.schema_.model_copy(update={"schema_id": next_schema_id})
update = update.model_copy(update={"schema_": new_schema})

context.add_update(update)
return base_metadata.model_copy(
Expand Down
6 changes: 4 additions & 2 deletions tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,10 @@ def test_commit_table(catalog: InMemoryCatalog) -> None:

# Then
assert response.metadata.table_uuid == given_table.metadata.table_uuid
# assert len(response.metadata.schemas) == 1
# assert response.metadata.schemas[0] == new_schema
assert given_table.metadata.current_schema_id == 1
assert len(response.metadata.schemas) == 2
assert response.metadata.schemas[1] == new_schema
assert given_table.metadata.last_column_id == new_schema.highest_field_id


def test_add_column(catalog: InMemoryCatalog) -> None:
Expand Down

0 comments on commit a56838d

Please sign in to comment.