diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index fade79326a49..f2f2db61d44d 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1145,6 +1145,10 @@ acceptedBreaks:
       new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\
         \ org.apache.iceberg.TableMetadata)"
       justification: "Removing deprecated code"
+    - code: "java.method.numberOfParametersChanged"
+      old: "method void org.apache.iceberg.UnboundPartitionSpec::<init>(int, java.util.List)"
+      new: "method void org.apache.iceberg.UnboundPartitionSpec::<init>(int, int, java.util.List)"
+      justification: "Add schema id to UnboundPartitionSpec"
  apache-iceberg-0.14.0:
    org.apache.iceberg:iceberg-api:
    - code: "java.class.defaultSerializationChanged"
@@ -1184,6 +1188,10 @@ acceptedBreaks:
    - code: "java.method.removed"
      old: "method org.apache.iceberg.RowDelta org.apache.iceberg.RowDelta::validateNoConflictingAppends(org.apache.iceberg.expressions.Expression)"
      justification: "Deprecations for 1.0 release"
+    - code: "java.method.numberOfParametersChanged"
+      old: "method void org.apache.iceberg.UnboundPartitionSpec::<init>(int, java.util.List)"
+      new: "method void org.apache.iceberg.UnboundPartitionSpec::<init>(int, int, java.util.List)"
+      justification: "Add schema id to UnboundPartitionSpec"
  release-base-0.13.0:
    org.apache.iceberg:iceberg-api:
    - code: "java.class.defaultSerializationChanged"
diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 9b74893f1831..c89aa419ad84 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -101,7 +101,8 @@ int lastAssignedFieldId() {
   }
 
   public UnboundPartitionSpec toUnbound() {
-    UnboundPartitionSpec.Builder builder = UnboundPartitionSpec.builder().withSpecId(specId);
+    UnboundPartitionSpec.Builder builder =
+        UnboundPartitionSpec.builder().withSpecId(specId).withSchemaId(schema.schemaId());
 
     for (PartitionField field : fields) {
       builder.addField(
@@ -630,21 +631,19 @@ static void checkCompatibility(PartitionSpec spec, Schema schema) {
       // In the case of a Version 1 partition-spec field gets deleted,
       // it is replaced with a void transform, see:
       // https://iceberg.apache.org/spec/#partition-transforms
-      // We don't care about the source type since a VoidTransform is always compatible and skip the
-      // checks
-      if (!transform.equals(Transforms.alwaysNull())) {
-        ValidationException.check(
-            sourceType != null, "Cannot find source column for partition field: %s", field);
-        ValidationException.check(
-            sourceType.isPrimitiveType(),
-            "Cannot partition by non-primitive source field: %s",
-            sourceType);
-        ValidationException.check(
-            transform.canTransform(sourceType),
-            "Invalid source type %s for transform: %s",
-            sourceType,
-            transform);
-      }
+      // A void transform is compatible with any source type, but the partition field's
+      // source column must still be present in the schema
+      ValidationException.check(
+          sourceType != null, "Cannot find source column for partition field: %s", field);
+      ValidationException.check(
+          sourceType.isPrimitiveType(),
+          "Cannot partition by non-primitive source field: %s",
+          sourceType);
+      ValidationException.check(
+          transform.canTransform(sourceType),
+          "Invalid source type %s for transform: %s",
+          sourceType,
+          transform);
     }
   }
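The `checkCompatibility` change is behavioral, not cosmetic: a void transform no longer exempts a partition field from source-column validation. A minimal sketch of the new behavior, assuming a hypothetical test-style class placed in the `org.apache.iceberg` package (the method is package-private):

```java
package org.apache.iceberg;

import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.types.Types;

public class CheckCompatibilitySketch {
  public static void main(String[] args) {
    Schema schema =
        new Schema(
            Types.NestedField.required(1, "id", Types.IntegerType.get()),
            Types.NestedField.required(2, "data", Types.StringType.get()));

    // A spec whose only field is a void transform on "data".
    PartitionSpec spec = PartitionSpec.builderFor(schema).alwaysNull("data").build();

    // A schema that no longer contains the source column. Previously the void
    // transform skipped all checks; now the missing source column is rejected.
    Schema pruned = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()));
    try {
      PartitionSpec.checkCompatibility(spec, pruned);
    } catch (ValidationException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}
```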
diff --git a/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java b/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
index cc8526f9072c..1bc4be374e62 100644
--- a/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
@@ -27,17 +27,23 @@ public class UnboundPartitionSpec {
 
   private final int specId;
+  private final int schemaId;
   private final List<UnboundPartitionField> fields;
 
-  public UnboundPartitionSpec(int specId, List<UnboundPartitionField> fields) {
+  public UnboundPartitionSpec(int specId, int schemaId, List<UnboundPartitionField> fields) {
     this.specId = specId;
+    this.schemaId = schemaId;
     this.fields = fields;
   }
 
   public int specId() {
     return specId;
   }
 
+  public int schemaId() {
+    return schemaId;
+  }
+
   public List<UnboundPartitionField> fields() {
     return fields;
   }
@@ -78,6 +84,7 @@ static Builder builder() {
 
   static class Builder {
     private final List<UnboundPartitionField> fields;
     private int specId = 0;
+    private int schemaId = -1;
 
     private Builder() {
       this.fields = Lists.newArrayList();
@@ -88,6 +95,11 @@ Builder withSpecId(int newSpecId) {
       return this;
     }
 
+    Builder withSchemaId(int newSchemaId) {
+      this.schemaId = newSchemaId;
+      return this;
+    }
+
    Builder addField(String transformAsString, int sourceId, int partitionId, String name) {
      fields.add(new UnboundPartitionField(transformAsString, sourceId, partitionId, name));
      return this;
@@ -99,7 +111,7 @@ Builder addField(String transformAsString, int sourceId, String name) {
    }
 
    UnboundPartitionSpec build() {
-      return new UnboundPartitionSpec(specId, fields);
+      return new UnboundPartitionSpec(specId, schemaId, fields);
    }
  }
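The builder defaults the schema id to `-1`, which downstream readers treat as "schema id unknown". A small hedged sketch of the builder behavior (hypothetical snippet; it must live in the `org.apache.iceberg` package since the builder is package-private):

```java
package org.apache.iceberg;

public class UnboundSpecSketch {
  public static void main(String[] args) {
    // Written by current code: the schema id of the bound schema is recorded.
    UnboundPartitionSpec tagged =
        UnboundPartitionSpec.builder()
            .withSpecId(1)
            .withSchemaId(0)
            .addField("bucket[16]", 2, 1000, "data_bucket")
            .build();
    System.out.println(tagged.schemaId()); // 0

    // Built without withSchemaId(), e.g. when parsing legacy JSON: stays -1.
    UnboundPartitionSpec legacy = UnboundPartitionSpec.builder().withSpecId(1).build();
    System.out.println(legacy.schemaId()); // -1
  }
}
```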
diff --git a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java
index a51b03c8f015..2104bd6dba74 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java
@@ -19,6 +19,7 @@ package org.apache.iceberg;
 
 import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.github.benmanes.caffeine.cache.Cache;
 import com.github.benmanes.caffeine.cache.Caffeine;
@@ -33,6 +34,7 @@ public class PartitionSpecParser {
   private PartitionSpecParser() {}
 
   private static final String SPEC_ID = "spec-id";
+  private static final String SCHEMA_ID = "schema-id";
   private static final String FIELDS = "fields";
   private static final String SOURCE_ID = "source-id";
   private static final String FIELD_ID = "field-id";
@@ -54,6 +56,7 @@ public static String toJson(PartitionSpec spec, boolean pretty) {
   public static void toJson(UnboundPartitionSpec spec, JsonGenerator generator)
       throws IOException {
     generator.writeStartObject();
     generator.writeNumberField(SPEC_ID, spec.specId());
+    generator.writeNumberField(SCHEMA_ID, spec.schemaId());
     generator.writeFieldName(FIELDS);
     toJsonFields(spec, generator);
     generator.writeEndObject();
@@ -75,10 +78,22 @@ public static UnboundPartitionSpec fromJson(JsonNode json) {
     Preconditions.checkArgument(json.isObject(), "Cannot parse spec from non-object: %s", json);
     int specId = JsonUtil.getInt(SPEC_ID, json);
     UnboundPartitionSpec.Builder builder = UnboundPartitionSpec.builder().withSpecId(specId);
+    Integer schemaId = JsonUtil.getIntOrNull(SCHEMA_ID, json);
+    if (schemaId != null) {
+      builder.withSchemaId(schemaId);
+    }
     buildFromJsonFields(builder, JsonUtil.get(FIELDS, json));
     return builder.build();
   }
 
+  public static UnboundPartitionSpec fromJson(String json) {
+    try {
+      return fromJson(JsonUtil.mapper().readValue(json, JsonNode.class));
+    } catch (JsonProcessingException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   private static final Cache<Pair<Types.StructType, String>, PartitionSpec> SPEC_CACHE =
       Caffeine.newBuilder().weakValues().build();
diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java
index 082e50b840dc..7c0ab81c21a2 100644
--- a/core/src/main/java/org/apache/iceberg/SerializableTable.java
+++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java
@@ -193,7 +193,15 @@ public Map<Integer, PartitionSpec> specs() {
       Map<Integer, PartitionSpec> specs = Maps.newHashMapWithExpectedSize(specAsJsonMap.size());
       specAsJsonMap.forEach(
           (specId, specAsJson) -> {
-            specs.put(specId, PartitionSpecParser.fromJson(schema(), specAsJson));
+            int schemaId = PartitionSpecParser.fromJson(specAsJson).schemaId();
+            // Some unit tests construct this table without a metadata location. In that
+            // case the schemas cannot be read, so fall back to the latest schema.
+            Schema schema =
+                schemaId != -1 && metadataFileLocation != null
+                    ? schemas().get(schemaId)
+                    : schema();
+            specs.put(specId, PartitionSpecParser.fromJson(schema, specAsJson));
           });
       this.lazySpecs = specs;
     } else if (lazySpecs == null) {
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index d20dd59d2b97..e8e6c4620323 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -1052,12 +1052,6 @@ public Builder setCurrentSchema(int schemaId) {
       Preconditions.checkArgument(
           schema != null, "Cannot set current schema to unknown schema: %s", schemaId);
 
-      // rebuild all the partition specs and sort orders for the new current schema
-      this.specs =
-          Lists.newArrayList(Iterables.transform(specs, spec -> updateSpecSchema(schema, spec)));
-      specsById.clear();
-      specsById.putAll(PartitionUtil.indexSpecs(specs));
-
       this.sortOrders =
           Lists.newArrayList(
               Iterables.transform(sortOrders, order -> updateSortOrderSchema(schema, order)));
@@ -1537,6 +1531,17 @@ && changes(MetadataUpdate.AddSchema.class)
 
       this.lastAddedSchemaId = newSchemaId;
 
+      PartitionSpec currentSpec = specsById.get(defaultSpecId);
+      if (currentSpec != null) {
+        PartitionSpec newCurrentSpec = updateSpecSchema(newSchema, currentSpec);
+        specsById.put(defaultSpecId, newCurrentSpec);
+        for (int i = 0; i < specs.size(); i++) {
+          if (specs.get(i).specId() == defaultSpecId) {
+            specs.set(i, newCurrentSpec);
+          }
+        }
+      }
+
       return newSchemaId;
     }
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
index 8bda184142cd..b618d586fd82 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
@@ -387,8 +387,9 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) {
       ImmutableList.Builder<PartitionSpec> builder = ImmutableList.builder();
       for (JsonNode spec : specArray) {
         UnboundPartitionSpec unboundSpec = PartitionSpecParser.fromJson(spec);
-        if (unboundSpec.specId() == defaultSpecId) {
-          builder.add(unboundSpec.bind(schema));
+        int schemaId = unboundSpec.schemaId();
+        if (schemaId != -1) {
+          builder.add(unboundSpec.bind(schemas.get(schemaId)));
         } else {
           builder.add(unboundSpec.bindUnchecked(schema));
         }
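Putting the parser pieces together: new metadata carries `schema-id` and is bound against the schema it was created with, while old metadata without the field falls back to `-1` and the bind-unchecked path. A hedged sketch of the JSON round trip (again an `org.apache.iceberg`-package snippet; `fromJson(String)` is the method added above):

```java
package org.apache.iceberg;

public class SpecJsonRoundTripSketch {
  public static void main(String[] args) {
    // Legacy JSON without "schema-id" still parses; schemaId() reports -1,
    // so TableMetadataParser binds it with bindUnchecked(currentSchema).
    String legacy =
        "{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\","
            + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}";
    System.out.println(PartitionSpecParser.fromJson(legacy).schemaId()); // -1

    // JSON carrying "schema-id" surfaces the recorded id, so the spec can be
    // bound against the exact schema it was created with.
    String tagged =
        "{\"spec-id\":0,\"schema-id\":3,\"fields\":[{\"name\":\"data_bucket\","
            + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}";
    System.out.println(PartitionSpecParser.fromJson(tagged).schemaId()); // 3
  }
}
```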
diff --git a/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java b/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
index 2f057d7bd5a8..e356737bc030 100644
--- a/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
@@ -122,7 +122,7 @@ private String taskJson() {
         + "{\"id\":12,\"name\":\"lower_bound\",\"required\":false,\"type\":\"string\"},"
         + "{\"id\":13,\"name\":\"upper_bound\",\"required\":false,\"type\":\"string\"}]},\"element-required\":true}},"
         + "{\"id\":18,\"name\":\"reference_snapshot_id\",\"required\":true,\"type\":\"long\"}]},"
-        + "\"partition-specs\":[{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\","
+        + "\"partition-specs\":[{\"spec-id\":0,\"schema-id\":0,\"fields\":[{\"name\":\"data_bucket\","
         + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}],"
         + "\"manifest-list-Location\":\"/path/manifest-list-file.avro\","
         + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1},"
diff --git a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
index c4a9fdf2340a..48348052647e 100644
--- a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
@@ -102,7 +102,7 @@ private String fileScanTaskJson() {
         + "\"schema\":{\"type\":\"struct\",\"schema-id\":0,\"fields\":["
         + "{\"id\":3,\"name\":\"id\",\"required\":true,\"type\":\"int\"},"
         + "{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]},"
-        + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\","
+        + "\"spec\":{\"spec-id\":0,\"schema-id\":0,\"fields\":[{\"name\":\"data_bucket\","
         + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]},"
         + "\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\","
         + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0},"
diff --git a/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java b/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
index bea60601377e..e5868be7d26a 100644
--- a/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
@@ -100,7 +100,7 @@ private String taskJson() {
         + "{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]},"
         + "\"file-io\":{\"io-impl\":\"org.apache.iceberg.hadoop.HadoopFileIO\","
         + "\"properties\":{\"k1\":\"v1\",\"k2\":\"v2\"}},"
-        + "\"partition-specs\":[{\"spec-id\":0,\"fields\":[{"
+        + "\"partition-specs\":[{\"spec-id\":0,\"schema-id\":0,\"fields\":[{"
         + "\"name\":\"data_bucket\",\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}],"
         + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1},"
         + "\"manifest-file\":{\"path\":\"/path/input.m0.avro\","
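All of the JSON literal updates in these parser tests follow from the single extra `writeNumberField` call in `PartitionSpecParser.toJson`. A sketch that reproduces the new shape (hypothetical snippet; a `Schema` built without an explicit id defaults to schema id 0):

```java
package org.apache.iceberg;

import org.apache.iceberg.types.Types;

public class SpecToJsonSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Types.NestedField.required(4, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();

    // Prints: {"spec-id":0,"schema-id":0,"fields":[{"name":"data_bucket",
    //   "transform":"bucket[16]","source-id":4,"field-id":1000}]}
    System.out.println(PartitionSpecParser.toJson(spec));
  }
}
```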
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
index e97d2f98b416..f4d6e98f84d4 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
@@ -22,6 +22,7 @@
 
 import java.util.Arrays;
 import java.util.List;
+import org.apache.iceberg.types.Types;
 import org.junit.jupiter.api.TestTemplate;
 import org.junit.jupiter.api.extension.ExtendWith;
 
@@ -37,6 +38,7 @@ public void testToJsonForV1Table() {
     String expected =
         "{\n"
             + "  \"spec-id\" : 0,\n"
+            + "  \"schema-id\" : 0,\n"
             + "  \"fields\" : [ {\n"
             + "    \"name\" : \"data_bucket\",\n"
             + "    \"transform\" : \"bucket[16]\",\n"
@@ -54,6 +56,7 @@ public void testToJsonForV1Table() {
     expected =
         "{\n"
             + "  \"spec-id\" : 1,\n"
+            + "  \"schema-id\" : 0,\n"
             + "  \"fields\" : [ {\n"
             + "    \"name\" : \"id_bucket\",\n"
             + "    \"transform\" : \"bucket[8]\",\n"
@@ -74,6 +77,7 @@ public void testFromJsonWithFieldId() {
     String specString =
         "{\n"
             + "  \"spec-id\" : 1,\n"
+            + "  \"schema-id\" : 0,\n"
             + "  \"fields\" : [ {\n"
             + "    \"name\" : \"id_bucket\",\n"
             + "    \"transform\" : \"bucket[8]\",\n"
@@ -100,6 +104,7 @@ public void testFromJsonWithoutFieldId() {
     String specString =
         "{\n"
             + "  \"spec-id\" : 1,\n"
+            + "  \"schema-id\" : 0,\n"
             + "  \"fields\" : [ {\n"
             + "    \"name\" : \"id_bucket\",\n"
             + "    \"transform\" : \"bucket[8]\",\n"
@@ -120,10 +125,78 @@ public void testFromJsonWithoutFieldId() {
   }
 
   @TestTemplate
-  public void testTransforms() {
-    for (PartitionSpec spec : PartitionSpecTestBase.SPECS) {
-      assertThat(roundTripJSON(spec)).isEqualTo(spec);
-    }
+  public void testFromJsonWithoutSchemaId() {
+    String specString =
+        "{\n"
+            + "  \"spec-id\" : 1,\n"
+            + "  \"fields\" : [ {\n"
+            + "    \"name\" : \"id_bucket\",\n"
+            + "    \"transform\" : \"bucket[8]\",\n"
+            + "    \"source-id\" : 1,\n"
+            + "    \"field-id\" : 1001\n"
+            + "  }, {\n"
+            + "    \"name\" : \"data_bucket\",\n"
+            + "    \"transform\" : \"bucket[16]\",\n"
+            + "    \"source-id\" : 2,\n"
+            + "    \"field-id\" : 1000\n"
+            + "  } ]\n"
+            + "}";
+
+    PartitionSpec spec = PartitionSpecParser.fromJson(table.schema(), specString);
+
+    assertThat(spec.fields().size()).isEqualTo(2);
+    // should use the field ids from the JSON
+    assertThat(spec.fields().get(0).fieldId()).isEqualTo(1001);
+    assertThat(spec.fields().get(1).fieldId()).isEqualTo(1000);
+  }
+
+  @TestTemplate
+  public void testForUpdateSchema() {
+    String expected =
+        "{\n"
+            + "  \"spec-id\" : 0,\n"
+            + "  \"schema-id\" : 0,\n"
+            + "  \"fields\" : [ {\n"
+            + "    \"name\" : \"data_bucket\",\n"
+            + "    \"transform\" : \"bucket[16]\",\n"
+            + "    \"source-id\" : 2,\n"
+            + "    \"field-id\" : 1000\n"
+            + "  } ]\n"
+            + "}";
+    assertThat(PartitionSpecParser.toJson(table.spec(), true)).isEqualTo(expected);
+
+    PartitionSpec spec =
+        PartitionSpec.builderFor(table.schema()).bucket("id", 8).bucket("data", 16).build();
+
+    table.ops().commit(table.ops().current(), table.ops().current().updatePartitionSpec(spec));
+
+    Schema newSchema =
+        new Schema(
+            Types.NestedField.required(1, "id", Types.IntegerType.get()),
+            Types.NestedField.required(2, "data_renamed", Types.StringType.get()));
+
+    table
+        .ops()
+        .commit(
+            table.ops().current(),
+            table.ops().current().updateSchema(newSchema, table.schema().highestFieldId()));
+
+    expected =
+        "{\n"
+            + "  \"spec-id\" : 1,\n"
+            + "  \"schema-id\" : 1,\n"
+            + "  \"fields\" : [ {\n"
+            + "    \"name\" : \"id_bucket\",\n"
+            + "    \"transform\" : \"bucket[8]\",\n"
+            + "    \"source-id\" : 1,\n"
+            + "    \"field-id\" : 1000\n"
+            + "  }, {\n"
+            + "    \"name\" : \"data_bucket\",\n"
+            + "    \"transform\" : \"bucket[16]\",\n"
+            + "    \"source-id\" : 2,\n"
+            + "    \"field-id\" : 1001\n"
+            + "  } ]\n"
+            + "}";
+    assertThat(PartitionSpecParser.toJson(table.spec(), true)).isEqualTo(expected);
   }
 
   private static PartitionSpec roundTripJSON(PartitionSpec spec) {
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitioning.java b/core/src/test/java/org/apache/iceberg/TestPartitioning.java
index a4df125f1de2..991ed63b142a 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitioning.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitioning.java
@@ -28,6 +28,7 @@
 import java.nio.file.Path;
 import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.transforms.Transforms;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.types.Types.NestedField;
 import org.apache.iceberg.types.Types.StructType;
@@ -422,12 +423,25 @@ public void testGroupingKeyTypeWithIncompatibleSpecEvolution() {
   }
 
   @Test
-  public void testDeletingPartitionField() {
+  public void testDeletingPartitionFieldV1() {
     TestTables.TestTable table =
         TestTables.create(tableDir, "test", SCHEMA, BY_DATA_SPEC, V1_FORMAT_VERSION);
 
     table.updateSpec().removeField("data").commit();
 
+    assertThatThrownBy(() -> table.updateSchema().deleteColumn("data").commit())
+        .isInstanceOf(ValidationException.class);
+  }
+
+  @Test
+  public void testDeletingPartitionFieldV2() {
+    TestTables.TestTable table =
+        TestTables.create(tableDir, "test", SCHEMA, BY_DATA_SPEC, V2_FORMAT_VERSION);
+
+    table.updateSpec().removeField("data").commit();
+
     table.updateSchema().deleteColumn("data").commit();
 
     table.updateSpec().addField("id").commit();
@@ -435,8 +449,7 @@ public void testDeletingPartitionField() {
     PartitionSpec spec =
         PartitionSpec.builderFor(SCHEMA)
             .withSpecId(2)
-            .alwaysNull("data", "data")
-            .identity("id")
+            .add(1, 1001, "id", Transforms.identity())
             .build();
 
     assertThat(table.spec()).isEqualTo(spec);
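The V1/V2 split in these tests mirrors the spec: a V1 `removeField` only replaces the partition field with a void transform that still references the source column, so dropping that column must now fail, while in V2 the field is removed outright and the column can go. The same flow through the public API, as a hedged sketch:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.exceptions.ValidationException;

public class DropPartitionColumnSketch {
  // V2 table: the partition field is removed from the spec entirely,
  // so its source column may be deleted afterwards.
  static void dropOnV2(Table table) {
    table.updateSpec().removeField("data").commit();
    table.updateSchema().deleteColumn("data").commit();
  }

  // V1 table: removeField() leaves a void transform that still references
  // the column, so the same deleteColumn() now fails validation.
  static void dropOnV1(Table table) {
    table.updateSpec().removeField("data").commit();
    try {
      table.updateSchema().deleteColumn("data").commit();
    } catch (ValidationException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}
```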
diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
index 5ada35765773..4f2d3f10eca6 100644
--- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
+++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
@@ -64,6 +64,7 @@
 import org.apache.iceberg.transforms.Transforms;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.JsonUtil;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -80,6 +81,8 @@ public class TestTableMetadata {
           Types.NestedField.required(2, "y", Types.LongType.get(), "comment"),
           Types.NestedField.required(3, "z", Types.LongType.get()));
 
+  private static final List<Schema> TEST_SCHEMAS = Lists.newArrayList();
+
   private static final long SEQ_NO = 34;
   private static final int LAST_ASSIGNED_COLUMN_ID = 3;
 
@@ -96,6 +99,15 @@ public class TestTableMetadata {
 
   public TableOperations ops = new LocalTableOperations(temp);
 
+  @BeforeAll
+  public static void constructSchemas() {
+    TEST_SCHEMAS.clear();
+    for (int i = 0; i < 8; i++) {
+      TEST_SCHEMAS.add(new Schema(i, ImmutableList.of()));
+    }
+    TEST_SCHEMAS.set(7, TEST_SCHEMA);
+  }
+
   @Test
   @SuppressWarnings("MethodLength")
   public void testJsonConversion() throws Exception {
@@ -158,6 +170,8 @@ public void testJsonConversion() throws Exception {
             .path("/some/partition/stats/file.parquet")
             .fileSizeInBytes(42L)
             .build());
+    List<Schema> schemas = Lists.newArrayList(TEST_SCHEMAS);
+    schemas.set(6, schema);
 
     TableMetadata expected =
         new TableMetadata(
@@ -169,7 +183,7 @@ public void testJsonConversion() throws Exception {
             System.currentTimeMillis(),
             3,
             7,
-            ImmutableList.of(TEST_SCHEMA, schema),
+            schemas,
             5,
             ImmutableList.of(SPEC_5),
             SPEC_5.lastAssignedFieldId(),
@@ -555,7 +569,7 @@ public void testJsonWithPreviousMetadataLog() throws Exception {
             System.currentTimeMillis(),
             3,
             7,
-            ImmutableList.of(TEST_SCHEMA),
+            TEST_SCHEMAS,
             5,
             ImmutableList.of(SPEC_5),
             SPEC_5.lastAssignedFieldId(),
@@ -1470,7 +1484,7 @@ private static Stream<Arguments> upgradeFormatVersionProvider() {
 
   @ParameterizedTest
   @MethodSource("upgradeFormatVersionProvider")
-  public void testReplaceMetadataThroughTableProperty(int baseFormatVersion, int newFormatVersion) {
+  void testReplaceMetadataThroughTableProperty(int baseFormatVersion, int newFormatVersion) {
     Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get()));
 
     TableMetadata meta =
@@ -1499,7 +1513,7 @@ public void testReplaceMetadataThroughTableProperty(int baseFormatVersion, int newFormatVersion) {
 
   @ParameterizedTest
   @MethodSource("upgradeFormatVersionProvider")
-  public void testUpgradeMetadataThroughTableProperty(int baseFormatVersion, int newFormatVersion) {
+  void testUpgradeMetadataThroughTableProperty(int baseFormatVersion, int newFormatVersion) {
     Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get()));
 
     TableMetadata meta =
diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateTableRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateTableRequest.java
index 0d4280c25a79..124ada8489da 100644
--- a/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateTableRequest.java
+++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestCreateTableRequest.java
@@ -61,7 +61,7 @@ public void testRoundTripSerDe() throws JsonProcessingException {
         "{\"name\":\"test_tbl\",\"location\":\"file://tmp/location/\",\"schema\":{\"type\":\"struct\","
             + "\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":true,\"type\":\"int\"},"
             + "{\"id\":2,\"name\":\"data\",\"required\":false,\"type\":\"string\"}]},\"partition-spec\":{\"spec-id\":0,"
-            + "\"fields\":[{\"name\":\"id_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000}]},"
+            + "\"schema-id\":0,\"fields\":[{\"name\":\"id_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000}]},"
             + "\"write-order\":{\"order-id\":1,\"fields\":"
             + "[{\"transform\":\"identity\",\"source-id\":2,\"direction\":\"asc\",\"null-order\":\"nulls-last\"}]},"
             + "\"properties\":{\"owner\":\"Hank\"},\"stage-create\":false}";
diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadTableResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadTableResponseParser.java
index cc6f4cfc74d7..362c239885fb 100644
--- a/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadTableResponseParser.java
+++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestLoadTableResponseParser.java
@@ -98,6 +98,7 @@ public void roundTripSerde() {
             + "    \"default-spec-id\" : 0,\n"
             + "    \"partition-specs\" : [ {\n"
             + "      \"spec-id\" : 0,\n"
+            + "      \"schema-id\" : 0,\n"
             + "      \"fields\" : [ ]\n"
             + "    } ],\n"
             + "    \"last-partition-id\" : 999,\n"
@@ -171,6 +172,7 @@ public void roundTripSerdeWithConfig() {
             + "    \"default-spec-id\" : 0,\n"
             + "    \"partition-specs\" : [ {\n"
             + "      \"spec-id\" : 0,\n"
+            + "      \"schema-id\" : 0,\n"
             + "      \"fields\" : [ ]\n"
             + "    } ],\n"
             + "    \"last-partition-id\" : 999,\n"
@@ -274,6 +276,7 @@ public void roundTripSerdeWithCredentials() {
             + "    \"default-spec-id\" : 0,\n"
             + "    \"partition-specs\" : [ {\n"
             + "      \"spec-id\" : 0,\n"
+            + "      \"schema-id\" : 0,\n"
             + "      \"fields\" : [ ]\n"
             + "    } ],\n"
             + "    \"last-partition-id\" : 999,\n"
diff --git a/format/spec.md b/format/spec.md
index 601cbcc3bc4e..b218747afc29 100644
--- a/format/spec.md
+++ b/format/spec.md
@@ -1295,10 +1295,11 @@ Note that default values are serialized using the JSON single-value serialization
 
 Partition specs are serialized as a JSON object with the following fields:
 
-|Field|JSON representation|Example|
-|--- |--- |--- |
-|**`spec-id`**|`JSON int`|`0`|
-|**`fields`**|`JSON list: [`<br />&nbsp;&nbsp;`<partition field JSON>,`<br />&nbsp;&nbsp;`...`<br />`]`|`[ {`<br />&nbsp;&nbsp;`"source-id": 4,`<br />&nbsp;&nbsp;`"field-id": 1000,`<br />&nbsp;&nbsp;`"name": "ts_day",`<br />&nbsp;&nbsp;`"transform": "day"`<br />`}, {`<br />&nbsp;&nbsp;`"source-id": 1,`<br />&nbsp;&nbsp;`"field-id": 1001,`<br />&nbsp;&nbsp;`"name": "id_bucket",`<br />&nbsp;&nbsp;`"transform": "bucket[16]"`<br />`} ]`|
+|Field|JSON representation|Example|
+|--- |--- |--- |
+|**`spec-id`**|`JSON int`|`0`|
+|**`schema-id`**|`JSON int`|`-1`|
+|**`fields`**|`JSON list: [`<br />&nbsp;&nbsp;`<partition field JSON>,`<br />&nbsp;&nbsp;`...`<br />`]`|`[ {`<br />&nbsp;&nbsp;`"source-id": 4,`<br />&nbsp;&nbsp;`"field-id": 1000,`<br />&nbsp;&nbsp;`"name": "ts_day",`<br />&nbsp;&nbsp;`"transform": "day"`<br />`}, {`<br />&nbsp;&nbsp;`"source-id": 1,`<br />&nbsp;&nbsp;`"field-id": 1001,`<br />&nbsp;&nbsp;`"name": "id_bucket",`<br />&nbsp;&nbsp;`"transform": "bucket[16]"`<br />`} ]`|
 
 Each partition field in `fields` is stored as a JSON object with the following properties.
 
diff --git a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
index 2c109f1007d1..972c270fe919 100644
--- a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
+++ b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
@@ -19,11 +19,13 @@
 package org.apache.iceberg.spark.extensions;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.spark.SparkCatalogConfig;
 import org.apache.iceberg.spark.source.SparkTable;
 import org.apache.spark.sql.connector.catalog.CatalogManager;
@@ -525,23 +527,38 @@ public void testSparkTableAddDropPartitions() throws Exception {
   public void testDropColumnOfOldPartitionFieldV1() {
     // default table created in v1 format
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '1')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) "
+            + "TBLPROPERTIES ('format-version' = '1')",
         tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
+
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
-    sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    assertThatThrownBy(() -> sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName))
+        .isInstanceOf(ValidationException.class);
   }
 
   @Test
   public void testDropColumnOfOldPartitionFieldV2() {
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '2')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts)",
         tableName);
+    sql("ALTER TABLE %s SET TBLPROPERTIES ('format-version' = '2')", tableName);
+
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (2, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
     sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    sql("INSERT INTO TABLE %s VALUES (3, TIMESTAMP '2024-10-23')", tableName);
+
+    assertThat(sql("SELECT * FROM %s", tableName)).as("should have 3 rows").hasSize(3);
   }
 
   private void assertPartitioningEquals(SparkTable table, int len, String transform) {
diff --git a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
index 2c109f1007d1..972c270fe919 100644
--- a/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
+++ b/spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
@@ -19,11 +19,13 @@
 package org.apache.iceberg.spark.extensions;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.spark.SparkCatalogConfig;
 import org.apache.iceberg.spark.source.SparkTable;
 import org.apache.spark.sql.connector.catalog.CatalogManager;
@@ -525,23 +527,38 @@ public void testSparkTableAddDropPartitions() throws Exception {
   public void testDropColumnOfOldPartitionFieldV1() {
     // default table created in v1 format
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '1')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) "
+            + "TBLPROPERTIES ('format-version' = '1')",
         tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
+
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
-    sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    assertThatThrownBy(() -> sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName))
+        .isInstanceOf(ValidationException.class);
   }
 
   @Test
   public void testDropColumnOfOldPartitionFieldV2() {
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '2')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts)",
         tableName);
+    sql("ALTER TABLE %s SET TBLPROPERTIES ('format-version' = '2')", tableName);
+
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (2, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
     sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    sql("INSERT INTO TABLE %s VALUES (3, TIMESTAMP '2024-10-23')", tableName);
+
+    assertThat(sql("SELECT * FROM %s", tableName)).as("should have 3 rows").hasSize(3);
   }
 
   private void assertPartitioningEquals(SparkTable table, int len, String transform) {
diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
index 38e5c942c9ff..afbb93ed1bce 100644
--- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
+++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAlterTablePartitionFields.java
@@ -19,6 +19,7 @@
 package org.apache.iceberg.spark.extensions;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import org.apache.iceberg.Parameter;
 import org.apache.iceberg.ParameterizedTestExtension;
@@ -27,6 +28,7 @@
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.spark.SparkCatalogConfig;
 import org.apache.iceberg.spark.source.SparkTable;
 import org.apache.spark.sql.connector.catalog.CatalogManager;
@@ -533,23 +535,38 @@ public void testSparkTableAddDropPartitions() throws Exception {
   public void testDropColumnOfOldPartitionFieldV1() {
     // default table created in v1 format
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '1')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) "
+            + "TBLPROPERTIES ('format-version' = '1')",
         tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
+
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
-    sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    assertThatThrownBy(() -> sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName))
+        .isInstanceOf(ValidationException.class);
   }
 
   @TestTemplate
   public void testDropColumnOfOldPartitionFieldV2() {
     sql(
-        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts) TBLPROPERTIES('format-version' = '2')",
+        "CREATE TABLE %s (id bigint NOT NULL, ts timestamp, day_of_ts date) USING iceberg PARTITIONED BY (day_of_ts)",
        tableName);
+    sql("ALTER TABLE %s SET TBLPROPERTIES ('format-version' = '2')", tableName);
+
+    sql("INSERT INTO TABLE %s VALUES (1, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
     sql("ALTER TABLE %s REPLACE PARTITION FIELD day_of_ts WITH days(ts)", tableName);
+    sql("INSERT INTO TABLE %s VALUES (2, TIMESTAMP '2024-10-23', DATE '2024-10-23')", tableName);
 
     sql("ALTER TABLE %s DROP COLUMN day_of_ts", tableName);
+    sql("INSERT INTO TABLE %s VALUES (3, TIMESTAMP '2024-10-23')", tableName);
+
+    assertThat(sql("SELECT * FROM %s", tableName)).as("should have 3 rows").hasSize(3);
   }
 
   private void assertPartitioningEquals(SparkTable table, int len, String transform) {