Skip to content

Commit

Permalink
Fix bug Delta column operations erases its properties on uppercase names
Browse files Browse the repository at this point in the history
Also, show column properties correctly when column name contains
uppercase characters and stores the exact column name
in column statistics.

Co-Authored-By: Slawomir Pajak <slawomir.pajak@starburstdata.com>
  • Loading branch information
ebyhr and pajaks committed Aug 7, 2023
1 parent 6728ac8 commit df9140a
Show file tree
Hide file tree
Showing 21 changed files with 437 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetWriterBlockSize;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetWriterPageSize;
import static io.trino.plugin.deltalake.DeltaLakeTypes.toParquetType;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogAccess.canonicalizeColumnName;
import static io.trino.plugin.hive.util.HiveUtil.escapePathName;
import static java.lang.Math.min;
import static java.lang.String.format;
Expand Down Expand Up @@ -146,21 +145,18 @@ public AbstractDeltaLakePageSink(
ImmutableList.Builder<Type> dataColumnTypes = ImmutableList.builder();
ImmutableList.Builder<String> dataColumnNames = ImmutableList.builder();

Map<String, String> canonicalToOriginalPartitionColumns = new HashMap<>();
Map<String, Integer> canonicalToOriginalPartitionPositions = new HashMap<>();
Map<String, Integer> toOriginalPartitionPositions = new HashMap<>();
int partitionColumnPosition = 0;
for (String partitionColumnName : originalPartitionColumns) {
String canonicalizeColumnName = canonicalizeColumnName(partitionColumnName);
canonicalToOriginalPartitionColumns.put(canonicalizeColumnName, partitionColumnName);
canonicalToOriginalPartitionPositions.put(canonicalizeColumnName, partitionColumnPosition++);
toOriginalPartitionPositions.put(partitionColumnName, partitionColumnPosition++);
}
for (int inputIndex = 0; inputIndex < inputColumns.size(); inputIndex++) {
DeltaLakeColumnHandle column = inputColumns.get(inputIndex);
switch (column.getColumnType()) {
case PARTITION_KEY:
int partitionPosition = canonicalToOriginalPartitionPositions.get(column.getColumnName());
int partitionPosition = toOriginalPartitionPositions.get(column.getColumnName());
partitionColumnInputIndex[partitionPosition] = inputIndex;
originalPartitionColumnNames[partitionPosition] = canonicalToOriginalPartitionColumns.get(column.getColumnName());
originalPartitionColumnNames[partitionPosition] = column.getColumnName();
partitionColumnTypes[partitionPosition] = column.getBaseType();
break;
case REGULAR:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,22 @@
import java.util.OptionalInt;

import static com.google.common.base.MoreObjects.toStringHelper;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public class DeltaLakeColumnMetadata
{
private final ColumnMetadata columnMetadata;
private final String name;
private final OptionalInt fieldId;
private final String physicalName;
private final Type physicalColumnType;

public DeltaLakeColumnMetadata(ColumnMetadata columnMetadata, OptionalInt fieldId, String physicalName, Type physicalColumnType)
public DeltaLakeColumnMetadata(ColumnMetadata columnMetadata, String name, OptionalInt fieldId, String physicalName, Type physicalColumnType)
{
this.columnMetadata = requireNonNull(columnMetadata, "columnMetadata is null");
this.name = requireNonNull(name, "name is null");
this.fieldId = requireNonNull(fieldId, "fieldId is null");
this.physicalName = physicalName.toLowerCase(ENGLISH);
this.physicalName = requireNonNull(physicalName, "physicalName is null");
this.physicalColumnType = requireNonNull(physicalColumnType, "physicalColumnType is null");
}

Expand All @@ -50,7 +51,7 @@ public OptionalInt getFieldId()

public String getName()
{
return columnMetadata.getName();
return name;
}

public Type getType()
Expand All @@ -73,6 +74,7 @@ public String toString()
{
return toStringHelper(this)
.add("columnMetadata", columnMetadata)
.add("name", name)
.add("fieldId", fieldId)
.add("physicalName", physicalName)
.add("physicalColumnType", physicalColumnType)
Expand All @@ -90,6 +92,7 @@ public boolean equals(Object o)
}
DeltaLakeColumnMetadata that = (DeltaLakeColumnMetadata) o;
return Objects.equals(columnMetadata, that.columnMetadata) &&
Objects.equals(name, that.name) &&
Objects.equals(fieldId, that.fieldId) &&
Objects.equals(physicalName, that.physicalName) &&
Objects.equals(physicalColumnType, that.physicalColumnType);
Expand All @@ -98,6 +101,6 @@ public boolean equals(Object o)
@Override
public int hashCode()
{
return Objects.hash(columnMetadata, fieldId, physicalName, physicalColumnType);
return Objects.hash(columnMetadata, name, fieldId, physicalName, physicalColumnType);
}
}
Loading

0 comments on commit df9140a

Please sign in to comment.