Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat](nereids) adjust min/max for partition key #41729 branch-2.1 #45713

Merged
merged 1 commit into from
Dec 22, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,15 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.PartitionKey;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
Expand Down Expand Up @@ -136,11 +142,13 @@
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -899,6 +907,155 @@ private double getTableRowCount(CatalogRelation relation) {
}
}

/**
* Determine whether it is a partition key inside the function.
*/
private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
if (olapTable.getPartitionType() == PartitionType.LIST) {
cache = updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache);
} else if (olapTable.getPartitionType() == PartitionType.RANGE) {
cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache);
}
return cache;
}

private double convertLegacyLiteralToDouble(LiteralExpr literal) throws org.apache.doris.common.AnalysisException {
return StatisticsUtil.convertToDouble(literal.getType(), literal.getStringValue());
}

private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
if (partitionColumnIdx != -1) {
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof ListPartitionItem) {
ListPartitionItem lp = (ListPartitionItem) item;
for (PartitionKey key : lp.getItems()) {
if (minExpr == null) {
minExpr = key.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
maxExpr = key.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
} else {
double current = convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
if (current > maxValue) {
maxValue = current;
maxExpr = key.getKeys().get(partitionColumnIdx);
} else if (current < minValue) {
minValue = current;
minExpr = key.getKeys().get(partitionColumnIdx);
}
}
}
}
}
if (minExpr != null) {
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (org.apache.doris.common.AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
// for multi partition keys, only the first partition key need to adjust min/max
if (partitionColumnIdx == 0) {
// update partition column min/max by partition info
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof RangePartitionItem) {
RangePartitionItem ri = (RangePartitionItem) item;
Range<PartitionKey> range = ri.getItems();
PartitionKey upper = range.upperEndpoint();
PartitionKey lower = range.lowerEndpoint();
if (maxExpr == null) {
maxExpr = upper.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
minExpr = lower.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
} else {
double currentValue = convertLegacyLiteralToDouble(upper.getKeys()
.get(partitionColumnIdx));
if (currentValue > maxValue) {
maxValue = currentValue;
maxExpr = upper.getKeys().get(partitionColumnIdx);
}
currentValue = convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
if (currentValue < minValue) {
minValue = currentValue;
minExpr = lower.getKeys().get(partitionColumnIdx);
}
}
}
}
if (minExpr != null) {
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (org.apache.doris.common.AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

private ColumnStatistic updateMinMax(ColumnStatistic cache, double minValue, LiteralExpr minExpr,
double maxValue, LiteralExpr maxExpr) {
boolean shouldUpdateCache = false;
if (!cache.isUnKnown) {
// merge the min/max with cache.
// example: min/max range in cache is [10-20]
// range from partition def is [15-30]
// the final range is [15-20]
if (cache.minValue > minValue) {
minValue = cache.minValue;
minExpr = cache.minExpr;
} else {
shouldUpdateCache = true;
}
if (cache.maxValue < maxValue) {
maxValue = cache.maxValue;
maxExpr = cache.maxExpr;
} else {
shouldUpdateCache = true;
}
// if min/max is invalid, do not update cache
if (minValue > maxValue) {
shouldUpdateCache = false;
}
}

if (shouldUpdateCache) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
}
return cache;
}

// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now.
Expand Down Expand Up @@ -944,6 +1101,18 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
LOG.debug("{} is partially analyzed, clear min/max values in column stats",
catalogRelation.getTable().getName());
}
List<String> selectedPartitionNames = null;
if (catalogRelation instanceof OlapScan) {
OlapScan olapScan = (OlapScan) catalogRelation;
if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) {
// partition pruned
// try to use selected partition stats, if failed, fall back to table stats
selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size());
for (Long id : olapScan.getSelectedPartitionIds()) {
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
}
}
}
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
Expand All @@ -959,6 +1128,11 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
cache = ColumnStatistic.UNKNOWN;
} else {
cache = getColumnStatistic(table, colName, idxId);
if (table instanceof OlapTable && slotReference.getColumn().isPresent()
&& selectedPartitionNames != null) {
cache = updateMinMaxForPartitionKey((OlapTable) table,
selectedPartitionNames, slotReference, cache);
}
}
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);
colStatsBuilder.normalizeAvgSizeByte(slotReference);
Expand Down
Loading