-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add min max index with IT Signed-off-by: Chen Dai <daichen@amazon.com> * Update user doc Signed-off-by: Chen Dai <daichen@amazon.com> * Fix output schema issue and IT Signed-off-by: Chen Dai <daichen@amazon.com> --------- Signed-off-by: Chen Dai <daichen@amazon.com>
- Loading branch information
Showing
6 changed files
with
106 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
...on/src/main/scala/org/opensearch/flint/spark/skipping/minmax/MinMaxSkippingStrategy.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.spark.skipping.minmax | ||
|
||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingStrategy | ||
import org.opensearch.flint.spark.skipping.FlintSparkSkippingStrategy.SkippingKind.{MinMax, SkippingKind} | ||
|
||
import org.apache.spark.sql.Column | ||
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal, Predicate} | ||
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, Max, Min} | ||
import org.apache.spark.sql.functions.col | ||
|
||
/**
 * Skipping strategy based on min-max boundary of column values.
 *
 * The strategy exposes a `Min` and a `Max` aggregator over the indexed column and rewrites an
 * equality filter on that column into a range check against the two resulting index columns.
 *
 * @param kind
 *   skipping kind, `MinMax` by default
 * @param columnName
 *   name of the indexed source column
 * @param columnType
 *   type of the indexed source column; both index columns are declared with this same type
 */
case class MinMaxSkippingStrategy(
    override val kind: SkippingKind = MinMax,
    override val columnName: String,
    override val columnType: String)
    extends FlintSparkSkippingStrategy {

  /** Name of the index column holding the minimum value. */
  private def minColName = s"MinMax_${columnName}_0"

  /** Name of the index column holding the maximum value. */
  private def maxColName = s"MinMax_${columnName}_1"

  /**
   * @return
   *   index column name -> column type, one entry for the minimum and one for the maximum
   */
  override def outputSchema(): Map[String, String] =
    Map(minColName -> columnType, maxColName -> columnType)

  /**
   * @return
   *   `Min` and `Max` aggregate functions over the indexed column, in that order (matching the
   *   `_0` / `_1` suffixes of the index column names)
   */
  override def getAggregators: Seq[AggregateFunction] =
    Seq(Min(col(columnName).expr), Max(col(columnName).expr))

  /**
   * Rewrite an equality filter on the indexed column (`columnName = literal`) into
   * `min <= literal AND max >= literal` on the index columns.
   *
   * Only top-level conjuncts of the given predicate are inspected. An equality nested under
   * `OR`, `NOT` or any other expression must not be rewritten on its own: the range check would
   * then be stricter than the original filter and files containing matching rows would be
   * skipped.
   *
   * @param predicate
   *   filter predicate on the source table
   * @return
   *   rewritten predicate for the first matching conjunct, or `None` if no conjunct is an
   *   equality between the indexed column and a literal
   */
  override def rewritePredicate(predicate: Predicate): Option[Predicate] = {
    import org.apache.spark.sql.catalyst.expressions.{And, Expression}

    // Flatten nested ANDs left-to-right; anything that is not an AND is kept as a single conjunct
    def conjuncts(expr: Expression): Seq[Expression] = expr match {
      case And(left, right) => conjuncts(left) ++ conjuncts(right)
      case other => Seq(other)
    }

    conjuncts(predicate).collectFirst {
      case EqualTo(AttributeReference(`columnName`, _, _, _), value: Literal) =>
        rewriteTo(col(minColName) <= value && col(maxColName) >= value)
    }
  }

  // Convert a column to predicate. The only caller passes a boolean `&&` column, whose
  // underlying expression is a predicate, so the cast is expected to succeed.
  private def rewriteTo(column: Column): Predicate = column.expr.asInstanceOf[Predicate]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters