Skip to content

Commit

Permalink
Add support for custom date format and openSearch date format for dat…
Browse files Browse the repository at this point in the history
…e fields as part of Lucene query

Github Issue - #2700

Signed-off-by: Manasvini B S <manasvis@amazon.com>
  • Loading branch information
manasvinibs committed Jul 17, 2024
1 parent 0c2e1da commit af13fe5
Show file tree
Hide file tree
Showing 17 changed files with 632 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,23 @@ public String toString() {
@EqualsAndHashCode.Exclude @Getter protected MappingType mappingType;

// resolved ExprCoreType
protected ExprCoreType exprCoreType;
@Getter protected ExprCoreType exprCoreType;

/**
 * Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for
 * `OpenSearch*Type`s, e.g. for IP, returns itself. If the `exprCoreType` is {@link
 * ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link
 * ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`.
 *
 * @return An {@link ExprType}.
 */
public ExprType getExprType() {
  // NOTE(review): date-like types return the instance itself, presumably so that callers can
  // still test for the OpenSearch-specific date type and reach its formats - confirm.
  boolean keepOpenSearchType =
      exprCoreType == ExprCoreType.DATE
          || exprCoreType == ExprCoreType.TIMESTAMP
          || exprCoreType == ExprCoreType.TIME
          || exprCoreType == ExprCoreType.UNKNOWN;
  return keepOpenSearchType ? this : exprCoreType;
}

/**
Expand Down Expand Up @@ -230,6 +234,9 @@ public String legacyTypeName() {
if (mappingType == null) {
return exprCoreType.typeName();
}
if (mappingType.toString().equalsIgnoreCase("DATE")) {
return exprCoreType.typeName();
}
return mappingType.toString().toUpperCase();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,17 @@
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import lombok.EqualsAndHashCode;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.time.DateFormatters;
import org.opensearch.common.time.FormatNames;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
Expand Down Expand Up @@ -137,6 +143,9 @@ public class OpenSearchDateType extends OpenSearchDataType {

private static final String CUSTOM_FORMAT_DATE_SYMBOLS = "FecEWwYqQgdMLDyuG";

// Format names used when a date field declares no format of its own: all entries are tried when
// parsing (see initializeDateFormatters), and the first entry is the pattern getFormattedDate
// falls back to.
private static final List<String> OPENSEARCH_DEFAULT_FORMATS =
Arrays.asList("strict_date_time_no_millis", "strict_date_optional_time", "epoch_millis");

@EqualsAndHashCode.Exclude private final List<String> formats;

private OpenSearchDateType() {
Expand Down Expand Up @@ -235,6 +244,71 @@ public List<DateFormatter> getAllCustomFormatters() {
.collect(Collectors.toList());
}

/**
 * Attempts to parse the given date/time string with the formatters configured for this type.
 *
 * <p>Named formatters are tried first, followed by custom formatters, in list order. If neither
 * kind is configured, the formatters built from {@code OPENSEARCH_DEFAULT_FORMATS} are used
 * instead. The first formatter that parses the input without throwing an {@link
 * IllegalArgumentException} determines the result.
 *
 * @param dateTime The date/time string to parse.
 * @return The parsed value re-zoned to UTC via {@code withZoneSameLocal} (the local date-time
 *     fields are kept as parsed), or null if no formatter could parse the input.
 */
public ZonedDateTime getParsedDateTime(String dateTime) {
  // Copy into a list owned by this method: the mutability of the list returned by
  // getAllNamedFormatters() is not guaranteed here, and appending to it directly could either
  // fail or modify a list owned by someone else.
  List<DateFormatter> dateFormatters = new ArrayList<>(this.getAllNamedFormatters());
  dateFormatters.addAll(this.getAllCustomFormatters());

  // No user-defined formats at all: fall back to the OpenSearch defaults.
  if (dateFormatters.isEmpty()) {
    dateFormatters = initializeDateFormatters();
  }

  for (DateFormatter formatter : dateFormatters) {
    try {
      TemporalAccessor accessor = formatter.parse(dateTime);
      return DateFormatters.from(accessor).withZoneSameLocal(ZoneOffset.UTC);
    } catch (IllegalArgumentException ignored) {
      // This formatter does not accept the input; try the next one.
    }
  }
  return null;
}

/**
 * Formats the given temporal value as a string.
 *
 * <p>The pattern is the first entry of this type's format list, or the first entry of {@code
 * OPENSEARCH_DEFAULT_FORMATS} when this type has no formats.
 *
 * @param accessor The TemporalAccessor object containing the date/time information.
 * @return The formatted date string.
 */
public String getFormattedDate(TemporalAccessor accessor) {
  final String pattern =
      hasNoFormatter() ? OPENSEARCH_DEFAULT_FORMATS.get(0) : this.formats.get(0);
  return DateFormatter.forPattern(pattern).format(accessor);
}

/**
 * Reports whether this date type carries no format strings.
 *
 * @return True if the list of formats is empty, otherwise false.
 */
public boolean hasNoFormatter() {
  return formats.isEmpty();
}

/**
 * Builds one formatter per pattern listed in {@code OPENSEARCH_DEFAULT_FORMATS}, preserving the
 * order of that list.
 *
 * @return A new, mutable list of DateFormatter objects for the default patterns.
 */
private static List<DateFormatter> initializeDateFormatters() {
  return OPENSEARCH_DEFAULT_FORMATS.stream()
      .map(DateFormatter::forPattern)
      .collect(Collectors.toCollection(ArrayList::new));
}

/**
* Retrieves a list of named formatters that format for dates.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ private Optional<ExprType> type(String field) {
private static ExprValue parseDateTimeString(String value, OpenSearchDateType dataType) {
List<DateFormatter> formatters = dataType.getAllNamedFormatters();
formatters.addAll(dataType.getAllCustomFormatters());
ExprCoreType returnFormat = (ExprCoreType) dataType.getExprType();
ExprCoreType returnFormat = dataType.getExprCoreType();

for (DateFormatter formatter : formatters) {
try {
Expand Down Expand Up @@ -273,8 +273,7 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da

private static ExprValue createOpenSearchDateType(Content value, ExprType type) {
OpenSearchDateType dt = (OpenSearchDateType) type;
ExprType returnFormat = dt.getExprType();

ExprCoreType returnFormat = dt.getExprCoreType();
if (value.isNumber()) { // isNumber
var numFormatters = dt.getNumericNamedFormatters();
if (numFormatters.size() > 0 || !dt.hasFormats()) {
Expand All @@ -287,7 +286,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type)
epochMillis = value.longValue();
}
Instant instant = Instant.ofEpochMilli(epochMillis);
switch ((ExprCoreType) returnFormat) {
switch (returnFormat) {
case TIME:
return new ExprTimeValue(LocalTime.from(instant.atZone(ZoneOffset.UTC)));
case DATE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.span.SpanExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

/** Bucket Aggregation Builder. */
Expand Down Expand Up @@ -65,7 +66,10 @@ private CompositeValuesSourceBuilder<?> buildCompositeValuesSourceBuilder(
.missingOrder(missingOrder)
.order(sortOrder);
// Time types values are converted to LONG in ExpressionAggregationScript::execute
if (List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) {
if ((expr.getDelegated().type() instanceof OpenSearchDateType
&& List.of(TIMESTAMP, TIME, DATE)
.contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType()))
|| List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) {
sourceBuilder.userValuetypeHint(ValueType.LONG);
}
return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction;

import com.google.common.collect.ImmutableMap;
import java.time.ZonedDateTime;
import java.util.Map;
import java.util.function.Function;
import org.opensearch.index.query.QueryBuilder;
Expand All @@ -32,10 +33,13 @@
import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.FunctionName;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;

/** Lucene query abstraction that builds Lucene query from function expression. */
public abstract class LuceneQuery {

private ReferenceExpression ref;

/**
* Check if function expression supported by current Lucene query. Default behavior is that report
* supported if:
Expand Down Expand Up @@ -102,10 +106,11 @@ private boolean literalExpressionWrappedByCast(FunctionExpression func) {
* @return query
*/
public QueryBuilder build(FunctionExpression func) {
  // The reference is kept on the instance so that getParsedDateTime can consult the field's
  // type while the literal is being cast below.
  // NOTE(review): this is per-call state held in a field - confirm that a LuceneQuery instance
  // is never used by more than one thread at a time.
  this.ref = (ReferenceExpression) func.getArguments().get(0);
  Expression expr = func.getArguments().get(1);
  ExprValue literalValue =
      expr instanceof LiteralExpression ? expr.valueOf() : cast((FunctionExpression) expr);

  return doBuild(ref.getAttr(), ref.type(), literalValue);
}

Expand All @@ -120,7 +125,7 @@ private ExprValue cast(FunctionExpression castFunction) {
ImmutableMap.<FunctionName, Function<LiteralExpression, ExprValue>>builder()
.put(
BuiltinFunctionName.CAST_TO_STRING.getName(),
expr -> {
(expr) -> {
if (!expr.type().equals(ExprCoreType.STRING)) {
return new ExprStringValue(String.valueOf(expr.valueOf().value()));
} else {
Expand Down Expand Up @@ -209,7 +214,10 @@ private ExprValue cast(FunctionExpression castFunction) {
.put(
BuiltinFunctionName.CAST_TO_DATE.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprDateValue(zonedDateTime.toLocalDate());
} else if (expr.type().equals(ExprCoreType.STRING)) {
return new ExprDateValue(expr.valueOf().stringValue());
} else {
return new ExprDateValue(expr.valueOf().dateValue());
Expand All @@ -218,7 +226,10 @@ private ExprValue cast(FunctionExpression castFunction) {
.put(
BuiltinFunctionName.CAST_TO_TIME.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprTimeValue(zonedDateTime.toLocalTime());
} else if (expr.type().equals(ExprCoreType.STRING)) {
return new ExprTimeValue(expr.valueOf().stringValue());
} else {
return new ExprTimeValue(expr.valueOf().timeValue());
Expand All @@ -227,14 +238,31 @@ private ExprValue cast(FunctionExpression castFunction) {
.put(
BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprTimestampValue(zonedDateTime.toInstant());
} else if (expr.type().equals(ExprCoreType.STRING)) {
return new ExprTimestampValue(expr.valueOf().stringValue());
} else {
return new ExprTimestampValue(expr.valueOf().timestampValue());
}
})
.build();

/**
 * Parses the literal with the formats of the referenced field, when that is applicable.
 *
 * <p>Parsing is attempted only if the literal is of string type and the type of the current
 * reference is an {@link OpenSearchDateType}; the work is delegated to that date type.
 *
 * @param expr The literal expression to parse.
 * @return The parsed ZonedDateTime, or null if the conditions are not met or the date type could
 *     not parse the string.
 */
private ZonedDateTime getParsedDateTime(LiteralExpression expr) {
  if (!expr.type().equals(ExprCoreType.STRING)) {
    return null;
  }
  if (!(this.ref.type() instanceof OpenSearchDateType)) {
    return null;
  }
  OpenSearchDateType dateType = (OpenSearchDateType) this.ref.type();
  return dateType.getParsedDateTime(expr.valueOf().stringValue());
}

/**
* Build method that subclass implements by default which is to build query from reference and
* literal in function arguments.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;

/** Lucene query that builds range query for non-equality comparison. */
@RequiredArgsConstructor
Expand All @@ -30,7 +31,8 @@ public enum Comparison {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
Object value = value(literal);

Object value = value(literal, fieldType);

RangeQueryBuilder query = QueryBuilders.rangeQuery(fieldName);
switch (comparison) {
Expand All @@ -47,11 +49,23 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l
}
}

private Object value(ExprValue literal) {
if (literal.type().equals(ExprCoreType.TIMESTAMP)) {
return literal.timestampValue().toEpochMilli();
} else {
return literal.value();
/**
 * Converts the literal into the object placed in the range query.
 *
 * <p>For a field of {@link OpenSearchDateType} that has formats, TIMESTAMP, DATE and TIME
 * literals are rendered through the field's {@code getFormattedDate}. Without formats, a
 * TIMESTAMP literal becomes epoch milliseconds and DATE/TIME literals keep their plain value.
 * Every other combination yields the literal's plain value.
 *
 * @param literal the literal operand of the comparison
 * @param fieldType the type of the field being compared
 * @return the value to use in the query
 */
private Object value(ExprValue literal, ExprType fieldType) {
  if (!(fieldType instanceof OpenSearchDateType)) {
    return literal.value();
  }
  final OpenSearchDateType dateType = (OpenSearchDateType) fieldType;

  if (literal.type().equals(ExprCoreType.TIMESTAMP)) {
    if (dateType.hasNoFormatter()) {
      return literal.timestampValue().toEpochMilli();
    }
    return dateType.getFormattedDate(literal.timestampValue());
  }
  if (literal.type().equals(ExprCoreType.DATE)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.dateValue());
  }
  if (literal.type().equals(ExprCoreType.TIME)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.timeValue());
  }
  return literal.value();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;

/** Lucene query that builds a term query for equality comparison. */
Expand All @@ -18,14 +19,26 @@ public class TermQuery extends LuceneQuery {
/**
 * Builds a term query for the field and literal.
 *
 * <p>The field name is first passed through {@code OpenSearchTextType.convertTextToKeyword},
 * and the literal is converted with the field type taken into account (see {@code value}).
 */
@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
  String termField = OpenSearchTextType.convertTextToKeyword(fieldName, fieldType);
  return QueryBuilders.termQuery(termField, value(literal, fieldType));
}

private Object value(ExprValue literal) {
if (literal.type().equals(ExprCoreType.TIMESTAMP)) {
return literal.timestampValue().toEpochMilli();
} else {
return literal.value();
/**
 * Converts the literal into the object placed in the term query.
 *
 * <p>For a field of {@link OpenSearchDateType} that has formats, TIMESTAMP, DATE and TIME
 * literals are rendered through the field's {@code getFormattedDate}. Without formats, a
 * TIMESTAMP literal becomes epoch milliseconds and DATE/TIME literals keep their plain value.
 * Every other combination yields the literal's plain value.
 *
 * @param literal the literal operand of the comparison
 * @param fieldType the type of the field being compared
 * @return the value to use in the query
 */
private Object value(ExprValue literal, ExprType fieldType) {
  if (!(fieldType instanceof OpenSearchDateType)) {
    return literal.value();
  }
  final OpenSearchDateType dateType = (OpenSearchDateType) fieldType;

  if (literal.type().equals(ExprCoreType.TIMESTAMP)) {
    if (dateType.hasNoFormatter()) {
      return literal.timestampValue().toEpochMilli();
    }
    return dateType.getFormattedDate(literal.timestampValue());
  }
  if (literal.type().equals(ExprCoreType.DATE)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.dateValue());
  }
  if (literal.type().equals(ExprCoreType.TIME)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.timeValue());
  }
  return literal.value();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ void get_index_mappings() throws IOException {
() -> assertEquals(OpenSearchTextType.of(MappingType.Double), parsedTypes.get("balance")),
() -> assertEquals("KEYWORD", mapping.get("city").legacyTypeName()),
() -> assertEquals(OpenSearchTextType.of(MappingType.Keyword), parsedTypes.get("city")),
() -> assertEquals("DATE", mapping.get("birthday").legacyTypeName()),
() -> assertEquals("TIMESTAMP", mapping.get("birthday").legacyTypeName()),
() -> assertEquals(OpenSearchTextType.of(MappingType.Date), parsedTypes.get("birthday")),
() -> assertEquals("GEO_POINT", mapping.get("location").legacyTypeName()),
() ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ void get_index_mappings() throws IOException {
() -> assertEquals(OpenSearchTextType.of(MappingType.Double), parsedTypes.get("balance")),
() -> assertEquals("KEYWORD", mapping.get("city").legacyTypeName()),
() -> assertEquals(OpenSearchTextType.of(MappingType.Keyword), parsedTypes.get("city")),
() -> assertEquals("DATE", mapping.get("birthday").legacyTypeName()),
() -> assertEquals("TIMESTAMP", mapping.get("birthday").legacyTypeName()),
() -> assertEquals(OpenSearchTextType.of(MappingType.Date), parsedTypes.get("birthday")),
() -> assertEquals("GEO_POINT", mapping.get("location").legacyTypeName()),
() ->
Expand Down
Loading

0 comments on commit af13fe5

Please sign in to comment.