Skip to content

Commit

Permalink
Add support for custom date format and openSearch date format for dat…
Browse files Browse the repository at this point in the history
…e fields as part of Lucene query

Github Issue - #2700

Signed-off-by: Manasvini B S <[email protected]>
  • Loading branch information
manasvinibs committed Jul 18, 2024
1 parent 607354c commit 4a3709e
Show file tree
Hide file tree
Showing 18 changed files with 669 additions and 76 deletions.
42 changes: 42 additions & 0 deletions docs/user/general/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,48 @@ Querying such index will provide a response with ``schema`` block as shown below
"status": 200
}
If a SQL query compares an `IndexDateField` with a literal value using an operator (for example, in a term query or a range query), the literal value may be written in the format configured for that `IndexDateField`.

.. code-block:: json
{
"mappings" : {
"properties" : {
"release_date" : {
"type" : "date",
"format": "dd-MMM-yy"
}
}
}
}
Querying such an `IndexDateField` (``release_date``) will provide a response with ``schema`` and ``datarows`` blocks as shown below.

.. code-block:: json
{
"query" : "SELECT release_date FROM test_index WHERE release_date = \"03-Jan-21\""
}
.. code-block:: json
{
"schema": [
{
"name": "release_date",
"type": "date"
}
],
"datarows": [
[
"2021-01-03"
]
],
"total": 1,
"size": 1,
"status": 200
}
String Data Types
=================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,23 @@ public String toString() {
@EqualsAndHashCode.Exclude @Getter protected MappingType mappingType;

// resolved ExprCoreType
protected ExprCoreType exprCoreType;
@Getter protected ExprCoreType exprCoreType;

/**
* Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for
* `OpenSearch*Type`s, e.g. for IP, returns itself.
* `OpenSearch*Type`s, e.g. for IP, returns itself. If the `exprCoreType` is {@link
* ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link
* ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`.
*
* @return An {@link ExprType}.
*/
public ExprType getExprType() {
if (exprCoreType != ExprCoreType.UNKNOWN) {
return exprCoreType;
}
return this;
return (exprCoreType == ExprCoreType.DATE
|| exprCoreType == ExprCoreType.TIMESTAMP
|| exprCoreType == ExprCoreType.TIME
|| exprCoreType == ExprCoreType.UNKNOWN)
? this
: exprCoreType;
}

/**
Expand Down Expand Up @@ -230,6 +234,9 @@ public String legacyTypeName() {
if (mappingType == null) {
return exprCoreType.typeName();
}
if (mappingType.toString().equalsIgnoreCase("DATE")) {
return exprCoreType.typeName();
}
return mappingType.toString().toUpperCase();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,18 @@
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.EqualsAndHashCode;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.time.DateFormatters;
import org.opensearch.common.time.FormatNames;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
Expand Down Expand Up @@ -137,6 +144,9 @@ public class OpenSearchDateType extends OpenSearchDataType {

private static final String CUSTOM_FORMAT_DATE_SYMBOLS = "FecEWwYqQgdMLDyuG";

private static final List<String> OPENSEARCH_DEFAULT_FORMATS =
Arrays.asList("strict_date_time_no_millis", "strict_date_optional_time", "epoch_millis");

@EqualsAndHashCode.Exclude private final List<String> formats;

private OpenSearchDateType() {
Expand Down Expand Up @@ -235,6 +245,72 @@ public List<DateFormatter> getAllCustomFormatters() {
.collect(Collectors.toList());
}

/**
 * Parses a date/time string with the formatters configured on this date type.
 *
 * <p>Named formatters are tried first, followed by custom formatters. When neither kind is
 * configured, the formatters built from {@code OPENSEARCH_DEFAULT_FORMATS} are used instead. The
 * first formatter that accepts the input wins.
 *
 * <p>The parsed value is re-labelled with the UTC zone via {@code withZoneSameLocal}, i.e. the
 * local date-time fields are retained rather than shifted.
 *
 * @param dateTime The date/time string to parse.
 * @return The parsed {@link ZonedDateTime} carrying the UTC zone, or {@code null} when no
 *     formatter accepts the input.
 */
public ZonedDateTime getParsedDateTime(String dateTime) {
  List<DateFormatter> configured =
      Stream.concat(getAllNamedFormatters().stream(), getAllCustomFormatters().stream())
          .collect(Collectors.toList());

  // no user-defined formats at all: fall back to the OpenSearch defaults
  List<DateFormatter> candidates = configured.isEmpty() ? initializeDateFormatters() : configured;

  for (DateFormatter candidate : candidates) {
    try {
      TemporalAccessor parsed = candidate.parse(dateTime);
      return DateFormatters.from(parsed).withZoneSameLocal(ZoneOffset.UTC);
    } catch (IllegalArgumentException ignored) {
      // this formatter rejected the input; move on to the next candidate
    }
  }
  return null;
}

/**
 * Formats the given temporal value as a string.
 *
 * <p>The pattern is the first entry of this type's format list. When the list is empty, the first
 * entry of {@code OPENSEARCH_DEFAULT_FORMATS} ({@code strict_date_time_no_millis}) is used.
 *
 * @param accessor The TemporalAccessor object containing the date/time information.
 * @return The formatted date string.
 */
public String getFormattedDate(TemporalAccessor accessor) {
  String pattern = hasNoFormatter() ? OPENSEARCH_DEFAULT_FORMATS.get(0) : this.formats.get(0);
  return DateFormatter.forPattern(pattern).format(accessor);
}

/**
 * Reports whether this date type carries no format strings.
 *
 * @return {@code true} when the list of formats is empty, {@code false} otherwise.
 */
public boolean hasNoFormatter() {
  return formats.isEmpty();
}

/**
 * Builds one {@link DateFormatter} per entry of {@code OPENSEARCH_DEFAULT_FORMATS}, preserving the
 * order of that list.
 *
 * @return A new mutable list of DateFormatter objects for the default patterns.
 */
private static List<DateFormatter> initializeDateFormatters() {
  return OPENSEARCH_DEFAULT_FORMATS.stream()
      .map(DateFormatter::forPattern)
      .collect(Collectors.toCollection(ArrayList::new));
}

/**
* Retrieves a list of named formatters that format for dates.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ private Optional<ExprType> type(String field) {
private static ExprValue parseDateTimeString(String value, OpenSearchDateType dataType) {
List<DateFormatter> formatters = dataType.getAllNamedFormatters();
formatters.addAll(dataType.getAllCustomFormatters());
ExprCoreType returnFormat = (ExprCoreType) dataType.getExprType();
ExprCoreType returnFormat = dataType.getExprCoreType();

for (DateFormatter formatter : formatters) {
try {
Expand Down Expand Up @@ -273,8 +273,7 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da

private static ExprValue createOpenSearchDateType(Content value, ExprType type) {
OpenSearchDateType dt = (OpenSearchDateType) type;
ExprType returnFormat = dt.getExprType();

ExprCoreType returnFormat = dt.getExprCoreType();
if (value.isNumber()) { // isNumber
var numFormatters = dt.getNumericNamedFormatters();
if (numFormatters.size() > 0 || !dt.hasFormats()) {
Expand All @@ -287,7 +286,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type)
epochMillis = value.longValue();
}
Instant instant = Instant.ofEpochMilli(epochMillis);
switch ((ExprCoreType) returnFormat) {
switch (returnFormat) {
case TIME:
return new ExprTimeValue(LocalTime.from(instant.atZone(ZoneOffset.UTC)));
case DATE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.span.SpanExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

/** Bucket Aggregation Builder. */
Expand Down Expand Up @@ -65,7 +66,10 @@ private CompositeValuesSourceBuilder<?> buildCompositeValuesSourceBuilder(
.missingOrder(missingOrder)
.order(sortOrder);
// Time types values are converted to LONG in ExpressionAggregationScript::execute
if (List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) {
if ((expr.getDelegated().type() instanceof OpenSearchDateType
&& List.of(TIMESTAMP, TIME, DATE)
.contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType()))
|| List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) {
sourceBuilder.userValuetypeHint(ValueType.LONG);
}
return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction;

import com.google.common.collect.ImmutableMap;
import java.time.ZonedDateTime;
import java.util.Map;
import java.util.function.Function;
import org.opensearch.index.query.QueryBuilder;
Expand All @@ -32,10 +33,13 @@
import org.opensearch.sql.expression.ReferenceExpression;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.FunctionName;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;

/** Lucene query abstraction that builds Lucene query from function expression. */
public abstract class LuceneQuery {

private ReferenceExpression ref;

/**
* Check if function expression supported by current Lucene query. Default behavior is that report
* supported if:
Expand Down Expand Up @@ -102,7 +106,7 @@ private boolean literalExpressionWrappedByCast(FunctionExpression func) {
* @return query
*/
public QueryBuilder build(FunctionExpression func) {
ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0);
this.ref = (ReferenceExpression) func.getArguments().get(0);
Expression expr = func.getArguments().get(1);
ExprValue literalValue =
expr instanceof LiteralExpression ? expr.valueOf() : cast((FunctionExpression) expr);
Expand Down Expand Up @@ -210,6 +214,10 @@ private ExprValue cast(FunctionExpression castFunction) {
BuiltinFunctionName.CAST_TO_DATE.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprDateValue(zonedDateTime.toLocalDate());
}
return new ExprDateValue(expr.valueOf().stringValue());
} else {
return new ExprDateValue(expr.valueOf().dateValue());
Expand All @@ -219,6 +227,10 @@ private ExprValue cast(FunctionExpression castFunction) {
BuiltinFunctionName.CAST_TO_TIME.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprTimeValue(zonedDateTime.toLocalTime());
}
return new ExprTimeValue(expr.valueOf().stringValue());
} else {
return new ExprTimeValue(expr.valueOf().timeValue());
Expand All @@ -228,13 +240,31 @@ private ExprValue cast(FunctionExpression castFunction) {
BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(),
expr -> {
if (expr.type().equals(ExprCoreType.STRING)) {
ZonedDateTime zonedDateTime = getParsedDateTime(expr);
if (zonedDateTime != null) {
return new ExprTimestampValue(zonedDateTime.toInstant());
}
return new ExprTimestampValue(expr.valueOf().stringValue());
} else {
return new ExprTimestampValue(expr.valueOf().timestampValue());
}
})
.build();

/**
 * Parses the string value of a literal using the formats of the referenced field, provided the
 * type of the current reference is an {@link OpenSearchDateType}.
 *
 * @param expr The literal expression whose string value should be parsed.
 * @return The parsed ZonedDateTime, or null when the reference type is not an OpenSearchDateType
 *     or the date type could not parse the string.
 */
private ZonedDateTime getParsedDateTime(LiteralExpression expr) {
  ExprType referenceType = this.ref.type();
  if (!(referenceType instanceof OpenSearchDateType)) {
    return null;
  }
  OpenSearchDateType dateType = (OpenSearchDateType) referenceType;
  return dateType.getParsedDateTime(expr.valueOf().stringValue());
}

/**
* Build method that subclass implements by default which is to build query from reference and
* literal in function arguments.
Expand All @@ -248,4 +278,36 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l
throw new UnsupportedOperationException(
"Subclass doesn't implement this and build method either");
}

/**
 * Produces the object to place into the query for a literal, taking the field type into account.
 *
 * <p>For any field type other than {@link OpenSearchDateType}, and for literals that are not
 * TIMESTAMP, DATE or TIME, the literal's raw value is returned. For an {@link OpenSearchDateType}
 * field:
 *
 * <ul>
 *   <li>a TIMESTAMP literal becomes epoch milliseconds when the date type has no format, otherwise
 *       the timestamp formatted by the date type;
 *   <li>a DATE or TIME literal stays as its raw value when the date type has no format, otherwise
 *       it is formatted by the date type.
 * </ul>
 *
 * @param literal the literal value to be converted
 * @param fieldType the field type to determine the conversion logic
 * @return the formatted date or time value if the field type requires it, otherwise the raw value
 */
protected Object value(ExprValue literal, ExprType fieldType) {
  if (!(fieldType instanceof OpenSearchDateType)) {
    return literal.value();
  }
  OpenSearchDateType dateType = (OpenSearchDateType) fieldType;

  if (literal.type().equals(ExprCoreType.TIMESTAMP)) {
    if (dateType.hasNoFormatter()) {
      return literal.timestampValue().toEpochMilli();
    }
    return dateType.getFormattedDate(literal.timestampValue());
  }

  if (literal.type().equals(ExprCoreType.DATE)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.dateValue());
  }

  if (literal.type().equals(ExprCoreType.TIME)) {
    if (dateType.hasNoFormatter()) {
      return literal.value();
    }
    return dateType.getFormattedDate(literal.timeValue());
  }

  // date field, but the literal is not a date/time value: pass it through untouched
  return literal.value();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.index.query.RangeQueryBuilder;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;

/** Lucene query that builds range query for non-equality comparison. */
Expand All @@ -30,7 +29,7 @@ public enum Comparison {

@Override
protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
Object value = value(literal);
Object value = this.value(literal, fieldType);

RangeQueryBuilder query = QueryBuilders.rangeQuery(fieldName);
switch (comparison) {
Expand All @@ -46,12 +45,4 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l
throw new IllegalStateException("Comparison is supported by range query: " + comparison);
}
}

/**
 * Converts a literal into the object used as a range bound: epoch milliseconds for TIMESTAMP
 * literals, the raw value for everything else.
 */
private Object value(ExprValue literal) {
  if (!literal.type().equals(ExprCoreType.TIMESTAMP)) {
    return literal.value();
  }
  return literal.timestampValue().toEpochMilli();
}
}
Loading

0 comments on commit 4a3709e

Please sign in to comment.