Skip to content

Commit

Permalink
API: implement types timestamp_ns and timestamptz_ns
Browse files Browse the repository at this point in the history
Helps #8657

This change adds field `TimestampType.Unit unit` to `TimestampType`,
such that `TimestampType` now represents four specified types:
- `timestamp` (existing)
- `timestamptz` (existing)
- `timestamp_ns` (new #8683)
- `timestamptz_ns` (new #8683)

Note that TimestampType.with[out]Zone() are marked as deprecated in this
change. In future PRs, I'll remove usage of these static methods.
  • Loading branch information
jacobmarble authored and epgif committed Feb 14, 2024
1 parent 20ff1ab commit d8688f2
Show file tree
Hide file tree
Showing 35 changed files with 885 additions and 243 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;

/** Expression utility methods. */
public class ExpressionUtil {
Expand All @@ -48,10 +49,15 @@ public class ExpressionUtil {
private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
private static final Pattern TIMESTAMP =
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
private static final Pattern TIMESTAMPNS =
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?");
private static final Pattern TIMESTAMPTZ =
Pattern.compile(
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)");
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)");
private static final Pattern TIMESTAMPTZNS =
Pattern.compile(
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)");
static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10;
private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5;

Expand Down Expand Up @@ -514,7 +520,7 @@ private static String sanitize(Type type, Object value, long now, int today) {
case TIME:
return "(time)";
case TIMESTAMP:
return sanitizeTimestamp((long) value, now);
return sanitizeTimestamp(((Types.TimestampType) type).unit(), (long) value, now);
case STRING:
return sanitizeString((CharSequence) value, now, today);
case BOOLEAN:
Expand All @@ -535,7 +541,8 @@ private static String sanitize(Literal<?> literal, long now, int today) {
} else if (literal instanceof Literals.DateLiteral) {
return sanitizeDate(((Literals.DateLiteral) literal).value(), today);
} else if (literal instanceof Literals.TimestampLiteral) {
return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now);
Literals.TimestampLiteral tsLiteral = ((Literals.TimestampLiteral) literal);
return sanitizeTimestamp(tsLiteral.unit(), tsLiteral.value(), now);
} else if (literal instanceof Literals.TimeLiteral) {
return "(time)";
} else if (literal instanceof Literals.IntegerLiteral) {
Expand Down Expand Up @@ -564,7 +571,18 @@ private static String sanitizeDate(int days, int today) {
return "(date)";
}

private static String sanitizeTimestamp(long micros, long now) {
private static String sanitizeTimestamp(Types.TimestampType.Unit unit, long timeUnits, long now) {
final long micros;
switch (unit) {
case MICROS:
micros = timeUnits;
break;
case NANOS:
micros = DateTimeUtil.nanosToMicros(timeUnits);
break;
default:
throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit);
}
String isPast = now > micros ? "ago" : "from-now";
long diff = Math.abs(now - micros);
if (diff < FIVE_MINUTES_IN_MICROS) {
Expand Down Expand Up @@ -595,11 +613,17 @@ private static String sanitizeString(CharSequence value, long now, int today) {
Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
return sanitizeDate(date.value(), today);
} else if (TIMESTAMP.matcher(value).matches()) {
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withoutZone());
return sanitizeTimestamp(ts.value(), now);
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.microsWithoutZone());
return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now);
} else if (TIMESTAMPNS.matcher(value).matches()) {
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.nanosWithoutZone());
return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now);
} else if (TIMESTAMPTZ.matcher(value).matches()) {
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withZone());
return sanitizeTimestamp(ts.value(), now);
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.microsWithZone());
return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now);
} else if (TIMESTAMPTZNS.matcher(value).matches()) {
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.nanosWithZone());
return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now);
} else if (TIME.matcher(value).matches()) {
return "(time)";
} else {
Expand Down
76 changes: 56 additions & 20 deletions api/src/main/java/org/apache/iceberg/expressions/Literals.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.TimestampType;
import org.apache.iceberg.util.ByteBuffers;
import org.apache.iceberg.util.DateTimeUtil;
import org.apache.iceberg.util.NaNUtil;

class Literals {
Expand Down Expand Up @@ -298,7 +300,7 @@ public <T> Literal<T> to(Type type) {
case TIME:
return (Literal<T>) new TimeLiteral(value());
case TIMESTAMP:
return (Literal<T>) new TimestampLiteral(value());
return (Literal<T>) new TimestampLiteral(((TimestampType) type).unit(), value());
case DATE:
if ((long) Integer.MAX_VALUE < value()) {
return aboveMax();
Expand Down Expand Up @@ -426,23 +428,55 @@ protected Type.TypeID typeId() {
}

static class TimestampLiteral extends ComparableLiteral<Long> {
TimestampLiteral(Long value) {
private final TimestampType.Unit unit;

TimestampLiteral(TimestampType.Unit unit, Long value) {
super(value);
this.unit = unit;
}

@Override
@SuppressWarnings("unchecked")
public <T> Literal<T> to(Type type) {
switch (type.typeId()) {
case TIMESTAMP:
return (Literal<T>) this;
TimestampType.Unit toUnit = ((TimestampType) type).unit();
switch (unit) {
case MICROS:
switch (toUnit) {
case MICROS:
return (Literal<T>) this;
case NANOS:
return (Literal<T>)
new TimestampLiteral(unit, DateTimeUtil.microsToNanos(value()));
}
break;
case NANOS:
switch (toUnit) {
case MICROS:
return (Literal<T>)
new TimestampLiteral(unit, DateTimeUtil.nanosToMicros(value()));
case NANOS:
return (Literal<T>) this;
}
break;
}
break;
case DATE:
return (Literal<T>)
new DateLiteral(
(int)
ChronoUnit.DAYS.between(
EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate()));
default:
switch (unit) {
case MICROS:
return (Literal<T>)
new DateLiteral(
(int)
ChronoUnit.DAYS.between(
EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate()));
case NANOS:
return (Literal<T>)
new DateLiteral(
(int)
ChronoUnit.DAYS.between(
EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate()));
}
}
return null;
}
Expand All @@ -451,6 +485,10 @@ public <T> Literal<T> to(Type type) {
protected Type.TypeID typeId() {
return Type.TypeID.TIMESTAMP;
}

protected TimestampType.Unit unit() {
return unit;
}
}

static class DecimalLiteral extends ComparableLiteral<BigDecimal> {
Expand Down Expand Up @@ -501,19 +539,17 @@ public <T> Literal<T> to(Type type) {
return (Literal<T>) new TimeLiteral(timeMicros);

case TIMESTAMP:
if (((Types.TimestampType) type).shouldAdjustToUTC()) {
long timestampMicros =
ChronoUnit.MICROS.between(
EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME));
return (Literal<T>) new TimestampLiteral(timestampMicros);
TimestampType tsType = (TimestampType) type;
final java.time.temporal.Temporal valueAsTemporal;
if (tsType.shouldAdjustToUTC()) {
valueAsTemporal = OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME);
} else {
long timestampMicros =
ChronoUnit.MICROS.between(
EPOCH,
LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.atOffset(ZoneOffset.UTC));
return (Literal<T>) new TimestampLiteral(timestampMicros);
valueAsTemporal =
LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.atOffset(ZoneOffset.UTC);
}
final long timestampUnits = tsType.unit().between(EPOCH, valueAsTemporal);
return (Literal<T>) new TimestampLiteral(tsType.unit(), timestampUnits);

case STRING:
return (Literal<T>) this;
Expand Down
14 changes: 8 additions & 6 deletions api/src/main/java/org/apache/iceberg/transforms/Days.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import java.io.ObjectStreamException;
import java.time.temporal.ChronoUnit;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

Expand All @@ -37,7 +38,7 @@ protected Transform<T, Integer> toEnum(Type type) {
case DATE:
return (Transform<T, Integer>) Dates.DAY;
case TIMESTAMP:
return (Transform<T, Integer>) Timestamps.DAY;
return (Transform<T, Integer>) Timestamps.get((Types.TimestampType) type, ChronoUnit.DAYS);
default:
throw new IllegalArgumentException("Unsupported type: " + type);
}
Expand All @@ -55,14 +56,15 @@ public boolean satisfiesOrderOf(Transform<?, ?> other) {
}

if (other instanceof Timestamps) {
return Timestamps.DAY.satisfiesOrderOf(other);
ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit();
return otherResultTypeUnit == ChronoUnit.DAYS
|| otherResultTypeUnit == ChronoUnit.MONTHS
|| otherResultTypeUnit == ChronoUnit.YEARS;
} else if (other instanceof Dates) {
return Dates.DAY.satisfiesOrderOf(other);
} else if (other instanceof Days || other instanceof Months || other instanceof Years) {
return true;
} else {
return other instanceof Days || other instanceof Months || other instanceof Years;
}

return false;
}

@Override
Expand Down
21 changes: 12 additions & 9 deletions api/src/main/java/org/apache/iceberg/transforms/Hours.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import java.io.ObjectStreamException;
import java.time.temporal.ChronoUnit;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

Expand All @@ -34,7 +35,7 @@ static <T> Hours<T> get() {
@SuppressWarnings("unchecked")
protected Transform<T, Integer> toEnum(Type type) {
if (type.typeId() == Type.TypeID.TIMESTAMP) {
return (Transform<T, Integer>) Timestamps.HOUR;
return (Transform<T, Integer>) Timestamps.get((Types.TimestampType) type, ChronoUnit.HOURS);
}

throw new IllegalArgumentException("Unsupported type: " + type);
Expand All @@ -57,15 +58,17 @@ public boolean satisfiesOrderOf(Transform<?, ?> other) {
}

if (other instanceof Timestamps) {
return other == Timestamps.HOUR;
} else if (other instanceof Hours
|| other instanceof Days
|| other instanceof Months
|| other instanceof Years) {
return true;
ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit();
return otherResultTypeUnit == ChronoUnit.HOURS
|| otherResultTypeUnit == ChronoUnit.DAYS
|| otherResultTypeUnit == ChronoUnit.MONTHS
|| otherResultTypeUnit == ChronoUnit.YEARS;
} else {
return other instanceof Hours
|| other instanceof Days
|| other instanceof Months
|| other instanceof Years;
}

return false;
}

@Override
Expand Down
13 changes: 7 additions & 6 deletions api/src/main/java/org/apache/iceberg/transforms/Months.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import java.io.ObjectStreamException;
import java.time.temporal.ChronoUnit;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

Expand All @@ -37,7 +38,8 @@ protected Transform<T, Integer> toEnum(Type type) {
case DATE:
return (Transform<T, Integer>) Dates.MONTH;
case TIMESTAMP:
return (Transform<T, Integer>) Timestamps.MONTH;
return (Transform<T, Integer>)
Timestamps.get((Types.TimestampType) type, ChronoUnit.MONTHS);
default:
throw new IllegalArgumentException("Unsupported type: " + type);
}
Expand All @@ -55,14 +57,13 @@ public boolean satisfiesOrderOf(Transform<?, ?> other) {
}

if (other instanceof Timestamps) {
return Timestamps.MONTH.satisfiesOrderOf(other);
ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit();
return otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS;
} else if (other instanceof Dates) {
return Dates.MONTH.satisfiesOrderOf(other);
} else if (other instanceof Months || other instanceof Years) {
return true;
} else {
return other instanceof Months || other instanceof Years;
}

return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,23 @@ static <R> R visit(Schema schema, PartitionField field, PartitionSpecVisitor<R>
int width = ((Truncate<?>) transform).width();
return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width);
} else if (transform == Dates.YEAR
|| transform == Timestamps.YEAR
|| transform == Timestamps.YEAR_FROM_MICROS
|| transform == Timestamps.YEAR_FROM_NANOS
|| transform instanceof Years) {
return visitor.year(field.fieldId(), sourceName, field.sourceId());
} else if (transform == Dates.MONTH
|| transform == Timestamps.MONTH
|| transform == Timestamps.MONTH_FROM_MICROS
|| transform == Timestamps.MONTH_FROM_NANOS
|| transform instanceof Months) {
return visitor.month(field.fieldId(), sourceName, field.sourceId());
} else if (transform == Dates.DAY || transform == Timestamps.DAY || transform instanceof Days) {
} else if (transform == Dates.DAY
|| transform == Timestamps.DAY_FROM_MICROS
|| transform == Timestamps.DAY_FROM_NANOS
|| transform instanceof Days) {
return visitor.day(field.fieldId(), sourceName, field.sourceId());
} else if (transform == Timestamps.HOUR || transform instanceof Hours) {
} else if (transform == Timestamps.HOUR_FROM_MICROS
|| transform == Timestamps.HOUR_FROM_NANOS
|| transform instanceof Hours) {
return visitor.hour(field.fieldId(), sourceName, field.sourceId());
} else if (transform instanceof VoidTransform) {
return visitor.alwaysNull(field.fieldId(), sourceName, field.sourceId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,16 @@ static <R> List<R> visit(SortOrder sortOrder, SortOrderVisitor<R> visitor) {
results.add(
visitor.truncate(
sourceName, field.sourceId(), width, field.direction(), field.nullOrder()));
} else if (transform == Dates.YEAR
|| transform == Timestamps.YEAR
|| transform instanceof Years) {
} else if ("year".equalsIgnoreCase(transform.toString())) {
results.add(
visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
} else if (transform == Dates.MONTH
|| transform == Timestamps.MONTH
|| transform instanceof Months) {
} else if ("month".equalsIgnoreCase(transform.toString())) {
results.add(
visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
} else if (transform == Dates.DAY
|| transform == Timestamps.DAY
|| transform instanceof Days) {
} else if ("day".equalsIgnoreCase(transform.toString())) {
results.add(
visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
} else if (transform == Timestamps.HOUR || transform instanceof Hours) {
} else if ("hour".equalsIgnoreCase(transform.toString())) {
results.add(
visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
} else if (transform instanceof UnknownTransform) {
Expand Down
Loading

0 comments on commit d8688f2

Please sign in to comment.