From 0e098f0dda46afa0566685b4d4a12897f329aafb Mon Sep 17 00:00:00 2001 From: Jacob Marble Date: Tue, 20 Feb 2024 09:20:34 -0600 Subject: [PATCH 01/38] API: implement types timestamp_ns and timestamptz_ns Helps #8657 This change adds field `TimestampType.Unit unit` to `TimestampType`, such that `TimestampType` now represents four specified types: - `timestamp` (existing) - `timestamptz` (existing) - `timestamp_ns` (new #8683) - `timestamptz_ns` (new #8683) Note that TimestampType.with[out]Zone() are marked as deprecated in this change. In future PRs, I'll remove usage of these static methods. --- .../iceberg/expressions/ExpressionUtil.java | 42 ++++- .../apache/iceberg/expressions/Literals.java | 77 ++++++--- .../org/apache/iceberg/transforms/Days.java | 14 +- .../org/apache/iceberg/transforms/Hours.java | 21 ++- .../org/apache/iceberg/transforms/Months.java | 13 +- .../transforms/PartitionSpecVisitor.java | 15 +- .../iceberg/transforms/SortOrderVisitor.java | 14 +- .../apache/iceberg/transforms/Timestamps.java | 163 ++++++++++++++---- .../apache/iceberg/transforms/Transform.java | 8 +- .../iceberg/transforms/TransformUtil.java | 27 ++- .../apache/iceberg/transforms/Transforms.java | 52 +++++- .../org/apache/iceberg/transforms/Years.java | 11 +- .../org/apache/iceberg/types/Comparators.java | 6 +- .../java/org/apache/iceberg/types/Types.java | 78 +++++++-- .../org/apache/iceberg/util/DateTimeUtil.java | 135 ++++++++++++++- .../apache/iceberg/PartitionSpecTestBase.java | 11 +- .../org/apache/iceberg/TestAccessors.java | 6 +- .../apache/iceberg/TestPartitionPaths.java | 25 ++- .../iceberg/TestPartitionSpecValidation.java | 8 +- .../expressions/TestExpressionUtil.java | 90 ++++++++-- .../expressions/TestLiteralSerialization.java | 6 +- .../TestMiscLiteralConversions.java | 103 ++++++++--- .../TestStringLiteralConversions.java | 65 +++++-- .../iceberg/transforms/TestBucketing.java | 52 +++++- .../iceberg/transforms/TestIdentity.java | 4 +- 
.../iceberg/transforms/TestProjection.java | 8 +- .../iceberg/transforms/TestResiduals.java | 24 ++- .../iceberg/transforms/TestTimestamps.java | 18 +- .../transforms/TestTimestampsProjection.java | 2 +- .../TestTransformSerialization.java | 4 +- .../apache/iceberg/types/TestComparators.java | 6 +- .../apache/iceberg/types/TestConversions.java | 20 ++- .../iceberg/types/TestReadabilityChecks.java | 6 +- .../iceberg/types/TestSerializableTypes.java | 6 +- .../org/apache/iceberg/types/TestTypes.java | 4 +- 35 files changed, 894 insertions(+), 250 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 3708dafc4126..d8e764a4c4ef 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -36,6 +36,7 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; /** Expression utility methods. 
*/ public class ExpressionUtil { @@ -48,10 +49,15 @@ public class ExpressionUtil { private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); private static final Pattern TIMESTAMP = - Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?"); + private static final Pattern TIMESTAMPNS = + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?"); private static final Pattern TIMESTAMPTZ = Pattern.compile( - "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)"); + private static final Pattern TIMESTAMPTZNS = + Pattern.compile( + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)"); static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10; private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5; @@ -514,7 +520,7 @@ private static String sanitize(Type type, Object value, long now, int today) { case TIME: return "(time)"; case TIMESTAMP: - return sanitizeTimestamp((long) value, now); + return sanitizeTimestamp(((Types.TimestampType) type).unit(), (long) value, now); case STRING: return sanitizeString((CharSequence) value, now, today); case BOOLEAN: @@ -535,7 +541,8 @@ private static String sanitize(Literal literal, long now, int today) { } else if (literal instanceof Literals.DateLiteral) { return sanitizeDate(((Literals.DateLiteral) literal).value(), today); } else if (literal instanceof Literals.TimestampLiteral) { - return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); + Literals.TimestampLiteral tsLiteral = ((Literals.TimestampLiteral) literal); + return sanitizeTimestamp(tsLiteral.unit(), tsLiteral.value(), now); } else if (literal instanceof Literals.TimeLiteral) 
{ return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { @@ -564,7 +571,18 @@ private static String sanitizeDate(int days, int today) { return "(date)"; } - private static String sanitizeTimestamp(long micros, long now) { + private static String sanitizeTimestamp(Types.TimestampType.Unit unit, long timeUnits, long now) { + final long micros; + switch (unit) { + case MICROS: + micros = timeUnits; + break; + case NANOS: + micros = DateTimeUtil.nanosToMicros(timeUnits); + break; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } String isPast = now > micros ? "ago" : "from-now"; long diff = Math.abs(now - micros); if (diff < FIVE_MINUTES_IN_MICROS) { @@ -595,11 +613,17 @@ private static String sanitizeString(CharSequence value, long now, int today) { Literal date = Literal.of(value).to(Types.DateType.get()); return sanitizeDate(date.value(), today); } else if (TIMESTAMP.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); - return sanitizeTimestamp(ts.value(), now); + Literal ts = Literal.of(value).to(Types.TimestampType.microsWithoutZone()); + return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now); + } else if (TIMESTAMPNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithoutZone()); + return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now); } else if (TIMESTAMPTZ.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.withZone()); - return sanitizeTimestamp(ts.value(), now); + Literal ts = Literal.of(value).to(Types.TimestampType.microsWithZone()); + return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now); + } else if (TIMESTAMPTZNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithZone()); + return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now); } else if 
(TIME.matcher(value).matches()) { return "(time)"; } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 79d7190c49df..aaa2f8efc15f 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -39,7 +38,9 @@ import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.ByteBuffers; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.NaNUtil; class Literals { @@ -298,7 +299,7 @@ public Literal to(Type type) { case TIME: return (Literal) new TimeLiteral(value()); case TIMESTAMP: - return (Literal) new TimestampLiteral(value()); + return (Literal) new TimestampLiteral(((TimestampType) type).unit(), value()); case DATE: if ((long) Integer.MAX_VALUE < value()) { return aboveMax(); @@ -426,8 +427,11 @@ protected Type.TypeID typeId() { } static class TimestampLiteral extends ComparableLiteral { - TimestampLiteral(Long value) { + private final TimestampType.Unit unit; + + TimestampLiteral(TimestampType.Unit unit, Long value) { super(value); + this.unit = unit; } @Override @@ -435,14 +439,43 @@ static class TimestampLiteral extends ComparableLiteral { public Literal to(Type type) { switch (type.typeId()) { case TIMESTAMP: - return (Literal) this; + TimestampType.Unit toUnit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + switch (toUnit) { + case MICROS: + return (Literal) this; + case NANOS: + return (Literal) + new TimestampLiteral(unit, DateTimeUtil.microsToNanos(value())); + } + 
break; + case NANOS: + switch (toUnit) { + case MICROS: + return (Literal) + new TimestampLiteral(unit, DateTimeUtil.nanosToMicros(value())); + case NANOS: + return (Literal) this; + } + break; + } + break; case DATE: - return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); - default: + switch (unit) { + case MICROS: + return (Literal) + new DateLiteral( + (int) + ChronoUnit.DAYS.between( + EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); + case NANOS: + return (Literal) + new DateLiteral( + (int) + ChronoUnit.DAYS.between( + EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate())); + } } return null; } @@ -451,6 +484,10 @@ public Literal to(Type type) { protected Type.TypeID typeId() { return Type.TypeID.TIMESTAMP; } + + protected TimestampType.Unit unit() { + return unit; + } } static class DecimalLiteral extends ComparableLiteral { @@ -501,19 +538,17 @@ public Literal to(Type type) { return (Literal) new TimeLiteral(timeMicros); case TIMESTAMP: - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); - return (Literal) new TimestampLiteral(timestampMicros); + final TimestampType tsType = (TimestampType) type; + final String value = value().toString(); + final java.time.temporal.Temporal valueAsTemporal; + if (tsType.shouldAdjustToUTC()) { + valueAsTemporal = DateTimeUtil.isoTimestamptzToOffsetDateTime(value); } else { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); - return (Literal) new TimestampLiteral(timestampMicros); + valueAsTemporal = + DateTimeUtil.isoTimestampToLocalDateTime(value).atOffset(ZoneOffset.UTC); } + final long timestampUnits = tsType.unit().between(EPOCH, valueAsTemporal); + return (Literal) new 
TimestampLiteral(tsType.unit(), timestampUnits); case STRING: return (Literal) this; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index f69d5d6110ed..b4dee3749604 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,7 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - return (Transform) Timestamps.DAY; + return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.DAYS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -55,14 +56,15 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.DAY.satisfiesOrderOf(other); + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.DAYS + || otherResultTypeUnit == ChronoUnit.MONTHS + || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); - } else if (other instanceof Days || other instanceof Months || other instanceof Years) { - return true; + } else { + return other instanceof Days || other instanceof Months || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index afc14516f3cd..3ceeec9417a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import 
java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -34,7 +35,7 @@ static Hours get() { @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { if (type.typeId() == Type.TypeID.TIMESTAMP) { - return (Transform) Timestamps.HOUR; + return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.HOURS); } throw new IllegalArgumentException("Unsupported type: " + type); @@ -57,15 +58,17 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return other == Timestamps.HOUR; - } else if (other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years) { - return true; + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.HOURS + || otherResultTypeUnit == ChronoUnit.DAYS + || otherResultTypeUnit == ChronoUnit.MONTHS + || otherResultTypeUnit == ChronoUnit.YEARS; + } else { + return other instanceof Hours + || other instanceof Days + || other instanceof Months + || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 8fa4d42385f7..cbdee19e03ea 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,8 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - return (Transform) Timestamps.MONTH; + return (Transform) + Timestamps.get((Types.TimestampType) type, ChronoUnit.MONTHS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ 
-55,14 +57,13 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.MONTH.satisfiesOrderOf(other); + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); - } else if (other instanceof Months || other instanceof Years) { - return true; + } else { + return other instanceof Months || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index e4796478bf28..b3f8d600bd38 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -122,16 +122,23 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor int width = ((Truncate) transform).width(); return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR + || transform == Timestamps.YEAR_FROM_MICROS + || transform == Timestamps.YEAR_FROM_NANOS || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH + || transform == Timestamps.MONTH_FROM_MICROS + || transform == Timestamps.MONTH_FROM_NANOS || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Dates.DAY || transform == Timestamps.DAY || transform instanceof Days) { + } else if (transform == Dates.DAY + || transform == Timestamps.DAY_FROM_MICROS + || transform == Timestamps.DAY_FROM_NANOS + || transform instanceof Days) { return visitor.day(field.fieldId(), 
sourceName, field.sourceId()); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if (transform == Timestamps.HOUR_FROM_MICROS + || transform == Timestamps.HOUR_FROM_NANOS + || transform instanceof Hours) { return visitor.hour(field.fieldId(), sourceName, field.sourceId()); } else if (transform instanceof VoidTransform) { return visitor.alwaysNull(field.fieldId(), sourceName, field.sourceId()); diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 680e095270fb..20e757d1a5a2 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -84,22 +84,16 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { results.add( visitor.truncate( sourceName, field.sourceId(), width, field.direction(), field.nullOrder())); - } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR - || transform instanceof Years) { + } else if ("year".equalsIgnoreCase(transform.toString())) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH - || transform instanceof Months) { + } else if ("month".equalsIgnoreCase(transform.toString())) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Dates.DAY - || transform == Timestamps.DAY - || transform instanceof Days) { + } else if ("day".equalsIgnoreCase(transform.toString())) { results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if ("hour".equalsIgnoreCase(transform.toString())) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } 
else if (transform instanceof UnknownTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index b5b50e9d42b2..bf203262afcc 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -20,6 +20,7 @@ import com.google.errorprone.annotations.Immutable; import java.time.temporal.ChronoUnit; +import java.util.Locale; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.Expression; @@ -28,57 +29,131 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; -enum Timestamps implements Transform { - YEAR(ChronoUnit.YEARS, "year"), - MONTH(ChronoUnit.MONTHS, "month"), - DAY(ChronoUnit.DAYS, "day"), - HOUR(ChronoUnit.HOURS, "hour"); +class Timestamps implements Transform { + + static final Timestamps YEAR_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS); + static final Timestamps MONTH_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS); + static final Timestamps DAY_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS); + static final Timestamps HOUR_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS); + static final Timestamps YEAR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS); + static final Timestamps MONTH_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS); + static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS); + static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS); + + static 
Timestamps get(TimestampType type, String resultTypeUnit) { + switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { + case "year": + return get(type, ChronoUnit.YEARS); + case "month": + return get(type, ChronoUnit.MONTHS); + case "day": + return get(type, ChronoUnit.DAYS); + case "hour": + return get(type, ChronoUnit.HOURS); + default: + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } + } + + static Timestamps get(TimestampType type, ChronoUnit resultTypeUnit) { + switch (type.unit()) { + case MICROS: + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_MICROS; + case MONTHS: + return MONTH_FROM_MICROS; + case DAYS: + return DAY_FROM_MICROS; + case HOURS: + return HOUR_FROM_MICROS; + } + break; + case NANOS: + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_NANOS; + case MONTHS: + return MONTH_FROM_NANOS; + case DAYS: + return DAY_FROM_NANOS; + case HOURS: + return HOUR_FROM_NANOS; + } + break; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + type.unit()); + } + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } @Immutable static class Apply implements SerializableFunction { - private final ChronoUnit granularity; + private final ChronoUnit sourceTypeUnit; + private final ChronoUnit resultTypeUnit; - Apply(ChronoUnit granularity) { - this.granularity = granularity; + Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + this.sourceTypeUnit = sourceTypeUnit; + this.resultTypeUnit = resultTypeUnit; } @Override - public Integer apply(Long timestampMicros) { - if (timestampMicros == null) { + public Integer apply(Long timestampUnits) { + if (timestampUnits == null) { return null; } - switch (granularity) { - case YEARS: - return DateTimeUtil.microsToYears(timestampMicros); - case MONTHS: - return DateTimeUtil.microsToMonths(timestampMicros); - case DAYS: - return 
DateTimeUtil.microsToDays(timestampMicros); - case HOURS: - return DateTimeUtil.microsToHours(timestampMicros); + switch (sourceTypeUnit) { + case MICROS: + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.microsToYears(timestampUnits); + case MONTHS: + return DateTimeUtil.microsToMonths(timestampUnits); + case DAYS: + return DateTimeUtil.microsToDays(timestampUnits); + case HOURS: + return DateTimeUtil.microsToHours(timestampUnits); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } + case NANOS: + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.nanosToYears(timestampUnits); + case MONTHS: + return DateTimeUtil.nanosToMonths(timestampUnits); + case DAYS: + return DateTimeUtil.nanosToDays(timestampUnits); + case HOURS: + return DateTimeUtil.nanosToHours(timestampUnits); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } default: - throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + throw new UnsupportedOperationException( + "Unsupported source type unit: " + sourceTypeUnit); } } } - private final ChronoUnit granularity; - private final String name; private final Apply apply; - Timestamps(ChronoUnit granularity, String name) { - this.granularity = granularity; - this.name = name; - this.apply = new Apply(granularity); + Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + this.apply = new Apply(sourceTypeUnit, resultTypeUnit); } @Override - public Integer apply(Long timestampMicros) { - return apply.apply(timestampMicros); + public Integer apply(Long timestampUnits) { + return apply.apply(timestampUnits); } @Override @@ -94,12 +169,16 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (granularity == ChronoUnit.DAYS) { + if (apply.resultTypeUnit == ChronoUnit.DAYS) { return Types.DateType.get(); } return 
Types.IntegerType.get(); } + public ChronoUnit getResultTypeUnit() { + return apply.resultTypeUnit; + } + @Override public boolean preservesOrder() { return true; @@ -112,11 +191,11 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies - // the order of day + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and + // hour satisfies the order of day Timestamps otherTransform = (Timestamps) other; - return granularity.getDuration().toHours() - <= otherTransform.granularity.getDuration().toHours(); + return apply.resultTypeUnit.getDuration().toHours() + <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); } return false; @@ -174,7 +253,7 @@ public String toHumanString(Type outputType, Integer value) { return "null"; } - switch (granularity) { + switch (apply.resultTypeUnit) { case YEARS: return TransformUtil.humanYear(value); case MONTHS: @@ -184,13 +263,25 @@ public String toHumanString(Type outputType, Integer value) { case HOURS: return TransformUtil.humanHour(value); default: - throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + throw new UnsupportedOperationException("Unsupported time unit: " + apply.resultTypeUnit); } } @Override public String toString() { - return name; + switch (apply.resultTypeUnit) { + case YEARS: + return "year"; + case MONTHS: + return "month"; + case DAYS: + return "day"; + case HOURS: + return "hour"; + default: + throw new UnsupportedOperationException( + "Unsupported result time unit: " + apply.resultTypeUnit); + } } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 5a56b672b1b1..0c5e7dd77d0f 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ 
b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -24,7 +24,7 @@ import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.SerializableFunction; /** @@ -176,11 +176,7 @@ default String toHumanString(Type type, T value) { case TIME: return TransformUtil.humanTime((Long) value); case TIMESTAMP: - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - return TransformUtil.humanTimestampWithZone((Long) value); - } else { - return TransformUtil.humanTimestampWithoutZone((Long) value); - } + return TransformUtil.humanTimestamp((TimestampType) type, (Long) value); case FIXED: case BINARY: if (value instanceof ByteBuffer) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 53bc23a49888..3679628a3b36 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -26,6 +26,7 @@ import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.Base64; +import org.apache.iceberg.types.Types; class TransformUtil { @@ -54,12 +55,26 @@ static String humanTime(Long microsFromMidnight) { return LocalTime.ofNanoOfDay(microsFromMidnight * 1000).toString(); } - static String humanTimestampWithZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); - } - - static String humanTimestampWithoutZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + public static String humanTimestamp(Types.TimestampType tsType, Long value) { + if (tsType.shouldAdjustToUTC()) { + switch (tsType.unit()) { + case MICROS: + return ChronoUnit.MICROS.addTo(EPOCH, 
value).toString(); + case NANOS: + return ChronoUnit.NANOS.addTo(EPOCH, value).toString(); + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); + } + } else { + switch (tsType.unit()) { + case MICROS: + return ChronoUnit.MICROS.addTo(EPOCH, value).toLocalDateTime().toString(); + case NANOS: + return ChronoUnit.NANOS.addTo(EPOCH, value).toLocalDateTime().toString(); + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); + } + } } static String humanHour(int hourOrdinal) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index a1ce33ddd6da..69a91c328f5a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -25,6 +25,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types.TimestampType; /** * Factory methods for transforms. 
@@ -86,8 +87,9 @@ private Transforms() {} try { if (type.typeId() == Type.TypeID.TIMESTAMP) { - return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } else if (type.typeId() == Type.TypeID.DATE) { + return Timestamps.get((TimestampType) type, transform); + } + if (type.typeId() == Type.TypeID.DATE) { return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } } catch (IllegalArgumentException ignored) { @@ -129,7 +131,15 @@ public static Transform year(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - return (Transform) Timestamps.YEAR; + TimestampType.Unit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.YEAR_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.YEAR_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } default: throw new IllegalArgumentException("Cannot partition type " + type + " by year"); } @@ -150,7 +160,15 @@ public static Transform month(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - return (Transform) Timestamps.MONTH; + TimestampType.Unit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.MONTH_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.MONTH_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } default: throw new IllegalArgumentException("Cannot partition type " + type + " by month"); } @@ -171,7 +189,15 @@ public static Transform day(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - return (Transform) Timestamps.DAY; + TimestampType.Unit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.DAY_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.DAY_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } 
default: throw new IllegalArgumentException("Cannot partition type " + type + " by day"); } @@ -188,9 +214,19 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - Preconditions.checkArgument( - type.typeId() == Type.TypeID.TIMESTAMP, "Cannot partition type %s by hour", type); - return (Transform) Timestamps.HOUR; + if (Preconditions.checkNotNull(type.typeId(), "Type ID cannot be null") + == Type.TypeID.TIMESTAMP) { + TimestampType.Unit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.HOUR_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.HOUR_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } + } + throw new IllegalArgumentException("Cannot partition type " + type + " by hour"); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 6c1eee578506..de81fabf7ec8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,7 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - return (Transform) Timestamps.YEAR; + return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.YEARS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -55,14 +56,12 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.YEAR.satisfiesOrderOf(other); + return ((Timestamps) other).getResultTypeUnit() == ChronoUnit.YEARS; } else if (other instanceof Dates) { return 
Dates.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Years) { - return true; + } else { + return other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index d09d9f5395ce..ddc52446e041 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -39,8 +39,10 @@ private Comparators() {} .put(Types.DoubleType.get(), Comparator.naturalOrder()) .put(Types.DateType.get(), Comparator.naturalOrder()) .put(Types.TimeType.get(), Comparator.naturalOrder()) - .put(Types.TimestampType.withZone(), Comparator.naturalOrder()) - .put(Types.TimestampType.withoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.microsWithZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.microsWithoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.nanosWithZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.nanosWithoutZone(), Comparator.naturalOrder()) .put(Types.StringType.get(), Comparators.charSequences()) .put(Types.UUIDType.get(), Comparator.naturalOrder()) .put(Types.BinaryType.get(), Comparators.unsignedBytes()) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index dda842c9e161..a27e2ac9a046 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -19,6 +19,8 @@ package org.apache.iceberg.types; import java.io.Serializable; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -47,8 +49,11 @@ private Types() {} .put(DoubleType.get().toString(), DoubleType.get()) .put(DateType.get().toString(), DateType.get()) .put(TimeType.get().toString(), TimeType.get()) + // TODO(epg): 
Replace next two with non-deprecated micros methods. .put(TimestampType.withZone().toString(), TimestampType.withZone()) .put(TimestampType.withoutZone().toString(), TimestampType.withoutZone()) + .put(TimestampType.nanosWithZone().toString(), TimestampType.nanosWithZone()) + .put(TimestampType.nanosWithoutZone().toString(), TimestampType.nanosWithoutZone()) .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) @@ -206,27 +211,75 @@ public String toString() { } public static class TimestampType extends PrimitiveType { - private static final TimestampType INSTANCE_WITH_ZONE = new TimestampType(true); - private static final TimestampType INSTANCE_WITHOUT_ZONE = new TimestampType(false); + public enum Unit { + MICROS(ChronoUnit.MICROS), + NANOS(ChronoUnit.NANOS), + ; + public long between(Temporal temporal1Inclusive, Temporal temporal2Exclusive) { + return unit.between(temporal1Inclusive, temporal2Exclusive); + } + + Unit(final ChronoUnit unit) { + this.unit = unit; + } + + private final ChronoUnit unit; + } + + private static final TimestampType INSTANCE_MICROS_WITH_ZONE = + new TimestampType(true, Unit.MICROS); + private static final TimestampType INSTANCE_MICROS_WITHOUT_ZONE = + new TimestampType(false, Unit.MICROS); + private static final TimestampType INSTANCE_NANOS_WITH_ZONE = + new TimestampType(true, Unit.NANOS); + private static final TimestampType INSTANCE_NANOS_WITHOUT_ZONE = + new TimestampType(false, Unit.NANOS); + + /** @deprecated for removal in 2.0; use {@link #microsWithZone()} instead. */ + @Deprecated public static TimestampType withZone() { - return INSTANCE_WITH_ZONE; + return INSTANCE_MICROS_WITH_ZONE; } + /** @deprecated for removal in 2.0; use {@link #microsWithoutZone()} instead. 
*/ + @Deprecated public static TimestampType withoutZone() { - return INSTANCE_WITHOUT_ZONE; + return INSTANCE_MICROS_WITHOUT_ZONE; + } + + public static TimestampType microsWithZone() { + return INSTANCE_MICROS_WITH_ZONE; + } + + public static TimestampType microsWithoutZone() { + return INSTANCE_MICROS_WITHOUT_ZONE; + } + + public static TimestampType nanosWithZone() { + return INSTANCE_NANOS_WITH_ZONE; + } + + public static TimestampType nanosWithoutZone() { + return INSTANCE_NANOS_WITHOUT_ZONE; } private final boolean adjustToUTC; + private final Unit unit; - private TimestampType(boolean adjustToUTC) { + private TimestampType(boolean adjustToUTC, Unit unit) { this.adjustToUTC = adjustToUTC; + this.unit = unit; } public boolean shouldAdjustToUTC() { return adjustToUTC; } + public Unit unit() { + return unit; + } + @Override public TypeID typeId() { return TypeID.TIMESTAMP; @@ -234,10 +287,13 @@ public TypeID typeId() { @Override public String toString() { - if (shouldAdjustToUTC()) { - return "timestamptz"; - } else { - return "timestamp"; + switch (unit) { + case MICROS: + return shouldAdjustToUTC() ? "timestamptz" : "timestamp"; + case NANOS: + return shouldAdjustToUTC() ? 
"timestamptz_ns" : "timestamp_ns"; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); } } @@ -250,12 +306,12 @@ public boolean equals(Object o) { } TimestampType timestampType = (TimestampType) o; - return adjustToUTC == timestampType.adjustToUTC; + return adjustToUTC == timestampType.adjustToUTC && unit == timestampType.unit; } @Override public int hashCode() { - return Objects.hash(TimestampType.class, adjustToUTC); + return Objects.hash(TimestampType.class, adjustToUTC, unit); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index a2f5301f44a9..902328d820e6 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -33,8 +33,12 @@ private DateTimeUtil() {} public static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); public static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); - public static final long MICROS_PER_MILLIS = 1000L; + public static final long MICROS_PER_MILLIS = 1_000L; + public static final long MILLIS_PER_SECOND = 1_000L; public static final long MICROS_PER_SECOND = 1_000_000L; + public static final long NANOS_PER_SECOND = 1_000_000_000L; + public static final long NANOS_PER_MILLI = 1_000_000L; + public static final long NANOS_PER_MICRO = 1_000L; public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); @@ -60,14 +64,26 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } 
+ public static long nanosFromInstant(Instant instant) { + return ChronoUnit.NANOS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); + } + public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. @@ -75,14 +91,38 @@ public static long microsToMillis(long micros) { return Math.floorDiv(micros, MICROS_PER_MILLIS); } + public static long nanosToMillis(long nanos) { + return Math.floorDiv(nanos, NANOS_PER_MILLI); + } + + public static long nanosToMicros(long nanos) { + return Math.floorDiv(nanos, NANOS_PER_MICRO); + } + + public static long microsToNanos(long micros) { + return Math.multiplyExact(micros, NANOS_PER_MICRO); + } + + public static long millisToNanos(long millis) { + return Math.multiplyExact(millis, NANOS_PER_MILLI); + } + public static OffsetDateTime timestamptzFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch); } + public static OffsetDateTime timestamptzFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch); + } + public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -106,11 +146,27 @@ public static String microsToIsoTimestamptz(long micros) { return 
localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); } + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + DateTimeFormatter zeroOffsetFormatter = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); + return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); + } + public static String microsToIsoTimestamp(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -120,19 +176,32 @@ public static long isoTimeToMicros(String timeString) { } public static long isoTimestamptzToMicros(String timestampString) { - return microsFromTimestamptz( - OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); + return microsFromTimestamptz(isoTimestamptzToOffsetDateTime(timestampString)); + } + + public static OffsetDateTime isoTimestamptzToOffsetDateTime(String timestamp) { + return OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_DATE_TIME); + } + + public static LocalDateTime isoTimestampToLocalDateTime(String timestamp) { + return LocalDateTime.parse(timestamp, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + + public static long isoTimestamptzToNanos(String timestampString) { + return nanosFromTimestamptz(isoTimestamptzToOffsetDateTime(timestampString)); } public static boolean isUTCTimestamptz(String timestampString) { - OffsetDateTime offsetDateTime = - OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); + 
OffsetDateTime offsetDateTime = isoTimestamptzToOffsetDateTime(timestampString); return offsetDateTime.getOffset().equals(ZoneOffset.UTC); } public static long isoTimestampToMicros(String timestampString) { - return microsFromTimestamp( - LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + return microsFromTimestamp(isoTimestampToLocalDateTime(timestampString)); + } + + public static long isoTimestampToNanos(String timestampString) { + return nanosFromTimestamp(isoTimestampToLocalDateTime(timestampString)); } public static int daysToYears(int days) { @@ -159,28 +228,76 @@ public static int microsToYears(long micros) { return convertMicros(micros, ChronoUnit.YEARS); } + public static int nanosToYears(long nanos) { + return convertNanos(nanos, ChronoUnit.YEARS); + } + public static int microsToMonths(long micros) { return convertMicros(micros, ChronoUnit.MONTHS); } + public static int nanosToMonths(long nanos) { + return convertNanos(nanos, ChronoUnit.MONTHS); + } + public static int microsToDays(long micros) { return convertMicros(micros, ChronoUnit.DAYS); } + public static int nanosToDays(long nanos) { + return convertNanos(nanos, ChronoUnit.DAYS); + } + + public static int millisToHours(long millis) { + return convertMillis(millis, ChronoUnit.HOURS); + } + public static int microsToHours(long micros) { return convertMicros(micros, ChronoUnit.HOURS); } + public static int nanosToHours(long nanos) { + return convertNanos(nanos, ChronoUnit.HOURS); + } + + private static int convertMillis(long millis, ChronoUnit granularity) { + if (millis >= 0) { + long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); + long nanoAdjustment = Math.floorMod(millis, MILLIS_PER_SECOND) * NANOS_PER_MILLI; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 milli to the value to account for the case where there is exactly 1 unit between + // the timestamp and epoch because the result will always 
be decremented. + long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); + long nanoAdjustment = Math.floorMod(millis + 1, MILLIS_PER_SECOND) * NANOS_PER_MILLI; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + private static int convertMicros(long micros, ChronoUnit granularity) { if (micros >= 0) { long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * 1000; + long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * NANOS_PER_MICRO; return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); } else { // add 1 micro to the value to account for the case where there is exactly 1 unit between // the timestamp and epoch because the result will always be decremented. long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * 1000; + long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * NANOS_PER_MICRO; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + + private static int convertNanos(long nanos, ChronoUnit granularity) { + if (nanos >= 0) { + long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 nano to the value to account for the case where there is exactly 1 unit between + // the timestamp and epoch because the result will always be decremented. 
+ long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos + 1, NANOS_PER_SECOND); return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; } } diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java index 5e4ca1fb11be..369a3a842224 100644 --- a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -29,12 +29,13 @@ public class PartitionSpecTestBase { Types.NestedField.required(2, "l", Types.LongType.get()), Types.NestedField.required(3, "d", Types.DateType.get()), Types.NestedField.required(4, "t", Types.TimeType.get()), - Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.required(5, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)), Types.NestedField.required(7, "s", Types.StringType.get()), Types.NestedField.required(8, "u", Types.UUIDType.get()), Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), - Types.NestedField.required(10, "b", Types.BinaryType.get())); + Types.NestedField.required(10, "b", Types.BinaryType.get()), + Types.NestedField.required(11, "tsn", Types.TimestampType.nanosWithoutZone())); // a spec with all of the allowed transform/type pairs public static final PartitionSpec[] SPECS = @@ -49,6 +50,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).identity("u").build(), PartitionSpec.builderFor(SCHEMA).identity("f").build(), PartitionSpec.builderFor(SCHEMA).identity("b").build(), + PartitionSpec.builderFor(SCHEMA).identity("tsn").build(), PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(), @@ -59,6 +61,7 @@ public class 
PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("tsn", 128).build(), PartitionSpec.builderFor(SCHEMA).year("d").build(), PartitionSpec.builderFor(SCHEMA).month("d").build(), PartitionSpec.builderFor(SCHEMA).day("d").build(), @@ -66,6 +69,10 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).month("ts").build(), PartitionSpec.builderFor(SCHEMA).day("ts").build(), PartitionSpec.builderFor(SCHEMA).hour("ts").build(), + PartitionSpec.builderFor(SCHEMA).year("tsn").build(), + PartitionSpec.builderFor(SCHEMA).month("tsn").build(), + PartitionSpec.builderFor(SCHEMA).day("tsn").build(), + PartitionSpec.builderFor(SCHEMA).hour("tsn").build(), PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(), diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 332556e474c7..233c8c508239 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -178,8 +178,10 @@ public void testTime() { @Test public void testTimestamp() { - assertAccessorReturns(Types.TimestampType.withoutZone(), 123L); - assertAccessorReturns(Types.TimestampType.withZone(), 123L); + assertAccessorReturns(Types.TimestampType.microsWithoutZone(), 123L); + assertAccessorReturns(Types.TimestampType.microsWithZone(), 123L); + assertAccessorReturns(Types.TimestampType.nanosWithoutZone(), 123L); + assertAccessorReturns(Types.TimestampType.nanosWithZone(), 123L); } @Test diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java index 
2fda247a33c8..6bee8aedbf17 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java @@ -32,7 +32,7 @@ public class TestPartitionPaths { new Schema( Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()), - Types.NestedField.optional(3, "ts", Types.TimestampType.withoutZone())); + Types.NestedField.optional(3, "ts", Types.TimestampType.microsWithoutZone())); @Test public void testPartitionPath() { @@ -42,8 +42,27 @@ public void testPartitionPath() { Transform bucket = Transforms.bucket(10); Literal ts = - Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.withoutZone()); - Object tsHour = hour.bind(Types.TimestampType.withoutZone()).apply(ts.value()); + Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.microsWithoutZone()); + Object tsHour = hour.bind(Types.TimestampType.microsWithoutZone()).apply(ts.value()); + Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); + + Row partition = Row.of(tsHour, idBucket); + + assertThat(spec.partitionToPath(partition)) + .as("Should produce expected partition key") + .isEqualTo("ts_hour=2017-12-01-10/id_bucket=" + idBucket); + } + + @Test + public void testPartitionPathWithNanoseconds() { + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("ts").bucket("id", 10).build(); + + Transform hour = Transforms.hour(); + Transform bucket = Transforms.bucket(10); + + Literal ts = + Literal.of("2017-12-01T10:12:55.038194789").to(Types.TimestampType.nanosWithoutZone()); + Object tsHour = hour.bind(Types.TimestampType.nanosWithoutZone()).apply(ts.value()); Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); Row partition = Row.of(tsHour, idBucket); diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index 
eb0e74164688..a621ccab81e5 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -30,10 +30,10 @@ public class TestPartitionSpecValidation { private static final Schema SCHEMA = new Schema( NestedField.required(1, "id", Types.LongType.get()), - NestedField.required(2, "ts", Types.TimestampType.withZone()), - NestedField.required(3, "another_ts", Types.TimestampType.withZone()), - NestedField.required(4, "d", Types.TimestampType.withZone()), - NestedField.required(5, "another_d", Types.TimestampType.withZone()), + NestedField.required(2, "ts", Types.TimestampType.microsWithZone()), + NestedField.required(3, "another_ts", Types.TimestampType.microsWithZone()), + NestedField.required(4, "d", Types.TimestampType.microsWithZone()), + NestedField.required(5, "another_d", Types.TimestampType.microsWithZone()), NestedField.required(6, "s", Types.StringType.get())); @Test diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java index 9a27830543ad..5638ef6c31b5 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java @@ -42,12 +42,13 @@ public class TestExpressionUtil { Types.NestedField.required(1, "id", Types.LongType.get()), Types.NestedField.required(2, "val", Types.IntegerType.get()), Types.NestedField.required(3, "val2", Types.IntegerType.get()), - Types.NestedField.required(4, "ts", Types.TimestampType.withoutZone()), - Types.NestedField.required(5, "date", Types.DateType.get()), - Types.NestedField.required(6, "time", Types.DateType.get()), - Types.NestedField.optional(7, "data", Types.StringType.get()), - Types.NestedField.optional(8, "measurement", Types.DoubleType.get()), - Types.NestedField.optional(9, "test", Types.IntegerType.get())); + 
Types.NestedField.required(4, "ts", Types.TimestampType.microsWithoutZone()), + Types.NestedField.required(5, "tsns", Types.TimestampType.nanosWithoutZone()), + Types.NestedField.required(6, "date", Types.DateType.get()), + Types.NestedField.required(7, "time", Types.DateType.get()), + Types.NestedField.optional(8, "data", Types.StringType.get()), + Types.NestedField.optional(9, "measurement", Types.DoubleType.get()), + Types.NestedField.optional(10, "test", Types.IntegerType.get())); private static final Types.StructType STRUCT = SCHEMA.asStruct(); @@ -461,7 +462,9 @@ public void testSanitizeTimestamp() { "2022-04-29T23:49:51", "2022-04-29T23:49:51.123456", "2022-04-29T23:49:51-07:00", - "2022-04-29T23:49:51.123456+01:00")) { + "2022-04-29T23:49:51.123456+01:00", + "2022-04-29T23:49:51.123456789", + "2022-04-29T23:49:51.123456789+01:00")) { assertEquals( Expressions.equal("test", "(timestamp)"), ExpressionUtil.sanitize(Expressions.equal("test", timestamp))); @@ -496,7 +499,14 @@ public void testSanitizeTimestampAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(nowLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -522,7 +532,14 @@ public void testSanitizeTimestampPast() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( 
+ Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -548,7 +565,14 @@ public void testSanitizeTimestampLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(lastWeekLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -574,7 +598,15 @@ public void testSanitizeTimestampFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(ninetyMinutesFromNowLocal) + .to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat( ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowLocal))) @@ -597,7 +629,14 @@ public void testSanitizeTimestamptzAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowUtc).to(Types.TimestampType.withZone())))); + Literal.of(nowUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + ExpressionUtil.sanitize( + 
Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -618,7 +657,14 @@ public void testSanitizeTimestamptzPast() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.withZone())))); + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -639,7 +685,14 @@ public void testSanitizeTimestamptzLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekUtc).to(Types.TimestampType.withZone())))); + Literal.of(lastWeekUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -660,7 +713,14 @@ public void testSanitizeTimestamptzFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.withZone())))); + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + 
Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowUtc))) .as("Sanitized string should be identical except for descriptive literal") diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java index d5aa251ffb50..fcb031c27aa8 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java @@ -38,8 +38,10 @@ public void testLiterals() throws Exception { Literal.of(8.75D), Literal.of("2017-11-29").to(Types.DateType.get()), Literal.of("11:30:07").to(Types.TimeType.get()), - Literal.of("2017-11-29T11:30:07.123").to(Types.TimestampType.withoutZone()), - Literal.of("2017-11-29T11:30:07.123+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123456").to(Types.TimestampType.microsWithoutZone()), + Literal.of("2017-11-29T11:30:07.123456+01:00").to(Types.TimestampType.microsWithZone()), + Literal.of("2017-11-29T11:30:07.123456789").to(Types.TimestampType.nanosWithoutZone()), + Literal.of("2017-11-29T11:30:07.123456789+01:00").to(Types.TimestampType.nanosWithZone()), Literal.of("abc"), Literal.of(UUID.randomUUID()), Literal.of(new byte[] {1, 2, 3}).to(Types.FixedType.ofLength(3)), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index f8d2cd49d969..86aa9ea07c63 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -42,7 +42,11 @@ public void testIdentityConversions() { Pair.of(Literal.of("34.55"), 
Types.DecimalType.of(9, 2)), Pair.of(Literal.of("2017-08-18"), Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), - Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()), + Pair.of( + Literal.of("2017-08-18T14:21:01.919432"), Types.TimestampType.microsWithoutZone()), + Pair.of( + Literal.of("2017-08-18T14:21:01.919432755"), + Types.TimestampType.nanosWithoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), @@ -62,6 +66,22 @@ public void testIdentityConversions() { } } + @Test + public void testTimestampWithMicrosecondsToDate() { + final Literal micros = + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.microsWithoutZone()); + final Literal dateOfNanos = micros.to(Types.DateType.get()); + assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); + } + + @Test + public void testTimestampWithNanoosecondsToDate() { + final Literal nanos = + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.nanosWithoutZone()); + final Literal dateOfNanos = nanos.to(Types.DateType.get()); + assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); + } + @Test public void testBinaryToFixed() { Literal lit = Literal.of(ByteBuffer.wrap(new byte[] {0, 1, 2})); @@ -99,8 +119,10 @@ public void testInvalidBooleanConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -114,8 +136,10 @@ public void testInvalidIntegerConversions() { Literal.of(34), Types.BooleanType.get(), 
Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -142,8 +166,10 @@ public void testInvalidFloatConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -159,8 +185,10 @@ public void testInvalidDoubleConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -177,8 +205,10 @@ public void testInvalidDateConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -196,8 +226,27 @@ public void testInvalidTimeConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.DateType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + 
Types.TimestampType.nanosWithZone(), + Types.DecimalType.of(9, 4), + Types.StringType.get(), + Types.UUIDType.get(), + Types.FixedType.ofLength(1), + Types.BinaryType.get()); + } + + @Test + public void testInvalidTimestampMicrosConversions() { + testInvalidConversions( + Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithoutZone()), + Types.BooleanType.get(), + Types.IntegerType.get(), + Types.LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.TimeType.get(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -206,9 +255,9 @@ public void testInvalidTimeConversions() { } @Test - public void testInvalidTimestampConversions() { + public void testInvalidTimestampNanosConversions() { testInvalidConversions( - Literal.of("2017-08-18T14:21:01.919").to(Types.TimestampType.withoutZone()), + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithoutZone()), Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), @@ -233,8 +282,10 @@ public void testInvalidDecimalConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -267,8 +318,10 @@ public void testInvalidUUIDConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.FixedType.ofLength(1), @@ -286,8 +339,10 @@ public void testInvalidFixedConversions() { 
Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -305,8 +360,10 @@ public void testInvalidBinaryConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index f35b274eb3d5..678e48632e92 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -101,7 +101,7 @@ public void testStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 14, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -112,7 +112,7 @@ public void testStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("2017-08-18T14:21:01.919"); - timestamp = timestampStr.to(Types.TimestampType.withoutZone()); + timestamp = 
timestampStr.to(Types.TimestampType.microsWithoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -120,7 +120,7 @@ public void testStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("2017-08-18T14:21:01.919-07:00"); - timestamp = timestampStr.to(Types.TimestampType.withZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 21, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -132,6 +132,38 @@ public void testStringToTimestampLiteral() { .isEqualTo(avroValue); } + @Test + public void testStringToTimestampLiteralWithMicrosecondPrecisionFromNanoseconds() { + // use Avro's timestamp conversion to validate the result + Schema avroSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + TimeConversions.TimestampMicrosConversion avroConversion = + new TimeConversions.TimestampMicrosConversion(); + + Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); + Literal timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); + long avroValue = + avroConversion.toLong( + LocalDateTime.of(2017, 8, 18, 14, 21, 1, 123456000).toInstant(ZoneOffset.UTC), + avroSchema, + avroSchema.getLogicalType()); + + assertThat((long) timestamp.value()) + .as("Timestamp without zone should match UTC") + .isEqualTo(avroValue); + } + + @Test + public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() { + // Not using Avro's timestamp conversion as it has no timestampNanos(). 
+ long expected = 1503066061123456789L; + + Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); + Literal timestamp = timestampStr.to(Types.TimestampType.nanosWithoutZone()); + assertThat((long) timestamp.value()) + .as("Timestamp without zone should match UTC") + .isEqualTo(expected); + } + @Test public void testNegativeStringToTimestampLiteral() { // use Avro's timestamp conversion to validate the result @@ -141,7 +173,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("1969-12-31T23:59:58.999999+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -156,7 +188,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("1969-12-31T23:59:58.999999"); - timestamp = timestampStr.to(Types.TimestampType.withoutZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -164,7 +196,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("1969-12-31T16:59:58.999999-07:00"); - timestamp = timestampStr.to(Types.TimestampType.withZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -181,8 +213,13 @@ public void testNegativeStringToTimestampLiteral() { @Test public void testTimestampWithZoneWithoutZoneInLiteral() { // Zone must be present in literals when converting to timestamp with zone 
- Literal timestampStr = Literal.of("2017-08-18T14:21:01.919"); - Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withZone())) + Assertions.assertThatThrownBy( + () -> Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } @@ -190,8 +227,16 @@ public void testTimestampWithZoneWithoutZoneInLiteral() { @Test public void testTimestampWithoutZoneWithZoneInLiteral() { // Zone must not be present in literals when converting to timestamp without zone - Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+07:00"); - Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withoutZone())) + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123+07:00") + .to(Types.TimestampType.microsWithoutZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456+07:00") + .to(Types.TimestampType.nanosWithoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index b8a0e40c1110..c76ac98612b9 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -107,17 +107,65 @@ public void testSpecValues() { .isEqualTo(-662762989); Literal timestampVal = - Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.withoutZone()); + 
Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.microsWithoutZone()); assertThat(BucketUtil.hash(timestampVal.value())) .as("Spec example: hash(2017-11-16T22:31:08) = -2047944441") .isEqualTo(-2047944441); + timestampVal = + Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.microsWithoutZone()); + assertThat(BucketUtil.hash(timestampVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810") + .isEqualTo(-1207196810); + Literal timestamptzVal = - Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.withZone()); + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.microsWithZone()); assertThat(BucketUtil.hash(timestamptzVal.value())) .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441") .isEqualTo(-2047944441); + timestamptzVal = + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.microsWithZone()); + assertThat(BucketUtil.hash(timestamptzVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810") + .isEqualTo(-1207196810); + + Literal timestampNsVal = + Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08) = -737750069") + .isEqualTo(-737750069); + + timestampNsVal = + Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000001) = -976603392") + .isEqualTo(-976603392); + + timestampNsVal = + Literal.of("2017-11-16T22:31:08.000000001").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000000001) = -160215926") + .isEqualTo(-160215926); + + Literal timestamptzNsVal = + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.nanosWithZone()); + 
assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -737750069") + .isEqualTo(-737750069); + + timestamptzNsVal = + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.nanosWithZone()); + assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -976603392") + .isEqualTo(-976603392); + + timestamptzNsVal = + Literal.of("2017-11-16T14:31:08.000000001-08:00").to(Types.TimestampType.nanosWithZone()); + assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000000001-08:00) = -160215926") + .isEqualTo(-160215926); + assertThat(BucketUtil.hash("iceberg")) .as("Spec example: hash(\"iceberg\") = 1210000089") .isEqualTo(1210000089); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 6101fdf0986d..8d651bf618dd 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -98,7 +98,7 @@ public void testTimeHumanString() { @Test public void testTimestampWithZoneHumanString() { - Types.TimestampType timestamptz = Types.TimestampType.withZone(); + Types.TimestampType timestamptz = Types.TimestampType.microsWithZone(); Transform identity = Transforms.identity(); Literal ts = Literal.of("2017-12-01T10:12:55.038194-08:00").to(timestamptz); @@ -111,7 +111,7 @@ public void testTimestampWithZoneHumanString() { @Test public void testTimestampWithoutZoneHumanString() { - Types.TimestampType timestamp = Types.TimestampType.withoutZone(); + Types.TimestampType timestamp = Types.TimestampType.microsWithoutZone(); Transform identity = Transforms.identity(); String tsString = "2017-12-01T10:12:55.038194"; diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java 
b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java index ccfda895f9f1..ffc48fc0e9aa 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java @@ -268,10 +268,10 @@ public void testBadSparkPartitionFilter() { public void testProjectionNames() { final Schema schema = new Schema( - required(1, "timestamp1", Types.TimestampType.withoutZone()), - optional(2, "timestamp2", Types.TimestampType.withoutZone()), - optional(3, "timestamp3", Types.TimestampType.withoutZone()), - optional(4, "timestamp4", Types.TimestampType.withoutZone()), + required(1, "timestamp1", Types.TimestampType.microsWithoutZone()), + optional(2, "timestamp2", Types.TimestampType.microsWithoutZone()), + optional(3, "timestamp3", Types.TimestampType.microsWithoutZone()), + optional(4, "timestamp4", Types.TimestampType.microsWithoutZone()), optional(5, "date1", Types.DateType.get()), optional(6, "date2", Types.DateType.get()), optional(7, "date3", Types.DateType.get()), diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java index fa3436e5701b..87bdd6944a73 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java @@ -199,19 +199,23 @@ public void testIn() { public void testInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-01T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); Long date20191202 = (Long) - 
Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-02T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.microsWithoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = in("ts", date20191201, date20191202); @@ -307,19 +311,23 @@ public void testNotNaN() { public void testNotInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-01T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); Long date20191202 = (Long) - Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-02T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.microsWithoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = notIn("ts", date20191201, date20191202); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 3c37e643eb95..dee5d8265b04 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -29,7 +29,7 @@ public class TestTimestamps 
{ @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -61,7 +61,7 @@ public void testDeprecatedTimestampTransform() { @Test public void testTimestampTransform() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -105,7 +105,7 @@ public void testTimestampTransform() { @Test public void testTimestampWithoutZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("2017-12-01T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -125,7 +125,7 @@ public void testTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-30T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -145,7 +145,7 @@ public void testNegativeTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-30T00:00:00.000000").to(type); Transform year = Transforms.year(); @@ -165,7 
+165,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { @Test public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform year = Transforms.year(); @@ -185,7 +185,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { @Test public void testTimestampWithZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); Literal date = Literal.of("2017-12-01T10:12:55.038194-08:00").to(type); Transform year = Transforms.year(); @@ -206,7 +206,7 @@ public void testTimestampWithZoneToHumanString() { @Test public void testNullHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") .isEqualTo("null"); @@ -223,7 +223,7 @@ public void testNullHumanString() { @Test public void testTimestampsReturnType() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); Transform year = Transforms.year(); Type yearResultType = year.getResultType(type); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java index cd20868a06eb..dc0f199db132 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java @@ -44,7 +44,7 @@ import org.junit.jupiter.api.Test; public class TestTimestampsProjection { - private static final Types.TimestampType TYPE = 
Types.TimestampType.withoutZone(); + private static final Types.TimestampType TYPE = Types.TimestampType.microsWithoutZone(); private static final Schema SCHEMA = new Schema(optional(1, "timestamp", TYPE)); @SuppressWarnings("unchecked") diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java index c2330247fa9d..70b5a16e3bb3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java @@ -39,8 +39,8 @@ public void testFunctionSerialization() throws Exception { Types.StringType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithoutZone(), Types.BinaryType.get(), Types.FixedType.ofLength(4), Types.DecimalType.of(9, 4), diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 165d96c029cc..a04b039e5d65 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -75,8 +75,10 @@ public void testTime() { @Test public void testTimestamp() { - assertComparesCorrectly(Comparators.forType(Types.TimestampType.withoutZone()), 111, 222); - assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithZone()), 111, 222); } @Test diff 
--git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 6c7a884a5839..464f99652c60 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -93,15 +93,25 @@ public void testByteBufferConversions() { assertThat(Literal.of(10000L).to(TimeType.get()).toByteBuffer().array()) .isEqualTo(new byte[] {16, 39, 0, 0, 0, 0, 0, 0}); - // timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte + // timestamps are stored as micro|nanoseconds from 1970-01-01 00:00:00 in an 8-byte // little-endian long // 400000L is 0...110|00011010|10000000 in binary // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0 - assertConversion(400000L, TimestampType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertConversion(400000L, TimestampType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.withoutZone()).toByteBuffer().array()) + assertConversion( + 400000L, TimestampType.microsWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.microsWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.microsWithoutZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.microsWithZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.nanosWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.nanosWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.nanosWithoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - 
assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) + assertThat(Literal.of(400000L).to(TimestampType.nanosWithZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); // strings are stored as UTF-8 bytes (without length) diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 7f5948bd5838..56a011263995 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -37,8 +37,10 @@ public class TestReadabilityChecks { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(3), diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index d981b5a26789..52cb95dcba03 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -39,8 +39,10 @@ public void testIdentityTypes() throws Exception { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 
ca5c6edce16b..088c042de550 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -29,7 +29,9 @@ public void fromPrimitiveString() { Assertions.assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); Assertions.assertThat(Types.fromPrimitiveString("timestamp")) - .isSameAs(Types.TimestampType.withoutZone()); + .isSameAs(Types.TimestampType.microsWithoutZone()); + Assertions.assertThat(Types.fromPrimitiveString("timestamp_ns")) + .isSameAs(Types.TimestampType.nanosWithoutZone()); Assertions.assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")) .isEqualTo(Types.FixedType.ofLength(3)); From b283a5a1808e7eff4e5a7dfe38a4ff26dde52927 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Tue, 26 Mar 2024 10:54:41 -0500 Subject: [PATCH 02/38] Redo as separate type --- .../expressions/BoundLiteralPredicate.java | 1 + .../iceberg/expressions/ExpressionUtil.java | 52 +++---- .../apache/iceberg/expressions/Literals.java | 119 +++++++++------- .../org/apache/iceberg/transforms/Bucket.java | 2 + .../org/apache/iceberg/transforms/Days.java | 13 +- .../org/apache/iceberg/transforms/Hours.java | 25 ++-- .../org/apache/iceberg/transforms/Months.java | 11 +- .../iceberg/transforms/TimeTransform.java | 4 +- .../apache/iceberg/transforms/Timestamps.java | 31 ++-- .../apache/iceberg/transforms/Transform.java | 14 +- .../iceberg/transforms/TransformUtil.java | 35 ++--- .../apache/iceberg/transforms/Transforms.java | 71 ++++------ .../org/apache/iceberg/transforms/Years.java | 10 +- .../org/apache/iceberg/types/Comparators.java | 8 +- .../org/apache/iceberg/types/Conversions.java | 2 + .../java/org/apache/iceberg/types/Type.java | 1 + .../org/apache/iceberg/types/TypeUtil.java | 1 + .../java/org/apache/iceberg/types/Types.java | 118 ++++++++-------- .../org/apache/iceberg/util/DateTimeUtil.java | 132 +++--------------- .../apache/iceberg/PartitionSpecTestBase.java | 4 +- 
.../org/apache/iceberg/TestAccessors.java | 8 +- .../apache/iceberg/TestPartitionPaths.java | 10 +- .../iceberg/TestPartitionSpecValidation.java | 8 +- .../expressions/TestExpressionUtil.java | 37 +++-- .../expressions/TestLiteralSerialization.java | 8 +- .../TestMiscLiteralConversions.java | 94 ++++++------- .../TestStringLiteralConversions.java | 26 ++-- .../iceberg/transforms/TestBucketing.java | 23 ++- .../iceberg/transforms/TestIdentity.java | 4 +- .../iceberg/transforms/TestProjection.java | 8 +- .../iceberg/transforms/TestResiduals.java | 24 ++-- .../iceberg/transforms/TestTimestamps.java | 18 +-- .../transforms/TestTimestampsProjection.java | 2 +- .../TestTransformSerialization.java | 4 +- .../apache/iceberg/types/TestComparators.java | 8 +- .../apache/iceberg/types/TestConversions.java | 19 ++- .../iceberg/types/TestReadabilityChecks.java | 8 +- .../iceberg/types/TestSerializableTypes.java | 8 +- .../org/apache/iceberg/types/TestTypes.java | 4 +- 39 files changed, 439 insertions(+), 536 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java index 02dc31c6a6c5..127d46e6a48f 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java @@ -31,6 +31,7 @@ public class BoundLiteralPredicate extends BoundPredicate { Type.TypeID.LONG, Type.TypeID.DATE, Type.TypeID.TIME, + Type.TypeID.TIMESTAMP_NANO, Type.TypeID.TIMESTAMP); private static long toLong(Literal lit) { diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index d8e764a4c4ef..57d306d98813 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -36,7 +36,6 @@ import 
org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import org.apache.iceberg.util.DateTimeUtil; /** Expression utility methods. */ public class ExpressionUtil { @@ -44,20 +43,22 @@ public class ExpressionUtil { Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get()); private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); private static final long FIVE_MINUTES_IN_MICROS = TimeUnit.MINUTES.toMicros(5); + private static final long FIVE_MINUTES_IN_NANOS = TimeUnit.MINUTES.toNanos(5); private static final long THREE_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(3); private static final long NINETY_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(90); private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); private static final Pattern TIMESTAMP = - Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?"); - private static final Pattern TIMESTAMPNS = - Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?"); + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); private static final Pattern TIMESTAMPTZ = Pattern.compile( - "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)"); + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + private static final Pattern TIMESTAMPNS = + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?"); private static final Pattern TIMESTAMPTZNS = Pattern.compile( "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10; private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5; @@ -520,7 +521,9 @@ private static String sanitize(Type type, Object value, long now, int today) { case TIME: return 
"(time)"; case TIMESTAMP: - return sanitizeTimestamp(((Types.TimestampType) type).unit(), (long) value, now); + return sanitizeTimestamp((long) value, now); + case TIMESTAMP_NANO: + return sanitizeTimestamp((long) value / 1000, now); case STRING: return sanitizeString((CharSequence) value, now, today); case BOOLEAN: @@ -541,8 +544,9 @@ private static String sanitize(Literal literal, long now, int today) { } else if (literal instanceof Literals.DateLiteral) { return sanitizeDate(((Literals.DateLiteral) literal).value(), today); } else if (literal instanceof Literals.TimestampLiteral) { - Literals.TimestampLiteral tsLiteral = ((Literals.TimestampLiteral) literal); - return sanitizeTimestamp(tsLiteral.unit(), tsLiteral.value(), now); + return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); + } else if (literal instanceof Literals.TimestampNanoLiteral) { + return sanitizeTimestamp(((Literals.TimestampNanoLiteral) literal).value() / 1000, now); } else if (literal instanceof Literals.TimeLiteral) { return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { @@ -571,18 +575,8 @@ private static String sanitizeDate(int days, int today) { return "(date)"; } - private static String sanitizeTimestamp(Types.TimestampType.Unit unit, long timeUnits, long now) { - final long micros; - switch (unit) { - case MICROS: - micros = timeUnits; - break; - case NANOS: - micros = DateTimeUtil.nanosToMicros(timeUnits); - break; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); - } + // TODO(epg): `now` is millisecond resolution; shouldn't this be too? + private static String sanitizeTimestamp(long micros, long now) { String isPast = now > micros ? 
"ago" : "from-now"; long diff = Math.abs(now - micros); if (diff < FIVE_MINUTES_IN_MICROS) { @@ -613,17 +607,17 @@ private static String sanitizeString(CharSequence value, long now, int today) { Literal date = Literal.of(value).to(Types.DateType.get()); return sanitizeDate(date.value(), today); } else if (TIMESTAMP.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.microsWithoutZone()); - return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now); - } else if (TIMESTAMPNS.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithoutZone()); - return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now); + Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); + return sanitizeTimestamp(ts.value(), now); } else if (TIMESTAMPTZ.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.microsWithZone()); - return sanitizeTimestamp(Types.TimestampType.Unit.MICROS, ts.value(), now); + Literal ts = Literal.of(value).to(Types.TimestampType.withZone()); + return sanitizeTimestamp(ts.value(), now); + } else if (TIMESTAMPNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withoutZone()); + return sanitizeTimestamp(ts.value() / 1000, now); } else if (TIMESTAMPTZNS.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithZone()); - return sanitizeTimestamp(Types.TimestampType.Unit.NANOS, ts.value(), now); + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withZone()); + return sanitizeTimestamp(ts.value() / 1000, now); } else if (TIME.matcher(value).matches()) { return "(time)"; } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index aaa2f8efc15f..600484a0be3e 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ 
b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -24,6 +24,7 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -38,7 +39,6 @@ import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.NaNUtil; @@ -299,7 +299,9 @@ public Literal to(Type type) { case TIME: return (Literal) new TimeLiteral(value()); case TIMESTAMP: - return (Literal) new TimestampLiteral(((TimestampType) type).unit(), value()); + return (Literal) new TimestampLiteral(value()); + case TIMESTAMP_NANO: + return (Literal) new TimestampNanoLiteral(value()); case DATE: if ((long) Integer.MAX_VALUE < value()) { return aboveMax(); @@ -427,11 +429,8 @@ protected Type.TypeID typeId() { } static class TimestampLiteral extends ComparableLiteral { - private final TimestampType.Unit unit; - - TimestampLiteral(TimestampType.Unit unit, Long value) { + TimestampLiteral(Long value) { super(value); - this.unit = unit; } @Override @@ -439,43 +438,16 @@ static class TimestampLiteral extends ComparableLiteral { public Literal to(Type type) { switch (type.typeId()) { case TIMESTAMP: - TimestampType.Unit toUnit = ((TimestampType) type).unit(); - switch (unit) { - case MICROS: - switch (toUnit) { - case MICROS: - return (Literal) this; - case NANOS: - return (Literal) - new TimestampLiteral(unit, DateTimeUtil.microsToNanos(value())); - } - break; - case NANOS: - switch (toUnit) { - case MICROS: - return (Literal) - new TimestampLiteral(unit, DateTimeUtil.nanosToMicros(value())); - case NANOS: - return (Literal) this; - } - break; - } - break; + return (Literal) this; case DATE: - switch (unit) { - case MICROS: - 
return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); - case NANOS: - return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate())); - } + return (Literal) + new DateLiteral( + (int) + ChronoUnit.DAYS.between( + EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); + case TIMESTAMP_NANO: + return (Literal) new TimestampNanoLiteral(DateTimeUtil.microsToNanos(value())); + default: } return null; } @@ -484,9 +456,33 @@ public Literal to(Type type) { protected Type.TypeID typeId() { return Type.TypeID.TIMESTAMP; } + } + + static class TimestampNanoLiteral extends ComparableLiteral { + TimestampNanoLiteral(Long value) { + super(value); + } + + @Override + @SuppressWarnings("unchecked") + public Literal to(Type type) { + switch (type.typeId()) { + case DATE: + return (Literal) + new DateLiteral( + (int) ChronoUnit.DAYS.between(EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate())); + case TIMESTAMP: + return (Literal) new TimestampLiteral(DateTimeUtil.nanosToMicros(value())); + case TIMESTAMP_NANO: + return (Literal) this; + default: + } + return null; + } - protected TimestampType.Unit unit() { - return unit; + @Override + protected Type.TypeID typeId() { + return Type.TypeID.TIMESTAMP_NANO; } } @@ -538,17 +534,34 @@ public Literal to(Type type) { return (Literal) new TimeLiteral(timeMicros); case TIMESTAMP: - final TimestampType tsType = (TimestampType) type; - final String value = value().toString(); - final java.time.temporal.Temporal valueAsTemporal; - if (tsType.shouldAdjustToUTC()) { - valueAsTemporal = DateTimeUtil.isoTimestamptzToOffsetDateTime(value); + if (((Types.TimestampType) type).shouldAdjustToUTC()) { + long timestampMicros = + ChronoUnit.MICROS.between( + EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + return (Literal) new TimestampLiteral(timestampMicros); + } else { + 
long timestampMicros = + ChronoUnit.MICROS.between( + EPOCH, + LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .atOffset(ZoneOffset.UTC)); + return (Literal) new TimestampLiteral(timestampMicros); + } + + case TIMESTAMP_NANO: + if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { + long timestampNanos = + ChronoUnit.NANOS.between( + EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + return (Literal) new TimestampNanoLiteral(timestampNanos); } else { - valueAsTemporal = - DateTimeUtil.isoTimestampToLocalDateTime(value).atOffset(ZoneOffset.UTC); + long timestampNanos = + ChronoUnit.NANOS.between( + EPOCH, + LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .atOffset(ZoneOffset.UTC)); + return (Literal) new TimestampNanoLiteral(timestampNanos); } - final long timestampUnits = tsType.unit().between(EPOCH, valueAsTemporal); - return (Literal) new TimestampLiteral(tsType.unit(), timestampUnits); case STRING: return (Literal) this; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 912bcd271725..e91a8c6cc231 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -54,6 +54,7 @@ static & SerializableFunction> B get( return (B) new BucketInteger(numBuckets); case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: case LONG: return (B) new BucketLong(numBuckets); case DECIMAL: @@ -107,6 +108,7 @@ public boolean canTransform(Type type) { case DATE: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: case STRING: case BINARY: case FIXED: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index b4dee3749604..d351a7c387e4 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -38,7 
+38,9 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.DAYS); + return (Transform) Timestamps.DAY_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.DAY_FROM_NANOS; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -56,15 +58,20 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { + // TODO(epg): "I'd prefer to keep the logic of this check inside of the satisfiesOrderOf + // function" + // https://github.com/apache/iceberg/pull/9008#discussion_r1520360025 ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); return otherResultTypeUnit == ChronoUnit.DAYS || otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); - } else { - return other instanceof Days || other instanceof Months || other instanceof Years; + } else if (other instanceof Days || other instanceof Months || other instanceof Years) { + return true; } + + return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index 3ceeec9417a7..a5eaa01678a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -34,16 +34,19 @@ static Hours get() { @Override @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { - if (type.typeId() == Type.TypeID.TIMESTAMP) { - return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.HOURS); + switch (type.typeId()) { + case TIMESTAMP: + return (Transform) Timestamps.HOUR_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.HOUR_FROM_NANOS; + default: + throw new IllegalArgumentException("Unsupported type: " + type); } - - 
throw new IllegalArgumentException("Unsupported type: " + type); } @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override @@ -63,12 +66,14 @@ public boolean satisfiesOrderOf(Transform other) { || otherResultTypeUnit == ChronoUnit.DAYS || otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; - } else { - return other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years; + } else if (other instanceof Hours + || other instanceof Days + || other instanceof Months + || other instanceof Years) { + return true; } + + return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index cbdee19e03ea..ddfcd42dd641 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -38,8 +38,9 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - return (Transform) - Timestamps.get((Types.TimestampType) type, ChronoUnit.MONTHS); + return (Transform) Timestamps.MONTH_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.MONTH_FROM_NANOS; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -61,9 +62,11 @@ public boolean satisfiesOrderOf(Transform other) { return otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); - } else { - return other instanceof Months || other instanceof Years; + } else if (other instanceof Months || other instanceof Years) { + return true; } + + return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java 
b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index 01ea8130aa60..23ee38271e81 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -39,7 +39,9 @@ public boolean preservesOrder() { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.DATE || type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.DATE + || type.typeId() == Type.TypeID.TIMESTAMP + || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index bf203262afcc..642a67dae9ee 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -29,7 +29,6 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; @@ -44,7 +43,7 @@ class Timestamps implements Transform { static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS); static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS); - static Timestamps get(TimestampType type, String resultTypeUnit) { + static Timestamps get(Types.TimestampType type, String resultTypeUnit) { switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { case "year": return get(type, ChronoUnit.YEARS); @@ -60,9 +59,9 @@ static Timestamps get(TimestampType type, String resultTypeUnit) { } } - static Timestamps get(TimestampType type, ChronoUnit resultTypeUnit) { - switch (type.unit()) { - case MICROS: + static Timestamps get(Types.TimestampType type, ChronoUnit 
resultTypeUnit) { + switch (type.typeId()) { + case TIMESTAMP: switch (resultTypeUnit) { case YEARS: return YEAR_FROM_MICROS; @@ -74,7 +73,7 @@ static Timestamps get(TimestampType type, ChronoUnit resultTypeUnit) { return HOUR_FROM_MICROS; } break; - case NANOS: + case TIMESTAMP_NANO: switch (resultTypeUnit) { case YEARS: return YEAR_FROM_NANOS; @@ -87,8 +86,10 @@ static Timestamps get(TimestampType type, ChronoUnit resultTypeUnit) { } break; default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + type.unit()); + // `type` is out of range. + throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); } + // `resultTypeUnit` is out of range. throw new IllegalArgumentException( "Unsupported source/result type units: " + type + "->" + resultTypeUnit); } @@ -125,19 +126,7 @@ public Integer apply(Long timestampUnits) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - switch (resultTypeUnit) { - case YEARS: - return DateTimeUtil.nanosToYears(timestampUnits); - case MONTHS: - return DateTimeUtil.nanosToMonths(timestampUnits); - case DAYS: - return DateTimeUtil.nanosToDays(timestampUnits); - case HOURS: - return DateTimeUtil.nanosToHours(timestampUnits); - default: - throw new UnsupportedOperationException( - "Unsupported result type unit: " + resultTypeUnit); - } + return DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit); default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); @@ -164,7 +153,7 @@ public SerializableFunction bind(Type type) { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 0c5e7dd77d0f..78312b58b12f 100644 --- 
a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -24,7 +24,7 @@ import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SerializableFunction; /** @@ -176,7 +176,17 @@ default String toHumanString(Type type, T value) { case TIME: return TransformUtil.humanTime((Long) value); case TIMESTAMP: - return TransformUtil.humanTimestamp((TimestampType) type, (Long) value); + if (((Types.TimestampType) type).shouldAdjustToUTC()) { + return TransformUtil.humanTimestampWithZone((Long) value); + } else { + return TransformUtil.humanTimestampWithoutZone((Long) value); + } + case TIMESTAMP_NANO: + if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { + return TransformUtil.humanTimestampNanoWithZone((Long) value); + } else { + return TransformUtil.humanTimestampNanoWithoutZone((Long) value); + } case FIXED: case BINARY: if (value instanceof ByteBuffer) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 3679628a3b36..c70a926d58db 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -26,7 +26,6 @@ import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.Base64; -import org.apache.iceberg.types.Types; class TransformUtil { @@ -55,26 +54,20 @@ static String humanTime(Long microsFromMidnight) { return LocalTime.ofNanoOfDay(microsFromMidnight * 1000).toString(); } - public static String humanTimestamp(Types.TimestampType tsType, Long value) { - if (tsType.shouldAdjustToUTC()) { - switch (tsType.unit()) { - case MICROS: - return 
ChronoUnit.MICROS.addTo(EPOCH, value).toString(); - case NANOS: - return ChronoUnit.NANOS.addTo(EPOCH, value).toString(); - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); - } - } else { - switch (tsType.unit()) { - case MICROS: - return ChronoUnit.MICROS.addTo(EPOCH, value).toLocalDateTime().toString(); - case NANOS: - return ChronoUnit.NANOS.addTo(EPOCH, value).toLocalDateTime().toString(); - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); - } - } + static String humanTimestampWithZone(Long timestampMicros) { + return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); + } + + static String humanTimestampWithoutZone(Long timestampMicros) { + return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + } + + static String humanTimestampNanoWithZone(Long timestampMicros) { + return ChronoUnit.NANOS.addTo(EPOCH, timestampMicros).toString(); + } + + static String humanTimestampNanoWithoutZone(Long timestampMicros) { + return ChronoUnit.NANOS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); } static String humanHour(int hourOrdinal) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 69a91c328f5a..e83759c0b0f5 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -23,9 +23,9 @@ import java.util.regex.Pattern; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Strings; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types; /** * Factory methods for transforms. 
@@ -86,11 +86,12 @@ private Transforms() {} } try { - if (type.typeId() == Type.TypeID.TIMESTAMP) { - return Timestamps.get((TimestampType) type, transform); - } - if (type.typeId() == Type.TypeID.DATE) { - return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); + switch (type.typeId()) { + case TIMESTAMP: + case TIMESTAMP_NANO: + return Timestamps.get((Types.TimestampType) type, transform); + case DATE: + return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } } catch (IllegalArgumentException ignored) { // fall through to return unknown transform @@ -131,15 +132,9 @@ public static Transform year(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - TimestampType.Unit unit = ((TimestampType) type).unit(); - switch (unit) { - case MICROS: - return (Transform) Timestamps.YEAR_FROM_MICROS; - case NANOS: - return (Transform) Timestamps.YEAR_FROM_NANOS; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); - } + return (Transform) Timestamps.YEAR_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.YEAR_FROM_NANOS; default: throw new IllegalArgumentException("Cannot partition type " + type + " by year"); } @@ -160,15 +155,9 @@ public static Transform month(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - TimestampType.Unit unit = ((TimestampType) type).unit(); - switch (unit) { - case MICROS: - return (Transform) Timestamps.MONTH_FROM_MICROS; - case NANOS: - return (Transform) Timestamps.MONTH_FROM_NANOS; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); - } + return (Transform) Timestamps.MONTH_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.MONTH_FROM_NANOS; default: throw new IllegalArgumentException("Cannot partition type " + type + " by month"); } @@ -189,15 +178,9 @@ public static Transform day(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - TimestampType.Unit unit = 
((TimestampType) type).unit(); - switch (unit) { - case MICROS: - return (Transform) Timestamps.DAY_FROM_MICROS; - case NANOS: - return (Transform) Timestamps.DAY_FROM_NANOS; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); - } + return (Transform) Timestamps.DAY_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.DAY_FROM_NANOS; default: throw new IllegalArgumentException("Cannot partition type " + type + " by day"); } @@ -214,19 +197,15 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - if (Preconditions.checkNotNull(type.typeId(), "Type ID cannot be null") - == Type.TypeID.TIMESTAMP) { - TimestampType.Unit unit = ((TimestampType) type).unit(); - switch (unit) { - case MICROS: - return (Transform) Timestamps.HOUR_FROM_MICROS; - case NANOS: - return (Transform) Timestamps.HOUR_FROM_NANOS; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); - } + switch (type.typeId()) { + case TIMESTAMP: + return (Transform) Timestamps.HOUR_FROM_MICROS; + case TIMESTAMP_NANO: + return (Transform) Timestamps.HOUR_FROM_NANOS; + default: + throw new IllegalArgumentException( + Strings.lenientFormat("Cannot partition type %s by hour", type)); } - throw new IllegalArgumentException("Cannot partition type " + type + " by hour"); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index de81fabf7ec8..8eb43cd55c57 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -38,7 +38,9 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.YEARS); + return (Transform) Timestamps.YEAR_FROM_MICROS; + case TIMESTAMP_NANO: + return 
(Transform) Timestamps.YEAR_FROM_NANOS; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -59,9 +61,11 @@ public boolean satisfiesOrderOf(Transform other) { return ((Timestamps) other).getResultTypeUnit() == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); - } else { - return other instanceof Years; + } else if (other instanceof Years) { + return true; } + + return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index ddc52446e041..36664d3121cf 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -39,10 +39,10 @@ private Comparators() {} .put(Types.DoubleType.get(), Comparator.naturalOrder()) .put(Types.DateType.get(), Comparator.naturalOrder()) .put(Types.TimeType.get(), Comparator.naturalOrder()) - .put(Types.TimestampType.microsWithZone(), Comparator.naturalOrder()) - .put(Types.TimestampType.microsWithoutZone(), Comparator.naturalOrder()) - .put(Types.TimestampType.nanosWithZone(), Comparator.naturalOrder()) - .put(Types.TimestampType.nanosWithoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.withZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.withoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampNanoType.withZone(), Comparator.naturalOrder()) + .put(Types.TimestampNanoType.withoutZone(), Comparator.naturalOrder()) .put(Types.StringType.get(), Comparators.charSequences()) .put(Types.UUIDType.get(), Comparator.naturalOrder()) .put(Types.BinaryType.get(), Comparators.unsignedBytes()) diff --git a/api/src/main/java/org/apache/iceberg/types/Conversions.java b/api/src/main/java/org/apache/iceberg/types/Conversions.java index 1d2539514954..e18c7b4362e6 100644 --- a/api/src/main/java/org/apache/iceberg/types/Conversions.java +++ 
b/api/src/main/java/org/apache/iceberg/types/Conversions.java @@ -97,6 +97,7 @@ public static ByteBuffer toByteBuffer(Type.TypeID typeId, Object value) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, (long) value); case FLOAT: return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(0, (float) value); @@ -146,6 +147,7 @@ private static Object internalFromByteBuffer(Type type, ByteBuffer buffer) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: if (tmp.remaining() < 8) { // type was later promoted to long return (long) tmp.getInt(); diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 5062b54d10e1..571bf9a14e43 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -37,6 +37,7 @@ enum TypeID { DATE(Integer.class), TIME(Long.class), TIMESTAMP(Long.class), + TIMESTAMP_NANO(Long.class), STRING(CharSequence.class), UUID(java.util.UUID.class), FIXED(ByteBuffer.class), diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 7c13d6094084..61cd1c4f1f6a 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -485,6 +485,7 @@ private static int estimateSize(Type type) { case DOUBLE: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: // longs and doubles occupy 8 bytes // times and timestamps are internally represented as longs return 8; diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index a27e2ac9a046..e914a55ebc10 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -19,8 +19,6 @@ package org.apache.iceberg.types; 
import java.io.Serializable; -import java.time.temporal.ChronoUnit; -import java.time.temporal.Temporal; import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -49,11 +47,10 @@ private Types() {} .put(DoubleType.get().toString(), DoubleType.get()) .put(DateType.get().toString(), DateType.get()) .put(TimeType.get().toString(), TimeType.get()) - // TODO(epg): Replace next two with non-deprecated micros methods. .put(TimestampType.withZone().toString(), TimestampType.withZone()) .put(TimestampType.withoutZone().toString(), TimestampType.withoutZone()) - .put(TimestampType.nanosWithZone().toString(), TimestampType.nanosWithZone()) - .put(TimestampType.nanosWithoutZone().toString(), TimestampType.nanosWithoutZone()) + .put(TimestampNanoType.withZone().toString(), TimestampNanoType.withZone()) + .put(TimestampNanoType.withoutZone().toString(), TimestampNanoType.withoutZone()) .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) @@ -211,89 +208,92 @@ public String toString() { } public static class TimestampType extends PrimitiveType { - public enum Unit { - MICROS(ChronoUnit.MICROS), - NANOS(ChronoUnit.NANOS), - ; + private static final TimestampType INSTANCE_WITH_ZONE = new TimestampType(true); + private static final TimestampType INSTANCE_WITHOUT_ZONE = new TimestampType(false); - public long between(Temporal temporal1Inclusive, Temporal temporal2Exclusive) { - return unit.between(temporal1Inclusive, temporal2Exclusive); - } + public static TimestampType withZone() { + return INSTANCE_WITH_ZONE; + } - Unit(final ChronoUnit unit) { - this.unit = unit; - } + public static TimestampType withoutZone() { + return INSTANCE_WITHOUT_ZONE; + } + + private final boolean adjustToUTC; - private final ChronoUnit unit; + private TimestampType(boolean adjustToUTC) { + this.adjustToUTC = adjustToUTC; } - private static final TimestampType INSTANCE_MICROS_WITH_ZONE = 
- new TimestampType(true, Unit.MICROS); - private static final TimestampType INSTANCE_MICROS_WITHOUT_ZONE = - new TimestampType(false, Unit.MICROS); - private static final TimestampType INSTANCE_NANOS_WITH_ZONE = - new TimestampType(true, Unit.NANOS); - private static final TimestampType INSTANCE_NANOS_WITHOUT_ZONE = - new TimestampType(false, Unit.NANOS); + public boolean shouldAdjustToUTC() { + return adjustToUTC; + } - /** @deprecated for removal in 2.0; use {@link #microsWithZone()} instead. */ - @Deprecated - public static TimestampType withZone() { - return INSTANCE_MICROS_WITH_ZONE; + @Override + public TypeID typeId() { + return TypeID.TIMESTAMP; } - /** @deprecated for removal in 2.0; use {@link #microsWithoutZone()} instead. */ - @Deprecated - public static TimestampType withoutZone() { - return INSTANCE_MICROS_WITHOUT_ZONE; + @Override + public String toString() { + if (shouldAdjustToUTC()) { + return "timestamptz"; + } else { + return "timestamp"; + } } - public static TimestampType microsWithZone() { - return INSTANCE_MICROS_WITH_ZONE; + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof TimestampType)) { + return false; + } + + TimestampType timestampType = (TimestampType) o; + return adjustToUTC == timestampType.adjustToUTC; } - public static TimestampType microsWithoutZone() { - return INSTANCE_MICROS_WITHOUT_ZONE; + @Override + public int hashCode() { + return Objects.hash(TimestampType.class, adjustToUTC); } + } - public static TimestampType nanosWithZone() { - return INSTANCE_NANOS_WITH_ZONE; + public static class TimestampNanoType extends PrimitiveType { + private static final TimestampNanoType INSTANCE_WITH_ZONE = new TimestampNanoType(true); + private static final TimestampNanoType INSTANCE_WITHOUT_ZONE = new TimestampNanoType(false); + + public static TimestampNanoType withZone() { + return INSTANCE_WITH_ZONE; } - public static TimestampType nanosWithoutZone() { - return 
INSTANCE_NANOS_WITHOUT_ZONE; + public static TimestampNanoType withoutZone() { + return INSTANCE_WITHOUT_ZONE; } private final boolean adjustToUTC; - private final Unit unit; - private TimestampType(boolean adjustToUTC, Unit unit) { + private TimestampNanoType(boolean adjustToUTC) { this.adjustToUTC = adjustToUTC; - this.unit = unit; } public boolean shouldAdjustToUTC() { return adjustToUTC; } - public Unit unit() { - return unit; - } - @Override public TypeID typeId() { - return TypeID.TIMESTAMP; + return TypeID.TIMESTAMP_NANO; } @Override public String toString() { - switch (unit) { - case MICROS: - return shouldAdjustToUTC() ? "timestamptz" : "timestamp"; - case NANOS: - return shouldAdjustToUTC() ? "timestamptz_ns" : "timestamp_ns"; - default: - throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + if (shouldAdjustToUTC()) { + return "timestamptz_ns"; + } else { + return "timestamp_ns"; } } @@ -301,17 +301,17 @@ public String toString() { public boolean equals(Object o) { if (this == o) { return true; - } else if (!(o instanceof TimestampType)) { + } else if (!(o instanceof TimestampNanoType)) { return false; } - TimestampType timestampType = (TimestampType) o; - return adjustToUTC == timestampType.adjustToUTC && unit == timestampType.unit; + TimestampNanoType that = (TimestampNanoType) o; + return adjustToUTC == that.adjustToUTC; } @Override public int hashCode() { - return Objects.hash(TimestampType.class, adjustToUTC, unit); + return Objects.hash(TimestampNanoType.class, adjustToUTC); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 902328d820e6..08116311a9ce 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -33,12 +33,10 @@ private DateTimeUtil() {} public static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); 
public static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); - public static final long MICROS_PER_MILLIS = 1_000L; - public static final long MILLIS_PER_SECOND = 1_000L; + public static final long MICROS_PER_MILLIS = 1000L; public static final long MICROS_PER_SECOND = 1_000_000L; - public static final long NANOS_PER_SECOND = 1_000_000_000L; - public static final long NANOS_PER_MILLI = 1_000_000L; - public static final long NANOS_PER_MICRO = 1_000L; + private static final long NANOS_PER_SECOND = 1_000_000_000L; + private static final long NANOS_PER_MICRO = 1_000L; public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); @@ -64,26 +62,14 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } - public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { - return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); - } - public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } - public static long nanosFromInstant(Instant instant) { - return ChronoUnit.NANOS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); - } - public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } - public static long nanosFromTimestamp(LocalDateTime dateTime) { - return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); - } - public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. 
@@ -91,10 +77,6 @@ public static long microsToMillis(long micros) { return Math.floorDiv(micros, MICROS_PER_MILLIS); } - public static long nanosToMillis(long nanos) { - return Math.floorDiv(nanos, NANOS_PER_MILLI); - } - public static long nanosToMicros(long nanos) { return Math.floorDiv(nanos, NANOS_PER_MICRO); } @@ -103,26 +85,14 @@ public static long microsToNanos(long micros) { return Math.multiplyExact(micros, NANOS_PER_MICRO); } - public static long millisToNanos(long millis) { - return Math.multiplyExact(millis, NANOS_PER_MILLI); - } - public static OffsetDateTime timestamptzFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch); } - public static OffsetDateTime timestamptzFromNanos(long nanosFromEpoch) { - return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch); - } - public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } - public static long nanosFromTimestamptz(OffsetDateTime dateTime) { - return ChronoUnit.NANOS.between(EPOCH, dateTime); - } - public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -137,24 +107,7 @@ public static String microsToIsoTime(long micros) { public static String microsToIsoTimestamptz(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); - DateTimeFormatter zeroOffsetFormatter = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .appendOffset("+HH:MM:ss", "+00:00") - .toFormatter(); - return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); - } - - public static String nanosToIsoTimestamptz(long nanos) { - LocalDateTime localDateTime = timestampFromNanos(nanos); - DateTimeFormatter zeroOffsetFormatter = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .appendOffset("+HH:MM:ss", "+00:00") - 
.toFormatter(); - return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); + return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); } public static String microsToIsoTimestamp(long micros) { @@ -162,11 +115,6 @@ public static String microsToIsoTimestamp(long micros) { return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } - public static String nanosToIsoTimestamp(long nanos) { - LocalDateTime localDateTime = timestampFromNanos(nanos); - return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); - } - public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -176,32 +124,19 @@ public static long isoTimeToMicros(String timeString) { } public static long isoTimestamptzToMicros(String timestampString) { - return microsFromTimestamptz(isoTimestamptzToOffsetDateTime(timestampString)); - } - - public static OffsetDateTime isoTimestamptzToOffsetDateTime(String timestamp) { - return OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_DATE_TIME); - } - - public static LocalDateTime isoTimestampToLocalDateTime(String timestamp) { - return LocalDateTime.parse(timestamp, DateTimeFormatter.ISO_LOCAL_DATE_TIME); - } - - public static long isoTimestamptzToNanos(String timestampString) { - return nanosFromTimestamptz(isoTimestamptzToOffsetDateTime(timestampString)); + return microsFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } public static boolean isUTCTimestamptz(String timestampString) { - OffsetDateTime offsetDateTime = isoTimestamptzToOffsetDateTime(timestampString); + OffsetDateTime offsetDateTime = + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); return offsetDateTime.getOffset().equals(ZoneOffset.UTC); } public static long isoTimestampToMicros(String timestampString) { - return microsFromTimestamp(isoTimestampToLocalDateTime(timestampString)); - } - - public static 
long isoTimestampToNanos(String timestampString) { - return nanosFromTimestamp(isoTimestampToLocalDateTime(timestampString)); + return microsFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } public static int daysToYears(int days) { @@ -228,67 +163,33 @@ public static int microsToYears(long micros) { return convertMicros(micros, ChronoUnit.YEARS); } - public static int nanosToYears(long nanos) { - return convertNanos(nanos, ChronoUnit.YEARS); - } - public static int microsToMonths(long micros) { return convertMicros(micros, ChronoUnit.MONTHS); } - public static int nanosToMonths(long nanos) { - return convertNanos(nanos, ChronoUnit.MONTHS); - } - public static int microsToDays(long micros) { return convertMicros(micros, ChronoUnit.DAYS); } - public static int nanosToDays(long nanos) { - return convertNanos(nanos, ChronoUnit.DAYS); - } - - public static int millisToHours(long millis) { - return convertMillis(millis, ChronoUnit.HOURS); - } - public static int microsToHours(long micros) { return convertMicros(micros, ChronoUnit.HOURS); } - public static int nanosToHours(long nanos) { - return convertNanos(nanos, ChronoUnit.HOURS); - } - - private static int convertMillis(long millis, ChronoUnit granularity) { - if (millis >= 0) { - long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); - long nanoAdjustment = Math.floorMod(millis, MILLIS_PER_SECOND) * NANOS_PER_MILLI; - return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); - } else { - // add 1 milli to the value to account for the case where there is exactly 1 unit between - // the timestamp and epoch because the result will always be decremented. 
- long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); - long nanoAdjustment = Math.floorMod(millis + 1, MILLIS_PER_SECOND) * NANOS_PER_MILLI; - return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; - } - } - private static int convertMicros(long micros, ChronoUnit granularity) { if (micros >= 0) { long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * NANOS_PER_MICRO; + long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * 1000; return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); } else { // add 1 micro to the value to account for the case where there is exactly 1 unit between // the timestamp and epoch because the result will always be decremented. long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * NANOS_PER_MICRO; + long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * 1000; return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; } } - private static int convertNanos(long nanos, ChronoUnit granularity) { + public static int convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); @@ -305,4 +206,11 @@ private static int convertNanos(long nanos, ChronoUnit granularity) { private static OffsetDateTime toOffsetDateTime(long epochSecond, long nanoAdjustment) { return Instant.ofEpochSecond(epochSecond, nanoAdjustment).atOffset(ZoneOffset.UTC); } + + private static final DateTimeFormatter FORMATTER = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); } diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java 
b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java index 369a3a842224..11f2cb353880 100644 --- a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -29,13 +29,13 @@ public class PartitionSpecTestBase { Types.NestedField.required(2, "l", Types.LongType.get()), Types.NestedField.required(3, "d", Types.DateType.get()), Types.NestedField.required(4, "t", Types.TimeType.get()), - Types.NestedField.required(5, "ts", Types.TimestampType.microsWithoutZone()), + Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()), Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)), Types.NestedField.required(7, "s", Types.StringType.get()), Types.NestedField.required(8, "u", Types.UUIDType.get()), Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), Types.NestedField.required(10, "b", Types.BinaryType.get()), - Types.NestedField.required(11, "tsn", Types.TimestampType.nanosWithoutZone())); + Types.NestedField.required(11, "tsn", Types.TimestampNanoType.withoutZone())); // a spec with all of the allowed transform/type pairs public static final PartitionSpec[] SPECS = diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 233c8c508239..7b4feb845f12 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -178,10 +178,10 @@ public void testTime() { @Test public void testTimestamp() { - assertAccessorReturns(Types.TimestampType.microsWithoutZone(), 123L); - assertAccessorReturns(Types.TimestampType.microsWithZone(), 123L); - assertAccessorReturns(Types.TimestampType.nanosWithoutZone(), 123L); - assertAccessorReturns(Types.TimestampType.nanosWithZone(), 123L); + assertAccessorReturns(Types.TimestampType.withoutZone(), 123L); + assertAccessorReturns(Types.TimestampType.withZone(), 123L); + 
assertAccessorReturns(Types.TimestampNanoType.withoutZone(), 123L); + assertAccessorReturns(Types.TimestampNanoType.withZone(), 123L); } @Test diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java index 6bee8aedbf17..59c9937f5ddd 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java @@ -32,7 +32,7 @@ public class TestPartitionPaths { new Schema( Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()), - Types.NestedField.optional(3, "ts", Types.TimestampType.microsWithoutZone())); + Types.NestedField.optional(3, "ts", Types.TimestampType.withoutZone())); @Test public void testPartitionPath() { @@ -42,8 +42,8 @@ public void testPartitionPath() { Transform bucket = Transforms.bucket(10); Literal ts = - Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.microsWithoutZone()); - Object tsHour = hour.bind(Types.TimestampType.microsWithoutZone()).apply(ts.value()); + Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.withoutZone()); + Object tsHour = hour.bind(Types.TimestampType.withoutZone()).apply(ts.value()); Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); Row partition = Row.of(tsHour, idBucket); @@ -61,8 +61,8 @@ public void testPartitionPathWithNanoseconds() { Transform bucket = Transforms.bucket(10); Literal ts = - Literal.of("2017-12-01T10:12:55.038194789").to(Types.TimestampType.nanosWithoutZone()); - Object tsHour = hour.bind(Types.TimestampType.nanosWithoutZone()).apply(ts.value()); + Literal.of("2017-12-01T10:12:55.038194789").to(Types.TimestampNanoType.withoutZone()); + Object tsHour = hour.bind(Types.TimestampNanoType.withoutZone()).apply(ts.value()); Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); Row partition = Row.of(tsHour, idBucket); diff --git 
a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index a621ccab81e5..eb0e74164688 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -30,10 +30,10 @@ public class TestPartitionSpecValidation { private static final Schema SCHEMA = new Schema( NestedField.required(1, "id", Types.LongType.get()), - NestedField.required(2, "ts", Types.TimestampType.microsWithZone()), - NestedField.required(3, "another_ts", Types.TimestampType.microsWithZone()), - NestedField.required(4, "d", Types.TimestampType.microsWithZone()), - NestedField.required(5, "another_d", Types.TimestampType.microsWithZone()), + NestedField.required(2, "ts", Types.TimestampType.withZone()), + NestedField.required(3, "another_ts", Types.TimestampType.withZone()), + NestedField.required(4, "d", Types.TimestampType.withZone()), + NestedField.required(5, "another_d", Types.TimestampType.withZone()), NestedField.required(6, "s", Types.StringType.get())); @Test diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java index 5638ef6c31b5..c91308b975af 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java @@ -42,8 +42,8 @@ public class TestExpressionUtil { Types.NestedField.required(1, "id", Types.LongType.get()), Types.NestedField.required(2, "val", Types.IntegerType.get()), Types.NestedField.required(3, "val2", Types.IntegerType.get()), - Types.NestedField.required(4, "ts", Types.TimestampType.microsWithoutZone()), - Types.NestedField.required(5, "tsns", Types.TimestampType.nanosWithoutZone()), + Types.NestedField.required(4, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.required(5, 
"tsns", Types.TimestampNanoType.withoutZone()), Types.NestedField.required(6, "date", Types.DateType.get()), Types.NestedField.required(7, "time", Types.DateType.get()), Types.NestedField.optional(8, "data", Types.StringType.get()), @@ -499,14 +499,14 @@ public void testSanitizeTimestampAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowLocal).to(Types.TimestampType.microsWithoutZone())))); + Literal.of(nowLocal).to(Types.TimestampType.withoutZone())))); assertEquals( Expressions.equal("test", "(timestamp-about-now)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowLocal).to(Types.TimestampType.nanosWithoutZone())))); + Literal.of(nowLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -532,14 +532,14 @@ public void testSanitizeTimestampPast() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.microsWithoutZone())))); + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.withoutZone())))); assertEquals( Expressions.equal("test", "(timestamp-1-hours-ago)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.nanosWithoutZone())))); + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -565,14 +565,14 @@ public void testSanitizeTimestampLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekLocal).to(Types.TimestampType.microsWithoutZone())))); + Literal.of(lastWeekLocal).to(Types.TimestampType.withoutZone())))); assertEquals( 
Expressions.equal("test", "(timestamp-7-days-ago)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekLocal).to(Types.TimestampType.nanosWithoutZone())))); + Literal.of(lastWeekLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -598,15 +598,14 @@ public void testSanitizeTimestampFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowLocal) - .to(Types.TimestampType.microsWithoutZone())))); + Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.withoutZone())))); assertEquals( Expressions.equal("test", "(timestamp-1-hours-from-now)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.nanosWithoutZone())))); + Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat( ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowLocal))) @@ -629,14 +628,14 @@ public void testSanitizeTimestamptzAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowUtc).to(Types.TimestampType.microsWithZone())))); + Literal.of(nowUtc).to(Types.TimestampType.withZone())))); assertEquals( Expressions.equal("test", "(timestamp-about-now)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowUtc).to(Types.TimestampType.nanosWithZone())))); + Literal.of(nowUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -657,14 +656,14 @@ public void testSanitizeTimestamptzPast() { Expressions.predicate( Expression.Operation.EQ, "test", - 
Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.microsWithZone())))); + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.withZone())))); assertEquals( Expressions.equal("test", "(timestamp-1-hours-ago)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.nanosWithZone())))); + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -685,14 +684,14 @@ public void testSanitizeTimestamptzLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekUtc).to(Types.TimestampType.microsWithZone())))); + Literal.of(lastWeekUtc).to(Types.TimestampType.withZone())))); assertEquals( Expressions.equal("test", "(timestamp-7-days-ago)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekUtc).to(Types.TimestampType.nanosWithZone())))); + Literal.of(lastWeekUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -713,14 +712,14 @@ public void testSanitizeTimestamptzFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.microsWithZone())))); + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.withZone())))); assertEquals( Expressions.equal("test", "(timestamp-1-hours-from-now)"), ExpressionUtil.sanitize( Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.nanosWithZone())))); + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampNanoType.withZone())))); 
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowUtc))) .as("Sanitized string should be identical except for descriptive literal") diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java index fcb031c27aa8..24fc458b37b4 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java @@ -38,10 +38,10 @@ public void testLiterals() throws Exception { Literal.of(8.75D), Literal.of("2017-11-29").to(Types.DateType.get()), Literal.of("11:30:07").to(Types.TimeType.get()), - Literal.of("2017-11-29T11:30:07.123456").to(Types.TimestampType.microsWithoutZone()), - Literal.of("2017-11-29T11:30:07.123456+01:00").to(Types.TimestampType.microsWithZone()), - Literal.of("2017-11-29T11:30:07.123456789").to(Types.TimestampType.nanosWithoutZone()), - Literal.of("2017-11-29T11:30:07.123456789+01:00").to(Types.TimestampType.nanosWithZone()), + Literal.of("2017-11-29T11:30:07.123456").to(Types.TimestampType.withoutZone()), + Literal.of("2017-11-29T11:30:07.123456+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123456789").to(Types.TimestampNanoType.withoutZone()), + Literal.of("2017-11-29T11:30:07.123456789+01:00").to(Types.TimestampNanoType.withZone()), Literal.of("abc"), Literal.of(UUID.randomUUID()), Literal.of(new byte[] {1, 2, 3}).to(Types.FixedType.ofLength(3)), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index 86aa9ea07c63..4a6f550eb9ce 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -42,11 +42,9 @@ public void 
testIdentityConversions() { Pair.of(Literal.of("34.55"), Types.DecimalType.of(9, 2)), Pair.of(Literal.of("2017-08-18"), Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), + Pair.of(Literal.of("2017-08-18T14:21:01.919432"), Types.TimestampType.withoutZone()), Pair.of( - Literal.of("2017-08-18T14:21:01.919432"), Types.TimestampType.microsWithoutZone()), - Pair.of( - Literal.of("2017-08-18T14:21:01.919432755"), - Types.TimestampType.nanosWithoutZone()), + Literal.of("2017-08-18T14:21:01.919432755"), Types.TimestampNanoType.withoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), @@ -69,7 +67,7 @@ public void testIdentityConversions() { @Test public void testTimestampWithMicrosecondsToDate() { final Literal micros = - Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.microsWithoutZone()); + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.withoutZone()); final Literal dateOfNanos = micros.to(Types.DateType.get()); assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); } @@ -77,7 +75,7 @@ public void testTimestampWithMicrosecondsToDate() { @Test public void testTimestampWithNanoosecondsToDate() { final Literal nanos = - Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.nanosWithoutZone()); + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampNanoType.withoutZone()); final Literal dateOfNanos = nanos.to(Types.DateType.get()); assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); } @@ -119,10 +117,10 @@ public void testInvalidBooleanConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), 
+ Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -136,10 +134,10 @@ public void testInvalidIntegerConversions() { Literal.of(34), Types.BooleanType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -166,10 +164,10 @@ public void testInvalidFloatConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -185,10 +183,10 @@ public void testInvalidDoubleConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -205,10 +203,10 @@ public void testInvalidDateConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - 
Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -226,10 +224,10 @@ public void testInvalidTimeConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.DateType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -240,7 +238,7 @@ public void testInvalidTimeConversions() { @Test public void testInvalidTimestampMicrosConversions() { testInvalidConversions( - Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithoutZone()), + Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.withoutZone()), Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), @@ -257,7 +255,7 @@ public void testInvalidTimestampMicrosConversions() { @Test public void testInvalidTimestampNanosConversions() { testInvalidConversions( - Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithoutZone()), + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampNanoType.withoutZone()), Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), @@ -282,10 +280,10 @@ public void testInvalidDecimalConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), 
+ Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -318,10 +316,10 @@ public void testInvalidUUIDConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.FixedType.ofLength(1), @@ -339,10 +337,10 @@ public void testInvalidFixedConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -360,10 +358,10 @@ public void testInvalidBinaryConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index 678e48632e92..914776073c4d 100644 --- 
a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -101,7 +101,7 @@ public void testStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 14, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -112,7 +112,7 @@ public void testStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("2017-08-18T14:21:01.919"); - timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); + timestamp = timestampStr.to(Types.TimestampType.withoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -120,7 +120,7 @@ public void testStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("2017-08-18T14:21:01.919-07:00"); - timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); + timestamp = timestampStr.to(Types.TimestampType.withZone()); avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 21, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -140,7 +140,7 @@ public void testStringToTimestampLiteralWithMicrosecondPrecisionFromNanoseconds( new TimeConversions.TimestampMicrosConversion(); Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); - Literal timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.withoutZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 14, 21, 1, 123456000).toInstant(ZoneOffset.UTC), @@ -154,11 
+154,13 @@ public void testStringToTimestampLiteralWithMicrosecondPrecisionFromNanoseconds( @Test public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() { + // TODO(epg): "Test that Avro produces a value within 1 micro of this" ??? + // https://github.com/apache/iceberg/pull/9008#discussion_r1520435413 // Not using Avro's timestamp conversion as it has no timestampNanos(). long expected = 1503066061123456789L; Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); - Literal timestamp = timestampStr.to(Types.TimestampType.nanosWithoutZone()); + Literal timestamp = timestampStr.to(Types.TimestampNanoType.withoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") .isEqualTo(expected); @@ -173,7 +175,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("1969-12-31T23:59:58.999999+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -188,7 +190,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("1969-12-31T23:59:58.999999"); - timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); + timestamp = timestampStr.to(Types.TimestampType.withoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -196,7 +198,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("1969-12-31T16:59:58.999999-07:00"); - timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); + timestamp = 
timestampStr.to(Types.TimestampType.withZone()); avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -214,12 +216,12 @@ public void testNegativeStringToTimestampLiteral() { public void testTimestampWithZoneWithoutZoneInLiteral() { // Zone must be present in literals when converting to timestamp with zone Assertions.assertThatThrownBy( - () -> Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithZone())) + () -> Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.withZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); Assertions.assertThatThrownBy( () -> - Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithZone())) + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampNanoType.withZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } @@ -230,13 +232,13 @@ public void testTimestampWithoutZoneWithZoneInLiteral() { Assertions.assertThatThrownBy( () -> Literal.of("2017-08-18T14:21:01.919123+07:00") - .to(Types.TimestampType.microsWithoutZone())) + .to(Types.TimestampType.withoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); Assertions.assertThatThrownBy( () -> Literal.of("2017-08-18T14:21:01.919123456+07:00") - .to(Types.TimestampType.nanosWithoutZone())) + .to(Types.TimestampNanoType.withoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index c76ac98612b9..0313a8fe6098 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -107,61 +107,60 @@ public void testSpecValues() { .isEqualTo(-662762989); Literal 
timestampVal = - Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.microsWithoutZone()); + Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.withoutZone()); assertThat(BucketUtil.hash(timestampVal.value())) .as("Spec example: hash(2017-11-16T22:31:08) = -2047944441") .isEqualTo(-2047944441); - timestampVal = - Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.microsWithoutZone()); + timestampVal = Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.withoutZone()); assertThat(BucketUtil.hash(timestampVal.value())) .as("Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810") .isEqualTo(-1207196810); Literal timestamptzVal = - Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.microsWithZone()); + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.withZone()); assertThat(BucketUtil.hash(timestamptzVal.value())) .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441") .isEqualTo(-2047944441); timestamptzVal = - Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.microsWithZone()); + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.withZone()); assertThat(BucketUtil.hash(timestamptzVal.value())) .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810") .isEqualTo(-1207196810); Literal timestampNsVal = - Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.nanosWithoutZone()); + Literal.of("2017-11-16T22:31:08").to(Types.TimestampNanoType.withoutZone()); assertThat(BucketUtil.hash(timestampNsVal.value())) .as("Spec example: hash(2017-11-16T22:31:08) = -737750069") .isEqualTo(-737750069); timestampNsVal = - Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.nanosWithoutZone()); + Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampNanoType.withoutZone()); assertThat(BucketUtil.hash(timestampNsVal.value())) .as("Spec example: hash(2017-11-16T22:31:08.000001) = -976603392") .isEqualTo(-976603392); timestampNsVal = - 
Literal.of("2017-11-16T22:31:08.000000001").to(Types.TimestampType.nanosWithoutZone()); + Literal.of("2017-11-16T22:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); assertThat(BucketUtil.hash(timestampNsVal.value())) - .as("Spec example: hash(2017-11-16T22:31:08.000000001) = -160215926") + .as("hash(2017-11-16T22:31:08.000000001) = -160215926") .isEqualTo(-160215926); Literal timestamptzNsVal = - Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.nanosWithZone()); + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampNanoType.withZone()); assertThat(BucketUtil.hash(timestamptzNsVal.value())) .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -737750069") .isEqualTo(-737750069); timestamptzNsVal = - Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.nanosWithZone()); + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampNanoType.withZone()); assertThat(BucketUtil.hash(timestamptzNsVal.value())) .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -976603392") .isEqualTo(-976603392); timestamptzNsVal = - Literal.of("2017-11-16T14:31:08.000000001-08:00").to(Types.TimestampType.nanosWithZone()); + Literal.of("2017-11-16T14:31:08.000000001-08:00").to(Types.TimestampNanoType.withZone()); assertThat(BucketUtil.hash(timestamptzNsVal.value())) .as("Spec example: hash(2017-11-16T14:31:08.000000001-08:00) = -160215926") .isEqualTo(-160215926); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 8d651bf618dd..6101fdf0986d 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -98,7 +98,7 @@ public void testTimeHumanString() { @Test public void testTimestampWithZoneHumanString() { - Types.TimestampType timestamptz = Types.TimestampType.microsWithZone(); + Types.TimestampType timestamptz = 
Types.TimestampType.withZone(); Transform identity = Transforms.identity(); Literal ts = Literal.of("2017-12-01T10:12:55.038194-08:00").to(timestamptz); @@ -111,7 +111,7 @@ public void testTimestampWithZoneHumanString() { @Test public void testTimestampWithoutZoneHumanString() { - Types.TimestampType timestamp = Types.TimestampType.microsWithoutZone(); + Types.TimestampType timestamp = Types.TimestampType.withoutZone(); Transform identity = Transforms.identity(); String tsString = "2017-12-01T10:12:55.038194"; diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java index ffc48fc0e9aa..ccfda895f9f1 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java @@ -268,10 +268,10 @@ public void testBadSparkPartitionFilter() { public void testProjectionNames() { final Schema schema = new Schema( - required(1, "timestamp1", Types.TimestampType.microsWithoutZone()), - optional(2, "timestamp2", Types.TimestampType.microsWithoutZone()), - optional(3, "timestamp3", Types.TimestampType.microsWithoutZone()), - optional(4, "timestamp4", Types.TimestampType.microsWithoutZone()), + required(1, "timestamp1", Types.TimestampType.withoutZone()), + optional(2, "timestamp2", Types.TimestampType.withoutZone()), + optional(3, "timestamp3", Types.TimestampType.withoutZone()), + optional(4, "timestamp4", Types.TimestampType.withoutZone()), optional(5, "date1", Types.DateType.get()), optional(6, "date2", Types.DateType.get()), optional(7, "date3", Types.DateType.get()), diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java index 87bdd6944a73..fa3436e5701b 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java @@ 
-199,23 +199,19 @@ public void testIn() { public void testInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), + Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000") - .to(Types.TimestampType.microsWithoutZone()) - .value(); + Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); Long date20191202 = (Long) - Literal.of("2019-12-02T00:00:00.00000") - .to(Types.TimestampType.microsWithoutZone()) - .value(); + Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = Transforms.day().bind(Types.TimestampType.microsWithoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = in("ts", date20191201, date20191202); @@ -311,23 +307,19 @@ public void testNotNaN() { public void testNotInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), + Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000") - .to(Types.TimestampType.microsWithoutZone()) - .value(); + Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); Long date20191202 = (Long) - Literal.of("2019-12-02T00:00:00.00000") - .to(Types.TimestampType.microsWithoutZone()) - .value(); + Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = 
Transforms.day().bind(Types.TimestampType.microsWithoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = notIn("ts", date20191201, date20191202); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index dee5d8265b04..3c37e643eb95 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -29,7 +29,7 @@ public class TestTimestamps { @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { - Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -61,7 +61,7 @@ public void testDeprecatedTimestampTransform() { @Test public void testTimestampTransform() { - Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -105,7 +105,7 @@ public void testTimestampTransform() { @Test public void testTimestampWithoutZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal date = Literal.of("2017-12-01T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -125,7 +125,7 @@ public void testTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanString() { - 
Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal date = Literal.of("1969-12-30T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -145,7 +145,7 @@ public void testNegativeTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { - Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal date = Literal.of("1969-12-30T00:00:00.000000").to(type); Transform year = Transforms.year(); @@ -165,7 +165,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { @Test public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { - Types.TimestampType type = Types.TimestampType.microsWithoutZone(); + Types.TimestampType type = Types.TimestampType.withoutZone(); Literal date = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform year = Transforms.year(); @@ -185,7 +185,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { @Test public void testTimestampWithZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.microsWithZone(); + Types.TimestampType type = Types.TimestampType.withZone(); Literal date = Literal.of("2017-12-01T10:12:55.038194-08:00").to(type); Transform year = Transforms.year(); @@ -206,7 +206,7 @@ public void testTimestampWithZoneToHumanString() { @Test public void testNullHumanString() { - Types.TimestampType type = Types.TimestampType.microsWithZone(); + Types.TimestampType type = Types.TimestampType.withZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") .isEqualTo("null"); @@ -223,7 +223,7 @@ public void testNullHumanString() { @Test public void testTimestampsReturnType() { - Types.TimestampType type = Types.TimestampType.microsWithZone(); + Types.TimestampType type = 
Types.TimestampType.withZone(); Transform year = Transforms.year(); Type yearResultType = year.getResultType(type); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java index dc0f199db132..cd20868a06eb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java @@ -44,7 +44,7 @@ import org.junit.jupiter.api.Test; public class TestTimestampsProjection { - private static final Types.TimestampType TYPE = Types.TimestampType.microsWithoutZone(); + private static final Types.TimestampType TYPE = Types.TimestampType.withoutZone(); private static final Schema SCHEMA = new Schema(optional(1, "timestamp", TYPE)); @SuppressWarnings("unchecked") diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java index 70b5a16e3bb3..c2330247fa9d 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java @@ -39,8 +39,8 @@ public void testFunctionSerialization() throws Exception { Types.StringType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithoutZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withoutZone(), Types.BinaryType.get(), Types.FixedType.ofLength(4), Types.DecimalType.of(9, 4), diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index a04b039e5d65..89007480cb0f 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -75,10 +75,10 @@ public 
void testTime() { @Test public void testTimestamp() { - assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithoutZone()), 111, 222); - assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithZone()), 111, 222); - assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithoutZone()), 111, 222); - assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.withoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withZone()), 111, 222); } @Test diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 464f99652c60..00dc2f5df260 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -37,6 +37,7 @@ import org.apache.iceberg.types.Types.LongType; import org.apache.iceberg.types.Types.StringType; import org.apache.iceberg.types.Types.TimeType; +import org.apache.iceberg.types.Types.TimestampNanoType; import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.types.Types.UUIDType; import org.junit.jupiter.api.Test; @@ -97,21 +98,19 @@ public void testByteBufferConversions() { // little-endian long // 400000L is 0...110|00011010|10000000 in binary // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... 
, 00000000 -> 0 - assertConversion( - 400000L, TimestampType.microsWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertConversion( - 400000L, TimestampType.microsWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.microsWithoutZone()).toByteBuffer().array()) + assertConversion(400000L, TimestampType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion(400000L, TimestampType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.withoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.microsWithZone()).toByteBuffer().array()) + assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertConversion( - 400000L, TimestampType.nanosWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + 400000L, TimestampNanoType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertConversion( - 400000L, TimestampType.nanosWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.nanosWithoutZone()).toByteBuffer().array()) + 400000L, TimestampNanoType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampNanoType.withoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.nanosWithZone()).toByteBuffer().array()) + assertThat(Literal.of(400000L).to(TimestampNanoType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); // strings are stored as UTF-8 bytes (without length) diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 56a011263995..2d02da5346a7 100644 --- 
a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -37,10 +37,10 @@ public class TestReadabilityChecks { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(3), diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index 52cb95dcba03..96c330d6eb43 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -39,10 +39,10 @@ public void testIdentityTypes() throws Exception { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.microsWithoutZone(), - Types.TimestampType.microsWithZone(), - Types.TimestampType.nanosWithoutZone(), - Types.TimestampType.nanosWithZone(), + Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 088c042de550..51201a5d6b21 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -29,9 +29,9 @@ public void fromPrimitiveString() { Assertions.assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); 
Assertions.assertThat(Types.fromPrimitiveString("timestamp")) - .isSameAs(Types.TimestampType.microsWithoutZone()); + .isSameAs(Types.TimestampType.withoutZone()); Assertions.assertThat(Types.fromPrimitiveString("timestamp_ns")) - .isSameAs(Types.TimestampType.nanosWithoutZone()); + .isSameAs(Types.TimestampNanoType.withoutZone()); Assertions.assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")) .isEqualTo(Types.FixedType.ofLength(3)); From a114044e1444cc239f0b8cae21f84385ae22005c Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Tue, 26 Mar 2024 10:55:20 -0500 Subject: [PATCH 03/38] Try to hew closer to the original satisfiesOrderOf logic. This is for https://github.com/apache/iceberg/pull/9008#discussion_r1520360025 "I'd prefer to keep the logic of this check inside of the satisfiesOrderOf function" It's interesting that Hours.java stands out from the rest in that it was not calling Timestamps.HOUR.satisfiesOrderOf before we got here and is still not calling Timestamps.HOUR_FROM_MICROS.satisfiesOrderOf. 
--- .../java/org/apache/iceberg/transforms/Days.java | 15 +++++++++------ .../java/org/apache/iceberg/transforms/Hours.java | 7 +------ .../org/apache/iceberg/transforms/Months.java | 10 +++++++++- .../java/org/apache/iceberg/transforms/Years.java | 11 ++++++++++- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index d351a7c387e4..88f684c76da7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -58,13 +58,16 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - // TODO(epg): "I'd prefer to keep the logic of this check inside of the satisfiesOrderOf - // function" - // https://github.com/apache/iceberg/pull/9008#discussion_r1520360025 ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); - return otherResultTypeUnit == ChronoUnit.DAYS - || otherResultTypeUnit == ChronoUnit.MONTHS - || otherResultTypeUnit == ChronoUnit.YEARS; + switch (otherResultTypeUnit) { + case MICROS: + return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); + case NANOS: + return Timestamps.DAY_FROM_NANOS.satisfiesOrderOf(other); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + otherResultTypeUnit); + } } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); } else if (other instanceof Days || other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index a5eaa01678a7..b198d0e94eac 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; -import 
java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -61,11 +60,7 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); - return otherResultTypeUnit == ChronoUnit.HOURS - || otherResultTypeUnit == ChronoUnit.DAYS - || otherResultTypeUnit == ChronoUnit.MONTHS - || otherResultTypeUnit == ChronoUnit.YEARS; + return other == Timestamps.HOUR_FROM_MICROS || other == Timestamps.HOUR_FROM_NANOS; } else if (other instanceof Hours || other instanceof Days || other instanceof Months diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index ddfcd42dd641..c2e045975de7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -59,7 +59,15 @@ public boolean satisfiesOrderOf(Transform other) { if (other instanceof Timestamps) { ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); - return otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; + switch (otherResultTypeUnit) { + case MICROS: + return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); + case NANOS: + return Timestamps.MONTH_FROM_NANOS.satisfiesOrderOf(other); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + otherResultTypeUnit); + } } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); } else if (other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 8eb43cd55c57..009686d6d4fa 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -58,7 +58,16 @@ 
public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return ((Timestamps) other).getResultTypeUnit() == ChronoUnit.YEARS; + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + switch (otherResultTypeUnit) { + case MICROS: + return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); + case NANOS: + return Timestamps.YEAR_FROM_NANOS.satisfiesOrderOf(other); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + otherResultTypeUnit); + } } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); } else if (other instanceof Years) { From 0ebdf9f75b5e7187fec2ff3e54ced6d4438bae05 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Tue, 26 Mar 2024 10:55:36 -0500 Subject: [PATCH 04/38] Test that Avro produces a value within 1 micro --- .../TestStringLiteralConversions.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index 914776073c4d..b5949b0a144b 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -154,13 +154,24 @@ public void testStringToTimestampLiteralWithMicrosecondPrecisionFromNanoseconds( @Test public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() { - // TODO(epg): "Test that Avro produces a value within 1 micro of this" ??? - // https://github.com/apache/iceberg/pull/9008#discussion_r1520435413 - // Not using Avro's timestamp conversion as it has no timestampNanos(). 
- long expected = 1503066061123456789L; - Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); Literal timestamp = timestampStr.to(Types.TimestampNanoType.withoutZone()); + + // use Avro's timestamp conversion to validate the result within one microsecond + Schema avroSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + TimeConversions.TimestampMicrosConversion avroConversion = + new TimeConversions.TimestampMicrosConversion(); + long avroValue = + avroConversion.toLong( + LocalDateTime.of(2017, 8, 18, 14, 21, 1, 123456000).toInstant(ZoneOffset.UTC), + avroSchema, + avroSchema.getLogicalType()); + assertThat(timestamp.value() - avroValue * 1000) + .as("Timestamp without zone should match UTC") + .isLessThan(1000); + + // Not only using Avro's timestamp conversion as it has no timestampNanos(). + long expected = 1503066061123456789L; assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") .isEqualTo(expected); From bea34877d190a924253cdc81092f3d9571e8cd76 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 11 Apr 2024 11:20:55 -0500 Subject: [PATCH 05/38] address most comments --- .../iceberg/expressions/ExpressionUtil.java | 13 +- .../apache/iceberg/expressions/Literals.java | 5 +- .../org/apache/iceberg/transforms/Days.java | 3 +- .../org/apache/iceberg/transforms/Months.java | 3 +- .../iceberg/transforms/SortOrderVisitor.java | 5 +- .../apache/iceberg/transforms/Timestamps.java | 125 +++++++++--------- .../iceberg/transforms/TransformUtil.java | 4 +- .../org/apache/iceberg/transforms/Years.java | 3 +- .../org/apache/iceberg/util/DateTimeUtil.java | 5 + .../TestMiscLiteralConversions.java | 16 +-- 10 files changed, 94 insertions(+), 88 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 57d306d98813..7b455274081c 100644 --- 
a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -575,7 +575,6 @@ private static String sanitizeDate(int days, int today) { return "(date)"; } - // TODO(epg): `now` is millisecond resolution; shouldn't this be too? private static String sanitizeTimestamp(long micros, long now) { String isPast = now > micros ? "ago" : "from-now"; long diff = Math.abs(now - micros); @@ -606,18 +605,18 @@ private static String sanitizeString(CharSequence value, long now, int today) { if (DATE.matcher(value).matches()) { Literal date = Literal.of(value).to(Types.DateType.get()); return sanitizeDate(date.value(), today); + } else if (TIMESTAMPNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withoutZone()); + return sanitizeTimestamp(Math.floorDiv(ts.value(), 1000), now); + } else if (TIMESTAMPTZNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withZone()); + return sanitizeTimestamp(Math.floorDiv(ts.value(), 1000), now); } else if (TIMESTAMP.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); return sanitizeTimestamp(ts.value(), now); } else if (TIMESTAMPTZ.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withZone()); return sanitizeTimestamp(ts.value(), now); - } else if (TIMESTAMPNS.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampNanoType.withoutZone()); - return sanitizeTimestamp(ts.value() / 1000, now); - } else if (TIMESTAMPTZNS.matcher(value).matches()) { - Literal ts = Literal.of(value).to(Types.TimestampNanoType.withZone()); - return sanitizeTimestamp(ts.value() / 1000, now); } else if (TIME.matcher(value).matches()) { return "(time)"; } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 
600484a0be3e..35232a0591cd 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -550,10 +550,7 @@ public Literal to(Type type) { case TIMESTAMP_NANO: if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { - long timestampNanos = - ChronoUnit.NANOS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); - return (Literal) new TimestampNanoLiteral(timestampNanos); + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); } else { long timestampNanos = ChronoUnit.NANOS.between( diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index 88f684c76da7..e8aae2d18262 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; -import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -58,7 +57,7 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); switch (otherResultTypeUnit) { case MICROS: return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index c2e045975de7..766b2874c16b 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; -import java.time.temporal.ChronoUnit; import 
org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -58,7 +57,7 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); switch (otherResultTypeUnit) { case MICROS: return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 20e757d1a5a2..e52eab053eed 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -84,7 +84,10 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { results.add( visitor.truncate( sourceName, field.sourceId(), width, field.direction(), field.nullOrder())); - } else if ("year".equalsIgnoreCase(transform.toString())) { + } else if (transform == Dates.YEAR + || transform == Timestamps.YEAR_FROM_MICROS + || transform == Timestamps.YEAR_FROM_NANOS + || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if ("month".equalsIgnoreCase(transform.toString())) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 642a67dae9ee..8b5e01bb47c4 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -18,7 +18,10 @@ */ package org.apache.iceberg.transforms; +import static org.apache.iceberg.types.Type.TypeID.TIMESTAMP; + import com.google.errorprone.annotations.Immutable; +import java.time.Duration; import java.time.temporal.ChronoUnit; import java.util.Locale; import 
org.apache.iceberg.expressions.BoundPredicate; @@ -34,14 +37,22 @@ class Timestamps implements Transform { - static final Timestamps YEAR_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS); - static final Timestamps MONTH_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS); - static final Timestamps DAY_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS); - static final Timestamps HOUR_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS); - static final Timestamps YEAR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS); - static final Timestamps MONTH_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS); - static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS); - static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS); + static final Timestamps YEAR_FROM_MICROS = + new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.YEARS, "year"); + static final Timestamps MONTH_FROM_MICROS = + new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.MONTHS, "month"); + static final Timestamps DAY_FROM_MICROS = + new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.DAYS, "day"); + static final Timestamps HOUR_FROM_MICROS = + new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.HOURS, "hour"); + static final Timestamps YEAR_FROM_NANOS = + new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.YEARS, "year"); + static final Timestamps MONTH_FROM_NANOS = + new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.MONTHS, "month"); + static final Timestamps DAY_FROM_NANOS = + new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.DAYS, "day"); + static final Timestamps HOUR_FROM_NANOS = + new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.HOURS, "hour"); static Timestamps get(Types.TimestampType type, String resultTypeUnit) { switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { @@ -60,46 +71,50 @@ static Timestamps get(Types.TimestampType type, String resultTypeUnit) { } static 
Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { - switch (type.typeId()) { - case TIMESTAMP: - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_MICROS; - case MONTHS: - return MONTH_FROM_MICROS; - case DAYS: - return DAY_FROM_MICROS; - case HOURS: - return HOUR_FROM_MICROS; - } - break; - case TIMESTAMP_NANO: - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_NANOS; - case MONTHS: - return MONTH_FROM_NANOS; - case DAYS: - return DAY_FROM_NANOS; - case HOURS: - return HOUR_FROM_NANOS; - } - break; + if (type.typeId() != TIMESTAMP) { + throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); + } + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_MICROS; + case MONTHS: + return MONTH_FROM_MICROS; + case DAYS: + return DAY_FROM_MICROS; + case HOURS: + return HOUR_FROM_MICROS; default: - // `type` is out of range. - throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } + } + + enum ResultTypeUnit { + YEARS(ChronoUnit.YEARS), + MONTHS(ChronoUnit.MONTHS), + DAYS(ChronoUnit.DAYS), + HOURS(ChronoUnit.HOURS), + MICROS(ChronoUnit.MICROS), + NANOS(ChronoUnit.NANOS), + ; + + private final ChronoUnit unit; + + ResultTypeUnit(final ChronoUnit unit) { + this.unit = unit; + } + + Duration getDuration() { + return unit.getDuration(); } - // `resultTypeUnit` is out of range. 
- throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + "->" + resultTypeUnit); } @Immutable static class Apply implements SerializableFunction { private final ChronoUnit sourceTypeUnit; - private final ChronoUnit resultTypeUnit; + private final ResultTypeUnit resultTypeUnit; - Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + Apply(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit) { this.sourceTypeUnit = sourceTypeUnit; this.resultTypeUnit = resultTypeUnit; } @@ -126,7 +141,7 @@ public Integer apply(Long timestampUnits) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - return DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit); + return DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit); default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); @@ -134,9 +149,11 @@ public Integer apply(Long timestampUnits) { } } + private final String name; private final Apply apply; - Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + Timestamps(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit, String name) { + this.name = name; this.apply = new Apply(sourceTypeUnit, resultTypeUnit); } @@ -153,18 +170,18 @@ public SerializableFunction bind(Type type) { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; + return type.typeId() == TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override public Type getResultType(Type sourceType) { - if (apply.resultTypeUnit == ChronoUnit.DAYS) { + if (apply.resultTypeUnit == ResultTypeUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } - public ChronoUnit getResultTypeUnit() { + ResultTypeUnit resultTypeUnit() { return apply.resultTypeUnit; } @@ -180,8 +197,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - // 
test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and - // hour satisfies the order of day + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies + // the order of day Timestamps otherTransform = (Timestamps) other; return apply.resultTypeUnit.getDuration().toHours() <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); @@ -258,19 +275,7 @@ public String toHumanString(Type outputType, Integer value) { @Override public String toString() { - switch (apply.resultTypeUnit) { - case YEARS: - return "year"; - case MONTHS: - return "month"; - case DAYS: - return "day"; - case HOURS: - return "hour"; - default: - throw new UnsupportedOperationException( - "Unsupported result time unit: " + apply.resultTypeUnit); - } + return name; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index c70a926d58db..4c6f9118dd37 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -62,8 +62,8 @@ static String humanTimestampWithoutZone(Long timestampMicros) { return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); } - static String humanTimestampNanoWithZone(Long timestampMicros) { - return ChronoUnit.NANOS.addTo(EPOCH, timestampMicros).toString(); + static String humanTimestampNanoWithZone(Long timestampNanos) { + return ChronoUnit.NANOS.addTo(EPOCH, timestampNanos).toString(); } static String humanTimestampNanoWithoutZone(Long timestampMicros) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 009686d6d4fa..8b6ff318444a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,7 +19,6 @@ package 
org.apache.iceberg.transforms; import java.io.ObjectStreamException; -import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -58,7 +57,7 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); switch (otherResultTypeUnit) { case MICROS: return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 08116311a9ce..ab4aa33a89d1 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -128,6 +128,11 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } + public static long isoTimestampToNanos(CharSequence timestamp) { + return ChronoUnit.NANOS.between( + EPOCH, OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_DATE_TIME)); + } + public static boolean isUTCTimestamptz(String timestampString) { OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index 4a6f550eb9ce..e2611ddb281f 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -73,7 +73,7 @@ public void testTimestampWithMicrosecondsToDate() { } @Test - public void testTimestampWithNanoosecondsToDate() { + public void testTimestampWithNanosecondsToDate() { final Literal nanos = 
Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampNanoType.withoutZone()); final Literal dateOfNanos = nanos.to(Types.DateType.get()); @@ -117,8 +117,8 @@ public void testInvalidBooleanConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), @@ -134,8 +134,8 @@ public void testInvalidIntegerConversions() { Literal.of(34), Types.BooleanType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.StringType.get(), @@ -164,8 +164,8 @@ public void testInvalidFloatConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.StringType.get(), @@ -183,8 +183,8 @@ public void testInvalidDoubleConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.StringType.get(), @@ -203,8 +203,8 @@ public void testInvalidDateConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), @@ -224,8 +224,8 @@ public void testInvalidTimeConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.DateType.get(), - 
Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), @@ -358,8 +358,8 @@ public void testInvalidBinaryConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), From 753aed569184890156f826347fbbf6eab843cf1b Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Fri, 12 Apr 2024 15:10:59 -0500 Subject: [PATCH 06/38] Bucket timestamp and timestamp_ns the same. --- .../org/apache/iceberg/transforms/Bucket.java | 17 ++++++- .../iceberg/transforms/TestBucketing.java | 51 +++++++++++++++++++ format/spec.md | 4 +- 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index e91a8c6cc231..7e1a089f5b51 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -54,7 +54,6 @@ static & SerializableFunction> B get( return (B) new BucketInteger(numBuckets); case TIME: case TIMESTAMP: - case TIMESTAMP_NANO: case LONG: return (B) new BucketLong(numBuckets); case DECIMAL: @@ -64,6 +63,8 @@ static & SerializableFunction> B get( case FIXED: case BINARY: return (B) new BucketByteBuffer(numBuckets); + case TIMESTAMP_NANO: + return (B) new BucketTimestampNano(numBuckets); case UUID: return (B) new BucketUUID(numBuckets); default: @@ -216,6 +217,20 @@ protected int hash(Long value) { } } + // In order to bucket TimestampNano the same as Timestamp, we divide these values by 1000. 
+ private static class BucketTimestampNano extends Bucket + implements SerializableFunction { + + private BucketTimestampNano(int numBuckets) { + super(numBuckets); + } + + @Override + protected int hash(Long value) { + return BucketUtil.hash(Math.floorDiv(value, 1000)); + } + } + private static class BucketString extends Bucket implements SerializableFunction { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 0313a8fe6098..a5e26b0b3f7e 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -212,6 +212,57 @@ public void testLong() { .isEqualTo(hashBytes(buffer.array())); } + @Test + public void testTimestampNanoPromotion() { + // Values from spec Appendix B: 32-bit Hash Requirements + String timestamp1 = "2017-11-16T22:31:08"; + long expectedHash1 = -2047944441; + String timestamp2 = "2017-11-16T22:31:08.000001"; + long expectedHash2 = -1207196810; + String timestampTz1 = "2017-11-16T14:31:08-08:00"; + String timestampTz2 = "2017-11-16T14:31:08.000001-08:00"; + String timestampNs1 = "2017-11-16T22:31:08"; + String timestampNs2 = "2017-11-16T22:31:08.000001001"; + String timestampTzNs1 = "2017-11-16T14:31:08-08:00"; + String timestampTzNs2 = "2017-11-16T14:31:08.000001001-08:00"; + + Types.TimestampType tsType = Types.TimestampType.withoutZone(); + Types.TimestampType tsTzType = Types.TimestampType.withZone(); + Types.TimestampNanoType tsNsType = Types.TimestampNanoType.withoutZone(); + Types.TimestampNanoType tsTzNsType = Types.TimestampNanoType.withZone(); + + Bucket tsNsBucket = Bucket.get(tsNsType, 1); + Bucket tsBucket = Bucket.get(tsType, 1); + Bucket tsTzNsBucket = Bucket.get(tsTzNsType, 1); + Bucket tsTzBucket = Bucket.get(tsTzType, 1); + + assertThat(tsBucket.hash(Literal.of(timestamp1).to(tsType).value())) + .as("Timestamp and TimestampNano bucket 
results should match") + .isEqualTo(expectedHash1); + assertThat(tsTzBucket.hash(Literal.of(timestampTz1).to(tsTzType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash1); + assertThat(tsNsBucket.hash(Literal.of(timestampNs1).to(tsNsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash1); + assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs1).to(tsTzNsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash1); + + assertThat(tsBucket.hash(Literal.of(timestamp2).to(tsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash2); + assertThat(tsTzBucket.hash(Literal.of(timestampTz2).to(tsTzType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash2); + assertThat(tsNsBucket.hash(Literal.of(timestampNs2).to(tsNsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash2); + assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs2).to(tsTzNsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash2); + } + @Test public void testIntegerTypePromotion() { int randomInt = testRandom.nextInt(); diff --git a/format/spec.md b/format/spec.md index bc655c49dc57..4a14b4d80914 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1045,8 +1045,8 @@ The 32-bit hash implementation is 32-bit Murmur3 hash, x86 variant, seeded with | **`time`** | `hashLong(microsecsFromMidnight(v))` | `22:31:08` → `-662762989` | | **`timestamp`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001` → `-1207196810` | | **`timestamptz`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001-08:00` → `-1207196810` | -| **`timestamp_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-737750069`
`2017-11-16T22:31:08.000001` → `-976603392`
`2017-11-16T22:31:08.000000001` → `-160215926` | -| **`timestamptz_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-737750069`
`2017-11-16T14:31:08.000001-08:00` → `-976603392`
`2017-11-16T14:31:08.000000001-08:00` → `-160215926` | +| **`timestamp_ns`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | +| **`timestamptz_ns`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | | **`string`** | `hashBytes(utf8Bytes(v))` | `iceberg` → `1210000089` | | **`uuid`** | `hashBytes(uuidBytes(v))` [3] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | | **`fixed(L)`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | From 19605d653e138f034699cd7cdbf6ddfa27f361d4 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 25 Apr 2024 12:02:34 -0500 Subject: [PATCH 07/38] Fix bug caught by TestPartitionSpecParser#testTransforms . I'd missed Timestamps#get overloads for Types.TimestampNanoType . --- .../apache/iceberg/transforms/Timestamps.java | 41 +++++++++++++++++-- .../apache/iceberg/transforms/Transforms.java | 3 +- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 8b5e01bb47c4..954b6e40cbc8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.transforms; -import static org.apache.iceberg.types.Type.TypeID.TIMESTAMP; - import com.google.errorprone.annotations.Immutable; import java.time.Duration; import java.time.temporal.ChronoUnit; @@ -70,8 +68,24 @@ static Timestamps get(Types.TimestampType type, String resultTypeUnit) { } } + static Timestamps get(Types.TimestampNanoType type, String resultTypeUnit) { + switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { + case "year": + return get(type, ChronoUnit.YEARS); + case "month": + return get(type, ChronoUnit.MONTHS); + case "day": + return get(type, ChronoUnit.DAYS); + case "hour": + return get(type, ChronoUnit.HOURS); + default: + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } + } + static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { - if (type.typeId() != 
TIMESTAMP) { + if (type.typeId() != Type.TypeID.TIMESTAMP) { throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); } switch (resultTypeUnit) { @@ -89,6 +103,25 @@ static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { } } + static Timestamps get(Types.TimestampNanoType type, ChronoUnit resultTypeUnit) { + if (type.typeId() != Type.TypeID.TIMESTAMP_NANO) { + throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); + } + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_NANOS; + case MONTHS: + return MONTH_FROM_NANOS; + case DAYS: + return DAY_FROM_NANOS; + case HOURS: + return HOUR_FROM_NANOS; + default: + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } + } + enum ResultTypeUnit { YEARS(ChronoUnit.YEARS), MONTHS(ChronoUnit.MONTHS), @@ -170,7 +203,7 @@ public SerializableFunction bind(Type type) { @Override public boolean canTransform(Type type) { - return type.typeId() == TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index e83759c0b0f5..8bd623e5d2ec 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -88,8 +88,9 @@ private Transforms() {} try { switch (type.typeId()) { case TIMESTAMP: - case TIMESTAMP_NANO: return Timestamps.get((Types.TimestampType) type, transform); + case TIMESTAMP_NANO: + return Timestamps.get((Types.TimestampNanoType) type, transform); case DATE: return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } From d0b46279ff3902f14e7b16d72f2a2ff362d78394 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Wed, 5 Jun 2024 16:22:06 -0500 Subject: 
[PATCH 08/38] address review comments --- .../iceberg/transforms/SortOrderVisitor.java | 14 +++-- .../apache/iceberg/transforms/Timestamps.java | 10 ++-- .../apache/iceberg/transforms/Transforms.java | 4 +- .../org/apache/iceberg/util/DateTimeUtil.java | 6 +-- .../apache/iceberg/transforms/TestDays.java | 34 ++++++++++++ .../apache/iceberg/transforms/TestHours.java | 36 +++++++++++++ .../iceberg/transforms/TestTimestamps.java | 52 +++++++++++++++++++ .../apache/iceberg/transforms/TestYears.java | 34 ++++++++++++ .../apache/iceberg/util/TestDateTimeUtil.java | 29 +++++++++++ 9 files changed, 203 insertions(+), 16 deletions(-) create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestDays.java create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestHours.java create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestYears.java diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index e52eab053eed..998c63d8e367 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -90,13 +90,21 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if ("month".equalsIgnoreCase(transform.toString())) { + } else if (transform == Dates.MONTH + || transform == Timestamps.MONTH_FROM_MICROS + || transform == Timestamps.MONTH_FROM_NANOS + || transform instanceof Months) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if ("day".equalsIgnoreCase(transform.toString())) { + } else if (transform == Dates.DAY + || transform == Timestamps.DAY_FROM_MICROS + || transform == Timestamps.DAY_FROM_NANOS + || transform instanceof Days) { 
results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if ("hour".equalsIgnoreCase(transform.toString())) { + } else if (transform == Timestamps.HOUR_FROM_MICROS + || transform == Timestamps.HOUR_FROM_NANOS + || transform instanceof Hours) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform instanceof UnknownTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 954b6e40cbc8..6cba2ea78a78 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -85,9 +85,6 @@ static Timestamps get(Types.TimestampNanoType type, String resultTypeUnit) { } static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { - if (type.typeId() != Type.TypeID.TIMESTAMP) { - throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); - } switch (resultTypeUnit) { case YEARS: return YEAR_FROM_MICROS; @@ -104,9 +101,6 @@ static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { } static Timestamps get(Types.TimestampNanoType type, ChronoUnit resultTypeUnit) { - if (type.typeId() != Type.TypeID.TIMESTAMP_NANO) { - throw new UnsupportedOperationException("Unsupported timestamp unit: " + type); - } switch (resultTypeUnit) { case YEARS: return YEAR_FROM_NANOS; @@ -174,7 +168,9 @@ public Integer apply(Long timestampUnits) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - return DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit); + // TODO(epg): Overflows for MILLIS, MICROS, and NANOS! Fixing this is quite invasive, as + // Timestamps is assumed to be Transform in many, many places. 
+ return (int) DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit); default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 8bd623e5d2ec..610872f01065 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -23,7 +23,6 @@ import java.util.regex.Pattern; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; -import org.apache.iceberg.relocated.com.google.common.base.Strings; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -204,8 +203,7 @@ public static Transform hour(Type type) { case TIMESTAMP_NANO: return (Transform) Timestamps.HOUR_FROM_NANOS; default: - throw new IllegalArgumentException( - Strings.lenientFormat("Cannot partition type %s by hour", type)); + throw new IllegalArgumentException(String.format("Cannot partition type %s by hour", type)); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index ab4aa33a89d1..049b1c5d1af0 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -194,17 +194,17 @@ private static int convertMicros(long micros, ChronoUnit granularity) { } } - public static int convertNanos(long nanos, ChronoUnit granularity) { + public static long convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); - return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + return granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); } else { // add 1 
nano to the value to account for the case where there is exactly 1 unit between // the timestamp and epoch because the result will always be decremented. long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos + 1, NANOS_PER_SECOND); - return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + return granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java new file mode 100644 index 000000000000..4de2fe8d55f3 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +public class TestDays { + @Test + public void satisfiesOrderOf() { + assertThatThrownBy(() -> DAYS.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageMatching("Unsupported timestamp unit: DAYS"); + } + + private static final Days DAYS = Days.get(); +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java new file mode 100644 index 000000000000..ad022957c3fb --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestHours { + @Test + public void toEnum() { + assertThatThrownBy(() -> HOURS.toEnum(TYPE)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported type: date"); + } + + private static final Hours HOURS = Hours.get(); + private static final Types.DateType TYPE = Types.DateType.get(); +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 3c37e643eb95..2c5d24563d33 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -19,7 +19,9 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -241,4 +243,54 @@ public void testTimestampsReturnType() { Type hourResultType = hour.getResultType(type); assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } + + @Test + public void apply_bad_source_type() { + Timestamps badSourceType = + new Timestamps(ChronoUnit.CENTURIES, Timestamps.ResultTypeUnit.YEARS, "year"); + assertThatThrownBy(() -> badSourceType.apply(11L)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageMatching("Unsupported source type unit: Centuries"); + } + + @Test + public void apply_bad_result_type() { + Timestamps badResultType = + new Timestamps(ChronoUnit.MICROS, Timestamps.ResultTypeUnit.NANOS, "nano"); + assertThatThrownBy(() -> badResultType.apply(11L)) + .isInstanceOf(UnsupportedOperationException.class) + 
.hasMessageMatching("Unsupported result type unit: NANOS"); + } + + @Test + public void get_TimestampType_ChronoUnit() { + Types.TimestampType timestampType = Types.TimestampType.withZone(); + assertThatThrownBy(() -> Timestamps.get(timestampType, ChronoUnit.CENTURIES)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported source/result type units: timestamptz->Centuries"); + } + + @Test + public void get_TimestampType_String() { + Types.TimestampType timestampType = Types.TimestampType.withZone(); + assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported source/result type units: timestamptz->trash"); + } + + @Test + public void get_TimestampNanoType_ChronoUnit() { + Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); + assertThatThrownBy(() -> Timestamps.get(timestampNanoType, ChronoUnit.CENTURIES)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported source/result type units: timestamptz_ns->Centuries"); + } + + @Test + public void get_TimestampNanoType_String() { + Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); + assertThatThrownBy(() -> Timestamps.get(timestampNanoType, "trash")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported source/result type units: timestamptz_ns->trash"); + } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java new file mode 100644 index 000000000000..8224dde64497 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +public class TestYears { + @Test + public void satisfiesOrderOf() { + assertThatThrownBy(() -> YEARS.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageMatching("Unsupported timestamp unit: DAYS"); + } + + private static final Years YEARS = Years.get(); +} diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index cf6b9ccb790d..0f912a4af949 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -18,7 +18,10 @@ */ package org.apache.iceberg.util; +import static org.assertj.core.api.Assertions.assertThat; + import java.time.ZonedDateTime; +import java.time.temporal.ChronoUnit; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -35,4 +38,30 @@ public void formatTimestampMillis() { Assertions.assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()) .isEqualTo(1000000L); } + + @Test + public void nanosToMicros() { + assertThat(DateTimeUtil.nanosToMicros(1510871468000001001L)).isEqualTo(1510871468000001L); + } + + @Test + public void isoTimestampToNanos() { + 
assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T14:31:08.000001001-08:00")) + .isEqualTo(1510871468000001001L); + } + + @Test + public void convertNanos() { + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HOURS)).isEqualTo(419686); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MINUTES)) + .isEqualTo(25181191); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.SECONDS)) + .isEqualTo(1510871468); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MILLIS)) + .isEqualTo(1510871468000L); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MICROS)) + .isEqualTo(1510871468000001L); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.NANOS)) + .isEqualTo(1510871468000001001L); + } } From 2f71da939599068676390d48c5b3c183b04077b1 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 6 Jun 2024 11:51:16 -0500 Subject: [PATCH 09/38] address style improvements --- .../apache/iceberg/transforms/Timestamps.java | 14 ++++++------- .../org/apache/iceberg/util/DateTimeUtil.java | 14 ++++++------- .../apache/iceberg/transforms/TestDays.java | 7 +++---- .../apache/iceberg/transforms/TestHours.java | 9 ++++----- .../iceberg/transforms/TestTimestamps.java | 20 +++++++++---------- .../apache/iceberg/transforms/TestYears.java | 7 +++---- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 6cba2ea78a78..b16d9e9b2512 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -64,7 +64,7 @@ static Timestamps get(Types.TimestampType type, String resultTypeUnit) { return get(type, ChronoUnit.HOURS); default: throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + "->" + resultTypeUnit); 
+ "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); } } @@ -80,7 +80,7 @@ static Timestamps get(Types.TimestampNanoType type, String resultTypeUnit) { return get(type, ChronoUnit.HOURS); default: throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); } } @@ -96,7 +96,7 @@ static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { return HOUR_FROM_MICROS; default: throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); } } @@ -112,7 +112,7 @@ static Timestamps get(Types.TimestampNanoType type, ChronoUnit resultTypeUnit) { return HOUR_FROM_NANOS; default: throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); } } @@ -131,7 +131,7 @@ enum ResultTypeUnit { this.unit = unit; } - Duration getDuration() { + Duration duration() { return unit.getDuration(); } } @@ -229,8 +229,8 @@ public boolean satisfiesOrderOf(Transform other) { // test the granularity, in hours. 
hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies // the order of day Timestamps otherTransform = (Timestamps) other; - return apply.resultTypeUnit.getDuration().toHours() - <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); + return apply.resultTypeUnit.duration().toHours() + <= otherTransform.apply.resultTypeUnit.duration().toHours(); } return false; diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 049b1c5d1af0..28c313fc3cb2 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -38,6 +38,13 @@ private DateTimeUtil() {} private static final long NANOS_PER_SECOND = 1_000_000_000L; private static final long NANOS_PER_MICRO = 1_000L; + private static final DateTimeFormatter FORMATTER = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); + public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); } @@ -211,11 +218,4 @@ public static long convertNanos(long nanos, ChronoUnit granularity) { private static OffsetDateTime toOffsetDateTime(long epochSecond, long nanoAdjustment) { return Instant.ofEpochSecond(epochSecond, nanoAdjustment).atOffset(ZoneOffset.UTC); } - - private static final DateTimeFormatter FORMATTER = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .appendOffset("+HH:MM:ss", "+00:00") - .toFormatter(); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java index 4de2fe8d55f3..ea06fe495bfc 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java @@ -24,11 +24,10 
@@ public class TestDays { @Test - public void satisfiesOrderOf() { - assertThatThrownBy(() -> DAYS.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + public void testSatisfiesOrderOf() { + Days days = Days.get(); + assertThatThrownBy(() -> days.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageMatching("Unsupported timestamp unit: DAYS"); } - - private static final Days DAYS = Days.get(); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java index ad022957c3fb..8e070aec8b9c 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java @@ -25,12 +25,11 @@ public class TestHours { @Test - public void toEnum() { - assertThatThrownBy(() -> HOURS.toEnum(TYPE)) + public void testToEnum() { + Hours hours = Hours.get(); + Types.DateType type = Types.DateType.get(); + assertThatThrownBy(() -> hours.toEnum(type)) .isInstanceOf(IllegalArgumentException.class) .hasMessageMatching("Unsupported type: date"); } - - private static final Hours HOURS = Hours.get(); - private static final Types.DateType TYPE = Types.DateType.get(); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 2c5d24563d33..29b5210b245f 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -245,7 +245,7 @@ public void testTimestampsReturnType() { } @Test - public void apply_bad_source_type() { + public void testApplyRejectsBadSourceType() { Timestamps badSourceType = new Timestamps(ChronoUnit.CENTURIES, Timestamps.ResultTypeUnit.YEARS, "year"); assertThatThrownBy(() -> badSourceType.apply(11L)) @@ -254,7 +254,7 @@ public void apply_bad_source_type() { } @Test - public void 
apply_bad_result_type() { + public void testApplyRejectsBadResultType() { Timestamps badResultType = new Timestamps(ChronoUnit.MICROS, Timestamps.ResultTypeUnit.NANOS, "nano"); assertThatThrownBy(() -> badResultType.apply(11L)) @@ -263,34 +263,34 @@ public void apply_bad_result_type() { } @Test - public void get_TimestampType_ChronoUnit() { + public void testGetOfTimestampTypeRejectsBadChronoUnit() { Types.TimestampType timestampType = Types.TimestampType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampType, ChronoUnit.CENTURIES)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz->Centuries"); + .hasMessageMatching("Unsupported source/result type units: timestamptz -> Centuries"); } @Test - public void get_TimestampType_String() { + public void testGetOfTimestampTypeRejectsBadString() { Types.TimestampType timestampType = Types.TimestampType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz->trash"); + .hasMessageMatching("Unsupported source/result type units: timestamptz -> trash"); } @Test - public void get_TimestampNanoType_ChronoUnit() { + public void testGetOfTimestampNanoTypeRejectsBadChronoUnit() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampNanoType, ChronoUnit.CENTURIES)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns->Centuries"); + .hasMessageMatching("Unsupported source/result type units: timestamptz_ns -> Centuries"); } @Test - public void get_TimestampNanoType_String() { + public void testGetOfTimestampNanoTypeRejectsBadString() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampNanoType, 
"trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns->trash"); + .hasMessageMatching("Unsupported source/result type units: timestamptz_ns -> trash"); } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java index 8224dde64497..571180c1b524 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java @@ -24,11 +24,10 @@ public class TestYears { @Test - public void satisfiesOrderOf() { - assertThatThrownBy(() -> YEARS.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + public void testSatisfiesOrderOf() { + Years years = Years.get(); + assertThatThrownBy(() -> years.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageMatching("Unsupported timestamp unit: DAYS"); } - - private static final Years YEARS = Years.get(); } From c3c1288e69ce50a085fa3b326ec3d65b8af02361 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 6 Jun 2024 11:56:33 -0500 Subject: [PATCH 10/38] test DateTimeUtil.convertNanos on negative input --- .../test/java/org/apache/iceberg/util/TestDateTimeUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 0f912a4af949..5e5e37a060b1 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -64,4 +64,10 @@ public void convertNanos() { assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.NANOS)) .isEqualTo(1510871468000001001L); } + + @Test + public void convertNanosNegative() { + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLIS)) + .isEqualTo(-1510871468001L); + } } From 
ec01d68afcf67cf8e5e3150c108ad35c7d4a1cc5 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 6 Jun 2024 11:58:00 -0500 Subject: [PATCH 11/38] use Math.toIntExact in Timestamps NANOS conversion --- .../main/java/org/apache/iceberg/transforms/Timestamps.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index b16d9e9b2512..69ca6bcc1cd8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -168,9 +168,7 @@ public Integer apply(Long timestampUnits) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - // TODO(epg): Overflows for MILLIS, MICROS, and NANOS! Fixing this is quite invasive, as - // Timestamps is assumed to be Transform in many, many places. - return (int) DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit); + return Math.toIntExact(DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit)); default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); From eb5e38242348ebed6778432ead28182672bafc79 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Wed, 3 Jul 2024 22:48:55 -0500 Subject: [PATCH 12/38] address review comments --- .../iceberg/expressions/ExpressionUtil.java | 5 ++- .../apache/iceberg/transforms/Timestamps.java | 21 +++++---- .../java/org/apache/iceberg/types/Types.java | 11 +++-- .../apache/iceberg/TestPartitionPaths.java | 19 ++++++++ .../iceberg/transforms/TestBucketing.java | 43 ++++++++++++------- 5 files changed, 64 insertions(+), 35 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 7b455274081c..9b6a3e5bf7eb 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ 
b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -36,6 +36,7 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; /** Expression utility methods. */ public class ExpressionUtil { @@ -607,10 +608,10 @@ private static String sanitizeString(CharSequence value, long now, int today) { return sanitizeDate(date.value(), today); } else if (TIMESTAMPNS.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampNanoType.withoutZone()); - return sanitizeTimestamp(Math.floorDiv(ts.value(), 1000), now); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros(ts.value()), now); } else if (TIMESTAMPTZNS.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampNanoType.withZone()); - return sanitizeTimestamp(Math.floorDiv(ts.value(), 1000), now); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros(ts.value()), now); } else if (TIMESTAMP.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); return sanitizeTimestamp(ts.value(), now); diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 69ca6bcc1cd8..c917c257ad88 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -122,8 +122,7 @@ enum ResultTypeUnit { DAYS(ChronoUnit.DAYS), HOURS(ChronoUnit.HOURS), MICROS(ChronoUnit.MICROS), - NANOS(ChronoUnit.NANOS), - ; + NANOS(ChronoUnit.NANOS); private final ChronoUnit unit; @@ -147,8 +146,8 @@ static class Apply implements SerializableFunction { } @Override - public Integer apply(Long timestampUnits) { - if (timestampUnits == null) { + public Integer apply(Long timestamp) { + if (timestamp == null) { return null; } @@ -156,19 +155,19 @@ public Integer apply(Long timestampUnits) { 
case MICROS: switch (resultTypeUnit) { case YEARS: - return DateTimeUtil.microsToYears(timestampUnits); + return DateTimeUtil.microsToYears(timestamp); case MONTHS: - return DateTimeUtil.microsToMonths(timestampUnits); + return DateTimeUtil.microsToMonths(timestamp); case DAYS: - return DateTimeUtil.microsToDays(timestampUnits); + return DateTimeUtil.microsToDays(timestamp); case HOURS: - return DateTimeUtil.microsToHours(timestampUnits); + return DateTimeUtil.microsToHours(timestamp); default: throw new UnsupportedOperationException( "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - return Math.toIntExact(DateTimeUtil.convertNanos(timestampUnits, resultTypeUnit.unit)); + return Math.toIntExact(DateTimeUtil.convertNanos(timestamp, resultTypeUnit.unit)); default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); @@ -185,8 +184,8 @@ public Integer apply(Long timestampUnits) { } @Override - public Integer apply(Long timestampUnits) { - return apply.apply(timestampUnits); + public Integer apply(Long timestamp) { + return apply.apply(timestamp); } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 156880cd5043..f9a683d46052 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -298,15 +298,14 @@ public String toString() { } @Override - public boolean equals(Object o) { - if (this == o) { + public boolean equals(Object other) { + if (this == other) { return true; - } else if (!(o instanceof TimestampNanoType)) { + } + if (!(other instanceof TimestampNanoType)) { return false; } - - TimestampNanoType that = (TimestampNanoType) o; - return adjustToUTC == that.adjustToUTC; + return adjustToUTC == ((TimestampNanoType) other).adjustToUTC; } @Override diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java 
b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java index c1df62282615..31a6c486bf6e 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java @@ -73,6 +73,25 @@ public void testPartitionPathWithNanoseconds() { .isEqualTo("ts_hour=2017-12-01-10/id_bucket=" + idBucket); } + @Test + public void testPartitionPathWithNanosecondsTz() { + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("ts").bucket("id", 10).build(); + + Transform hour = Transforms.hour(); + Transform bucket = Transforms.bucket(10); + + Literal ts = + Literal.of("2017-12-01T10:12:55.038194789-08:00").to(Types.TimestampNanoType.withZone()); + Object tsTzHour = hour.bind(Types.TimestampNanoType.withZone()).apply(ts.value()); + Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); + + Row partition = Row.of(tsTzHour, idBucket); + + assertThat(spec.partitionToPath(partition)) + .as("Should produce expected partition key") + .isEqualTo("ts_hour=2017-12-01-18/id_bucket=" + idBucket); + } + @Test public void testEscapedStrings() { PartitionSpec spec = diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index b5bf44b0a3d0..279f1c15c589 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -219,43 +219,54 @@ public void testTimestampNanoPromotion() { long expectedHash1 = -2047944441; String timestamp2 = "2017-11-16T22:31:08.000001"; long expectedHash2 = -1207196810; - String timestampTz1 = "2017-11-16T14:31:08-08:00"; - String timestampTz2 = "2017-11-16T14:31:08.000001-08:00"; String timestampNs1 = "2017-11-16T22:31:08"; String timestampNs2 = "2017-11-16T22:31:08.000001001"; - String timestampTzNs1 = "2017-11-16T14:31:08-08:00"; - String timestampTzNs2 = "2017-11-16T14:31:08.000001001-08:00"; 
Types.TimestampType tsType = Types.TimestampType.withoutZone(); - Types.TimestampType tsTzType = Types.TimestampType.withZone(); Types.TimestampNanoType tsNsType = Types.TimestampNanoType.withoutZone(); - Types.TimestampNanoType tsTzNsType = Types.TimestampNanoType.withZone(); Bucket tsNsBucket = Bucket.get(tsNsType, 1); Bucket tsBucket = Bucket.get(tsType, 1); - Bucket tsTzNsBucket = Bucket.get(tsTzNsType, 1); - Bucket tsTzBucket = Bucket.get(tsTzType, 1); assertThat(tsBucket.hash(Literal.of(timestamp1).to(tsType).value())) .as("Timestamp and TimestampNano bucket results should match") .isEqualTo(expectedHash1); - assertThat(tsTzBucket.hash(Literal.of(timestampTz1).to(tsTzType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); assertThat(tsNsBucket.hash(Literal.of(timestampNs1).to(tsNsType).value())) .as("Timestamp and TimestampNano bucket results should match") .isEqualTo(expectedHash1); - assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs1).to(tsTzNsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); assertThat(tsBucket.hash(Literal.of(timestamp2).to(tsType).value())) .as("Timestamp and TimestampNano bucket results should match") .isEqualTo(expectedHash2); - assertThat(tsTzBucket.hash(Literal.of(timestampTz2).to(tsTzType).value())) + assertThat(tsNsBucket.hash(Literal.of(timestampNs2).to(tsNsType).value())) .as("Timestamp and TimestampNano bucket results should match") .isEqualTo(expectedHash2); - assertThat(tsNsBucket.hash(Literal.of(timestampNs2).to(tsNsType).value())) + } + + @Test + public void testTimestampTzNanoPromotion() { + // Values from spec Appendix B: 32-bit Hash Requirements + long expectedHash1 = -2047944441; + long expectedHash2 = -1207196810; + String timestampTz1 = "2017-11-16T14:31:08-08:00"; + String timestampTz2 = "2017-11-16T14:31:08.000001-08:00"; + String timestampTzNs1 = "2017-11-16T14:31:08-08:00"; + String timestampTzNs2 = 
"2017-11-16T14:31:08.000001001-08:00"; + + Types.TimestampType tsTzType = Types.TimestampType.withZone(); + Types.TimestampNanoType tsTzNsType = Types.TimestampNanoType.withZone(); + + Bucket tsTzNsBucket = Bucket.get(tsTzNsType, 1); + Bucket tsTzBucket = Bucket.get(tsTzType, 1); + + assertThat(tsTzBucket.hash(Literal.of(timestampTz1).to(tsTzType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash1); + assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs1).to(tsTzNsType).value())) + .as("Timestamp and TimestampNano bucket results should match") + .isEqualTo(expectedHash1); + + assertThat(tsTzBucket.hash(Literal.of(timestampTz2).to(tsTzType).value())) .as("Timestamp and TimestampNano bucket results should match") .isEqualTo(expectedHash2); assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs2).to(tsTzNsType).value())) From 421ed86c0be9d08f84fec6785d7b60d4f3194ab1 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Mon, 8 Jul 2024 13:14:57 -0500 Subject: [PATCH 13/38] Adjust tests as requested. 
--- .../iceberg/expressions/TestStringLiteralConversions.java | 6 ++---- .../test/java/org/apache/iceberg/transforms/TestDays.java | 3 +-- .../test/java/org/apache/iceberg/transforms/TestYears.java | 5 ++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index 31cfbd709297..063c759af2a8 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -227,7 +227,7 @@ public void testNegativeStringToTimestampLiteral() { public void testTimestampWithZoneWithoutZoneInLiteral() { // Zone must be present in literals when converting to timestamp with zone assertThatThrownBy( - () -> Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.withZone())) + () -> Literal.of("2017-08-18T14:21:01.919").to(Types.TimestampType.withZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); assertThatThrownBy( @@ -241,9 +241,7 @@ public void testTimestampWithZoneWithoutZoneInLiteral() { public void testTimestampWithoutZoneWithZoneInLiteral() { // Zone must not be present in literals when converting to timestamp without zone assertThatThrownBy( - () -> - Literal.of("2017-08-18T14:21:01.919123+07:00") - .to(Types.TimestampType.withoutZone())) + () -> Literal.of("2017-08-18T14:21:01.919+07:00").to(Types.TimestampType.withoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); assertThatThrownBy( diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java index ea06fe495bfc..c090ad62c914 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java +++ 
b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java @@ -25,8 +25,7 @@ public class TestDays { @Test public void testSatisfiesOrderOf() { - Days days = Days.get(); - assertThatThrownBy(() -> days.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + assertThatThrownBy(() -> Days.get().satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageMatching("Unsupported timestamp unit: DAYS"); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java index 571180c1b524..d4b06b6f2a62 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java @@ -25,9 +25,8 @@ public class TestYears { @Test public void testSatisfiesOrderOf() { - Years years = Years.get(); - assertThatThrownBy(() -> years.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + assertThatThrownBy(() -> Years.get().satisfiesOrderOf(Timestamps.YEAR_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: DAYS"); + .hasMessageMatching("Unsupported timestamp unit: YEARS"); } } From d7a13268f2144076b53e0248a251c0e8187e0f02 Mon Sep 17 00:00:00 2001 From: Jacob Marble Date: Mon, 8 Jul 2024 15:38:41 -0700 Subject: [PATCH 14/38] add nanosecond tests to TestTimestamps --- .../iceberg/transforms/TestTimestamps.java | 218 +++++++++++++++++- 1 file changed, 217 insertions(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 29b5210b245f..c62a8f8a490e 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -61,6 +61,39 @@ public void testDeprecatedTimestampTransform() { assertThat((int) hours.apply(nts.value())).isEqualTo(-1); } + 
@Test + @SuppressWarnings("deprecation") + public void testDeprecatedTimestampNanoTransform() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); + Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); + Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + + Transform years = Transforms.year(type); + assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); + assertThat((int) years.apply(pts.value())).as("Should produce 1970 - 1970 = 0").isZero(); + assertThat((int) years.apply(nts.value())).as("Should produce 1969 - 1970 = -1").isEqualTo(-1); + + Transform months = Transforms.month(type); + assertThat((int) months.apply(ts.value())) + .as("Should produce 47 * 12 + 11 = 575") + .isEqualTo(575); + assertThat((int) months.apply(pts.value())).as("Should produce 0 * 12 + 0 = 0").isZero(); + assertThat((int) months.apply(nts.value())).isEqualTo(-1); + + Transform days = Transforms.day(type); + assertThat((int) days.apply(ts.value())).as("Should produce 17501").isEqualTo(17501); + assertThat((int) days.apply(pts.value())).as("Should produce 0 * 365 + 0 = 0").isZero(); + assertThat((int) days.apply(nts.value())).isEqualTo(-1); + + Transform hours = Transforms.hour(type); + assertThat((int) hours.apply(ts.value())) + .as("Should produce 17501 * 24 + 10") + .isEqualTo(420034); + assertThat((int) hours.apply(pts.value())).as("Should produce 0 * 24 + 0 = 0").isZero(); + assertThat((int) hours.apply(nts.value())).isEqualTo(-1); + } + @Test public void testTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -105,6 +138,50 @@ public void testTimestampTransform() { assertThat((int) hours.bind(type).apply(nts.value())).isEqualTo(-1); } + @Test + public void testTimestampNanoTransform() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal ts = 
Literal.of("2017-12-01T10:12:55.038194789").to(type); + Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); + Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + + Transform years = Transforms.year(); + assertThat((int) years.bind(type).apply(ts.value())) + .as("Should produce 2017 - 1970 = 47") + .isEqualTo(47); + assertThat((int) years.bind(type).apply(pts.value())) + .as("Should produce 1970 - 1970 = 0") + .isZero(); + assertThat((int) years.bind(type).apply(nts.value())) + .as("Should produce 1969 - 1970 = -1") + .isEqualTo(-1); + + Transform months = Transforms.month(); + assertThat((int) months.bind(type).apply(ts.value())) + .as("Should produce 47 * 12 + 11 = 575") + .isEqualTo(575); + assertThat((int) months.bind(type).apply(pts.value())) + .as("Should produce 0 * 12 + 0 = 0") + .isZero(); + assertThat((int) months.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform days = Transforms.day(); + assertThat((int) days.bind(type).apply(ts.value())).as("Should produce 17501").isEqualTo(17501); + assertThat((int) days.bind(type).apply(pts.value())) + .as("Should produce 0 * 365 + 0 = 0") + .isZero(); + assertThat((int) days.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform hours = Transforms.hour(); + assertThat((int) hours.bind(type).apply(ts.value())) + .as("Should produce 17501 * 24 + 10") + .isEqualTo(420034); + assertThat((int) hours.bind(type).apply(pts.value())) + .as("Should produce 0 * 24 + 0 = 0") + .isZero(); + assertThat((int) hours.bind(type).apply(nts.value())).isEqualTo(-1); + } + @Test public void testTimestampWithoutZoneToHumanString() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -125,6 +202,26 @@ public void testTimestampWithoutZoneToHumanString() { .isEqualTo("2017-12-01-10"); } + @Test + public void testTimestampNanoWithoutZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = 
Literal.of("2017-12-01T10:12:55.038194789").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-10"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanString() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -145,6 +242,26 @@ public void testNegativeTimestampWithoutZoneToHumanString() { .isEqualTo("1969-12-30-10"); } + @Test + public void testNegativeTimestampNanoWithoutZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-30T10:12:55.038194789").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-10"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -165,6 +282,26 @@ public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { .isEqualTo("1969-12-30-00"); } + @Test + public void 
testNegativeTimestampNanoWithoutZoneToHumanStringLowerBound() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-30T00:00:00.000000000").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-00"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -185,6 +322,26 @@ public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { .isEqualTo("1969-12-31-23"); } + @Test + public void testNegativeTimestampNanoWithoutZoneToHumanStringUpperBound() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-31T23:59:59.999999999").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-31"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-31-23"); + } + @Test public void testTimestampWithZoneToHumanString() { Types.TimestampType type = Types.TimestampType.withZone(); @@ -207,7 +364,45 @@ public void 
testTimestampWithZoneToHumanString() { } @Test - public void testNullHumanString() { + public void testTimestampNanoWithZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); + Literal date = Literal.of("2017-12-01T10:12:55.038194789-08:00").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + // the hour is 18 because the value is always UTC + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-18"); + } + + @Test + public void testTimestampNullHumanString() { + Types.TimestampType type = Types.TimestampType.withZone(); + assertThat(Transforms.year().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.month().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.day().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.hour().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + } + + @Test + public void testTimestampNanoNullHumanString() { Types.TimestampType type = Types.TimestampType.withZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") @@ -244,6 +439,27 @@ public void testTimestampsReturnType() { assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } + @Test + public void testTimestampNanosReturnType() { + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); + + Transform 
year = Transforms.year(); + Type yearResultType = year.getResultType(type); + assertThat(yearResultType).isEqualTo(Types.IntegerType.get()); + + Transform month = Transforms.month(); + Type monthResultType = month.getResultType(type); + assertThat(monthResultType).isEqualTo(Types.IntegerType.get()); + + Transform day = Transforms.day(); + Type dayResultType = day.getResultType(type); + assertThat(dayResultType).isEqualTo(Types.DateType.get()); + + Transform hour = Transforms.hour(); + Type hourResultType = hour.getResultType(type); + assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); + } + @Test public void testApplyRejectsBadSourceType() { Timestamps badSourceType = From 068b18bf1b7040d2b674f25a3471e8467e2ff483 Mon Sep 17 00:00:00 2001 From: Jacob Marble Date: Mon, 8 Jul 2024 15:41:37 -0700 Subject: [PATCH 15/38] add timestamptz and timestamptz_ns to fromPrimitiveString test --- api/src/test/java/org/apache/iceberg/types/TestTypes.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index fa826d2fa545..226c53f1e9ce 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -31,8 +31,11 @@ public void fromPrimitiveString() { assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); assertThat(Types.fromPrimitiveString("timestamp")).isSameAs(Types.TimestampType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz")).isSameAs(Types.TimestampType.withZone()); assertThat(Types.fromPrimitiveString("timestamp_ns")) .isSameAs(Types.TimestampNanoType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz_ns")) + .isSameAs(Types.TimestampNanoType.withZone()); assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3)); From 269b3e9cdac9294df45dbfaede382dc450dad733 Mon Sep 17
00:00:00 2001 From: Jacob Marble Date: Mon, 8 Jul 2024 15:58:56 -0700 Subject: [PATCH 16/38] correct TestYears - was testing days --- .../test/java/org/apache/iceberg/transforms/TestDays.java | 3 +-- .../test/java/org/apache/iceberg/transforms/TestYears.java | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java index ea06fe495bfc..c090ad62c914 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java @@ -25,8 +25,7 @@ public class TestDays { @Test public void testSatisfiesOrderOf() { - Days days = Days.get(); - assertThatThrownBy(() -> days.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + assertThatThrownBy(() -> Days.get().satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageMatching("Unsupported timestamp unit: DAYS"); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java index 571180c1b524..d4b06b6f2a62 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java @@ -25,9 +25,8 @@ public class TestYears { @Test public void testSatisfiesOrderOf() { - Years years = Years.get(); - assertThatThrownBy(() -> years.satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) + assertThatThrownBy(() -> Years.get().satisfiesOrderOf(Timestamps.YEAR_FROM_NANOS)) .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: DAYS"); + .hasMessageMatching("Unsupported timestamp unit: YEARS"); } } From 846237427ad55af99a4289d496b3ef6d9bbd1868 Mon Sep 17 00:00:00 2001 From: Jacob Marble Date: Mon, 8 Jul 2024 16:03:10 -0700 Subject: [PATCH 17/38] test Avro timestamp conversion precisely --- 
.../expressions/TestStringLiteralConversions.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index 31cfbd709297..ae8ccd4f9068 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -157,6 +157,12 @@ public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); Literal timestamp = timestampStr.to(Types.TimestampNanoType.withoutZone()); + // Not only using Avro's timestamp conversion as it has no timestampNanos(). + long expected = 1503066061123456789L; + assertThat((long) timestamp.value()) + .as("Timestamp without zone should match UTC") + .isEqualTo(expected); + // use Avro's timestamp conversion to validate the result within one microsecond Schema avroSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); TimeConversions.TimestampMicrosConversion avroConversion = @@ -168,13 +174,7 @@ public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() avroSchema.getLogicalType()); assertThat(timestamp.value() - avroValue * 1000) .as("Timestamp without zone should match UTC") - .isLessThan(1000); - - // Not only using Avro's timestamp conversion as it has no timestampNanos(). - long expected = 1503066061123456789L; - assertThat((long) timestamp.value()) - .as("Timestamp without zone should match UTC") - .isEqualTo(expected); + .isEqualTo(789L); } @Test From 16319d80089549f72ea04a48f200349fb6c3dabf Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Mon, 8 Jul 2024 14:03:55 -0500 Subject: [PATCH 18/38] Add requested additional tests. 
--- .../TestTimestampLiteralConversions.java | 46 +++++++++++++++++++ .../org/apache/iceberg/types/TestTypes.java | 3 ++ .../apache/iceberg/util/TestDateTimeUtil.java | 39 ++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java new file mode 100644 index 000000000000..add374ee9a17 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.expressions; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTimestampLiteralConversions { + @Test + public void testTimestampToTimestampNanoConversion() { + Literal timestamp = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1510842668000000L); + + Literal timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(1510842668000000000L); + } + + @Test + public void testTimestampNanoToTimestampConversion() { + Literal timestamp = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1510842668000000001L); + + Literal timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(1510842668000000L); + } +} diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index fa826d2fa545..226c53f1e9ce 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -31,8 +31,11 @@ public void fromPrimitiveString() { assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); assertThat(Types.fromPrimitiveString("timestamp")).isSameAs(Types.TimestampType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz")).isSameAs(Types.TimestampType.withZone()); assertThat(Types.fromPrimitiveString("timestamp_ns")) .isSameAs(Types.TimestampNanoType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz_ns")) + .isSameAs(Types.TimestampNanoType.withZone()); assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3)); diff --git 
a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 27e59d2ad22f..6ae2891c35e0 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -40,6 +40,13 @@ public void formatTimestampMillis() { @Test public void nanosToMicros() { assertThat(DateTimeUtil.nanosToMicros(1510871468000001001L)).isEqualTo(1510871468000001L); + assertThat(DateTimeUtil.nanosToMicros(-1510871468000001001L)).isEqualTo(-1510871468000002L); + } + + @Test + public void microsToNanos() { + assertThat(DateTimeUtil.microsToNanos(1510871468000001L)).isEqualTo(1510871468000001000L); + assertThat(DateTimeUtil.microsToNanos(-1510871468000001L)).isEqualTo(-1510871468000001000L); } @Test @@ -50,6 +57,16 @@ public void isoTimestampToNanos() { @Test public void convertNanos() { + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(0); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MILLENNIA)).isEqualTo(0); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.CENTURIES)).isEqualTo(0); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(4); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(47); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(574); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(2498); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(17486); + assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HALF_DAYS)) + .isEqualTo(34973); assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HOURS)).isEqualTo(419686); assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MINUTES)) .isEqualTo(25181191); 
@@ -65,7 +82,29 @@ public void convertNanos() { @Test public void convertNanosNegative() { + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(-1); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLENNIA)) + .isEqualTo(-1); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.CENTURIES)) + .isEqualTo(-1); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(-5); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(-48); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(-575); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(-2499); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(-17487); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HALF_DAYS)) + .isEqualTo(-34974); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HOURS)) + .isEqualTo(-419687); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MINUTES)) + .isEqualTo(-25181192); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.SECONDS)) + .isEqualTo(-1510871469); assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLIS)) .isEqualTo(-1510871468001L); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MICROS)) + .isEqualTo(-1510871468000002L); + assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.NANOS)) + .isEqualTo(-1510871468000001001L); } } From e38f68aa386f220a7bb0a03011b9f5a21592410d Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Mon, 8 Jul 2024 14:44:13 -0500 Subject: [PATCH 19/38] Remove redundant and erroneous tests. These are from before commit 753aed5 changed the bucketing. New tests were added in that commit and in commit eb5e382, obviating these. 
--- .../iceberg/transforms/TestBucketing.java | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 279f1c15c589..0950f746a0de 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -112,59 +112,12 @@ public void testSpecValues() { .as("Spec example: hash(2017-11-16T22:31:08) = -2047944441") .isEqualTo(-2047944441); - timestampVal = Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.withoutZone()); - assertThat(BucketUtil.hash(timestampVal.value())) - .as("Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810") - .isEqualTo(-1207196810); - Literal timestamptzVal = Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.withZone()); assertThat(BucketUtil.hash(timestamptzVal.value())) .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441") .isEqualTo(-2047944441); - timestamptzVal = - Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.withZone()); - assertThat(BucketUtil.hash(timestamptzVal.value())) - .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810") - .isEqualTo(-1207196810); - - Literal timestampNsVal = - Literal.of("2017-11-16T22:31:08").to(Types.TimestampNanoType.withoutZone()); - assertThat(BucketUtil.hash(timestampNsVal.value())) - .as("Spec example: hash(2017-11-16T22:31:08) = -737750069") - .isEqualTo(-737750069); - - timestampNsVal = - Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampNanoType.withoutZone()); - assertThat(BucketUtil.hash(timestampNsVal.value())) - .as("Spec example: hash(2017-11-16T22:31:08.000001) = -976603392") - .isEqualTo(-976603392); - - timestampNsVal = - Literal.of("2017-11-16T22:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); - 
assertThat(BucketUtil.hash(timestampNsVal.value())) - .as("hash(2017-11-16T22:31:08.000000001) = -160215926") - .isEqualTo(-160215926); - - Literal timestamptzNsVal = - Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampNanoType.withZone()); - assertThat(BucketUtil.hash(timestamptzNsVal.value())) - .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -737750069") - .isEqualTo(-737750069); - - timestamptzNsVal = - Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampNanoType.withZone()); - assertThat(BucketUtil.hash(timestamptzNsVal.value())) - .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -976603392") - .isEqualTo(-976603392); - - timestamptzNsVal = - Literal.of("2017-11-16T14:31:08.000000001-08:00").to(Types.TimestampNanoType.withZone()); - assertThat(BucketUtil.hash(timestamptzNsVal.value())) - .as("Spec example: hash(2017-11-16T14:31:08.000000001-08:00) = -160215926") - .isEqualTo(-160215926); - assertThat(BucketUtil.hash("iceberg")) .as("Spec example: hash(\"iceberg\") = 1210000089") .isEqualTo(1210000089); From d34daad3682e3cd1db87183c0579087eef748ad2 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Thu, 11 Jul 2024 09:39:22 -0500 Subject: [PATCH 20/38] Update TestBucketing as requested. 
--- .../iceberg/transforms/TestBucketing.java | 82 +++++++++---------- 1 file changed, 38 insertions(+), 44 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 0950f746a0de..585694afd762 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -167,64 +167,58 @@ public void testLong() { @Test public void testTimestampNanoPromotion() { - // Values from spec Appendix B: 32-bit Hash Requirements - String timestamp1 = "2017-11-16T22:31:08"; - long expectedHash1 = -2047944441; - String timestamp2 = "2017-11-16T22:31:08.000001"; - long expectedHash2 = -1207196810; - String timestampNs1 = "2017-11-16T22:31:08"; - String timestampNs2 = "2017-11-16T22:31:08.000001001"; - Types.TimestampType tsType = Types.TimestampType.withoutZone(); Types.TimestampNanoType tsNsType = Types.TimestampNanoType.withoutZone(); - Bucket tsNsBucket = Bucket.get(tsNsType, 1); Bucket tsBucket = Bucket.get(tsType, 1); - assertThat(tsBucket.hash(Literal.of(timestamp1).to(tsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); - assertThat(tsNsBucket.hash(Literal.of(timestampNs1).to(tsNsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); - - assertThat(tsBucket.hash(Literal.of(timestamp2).to(tsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash2); - assertThat(tsNsBucket.hash(Literal.of(timestampNs2).to(tsNsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash2); + // Values from spec Appendix B: 32-bit Hash Requirements + assertThat(tsBucket.hash(Literal.of("2017-11-16T22:31:08").to(tsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08) = -2047944441 for 
Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + assertThat(tsNsBucket.hash(Literal.of("2017-11-16T22:31:08").to(tsNsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + + assertThat(tsBucket.hash(Literal.of("2017-11-16T22:31:08.000001").to(tsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + assertThat(tsNsBucket.hash(Literal.of("2017-11-16T22:31:08.000001001").to(tsNsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); } @Test public void testTimestampTzNanoPromotion() { - // Values from spec Appendix B: 32-bit Hash Requirements - long expectedHash1 = -2047944441; - long expectedHash2 = -1207196810; - String timestampTz1 = "2017-11-16T14:31:08-08:00"; - String timestampTz2 = "2017-11-16T14:31:08.000001-08:00"; - String timestampTzNs1 = "2017-11-16T14:31:08-08:00"; - String timestampTzNs2 = "2017-11-16T14:31:08.000001001-08:00"; - Types.TimestampType tsTzType = Types.TimestampType.withZone(); Types.TimestampNanoType tsTzNsType = Types.TimestampNanoType.withZone(); - Bucket tsTzNsBucket = Bucket.get(tsTzNsType, 1); Bucket tsTzBucket = Bucket.get(tsTzType, 1); - assertThat(tsTzBucket.hash(Literal.of(timestampTz1).to(tsTzType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); - assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs1).to(tsTzNsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash1); - - assertThat(tsTzBucket.hash(Literal.of(timestampTz2).to(tsTzType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash2); - 
assertThat(tsTzNsBucket.hash(Literal.of(timestampTzNs2).to(tsTzNsType).value())) - .as("Timestamp and TimestampNano bucket results should match") - .isEqualTo(expectedHash2); + // Values from spec Appendix B: 32-bit Hash Requirements + assertThat(tsTzBucket.hash(Literal.of("2017-11-16T14:31:08-08:00").to(tsTzType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + assertThat(tsTzNsBucket.hash(Literal.of("2017-11-16T14:31:08-08:00").to(tsTzNsType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + + assertThat(tsTzBucket.hash(Literal.of("2017-11-16T14:31:08.000001-08:00").to(tsTzType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + assertThat( + tsTzNsBucket.hash( + Literal.of("2017-11-16T14:31:08.000001001-08:00").to(tsTzNsType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); } @Test From 42ca8a40a1619da043a6c34e4da5812a68dafa17 Mon Sep 17 00:00:00 2001 From: Eric Gillespie Date: Wed, 31 Jul 2024 12:04:26 -0500 Subject: [PATCH 21/38] Convert long to TimestampLiteral and then that to TimestampNanoLiteral. Avoids timestamp granularity bugs. 
--- .../main/java/org/apache/iceberg/expressions/Literals.java | 2 +- .../test/java/org/apache/iceberg/types/TestConversions.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 35232a0591cd..f2b064b7fee8 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -301,7 +301,7 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) new TimestampLiteral(value()); case TIMESTAMP_NANO: - return (Literal) new TimestampNanoLiteral(value()); + return new TimestampLiteral(value()).to(type); case DATE: if ((long) Integer.MAX_VALUE < value()) { return aboveMax(); diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 00dc2f5df260..9764b8551313 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -109,9 +109,9 @@ public void testByteBufferConversions() { assertConversion( 400000L, TimestampNanoType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withoutZone()).toByteBuffer().array()) - .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withZone()).toByteBuffer().array()) - .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); // strings are stored as UTF-8 bytes (without length) // 'A' -> 65, 'B' -> 66, 'C' -> 67 From 0cbdeb8b2ccc3b5f9e525dc12a78147d117375bc Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 14:02:11 -0700 Subject: [PATCH 22/38] Use DateTimeUtil conversion instead of /. 
--- .../java/org/apache/iceberg/expressions/ExpressionUtil.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 9b6a3e5bf7eb..4d56529e62db 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -524,7 +524,7 @@ private static String sanitize(Type type, Object value, long now, int today) { case TIMESTAMP: return sanitizeTimestamp((long) value, now); case TIMESTAMP_NANO: - return sanitizeTimestamp((long) value / 1000, now); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value), now); case STRING: return sanitizeString((CharSequence) value, now, today); case BOOLEAN: @@ -547,7 +547,8 @@ private static String sanitize(Literal literal, long now, int today) { } else if (literal instanceof Literals.TimestampLiteral) { return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); } else if (literal instanceof Literals.TimestampNanoLiteral) { - return sanitizeTimestamp(((Literals.TimestampNanoLiteral) literal).value() / 1000, now); + return sanitizeTimestamp( + DateTimeUtil.nanosToMicros(((Literals.TimestampNanoLiteral) literal).value()), now); } else if (literal instanceof Literals.TimeLiteral) { return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { From 48626e56354d52c14bb6086dbec6c34782c42066 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:24:29 -0700 Subject: [PATCH 23/38] Update Literals to use DateTimeUtil, add new tests. 
--- .../apache/iceberg/expressions/Literals.java | 31 +-- .../apache/iceberg/transforms/Timestamps.java | 14 +- .../org/apache/iceberg/util/DateTimeUtil.java | 39 +++- .../TestTimestampLiteralConversions.java | 203 ++++++++++++++++++ 4 files changed, 258 insertions(+), 29 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index f2b064b7fee8..36a70ba32edc 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -301,6 +300,7 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) new TimestampLiteral(value()); case TIMESTAMP_NANO: + // assume micros and convert to nanos to match the behavior in the timestamp case above return new TimestampLiteral(value()).to(type); case DATE: if ((long) Integer.MAX_VALUE < value()) { @@ -440,11 +440,7 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) this; case DATE: - return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); + return (Literal) new DateLiteral(DateTimeUtil.microsToDays(value())); case TIMESTAMP_NANO: return (Literal) new TimestampNanoLiteral(DateTimeUtil.microsToNanos(value())); default: @@ -468,9 +464,7 @@ static class TimestampNanoLiteral extends ComparableLiteral { public Literal to(Type type) { switch (type.typeId()) { case DATE: - return (Literal) - new DateLiteral( - (int) ChronoUnit.DAYS.between(EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate())); + return (Literal) new DateLiteral(DateTimeUtil.nanosToDays(value())); case TIMESTAMP: return (Literal) new 
TimestampLiteral(DateTimeUtil.nanosToMicros(value())); case TIMESTAMP_NANO: @@ -535,29 +529,18 @@ public Literal to(Type type) { case TIMESTAMP: if (((Types.TimestampType) type).shouldAdjustToUTC()) { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + long timestampMicros = DateTimeUtil.isoTimestamptzToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } else { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); + long timestampMicros = DateTimeUtil.isoTimestampToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } case TIMESTAMP_NANO: if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { - return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestamptzToNanos(value())); } else { - long timestampNanos = - ChronoUnit.NANOS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); - return (Literal) new TimestampNanoLiteral(timestampNanos); + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); } case STRING: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index c917c257ad88..7f89bbcd1237 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -167,7 +167,19 @@ public Integer apply(Long timestamp) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - return Math.toIntExact(DateTimeUtil.convertNanos(timestamp, resultTypeUnit.unit)); + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.nanosToYears(timestamp); + case 
MONTHS: + return DateTimeUtil.nanosToMonths(timestamp); + case DAYS: + return DateTimeUtil.nanosToDays(timestamp); + case HOURS: + return DateTimeUtil.nanosToHours(timestamp); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 28c313fc3cb2..6d194431f840 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -27,6 +27,7 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.temporal.ChronoUnit; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; public class DateTimeUtil { private DateTimeUtil() {} @@ -77,6 +78,10 @@ public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. 
@@ -100,6 +105,10 @@ public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -135,9 +144,9 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } - public static long isoTimestampToNanos(CharSequence timestamp) { - return ChronoUnit.NANOS.between( - EPOCH, OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_DATE_TIME)); + public static long isoTimestamptzToNanos(CharSequence timestampString) { + return nanosFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } public static boolean isUTCTimestamptz(String timestampString) { @@ -151,6 +160,11 @@ public static long isoTimestampToMicros(String timestampString) { LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } + public static long isoTimestampToNanos(CharSequence timestampString) { + return nanosFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + } + public static int daysToYears(int days) { return convertDays(days, ChronoUnit.YEARS); } @@ -201,7 +215,24 @@ private static int convertMicros(long micros, ChronoUnit granularity) { } } - public static long convertNanos(long nanos, ChronoUnit granularity) { + public static int nanosToYears(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.YEARS)); + } + + public static int nanosToMonths(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.MONTHS)); + } + + public static int nanosToDays(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.DAYS)); + } + + public static int nanosToHours(long nanos) 
{ + return Math.toIntExact(convertNanos(nanos, ChronoUnit.HOURS)); + } + + @VisibleForTesting + static long convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java index add374ee9a17..181f5f6eade6 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java @@ -19,8 +19,11 @@ package org.apache.iceberg.expressions; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.time.format.DateTimeParseException; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; import org.junit.jupiter.api.Test; public class TestTimestampLiteralConversions { @@ -32,6 +35,76 @@ public void testTimestampToTimestampNanoConversion() { Literal timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); assertThat(timestampNano.value()).isEqualTo(1510842668000000000L); + + timestamp = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + 
assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1000L); + } + + @Test + public void testTimestampToDateConversion() { + Literal ts = + Literal.of("2017-11-16T14:31:08.000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1).isEqualTo(DateTimeUtil.isoDateToDays("1969-12-31")); + } + + @Test + public void testTimestampMicrosToDateConversion() { + Literal ts = + 
Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); } @Test @@ -42,5 +115,135 @@ public void testTimestampNanoToTimestampConversion() { Literal timestampNano = timestamp.to(Types.TimestampType.withoutZone()); assertThat(timestampNano.value()).isEqualTo(1510842668000000L); + + timestamp = + Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1000L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + } + + @Test + public void testTimestampNanosToDateConversion() { + Literal ts = + 
Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + } + + @Test + public void testTimestampNanosWithZoneConversion() { + Literal isoTimestampNanosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001+00:00"); + + assertThatThrownBy( + () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000000001L); + } + + + @Test + public void testTimestampMicrosWithZoneConversion() { + Literal isoTimestampMicrosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000001+00:00"); + + assertThatThrownBy( + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should 
not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + .isEqualTo(1510842668000001L); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000001000L); + } + + @Test + public void testTimestampNanosWithoutZoneConversion() { + Literal isoTimestampNanosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001"); + + assertThatThrownBy( + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000000001L); + } + + @Test + public void testTimestampMicrosWithoutZoneConversion() { + Literal isoTimestampMicrosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000001"); + + assertThatThrownBy( + () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + 
.as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000001L); + + assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000001000L); } } From 743e872bf57f20738ef41686c716c08bb3d59267 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:34:57 -0700 Subject: [PATCH 24/38] Fix test for DateTimeUtil.isoTimestampToNanos. --- .../java/org/apache/iceberg/util/TestDateTimeUtil.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 6ae2891c35e0..618868603c5c 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -51,7 +51,13 @@ public void microsToNanos() { @Test public void isoTimestampToNanos() { - assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T14:31:08.000001001-08:00")) + assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T22:31:08.000001001")) + .isEqualTo(1510871468000001001L); + } + + @Test + public void isoTimestamptzToNanos() { + assertThat(DateTimeUtil.isoTimestamptzToNanos("2017-11-16T14:31:08.000001001-08:00")) .isEqualTo(1510871468000001001L); } From dedeb19351126fe54a1afd6c376daf96d89b5f80 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:52:44 -0700 Subject: [PATCH 25/38] Fix TestDateTimeUtil and add test cases. 
--- .../iceberg/transforms/TransformUtil.java | 12 +-- .../org/apache/iceberg/util/DateTimeUtil.java | 17 +++- .../apache/iceberg/util/TestDateTimeUtil.java | 93 +++++++------------ 3 files changed, 55 insertions(+), 67 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 4c6f9118dd37..88f6b64e9218 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -24,8 +24,8 @@ import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; -import java.time.temporal.ChronoUnit; import java.util.Base64; +import org.apache.iceberg.util.DateTimeUtil; class TransformUtil { @@ -55,19 +55,19 @@ static String humanTime(Long microsFromMidnight) { } static String humanTimestampWithZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); + return DateTimeUtil.microsToIsoTimestamptz(timestampMicros); } static String humanTimestampWithoutZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + return DateTimeUtil.microsToIsoTimestamp(timestampMicros); } static String humanTimestampNanoWithZone(Long timestampNanos) { - return ChronoUnit.NANOS.addTo(EPOCH, timestampNanos).toString(); + return DateTimeUtil.nanosToIsoTimestamptz(timestampNanos); } - static String humanTimestampNanoWithoutZone(Long timestampMicros) { - return ChronoUnit.NANOS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + static String humanTimestampNanoWithoutZone(Long timestampNanos) { + return DateTimeUtil.nanosToIsoTimestamp(timestampNanos); } static String humanHour(int hourOrdinal) { diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 6d194431f840..bc901e492551 100644 --- 
a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -70,6 +70,10 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } @@ -126,11 +130,21 @@ public static String microsToIsoTimestamptz(long micros) { return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); } + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); + } + public static String microsToIsoTimestamp(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -231,8 +245,7 @@ public static int nanosToHours(long nanos) { return Math.toIntExact(convertNanos(nanos, ChronoUnit.HOURS)); } - @VisibleForTesting - static long convertNanos(long nanos, ChronoUnit granularity) { + private static long convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java 
b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 618868603c5c..6088fe51b57a 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -20,23 +20,11 @@ import static org.assertj.core.api.Assertions.assertThat; -import java.time.ZonedDateTime; -import java.time.temporal.ChronoUnit; +import org.apache.iceberg.transforms.Transforms; +import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; public class TestDateTimeUtil { - - @Test - public void formatTimestampMillis() { - String timestamp = "1970-01-01T00:00:00.001+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1L); - - timestamp = "1970-01-01T00:16:40+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1000000L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1000000L); - } - @Test public void nanosToMicros() { assertThat(DateTimeUtil.nanosToMicros(1510871468000001001L)).isEqualTo(1510871468000001L); @@ -53,64 +41,51 @@ public void microsToNanos() { public void isoTimestampToNanos() { assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T22:31:08.000001001")) .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestampToNanos("1922-02-15T01:28:51.999998999")) + .isEqualTo(-1510871468000001001L); } @Test public void isoTimestamptzToNanos() { assertThat(DateTimeUtil.isoTimestamptzToNanos("2017-11-16T14:31:08.000001001-08:00")) .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestamptzToNanos("1922-02-15T01:28:51.999998999+00:00")) + .isEqualTo(-1510871468000001001L); } @Test public void convertNanos() { - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, 
ChronoUnit.MILLENNIA)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.CENTURIES)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(4); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(47); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(574); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(2498); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(17486); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HALF_DAYS)) - .isEqualTo(34973); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HOURS)).isEqualTo(419686); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MINUTES)) - .isEqualTo(25181191); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.SECONDS)) - .isEqualTo(1510871468); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MILLIS)) - .isEqualTo(1510871468000L); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MICROS)) - .isEqualTo(1510871468000001L); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.NANOS)) - .isEqualTo(1510871468000001001L); + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withoutZone(), 1510871468000001001L)) + .isEqualTo("2017-11-16T22:31:08.000001001"); + assertThat(DateTimeUtil.nanosToYears(1510871468000001001L)).isEqualTo(47); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), 47)).isEqualTo("2017"); + assertThat(DateTimeUtil.nanosToMonths(1510871468000001001L)).isEqualTo(574); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), 574)).isEqualTo("2017-11"); + assertThat(DateTimeUtil.nanosToDays(1510871468000001001L)).isEqualTo(17486); + 
assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), 17486)) + .isEqualTo("2017-11-16"); + assertThat(DateTimeUtil.nanosToHours(1510871468000001001L)).isEqualTo(419686); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), 419686)) + .isEqualTo("2017-11-16-22"); } @Test public void convertNanosNegative() { - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLENNIA)) - .isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.CENTURIES)) - .isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(-5); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(-48); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(-575); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(-2499); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(-17487); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HALF_DAYS)) - .isEqualTo(-34974); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HOURS)) - .isEqualTo(-419687); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MINUTES)) - .isEqualTo(-25181192); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.SECONDS)) - .isEqualTo(-1510871469); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLIS)) - .isEqualTo(-1510871468001L); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MICROS)) - .isEqualTo(-1510871468000002L); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.NANOS)) - .isEqualTo(-1510871468000001001L); + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withZone(), 
-1510871468000001001L)) + .isEqualTo("1922-02-15T01:28:51.999998999+00:00"); + assertThat(DateTimeUtil.nanosToYears(-1510871468000001001L)).isEqualTo(-48); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), 47)).isEqualTo("2017"); + assertThat(DateTimeUtil.nanosToMonths(-1510871468000001001L)).isEqualTo(-575); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), 574)).isEqualTo("2017-11"); + assertThat(DateTimeUtil.nanosToDays(-1510871468000001001L)).isEqualTo(-17487); + assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), 17486)) + .isEqualTo("2017-11-16"); + assertThat(DateTimeUtil.nanosToHours(-1510871468000001001L)).isEqualTo(-419687); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), 419686)) + .isEqualTo("2017-11-16-22"); } } From c9f427359739115e466b31770a656a1e33d86e96 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 16:18:59 -0700 Subject: [PATCH 26/38] Simplify Timestamps transform get. --- .../apache/iceberg/transforms/Timestamps.java | 90 +++++++------------ .../apache/iceberg/transforms/Transforms.java | 4 +- .../iceberg/transforms/TestTimestamps.java | 16 ---- 3 files changed, 31 insertions(+), 79 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 7f89bbcd1237..ddd08271ece3 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -52,67 +52,37 @@ class Timestamps implements Transform { static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.HOURS, "hour"); - static Timestamps get(Types.TimestampType type, String resultTypeUnit) { - switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { - case "year": - return get(type, ChronoUnit.YEARS); - case "month": - return get(type, ChronoUnit.MONTHS); - case "day": - return get(type, 
ChronoUnit.DAYS); - case "hour": - return get(type, ChronoUnit.HOURS); + static Timestamps get(Type type, String transform) { + String name = transform.toLowerCase(Locale.ENGLISH); + switch (type.typeId()) { + case TIMESTAMP: + switch (name) { + case "year": + return YEAR_FROM_MICROS; + case "month": + return MONTH_FROM_MICROS; + case "day": + return DAY_FROM_MICROS; + case "hour": + return HOUR_FROM_MICROS; + default: + throw new IllegalArgumentException("Unsupported transform: " + name); + } + case TIMESTAMP_NANO: + switch (name) { + case "year": + return YEAR_FROM_NANOS; + case "month": + return MONTH_FROM_NANOS; + case "day": + return DAY_FROM_NANOS; + case "hour": + return HOUR_FROM_NANOS; + default: + throw new IllegalArgumentException("Unsupported transform: " + name); + } default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampNanoType type, String resultTypeUnit) { - switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { - case "year": - return get(type, ChronoUnit.YEARS); - case "month": - return get(type, ChronoUnit.MONTHS); - case "day": - return get(type, ChronoUnit.DAYS); - case "hour": - return get(type, ChronoUnit.HOURS); - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_MICROS; - case MONTHS: - return MONTH_FROM_MICROS; - case DAYS: - return DAY_FROM_MICROS; - case HOURS: - return HOUR_FROM_MICROS; - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampNanoType type, ChronoUnit resultTypeUnit) { - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_NANOS; - case MONTHS: - return 
MONTH_FROM_NANOS; - case DAYS: - return DAY_FROM_NANOS; - case HOURS: - return HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); + throw new IllegalArgumentException("Unsupported type for transform " + name + ": " + type); } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 610872f01065..2dccbf72b12f 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -24,7 +24,6 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; /** * Factory methods for transforms. @@ -87,9 +86,8 @@ private Transforms() {} try { switch (type.typeId()) { case TIMESTAMP: - return Timestamps.get((Types.TimestampType) type, transform); case TIMESTAMP_NANO: - return Timestamps.get((Types.TimestampNanoType) type, transform); + return Timestamps.get(type, transform); case DATE: return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index c62a8f8a490e..134085a589bb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -478,14 +478,6 @@ public void testApplyRejectsBadResultType() { .hasMessageMatching("Unsupported result type unit: NANOS"); } - @Test - public void testGetOfTimestampTypeRejectsBadChronoUnit() { - Types.TimestampType timestampType = Types.TimestampType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampType, ChronoUnit.CENTURIES)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type 
units: timestamptz -> Centuries"); - } - @Test public void testGetOfTimestampTypeRejectsBadString() { Types.TimestampType timestampType = Types.TimestampType.withZone(); @@ -494,14 +486,6 @@ public void testGetOfTimestampTypeRejectsBadString() { .hasMessageMatching("Unsupported source/result type units: timestamptz -> trash"); } - @Test - public void testGetOfTimestampNanoTypeRejectsBadChronoUnit() { - Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampNanoType, ChronoUnit.CENTURIES)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns -> Centuries"); - } - @Test public void testGetOfTimestampNanoTypeRejectsBadString() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); From 97489abd8b06f90030baf1281a6335df313de8c6 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 16:30:18 -0700 Subject: [PATCH 27/38] Remove ChronoUnit wrapper enum. 
--- .../org/apache/iceberg/transforms/Days.java | 12 +---- .../org/apache/iceberg/transforms/Months.java | 12 +---- .../apache/iceberg/transforms/Timestamps.java | 53 +++++-------------- .../org/apache/iceberg/transforms/Years.java | 12 +---- .../iceberg/transforms/TestTimestamps.java | 23 +------- 5 files changed, 22 insertions(+), 90 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index e8aae2d18262..24f844ee50ab 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.DAY_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); } else if (other instanceof Days || other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 766b2874c16b..c1ac280b69bf 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return 
Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.MONTH_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); } else if (other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index ddd08271ece3..5ab8e291044c 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import com.google.errorprone.annotations.Immutable; -import java.time.Duration; import java.time.temporal.ChronoUnit; import java.util.Locale; import org.apache.iceberg.expressions.BoundPredicate; @@ -36,21 +35,20 @@ class Timestamps implements Transform { static final Timestamps YEAR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.YEARS, "year"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS, "year"); static final Timestamps MONTH_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.MONTHS, "month"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS, "month"); static final Timestamps DAY_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.DAYS, "day"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS, "day"); static final Timestamps HOUR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.HOURS, "hour"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS, "hour"); static final Timestamps YEAR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.YEARS, "year"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS, "year"); static 
final Timestamps MONTH_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.DAYS, "day"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS, "month"); + static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS, "day"); static final Timestamps HOUR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.HOURS, "hour"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS, "hour"); static Timestamps get(Type type, String transform) { String name = transform.toLowerCase(Locale.ENGLISH); @@ -86,31 +84,12 @@ static Timestamps get(Type type, String transform) { } } - enum ResultTypeUnit { - YEARS(ChronoUnit.YEARS), - MONTHS(ChronoUnit.MONTHS), - DAYS(ChronoUnit.DAYS), - HOURS(ChronoUnit.HOURS), - MICROS(ChronoUnit.MICROS), - NANOS(ChronoUnit.NANOS); - - private final ChronoUnit unit; - - ResultTypeUnit(final ChronoUnit unit) { - this.unit = unit; - } - - Duration duration() { - return unit.getDuration(); - } - } - @Immutable static class Apply implements SerializableFunction { private final ChronoUnit sourceTypeUnit; - private final ResultTypeUnit resultTypeUnit; + private final ChronoUnit resultTypeUnit; - Apply(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit) { + Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { this.sourceTypeUnit = sourceTypeUnit; this.resultTypeUnit = resultTypeUnit; } @@ -160,7 +139,7 @@ public Integer apply(Long timestamp) { private final String name; private final Apply apply; - Timestamps(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit, String name) { + Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit, String name) { this.name = name; this.apply = new Apply(sourceTypeUnit, resultTypeUnit); } @@ -183,16 +162,12 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (apply.resultTypeUnit == 
ResultTypeUnit.DAYS) { + if (apply.resultTypeUnit == ChronoUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } - ResultTypeUnit resultTypeUnit() { - return apply.resultTypeUnit; - } - @Override public boolean preservesOrder() { return true; @@ -208,8 +183,8 @@ public boolean satisfiesOrderOf(Transform other) { // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies // the order of day Timestamps otherTransform = (Timestamps) other; - return apply.resultTypeUnit.duration().toHours() - <= otherTransform.apply.resultTypeUnit.duration().toHours(); + return apply.resultTypeUnit.getDuration().toHours() + <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 8b6ff318444a..d1a7d82ac8bd 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.YEAR_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); } else if (other instanceof Years) { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 134085a589bb..f62307788dd8 100644 --- 
a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -21,7 +21,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -460,30 +459,12 @@ public void testTimestampNanosReturnType() { assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } - @Test - public void testApplyRejectsBadSourceType() { - Timestamps badSourceType = - new Timestamps(ChronoUnit.CENTURIES, Timestamps.ResultTypeUnit.YEARS, "year"); - assertThatThrownBy(() -> badSourceType.apply(11L)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported source type unit: Centuries"); - } - - @Test - public void testApplyRejectsBadResultType() { - Timestamps badResultType = - new Timestamps(ChronoUnit.MICROS, Timestamps.ResultTypeUnit.NANOS, "nano"); - assertThatThrownBy(() -> badResultType.apply(11L)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported result type unit: NANOS"); - } - @Test public void testGetOfTimestampTypeRejectsBadString() { Types.TimestampType timestampType = Types.TimestampType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz -> trash"); + .hasMessageMatching("Unsupported transform: trash"); } @Test @@ -491,6 +472,6 @@ public void testGetOfTimestampNanoTypeRejectsBadString() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampNanoType, "trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns 
-> trash"); + .hasMessageMatching("Unsupported transform: trash"); } } From 8c3cc6772768d7b256c098ea746c30bfb542b29a Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:31:20 -0700 Subject: [PATCH 28/38] Restore Timestamps as enum and simplify boilerplate. --- .../org/apache/iceberg/transforms/Dates.java | 14 +- .../org/apache/iceberg/transforms/Days.java | 37 +-- .../org/apache/iceberg/transforms/Hours.java | 34 +-- .../org/apache/iceberg/transforms/Months.java | 37 +-- .../transforms/PartitionSpecVisitor.java | 16 +- .../iceberg/transforms/SortOrderVisitor.java | 16 +- .../iceberg/transforms/TimeTransform.java | 36 +++ .../apache/iceberg/transforms/Timestamps.java | 254 ++++++++++-------- .../iceberg/transforms/TransformUtil.java | 7 + .../apache/iceberg/transforms/Transforms.java | 78 ++---- .../org/apache/iceberg/transforms/Years.java | 37 +-- .../apache/iceberg/transforms/TestDates.java | 69 +++++ .../apache/iceberg/transforms/TestDays.java | 32 --- .../apache/iceberg/transforms/TestHours.java | 35 --- .../transforms/TestTimeTransforms.java | 128 +++++++++ .../iceberg/transforms/TestTimestamps.java | 193 +++++++++++-- .../apache/iceberg/transforms/TestYears.java | 32 --- 17 files changed, 634 insertions(+), 421 deletions(-) delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestDays.java delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestHours.java create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestYears.java diff --git a/api/src/main/java/org/apache/iceberg/transforms/Dates.java b/api/src/main/java/org/apache/iceberg/transforms/Dates.java index 3d26b542be7b..88db16797867 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Dates.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Dates.java @@ -97,6 +97,10 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); 
} + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -109,11 +113,11 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Dates) { - // test the granularity, in days. day(ts) => 1 day, months(ts) => 30 days, and day satisfies - // the order of months - Dates otherTransform = (Dates) other; - return granularity.getDuration().toDays() - <= otherTransform.granularity.getDuration().toDays(); + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index 24f844ee50ab..e2b829b86662 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Days get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.DAY_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.DAYS; } @Override - public Type getResultType(Type sourceType) { - return Types.DateType.get(); + protected Transform toEnum(Type type) { + return 
(Transform) + fromSourceType(type, Dates.DAY, Timestamps.MICROS_TO_DAY, Timestamps.NANOS_TO_DAY); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.DAY.satisfiesOrderOf(other); - } else if (other instanceof Days || other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.DateType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index b198d0e94eac..2ff79f6a66a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -30,17 +31,16 @@ static Hours get() { return (Hours) INSTANCE; } + @Override + protected ChronoUnit granularity() { + return ChronoUnit.HOURS; + } + @Override @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { - switch (type.typeId()) { - case TIMESTAMP: - return (Transform) Timestamps.HOUR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + return (Transform) + fromSourceType(type, null, Timestamps.MICROS_TO_HOUR, Timestamps.NANOS_TO_HOUR); } @Override @@ -53,24 +53,6 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); } - @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) 
{ - return other == Timestamps.HOUR_FROM_MICROS || other == Timestamps.HOUR_FROM_NANOS; - } else if (other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years) { - return true; - } - - return false; - } - @Override public String toHumanString(Type alwaysInt, Integer value) { return value != null ? TransformUtil.humanHour(value) : "null"; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index c1ac280b69bf..73ec50e5dd9a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Months get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.MONTH_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.MONTHS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.MONTH, Timestamps.MICROS_TO_MONTH, Timestamps.NANOS_TO_MONTH); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.MONTH.satisfiesOrderOf(other); - } else if 
(other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.IntegerType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index b3f8d600bd38..0d80ef88a296 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -122,22 +122,22 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor int width = ((Truncate) transform).width(); return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR_FROM_MICROS - || transform == Timestamps.YEAR_FROM_NANOS + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH_FROM_MICROS - || transform == Timestamps.MONTH_FROM_NANOS + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.DAY - || transform == Timestamps.DAY_FROM_MICROS - || transform == Timestamps.DAY_FROM_NANOS + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY || transform instanceof Days) { return visitor.day(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Timestamps.HOUR_FROM_MICROS - || transform == Timestamps.HOUR_FROM_NANOS + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR || transform instanceof Hours) { return visitor.hour(field.fieldId(), sourceName, 
field.sourceId()); } else if (transform instanceof VoidTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 998c63d8e367..62cc9d3cdb33 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -85,25 +85,25 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { visitor.truncate( sourceName, field.sourceId(), width, field.direction(), field.nullOrder())); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR_FROM_MICROS - || transform == Timestamps.YEAR_FROM_NANOS + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH_FROM_MICROS - || transform == Timestamps.MONTH_FROM_NANOS + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.DAY - || transform == Timestamps.DAY_FROM_MICROS - || transform == Timestamps.DAY_FROM_NANOS + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY || transform instanceof Days) { results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Timestamps.HOUR_FROM_MICROS - || transform == Timestamps.HOUR_FROM_NANOS + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR || transform instanceof Hours) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); diff --git 
a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index 23ee38271e81..d28ffb465d70 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.transforms; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.UnboundPredicate; @@ -25,6 +26,24 @@ import org.apache.iceberg.util.SerializableFunction; abstract class TimeTransform implements Transform { + protected static R fromSourceType(Type type, R dateResult, R microsResult, R nanosResult) { + switch (type.typeId()) { + case DATE: + if (dateResult != null) { + return dateResult; + } + break; + case TIMESTAMP: + return microsResult; + case TIMESTAMP_NANO: + return nanosResult; + } + + throw new IllegalArgumentException("Unsupported type: " + type); + } + + protected abstract ChronoUnit granularity(); + protected abstract Transform toEnum(Type type); @Override @@ -37,6 +56,23 @@ public boolean preservesOrder() { return true; } + @Override + public boolean satisfiesOrderOf(Transform other) { + if (this == other) { + return true; + } + + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity(), ((TimeTransform) other).granularity()); + } + + return false; + } + @Override public boolean canTransform(Type type) { return type.typeId() == Type.TypeID.DATE diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java 
b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 5ab8e291044c..8b8c2ca0a96b 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -20,7 +20,6 @@ import com.google.errorprone.annotations.Immutable; import java.time.temporal.ChronoUnit; -import java.util.Locale; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.Expression; @@ -32,116 +31,25 @@ import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; -class Timestamps implements Transform { - - static final Timestamps YEAR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS, "year"); - static final Timestamps MONTH_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS, "day"); - static final Timestamps HOUR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS, "hour"); - static final Timestamps YEAR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS, "year"); - static final Timestamps MONTH_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS, "day"); - static final Timestamps HOUR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS, "hour"); - - static Timestamps get(Type type, String transform) { - String name = transform.toLowerCase(Locale.ENGLISH); - switch (type.typeId()) { - case TIMESTAMP: - switch (name) { - case "year": - return YEAR_FROM_MICROS; - case "month": - return MONTH_FROM_MICROS; - case "day": - return DAY_FROM_MICROS; - case "hour": - return HOUR_FROM_MICROS; - default: - throw new IllegalArgumentException("Unsupported transform: " + name); - } - case 
TIMESTAMP_NANO: - switch (name) { - case "year": - return YEAR_FROM_NANOS; - case "month": - return MONTH_FROM_NANOS; - case "day": - return DAY_FROM_NANOS; - case "hour": - return HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported transform: " + name); - } - default: - throw new IllegalArgumentException("Unsupported type for transform " + name + ": " + type); - } - } - - @Immutable - static class Apply implements SerializableFunction { - private final ChronoUnit sourceTypeUnit; - private final ChronoUnit resultTypeUnit; - - Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { - this.sourceTypeUnit = sourceTypeUnit; - this.resultTypeUnit = resultTypeUnit; - } - - @Override - public Integer apply(Long timestamp) { - if (timestamp == null) { - return null; - } +enum Timestamps implements Transform { + MICROS_TO_YEAR(ChronoUnit.YEARS, "year", MicrosToYears.INSTANCE), + MICROS_TO_MONTH(ChronoUnit.MONTHS, "month", MicrosToMonths.INSTANCE), + MICROS_TO_DAY(ChronoUnit.DAYS, "day", MicrosToDays.INSTANCE), + MICROS_TO_HOUR(ChronoUnit.HOURS, "hour", MicrosToHours.INSTANCE), - switch (sourceTypeUnit) { - case MICROS: - switch (resultTypeUnit) { - case YEARS: - return DateTimeUtil.microsToYears(timestamp); - case MONTHS: - return DateTimeUtil.microsToMonths(timestamp); - case DAYS: - return DateTimeUtil.microsToDays(timestamp); - case HOURS: - return DateTimeUtil.microsToHours(timestamp); - default: - throw new UnsupportedOperationException( - "Unsupported result type unit: " + resultTypeUnit); - } - case NANOS: - switch (resultTypeUnit) { - case YEARS: - return DateTimeUtil.nanosToYears(timestamp); - case MONTHS: - return DateTimeUtil.nanosToMonths(timestamp); - case DAYS: - return DateTimeUtil.nanosToDays(timestamp); - case HOURS: - return DateTimeUtil.nanosToHours(timestamp); - default: - throw new UnsupportedOperationException( - "Unsupported result type unit: " + resultTypeUnit); - } - default: - throw new UnsupportedOperationException( 
- "Unsupported source type unit: " + sourceTypeUnit); - } - } - } + NANOS_TO_YEAR(ChronoUnit.YEARS, "year", NanosToYears.INSTANCE), + NANOS_TO_MONTH(ChronoUnit.MONTHS, "month", NanosToMonths.INSTANCE), + NANOS_TO_DAY(ChronoUnit.DAYS, "day", NanosToDays.INSTANCE), + NANOS_TO_HOUR(ChronoUnit.HOURS, "hour", NanosToHours.INSTANCE); + private final ChronoUnit granularity; private final String name; - private final Apply apply; + private final SerializableFunction apply; - Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit, String name) { + Timestamps(ChronoUnit granularity, String name, SerializableFunction apply) { this.name = name; - this.apply = new Apply(sourceTypeUnit, resultTypeUnit); + this.granularity = granularity; + this.apply = apply; } @Override @@ -162,12 +70,16 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (apply.resultTypeUnit == ChronoUnit.DAYS) { + if (granularity == ChronoUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -179,12 +91,12 @@ public boolean satisfiesOrderOf(Transform other) { return true; } - if (other instanceof Timestamps) { - // test the granularity, in hours. 
hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies - // the order of day - Timestamps otherTransform = (Timestamps) other; - return apply.resultTypeUnit.getDuration().toHours() - <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; @@ -242,7 +154,7 @@ public String toHumanString(Type outputType, Integer value) { return "null"; } - switch (apply.resultTypeUnit) { + switch (granularity) { case YEARS: return TransformUtil.humanYear(value); case MONTHS: @@ -252,7 +164,7 @@ public String toHumanString(Type outputType, Integer value) { case HOURS: return TransformUtil.humanHour(value); default: - throw new UnsupportedOperationException("Unsupported time unit: " + apply.resultTypeUnit); + throw new UnsupportedOperationException("Unsupported time unit: " + granularity); } } @@ -265,4 +177,116 @@ public String toString() { public String dedupName() { return "time"; } + + @Immutable + static class MicrosToYears implements SerializableFunction { + static final MicrosToYears INSTANCE = new MicrosToYears(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToYears(micros); + } + } + + @Immutable + static class MicrosToMonths implements SerializableFunction { + static final MicrosToMonths INSTANCE = new MicrosToMonths(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToMonths(micros); + } + } + + @Immutable + static class MicrosToDays implements SerializableFunction { + static final MicrosToDays INSTANCE = new 
MicrosToDays(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToDays(micros); + } + } + + @Immutable + static class MicrosToHours implements SerializableFunction { + static final MicrosToHours INSTANCE = new MicrosToHours(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToHours(micros); + } + } + + @Immutable + static class NanosToYears implements SerializableFunction { + static final NanosToYears INSTANCE = new NanosToYears(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToYears(nanos); + } + } + + @Immutable + static class NanosToMonths implements SerializableFunction { + static final NanosToMonths INSTANCE = new NanosToMonths(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToMonths(nanos); + } + } + + @Immutable + static class NanosToDays implements SerializableFunction { + static final NanosToDays INSTANCE = new NanosToDays(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToDays(nanos); + } + } + + @Immutable + static class NanosToHours implements SerializableFunction { + static final NanosToHours INSTANCE = new NanosToHours(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToHours(nanos); + } + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 88f6b64e9218..dd7f97e950e8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -24,6 +24,7 @@ import java.time.LocalTime; import java.time.OffsetDateTime; 
import java.time.ZoneOffset; +import java.time.temporal.ChronoUnit; import java.util.Base64; import org.apache.iceberg.util.DateTimeUtil; @@ -81,4 +82,10 @@ static String base64encode(ByteBuffer buffer) { // use direct encoding because all of the encoded bytes are in ASCII return StandardCharsets.ISO_8859_1.decode(Base64.getEncoder().encode(buffer)).toString(); } + + static boolean satisfiesOrderOf(ChronoUnit leftGranularity, ChronoUnit rightGranularity) { + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies + // the order of day + return leftGranularity.getDuration().toHours() <= rightGranularity.getDuration().toHours(); + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 2dccbf72b12f..aacd4d430069 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -67,6 +67,10 @@ private Transforms() {} return new UnknownTransform<>(transform); } + /** + * @deprecated use {@link #identity()} instead; will be removed in 2.0.0 + */ + @Deprecated public static Transform fromString(Type type, String transform) { Matcher widthMatcher = HAS_WIDTH.matcher(transform); if (widthMatcher.matches()) { @@ -79,24 +83,20 @@ private Transforms() {} } } - if (transform.equalsIgnoreCase("identity")) { - return Identity.get(type); - } - - try { - switch (type.typeId()) { - case TIMESTAMP: - case TIMESTAMP_NANO: - return Timestamps.get(type, transform); - case DATE: - return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } - } catch (IllegalArgumentException ignored) { - // fall through to return unknown transform - } - - if (transform.equalsIgnoreCase("void")) { - return VoidTransform.get(); + String lowerTransform = transform.toLowerCase(Locale.ENGLISH); + switch (lowerTransform) { + case "identity": + return Identity.get(type); + case "year": + 
return Years.get().toEnum(type); + case "month": + return Months.get().toEnum(type); + case "day": + return Days.get().toEnum(type); + case "hour": + return Hours.get().toEnum(type); + case "void": + return VoidTransform.get(); } return new UnknownTransform<>(transform); @@ -126,16 +126,7 @@ public static Transform identity(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform year(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.YEAR_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by year"); - } + return (Transform) Years.get().toEnum(type); } /** @@ -149,16 +140,7 @@ public static Transform year(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform month(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.MONTH_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by month"); - } + return (Transform) Months.get().toEnum(type); } /** @@ -172,16 +154,7 @@ public static Transform month(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform day(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.DAY_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by day"); - } + return (Transform) Days.get().toEnum(type); } /** @@ -195,14 +168,7 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - switch (type.typeId()) { - case 
TIMESTAMP: - return (Transform) Timestamps.HOUR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException(String.format("Cannot partition type %s by hour", type)); - } + return (Transform) Hours.get().toEnum(type); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index d1a7d82ac8bd..2920a37dc692 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Years get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.YEAR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.YEARS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.YEAR, Timestamps.MICROS_TO_YEAR, Timestamps.NANOS_TO_YEAR); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { 
+ return Types.IntegerType.get(); } @Override diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java index b9c380244666..c899b4cfa1cb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java @@ -26,6 +26,75 @@ import org.junit.jupiter.api.Test; public class TestDates { + @Test + public void testSatisfiesOrderOfDates() { + assertThat(Dates.DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestamps() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + 
assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestampNanos() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimeTransforms() { + assertThat(Dates.DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Months.get())).isFalse(); + 
assertThat(Dates.YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + @Test @SuppressWarnings("deprecation") public void testDeprecatedDateTransform() { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java deleted file mode 100644 index c090ad62c914..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.junit.jupiter.api.Test; - -public class TestDays { - @Test - public void testSatisfiesOrderOf() { - assertThatThrownBy(() -> Days.get().satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: DAYS"); - } -} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java deleted file mode 100644 index 8e070aec8b9c..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.Test; - -public class TestHours { - @Test - public void testToEnum() { - Hours hours = Hours.get(); - Types.DateType type = Types.DateType.get(); - assertThatThrownBy(() -> hours.toEnum(type)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported type: date"); - } -} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java new file mode 100644 index 000000000000..4093aa3b196f --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java @@ -0,0 +1,128 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTimeTransforms { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Hours.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + 
assertThat(Hours.get().satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testHoursToEnum() { + Hours hours = Hours.get(); + Types.DateType type = Types.DateType.get(); + assertThatThrownBy(() -> hours.toEnum(type)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported type: date"); + } + +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index f62307788dd8..52eab49f7414 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; @@ -27,6 +26,182 @@ import 
org.junit.jupiter.api.Test; public class TestTimestamps { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfDates() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + 
assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + 
assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { @@ -458,20 +633,4 @@ public void testTimestampNanosReturnType() { Type hourResultType = hour.getResultType(type); assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } - - @Test - public void testGetOfTimestampTypeRejectsBadString() { - Types.TimestampType timestampType = Types.TimestampType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported transform: trash"); - } - - @Test - public void testGetOfTimestampNanoTypeRejectsBadString() { - Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampNanoType, 
"trash")) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported transform: trash"); - } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java deleted file mode 100644 index d4b06b6f2a62..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.junit.jupiter.api.Test; - -public class TestYears { - @Test - public void testSatisfiesOrderOf() { - assertThatThrownBy(() -> Years.get().satisfiesOrderOf(Timestamps.YEAR_FROM_NANOS)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: YEARS"); - } -} From 6f25c99a83c9435bfdf5fe5806dd027b2de52dfe Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:40:05 -0700 Subject: [PATCH 29/38] Minor fix to bucket transform. 
--- .../main/java/org/apache/iceberg/transforms/Bucket.java | 7 ++++--- .../main/java/org/apache/iceberg/util/DateTimeUtil.java | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 7e1a089f5b51..0e4e782cc110 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -33,6 +33,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.BucketUtil; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; class Bucket implements Transform, Serializable { @@ -217,7 +218,7 @@ protected int hash(Long value) { } } - // In order to bucket TimestampNano the same as Timestamp, we divide these values by 1000. + // In order to bucket TimestampNano the same as Timestamp, convert to micros before hashing. 
private static class BucketTimestampNano extends Bucket implements SerializableFunction { @@ -226,8 +227,8 @@ private BucketTimestampNano(int numBuckets) { } @Override - protected int hash(Long value) { - return BucketUtil.hash(Math.floorDiv(value, 1000)); + protected int hash(Long nanos) { + return BucketUtil.hash(DateTimeUtil.nanosToMicros(nanos)); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index bc901e492551..3c312486be00 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -27,7 +27,6 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.temporal.ChronoUnit; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; public class DateTimeUtil { private DateTimeUtil() {} From f006cb244bc5802a8a8d4410ce57e03e9eb1568f Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:44:36 -0700 Subject: [PATCH 30/38] Fix style --- api/src/main/java/org/apache/iceberg/types/Types.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index f9a683d46052..2352b9b52f13 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -301,10 +301,10 @@ public String toString() { public boolean equals(Object other) { if (this == other) { return true; - } - if (!(other instanceof TimestampNanoType)) { + } else if (!(other instanceof TimestampNanoType)) { return false; } + return adjustToUTC == ((TimestampNanoType) other).adjustToUTC; } From 066c9557a939ab804c4acb593865a96291c154dc Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:54:42 -0700 Subject: [PATCH 31/38] Fix typos in TestTimestamps. 
--- .../org/apache/iceberg/transforms/TestTimestamps.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 52eab49f7414..78b0e67c686b 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -208,7 +208,7 @@ public void testDeprecatedTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform years = Transforms.year(type); assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); @@ -241,7 +241,7 @@ public void testDeprecatedTimestampNanoTransform() { Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(type); assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); @@ -273,7 +273,7 @@ public void testTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform years = Transforms.year(); assertThat((int) 
years.bind(type).apply(ts.value())) @@ -317,7 +317,7 @@ public void testTimestampNanoTransform() { Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(); assertThat((int) years.bind(type).apply(ts.value())) @@ -577,7 +577,7 @@ public void testTimestampNullHumanString() { @Test public void testTimestampNanoNullHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") .isEqualTo("null"); From 4d7720281f936e751b69e4c724225c76958f766d Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:04:17 -0700 Subject: [PATCH 32/38] Add a comment to clarify conversion test. --- .../test/java/org/apache/iceberg/types/TestConversions.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 9764b8551313..e207cfd8d59a 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -104,10 +104,12 @@ public void testByteBufferConversions() { .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + // values passed to assertConversion and Literal.of differ because Literal.of(...) 
assumes + // the value is in micros, which gets converted when to(TimestampNanoType) is called assertConversion( - 400000L, TimestampNanoType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + 400000000L, TimestampNanoType.withoutZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertConversion( - 400000L, TimestampNanoType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + 400000000L, TimestampNanoType.withZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withZone()).toByteBuffer().array()) From 9a3d16f7e828f5503d33b84572d6efffde6b0070 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:05:57 -0700 Subject: [PATCH 33/38] Split timestamp and timestamp_ns comparator test cases. --- .../test/java/org/apache/iceberg/types/TestComparators.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 89007480cb0f..07653ba3c8a8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -77,6 +77,10 @@ public void testTime() { public void testTimestamp() { assertComparesCorrectly(Comparators.forType(Types.TimestampType.withoutZone()), 111, 222); assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); + } + + @Test + public void testTimestampNanos() { assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withoutZone()), 111, 222); assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withZone()), 111, 222); } From 274de5670158597bc1c39f85936e20657bcdeba7 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:10:36 -0700 Subject: [PATCH 34/38] Fix spec 
update to specify microsecond hashing. --- format/spec.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/format/spec.md b/format/spec.md index 4727c48c6c26..9b447db56cb4 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1050,10 +1050,10 @@ The 32-bit hash implementation is 32-bit Murmur3 hash, x86 variant, seeded with | **`time`** | `hashLong(microsecsFromMidnight(v))` | `22:31:08` → `-662762989` | | **`timestamp`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001` → `-1207196810` | | **`timestamptz`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001-08:00` → `-1207196810` | -| **`timestamp_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | -| **`timestamptz_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | +| **`timestamp_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3] | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | +| **`timestamptz_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3]| `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | | **`string`** | `hashBytes(utf8Bytes(v))` | `iceberg` → `1210000089` | -| **`uuid`** | `hashBytes(uuidBytes(v))` [3] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | +| **`uuid`** | `hashBytes(uuidBytes(v))` [4] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | | **`fixed(L)`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | | **`binary`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | @@ -1062,17 +1062,18 @@ The types below are not currently valid for bucketing, and so are not hashed. Ho | Primitive type | Hash specification | Test value | |--------------------|-------------------------------------------|--------------------------------------------| | **`boolean`** | `false: hashInt(0)`, `true: hashInt(1)` | `true` → `1392991556` | -| **`float`** | `hashLong(doubleToLongBits(double(v))` [4]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | -| **`double`** | `hashLong(doubleToLongBits(v))` [4]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | +| **`float`** | `hashLong(doubleToLongBits(double(v))` [5]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | +| **`double`** | `hashLong(doubleToLongBits(v))` [5]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | Notes: 1. Integer and long hash results must be identical for all integer values. This ensures that schema evolution does not change bucket partition values if integer types are promoted. 2. Decimal values are hashed using the minimum number of bytes required to hold the unscaled value as a two’s complement big-endian; this representation does not include padding bytes required for storage in a fixed-length array. Hash results are not dependent on decimal scale, which is part of the type, not the data value. -3. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. 
This UUID encoded as a byte array is: +3. Nanosecond timestamps must be converted to microsecond precision before hashing to ensure timestamps have the same hash value. +4. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. This UUID encoded as a byte array is: `F7 9C 3E 09 67 7C 4B BD A4 79 3F 34 9C B7 85 E7` -4. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. +5. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. 
## Appendix C: JSON serialization From 0bbd3d690eff7a6efe0ef8892257a106605ed028 Mon Sep 17 00:00:00 2001 From: epgif Date: Mon, 26 Aug 2024 14:20:17 -0500 Subject: [PATCH 35/38] Run :iceberg-api:spotlessApply --- .../apache/iceberg/expressions/Literals.java | 3 +- .../iceberg/transforms/TimeTransform.java | 3 +- .../apache/iceberg/transforms/Transforms.java | 4 +-- .../TestTimestampLiteralConversions.java | 22 ++++++------- .../transforms/TestTimeTransforms.java | 32 ++++++++----------- 5 files changed, 28 insertions(+), 36 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 36a70ba32edc..ee47035b1e72 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -538,7 +538,8 @@ public Literal to(Type type) { case TIMESTAMP_NANO: if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { - return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestamptzToNanos(value())); + return (Literal) + new TimestampNanoLiteral(DateTimeUtil.isoTimestamptzToNanos(value())); } else { return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); } diff --git a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index d28ffb465d70..c348fda52b02 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -67,7 +67,8 @@ public boolean satisfiesOrderOf(Transform other) { } else if (other instanceof Timestamps) { return TransformUtil.satisfiesOrderOf(granularity(), ((Timestamps) other).granularity()); } else if (other instanceof TimeTransform) { - return TransformUtil.satisfiesOrderOf(granularity(), ((TimeTransform) other).granularity()); + return TransformUtil.satisfiesOrderOf( + 
granularity(), ((TimeTransform) other).granularity()); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index aacd4d430069..11282efdefb1 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -67,9 +67,7 @@ private Transforms() {} return new UnknownTransform<>(transform); } - /** - * @deprecated use {@link #identity()} instead; will be removed in 2.0.0 - */ + /** @deprecated use {@link #identity()} instead; will be removed in 2.0.0 */ @Deprecated public static Transform fromString(Type type, String transform) { Matcher widthMatcher = HAS_WIDTH.matcher(transform); diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java index 181f5f6eade6..379ad4db5e97 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java @@ -163,8 +163,7 @@ public void testTimestampNanosWithZoneConversion() { Literal isoTimestampNanosWithZoneOffset = Literal.of("2017-11-16T14:31:08.000000001+00:00"); - assertThatThrownBy( - () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withoutZone())) + assertThatThrownBy(() -> isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withoutZone())) .as("Should not convert timestamp with offset to a timestamp without zone") .isInstanceOf(DateTimeParseException.class); @@ -180,19 +179,17 @@ public void testTimestampNanosWithZoneConversion() { .isEqualTo(1510842668000000001L); } - @Test public void testTimestampMicrosWithZoneConversion() { Literal isoTimestampMicrosWithZoneOffset = Literal.of("2017-11-16T14:31:08.000001+00:00"); - assertThatThrownBy( - () -> 
isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withoutZone())) + assertThatThrownBy(() -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withoutZone())) .as("Should not convert timestamp with offset to a timestamp without zone") .isInstanceOf(DateTimeParseException.class); assertThatThrownBy( - () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) .as("Should not convert timestamp with offset to a timestamp without zone") .isInstanceOf(DateTimeParseException.class); @@ -208,13 +205,12 @@ public void testTimestampNanosWithoutZoneConversion() { Literal isoTimestampNanosWithoutZoneOffset = Literal.of("2017-11-16T14:31:08.000000001"); - assertThatThrownBy( - () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withZone())) + assertThatThrownBy(() -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withZone())) .as("Should not convert timestamp without offset to a timestamp with zone") .isInstanceOf(DateTimeParseException.class); assertThatThrownBy( - () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) .as("Should not convert timestamp without offset to a timestamp with zone") .isInstanceOf(DateTimeParseException.class); @@ -230,20 +226,20 @@ public void testTimestampMicrosWithoutZoneConversion() { Literal isoTimestampMicrosWithoutZoneOffset = Literal.of("2017-11-16T14:31:08.000001"); - assertThatThrownBy( - () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withZone())) + assertThatThrownBy(() -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withZone())) .as("Should not convert timestamp without offset to a timestamp with zone") .isInstanceOf(DateTimeParseException.class); assertThatThrownBy( - () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + () -> 
isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) .as("Should not convert timestamp without offset to a timestamp with zone") .isInstanceOf(DateTimeParseException.class); assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) .isEqualTo(1510842668000001L); - assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + assertThat( + isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) .isEqualTo(1510842668000001000L); } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java index 4093aa3b196f..2a161f9bc822 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. 
You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; @@ -124,5 +121,4 @@ public void testHoursToEnum() { .isInstanceOf(IllegalArgumentException.class) .hasMessageMatching("Unsupported type: date"); } - } From 58f11a32cc77472eeca02d90b38de0a9f706b38f Mon Sep 17 00:00:00 2001 From: epgif Date: Fri, 30 Aug 2024 08:13:43 -0500 Subject: [PATCH 36/38] fix testTimestampWithZoneHumanString --- .../test/java/org/apache/iceberg/transforms/TestIdentity.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 6101fdf0986d..93d3281411f3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -106,7 +106,7 @@ public void testTimestampWithZoneHumanString() { // value will always be in UTC assertThat(identity.toHumanString(timestamptz, ts.value())) .as("Should produce timestamp with time zone adjusted to UTC") - .isEqualTo("2017-12-01T18:12:55.038194Z"); + 
.isEqualTo("2017-12-01T18:12:55.038194+00:00"); } @Test From 8ea5777c5d6a0a6c5ee1c386e9d2cd7f6e145f8c Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 2 Sep 2024 10:08:58 -0700 Subject: [PATCH 37/38] Prevent creating table metadata with nanosecond timestamps before v3. --- .../main/java/org/apache/iceberg/Schema.java | 25 +++++++++ .../org/apache/iceberg/TableMetadata.java | 4 +- .../org/apache/iceberg/TestTableMetadata.java | 51 +++++++++++++++++++ .../TableMetadataUnsupportedVersion.json | 4 +- 4 files changed, 81 insertions(+), 3 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index d5ec3f250982..c2c1dd85306f 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -54,6 +54,8 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; + private static final Map MIN_FORMAT_VERSIONS = + ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3); private final StructType struct; private final int schemaId; @@ -573,4 +575,27 @@ private List reassignIds(List columns, TypeUtil.GetID }); return res.asStructType().fields(); } + + /** + * Check the compatibility of the schema with a format version. + * + *

This validates that the schema does not contain types that were released in later format + * versions. + * + * @param schema a Schema + * @param formatVersion table format version + */ + public static void checkCompatibility(Schema schema, int formatVersion) { + // check the type in each field + for (NestedField field : schema.lazyIdToField().values()) { + Integer minFormatVersion = MIN_FORMAT_VERSIONS.get(field.type().typeId()); + Preconditions.checkState( + minFormatVersion == null || formatVersion >= minFormatVersion, + "Invalid type in v%s schema: %s %s is not supported until v%s", + formatVersion, + schema.findColumnName(field.fieldId()), + field.type(), + minFormatVersion); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index a7edddecad52..12336fd99879 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -51,7 +51,7 @@ public class TableMetadata implements Serializable { static final long INITIAL_SEQUENCE_NUMBER = 0; static final long INVALID_SEQUENCE_NUMBER = -1; static final int DEFAULT_TABLE_FORMAT_VERSION = 2; - static final int SUPPORTED_TABLE_FORMAT_VERSION = 2; + static final int SUPPORTED_TABLE_FORMAT_VERSION = 3; static final int INITIAL_SPEC_ID = 0; static final int INITIAL_SORT_ORDER_ID = 1; static final int INITIAL_SCHEMA_ID = 0; @@ -1489,6 +1489,8 @@ private int addSchemaInternal(Schema schema, int newLastColumnId) { newLastColumnId, lastColumnId); + Schema.checkCompatibility(schema, formatVersion); + int newSchemaId = reuseOrCreateNewSchemaId(schema); boolean schemaFound = schemasById.containsKey(newSchemaId); if (schemaFound && newLastColumnId == lastColumnId) { diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 8075372d09c1..b4a9124baede 100644 --- 
a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -61,6 +61,7 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.JsonUtil; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -1627,4 +1628,54 @@ public void buildReplacementKeepsSnapshotLog() throws Exception { .hasSize(2) .containsExactlyElementsOf(metadata.snapshotLog()); } + + @Test + public void testConstructV3Metadata() { + TableMetadata.newTableMetadata( + TEST_SCHEMA, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + 3); + } + + @Test + public void testV3TimestampNanoTypeSupport() { + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "data", Types.StringType.get()), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional( + 6, "ts_nanos", Types.TimestampNanoType.withZone())))); + + for (int unsupportedFormatVersion : ImmutableList.of(1, 2)) { + Assertions.assertThrows( + IllegalStateException.class, + () -> + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + unsupportedFormatVersion), + String.format( + "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is not supported until v3", + unsupportedFormatVersion)); + } + + // should be allowed in v3 + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + 3); + } } diff --git a/core/src/test/resources/TableMetadataUnsupportedVersion.json b/core/src/test/resources/TableMetadataUnsupportedVersion.json index 0633a71d24d5..730e50ab1215 100644 --- 
a/core/src/test/resources/TableMetadataUnsupportedVersion.json +++ b/core/src/test/resources/TableMetadataUnsupportedVersion.json @@ -1,5 +1,5 @@ { - "format-version": 3, + "format-version": 10, "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", "location": "s3://bucket/test/location", "last-updated-ms": 1602638573874, @@ -33,4 +33,4 @@ "properties": {}, "current-snapshot-id": -1, "snapshots": [] -} \ No newline at end of file +} From 47d4b64e4930412188bc2a4adce3a662101c5cc0 Mon Sep 17 00:00:00 2001 From: Jacob Marble Date: Tue, 3 Sep 2024 07:41:28 -0700 Subject: [PATCH 38/38] fix merge conflict --- core/src/test/resources/TableMetadataUnsupportedVersion.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/resources/TableMetadataUnsupportedVersion.json b/core/src/test/resources/TableMetadataUnsupportedVersion.json index 730e50ab1215..c40a0c9cd5ae 100644 --- a/core/src/test/resources/TableMetadataUnsupportedVersion.json +++ b/core/src/test/resources/TableMetadataUnsupportedVersion.json @@ -1,5 +1,5 @@ { - "format-version": 10, + "format-version": 4, "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", "location": "s3://bucket/test/location", "last-updated-ms": 1602638573874,