From 20e120b2a71e7da7ecc5ed9b32c7b5ea9558fa00 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 16 Oct 2023 09:24:23 -0700 Subject: [PATCH] GH-38061: [C#] Implement Duration support (#38062) ### What changes are included in this PR? Complete support for the Duration array type in the C# implementation. ### Are these changes tested? Yes. ### Are there any user-facing changes? The Duration array type is now supported in the C# library. This also does some slight refactoring of classes which could impact edge cases of user scenarios. * Closes: #38061 --- .../Arrays/ArrayDataTypeComparer.cs | 18 +-- .../Arrays/ArrowArrayBuilderFactory.cs | 2 + .../Apache.Arrow/Arrays/ArrowArrayFactory.cs | 2 + .../src/Apache.Arrow/Arrays/DurationArray.cs | 84 +++++++++++ csharp/src/Apache.Arrow/Arrays/Time64Array.cs | 19 +-- .../Apache.Arrow/C/CArrowSchemaExporter.cs | 3 + .../Apache.Arrow/C/CArrowSchemaImporter.cs | 5 +- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 2 + .../Ipc/ArrowTypeFlatbufferBuilder.cs | 8 ++ .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 3 + .../src/Apache.Arrow/RecordBatch.Builder.cs | 2 + csharp/src/Apache.Arrow/Types/DurationType.cs | 42 ++++++ csharp/src/Apache.Arrow/Types/IArrowType.cs | 1 + .../src/Apache.Arrow/Types/TimeBasedType.cs | 27 ++++ csharp/src/Apache.Arrow/Types/TimeType.cs | 15 +- csharp/src/Apache.Arrow/Types/TimeUnit.cs | 58 ++++++++ .../src/Apache.Arrow/Types/TimestampType.cs | 7 +- .../Apache.Arrow.IntegrationTest/JsonFile.cs | 15 ++ .../Apache.Arrow.Tests/ArrayTypeComparer.cs | 18 +-- .../ArrowArrayConcatenatorTests.cs | 28 ++++ .../Apache.Arrow.Tests/ArrowArrayTests.cs | 2 + .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 + .../CDataInterfacePythonTests.cs | 16 ++- .../Apache.Arrow.Tests/DurationArrayTests.cs | 134 ++++++++++++++++++ csharp/test/Apache.Arrow.Tests/TestData.cs | 13 ++ dev/archery/archery/integration/datagen.py | 3 +- docs/source/status.rst | 2 +- 27 files changed, 465 insertions(+), 66 deletions(-) create 
mode 100644 csharp/src/Apache.Arrow/Arrays/DurationArray.cs create mode 100644 csharp/src/Apache.Arrow/Types/DurationType.cs create mode 100644 csharp/src/Apache.Arrow/Types/TimeBasedType.cs create mode 100644 csharp/src/Apache.Arrow/Types/TimeUnit.cs create mode 100644 csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs index 1698e0672fb60..1abaa7f043b64 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System; using Apache.Arrow.Types; namespace Apache.Arrow @@ -22,8 +21,7 @@ internal sealed class ArrayDataTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -69,18 +67,10 @@ public void Visit(Date64Type actualType) } } - public void Visit(Time32Type actualType) + public void Visit(TimeBasedType actualType) { - if (_expectedType is Time32Type expectedType - && expectedType.Unit == actualType.Unit) - { - _dataTypeMatch = true; - } - } - - public void Visit(Time64Type actualType) - { - if (_expectedType is Time64Type expectedType + if (_expectedType.TypeId == actualType.TypeId + && _expectedType is TimeBasedType expectedType && expectedType.Unit == actualType.Unit) { _dataTypeMatch = true; diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs index 1b972d01874ca..af5a524798396 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs @@ -66,6 +66,8 @@ internal static IArrowArrayBuilder> return new Time32Array.Builder(dataType as 
Time32Type); case ArrowTypeId.Time64: return new Time64Array.Builder(dataType as Time64Type); + case ArrowTypeId.Duration: + return new DurationArray.Builder(dataType as DurationType); case ArrowTypeId.List: return new ListArray.Builder(dataType as ListType); case ArrowTypeId.FixedSizeList: diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index d3b7d65185abe..0520513334db3 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -73,6 +73,8 @@ public static IArrowArray BuildArray(ArrayData data) return new Time32Array(data); case ArrowTypeId.Time64: return new Time64Array(data); + case ArrowTypeId.Duration: + return new DurationArray(data); case ArrowTypeId.Decimal128: return new Decimal128Array(data); case ArrowTypeId.Decimal256: diff --git a/csharp/src/Apache.Arrow/Arrays/DurationArray.cs b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs new file mode 100644 index 0000000000000..3649dda50cd97 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class DurationArray : PrimitiveArray<long> + { + public class Builder : PrimitiveArrayBuilder<long, DurationArray, Builder> + { + public DurationType DataType { get; } + + public Builder(DurationType dataType) + { + DataType = dataType; + } + + protected override DurationArray Build( + ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, + int length, int nullCount, int offset) => + new DurationArray(DataType, valueBuffer, nullBitmapBuffer, length, nullCount, offset); + + /// <summary> + /// Append a duration in the form of a <see cref="TimeSpan"/> object to the array. + /// </summary> + /// <param name="value">TimeSpan to add.</param> + /// <returns>Returns the builder (for fluent-style composition).</returns> + public Builder Append(TimeSpan value) + { + Append(DataType.Unit.ConvertFromTicks(value.Ticks)); + return this; + } + + /// <summary> + /// Append a duration in the form of a <see cref="TimeSpan"/> object to the array. + /// </summary> + /// <param name="value">TimeSpan to add.</param> + /// <returns>Returns the builder (for fluent-style composition).</returns> + public Builder Append(TimeSpan? value) => + (value == null) ? AppendNull() : Append(value.Value); + } + + public DurationArray( + DurationType type, + ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, + int length, int nullCount, int offset) + : this(new ArrayData(type, length, nullCount, offset, + new[] { nullBitmapBuffer, valueBuffer })) + { } + + public DurationArray(ArrayData data) + : base(data) + { + data.EnsureDataType(ArrowTypeId.Duration); + } + + public DurationType DataType => (DurationType)this.Data.DataType; + + public TimeSpan? GetTimeSpan(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + return IsValid(index) ?
new TimeSpan(DataType.Unit.ConvertToTicks(Values[index])) : null; + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs index 9fc2ae4be1563..3369893304414 100644 --- a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs @@ -25,9 +25,6 @@ namespace Apache.Arrow /// public class Time64Array : PrimitiveArray { - private const long TicksPerMicrosecond = 10; - private const long NanosecondsPerTick = 100; - /// /// The class can be used to fluently build objects. /// @@ -62,13 +59,7 @@ public Builder(Time64Type type) #if NET6_0_OR_GREATER protected override long Convert(TimeOnly time) { - var unit = ((TimeBuilder)InnerBuilder).DataType.Unit; - return unit switch - { - TimeUnit.Microsecond => (long)(time.Ticks / TicksPerMicrosecond), - TimeUnit.Nanosecond => (long)(time.Ticks * NanosecondsPerTick), - _ => throw new InvalidDataException($"Unsupported time unit for Time32Type: {unit}") - }; + return ((TimeBuilder)InnerBuilder).DataType.Unit.ConvertFromTicks(time.Ticks); } #endif } @@ -153,13 +144,7 @@ public Time64Array(ArrayData data) return null; } - var unit = ((Time64Type)Data.DataType).Unit; - return unit switch - { - TimeUnit.Microsecond => new TimeOnly(value.Value * TicksPerMicrosecond), - TimeUnit.Nanosecond => new TimeOnly(value.Value / NanosecondsPerTick), - _ => throw new InvalidDataException($"Unsupported time unit for Time64Type: {unit}") - }; + return new TimeOnly(((Time64Type)Data.DataType).Unit.ConvertToTicks(value.Value)); } #endif } diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index f3479903889d1..d805e9afc4c8b 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -179,6 +179,9 @@ private static string GetFormat(IArrowType 
datatype) case Time64Type timeType: // Same prefix as Time32, but allowed time units are different. return String.Format("tt{0}", FormatTimeUnit(timeType.Unit)); + // Duration + case DurationType durationType: + return String.Format("tD{0}", FormatTimeUnit(durationType.Unit)); // Timestamp case TimestampType timestampType: return String.Format("ts{0}:{1}", FormatTimeUnit(timestampType.Unit), timestampType.Timezone); diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index f2a08f5e4a40b..12545c9831a04 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -313,7 +313,10 @@ public ArrowType GetAsType() "ttm" => TimeType.Millisecond, "ttu" => TimeType.Microsecond, "ttn" => TimeType.Nanosecond, - // TODO: duration not yet implemented + "tDs" => DurationType.Second, + "tDm" => DurationType.Millisecond, + "tDu" => DurationType.Microsecond, + "tDn" => DurationType.Nanosecond, "tiM" => IntervalType.YearMonth, "tiD" => IntervalType.DayTime, //"tin" => IntervalType.MonthDayNanosecond, // Not yet implemented diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 2b3815af71142..dcb8852bc1f65 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -49,6 +49,7 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -104,6 +105,7 @@ public ArrowRecordBatchFlatBufferBuilder() public void Visit(Date64Array array) => CreateBuffers(array); public void Visit(Time32Array array) => CreateBuffers(array); public void Visit(Time64Array array) => CreateBuffers(array); + public void Visit(DurationArray array) => CreateBuffers(array); public void Visit(ListArray array) { diff --git 
a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 425d9326addfe..1397eb3e00f3c 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -57,6 +57,7 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -188,6 +189,13 @@ public void Visit(Time64Type type) Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit), 64)); } + public void Visit(DurationType type) + { + Result = FieldType.Build( + Flatbuf.Type.Duration, + Flatbuf.Duration.CreateDuration(Builder, ToFlatBuffer(type.Unit))); + } + public void Visit(StructType type) { Flatbuf.Struct_.StartStruct_(Builder); diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 9847c376cf82e..3f504cf3b975a 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -177,6 +177,9 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c Types.TimeUnit unit = timestampTypeMetadata.Unit.ToArrow(); string timezone = timestampTypeMetadata.Timezone; return new Types.TimestampType(unit, timezone); + case Flatbuf.Type.Duration: + Flatbuf.Duration durationMeta = field.Type().Value; + return DurationType.FromTimeUnit(durationMeta.Unit.ToArrow()); case Flatbuf.Type.Interval: Flatbuf.Interval intervalMetadata = field.Type().Value; return Types.IntervalType.FromIntervalUnit(intervalMetadata.Unit.ToArrow()); diff --git a/csharp/src/Apache.Arrow/RecordBatch.Builder.cs b/csharp/src/Apache.Arrow/RecordBatch.Builder.cs index b5d5ec9ea0bbf..8e0d17ae06f49 100644 --- a/csharp/src/Apache.Arrow/RecordBatch.Builder.cs +++ b/csharp/src/Apache.Arrow/RecordBatch.Builder.cs @@ -63,6 +63,8 @@ public Time32Array Time32(Time32Type type, Action action) 
= public Time64Array Time64(Time64Type type, Action action) => Build( new Time64Array.Builder(type), action); + public DurationArray Duration(DurationType type, Action action) => + Build(new DurationArray.Builder(type), action); public BinaryArray Binary(Action action) => Build(new BinaryArray.Builder(), action); public StringArray String(Action action) => Build(new StringArray.Builder(), action); public TimestampArray Timestamp(Action action) => Build(new TimestampArray.Builder(), action); diff --git a/csharp/src/Apache.Arrow/Types/DurationType.cs b/csharp/src/Apache.Arrow/Types/DurationType.cs new file mode 100644 index 0000000000000..7e937a6e72e0b --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/DurationType.cs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +namespace Apache.Arrow.Types +{ + public sealed class DurationType : TimeBasedType + { + public static readonly DurationType Second = new DurationType(TimeUnit.Second); + public static readonly DurationType Millisecond = new DurationType(TimeUnit.Millisecond); + public static readonly DurationType Microsecond = new DurationType(TimeUnit.Microsecond); + public static readonly DurationType Nanosecond = new DurationType(TimeUnit.Nanosecond); + private static readonly DurationType[] _types = new DurationType[] { Second, Millisecond, Microsecond, Nanosecond }; + + private DurationType(TimeUnit unit) + : base(unit) + { + } + + public override ArrowTypeId TypeId => ArrowTypeId.Duration; + public override string Name => "duration"; + public override int BitWidth => 64; + + public static DurationType FromTimeUnit(TimeUnit unit) + { + return _types[(int)unit]; + } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index 447db47329829..cdf423e56f7a8 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -48,6 +48,7 @@ public enum ArrowTypeId Dictionary, Map, FixedSizeList, + Duration, } public interface IArrowType diff --git a/csharp/src/Apache.Arrow/Types/TimeBasedType.cs b/csharp/src/Apache.Arrow/Types/TimeBasedType.cs new file mode 100644 index 0000000000000..ffcd3f5a7ab63 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/TimeBasedType.cs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types +{ + public abstract class TimeBasedType : FixedWidthType + { + public TimeUnit Unit { get; } + + protected TimeBasedType(TimeUnit unit) + { + Unit = unit; + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/TimeType.cs b/csharp/src/Apache.Arrow/Types/TimeType.cs index 48c7fdb5f1bea..b317df265d56f 100644 --- a/csharp/src/Apache.Arrow/Types/TimeType.cs +++ b/csharp/src/Apache.Arrow/Types/TimeType.cs @@ -13,18 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. - namespace Apache.Arrow.Types { - public enum TimeUnit - { - Second, - Millisecond, - Microsecond, - Nanosecond - } - - public abstract class TimeType: FixedWidthType + public abstract class TimeType : TimeBasedType { public static readonly Time32Type Second = new Time32Type(TimeUnit.Second); public static readonly Time32Type Millisecond = new Time32Type(TimeUnit.Millisecond); @@ -32,11 +23,9 @@ public abstract class TimeType: FixedWidthType public static readonly Time64Type Nanosecond = new Time64Type(TimeUnit.Nanosecond); private static readonly TimeType[] _types = new TimeType[] { Second, Millisecond, Microsecond, Nanosecond }; - public TimeUnit Unit { get; } - protected TimeType(TimeUnit unit) + : base(unit) { - Unit = unit; } public static TimeType FromTimeUnit(TimeUnit unit) diff --git a/csharp/src/Apache.Arrow/Types/TimeUnit.cs b/csharp/src/Apache.Arrow/Types/TimeUnit.cs new file mode 100644 index 0000000000000..ba60fa9bb8371 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/TimeUnit.cs @@ -0,0 
+1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.IO; +using System; + +namespace Apache.Arrow.Types +{ + public enum TimeUnit + { + Second, + Millisecond, + Microsecond, + Nanosecond + } + + internal static class TimeUnitExtensions + { + private const long TicksPerMicrosecond = 10; + private const long NanosecondsPerTick = 100; + + public static long ConvertFromTicks(this TimeUnit unit, long ticks) + { + return unit switch + { + TimeUnit.Second => ticks / TimeSpan.TicksPerSecond, + TimeUnit.Millisecond => ticks / TimeSpan.TicksPerMillisecond, + TimeUnit.Microsecond => ticks / TicksPerMicrosecond, + TimeUnit.Nanosecond => ticks * NanosecondsPerTick, + _ => throw new InvalidDataException($"Unsupported time unit: {unit}") + }; + } + + public static long ConvertToTicks(this TimeUnit unit, long units) + { + return unit switch + { + TimeUnit.Second => units * TimeSpan.TicksPerSecond, + TimeUnit.Millisecond => units * TimeSpan.TicksPerMillisecond, + TimeUnit.Microsecond => units * TicksPerMicrosecond, + TimeUnit.Nanosecond => units / NanosecondsPerTick, + _ => throw new InvalidDataException($"Unsupported time unit: {unit}") + }; + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/TimestampType.cs 
b/csharp/src/Apache.Arrow/Types/TimestampType.cs index 66b6ca60971cb..565eb62bb799b 100644 --- a/csharp/src/Apache.Arrow/Types/TimestampType.cs +++ b/csharp/src/Apache.Arrow/Types/TimestampType.cs @@ -18,7 +18,7 @@ namespace Apache.Arrow.Types { - public sealed class TimestampType : FixedWidthType + public sealed class TimestampType : TimeBasedType { public static readonly TimestampType Default = new TimestampType(TimeUnit.Millisecond, "+00:00"); @@ -26,7 +26,6 @@ public sealed class TimestampType : FixedWidthType public override string Name => "timestamp"; public override int BitWidth => 64; - public TimeUnit Unit { get; } public string Timezone { get; } public bool IsTimeZoneAware => !string.IsNullOrWhiteSpace(Timezone); @@ -34,16 +33,16 @@ public sealed class TimestampType : FixedWidthType public TimestampType( TimeUnit unit = TimeUnit.Millisecond, string timezone = default) + : base(unit) { - Unit = unit; Timezone = timezone; } public TimestampType( TimeUnit unit = TimeUnit.Millisecond, TimeZoneInfo timezone = default) + : base(unit) { - Unit = unit; Timezone = timezone?.BaseUtcOffset.ToTimeZoneOffsetString(); } diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index d06249bef2661..987a236a10191 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -119,6 +119,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), + "duration" => ToDurationArrowType(type), "timestamp" => ToTimestampArrowType(type), "list" => ToListArrowType(type, children), "fixedsizelist" => ToFixedSizeListArrowType(type, children), @@ -191,6 +192,18 @@ private static IArrowType ToTimeArrowType(JsonArrowType type) }; } + private static IArrowType ToDurationArrowType(JsonArrowType type) + { + return 
type.Unit switch + { + "SECOND" => DurationType.Second, + "MILLISECOND" => DurationType.Millisecond, + "MICROSECOND" => DurationType.Microsecond, + "NANOSECOND" => DurationType.Nanosecond, + _ => throw new NotSupportedException($"Duration type not supported: {type.Unit}") + }; + } + private static IArrowType ToTimestampArrowType(JsonArrowType type) { return type.Unit switch @@ -346,6 +359,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -396,6 +410,7 @@ public void Visit(BooleanType type) public void Visit(DoubleType type) => GenerateArray((v, n, c, nc, o) => new DoubleArray(v, n, c, nc, o)); public void Visit(Time32Type type) => GenerateArray((v, n, c, nc, o) => new Time32Array(type, v, n, c, nc, o)); public void Visit(Time64Type type) => GenerateLongArray((v, n, c, nc, o) => new Time64Array(type, v, n, c, nc, o), s => long.Parse(s)); + public void Visit(DurationType type) => GenerateLongArray((v, n, c, nc, o) => new DurationArray(type, v, n, c, nc, o), s => long.Parse(s)); public void Visit(Decimal128Type type) { diff --git a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs index ad3527c47807b..ceeffe42c6297 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs @@ -23,8 +23,7 @@ public class ArrayTypeComparer : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor, - IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -66,18 +65,11 @@ public void Visit(Date64Type actualType) Assert.Equal(expectedType.Unit, actualType.Unit); } - public void Visit(Time32Type actualType) + public void Visit(TimeBasedType actualType) { - Assert.IsAssignableFrom(_expectedType); - var expectedType = (Time32Type)_expectedType; - -
Assert.Equal(expectedType.Unit, actualType.Unit); - } - - public void Visit(Time64Type actualType) - { - Assert.IsAssignableFrom(_expectedType); - var expectedType = (Time64Type)_expectedType; + Assert.IsAssignableFrom(_expectedType); + Assert.Equal(_expectedType.TypeId, actualType.TypeId); + var expectedType = (TimeBasedType)_expectedType; Assert.Equal(expectedType.Unit, actualType.Unit); } diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index 6f4c17a959981..1d108d2123c10 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -125,6 +125,7 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -252,6 +253,33 @@ public void Visit(TimestampType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(DurationType type) + { + DurationArray.Builder resultBuilder = new DurationArray.Builder(type).Reserve(_baseDataTotalElementCount); + DateTimeOffset basis = DateTimeOffset.UtcNow; + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + DurationArray.Builder builder = new DurationArray.Builder(type).Reserve(dataList.Count); + foreach (int? 
value in dataList) + { + if (value.HasValue) + { + builder.Append(value.Value); + resultBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } public void Visit(BinaryType type) { diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs index d4f0d8dfd0383..96918ff091639 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs @@ -200,6 +200,7 @@ private class ArraySliceValidator : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, #if NET5_0_OR_GREATER IArrowArrayVisitor, #endif @@ -243,6 +244,7 @@ public void Visit(Date64Array array) } public void Visit(Time32Array array) => ValidateArrays(array); public void Visit(Time64Array array) => ValidateArrays(array); + public void Visit(DurationArray array) => ValidateArrays(array); #if NET5_0_OR_GREATER public void Visit(HalfFloatArray array) => ValidateArrays(array); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 8b41763a70ac8..75d62b25d7b88 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -85,6 +85,7 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -127,6 +128,7 @@ public ArrayComparer(IArrowArray expectedArray, bool strictCompare) public void Visit(Date64Array array) => CompareArrays(array); public void Visit(Time32Array array) => CompareArrays(array); public void Visit(Time64Array array) => CompareArrays(array); + public void Visit(DurationArray array) => CompareArrays(array); public void Visit(ListArray array) => CompareArrays(array); 
public void Visit(FixedSizeListArray array) => CompareArrays(array); public void Visit(FixedSizeBinaryArray array) => CompareArrays(array); diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 7aee37b8212c3..4efa94e8c7363 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -117,6 +117,11 @@ private static Schema GetTestSchema() .Field(f => f.Name("map").DataType(new MapType(StringType.Default, Int32Type.Default)).Nullable(false)) + .Field(f => f.Name("duration_s").DataType(DurationType.Second).Nullable(false)) + .Field(f => f.Name("duration_ms").DataType(DurationType.Millisecond).Nullable(true)) + .Field(f => f.Name("duration_us").DataType(DurationType.Microsecond).Nullable(false)) + .Field(f => f.Name("duration_ns").DataType(DurationType.Nanosecond).Nullable(true)) + // Checking wider characters. .Field(f => f.Name("hello 你好 😄").DataType(BooleanType.Default).Nullable(true)) @@ -182,6 +187,11 @@ private static IEnumerable GetPythonFields() yield return pa.field("map", pa.map_(pa.@string(), pa.int32()), false); + yield return pa.field("duration_s", pa.duration("s"), false); + yield return pa.field("duration_ms", pa.duration("ms"), true); + yield return pa.field("duration_us", pa.duration("us"), false); + yield return pa.field("duration_ns", pa.duration("ns"), true); + yield return pa.field("hello 你好 😄", pa.bool_(), true); } } @@ -520,8 +530,9 @@ public unsafe void ImportRecordBatch() List(0, 0, 1, 2, 4, 10), pa.array(List("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")), pa.array(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))), + pa.array(List(1234, 2345, 3456, null, 6789), pa.duration("ms")), }), - new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10" }); + new[] { "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", 
"col10", "col11" }); dynamic batch = table.to_batches()[0]; @@ -598,6 +609,9 @@ public unsafe void ImportRecordBatch() Assert.Equal(5, col10.Length); Assert.Equal(new int[] { 0, 0, 1, 2, 4, 10}, col10.ValueOffsets.ToArray()); Assert.Equal(new long?[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, ((Int64Array)col10.Values).ToList().ToArray()); + + DurationArray col11 = (DurationArray)recordBatch.Column("col11"); + Assert.Equal(5, col11.Length); } [SkippableFact] diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs new file mode 100644 index 0000000000000..0890d356b8e90 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class DurationArrayTests + { + private const long TicksPerMicrosecond = 10; + + private static readonly TimeSpan?[] _exampleTimeSpans = + { + null, + TimeSpan.FromDays(10.5), + TimeSpan.FromHours(10.5), + TimeSpan.FromMinutes(10.5), + TimeSpan.FromSeconds(10.5), + TimeSpan.FromMilliseconds(10.5), + TimeSpan.FromTicks(11), + }; + + private static readonly long?[] _exampleDurations = + { + null, + 1, + 1000, + 1000000, + 1000000000, + 1000000000000, + }; + + private static readonly DurationType[] _durationTypes = + { + DurationType.Second, + DurationType.Millisecond, + DurationType.Microsecond, + DurationType.Nanosecond, + }; + + public static IEnumerable GetTimeSpansData() => + from timeSpan in _exampleTimeSpans + from type in _durationTypes + where type.Unit >= RequiredPrecision(timeSpan) + select new object[] { timeSpan, type }; + + public static IEnumerable GetDurationsData() => + from duration in _exampleDurations + from type in _durationTypes + select new object[] { duration, type }; + + static TimeUnit RequiredPrecision(TimeSpan? 
timeSpan) + { + if (timeSpan == null) { return TimeUnit.Second; } + if ((timeSpan.Value.Ticks % TicksPerMicrosecond) > 0) { return TimeUnit.Nanosecond; } + if (timeSpan.Value.Microseconds > 0) { return TimeUnit.Microsecond; } + if (timeSpan.Value.Milliseconds > 0) { return TimeUnit.Millisecond; } + return TimeUnit.Second; + } + + public class AppendNull + { + [Fact] + public void AppendThenGetGivesNull() + { + // Arrange + var builder = new DurationArray.Builder(DurationType.Millisecond); + + // Act + builder = builder.AppendNull(); + + // Assert + var array = builder.Build(); + Assert.Equal(1, array.Length); + Assert.Null(array.GetValue(0)); + Assert.Null(array.GetTimeSpan(0)); + } + } + + public class AppendTimeSpan + { + [Theory] + [MemberData(nameof(GetTimeSpansData), MemberType = typeof(DurationArrayTests))] + public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType type) + { + // Arrange + var builder = new DurationArray.Builder(type); + + // Act + builder = builder.Append(timeSpan); + + // Assert + var array = builder.Build(); + Assert.Equal(1, array.Length); + Assert.Equal(timeSpan, array.GetTimeSpan(0)); + } + } + + public class AppendDuration + { + [Theory] + [MemberData(nameof(GetDurationsData), MemberType = typeof(DurationArrayTests))] + public void AppendDurationGivesSameDuration(long? 
duration, DurationType type) + { + // Arrange + var builder = new DurationArray.Builder(type); + + // Act + builder = builder.Append(duration); + + // Assert + var array = builder.Build(); + Assert.Equal(1, array.Length); + Assert.Equal(duration, array.GetValue(0)); + } + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index e3a40dbdafd61..3af6efb97b437 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -113,6 +113,7 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -233,6 +234,18 @@ public void Visit(Time64Type type) Array = builder.Build(); } + public void Visit(DurationType type) + { + var builder = new DurationArray.Builder(type).Reserve(Length); + + for (var i = 0; i < Length; i++) + { + builder.Append(i); + } + + Array = builder.Build(); + } + public void Visit(TimestampType type) { var builder = new TimestampArray.Builder().Reserve(Length); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 01672fbe7488a..f229012366e1f 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1804,8 +1804,7 @@ def _temp_path(): generate_datetime_case(), - generate_duration_case() - .skip_tester('C#'), + generate_duration_case(), generate_interval_case() .skip_tester('C#') diff --git a/docs/source/status.rst b/docs/source/status.rst index 6024c1d3172bb..c8c0e6dfc1dfe 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -54,7 +54,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Timestamp | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Duration | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +| Duration | 
✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Interval | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+