Skip to content

Commit

Permalink
Support the TimeOnly and DateOnly types added in .NET 6 (#424)
Browse files Browse the repository at this point in the history
* Support round tripping DateOnly and TimeOnly, requiring LogicalReaderOverride

* Allow configuring use of DateOnly and TimeOnly in LogicalTypeFactory

* Update type factories documentation
  • Loading branch information
adamreeve authored Feb 20, 2024
1 parent 862568b commit d531836
Show file tree
Hide file tree
Showing 6 changed files with 630 additions and 19 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ For more detailed information on how to use ParquetSharp, see the following docu
* [Working with nested data](docs/Nested.md)
* [Reading and writing Arrow data](docs/Arrow.md) — how to read and write data using the [Apache Arrow format](https://arrow.apache.org/)
* [Row-oriented API](docs/RowOriented.md) — a higher level API that abstracts away the column-oriented nature of Parquet files
* [Custom types](docs/TypeFactories.md) — how to override the mapping between .NET and Parquet types
* [Custom types](docs/TypeFactories.md) — how to customize the mapping between .NET and Parquet types,
including using the `DateOnly` and `TimeOnly` types added in .NET 6.
* [Writing TimeSpan data](docs/TimeSpan.md) — interoperability with other libraries when writing TimeSpan data
* [Use from PowerShell](docs/PowerShell.md)

Expand Down
213 changes: 213 additions & 0 deletions csharp.test/TestLogicalTypeRoundtrip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,219 @@ public static void TestRoundTripBuffered(
}
}

#if NET6_0_OR_GREATER
/// <summary>
/// Writes a required and an optional <see cref="DateOnly"/> column to an in-memory Parquet file
/// and checks that reading them back yields the same values.
/// </summary>
/// <param name="useReaderOverride">
/// When true the columns are read with <c>LogicalReaderOverride</c>; otherwise a
/// <c>LogicalTypeFactory</c> with <c>DateAsDateOnly</c> enabled is installed on the file reader
/// and the plain <c>LogicalReader</c> is used.
/// </param>
[Test]
public static void TestRoundTripDateOnly([Values] bool useReaderOverride)
{
    const int numRows = 100;

    // Build both fixtures in one pass: consecutive days starting at 2024-01-01,
    // with every row where i % 5 == 1 nulled out in the optional column.
    var dateValues = new DateOnly[numRows];
    var nullableDateValues = new DateOnly?[numRows];
    for (var i = 0; i < numRows; ++i)
    {
        var day = new DateOnly(2024, 1, 1).AddDays(i);
        dateValues[i] = day;
        if (i % 5 == 1)
        {
            nullableDateValues[i] = null;
        }
        else
        {
            nullableDateValues[i] = day;
        }
    }

    var schemaColumns = new Column[]
    {
        new Column<DateOnly>("date"),
        new Column<DateOnly?>("nullable_date"),
    };

    using var buffer = new ResizableBuffer();

    using (var outStream = new BufferOutputStream(buffer))
    {
        using var fileWriter = new ParquetFileWriter(outStream, schemaColumns);
        using var rowGroupWriter = fileWriter.AppendRowGroup();

        // Each logical writer is disposed before moving on to the next column.
        using (var requiredWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>())
        {
            requiredWriter.WriteBatch(dateValues);
        }

        using (var optionalWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly?>())
        {
            optionalWriter.WriteBatch(nullableDateValues);
        }

        fileWriter.Close();
    }

    DateOnly[] readDateValues;
    DateOnly?[] readNullableDateValues;

    using (var inStream = new BufferReader(buffer))
    {
        using var fileReader = new ParquetFileReader(inStream);

        // Without the per-column override, DateOnly has to be enabled through the type factory.
        if (!useReaderOverride)
        {
            fileReader.LogicalTypeFactory = new LogicalTypeFactory {DateAsDateOnly = true};
        }

        using var rowGroupReader = fileReader.RowGroup(0);

        using (var requiredColumn = rowGroupReader.Column(0))
        {
            using var requiredReader = useReaderOverride
                ? requiredColumn.LogicalReaderOverride<DateOnly>()
                : requiredColumn.LogicalReader<DateOnly>();
            readDateValues = requiredReader.ReadAll(numRows);
        }

        using (var optionalColumn = rowGroupReader.Column(1))
        {
            using var optionalReader = useReaderOverride
                ? optionalColumn.LogicalReaderOverride<DateOnly?>()
                : optionalColumn.LogicalReader<DateOnly?>();
            readNullableDateValues = optionalReader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(dateValues, readDateValues);
    Assert.AreEqual(nullableDateValues, readNullableDateValues);
}

/// <summary>
/// Writes a required and an optional <see cref="TimeOnly"/> column to an in-memory Parquet file
/// and checks that reading them back yields the same values.
/// </summary>
/// <param name="timeUnit">
/// Time unit for an explicit Time logical type override on both columns,
/// or null to leave the columns without an override.
/// </param>
/// <param name="useReaderOverride">
/// When true the columns are read with <c>LogicalReaderOverride</c>; otherwise a
/// <c>LogicalTypeFactory</c> with <c>TimeAsTimeOnly</c> enabled is installed on the file reader
/// and the plain <c>LogicalReader</c> is used.
/// </param>
[TestCase(null, true)]
[TestCase(TimeUnit.Micros, true)]
[TestCase(TimeUnit.Millis, true)]
[TestCase(TimeUnit.Millis, false)]
public static void TestRoundTripTimeOnly(TimeUnit? timeUnit, bool useReaderOverride)
{
    const int numRows = 100;

    // Only override the logical type when a unit was requested.
    LogicalType? logicalTypeOverride = timeUnit.HasValue
        ? LogicalType.Time(isAdjustedToUtc: true, timeUnit.Value)
        : null;

    var schemaColumns = new Column[]
    {
        new Column<TimeOnly>("time", logicalTypeOverride: logicalTypeOverride),
        new Column<TimeOnly?>("nullable_time", logicalTypeOverride: logicalTypeOverride),
    };

    // Build both fixtures in one pass: one value per second starting at midnight,
    // with every row where i % 5 == 1 nulled out in the optional column.
    var timeValues = new TimeOnly[numRows];
    var nullableTimeValues = new TimeOnly?[numRows];
    for (var i = 0; i < numRows; ++i)
    {
        var time = new TimeOnly(0, 0, 0).Add(TimeSpan.FromSeconds(i));
        timeValues[i] = time;
        if (i % 5 == 1)
        {
            nullableTimeValues[i] = null;
        }
        else
        {
            nullableTimeValues[i] = time;
        }
    }

    using var buffer = new ResizableBuffer();

    using (var outStream = new BufferOutputStream(buffer))
    {
        using var fileWriter = new ParquetFileWriter(outStream, schemaColumns);
        using var rowGroupWriter = fileWriter.AppendRowGroup();

        // Each logical writer is disposed before moving on to the next column.
        using (var requiredWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>())
        {
            requiredWriter.WriteBatch(timeValues);
        }

        using (var optionalWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly?>())
        {
            optionalWriter.WriteBatch(nullableTimeValues);
        }

        fileWriter.Close();
    }

    TimeOnly[] readTimeValues;
    TimeOnly?[] readNullableTimeValues;

    using (var inStream = new BufferReader(buffer))
    {
        using var fileReader = new ParquetFileReader(inStream);

        // Without the per-column override, TimeOnly has to be enabled through the type factory.
        if (!useReaderOverride)
        {
            fileReader.LogicalTypeFactory = new LogicalTypeFactory {TimeAsTimeOnly = true};
        }

        using var rowGroupReader = fileReader.RowGroup(0);

        using (var requiredColumn = rowGroupReader.Column(0))
        {
            using var requiredReader = useReaderOverride
                ? requiredColumn.LogicalReaderOverride<TimeOnly>()
                : requiredColumn.LogicalReader<TimeOnly>();
            readTimeValues = requiredReader.ReadAll(numRows);
        }

        using (var optionalColumn = rowGroupReader.Column(1))
        {
            using var optionalReader = useReaderOverride
                ? optionalColumn.LogicalReaderOverride<TimeOnly?>()
                : optionalColumn.LogicalReader<TimeOnly?>();
            readNullableTimeValues = optionalReader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(timeValues, readTimeValues);
    Assert.AreEqual(nullableTimeValues, readNullableTimeValues);
}

/// <summary>
/// Checks that enabling <c>DateAsDateOnly</c> and <c>TimeAsTimeOnly</c> on
/// <c>LogicalTypeFactory.Default</c> lets <see cref="DateOnly"/> and <see cref="TimeOnly"/> values
/// be written and read without any per-file factory or reader override.
/// </summary>
/// <remarks>
/// The test mutates the shared default factory, so it is marked non-parallelizable and
/// restores the previous settings in a <c>finally</c> block.
/// </remarks>
[Test]
[NonParallelizable]
public static void TestSetTimeOnlyAndDateOnlyOnDefaultTypeFactory()
{
    // Remember the current global settings so they can be restored afterwards.
    var defaultDateAsDateOnly = LogicalTypeFactory.Default.DateAsDateOnly;
    var defaultTimeAsTimeOnly = LogicalTypeFactory.Default.TimeAsTimeOnly;

    try
    {
        LogicalTypeFactory.Default.DateAsDateOnly = true;
        LogicalTypeFactory.Default.TimeAsTimeOnly = true;

        // Create schema directly rather than using the column abstraction,
        // to test that this uses the correct types from the type factory when writing.
        using var dateNode = new PrimitiveNode("date", Repetition.Required, LogicalType.Date(), PhysicalType.Int32);
        using var timeNode = new PrimitiveNode("time", Repetition.Required, LogicalType.Time(true, TimeUnit.Millis), PhysicalType.Int32);
        using var schemaNode = new GroupNode("schema", Repetition.Required, new[] {dateNode, timeNode});

        const int numRows = 100;
        var timeValues = Enumerable.Range(0, numRows)
            .Select(i => new TimeOnly(0, 0, 0).Add(TimeSpan.FromSeconds(i)))
            .ToArray();
        var dateValues = Enumerable.Range(0, numRows)
            .Select(i => new DateOnly(2024, 1, 1).AddDays(i))
            .ToArray();

        using var buffer = new ResizableBuffer();
        using (var outStream = new BufferOutputStream(buffer))
        {
            using var builder = new WriterPropertiesBuilder();
            using var writerProperties = builder.Build();
            using var fileWriter = new ParquetFileWriter(outStream, schemaNode, writerProperties);
            using var rowGroupWriter = fileWriter.AppendRowGroup();

            // Give each logical writer its own scope so it is disposed before the next
            // column is started, matching the other round-trip tests in this file.
            {
                using var dateWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>();
                dateWriter.WriteBatch(dateValues);
            }
            {
                using var timeWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>();
                timeWriter.WriteBatch(timeValues);
            }
            fileWriter.Close();
        }

        DateOnly[] readDateValues;
        TimeOnly[] readTimeValues;
        using (var inStream = new BufferReader(buffer))
        {
            // No LogicalTypeFactory is set on the reader: the default factory must be picked up.
            using var fileReader = new ParquetFileReader(inStream);
            using var rowGroupReader = fileReader.RowGroup(0);
            {
                using var columnReader = rowGroupReader.Column(0);
                using var logicalReader = columnReader.LogicalReader<DateOnly>();
                readDateValues = logicalReader.ReadAll(numRows);
            }
            {
                using var columnReader = rowGroupReader.Column(1);
                using var logicalReader = columnReader.LogicalReader<TimeOnly>();
                readTimeValues = logicalReader.ReadAll(numRows);
            }
        }

        Assert.AreEqual(dateValues, readDateValues);
        Assert.AreEqual(timeValues, readTimeValues);
    }
    finally
    {
        // Always restore the global defaults so other tests are unaffected.
        LogicalTypeFactory.Default.DateAsDateOnly = defaultDateAsDateOnly;
        LogicalTypeFactory.Default.TimeAsTimeOnly = defaultTimeAsTimeOnly;
    }
}
#endif

[TestCase(DateTimeKind.Utc, TimeUnit.Micros)]
[TestCase(DateTimeKind.Utc, TimeUnit.Millis)]
[TestCase(DateTimeKind.Unspecified, TimeUnit.Micros)]
Expand Down
109 changes: 109 additions & 0 deletions csharp/LogicalRead.cs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,43 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu
return LogicalRead.GetNullableNativeConverter<TimeSpanNanos, long>();
}

#if NET6_0_OR_GREATER
// DateOnly and TimeOnly converters; compiled only when targeting .NET 6 or later.

// Required DateOnly: Int32 physical values. The lambda discards the definition-level
// arguments and forwards only the source and destination spans.
if (typeof(TLogical) == typeof(DateOnly))
{
return (LogicalRead<DateOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertDateOnly(s, d));
}

// Nullable DateOnly: the four-argument overload matches the converter delegate directly.
if (typeof(TLogical) == typeof(DateOnly?))
{
return (LogicalRead<DateOnly?, int>.Converter) LogicalRead.ConvertDateOnly;
}

// Required TimeOnly: the physical type depends on the time unit of the logical type
// (Millis -> int, Micros -> long).
if (typeof(TLogical) == typeof(TimeOnly))
{
switch (((TimeLogicalType) logicalType).TimeUnit)
{
case TimeUnit.Millis:
return (LogicalRead<TimeOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMillis(s, d));
case TimeUnit.Micros:
return (LogicalRead<TimeOnly, long>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMicros(s, d));
}
// Any other time unit has no case above, so control falls through to the code after this block.
// NOTE(review): presumably a later part of this method reports the unsupported type; confirm.
}

// Nullable TimeOnly: same unit-based dispatch, using the overloads that take definition levels.
if (typeof(TLogical) == typeof(TimeOnly?))
{
var timeLogicalType = (TimeLogicalType) logicalType;
var timeUnit = timeLogicalType.TimeUnit;

switch (timeUnit)
{
case TimeUnit.Millis:
return (LogicalRead<TimeOnly?, int>.Converter) LogicalRead.ConvertTimeOnlyMillis;
case TimeUnit.Micros:
return (LogicalRead<TimeOnly?, long>.Converter) LogicalRead.ConvertTimeOnlyMicros;
}
// As above, other time units are not handled here and fall through.
}
#endif

if (typeof(TLogical) == typeof(string))
{
var byteArrayCache = new ByteArrayReaderCache<TPhysical, TLogical>(columnChunkMetaData);
Expand Down Expand Up @@ -572,6 +609,56 @@ public static void ConvertTimeSpanMillis(ReadOnlySpan<int> source, ReadOnlySpan<
}
}

#if NET6_0_OR_GREATER
/// <summary>
/// Converts Int32 physical date values into <see cref="DateOnly"/> values for a column without nulls.
/// </summary>
/// <param name="source">Physical values; one is read for every element of <paramref name="destination"/>.</param>
/// <param name="destination">Span that receives the converted values; its length decides how many are converted.</param>
public static void ConvertDateOnly(ReadOnlySpan<int> source, Span<DateOnly> destination)
{
    var count = destination.Length;
    var index = 0;
    while (index < count)
    {
        destination[index] = ToDateOnly(source[index]);
        index++;
    }
}

/// <summary>
/// Converts Int32 physical date values into nullable <see cref="DateOnly"/> values,
/// using the definition levels to decide which rows are null.
/// </summary>
/// <param name="source">Physical values; consumed in order, one per non-null row.</param>
/// <param name="defLevels">Definition level of each destination row.</param>
/// <param name="destination">Span that receives one entry per row.</param>
/// <param name="definedLevel">Definition level that marks a row as holding a value.</param>
public static void ConvertDateOnly(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<DateOnly?> destination, short definedLevel)
{
    // Index of the next unread physical value; only advances on non-null rows.
    var nextValue = 0;
    for (var row = 0; row < destination.Length; ++row)
    {
        if (defLevels[row] == definedLevel)
        {
            destination[row] = ToDateOnly(source[nextValue]);
            nextValue++;
        }
        else
        {
            destination[row] = null;
        }
    }
}

/// <summary>
/// Converts Int64 microsecond time values into <see cref="TimeOnly"/> values for a column without nulls.
/// </summary>
/// <param name="source">Physical values; one is read for every element of <paramref name="destination"/>.</param>
/// <param name="destination">Span that receives the converted values; its length decides how many are converted.</param>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    var index = 0;
    while (index < count)
    {
        destination[index] = ToTimeOnlyMicros(source[index]);
        index++;
    }
}

/// <summary>
/// Converts Int64 microsecond time values into nullable <see cref="TimeOnly"/> values,
/// using the definition levels to decide which rows are null.
/// </summary>
/// <param name="source">Physical values; consumed in order, one per non-null row.</param>
/// <param name="defLevels">Definition level of each destination row.</param>
/// <param name="destination">Span that receives one entry per row.</param>
/// <param name="definedLevel">Definition level that marks a row as holding a value.</param>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    // Index of the next unread physical value; only advances on non-null rows.
    var nextValue = 0;
    for (var row = 0; row < destination.Length; ++row)
    {
        if (defLevels[row] == definedLevel)
        {
            destination[row] = ToTimeOnlyMicros(source[nextValue]);
            nextValue++;
        }
        else
        {
            destination[row] = null;
        }
    }
}

/// <summary>
/// Converts Int32 millisecond time values into <see cref="TimeOnly"/> values for a column without nulls.
/// </summary>
/// <param name="source">Physical values; one is read for every element of <paramref name="destination"/>.</param>
/// <param name="destination">Span that receives the converted values; its length decides how many are converted.</param>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    var index = 0;
    while (index < count)
    {
        destination[index] = ToTimeOnlyMillis(source[index]);
        index++;
    }
}

/// <summary>
/// Converts Int32 millisecond time values into nullable <see cref="TimeOnly"/> values,
/// using the definition levels to decide which rows are null.
/// </summary>
/// <param name="source">Physical values; consumed in order, one per non-null row.</param>
/// <param name="defLevels">Definition level of each destination row.</param>
/// <param name="destination">Span that receives one entry per row.</param>
/// <param name="definedLevel">Definition level that marks a row as holding a value.</param>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    // Index of the next unread physical value; only advances on non-null rows.
    var nextValue = 0;
    for (var row = 0; row < destination.Length; ++row)
    {
        if (defLevels[row] == definedLevel)
        {
            destination[row] = ToTimeOnlyMillis(source[nextValue]);
            nextValue++;
        }
        else
        {
            destination[row] = null;
        }
    }
}
#endif

public static void ConvertString(ReadOnlySpan<ByteArray> source, ReadOnlySpan<short> defLevels, Span<string?> destination, short definedLevel, ByteArrayReaderCache<ByteArray, string> byteArrayCache)
{
for (int i = 0, src = 0; i < destination.Length; ++i)
Expand Down Expand Up @@ -737,6 +824,28 @@ public static byte[] ToByteArray(ByteArray byteArray)
return array;
}

#if NET6_0_OR_GREATER
/// <summary>
/// Converts a stored Int32 date value into a <see cref="DateOnly"/>.
/// </summary>
/// <param name="source">Stored value; it is added to <c>BaseDateOnlyNumber</c> to form the day number.</param>
/// <returns>The <see cref="DateOnly"/> for the resulting day number.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static DateOnly ToDateOnly(int source)
{
    var dayNumber = BaseDateOnlyNumber + source;
    return DateOnly.FromDayNumber(dayNumber);
}

/// <summary>
/// Converts a count of microseconds since midnight into a <see cref="TimeOnly"/>.
/// </summary>
/// <param name="source">Microseconds since midnight.</param>
/// <returns>The corresponding time of day.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMicros(long source)
{
    // Scale microseconds to ticks using the per-millisecond tick count divided by 1000.
    var ticks = source * (TimeSpan.TicksPerMillisecond / 1000);
    var sinceMidnight = TimeSpan.FromTicks(ticks);
    return TimeOnly.FromTimeSpan(sinceMidnight);
}

/// <summary>
/// Converts a count of milliseconds since midnight into a <see cref="TimeOnly"/>.
/// </summary>
/// <param name="source">Milliseconds since midnight.</param>
/// <returns>The corresponding time of day.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMillis(int source)
{
    // The multiplication is carried out in 64-bit because TicksPerMillisecond is a long.
    var ticks = source * TimeSpan.TicksPerMillisecond;
    var sinceMidnight = TimeSpan.FromTicks(ticks);
    return TimeOnly.FromTimeSpan(sinceMidnight);
}

// Offset added to a stored Int32 date value to obtain the day number passed to DateOnly.FromDayNumber.
// Copied from LogicalWrite so that reading and writing share the same base.
// NOTE(review): presumably the day number of the Parquet DATE epoch (1970-01-01); confirm in LogicalWrite.
private static readonly int BaseDateOnlyNumber = LogicalWrite.BaseDateOnlyNumber;
#endif

// Re-exposes LogicalWrite.DateTimeOffset so read-side code uses the same constant as the writer.
// NOTE(review): its unit and meaning are defined in LogicalWrite, which is not visible here; confirm there.
public const long DateTimeOffset = LogicalWrite.DateTimeOffset;
}
}
Loading

0 comments on commit d531836

Please sign in to comment.