diff --git a/src/ParquetFileViewer/Helpers/UtilityMethods.cs b/src/ParquetFileViewer/Helpers/UtilityMethods.cs index 3488cb2..17ca3b3 100644 --- a/src/ParquetFileViewer/Helpers/UtilityMethods.cs +++ b/src/ParquetFileViewer/Helpers/UtilityMethods.cs @@ -13,15 +13,17 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li { //Get list of data fields and construct the DataTable DataTable dataTable = new DataTable(); - List fields = new List(); + var fields = new List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)>(); var dataFields = parquetReader.Schema.GetDataFields(); foreach (string selectedField in selectedFields) { var dataField = dataFields.FirstOrDefault(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase)); if (dataField != null) { - fields.Add(dataField); - DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(dataField.DataType)); + var thriftSchema = parquetReader.ThriftMetadata.Schema.First(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase)); + + fields.Add((thriftSchema, dataField)); + DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(thriftSchema, dataField.DataType)); dataTable.Columns.Add(newColumn); } else @@ -64,17 +66,20 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li return dataTable; } - private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List fields, + private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)> fields, int skipRecords, int readRecords, CancellationToken cancellationToken) { int rowBeginIndex = dataTable.Rows.Count; bool isFirstColumn = true; - foreach (var field in fields) + foreach (var fieldTuple in fields) { if (cancellationToken.IsCancellationRequested) break; + var logicalType = fieldTuple.Item1.LogicalType; + var field = fieldTuple.Item2; + int rowIndex = rowBeginIndex; int skippedRecords = 0; @@ -101,7 +106,23 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g if (value == null) dataTable.Rows[rowIndex][field.Name] = DBNull.Value; else if (field.DataType == Parquet.Data.DataType.DateTimeOffset) - dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime; //converts to local time! + dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime; + else if (field.DataType == Parquet.Data.DataType.Int64 + && logicalType.TIMESTAMP != null) + { + int divideBy = 0; + if (logicalType.TIMESTAMP.Unit.NANOS != null) + divideBy = 1000 * 1000; + else if (logicalType.TIMESTAMP.Unit.MICROS != null) + divideBy = 1000; + else if (logicalType.TIMESTAMP.Unit.MILLIS != null) + divideBy = 1; + + if (divideBy > 0) + dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeMilliseconds((long)value / divideBy).DateTime; + else //Not sure if this 'else' is correct but adding just in case + dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeSeconds((long)value); + } else dataTable.Rows[rowIndex][field.Name] = value; @@ -113,7 +134,7 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g } - public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type) + public static Type ParquetNetTypeToCSharpType(Parquet.Thrift.SchemaElement thriftSchema, Parquet.Data.DataType type) { Type columnType = null; switch (type) @@ -147,7 +168,7 @@ public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type) columnType = typeof(int); break; case Parquet.Data.DataType.Int64: - columnType = typeof(long); + columnType = thriftSchema.LogicalType.TIMESTAMP != null ? typeof(DateTime) : typeof(long); break; case Parquet.Data.DataType.UnsignedByte: columnType = typeof(byte); diff --git a/src/ParquetFileViewer/Properties/AssemblyInfo.cs b/src/ParquetFileViewer/Properties/AssemblyInfo.cs index 5a1453f..fd61d81 100644 --- a/src/ParquetFileViewer/Properties/AssemblyInfo.cs +++ b/src/ParquetFileViewer/Properties/AssemblyInfo.cs @@ -31,5 +31,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("2.3.1.*")] -[assembly: AssemblyFileVersion("2.3.1.*")] +[assembly: AssemblyVersion("2.3.2.*")] +[assembly: AssemblyFileVersion("2.3.2.*")]