Skip to content

Commit

Permalink
Merge pull request #41 from mukunku/v2.3.2
Browse files Browse the repository at this point in the history
v2.3.2
  • Loading branch information
mukunku authored Aug 14, 2021
2 parents 97958b2 + 2db8383 commit a67b1ff
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 10 deletions.
37 changes: 29 additions & 8 deletions src/ParquetFileViewer/Helpers/UtilityMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li
{
//Get list of data fields and construct the DataTable
DataTable dataTable = new DataTable();
List<Parquet.Data.DataField> fields = new List<Parquet.Data.DataField>();
var fields = new List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)>();
var dataFields = parquetReader.Schema.GetDataFields();
foreach (string selectedField in selectedFields)
{
var dataField = dataFields.FirstOrDefault(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase));
if (dataField != null)
{
fields.Add(dataField);
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(dataField.DataType));
var thriftSchema = parquetReader.ThriftMetadata.Schema.First(f => f.Name.Equals(selectedField, StringComparison.InvariantCultureIgnoreCase));

fields.Add((thriftSchema, dataField));
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(thriftSchema, dataField.DataType));
dataTable.Columns.Add(newColumn);
}
else
Expand Down Expand Up @@ -64,17 +66,20 @@ public static DataTable ParquetReaderToDataTable(ParquetReader parquetReader, Li
return dataTable;
}

private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List<Parquet.Data.DataField> fields,
private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader groupReader, List<(Parquet.Thrift.SchemaElement, Parquet.Data.DataField)> fields,
int skipRecords, int readRecords, CancellationToken cancellationToken)
{
int rowBeginIndex = dataTable.Rows.Count;
bool isFirstColumn = true;

foreach (var field in fields)
foreach (var fieldTuple in fields)
{
if (cancellationToken.IsCancellationRequested)
break;

var logicalType = fieldTuple.Item1.LogicalType;
var field = fieldTuple.Item2;

int rowIndex = rowBeginIndex;

int skippedRecords = 0;
Expand All @@ -101,7 +106,23 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g
if (value == null)
dataTable.Rows[rowIndex][field.Name] = DBNull.Value;
else if (field.DataType == Parquet.Data.DataType.DateTimeOffset)
dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime; //converts to local time!
dataTable.Rows[rowIndex][field.Name] = ((DateTimeOffset)value).DateTime;
else if (field.DataType == Parquet.Data.DataType.Int64
&& logicalType.TIMESTAMP != null)
{
int divideBy = 0;
if (logicalType.TIMESTAMP.Unit.NANOS != null)
divideBy = 1000 * 1000;
else if (logicalType.TIMESTAMP.Unit.MICROS != null)
divideBy = 1000;
else if (logicalType.TIMESTAMP.Unit.MILLIS != null)
divideBy = 1;

if (divideBy > 0)
dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeMilliseconds((long)value / divideBy).DateTime;
else //Not sure if this 'else' is correct but adding just in case
dataTable.Rows[rowIndex][field.Name] = DateTimeOffset.FromUnixTimeSeconds((long)value);
}
else
dataTable.Rows[rowIndex][field.Name] = value;

Expand All @@ -113,7 +134,7 @@ private static void ProcessRowGroup(DataTable dataTable, ParquetRowGroupReader g
}


public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type)
public static Type ParquetNetTypeToCSharpType(Parquet.Thrift.SchemaElement thriftSchema, Parquet.Data.DataType type)
{
Type columnType = null;
switch (type)
Expand Down Expand Up @@ -147,7 +168,7 @@ public static Type ParquetNetTypeToCSharpType(Parquet.Data.DataType type)
columnType = typeof(int);
break;
case Parquet.Data.DataType.Int64:
columnType = typeof(long);
columnType = thriftSchema.LogicalType.TIMESTAMP != null ? typeof(DateTime) : typeof(long);
break;
case Parquet.Data.DataType.UnsignedByte:
columnType = typeof(byte);
Expand Down
4 changes: 2 additions & 2 deletions src/ParquetFileViewer/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("2.3.1.*")]
[assembly: AssemblyFileVersion("2.3.1.*")]
[assembly: AssemblyVersion("2.3.2.*")]
[assembly: AssemblyFileVersion("2.3.2.*")]

0 comments on commit a67b1ff

Please sign in to comment.