Skip to content

Commit

Permalink
expand support dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
chuongmep committed Feb 27, 2024
1 parent 53959b0 commit b7b911e
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 3 deletions.
60 changes: 60 additions & 0 deletions APSToolkit/Utils/DataFrame.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
using System;
using System.Data;
using ChoETL;

namespace APSToolkit.Utils;

/// <summary>
/// Factory helpers that build a <see cref="Microsoft.Data.Analysis.DataFrame"/> from a
/// <see cref="DataTable"/>, a Parquet file (path or raw bytes) or an Excel sheet.
/// Every loader first obtains a <see cref="DataTable"/> and then converts it with the
/// <c>ToDataFrame</c> extension method.
/// </summary>
public static class DataFrame
{
    /// <summary>
    /// Loads a DataFrame from a DataTable.
    /// </summary>
    /// <param name="dataTable">The DataTable to be converted into a DataFrame.</param>
    /// <returns>A DataFrame that represents the provided DataTable.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dataTable"/> is <c>null</c>.</exception>
    public static Microsoft.Data.Analysis.DataFrame LoadFromDataTable(DataTable dataTable)
    {
        if (dataTable is null)
        {
            throw new ArgumentNullException(nameof(dataTable));
        }

        return dataTable.ToDataFrame();
    }

    /// <summary>
    /// Loads a DataFrame from a Parquet file.
    /// </summary>
    /// <param name="filePath">The path to the Parquet file.</param>
    /// <returns>A DataFrame that represents the data in the Parquet file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is <c>null</c>.</exception>
    public static Microsoft.Data.Analysis.DataFrame LoadFromParquet(string filePath)
    {
        if (filePath is null)
        {
            throw new ArgumentNullException(nameof(filePath));
        }

        // The conversion runs before the reader is disposed at the end of the method scope.
        using var reader = new ChoParquetReader(filePath);
        DataTable table = reader.AsDataTable();
        return table.ToDataFrame();
    }

    /// <summary>
    /// Loads a DataFrame from a Parquet file represented as a byte array.
    /// </summary>
    /// <param name="stream">The raw bytes of the Parquet file (a byte array, despite the parameter name).</param>
    /// <returns>A DataFrame that represents the data in the Parquet file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    public static Microsoft.Data.Analysis.DataFrame LoadFromParquet(byte[] stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        // Own the in-memory stream explicitly so it is released even if the reader
        // constructor throws; using declarations dispose in reverse order (reader first).
        using Stream memory = new MemoryStream(stream);
        using var reader = new ChoParquetReader(memory);
        DataTable table = reader.AsDataTable();
        return table.ToDataFrame();
    }

    /// <summary>
    /// Loads a DataFrame from an Excel file.
    /// </summary>
    /// <param name="filePath">The path to the Excel file.</param>
    /// <param name="sheetName">The name of the sheet in the Excel file to load.</param>
    /// <returns>A DataFrame that represents the data in the specified sheet of the Excel file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is <c>null</c>.</exception>
    public static Microsoft.Data.Analysis.DataFrame LoadFromExcel(string filePath, string sheetName)
    {
        if (filePath is null)
        {
            throw new ArgumentNullException(nameof(filePath));
        }

        // NOTE(review): sheetName is passed through unchecked because the null/empty
        // handling of ExcelUtils.ReadDataFromExcelToDataTable is not visible here - confirm.
        DataTable sheet = ExcelUtils.ReadDataFromExcelToDataTable(filePath, sheetName);
        return sheet.ToDataFrame();
    }
}
4 changes: 2 additions & 2 deletions APSToolkit/Utils/DataTableUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ public static DataTable FixBytesValue(this DataTable dataTable)
/// </summary>
/// <param name="dataTable">The DataTable to be converted.</param>
/// <returns>A DataFrame with the same columns and data as the input DataTable.</returns>
public static DataFrame ToDataFrame(this DataTable dataTable)
public static Microsoft.Data.Analysis.DataFrame ToDataFrame(this DataTable dataTable)
{
DataFrame dataFrame = new DataFrame();
Microsoft.Data.Analysis.DataFrame dataFrame = new Microsoft.Data.Analysis.DataFrame();

foreach (DataColumn column in dataTable.Columns)
{
Expand Down
3 changes: 3 additions & 0 deletions APSToolkitUnit/APSToolkitUnit.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,8 @@
<None Update="Resources\result.xlsx">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Resources\result.parquet">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>
32 changes: 32 additions & 0 deletions APSToolkitUnit/DataFrameTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using APSToolkit.Utils;
using Microsoft.Data.Analysis;
using NUnit.Framework;
using DataFrame = Microsoft.Data.Analysis.DataFrame;

namespace ForgeToolkitUnit;

Expand All @@ -27,4 +28,35 @@ public void DataTableToDataFrame()
Assert.AreEqual(3, dataFrame.Rows.Count);
Assert.AreEqual(3, dataFrame.Columns.Count);
}

[Test]
public void LoadFromDataTableTest()
{
    // Arrange: a three-column table (int, string, byte[]) holding three rows.
    var source = new DataTable();
    source.Columns.AddRange(new[]
    {
        new DataColumn("Id", typeof(int)),
        new DataColumn("Name", typeof(string)),
        new DataColumn("Data", typeof(byte[])),
    });
    source.Rows.Add(1, "Name1", new byte[] { 1, 2, 3 });
    source.Rows.Add(2, "Name2", new byte[] { 4, 5, 6 });
    source.Rows.Add(3, "Name3", new byte[] { 7, 8, 9 });

    // Act: convert through the public loader.
    var frame = APSToolkit.Utils.DataFrame.LoadFromDataTable(source);

    // Assert: the frame keeps the table's row and column counts.
    Assert.AreEqual(3, frame.Rows.Count);
    Assert.AreEqual(3, frame.Columns.Count);
}
// Verifies that the bundled Parquet fixture loads into a DataFrame with three rows.
// [TestCase] alone marks the method as a test, so the extra [Test] attribute is not needed.
[TestCase("./Resources/result.parquet")]
public void LoadFromParquetTest(string parquet)
{
    // Resolve the fixture against the test assembly directory (where the project copies
    // Resources\result.parquet) instead of relying on the process working directory.
    string fixturePath = System.IO.Path.Combine(TestContext.CurrentContext.TestDirectory, parquet);

    DataFrame dataFrame = APSToolkit.Utils.DataFrame.LoadFromParquet(fixturePath);

    Assert.AreEqual(3, dataFrame.Rows.Count);
}

// Verifies that the requested sheet of the bundled Excel fixture loads into a non-empty DataFrame.
// [TestCase] alone marks the method as a test, so the extra [Test] attribute is not needed.
[TestCase("./Resources/result.xlsx", "Walls")]
public void LoadFromExcelTest(string excelPath, string sheetName)
{
    // Resolve the fixture against the test assembly directory (where the project copies
    // Resources\result.xlsx) instead of relying on the process working directory.
    string fixturePath = System.IO.Path.Combine(TestContext.CurrentContext.TestDirectory, excelPath);

    DataFrame dataFrame = APSToolkit.Utils.DataFrame.LoadFromExcel(fixturePath, sheetName);

    Assert.AreNotEqual(0, dataFrame.Rows.Count);
    Assert.AreNotEqual(0, dataFrame.Columns.Count);
}
}
2 changes: 1 addition & 1 deletion APSToolkitUnit/ProbDbReaderRevitTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public void GetAllDataByFamilyTypeTest(string typeName)
public void GetDataByCategoryParquetTest(string category)

{
RevitPropDbReader = new PropDbReaderRevit(Settings._RevitRealUrn, Settings.Token2Leg);
RevitPropDbReader = new PropDbReaderRevit(Settings._RevitTestUrn, Settings.Token2Leg);
DataTable dataTable = RevitPropDbReader.GetAllDataByCategory(category);
dataTable.ExportToParquet("result.parquet");
Assert.AreNotEqual(0, dataTable.Rows);
Expand Down
Binary file added APSToolkitUnit/Resources/result.parquet
Binary file not shown.

0 comments on commit b7b911e

Please sign in to comment.