forked from finopsfoundation/focus_converters
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix multiprocessing queue and downgraded polars version temporarily until pyarrow compatibility is restored. (finopsfoundation#341)
Signed-off-by: Varun Mittal <[email protected]>
- Loading branch information
1 parent
d45674b
commit a438f5a
Showing
5 changed files
with
69 additions
and
5 deletions.
There are no files selected for viewing
8 changes: 8 additions & 0 deletions
8
focus_converter_base/focus_converter/conversion_configs/gcp/0_dimension_dtypes_S001.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
plan_name: adds dtypes to the columns required for the conversion | ||
conversion_type: set_column_dtypes | ||
column: PlaceHolder | ||
focus_column: PlaceHolder | ||
conversion_args: | ||
dtype_args: | ||
- column_name: billing_account_id | ||
dtype: string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
focus_converter_base/tests/test_polars_pyarrow_compatibility.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
|
||
import polars as pl | ||
import pyarrow as pa | ||
import pytest | ||
from unittest import TestCase | ||
import pyarrow.parquet as pq | ||
import pyarrow.dataset as ds | ||
import tempfile | ||
|
||
from focus_converter.data_loaders.data_loader import ( | ||
DEFAULT_BATCH_READ_SIZE, | ||
FRAGMENT_READ_AHEAD, | ||
BATCH_READ_AHEAD, | ||
) | ||
|
||
|
||
class TestPolarsPyarrowCompatibility(TestCase):
    """Regression test for loading PyArrow data with struct columns into Polars.

    On an incompatible Polars/PyArrow pairing, converting Arrow data to Polars
    fails with ``AttributeError: 'pyarrow.lib.StructArray' object has no
    attribute 'num_chunks'``. This test exercises both the whole-table path
    and the batched dataset-scanner path so that such a regression is caught.
    """

    def test_polars_pyarrow_compatibility(self):
        """Round-trip a struct-bearing table through Parquet and load it into Polars."""
        # Column "c" holds dict values, so PyArrow infers a struct column.
        source_table = pa.table(
            {"a": [1, 2, 3], "b": [4, 5, 6], "c": [{"d": 7}, {"d": 8}, {"d": 9}]}
        )

        with tempfile.TemporaryDirectory() as tempdir:
            parquet_path = os.path.join(tempdir, "test.pq")
            pq.write_table(source_table, parquet_path)
            loaded_table = pq.read_table(parquet_path)

            # Whole-table conversion must succeed and keep every row.
            table_df = pl.from_arrow(loaded_table)
            self.assertIsInstance(table_df, pl.DataFrame)
            self.assertEqual(table_df.height, source_table.num_rows)

            # Batched conversion through a dataset scanner, configured with the
            # constants imported from the project's data loader. On affected
            # versions this is where the AttributeError on 'num_chunks' shows up.
            dataset = ds.dataset(tempdir)
            scanner = dataset.scanner(
                batch_size=DEFAULT_BATCH_READ_SIZE,
                use_threads=True,
                fragment_readahead=FRAGMENT_READ_AHEAD,
                batch_readahead=BATCH_READ_AHEAD,
            )

            total_rows = 0
            for batch in scanner.to_batches():
                df = pl.from_arrow(batch)
                self.assertIsInstance(df, pl.DataFrame)
                total_rows += df.height

            # Guard against a vacuous pass: if the scanner yielded nothing, the
            # assertions inside the loop would never have run.
            self.assertEqual(total_rows, source_table.num_rows)