Skip to content

Commit

Permalink
Merge pull request #170 from astronomy-commons/parquet_batch_read
Browse files Browse the repository at this point in the history
Read parquet lazily
  • Loading branch information
hombit authored Nov 29, 2023
2 parents b2fe8ac + a41b7f7 commit 4dae8de
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions src/hipscat_import/catalog/file_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import abc

-import pyarrow as pa
import pyarrow.parquet as pq
from astropy.table import Table
from hipscat.io import FilePointer, file_io
Expand Down Expand Up @@ -258,9 +257,9 @@ def __init__(self, chunksize=500_000, **kwargs):

     def read(self, input_file):
         self.regular_file_exists(input_file)
-        parquet_file = pq.read_table(input_file, **self.kwargs)
-        for smaller_table in parquet_file.to_batches(max_chunksize=self.chunksize):
-            yield pa.Table.from_batches([smaller_table]).to_pandas()
+        parquet_file = pq.ParquetFile(input_file, **self.kwargs)
+        for smaller_table in parquet_file.iter_batches(batch_size=self.chunksize, use_pandas_metadata=True):
+            yield smaller_table.to_pandas()

def provenance_info(self) -> dict:
provenance_info = {
Expand Down

0 comments on commit 4dae8de

Please sign in to comment.