Tests for indexed readers
delucchi-cmu committed Jun 10, 2024
1 parent 4b2e63b commit da0a69c
Showing 4 changed files with 68 additions and 1 deletion.
3 changes: 2 additions & 1 deletion tests/conftest.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 import hipscat as hc
 import lsdb
@@ -52,7 +53,7 @@ def storage_options(cloud):
 @pytest.fixture
 def local_data_dir():
     local_data_path = os.path.dirname(__file__)
-    return os.path.join(local_data_path, "data")
+    return Path(local_data_path) / "data"
 
 
 @pytest.fixture
5 changes: 5 additions & 0 deletions tests/data/indexed_files/csv_list_single.txt
@@ -0,0 +1,5 @@
abfs://hipscat/pytests/hipscat_import/data/small_sky_parts/catalog_00_of_05.csv
abfs://hipscat/pytests/hipscat_import/data/small_sky_parts/catalog_01_of_05.csv
abfs://hipscat/pytests/hipscat_import/data/small_sky_parts/catalog_02_of_05.csv
abfs://hipscat/pytests/hipscat_import/data/small_sky_parts/catalog_03_of_05.csv
abfs://hipscat/pytests/hipscat_import/data/small_sky_parts/catalog_04_of_05.csv
4 changes: 4 additions & 0 deletions tests/data/indexed_files/parquet_list_single.txt
@@ -0,0 +1,4 @@
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=44.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=45.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=46.parquet
abfs://hipscat/pytests/hipscat/data/small_sky_order1/Norder=1/Dir=0/Npix=47.parquet
57 changes: 57 additions & 0 deletions tests/hipscat_import/test_input_readers.py
@@ -0,0 +1,57 @@
from hipscat_import.catalog.file_readers import IndexedCsvReader, IndexedParquetReader


def test_indexed_parquet_reader(storage_options, local_data_dir):
    # Chunksize covers all the inputs.
    total_chunks = 0
    total_len = 0
    for frame in IndexedParquetReader(chunksize=10_000, storage_options=storage_options).read(
        local_data_dir / "indexed_files" / "parquet_list_single.txt"
    ):
        total_chunks += 1
        assert len(frame) == 131
        total_len += len(frame)

    assert total_chunks == 1
    assert total_len == 131

    # Requesting a very small chunksize. This will split up reads on the parquet.
    total_chunks = 0
    total_len = 0
    for frame in IndexedParquetReader(chunksize=5, storage_options=storage_options).read(
        local_data_dir / "indexed_files" / "parquet_list_single.txt"
    ):
        total_chunks += 1
        assert len(frame) <= 5
        total_len += len(frame)

    assert total_chunks == 28
    assert total_len == 131


def test_indexed_csv_reader(storage_options, local_data_dir):
    # Chunksize covers all the inputs.
    total_chunks = 0
    total_len = 0
    for frame in IndexedCsvReader(chunksize=10_000, storage_options=storage_options).read(
        local_data_dir / "indexed_files" / "csv_list_single.txt"
    ):
        total_chunks += 1
        assert len(frame) == 131
        total_len += len(frame)

    assert total_chunks == 1
    assert total_len == 131

    # Requesting a very small chunksize. This will split up reads on the CSV files.
    total_chunks = 0
    total_len = 0
    for frame in IndexedCsvReader(chunksize=5, storage_options=storage_options).read(
        local_data_dir / "indexed_files" / "csv_list_single.txt"
    ):
        total_chunks += 1
        assert len(frame) <= 5
        total_len += len(frame)

    assert total_chunks == 29
    assert total_len == 131
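
For reference, a minimal usage sketch (not part of this commit) of how an indexed list file is consumed outside the test suite. The IndexedCsvReader class, its chunksize argument, and the read() call are the same ones exercised in the tests above; the local index file name is hypothetical, and it is assumed that storage_options can be omitted when the listed CSV paths are local.

from hipscat_import.catalog.file_readers import IndexedCsvReader

# Hypothetical index file: one CSV path per line, mirroring csv_list_single.txt.
index_file = "my_csv_list.txt"

total_rows = 0
for frame in IndexedCsvReader(chunksize=1_000).read(index_file):
    # Each yielded frame holds at most `chunksize` rows drawn from the listed files.
    total_rows += len(frame)
print(total_rows)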
