Skip to content

Commit

Permalink
add empty test0.parquet + inspect test
Browse files Browse the repository at this point in the history
  • Loading branch information
ryan-williams committed Dec 31, 2023
1 parent b876f7f commit 0c1fe4e
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 1 deletion.
56 changes: 56 additions & 0 deletions tests/test0_inspect.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

############ file meta data ############
created_by: parquet-cpp-arrow version 14.0.2
num_columns: 4
num_rows: 0
num_row_groups: 1
format_version: 2.6
serialized_size: 2398


############ Columns ############
a
b
c
d

############ Column(a) ############
name: a
path: a
max_definition_level: 1
max_repetition_level: 0
physical_type: DOUBLE
logical_type: None
converted_type (legacy): NONE
compression: SNAPPY (space_saved: -7%)

############ Column(b) ############
name: b
path: b
max_definition_level: 1
max_repetition_level: 0
physical_type: INT32
logical_type: Null
converted_type (legacy): NONE
compression: SNAPPY (space_saved: -7%)

############ Column(c) ############
name: c
path: c
max_definition_level: 1
max_repetition_level: 0
physical_type: INT64
logical_type: None
converted_type (legacy): NONE
compression: SNAPPY (space_saved: -7%)

############ Column(d) ############
name: d
path: d
max_definition_level: 1
max_repetition_level: 0
physical_type: BOOLEAN
logical_type: None
converted_type (legacy): NONE
compression: SNAPPY (space_saved: N/A)

14 changes: 14 additions & 0 deletions tests/test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from os import path
from os.path import dirname

from subprocess import check_output

from parquet_tools.parquet.reader import get_filemetadata
from parquet_tools.gen_py.parquet.ttypes import (FileMetaData, SchemaElement, LogicalType, StringType, RowGroup, ColumnMetaData, ColumnChunk,
Statistics, PageEncodingStats, KeyValue)
Expand Down Expand Up @@ -216,3 +221,12 @@ def test_key_value_metadata(self, fmd):
key='ARROW:schema',
value='/////4gDAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAALgCAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAACQAgAABAAAAIECAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDMsICJzdGVwIjogMX1dLCAiY29sdW1uX2luZGV4ZXMiOiBbeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiBudWxsLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IHsiZW5jb2RpbmciOiAiVVRGLTgifX1dLCAiY29sdW1ucyI6IFt7Im5hbWUiOiAib25lIiwgImZpZWxkX25hbWUiOiAib25lIiwgInBhbmRhc190eXBlIjogImZsb2F0NjQiLCAibnVtcHlfdHlwZSI6ICJmbG9hdDY0IiwgIm1ldGFkYXRhIjogbnVsbH0sIHsibmFtZSI6ICJ0d28iLCAiZmllbGRfbmFtZSI6ICJ0d28iLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IG51bGx9LCB7Im5hbWUiOiAidGhyZWUiLCAiZmllbGRfbmFtZSI6ICJ0aHJlZSIsICJwYW5kYXNfdHlwZSI6ICJib29sIiwgIm51bXB5X3R5cGUiOiAiYm9vbCIsICJtZXRhZGF0YSI6IG51bGx9XSwgImNyZWF0b3IiOiB7ImxpYnJhcnkiOiAicHlhcnJvdyIsICJ2ZXJzaW9uIjogIjE0LjAuMiJ9LCAicGFuZGFzX3ZlcnNpb24iOiAiMi4xLjQifQAAAAYAAABwYW5kYXMAAAMAAABsAAAAMAAAAAQAAACw////AAABBhAAAAAYAAAABAAAAAAAAAAFAAAAdGhyZWUAAADc////2P///wAAAQUQAAAAGAAAAAQAAAAAAAAAAwAAAHR3bwAEAAQABAAAABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAxAAAAAcAAAABAAAAAAAAAADAAAAb25lAAAABgAIAAYABgAAAAAAAgAAAAAA')
]

def test_inspect(self):
    """Compare `parquet-tools inspect` output for test0.parquet to a snapshot.

    Runs the `parquet-tools` command-line tool on `test0.parquet`, located
    in the same directory as this test module, and asserts that its decoded
    standard output equals the contents of `test0_inspect.txt` exactly.
    """
    here = dirname(__file__)

    # Run the CLI first; its captured stdout is bytes, so decode to text.
    command = ['parquet-tools', 'inspect', path.join(here, 'test0.parquet')]
    raw_output = check_output(command)
    actual = raw_output.decode()

    # Load the stored snapshot that the CLI output must match exactly.
    with open(path.join(here, 'test0_inspect.txt'), 'r') as snapshot:
        expected = snapshot.read()

    assert actual == expected
2 changes: 2 additions & 0 deletions tests/test_parquets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def write_test_dataframes():
tests_dir = dirname(__file__)
df.to_parquet(f'{tests_dir}/test1.parquet')
df.to_parquet(f'{tests_dir}/test2.parquet')
df0 = pd.DataFrame({ 'a': [], 'b': [], 'c': [], 'd': [], }).astype({ 'a': float, 'b': str, 'c': int, 'd': bool })
df0.to_parquet(f'{tests_dir}/test0.parquet')


if __name__ == '__main__':
Expand Down
5 changes: 4 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_is_wildcard(self, pf, expected):
@pytest.mark.parametrize('pf, expected', [
(
LocalParquetFile(path='./tests/*.parquet'), [
LocalParquetFile('./tests/test0.parquet'),
LocalParquetFile('./tests/test1.parquet'),
LocalParquetFile('./tests/test2.parquet')
]
Expand Down Expand Up @@ -82,11 +83,13 @@ def test_multiple_localfile(self):
LocalParquetFile(path='./tests/*.parquet'),
)

assert len(actual) == 2
assert len(actual) == 3
assert isinstance(actual[0], LocalParquetFile)
assert isinstance(actual[1], LocalParquetFile)
assert isinstance(actual[1], LocalParquetFile)

assert {a.path for a in actual} == {
'./tests/test0.parquet',
'./tests/test1.parquet',
'./tests/test2.parquet',
}
Expand Down

0 comments on commit 0c1fe4e

Please sign in to comment.