From 18b8f08acfd4b82f6e04d93381bc5511ee3d4435 Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Mon, 6 Nov 2023 12:32:08 +1300
Subject: [PATCH] Add extra tests for Pandas export

---
 nptdms/export/pandas_export.py |  2 +-
 nptdms/test/test_pandas.py     | 80 ++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/nptdms/export/pandas_export.py b/nptdms/export/pandas_export.py
index 1d15d4f..796ea38 100644
--- a/nptdms/export/pandas_export.py
+++ b/nptdms/export/pandas_export.py
@@ -114,7 +114,7 @@ def _channels_to_dataframe(
 def _array_for_pd(array):
     """ Convert data array to a format suitable for a Pandas dataframe
     """
-    if np.issubdtype(array.dtype, np.dtype('void')):
+    if np.issubdtype(array.dtype, np.dtype('void')) and len(array.dtype) == 0:
         # If dtype is void then the array must also be empty.
         # Pandas doesn't like void data types, so these are converted to empty float64 arrays
         # and Pandas will fill values with NaN
diff --git a/nptdms/test/test_pandas.py b/nptdms/test/test_pandas.py
index 22c3f21..564aee4 100644
--- a/nptdms/test/test_pandas.py
+++ b/nptdms/test/test_pandas.py
@@ -352,6 +352,86 @@ def check_series(series):
     check_series(channel_df["/'Group'/'Channel1'"])
 
 
+@pytest.mark.parametrize('arrow_dtypes', [False, True])
+def test_bool_data_to_pandas(arrow_dtypes):
+    test_file, expected_data = scenarios.bool_data().values
+    df = test_file.load()['group'].as_dataframe(arrow_dtypes=arrow_dtypes)
+    np.testing.assert_allclose(df['bool_channel'], expected_data[('group', 'bool_channel')])
+
+
+@pytest.mark.parametrize('arrow_dtypes', [False, True])
+def test_string_data_to_pandas(arrow_dtypes):
+    strings = ["abcdefg", "qwertyuiop"]
+
+    test_file = GeneratedFile()
+    toc = ("kTocMetaData", "kTocRawData", "kTocNewObjList")
+    metadata = (
+        # Number of objects
+        "01 00 00 00"
+        # Length of the object path
+        "18 00 00 00")
+    metadata += string_hexlify("/'Group'/'StringChannel'")
+    metadata += (
+        # Length of index information
+        "1C 00 00 00"
+        # Raw data data type
+        "20 00 00 00"
+        # Dimension
+        "01 00 00 00"
+        # Number of raw data values
+        "02 00 00 00"
+        "00 00 00 00"
+        # Number of bytes in data
+        "19 00 00 00"
+        "00 00 00 00"
+        # Number of properties (0)
+        "00 00 00 00")
+    data = (
+        "07 00 00 00"  # index to after first string
+        "11 00 00 00"  # index to after second string
+    )
+    for string in strings:
+        data += string_hexlify(string)
+    test_file.add_segment(toc, metadata, data)
+    tdms_data = test_file.load()
+
+    series = tdms_data["Group"].as_dataframe(arrow_dtypes=arrow_dtypes)["StringChannel"]
+
+    assert len(series) == len(strings)
+    for expected, read in zip(strings, series):
+        assert expected == read
+
+
+def test_dataframe_with_complex_data():
+    test_file, expected_data = scenarios.complex_data().values
+    df = test_file.load()['group'].as_dataframe()
+    np.testing.assert_allclose(df['complex_single_channel'], expected_data[('group', 'complex_single_channel')])
+    np.testing.assert_allclose(df['complex_double_channel'], expected_data[('group', 'complex_double_channel')])
+
+
+def test_dataframe_with_raw_timestamp_data():
+    test_file = GeneratedFile()
+    seconds = 3672033330
+    second_fractions = 1234567890 * 10 ** 10
+    test_file.add_segment(
+        ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
+        segment_objects_metadata(
+            channel_metadata("/'group'/'channel1'", 0x44, 4),
+        ),
+        hexlify_value("