From 8f129c4cc5c5bdc4b6cfe227b4afb06f29cdcdbc Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 8 Mar 2024 23:02:56 +1300 Subject: [PATCH] Expose information about incomplete segments --- docs/apireference.rst | 6 ++++ nptdms/reader.py | 4 +-- nptdms/tdms.py | 52 +++++++++++++++++++++++++++++++++++ nptdms/tdms_segment.py | 23 ++++++++-------- nptdms/test/test_tdms_file.py | 9 ++++++ 5 files changed, 81 insertions(+), 13 deletions(-) diff --git a/docs/apireference.rst b/docs/apireference.rst index aa77446..6d889f8 100644 --- a/docs/apireference.rst +++ b/docs/apireference.rst @@ -27,6 +27,12 @@ Reading TDMS Files .. autoclass:: ChannelDataChunk() :members: +.. autoclass:: nptdms.tdms.FileStatus() + :members: + +.. autoclass:: nptdms.tdms.ChannelSegmentStatus() + :members: + Writing TDMS Files ------------------ diff --git a/nptdms/reader.py b/nptdms/reader.py index 12f284b..8c7b031 100644 --- a/nptdms/reader.py +++ b/nptdms/reader.py @@ -275,10 +275,10 @@ def _read_segment_metadata( file, segment_position, is_index_file) segment = TdmsSegment( - position, toc_mask, next_segment_pos, data_position) + position, toc_mask, next_segment_pos, data_position, segment_incomplete) properties = segment.read_segment_objects( - file, self._prev_segment_objects, index_cache, previous_segment, segment_incomplete) + file, self._prev_segment_objects, index_cache, previous_segment) return segment, properties def _read_lead_in(self, file, segment_position, is_index_file=False): diff --git a/nptdms/tdms.py b/nptdms/tdms.py index df2e1cb..e012a82 100644 --- a/nptdms/tdms.py +++ b/nptdms/tdms.py @@ -164,6 +164,37 @@ def properties(self): return self._properties + @property + def file_status(self): + """ Return information about the file status + + :rtype: FileStatus + """ + + # _read_lead_in returns whether last segment incomplete + # passed to TdmsSegment.read_segment_objects + # passed to _calculate_chunks + # _calculate_chunks sets final_chunk_lengths_override + # but only if 
data size is not multiple of chunk size + + incomplete_last_segment = False + channel_statuses = None + if self._reader._segments: + last_segment = self._reader._segments[-1] + incomplete_last_segment = last_segment.segment_incomplete + last_chunk_overrides = last_segment.final_chunk_lengths_override + if last_chunk_overrides: + channel_statuses = dict( + (obj.path, ChannelSegmentStatus(obj.number_values, last_chunk_overrides[obj.path])) + for obj in last_segment.ordered_objects) + elif incomplete_last_segment: + # Data lengths match expected lengths + channel_statuses = dict( + (obj.path, ChannelSegmentStatus(obj.number_values, obj.number_values)) + for obj in last_segment.ordered_objects) + + return FileStatus(incomplete_last_segment, channel_statuses) + def as_dataframe(self, time_index=False, absolute_time=False, scaled_data=True, arrow_dtypes=False): """ Converts the TDMS file to a DataFrame. DataFrame columns are named using the TDMS object paths. @@ -955,6 +986,27 @@ def _data(self): return self._raw_data.data +class FileStatus: + """ + Contains status information about a read TDMS file + """ + def __init__(self, incomplete_final_segment, channel_statuses): + #: Boolean indicating whether the last data segment was not written completely, + #: meaning it may contain less data than expected + self.incomplete_final_segment = incomplete_final_segment + #: Dictionary mapping from channel paths to ChannelSegmentStatus objects + #: when the last segment is incomplete or had an unexpected length + self.channel_statuses = channel_statuses + + +class ChannelSegmentStatus: + def __init__(self, expected_length, read_length): + #: Number of values expected in the segment + self.expected_length = expected_length + #: Number of values read from the segment + self.read_length = read_length + + def _convert_data_chunk(chunk, raw_timestamps): for channel_chunk in chunk.channel_data.values(): _convert_channel_data_chunk(channel_chunk, raw_timestamps) diff --git 
a/nptdms/tdms_segment.py b/nptdms/tdms_segment.py index 6c753ff..0e6110a 100644 --- a/nptdms/tdms_segment.py +++ b/nptdms/tdms_segment.py @@ -44,9 +44,10 @@ class TdmsSegment(object): 'data_position', 'final_chunk_lengths_override', 'object_index', + 'segment_incomplete', ] - def __init__(self, position, toc_mask, next_segment_pos, data_position): + def __init__(self, position, toc_mask, next_segment_pos, data_position, segment_incomplete): self.position = position self.toc_mask = toc_mask self.next_segment_pos = next_segment_pos @@ -55,11 +56,12 @@ def __init__(self, position, toc_mask, next_segment_pos, data_position): self.final_chunk_lengths_override = None self.ordered_objects = None self.object_index = None + self.segment_incomplete = segment_incomplete def __repr__(self): return "<TdmsSegment at position %d>" % self.position - def read_segment_objects(self, file, previous_segment_objects, index_cache, previous_segment, segment_incomplete): + def read_segment_objects(self, file, previous_segment_objects, index_cache, previous_segment): """Read segment metadata section and update object information :param file: Open TDMS file :param previous_segment_objects: Dictionary of path to the most recently read segment object for a TDMS object. :param index_cache: A SegmentIndexCache instance, or None if segment indexes are not required. :param previous_segment: Previous segment in the file. - :param segment_incomplete: Whether the next segment offset was not set. 
""" if not self.toc_mask & toc_properties['kTocMetaData']: - self._reuse_previous_segment_metadata(previous_segment, segment_incomplete) + self._reuse_previous_segment_metadata(previous_segment) return endianness = '>' if (self.toc_mask & toc_properties['kTocBigEndian']) else '<' @@ -134,7 +135,7 @@ def read_segment_objects(self, file, previous_segment_objects, index_cache, prev if index_cache is not None: self.object_index = index_cache.get_index(self.ordered_objects) - self._calculate_chunks(segment_incomplete) + self._calculate_chunks() return properties def get_segment_object(self, object_path): @@ -194,11 +195,11 @@ def _reuse_previous_object( segment_obj.read_raw_data_index(file, raw_data_index_header, endianness) self.ordered_objects.append(segment_obj) - def _reuse_previous_segment_metadata(self, previous_segment, segment_incomplete): + def _reuse_previous_segment_metadata(self, previous_segment): try: self.ordered_objects = previous_segment.ordered_objects self.object_index = previous_segment.object_index - self._calculate_chunks(segment_incomplete) + self._calculate_chunks() except AttributeError: raise ValueError( "kTocMetaData is not set for segment but " @@ -269,7 +270,7 @@ def read_raw_data_for_channel(self, f, channel_path, chunk_offset=0, num_chunks= for chunk in self._read_channel_data_chunks(f, data_objects, channel_path, chunk_offset, stop_chunk): yield chunk - def _calculate_chunks(self, segment_incomplete): + def _calculate_chunks(self): """ Work out the number of chunks the data is in, for cases where the meta data doesn't change at all so there is no @@ -299,9 +300,9 @@ def _calculate_chunks(self, segment_incomplete): total_data_size, data_size) self.num_chunks = 1 + int(total_data_size // data_size) self.final_chunk_lengths_override = self._compute_final_chunk_lengths( - data_size, chunk_remainder, segment_incomplete) + data_size, chunk_remainder) - def _compute_final_chunk_lengths(self, chunk_size, chunk_remainder, segment_incomplete): + 
def _compute_final_chunk_lengths(self, chunk_size, chunk_remainder): """Compute object data lengths for a final chunk that has less data than expected """ if self._have_daqmx_objects(): @@ -314,7 +315,7 @@ def _compute_final_chunk_lengths(self, chunk_size, chunk_remainder, segment_inco return obj_chunk_sizes interleaved_data = self.toc_mask & toc_properties['kTocInterleavedData'] - if interleaved_data or not segment_incomplete: + if interleaved_data or not self.segment_incomplete: for obj in self.ordered_objects: if not obj.has_data: continue diff --git a/nptdms/test/test_tdms_file.py b/nptdms/test/test_tdms_file.py index e4b1cc6..47cf913 100644 --- a/nptdms/test/test_tdms_file.py +++ b/nptdms/test/test_tdms_file.py @@ -807,6 +807,15 @@ def test_truncated_interleaved_data(): assert len(chan) == 3 assert len(chan_data) == 3 + file_status = tdms_file.file_status + assert file_status.incomplete_final_segment + chan1_status = file_status.channel_statuses["/'group'/'channel1'"] + assert chan1_status.expected_length == 4 + assert chan1_status.read_length == 3 + chan2_status = file_status.channel_statuses["/'group'/'channel2'"] + assert chan2_status.expected_length == 4 + assert chan2_status.read_length == 3 + def test_truncated_metadata_in_last_segment(): """ Test the scenario where writing the file was aborted with part of the metadata written