From ce684cddf65c684554b0bd7fa9fb1fe1949c6136 Mon Sep 17 00:00:00 2001 From: Scott Griffiths Date: Fri, 10 May 2024 13:30:57 +0100 Subject: [PATCH] Optimised find/findall for byte aligned case. Bug #326. --- bitstring/bitstore.py | 18 ++++++++++++++++++ release_notes.txt | 1 + tests/test_bitstream.py | 6 +++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/bitstring/bitstore.py b/bitstring/bitstore.py index f417b3f7..6c01f289 100644 --- a/bitstring/bitstore.py +++ b/bitstring/bitstore.py @@ -131,6 +131,24 @@ def rfind(self, bs: BitStore, start: int, end: int, bytealigned: bool = False): return -1 def findall_msb0(self, bs: BitStore, start: int, end: int, bytealigned: bool = False) -> Iterator[int]: + if bytealigned is True and len(bs) % 8 == 0: + # Special case, looking for whole bytes on whole byte boundaries + bytes_ = bs.tobytes() + # Round up start byte to next byte, and round end byte down. + # We're only looking for whole bytes, so can ignore bits at either end. + start_byte = (start + 7) // 8 + end_byte = end // 8 + b = self._bitarray[start_byte * 8: end_byte * 8].tobytes() + byte_pos = 0 + bytes_to_search = end_byte - start_byte + while byte_pos < bytes_to_search: + byte_pos = b.find(bytes_, byte_pos) + if byte_pos == -1: + break + yield (byte_pos + start_byte) * 8 + byte_pos = byte_pos + 1 + return + # General case i = self._bitarray.itersearch(bs._bitarray, start, end) if not bytealigned: for p in i: diff --git a/release_notes.txt b/release_notes.txt index 64970bd6..4ba2563e 100644 --- a/release_notes.txt +++ b/release_notes.txt @@ -10,6 +10,7 @@ A couple more minor bug fixes. * Sometimes a ValueError was being raised instead of a ReadError. Bug #325. * Initialising a bitstring from None now raises a TypeError rather than generating an empty bitstring. Bug #323. +* Fixed performance regression for find/findall in some situations. Bug #326. ------------------------- April 2024: version 4.2.1 diff --git a/tests/test_bitstream.py b/tests/test_bitstream.py index 6581612e..a0569558 100644 --- a/tests/test_bitstream.py +++ b/tests/test_bitstream.py @@ -131,10 +131,10 @@ def test_find_corner_cases(self): def test_find_bytes(self): s = BitStream.fromstring('0x010203040102ff') - assert not s.find('0x05', bytealigned=True) - assert s.find('0x02', bytealigned=True) + assert s.find('0x05', bytealigned=True) ==() + assert s.find('0x02', bytealigned=True) == (8,) assert s.read(16).hex == '0203' - assert s.find('0x02', start=s.bitpos, bytealigned=True) + assert s.find('0x02', start=s.bitpos, bytealigned=True) == (40,) s.read(1) assert not s.find('0x02', start=s.bitpos, bytealigned=True)