diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7236225..5b38d19 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 diff --git a/probables/__init__.py b/probables/__init__.py index 532cb8e..0314018 100644 --- a/probables/__init__.py +++ b/probables/__init__.py @@ -1,5 +1,7 @@ """pyprobables module""" +from __future__ import annotations + from probables.blooms import ( BloomFilter, BloomFilterOnDisk, diff --git a/probables/blooms/bloom.py b/probables/blooms/bloom.py index b2fe918..7a44381 100644 --- a/probables/blooms/bloom.py +++ b/probables/blooms/bloom.py @@ -4,6 +4,8 @@ URL: https://github.com/barrust/bloom """ +from __future__ import annotations + import math import os from array import array @@ -68,11 +70,11 @@ class BloomFilter: def __init__( self, - est_elements: Union[int, None] = None, - false_positive_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hex_string: Union[str, None] = None, - hash_function: Union[HashFuncT, None] = None, + est_elements: int | None = None, + false_positive_rate: float | None = None, + filepath: str | Path | None = None, + hex_string: str | None = None, + hash_function: HashFuncT | None = None, ): # set some things up self._on_disk = False @@ -110,7 +112,7 @@ def _load_init(self, filepath, hash_function, hex_string, est_elements, false_po _FPR_STRUCT = Struct("f") _IMPT_STRUCT = Struct("B") - def __contains__(self, key: KeyT) -> Union[int, bool]: + def __contains__(self, key: KeyT) -> int | bool: """setup the `in` keyword""" return self.check(key) @@ -220,7 +222,7 @@ def clear(self) -> None: for idx in range(self._bloom_length): self._bloom[idx] = 0 - def hashes(self, key: KeyT, depth: Union[int, None] = None) -> HashResultsT: + def hashes(self, key: KeyT, depth: int | None = None) -> HashResultsT: """Return the hashes based on the provided key Args: @@ -284,7 +286,7 @@ def export_hex(self) -> str: bytes_string = hexlify(bytearray(self._bloom[: self.bloom_length])) + hexlify(footer_bytes) return str(bytes_string, "utf-8") - def export(self, file: Union[Path, str, IOBase, mmap]) -> None: + def export(self, file: Path | str | IOBase | mmap) -> None: """Export the Bloom Filter to disk Args: @@ -303,7 +305,7 @@ def export(self, file: Union[Path, str, IOBase, mmap]) -> None: ) ) - def export_c_header(self, filename: Union[str, Path]) -> None: + def export_c_header(self, filename: str | Path) -> None: """Export the Bloom Filter to disk as a C header file. Args: @@ -322,7 +324,7 @@ def export_c_header(self, filename: Union[str, Path]) -> None: print("const unsigned char bloom[] = {", *data, "};", sep="\n", file=file) @classmethod - def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "BloomFilter": + def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> BloomFilter: """ Args: b (ByteString): The bytes to load as a Bloom Filter @@ -368,7 +370,7 @@ def current_false_positive_rate(self) -> float: exp = math.exp(dbl) return math.pow((1 - exp), self.number_hashes) - def intersection(self, second: SimpleBloomT) -> Union[SimpleBloomT, None]: + def intersection(self, second: SimpleBloomT) -> SimpleBloomT | None: """Return a new Bloom Filter that contains the intersection of the two @@ -399,7 +401,7 @@ def intersection(self, second: SimpleBloomT) -> Union[SimpleBloomT, None]: res.elements_added = res.estimate_elements() return res - def union(self, second: SimpleBloomT) -> Union["BloomFilter", None]: + def union(self, second: SimpleBloomT) -> BloomFilter | None: """Return a new Bloom Filter that contains the union of the two Args: @@ -429,7 +431,7 @@ def union(self, second: SimpleBloomT) -> Union["BloomFilter", None]: res.elements_added = res.estimate_elements() return res - def jaccard_index(self, second: SimpleBloomT) -> Union[float, None]: + def jaccard_index(self, second: SimpleBloomT) -> float | None: """Calculate the jaccard similarity score between two Bloom Filters Args: @@ -491,7 +493,7 @@ def _set_values( fpr: float, n_hashes: int, n_bits: int, - hash_func: Union[HashFuncT, None], + hash_func: HashFuncT | None, ) -> None: self._est_elements = est_els self._fpr = fpr @@ -504,7 +506,7 @@ def _set_values( self._number_hashes = n_hashes self._num_bits = n_bits - def _load_hex(self, hex_string: str, hash_function: Union[HashFuncT, None] = None) -> None: + def _load_hex(self, hex_string: str, hash_function: HashFuncT | None = None) -> None: """placeholder for loading from hex string""" offset = self._FOOTER_STRUCT_BE.size * 2 est_els, els_added, fpr, n_hashes, n_bits = self._parse_footer( @@ -516,8 +518,8 @@ def _load_hex(self, hex_string: str, hash_function: Union[HashFuncT, None] = Non def _load( self, - file: Union[Path, str, IOBase, mmap, ByteString], - hash_function: Union[HashFuncT, None] = None, + file: Path | str | IOBase | mmap | ByteString, + hash_function: HashFuncT | None = None, ) -> None: """load the Bloom Filter from file or bytes""" if not isinstance(file, (IOBase, mmap, bytes, bytearray, memoryview)): @@ -597,11 +599,11 @@ class BloomFilterOnDisk(BloomFilter): def __init__( self, - filepath: Union[str, Path], - est_elements: Union[int, None] = None, - false_positive_rate: Union[float, None] = None, - hex_string: Union[str, None] = None, - hash_function: Union[HashFuncT, None] = None, + filepath: str | Path, + est_elements: int | None = None, + false_positive_rate: float | None = None, + hex_string: str | None = None, + hash_function: HashFuncT | None = None, ) -> None: # set some things up self._filepath = resolve_path(filepath) @@ -644,7 +646,7 @@ def close(self) -> None: self.__file_pointer.close() self.__file_pointer = None - def export(self, file: Union[str, Path]) -> None: # type: ignore + def export(self, file: str | Path) -> None: # type: ignore """Export to disk if a different location Args: @@ -656,7 +658,7 @@ def export(self, file: Union[str, Path]) -> None: # type: ignore copyfile(self._filepath.name, str(file)) # otherwise, nothing to do! - def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] = None): # type: ignore + def _load(self, file: str | Path, hash_function: HashFuncT | None = None): # type: ignore """load the Bloom Filter on disk""" # read the file, set the optimal params # mmap everything @@ -678,7 +680,7 @@ def add_alt(self, hashes: HashResultsT) -> None: self.__update() @classmethod - def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "BloomFilterOnDisk": + def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> BloomFilterOnDisk: """ Raises: NotSupportedError """ diff --git a/probables/blooms/countingbloom.py b/probables/blooms/countingbloom.py index aa9a478..c3e6bd8 100644 --- a/probables/blooms/countingbloom.py +++ b/probables/blooms/countingbloom.py @@ -1,13 +1,15 @@ -""" CountingBloomFilter, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) - URL: https://github.com/barrust/counting_bloom +"""CountingBloomFilter, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) +URL: https://github.com/barrust/counting_bloom """ + +from __future__ import annotations + from array import array from collections.abc import ByteString from pathlib import Path from struct import Struct -from typing import Union from probables.blooms.bloom import BloomFilter from probables.constants import UINT32_T_MAX, UINT64_T_MAX @@ -18,7 +20,7 @@ MISMATCH_MSG = "The parameter second must be of type CountingBloomFilter" -def _verify_not_type_mismatch(second: "CountingBloomFilter") -> bool: +def _verify_not_type_mismatch(second: CountingBloomFilter) -> bool: """verify that there is not a type mismatch""" return isinstance(second, (CountingBloomFilter)) @@ -47,11 +49,11 @@ class CountingBloomFilter(BloomFilter): def __init__( self, - est_elements: Union[int, None] = None, - false_positive_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hex_string: Union[str, None] = None, - hash_function: Union[HashFuncT, None] = None, + est_elements: int | None = None, + false_positive_rate: float | None = None, + filepath: str | Path | None = None, + hex_string: str | None = None, + hash_function: HashFuncT | None = None, ) -> None: """setup the basic values needed""" self._filepath = None @@ -80,7 +82,7 @@ def _load_init(self, filepath, hash_function, hex_string, est_elements, false_po _IMPT_STRUCT = Struct("I") @classmethod - def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "CountingBloomFilter": + def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> CountingBloomFilter: """ Args: b (ByteString): the bytes to load as a Counting Bloom Filter @@ -207,7 +209,7 @@ def remove_alt(self, hashes: HashResultsT, num_els: int = 1) -> int: self.elements_added -= to_remove return min_val - to_remove - def intersection(self, second: "CountingBloomFilter") -> Union["CountingBloomFilter", None]: # type: ignore + def intersection(self, second: CountingBloomFilter) -> CountingBloomFilter | None: # type: ignore """Take the intersection of two Counting Bloom Filters Args: @@ -240,7 +242,7 @@ def intersection(self, second: "CountingBloomFilter") -> Union["CountingBloomFil res.elements_added = res.estimate_elements() return res - def jaccard_index(self, second: "CountingBloomFilter") -> Union[float, None]: # type:ignore + def jaccard_index(self, second: CountingBloomFilter) -> float | None: # type:ignore """Take the Jaccard Index of two Counting Bloom Filters Args: @@ -270,7 +272,7 @@ def jaccard_index(self, second: "CountingBloomFilter") -> Union[float, None]: # return 1.0 return count_inter / count_union - def union(self, second: "CountingBloomFilter") -> Union["CountingBloomFilter", None]: # type:ignore + def union(self, second: CountingBloomFilter) -> CountingBloomFilter | None: # type:ignore """Return a new Countiong Bloom Filter that contains the union of the two diff --git a/probables/blooms/expandingbloom.py b/probables/blooms/expandingbloom.py index a1cf268..8168bf3 100644 --- a/probables/blooms/expandingbloom.py +++ b/probables/blooms/expandingbloom.py @@ -4,13 +4,14 @@ URL: https://github.com/barrust/pyprobables """ +from __future__ import annotations + from array import array from collections.abc import ByteString from io import BytesIO, IOBase from mmap import mmap from pathlib import Path from struct import Struct -from typing import Union from probables.blooms.bloom import BloomFilter from probables.exceptions import RotatingBloomFilterError @@ -46,10 +47,10 @@ class ExpandingBloomFilter: def __init__( self, - est_elements: Union[int, None] = None, - false_positive_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + est_elements: int | None = None, + false_positive_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ): """initialize""" self._blooms = [] # type: ignore @@ -74,7 +75,7 @@ def __init__( _BLOOM_ELEMENT_SIZE = Struct("B").size @classmethod - def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "ExpandingBloomFilter": + def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> ExpandingBloomFilter: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter @@ -184,7 +185,7 @@ def __check_for_growth(self): if self._blooms[-1].elements_added >= self.__est_elements: self.__add_bloom_filter() - def export(self, file: Union[Path, str, IOBase, mmap]) -> None: + def export(self, file: Path | str | IOBase | mmap) -> None: """Export an expanding Bloom Filter, or subclass, to disk Args: @@ -208,7 +209,7 @@ def export(self, file: Union[Path, str, IOBase, mmap]) -> None: ) ) - def __load(self, file: Union[Path, str, IOBase, mmap]): + def __load(self, file: Path | str | IOBase | mmap): """load a file""" if not isinstance(file, (IOBase, mmap)): file = resolve_path(file) @@ -273,11 +274,11 @@ class RotatingBloomFilter(ExpandingBloomFilter): def __init__( self, - est_elements: Union[int, None] = None, - false_positive_rate: Union[float, None] = None, + est_elements: int | None = None, + false_positive_rate: float | None = None, max_queue_size: int = 10, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: """initialize""" super().__init__( @@ -290,8 +291,8 @@ def __init__( @classmethod def frombytes( # type:ignore - cls, b: ByteString, max_queue_size: int, hash_function: Union[HashFuncT, None] = None - ) -> "RotatingBloomFilter": + cls, b: ByteString, max_queue_size: int, hash_function: HashFuncT | None = None + ) -> RotatingBloomFilter: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter diff --git a/probables/countminsketch/countminsketch.py b/probables/countminsketch/countminsketch.py index 98a5611..4cb5eb6 100644 --- a/probables/countminsketch/countminsketch.py +++ b/probables/countminsketch/countminsketch.py @@ -4,6 +4,8 @@ URL: https://github.com/barrust/count-min-sketch """ +from __future__ import annotations + import math from array import array from collections.abc import ByteString @@ -12,7 +14,6 @@ from numbers import Number from pathlib import Path from struct import Struct -from typing import Union from probables.constants import INT32_T_MAX, INT32_T_MIN, INT64_T_MAX, INT64_T_MIN from probables.exceptions import CountMinSketchError, InitializationError, NotSupportedError @@ -59,12 +60,12 @@ class CountMinSketch: def __init__( self, - width: Union[int, None] = None, - depth: Union[int, None] = None, - confidence: Union[float, None] = None, - error_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + width: int | None = None, + depth: int | None = None, + confidence: float | None = None, + error_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: """default initilization function""" # default values @@ -152,7 +153,7 @@ def __bytes__(self) -> bytes: return f.getvalue() @classmethod - def frombytes(cls, b: ByteString, hash_function: Union[HashFuncT, None] = None) -> "CountMinSketch": + def frombytes(cls, b: ByteString, hash_function: HashFuncT | None = None) -> CountMinSketch: """ Args: b (ByteString): The bytes to load as a Count-Min Sketch @@ -244,7 +245,7 @@ def clear(self) -> None: for i, _ in enumerate(self._bins): self._bins[i] = 0 - def hashes(self, key: KeyT, depth: Union[int, None] = None) -> HashResultsT: + def hashes(self, key: KeyT, depth: int | None = None) -> HashResultsT: """Return the hashes based on the provided key Args: @@ -340,7 +341,7 @@ def check_alt(self, hashes: HashResultsT) -> int: bins = [(val % self.width) + (i * self.width) for i, val in enumerate(hashes)] return self.__query_method(sorted([self._bins[i] for i in bins])) - def export(self, file: Union[Path, str, IOBase, mmap]) -> None: + def export(self, file: Path | str | IOBase | mmap) -> None: """Export the count-min sketch to disk Args: @@ -354,7 +355,7 @@ def export(self, file: Union[Path, str, IOBase, mmap]) -> None: self._bins.tofile(file) # type: ignore file.write(self.__FOOTER_STRUCT.pack(self.width, self.depth, self.elements_added)) - def join(self, second: "CountMinSketch") -> None: + def join(self, second: CountMinSketch) -> None: """Join two count-min sketchs into a single count-min sketch; the calling count-min sketch will have the resulting combined data @@ -399,7 +400,7 @@ def join(self, second: "CountMinSketch") -> None: elif self.elements_added < INT64_T_MIN: self.__elements_added = INT64_T_MIN - def __load(self, file: Union[Path, str, IOBase, mmap]): + def __load(self, file: Path | str | IOBase | mmap): """load the count-min sketch from file""" if not isinstance(file, (IOBase, mmap)): file = resolve_path(file) @@ -481,12 +482,12 @@ class CountMeanSketch(CountMinSketch): def __init__( self, - width: Union[int, None] = None, - depth: Union[int, None] = None, - confidence: Union[float, None] = None, - error_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + width: int | None = None, + depth: int | None = None, + confidence: float | None = None, + error_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: super().__init__(width, depth, confidence, error_rate, filepath, hash_function) self.query_type = "mean" @@ -519,12 +520,12 @@ class CountMeanMinSketch(CountMinSketch): def __init__( self, - width: Union[int, None] = None, - depth: Union[int, None] = None, - confidence: Union[float, None] = None, - error_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + width: int | None = None, + depth: int | None = None, + confidence: float | None = None, + error_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: super().__init__(width, depth, confidence, error_rate, filepath, hash_function) self.query_type = "mean-min" @@ -560,12 +561,12 @@ class HeavyHitters(CountMinSketch): def __init__( self, num_hitters: int = 100, - width: Union[int, None] = None, - depth: Union[int, None] = None, - confidence: Union[float, None] = None, - error_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + width: int | None = None, + depth: int | None = None, + confidence: float | None = None, + error_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: super().__init__(width, depth, confidence, error_rate, filepath, hash_function) self.__top_x = {} # type: ignore @@ -575,8 +576,8 @@ def __init__( @classmethod def frombytes( # type: ignore - cls, b: ByteString, num_hitters: int = 100, hash_function: Union[HashFuncT, None] = None - ) -> "HeavyHitters": + cls, b: ByteString, num_hitters: int = 100, hash_function: HashFuncT | None = None + ) -> HeavyHitters: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter @@ -683,7 +684,7 @@ def clear(self) -> None: self.__top_x_size = 0 self.__smallest = 0 - def join(self, second: "HeavyHitters"): # type: ignore + def join(self, second: HeavyHitters): # type: ignore """Join is not supported by HeavyHitters Raises: @@ -721,12 +722,12 @@ class StreamThreshold(CountMinSketch): def __init__( self, threshold: int = 100, - width: Union[int, None] = None, - depth: Union[int, None] = None, - confidence: Union[float, None] = None, - error_rate: Union[float, None] = None, - filepath: Union[str, Path, None] = None, - hash_function: Union[HashFuncT, None] = None, + width: int | None = None, + depth: int | None = None, + confidence: float | None = None, + error_rate: float | None = None, + filepath: str | Path | None = None, + hash_function: HashFuncT | None = None, ) -> None: super().__init__(width, depth, confidence, error_rate, filepath, hash_function) self.__threshold = threshold @@ -734,8 +735,8 @@ def __init__( @classmethod def frombytes( # type: ignore - cls, b: ByteString, threshold: int = 100, hash_function: Union[HashFuncT, None] = None - ) -> "StreamThreshold": + cls, b: ByteString, threshold: int = 100, hash_function: HashFuncT | None = None + ) -> StreamThreshold: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter @@ -835,7 +836,7 @@ def remove_alt(self, key: str, hashes: HashResultsT, num_els: int = 1) -> int: self.__meets_threshold[key] = res return res - def join(self, second: "StreamThreshold"): # type: ignore + def join(self, second: StreamThreshold): # type: ignore """Join is not supported by StreamThreshold Raises: diff --git a/probables/cuckoo/countingcuckoo.py b/probables/cuckoo/countingcuckoo.py index e073757..5db9e97 100644 --- a/probables/cuckoo/countingcuckoo.py +++ b/probables/cuckoo/countingcuckoo.py @@ -3,6 +3,8 @@ Author: Tyler Barrus (barrust@gmail.com) """ +from __future__ import annotations + import random from array import array from collections.abc import ByteString @@ -10,7 +12,6 @@ from mmap import mmap from pathlib import Path from struct import Struct -from typing import Union from probables.cuckoo.cuckoo import CuckooFilter from probables.exceptions import CuckooFilterFullError @@ -44,8 +45,8 @@ def __init__( expansion_rate: int = 2, auto_expand: bool = True, finger_size: int = 4, - filepath: Union[str, Path, None] = None, - hash_function: Union[SimpleHashT, None] = None, + filepath: str | Path | None = None, + hash_function: SimpleHashT | None = None, ) -> None: """setup the data structure""" self.__unique_elements = 0 @@ -72,7 +73,7 @@ def init_error_rate( max_swaps: int = 500, expansion_rate: int = 2, auto_expand: bool = True, - hash_function: Union[SimpleHashT, None] = None, + hash_function: SimpleHashT | None = None, ): """Initialize a simple Cuckoo Filter based on error rate @@ -99,7 +100,7 @@ def init_error_rate( @classmethod def load_error_rate( - cls, error_rate: float, filepath: Union[str, Path], hash_function: Union[SimpleHashT, None] = None + cls, error_rate: float, filepath: str | Path, hash_function: SimpleHashT | None = None ): """Initialize a previously exported Cuckoo Filter based on error rate @@ -118,8 +119,8 @@ def load_error_rate( @classmethod def frombytes( - cls, b: ByteString, error_rate: Union[float, None] = None, hash_function: Union[SimpleHashT, None] = None - ) -> "CountingCuckooFilter": + cls, b: ByteString, error_rate: float | None = None, hash_function: SimpleHashT | None = None + ) -> CountingCuckooFilter: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter @@ -144,7 +145,7 @@ def unique_elements(self) -> int: return self.__unique_elements @property - def buckets(self) -> list[list["CountingCuckooBin"]]: # type: ignore + def buckets(self) -> list[list[CountingCuckooBin]]: # type: ignore """list(list): The buckets holding the fingerprints Note: @@ -215,7 +216,7 @@ def expand(self): """Expand the cuckoo filter""" self._expand_logic(None) - def export(self, file: Union[Path, str, IOBase, mmap]) -> None: + def export(self, file: Path | str | IOBase | mmap) -> None: """Export cuckoo filter to file Args: @@ -231,7 +232,7 @@ def export(self, file: Union[Path, str, IOBase, mmap]) -> None: def _insert_fingerprint_alt( self, fingerprint: int, idx_1: int, idx_2: int, count: int = 1 - ) -> Union["CountingCuckooBin", None]: + ) -> CountingCuckooBin | None: """insert a fingerprint, but with a count parameter!""" if self.__insert_element(fingerprint, idx_1, count): self._inserted_elements += 1 @@ -266,7 +267,7 @@ def _insert_fingerprint_alt( # if we got here we have an error... we might need to know what is left return prv_bin - def _check_if_present(self, idx_1: int, idx_2: int, fingerprint: int) -> Union[int, None]: + def _check_if_present(self, idx_1: int, idx_2: int, fingerprint: int) -> int | None: """wrapper for checking if fingerprint is already inserted""" if fingerprint in [x.finger for x in self.buckets[idx_1]]: return idx_1 @@ -274,7 +275,7 @@ def _check_if_present(self, idx_1: int, idx_2: int, fingerprint: int) -> Union[i return idx_2 return None - def _load(self, file: Union[Path, str, IOBase, mmap, bytes, ByteString]) -> None: + def _load(self, file: Path | str | IOBase | mmap | bytes | ByteString) -> None: """load a cuckoo filter from file""" if not isinstance(file, (IOBase, mmap, bytes, bytearray, memoryview)): file = resolve_path(file) @@ -304,7 +305,7 @@ def _parse_buckets(self, d: ByteString) -> None: start = end end += bin_size - def _expand_logic(self, extra_fingerprint: "CountingCuckooBin") -> None: + def _expand_logic(self, extra_fingerprint: CountingCuckooBin) -> None: """the logic to acutally expand the cuckoo filter""" # get all the fingerprints fingerprints = self._setup_expand(extra_fingerprint) diff --git a/probables/cuckoo/cuckoo.py b/probables/cuckoo/cuckoo.py index cbc8136..1e7221b 100644 --- a/probables/cuckoo/cuckoo.py +++ b/probables/cuckoo/cuckoo.py @@ -3,6 +3,8 @@ Author: Tyler Barrus (barrust@gmail.com) """ +from __future__ import annotations + import math import random from array import array @@ -12,7 +14,6 @@ from numbers import Number from pathlib import Path from struct import Struct -from typing import Union from probables.exceptions import CuckooFilterFullError, InitializationError from probables.hashes import KeyT, SimpleHashT, fnv_1a @@ -57,8 +58,8 @@ def __init__( expansion_rate: int = 2, auto_expand: bool = True, finger_size: int = 4, - filepath: Union[str, Path, None] = None, - hash_function: Union[SimpleHashT, None] = None, + filepath: str | Path | None = None, + hash_function: SimpleHashT | None = None, ): """setup the data structure""" valid_prms = ( @@ -109,7 +110,7 @@ def init_error_rate( max_swaps: int = 500, expansion_rate: int = 2, auto_expand: bool = True, - hash_function: Union[SimpleHashT, None] = None, + hash_function: SimpleHashT | None = None, ): """Initialize a simple Cuckoo Filter based on error rate @@ -139,8 +140,8 @@ def init_error_rate( def load_error_rate( cls, error_rate: float, - filepath: Union[str, Path], - hash_function: Union[SimpleHashT, None] = None, + filepath: str | Path, + hash_function: SimpleHashT | None = None, ): """Initialize a previously exported Cuckoo Filter based on error rate @@ -159,9 +160,9 @@ def load_error_rate( def frombytes( cls, b: ByteString, - error_rate: Union[float, None] = None, - hash_function: Union[SimpleHashT, None] = None, - ) -> "CuckooFilter": + error_rate: float | None = None, + hash_function: SimpleHashT | None = None, + ) -> CuckooFilter: """ Args: b (ByteString): The bytes to load as a Expanding Bloom Filter @@ -330,7 +331,7 @@ def remove(self, key: KeyT) -> bool: self._inserted_elements -= 1 return True - def export(self, file: Union[Path, str, IOBase, mmap]) -> None: + def export(self, file: Path | str | IOBase | mmap) -> None: """Export cuckoo filter to file Args: @@ -392,7 +393,7 @@ def _insert_fingerprint(self, fingerprint, idx_1, idx_2): # if we got here we have an error... we might need to know what is left return fingerprint - def _load(self, file: Union[Path, str, IOBase, mmap, bytes]) -> None: + def _load(self, file: Path | str | IOBase | mmap | bytes) -> None: """load a cuckoo filter from file""" if not isinstance(file, (IOBase, mmap, bytes)): file = resolve_path(file) @@ -431,7 +432,7 @@ def _parse_bucket(self, d: ByteString) -> array: self._inserted_elements += len(bucket) return bucket - def _set_error_rate(self, error_rate: Union[float, None]) -> None: + def _set_error_rate(self, error_rate: float | None) -> None: """set error rate correctly""" # if error rate is provided, use it if error_rate is not None: diff --git a/probables/exceptions.py b/probables/exceptions.py index b76eb37..e9f59d1 100644 --- a/probables/exceptions.py +++ b/probables/exceptions.py @@ -1,4 +1,6 @@ -""" PyProbables Exceptions """ +"""PyProbables Exceptions""" + +from __future__ import annotations class ProbablesBaseException(Exception): diff --git a/probables/hashes.py b/probables/hashes.py index b26f835..0a1533a 100644 --- a/probables/hashes.py +++ b/probables/hashes.py @@ -1,13 +1,15 @@ """Probables Hashing Utilities""" +from __future__ import annotations + from functools import wraps from hashlib import md5, sha256 from struct import unpack -from typing import Callable, Union +from typing import Callable from probables.constants import UINT32_T_MAX, UINT64_T_MAX -KeyT = Union[str, bytes] +KeyT = str | bytes SimpleHashT = Callable[[KeyT, int], int] HashResultsT = list[int] HashFuncT = Callable[[KeyT, int], HashResultsT] diff --git a/probables/quotientfilter/quotientfilter.py b/probables/quotientfilter/quotientfilter.py index d8e1113..5f9e5f3 100644 --- a/probables/quotientfilter/quotientfilter.py +++ b/probables/quotientfilter/quotientfilter.py @@ -3,10 +3,12 @@ Author: Tyler Barrus (barrust@gmail.com) """ +from __future__ import annotations + import sys from array import array from collections.abc import Iterator -from typing import Optional, TextIO +from typing import TextIO from probables.exceptions import QuotientFilterError from probables.hashes import KeyT, SimpleHashT, fnv_1a_32 @@ -45,7 +47,7 @@ class QuotientFilter: ) def __init__( - self, quotient: int = 20, auto_expand: bool = True, hash_function: Optional[SimpleHashT] = None + self, quotient: int = 20, auto_expand: bool = True, hash_function: SimpleHashT | None = None ): # needs to be parameterized if quotient < 3 or quotient > 31: raise QuotientFilterError( @@ -53,7 +55,7 @@ def __init__( ) self.__set_params(quotient, auto_expand, hash_function) - def __set_params(self, quotient: int, auto_expand: bool, hash_function: Optional[SimpleHashT]): + def __set_params(self, quotient: int, auto_expand: bool, hash_function: SimpleHashT | None): self._q: int = quotient self._r: int = 32 - quotient self._size: int = 1 << self._q # same as 2**q @@ -244,7 +246,7 @@ def get_hashes(self) -> list[int]: list(int): The hash values stored in the quotient filter""" return list(self.hashes()) - def resize(self, quotient: Optional[int] = None) -> None: + def resize(self, quotient: int | None = None) -> None: """Resize the quotient filter to use the new quotient size Args: @@ -273,7 +275,7 @@ def resize(self, quotient: Optional[int] = None) -> None: for _h in hashes: self.add_alt(_h) - def merge(self, second: "QuotientFilter") -> None: + def merge(self, second: QuotientFilter) -> None: """Merge the `second` quotient filter into the first Args: diff --git a/probables/utilities.py b/probables/utilities.py index 3029ae3..ddd864e 100644 --- a/probables/utilities.py +++ b/probables/utilities.py @@ -1,28 +1,29 @@ -""" Utility Functions """ +"""Utility Functions""" + +from __future__ import annotations import math import mmap import string from array import array from pathlib import Path -from typing import Union -def is_hex_string(hex_string: Union[str, None]) -> bool: +def is_hex_string(hex_string: str | None) -> bool: """check if the passed in string is really hex""" if hex_string is None: return False return all(c in string.hexdigits for c in hex_string) -def is_valid_file(filepath: Union[str, Path, None]) -> bool: +def is_valid_file(filepath: str | Path | None) -> bool: """check if the passed filepath points to a real file""" if filepath is None: return False return Path(filepath).exists() -def resolve_path(filepath: Union[str, Path]) -> Path: +def resolve_path(filepath: str | Path) -> Path: """fully resolve the path by expanding user and resolving""" return Path(filepath).expanduser().resolve() @@ -39,7 +40,7 @@ class MMap: __slots__ = ("__p", "__f", "__m", "_closed") - def __init__(self, path: Union[Path, str]): + def __init__(self, path: Path | str): self.__p = Path(path) self.__f = self.path.open("rb") self.__m = mmap.mmap(self.__f.fileno(), 0, access=mmap.ACCESS_READ) diff --git a/pyproject.toml b/pyproject.toml index 47e93f9..5405bf5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,12 @@ target-version = ['py38'] include = '\.pyi?$' [tool.ruff] -include = ["pyproject.toml", "probables/**/*.py", "scripts/**/*.py"] +include = [ + "pyproject.toml", + "probables/**/*.py", + "probables/*.py", + "scripts/**/*.py", +] exclude = [ ".bzr", ".direnv",