From ba89c5ebec4d23fae264be82be3ab206ef8ff5b6 Mon Sep 17 00:00:00 2001 From: synodriver <624805065@qq.com> Date: Tue, 26 Jul 2022 04:33:20 +0000 Subject: [PATCH] release the gil to use multithread --- setup.py | 5 ++++ spookyhash.py | 14 ++++----- src/SpookyV2.pxd | 4 +-- src/spookyhash.pyx | 71 +++++++++++++++++++++++++--------------------- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/setup.py b/setup.py index 599fa58..13bde7f 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,8 @@ setup( + name="spookyhash", + url="https://github.com/buhanec/spookyhash", ext_modules=cythonize( [ Extension( @@ -12,6 +14,9 @@ ), ], compiler_directives={ + "cdivision": True, + "boundscheck": False, + "wraparound": False, 'embedsignature': True, 'language_level': 3, }, diff --git a/spookyhash.py b/spookyhash.py index d75e27e..c54e403 100644 --- a/spookyhash.py +++ b/spookyhash.py @@ -16,9 +16,9 @@ # Utility function for homogenising data -def _char_arr(message: Union[bytes, memoryview, None]) -> Tuple[memoryview, int]: +def _char_arr(message: Union[bytes, memoryview, None]) -> memoryview: if message is None: - return memoryview(b'').cast('c'), 0 + return memoryview(b'').cast('c') if not isinstance(message, memoryview): try: message = memoryview(message) @@ -31,7 +31,7 @@ def _char_arr(message: Union[bytes, memoryview, None]) -> Tuple[memoryview, int] ) from e if message.format != 'c' or not message.contiguous: message = message.cast('c') - return message, len(message) + return message # Oneshot hash functions @@ -44,7 +44,7 @@ def hash32(message: Union[bytes, memoryview], seed: int = 0) -> int: :param seed: 32-bit seed :return: 32-bit hash """ - return _spookyhash.hash32(*_char_arr(message), seed) + return _spookyhash.hash32(_char_arr(message), seed) def hash64(message: Union[bytes, memoryview], seed: int = 0) -> int: @@ -55,7 +55,7 @@ def hash64(message: Union[bytes, memoryview], seed: int = 0) -> int: :param seed: 64-bit seed :return: 64-bit hash """ - return _spookyhash.hash64(*_char_arr(message), seed) + return _spookyhash.hash64(_char_arr(message), seed) def hash128_pair( @@ -71,7 +71,7 @@ def hash128_pair( :param seed2: 64-bit seed 2 :return: Pair of 64-bit hash values """ - digest = _spookyhash.hash128(*_char_arr(message), seed1=seed1, seed2=seed2) + digest = _spookyhash.hash128(_char_arr(message), seed1=seed1, seed2=seed2) return cast(Tuple[int, int], struct.unpack('=QQ', digest)) @@ -134,7 +134,7 @@ def update(self, message: Union[bytes, memoryview, None]) -> 'Self': :return: Self """ if message is not None: - self._hash.update(*_char_arr(message)) + self._hash.update(_char_arr(message)) return self def digest(self) -> bytes: diff --git a/src/SpookyV2.pxd b/src/SpookyV2.pxd index 2a7fc23..56edbc7 100644 --- a/src/SpookyV2.pxd +++ b/src/SpookyV2.pxd @@ -1,12 +1,12 @@ from libc.stdint cimport uint32_t, uint64_t -cdef extern from 'SpookyV2.h': +cdef extern from 'SpookyV2.h' nogil: cdef cppclass SpookyHash: void Init(uint64_t seed1, uint64_t seed2) void Update(const char *message, size_t length) void Final(uint64_t *hash1, uint64_t *hash2) -cdef extern from 'SpookyV2.h' namespace 'SpookyHash': +cdef extern from 'SpookyV2.h' namespace 'SpookyHash' nogil: void Hash128(const char *message, size_t length, uint64_t *hash1, uint64_t *hash2) uint64_t Hash64(const char *message, size_t length, uint64_t seed) uint32_t Hash32(const char *message, size_t length, uint32_t seed) diff --git a/src/spookyhash.pyx b/src/spookyhash.pyx index 68c5e8c..c1b9ec6 100644 --- a/src/spookyhash.pyx +++ b/src/spookyhash.pyx @@ -1,6 +1,6 @@ """SpookyHash Cython code.""" -from libc.stdint cimport uint32_t, uint64_t +from libc.stdint cimport uint8_t, uint32_t, uint64_t cimport SpookyV2 @@ -12,44 +12,46 @@ __version__ = '2.1.1' # Oneshot hash functions cpdef uint32_t hash32( - const char[::1] message, - size_t length, + const uint8_t[::1] message, uint32_t seed=0, ): - return SpookyV2.Hash32( - &message[0] if length else NULL, - length, - seed, - ) + cdef uint32_t ret + with nogil: + ret = SpookyV2.Hash32( + &message[0], + message.shape[0], + seed) + return ret cpdef uint64_t hash64( - const char[::1] message, - size_t length, + const uint8_t[::1] message, uint64_t seed=0, ): - return SpookyV2.Hash64( - &message[0] if length else NULL, - length, - seed, - ) + cdef uint64_t ret + with nogil: + ret = SpookyV2.Hash64( + &message[0], + message.shape[0], + seed) + return ret cpdef bytes hash128( - const char[::1] message, - size_t length, + const uint8_t[::1] message, uint64_t seed1=0, uint64_t seed2=0, ): cdef char digest[16] ( &digest[0])[0] = seed1 ( &digest[8])[0] = seed2 - SpookyV2.Hash128( - &message[0] if length else NULL, - length, - digest, - (digest + 8), - ) + with nogil: + SpookyV2.Hash128( + &message[0], + message.shape[0], + digest, + (digest + 8), + ) return digest[:16] @@ -62,15 +64,17 @@ cdef class Hash: def __cinit__(Hash self, *args, **kwargs): pass - cpdef void update(Hash self, const char[::1] message, size_t length): - self.hash.Update( - &message[0] if length else NULL, - length, - ) + cpdef void update(Hash self, const uint8_t[::1] message): + with nogil: + self.hash.Update( + &message[0], + message.shape[0], + ) cpdef bytes digest(Hash self): cdef uint64_t digest[2] - self.hash.Final(&digest[0], &digest[1]) + with nogil: + self.hash.Final(&digest[0], &digest[1]) return ( digest)[:self.digest_size] @@ -79,7 +83,8 @@ cdef class Hash32(Hash): def __cinit__(Hash32 self, uint32_t seed1, uint32_t seed2): if seed1 != seed2: raise ValueError('Expecting seed1 and seed2 to be the same') - self.hash.Init(seed1, seed1) + with nogil: + self.hash.Init(seed1, seed1) self.digest_size = 4 @@ -88,12 +93,14 @@ cdef class Hash64(Hash): def __cinit__(Hash64 self, uint64_t seed1, uint64_t seed2): if seed1 != seed2: raise ValueError('Expecting seed1 and seed2 to be the same') - self.hash.Init(seed1, seed1) + with nogil: + self.hash.Init(seed1, seed1) self.digest_size = 8 cdef class Hash128(Hash): def __cinit__(Hash128 self, uint64_t seed1, uint64_t seed2): - self.hash.Init(seed1, seed2) + with nogil: + self.hash.Init(seed1, seed2) self.digest_size = 16