Skip to content

Commit

Permalink
Port python string hashing changes from orjson
Browse files Browse the repository at this point in the history
Signed-off-by: Emanuele Giaquinta <[email protected]>
  • Loading branch information
exg committed Sep 28, 2023
1 parent ee0f05c commit c930599
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
2 changes: 0 additions & 2 deletions src/typeref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ pub static mut PYDANTIC2_FIELDS_STR: *mut PyObject = null_mut();
// Mutable globals holding raw Python object pointers; each one starts out null.
// NOTE(review): presumably populated inside init_typerefs — confirm against the full file.
pub static mut FIELD_TYPE_STR: *mut PyObject = null_mut();
pub static mut ARRAY_STRUCT_STR: *mut PyObject = null_mut();
pub static mut VALUE_STR: *mut PyObject = null_mut();
// Optional hash callback, initially None; init_typerefs assigns it the tp_hash
// slot of the empty unicode object's type.
pub static mut STR_HASH_FUNCTION: Option<hashfunc> = None;
pub static mut INT_ATTR_STR: *mut PyObject = null_mut();

pub static mut HASH_BUILDER: ThreadSafeLazy<ahash::RandomState> = ThreadSafeLazy::new(|| unsafe {
Expand Down Expand Up @@ -94,7 +93,6 @@ pub fn init_typerefs() {
FALSE = Py_False();
EMPTY_UNICODE = PyUnicode_New(0, 255);
STR_TYPE = (*EMPTY_UNICODE).ob_type;
STR_HASH_FUNCTION = (*((*EMPTY_UNICODE).ob_type)).tp_hash;
BYTES_TYPE = (*PyBytes_FromStringAndSize("".as_ptr() as *const c_char, 0)).ob_type;

{
Expand Down
28 changes: 19 additions & 9 deletions src/unicode.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::typeref::EMPTY_UNICODE;
use crate::typeref::STR_HASH_FUNCTION;
use core::ffi::c_void;
use pyo3::ffi::*;

// see unicodeobject.h for documentation
Expand Down Expand Up @@ -122,6 +122,24 @@ fn pyunicode_fourbyte(buf: &str, num_chars: usize) -> *mut pyo3::ffi::PyObject {
}
}

/// Hashes the character buffer of the unicode object `op`, stores the result
/// in the object's `hash` field and returns it.
///
/// The buffer is located by pointer arithmetic on the object itself: it is
/// taken to start right after a `PyASCIIObject` when the object is flagged
/// both compact and ASCII, and right after a `PyCompactUnicodeObject` in every
/// other case. Its size in bytes is `length * kind`.
///
/// NOTE(review): the fallback branch is also taken for objects that are not
/// flagged compact; this looks like it relies on callers only passing compact
/// strings — confirm against the call sites. `op` must point to a live
/// unicode object.
#[inline]
pub fn hash_str(op: *mut PyObject) -> Py_hash_t {
    let ascii = op.cast::<PyASCIIObject>();
    unsafe {
        let is_compact_ascii = (*ascii).compact() == 1 && (*ascii).ascii() == 1;
        // Step one whole struct past the start of the object to reach the data.
        let buffer = if is_compact_ascii {
            ascii.offset(1).cast::<c_void>()
        } else {
            op.cast::<PyCompactUnicodeObject>()
                .offset(1)
                .cast::<c_void>()
        };
        // `kind` doubles as the per-character width used to scale `length`.
        let byte_len = (*ascii).length * (*ascii).kind() as isize;
        let computed = pyo3::ffi::_Py_HashBytes(buffer, byte_len);
        // Store the value on the object before handing it back.
        (*ascii).hash = computed;
        computed
    }
}

#[inline(never)]
pub fn unicode_to_str_via_ffi(op: *mut PyObject) -> Option<&'static str> {
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
Expand Down Expand Up @@ -151,11 +169,3 @@ pub fn unicode_to_str(op: *mut PyObject) -> Option<&'static str> {
}
}
}

/// Hashes the unicode object `op` through the callback held in
/// `STR_HASH_FUNCTION`, writes the result into the object's `hash` field and
/// returns it.
///
/// Panics if `STR_HASH_FUNCTION` is still `None`.
/// NOTE(review): `op` is dereferenced as a `PyASCIIObject`, so it is assumed
/// to point to a live unicode object — confirm against the call sites.
#[inline]
pub fn hash_str(op: *mut PyObject) -> Py_hash_t {
    let ascii = op.cast::<PyASCIIObject>();
    unsafe {
        let str_hash = STR_HASH_FUNCTION.unwrap();
        let value = str_hash(op);
        // Record the hash on the object, then return the same value.
        (*ascii).hash = value;
        value
    }
}

0 comments on commit c930599

Please sign in to comment.