diff --git a/src/python_data_access.rs b/src/python_data_access.rs
index 4249e40a..2ffd66dc 100644
--- a/src/python_data_access.rs
+++ b/src/python_data_access.rs
@@ -33,10 +33,12 @@ pub fn copy_string(
             Ok(chars.iter().collect())
         }
         (2, _) => {
-            // UCS2 strings aren't used internally after v3.3: https://www.python.org/dev/peps/pep-0393/
-            // TODO: however with python 2.7 they could be added with --enable-unicode=ucs2 configure flag.
-            // or with python 3.2 --with-wide-unicode=ucs2
-            Err(format_err!("ucs2 strings aren't supported yet!"))
+            // UCS2: each code unit is two bytes in native byte order. Decode pairwise
+            // rather than casting `bytes.as_ptr()` to `*const u16`: a byte buffer has no
+            // 2-byte alignment guarantee, which `slice::from_raw_parts` requires.
+            // A trailing odd byte, if any, is ignored.
+            let chars: Vec<u16> = bytes
+                .chunks_exact(2)
+                .map(|pair| u16::from_ne_bytes([pair[0], pair[1]]))
+                .collect();
+            Ok(String::from_utf16(&chars)?)
         }
         (1, true) => Ok(String::from_utf8(bytes)?),
         (1, false) => Ok(bytes.iter().map(|&b| b as char).collect()),
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 9d7c36cc..e136e35c 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -229,6 +229,37 @@ fn test_unicode() {
     assert!(!traces[0].owns_gil);
 }
 
+#[test]
+fn test_cyrillic() {
+    // Skip unless running as root on macOS.
+    #[cfg(target_os = "macos")]
+    {
+        if unsafe { libc::geteuid() } != 0 {
+            return;
+        }
+    }
+
+    // Identifiers with characters outside the ASCII range are supported from Python 3
+    let runner = TestRunner::new(Config::default(), "./tests/scripts/longsleep.py");
+    if runner.spy.version.major == 2 {
+        return;
+    }
+
+    let mut runner = TestRunner::new(Config::default(), "./tests/scripts/cyrillic.py");
+
+    let traces = runner.spy.get_stack_traces().unwrap();
+    assert_eq!(traces.len(), 1);
+    let trace = &traces[0];
+
+    // Innermost frame: the `time.sleep` call on line 4 of the Cyrillic-named function.
+    assert_eq!(trace.frames[0].name, "кириллица");
+    assert_eq!(trace.frames[0].line, 4);
+
+    // Outermost frame: module-level code, which CPython names `<module>`; the call is on line 7.
+    assert_eq!(trace.frames[1].name, "<module>");
+    assert_eq!(trace.frames[1].line, 7);
+}
+
 #[test]
 fn test_local_vars() {
     #[cfg(target_os = "macos")]
diff --git a/tests/scripts/cyrillic.py b/tests/scripts/cyrillic.py
new file mode 100644
index 00000000..77370c22
--- /dev/null
+++ b/tests/scripts/cyrillic.py
@@ -0,0 +1,7 @@
+import time
+
+def кириллица(seconds):
+    time.sleep(seconds)
+
+if __name__ == "__main__":
+    кириллица(100)