From 3a754c8d95b67b12dff76ddc7be3e6a6467a35d6 Mon Sep 17 00:00:00 2001 From: Nariman Safiulin Date: Fri, 2 Jul 2021 10:03:21 +0200 Subject: [PATCH 1/3] feat: add support for UCS-2 strings rebased commit: dcf703b609b90e1a10633d175c384c66d1c40fc9 --- src/python_data_access.rs | 9 +++++---- tests/scripts/cyrillic.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 tests/scripts/cyrillic.py diff --git a/src/python_data_access.rs b/src/python_data_access.rs index 4249e40a..2ffd66dc 100644 --- a/src/python_data_access.rs +++ b/src/python_data_access.rs @@ -33,10 +33,11 @@ pub fn copy_string( Ok(chars.iter().collect()) } (2, _) => { - // UCS2 strings aren't used internally after v3.3: https://www.python.org/dev/peps/pep-0393/ - // TODO: however with python 2.7 they could be added with --enable-unicode=ucs2 configure flag. - // or with python 3.2 --with-wide-unicode=ucs2 - Err(format_err!("ucs2 strings aren't supported yet!")) + #[allow(clippy::cast_ptr_alignment)] + let chars = unsafe { + std::slice::from_raw_parts(bytes.as_ptr() as *const u16, bytes.len() / 2) + }; + Ok(String::from_utf16(chars)?) 
} (1, true) => Ok(String::from_utf8(bytes)?), (1, false) => Ok(bytes.iter().map(|&b| b as char).collect()), diff --git a/tests/scripts/cyrillic.py b/tests/scripts/cyrillic.py new file mode 100644 index 00000000..501a74ae --- /dev/null +++ b/tests/scripts/cyrillic.py @@ -0,0 +1,15 @@ +import time + + +def f(seconds): + time.sleep(seconds) + + +def кириллица(seconds): + f(seconds) + + +if __name__ == "__main__": + f(3) + кириллица(3) + f(3) From 43259b8d8ccf7f9b506d1f341d355bb37718b5e8 Mon Sep 17 00:00:00 2001 From: Nariman Safiulin Date: Thu, 4 Jan 2024 21:50:36 +0300 Subject: [PATCH 2/3] tests: add a test to ensure py-spy does not fail on cyrillic (ucs-2) strings --- tests/integration_test.rs | 21 +++++++++++++++++++++ tests/scripts/cyrillic.py | 12 ++---------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 9d7c36cc..ef067456 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -229,6 +229,27 @@ fn test_unicode() { assert!(!traces[0].owns_gil); } +#[test] +fn test_cyrillic() { + #[cfg(target_os = "macos")] + { + if unsafe { libc::geteuid() } != 0 { + return; + } + } + let mut runner = TestRunner::new(Config::default(), "./tests/scripts/cyrillic.py"); + + let traces = runner.spy.get_stack_traces().unwrap(); + assert_eq!(traces.len(), 1); + let trace = &traces[0]; + + assert_eq!(trace.frames[0].name, "кириллица"); + assert_eq!(trace.frames[0].line, 4); + + assert_eq!(trace.frames[1].name, "<module>"); + assert_eq!(trace.frames[1].line, 7); +} + #[test] fn test_local_vars() { #[cfg(target_os = "macos")] diff --git a/tests/scripts/cyrillic.py b/tests/scripts/cyrillic.py index 501a74ae..995c032c 100644 --- a/tests/scripts/cyrillic.py +++ b/tests/scripts/cyrillic.py @@ -1,15 +1,7 @@ import time - -def f(seconds): - time.sleep(seconds) - - def кириллица(seconds): - f(seconds) - + time.sleep(seconds) if __name__ == "__main__": - f(3) - кириллица(3) - f(3) + кириллица(10) From
37ede8ed5b07612573a0a6c720630fd31a5b398f Mon Sep 17 00:00:00 2001 From: Nariman Safiulin Date: Tue, 27 Feb 2024 09:28:27 +0200 Subject: [PATCH 3/3] tests: fix `test_cyrillic` test for python 2 --- tests/integration_test.rs | 7 +++++++ tests/scripts/cyrillic.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/integration_test.rs b/tests/integration_test.rs index ef067456..e136e35c 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -237,6 +237,13 @@ fn test_cyrillic() { return; } } + + // Identifiers with characters outside the ASCII range are supported from Python 3 + let runner = TestRunner::new(Config::default(), "./tests/scripts/longsleep.py"); + if runner.spy.version.major == 2 { + return; + } + let mut runner = TestRunner::new(Config::default(), "./tests/scripts/cyrillic.py"); let traces = runner.spy.get_stack_traces().unwrap(); diff --git a/tests/scripts/cyrillic.py b/tests/scripts/cyrillic.py index 995c032c..77370c22 100644 --- a/tests/scripts/cyrillic.py +++ b/tests/scripts/cyrillic.py @@ -4,4 +4,4 @@ def кириллица(seconds): time.sleep(seconds) if __name__ == "__main__": - кириллица(10) + кириллица(100)