From dc5eeca99ef7d5f6d8b470d143cd0b47a0d8d60e Mon Sep 17 00:00:00 2001 From: Rostan Tabet Date: Tue, 13 May 2025 16:33:12 +0200 Subject: [PATCH 1/4] Bump PyO3 to 0.24.2 Signed-off-by: Rostan Tabet --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4dbfef9..ad92ee0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ python = [ ] [dependencies] -pyo3 = { version = "0.22.2", default-features = false, features = [ +pyo3 = { version = "0.24.2", default-features = false, features = [ "extension-module", "macros", ], optional = true } From adf9fa34d88803ef97f6142f695f64edaffda0d7 Mon Sep 17 00:00:00 2001 From: Rostan Tabet Date: Tue, 13 May 2025 16:34:19 +0200 Subject: [PATCH 2/4] Fix PyO3 0.23 warnings Signed-off-by: Rostan Tabet --- src/py.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/py.rs b/src/py.rs index ed107b4..5ee8ac4 100644 --- a/src/py.rs +++ b/src/py.rs @@ -55,14 +55,13 @@ impl CoreBPE { py: Python, text: &str, allowed_special: HashSet, - ) -> Py { + ) -> TiktokenBuffer { let tokens = py.allow_threads(|| { let allowed_special: HashSet<&str> = allowed_special.iter().map(|s| s.as_ref()).collect(); self.encode(text, &allowed_special).0 }); - let buffer = TiktokenBuffer { tokens }; - buffer.into_py(py) + TiktokenBuffer { tokens } } fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec { @@ -115,19 +114,20 @@ impl CoreBPE { py: Python, text: &str, allowed_special: HashSet, - ) -> Py { + ) -> PyResult> { let (tokens, completions) = py.allow_threads(|| { let allowed_special: HashSet<&str> = allowed_special.iter().map(|s| s.as_ref()).collect(); self._encode_unstable_native(text, &allowed_special) }); - let py_completions = PyList::new_bound( + let py_completions: Bound<'_, PyList> = PyList::new( py, completions .iter() - .map(|seq| PyList::new_bound(py, &seq[..])), - ); - (tokens, py_completions).into_py(py) + .map(|seq| PyList::new(py, &seq[..])) + .collect::>>(), + )?; + Ok((tokens, py_completions).into_pyobject(py)?.unbind().into()) } fn encode_single_token(&self, piece: &[u8]) -> PyResult { @@ -156,17 +156,17 @@ impl CoreBPE { #[pyo3(name = "decode_bytes")] fn py_decode_bytes(&self, py: Python, tokens: Vec) -> Result, PyErr> { match py.allow_threads(|| self.decode_bytes(&tokens)) { - Ok(bytes) => Ok(PyBytes::new_bound(py, &bytes).into()), + Ok(bytes) => Ok(PyBytes::new(py, &bytes).into()), Err(e) => Err(pyo3::exceptions::PyKeyError::new_err(format!("{}", e))), } } fn decode_single_token_bytes(&self, py: Python, token: Rank) -> PyResult> { if let Some(bytes) = self.decoder.get(&token) { - return Ok(PyBytes::new_bound(py, bytes).into()); + return Ok(PyBytes::new(py, bytes).into()); } if let Some(bytes) = self.special_tokens_decoder.get(&token) { - return Ok(PyBytes::new_bound(py, bytes).into()); + return Ok(PyBytes::new(py, bytes).into()); } Err(PyErr::new::(token.to_string())) } @@ -178,7 +178,7 @@ impl CoreBPE { fn token_byte_values(&self, py: Python) -> Vec> { self.sorted_token_bytes .iter() - .map(|x| PyBytes::new_bound(py, x).into()) + .map(|x| PyBytes::new(py, x).into()) .collect() } } From dbffd9cf656c8ca338bbd5351693adf22dc6350d Mon Sep 17 00:00:00 2001 From: Rostan Tabet Date: Tue, 13 May 2025 18:23:51 +0200 Subject: [PATCH 3/4] Declare that _tiktoken supports free-threading Signed-off-by: Rostan Tabet --- src/py.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py.rs b/src/py.rs index 5ee8ac4..b901e03 100644 --- a/src/py.rs +++ b/src/py.rs @@ -240,7 +240,7 @@ impl TiktokenBuffer { } } -#[pymodule] +#[pymodule(gil_used = false)] fn _tiktoken(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) From 3efeae495aa49b8b14399c3acf1df30316e25df5 Mon Sep 17 00:00:00 2001 From: Rostan Tabet Date: Tue, 13 May 2025 18:41:54 +0200 Subject: [PATCH 4/4] Enable building 3.13t wheels in the CI Signed-off-by: Rostan Tabet --- .github/workflows/build_wheels.yml | 4 ++-- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 9f14b28..2a31012 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -17,7 +17,7 @@ jobs: # cibuildwheel builds linux wheels inside a manylinux container # it also takes care of procuring the correct python version for us os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [39, 310, 311, 312, 313] + python-version: ["39", "310", "311", "312", "313", "313t"] steps: - uses: actions/checkout@v4 @@ -39,7 +39,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-24.04-arm] - python-version: [39, 310, 311, 312, 313] + python-version: ["39", "310", "311", "312", "313", "313t"] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index b2056d9..fec3378 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ requires = ["setuptools>=62.4", "wheel", "setuptools-rust>=1.5.2"] [tool.cibuildwheel] build-frontend = "build" +enable = ["cpython-freethreading"] build-verbosity = 1 linux.before-all = "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal"