Skip to content

Commit 1dcd8c4

Browse files
author
Gal Ben David
committed
Python 3.11 support added. Dependency changes. GitHub Actions updates
1 parent 1f85a7e commit 1dcd8c4

File tree

7 files changed

+334
-228
lines changed

7 files changed

+334
-228
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ jobs:
3232
- '3.8'
3333
- '3.9'
3434
- '3.10'
35+
- '3.11'
3536
os:
3637
- ubuntu-latest
3738
- macos-latest
@@ -40,11 +41,11 @@ jobs:
4041
- name: Checkout
4142
uses: actions/checkout@v3
4243
- name: Set up Python ${{ matrix.python-version }}
43-
uses: actions/setup-python@v3
44+
uses: actions/setup-python@v4
4445
with:
4546
python-version: ${{ matrix.python-version }}
4647
- name: Install Poetry
47-
uses: abatilo/actions-poetry@v2.1.3
48+
uses: abatilo/actions-poetry@v2
4849
- name: Install Rust
4950
uses: actions-rs/toolchain@v1
5051
with:

.github/workflows/deploy.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
- '3.8'
1515
- '3.9'
1616
- '3.10'
17+
- '3.11'
1718
os:
1819
- ubuntu-latest
1920
- macos-latest
@@ -22,7 +23,7 @@ jobs:
2223
- name: Checkout
2324
uses: actions/checkout@v3
2425
- name: Set up Python ${{ matrix.python-version }}
25-
uses: actions/setup-python@v3
26+
uses: actions/setup-python@v4
2627
with:
2728
python-version: ${{ matrix.python-version }}
2829
- name: Install Rust
@@ -32,7 +33,7 @@ jobs:
3233
toolchain: stable
3334
override: true
3435
- name: Publish Package
35-
uses: messense/maturin-action@v1
36+
uses: PyO3/maturin-action@v1
3637
with:
3738
command: publish
3839
args: --username=__token__ ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.7' && '' || '--no-sdist' }} --interpreter=python${{ !startsWith(matrix.os, 'windows') && matrix.python-version || '' }}

Cargo.toml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pydomainextractor"
3-
version = "0.11.1"
3+
version = "0.12.0"
44
authors = ["Gal Ben David <[email protected]>"]
55
edition = "2021"
66
description = "A blazingly fast domain extraction library written in Rust"
@@ -28,6 +28,7 @@ classifier = [
2828
"Programming Language :: Python :: 3.8",
2929
"Programming Language :: Python :: 3.9",
3030
"Programming Language :: Python :: 3.10",
31+
"Programming Language :: Python :: 3.11",
3132
"Programming Language :: Rust",
3233
]
3334

@@ -36,12 +37,14 @@ name = "pydomainextractor"
3637
crate-type = ["cdylib"]
3738

3839
[dependencies]
39-
ahash = "0.7"
40-
idna = "0.2"
40+
ahash = "0.8"
41+
idna = "0.3"
4142
memchr = "2"
43+
arraystring = "0.3.0"
44+
typenum = "1"
4245

4346
[dependencies.pyo3]
44-
version = "0.16.5"
47+
version = "0.17.3"
4548
features = ["extension-module"]
4649

4750
[profile.release]

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
include README.md
22
include images/logo.png
33
graft tests
4-
recursive-include src *.h *.cpp
54
recursive-include pydomainextractor *.py *.pyi

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["maturin>=0.12,<0.13"]
2+
requires = ["maturin>=0.14,<0.15"]
33
build-backend = "maturin"
44

55
[tool.maturin]
@@ -13,7 +13,7 @@ sdist-include = [
1313

1414
[tool.poetry]
1515
name = "pydomainextractor"
16-
version = "0.11.1"
16+
version = "0.12.0"
1717
authors = ["Gal Ben David <[email protected]>"]
1818
description = "A blazingly fast domain extraction library written in Rust"
1919
readme = "README.md"
@@ -38,6 +38,7 @@ classifiers = [
3838
"Programming Language :: Python :: 3.8",
3939
"Programming Language :: Python :: 3.9",
4040
"Programming Language :: Python :: 3.10",
41+
"Programming Language :: Python :: 3.11",
4142
"Programming Language :: Rust",
4243
]
4344

src/lib.rs

Lines changed: 34 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
use ahash::{AHashMap, AHashSet};
22
use pyo3::exceptions::PyValueError;
3+
use pyo3::intern;
34
use pyo3::prelude::*;
45
use pyo3::types::PyString;
5-
use std::ptr;
6-
use pyo3::once_cell::GILOnceCell;
6+
7+
type DomainString = arraystring::ArrayString<typenum::U255>;
78

89
#[derive(Default)]
910
struct Suffix {
@@ -12,13 +13,7 @@ struct Suffix {
1213
sub_blacklist: AHashSet<String>,
1314
}
1415

15-
static mut EMPTY_STRING: *mut pyo3::ffi::PyObject = ptr::null_mut();
16-
static mut SUFFIX_STRING: *mut pyo3::ffi::PyObject = ptr::null_mut();
17-
static mut DOMAIN_STRING: *mut pyo3::ffi::PyObject = ptr::null_mut();
18-
static mut SUBDOMAIN_STRING: *mut pyo3::ffi::PyObject = ptr::null_mut();
1916
static PUBLIC_SUFFIX_LIST_DATA: &str = include_str!("public_suffix_list.dat");
20-
static mut TEMP_DOMAIN_STRING: GILOnceCell<String> = GILOnceCell::new();
21-
2217

2318
#[pyclass]
2419
struct DomainExtractor {
@@ -31,21 +26,21 @@ impl DomainExtractor {
3126
#[new]
3227
fn new(
3328
suffix_list: Option<&str>,
34-
) -> PyResult<Self> {
29+
) -> Self {
3530
let (suffixes, tld_list) = if let Some(suffix_list) = suffix_list {
3631
parse_suffix_list(suffix_list)
3732
} else {
3833
parse_suffix_list(PUBLIC_SUFFIX_LIST_DATA)
3934
};
4035

41-
Ok(DomainExtractor { suffixes, tld_list })
36+
DomainExtractor { suffixes, tld_list }
4237
}
4338

4439
fn parse_domain_parts<'a>(
4540
&self,
4641
domain: &'a str,
47-
) -> Result<(&'a str, &'a str, &'a str), PyErr> {
48-
let mut suffix_part: &str = "";
42+
) -> PyResult<(&'a str, &'a str, &'a str)> {
43+
let mut suffix_part = "";
4944
let mut current_suffixes = &self.suffixes;
5045
let mut last_dot_index = domain.len();
5146
let mut in_wildcard_tld = false;
@@ -126,29 +121,26 @@ impl DomainExtractor {
126121

127122
fn extract(
128123
&self,
124+
py: Python,
129125
domain: &PyString,
130-
) -> PyResult<*mut pyo3::ffi::PyObject> {
126+
) -> PyResult<PyObject> {
131127
if domain.len().unwrap() > 255 {
132128
return Err(PyValueError::new_err("Invalid domain detected"));
133129
}
134130

135-
let domain_string = unsafe {
136-
let temp_domain_string = TEMP_DOMAIN_STRING.get_mut().unwrap_unchecked();
137-
temp_domain_string.clear();
138-
temp_domain_string.push_str(domain.to_str().unwrap());
139-
temp_domain_string.make_ascii_lowercase();
140-
141-
temp_domain_string
131+
let mut domain_string = unsafe {
132+
DomainString::from_str_unchecked(domain.to_string_lossy().as_ref())
142133
};
134+
domain_string.make_ascii_lowercase();
143135

144-
let (suffix_part, domain_part, subdomain_part) = self.parse_domain_parts(domain_string)?;
136+
let (suffix_part, domain_part, subdomain_part) = self.parse_domain_parts(domain_string.as_str())?;
145137

146138
unsafe {
147139
let dict = pyo3::ffi::PyDict_New();
148140
for (fraction_key, fraction) in [
149-
(SUFFIX_STRING, suffix_part),
150-
(DOMAIN_STRING, domain_part),
151-
(SUBDOMAIN_STRING, subdomain_part),
141+
(intern!(py, "suffix").into_ptr(), suffix_part),
142+
(intern!(py, "domain").into_ptr(), domain_part),
143+
(intern!(py, "subdomain").into_ptr(), subdomain_part),
152144
] {
153145
if !fraction.is_empty() {
154146
let substr = pyo3::ffi::PyUnicode_FromStringAndSize(
@@ -166,12 +158,12 @@ impl DomainExtractor {
166158
pyo3::ffi::PyDict_SetItem(
167159
dict,
168160
fraction_key,
169-
EMPTY_STRING,
161+
intern!(py, "").into_ptr(),
170162
);
171163
}
172164
}
173165

174-
Ok(dict)
166+
Ok(pyo3::PyObject::from_owned_ptr(py, dict))
175167
}
176168
}
177169

@@ -184,12 +176,8 @@ impl DomainExtractor {
184176
return false;
185177
}
186178

187-
let domain_string = unsafe {
188-
let temp_domain_string = TEMP_DOMAIN_STRING.get_mut().unwrap_unchecked();
189-
temp_domain_string.clear();
190-
temp_domain_string.push_str(domain.to_str().unwrap());
191-
192-
temp_domain_string
179+
let mut domain_string = unsafe {
180+
DomainString::from_str_unchecked(domain.to_string_lossy().as_ref())
193181
};
194182

195183
for fraction in domain_string.split('.') {
@@ -208,15 +196,15 @@ impl DomainExtractor {
208196
}
209197

210198
domain_string.make_ascii_lowercase();
211-
if let Ok((suffix_part, domain_part, _subdomain_part)) = self.parse_domain_parts(domain_string) {
199+
if let Ok((suffix_part, domain_part, _subdomain_part)) = self.parse_domain_parts(domain_string.as_str()) {
212200
if suffix_part.is_empty() || domain_part.is_empty() {
213201
return false;
214202
}
215203

216-
if idna::domain_to_ascii(domain_string).is_err() {
204+
if idna::domain_to_ascii(domain_string.as_str()).is_err() {
217205
return false;
218206
}
219-
if idna::domain_to_unicode(domain_string).1.is_err() {
207+
if idna::domain_to_unicode(domain_string.as_str()).1.is_err() {
220208
return false;
221209
}
222210

@@ -234,8 +222,9 @@ impl DomainExtractor {
234222

235223
fn extract_from_url(
236224
&self,
225+
py: Python,
237226
url: &PyString,
238-
) -> PyResult<*mut pyo3::ffi::PyObject> {
227+
) -> PyResult<PyObject> {
239228
let mut url_str = url.to_str().unwrap();
240229

241230
match memchr::memmem::find(url_str.as_bytes(), b"//") {
@@ -265,23 +254,19 @@ impl DomainExtractor {
265254
);
266255
}
267256

268-
let domain_string = unsafe {
269-
let temp_domain_string = TEMP_DOMAIN_STRING.get_mut().unwrap_unchecked();
270-
temp_domain_string.clear();
271-
temp_domain_string.push_str(url_str);
272-
temp_domain_string.make_ascii_lowercase();
273-
274-
temp_domain_string
257+
let mut domain_string = unsafe {
258+
DomainString::from_str_unchecked(url_str)
275259
};
260+
domain_string.make_ascii_lowercase();
276261

277262
let (suffix_part, domain_part, subdomain_part) = self.parse_domain_parts(domain_string.as_str())?;
278263

279264
unsafe {
280265
let dict = pyo3::ffi::PyDict_New();
281266
for (fraction_key, fraction) in [
282-
(SUFFIX_STRING, suffix_part),
283-
(DOMAIN_STRING, domain_part),
284-
(SUBDOMAIN_STRING, subdomain_part),
267+
(intern!(py, "suffix").into_ptr(), suffix_part),
268+
(intern!(py, "domain").into_ptr(), domain_part),
269+
(intern!(py, "subdomain").into_ptr(), subdomain_part),
285270
] {
286271
if !fraction.is_empty() {
287272
let substr = pyo3::ffi::PyUnicode_FromStringAndSize(
@@ -299,12 +284,12 @@ impl DomainExtractor {
299284
pyo3::ffi::PyDict_SetItem(
300285
dict,
301286
fraction_key,
302-
EMPTY_STRING,
287+
intern!(py, "").into_ptr(),
303288
);
304289
}
305290
}
306291

307-
Ok(dict)
292+
Ok(pyo3::PyObject::from_owned_ptr(py, dict))
308293
}
309294
}
310295
}
@@ -363,26 +348,9 @@ fn parse_suffix_list(
363348

364349
#[pymodule]
365350
fn pydomainextractor(
366-
py: Python,
351+
_py: Python,
367352
m: &PyModule,
368353
) -> PyResult<()> {
369-
unsafe {
370-
EMPTY_STRING = pyo3::ffi::PyUnicode_New(0, 127);
371-
SUFFIX_STRING = pyo3::ffi::PyUnicode_FromStringAndSize(
372-
"suffix".as_ptr() as *const i8,
373-
"suffix".len() as isize,
374-
);
375-
DOMAIN_STRING = pyo3::ffi::PyUnicode_FromStringAndSize(
376-
"domain".as_ptr() as *const i8,
377-
"domain".len() as isize,
378-
);
379-
SUBDOMAIN_STRING = pyo3::ffi::PyUnicode_FromStringAndSize(
380-
"subdomain".as_ptr() as *const i8,
381-
"subdomain".len() as isize,
382-
);
383-
TEMP_DOMAIN_STRING.set(py, String::with_capacity(1024)).unwrap();
384-
}
385-
386354
m.add_class::<DomainExtractor>()?;
387355
Ok(())
388356
}

0 commit comments

Comments
 (0)