11use ahash:: { AHashMap , AHashSet } ;
22use pyo3:: exceptions:: PyValueError ;
3+ use pyo3:: intern;
34use pyo3:: prelude:: * ;
45use pyo3:: types:: PyString ;
5- use std :: ptr ;
6- use pyo3 :: once_cell :: GILOnceCell ;
6+
7+ type DomainString = arraystring :: ArrayString < typenum :: U255 > ;
78
89#[ derive( Default ) ]
910struct Suffix {
@@ -12,13 +13,7 @@ struct Suffix {
1213 sub_blacklist : AHashSet < String > ,
1314}
1415
15- static mut EMPTY_STRING : * mut pyo3:: ffi:: PyObject = ptr:: null_mut ( ) ;
16- static mut SUFFIX_STRING : * mut pyo3:: ffi:: PyObject = ptr:: null_mut ( ) ;
17- static mut DOMAIN_STRING : * mut pyo3:: ffi:: PyObject = ptr:: null_mut ( ) ;
18- static mut SUBDOMAIN_STRING : * mut pyo3:: ffi:: PyObject = ptr:: null_mut ( ) ;
1916static PUBLIC_SUFFIX_LIST_DATA : & str = include_str ! ( "public_suffix_list.dat" ) ;
20- static mut TEMP_DOMAIN_STRING : GILOnceCell < String > = GILOnceCell :: new ( ) ;
21-
2217
2318#[ pyclass]
2419struct DomainExtractor {
@@ -31,21 +26,21 @@ impl DomainExtractor {
3126 #[ new]
3227 fn new (
3328 suffix_list : Option < & str > ,
34- ) -> PyResult < Self > {
29+ ) -> Self {
3530 let ( suffixes, tld_list) = if let Some ( suffix_list) = suffix_list {
3631 parse_suffix_list ( suffix_list)
3732 } else {
3833 parse_suffix_list ( PUBLIC_SUFFIX_LIST_DATA )
3934 } ;
4035
41- Ok ( DomainExtractor { suffixes, tld_list } )
36+ DomainExtractor { suffixes, tld_list }
4237 }
4338
4439 fn parse_domain_parts < ' a > (
4540 & self ,
4641 domain : & ' a str ,
47- ) -> Result < ( & ' a str , & ' a str , & ' a str ) , PyErr > {
48- let mut suffix_part: & str = "" ;
42+ ) -> PyResult < ( & ' a str , & ' a str , & ' a str ) > {
43+ let mut suffix_part = "" ;
4944 let mut current_suffixes = & self . suffixes ;
5045 let mut last_dot_index = domain. len ( ) ;
5146 let mut in_wildcard_tld = false ;
@@ -126,29 +121,26 @@ impl DomainExtractor {
126121
127122 fn extract (
128123 & self ,
124+ py : Python ,
129125 domain : & PyString ,
130- ) -> PyResult < * mut pyo3 :: ffi :: PyObject > {
126+ ) -> PyResult < PyObject > {
131127 if domain. len ( ) . unwrap ( ) > 255 {
132128 return Err ( PyValueError :: new_err ( "Invalid domain detected" ) ) ;
133129 }
134130
135- let domain_string = unsafe {
136- let temp_domain_string = TEMP_DOMAIN_STRING . get_mut ( ) . unwrap_unchecked ( ) ;
137- temp_domain_string. clear ( ) ;
138- temp_domain_string. push_str ( domain. to_str ( ) . unwrap ( ) ) ;
139- temp_domain_string. make_ascii_lowercase ( ) ;
140-
141- temp_domain_string
131+ let mut domain_string = unsafe {
132+ DomainString :: from_str_unchecked ( domain. to_string_lossy ( ) . as_ref ( ) )
142133 } ;
134+ domain_string. make_ascii_lowercase ( ) ;
143135
144- let ( suffix_part, domain_part, subdomain_part) = self . parse_domain_parts ( domain_string) ?;
136+ let ( suffix_part, domain_part, subdomain_part) = self . parse_domain_parts ( domain_string. as_str ( ) ) ?;
145137
146138 unsafe {
147139 let dict = pyo3:: ffi:: PyDict_New ( ) ;
148140 for ( fraction_key, fraction) in [
149- ( SUFFIX_STRING , suffix_part) ,
150- ( DOMAIN_STRING , domain_part) ,
151- ( SUBDOMAIN_STRING , subdomain_part) ,
141+ ( intern ! ( py , "suffix" ) . into_ptr ( ) , suffix_part) ,
142+ ( intern ! ( py , "domain" ) . into_ptr ( ) , domain_part) ,
143+ ( intern ! ( py , "subdomain" ) . into_ptr ( ) , subdomain_part) ,
152144 ] {
153145 if !fraction. is_empty ( ) {
154146 let substr = pyo3:: ffi:: PyUnicode_FromStringAndSize (
@@ -166,12 +158,12 @@ impl DomainExtractor {
166158 pyo3:: ffi:: PyDict_SetItem (
167159 dict,
168160 fraction_key,
169- EMPTY_STRING ,
161+ intern ! ( py , "" ) . into_ptr ( ) ,
170162 ) ;
171163 }
172164 }
173165
174- Ok ( dict)
166+ Ok ( pyo3 :: PyObject :: from_owned_ptr ( py , dict) )
175167 }
176168 }
177169
@@ -184,12 +176,8 @@ impl DomainExtractor {
184176 return false ;
185177 }
186178
187- let domain_string = unsafe {
188- let temp_domain_string = TEMP_DOMAIN_STRING . get_mut ( ) . unwrap_unchecked ( ) ;
189- temp_domain_string. clear ( ) ;
190- temp_domain_string. push_str ( domain. to_str ( ) . unwrap ( ) ) ;
191-
192- temp_domain_string
179+ let mut domain_string = unsafe {
180+ DomainString :: from_str_unchecked ( domain. to_string_lossy ( ) . as_ref ( ) )
193181 } ;
194182
195183 for fraction in domain_string. split ( '.' ) {
@@ -208,15 +196,15 @@ impl DomainExtractor {
208196 }
209197
210198 domain_string. make_ascii_lowercase ( ) ;
211- if let Ok ( ( suffix_part, domain_part, _subdomain_part) ) = self . parse_domain_parts ( domain_string) {
199+ if let Ok ( ( suffix_part, domain_part, _subdomain_part) ) = self . parse_domain_parts ( domain_string. as_str ( ) ) {
212200 if suffix_part. is_empty ( ) || domain_part. is_empty ( ) {
213201 return false ;
214202 }
215203
216- if idna:: domain_to_ascii ( domain_string) . is_err ( ) {
204+ if idna:: domain_to_ascii ( domain_string. as_str ( ) ) . is_err ( ) {
217205 return false ;
218206 }
219- if idna:: domain_to_unicode ( domain_string) . 1 . is_err ( ) {
207+ if idna:: domain_to_unicode ( domain_string. as_str ( ) ) . 1 . is_err ( ) {
220208 return false ;
221209 }
222210
@@ -234,8 +222,9 @@ impl DomainExtractor {
234222
235223 fn extract_from_url (
236224 & self ,
225+ py : Python ,
237226 url : & PyString ,
238- ) -> PyResult < * mut pyo3 :: ffi :: PyObject > {
227+ ) -> PyResult < PyObject > {
239228 let mut url_str = url. to_str ( ) . unwrap ( ) ;
240229
241230 match memchr:: memmem:: find ( url_str. as_bytes ( ) , b"//" ) {
@@ -265,23 +254,19 @@ impl DomainExtractor {
265254 ) ;
266255 }
267256
268- let domain_string = unsafe {
269- let temp_domain_string = TEMP_DOMAIN_STRING . get_mut ( ) . unwrap_unchecked ( ) ;
270- temp_domain_string. clear ( ) ;
271- temp_domain_string. push_str ( url_str) ;
272- temp_domain_string. make_ascii_lowercase ( ) ;
273-
274- temp_domain_string
257+ let mut domain_string = unsafe {
258+ DomainString :: from_str_unchecked ( url_str)
275259 } ;
260+ domain_string. make_ascii_lowercase ( ) ;
276261
277262 let ( suffix_part, domain_part, subdomain_part) = self . parse_domain_parts ( domain_string. as_str ( ) ) ?;
278263
279264 unsafe {
280265 let dict = pyo3:: ffi:: PyDict_New ( ) ;
281266 for ( fraction_key, fraction) in [
282- ( SUFFIX_STRING , suffix_part) ,
283- ( DOMAIN_STRING , domain_part) ,
284- ( SUBDOMAIN_STRING , subdomain_part) ,
267+ ( intern ! ( py , "suffix" ) . into_ptr ( ) , suffix_part) ,
268+ ( intern ! ( py , "domain" ) . into_ptr ( ) , domain_part) ,
269+ ( intern ! ( py , "subdomain" ) . into_ptr ( ) , subdomain_part) ,
285270 ] {
286271 if !fraction. is_empty ( ) {
287272 let substr = pyo3:: ffi:: PyUnicode_FromStringAndSize (
@@ -299,12 +284,12 @@ impl DomainExtractor {
299284 pyo3:: ffi:: PyDict_SetItem (
300285 dict,
301286 fraction_key,
302- EMPTY_STRING ,
287+ intern ! ( py , "" ) . into_ptr ( ) ,
303288 ) ;
304289 }
305290 }
306291
307- Ok ( dict)
292+ Ok ( pyo3 :: PyObject :: from_owned_ptr ( py , dict) )
308293 }
309294 }
310295}
@@ -363,26 +348,9 @@ fn parse_suffix_list(
363348
364349#[ pymodule]
365350fn pydomainextractor (
366- py : Python ,
351+ _py : Python ,
367352 m : & PyModule ,
368353) -> PyResult < ( ) > {
369- unsafe {
370- EMPTY_STRING = pyo3:: ffi:: PyUnicode_New ( 0 , 127 ) ;
371- SUFFIX_STRING = pyo3:: ffi:: PyUnicode_FromStringAndSize (
372- "suffix" . as_ptr ( ) as * const i8 ,
373- "suffix" . len ( ) as isize ,
374- ) ;
375- DOMAIN_STRING = pyo3:: ffi:: PyUnicode_FromStringAndSize (
376- "domain" . as_ptr ( ) as * const i8 ,
377- "domain" . len ( ) as isize ,
378- ) ;
379- SUBDOMAIN_STRING = pyo3:: ffi:: PyUnicode_FromStringAndSize (
380- "subdomain" . as_ptr ( ) as * const i8 ,
381- "subdomain" . len ( ) as isize ,
382- ) ;
383- TEMP_DOMAIN_STRING . set ( py, String :: with_capacity ( 1024 ) ) . unwrap ( ) ;
384- }
385-
386354 m. add_class :: < DomainExtractor > ( ) ?;
387355 Ok ( ( ) )
388356}
0 commit comments