diff --git a/Cargo.toml b/Cargo.toml index 6d386bd..a8504b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2021" name = "spider_rs" -version = "0.0.5" +version = "0.0.6" description = "The fastest web crawler written in Rust ported to nodejs." repository = "https://github.com/spider-rs/spider-nodejs" diff --git a/README.md b/README.md index 30bb7bb..4661c0b 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ import asyncio from spider_rs import Website async def main(): - website = Website("https://choosealicense.com", False) + website = Website("https://choosealicense.com", False).with_headers({ "authorization": "myjwttoken" }) website.crawl() print(website.get_links()) diff --git a/bench/scrappy.py b/bench/scrappy.py index 20bf943..22ab138 100644 --- a/bench/scrappy.py +++ b/bench/scrappy.py @@ -9,7 +9,6 @@ class MySpider(CrawlSpider): allowed_domains = ['rsseau.fr'] start_urls = ['https://rsseau.fr'] links = [] - rules = ( Rule(LinkExtractor(), callback='parse_item', follow=True), ) diff --git a/examples/website.py b/examples/website.py index 0ceca56..dc9e577 100644 --- a/examples/website.py +++ b/examples/website.py @@ -3,7 +3,7 @@ from spider_rs import Website async def main(): - website = Website("https://choosealicense.com", False) + website = Website("https://choosealicense.com", False).with_headers({ "authorization": "myjwttoken"}) website.crawl() print(website.get_links()) diff --git a/src/website.rs b/src/website.rs index 0b0d7a0..c2d31c0 100644 --- a/src/website.rs +++ b/src/website.rs @@ -527,34 +527,27 @@ impl Website { mut slf: PyRefMut<'_, Self>, headers: Option, ) -> PyRefMut<'_, Self> { + use pyo3::types::PyDict; use std::str::FromStr; - match headers { Some(obj) => { let mut h = spider::reqwest::header::HeaderMap::new(); + let py = slf.py(); + let dict = obj.downcast::(py); - match obj.as_ref(slf.py()).iter() { + match dict { Ok(keys) => { for key in keys.into_iter() { - match key { - Ok(k) => { - let key_name = k.to_string(); - let header_key = spider::reqwest::header::HeaderName::from_str(&key_name); - - match header_key { - Ok(hn) => match k.get_item(key_name) { - Ok(he) => { - let header_value = he.to_string(); - - match spider::reqwest::header::HeaderValue::from_str(&header_value) { - Ok(hk) => { - h.append(hn, hk); - } - _ => (), - } - } - _ => (), - }, + let header_key = spider::reqwest::header::HeaderName::from_str(&key.0.to_string()); + + match header_key { + Ok(hn) => { + let header_value = key.1.to_string(); + + match spider::reqwest::header::HeaderValue::from_str(&header_value) { + Ok(hk) => { + h.append(hn, hk); + } _ => (), } }