chore(pyo3): update [email protected]
j-mendez committed Jan 27, 2025
1 parent 7a9fb2d commit cca384b
Showing 4 changed files with 34 additions and 43 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 edition = "2021"
 name = "spider_rs"
-version = "0.0.53"
+version = "0.0.54"
 repository = "https://github.com/spider-rs/spider-py"
 license = "MIT"
 description = "The fastest web crawler and indexer."
@@ -13,8 +13,8 @@ crate-type = ["cdylib"]
 indexmap = "2"
 num_cpus = "1"
 spider = { version = "2", features = ["cron", "regex", "cookies", "socks", "chrome", "control", "smart", "chrome_intercept", "cache", "serde", "openai", "headers" ] }
-pyo3 = { version = "0.20.3", features = ["extension-module", "serde"] }
-pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
+pyo3 = { version = "0.23", features = ["extension-module", "serde"] }
+pyo3-async-runtimes = { version = "0.23", features = ["attributes", "tokio-runtime"] }
 serde_json = "1"
 
 [target.x86_64-unknown-linux-gnu.dependencies]
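
Note: pyo3-asyncio was never released for pyo3 newer than 0.20; pyo3-async-runtimes is its maintained continuation under the PyO3 org, and its version tracks pyo3, which is why both dependencies move to 0.23 in lockstep. The module layout carried over, so outside of the Bound-API changes the call sites migrate mechanically. A sketch of the rename (illustrative, not lines from this commit):

// before: pyo3-asyncio 0.20
pyo3_asyncio::tokio::future_into_py(py, fut);
pyo3_asyncio::tokio::get_runtime();

// after: pyo3-async-runtimes 0.23
pyo3_async_runtimes::tokio::future_into_py(py, fut);
pyo3_async_runtimes::tokio::get_runtime();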
2 changes: 1 addition & 1 deletion src/lib.rs
@@ -22,7 +22,7 @@ pub use website::Website;
 
 #[pyfunction]
 fn crawl(py: Python, url: String, raw_content: Option<bool>) -> PyResult<&PyAny> {
-    pyo3_asyncio::tokio::future_into_py(py, async move {
+    pyo3_async_runtimes::tokio::future_into_py(py, async move {
         let w = shortcut::crawl(url, raw_content).await;
 
         Ok(w)
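
One wrinkle this hunk does not show: pyo3 0.21 deprecated and 0.23 removed the GIL-ref API, so return types like PyResult<&PyAny> give way to the Bound smart pointer; that part of the migration falls outside the lines displayed here. A minimal, self-contained sketch of the same future_into_py bridge under pyo3 0.23 (the function name and sleep body are illustrative, and a tokio dependency with the time feature is assumed):

use pyo3::prelude::*;

#[pyfunction]
fn sleep_ms(py: Python<'_>, ms: u64) -> PyResult<Bound<'_, PyAny>> {
    // Hand a Rust future to Python as an awaitable, driven by the shared Tokio runtime.
    pyo3_async_runtimes::tokio::future_into_py(py, async move {
        tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
        Ok(ms) // the awaitable resolves to a Python int
    })
}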
6 changes: 3 additions & 3 deletions src/page.rs
@@ -12,7 +12,7 @@ pub struct Page {
     selectors: Option<(
         CompactString,
         spider::smallvec::SmallVec<[CompactString; 2]>,
-        CompactString
+        CompactString,
     )>,
     /// the url for the page
     pub url: String,
@@ -77,7 +77,7 @@ impl Page {
                 client
             };
         }
-        let s = pyo3_asyncio::tokio::get_runtime()
+        let s = pyo3_async_runtimes::tokio::get_runtime()
            .block_on(async move {
                let page = spider::page::Page::new_page(&slf.url, &PAGE_CLIENT).await;
                slf.status_code = page.status_code.into();
@@ -99,7 +99,7 @@ impl Page {
         match &slf.selectors {
             Some(selectors) => match &slf.inner {
                 Some(inner) => {
-                    let links = pyo3_asyncio::tokio::get_runtime()
+                    let links = pyo3_async_runtimes::tokio::get_runtime()
                         .block_on(async move {
                             let links = inner.links(&selectors).await;
                             Ok::<spider::hashbrown::HashSet<spider::CaseInsensitiveString>, ()>(links)
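
Both page.rs call sites share one shape: a synchronous #[pymethods] function borrows the shared runtime and blocks until the async work finishes. A generic sketch of that pattern against pyo3-async-runtimes 0.23 (the Probe type and its method are illustrative, not spider's API):

use pyo3::prelude::*;

#[pyclass]
struct Probe {
    url: String,
}

#[pymethods]
impl Probe {
    /// Synchronous method that drives async work to completion on the shared Tokio runtime.
    fn url_len(&self) -> usize {
        pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
            // a real implementation would await a network fetch of self.url here
            self.url.len()
        })
    }
}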
63 changes: 27 additions & 36 deletions src/website.rs
@@ -51,7 +51,7 @@ impl Website {
             .expect("sync feature should be enabled");
         let raw_content = slf.raw_content;
 
-        let handle = pyo3_asyncio::tokio::get_runtime().spawn(async move {
+        let handle = pyo3_async_runtimes::tokio::get_runtime().spawn(async move {
             while let Ok(res) = rx2.recv().await {
                 let page = new_page(&res, raw_content);
                 Python::with_gil(|py| {
@@ -105,7 +105,7 @@ impl Website {
         if slf.running_in_background {
             let domain_name = slf.inner.get_url().inner().clone();
 
-            let _ = pyo3_asyncio::tokio::future_into_py(slf.py(), async move {
+            let _ = pyo3_async_runtimes::tokio::future_into_py(slf.py(), async move {
                 shutdown(&domain_name).await;
                 Ok(())
             });
@@ -196,7 +196,7 @@ impl Website {
             .expect("sync feature should be enabled");
 
         let py: Python<'_> = slf.py();
-        let rt = pyo3_asyncio::tokio::get_runtime();
+        let rt = pyo3_async_runtimes::tokio::get_runtime();
 
         let f1 = async {
             while let Ok(res) = rx2.recv().await {
@@ -244,7 +244,7 @@ impl Website {
 
             slf.crawl_handles.insert(crawl_id, crawl_handle);
         } else {
-            let _ = pyo3_asyncio::tokio::get_runtime().block_on(async move {
+            let _ = pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
                 if headless {
                     slf.inner.crawl().await;
                 } else {
@@ -311,7 +311,7 @@ impl Website {
             .expect("sync feature should be enabled");
 
         let py: Python<'_> = slf.py();
-        let rt = pyo3_asyncio::tokio::get_runtime();
+        let rt = pyo3_async_runtimes::tokio::get_runtime();
 
         let f1 = async {
             while let Ok(res) = rx2.recv().await {
@@ -351,7 +351,7 @@ impl Website {
 
             slf.crawl_handles.insert(crawl_id, crawl_handle);
         } else {
-            let _ = pyo3_asyncio::tokio::get_runtime().block_on(async move {
+            let _ = pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
                 slf.inner.crawl_smart().await;
                 Ok::<(), ()>(())
             });
@@ -421,7 +421,7 @@ impl Website {
             .expect("sync feature should be enabled");
 
         let py: Python<'_> = slf.py();
-        let rt = pyo3_asyncio::tokio::get_runtime();
+        let rt = pyo3_async_runtimes::tokio::get_runtime();
 
         let f1 = async {
             while let Ok(res) = rx2.recv().await {
@@ -469,7 +469,7 @@ impl Website {
 
             slf.crawl_handles.insert(crawl_id, crawl_handle);
         } else {
-            let _ = pyo3_asyncio::tokio::get_runtime().block_on(async move {
+            let _ = pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
                 if headless {
                     slf.inner.scrape().await;
                 } else {
@@ -505,7 +505,7 @@ impl Website {
             _ => None,
         };
 
-        let inner = pyo3_asyncio::tokio::get_runtime()
+        let inner = pyo3_async_runtimes::tokio::get_runtime()
            .block_on(async move {
                let runner: spider::async_job::Runner = slf.inner.run_cron().await;
                Ok::<spider::async_job::Runner, ()>(runner)
@@ -536,22 +536,16 @@ impl Website {
     pub fn get_configuration_headers(&self) -> Vec<(String, String)> {
         let mut map = Vec::new();
 
-        match self.inner.configuration.headers.as_ref() {
-            Some(h) => {
-                for v in h.iter() {
-                    let mut value = String::new();
+        if let Some(h) = self.inner.configuration.headers.as_ref() {
+            for v in h.iter() {
+                let mut value = String::new();
 
-                    match v.1.to_str() {
-                        Ok(vv) => {
-                            value.push_str(vv);
-                        }
-                        _ => (),
-                    };
-
-                    map.push((v.0.to_string(), value))
+                if let Ok(vv) = v.1.to_str() {
+                    value.push_str(vv);
                 }
+
+                map.push((v.0.to_string(), value))
             }
-            _ => (),
         }
 
         map
@@ -562,13 +556,10 @@ impl Website {
         let mut pages: Vec<NPage> = Vec::new();
         let raw_content = self.raw_content;
 
-        match self.inner.get_pages() {
-            Some(p) => {
-                for page in p.iter() {
-                    pages.push(new_page(page, raw_content));
-                }
+        if let Some(p) = self.inner.get_pages() {
+            for page in p.iter() {
+                pages.push(new_page(page, raw_content));
             }
-            _ => (),
         }
 
         pages
@@ -725,9 +716,7 @@ impl Website {
         mut slf: PyRefMut<'_, Self>,
         return_page_links: bool,
     ) -> PyRefMut<'_, Self> {
-        slf
-            .inner
-            .with_return_page_links(return_page_links);
+        slf.inner.with_return_page_links(return_page_links);
         slf
     }

@@ -738,7 +727,11 @@
     ) -> PyRefMut<'_, Self> {
         slf
             .inner
-            .with_chrome_connection(if chome_connection.is_empty() { None } else { Some (chome_connection)});
+            .with_chrome_connection(if chome_connection.is_empty() {
+                None
+            } else {
+                Some(chome_connection)
+            });
         slf
     }

@@ -747,9 +740,7 @@
         mut slf: PyRefMut<'_, Self>,
         preserve: bool,
     ) -> PyRefMut<'_, Self> {
-        slf
-            .inner
-            .with_preserve_host_header(preserve);
+        slf.inner.with_preserve_host_header(preserve);
        slf
     }

@@ -1008,7 +999,7 @@ impl Cron {
             Some(h) => h.abort(),
             _ => (),
         };
-        let _ = pyo3_asyncio::tokio::get_runtime().block_on(async move {
+        let _ = pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             slf.inner.stop().await;
             Ok::<(), ()>(())
         });
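
Stepping back, the subscription path at the top of website.rs combines the two runtime primitives seen throughout this diff: the receive loop is spawned onto the shared runtime, and Python::with_gil re-acquires the GIL only for the moment each page is handed to the Python callback. A stripped-down sketch of that spawn-and-callback shape (the channel payload and callback delivery are illustrative; tokio's sync feature is assumed):

use pyo3::prelude::*;

fn deliver_pages(callback: PyObject, mut rx: tokio::sync::mpsc::Receiver<String>) {
    // Runs on the shared Tokio runtime; the GIL is held only while each message is delivered.
    pyo3_async_runtimes::tokio::get_runtime().spawn(async move {
        while let Some(page) = rx.recv().await {
            Python::with_gil(|py| {
                // Pass the crawled page (here just a String) to the Python callback.
                let _ = callback.call1(py, (page,));
            });
        }
    });
}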