Skip to content

Commit

Permalink
chore(crate): remove unused
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 27, 2023
1 parent 92d7d06 commit 14208cb
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 53 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "spider_rs"
version = "0.0.12"
version = "0.0.13"
description = "The fastest web crawler written in Rust ported to nodejs."
repository = "https://github.com/spider-rs/spider-nodejs"

Expand Down
53 changes: 1 addition & 52 deletions src/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use spider::tokio::task::JoinHandle;
use spider::utils::shutdown;
use std::time::Duration;

/// a website holding the inner spider::website::Website from Rust fit for python.
/// A website holding the inner spider::website::Website from Rust fit for python.
#[pyclass]
pub struct Website {
/// the website from spider.
Expand All @@ -18,15 +18,8 @@ pub struct Website {
crawl_handles: IndexMap<u32, JoinHandle<()>>,
/// do not convert content to UTF-8.
raw_content: bool,
/// the data collected.
collected_data: Box<Vec<u8>>,
/// is the crawl running in the background.
running_in_background: bool, // /// the file handle for storing data
// file_handle: Option<spider::tokio::fs::File>,
}

/// Private wrapper holding a single `NPage` value.
///
/// NOTE(review): presumably the payload passed to page-event subscribers — confirm
/// against the callers; no use of this type is visible in this part of the file.
struct PageEvent {
/// the wrapped page.
pub page: NPage,
}

#[pymethods]
Expand All @@ -39,7 +32,6 @@ impl Website {
subscription_handles: IndexMap::new(),
crawl_handles: IndexMap::new(),
raw_content: raw_content.unwrap_or_default(),
collected_data: Box::new(Vec::new()),
running_in_background: false, // file_handle: None,
}
}
Expand All @@ -49,49 +41,6 @@ impl Website {
self.inner.get_status().to_string()
}

// /// store data to memory for disk storing. This will create the path if not exist and defaults to ./storage.
// pub async fn export_jsonl_data(&self, export_path: Option<String>) -> std::io::Result<()> {
// use spider::tokio::io::AsyncWriteExt;
// let file = match export_path {
// Some(p) => {
// let base_dir = p
// .split("/")
// .into_iter()
// .map(|f| {
// if f.contains(".") {
// "".to_string()
// } else {
// f.to_string()
// }
// })
// .collect::<String>();

// spider::tokio::fs::create_dir_all(&base_dir).await?;

// if !p.contains(".") {
// p + ".jsonl"
// } else {
// p
// }
// }
// _ => {
// spider::tokio::fs::create_dir_all("./storage").await?;
// "./storage/".to_owned()
// + &self
// .inner
// .get_domain()
// .inner()
// .replace("http://", "")
// .replace("https://", "")
// + "jsonl"
// }
// };
// let mut file = spider::tokio::fs::File::create(file).await?;
// // transform data step needed to auto convert type ..
// file.write_all(&self.collected_data).await?;
// Ok(())
// }

/// subscribe and add an event listener.
pub fn subscribe(mut slf: PyRefMut<'_, Self>, on_page_event: PyObject) -> u32 {
let mut rx2 = slf
Expand Down

0 comments on commit 14208cb

Please sign in to comment.