Skip to content

Commit

Permalink
feat: Add scraper and scihub-scraper dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
Kremilly committed Jun 20, 2024
1 parent c63d498 commit 826f027
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,5 @@ lopdf = "0.32.0"
sha2 = "0.10.8"
zip = "2.1.0"
walkdir = "2.5.0"
+scraper = "0.19.0"
+scihub-scraper = "0.5.2"
2 changes: 1 addition & 1 deletion src/consts/uris.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ impl Uris {

// Scimon
pub const SCIMON_API_REQUEST: &'static str = "http://localhost/Scimon/api/";
-pub const SCIHUB_ADDONS_ENDPOINT: &'static str = "https://addons.scibun.com/scihub?paper=";
+// pub const SCIHUB_ADDONS_ENDPOINT: &'static str = "https://addons.scibun.com/scihub?paper=";
pub const SCIMON_SCRAPE_API_ENPOINT: &'static str = "https://addons.scibun.com/scrape?url=";

pub const README_TEMPLATE_LINK: &'static str = "https://template.scibun.com/";
Expand Down
10 changes: 8 additions & 2 deletions src/system/providers.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use std::error::Error;

+use scihub_scraper::SciHubScraper;
+
use crate::{
consts::uris::Uris,
addons::wikipedia::Wikipedia,

utils::{
url::UrlMisc,
-domains::Domains,
 remote::Remote,
+domains::Domains,
},
};

Expand Down Expand Up @@ -67,8 +69,12 @@ impl Providers {
}

pub async fn scihub(url: &str) -> Result<(String, String), Box<dyn Error>> {
+let mut scraper = SciHubScraper::new();
+
 let paper = Self::extract_doi(url);
-let paper_url = format!("{}{}", Uris::SCIHUB_ADDONS_ENDPOINT, paper);
+let paper = scraper.fetch_paper_pdf_url_by_doi(&paper).await?;
+
+let paper_url = paper.to_string();
let filename = Remote::get_filename(&paper_url, true).await?;

Ok((paper_url, filename))
Expand Down

0 comments on commit 826f027

Please sign in to comment.