Skip to content

Commit

Permalink
Merge pull request #32 from eredotpkfr/refactors
Browse files Browse the repository at this point in the history
Use Static Dispatching On Modules
  • Loading branch information
eredotpkfr authored Oct 3, 2024
2 parents 1026c9b + a463750 commit 45b3ba3
Show file tree
Hide file tree
Showing 50 changed files with 711 additions and 451 deletions.
2 changes: 2 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ SUBSCAN_BINARYEDGE_APIKEY=bar
SUBSCAN_BUFFEROVER_APIKEY=baz
SUBSCAN_BUILTWITH_APIKEY=foo
SUBSCAN_CENSYS_APIKEY=bar
SUBSCAN_CERTSPOTTER_APIKEY=baz
SUBSCAN_CHAOS_APIKEY=foo
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ readme = "README.md"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.5.17", features = ["derive"] }
clap = { version = "4.5.19", features = ["derive"] }
tokio = { version = "1.40.0", features = ["full"] }
headless_chrome = { version = "1.0.14", features = ["fetch"] }
reqwest = "0.12.7"
headless_chrome = { version = "1.0.15", features = ["fetch"] }
reqwest = "0.12.8"
scraper = "0.20.0"
regex = "1.10.6"
regex = "1.11.0"
serde_json = "1.0.128"
async-trait = "0.1.82"
async-trait = "0.1.83"
lazy_static = "1.5.0"
enum_dispatch = "0.3.13"
dotenvy = "0.15.7"
Expand Down
4 changes: 2 additions & 2 deletions src/bin/subscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use clap::Parser;
use subscan::{
cache::{self, ALL_MODULES},
cli::Cli,
interfaces::requester::RequesterInterface,
interfaces::{module::SubscanModuleInterface, requester::RequesterInterface},
types::config::RequesterConfig,
};

Expand All @@ -17,7 +17,7 @@ async fn main() {
let mut module = item.lock().await;
let requester = module.requester().await.unwrap();

if module.name().await != "Censys" {
if module.name().await != "chaos" {
continue;
}

Expand Down
38 changes: 23 additions & 15 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use crate::{
enums::SubscanModuleDispatcher,
modules::{
engines::{bing, duckduckgo, google, yahoo},
integrations::{alienvault, anubis, bevigil, binaryedge, bufferover, builtwith, censys},
integrations::{
alienvault, anubis, bevigil, binaryedge, bufferover, builtwith, censys, certspotter,
chaos,
},
},
SubscanModule,
};
use lazy_static::lazy_static;
use tokio::sync::Mutex;
Expand All @@ -12,26 +15,31 @@ lazy_static! {
/// All `subscan` modules are stored in this in-memory [`Vec`]
/// as a [`SubscanModuleDispatcher`]; all modules must be compatible
/// with the [`SubscanModuleInterface`](crate::interfaces::module::SubscanModuleInterface) trait
pub static ref ALL_MODULES: Vec<Mutex<SubscanModule>> = vec![
pub static ref ALL_MODULES: Vec<Mutex<SubscanModuleDispatcher>> = vec![
// Search engines
SubscanModule::new(google::Google::new()),
SubscanModule::new(yahoo::Yahoo::new()),
SubscanModule::new(bing::Bing::new()),
SubscanModule::new(duckduckgo::DuckDuckGo::new()),
Mutex::new(google::Google::dispatcher()),
Mutex::new(yahoo::Yahoo::dispatcher()),
Mutex::new(bing::Bing::dispatcher()),
Mutex::new(duckduckgo::DuckDuckGo::dispatcher()),
// API integrations
SubscanModule::new(alienvault::AlienVault::new()),
SubscanModule::new(anubis::Anubis::new()),
SubscanModule::new(bevigil::Bevigil::new()),
SubscanModule::new(binaryedge::Binaryedge::new()),
SubscanModule::new(bufferover::Bufferover::new()),
SubscanModule::new(builtwith::Builtwith::new()),
SubscanModule::new(censys::Censys::new()),
Mutex::new(alienvault::AlienVault::dispatcher()),
Mutex::new(anubis::Anubis::dispatcher()),
Mutex::new(bevigil::Bevigil::dispatcher()),
Mutex::new(binaryedge::BinaryEdge::dispatcher()),
Mutex::new(bufferover::BufferOver::dispatcher()),
Mutex::new(builtwith::BuiltWith::dispatcher()),
Mutex::new(censys::Censys::dispatcher()),
Mutex::new(certspotter::CertSpotter::dispatcher()),
Mutex::new(chaos::Chaos::dispatcher()),
];
}

/// Module to manage modules that are already cached in the in-memory cache
pub mod modules {
use crate::{interfaces::requester::RequesterInterface, types::config::RequesterConfig};
use crate::{
interfaces::{module::SubscanModuleInterface, requester::RequesterInterface},
types::config::RequesterConfig,
};

/// Configure the requester object of every module that has a requester
///
Expand Down
37 changes: 31 additions & 6 deletions src/enums.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
use crate::extractors::{html::HTMLExtractor, json::JSONExtractor, regex::RegexExtractor};
use crate::modules::generics::api_integration::GenericAPIIntegrationModule;
use crate::modules::generics::search_engine::GenericSearchEngineModule;
use crate::requesters::{chrome::ChromeBrowser, client::HTTPClient};
use enum_dispatch::enum_dispatch;

/// Dispatcher enumeration over the available module types
///
/// It enables static type dispatching instead of dynamic
/// dispatch, which is intended to speed up performance. For
/// more technical details please see the `enum_dispatch` crate
///
/// Every `Subscan` module that is compatible with
/// [`SubscanModuleInterface`](crate::interfaces::module::SubscanModuleInterface)
/// must appear in this dispatcher as an enum variant, because the `enum_dispatch`
/// macro generates a forwarding method for each
/// [`SubscanModuleInterface`](crate::interfaces::module::SubscanModuleInterface)
/// method and matches every variant to its own implementation
///
/// These generated mappings are what make methods such as `.name()`,
/// `.requester()`, `.run()`, etc. callable on a [`SubscanModuleDispatcher`]
/// object; a module type without a variant here cannot be reached through
/// the dispatcher
#[enum_dispatch(SubscanModuleInterface)]
pub enum SubscanModuleDispatcher {
/// Enum variant for generic API integrations. A single variant covers
/// every generic API module; the only requirement is that the module
/// is implemented as a [`GenericAPIIntegrationModule`]
GenericAPIIntegrationModule(GenericAPIIntegrationModule),
/// Enum variant for generic search engines. A single variant covers
/// every generic search engine module; the only requirement is that
/// the module is implemented as a [`GenericSearchEngineModule`]
GenericSearchEngineModule(GenericSearchEngineModule),
}

/// Dispatcher enumeration to decide extractor types
///
/// It allows to made static type dispatching instead of
Expand Down Expand Up @@ -52,9 +80,9 @@ pub enum RequesterDispatcher {
}

/// Authentication methods for API calls.
/// [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule)
/// uses them to apply correct auth method. See the
/// method descriptions to learn how it works
/// [`GenericAPIIntegrationModule`] uses them to apply the
/// correct auth method. See the variant descriptions to
/// learn how each one works
#[derive(PartialEq)]
pub enum APIAuthMethod {
/// Some APIs uses request headers to get
Expand All @@ -66,9 +94,6 @@ pub enum APIAuthMethod {
/// will be added in URL as a query param with given
/// parameter key
APIKeyAsQueryParam(String),
/// This auth method checks if the query URL includes
/// API key's self
APIKeyAsURLSlug,
/// This auth type does nothing for auth
NoAuth,
}
Expand Down
5 changes: 3 additions & 2 deletions src/extractors/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ use async_trait::async_trait;
use serde_json;
use std::collections::BTreeSet;

/// JSON content parser wrapper struct. This object compatible
/// with [`SubdomainExtractorInterface`] and it uses `extract`
/// JSON content parser wrapper struct
///
/// This object is compatible with [`SubdomainExtractorInterface`] and uses its `extract`
/// method to extract subdomain addresses from JSON content.
/// A JSON parsing function must be given for this extractor. Please
/// follow the examples to learn usage techniques
Expand Down
2 changes: 1 addition & 1 deletion src/interfaces/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/// Extractor trait definition
pub mod extractor;
/// Subscan module trait definition
/// `Subscan` module trait definition
pub mod module;
/// HTTP requester trait definition
pub mod requester;
5 changes: 4 additions & 1 deletion src/interfaces/module.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use crate::{
enums::{RequesterDispatcher, SubdomainExtractorDispatcher},
enums::{RequesterDispatcher, SubdomainExtractorDispatcher, SubscanModuleDispatcher},
modules::generics::{
api_integration::GenericAPIIntegrationModule, search_engine::GenericSearchEngineModule,
},
types::core::APIKeyAsEnv,
utils::env,
};
Expand Down
89 changes: 0 additions & 89 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,92 +20,3 @@ pub mod requesters;
pub mod types;
/// Utilities for handling various miscellaneous tasks
pub mod utils;

use enums::{RequesterDispatcher, SubdomainExtractorDispatcher};
use interfaces::module::SubscanModuleInterface;
use std::collections::BTreeSet;
use tokio::sync::Mutex;
use types::core::APIKeyAsEnv;

/// Wrapper around a [`SubscanModuleInterface`] trait object
///
/// It has a single field that stores a boxed
/// [`SubscanModuleInterface`] compatible object. Every
/// method on this wrapper forwards to the inner object,
/// so calls are resolved by dynamic dispatch at run-time
///
/// Please see [`struct@crate::cache::ALL_MODULES`]
/// for the pre-defined `subscan` modules
pub struct SubscanModule(Box<dyn SubscanModuleInterface>);

impl SubscanModule {
/// Create a new [`SubscanModule`] instance wrapped in a [`Mutex`]
///
/// The given module is boxed as a [`SubscanModuleInterface`] trait
/// object, which is why the `'static` bound is required
///
/// # Examples
///
/// ```
/// use std::collections::BTreeSet;
/// use tokio::sync::Mutex;
/// use subscan::SubscanModule;
/// use subscan::interfaces::module::SubscanModuleInterface;
/// use subscan::enums::{RequesterDispatcher, SubdomainExtractorDispatcher};
/// use async_trait::async_trait;
///
/// #[derive(Clone)]
/// pub struct FooModule {}
///
/// #[async_trait(?Send)]
/// impl SubscanModuleInterface for FooModule {
/// async fn name(&self) -> &str {
/// &"foo-module"
/// }
///
/// async fn requester(&self) -> Option<&Mutex<RequesterDispatcher>> {
/// None
/// }
///
/// async fn extractor(&self) -> Option<&SubdomainExtractorDispatcher> {
/// None
/// }
///
/// async fn run(&mut self, domain: String) -> BTreeSet<String> {
/// // do something in `run` method
/// BTreeSet::new()
/// }
/// }
///
/// #[tokio::main]
/// async fn main() {
/// let module = FooModule {};
/// let wrapped = SubscanModule::new(module.clone());
///
/// assert_eq!(wrapped.lock().await.name().await, module.name().await);
///
/// assert!(wrapped.lock().await.requester().await.is_none());
/// assert!(wrapped.lock().await.extractor().await.is_none());
/// }
/// ```
pub fn new<M: 'static + SubscanModuleInterface>(module: M) -> Mutex<Self> {
Mutex::new(Self(Box::new(module)))
}

/// Forwards to the inner module's `requester` method and returns
/// whatever it returns: the requester behind a [`Mutex`], or [`None`]
pub async fn requester(&self) -> Option<&Mutex<RequesterDispatcher>> {
self.0.requester().await
}

/// Forwards to the inner module's `extractor` method and returns
/// whatever it returns: a reference to the extractor, or [`None`]
pub async fn extractor(&self) -> Option<&SubdomainExtractorDispatcher> {
self.0.extractor().await
}

/// Forwards to the inner module's `run` method with the given
/// `domain` and returns the resulting set of strings
pub async fn run(&mut self, domain: String) -> BTreeSet<String> {
self.0.run(domain).await
}

/// Forwards to the inner module's `name` method
pub async fn name(&self) -> &str {
self.0.name().await
}

/// Forwards to the inner module's `fetch_apikey` method and
/// returns its [`APIKeyAsEnv`] result unchanged
pub async fn fetch_apikey(&self) -> APIKeyAsEnv {
self.0.fetch_apikey().await
}
}
22 changes: 13 additions & 9 deletions src/modules/engines/bing.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use crate::{
enums::RequesterDispatcher, extractors::html::HTMLExtractor,
modules::generics::search_engine::GenericSearchEngineModule, requesters::client::HTTPClient,
enums::{RequesterDispatcher, SubscanModuleDispatcher},
extractors::html::HTMLExtractor,
modules::generics::search_engine::GenericSearchEngineModule,
requesters::client::HTTPClient,
};
use reqwest::Url;

pub const BING_MODULE_NAME: &str = "Bing";
pub const BING_MODULE_NAME: &str = "bing";
pub const BING_SEARCH_URL: &str = "https://www.bing.com/search";
pub const BING_SEARCH_PARAM: &str = "q";
pub const BING_CITE_TAG: &str = "cite";
Expand All @@ -16,25 +18,27 @@ pub const BING_CITE_TAG: &str = "cite";
///
/// | Property | Value |
/// |:------------------:|:-----------------------------:|
/// | Module Name | `Bing` |
/// | Module Name | `bing` |
/// | Search URL | <https://www.bing.com/search> |
/// | Search Param | `q` |
/// | Subdomain Selector | `cite` |
pub struct Bing {}

impl Bing {
#[allow(clippy::new_ret_no_self)]
pub fn new() -> GenericSearchEngineModule {
pub fn dispatcher() -> SubscanModuleDispatcher {
let url = Url::parse(BING_SEARCH_URL);

let extractor: HTMLExtractor = HTMLExtractor::new(BING_CITE_TAG.into(), vec![]);
let requester: RequesterDispatcher = HTTPClient::default().into();
let url = Url::parse(BING_SEARCH_URL);

GenericSearchEngineModule {
let generic = GenericSearchEngineModule {
name: BING_MODULE_NAME.into(),
param: BING_SEARCH_PARAM.into(),
url: url.unwrap(),
requester: requester.into(),
extractor: extractor.into(),
}
};

generic.into()
}
}
22 changes: 13 additions & 9 deletions src/modules/engines/duckduckgo.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use crate::{
enums::RequesterDispatcher, extractors::html::HTMLExtractor,
modules::generics::search_engine::GenericSearchEngineModule, requesters::chrome::ChromeBrowser,
enums::{RequesterDispatcher, SubscanModuleDispatcher},
extractors::html::HTMLExtractor,
modules::generics::search_engine::GenericSearchEngineModule,
requesters::chrome::ChromeBrowser,
};
use reqwest::Url;

pub const DUCKDUCKGO_MODULE_NAME: &str = "DuckDuckGo";
pub const DUCKDUCKGO_MODULE_NAME: &str = "duckduckgo";
pub const DUCKDUCKGO_SEARCH_URL: &str = "https://duckduckgo.com";
pub const DUCKDUCKGO_SEARCH_PARAM: &str = "q";
pub const DUCKDUCKGO_CITE_TAG: &str = "article > div > div > a > span:first-child";
Expand All @@ -16,25 +18,27 @@ pub const DUCKDUCKGO_CITE_TAG: &str = "article > div > div > a > span:first-chil
///
/// | Property | Value |
/// |:------------------:|:--------------------------------------------:|
/// | Module Name | `DuckDuckGo` |
/// | Module Name | `duckduckgo` |
/// | Search URL | <https://duckduckgo.com> |
/// | Search Param | `q` |
/// | Subdomain Selector | `article > div > div > a > span:first-child` |
pub struct DuckDuckGo {}

impl DuckDuckGo {
#[allow(clippy::new_ret_no_self)]
pub fn new() -> GenericSearchEngineModule {
pub fn dispatcher() -> SubscanModuleDispatcher {
let url = Url::parse(DUCKDUCKGO_SEARCH_URL);

let extractor: HTMLExtractor = HTMLExtractor::new(DUCKDUCKGO_CITE_TAG.into(), vec![]);
let requester: RequesterDispatcher = ChromeBrowser::default().into();
let url = Url::parse(DUCKDUCKGO_SEARCH_URL);

GenericSearchEngineModule {
let generic = GenericSearchEngineModule {
name: DUCKDUCKGO_MODULE_NAME.into(),
param: DUCKDUCKGO_SEARCH_PARAM.into(),
url: url.unwrap(),
requester: requester.into(),
extractor: extractor.into(),
}
};

generic.into()
}
}
Loading

0 comments on commit 45b3ba3

Please sign in to comment.