From 41b60546e884a45fec1f3c7d710b2dadeca3920b Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 01:03:24 +0300 Subject: [PATCH 01/38] feat: add generic api integration module and implement anubis and alienvault integrations --- src/bin/subscan.rs | 2 +- src/cache.rs | 3 + src/enums.rs | 3 +- src/extractors/html.rs | 2 +- src/extractors/json.rs | 67 ++++++++++++++ src/extractors/mod.rs | 2 + src/integrations/alienvault.rs | 51 ----------- src/integrations/anubis.rs | 48 ---------- src/integrations/mod.rs | 2 - src/interfaces/extractor.rs | 2 +- src/lib.rs | 2 - src/modules/engines/bing.rs | 2 +- src/modules/engines/duckduckgo.rs | 2 +- src/modules/engines/google.rs | 2 +- src/modules/engines/yahoo.rs | 2 +- src/modules/generics/api_integration.rs | 53 +++++++++++ src/modules/generics/mod.rs | 4 +- .../{searchengine.rs => search_engine.rs} | 2 +- src/modules/integrations/alienvault.rs | 91 +++++++++++++++++++ src/modules/integrations/anubis.rs | 90 ++++++++++++++++++ src/modules/integrations/mod.rs | 4 + src/modules/mod.rs | 2 + src/types/core.rs | 5 + tests/modules/common.rs | 4 +- 24 files changed, 332 insertions(+), 115 deletions(-) create mode 100644 src/extractors/json.rs delete mode 100644 src/integrations/alienvault.rs delete mode 100644 src/integrations/anubis.rs delete mode 100644 src/integrations/mod.rs create mode 100644 src/modules/generics/api_integration.rs rename src/modules/generics/{searchengine.rs => search_engine.rs} (98%) create mode 100644 src/modules/integrations/alienvault.rs create mode 100644 src/modules/integrations/anubis.rs create mode 100644 src/modules/integrations/mod.rs diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index fd5297b8..54a87ef4 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -23,7 +23,7 @@ async fn main() { requester ); - if module.name().await != "DuckDuckGo" { + if module.name().await != "Anubis" { continue; } diff --git a/src/cache.rs b/src/cache.rs index 76713f45..cc10d2bc 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,5 +1,6 @@ use crate::{ modules::engines::{bing, duckduckgo, google, yahoo}, + modules::integrations::{alienvault, anubis}, SubscanModule, }; use lazy_static::lazy_static; @@ -14,6 +15,8 @@ lazy_static! { SubscanModule::new(yahoo::Yahoo::new()), SubscanModule::new(bing::Bing::new()), SubscanModule::new(duckduckgo::DuckDuckGo::new()), + SubscanModule::new(alienvault::AlienVault::new()), + SubscanModule::new(anubis::Anubis::new()), ]; } diff --git a/src/enums.rs b/src/enums.rs index 23d74e82..eff98d1d 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -1,4 +1,4 @@ -use crate::extractors::{html::HTMLExtractor, regex::RegexExtractor}; +use crate::extractors::{html::HTMLExtractor, json::JSONExtractor, regex::RegexExtractor}; use crate::requesters::{chrome::ChromeBrowser, client::HTTPClient}; use enum_dispatch::enum_dispatch; @@ -11,6 +11,7 @@ use enum_dispatch::enum_dispatch; pub enum SubdomainExtractorDispatcher { HTMLExtractor(HTMLExtractor), RegexExtractor(RegexExtractor), + JSONExtractor(JSONExtractor), } /// Dispatcher enumeration to decide requester types diff --git a/src/extractors/html.rs b/src/extractors/html.rs index acdf5a2f..e18a403c 100644 --- a/src/extractors/html.rs +++ b/src/extractors/html.rs @@ -69,7 +69,7 @@ impl SubdomainExtractorInterface for HTMLExtractor { /// /// let result = extractor.extract(html, domain).await; /// - /// assert_eq!(result, BTreeSet::from([String::from("bar.foo.com")])); + /// assert_eq!(result, BTreeSet::from(["bar.foo.com".into()])); /// } /// ``` async fn extract(&self, content: String, domain: String) -> BTreeSet { diff --git a/src/extractors/json.rs b/src/extractors/json.rs new file mode 100644 index 00000000..71f6eb19 --- /dev/null +++ b/src/extractors/json.rs @@ -0,0 +1,67 @@ +use crate::interfaces::extractor::SubdomainExtractorInterface; +use crate::types::core::{InnerExtractMethod, Subdomain}; +use async_trait::async_trait; +use serde_json; +use std::collections::BTreeSet; + +/// JSON content parser wrapper struct +/// +/// This object compatible with [`SubdomainExtractorInterface`] +/// and it uses `extract` method to extract subdomain addresses +/// from JSON content. JSON parsing function must be given +/// for this extractor +pub struct JSONExtractor { + inner: InnerExtractMethod, +} + +#[async_trait] +impl SubdomainExtractorInterface for JSONExtractor { + /// Main extraction method to extract subdomains from + /// given JSON content + /// + /// # Examples + /// + /// ``` + /// use subscan::extractors::json::JSONExtractor; + /// use crate::subscan::interfaces::extractor::SubdomainExtractorInterface; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// #[tokio::main] + /// async fn main() { + /// let json = "{\"foo\": \"bar\"}".to_string(); + /// let domain = "foo.com".to_string(); + /// + /// let func = |item: Value| { + /// BTreeSet::from([item["foo"].as_str().unwrap().into()]) + /// }; + /// let extractor = JSONExtractor::new(Box::new(func)); + /// + /// let result = extractor.extract(json, domain).await; + /// + /// assert_eq!(result, BTreeSet::from(["bar".into()])); + /// } + /// ``` + async fn extract(&self, content: String, _domain: String) -> BTreeSet { + (self.inner)(serde_json::from_str(&content).unwrap_or_default()) + } +} + +impl JSONExtractor { + /// Creates a new [`JSONExtractor`] instance + /// + /// # Examples + /// + /// ```no_run + /// use subscan::extractors::json::JSONExtractor; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::default())); + /// + /// // do something with extractor instance + /// ``` + pub fn new(inner: InnerExtractMethod) -> Self { + Self { inner } + } +} diff --git a/src/extractors/mod.rs b/src/extractors/mod.rs index 050467bd..415ddd08 100644 --- a/src/extractors/mod.rs +++ b/src/extractors/mod.rs @@ -1,4 +1,6 @@ /// Subdomain extractor for HTML documents pub mod html; +/// JSON extractor to extract subdomains from JSON content +pub mod json; /// Extract subdomains with regex statement pub mod regex; diff --git a/src/integrations/alienvault.rs b/src/integrations/alienvault.rs deleted file mode 100644 index a88b3348..00000000 --- a/src/integrations/alienvault.rs +++ /dev/null @@ -1,51 +0,0 @@ -use reqwest::Client; -use serde_json::Value; -use std::collections::HashSet; - -const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"; - -#[derive(Debug)] -pub struct AlienVault { - url: &'static str, - domain: String, - client: Client, -} - -impl AlienVault { - pub async fn new(domain: String) -> AlienVault { - AlienVault { - url: "https://otx.alienvault.com/api/v1/indicators/domain/", - domain, - client: Client::new(), - } - } - - pub async fn start(&self) { - let mut all_results: HashSet = HashSet::new(); - - let request = self - .client - .get(format!("{}{}{}", self.url, self.domain, "/passive_dns")) - .header("User-Agent", USER_AGENT) - .build() - .unwrap(); - - let response = self.client.execute(request).await.unwrap(); - - if response.status() != 200 { - return; - } - - let content = response.text().await.unwrap(); - let res: Value = serde_json::from_str(&content).unwrap(); - - if let Some(passives) = res["passive_dns"].as_array() { - all_results.extend( - passives - .iter() - .filter_map(|item| Some(item["hostname"].as_str()?.to_string())), - ); - } - println!("{:#?}\n{}", all_results, all_results.len()); - } -} diff --git a/src/integrations/anubis.rs b/src/integrations/anubis.rs deleted file mode 100644 index 71fd5b87..00000000 --- a/src/integrations/anubis.rs +++ /dev/null @@ -1,48 +0,0 @@ -use reqwest::Client; -use serde_json::Value; -use std::collections::HashSet; - -const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"; - -#[derive(Debug)] -pub struct Anubis { - url: &'static str, - domain: String, - client: Client, -} - -impl Anubis { - pub async fn new(domain: String) -> Anubis { - Anubis { - url: "https://jonlu.ca/anubis/subdomains/", - domain, - client: Client::new(), - } - } - - pub async fn start(&self) { - let request = self - .client - .get(format!("{}{}", self.url, self.domain)) - .header("User-Agent", USER_AGENT) - .build() - .unwrap(); - - let response = self.client.execute(request).await.unwrap(); - - if response.status() != 200 && response.status() != 300 { - return; - } - - let content = response.text().await.unwrap(); - let res: Value = serde_json::from_str(&content).unwrap(); - - if let Some(subs) = res.as_array() { - let all_results: HashSet = subs - .iter() - .filter_map(|item| Some(item.as_str()?.to_string())) - .collect(); - println!("{:#?}\n{}", all_results, all_results.len()); - } - } -} diff --git a/src/integrations/mod.rs b/src/integrations/mod.rs deleted file mode 100644 index ab08776f..00000000 --- a/src/integrations/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod alienvault; -pub mod anubis; diff --git a/src/interfaces/extractor.rs b/src/interfaces/extractor.rs index ac16a461..cb110526 100644 --- a/src/interfaces/extractor.rs +++ b/src/interfaces/extractor.rs @@ -1,5 +1,5 @@ use crate::enums::SubdomainExtractorDispatcher; -use crate::extractors::{html::HTMLExtractor, regex::RegexExtractor}; +use crate::extractors::{html::HTMLExtractor, json::JSONExtractor, regex::RegexExtractor}; use crate::types::core::Subdomain; use async_trait::async_trait; use enum_dispatch::enum_dispatch; diff --git a/src/lib.rs b/src/lib.rs index 3235297a..257eecb2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,6 @@ pub mod enums; /// Data extractors like /// [`extractors::regex`], [`extractors::html`], etc. pub mod extractors; -/// Thirty party integration modules -pub mod integrations; /// Trait implementations pub mod interfaces; /// All modules listed under this module, core components for subscan diff --git a/src/modules/engines/bing.rs b/src/modules/engines/bing.rs index d6c02d31..cd5cc976 100644 --- a/src/modules/engines/bing.rs +++ b/src/modules/engines/bing.rs @@ -1,6 +1,6 @@ use crate::{ enums::RequesterDispatcher, extractors::html::HTMLExtractor, - modules::generics::searchengine::GenericSearchEngineModule, requesters::client::HTTPClient, + modules::generics::search_engine::GenericSearchEngineModule, requesters::client::HTTPClient, }; use reqwest::Url; diff --git a/src/modules/engines/duckduckgo.rs b/src/modules/engines/duckduckgo.rs index 0b2145c1..f289c0fd 100644 --- a/src/modules/engines/duckduckgo.rs +++ b/src/modules/engines/duckduckgo.rs @@ -1,6 +1,6 @@ use crate::{ enums::RequesterDispatcher, extractors::html::HTMLExtractor, - modules::generics::searchengine::GenericSearchEngineModule, requesters::chrome::ChromeBrowser, + modules::generics::search_engine::GenericSearchEngineModule, requesters::chrome::ChromeBrowser, }; use reqwest::Url; diff --git a/src/modules/engines/google.rs b/src/modules/engines/google.rs index 8706a872..f3fc8a95 100644 --- a/src/modules/engines/google.rs +++ b/src/modules/engines/google.rs @@ -1,6 +1,6 @@ use crate::{ enums::RequesterDispatcher, extractors::html::HTMLExtractor, - modules::generics::searchengine::GenericSearchEngineModule, requesters::client::HTTPClient, + modules::generics::search_engine::GenericSearchEngineModule, requesters::client::HTTPClient, }; use reqwest::Url; diff --git a/src/modules/engines/yahoo.rs b/src/modules/engines/yahoo.rs index 22fa9aef..768b9b0a 100644 --- a/src/modules/engines/yahoo.rs +++ b/src/modules/engines/yahoo.rs @@ -1,6 +1,6 @@ use crate::{ enums::RequesterDispatcher, extractors::html::HTMLExtractor, - modules::generics::searchengine::GenericSearchEngineModule, requesters::client::HTTPClient, + modules::generics::search_engine::GenericSearchEngineModule, requesters::client::HTTPClient, }; use reqwest::Url; diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs new file mode 100644 index 00000000..93f50429 --- /dev/null +++ b/src/modules/generics/api_integration.rs @@ -0,0 +1,53 @@ +use crate::enums::{RequesterDispatcher, SubdomainExtractorDispatcher}; +use crate::interfaces::extractor::SubdomainExtractorInterface; +use crate::interfaces::module::SubscanModuleInterface; +use crate::interfaces::requester::RequesterInterface; +use async_trait::async_trait; +use reqwest::Url; +use std::collections::BTreeSet; +use tokio::sync::Mutex; + +/// Generic API integration module +/// +/// Several modules uses this generic module +/// to make API calls and parsing JSON response +/// +/// It takes a extractor that compatible with +/// [`SubdomainExtractorInterface`], mostly +/// [`JSONExtractor`](crate::extractors::json::JSONExtractor) extractor +/// is used with this module to parse JSON contents +pub struct GenericAPIIntegrationModule { + /// Module name + pub name: String, + /// Simple function field that gets query URL + /// by given domain address + pub url: Box String + Sync + Send>, + /// Requester object instance for HTTP requests + pub requester: Mutex, + /// Any extractor object to extract subdomain from content + pub extractor: SubdomainExtractorDispatcher, +} + +#[async_trait(?Send)] +impl SubscanModuleInterface for GenericAPIIntegrationModule { + async fn name(&self) -> &str { + &self.name + } + + async fn requester(&self) -> Option<&Mutex> { + Some(&self.requester) + } + + async fn extractor(&self) -> Option<&SubdomainExtractorDispatcher> { + Some(&self.extractor) + } + + async fn run(&mut self, domain: String) -> BTreeSet { + let requester = self.requester.lock().await; + let url = Url::parse(&(self.url)(domain.clone())).unwrap(); + + let content = requester.get_content(url).await.unwrap_or_default(); + + self.extractor.extract(content, domain).await + } +} diff --git a/src/modules/generics/mod.rs b/src/modules/generics/mod.rs index e1c8301b..3434a861 100644 --- a/src/modules/generics/mod.rs +++ b/src/modules/generics/mod.rs @@ -1,2 +1,4 @@ +/// Generic API integration module +pub mod api_integration; /// Generic search engine module -pub mod searchengine; +pub mod search_engine; diff --git a/src/modules/generics/searchengine.rs b/src/modules/generics/search_engine.rs similarity index 98% rename from src/modules/generics/searchengine.rs rename to src/modules/generics/search_engine.rs index 3cba0df9..046efc63 100644 --- a/src/modules/generics/searchengine.rs +++ b/src/modules/generics/search_engine.rs @@ -63,7 +63,7 @@ impl GenericSearchEngineModule { /// # Examples /// /// ```no_run - /// use subscan::modules::generics::searchengine::GenericSearchEngineModule; + /// use subscan::modules::generics::search_engine::GenericSearchEngineModule; /// use subscan::types::query::SearchQueryParam; /// use subscan::extractors::regex::RegexExtractor; /// use subscan::requesters::client::HTTPClient; diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs new file mode 100644 index 00000000..be2652ee --- /dev/null +++ b/src/modules/integrations/alienvault.rs @@ -0,0 +1,91 @@ +use std::collections::BTreeSet; + +use crate::{ + enums::RequesterDispatcher, extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, types::core::Subdomain, +}; +use serde_json::Value; + +/// Alienvault API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct AlienVault {} + +const ALIENVAULT_MODULE_NAME: &str = "AlienVault"; +const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain/"; + +impl AlienVault { + /// Create a new [`AlienVault`] module instance + /// + /// # Examples + /// + /// ```no_run + /// use subscan::modules::integrations::alienvault; + /// + /// #[tokio::main] + /// async fn main() { + /// let alienvault = alienvault::AlienVault::new(); + /// + /// // do something with alienvault instance + /// } + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: ALIENVAULT_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + requester: requester.into(), + extractor: extractor.into(), + } + } + + /// Get Alienvault query URL from given domain address + /// + /// # Examples + /// + /// ``` + /// use subscan::modules::integrations::alienvault; + /// + /// #[tokio::main] + /// async fn main() { + /// let url = alienvault::AlienVault::get_query_url("foo.com".to_string()); + /// let expected = "https://otx.alienvault.com/api/v1/indicators/domain/foo.com/passive_dns"; + /// + /// assert_eq!(url, expected); + /// } + /// ``` + pub fn get_query_url(domain: String) -> String { + format!("{ALIENVAULT_URL}{domain}/passive_dns") + } + + /// JSON parse method to extract subdomains + /// + /// # Examples + /// + /// ``` + /// use subscan::modules::integrations::alienvault; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// #[tokio::main] + /// async fn main() { + /// let result = alienvault::AlienVault::extract(Value::default()); + /// + /// assert_eq!(result, BTreeSet::default()); + /// } + /// ``` + pub fn extract(content: Value) -> BTreeSet { + if let Some(passives) = content["passive_dns"].as_array() { + let filter = |item: &Value| Some(item["hostname"].as_str()?.to_string()); + + BTreeSet::from_iter(passives.iter().filter_map(filter)) + } else { + BTreeSet::default() + } + } +} diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs new file mode 100644 index 00000000..52f67306 --- /dev/null +++ b/src/modules/integrations/anubis.rs @@ -0,0 +1,90 @@ +use crate::{ + enums::RequesterDispatcher, extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, types::core::Subdomain, +}; +use serde_json::Value; +use std::collections::BTreeSet; + +/// Anubis API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Anubis {} + +const ANUBIS_MODULE_NAME: &str = "Anubis"; +const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains/"; + +impl Anubis { + /// Create a new [`Anubis`] module instance + /// + /// # Examples + /// + /// ```no_run + /// use subscan::modules::integrations::anubis; + /// + /// #[tokio::main] + /// async fn main() { + /// let anubis = anubis::Anubis::new(); + /// + /// // do something with anubis instance + /// } + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: ANUBIS_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + requester: requester.into(), + extractor: extractor.into(), + } + } + + /// Get Anubis query URL from given domain address + /// + /// # Examples + /// + /// ```no_run + /// use subscan::modules::integrations::anubis; + /// + /// #[tokio::main] + /// async fn main() { + /// let url = anubis::Anubis::get_query_url("foo.com".to_string()); + /// let expected = "https://jonlu.ca/anubis/subdomains/foo.com"; + /// + /// assert_eq!(url, expected); + /// } + /// ``` + pub fn get_query_url(domain: String) -> String { + format!("{ANUBIS_URL}{domain}") + } + + /// JSON parse method to extract subdomains + /// + /// # Examples + /// + /// ```no_run + /// use subscan::modules::integrations::anubis; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// #[tokio::main] + /// async fn main() { + /// let result = anubis::Anubis::extract(Value::default()); + /// + /// assert_eq!(result, BTreeSet::default()); + /// } + /// ``` + pub fn extract(content: Value) -> BTreeSet { + if let Some(subs) = content.as_array() { + let filter = |item: &Value| Some(item.as_str()?.to_string()); + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::default() + } + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs new file mode 100644 index 00000000..2dc96c57 --- /dev/null +++ b/src/modules/integrations/mod.rs @@ -0,0 +1,4 @@ +/// Alienvault API integration module +pub mod alienvault; +/// Anubis API integration module +pub mod anubis; diff --git a/src/modules/mod.rs b/src/modules/mod.rs index 19d5f257..4d521937 100644 --- a/src/modules/mod.rs +++ b/src/modules/mod.rs @@ -2,3 +2,5 @@ pub mod engines; /// Generic module implementations pub mod generics; +/// Integration modules +pub mod integrations; diff --git a/src/types/core.rs b/src/types/core.rs index e588629f..4ff29da9 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -1,2 +1,7 @@ +use serde_json::Value; +use std::collections::BTreeSet; + /// Core subdomain data type pub type Subdomain = String; +/// Inner extract method type definition for [`JSONExtractor`](crate::extractors::json::JSONExtractor) +pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 3c7a848a..760ababe 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -10,8 +10,8 @@ pub mod mocks { use reqwest::Url; use subscan::{ enums::RequesterDispatcher, extractors::regex::RegexExtractor, - modules::generics::searchengine::GenericSearchEngineModule, requesters::client::HTTPClient, - types::query::SearchQueryParam, + modules::generics::search_engine::GenericSearchEngineModule, + requesters::client::HTTPClient, types::query::SearchQueryParam, }; pub fn generic_search_engine(url: &str) -> GenericSearchEngineModule { From a2fb66250661c839c7d9b3930c54d1c3f877835d Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 12:41:01 +0300 Subject: [PATCH 02/38] test: add anubis and alienvault tests --- tests/modules/common.rs | 46 ++++++++++++++++--- .../modules/generics/api_integration_test.rs | 23 ++++++++++ ...chengine_test.rs => search_engine_test.rs} | 0 tests/modules/integrations/alienvault_test.rs | 24 ++++++++++ tests/modules/integrations/anubis_test.rs | 17 +++++++ tests/modules/integrations/mod.rs | 1 + tests/modules/main.rs | 1 + .../module/generics/api-integration.json | 18 ++++++++ .../stubs/module/integrations/alienvault.json | 22 +++++++++ tests/stubs/module/integrations/anubis.json | 15 ++++++ 10 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 tests/modules/generics/api_integration_test.rs rename tests/modules/generics/{searchengine_test.rs => search_engine_test.rs} (100%) create mode 100644 tests/modules/integrations/alienvault_test.rs create mode 100644 tests/modules/integrations/anubis_test.rs create mode 100644 tests/modules/integrations/mod.rs create mode 100644 tests/stubs/module/generics/api-integration.json create mode 100644 tests/stubs/module/integrations/alienvault.json create mode 100644 tests/stubs/module/integrations/anubis.json diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 760ababe..725e9daa 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -3,24 +3,58 @@ pub mod constants { pub const TEST_URL: &str = "http://foo.com"; pub const TEST_DOMAIN: &str = "foo.com"; pub const TEST_BAR_SUBDOMAIN: &str = "bar.foo.com"; + pub const TEST_BAZ_SUBDOMAIN: &str = "baz.foo.com"; } pub mod mocks { use super::constants::TEST_MODULE_NAME; use reqwest::Url; + use serde_json::Value; + use std::collections::BTreeSet; use subscan::{ - enums::RequesterDispatcher, extractors::regex::RegexExtractor, - modules::generics::search_engine::GenericSearchEngineModule, - requesters::client::HTTPClient, types::query::SearchQueryParam, + enums::RequesterDispatcher, + extractors::{json::JSONExtractor, regex::RegexExtractor}, + modules::generics::{ + api_integration::GenericAPIIntegrationModule, search_engine::GenericSearchEngineModule, + }, + requesters::client::HTTPClient, + types::query::SearchQueryParam, }; pub fn generic_search_engine(url: &str) -> GenericSearchEngineModule { + let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); + let extractor = RegexExtractor::default(); + let url = Url::parse(url); + GenericSearchEngineModule { name: TEST_MODULE_NAME.to_string(), - url: Url::parse(url).unwrap(), + url: url.unwrap(), param: SearchQueryParam::from("q"), - requester: RequesterDispatcher::HTTPClient(HTTPClient::default()).into(), - extractor: RegexExtractor::default().into(), + requester: requester.into(), + extractor: extractor.into(), + } + } + + pub fn generic_api_integration(url: &str) -> GenericAPIIntegrationModule { + let parse = |json: Value| { + if let Some(subs) = json["subdomains"].as_array() { + let filter = |item: &Value| Some(item.as_str()?.to_string()); + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::default() + } + }; + + let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); + let extractor = JSONExtractor::new(Box::new(parse)); + let url = url.to_string(); + + GenericAPIIntegrationModule { + name: TEST_MODULE_NAME.to_string(), + url: Box::new(move |_| url.clone()), + requester: requester.into(), + extractor: extractor.into(), } } } diff --git a/tests/modules/generics/api_integration_test.rs b/tests/modules/generics/api_integration_test.rs new file mode 100644 index 00000000..b17467d9 --- /dev/null +++ b/tests/modules/generics/api_integration_test.rs @@ -0,0 +1,23 @@ +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN, TEST_MODULE_NAME}, + mocks::generic_api_integration, +}; +use subscan::interfaces::module::SubscanModuleInterface; + +#[tokio::test] +#[stubr::mock("module/generics/api-integration.json")] +async fn generic_api_integration_run_test() { + let mut module = generic_api_integration(&stubr.path("/subdomains")); + + let result = module.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string() + ] + .into() + ); +} diff --git a/tests/modules/generics/searchengine_test.rs b/tests/modules/generics/search_engine_test.rs similarity index 100% rename from tests/modules/generics/searchengine_test.rs rename to tests/modules/generics/search_engine_test.rs diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs new file mode 100644 index 00000000..ada5ed02 --- /dev/null +++ b/tests/modules/integrations/alienvault_test.rs @@ -0,0 +1,24 @@ +use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; +use reqwest::Url; +use subscan::{interfaces::module::SubscanModuleInterface, modules::integrations::alienvault}; + +#[tokio::test] +#[stubr::mock("module/integrations/alienvault.json")] +async fn alienvault_run_test() { + let mut alienvault = alienvault::AlienVault::new(); + let url = Url::parse(stubr.path("/alienvault").as_str()).unwrap(); + + alienvault.url = Box::new(move |_| url.to_string()); + + let result = alienvault.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(alienvault.name().await, "AlienVault"); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); +} diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs new file mode 100644 index 00000000..3547ba13 --- /dev/null +++ b/tests/modules/integrations/anubis_test.rs @@ -0,0 +1,17 @@ +use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; +use reqwest::Url; +use subscan::{interfaces::module::SubscanModuleInterface, modules::integrations::anubis}; + +#[tokio::test] +#[stubr::mock("module/integrations/anubis.json")] +async fn anubis_run_test() { + let mut anubis = anubis::Anubis::new(); + let url = Url::parse(stubr.path("/anubis").as_str()).unwrap(); + + anubis.url = Box::new(move |_| url.to_string()); + + let result = anubis.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(anubis.name().await, "Anubis"); + assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); +} diff --git a/tests/modules/integrations/mod.rs b/tests/modules/integrations/mod.rs new file mode 100644 index 00000000..d39cf824 --- /dev/null +++ b/tests/modules/integrations/mod.rs @@ -0,0 +1 @@ +automod::dir!("tests/modules/integrations"); diff --git a/tests/modules/main.rs b/tests/modules/main.rs index 433450fd..f46d4803 100644 --- a/tests/modules/main.rs +++ b/tests/modules/main.rs @@ -1,3 +1,4 @@ mod common; mod engines; mod generics; +mod integrations; diff --git a/tests/stubs/module/generics/api-integration.json b/tests/stubs/module/generics/api-integration.json new file mode 100644 index 00000000..0d32eb98 --- /dev/null +++ b/tests/stubs/module/generics/api-integration.json @@ -0,0 +1,18 @@ +{ + "request": { + "method": "GET", + "urlPath": "/subdomains" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "subdomains": [ + "bar.foo.com", + "baz.foo.com" + ] + }, + "status": 200 + } +} diff --git a/tests/stubs/module/integrations/alienvault.json b/tests/stubs/module/integrations/alienvault.json new file mode 100644 index 00000000..54f7d5e3 --- /dev/null +++ b/tests/stubs/module/integrations/alienvault.json @@ -0,0 +1,22 @@ +{ + "request": { + "method": "GET", + "urlPath": "/alienvault" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "passive_dns": [ + { + "hostname": "bar.foo.com" + }, + { + "hostname": "baz.foo.com" + } + ] + }, + "status": 200 + } +} diff --git a/tests/stubs/module/integrations/anubis.json b/tests/stubs/module/integrations/anubis.json new file mode 100644 index 00000000..26c568aa --- /dev/null +++ b/tests/stubs/module/integrations/anubis.json @@ -0,0 +1,15 @@ +{ + "request": { + "method": "GET", + "urlPath": "/anubis" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": [ + "bar.foo.com" + ], + "status": 200 + } +} From 153739bbeb44a131c732b501664a971226465efa Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 13:07:44 +0300 Subject: [PATCH 03/38] test: extend anubis and alienvault tests --- src/modules/engines/bing.rs | 8 +++--- src/modules/engines/duckduckgo.rs | 8 +++--- src/modules/engines/google.rs | 8 +++--- src/modules/engines/yahoo.rs | 8 +++--- src/modules/integrations/alienvault.rs | 12 ++++----- src/modules/integrations/anubis.rs | 16 +++++------ tests/modules/engines/bing_test.rs | 7 +++-- tests/modules/engines/duckduckgo_test.rs | 8 +++--- tests/modules/engines/google_test.rs | 7 +++-- tests/modules/engines/yahoo_test.rs | 7 +++-- tests/modules/integrations/alienvault_test.rs | 27 +++++++++++++++++-- tests/modules/integrations/anubis_test.rs | 27 +++++++++++++++++-- 12 files changed, 100 insertions(+), 43 deletions(-) diff --git a/src/modules/engines/bing.rs b/src/modules/engines/bing.rs index cd5cc976..18a3802b 100644 --- a/src/modules/engines/bing.rs +++ b/src/modules/engines/bing.rs @@ -4,10 +4,10 @@ use crate::{ }; use reqwest::Url; -const BING_MODULE_NAME: &str = "Bing"; -const BING_SEARCH_URL: &str = "https://www.bing.com/search"; -const BING_SEARCH_PARAM: &str = "q"; -const BING_CITE_TAG: &str = "cite"; +pub const BING_MODULE_NAME: &str = "Bing"; +pub const BING_SEARCH_URL: &str = "https://www.bing.com/search"; +pub const BING_SEARCH_PARAM: &str = "q"; +pub const BING_CITE_TAG: &str = "cite"; /// Bing search engine enumerator /// diff --git a/src/modules/engines/duckduckgo.rs b/src/modules/engines/duckduckgo.rs index f289c0fd..9794b3bc 100644 --- a/src/modules/engines/duckduckgo.rs +++ b/src/modules/engines/duckduckgo.rs @@ -4,10 +4,10 @@ use crate::{ }; use reqwest::Url; -const DUCKDUCKGO_MODULE_NAME: &str = "DuckDuckGo"; -const DUCKDUCKGO_SEARCH_URL: &str = "https://duckduckgo.com"; -const DUCKDUCKGO_SEARCH_PARAM: &str = "q"; -const DUCKDUCKGO_CITE_TAG: &str = "article > div > div > a > span:first-child"; +pub const DUCKDUCKGO_MODULE_NAME: &str = "DuckDuckGo"; +pub const DUCKDUCKGO_SEARCH_URL: &str = "https://duckduckgo.com"; +pub const DUCKDUCKGO_SEARCH_PARAM: &str = "q"; +pub const DUCKDUCKGO_CITE_TAG: &str = "article > div > div > a > span:first-child"; /// DuckDuckGo search engine enumerator /// diff --git a/src/modules/engines/google.rs b/src/modules/engines/google.rs index f3fc8a95..6d3424ff 100644 --- a/src/modules/engines/google.rs +++ b/src/modules/engines/google.rs @@ -4,10 +4,10 @@ use crate::{ }; use reqwest::Url; -const GOOGLE_MODULE_NAME: &str = "Google"; -const GOOGLE_SEARCH_URL: &str = "https://www.google.com/search"; -const GOOGLE_SEARCH_PARAM: &str = "q"; -const GOOGLE_CITE_TAG: &str = "cite"; +pub const GOOGLE_MODULE_NAME: &str = "Google"; +pub const GOOGLE_SEARCH_URL: &str = "https://www.google.com/search"; +pub const GOOGLE_SEARCH_PARAM: &str = "q"; +pub const GOOGLE_CITE_TAG: &str = "cite"; /// Google search engine enumerator /// diff --git a/src/modules/engines/yahoo.rs b/src/modules/engines/yahoo.rs index 768b9b0a..d2b5dd80 100644 --- a/src/modules/engines/yahoo.rs +++ b/src/modules/engines/yahoo.rs @@ -4,10 +4,10 @@ use crate::{ }; use reqwest::Url; -const YAHOO_MODULE_NAME: &str = "Yahoo"; -const YAHOO_SEARCH_URL: &str = "https://search.yahoo.com/search"; -const YAHOO_SEARCH_PARAM: &str = "p"; -const YAHOO_CITE_TAG: &str = "ol > li > div > div > h3 > a > span"; +pub const YAHOO_MODULE_NAME: &str = "Yahoo"; +pub const YAHOO_SEARCH_URL: &str = "https://search.yahoo.com/search"; +pub const YAHOO_SEARCH_PARAM: &str = "p"; +pub const YAHOO_CITE_TAG: &str = "ol > li > div > div > h3 > a > span"; /// Yahoo search engine enumerator /// diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index be2652ee..5c94289d 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -13,8 +13,8 @@ use serde_json::Value; /// here are the configurations pub struct AlienVault {} -const ALIENVAULT_MODULE_NAME: &str = "AlienVault"; -const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain/"; +pub const ALIENVAULT_MODULE_NAME: &str = "AlienVault"; +pub const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain/"; impl AlienVault { /// Create a new [`AlienVault`] module instance @@ -49,14 +49,14 @@ impl AlienVault { /// # Examples /// /// ``` - /// use subscan::modules::integrations::alienvault; + /// use subscan::modules::integrations::alienvault::{self, ALIENVAULT_URL}; /// /// #[tokio::main] /// async fn main() { - /// let url = alienvault::AlienVault::get_query_url("foo.com".to_string()); - /// let expected = "https://otx.alienvault.com/api/v1/indicators/domain/foo.com/passive_dns"; + /// let domain = "foo.com".to_string(); + /// let url = alienvault::AlienVault::get_query_url(domain.clone()); /// - /// assert_eq!(url, expected); + /// assert_eq!(url, format!("{ALIENVAULT_URL}{domain}/passive_dns")); /// } /// ``` pub fn get_query_url(domain: String) -> String { diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 52f67306..729da6a5 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -12,8 +12,8 @@ use std::collections::BTreeSet; /// here are the configurations pub struct Anubis {} -const ANUBIS_MODULE_NAME: &str = "Anubis"; -const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains/"; +pub const ANUBIS_MODULE_NAME: &str = "Anubis"; +pub const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains/"; impl Anubis { /// Create a new [`Anubis`] module instance @@ -47,15 +47,15 @@ impl Anubis { /// /// # Examples /// - /// ```no_run - /// use subscan::modules::integrations::anubis; + /// ``` + /// use subscan::modules::integrations::anubis::{self, ANUBIS_URL}; /// /// #[tokio::main] /// async fn main() { - /// let url = anubis::Anubis::get_query_url("foo.com".to_string()); - /// let expected = "https://jonlu.ca/anubis/subdomains/foo.com"; + /// let domain = "foo.com".to_string(); + /// let url = anubis::Anubis::get_query_url(domain.clone()); /// - /// assert_eq!(url, expected); + /// assert_eq!(url, format!("{ANUBIS_URL}{domain}")); /// } /// ``` pub fn get_query_url(domain: String) -> String { @@ -66,7 +66,7 @@ impl Anubis { /// /// # Examples /// - /// ```no_run + /// ``` /// use subscan::modules::integrations::anubis; /// use std::collections::BTreeSet; /// use serde_json::Value; diff --git a/tests/modules/engines/bing_test.rs b/tests/modules/engines/bing_test.rs index 255e2907..c8a15acf 100644 --- a/tests/modules/engines/bing_test.rs +++ b/tests/modules/engines/bing_test.rs @@ -1,6 +1,9 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; -use subscan::{interfaces::module::SubscanModuleInterface, modules::engines::bing}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::engines::bing::{self, BING_MODULE_NAME}, +}; #[tokio::test] #[stubr::mock("module/engines/bing.json")] @@ -11,6 +14,6 @@ async fn bing_run_test() { let result = bing.run(TEST_DOMAIN.to_string()).await; - assert_eq!(bing.name().await, "Bing"); + assert_eq!(bing.name().await, BING_MODULE_NAME); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } diff --git a/tests/modules/engines/duckduckgo_test.rs b/tests/modules/engines/duckduckgo_test.rs index 4697349d..b7d7fef8 100644 --- a/tests/modules/engines/duckduckgo_test.rs +++ b/tests/modules/engines/duckduckgo_test.rs @@ -1,8 +1,10 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; use subscan::{ - enums::RequesterDispatcher, interfaces::module::SubscanModuleInterface, - modules::engines::duckduckgo, requesters::client::HTTPClient, + enums::RequesterDispatcher, + interfaces::module::SubscanModuleInterface, + modules::engines::duckduckgo::{self, DUCKDUCKGO_MODULE_NAME}, + requesters::client::HTTPClient, }; #[tokio::test] @@ -16,6 +18,6 @@ async fn duckduckgo_run_test() { let result = duckduckgo.run(TEST_DOMAIN.to_string()).await; - assert_eq!(duckduckgo.name().await, "DuckDuckGo"); + assert_eq!(duckduckgo.name().await, DUCKDUCKGO_MODULE_NAME); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } diff --git a/tests/modules/engines/google_test.rs b/tests/modules/engines/google_test.rs index 5528e9e3..92469045 100644 --- a/tests/modules/engines/google_test.rs +++ b/tests/modules/engines/google_test.rs @@ -1,6 +1,9 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; -use subscan::{interfaces::module::SubscanModuleInterface, modules::engines::google}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::engines::google::{self, GOOGLE_MODULE_NAME}, +}; #[tokio::test] #[stubr::mock("module/engines/google.json")] @@ -11,6 +14,6 @@ async fn google_run_test() { let result = google.run(TEST_DOMAIN.to_string()).await; - assert_eq!(google.name().await, "Google"); + assert_eq!(google.name().await, GOOGLE_MODULE_NAME); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } diff --git a/tests/modules/engines/yahoo_test.rs b/tests/modules/engines/yahoo_test.rs index 1da61818..998788e1 100644 --- a/tests/modules/engines/yahoo_test.rs +++ b/tests/modules/engines/yahoo_test.rs @@ -1,6 +1,9 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; -use subscan::{interfaces::module::SubscanModuleInterface, modules::engines::yahoo}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::engines::yahoo::{self, YAHOO_MODULE_NAME}, +}; #[tokio::test] #[stubr::mock("module/engines/yahoo.json")] @@ -11,6 +14,6 @@ async fn yahoo_run_test() { let result = yahoo.run(TEST_DOMAIN.to_string()).await; - assert_eq!(yahoo.name().await, "Yahoo"); + assert_eq!(yahoo.name().await, YAHOO_MODULE_NAME); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index ada5ed02..0a2be78c 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -1,6 +1,10 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; -use subscan::{interfaces::module::SubscanModuleInterface, modules::integrations::alienvault}; +use serde_json::{self, Value}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::alienvault::{self, ALIENVAULT_MODULE_NAME, ALIENVAULT_URL}, +}; #[tokio::test] #[stubr::mock("module/integrations/alienvault.json")] @@ -12,7 +16,7 @@ async fn alienvault_run_test() { let result = alienvault.run(TEST_DOMAIN.to_string()).await; - assert_eq!(alienvault.name().await, "AlienVault"); + assert_eq!(alienvault.name().await, ALIENVAULT_MODULE_NAME); assert_eq!( result, [ @@ -22,3 +26,22 @@ async fn alienvault_run_test() { .into() ); } + +#[tokio::test] +async fn get_query_url_test() { + let url = alienvault::AlienVault::get_query_url(TEST_DOMAIN.to_string()); + let expected = format!("{ALIENVAULT_URL}{TEST_DOMAIN}/passive_dns"); + + assert_eq!(url, expected); +} + +#[tokio::test] +async fn extract_test() { + let json = "{\"passive_dns\": [{\"hostname\": \"bar.foo.com\"}]}"; + + let extracted = alienvault::AlienVault::extract(serde_json::from_str(json).unwrap()); + let not_extracted = alienvault::AlienVault::extract(Value::default()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, [].into()); +} diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index 3547ba13..b8bb1df3 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -1,6 +1,10 @@ use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; use reqwest::Url; -use subscan::{interfaces::module::SubscanModuleInterface, modules::integrations::anubis}; +use serde_json::{self, Value}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::anubis::{self, ANUBIS_MODULE_NAME, ANUBIS_URL}, +}; #[tokio::test] #[stubr::mock("module/integrations/anubis.json")] @@ -12,6 +16,25 @@ async fn anubis_run_test() { let result = anubis.run(TEST_DOMAIN.to_string()).await; - assert_eq!(anubis.name().await, "Anubis"); + assert_eq!(anubis.name().await, ANUBIS_MODULE_NAME); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } + +#[tokio::test] +async fn get_query_url_test() { + let url = anubis::Anubis::get_query_url(TEST_DOMAIN.to_string()); + let expected = format!("{ANUBIS_URL}{TEST_DOMAIN}"); + + assert_eq!(url, expected); +} + +#[tokio::test] +async fn extract_test() { + let json = "[\"bar.foo.com\"]"; + + let extracted = anubis::Anubis::extract(serde_json::from_str(json).unwrap()); + let not_extracted = anubis::Anubis::extract(Value::default()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, [].into()); +} From 1a2de4ce39318cb5f91271e15e8622b5a406a6d9 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 18:04:37 +0300 Subject: [PATCH 04/38] feat: basic API key system --- .env.template | 1 + .gitignore | 3 + Cargo.toml | 1 + src/bin/subscan.rs | 4 +- src/cache.rs | 3 +- src/config.rs | 1 + src/enums.rs | 7 ++ src/interfaces/module.rs | 17 +++- src/interfaces/requester.rs | 11 ++- src/lib.rs | 2 + src/modules/generics/api_integration.rs | 36 +++++-- src/modules/integrations/alienvault.rs | 13 ++- src/modules/integrations/anubis.rs | 13 ++- src/modules/integrations/bevigil.rs | 93 +++++++++++++++++++ src/modules/integrations/mod.rs | 2 + src/requesters/chrome.rs | 10 +- src/requesters/client.rs | 8 +- tests/cache_test.rs | 4 +- tests/modules/common.rs | 3 +- tests/modules/integrations/alienvault_test.rs | 2 +- tests/modules/integrations/anubis_test.rs | 2 +- tests/modules/integrations/bevigil_test.rs | 47 ++++++++++ tests/stubs/module/integrations/bevigil.json | 18 ++++ 23 files changed, 262 insertions(+), 39 deletions(-) create mode 100644 .env.template create mode 100644 src/config.rs create mode 100644 src/modules/integrations/bevigil.rs create mode 100644 tests/modules/integrations/bevigil_test.rs create mode 100644 tests/stubs/module/integrations/bevigil.json diff --git a/.env.template b/.env.template new file mode 100644 index 00000000..d058629c --- /dev/null +++ b/.env.template @@ -0,0 +1 @@ +SUBSCAN_BEVIGIL_APIKEY=foo diff --git a/.gitignore b/.gitignore index 196e176d..f7df471c 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ Cargo.lock # Added by cargo /target + +# Ignore local .env file +.env diff --git a/Cargo.toml b/Cargo.toml index d8e43695..dd6a4152 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ serde_json = "1.0.128" async-trait = "0.1.82" lazy_static = "1.5.0" enum_dispatch = "0.3.13" +dotenvy = "0.15.7" [dev-dependencies] automod = "1.0.14" diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index 54a87ef4..eaedb765 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -8,6 +8,8 @@ use subscan::{ #[tokio::main] async fn main() { + dotenvy::dotenv().ok(); + let cli = Cli::parse(); let config = RequesterConfig::from(&cli); @@ -23,7 +25,7 @@ async fn main() { requester ); - if module.name().await != "Anubis" { + if module.name().await != "Bevigil" { continue; } diff --git a/src/cache.rs b/src/cache.rs index cc10d2bc..7246d3e5 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,6 +1,6 @@ use crate::{ modules::engines::{bing, duckduckgo, google, yahoo}, - modules::integrations::{alienvault, anubis}, + modules::integrations::{alienvault, anubis, bevigil}, SubscanModule, }; use lazy_static::lazy_static; @@ -17,6 +17,7 @@ lazy_static! { SubscanModule::new(duckduckgo::DuckDuckGo::new()), SubscanModule::new(alienvault::AlienVault::new()), SubscanModule::new(anubis::Anubis::new()), + SubscanModule::new(bevigil::Bevigil::new()), ]; } diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 00000000..a6e417bc --- /dev/null +++ b/src/config.rs @@ -0,0 +1 @@ +pub const SUBSCAN_ENV_NAMESPACE: &str = "SUBSCAN"; diff --git a/src/enums.rs b/src/enums.rs index eff98d1d..fc0eab47 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -32,3 +32,10 @@ pub enum RequesterDispatcher { /// Just send HTTP requests via [`reqwest`] HTTPClient(HTTPClient), } + +/// Authentication methods for API calls +pub enum AuthMethod { + APIKeyInHeader(String), + APIKeyInURL, + NoAuth, +} diff --git a/src/interfaces/module.rs b/src/interfaces/module.rs index 9a24eb3e..8e0f05fb 100644 --- a/src/interfaces/module.rs +++ b/src/interfaces/module.rs @@ -1,4 +1,7 @@ -use crate::enums::{RequesterDispatcher, SubdomainExtractorDispatcher}; +use crate::{ + config::SUBSCAN_ENV_NAMESPACE, + enums::{RequesterDispatcher, SubdomainExtractorDispatcher}, +}; use async_trait::async_trait; use enum_dispatch::enum_dispatch; use std::collections::BTreeSet; @@ -78,4 +81,16 @@ pub trait SubscanModuleInterface: Sync + Send { /// run this `run` method will be called, so this method /// should do everything async fn run(&mut self, domain: String) -> BTreeSet; + /// Fetches API key from system environment variables + /// if available. Module environment variables uses [`SUBSCAN_ENV_NAMESPACE`] + /// namespace with `SUBSCAN__APIKEY` format + async fn fetch_apikey(&self) -> String { + let key = format!( + "{}_{}_APIKEY", + SUBSCAN_ENV_NAMESPACE, + self.name().await.to_uppercase() + ); + + std::env::var(key).unwrap_or_default() + } } diff --git a/src/interfaces/requester.rs b/src/interfaces/requester.rs index c5067bce..03f252c3 100644 --- a/src/interfaces/requester.rs +++ b/src/interfaces/requester.rs @@ -30,8 +30,8 @@ use reqwest::Url; /// /// #[async_trait(?Send)] /// impl RequesterInterface for CustomRequester { -/// async fn config(&self) -> RequesterConfig { -/// RequesterConfig::default() +/// async fn config(&mut self) -> &mut RequesterConfig { +/// &mut self.config /// } /// /// async fn configure(&mut self, config: RequesterConfig) { @@ -46,11 +46,12 @@ use reqwest::Url; /// #[tokio::main] /// async fn main() { /// let url = Url::parse("https://foo.com").unwrap(); -/// let requester = CustomRequester { +/// +/// let mut requester = CustomRequester { /// config: RequesterConfig::default(), /// }; /// -/// let config = requester.config().await; +/// let config = requester.config().await.clone(); /// /// assert_eq!(requester.get_content(url).await.unwrap(), "foo"); /// assert_eq!(config.proxy, None); @@ -62,7 +63,7 @@ use reqwest::Url; #[enum_dispatch] pub trait RequesterInterface: Sync + Send { /// Returns requester configurations as a [`RequesterConfig`] object - async fn config(&self) -> RequesterConfig; + async fn config(&mut self) -> &mut RequesterConfig; /// Configure current requester object by using new [`RequesterConfig`] object async fn configure(&mut self, config: RequesterConfig); /// Get HTML source of page from given [`reqwest::Url`] object diff --git a/src/lib.rs b/src/lib.rs index 257eecb2..00e7c17e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ pub mod cache; /// Includes CLI components pub mod cli; +/// Project configuration utils +pub mod config; /// Enumerations and project type definitions pub mod enums; /// Data extractors like diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 93f50429..3a53dce2 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -1,10 +1,16 @@ -use crate::enums::{RequesterDispatcher, SubdomainExtractorDispatcher}; -use crate::interfaces::extractor::SubdomainExtractorInterface; -use crate::interfaces::module::SubscanModuleInterface; -use crate::interfaces::requester::RequesterInterface; +use crate::{ + enums::{AuthMethod, RequesterDispatcher, SubdomainExtractorDispatcher}, + interfaces::{ + extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, + requester::RequesterInterface, + }, +}; use async_trait::async_trait; -use reqwest::Url; -use std::collections::BTreeSet; +use reqwest::{ + header::{HeaderName, HeaderValue}, + Url, +}; +use std::{collections::BTreeSet, str::FromStr}; use tokio::sync::Mutex; /// Generic API integration module @@ -22,6 +28,9 @@ pub struct GenericAPIIntegrationModule { /// Simple function field that gets query URL /// by given domain address pub url: Box String + Sync + Send>, + /// Set authentication method, see [`AuthMethod`] enum + /// for details + pub auth: AuthMethod, /// Requester object instance for HTTP requests pub requester: Mutex, /// Any extractor object to extract subdomain from content @@ -43,9 +52,22 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { } async fn run(&mut self, domain: String) -> BTreeSet { - let requester = self.requester.lock().await; + let mut requester = self.requester.lock().await; let url = Url::parse(&(self.url)(domain.clone())).unwrap(); + match &self.auth { + AuthMethod::APIKeyInHeader(key) => { + let apikey = self.fetch_apikey().await; + + let name = HeaderName::from_str(key.as_str()).unwrap(); + let value = HeaderValue::from_str(apikey.as_str()).unwrap(); + + requester.config().await.add_header(name, value); + } + AuthMethod::APIKeyInURL => {} + AuthMethod::NoAuth => {} + } + let content = requester.get_content(url).await.unwrap_or_default(); self.extractor.extract(content, domain).await diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 5c94289d..53ddd20e 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -1,9 +1,11 @@ use std::collections::BTreeSet; use crate::{ - enums::RequesterDispatcher, extractors::json::JSONExtractor, + enums::{AuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, modules::generics::api_integration::GenericAPIIntegrationModule, - requesters::client::HTTPClient, types::core::Subdomain, + requesters::client::HTTPClient, + types::core::Subdomain, }; use serde_json::Value; @@ -14,7 +16,7 @@ use serde_json::Value; pub struct AlienVault {} pub const ALIENVAULT_MODULE_NAME: &str = "AlienVault"; -pub const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain/"; +pub const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain"; impl AlienVault { /// Create a new [`AlienVault`] module instance @@ -39,6 +41,7 @@ impl AlienVault { GenericAPIIntegrationModule { name: ALIENVAULT_MODULE_NAME.into(), url: Box::new(Self::get_query_url), + auth: AuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), } @@ -56,11 +59,11 @@ impl AlienVault { /// let domain = "foo.com".to_string(); /// let url = alienvault::AlienVault::get_query_url(domain.clone()); /// - /// assert_eq!(url, format!("{ALIENVAULT_URL}{domain}/passive_dns")); + /// assert_eq!(url, format!("{ALIENVAULT_URL}/{domain}/passive_dns")); /// } /// ``` pub fn get_query_url(domain: String) -> String { - format!("{ALIENVAULT_URL}{domain}/passive_dns") + format!("{ALIENVAULT_URL}/{domain}/passive_dns") } /// JSON parse method to extract subdomains diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 729da6a5..673c3094 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -1,7 +1,9 @@ use crate::{ - enums::RequesterDispatcher, extractors::json::JSONExtractor, + enums::{AuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, modules::generics::api_integration::GenericAPIIntegrationModule, - requesters::client::HTTPClient, types::core::Subdomain, + requesters::client::HTTPClient, + types::core::Subdomain, }; use serde_json::Value; use std::collections::BTreeSet; @@ -13,7 +15,7 @@ use std::collections::BTreeSet; pub struct Anubis {} pub const ANUBIS_MODULE_NAME: &str = "Anubis"; -pub const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains/"; +pub const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains"; impl Anubis { /// Create a new [`Anubis`] module instance @@ -38,6 +40,7 @@ impl Anubis { GenericAPIIntegrationModule { name: ANUBIS_MODULE_NAME.into(), url: Box::new(Self::get_query_url), + auth: AuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), } @@ -55,11 +58,11 @@ impl Anubis { /// let domain = "foo.com".to_string(); /// let url = anubis::Anubis::get_query_url(domain.clone()); /// - /// assert_eq!(url, format!("{ANUBIS_URL}{domain}")); + /// assert_eq!(url, format!("{ANUBIS_URL}/{domain}")); /// } /// ``` pub fn get_query_url(domain: String) -> String { - format!("{ANUBIS_URL}{domain}") + format!("{ANUBIS_URL}/{domain}") } /// JSON parse method to extract subdomains diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs new file mode 100644 index 00000000..19b63925 --- /dev/null +++ b/src/modules/integrations/bevigil.rs @@ -0,0 +1,93 @@ +use crate::{ + enums::{AuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, + types::core::Subdomain, +}; +use serde_json::Value; +use std::collections::BTreeSet; + +/// Bevigil API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Bevigil {} + +pub const BEVIGIL_MODULE_NAME: &str = "Bevigil"; +pub const BEVIGIL_URL: &str = "https://osint.bevigil.com/api"; + +impl Bevigil { + /// Create a new [`Bevigil`] module instance + /// + /// # Examples + /// + /// ```no_run + /// use subscan::modules::integrations::bevigil; + /// + /// #[tokio::main] + /// async fn main() { + /// let bevigil = bevigil::Bevigil::new(); + /// + /// // do something with bevigil instance + /// } + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: BEVIGIL_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + auth: AuthMethod::APIKeyInHeader("X-Access-Token".into()), + requester: requester.into(), + extractor: extractor.into(), + } + } + + /// Get Bevigil query URL from given domain address + /// + /// # Examples + /// + /// ``` + /// use subscan::modules::integrations::bevigil::{self, BEVIGIL_URL}; + /// + /// #[tokio::main] + /// async fn main() { + /// let domain = "foo.com".to_string(); + /// let url = bevigil::Bevigil::get_query_url(domain.clone()); + /// + /// assert_eq!(url, format!("{BEVIGIL_URL}/{domain}/subdomains")); + /// } + /// ``` + pub fn get_query_url(domain: String) -> String { + format!("{BEVIGIL_URL}/{domain}/subdomains") + } + + /// JSON parse method to extract subdomains + /// + /// # Examples + /// + /// ``` + /// use subscan::modules::integrations::bevigil; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// #[tokio::main] + /// async fn main() { + /// let result = bevigil::Bevigil::extract(Value::default()); + /// + /// assert_eq!(result, BTreeSet::default()); + /// } + /// ``` + pub fn extract(content: Value) -> BTreeSet { + if let Some(subs) = content["subdomains"].as_array() { + let filter = |item: &Value| Some(item.as_str()?.to_string()); + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::default() + } + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs index 2dc96c57..579204d9 100644 --- a/src/modules/integrations/mod.rs +++ b/src/modules/integrations/mod.rs @@ -2,3 +2,5 @@ pub mod alienvault; /// Anubis API integration module pub mod anubis; +/// Bevigil API integration module, API key required +pub mod bevigil; diff --git a/src/requesters/chrome.rs b/src/requesters/chrome.rs index 87eb0db3..76ecbefd 100644 --- a/src/requesters/chrome.rs +++ b/src/requesters/chrome.rs @@ -107,13 +107,13 @@ impl RequesterInterface for ChromeBrowser { /// /// #[tokio::main] /// async fn main() { - /// let browser = ChromeBrowser::default(); + /// let mut browser = ChromeBrowser::default(); /// /// assert_eq!(browser.config().await.timeout, Duration::from_secs(10)); /// } /// ``` - async fn config(&self) -> RequesterConfig { - self.config.clone() + async fn config(&mut self) -> &mut RequesterConfig { + &mut self.config } /// Configure requester with a new config object @@ -150,7 +150,7 @@ impl RequesterInterface for ChromeBrowser { } self.browser = Browser::new(options).unwrap(); - self.config = config + self.config = config; } /// Get page source HTML from given [`reqwest::Url`] @@ -164,7 +164,7 @@ impl RequesterInterface for ChromeBrowser { /// /// #[tokio::main] /// async fn main() { - /// let browser = ChromeBrowser::default(); + /// let mut browser = ChromeBrowser::default(); /// let url = Url::parse("https://foo.com").unwrap(); /// /// let content = browser.get_content(url).await.unwrap(); diff --git a/src/requesters/client.rs b/src/requesters/client.rs index d4c29dd7..9f103bac 100644 --- a/src/requesters/client.rs +++ b/src/requesters/client.rs @@ -72,13 +72,13 @@ impl RequesterInterface for HTTPClient { /// /// #[tokio::main] /// async fn main() { - /// let client = HTTPClient::default(); + /// let mut client = HTTPClient::default(); /// /// assert_eq!(client.config().await.timeout, Duration::from_secs(10)); /// } /// ``` - async fn config(&self) -> RequesterConfig { - self.config.clone() + async fn config(&mut self) -> &mut RequesterConfig { + &mut self.config } /// Configure requester with a new config object @@ -129,7 +129,7 @@ impl RequesterInterface for HTTPClient { /// /// #[tokio::main] /// async fn main() { - /// let client = HTTPClient::default(); + /// let mut client = HTTPClient::default(); /// let url = Url::parse("https://foo.com").unwrap(); /// /// let content = client.get_content(url).await.unwrap(); diff --git a/tests/cache_test.rs b/tests/cache_test.rs index 269eb48e..fc854a54 100644 --- a/tests/cache_test.rs +++ b/tests/cache_test.rs @@ -33,7 +33,7 @@ mod modules { let module = module.lock().await; if let Some(requester) = module.requester().await { - assert_eq!(requester.lock().await.config().await, old_config); + assert_eq!(requester.lock().await.config().await, &old_config); } } @@ -43,7 +43,7 @@ mod modules { let module = module.lock().await; if let Some(requester) = module.requester().await { - assert_eq!(requester.lock().await.config().await, new_config); + assert_eq!(requester.lock().await.config().await, &new_config); } } } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 725e9daa..d0c3895f 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -12,7 +12,7 @@ pub mod mocks { use serde_json::Value; use std::collections::BTreeSet; use subscan::{ - enums::RequesterDispatcher, + enums::{AuthMethod, RequesterDispatcher}, extractors::{json::JSONExtractor, regex::RegexExtractor}, modules::generics::{ api_integration::GenericAPIIntegrationModule, search_engine::GenericSearchEngineModule, @@ -53,6 +53,7 @@ pub mod mocks { GenericAPIIntegrationModule { name: TEST_MODULE_NAME.to_string(), url: Box::new(move |_| url.clone()), + auth: AuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), } diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index 0a2be78c..084051ac 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -30,7 +30,7 @@ async fn alienvault_run_test() { #[tokio::test] async fn get_query_url_test() { let url = alienvault::AlienVault::get_query_url(TEST_DOMAIN.to_string()); - let expected = format!("{ALIENVAULT_URL}{TEST_DOMAIN}/passive_dns"); + let expected = format!("{ALIENVAULT_URL}/{TEST_DOMAIN}/passive_dns"); assert_eq!(url, expected); } diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index b8bb1df3..4ad08829 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -23,7 +23,7 @@ async fn anubis_run_test() { #[tokio::test] async fn get_query_url_test() { let url = anubis::Anubis::get_query_url(TEST_DOMAIN.to_string()); - let expected = format!("{ANUBIS_URL}{TEST_DOMAIN}"); + let expected = format!("{ANUBIS_URL}/{TEST_DOMAIN}"); assert_eq!(url, expected); } diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs new file mode 100644 index 00000000..ff59f4f6 --- /dev/null +++ b/tests/modules/integrations/bevigil_test.rs @@ -0,0 +1,47 @@ +use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; +use reqwest::Url; +use serde_json::{self, Value}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::bevigil::{self, BEVIGIL_MODULE_NAME, BEVIGIL_URL}, +}; + +#[tokio::test] +#[stubr::mock("module/integrations/bevigil.json")] +async fn bevigil_run_test() { + let mut bevigil = bevigil::Bevigil::new(); + let url = Url::parse(stubr.path("/bevigil").as_str()).unwrap(); + + bevigil.url = Box::new(move |_| url.to_string()); + + let result = bevigil.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(bevigil.name().await, BEVIGIL_MODULE_NAME); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); +} + +#[tokio::test] +async fn get_query_url_test() { + let url = bevigil::Bevigil::get_query_url(TEST_DOMAIN.to_string()); + let expected = format!("{BEVIGIL_URL}/{TEST_DOMAIN}/subdomains"); + + assert_eq!(url, expected); +} + +#[tokio::test] +async fn extract_test() { + let json = "{\"subdomains\": [\"bar.foo.com\"]}"; + + let extracted = bevigil::Bevigil::extract(serde_json::from_str(json).unwrap()); + let not_extracted = bevigil::Bevigil::extract(Value::default()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, [].into()); +} diff --git a/tests/stubs/module/integrations/bevigil.json b/tests/stubs/module/integrations/bevigil.json new file mode 100644 index 00000000..39b3d5f7 --- /dev/null +++ b/tests/stubs/module/integrations/bevigil.json @@ -0,0 +1,18 @@ +{ + "request": { + "method": "GET", + "urlPath": "/bevigil" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "subdomains": [ + "bar.foo.com", + "baz.foo.com" + ] + }, + "status": 200 + } +} From 548e2181698af6ad31afd1695ec67be5098cf624 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 22:15:42 +0300 Subject: [PATCH 05/38] docs: update docstrings --- .gitignore | 1 - src/cache.rs | 2 ++ src/enums.rs | 32 +++++++++++++++++++++++++++++--- src/extractors/html.rs | 2 -- src/extractors/json.rs | 11 +++++------ src/extractors/regex.rs | 8 ++++---- src/requesters/chrome.rs | 5 ++--- src/requesters/client.rs | 5 ++--- 8 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index f7df471c..355c3966 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,6 @@ Cargo.lock # Added by cargo - /target # Ignore local .env file diff --git a/src/cache.rs b/src/cache.rs index 7246d3e5..91e284a9 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -11,10 +11,12 @@ lazy_static! { /// as a [`SubscanModule`], all modules must be compatible /// with [`SubscanModuleInterface`](crate::interfaces::module::SubscanModuleInterface) trait pub static ref ALL_MODULES: Vec> = vec![ + // Search engines SubscanModule::new(google::Google::new()), SubscanModule::new(yahoo::Yahoo::new()), SubscanModule::new(bing::Bing::new()), SubscanModule::new(duckduckgo::DuckDuckGo::new()), + // API integrations SubscanModule::new(alienvault::AlienVault::new()), SubscanModule::new(anubis::Anubis::new()), SubscanModule::new(bevigil::Bevigil::new()), diff --git a/src/enums.rs b/src/enums.rs index fc0eab47..4b7632dc 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -9,8 +9,23 @@ use enum_dispatch::enum_dispatch; /// technical details please follow up `enum_dispatch` package #[enum_dispatch(SubdomainExtractorInterface)] pub enum SubdomainExtractorDispatcher { + /// HTML extractor type to extract subdomain addresses + /// from any HTML content. See the [`HTMLExtractor`] + /// struct definition for examples and technical details HTMLExtractor(HTMLExtractor), + /// Regex extractor type allows to extract subdomain + /// addresses from string content with a regex pattern + /// by given domain address. See the [`RegexExtractor`] + /// for technical details and examples usages RegexExtractor(RegexExtractor), + /// JSON extractor type can extract subdomains from + /// JSON content. In this type head up point + /// is to know that created as a wrapper + /// struct to be compatible with + /// [`SubdomainExtractorInterface`](crate::interfaces::extractor::SubdomainExtractorInterface) + /// so parser method must be implemented and gave + /// this wrapper struct. See the [`JSONExtractor`] + /// examples and technical details JSONExtractor(JSONExtractor), } @@ -25,17 +40,28 @@ pub enum RequesterDispatcher { /// On this requester type, Chrome browser will run and /// all HTTP requests made with browser. Has pros according /// to [`HTTPClient`] requester like running Js, rendering - /// pages, etc. + /// pages, etc. See the [`ChromeBrowser`] definition to learn + /// usage ChromeBrowser(ChromeBrowser), /// Simple HTTP client interface to make requesters, it does /// not allows to run Js, rendering pages or user interface. - /// Just send HTTP requests via [`reqwest`] + /// Just send HTTP requests via [`reqwest`]. See the [`HTTPClient`] + /// struct definition for examples and technical details HTTPClient(HTTPClient), } -/// Authentication methods for API calls +/// Authentication methods for API calls. +/// [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) +/// uses them to apply correct auth method. See the +/// method descriptions to learn how it works pub enum AuthMethod { + /// Some APIs uses request headers to get + /// API key. If this auth type selected API key + /// will add in request headers with a given header key APIKeyInHeader(String), + /// This auth type uses when API require API + /// key in URL APIKeyInURL, + /// This auth type does nothing for auth NoAuth, } diff --git a/src/extractors/html.rs b/src/extractors/html.rs index e18a403c..bb64dd19 100644 --- a/src/extractors/html.rs +++ b/src/extractors/html.rs @@ -6,8 +6,6 @@ use async_trait::async_trait; use scraper::{ElementRef, Html, Selector}; use std::collections::BTreeSet; -/// HTML extractor component to extract subdomain addresses -/// /// This object compatible with [`SubdomainExtractorInterface`] /// and it uses `extract` method to extract subdomain addresses /// from inner text by given `XPath` or `CSS` selector diff --git a/src/extractors/json.rs b/src/extractors/json.rs index 71f6eb19..2c854ec5 100644 --- a/src/extractors/json.rs +++ b/src/extractors/json.rs @@ -4,12 +4,11 @@ use async_trait::async_trait; use serde_json; use std::collections::BTreeSet; -/// JSON content parser wrapper struct -/// -/// This object compatible with [`SubdomainExtractorInterface`] -/// and it uses `extract` method to extract subdomain addresses -/// from JSON content. JSON parsing function must be given -/// for this extractor +/// JSON content parser wrapper struct. This object compatible +/// with [`SubdomainExtractorInterface`] and it uses `extract` +/// method to extract subdomain addresses from JSON content. +/// JSON parsing function must be given for this extractor. Please +/// follow up examples to learn usage techniques pub struct JSONExtractor { inner: InnerExtractMethod, } diff --git a/src/extractors/regex.rs b/src/extractors/regex.rs index ab026d30..d57f0e8b 100644 --- a/src/extractors/regex.rs +++ b/src/extractors/regex.rs @@ -6,10 +6,10 @@ use async_trait::async_trait; use regex::Match; use std::collections::BTreeSet; -/// Regex extractor component -/// -/// Generates subdomain pattern by given domain -/// address and extracts subdomains via this pattern +/// Regex extractor component generates subdomain pattern by +/// given domain address and extracts subdomains via this pattern. +/// Also this object compatible with [`SubdomainExtractorInterface`] +/// and it uses `extract` method #[derive(Default)] pub struct RegexExtractor {} diff --git a/src/requesters/chrome.rs b/src/requesters/chrome.rs index 76ecbefd..b77bea16 100644 --- a/src/requesters/chrome.rs +++ b/src/requesters/chrome.rs @@ -3,9 +3,8 @@ use async_trait::async_trait; use headless_chrome::{browser::LaunchOptions, Browser}; use reqwest::Url; -/// Chrome requester struct, send HTTP requests -/// via Chrome browser. Also its compatible -/// with [`RequesterInterface`] +/// Chrome requester struct, send HTTP requests via Chrome browser. +/// Also its compatible with [`RequesterInterface`] pub struct ChromeBrowser { config: RequesterConfig, browser: Browser, diff --git a/src/requesters/client.rs b/src/requesters/client.rs index 9f103bac..9655a262 100644 --- a/src/requesters/client.rs +++ b/src/requesters/client.rs @@ -6,9 +6,8 @@ const CLIENT_BUILD_ERR: &str = "Cannot create HTTP client!"; const REQUEST_BUILD_ERR: &str = "Cannot build request!"; const PROXY_PARSE_ERR: &str = "Cannot parse proxy!"; -/// HTTP requester struct, send HTTP requests -/// via [`reqwest`] client. Also its compatible -/// with [`RequesterInterface`] +/// HTTP requester struct, send HTTP requests via [`reqwest`] client. +/// Also its compatible with [`RequesterInterface`] #[derive(Default)] pub struct HTTPClient { config: RequesterConfig, From 2b7fbd1a42de1617a37c9e11203866cbcfdbb33f Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 21 Sep 2024 23:18:36 +0300 Subject: [PATCH 06/38] chore: use new method instead of default while create btreeset --- src/extractors/json.rs | 2 +- src/interfaces/module.rs | 2 +- src/lib.rs | 2 +- src/modules/integrations/alienvault.rs | 4 ++-- src/modules/integrations/anubis.rs | 4 ++-- src/modules/integrations/bevigil.rs | 4 ++-- tests/modules/common.rs | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/extractors/json.rs b/src/extractors/json.rs index 2c854ec5..7e584055 100644 --- a/src/extractors/json.rs +++ b/src/extractors/json.rs @@ -56,7 +56,7 @@ impl JSONExtractor { /// use std::collections::BTreeSet; /// use serde_json::Value; /// - /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::default())); + /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::new())); /// /// // do something with extractor instance /// ``` diff --git a/src/interfaces/module.rs b/src/interfaces/module.rs index 8e0f05fb..17951090 100644 --- a/src/interfaces/module.rs +++ b/src/interfaces/module.rs @@ -46,7 +46,7 @@ use tokio::sync::Mutex; /// } /// /// async fn run(&mut self, domain: String) -> BTreeSet { -/// BTreeSet::default() +/// BTreeSet::new() /// // do something in `run` method /// } /// } diff --git a/src/lib.rs b/src/lib.rs index 00e7c17e..187a2bc4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,7 +69,7 @@ impl SubscanModule { /// /// async fn run(&mut self, domain: String) -> BTreeSet { /// // do something in `run` method - /// BTreeSet::default() + /// BTreeSet::new() /// } /// } /// diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 53ddd20e..6f4e784e 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -79,7 +79,7 @@ impl AlienVault { /// async fn main() { /// let result = alienvault::AlienVault::extract(Value::default()); /// - /// assert_eq!(result, BTreeSet::default()); + /// assert_eq!(result, BTreeSet::new()); /// } /// ``` pub fn extract(content: Value) -> BTreeSet { @@ -88,7 +88,7 @@ impl AlienVault { BTreeSet::from_iter(passives.iter().filter_map(filter)) } else { - BTreeSet::default() + BTreeSet::new() } } } diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 673c3094..3f10f77e 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -78,7 +78,7 @@ impl Anubis { /// async fn main() { /// let result = anubis::Anubis::extract(Value::default()); /// - /// assert_eq!(result, BTreeSet::default()); + /// assert_eq!(result, BTreeSet::new()); /// } /// ``` pub fn extract(content: Value) -> BTreeSet { @@ -87,7 +87,7 @@ impl Anubis { BTreeSet::from_iter(subs.iter().filter_map(filter)) } else { - BTreeSet::default() + BTreeSet::new() } } } diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 19b63925..f511e2eb 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -78,7 +78,7 @@ impl Bevigil { /// async fn main() { /// let result = bevigil::Bevigil::extract(Value::default()); /// - /// assert_eq!(result, BTreeSet::default()); + /// assert_eq!(result, BTreeSet::new()); /// } /// ``` pub fn extract(content: Value) -> BTreeSet { @@ -87,7 +87,7 @@ impl Bevigil { BTreeSet::from_iter(subs.iter().filter_map(filter)) } else { - BTreeSet::default() + BTreeSet::new() } } } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index d0c3895f..688abc6f 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -42,7 +42,7 @@ pub mod mocks { BTreeSet::from_iter(subs.iter().filter_map(filter)) } else { - BTreeSet::default() + BTreeSet::new() } }; From 717a173d2201ca18c642a0228e67266045c9d623 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 22 Sep 2024 14:41:03 +0300 Subject: [PATCH 07/38] test: add json extractor tests and docstring updates --- src/bin/subscan.rs | 2 - src/enums.rs | 7 +-- src/extractors/html.rs | 3 +- src/extractors/json.rs | 47 ++++++++++--------- src/extractors/regex.rs | 4 +- src/interfaces/extractor.rs | 6 +-- src/interfaces/module.rs | 31 ++++++------ src/modules/generics/api_integration.rs | 10 ++-- src/modules/generics/search_engine.rs | 6 +-- src/types/core.rs | 1 + src/utils.rs | 36 +++++++++++++- testing/testdata/json/subdomains.json | 17 +++++++ tests/extractors/html_test.rs | 8 ++-- tests/extractors/json_test.rs | 39 +++++++++++++++ tests/extractors/regex_test.rs | 8 ++-- tests/modules/common.rs | 6 +++ tests/modules/engines/duckduckgo_test.rs | 3 +- tests/modules/integrations/alienvault_test.rs | 13 +++-- tests/modules/integrations/anubis_test.rs | 13 +++-- tests/modules/integrations/bevigil_test.rs | 14 ++++-- tests/stubs/module/integrations/bevigil.json | 5 ++ 21 files changed, 195 insertions(+), 84 deletions(-) create mode 100644 testing/testdata/json/subdomains.json create mode 100644 tests/extractors/json_test.rs diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index eaedb765..23f56da8 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -8,8 +8,6 @@ use subscan::{ #[tokio::main] async fn main() { - dotenvy::dotenv().ok(); - let cli = Cli::parse(); let config = RequesterConfig::from(&cli); diff --git a/src/enums.rs b/src/enums.rs index 4b7632dc..912f8a61 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -24,8 +24,9 @@ pub enum SubdomainExtractorDispatcher { /// struct to be compatible with /// [`SubdomainExtractorInterface`](crate::interfaces::extractor::SubdomainExtractorInterface) /// so parser method must be implemented and gave - /// this wrapper struct. See the [`JSONExtractor`] - /// examples and technical details + /// this wrapper struct. See the [`JSONExtractor`] struct + /// and [`InnerExtractMethod`](crate::types::core::InnerExtractMethod) + /// type for examples and technical details JSONExtractor(JSONExtractor), } @@ -38,7 +39,7 @@ pub enum SubdomainExtractorDispatcher { pub enum RequesterDispatcher { /// Chrome browser struct definition as a enum value. /// On this requester type, Chrome browser will run and - /// all HTTP requests made with browser. Has pros according + /// all HTTP requests made with browser. It has pros according /// to [`HTTPClient`] requester like running Js, rendering /// pages, etc. See the [`ChromeBrowser`] definition to learn /// usage diff --git a/src/extractors/html.rs b/src/extractors/html.rs index bb64dd19..e7cc9aa3 100644 --- a/src/extractors/html.rs +++ b/src/extractors/html.rs @@ -55,7 +55,6 @@ impl SubdomainExtractorInterface for HTMLExtractor { /// use subscan::extractors::html::HTMLExtractor; /// use subscan::interfaces::extractor::SubdomainExtractorInterface; /// use subscan::types::core::Subdomain; - /// use std::collections::BTreeSet; /// /// #[tokio::main] /// async fn main() { @@ -67,7 +66,7 @@ impl SubdomainExtractorInterface for HTMLExtractor { /// /// let result = extractor.extract(html, domain).await; /// - /// assert_eq!(result, BTreeSet::from(["bar.foo.com".into()])); + /// assert_eq!(result, [Subdomain::from("bar.foo.com")].into()); /// } /// ``` async fn extract(&self, content: String, domain: String) -> BTreeSet { diff --git a/src/extractors/json.rs b/src/extractors/json.rs index 7e584055..9eb9aa61 100644 --- a/src/extractors/json.rs +++ b/src/extractors/json.rs @@ -13,6 +13,25 @@ pub struct JSONExtractor { inner: InnerExtractMethod, } +impl JSONExtractor { + /// Creates a new [`JSONExtractor`] instance + /// + /// # Examples + /// + /// ```no_run + /// use subscan::extractors::json::JSONExtractor; + /// use std::collections::BTreeSet; + /// use serde_json::Value; + /// + /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::new())); + /// + /// // do something with extractor instance + /// ``` + pub fn new(inner: InnerExtractMethod) -> Self { + Self { inner } + } +} + #[async_trait] impl SubdomainExtractorInterface for JSONExtractor { /// Main extraction method to extract subdomains from @@ -22,7 +41,8 @@ impl SubdomainExtractorInterface for JSONExtractor { /// /// ``` /// use subscan::extractors::json::JSONExtractor; - /// use crate::subscan::interfaces::extractor::SubdomainExtractorInterface; + /// use subscan::interfaces::extractor::SubdomainExtractorInterface; + /// use subscan::types::core::Subdomain; /// use std::collections::BTreeSet; /// use serde_json::Value; /// @@ -32,35 +52,18 @@ impl SubdomainExtractorInterface for JSONExtractor { /// let domain = "foo.com".to_string(); /// /// let func = |item: Value| { - /// BTreeSet::from([item["foo"].as_str().unwrap().into()]) + /// [ + /// Subdomain::from(item["foo"].as_str().unwrap()) + /// ].into() /// }; /// let extractor = JSONExtractor::new(Box::new(func)); /// /// let result = extractor.extract(json, domain).await; /// - /// assert_eq!(result, BTreeSet::from(["bar".into()])); + /// assert_eq!(result, [Subdomain::from("bar")].into()); /// } /// ``` async fn extract(&self, content: String, _domain: String) -> BTreeSet { (self.inner)(serde_json::from_str(&content).unwrap_or_default()) } } - -impl JSONExtractor { - /// Creates a new [`JSONExtractor`] instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::extractors::json::JSONExtractor; - /// use std::collections::BTreeSet; - /// use serde_json::Value; - /// - /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::new())); - /// - /// // do something with extractor instance - /// ``` - pub fn new(inner: InnerExtractMethod) -> Self { - Self { inner } - } -} diff --git a/src/extractors/regex.rs b/src/extractors/regex.rs index d57f0e8b..99eeb0ea 100644 --- a/src/extractors/regex.rs +++ b/src/extractors/regex.rs @@ -67,10 +67,10 @@ impl SubdomainExtractorInterface for RegexExtractor { /// let extractor = RegexExtractor::default(); /// let result = extractor.extract(content, domain).await; /// - /// assert_eq!(result, BTreeSet::from([ + /// assert_eq!(result, [ /// Subdomain::from("bar.foo.com"), /// Subdomain::from("baz.foo.com"), - /// ])); + /// ].into()); /// assert_eq!(result.len(), 2); /// } /// ``` diff --git a/src/interfaces/extractor.rs b/src/interfaces/extractor.rs index cb110526..1027ee36 100644 --- a/src/interfaces/extractor.rs +++ b/src/interfaces/extractor.rs @@ -26,9 +26,7 @@ use std::collections::BTreeSet; /// #[async_trait] /// impl SubdomainExtractorInterface for CustomExtractor { /// async fn extract(&self, content: String, domain: String) -> BTreeSet { -/// BTreeSet::from([ -/// Subdomain::from(content.replace("-", "")) -/// ]) +/// [Subdomain::from(content.replace("-", ""))].into() /// } /// } /// @@ -41,7 +39,7 @@ use std::collections::BTreeSet; /// /// let result = extractor.extract(content, domain).await; /// -/// assert_eq!(result, BTreeSet::from([Subdomain::from("foo.com")])); +/// assert_eq!(result, [Subdomain::from("foo.com")].into()); /// } /// ``` #[async_trait] diff --git a/src/interfaces/module.rs b/src/interfaces/module.rs index 17951090..576ba28f 100644 --- a/src/interfaces/module.rs +++ b/src/interfaces/module.rs @@ -1,6 +1,6 @@ use crate::{ - config::SUBSCAN_ENV_NAMESPACE, enums::{RequesterDispatcher, SubdomainExtractorDispatcher}, + utils::env, }; use async_trait::async_trait; use enum_dispatch::enum_dispatch; @@ -34,7 +34,7 @@ use tokio::sync::Mutex; /// #[async_trait(?Send)] /// impl SubscanModuleInterface for FooModule { /// async fn name(&self) -> &str { -/// &"foo-module" +/// &"foo" /// } /// /// async fn requester(&self) -> Option<&Mutex> { @@ -53,18 +53,21 @@ use tokio::sync::Mutex; /// /// #[tokio::main] /// async fn main() { +/// let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); +/// let extracator = RegexExtractor::default(); +/// /// let mut foo = FooModule { -/// requester: Mutex::new(HTTPClient::default().into()), -/// extractor: RegexExtractor::default().into(), +/// requester: Mutex::new(requester), +/// extractor: SubdomainExtractorDispatcher::RegexExtractor(extracator), /// }; /// /// assert!(foo.requester().await.is_some()); /// assert!(foo.extractor().await.is_some()); /// -/// assert_eq!(foo.name().await, "foo-module"); +/// assert_eq!(foo.name().await, "foo"); /// /// // do something with results -/// let results = foo.run("foo.com".into()).await; +/// let results = foo.run("foo.com".to_string()).await; /// } /// ``` #[async_trait(?Send)] @@ -81,16 +84,10 @@ pub trait SubscanModuleInterface: Sync + Send { /// run this `run` method will be called, so this method /// should do everything async fn run(&mut self, domain: String) -> BTreeSet; - /// Fetches API key from system environment variables - /// if available. Module environment variables uses [`SUBSCAN_ENV_NAMESPACE`] - /// namespace with `SUBSCAN__APIKEY` format - async fn fetch_apikey(&self) -> String { - let key = format!( - "{}_{}_APIKEY", - SUBSCAN_ENV_NAMESPACE, - self.name().await.to_uppercase() - ); - - std::env::var(key).unwrap_or_default() + /// Fetches module API key from system environment variables + /// if available. See the [`get_subscan_module_apikey`](crate::utils::env::get_subscan_module_apikey) + /// for details + async fn fetch_apikey(&self) -> Result { + env::get_subscan_module_apikey(&self.name().await.to_uppercase()) } } diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 3a53dce2..9321d5e5 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -57,12 +57,12 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { match &self.auth { AuthMethod::APIKeyInHeader(key) => { - let apikey = self.fetch_apikey().await; + if let Ok(apikey) = self.fetch_apikey().await { + let name = HeaderName::from_str(key.as_str()).unwrap(); + let value = HeaderValue::from_str(apikey.as_str()).unwrap(); - let name = HeaderName::from_str(key.as_str()).unwrap(); - let value = HeaderValue::from_str(apikey.as_str()).unwrap(); - - requester.config().await.add_header(name, value); + requester.config().await.add_header(name, value); + } } AuthMethod::APIKeyInURL => {} AuthMethod::NoAuth => {} diff --git a/src/modules/generics/search_engine.rs b/src/modules/generics/search_engine.rs index 046efc63..fbc3c1a9 100644 --- a/src/modules/generics/search_engine.rs +++ b/src/modules/generics/search_engine.rs @@ -74,14 +74,14 @@ impl GenericSearchEngineModule { /// #[tokio::main] /// async fn main() { /// let module = GenericSearchEngineModule { - /// name: "foo-module".into(), + /// name: "foo-module".to_string(), /// url: Url::parse("https://foo.com").unwrap(), - /// param: "q".into(), + /// param: SearchQueryParam::from("q"), /// requester: Mutex::new(HTTPClient::default().into()), /// extractor: RegexExtractor::default().into(), /// }; /// - /// let mut query = module.get_search_query("foo.com".into()).await; + /// let mut query = module.get_search_query("foo.com".to_string()).await; /// /// assert_eq!(query.as_search_str(), "site:foo.com"); /// } diff --git a/src/types/core.rs b/src/types/core.rs index 4ff29da9..8c9a6e84 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -4,4 +4,5 @@ use std::collections::BTreeSet; /// Core subdomain data type pub type Subdomain = String; /// Inner extract method type definition for [`JSONExtractor`](crate::extractors::json::JSONExtractor) +/// In summary it takes a [`Value`] as a parameter and parse subdomains pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; diff --git a/src/utils.rs b/src/utils.rs index 6fcc6146..32ddcbbc 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,8 +4,7 @@ pub mod regex { use regex::{Error, Regex}; /// Helper function that generates dynamically regex statement - /// by given domain address to parse subdomain addresses - /// according to any target domain address + /// by given domain address to parse subdomains /// /// # Examples /// @@ -32,3 +31,36 @@ pub mod regex { Regex::new(&formatted) } } + +pub mod env { + use crate::config::SUBSCAN_ENV_NAMESPACE; + + /// Fetches API key from system environment variables + /// if available. Module environment variables uses [`SUBSCAN_ENV_NAMESPACE`] + /// namespace with `SUBSCAN__APIKEY` format + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use subscan::utils::env::get_subscan_module_apikey; + /// + /// #[tokio::main] + /// async fn main() { + /// let env_key = "SUBSCAN_FOO_APIKEY"; + /// + /// env::remove_var(env_key); + /// + /// assert_eq!(get_subscan_module_apikey("FOO").is_ok(), false); + /// + /// env::set_var(env_key, "foo"); + /// + /// assert_eq!(get_subscan_module_apikey("FOO").unwrap(), "foo"); + /// } + /// ``` + pub fn get_subscan_module_apikey(name: &str) -> Result { + let key = format!("{}_{}_APIKEY", SUBSCAN_ENV_NAMESPACE, name); + + dotenvy::var(key) + } +} diff --git a/testing/testdata/json/subdomains.json b/testing/testdata/json/subdomains.json new file mode 100644 index 00000000..c072aaac --- /dev/null +++ b/testing/testdata/json/subdomains.json @@ -0,0 +1,17 @@ +{ + "data": { + "subdomains": [ + { + "ip": "127.0.0.1", + "subdomain": "bar.foo.com" + }, + { + "ip": "127.0.0.1", + "subdomain": "baz.foo.com" + } + ] + }, + "id": "7a3db2d9-1713-4e72-8261-5976fa5b9cf9", + "scan_time": "2024-09-22T11:15:00.430218", + "status": "SUCCESS" +} diff --git a/tests/extractors/html_test.rs b/tests/extractors/html_test.rs index 5b1d8486..98eeeb84 100644 --- a/tests/extractors/html_test.rs +++ b/tests/extractors/html_test.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeSet; + use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, funcs::read_testdata, @@ -24,10 +26,10 @@ async fn extract_with_removes() { let extractor = HTMLExtractor::new(selector, vec!["
".to_string()]); let result = extractor.extract(html, TEST_DOMAIN.to_string()).await; - let expected = [ + let expected = BTreeSet::from([ TEST_BAR_SUBDOMAIN.to_string(), TEST_BAZ_SUBDOMAIN.to_string(), - ]; + ]); - assert_eq!(result, expected.into()); + assert_eq!(result, expected); } diff --git a/tests/extractors/json_test.rs b/tests/extractors/json_test.rs new file mode 100644 index 00000000..899d8f2b --- /dev/null +++ b/tests/extractors/json_test.rs @@ -0,0 +1,39 @@ +use std::collections::BTreeSet; + +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_testdata, +}; +use serde_json::Value; +use subscan::extractors::json::JSONExtractor; +use subscan::interfaces::extractor::SubdomainExtractorInterface; + +#[tokio::test] +async fn extract_test() { + let json = read_testdata("json/subdomains.json"); + + let inner_parser = |item: Value| { + if let Some(subs) = item["data"]["subdomains"].as_array() { + let filter = |item: &Value| Some(item["subdomain"].as_str().unwrap().to_string()); + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::new() + } + }; + + let extractor = JSONExtractor::new(Box::new(inner_parser)); + + let result = extractor.extract(json, TEST_DOMAIN.to_string()).await; + let no_result = extractor + .extract(String::new(), TEST_DOMAIN.to_string()) + .await; + + let expected = BTreeSet::from([ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ]); + + assert_eq!(result, expected); + assert_eq!(no_result, BTreeSet::new()); +} diff --git a/tests/extractors/regex_test.rs b/tests/extractors/regex_test.rs index f7abf86f..85f7a74f 100644 --- a/tests/extractors/regex_test.rs +++ b/tests/extractors/regex_test.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeSet; + use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; use subscan::extractors::regex::RegexExtractor; use subscan::interfaces::extractor::SubdomainExtractorInterface; @@ -21,10 +23,10 @@ async fn extract_test() { let extractor = RegexExtractor::default(); let result = extractor.extract(content, TEST_DOMAIN.to_string()).await; - let expected = [ + let expected = BTreeSet::from([ TEST_BAR_SUBDOMAIN.to_string(), TEST_BAZ_SUBDOMAIN.to_string(), - ]; + ]); - assert_eq!(result, expected.into()); + assert_eq!(result, expected); } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 688abc6f..6ec44279 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -58,4 +58,10 @@ pub mod mocks { extractor: extractor.into(), } } + + pub fn wrap_url_with_mock_func(url: &str) -> Box String + Sync + Send> { + let url = Url::parse(url).unwrap().to_string(); + + Box::new(move |_| url.clone()) + } } diff --git a/tests/modules/engines/duckduckgo_test.rs b/tests/modules/engines/duckduckgo_test.rs index b7d7fef8..ddd06e4f 100644 --- a/tests/modules/engines/duckduckgo_test.rs +++ b/tests/modules/engines/duckduckgo_test.rs @@ -6,6 +6,7 @@ use subscan::{ modules::engines::duckduckgo::{self, DUCKDUCKGO_MODULE_NAME}, requesters::client::HTTPClient, }; +use tokio::sync::Mutex; #[tokio::test] #[stubr::mock("module/engines/duckduckgo.json")] @@ -13,7 +14,7 @@ async fn duckduckgo_run_test() { let mut duckduckgo = duckduckgo::DuckDuckGo::new(); let new_requester = HTTPClient::default(); - duckduckgo.requester = RequesterDispatcher::HTTPClient(new_requester).into(); + duckduckgo.requester = Mutex::new(RequesterDispatcher::HTTPClient(new_requester)); duckduckgo.url = Url::parse(stubr.uri().as_str()).unwrap(); let result = duckduckgo.run(TEST_DOMAIN.to_string()).await; diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index 084051ac..64584c39 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -1,5 +1,9 @@ -use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; -use reqwest::Url; +use std::collections::BTreeSet; + +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + mocks::wrap_url_with_mock_func, +}; use serde_json::{self, Value}; use subscan::{ interfaces::module::SubscanModuleInterface, @@ -10,9 +14,8 @@ use subscan::{ #[stubr::mock("module/integrations/alienvault.json")] async fn alienvault_run_test() { let mut alienvault = alienvault::AlienVault::new(); - let url = Url::parse(stubr.path("/alienvault").as_str()).unwrap(); - alienvault.url = Box::new(move |_| url.to_string()); + alienvault.url = wrap_url_with_mock_func(stubr.path("/alienvault").as_str()); let result = alienvault.run(TEST_DOMAIN.to_string()).await; @@ -43,5 +46,5 @@ async fn extract_test() { let not_extracted = alienvault::AlienVault::extract(Value::default()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); - assert_eq!(not_extracted, [].into()); + assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index 4ad08829..04ae4452 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -1,5 +1,9 @@ -use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}; -use reqwest::Url; +use std::collections::BTreeSet; + +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}, + mocks::wrap_url_with_mock_func, +}; use serde_json::{self, Value}; use subscan::{ interfaces::module::SubscanModuleInterface, @@ -10,9 +14,8 @@ use subscan::{ #[stubr::mock("module/integrations/anubis.json")] async fn anubis_run_test() { let mut anubis = anubis::Anubis::new(); - let url = Url::parse(stubr.path("/anubis").as_str()).unwrap(); - anubis.url = Box::new(move |_| url.to_string()); + anubis.url = wrap_url_with_mock_func(stubr.path("/anubis").as_str()); let result = anubis.run(TEST_DOMAIN.to_string()).await; @@ -36,5 +39,5 @@ async fn extract_test() { let not_extracted = anubis::Anubis::extract(Value::default()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); - assert_eq!(not_extracted, [].into()); + assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs index ff59f4f6..1736fbe5 100644 --- a/tests/modules/integrations/bevigil_test.rs +++ b/tests/modules/integrations/bevigil_test.rs @@ -1,6 +1,9 @@ -use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; -use reqwest::Url; +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + mocks::wrap_url_with_mock_func, +}; use serde_json::{self, Value}; +use std::{collections::BTreeSet, env}; use subscan::{ interfaces::module::SubscanModuleInterface, modules::integrations::bevigil::{self, BEVIGIL_MODULE_NAME, BEVIGIL_URL}, @@ -9,10 +12,11 @@ use subscan::{ #[tokio::test] #[stubr::mock("module/integrations/bevigil.json")] async fn bevigil_run_test() { + env::set_var("SUBSCAN_BEVIGIL_APIKEY", "bevigil-api-key"); + let mut bevigil = bevigil::Bevigil::new(); - let url = Url::parse(stubr.path("/bevigil").as_str()).unwrap(); - bevigil.url = Box::new(move |_| url.to_string()); + bevigil.url = wrap_url_with_mock_func(stubr.path("/bevigil").as_str()); let result = bevigil.run(TEST_DOMAIN.to_string()).await; @@ -43,5 +47,5 @@ async fn extract_test() { let not_extracted = bevigil::Bevigil::extract(Value::default()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); - assert_eq!(not_extracted, [].into()); + assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/stubs/module/integrations/bevigil.json b/tests/stubs/module/integrations/bevigil.json index 39b3d5f7..fb0ab94e 100644 --- a/tests/stubs/module/integrations/bevigil.json +++ b/tests/stubs/module/integrations/bevigil.json @@ -1,5 +1,10 @@ { "request": { + "headers": { + "X-Access-Token": { + "equalTo": "bevigil-api-key" + } + }, "method": "GET", "urlPath": "/bevigil" }, From d2a4a7ca5e16257c228bd5489f0e93e107a350cd Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 22 Sep 2024 15:09:01 +0300 Subject: [PATCH 08/38] test: extend generic api integration tests according to auth methods --- src/cache.rs | 6 ++- tests/modules/common.rs | 10 ++--- .../modules/generics/api_integration_test.rs | 42 ++++++++++++++++--- ...tion.json => api-integration-no-auth.json} | 0 .../api-integration-with-header-auth.json | 22 ++++++++++ .../api-integration-with-url-auth.json | 22 ++++++++++ 6 files changed, 90 insertions(+), 12 deletions(-) rename tests/stubs/module/generics/{api-integration.json => api-integration-no-auth.json} (100%) create mode 100644 tests/stubs/module/generics/api-integration-with-header-auth.json create mode 100644 tests/stubs/module/generics/api-integration-with-url-auth.json diff --git a/src/cache.rs b/src/cache.rs index 91e284a9..f77bd8bd 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,6 +1,8 @@ use crate::{ - modules::engines::{bing, duckduckgo, google, yahoo}, - modules::integrations::{alienvault, anubis, bevigil}, + modules::{ + engines::{bing, duckduckgo, google, yahoo}, + integrations::{alienvault, anubis, bevigil}, + }, SubscanModule, }; use lazy_static::lazy_static; diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 6ec44279..adc9fe58 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -1,9 +1,10 @@ pub mod constants { - pub const TEST_MODULE_NAME: &str = "foo-module"; + pub const TEST_MODULE_NAME: &str = "foo"; pub const TEST_URL: &str = "http://foo.com"; pub const TEST_DOMAIN: &str = "foo.com"; pub const TEST_BAR_SUBDOMAIN: &str = "bar.foo.com"; pub const TEST_BAZ_SUBDOMAIN: &str = "baz.foo.com"; + pub const TEST_API_KEY: &str = "test-api-key"; } pub mod mocks { @@ -35,7 +36,7 @@ pub mod mocks { } } - pub fn generic_api_integration(url: &str) -> GenericAPIIntegrationModule { + pub fn generic_api_integration(url: &str, auth: AuthMethod) -> GenericAPIIntegrationModule { let parse = |json: Value| { if let Some(subs) = json["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); @@ -48,12 +49,11 @@ pub mod mocks { let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); let extractor = JSONExtractor::new(Box::new(parse)); - let url = url.to_string(); GenericAPIIntegrationModule { name: TEST_MODULE_NAME.to_string(), - url: Box::new(move |_| url.clone()), - auth: AuthMethod::NoAuth, + url: wrap_url_with_mock_func(url), + auth, requester: requester.into(), extractor: extractor.into(), } diff --git a/tests/modules/generics/api_integration_test.rs b/tests/modules/generics/api_integration_test.rs index b17467d9..835da6b5 100644 --- a/tests/modules/generics/api_integration_test.rs +++ b/tests/modules/generics/api_integration_test.rs @@ -1,13 +1,17 @@ use crate::common::{ - constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN, TEST_MODULE_NAME}, + constants::{ + TEST_API_KEY, TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN, TEST_MODULE_NAME, + }, mocks::generic_api_integration, }; -use subscan::interfaces::module::SubscanModuleInterface; +use std::env; +use subscan::{enums::AuthMethod, interfaces::module::SubscanModuleInterface}; #[tokio::test] -#[stubr::mock("module/generics/api-integration.json")] -async fn generic_api_integration_run_test() { - let mut module = generic_api_integration(&stubr.path("/subdomains")); +#[stubr::mock("module/generics/api-integration-no-auth.json")] +async fn generic_api_integration_run_test_no_auth() { + let auth = AuthMethod::NoAuth; + let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); let result = module.run(TEST_DOMAIN.to_string()).await; @@ -21,3 +25,31 @@ async fn generic_api_integration_run_test() { .into() ); } + +#[tokio::test] +#[stubr::mock("module/generics/api-integration-with-header-auth.json")] +async fn generic_api_integration_run_test_with_header_auth() { + env::set_var("SUBSCAN_FOO_APIKEY", TEST_API_KEY); + + let auth = AuthMethod::APIKeyInHeader("X-API-Key".to_string()); + let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); + + let result = module.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); +} + +#[tokio::test] +#[stubr::mock("module/generics/api-integration-with-url-auth.json")] +async fn generic_api_integration_run_test_with_url_auth() { + let auth = AuthMethod::APIKeyInURL; + let url = format!("{}?apikey={}", stubr.path("/subdomains"), TEST_API_KEY); + + let mut module = generic_api_integration(&url, auth); + + let result = module.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); +} diff --git a/tests/stubs/module/generics/api-integration.json b/tests/stubs/module/generics/api-integration-no-auth.json similarity index 100% rename from tests/stubs/module/generics/api-integration.json rename to tests/stubs/module/generics/api-integration-no-auth.json diff --git a/tests/stubs/module/generics/api-integration-with-header-auth.json b/tests/stubs/module/generics/api-integration-with-header-auth.json new file mode 100644 index 00000000..ae9df9ca --- /dev/null +++ b/tests/stubs/module/generics/api-integration-with-header-auth.json @@ -0,0 +1,22 @@ +{ + "request": { + "headers": { + "X-API-Key": { + "equalTo": "test-api-key" + } + }, + "method": "GET", + "urlPath": "/subdomains" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "subdomains": [ + "bar.foo.com" + ] + }, + "status": 200 + } +} diff --git a/tests/stubs/module/generics/api-integration-with-url-auth.json b/tests/stubs/module/generics/api-integration-with-url-auth.json new file mode 100644 index 00000000..7b3487e4 --- /dev/null +++ b/tests/stubs/module/generics/api-integration-with-url-auth.json @@ -0,0 +1,22 @@ +{ + "request": { + "method": "GET", + "queryParameters": { + "apikey": { + "equalTo": "test-api-key" + } + }, + "urlPath": "/subdomains" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "subdomains": [ + "bar.foo.com" + ] + }, + "status": 200 + } +} From 00840cd19a170795bc02ea6cb3f047ba1243e8be Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 22 Sep 2024 17:22:34 +0300 Subject: [PATCH 09/38] refactor: generic api integration --- src/modules/generics/api_integration.rs | 49 +++++++++++-------- src/modules/integrations/alienvault.rs | 6 +-- src/modules/integrations/anubis.rs | 6 +-- src/modules/integrations/bevigil.rs | 6 +-- tests/modules/common.rs | 6 +-- tests/modules/integrations/alienvault_test.rs | 2 +- tests/modules/integrations/anubis_test.rs | 2 +- tests/modules/integrations/bevigil_test.rs | 2 +- 8 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 9321d5e5..4cf065c1 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -6,10 +6,8 @@ use crate::{ }, }; use async_trait::async_trait; -use reqwest::{ - header::{HeaderName, HeaderValue}, - Url, -}; +use reqwest::header::{HeaderName, HeaderValue}; +use reqwest::Url; use std::{collections::BTreeSet, str::FromStr}; use tokio::sync::Mutex; @@ -27,7 +25,7 @@ pub struct GenericAPIIntegrationModule { pub name: String, /// Simple function field that gets query URL /// by given domain address - pub url: Box String + Sync + Send>, + pub url: Box String + Sync + Send>, /// Set authentication method, see [`AuthMethod`] enum /// for details pub auth: AuthMethod, @@ -37,6 +35,30 @@ pub struct GenericAPIIntegrationModule { pub extractor: SubdomainExtractorDispatcher, } +impl GenericAPIIntegrationModule { + async fn authenticate(&self, domain: &str) -> Url { + let url: Url = (self.url)(domain).parse().unwrap(); + let apikey = self.fetch_apikey().await; + + match &self.auth { + AuthMethod::APIKeyInHeader(key) => { + if let Ok(apikey) = apikey { + let mut requester = self.requester.lock().await; + + let (name, value) = (HeaderName::from_str(key), HeaderValue::from_str(&apikey)); + + if let (Ok(name), Ok(value)) = (name, value) { + requester.config().await.add_header(name, value); + } + } + } + AuthMethod::APIKeyInURL | AuthMethod::NoAuth => {} + } + + url + } +} + #[async_trait(?Send)] impl SubscanModuleInterface for GenericAPIIntegrationModule { async fn name(&self) -> &str { @@ -52,22 +74,9 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { } async fn run(&mut self, domain: String) -> BTreeSet { - let mut requester = self.requester.lock().await; - let url = Url::parse(&(self.url)(domain.clone())).unwrap(); - - match &self.auth { - AuthMethod::APIKeyInHeader(key) => { - if let Ok(apikey) = self.fetch_apikey().await { - let name = HeaderName::from_str(key.as_str()).unwrap(); - let value = HeaderValue::from_str(apikey.as_str()).unwrap(); - - requester.config().await.add_header(name, value); - } - } - AuthMethod::APIKeyInURL => {} - AuthMethod::NoAuth => {} - } + let url = self.authenticate(&domain).await; + let requester = self.requester.lock().await; let content = requester.get_content(url).await.unwrap_or_default(); self.extractor.extract(content, domain).await diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 6f4e784e..4e288e24 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -56,13 +56,13 @@ impl AlienVault { /// /// #[tokio::main] /// async fn main() { - /// let domain = "foo.com".to_string(); - /// let url = alienvault::AlienVault::get_query_url(domain.clone()); + /// let domain = "foo.com"; + /// let url = alienvault::AlienVault::get_query_url(&domain); /// /// assert_eq!(url, format!("{ALIENVAULT_URL}/{domain}/passive_dns")); /// } /// ``` - pub fn get_query_url(domain: String) -> String { + pub fn get_query_url(domain: &str) -> String { format!("{ALIENVAULT_URL}/{domain}/passive_dns") } diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 3f10f77e..63849eaa 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -55,13 +55,13 @@ impl Anubis { /// /// #[tokio::main] /// async fn main() { - /// let domain = "foo.com".to_string(); - /// let url = anubis::Anubis::get_query_url(domain.clone()); + /// let domain = "foo.com"; + /// let url = anubis::Anubis::get_query_url(&domain); /// /// assert_eq!(url, format!("{ANUBIS_URL}/{domain}")); /// } /// ``` - pub fn get_query_url(domain: String) -> String { + pub fn get_query_url(domain: &str) -> String { format!("{ANUBIS_URL}/{domain}") } diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index f511e2eb..8e167af8 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -55,13 +55,13 @@ impl Bevigil { /// /// #[tokio::main] /// async fn main() { - /// let domain = "foo.com".to_string(); - /// let url = bevigil::Bevigil::get_query_url(domain.clone()); + /// let domain = "foo.com"; + /// let url = bevigil::Bevigil::get_query_url(&domain); /// /// assert_eq!(url, format!("{BEVIGIL_URL}/{domain}/subdomains")); /// } /// ``` - pub fn get_query_url(domain: String) -> String { + pub fn get_query_url(domain: &str) -> String { format!("{BEVIGIL_URL}/{domain}/subdomains") } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index adc9fe58..a3a9c5d1 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -59,9 +59,9 @@ pub mod mocks { } } - pub fn wrap_url_with_mock_func(url: &str) -> Box String + Sync + Send> { - let url = Url::parse(url).unwrap().to_string(); + pub fn wrap_url_with_mock_func(url: &str) -> Box String + Sync + Send> { + let url: Url = url.parse().unwrap(); - Box::new(move |_| url.clone()) + Box::new(move |_| url.to_string().clone()) } } diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index 64584c39..2155d5e3 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -32,7 +32,7 @@ async fn alienvault_run_test() { #[tokio::test] async fn get_query_url_test() { - let url = alienvault::AlienVault::get_query_url(TEST_DOMAIN.to_string()); + let url = alienvault::AlienVault::get_query_url(TEST_DOMAIN); let expected = format!("{ALIENVAULT_URL}/{TEST_DOMAIN}/passive_dns"); assert_eq!(url, expected); diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index 04ae4452..6dac28ac 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -25,7 +25,7 @@ async fn anubis_run_test() { #[tokio::test] async fn get_query_url_test() { - let url = anubis::Anubis::get_query_url(TEST_DOMAIN.to_string()); + let url = anubis::Anubis::get_query_url(TEST_DOMAIN); let expected = format!("{ANUBIS_URL}/{TEST_DOMAIN}"); assert_eq!(url, expected); diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs index 1736fbe5..6c784a3a 100644 --- a/tests/modules/integrations/bevigil_test.rs +++ b/tests/modules/integrations/bevigil_test.rs @@ -33,7 +33,7 @@ async fn bevigil_run_test() { #[tokio::test] async fn get_query_url_test() { - let url = bevigil::Bevigil::get_query_url(TEST_DOMAIN.to_string()); + let url = bevigil::Bevigil::get_query_url(TEST_DOMAIN); let expected = format!("{BEVIGIL_URL}/{TEST_DOMAIN}/subdomains"); assert_eq!(url, expected); From 3b03eaf673fe29bf8c5a2cf0aac52935f9aa3d28 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 00:03:45 +0300 Subject: [PATCH 10/38] refactor: api key logics --- Cargo.toml | 1 + src/bin/subscan.rs | 10 +++--- src/enums.rs | 15 +++++--- src/interfaces/module.rs | 10 +++--- src/lib.rs | 5 +++ src/modules/generics/api_integration.rs | 21 +++++++----- src/modules/integrations/alienvault.rs | 4 +-- src/modules/integrations/anubis.rs | 4 +-- src/modules/integrations/bevigil.rs | 4 +-- src/types/core.rs | 4 +++ src/utils.rs | 15 +++++--- tests/modules/common.rs | 27 ++++++++++----- .../modules/generics/api_integration_test.rs | 34 +++++++++++-------- tests/modules/generics/search_engine_test.rs | 6 ++-- 14 files changed, 101 insertions(+), 59 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd6a4152..01d5df1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,5 +21,6 @@ enum_dispatch = "0.3.13" dotenvy = "0.15.7" [dev-dependencies] +md5 = "0.7.0" automod = "1.0.14" stubr = "0.6.2" diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index 23f56da8..2e50f4b8 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -17,16 +17,16 @@ async fn main() { let mut module = item.lock().await; let requester = module.requester().await.unwrap(); + if module.name().await != "Bevigil" { + continue; + } + println!( "{:#?} {:p}", requester.lock().await.config().await, - requester + requester, ); - if module.name().await != "Bevigil" { - continue; - } - println!("Running...{}({})", module.name().await, cli.domain.clone()); let res = module.run(cli.domain.clone()).await; diff --git a/src/enums.rs b/src/enums.rs index 912f8a61..a27e34ad 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -55,14 +55,19 @@ pub enum RequesterDispatcher { /// [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses them to apply correct auth method. See the /// method descriptions to learn how it works -pub enum AuthMethod { +pub enum APIAuthMethod { /// Some APIs uses request headers to get /// API key. If this auth type selected API key /// will add in request headers with a given header key - APIKeyInHeader(String), - /// This auth type uses when API require API - /// key in URL - APIKeyInURL, + APIKeyAsHeader(String), + /// This auth type uses when API require API key + /// as a query param. If this method chose API key + /// will be added in URL as a query param with given + /// parameter key + APIKeyAsQueryParam(String), + /// This auth method checks if the query URL includes + /// API key's self + APIKeyAsURLSlug, /// This auth type does nothing for auth NoAuth, } diff --git a/src/interfaces/module.rs b/src/interfaces/module.rs index 576ba28f..014ed340 100644 --- a/src/interfaces/module.rs +++ b/src/interfaces/module.rs @@ -1,5 +1,6 @@ use crate::{ enums::{RequesterDispatcher, SubdomainExtractorDispatcher}, + types::core::APIKeyAsEnv, utils::env, }; use async_trait::async_trait; @@ -84,10 +85,11 @@ pub trait SubscanModuleInterface: Sync + Send { /// run this `run` method will be called, so this method /// should do everything async fn run(&mut self, domain: String) -> BTreeSet; - /// Fetches module API key from system environment variables - /// if available. See the [`get_subscan_module_apikey`](crate::utils::env::get_subscan_module_apikey) + /// Loads `.env` file and fetches module API key with variable name. If system + /// environment variable set with same name, `.env` file will be overrode + /// See the [`get_subscan_module_apikey`](crate::utils::env::get_subscan_module_apikey) /// for details - async fn fetch_apikey(&self) -> Result { - env::get_subscan_module_apikey(&self.name().await.to_uppercase()) + async fn fetch_apikey(&self) -> APIKeyAsEnv { + env::get_subscan_module_apikey(self.name().await) } } diff --git a/src/lib.rs b/src/lib.rs index 187a2bc4..f8d22b2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ use enums::{RequesterDispatcher, SubdomainExtractorDispatcher}; use interfaces::module::SubscanModuleInterface; use std::collections::BTreeSet; use tokio::sync::Mutex; +use types::core::APIKeyAsEnv; /// Wrapper around a [`SubscanModuleInterface`] trait object /// @@ -103,4 +104,8 @@ impl SubscanModule { pub async fn name(&self) -> &str { self.0.name().await } + + pub async fn fetch_apikey(&self) -> APIKeyAsEnv { + self.0.fetch_apikey().await + } } diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 4cf065c1..4b7ce92b 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -1,5 +1,5 @@ use crate::{ - enums::{AuthMethod, RequesterDispatcher, SubdomainExtractorDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher, SubdomainExtractorDispatcher}, interfaces::{ extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, requester::RequesterInterface, @@ -26,9 +26,9 @@ pub struct GenericAPIIntegrationModule { /// Simple function field that gets query URL /// by given domain address pub url: Box String + Sync + Send>, - /// Set authentication method, see [`AuthMethod`] enum + /// Set authentication method, see [`APIAuthMethod`] enum /// for details - pub auth: AuthMethod, + pub auth: APIAuthMethod, /// Requester object instance for HTTP requests pub requester: Mutex, /// Any extractor object to extract subdomain from content @@ -36,12 +36,12 @@ pub struct GenericAPIIntegrationModule { } impl GenericAPIIntegrationModule { - async fn authenticate(&self, domain: &str) -> Url { - let url: Url = (self.url)(domain).parse().unwrap(); - let apikey = self.fetch_apikey().await; + pub async fn authenticate(&self, domain: &str) -> Url { + let mut url: Url = (self.url)(domain).parse().unwrap(); + let (_, apikey) = self.fetch_apikey().await; match &self.auth { - AuthMethod::APIKeyInHeader(key) => { + APIAuthMethod::APIKeyAsHeader(key) => { if let Ok(apikey) = apikey { let mut requester = self.requester.lock().await; @@ -52,7 +52,12 @@ impl GenericAPIIntegrationModule { } } } - AuthMethod::APIKeyInURL | AuthMethod::NoAuth => {} + APIAuthMethod::APIKeyAsQueryParam(query_param) => { + if let Ok(apikey) = apikey { + url.set_query(Some(&format!("{query_param}={apikey}"))) + } + } + APIAuthMethod::APIKeyAsURLSlug | APIAuthMethod::NoAuth => {} } url diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 4e288e24..ef318a66 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -1,7 +1,7 @@ use std::collections::BTreeSet; use crate::{ - enums::{AuthMethod, RequesterDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher}, extractors::json::JSONExtractor, modules::generics::api_integration::GenericAPIIntegrationModule, requesters::client::HTTPClient, @@ -41,7 +41,7 @@ impl AlienVault { GenericAPIIntegrationModule { name: ALIENVAULT_MODULE_NAME.into(), url: Box::new(Self::get_query_url), - auth: AuthMethod::NoAuth, + auth: APIAuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), } diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 63849eaa..4317a788 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -1,5 +1,5 @@ use crate::{ - enums::{AuthMethod, RequesterDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher}, extractors::json::JSONExtractor, modules::generics::api_integration::GenericAPIIntegrationModule, requesters::client::HTTPClient, @@ -40,7 +40,7 @@ impl Anubis { GenericAPIIntegrationModule { name: ANUBIS_MODULE_NAME.into(), url: Box::new(Self::get_query_url), - auth: AuthMethod::NoAuth, + auth: APIAuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), } diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 8e167af8..14ee8ce0 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -1,5 +1,5 @@ use crate::{ - enums::{AuthMethod, RequesterDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher}, extractors::json::JSONExtractor, modules::generics::api_integration::GenericAPIIntegrationModule, requesters::client::HTTPClient, @@ -40,7 +40,7 @@ impl Bevigil { GenericAPIIntegrationModule { name: BEVIGIL_MODULE_NAME.into(), url: Box::new(Self::get_query_url), - auth: AuthMethod::APIKeyInHeader("X-Access-Token".into()), + auth: APIAuthMethod::APIKeyAsHeader("X-Access-Token".into()), requester: requester.into(), extractor: extractor.into(), } diff --git a/src/types/core.rs b/src/types/core.rs index 8c9a6e84..abc3d1c0 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -6,3 +6,7 @@ pub type Subdomain = String; /// Inner extract method type definition for [`JSONExtractor`](crate::extractors::json::JSONExtractor) /// In summary it takes a [`Value`] as a parameter and parse subdomains pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; + +/// Simple tuple type to store environment API key +/// variable with variable name +pub type APIKeyAsEnv = (String, Result); diff --git a/src/utils.rs b/src/utils.rs index 32ddcbbc..cddbf9f2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -34,6 +34,7 @@ pub mod regex { pub mod env { use crate::config::SUBSCAN_ENV_NAMESPACE; + use crate::types::core::APIKeyAsEnv; /// Fetches API key from system environment variables /// if available. Module environment variables uses [`SUBSCAN_ENV_NAMESPACE`] @@ -51,16 +52,20 @@ pub mod env { /// /// env::remove_var(env_key); /// - /// assert_eq!(get_subscan_module_apikey("FOO").is_ok(), false); + /// assert_eq!(get_subscan_module_apikey("FOO").0, env_key); + /// assert_eq!(get_subscan_module_apikey("FOO").1.is_ok(), false); /// /// env::set_var(env_key, "foo"); /// - /// assert_eq!(get_subscan_module_apikey("FOO").unwrap(), "foo"); + /// assert_eq!(get_subscan_module_apikey("FOO").0, env_key); + /// assert_eq!(get_subscan_module_apikey("FOO").1.unwrap(), "foo"); + /// + /// env::remove_var(env_key); /// } /// ``` - pub fn get_subscan_module_apikey(name: &str) -> Result { - let key = format!("{}_{}_APIKEY", SUBSCAN_ENV_NAMESPACE, name); + pub fn get_subscan_module_apikey(name: &str) -> APIKeyAsEnv { + let var_name = format!("{}_{}_APIKEY", SUBSCAN_ENV_NAMESPACE, name.to_uppercase()); - dotenvy::var(key) + (var_name.clone(), dotenvy::var(var_name)) } } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index a3a9c5d1..93edeff0 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -1,5 +1,8 @@ +use reqwest::Url; +use serde_json::Value; +use std::{collections::BTreeSet, thread}; + pub mod constants { - pub const TEST_MODULE_NAME: &str = "foo"; pub const TEST_URL: &str = "http://foo.com"; pub const TEST_DOMAIN: &str = "foo.com"; pub const TEST_BAR_SUBDOMAIN: &str = "bar.foo.com"; @@ -7,13 +10,17 @@ pub mod constants { pub const TEST_API_KEY: &str = "test-api-key"; } +pub mod funcs { + pub fn md5_hex(target: String) -> String { + format!("{:x}", md5::compute(target)) + } +} + pub mod mocks { - use super::constants::TEST_MODULE_NAME; - use reqwest::Url; - use serde_json::Value; - use std::collections::BTreeSet; + use super::funcs::md5_hex; + use super::*; use subscan::{ - enums::{AuthMethod, RequesterDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher}, extractors::{json::JSONExtractor, regex::RegexExtractor}, modules::generics::{ api_integration::GenericAPIIntegrationModule, search_engine::GenericSearchEngineModule, @@ -26,9 +33,10 @@ pub mod mocks { let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); let extractor = RegexExtractor::default(); let url = Url::parse(url); + let thread_name = thread::current().name().unwrap().to_uppercase(); GenericSearchEngineModule { - name: TEST_MODULE_NAME.to_string(), + name: md5_hex(thread_name), url: url.unwrap(), param: SearchQueryParam::from("q"), requester: requester.into(), @@ -36,7 +44,7 @@ pub mod mocks { } } - pub fn generic_api_integration(url: &str, auth: AuthMethod) -> GenericAPIIntegrationModule { + pub fn generic_api_integration(url: &str, auth: APIAuthMethod) -> GenericAPIIntegrationModule { let parse = |json: Value| { if let Some(subs) = json["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); @@ -49,9 +57,10 @@ pub mod mocks { let requester = RequesterDispatcher::HTTPClient(HTTPClient::default()); let extractor = JSONExtractor::new(Box::new(parse)); + let thread_name = thread::current().name().unwrap().to_uppercase(); GenericAPIIntegrationModule { - name: TEST_MODULE_NAME.to_string(), + name: md5_hex(thread_name), url: wrap_url_with_mock_func(url), auth, requester: requester.into(), diff --git a/tests/modules/generics/api_integration_test.rs b/tests/modules/generics/api_integration_test.rs index 835da6b5..a90ae3a6 100644 --- a/tests/modules/generics/api_integration_test.rs +++ b/tests/modules/generics/api_integration_test.rs @@ -1,21 +1,19 @@ use crate::common::{ - constants::{ - TEST_API_KEY, TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN, TEST_MODULE_NAME, - }, + constants::{TEST_API_KEY, TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, mocks::generic_api_integration, }; use std::env; -use subscan::{enums::AuthMethod, interfaces::module::SubscanModuleInterface}; +use subscan::{enums::APIAuthMethod, interfaces::module::SubscanModuleInterface}; #[tokio::test] #[stubr::mock("module/generics/api-integration-no-auth.json")] async fn generic_api_integration_run_test_no_auth() { - let auth = AuthMethod::NoAuth; + let auth = APIAuthMethod::NoAuth; let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); let result = module.run(TEST_DOMAIN.to_string()).await; - assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(module.name().await, module.name); assert_eq!( result, [ @@ -29,27 +27,35 @@ async fn generic_api_integration_run_test_no_auth() { #[tokio::test] #[stubr::mock("module/generics/api-integration-with-header-auth.json")] async fn generic_api_integration_run_test_with_header_auth() { - env::set_var("SUBSCAN_FOO_APIKEY", TEST_API_KEY); - - let auth = AuthMethod::APIKeyInHeader("X-API-Key".to_string()); + let auth = APIAuthMethod::APIKeyAsHeader("X-API-Key".to_string()); let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); + let (env_key, _) = module.fetch_apikey().await; + + env::set_var(env_key.clone(), TEST_API_KEY); + let result = module.run(TEST_DOMAIN.to_string()).await; - assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(module.name().await, module.name); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); + + env::remove_var(env_key); } #[tokio::test] #[stubr::mock("module/generics/api-integration-with-url-auth.json")] async fn generic_api_integration_run_test_with_url_auth() { - let auth = AuthMethod::APIKeyInURL; - let url = format!("{}?apikey={}", stubr.path("/subdomains"), TEST_API_KEY); + let auth = APIAuthMethod::APIKeyAsQueryParam("apikey".to_string()); + let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); + + let (env_key, _) = module.fetch_apikey().await; - let mut module = generic_api_integration(&url, auth); + env::set_var(env_key.clone(), TEST_API_KEY); let result = module.run(TEST_DOMAIN.to_string()).await; - assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(module.name().await, module.name); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); + + env::remove_var(env_key); } diff --git a/tests/modules/generics/search_engine_test.rs b/tests/modules/generics/search_engine_test.rs index 7f2316ba..3a031bd9 100644 --- a/tests/modules/generics/search_engine_test.rs +++ b/tests/modules/generics/search_engine_test.rs @@ -1,5 +1,5 @@ use crate::common::{ - constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN, TEST_MODULE_NAME, TEST_URL}, + constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN, TEST_URL}, mocks::generic_search_engine, }; use subscan::interfaces::module::SubscanModuleInterface; @@ -11,7 +11,7 @@ async fn get_search_query_test() { let mut query = module.get_search_query(TEST_DOMAIN.to_string()).await; assert_eq!(query.as_search_str(), "site:foo.com"); - assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(module.name().await, module.name); } #[tokio::test] @@ -21,6 +21,6 @@ async fn generic_search_engine_run_test() { let result = module.run(TEST_DOMAIN.to_string()).await; - assert_eq!(module.name().await, TEST_MODULE_NAME.to_string()); + assert_eq!(module.name().await, module.name); assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); } From 396ae9b084a1e49b086c06faf55c6775352c48a1 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 16:33:01 +0300 Subject: [PATCH 11/38] refactor: generic api integration module auth method --- src/modules/generics/api_integration.rs | 45 ++++++++++++++----------- src/types/core.rs | 1 - 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 4b7ce92b..fa6a1abb 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -4,6 +4,7 @@ use crate::{ extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, requester::RequesterInterface, }, + types::core::APIKeyAsEnv, }; use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; @@ -36,31 +37,35 @@ pub struct GenericAPIIntegrationModule { } impl GenericAPIIntegrationModule { - pub async fn authenticate(&self, domain: &str) -> Url { - let mut url: Url = (self.url)(domain).parse().unwrap(); - let (_, apikey) = self.fetch_apikey().await; + pub async fn authenticate(&self, url: &mut Url, apienv: APIKeyAsEnv) { + let apikey = apienv.1; - match &self.auth { - APIAuthMethod::APIKeyAsHeader(key) => { - if let Ok(apikey) = apikey { - let mut requester = self.requester.lock().await; - - let (name, value) = (HeaderName::from_str(key), HeaderValue::from_str(&apikey)); + if apikey.is_err() { + return; + } - if let (Ok(name), Ok(value)) = (name, value) { - requester.config().await.add_header(name, value); - } - } + match &self.auth { + APIAuthMethod::APIKeyAsHeader(name) => { + self.set_apikey_header(name, &apikey.unwrap()).await } - APIAuthMethod::APIKeyAsQueryParam(query_param) => { - if let Ok(apikey) = apikey { - url.set_query(Some(&format!("{query_param}={apikey}"))) - } + APIAuthMethod::APIKeyAsQueryParam(param) => { + self.set_apikey_param(url, param, &apikey.unwrap()).await } APIAuthMethod::APIKeyAsURLSlug | APIAuthMethod::NoAuth => {} } + } - url + async fn set_apikey_param(&self, url: &mut Url, param: &str, apikey: &str) { + url.set_query(Some(&format!("{param}={apikey}"))); + } + + async fn set_apikey_header(&self, name: &str, apikey: &str) { + let mut requester = self.requester.lock().await; + let (name, value) = (HeaderName::from_str(name), HeaderValue::from_str(apikey)); + + if let (Ok(name), Ok(value)) = (name, value) { + requester.config().await.add_header(name, value); + } } } @@ -79,7 +84,9 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { } async fn run(&mut self, domain: String) -> BTreeSet { - let url = self.authenticate(&domain).await; + let mut url: Url = (self.url)(&domain).parse().unwrap(); + + self.authenticate(&mut url, self.fetch_apikey().await).await; let requester = self.requester.lock().await; let content = requester.get_content(url).await.unwrap_or_default(); diff --git a/src/types/core.rs b/src/types/core.rs index abc3d1c0..dabeba7b 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -6,7 +6,6 @@ pub type Subdomain = String; /// Inner extract method type definition for [`JSONExtractor`](crate::extractors::json::JSONExtractor) /// In summary it takes a [`Value`] as a parameter and parse subdomains pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; - /// Simple tuple type to store environment API key /// variable with variable name pub type APIKeyAsEnv = (String, Result); From c39b4d0df505a038eb037754a0b4d5133b0d9596 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 16:57:53 +0300 Subject: [PATCH 12/38] chore: remove unnecessary integration module docstrings --- src/modules/integrations/alienvault.rs | 31 -------------------------- src/modules/integrations/anubis.rs | 31 -------------------------- src/modules/integrations/bevigil.rs | 31 -------------------------- 3 files changed, 93 deletions(-) diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index ef318a66..0408383f 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -47,41 +47,10 @@ impl AlienVault { } } - /// Get Alienvault query URL from given domain address - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::alienvault::{self, ALIENVAULT_URL}; - /// - /// #[tokio::main] - /// async fn main() { - /// let domain = "foo.com"; - /// let url = alienvault::AlienVault::get_query_url(&domain); - /// - /// assert_eq!(url, format!("{ALIENVAULT_URL}/{domain}/passive_dns")); - /// } - /// ``` pub fn get_query_url(domain: &str) -> String { format!("{ALIENVAULT_URL}/{domain}/passive_dns") } - /// JSON parse method to extract subdomains - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::alienvault; - /// use std::collections::BTreeSet; - /// use serde_json::Value; - /// - /// #[tokio::main] - /// async fn main() { - /// let result = alienvault::AlienVault::extract(Value::default()); - /// - /// assert_eq!(result, BTreeSet::new()); - /// } - /// ``` pub fn extract(content: Value) -> BTreeSet { if let Some(passives) = content["passive_dns"].as_array() { let filter = |item: &Value| Some(item["hostname"].as_str()?.to_string()); diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 4317a788..4b052391 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -46,41 +46,10 @@ impl Anubis { } } - /// Get Anubis query URL from given domain address - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::anubis::{self, ANUBIS_URL}; - /// - /// #[tokio::main] - /// async fn main() { - /// let domain = "foo.com"; - /// let url = anubis::Anubis::get_query_url(&domain); - /// - /// assert_eq!(url, format!("{ANUBIS_URL}/{domain}")); - /// } - /// ``` pub fn get_query_url(domain: &str) -> String { format!("{ANUBIS_URL}/{domain}") } - /// JSON parse method to extract subdomains - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::anubis; - /// use std::collections::BTreeSet; - /// use serde_json::Value; - /// - /// #[tokio::main] - /// async fn main() { - /// let result = anubis::Anubis::extract(Value::default()); - /// - /// assert_eq!(result, BTreeSet::new()); - /// } - /// ``` pub fn extract(content: Value) -> BTreeSet { if let Some(subs) = content.as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 14ee8ce0..9f5f6b2a 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -46,41 +46,10 @@ impl Bevigil { } } - /// Get Bevigil query URL from given domain address - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::bevigil::{self, BEVIGIL_URL}; - /// - /// #[tokio::main] - /// async fn main() { - /// let domain = "foo.com"; - /// let url = bevigil::Bevigil::get_query_url(&domain); - /// - /// assert_eq!(url, format!("{BEVIGIL_URL}/{domain}/subdomains")); - /// } - /// ``` pub fn get_query_url(domain: &str) -> String { format!("{BEVIGIL_URL}/{domain}/subdomains") } - /// JSON parse method to extract subdomains - /// - /// # Examples - /// - /// ``` - /// use subscan::modules::integrations::bevigil; - /// use std::collections::BTreeSet; - /// use serde_json::Value; - /// - /// #[tokio::main] - /// async fn main() { - /// let result = bevigil::Bevigil::extract(Value::default()); - /// - /// assert_eq!(result, BTreeSet::new()); - /// } - /// ``` pub fn extract(content: Value) -> BTreeSet { if let Some(subs) = content["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); From b82471a2a08e860837a5b3e98b6f8b19e91b1dcc Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 21:34:48 +0300 Subject: [PATCH 13/38] chore: remove unnecessary module doc strings --- src/modules/engines/bing.rs | 14 ------------- src/modules/engines/duckduckgo.rs | 14 ------------- src/modules/engines/google.rs | 14 ------------- src/modules/engines/yahoo.rs | 14 ------------- src/modules/generics/search_engine.rs | 28 -------------------------- src/modules/integrations/alienvault.rs | 14 ------------- src/modules/integrations/anubis.rs | 14 ------------- src/modules/integrations/bevigil.rs | 14 ------------- 8 files changed, 126 deletions(-) diff --git a/src/modules/engines/bing.rs b/src/modules/engines/bing.rs index 18a3802b..7f588ac3 100644 --- a/src/modules/engines/bing.rs +++ b/src/modules/engines/bing.rs @@ -23,20 +23,6 @@ pub const BING_CITE_TAG: &str = "cite"; pub struct Bing {} impl Bing { - /// Create a new [`Bing`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::engines::bing; - /// - /// #[tokio::main] - /// async fn main() { - /// let bing = bing::Bing::new(); - /// - /// // do something with bing instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericSearchEngineModule { let extractor: HTMLExtractor = HTMLExtractor::new(BING_CITE_TAG.into(), vec![]); diff --git a/src/modules/engines/duckduckgo.rs b/src/modules/engines/duckduckgo.rs index 9794b3bc..1471525f 100644 --- a/src/modules/engines/duckduckgo.rs +++ b/src/modules/engines/duckduckgo.rs @@ -23,20 +23,6 @@ pub const DUCKDUCKGO_CITE_TAG: &str = "article > div > div > a > span:first-chil pub struct DuckDuckGo {} impl DuckDuckGo { - /// Create a new [`DuckDuckGo`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::engines::duckduckgo; - /// - /// #[tokio::main] - /// async fn main() { - /// let duckduckgo = duckduckgo::DuckDuckGo::new(); - /// - /// // do something with duckduckgo instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericSearchEngineModule { let extractor: HTMLExtractor = HTMLExtractor::new(DUCKDUCKGO_CITE_TAG.into(), vec![]); diff --git a/src/modules/engines/google.rs b/src/modules/engines/google.rs index 6d3424ff..7301f6d8 100644 --- a/src/modules/engines/google.rs +++ b/src/modules/engines/google.rs @@ -23,20 +23,6 @@ pub const GOOGLE_CITE_TAG: &str = "cite"; pub struct Google {} impl Google { - /// Create a new [`Google`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::engines::google; - /// - /// #[tokio::main] - /// async fn main() { - /// let google = google::Google::new(); - /// - /// // do something with google instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericSearchEngineModule { let extractor: HTMLExtractor = HTMLExtractor::new(GOOGLE_CITE_TAG.into(), vec![]); diff --git a/src/modules/engines/yahoo.rs b/src/modules/engines/yahoo.rs index d2b5dd80..4124c6a1 100644 --- a/src/modules/engines/yahoo.rs +++ b/src/modules/engines/yahoo.rs @@ -23,20 +23,6 @@ pub const YAHOO_CITE_TAG: &str = "ol > li > div > div > h3 > a > span"; pub struct Yahoo {} impl Yahoo { - /// Create a new [`Yahoo`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::engines::yahoo; - /// - /// #[tokio::main] - /// async fn main() { - /// let yahoo = yahoo::Yahoo::new(); - /// - /// // do something with yahoo instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericSearchEngineModule { let removes: Vec = vec!["".into(), "".into()]; diff --git a/src/modules/generics/search_engine.rs b/src/modules/generics/search_engine.rs index fbc3c1a9..866376d6 100644 --- a/src/modules/generics/search_engine.rs +++ b/src/modules/generics/search_engine.rs @@ -58,34 +58,6 @@ pub struct GenericSearchEngineModule { } impl GenericSearchEngineModule { - /// Converts [`SearchQueryParam`] to [`SearchQuery`] - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::generics::search_engine::GenericSearchEngineModule; - /// use subscan::types::query::SearchQueryParam; - /// use subscan::extractors::regex::RegexExtractor; - /// use subscan::requesters::client::HTTPClient; - /// use subscan::cache; - /// use tokio::sync::Mutex; - /// use reqwest::Url; - /// - /// #[tokio::main] - /// async fn main() { - /// let module = GenericSearchEngineModule { - /// name: "foo-module".to_string(), - /// url: Url::parse("https://foo.com").unwrap(), - /// param: SearchQueryParam::from("q"), - /// requester: Mutex::new(HTTPClient::default().into()), - /// extractor: RegexExtractor::default().into(), - /// }; - /// - /// let mut query = module.get_search_query("foo.com".to_string()).await; - /// - /// assert_eq!(query.as_search_str(), "site:foo.com"); - /// } - /// ``` pub async fn get_search_query(&self, domain: String) -> SearchQuery { self.param.to_search_query(domain, "site:".to_string()) } diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 0408383f..35217d8a 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -19,20 +19,6 @@ pub const ALIENVAULT_MODULE_NAME: &str = "AlienVault"; pub const ALIENVAULT_URL: &str = "https://otx.alienvault.com/api/v1/indicators/domain"; impl AlienVault { - /// Create a new [`AlienVault`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::integrations::alienvault; - /// - /// #[tokio::main] - /// async fn main() { - /// let alienvault = alienvault::AlienVault::new(); - /// - /// // do something with alienvault instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericAPIIntegrationModule { let requester: RequesterDispatcher = HTTPClient::default().into(); diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 4b052391..fbca35e1 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -18,20 +18,6 @@ pub const ANUBIS_MODULE_NAME: &str = "Anubis"; pub const ANUBIS_URL: &str = "https://jonlu.ca/anubis/subdomains"; impl Anubis { - /// Create a new [`Anubis`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::integrations::anubis; - /// - /// #[tokio::main] - /// async fn main() { - /// let anubis = anubis::Anubis::new(); - /// - /// // do something with anubis instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericAPIIntegrationModule { let requester: RequesterDispatcher = HTTPClient::default().into(); diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 9f5f6b2a..d005c57c 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -18,20 +18,6 @@ pub const BEVIGIL_MODULE_NAME: &str = "Bevigil"; pub const BEVIGIL_URL: &str = "https://osint.bevigil.com/api"; impl Bevigil { - /// Create a new [`Bevigil`] module instance - /// - /// # Examples - /// - /// ```no_run - /// use subscan::modules::integrations::bevigil; - /// - /// #[tokio::main] - /// async fn main() { - /// let bevigil = bevigil::Bevigil::new(); - /// - /// // do something with bevigil instance - /// } - /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> GenericAPIIntegrationModule { let requester: RequesterDispatcher = HTTPClient::default().into(); From 6d7ad0e07747d63d06433c5fc5b51622e25963af Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 21:46:04 +0300 Subject: [PATCH 14/38] chore: minor syntax fixes --- tests/extractors/json_test.rs | 10 ++++------ tests/extractors/regex_test.rs | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/extractors/json_test.rs b/tests/extractors/json_test.rs index 899d8f2b..e2818558 100644 --- a/tests/extractors/json_test.rs +++ b/tests/extractors/json_test.rs @@ -1,10 +1,9 @@ -use std::collections::BTreeSet; - use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, funcs::read_testdata, }; use serde_json::Value; +use std::collections::BTreeSet; use subscan::extractors::json::JSONExtractor; use subscan::interfaces::extractor::SubdomainExtractorInterface; @@ -22,12 +21,11 @@ async fn extract_test() { } }; + let domain = TEST_DOMAIN.to_string(); let extractor = JSONExtractor::new(Box::new(inner_parser)); - let result = extractor.extract(json, TEST_DOMAIN.to_string()).await; - let no_result = extractor - .extract(String::new(), TEST_DOMAIN.to_string()) - .await; + let result = extractor.extract(json, domain.clone()).await; + let no_result = extractor.extract(String::new(), domain).await; let expected = BTreeSet::from([ TEST_BAR_SUBDOMAIN.to_string(), diff --git a/tests/extractors/regex_test.rs b/tests/extractors/regex_test.rs index 85f7a74f..a3abe57f 100644 --- a/tests/extractors/regex_test.rs +++ b/tests/extractors/regex_test.rs @@ -1,6 +1,5 @@ -use std::collections::BTreeSet; - use crate::common::constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}; +use std::collections::BTreeSet; use subscan::extractors::regex::RegexExtractor; use subscan::interfaces::extractor::SubdomainExtractorInterface; From 156d1276a803656ff4b59a75fbbb12d2ef96f82d Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 21:54:30 +0300 Subject: [PATCH 15/38] chore: rename workflow files --- .github/workflows/{rust-cargo-linters.yml => rust-linters.yml} | 0 .github/workflows/{rust-cargo-security.yml => rust-security.yml} | 0 .github/workflows/{rust-cargo-test.yml => rust-test.yml} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{rust-cargo-linters.yml => rust-linters.yml} (100%) rename .github/workflows/{rust-cargo-security.yml => rust-security.yml} (100%) rename .github/workflows/{rust-cargo-test.yml => rust-test.yml} (100%) diff --git a/.github/workflows/rust-cargo-linters.yml b/.github/workflows/rust-linters.yml similarity index 100% rename from .github/workflows/rust-cargo-linters.yml rename to .github/workflows/rust-linters.yml diff --git a/.github/workflows/rust-cargo-security.yml b/.github/workflows/rust-security.yml similarity index 100% rename from .github/workflows/rust-cargo-security.yml rename to .github/workflows/rust-security.yml diff --git a/.github/workflows/rust-cargo-test.yml b/.github/workflows/rust-test.yml similarity index 100% rename from .github/workflows/rust-cargo-test.yml rename to .github/workflows/rust-test.yml From f2530c0b74cba17e19ee7b2274ae3c5bafa1da3a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 22:05:03 +0300 Subject: [PATCH 16/38] chore: minor syntax fix --- src/extractors/regex.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/extractors/regex.rs b/src/extractors/regex.rs index 99eeb0ea..0be891f0 100644 --- a/src/extractors/regex.rs +++ b/src/extractors/regex.rs @@ -36,10 +36,9 @@ impl RegexExtractor { /// ``` pub fn extract_one(&self, content: String, domain: String) -> Option { let pattern = generate_subdomain_regex(domain).unwrap(); + let to_string = |matches: Match| matches.as_str().to_string(); - pattern - .find(&content) - .map(|matches| matches.as_str().to_string()) + pattern.find(&content).map(to_string) } } From be5450f174e7dd4ee8da0f0f3dae3daa77374ad0 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Wed, 25 Sep 2024 23:17:37 +0300 Subject: [PATCH 17/38] feat: binaryedge module --- src/bin/subscan.rs | 2 +- src/cache.rs | 3 +- src/modules/integrations/binaryedge.rs | 125 +++++++++++++++++++++++++ src/modules/integrations/mod.rs | 2 + 4 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 src/modules/integrations/binaryedge.rs diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index 2e50f4b8..4adcca24 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -17,7 +17,7 @@ async fn main() { let mut module = item.lock().await; let requester = module.requester().await.unwrap(); - if module.name().await != "Bevigil" { + if module.name().await != "Binaryedge" { continue; } diff --git a/src/cache.rs b/src/cache.rs index f77bd8bd..8ecbc7f6 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,7 +1,7 @@ use crate::{ modules::{ engines::{bing, duckduckgo, google, yahoo}, - integrations::{alienvault, anubis, bevigil}, + integrations::{alienvault, anubis, bevigil, binaryedge}, }, SubscanModule, }; @@ -22,6 +22,7 @@ lazy_static! { SubscanModule::new(alienvault::AlienVault::new()), SubscanModule::new(anubis::Anubis::new()), SubscanModule::new(bevigil::Bevigil::new()), + SubscanModule::new(binaryedge::Binaryedge::new()), ]; } diff --git a/src/modules/integrations/binaryedge.rs b/src/modules/integrations/binaryedge.rs new file mode 100644 index 00000000..8a3b9140 --- /dev/null +++ b/src/modules/integrations/binaryedge.rs @@ -0,0 +1,125 @@ +use crate::{ + enums::{RequesterDispatcher, SubdomainExtractorDispatcher}, + extractors::json::JSONExtractor, + interfaces::{ + extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, + requester::RequesterInterface, + }, + requesters::client::HTTPClient, + types::core::Subdomain, +}; +use async_trait::async_trait; +use reqwest::{ + header::{HeaderName, HeaderValue}, + Url, +}; +use serde_json::Value; +use std::{collections::BTreeSet, str::FromStr}; +use tokio::sync::Mutex; + +/// Binaryedge API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Binaryedge { + /// Module name + pub name: String, + /// API search URL + pub url: Url, + /// Requester object instance for HTTP requests + pub requester: Mutex, + /// Any extractor object to extract subdomain from content + pub extractor: SubdomainExtractorDispatcher, +} + +pub const BINARYEDGE_MODULE_NAME: &str = "Binaryedge"; +pub const BINARYEDGE_URL: &str = "https://api.binaryedge.io/v2/query/domains/subdomain"; + +impl Default for Binaryedge { + fn default() -> Self { + Self::new() + } +} + +impl Binaryedge { + pub fn new() -> Self { + let url = Url::parse(BINARYEDGE_URL); + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + Self { + name: BINARYEDGE_MODULE_NAME.into(), + url: url.unwrap(), + requester: requester.into(), + extractor: extractor.into(), + } + } + + pub fn extract(content: Value) -> BTreeSet { + if let Some(subs) = content["events"].as_array() { + let filter = |item: &Value| Some(item.as_str()?.to_string()); + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::new() + } + } + + pub async fn get_query_url(&self, domain: &str) -> Url { + format!("{BINARYEDGE_URL}/{domain}").parse().unwrap() + } +} + +#[async_trait(?Send)] +impl SubscanModuleInterface for Binaryedge { + async fn name(&self) -> &str { + &self.name + } + + async fn requester(&self) -> Option<&Mutex> { + Some(&self.requester) + } + + async fn extractor(&self) -> Option<&SubdomainExtractorDispatcher> { + Some(&self.extractor) + } + + async fn run(&mut self, domain: String) -> BTreeSet { + let (_, apikey) = self.fetch_apikey().await; + + if apikey.is_err() { + return BTreeSet::new(); + } + + let mut requester = self.requester.lock().await; + let (name, value) = ( + HeaderName::from_str("X-Key"), + HeaderValue::from_str(&apikey.unwrap()), + ); + + if let (Ok(name), Ok(value)) = (name, value) { + requester.config().await.add_header(name, value); + } + + let mut all_results = BTreeSet::new(); + + let mut url = self.get_query_url(&domain).await; + let mut page = 1; + + loop { + let content = requester.get_content(url.clone()).await.unwrap_or_default(); + let news = self.extractor.extract(content, domain.clone()).await; + + if !news.is_empty() { + page += 1; + url.set_query(Some(&format!("page={}", page))); + all_results.extend(news); + } else { + break; + } + println!("url: {}", url); + } + + all_results + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs index 579204d9..7c1548e6 100644 --- a/src/modules/integrations/mod.rs +++ b/src/modules/integrations/mod.rs @@ -4,3 +4,5 @@ pub mod alienvault; pub mod anubis; /// Bevigil API integration module, API key required pub mod bevigil; +/// Binaryedge API integration mmodule, API key required +pub mod binaryedge; From b5d35d921f2a45147d794e2fe6e82b634de59fa5 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Thu, 26 Sep 2024 23:22:12 +0300 Subject: [PATCH 18/38] feat: add pagination system into generic api module --- .env.template | 1 + src/interfaces/requester.rs | 9 ++ src/modules/generics/api_integration.rs | 28 +++++- src/modules/integrations/alienvault.rs | 1 + src/modules/integrations/anubis.rs | 1 + src/modules/integrations/bevigil.rs | 1 + src/modules/integrations/binaryedge.rs | 117 ++++++------------------ src/requesters/chrome.rs | 7 ++ src/requesters/client.rs | 7 ++ tests/modules/common.rs | 1 + 10 files changed, 82 insertions(+), 91 deletions(-) diff --git a/.env.template b/.env.template index d058629c..15b3841e 100644 --- a/.env.template +++ b/.env.template @@ -1 +1,2 @@ SUBSCAN_BEVIGIL_APIKEY=foo +SUBSCAN_BINARYEDGE_APIKEY=bar diff --git a/src/interfaces/requester.rs b/src/interfaces/requester.rs index 03f252c3..13fe70bf 100644 --- a/src/interfaces/requester.rs +++ b/src/interfaces/requester.rs @@ -6,6 +6,7 @@ use crate::{ use async_trait::async_trait; use enum_dispatch::enum_dispatch; use reqwest::Url; +use serde_json::Value; /// Generic HTTP client trait definition to implement different /// HTTP requester objects with a single interface compatible @@ -23,6 +24,7 @@ use reqwest::Url; /// use subscan::types::config::RequesterConfig; /// use reqwest::Url; /// use async_trait::async_trait; +/// use serde_json::Value; /// /// pub struct CustomRequester { /// config: RequesterConfig @@ -41,6 +43,10 @@ use reqwest::Url; /// async fn get_content(&self, url: Url) -> Option { /// Some(String::from("foo")) /// } +/// +/// async fn get_json_content(&self, url: Url) -> Value { +/// Value::Bool(false) +/// } /// } /// /// #[tokio::main] @@ -53,6 +59,7 @@ use reqwest::Url; /// /// let config = requester.config().await.clone(); /// +/// assert_eq!(requester.get_json_content(url.clone()).await, false); /// assert_eq!(requester.get_content(url).await.unwrap(), "foo"); /// assert_eq!(config.proxy, None); /// assert_eq!(config.timeout, Duration::from_secs(10)); @@ -68,4 +75,6 @@ pub trait RequesterInterface: Sync + Send { async fn configure(&mut self, config: RequesterConfig); /// Get HTML source of page from given [`reqwest::Url`] object async fn get_content(&self, url: Url) -> Option; + /// Get JSON content from any URL + async fn get_json_content(&self, url: Url) -> Value; } diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index fa6a1abb..d2b16883 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -9,6 +9,7 @@ use crate::{ use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; use reqwest::Url; +use serde_json::Value; use std::{collections::BTreeSet, str::FromStr}; use tokio::sync::Mutex; @@ -27,6 +28,9 @@ pub struct GenericAPIIntegrationModule { /// Simple function field that gets query URL /// by given domain address pub url: Box String + Sync + Send>, + /// Function definition that gets next URL to ensure + /// fully fetch data with pagination from API endpoint + pub next: Box Option + Sync + Send>, /// Set authentication method, see [`APIAuthMethod`] enum /// for details pub auth: APIAuthMethod, @@ -85,12 +89,32 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { async fn run(&mut self, domain: String) -> BTreeSet { let mut url: Url = (self.url)(&domain).parse().unwrap(); + let mut all_results = BTreeSet::new(); self.authenticate(&mut url, self.fetch_apikey().await).await; let requester = self.requester.lock().await; - let content = requester.get_content(url).await.unwrap_or_default(); - self.extractor.extract(content, domain).await + loop { + let json = requester.get_json_content(url.clone()).await; + let news = self + .extractor + .extract(json.to_string(), domain.clone()) + .await; + + if news.is_empty() { + break; + } + + all_results.extend(news); + + if let Some(next_url) = (self.next)(url.clone(), json) { + url = next_url; + } else { + break; + } + } + + all_results } } diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 35217d8a..5cf2bd0c 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -27,6 +27,7 @@ impl AlienVault { GenericAPIIntegrationModule { name: ALIENVAULT_MODULE_NAME.into(), url: Box::new(Self::get_query_url), + next: Box::new(move |_, _| None), auth: APIAuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index fbca35e1..262485ef 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -26,6 +26,7 @@ impl Anubis { GenericAPIIntegrationModule { name: ANUBIS_MODULE_NAME.into(), url: Box::new(Self::get_query_url), + next: Box::new(move |_, _| None), auth: APIAuthMethod::NoAuth, requester: requester.into(), extractor: extractor.into(), diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index d005c57c..2805f738 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -26,6 +26,7 @@ impl Bevigil { GenericAPIIntegrationModule { name: BEVIGIL_MODULE_NAME.into(), url: Box::new(Self::get_query_url), + next: Box::new(move |_, _| None), auth: APIAuthMethod::APIKeyAsHeader("X-Access-Token".into()), requester: requester.into(), extractor: extractor.into(), diff --git a/src/modules/integrations/binaryedge.rs b/src/modules/integrations/binaryedge.rs index 8a3b9140..cf2dac5f 100644 --- a/src/modules/integrations/binaryedge.rs +++ b/src/modules/integrations/binaryedge.rs @@ -1,125 +1,64 @@ use crate::{ - enums::{RequesterDispatcher, SubdomainExtractorDispatcher}, + enums::{APIAuthMethod, RequesterDispatcher}, extractors::json::JSONExtractor, - interfaces::{ - extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, - requester::RequesterInterface, - }, + modules::generics::api_integration::GenericAPIIntegrationModule, requesters::client::HTTPClient, types::core::Subdomain, }; -use async_trait::async_trait; -use reqwest::{ - header::{HeaderName, HeaderValue}, - Url, -}; +use reqwest::Url; use serde_json::Value; -use std::{collections::BTreeSet, str::FromStr}; -use tokio::sync::Mutex; +use std::collections::BTreeSet; /// Binaryedge API integration module /// /// It uses [`GenericAPIIntegrationModule`] its own inner /// here are the configurations -pub struct Binaryedge { - /// Module name - pub name: String, - /// API search URL - pub url: Url, - /// Requester object instance for HTTP requests - pub requester: Mutex, - /// Any extractor object to extract subdomain from content - pub extractor: SubdomainExtractorDispatcher, -} +pub struct Binaryedge {} pub const BINARYEDGE_MODULE_NAME: &str = "Binaryedge"; pub const BINARYEDGE_URL: &str = "https://api.binaryedge.io/v2/query/domains/subdomain"; -impl Default for Binaryedge { - fn default() -> Self { - Self::new() - } -} - impl Binaryedge { - pub fn new() -> Self { - let url = Url::parse(BINARYEDGE_URL); + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { let requester: RequesterDispatcher = HTTPClient::default().into(); let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); - Self { + GenericAPIIntegrationModule { name: BINARYEDGE_MODULE_NAME.into(), - url: url.unwrap(), + url: Box::new(Self::get_query_url), + next: Box::new(Self::get_next_url), + auth: APIAuthMethod::APIKeyAsHeader("X-Key".into()), requester: requester.into(), extractor: extractor.into(), } } - pub fn extract(content: Value) -> BTreeSet { - if let Some(subs) = content["events"].as_array() { - let filter = |item: &Value| Some(item.as_str()?.to_string()); - - BTreeSet::from_iter(subs.iter().filter_map(filter)) - } else { - BTreeSet::new() - } - } - - pub async fn get_query_url(&self, domain: &str) -> Url { - format!("{BINARYEDGE_URL}/{domain}").parse().unwrap() - } -} - -#[async_trait(?Send)] -impl SubscanModuleInterface for Binaryedge { - async fn name(&self) -> &str { - &self.name + pub fn get_query_url(domain: &str) -> String { + format!("{BINARYEDGE_URL}/{domain}") } - async fn requester(&self) -> Option<&Mutex> { - Some(&self.requester) - } + pub fn get_next_url(mut url: Url, _content: Value) -> Option { + let page_param = url.query_pairs().find(|item| item.0 == "page"); - async fn extractor(&self) -> Option<&SubdomainExtractorDispatcher> { - Some(&self.extractor) - } + if let Some(page) = page_param { + let new_page = page.1.parse::().unwrap() + 1; - async fn run(&mut self, domain: String) -> BTreeSet { - let (_, apikey) = self.fetch_apikey().await; - - if apikey.is_err() { - return BTreeSet::new(); - } - - let mut requester = self.requester.lock().await; - let (name, value) = ( - HeaderName::from_str("X-Key"), - HeaderValue::from_str(&apikey.unwrap()), - ); - - if let (Ok(name), Ok(value)) = (name, value) { - requester.config().await.add_header(name, value); + url.set_query(Some(&format!("{}={}", "page", new_page))); + } else { + url.set_query(Some(&format!("{}={}", "page", "2"))); } - let mut all_results = BTreeSet::new(); - - let mut url = self.get_query_url(&domain).await; - let mut page = 1; + Some(url) + } - loop { - let content = requester.get_content(url.clone()).await.unwrap_or_default(); - let news = self.extractor.extract(content, domain.clone()).await; + pub fn extract(content: Value) -> BTreeSet { + if let Some(subs) = content["events"].as_array() { + let filter = |item: &Value| Some(item.as_str()?.to_string()); - if !news.is_empty() { - page += 1; - url.set_query(Some(&format!("page={}", page))); - all_results.extend(news); - } else { - break; - } - println!("url: {}", url); + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::new() } - - all_results } } diff --git a/src/requesters/chrome.rs b/src/requesters/chrome.rs index b77bea16..19dfdb41 100644 --- a/src/requesters/chrome.rs +++ b/src/requesters/chrome.rs @@ -2,6 +2,7 @@ use crate::{interfaces::requester::RequesterInterface, types::config::RequesterC use async_trait::async_trait; use headless_chrome::{browser::LaunchOptions, Browser}; use reqwest::Url; +use serde_json::Value; /// Chrome requester struct, send HTTP requests via Chrome browser. /// Also its compatible with [`RequesterInterface`] @@ -187,4 +188,10 @@ impl RequesterInterface for ChromeBrowser { content } + + async fn get_json_content(&self, url: Url) -> Value { + let content = self.get_content(url).await.unwrap_or_default(); + + serde_json::from_str(&content).unwrap_or_default() + } } diff --git a/src/requesters/client.rs b/src/requesters/client.rs index 9655a262..50855604 100644 --- a/src/requesters/client.rs +++ b/src/requesters/client.rs @@ -1,6 +1,7 @@ use crate::{interfaces::requester::RequesterInterface, types::config::RequesterConfig}; use async_trait::async_trait; use reqwest::{Client, Proxy, Url}; +use serde_json::Value; const CLIENT_BUILD_ERR: &str = "Cannot create HTTP client!"; const REQUEST_BUILD_ERR: &str = "Cannot build request!"; @@ -154,4 +155,10 @@ impl RequesterInterface for HTTPClient { None } } + + async fn get_json_content(&self, url: Url) -> Value { + let content = self.get_content(url).await.unwrap_or_default(); + + serde_json::from_str(&content).unwrap_or_default() + } } diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 93edeff0..d7e4dad9 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -62,6 +62,7 @@ pub mod mocks { GenericAPIIntegrationModule { name: md5_hex(thread_name), url: wrap_url_with_mock_func(url), + next: Box::new(|_, _| None), auth, requester: requester.into(), extractor: extractor.into(), From 556a5de870b21b87b769f7cb4d3c1bf196f58b7a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Thu, 26 Sep 2024 23:48:05 +0300 Subject: [PATCH 19/38] chore: minor refactoring --- src/modules/integrations/binaryedge.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/integrations/binaryedge.rs b/src/modules/integrations/binaryedge.rs index cf2dac5f..3d2eb671 100644 --- a/src/modules/integrations/binaryedge.rs +++ b/src/modules/integrations/binaryedge.rs @@ -44,9 +44,9 @@ impl Binaryedge { if let Some(page) = page_param { let new_page = page.1.parse::().unwrap() + 1; - url.set_query(Some(&format!("{}={}", "page", new_page))); + url.set_query(Some(&format!("page={new_page}"))); } else { - url.set_query(Some(&format!("{}={}", "page", "2"))); + url.set_query(Some("page=2")); } Some(url) From 7e4aff691198ca53a3a45c1f0980c5f37dc9c915 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 12:20:35 +0300 Subject: [PATCH 20/38] chore: refactor on generic api integration module --- src/enums.rs | 20 +++++++++++++++++ src/modules/generics/api_integration.rs | 29 +++++++++++++------------ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/enums.rs b/src/enums.rs index a27e34ad..a7307687 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -55,6 +55,7 @@ pub enum RequesterDispatcher { /// [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses them to apply correct auth method. See the /// method descriptions to learn how it works +#[derive(PartialEq)] pub enum APIAuthMethod { /// Some APIs uses request headers to get /// API key. If this auth type selected API key @@ -71,3 +72,22 @@ pub enum APIAuthMethod { /// This auth type does nothing for auth NoAuth, } + +impl APIAuthMethod { + /// Checks the any auth method selector or not + /// + /// # Examples + /// + /// ``` + /// use subscan::enums::APIAuthMethod; + /// + /// let as_header = APIAuthMethod::APIKeyAsHeader("X-API-Key".to_string()); + /// let no_auth = APIAuthMethod::NoAuth; + /// + /// assert!(as_header.is_set()); + /// assert!(!no_auth.is_set()); + /// ``` + pub fn is_set(&self) -> bool { + self != &Self::NoAuth + } +} diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index d2b16883..f8dbdf3b 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -4,7 +4,6 @@ use crate::{ extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, requester::RequesterInterface, }, - types::core::APIKeyAsEnv, }; use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; @@ -41,19 +40,11 @@ pub struct GenericAPIIntegrationModule { } impl GenericAPIIntegrationModule { - pub async fn authenticate(&self, url: &mut Url, apienv: APIKeyAsEnv) { - let apikey = apienv.1; - - if apikey.is_err() { - return; - } - + pub async fn authenticate(&self, url: &mut Url, apikey: String) { match &self.auth { - APIAuthMethod::APIKeyAsHeader(name) => { - self.set_apikey_header(name, &apikey.unwrap()).await - } + APIAuthMethod::APIKeyAsHeader(name) => self.set_apikey_header(name, &apikey).await, APIAuthMethod::APIKeyAsQueryParam(param) => { - self.set_apikey_param(url, param, &apikey.unwrap()).await + self.set_apikey_param(url, param, &apikey).await } APIAuthMethod::APIKeyAsURLSlug | APIAuthMethod::NoAuth => {} } @@ -65,7 +56,9 @@ impl GenericAPIIntegrationModule { async fn set_apikey_header(&self, name: &str, apikey: &str) { let mut requester = self.requester.lock().await; - let (name, value) = (HeaderName::from_str(name), HeaderValue::from_str(apikey)); + + let name = HeaderName::from_str(name); + let value = HeaderValue::from_str(apikey); if let (Ok(name), Ok(value)) = (name, value) { requester.config().await.add_header(name, value); @@ -91,7 +84,15 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { let mut url: Url = (self.url)(&domain).parse().unwrap(); let mut all_results = BTreeSet::new(); - self.authenticate(&mut url, self.fetch_apikey().await).await; + if self.auth.is_set() { + let apienv = self.fetch_apikey().await; + + if let Ok(apikey) = apienv.1 { + self.authenticate(&mut url, apikey).await; + } else { + return all_results; + } + } let requester = self.requester.lock().await; From 55f13c5f9cdc11e18df36b6b7c8de2ab0d640ef0 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 18:08:11 +0300 Subject: [PATCH 21/38] chore: minor refactor on generic api integration --- src/modules/generics/api_integration.rs | 6 +++--- src/types/core.rs | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index f8dbdf3b..c1c8d247 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -4,11 +4,11 @@ use crate::{ extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, requester::RequesterInterface, }, + types::core::{GetNextUrlMethod, GetQueryUrlMethod}, }; use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; use reqwest::Url; -use serde_json::Value; use std::{collections::BTreeSet, str::FromStr}; use tokio::sync::Mutex; @@ -26,10 +26,10 @@ pub struct GenericAPIIntegrationModule { pub name: String, /// Simple function field that gets query URL /// by given domain address - pub url: Box String + Sync + Send>, + pub url: GetQueryUrlMethod, /// Function definition that gets next URL to ensure /// fully fetch data with pagination from API endpoint - pub next: Box Option + Sync + Send>, + pub next: GetNextUrlMethod, /// Set authentication method, see [`APIAuthMethod`] enum /// for details pub auth: APIAuthMethod, diff --git a/src/types/core.rs b/src/types/core.rs index dabeba7b..1000472e 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -1,3 +1,4 @@ +use reqwest::Url; use serde_json::Value; use std::collections::BTreeSet; @@ -9,3 +10,9 @@ pub type InnerExtractMethod = Box BTreeSet + Sync + /// Simple tuple type to store environment API key /// variable with variable name pub type APIKeyAsEnv = (String, Result); +/// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) +/// uses this type to define method that gets query URL +pub type GetQueryUrlMethod = Box String + Sync + Send>; +/// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) +/// uses this type to define function that gets next query URL to fetch API fully +pub type GetNextUrlMethod = Box Option + Sync + Send>; From b96569303cf97b1334769c5d5d0e60a92b54dcfc Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 18:25:19 +0300 Subject: [PATCH 22/38] chore: minor refactor --- src/modules/generics/api_integration.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index c1c8d247..4305543e 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -98,10 +98,8 @@ impl SubscanModuleInterface for GenericAPIIntegrationModule { loop { let json = requester.get_json_content(url.clone()).await; - let news = self - .extractor - .extract(json.to_string(), domain.clone()) - .await; + let serialized = json.to_string(); + let news = self.extractor.extract(serialized, domain.clone()).await; if news.is_empty() { break; From 395a672ec3797894a6e6b2630506d8672bc0e3e8 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 18:26:26 +0300 Subject: [PATCH 23/38] chore: bump version to 0.1.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 01d5df1b..1d795339 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "subscan" -version = "0.1.0" +version = "0.1.1" edition = "2021" description = "Next generation subdomain enumeration tool" readme = "README.md" From aa5eab87643358c1250e62be526ae6852ca05f51 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 22:47:53 +0300 Subject: [PATCH 24/38] chore: change type names --- src/modules/generics/api_integration.rs | 6 +++--- src/types/core.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 4305543e..baace088 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -4,7 +4,7 @@ use crate::{ extractor::SubdomainExtractorInterface, module::SubscanModuleInterface, requester::RequesterInterface, }, - types::core::{GetNextUrlMethod, GetQueryUrlMethod}, + types::core::{GetNextUrlFunc, GetQueryUrlFunc}, }; use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; @@ -26,10 +26,10 @@ pub struct GenericAPIIntegrationModule { pub name: String, /// Simple function field that gets query URL /// by given domain address - pub url: GetQueryUrlMethod, + pub url: GetQueryUrlFunc, /// Function definition that gets next URL to ensure /// fully fetch data with pagination from API endpoint - pub next: GetNextUrlMethod, + pub next: GetNextUrlFunc, /// Set authentication method, see [`APIAuthMethod`] enum /// for details pub auth: APIAuthMethod, diff --git a/src/types/core.rs b/src/types/core.rs index 1000472e..46efb1e3 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -12,7 +12,7 @@ pub type InnerExtractMethod = Box BTreeSet + Sync + pub type APIKeyAsEnv = (String, Result); /// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses this type to define method that gets query URL -pub type GetQueryUrlMethod = Box String + Sync + Send>; +pub type GetQueryUrlFunc = Box String + Sync + Send>; /// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses this type to define function that gets next query URL to fetch API fully -pub type GetNextUrlMethod = Box Option + Sync + Send>; +pub type GetNextUrlFunc = Box Option + Sync + Send>; From ebd2266f4bf18d7dc9d17eef217bf44cb26f3291 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Fri, 27 Sep 2024 23:18:32 +0300 Subject: [PATCH 25/38] chore: update docstring --- src/types/core.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types/core.rs b/src/types/core.rs index 46efb1e3..ae656105 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -10,9 +10,9 @@ pub type InnerExtractMethod = Box BTreeSet + Sync + /// Simple tuple type to store environment API key /// variable with variable name pub type APIKeyAsEnv = (String, Result); -/// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) +/// Function definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses this type to define method that gets query URL pub type GetQueryUrlFunc = Box String + Sync + Send>; -/// Method definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) +/// Function definition type, [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule) /// uses this type to define function that gets next query URL to fetch API fully pub type GetNextUrlFunc = Box Option + Sync + Send>; From 0d338557d2edd045123d5b30ee73f4ce409c470a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 28 Sep 2024 15:19:31 +0300 Subject: [PATCH 26/38] test: add binaryedge module tests --- tests/modules/integrations/binaryedge_test.rs | 51 +++++++++++++++++++ .../stubs/module/integrations/binaryedge.json | 28 ++++++++++ 2 files changed, 79 insertions(+) create mode 100644 tests/modules/integrations/binaryedge_test.rs create mode 100644 tests/stubs/module/integrations/binaryedge.json diff --git a/tests/modules/integrations/binaryedge_test.rs b/tests/modules/integrations/binaryedge_test.rs new file mode 100644 index 00000000..135968be --- /dev/null +++ b/tests/modules/integrations/binaryedge_test.rs @@ -0,0 +1,51 @@ +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + mocks::wrap_url_with_mock_func, +}; +use serde_json::{self, Value}; +use std::{collections::BTreeSet, env}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::binaryedge::{self, BINARYEDGE_MODULE_NAME, BINARYEDGE_URL}, +}; + +#[tokio::test] +#[stubr::mock("module/integrations/binaryedge.json")] +async fn binaryedge_run_test() { + env::set_var("SUBSCAN_BINARYEDGE_APIKEY", "binaryedge-api-key"); + + let mut binaryedge = binaryedge::Binaryedge::new(); + + binaryedge.url = wrap_url_with_mock_func(stubr.path("/binaryedge").as_str()); + + let result = binaryedge.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(binaryedge.name().await, BINARYEDGE_MODULE_NAME); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); +} + +#[tokio::test] +async fn get_query_url_test() { + let url = binaryedge::Binaryedge::get_query_url(TEST_DOMAIN); + let expected = format!("{BINARYEDGE_URL}/{TEST_DOMAIN}"); + + assert_eq!(url, expected); +} + +#[tokio::test] +async fn extract_test() { + let json = "{\"events\": [\"bar.foo.com\"]}"; + + let extracted = binaryedge::Binaryedge::extract(serde_json::from_str(json).unwrap()); + let not_extracted = binaryedge::Binaryedge::extract(Value::default()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, BTreeSet::new()); +} diff --git a/tests/stubs/module/integrations/binaryedge.json b/tests/stubs/module/integrations/binaryedge.json new file mode 100644 index 00000000..4fc479c7 --- /dev/null +++ b/tests/stubs/module/integrations/binaryedge.json @@ -0,0 +1,28 @@ +{ + "request": { + "headers": { + "X-Key": { + "equalTo": "binaryedge-api-key" + } + }, + "method": "GET", + "queryParameters": { + "page": { + "absent": true + } + }, + "urlPath": "/binaryedge" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "events": [ + "bar.foo.com", + "baz.foo.com" + ] + }, + "status": 200 + } +} From 6113333259c2ac879ebceebdea3b4d439c5f678a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 28 Sep 2024 15:29:03 +0300 Subject: [PATCH 27/38] chore: refactor bevigil and binaryedge tests --- tests/modules/integrations/bevigil_test.rs | 7 +++++-- tests/modules/integrations/binaryedge_test.rs | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs index 6c784a3a..2891aeb7 100644 --- a/tests/modules/integrations/bevigil_test.rs +++ b/tests/modules/integrations/bevigil_test.rs @@ -12,9 +12,10 @@ use subscan::{ #[tokio::test] #[stubr::mock("module/integrations/bevigil.json")] async fn bevigil_run_test() { - env::set_var("SUBSCAN_BEVIGIL_APIKEY", "bevigil-api-key"); - let mut bevigil = bevigil::Bevigil::new(); + let (env_name, _) = bevigil.fetch_apikey().await; + + env::set_var(&env_name, "bevigil-api-key"); bevigil.url = wrap_url_with_mock_func(stubr.path("/bevigil").as_str()); @@ -29,6 +30,8 @@ async fn bevigil_run_test() { ] .into() ); + + env::remove_var(env_name); } #[tokio::test] diff --git a/tests/modules/integrations/binaryedge_test.rs b/tests/modules/integrations/binaryedge_test.rs index 135968be..539a808a 100644 --- a/tests/modules/integrations/binaryedge_test.rs +++ b/tests/modules/integrations/binaryedge_test.rs @@ -12,9 +12,10 @@ use subscan::{ #[tokio::test] #[stubr::mock("module/integrations/binaryedge.json")] async fn binaryedge_run_test() { - env::set_var("SUBSCAN_BINARYEDGE_APIKEY", "binaryedge-api-key"); - let mut binaryedge = binaryedge::Binaryedge::new(); + let (env_name, _) = binaryedge.fetch_apikey().await; + + env::set_var(&env_name, "binaryedge-api-key"); binaryedge.url = wrap_url_with_mock_func(stubr.path("/binaryedge").as_str()); @@ -29,6 +30,8 @@ async fn binaryedge_run_test() { ] .into() ); + + env::remove_var(env_name); } #[tokio::test] From 2d493168150b5b8da355b1fcd073bd05f423905a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 28 Sep 2024 17:05:50 +0300 Subject: [PATCH 28/38] feat: new bufferover integration module --- .env.template | 1 + src/extractors/json.rs | 12 ++-- src/modules/integrations/alienvault.rs | 2 +- src/modules/integrations/anubis.rs | 2 +- src/modules/integrations/bevigil.rs | 2 +- src/modules/integrations/binaryedge.rs | 2 +- src/modules/integrations/bufferover.rs | 58 +++++++++++++++++++ src/modules/integrations/mod.rs | 2 + src/types/core.rs | 2 +- tests/extractors/json_test.rs | 6 +- tests/modules/common.rs | 2 +- tests/modules/integrations/alienvault_test.rs | 7 ++- tests/modules/integrations/anubis_test.rs | 7 ++- tests/modules/integrations/bevigil_test.rs | 7 ++- tests/modules/integrations/binaryedge_test.rs | 7 ++- 15 files changed, 94 insertions(+), 25 deletions(-) create mode 100644 src/modules/integrations/bufferover.rs diff --git a/.env.template b/.env.template index 15b3841e..479d342f 100644 --- a/.env.template +++ b/.env.template @@ -1,2 +1,3 @@ SUBSCAN_BEVIGIL_APIKEY=foo SUBSCAN_BINARYEDGE_APIKEY=bar +SUBSCAN_BUFFEROVER_APIKEY=baz diff --git a/src/extractors/json.rs b/src/extractors/json.rs index 9eb9aa61..de313d3e 100644 --- a/src/extractors/json.rs +++ b/src/extractors/json.rs @@ -23,7 +23,11 @@ impl JSONExtractor { /// use std::collections::BTreeSet; /// use serde_json::Value; /// - /// let extractor = JSONExtractor::new(Box::new(move |_: Value| BTreeSet::new())); + /// let inner = |_content: Value, _domain: String| { + /// BTreeSet::new() + /// }; + /// + /// let extractor = JSONExtractor::new(Box::new(inner)); /// /// // do something with extractor instance /// ``` @@ -51,7 +55,7 @@ impl SubdomainExtractorInterface for JSONExtractor { /// let json = "{\"foo\": \"bar\"}".to_string(); /// let domain = "foo.com".to_string(); /// - /// let func = |item: Value| { + /// let func = |item: Value, _domain: String| { /// [ /// Subdomain::from(item["foo"].as_str().unwrap()) /// ].into() @@ -63,7 +67,7 @@ impl SubdomainExtractorInterface for JSONExtractor { /// assert_eq!(result, [Subdomain::from("bar")].into()); /// } /// ``` - async fn extract(&self, content: String, _domain: String) -> BTreeSet { - (self.inner)(serde_json::from_str(&content).unwrap_or_default()) + async fn extract(&self, content: String, domain: String) -> BTreeSet { + (self.inner)(serde_json::from_str(&content).unwrap_or_default(), domain) } } diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 5cf2bd0c..6ff0f4d5 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -38,7 +38,7 @@ impl AlienVault { format!("{ALIENVAULT_URL}/{domain}/passive_dns") } - pub fn extract(content: Value) -> BTreeSet { + pub fn extract(content: Value, _domain: String) -> BTreeSet { if let Some(passives) = content["passive_dns"].as_array() { let filter = |item: &Value| Some(item["hostname"].as_str()?.to_string()); diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 262485ef..220de697 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -37,7 +37,7 @@ impl Anubis { format!("{ANUBIS_URL}/{domain}") } - pub fn extract(content: Value) -> BTreeSet { + pub fn extract(content: Value, _domain: String) -> BTreeSet { if let Some(subs) = content.as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 2805f738..7bf26e81 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -37,7 +37,7 @@ impl Bevigil { format!("{BEVIGIL_URL}/{domain}/subdomains") } - pub fn extract(content: Value) -> BTreeSet { + pub fn extract(content: Value, _domain: String) -> BTreeSet { if let Some(subs) = content["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); diff --git a/src/modules/integrations/binaryedge.rs b/src/modules/integrations/binaryedge.rs index 3d2eb671..c93a3373 100644 --- a/src/modules/integrations/binaryedge.rs +++ b/src/modules/integrations/binaryedge.rs @@ -52,7 +52,7 @@ impl Binaryedge { Some(url) } - pub fn extract(content: Value) -> BTreeSet { + pub fn extract(content: Value, _domain: String) -> BTreeSet { if let Some(subs) = content["events"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); diff --git a/src/modules/integrations/bufferover.rs b/src/modules/integrations/bufferover.rs new file mode 100644 index 00000000..f428fa0e --- /dev/null +++ b/src/modules/integrations/bufferover.rs @@ -0,0 +1,58 @@ +use crate::{ + enums::{APIAuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, + types::core::Subdomain, + utils::regex::generate_subdomain_regex, +}; +use regex::Match; +use serde_json::Value; +use std::collections::BTreeSet; + +/// Bufferover API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Bufferover {} + +pub const BUFFEROVER_MODULE_NAME: &str = "Bufferover"; +pub const BUFFEROVER_URL: &str = "https://tls.bufferover.run"; + +impl Bufferover { + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: BUFFEROVER_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + next: Box::new(move |_, _| None), + auth: APIAuthMethod::APIKeyAsHeader("X-API-Key".into()), + requester: requester.into(), + extractor: extractor.into(), + } + } + + pub fn get_query_url(domain: &str) -> String { + format!("{BUFFEROVER_URL}/dns?q={domain}") + } + + pub fn extract(content: Value, domain: String) -> BTreeSet { + let pattern = generate_subdomain_regex(domain).unwrap(); + + if let Some(subs) = content["Results"].as_array() { + let filter = |item: &Value| { + let line = item.as_str()?.to_string(); + let to_string = |matches: Match| matches.as_str().to_string(); + + pattern.find(&line).map(to_string) + }; + + BTreeSet::from_iter(subs.iter().filter_map(filter)) + } else { + BTreeSet::new() + } + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs index 7c1548e6..3f687ef8 100644 --- a/src/modules/integrations/mod.rs +++ b/src/modules/integrations/mod.rs @@ -6,3 +6,5 @@ pub mod anubis; pub mod bevigil; /// Binaryedge API integration mmodule, API key required pub mod binaryedge; +/// Bufferover API integration mmodule, API key required +pub mod bufferover; diff --git a/src/types/core.rs b/src/types/core.rs index ae656105..439502ff 100644 --- a/src/types/core.rs +++ b/src/types/core.rs @@ -6,7 +6,7 @@ use std::collections::BTreeSet; pub type Subdomain = String; /// Inner extract method type definition for [`JSONExtractor`](crate::extractors::json::JSONExtractor) /// In summary it takes a [`Value`] as a parameter and parse subdomains -pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; +pub type InnerExtractMethod = Box BTreeSet + Sync + Send>; /// Simple tuple type to store environment API key /// variable with variable name pub type APIKeyAsEnv = (String, Result); diff --git a/tests/extractors/json_test.rs b/tests/extractors/json_test.rs index e2818558..489288a9 100644 --- a/tests/extractors/json_test.rs +++ b/tests/extractors/json_test.rs @@ -11,9 +11,9 @@ use subscan::interfaces::extractor::SubdomainExtractorInterface; async fn extract_test() { let json = read_testdata("json/subdomains.json"); - let inner_parser = |item: Value| { - if let Some(subs) = item["data"]["subdomains"].as_array() { - let filter = |item: &Value| Some(item["subdomain"].as_str().unwrap().to_string()); + let inner_parser = |json: Value, _domain: String| { + if let Some(subs) = json["data"]["subdomains"].as_array() { + let filter = |json: &Value| Some(json["subdomain"].as_str().unwrap().to_string()); BTreeSet::from_iter(subs.iter().filter_map(filter)) } else { diff --git a/tests/modules/common.rs b/tests/modules/common.rs index d7e4dad9..3337377d 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -45,7 +45,7 @@ pub mod mocks { } pub fn generic_api_integration(url: &str, auth: APIAuthMethod) -> GenericAPIIntegrationModule { - let parse = |json: Value| { + let parse = |json: Value, _domain: String| { if let Some(subs) = json["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index 2155d5e3..f7a9673e 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -40,10 +40,11 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let json = "{\"passive_dns\": [{\"hostname\": \"bar.foo.com\"}]}"; + let content = "{\"passive_dns\": [{\"hostname\": \"bar.foo.com\"}]}"; + let json = serde_json::from_str(content).unwrap(); - let extracted = alienvault::AlienVault::extract(serde_json::from_str(json).unwrap()); - let not_extracted = alienvault::AlienVault::extract(Value::default()); + let extracted = alienvault::AlienVault::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = alienvault::AlienVault::extract(Value::Null, TEST_DOMAIN.to_string()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); assert_eq!(not_extracted, BTreeSet::new()); diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index 6dac28ac..c304ea9d 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -33,10 +33,11 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let json = "[\"bar.foo.com\"]"; + let content = "[\"bar.foo.com\"]"; + let json = serde_json::from_str(content).unwrap(); - let extracted = anubis::Anubis::extract(serde_json::from_str(json).unwrap()); - let not_extracted = anubis::Anubis::extract(Value::default()); + let extracted = anubis::Anubis::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = anubis::Anubis::extract(Value::Null, TEST_DOMAIN.to_string()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); assert_eq!(not_extracted, BTreeSet::new()); diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs index 2891aeb7..6aec01d1 100644 --- a/tests/modules/integrations/bevigil_test.rs +++ b/tests/modules/integrations/bevigil_test.rs @@ -44,10 +44,11 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let json = "{\"subdomains\": [\"bar.foo.com\"]}"; + let content = "{\"subdomains\": [\"bar.foo.com\"]}"; + let json = serde_json::from_str(content).unwrap(); - let extracted = bevigil::Bevigil::extract(serde_json::from_str(json).unwrap()); - let not_extracted = bevigil::Bevigil::extract(Value::default()); + let extracted = bevigil::Bevigil::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = bevigil::Bevigil::extract(Value::Null, TEST_DOMAIN.to_string()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); assert_eq!(not_extracted, BTreeSet::new()); diff --git a/tests/modules/integrations/binaryedge_test.rs b/tests/modules/integrations/binaryedge_test.rs index 539a808a..44dd937f 100644 --- a/tests/modules/integrations/binaryedge_test.rs +++ b/tests/modules/integrations/binaryedge_test.rs @@ -44,10 +44,11 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let json = "{\"events\": [\"bar.foo.com\"]}"; + let content = "{\"events\": [\"bar.foo.com\"]}"; + let json = serde_json::from_str(content).unwrap(); - let extracted = binaryedge::Binaryedge::extract(serde_json::from_str(json).unwrap()); - let not_extracted = binaryedge::Binaryedge::extract(Value::default()); + let extracted = binaryedge::Binaryedge::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = binaryedge::Binaryedge::extract(Value::Null, TEST_DOMAIN.to_string()); assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); assert_eq!(not_extracted, BTreeSet::new()); From 28b976fe543aa3f0cf7686f0185649469ba5c870 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sat, 28 Sep 2024 17:12:54 +0300 Subject: [PATCH 29/38] test: add bufferover integration module tests --- tests/modules/integrations/bufferover_test.rs | 55 +++++++++++++++++++ .../stubs/module/integrations/bufferover.json | 26 +++++++++ 2 files changed, 81 insertions(+) create mode 100644 tests/modules/integrations/bufferover_test.rs create mode 100644 tests/stubs/module/integrations/bufferover.json diff --git a/tests/modules/integrations/bufferover_test.rs b/tests/modules/integrations/bufferover_test.rs new file mode 100644 index 00000000..672f62f2 --- /dev/null +++ b/tests/modules/integrations/bufferover_test.rs @@ -0,0 +1,55 @@ +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + mocks::wrap_url_with_mock_func, +}; +use serde_json::{self, Value}; +use std::{collections::BTreeSet, env}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::bufferover::{self, BUFFEROVER_MODULE_NAME, BUFFEROVER_URL}, +}; + +#[tokio::test] +#[stubr::mock("module/integrations/bufferover.json")] +async fn bufferover_run_test() { + let mut bufferover = bufferover::Bufferover::new(); + let (env_name, _) = bufferover.fetch_apikey().await; + + env::set_var(&env_name, "bufferover-api-key"); + + bufferover.url = wrap_url_with_mock_func(stubr.path("/bufferover").as_str()); + + let result = bufferover.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(bufferover.name().await, BUFFEROVER_MODULE_NAME); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); + + env::remove_var(env_name); +} + +#[tokio::test] +async fn get_query_url_test() { + let url = bufferover::Bufferover::get_query_url(TEST_DOMAIN); + let expected = format!("{BUFFEROVER_URL}/dns?q={TEST_DOMAIN}"); + + assert_eq!(url, expected); +} + +#[tokio::test] +async fn extract_test() { + let content = "{\"Results\": [\"127.0.0.1,md5,,bar.foo.com\"]}"; + let json = serde_json::from_str(content).unwrap(); + + let extracted = bufferover::Bufferover::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = bufferover::Bufferover::extract(Value::Null, TEST_DOMAIN.to_string()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, BTreeSet::new()); +} diff --git a/tests/stubs/module/integrations/bufferover.json b/tests/stubs/module/integrations/bufferover.json new file mode 100644 index 00000000..62816729 --- /dev/null +++ b/tests/stubs/module/integrations/bufferover.json @@ -0,0 +1,26 @@ +{ + "request": { + "headers": { + "X-API-Key": { + "equalTo": "bufferover-api-key" + } + }, + "method": "GET", + "urlPath": "/bufferover" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "Meta": { + "domain": "foo.com" + }, + "Results": [ + "54.201.204.183,581faa6ff692d0ba8185753570c8624a2a6b4e8e47bd5322216cc12a41def044,,bar.foo.com", + "104.154.120.133,2122965ac9fa9a2cb9295fc8966569d9a3906995e8fcd61d82d6ec0f9d10dfea,,baz.foo.com" + ] + }, + "status": 200 + } +} From 67e5f489ae2061ce4e77af5b9ed82e016a03d3b8 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 29 Sep 2024 14:45:42 +0300 Subject: [PATCH 30/38] feat: add builtwith integration module --- src/bin/subscan.rs | 2 +- src/cache.rs | 3 +- src/modules/generics/api_integration.rs | 6 +- src/modules/integrations/alienvault.rs | 2 +- src/modules/integrations/anubis.rs | 2 +- src/modules/integrations/bevigil.rs | 2 +- src/modules/integrations/binaryedge.rs | 2 +- src/modules/integrations/bufferover.rs | 2 +- src/modules/integrations/builtwith.rs | 73 +++++++++++++++++++ src/modules/integrations/mod.rs | 2 + tests/modules/common.rs | 17 +++++ tests/modules/integrations/alienvault_test.rs | 14 +++- tests/modules/integrations/anubis_test.rs | 4 +- tests/modules/integrations/bevigil_test.rs | 13 +++- tests/modules/integrations/binaryedge_test.rs | 13 +++- tests/modules/integrations/bufferover_test.rs | 14 +++- tests/modules/integrations/builtwith_test.rs | 71 ++++++++++++++++++ .../stubs/module/integrations/builtwith.json | 39 ++++++++++ 18 files changed, 258 insertions(+), 23 deletions(-) create mode 100644 src/modules/integrations/builtwith.rs create mode 100644 tests/modules/integrations/builtwith_test.rs create mode 100644 tests/stubs/module/integrations/builtwith.json diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index 4adcca24..9d8fe8ad 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -17,7 +17,7 @@ async fn main() { let mut module = item.lock().await; let requester = module.requester().await.unwrap(); - if module.name().await != "Binaryedge" { + if module.name().await != "Builtwith" { continue; } diff --git a/src/cache.rs b/src/cache.rs index 8ecbc7f6..bc31186b 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,7 +1,7 @@ use crate::{ modules::{ engines::{bing, duckduckgo, google, yahoo}, - integrations::{alienvault, anubis, bevigil, binaryedge}, + integrations::{alienvault, anubis, bevigil, binaryedge, builtwith}, }, SubscanModule, }; @@ -23,6 +23,7 @@ lazy_static! { SubscanModule::new(anubis::Anubis::new()), SubscanModule::new(bevigil::Bevigil::new()), SubscanModule::new(binaryedge::Binaryedge::new()), + SubscanModule::new(builtwith::Builtwith::new()), ]; } diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index baace088..6658a99e 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -51,7 +51,11 @@ impl GenericAPIIntegrationModule { } async fn set_apikey_param(&self, url: &mut Url, param: &str, apikey: &str) { - url.set_query(Some(&format!("{param}={apikey}"))); + if let Some(query) = url.query() { + url.set_query(Some(&format!("{query}&{param}={apikey}"))); + } else { + url.set_query(Some(&format!("{param}={apikey}"))); + } } async fn set_apikey_header(&self, name: &str, apikey: &str) { diff --git a/src/modules/integrations/alienvault.rs b/src/modules/integrations/alienvault.rs index 6ff0f4d5..1da9d1d9 100644 --- a/src/modules/integrations/alienvault.rs +++ b/src/modules/integrations/alienvault.rs @@ -42,7 +42,7 @@ impl AlienVault { if let Some(passives) = content["passive_dns"].as_array() { let filter = |item: &Value| Some(item["hostname"].as_str()?.to_string()); - BTreeSet::from_iter(passives.iter().filter_map(filter)) + passives.iter().filter_map(filter).collect() } else { BTreeSet::new() } diff --git a/src/modules/integrations/anubis.rs b/src/modules/integrations/anubis.rs index 220de697..1028b86b 100644 --- a/src/modules/integrations/anubis.rs +++ b/src/modules/integrations/anubis.rs @@ -41,7 +41,7 @@ impl Anubis { if let Some(subs) = content.as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); - BTreeSet::from_iter(subs.iter().filter_map(filter)) + subs.iter().filter_map(filter).collect() } else { BTreeSet::new() } diff --git a/src/modules/integrations/bevigil.rs b/src/modules/integrations/bevigil.rs index 7bf26e81..2e52c911 100644 --- a/src/modules/integrations/bevigil.rs +++ b/src/modules/integrations/bevigil.rs @@ -41,7 +41,7 @@ impl Bevigil { if let Some(subs) = content["subdomains"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); - BTreeSet::from_iter(subs.iter().filter_map(filter)) + subs.iter().filter_map(filter).collect() } else { BTreeSet::new() } diff --git a/src/modules/integrations/binaryedge.rs b/src/modules/integrations/binaryedge.rs index c93a3373..04b0a0d5 100644 --- a/src/modules/integrations/binaryedge.rs +++ b/src/modules/integrations/binaryedge.rs @@ -56,7 +56,7 @@ impl Binaryedge { if let Some(subs) = content["events"].as_array() { let filter = |item: &Value| Some(item.as_str()?.to_string()); - BTreeSet::from_iter(subs.iter().filter_map(filter)) + subs.iter().filter_map(filter).collect() } else { BTreeSet::new() } diff --git a/src/modules/integrations/bufferover.rs b/src/modules/integrations/bufferover.rs index f428fa0e..f4c1b08f 100644 --- a/src/modules/integrations/bufferover.rs +++ b/src/modules/integrations/bufferover.rs @@ -50,7 +50,7 @@ impl Bufferover { pattern.find(&line).map(to_string) }; - BTreeSet::from_iter(subs.iter().filter_map(filter)) + subs.iter().filter_map(filter).collect() } else { BTreeSet::new() } diff --git a/src/modules/integrations/builtwith.rs b/src/modules/integrations/builtwith.rs new file mode 100644 index 00000000..53a9d7b3 --- /dev/null +++ b/src/modules/integrations/builtwith.rs @@ -0,0 +1,73 @@ +use crate::{ + enums::{APIAuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, + types::core::Subdomain, +}; +use reqwest::Url; +use serde_json::Value; +use std::collections::BTreeSet; + +/// Builtwith API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Builtwith {} + +pub const BUILTWITH_MODULE_NAME: &str = "Builtwith"; +pub const BUILTWITH_URL: &str = "https://api.builtwith.com/v21/api.json"; + +impl Builtwith { + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: BUILTWITH_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + next: Box::new(move |_, _| None), + auth: APIAuthMethod::APIKeyAsQueryParam("KEY".into()), + requester: requester.into(), + extractor: extractor.into(), + } + } + + pub fn get_query_url(domain: &str) -> String { + let params = &[ + ("HIDETEXT", "yes"), + ("HIDEDL", "yes"), + ("NOLIVE", "yes"), + ("NOMETA", "yes"), + ("NOPII", "yes"), + ("NOATTR", "yes"), + ("LOOKUP", domain), + ]; + + let url = Url::parse_with_params(BUILTWITH_URL, params); + + url.unwrap().to_string() + } + + pub fn extract(content: Value, domain: String) -> BTreeSet { + if let Some(results) = content["Results"].as_array() { + let mut subs = BTreeSet::new(); + + for result in results { + if let Some(paths) = result["Result"]["Paths"].as_array() { + let filter = |item: &Value| { + let sub = item["SubDomain"].as_str()?.to_string(); + + Some(format!("{}.{}", sub, domain)) + }; + + subs.extend(paths.iter().filter_map(filter)); + } + } + subs + } else { + BTreeSet::new() + } + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs index 3f687ef8..332f3d0d 100644 --- a/src/modules/integrations/mod.rs +++ b/src/modules/integrations/mod.rs @@ -8,3 +8,5 @@ pub mod bevigil; pub mod binaryedge; /// Bufferover API integration mmodule, API key required pub mod bufferover; +/// Builtwith API integration mmodule, API key required +pub mod builtwith; diff --git a/tests/modules/common.rs b/tests/modules/common.rs index 3337377d..172a194b 100644 --- a/tests/modules/common.rs +++ b/tests/modules/common.rs @@ -8,12 +8,29 @@ pub mod constants { pub const TEST_BAR_SUBDOMAIN: &str = "bar.foo.com"; pub const TEST_BAZ_SUBDOMAIN: &str = "baz.foo.com"; pub const TEST_API_KEY: &str = "test-api-key"; + pub const READ_ERROR: &str = "Cannot read file!"; } pub mod funcs { + use super::constants::READ_ERROR; + use serde_json::Value; + use std::fs; + use std::path::{Path, PathBuf}; + + fn stubs_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/stubs") + } + pub fn md5_hex(target: String) -> String { format!("{:x}", md5::compute(target)) } + + pub fn read_stub(path: &str) -> Value { + let file_path = stubs_path().join(path); + let content = fs::read_to_string(file_path).expect(READ_ERROR); + + serde_json::from_str(&content).unwrap() + } } pub mod mocks { diff --git a/tests/modules/integrations/alienvault_test.rs b/tests/modules/integrations/alienvault_test.rs index f7a9673e..90cbe450 100644 --- a/tests/modules/integrations/alienvault_test.rs +++ b/tests/modules/integrations/alienvault_test.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, mocks::wrap_url_with_mock_func, }; use serde_json::{self, Value}; @@ -40,12 +41,19 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let content = "{\"passive_dns\": [{\"hostname\": \"bar.foo.com\"}]}"; - let json = serde_json::from_str(content).unwrap(); + let json = read_stub("module/integrations/alienvault.json")["response"]["jsonBody"].clone(); let extracted = alienvault::AlienVault::extract(json, TEST_DOMAIN.to_string()); let not_extracted = alienvault::AlienVault::extract(Value::Null, TEST_DOMAIN.to_string()); - assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!( + extracted, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); + assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/anubis_test.rs b/tests/modules/integrations/anubis_test.rs index c304ea9d..13775427 100644 --- a/tests/modules/integrations/anubis_test.rs +++ b/tests/modules/integrations/anubis_test.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, mocks::wrap_url_with_mock_func, }; use serde_json::{self, Value}; @@ -33,8 +34,7 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let content = "[\"bar.foo.com\"]"; - let json = serde_json::from_str(content).unwrap(); + let json = read_stub("module/integrations/anubis.json")["response"]["jsonBody"].clone(); let extracted = anubis::Anubis::extract(json, TEST_DOMAIN.to_string()); let not_extracted = anubis::Anubis::extract(Value::Null, TEST_DOMAIN.to_string()); diff --git a/tests/modules/integrations/bevigil_test.rs b/tests/modules/integrations/bevigil_test.rs index 6aec01d1..f291a134 100644 --- a/tests/modules/integrations/bevigil_test.rs +++ b/tests/modules/integrations/bevigil_test.rs @@ -1,5 +1,6 @@ use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, mocks::wrap_url_with_mock_func, }; use serde_json::{self, Value}; @@ -44,12 +45,18 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let content = "{\"subdomains\": [\"bar.foo.com\"]}"; - let json = serde_json::from_str(content).unwrap(); + let json = read_stub("module/integrations/bevigil.json")["response"]["jsonBody"].clone(); let extracted = bevigil::Bevigil::extract(json, TEST_DOMAIN.to_string()); let not_extracted = bevigil::Bevigil::extract(Value::Null, TEST_DOMAIN.to_string()); - assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!( + extracted, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/binaryedge_test.rs b/tests/modules/integrations/binaryedge_test.rs index 44dd937f..9c8ca78e 100644 --- a/tests/modules/integrations/binaryedge_test.rs +++ b/tests/modules/integrations/binaryedge_test.rs @@ -1,5 +1,6 @@ use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, mocks::wrap_url_with_mock_func, }; use serde_json::{self, Value}; @@ -44,12 +45,18 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let content = "{\"events\": [\"bar.foo.com\"]}"; - let json = serde_json::from_str(content).unwrap(); + let json = read_stub("module/integrations/binaryedge.json")["response"]["jsonBody"].clone(); let extracted = binaryedge::Binaryedge::extract(json, TEST_DOMAIN.to_string()); let not_extracted = binaryedge::Binaryedge::extract(Value::Null, TEST_DOMAIN.to_string()); - assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!( + extracted, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/bufferover_test.rs b/tests/modules/integrations/bufferover_test.rs index 672f62f2..09f43724 100644 --- a/tests/modules/integrations/bufferover_test.rs +++ b/tests/modules/integrations/bufferover_test.rs @@ -1,5 +1,6 @@ use crate::common::{ constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, mocks::wrap_url_with_mock_func, }; use serde_json::{self, Value}; @@ -44,12 +45,17 @@ async fn get_query_url_test() { #[tokio::test] async fn extract_test() { - let content = "{\"Results\": [\"127.0.0.1,md5,,bar.foo.com\"]}"; - let json = serde_json::from_str(content).unwrap(); - + let json = read_stub("module/integrations/bufferover.json")["response"]["jsonBody"].clone(); let extracted = bufferover::Bufferover::extract(json, TEST_DOMAIN.to_string()); let not_extracted = bufferover::Bufferover::extract(Value::Null, TEST_DOMAIN.to_string()); - assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!( + extracted, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); assert_eq!(not_extracted, BTreeSet::new()); } diff --git a/tests/modules/integrations/builtwith_test.rs b/tests/modules/integrations/builtwith_test.rs new file mode 100644 index 00000000..2d5fd9b8 --- /dev/null +++ b/tests/modules/integrations/builtwith_test.rs @@ -0,0 +1,71 @@ +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_BAZ_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, + mocks::wrap_url_with_mock_func, +}; +use reqwest::Url; +use serde_json::{self, Value}; +use std::{collections::BTreeSet, env}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::builtwith::{self, BUILTWITH_MODULE_NAME, BUILTWITH_URL}, +}; + +#[tokio::test] +#[stubr::mock("module/integrations/builtwith.json")] +async fn builtwith_run_test() { + let mut builtwith = builtwith::Builtwith::new(); + let (env_name, _) = builtwith.fetch_apikey().await; + + env::set_var(&env_name, "builtwith-api-key"); + + builtwith.url = wrap_url_with_mock_func(stubr.path("/builtwith").as_str()); + + let result = builtwith.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(builtwith.name().await, BUILTWITH_MODULE_NAME); + assert_eq!( + result, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string(), + ] + .into() + ); + + env::remove_var(env_name); +} + +#[tokio::test] +async fn get_query_url_test() { + let params = &[ + ("HIDETEXT", "yes"), + ("HIDEDL", "yes"), + ("NOLIVE", "yes"), + ("NOMETA", "yes"), + ("NOPII", "yes"), + ("NOATTR", "yes"), + ]; + + let expected = Url::parse_with_params(BUILTWITH_URL, params).unwrap(); + let url = builtwith::Builtwith::get_query_url(TEST_DOMAIN); + + assert_eq!(url, format!("{expected}&LOOKUP={TEST_DOMAIN}")); +} + +#[tokio::test] +async fn extract_test() { + let json = read_stub("module/integrations/builtwith.json")["response"]["jsonBody"].clone(); + let extracted = builtwith::Builtwith::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = builtwith::Builtwith::extract(Value::Null, TEST_DOMAIN.to_string()); + + assert_eq!( + extracted, + [ + TEST_BAR_SUBDOMAIN.to_string(), + TEST_BAZ_SUBDOMAIN.to_string() + ] + .into() + ); + assert_eq!(not_extracted, BTreeSet::new()); +} diff --git a/tests/stubs/module/integrations/builtwith.json b/tests/stubs/module/integrations/builtwith.json new file mode 100644 index 00000000..fa093e4d --- /dev/null +++ b/tests/stubs/module/integrations/builtwith.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "GET", + "queryParameters": { + "KEY": { + "equalTo": "builtwith-api-key" + } + }, + "urlPath": "/builtwith" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "Results": [ + { + "Result": { + "Paths": [ + { + "SubDomain": "bar" + } + ] + } + }, + { + "Result": { + "Paths": [ + { + "SubDomain": "baz" + } + ] + } + } + ] + }, + "status": 200 + } +} From ec34e43463d18855454f7bce31c286c4c10de65e Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 29 Sep 2024 14:51:18 +0300 Subject: [PATCH 31/38] chore: update .env.template file --- .env.template | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.template b/.env.template index 479d342f..b6c117f9 100644 --- a/.env.template +++ b/.env.template @@ -1,3 +1,4 @@ SUBSCAN_BEVIGIL_APIKEY=foo SUBSCAN_BINARYEDGE_APIKEY=bar SUBSCAN_BUFFEROVER_APIKEY=baz +SUBSCAN_BUILTWITH_APIKEY=foo From 4f4c30d746171020ad8562a08c1b15c653a67269 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 29 Sep 2024 17:23:44 +0300 Subject: [PATCH 32/38] feat: add new censys integration module --- src/bin/subscan.rs | 2 +- src/cache.rs | 3 +- src/modules/generics/api_integration.rs | 7 +-- src/modules/integrations/censys.rs | 68 +++++++++++++++++++++++++ src/modules/integrations/mod.rs | 3 ++ src/utils.rs | 37 ++++++++++++++ 6 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 src/modules/integrations/censys.rs diff --git a/src/bin/subscan.rs b/src/bin/subscan.rs index 9d8fe8ad..18ab53c8 100644 --- a/src/bin/subscan.rs +++ b/src/bin/subscan.rs @@ -17,7 +17,7 @@ async fn main() { let mut module = item.lock().await; let requester = module.requester().await.unwrap(); - if module.name().await != "Builtwith" { + if module.name().await != "Censys" { continue; } diff --git a/src/cache.rs b/src/cache.rs index bc31186b..1a6db8eb 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,7 +1,7 @@ use crate::{ modules::{ engines::{bing, duckduckgo, google, yahoo}, - integrations::{alienvault, anubis, bevigil, binaryedge, builtwith}, + integrations::{alienvault, anubis, bevigil, binaryedge, builtwith, censys}, }, SubscanModule, }; @@ -24,6 +24,7 @@ lazy_static! { SubscanModule::new(bevigil::Bevigil::new()), SubscanModule::new(binaryedge::Binaryedge::new()), SubscanModule::new(builtwith::Builtwith::new()), + SubscanModule::new(censys::Censys::new()), ]; } diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 6658a99e..27f2868f 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -5,6 +5,7 @@ use crate::{ requester::RequesterInterface, }, types::core::{GetNextUrlFunc, GetQueryUrlFunc}, + utils::http, }; use async_trait::async_trait; use reqwest::header::{HeaderName, HeaderValue}; @@ -51,11 +52,7 @@ impl GenericAPIIntegrationModule { } async fn set_apikey_param(&self, url: &mut Url, param: &str, apikey: &str) { - if let Some(query) = url.query() { - url.set_query(Some(&format!("{query}&{param}={apikey}"))); - } else { - url.set_query(Some(&format!("{param}={apikey}"))); - } + http::set_query_without_override(url, param, apikey); } async fn set_apikey_header(&self, name: &str, apikey: &str) { diff --git a/src/modules/integrations/censys.rs b/src/modules/integrations/censys.rs new file mode 100644 index 00000000..a94f9b7b --- /dev/null +++ b/src/modules/integrations/censys.rs @@ -0,0 +1,68 @@ +use crate::{ + enums::{APIAuthMethod, RequesterDispatcher}, + extractors::json::JSONExtractor, + modules::generics::api_integration::GenericAPIIntegrationModule, + requesters::client::HTTPClient, + types::core::Subdomain, + utils::http, +}; +use reqwest::Url; +use serde_json::Value; +use std::collections::BTreeSet; + +/// Censys API integration module +/// +/// It uses [`GenericAPIIntegrationModule`] its own inner +/// here are the configurations +pub struct Censys {} + +pub const CENSYS_MODULE_NAME: &str = "Censys"; +pub const CENSYS_URL: &str = "https://search.censys.io/api/v2/certificates/search"; + +impl Censys { + #[allow(clippy::new_ret_no_self)] + pub fn new() -> GenericAPIIntegrationModule { + let requester: RequesterDispatcher = HTTPClient::default().into(); + let extractor: JSONExtractor = JSONExtractor::new(Box::new(Self::extract)); + + GenericAPIIntegrationModule { + name: CENSYS_MODULE_NAME.into(), + url: Box::new(Self::get_query_url), + next: Box::new(Self::get_next_url), + auth: APIAuthMethod::APIKeyAsHeader("Authorization".into()), + requester: requester.into(), + extractor: extractor.into(), + } + } + + pub fn get_query_url(domain: &str) -> String { + format!("{CENSYS_URL}?q={domain}") + } + + pub fn get_next_url(mut url: Url, content: Value) -> Option { + if let Some(cursor) = content["result"]["links"]["next"].as_str() { + http::set_query_without_override(&mut url, "cursor", cursor); + Some(url) + } else { + None + } + } + + pub fn extract(content: Value, _domain: String) -> BTreeSet { + if let Some(hits) = content["result"]["hits"].as_array() { + let mut subs = BTreeSet::new(); + + for result in hits { + if let Some(names) = result["names"].as_array() { + let to_string = |item: &Value| Some(item.as_str()?.to_string()); + let names = names.iter().filter_map(to_string); + + subs.extend(names); + } + } + subs + } else { + BTreeSet::new() + } + } +} diff --git a/src/modules/integrations/mod.rs b/src/modules/integrations/mod.rs index 332f3d0d..086d5748 100644 --- a/src/modules/integrations/mod.rs +++ b/src/modules/integrations/mod.rs @@ -10,3 +10,6 @@ pub mod binaryedge; pub mod bufferover; /// Builtwith API integration mmodule, API key required pub mod builtwith; +/// Censys API integration, basic HTTP auth required but `Authorization` +/// header can be used (e.g. `Authorization: Basic foo`) +pub mod censys; diff --git a/src/utils.rs b/src/utils.rs index cddbf9f2..1dbb8848 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -69,3 +69,40 @@ pub mod env { (var_name.clone(), dotenvy::var(var_name)) } } + +pub mod http { + + use reqwest::Url; + + /// Set query param without override olds. If the given param + /// name already exists it will be updated + /// + /// # Examples + /// + /// ``` + /// use subscan::utils::http::set_query_without_override; + /// use reqwest::Url; + /// + /// let mut url: Url = "https://foo.com".parse().unwrap(); + /// + /// set_query_without_override(&mut url, "a".into(), "b".into()); + /// assert_eq!(url.to_string(), "https://foo.com/?a=b"); + /// + /// set_query_without_override(&mut url, "x".into(), "y".into()); + /// assert_eq!(url.to_string(), "https://foo.com/?a=b&x=y"); + /// + /// set_query_without_override(&mut url, "a".into(), "c".into()); + /// assert_eq!(url.to_string(), "https://foo.com/?x=y&a=c"); + /// ``` + pub fn set_query_without_override(url: &mut Url, name: &str, value: &str) { + let binding = url.clone(); + let pairs = binding.query_pairs(); + let filtered = pairs.filter(|item| item.0.to_lowercase() != name.to_lowercase()); + + url.query_pairs_mut() + .clear() + .extend_pairs(filtered) + .append_pair(name, value) + .finish(); + } +} From c127f3afde964cc142eac1496a16be7e5b25c9ad Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 29 Sep 2024 17:24:40 +0300 Subject: [PATCH 33/38] chore: update .env.template file --- .env.template | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.template b/.env.template index b6c117f9..de712820 100644 --- a/.env.template +++ b/.env.template @@ -2,3 +2,4 @@ SUBSCAN_BEVIGIL_APIKEY=foo SUBSCAN_BINARYEDGE_APIKEY=bar SUBSCAN_BUFFEROVER_APIKEY=baz SUBSCAN_BUILTWITH_APIKEY=foo +SUBSCAN_CENSYS_APIKEY=bar From d6a9cb5744f6d5f71756d83cb3ff7d35d4a8bb1a Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Sun, 29 Sep 2024 17:27:49 +0300 Subject: [PATCH 34/38] chore: test uppercase --- src/utils.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 1dbb8848..1508d919 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -52,13 +52,13 @@ pub mod env { /// /// env::remove_var(env_key); /// - /// assert_eq!(get_subscan_module_apikey("FOO").0, env_key); - /// assert_eq!(get_subscan_module_apikey("FOO").1.is_ok(), false); + /// assert_eq!(get_subscan_module_apikey("foo").0, env_key); + /// assert_eq!(get_subscan_module_apikey("foo").1.is_ok(), false); /// /// env::set_var(env_key, "foo"); /// - /// assert_eq!(get_subscan_module_apikey("FOO").0, env_key); - /// assert_eq!(get_subscan_module_apikey("FOO").1.unwrap(), "foo"); + /// assert_eq!(get_subscan_module_apikey("foo").0, env_key); + /// assert_eq!(get_subscan_module_apikey("foo").1.unwrap(), "foo"); /// /// env::remove_var(env_key); /// } From 52925ef28749006fb85d48625631da33c1208104 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Mon, 30 Sep 2024 09:49:06 +0300 Subject: [PATCH 35/38] chore: docstring updates --- src/utils.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/utils.rs b/src/utils.rs index 1508d919..c9adfb1d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -32,6 +32,7 @@ pub mod regex { } } +/// Utilities about project environments pub mod env { use crate::config::SUBSCAN_ENV_NAMESPACE; use crate::types::core::APIKeyAsEnv; @@ -70,8 +71,8 @@ pub mod env { } } +/// Helpful HTTP utilities pub mod http { - use reqwest::Url; /// Set query param without override olds. If the given param @@ -88,6 +89,7 @@ pub mod http { /// set_query_without_override(&mut url, "a".into(), "b".into()); /// assert_eq!(url.to_string(), "https://foo.com/?a=b"); /// + /// // does not override old `a` parameter /// set_query_without_override(&mut url, "x".into(), "y".into()); /// assert_eq!(url.to_string(), "https://foo.com/?a=b&x=y"); /// From 8b096caa6e6ce1d7f965036f5f248a39507701ad Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Mon, 30 Sep 2024 16:08:56 +0300 Subject: [PATCH 36/38] test: add censys module tests --- src/modules/generics/api_integration.rs | 2 +- src/modules/integrations/bufferover.rs | 3 +- src/modules/integrations/censys.rs | 18 +++++--- src/utils.rs | 15 +++---- tests/modules/integrations/censys_test.rs | 46 +++++++++++++++++++++ tests/stubs/module/integrations/censys.json | 34 +++++++++++++++ 6 files changed, 102 insertions(+), 16 deletions(-) create mode 100644 tests/modules/integrations/censys_test.rs create mode 100644 tests/stubs/module/integrations/censys.json diff --git a/src/modules/generics/api_integration.rs b/src/modules/generics/api_integration.rs index 27f2868f..ed970333 100644 --- a/src/modules/generics/api_integration.rs +++ b/src/modules/generics/api_integration.rs @@ -52,7 +52,7 @@ impl GenericAPIIntegrationModule { } async fn set_apikey_param(&self, url: &mut Url, param: &str, apikey: &str) { - http::set_query_without_override(url, param, apikey); + http::update_url_query(url, param, apikey); } async fn set_apikey_header(&self, name: &str, apikey: &str) { diff --git a/src/modules/integrations/bufferover.rs b/src/modules/integrations/bufferover.rs index f4c1b08f..7bcc7389 100644 --- a/src/modules/integrations/bufferover.rs +++ b/src/modules/integrations/bufferover.rs @@ -44,10 +44,9 @@ impl Bufferover { if let Some(subs) = content["Results"].as_array() { let filter = |item: &Value| { - let line = item.as_str()?.to_string(); let to_string = |matches: Match| matches.as_str().to_string(); - pattern.find(&line).map(to_string) + pattern.find(item.as_str()?).map(to_string) }; subs.iter().filter_map(filter).collect() diff --git a/src/modules/integrations/censys.rs b/src/modules/integrations/censys.rs index a94f9b7b..2a14ef0a 100644 --- a/src/modules/integrations/censys.rs +++ b/src/modules/integrations/censys.rs @@ -4,8 +4,9 @@ use crate::{ modules::generics::api_integration::GenericAPIIntegrationModule, requesters::client::HTTPClient, types::core::Subdomain, - utils::http, + utils::{http, regex::generate_subdomain_regex}, }; +use regex::Match; use reqwest::Url; use serde_json::Value; use std::collections::BTreeSet; @@ -41,23 +42,28 @@ impl Censys { pub fn get_next_url(mut url: Url, content: Value) -> Option { if let Some(cursor) = content["result"]["links"]["next"].as_str() { - http::set_query_without_override(&mut url, "cursor", cursor); + http::update_url_query(&mut url, "cursor", cursor); Some(url) } else { None } } - pub fn extract(content: Value, _domain: String) -> BTreeSet { + pub fn extract(content: Value, domain: String) -> BTreeSet { + let pattern = generate_subdomain_regex(domain).unwrap(); + if let Some(hits) = content["result"]["hits"].as_array() { let mut subs = BTreeSet::new(); for result in hits { if let Some(names) = result["names"].as_array() { - let to_string = |item: &Value| Some(item.as_str()?.to_string()); - let names = names.iter().filter_map(to_string); + let matches = |item: &Value| { + let to_string = |matched: Match| matched.as_str().to_string(); + + pattern.find(item.as_str()?).map(to_string) + }; - subs.extend(names); + subs.extend(names.iter().filter_map(matches)); } } subs diff --git a/src/utils.rs b/src/utils.rs index c9adfb1d..745ff5ca 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -75,28 +75,29 @@ pub mod env { pub mod http { use reqwest::Url; - /// Set query param without override olds. If the given param - /// name already exists it will be updated + /// Update query params without remove old query params. If the + /// given parameter name non-exists it will append end of the + /// query otherwise it's value will be updated /// /// # Examples /// /// ``` - /// use subscan::utils::http::set_query_without_override; + /// use subscan::utils::http::update_url_query; /// use reqwest::Url; /// /// let mut url: Url = "https://foo.com".parse().unwrap(); /// - /// set_query_without_override(&mut url, "a".into(), "b".into()); + /// update_url_query(&mut url, "a".into(), "b".into()); /// assert_eq!(url.to_string(), "https://foo.com/?a=b"); /// /// // does not override old `a` parameter - /// set_query_without_override(&mut url, "x".into(), "y".into()); + /// update_url_query(&mut url, "x".into(), "y".into()); /// assert_eq!(url.to_string(), "https://foo.com/?a=b&x=y"); /// - /// set_query_without_override(&mut url, "a".into(), "c".into()); + /// update_url_query(&mut url, "a".into(), "c".into()); /// assert_eq!(url.to_string(), "https://foo.com/?x=y&a=c"); /// ``` - pub fn set_query_without_override(url: &mut Url, name: &str, value: &str) { + pub fn update_url_query(url: &mut Url, name: &str, value: &str) { let binding = url.clone(); let pairs = binding.query_pairs(); let filtered = pairs.filter(|item| item.0.to_lowercase() != name.to_lowercase()); diff --git a/tests/modules/integrations/censys_test.rs b/tests/modules/integrations/censys_test.rs new file mode 100644 index 00000000..78bbc14c --- /dev/null +++ b/tests/modules/integrations/censys_test.rs @@ -0,0 +1,46 @@ +use crate::common::{ + constants::{TEST_BAR_SUBDOMAIN, TEST_DOMAIN}, + funcs::read_stub, + mocks::wrap_url_with_mock_func, +}; +use serde_json::{self, Value}; +use std::{collections::BTreeSet, env}; +use subscan::{ + interfaces::module::SubscanModuleInterface, + modules::integrations::censys::{self, CENSYS_MODULE_NAME, CENSYS_URL}, +}; + +#[tokio::test] +#[stubr::mock("module/integrations/censys.json")] +async fn censys_run_test() { + let mut censys = censys::Censys::new(); + let (env_name, _) = censys.fetch_apikey().await; + + env::set_var(&env_name, "censys-api-key"); + + censys.url = wrap_url_with_mock_func(stubr.path("/censys").as_str()); + + let result = censys.run(TEST_DOMAIN.to_string()).await; + + assert_eq!(censys.name().await, CENSYS_MODULE_NAME); + assert_eq!(result, [TEST_BAR_SUBDOMAIN.to_string()].into()); + + env::remove_var(env_name); +} + +#[tokio::test] +async fn get_query_url_test() { + let url = censys::Censys::get_query_url(TEST_DOMAIN); + + assert_eq!(url, format!("{CENSYS_URL}?q={TEST_DOMAIN}")); +} + +#[tokio::test] +async fn extract_test() { + let json = read_stub("module/integrations/censys.json")["response"]["jsonBody"].clone(); + let extracted = censys::Censys::extract(json, TEST_DOMAIN.to_string()); + let not_extracted = censys::Censys::extract(Value::Null, TEST_DOMAIN.to_string()); + + assert_eq!(extracted, [TEST_BAR_SUBDOMAIN.to_string()].into()); + assert_eq!(not_extracted, BTreeSet::new()); +} diff --git a/tests/stubs/module/integrations/censys.json b/tests/stubs/module/integrations/censys.json new file mode 100644 index 00000000..07f1c131 --- /dev/null +++ b/tests/stubs/module/integrations/censys.json @@ -0,0 +1,34 @@ +{ + "request": { + "headers": { + "Authorization": { + "equalTo": "censys-api-key" + } + }, + "method": "GET", + "urlPath": "/censys" + }, + "response": { + "headers": { + "content-type": "application/json" + }, + "jsonBody": { + "result": { + "hits": [ + { + "names": [ + "bar.foo.com", + "*.bar.foo.com" + ] + }, + { + "names": { + "foo": "bar" + } + } + ] + } + }, + "status": 200 + } +} From d53d273c242f342512b3d075beaeffdb302b0d4c Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Mon, 30 Sep 2024 23:43:20 +0300 Subject: [PATCH 37/38] chore: rename stub file name --- tests/modules/generics/api_integration_test.rs | 2 +- ...-with-url-auth.json => api-integration-with-query-auth.json} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/stubs/module/generics/{api-integration-with-url-auth.json => api-integration-with-query-auth.json} (100%) diff --git a/tests/modules/generics/api_integration_test.rs b/tests/modules/generics/api_integration_test.rs index a90ae3a6..f71e9324 100644 --- a/tests/modules/generics/api_integration_test.rs +++ b/tests/modules/generics/api_integration_test.rs @@ -43,7 +43,7 @@ async fn generic_api_integration_run_test_with_header_auth() { } #[tokio::test] -#[stubr::mock("module/generics/api-integration-with-url-auth.json")] +#[stubr::mock("module/generics/api-integration-with-query-auth.json")] async fn generic_api_integration_run_test_with_url_auth() { let auth = APIAuthMethod::APIKeyAsQueryParam("apikey".to_string()); let mut module = generic_api_integration(&stubr.path("/subdomains"), auth); diff --git a/tests/stubs/module/generics/api-integration-with-url-auth.json b/tests/stubs/module/generics/api-integration-with-query-auth.json similarity index 100% rename from tests/stubs/module/generics/api-integration-with-url-auth.json rename to tests/stubs/module/generics/api-integration-with-query-auth.json From 830a5ac1e0ad4637de74b240a2cf65639f665e05 Mon Sep 17 00:00:00 2001 From: Erdogan Yoksul Date: Mon, 30 Sep 2024 23:46:53 +0300 Subject: [PATCH 38/38] chore: add bufferover module into cache --- src/cache.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cache.rs b/src/cache.rs index 1a6db8eb..fcaf47c5 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,7 +1,7 @@ use crate::{ modules::{ engines::{bing, duckduckgo, google, yahoo}, - integrations::{alienvault, anubis, bevigil, binaryedge, builtwith, censys}, + integrations::{alienvault, anubis, bevigil, binaryedge, bufferover, builtwith, censys}, }, SubscanModule, }; @@ -23,6 +23,7 @@ lazy_static! { SubscanModule::new(anubis::Anubis::new()), SubscanModule::new(bevigil::Bevigil::new()), SubscanModule::new(binaryedge::Binaryedge::new()), + SubscanModule::new(bufferover::Bufferover::new()), SubscanModule::new(builtwith::Builtwith::new()), SubscanModule::new(censys::Censys::new()), ];