Skip to content

Commit

Permalink
Merge pull request #28 from eredotpkfr/integrations
Browse files Browse the repository at this point in the history
feat: generic api integration module
  • Loading branch information
eredotpkfr authored Oct 1, 2024
2 parents 2abdc57 + 674d4e4 commit 1026c9b
Show file tree
Hide file tree
Showing 72 changed files with 1,807 additions and 298 deletions.
5 changes: 5 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
SUBSCAN_BEVIGIL_APIKEY=foo
SUBSCAN_BINARYEDGE_APIKEY=bar
SUBSCAN_BUFFEROVER_APIKEY=baz
SUBSCAN_BUILTWITH_APIKEY=foo
SUBSCAN_CENSYS_APIKEY=bar
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ Cargo.lock


# Added by cargo

/target

# Ignore local .env file
.env
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "subscan"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
description = "Next generation subdomain enumeration tool"
readme = "README.md"
Expand All @@ -18,7 +18,9 @@ serde_json = "1.0.128"
async-trait = "0.1.82"
lazy_static = "1.5.0"
enum_dispatch = "0.3.13"
dotenvy = "0.15.7"

[dev-dependencies]
md5 = "0.7.0"
automod = "1.0.14"
stubr = "0.6.2"
10 changes: 5 additions & 5 deletions src/bin/subscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@ async fn main() {
let mut module = item.lock().await;
let requester = module.requester().await.unwrap();

if module.name().await != "Censys" {
continue;
}

println!(
"{:#?} {:p}",
requester.lock().await.config().await,
requester
requester,
);

if module.name().await != "DuckDuckGo" {
continue;
}

println!("Running...{}({})", module.name().await, cli.domain.clone());

let res = module.run(cli.domain.clone()).await;
Expand Down
14 changes: 13 additions & 1 deletion src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use crate::{
modules::engines::{bing, duckduckgo, google, yahoo},
modules::{
engines::{bing, duckduckgo, google, yahoo},
integrations::{alienvault, anubis, bevigil, binaryedge, bufferover, builtwith, censys},
},
SubscanModule,
};
use lazy_static::lazy_static;
Expand All @@ -10,10 +13,19 @@ lazy_static! {
/// as a [`SubscanModule`], all modules must be compatible
/// with [`SubscanModuleInterface`](crate::interfaces::module::SubscanModuleInterface) trait
pub static ref ALL_MODULES: Vec<Mutex<SubscanModule>> = vec![
// Search engines
SubscanModule::new(google::Google::new()),
SubscanModule::new(yahoo::Yahoo::new()),
SubscanModule::new(bing::Bing::new()),
SubscanModule::new(duckduckgo::DuckDuckGo::new()),
// API integrations
SubscanModule::new(alienvault::AlienVault::new()),
SubscanModule::new(anubis::Anubis::new()),
SubscanModule::new(bevigil::Bevigil::new()),
SubscanModule::new(binaryedge::Binaryedge::new()),
SubscanModule::new(bufferover::Bufferover::new()),
SubscanModule::new(builtwith::Builtwith::new()),
SubscanModule::new(censys::Censys::new()),
];
}

Expand Down
1 change: 1 addition & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// Environment variable namespace prefix shared by all Subscan
/// environment variables (e.g. `SUBSCAN_BEVIGIL_APIKEY`)
pub const SUBSCAN_ENV_NAMESPACE: &str = "SUBSCAN";
68 changes: 64 additions & 4 deletions src/enums.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::extractors::{html::HTMLExtractor, regex::RegexExtractor};
use crate::extractors::{html::HTMLExtractor, json::JSONExtractor, regex::RegexExtractor};
use crate::requesters::{chrome::ChromeBrowser, client::HTTPClient};
use enum_dispatch::enum_dispatch;

Expand All @@ -9,8 +9,25 @@ use enum_dispatch::enum_dispatch;
/// technical details please follow up `enum_dispatch` package
#[enum_dispatch(SubdomainExtractorInterface)]
pub enum SubdomainExtractorDispatcher {
    /// HTML extractor type to extract subdomain addresses
    /// from any HTML content. See the [`HTMLExtractor`]
    /// struct definition for examples and technical details
    HTMLExtractor(HTMLExtractor),
    /// Regex extractor type that extracts subdomain
    /// addresses from string content using a regex pattern
    /// built from the given domain address. See the
    /// [`RegexExtractor`] for technical details and example usages
    RegexExtractor(RegexExtractor),
    /// JSON extractor type that extracts subdomains from
    /// JSON content. Note that [`JSONExtractor`] was created
    /// as a wrapper struct to be compatible with
    /// [`SubdomainExtractorInterface`](crate::interfaces::extractor::SubdomainExtractorInterface),
    /// so a parser function must be implemented and given to
    /// this wrapper struct. See the [`JSONExtractor`] struct
    /// and [`InnerExtractMethod`](crate::types::core::InnerExtractMethod)
    /// type for examples and technical details
    JSONExtractor(JSONExtractor),
}

/// Dispatcher enumeration to decide requester types
Expand All @@ -22,12 +39,55 @@ pub enum SubdomainExtractorDispatcher {
pub enum RequesterDispatcher {
    /// Chrome browser struct definition as an enum value.
    /// With this requester type a Chrome browser runs and
    /// all HTTP requests are made through the browser. It has
    /// pros compared to the [`HTTPClient`] requester, like
    /// running JS, rendering pages, etc. See the
    /// [`ChromeBrowser`] definition to learn its usage
    ChromeBrowser(ChromeBrowser),
    /// Simple HTTP client interface to make requests; it does
    /// not allow running JS, rendering pages or any user
    /// interface. It just sends HTTP requests via [`reqwest`].
    /// See the [`HTTPClient`] struct definition for examples
    /// and technical details
    HTTPClient(HTTPClient),
}

/// Authentication methods for API calls.
/// [`GenericAPIIntegrationModule`](crate::modules::generics::api_integration::GenericAPIIntegrationModule)
/// uses these to apply the correct auth method. See the
/// variant descriptions to learn how each one works
// Derive the full comparison/debug set: a public enum of plain data
// should be `Debug` for diagnostics, `Clone` so callers can store it,
// and `Eq` since `PartialEq` here is total (only `String` payloads)
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum APIAuthMethod {
    /// Some APIs expect the API key in a request header.
    /// When this auth type is selected, the API key is
    /// added to the request headers under the given header name
    APIKeyAsHeader(String),
    /// Used when the API requires the API key as a query
    /// parameter. When this method is chosen, the API key is
    /// appended to the URL as a query parameter with the
    /// given parameter name
    APIKeyAsQueryParam(String),
    /// This auth method indicates that the query URL itself
    /// already includes the API key (as part of the URL)
    APIKeyAsURLSlug,
    /// No authentication is applied
    NoAuth,
}

impl APIAuthMethod {
    /// Reports whether any auth method is selected, i.e. the
    /// method is anything other than [`APIAuthMethod::NoAuth`]
    ///
    /// # Examples
    ///
    /// ```
    /// use subscan::enums::APIAuthMethod;
    ///
    /// let as_header = APIAuthMethod::APIKeyAsHeader("X-API-Key".to_string());
    /// let no_auth = APIAuthMethod::NoAuth;
    ///
    /// assert!(as_header.is_set());
    /// assert!(!no_auth.is_set());
    /// ```
    pub fn is_set(&self) -> bool {
        self != &Self::NoAuth
    }
}
5 changes: 1 addition & 4 deletions src/extractors/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use async_trait::async_trait;
use scraper::{ElementRef, Html, Selector};
use std::collections::BTreeSet;

/// HTML extractor component to extract subdomain addresses
///
/// This object compatible with [`SubdomainExtractorInterface`]
/// and it uses `extract` method to extract subdomain addresses
/// from inner text by given `XPath` or `CSS` selector
Expand Down Expand Up @@ -57,7 +55,6 @@ impl SubdomainExtractorInterface for HTMLExtractor {
/// use subscan::extractors::html::HTMLExtractor;
/// use subscan::interfaces::extractor::SubdomainExtractorInterface;
/// use subscan::types::core::Subdomain;
/// use std::collections::BTreeSet;
///
/// #[tokio::main]
/// async fn main() {
Expand All @@ -69,7 +66,7 @@ impl SubdomainExtractorInterface for HTMLExtractor {
///
/// let result = extractor.extract(html, domain).await;
///
/// assert_eq!(result, BTreeSet::from([String::from("bar.foo.com")]));
/// assert_eq!(result, [Subdomain::from("bar.foo.com")].into());
/// }
/// ```
async fn extract(&self, content: String, domain: String) -> BTreeSet<Subdomain> {
Expand Down
73 changes: 73 additions & 0 deletions src/extractors/json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::interfaces::extractor::SubdomainExtractorInterface;
use crate::types::core::{InnerExtractMethod, Subdomain};
use async_trait::async_trait;
use serde_json;
use std::collections::BTreeSet;

/// JSON content parser wrapper struct. This object is compatible
/// with [`SubdomainExtractorInterface`] and uses its `extract`
/// method to extract subdomain addresses from JSON content.
/// A JSON parsing function must be given to this extractor.
/// Please follow the examples to learn usage techniques
pub struct JSONExtractor {
    // Caller-supplied function that pulls subdomains out of the
    // parsed JSON value for a given domain
    inner: InnerExtractMethod,
}

impl JSONExtractor {
    /// Creates a new [`JSONExtractor`] instance with the given
    /// inner extraction function
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use subscan::extractors::json::JSONExtractor;
    /// use std::collections::BTreeSet;
    /// use serde_json::Value;
    ///
    /// let inner = |_content: Value, _domain: String| {
    ///     BTreeSet::new()
    /// };
    ///
    /// let extractor = JSONExtractor::new(Box::new(inner));
    ///
    /// // do something with extractor instance
    /// ```
    pub fn new(inner: InnerExtractMethod) -> Self {
        Self { inner }
    }
}

#[async_trait]
impl SubdomainExtractorInterface for JSONExtractor {
    /// Main extraction method: parses the given content as JSON
    /// and delegates to the configured inner function to collect
    /// subdomain addresses
    ///
    /// # Examples
    ///
    /// ```
    /// use subscan::extractors::json::JSONExtractor;
    /// use subscan::interfaces::extractor::SubdomainExtractorInterface;
    /// use subscan::types::core::Subdomain;
    /// use std::collections::BTreeSet;
    /// use serde_json::Value;
    ///
    /// #[tokio::main]
    /// async fn main() {
    ///     let json = "{\"foo\": \"bar\"}".to_string();
    ///     let domain = "foo.com".to_string();
    ///
    ///     let func = |item: Value, _domain: String| {
    ///         [
    ///             Subdomain::from(item["foo"].as_str().unwrap())
    ///         ].into()
    ///     };
    ///     let extractor = JSONExtractor::new(Box::new(func));
    ///
    ///     let result = extractor.extract(json, domain).await;
    ///
    ///     assert_eq!(result, [Subdomain::from("bar")].into());
    /// }
    /// ```
    async fn extract(&self, content: String, domain: String) -> BTreeSet<Subdomain> {
        // Unparseable content falls back to the default JSON value
        // (null), so the inner function still runs and decides what
        // to return for it
        let parsed = serde_json::from_str(&content).unwrap_or_default();

        (self.inner)(parsed, domain)
    }
}
2 changes: 2 additions & 0 deletions src/extractors/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/// Subdomain extractor for HTML documents
pub mod html;
/// JSON extractor to extract subdomains from JSON content
pub mod json;
/// Extract subdomains with regex statement
pub mod regex;
17 changes: 8 additions & 9 deletions src/extractors/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ use async_trait::async_trait;
use regex::Match;
use std::collections::BTreeSet;

/// Regex extractor component
///
/// Generates subdomain pattern by given domain
/// address and extracts subdomains via this pattern
/// Regex extractor component generates subdomain pattern by
/// given domain address and extracts subdomains via this pattern.
/// Also this object compatible with [`SubdomainExtractorInterface`]
/// and it uses `extract` method
#[derive(Default)]
pub struct RegexExtractor {}

Expand All @@ -36,10 +36,9 @@ impl RegexExtractor {
/// ```
pub fn extract_one(&self, content: String, domain: String) -> Option<Subdomain> {
let pattern = generate_subdomain_regex(domain).unwrap();
let to_string = |matches: Match| matches.as_str().to_string();

pattern
.find(&content)
.map(|matches| matches.as_str().to_string())
pattern.find(&content).map(to_string)
}
}

Expand Down Expand Up @@ -67,10 +66,10 @@ impl SubdomainExtractorInterface for RegexExtractor {
/// let extractor = RegexExtractor::default();
/// let result = extractor.extract(content, domain).await;
///
/// assert_eq!(result, BTreeSet::from([
/// assert_eq!(result, [
/// Subdomain::from("bar.foo.com"),
/// Subdomain::from("baz.foo.com"),
/// ]));
/// ].into());
/// assert_eq!(result.len(), 2);
/// }
/// ```
Expand Down
51 changes: 0 additions & 51 deletions src/integrations/alienvault.rs

This file was deleted.

Loading

0 comments on commit 1026c9b

Please sign in to comment.