Skip to content

Commit

Permalink
feat(fetch): impl fetch feature variants
Browse files Browse the repository at this point in the history
  • Loading branch information
EstebanBorai committed Aug 11, 2021
1 parent 555a2c4 commit 7a74d0e
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --all-features
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.2] - 2021-08-10
### Added
- `fetch` feature
- `fetch` function to retrieve HTML
- `fetch_partially` to fetch first 10 chunks (of arbitrary size) from the URL
- `fetch_with_limit`: same behavior as `fetch_partially`, but with a
caller-supplied chunk limit

## [0.0.1] - 2021-07-31
### Added
- `LinkPreview` struct implementation
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "link-preview"
version = "0.0.1"
version = "0.0.2"
authors = ["Esteban Borai <[email protected]>"]
edition = "2018"
description = "Retrieve website metadata such as title, description, preview image, author and more from OpenGraph, Google, Schema.org and Twitter compliant sites"
Expand All @@ -21,4 +21,5 @@ url = "2.2.2"
tokio = { version = "1.9.0", features = ["rt", "macros"] }

[features]
# Provide fetch capabilities
fetch = ["reqwest"]
108 changes: 108 additions & 0 deletions src/fetch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use reqwest::get;
use scraper::Html;
use std::string::FromUtf8Error;
use thiserror::Error;

#[derive(Error, Debug)]
pub enum Error {
#[error("Failed to fetch {0}. An error ocurred: {1}")]
FetchFailed(String, reqwest::Error),
#[error("Failed to parse response from {0}. An error ocurred: {1}")]
ParseError(String, reqwest::Error),
#[error("Failed to stream response chunks {0}. An error ocurred: {1}")]
StreamError(String, reqwest::Error),
#[error("Failed to parse bytes into UTF-8 while streaming response from {0}")]
InvalidUtf8(String, FromUtf8Error),
}

/// Fetches the provided URL and parses the full response body into a
/// `scraper::Html` document.
///
/// # Errors
///
/// Returns `Error::FetchFailed` if the request itself fails, or
/// `Error::ParseError` if the response body cannot be read as text.
pub async fn fetch(url: &str) -> Result<Html, Error> {
    let resp = get(url)
        .await
        .map_err(|err| Error::FetchFailed(url.to_string(), err))?;
    let html = resp
        .text()
        .await
        .map_err(|err| Error::ParseError(url.to_string(), err))?;

    Ok(Html::parse_document(&html))
}

/// Fetches the provided URL partially and parses whatever was received
/// into a `scraper::Html` document.
///
/// Equivalent to calling [`fetch_with_limit`] with a limit of 10 chunks.
/// Chunk sizes are arbitrary (determined by the server/transport), so the
/// amount of data read is not deterministic.
pub async fn fetch_partially(url: &str) -> Result<Html, Error> {
    fetch_with_limit(url, 10).await
}

/// Fetches the provided URL and retrieves an instance of `LinkPreview`
pub async fn fetch_with_limit(url: &str, limit: usize) -> Result<Html, Error> {
let mut laps = 0_usize;
let mut resp = get(url)
.await
.map_err(|err| Error::FetchFailed(url.to_string(), err))?;
let mut bytes: Vec<u8> = Vec::new();

while let Some(chunk) = resp
.chunk()
.await
.map_err(|err| Error::StreamError(url.to_string(), err))?
{
if laps >= limit {
break;
}

let ref mut chunk = chunk.to_vec();

bytes.append(chunk);
laps += 1;
}

let html = String::from_utf8(bytes).map_err(|err| Error::InvalidUtf8(url.to_string(), err))?;

Ok(Html::parse_document(&html))
}

#[cfg(test)]
mod tests {
    use crate::tests::REMOTE_FULL_FEATURED_HTML;
    use crate::LinkPreview;

    use super::{fetch, fetch_partially, fetch_with_limit};

    /// Expected `<title>` of the remote fixture document.
    const EXPECTED_TITLE: &str = "SEO Strategies for a better web";
    /// Expected meta description of the remote fixture document.
    const EXPECTED_DESCRIPTION: &str = "John Appleseed tells you his secrets on SEO for a better web experience by taking advantage of OpenGraph\'s Tags!";

    /// Shared assertions for the fixture page; keeps the three fetch-variant
    /// tests from repeating the expected strings.
    fn assert_full_featured_preview(preview: LinkPreview) {
        assert_eq!(preview.title.unwrap_or_default(), EXPECTED_TITLE);
        assert_eq!(preview.description.unwrap_or_default(), EXPECTED_DESCRIPTION);
    }

    // NOTE(review): these tests perform real HTTP requests against
    // raw.githubusercontent.com and will fail without network access.

    #[tokio::test]
    async fn fetches() {
        let html = fetch(REMOTE_FULL_FEATURED_HTML).await.unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }

    #[tokio::test]
    async fn fetches_page_partially() {
        let html = fetch_partially(REMOTE_FULL_FEATURED_HTML).await.unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }

    #[tokio::test]
    async fn fetches_page_with_limit_of_20() {
        let html = fetch_with_limit(REMOTE_FULL_FEATURED_HTML, 20)
            .await
            .unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }
}
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#[cfg(feature = "fetch")]
pub mod fetch;

pub mod html;
pub mod og;
pub mod preview;
Expand All @@ -12,4 +15,8 @@ mod tests {
pub const OG_COMPILANT_HTML: &[u8] = include_bytes!("../html/og_compilant.html");
pub const SCHEMA_COMPILANT_HTML: &[u8] = include_bytes!("../html/schema_compilant.html");
pub const TWITTER_COMPILANT_HTML: &[u8] = include_bytes!("../html/twitter_compilant.html");

#[cfg(feature = "fetch")]
pub const REMOTE_FULL_FEATURED_HTML: &str =
"https://raw.githubusercontent.com/EstebanBorai/link-preview/main/html/full_featured.html";
}
15 changes: 3 additions & 12 deletions src/preview.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#[cfg(feature = "fetch")]
use reqwest::get;

use scraper::Html;
use std::str::FromStr;
use std::string::FromUtf8Error;
Expand All @@ -16,6 +13,9 @@ use crate::twitter::{find_twitter_tag, TwitterMetaTag};
pub enum Error {
#[error("The provided byte slice contains invalid UTF-8 characters")]
InvalidUtf8(FromUtf8Error),
#[cfg(feature = "fetch")]
#[error("Failed to fetch {0}. An error ocurred: {1}")]
FailedToFetch(String, reqwest::Error),
}

#[derive(Debug)]
Expand All @@ -27,15 +27,6 @@ pub struct LinkPreview {
}

impl LinkPreview {
/// Fetches the provided URL and retrieves an instance of `LinkPreview`
#[cfg(feature = "fetch")]
pub async fn fetch(url: &str) -> Self {
let resp = get(url).await.unwrap();
let html = resp.text().await;

LinkPreview::from_str(html)
}

/// Attempts to find the description of the page in the following order:
///
/// - Document's `<link rel="canonical" /> element's `href` attribute
Expand Down

0 comments on commit 7a74d0e

Please sign in to comment.