Skip to content

Commit

Permalink
feat(fetch): impl fetch feature variants
Browse files Browse the repository at this point in the history
  • Loading branch information
EstebanBorai committed Aug 11, 2021
1 parent 555a2c4 commit 7a74d0e
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --all-features
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.2] - 2021-08-10
### Added
- `fetch` feature
- `fetch` function to retrieve HTML
- `fetch_partially` to fetch first 10 chunks (of arbitrary size) from the URL
- `fetch_with_limit`: same behavior as `fetch_partially`, but with a
caller-supplied chunk limit

## [0.0.1] - 2021-07-31
### Added
- `LinkPreview` struct implementation
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "link-preview"
version = "0.0.1"
version = "0.0.2"
authors = ["Esteban Borai <[email protected]>"]
edition = "2018"
description = "Retrieve website metadata such as title, description, preview image, author and more from OpenGraph, Google, Schema.org and Twitter compliant sites"
Expand All @@ -21,4 +21,5 @@ url = "2.2.2"
tokio = { version = "1.9.0", features = ["rt", "macros"] }

[features]
# Provide fetch capabilities
fetch = ["reqwest"]
108 changes: 108 additions & 0 deletions src/fetch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use reqwest::get;
use scraper::Html;
use std::string::FromUtf8Error;
use thiserror::Error;

#[derive(Error, Debug)]
pub enum Error {
#[error("Failed to fetch {0}. An error ocurred: {1}")]
FetchFailed(String, reqwest::Error),
#[error("Failed to parse response from {0}. An error ocurred: {1}")]
ParseError(String, reqwest::Error),
#[error("Failed to stream response chunks {0}. An error ocurred: {1}")]
StreamError(String, reqwest::Error),
#[error("Failed to parse bytes into UTF-8 while streaming response from {0}")]
InvalidUtf8(String, FromUtf8Error),
}

/// Fetches the provided URL and parses the full response body into a
/// `scraper::Html` document.
///
/// # Errors
///
/// Returns `Error::FetchFailed` if the request itself fails, or
/// `Error::ParseError` if the response body cannot be read as text.
pub async fn fetch(url: &str) -> Result<Html, Error> {
    let resp = get(url)
        .await
        .map_err(|err| Error::FetchFailed(url.to_string(), err))?;
    let html = resp
        .text()
        .await
        .map_err(|err| Error::ParseError(url.to_string(), err))?;

    Ok(Html::parse_document(&html))
}

/// Fetches the provided URL partially and parses whatever was received
/// into a `scraper::Html` document.
///
/// Equivalent to calling [`fetch_with_limit`] with a limit of 10 chunks.
/// Chunk sizes are arbitrary (determined by the server/transport), so the
/// amount of data read is not deterministic.
pub async fn fetch_partially(url: &str) -> Result<Html, Error> {
    fetch_with_limit(url, 10).await
}

/// Fetches the provided URL and retrieves an instance of `LinkPreview`
pub async fn fetch_with_limit(url: &str, limit: usize) -> Result<Html, Error> {
let mut laps = 0_usize;
let mut resp = get(url)
.await
.map_err(|err| Error::FetchFailed(url.to_string(), err))?;
let mut bytes: Vec<u8> = Vec::new();

while let Some(chunk) = resp
.chunk()
.await
.map_err(|err| Error::StreamError(url.to_string(), err))?
{
if laps >= limit {
break;
}

let ref mut chunk = chunk.to_vec();

bytes.append(chunk);
laps += 1;
}

let html = String::from_utf8(bytes).map_err(|err| Error::InvalidUtf8(url.to_string(), err))?;

Ok(Html::parse_document(&html))
}

#[cfg(test)]
mod tests {
    use crate::tests::REMOTE_FULL_FEATURED_HTML;
    use crate::LinkPreview;

    use super::{fetch, fetch_partially, fetch_with_limit};

    /// Expected `<title>` of the remote fixture document.
    const EXPECTED_TITLE: &str = "SEO Strategies for a better web";
    /// Expected meta description of the remote fixture document.
    const EXPECTED_DESCRIPTION: &str = "John Appleseed tells you his secrets on SEO for a better web experience by taking advantage of OpenGraph\'s Tags!";

    /// Shared assertions for the fixture page; keeps the three fetch-variant
    /// tests from repeating the expected strings.
    fn assert_full_featured_preview(preview: LinkPreview) {
        assert_eq!(preview.title.unwrap_or_default(), EXPECTED_TITLE);
        assert_eq!(preview.description.unwrap_or_default(), EXPECTED_DESCRIPTION);
    }

    // NOTE(review): these tests perform real HTTP requests against
    // raw.githubusercontent.com and will fail without network access.

    #[tokio::test]
    async fn fetches() {
        let html = fetch(REMOTE_FULL_FEATURED_HTML).await.unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }

    #[tokio::test]
    async fn fetches_page_partially() {
        let html = fetch_partially(REMOTE_FULL_FEATURED_HTML).await.unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }

    #[tokio::test]
    async fn fetches_page_with_limit_of_20() {
        let html = fetch_with_limit(REMOTE_FULL_FEATURED_HTML, 20)
            .await
            .unwrap();

        assert_full_featured_preview(LinkPreview::from(&html));
    }
}
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#[cfg(feature = "fetch")]
pub mod fetch;

pub mod html;
pub mod og;
pub mod preview;
Expand All @@ -12,4 +15,8 @@ mod tests {
pub const OG_COMPILANT_HTML: &[u8] = include_bytes!("../html/og_compilant.html");
pub const SCHEMA_COMPILANT_HTML: &[u8] = include_bytes!("../html/schema_compilant.html");
pub const TWITTER_COMPILANT_HTML: &[u8] = include_bytes!("../html/twitter_compilant.html");

#[cfg(feature = "fetch")]
pub const REMOTE_FULL_FEATURED_HTML: &str =
"https://raw.githubusercontent.com/EstebanBorai/link-preview/main/html/full_featured.html";
}
15 changes: 3 additions & 12 deletions src/preview.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#[cfg(feature = "fetch")]
use reqwest::get;

use scraper::Html;
use std::str::FromStr;
use std::string::FromUtf8Error;
Expand All @@ -16,6 +13,9 @@ use crate::twitter::{find_twitter_tag, TwitterMetaTag};
pub enum Error {
#[error("The provided byte slice contains invalid UTF-8 characters")]
InvalidUtf8(FromUtf8Error),
#[cfg(feature = "fetch")]
#[error("Failed to fetch {0}. An error ocurred: {1}")]
FailedToFetch(String, reqwest::Error),
}

#[derive(Debug)]
Expand All @@ -27,15 +27,6 @@ pub struct LinkPreview {
}

impl LinkPreview {
/// Fetches the provided URL and retrieves an instance of `LinkPreview`
#[cfg(feature = "fetch")]
pub async fn fetch(url: &str) -> Self {
let resp = get(url).await.unwrap();
let html = resp.text().await;

LinkPreview::from_str(html)
}

/// Attempts to find the description of the page in the following order:
///
/// - Document's `<link rel="canonical" /> element's `href` attribute
Expand Down

0 comments on commit 7a74d0e

Please sign in to comment.