From 7490a0e93b8a8d71469852a27b31a8cc8dc7044b Mon Sep 17 00:00:00 2001 From: Stephen Solka Date: Sat, 10 Apr 2021 11:09:45 -0400 Subject: [PATCH] oembed support --- Cargo.lock | 99 ++++++++++++++++++++++------------------- Cargo.toml | 4 +- manifest.json | 4 +- src/fetch.rs | 3 ++ src/lib.rs | 33 +++++--------- src/transform.rs | 51 +++++++++++++++++++++ src/transform/oembed.rs | 76 +++++++++++++++++++++++++++++++ 7 files changed, 198 insertions(+), 72 deletions(-) create mode 100644 src/transform.rs create mode 100644 src/transform/oembed.rs diff --git a/Cargo.lock b/Cargo.lock index 3ed07fc..b508437 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,9 +163,9 @@ dependencies = [ "log", "mac", "markup5ever 0.10.0", - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", - "syn 1.0.64", + "syn 1.0.69", ] [[package]] @@ -207,9 +207,9 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "js-sys" -version = "0.3.49" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc15e39392125075f60c95ba416f5381ff6c3a948ff02ab12464715adf56c821" +checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c" dependencies = [ "wasm-bindgen", ] @@ -222,9 +222,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.90" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4aede83fc3617411dc6993bc8c70919750c1c257c6ca6a502aed6e0e2394ae" +checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" [[package]] name = "linked-hash-map" @@ -318,6 +318,8 @@ dependencies = [ "html2md", "js-sys", "readability", + "serde", + "serde_json", "thiserror", "url", "wasm-bindgen", @@ -437,9 +439,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" dependencies = [ "unicode-xid 0.2.1", ] @@ -459,7 +461,7 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", ] [[package]] @@ -673,19 +675,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd761ff957cb2a45fbb9ab3da6512de9de55872866160b23c25f1a841e99d29f" +checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1800f7693e94e186f5e25a28291ae1570da908aff7d97a095dec1e56ff99069b" +checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", - "syn 1.0.64", + "syn 1.0.69", ] [[package]] @@ -747,7 +752,7 @@ checksum = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6" dependencies = [ "phf_generator 0.7.24", "phf_shared 0.7.24", - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", "string_cache_shared", ] @@ -760,7 +765,7 @@ checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" dependencies = [ "phf_generator 0.8.0", "phf_shared 0.8.0", - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", ] @@ -783,11 +788,11 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fd9d1e9976102a03c542daa2eff1b43f9d72306342f3f8b3ed5fb8908195d6f" +checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", "unicode-xid 0.2.1", ] @@ -818,9 +823,9 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", - "syn 1.0.64", + "syn 1.0.69", ] [[package]] @@ -836,9 +841,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317cca572a0e89c3ce0ca1f1bdc9369547fe318a683418e42ac8f59d14701023" +checksum = "5b5220f05bb7de7f3f53c7c065e1199b3172696fe2db9f9c4d8ad9b4ee74c342" dependencies = [ "tinyvec_macros", ] @@ -851,9 +856,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "unicode-bidi" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0" dependencies = [ "matches", ] @@ -919,9 +924,9 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "walkdir" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", "winapi", @@ -942,34 +947,36 @@ checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasm-bindgen" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fe8f61dba8e5d645a4d8132dc7a0a66861ed5e1045d2c0ed940fab33bac0fbe" +checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9" dependencies = [ "cfg-if", + "serde", + "serde_json", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046ceba58ff062da072c7cb4ba5b22a37f00a302483f7e2a6cdc18fedbdc1fd3" +checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae" dependencies = [ "bumpalo", "lazy_static", "log", - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", - "syn 1.0.64", + "syn 1.0.69", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.22" +version = "0.4.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73157efb9af26fb564bb59a009afd1c7c334a44db171d280690d0c3faaec3468" +checksum = "81b8b767af23de6ac18bf2168b690bed2902743ddf0fb39252e36f9e2bfc63ea" dependencies = [ "cfg-if", "js-sys", @@ -979,9 +986,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9aa01d36cda046f797c57959ff5f3c615c9cc63997a8d545831ec7976819b" +checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f" dependencies = [ "quote 1.0.9", "wasm-bindgen-macro-support", @@ -989,28 +996,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96eb45c1b2ee33545a813a92dbb53856418bf7eb54ab34f7f7ff1448a5b3735d" +checksum = "d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c" dependencies = [ - "proc-macro2 1.0.24", + "proc-macro2 1.0.26", "quote 1.0.9", - "syn 1.0.64", + "syn 1.0.69", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7148f4696fb4960a346eaa60bbfb42a1ac4ebba21f750f75fc1375b098d5ffa" +checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489" [[package]] name = "web-sys" -version = "0.3.49" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fe19d70f5dacc03f6e46777213facae5ac3801575d56ca6cbd4c93dcd12310" +checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 7bda141..7259cd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ edition = "2018" crate-type = ["cdylib"] [dependencies] -wasm-bindgen = "0.2" +wasm-bindgen = { version = "^0.2", features = ["serde-serialize"] } wasm-bindgen-futures = "0.4.22" js-sys = "0.3.49" url = "1" @@ -16,6 +16,8 @@ html2md = "0.2.10" thiserror = "1.0.24" frontmatter = "^0.4" yaml-rust = "^0.4" +serde = { version = "^1.0", features = ["derive"] } +serde_json = "^1.0" [dependencies.readability] git = "https://github.com/trashhalo/readability.git" diff --git a/manifest.json b/manifest.json index 9ae5f01..2b39058 100644 --- a/manifest.json +++ b/manifest.json @@ -1,9 +1,9 @@ { "id": "extract-url", "name": "Extract url content", - "version": "0.5.0", + "version": "0.6.0", "description": "Extract url converting content into markdown", "author": "Stephen Solka", "authorUrl": "https://github.com/trashhalo", "isDesktopOnly": true -} \ No newline at end of file +} diff --git a/src/fetch.rs b/src/fetch.rs index 9cedf45..fff6a2e 100644 --- a/src/fetch.rs +++ b/src/fetch.rs @@ -11,4 +11,7 @@ extern "C" { #[wasm_bindgen(catch, method)] pub fn text(this: &Response) -> Result; + + #[wasm_bindgen(catch, method)] + pub fn json(this: &Response) -> Result; } diff --git a/src/lib.rs b/src/lib.rs index c1bae15..bf1f48b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,14 @@ mod fetch; mod obsidian; -use html2md::parse_html; +mod transform; use js_sys::{Error, JsString, Promise}; -use readability::extractor::extract; use std::rc::Rc; use thiserror::Error; use url::Url; use wasm_bindgen::prelude::*; use wasm_bindgen::JsCast; use wasm_bindgen_futures::{future_to_promise, JsFuture}; -use yaml_rust::emitter::{YamlEmitter, EmitError}; +use yaml_rust::emitter::{EmitError, YamlEmitter}; use yaml_rust::scanner::ScanError; #[wasm_bindgen] @@ -80,26 +79,26 @@ pub fn onload(plugin: obsidian::Plugin) { #[derive(Error, Debug)] pub enum ExtractError { - #[error("url did not parse")] + #[error("url did not parse. {0}")] Parse(#[from] url::ParseError), - #[error("url not readable")] - Read(#[from] readability::error::Error), - #[error("url had not content")] NoContent, #[error("fetch error `{0}`")] Fetch(String), - #[error("select a url to extract or add link to your frontmatter")] + #[error("select a url to extract or add link to your frontmatter. {0}")] NoUrlFrontmatter(#[from] FrontmatterError), #[error("expected view to be MarkdownView but was not")] WrongView, - #[error("error serializing front matter")] + #[error("error serializing front matter. {0}")] FrontmatterWrite(#[from] EmitError), + + #[error("error transforming content. {0}")] + Transform(#[from] transform::TransformError), } impl std::convert::From for ExtractError { @@ -149,26 +148,14 @@ async fn convert_url_to_markdown( title_only: bool, url_str: String, ) -> Result { - let url = Url::parse(&url_str)?; + let ref url = Url::parse(&url_str)?; let resp_value = JsFuture::from(fetch::with_url(&url_str)).await?; let resp: fetch::Response = resp_value.dyn_into()?; let body = JsFuture::from(resp.text()?) .await? .as_string() .ok_or_else(|| ExtractError::NoContent)?; - let ref mut b = body.as_bytes(); - let readable = extract(b, &url)?; - - Ok(if title_only { - format!("[{}]({})", readable.title, url_str) - } else { - format!( - "# [{}]({})\n{}", - readable.title, - url_str, - parse_html(&readable.content) - ) - }) + Ok(transform::transform_url(url, title_only, body).await?) } #[derive(Error, Debug)] diff --git a/src/transform.rs b/src/transform.rs new file mode 100644 index 0000000..6365e70 --- /dev/null +++ b/src/transform.rs @@ -0,0 +1,51 @@ +mod oembed; +use html2md::parse_html; +use readability::extractor::extract; +use thiserror::Error; +use url::Url; + +#[derive(Error, Debug)] +pub enum TransformError { + #[error("url not readable. {0}")] + Read(#[from] readability::error::Error), + + #[error("error converting oembed data. {0}")] + Oembed(#[from] oembed::OembedError), +} + +pub async fn readable_content(body: String, url: &Url) -> Result { + let ref mut b = body.as_bytes(); + let readable = extract(b, url)?; + + Ok(format!( + "# [{}]({})\n{}", + readable.title, + url, + parse_html(&readable.content) + )) +} + +pub async fn readable_title(body: String, url: &Url) -> Result { + let ref mut b = body.as_bytes(); + let readable = extract(b, url)?; + + Ok(format!("[{}]({})", readable.title, url)) +} + +pub async fn transform_url( + url: &Url, + title_only: bool, + body: String, +) -> Result { + if title_only { + match oembed::oembed_title(body.clone(), url).await { + Ok(o) => Ok(o), + Err(_) => readable_title(body.clone(), url).await, + } + } else { + match oembed::oembed_content(body.clone(), url).await { + Ok(o) => Ok(o), + Err(_) => readable_content(body.clone(), url).await, + } + } +} diff --git a/src/transform/oembed.rs b/src/transform/oembed.rs new file mode 100644 index 0000000..f695c98 --- /dev/null +++ b/src/transform/oembed.rs @@ -0,0 +1,76 @@ +use crate::fetch; +use html2md::parse_html; +use serde::Deserialize; +use serde_json; +use thiserror::Error; +use url::Url; +use wasm_bindgen::{JsCast, JsValue}; +use wasm_bindgen_futures::JsFuture; + +#[derive(Error, Debug)] +pub enum OembedError { + #[error("Url missing oembed info")] + NoLink, + + #[error("Url missing oembed info")] + NoHtml, + + #[error("Error fetching url for oembed")] + Fetch(String), + + #[error("Error fetching url for oembed. {0}")] + Url(#[from] url::ParseError), + + #[error("Error serializing oembed data. {0}")] + Serde(#[from] serde_json::error::Error), +} + +impl std::convert::From for OembedError { + fn from(err: JsValue) -> Self { + if let Some(err_val) = err.as_string() { + OembedError::Fetch(format!("fetch error {}", err_val)) + } else { + OembedError::Fetch(String::from("fetch error")) + } + } +} + +#[derive(Debug, Deserialize)] +pub struct OembedData { + pub html: Option, + pub title: String, +} + +pub async fn oembed_content(_body: String, url: &Url) -> Result { + let mut href = Url::parse("https://noembed.com/embed")?; + href.query_pairs_mut().append_pair("url", &url.to_string()); + let resp_value = JsFuture::from(fetch::with_url(&href.to_string())).await?; + let resp: fetch::Response = resp_value.dyn_into()?; + let body = JsFuture::from(resp.json()?).await?; + let data: OembedData = body.into_serde()?; + match data.html { + None => Err(OembedError::NoHtml), + Some(html) => { + if html.contains("iframe") { + Ok(format!("# [{}]({})\n{}", data.title, url, html)) + } else { + Ok(format!( + "# [{}]({})\n{}", + data.title, + url, + parse_html(&html) + )) + } + } + } +} + +pub async fn oembed_title(_body: String, url: &Url) -> Result { + let mut href = Url::parse("https://noembed.com/embed")?; + href.query_pairs_mut().append_pair("url", &url.to_string()); + let resp_value = JsFuture::from(fetch::with_url(&href.to_string())).await?; + let resp: fetch::Response = resp_value.dyn_into()?; + let body = JsFuture::from(resp.json()?).await?; + let data: OembedData = body.into_serde()?; + Ok(format!("[{}]({})", data.title, url)) +}