Skip to content

Commit

Permalink
Add support for parsing any arbitrary string
Browse files Browse the repository at this point in the history
Instead of having to specify whether you are parsing a domain,
a host or a URL, now you can just call `List::parse_str` and it
will extract a host from that string if any.
  • Loading branch information
rushmorem committed Jan 3, 2017
1 parent 15412ac commit e8ff171
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "publicsuffix"
description = "A robust and reliable library for parsing domain names"
version = "1.1.0"
version = "1.2.0"
license = "MIT/Apache-2.0"
repository = "https://github.com/rushmorem/publicsuffix"
documentation = "https://docs.rs/publicsuffix"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Add this crate to your `Cargo.toml`:

```toml
[dependencies.publicsuffix]
version = "1.1"
version = "1.2"

# This crate exposes the methods `List::fetch` and `List::from_url` as a
# feature named "remote_list". This feature is on by default. If you have
Expand Down
29 changes: 27 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ impl<'a> IntoUrl for &'a str {
}
}

impl<'a> IntoUrl for &'a String {
    /// Parses the borrowed `String` as a URL.
    ///
    /// Any failure reported by `Url::parse` is propagated to the caller
    /// through `?`.
    fn into_url(self) -> Result<Url> {
        let parsed = Url::parse(self.as_str())?;
        Ok(parsed)
    }
}

impl IntoUrl for String {
fn into_url(self) -> Result<Url> {
Ok(Url::parse(&self)?)
Expand Down Expand Up @@ -327,12 +333,31 @@ impl List {
}

/// Extracts Host from a URL
pub fn parse_url(&self, url: &str) -> Result<Host> {
match Url::parse(url)?.host_str() {
pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
match url.into_url()?.host_str() {
Some(host) => self.parse_host(host),
None => Err(ErrorKind::NoHost.into()),
}
}

/// Parses any arbitrary string
///
/// A string containing `//` is treated as a URL and handed to
/// `parse_url`; anything else is treated as a host and handed to
/// `parse_host`.
pub fn parse_str(&self, string: &str) -> Result<Host> {
    // Without a `//` there is nothing URL-like about the input,
    // so treat it as a bare host.
    if !string.contains("//") {
        return self.parse_host(string);
    }

    // A leading `//` might be a protocol relative URL. The protocol
    // itself is irrelevant here, so assume `https` to give the string
    // a fair chance with `Url::parse`.
    if string.starts_with("//") {
        let absolute = format!("https:{}", string);
        return self.parse_url(&absolute);
    }

    self.parse_url(string)
}
}

impl Host {
Expand Down
15 changes: 15 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,5 +220,20 @@ fn list_behaviour() {
assert!(list.parse_url("https://127.38.53.247:8080/list/").unwrap().is_ip());
pass!()
});

ctx.it("can be parsed from a standard URL using `parse_str`", || {
assert!(list.parse_str("https://127.38.53.247:8080/list/").unwrap().is_ip());
pass!()
});

ctx.it("can be parsed from a protocol-relative URL using `parse_str`", || {
assert!(list.parse_str("//127.38.53.247:8080/list/").unwrap().is_ip());
pass!()
});

ctx.it("can be parsed from a non-URL using `parse_str`", || {
assert!(list.parse_str("example.com").unwrap().is_domain());
pass!()
});
});
}

0 comments on commit e8ff171

Please sign in to comment.