Skip to content

Commit

Permalink
Refactor regex
Browse files Browse the repository at this point in the history
  • Loading branch information
rushmorem committed Jan 5, 2017
1 parent 1654218 commit 8aab6a8
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 7 deletions.
51 changes: 45 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ use std::fmt;

pub use errors::{Result, Error};

use regex::Regex;
use regex::RegexSet;
use errors::ErrorKind;
#[cfg(feature = "remote_list")]
use native_tls::TlsConnector;
Expand Down Expand Up @@ -141,7 +141,45 @@ pub enum Host {
}

lazy_static! {
// NOTE(review): `LABEL` is defined twice in this block as shown; the
// single-`Regex` form on the next line looks like the pre-refactor
// version that the `RegexSet` definition below replaces -- confirm that
// only one definition exists in the real file.
static ref LABEL: Regex = Regex::new(r"^([[:alnum:]]+|[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+)$").unwrap();
// Set of regular expressions describing a domain name label
static ref LABEL: RegexSet = {
let exprs = vec![
// can be any combination of alphanumeric characters
r"^[[:alnum:]]+$",
// or it can start with one or more alphanumeric characters,
// then optionally be followed by any combination of
// alphanumeric characters and dashes before finally
// ending with one or more alphanumeric characters
r"^[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+$",
];
RegexSet::new(exprs).unwrap()
};

// Set of regular expressions describing the local-part
// of an email address
static ref LOCAL: RegexSet = {
// these characters can be anywhere in the expression
let global = r#"[[:alnum:]!#$%&'*+/=?^_`{|}~-]"#;
// non-ASCII characters (can also be unquoted)
let non_ascii = r#"[^\x00-\x7F]"#;
// additional characters that are only used by the
// quoted-string pattern below
let quoted = r#"["(),\\:;<>@\[\]. ]"#;
// a run of `global` characters followed by a run of
// `non_ascii` characters
// NOTE(review): both character classes are starred, so this group
// also matches the empty string; the patterns built from it may
// therefore accept an empty local-part or a leading/trailing `.`
// -- verify against the callers' expectations.
let combined = format!(r#"({}*{}*)"#, global, non_ascii);

let exprs = vec![
// can be any combination of allowed characters
format!(r#"^{}+$"#, combined),
// can be any combination of allowed characters
// separated by a . in between
format!(r#"^({0}+[.]?{0}+)+$"#, combined),
// can be a quoted string made of the allowed characters
// plus the additional `quoted` characters
format!(r#"^"({}*{}*)*"$"#, combined, quoted),
];

RegexSet::new(exprs).unwrap()
};
}

/// Converts a type into a Url object
Expand Down Expand Up @@ -379,6 +417,8 @@ impl List {
// http://girders.org/blog/2013/01/31/dont-rfc-validate-email-addresses/
// https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
// https://hackernoon.com/the-100-correct-way-to-validate-email-addresses-7c4818f24643#.pgcir4z3e
// http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx/
// https://tools.ietf.org/html/rfc6530#section-10.1
pub fn parse_email(&self, address: &str) -> Result<Host> {
let mut parts = address.rsplitn(2, "@");
let host = match parts.next() {
Expand All @@ -389,10 +429,10 @@ impl List {
Some(local) => local,
None => { return Err(ErrorKind::InvalidEmail.into()); }
};
if local.starts_with(".")
|| local.ends_with(".")
|| local.chars().count() > 64
if local.chars().count() > 64
|| address.chars().count() > 254
|| (!local.starts_with('"') && local.contains(".."))
|| !LOCAL.is_match(local)
{
return Err(ErrorKind::InvalidEmail.into());
}
Expand Down Expand Up @@ -596,7 +636,6 @@ impl Domain {
return Err(ErrorKind::InvalidDomain(domain.into()).into());
}
let input = domain;
//let domain = input.trim().trim_right_matches('.');
let (domain, res) = domain_to_unicode(input);
if let Err(errors) = res {
return Err(ErrorKind::Uts46(errors).into());
Expand Down
42 changes: 41 additions & 1 deletion src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,14 @@ fn list_behaviour() {
"#!$%&'*+-/=?^_`{}|[email protected]",
"[email protected]",
"user@[fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123]",
r#""Abc\@def"@example.com"#,
r#""Fred Bloggs"@example.com"#,
r#""Joe\\Blow"@example.com"#,
r#""Abc@def"@example.com"#,
r#"customer/[email protected]"#,
"[email protected]",
"!def!xyz%[email protected]",
"[email protected]",
];
for email in emails {
println!("{} should be valid", email);
Expand All @@ -252,6 +260,27 @@ fn list_behaviour() {
pass!()
});

ctx.it("should reject invalid email addresses", || {
let emails = vec![
"Abc.example.com",
"A@b@[email protected]",
r#"a"b(c)d,e:f;g<h>i[j\k][email protected]"#,
r#""just"not"[email protected]"#,
r#"this is"not\[email protected]"#,
r#"this\ still\"not\\[email protected]"#,
"1234567890123456789012345678901234567890123456789012345678901234+x@example.com",
"[email protected]",
"[email protected]",
" [email protected]",
"[email protected] ",
];
for email in emails {
println!("{} should not be valid", email);
assert!(list.parse_email(email).is_err());
}
pass!()
});

ctx.it("should allow parsing emails as str", || {
assert!(list.parse_str("[email protected]").unwrap().is_domain());
pass!()
Expand All @@ -263,7 +292,18 @@ fn list_behaviour() {
});

ctx.it("should allow parsing IDN email addresses", || {
assert!(list.parse_email("用户@例子.广告").is_ok());
let emails = vec![
r#"Pelé@example.com"#,
r#"δοκιμή@παράδειγμα.δοκιμή"#,
r#"我買@屋企.香港"#,
r#"甲斐@黒川.日本"#,
r#"чебурашка@ящик-с-апельсинами.рф"#,
r#"संपर्क@डाटामेल.भारत"#,
];
for email in emails {
println!("{} should be valid", email);
assert!(list.parse_email(email).is_ok());
}
pass!()
});
});
Expand Down

0 comments on commit 8aab6a8

Please sign in to comment.