Skip to content

Commit

Permalink
support detecting which relaxed matches are lone emails
Browse files Browse the repository at this point in the history
This requires neither new API nor a second regular expression match
on each string, so I think it's the right way.
We can also add more subexpression names later which might be useful,
such as "web URL without scheme" or "URL scheme".

Updates #53.
  • Loading branch information
mvdan committed Jan 1, 2024
1 parent 7f5546e commit 09d66fb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
15 changes: 15 additions & 0 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,18 @@ func ExampleStrictMatchingScheme() {
// Output:
// [https://foo.com/dl]
}

// Example_filterEmails shows how to drop lone email addresses from the
// matches of the relaxed regular expression by inspecting the "relaxedEmail"
// named subexpression of each match.
func Example_filterEmails() {
	// The input mixes a lone email address, a scheme-less host and a full URL
	// so that the filter below actually has an email match to skip.
	s := "Email dev@foo.com about any issues with foo.com or https://foo.com/dl"
	rx := xurls.Relaxed()
	// Resolve the named group's index once, outside the loop.
	idxEmail := rx.SubexpIndex("relaxedEmail")
	for _, match := range rx.FindAllStringSubmatch(s, -1) {
		// The group is non-empty only when the email alternative matched.
		if match[idxEmail] != "" {
			continue // skip lone email addresses
		}
		fmt.Println(match[0])
	}
	// Output:
	// foo.com
	// https://foo.com/dl
}
7 changes: 5 additions & 2 deletions xurls.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ func relaxedExp() string {

hostName := `(?:` + domain + `|\[` + ipv6Addr + `\]|\b` + ipv4Addr + `\b)`
webURL := hostName + port + `(?:/` + pathCont + `|/)?`
email := `[a-zA-Z0-9._%\-+]+@` + domain
email := `(?P<relaxedEmail>[a-zA-Z0-9._%\-+]+@` + domain + `)`
return strictExp() + `|` + webURL + `|` + email + `|` + ipv6AddrMinusEmpty
}

Expand All @@ -179,7 +179,10 @@ func Strict() *regexp.Regexp {
}

// Relaxed produces a regexp that matches any URL matched by Strict, plus any
// URL with no scheme or email address.
// URL or email address with no scheme.
//
// Email addresses without a scheme match the `relaxedEmail` subexpression,
// which can be used to filter them as needed.
func Relaxed() *regexp.Regexp {
relaxedInit.Do(func() {
relaxedRe = regexp.MustCompile(relaxedExp())
Expand Down

0 comments on commit 09d66fb

Please sign in to comment.