Skip to content

Commit

Permalink
Fix catastrophic backtracking (#21)
Browse files Browse the repository at this point in the history
* Prepare comment indent

* Fix catastrophic backtracking using atomic-grouping

* Add previously failing edge cases to the tests
  • Loading branch information
bartgloudemans authored and thewilkybarkid committed Feb 18, 2017
1 parent c6dc4e2 commit 1fe74ab
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 15 deletions.
33 changes: 18 additions & 15 deletions src/Misd/Linkify/Linkify.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ protected function linkify($text, $urls = true, $emails = true, array $options =
/**
* Add HTML links to URLs in plain text.
*
* @see http://www.regular-expressions.info/catastrophic.html For more information on atomic grouping,
* which this regex uses to prevent catastrophic backtracking.
*
* @param string $text Text to linkify.
* @param array $options Options, 'attr' key being the attributes to add to the links, with a preceding space.
*
Expand All @@ -139,23 +142,23 @@ protected function linkifyUrls($text, $options = array('attr' => ''))
{
$pattern = '~(?xi)
(?:
((ht|f)tps?://) # scheme://
| # or
www\d{0,3}\. # "www.", "www1.", "www2." ... "www999."
| # or
www\- # "www-"
| # or
[a-z0-9.\-]+\.[a-z]{2,4}(?=/) # looks like domain name followed by a slash
((ht|f)tps?://) # scheme://
| # or
www\d{0,3}\. # "www.", "www1.", "www2." ... "www999."
| # or
www\- # "www-"
| # or
[a-z0-9.\-]+\.[a-z]{2,4}(?=/) # looks like domain name followed by a slash
)
(?: # Zero or more:
[^\s()<>]+ # Run of non-space, non-()<>
| # or
\(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
(?: # Zero or more:
[^\s()<>]+ # Run of non-space, non-()<>
| # or
\((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
)*
(?: # End with:
\(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
| # or
[^\s`!\-()\[\]{};:\'".,<>?«»“”‘’] # not a space or one of these punct chars
(?: # End with:
\((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
| # or
[^\s`!\-()\[\]{};:\'".,<>?«»“”‘’] # not a space or one of these punct chars
)
~';

Expand Down
12 changes: 12 additions & 0 deletions tests/data/url.json
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,18 @@
]
},
"expected": "<a href=\"http://www.example.com\" class=\"foo bar\" rel=\"nofollow\">www.example.com</a>"
},
{
"test": "https://www.example.com/a_(aaaaaaaaaaaaaaaa)",
"expected": "<a href=\"https://www.example.com/a_(aaaaaaaaaaaaaaaa)\">https://www.example.com/a_(aaaaaaaaaaaaaaaa)</a>"
},
{
"test": "https://www.example.com/a_(aaaaaaaaaaaaaaaaa)",
"expected": "<a href=\"https://www.example.com/a_(aaaaaaaaaaaaaaaaa)\">https://www.example.com/a_(aaaaaaaaaaaaaaaaa)</a>"
},
{
"test": "https://www.example.com/a_(aaaaaaaaaaaaaaaaa)a",
"expected": "<a href=\"https://www.example.com/a_(aaaaaaaaaaaaaaaaa)a\">https://www.example.com/a_(aaaaaaaaaaaaaaaaa)a</a>"
}
]
}

0 comments on commit 1fe74ab

Please sign in to comment.