Skip to content

Commit

Permalink
Do not use regexes for parsing simple URLs (Fixes #207)
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelgrinberg committed Feb 18, 2024
1 parent a3363c7 commit 38262c5
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 26 deletions.
93 changes: 67 additions & 26 deletions src/microdot/microdot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import asyncio
import io
import json
import re
import time

try:
Expand Down Expand Up @@ -798,8 +797,9 @@ def send_file(cls, filename, status_code=200, content_type=None,
class URLPattern():
def __init__(self, url_pattern):
self.url_pattern = url_pattern
self.pattern = ''
self.args = []
self.segments = []
self.regex = None
pattern = ''
use_regex = False
for segment in url_pattern.lstrip('/').split('/'):
if segment and segment[0] == '<':
Expand All @@ -811,42 +811,83 @@ def __init__(self, url_pattern):
else:
type_ = 'string'
name = segment
parser = None
if type_ == 'string':
pattern = '[^/]+'
parser = self._string_segment
pattern += '/([^/]+)'
elif type_ == 'int':
pattern = '-?\\d+'
parser = self._int_segment
pattern += '/(-?\\d+)'
elif type_ == 'path':
pattern = '.+'
use_regex = True
pattern += '/(.+)'
elif type_.startswith('re:'):
pattern = type_[3:]
use_regex = True
pattern += '/({pattern})'.format(pattern=type_[3:])
else:
raise ValueError('invalid URL segment type')
use_regex = True
self.pattern += '/({pattern})'.format(pattern=pattern)
self.args.append({'type': type_, 'name': name})
self.segments.append({'parser': parser, 'name': name,
'type': type_})
else:
self.pattern += '/{segment}'.format(segment=segment)
pattern += '/' + segment
self.segments.append({'parser': self._static_segment(segment)})
if use_regex:
self.pattern = re.compile('^' + self.pattern + '$')
import re
self.regex = re.compile('^' + pattern + '$')

def match(self, path):
    """Match a request path against this URL pattern.

    Returns a dict mapping each named segment to its parsed value when
    ``path`` matches (an empty dict when the pattern has no dynamic
    segments), or ``None`` when it does not match.
    """
    args = {}
    if self.regex:
        # Patterns containing ``path`` or ``re:`` segments are matched with
        # the regular expression compiled in the constructor.
        g = self.regex.match(path)
        if not g:
            return
        i = 1
        for segment in self.segments:
            if 'name' not in segment:
                # static segments have no capture group
                continue
            value = g.group(i)
            if segment['type'] == 'int':
                value = int(value)
            args[segment['name']] = value
            i += 1
    else:
        # Simple patterns are matched one path component at a time, using
        # the parser stored with each segment.
        if not path.startswith('/'):
            return
        path = path[1:]
        for segment in self.segments:
            if path is None:
                # the path has fewer components than the pattern
                return
            arg, path = segment['parser'](path)
            if arg is None:
                return
            if 'name' in segment:
                # Reject empty components, but compare against '' rather
                # than testing truthiness: an integer argument of 0 is a
                # valid value and must not be discarded.
                if arg == '':
                    return
                args[segment['name']] = arg
        if path is not None:
            # the path has more components than the pattern
            return
    return args

def _static_segment(self, segment):
def _static(value):
s = value.split('/', 1)
if s[0] == segment:
return '', s[1] if len(s) > 1 else None
return None, None
return _static

def _string_segment(self, value):
s = value.split('/', 1)
return s[0], s[1] if len(s) > 1 else None

def _int_segment(self, value):
s = value.split('/', 1)
try:
return int(s[0]), s[1] if len(s) > 1 else None
except ValueError:
return None, None


class HTTPException(Exception):
def __init__(self, status_code, reason=None):
Expand Down
7 changes: 7 additions & 0 deletions tests/test_url_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ def test_static(self):
p = URLPattern('/')
self.assertEqual(p.match('/'), {})
self.assertIsNone(p.match('/foo'))
self.assertIsNone(p.match('foo'))
self.assertIsNone(p.match(''))

p = URLPattern('/foo/bar')
self.assertEqual(p.match('/foo/bar'), {})
Expand All @@ -23,6 +25,8 @@ def test_string_argument(self):
p = URLPattern('/<arg>')
self.assertEqual(p.match('/foo'), {'arg': 'foo'})
self.assertIsNone(p.match('/'))
self.assertIsNone(p.match(''))
self.assertIsNone(p.match('foo/'))
self.assertIsNone(p.match('/foo/'))

p = URLPattern('/<arg>/')
Expand Down Expand Up @@ -82,7 +86,10 @@ def test_regex_argument(self):
p = URLPattern('/users/<re:[a-c]+:id>')
self.assertEqual(p.match('/users/ab'), {'id': 'ab'})
self.assertEqual(p.match('/users/bca'), {'id': 'bca'})
self.assertIsNone(p.match('/users'))
self.assertIsNone(p.match('/users/'))
self.assertIsNone(p.match('/users/abcd'))
self.assertIsNone(p.match('/users/abcdx'))

def test_many_arguments(self):
p = URLPattern('/foo/<path:path>/<int:id>/bar/<name>')
Expand Down

0 comments on commit 38262c5

Please sign in to comment.