Do not use regexes for parsing simple URLs (Fixes #207)

This commit is contained in:
Miguel Grinberg
2024-02-18 15:05:21 +00:00
parent a3363c7b8c
commit 38262c56d3
2 changed files with 74 additions and 26 deletions

View File

@@ -8,7 +8,6 @@ servers for MicroPython and standard Python.
import asyncio import asyncio
import io import io
import json import json
import re
import time import time
try: try:
@@ -798,8 +797,9 @@ class Response:
class URLPattern(): class URLPattern():
def __init__(self, url_pattern): def __init__(self, url_pattern):
self.url_pattern = url_pattern self.url_pattern = url_pattern
self.pattern = '' self.segments = []
self.args = [] self.regex = None
pattern = ''
use_regex = False use_regex = False
for segment in url_pattern.lstrip('/').split('/'): for segment in url_pattern.lstrip('/').split('/'):
if segment and segment[0] == '<': if segment and segment[0] == '<':
@@ -811,42 +811,83 @@ class URLPattern():
else: else:
type_ = 'string' type_ = 'string'
name = segment name = segment
parser = None
if type_ == 'string': if type_ == 'string':
pattern = '[^/]+' parser = self._string_segment
pattern += '/([^/]+)'
elif type_ == 'int': elif type_ == 'int':
pattern = '-?\\d+' parser = self._int_segment
pattern += '/(-?\\d+)'
elif type_ == 'path': elif type_ == 'path':
pattern = '.+' use_regex = True
pattern += '/(.+)'
elif type_.startswith('re:'): elif type_.startswith('re:'):
pattern = type_[3:] use_regex = True
pattern += '/({pattern})'.format(pattern=type_[3:])
else: else:
raise ValueError('invalid URL segment type') raise ValueError('invalid URL segment type')
use_regex = True self.segments.append({'parser': parser, 'name': name,
self.pattern += '/({pattern})'.format(pattern=pattern) 'type': type_})
self.args.append({'type': type_, 'name': name})
else: else:
self.pattern += '/{segment}'.format(segment=segment) pattern += '/' + segment
self.segments.append({'parser': self._static_segment(segment)})
if use_regex: if use_regex:
self.pattern = re.compile('^' + self.pattern + '$') import re
self.regex = re.compile('^' + pattern + '$')
def match(self, path):
    """Match a request path against this URL pattern.

    :param path: the URL path to test, for example ``'/users/42'``.

    Returns a dictionary that maps each named segment of the pattern to
    the value parsed from ``path`` (an empty dictionary when the pattern
    has no named segments), or ``None`` when the path does not match.
    """
    args = {}
    if self.regex:
        # A compiled regex covers the whole pattern, with one capture
        # group per named segment, in pattern order.
        g = self.regex.match(path)
        if not g:
            return
        i = 1
        for segment in self.segments:
            if 'name' not in segment:
                # static segments are matched literally and have no group
                continue
            value = g.group(i)
            if segment['type'] == 'int':
                value = int(value)
            args[segment['name']] = value
            i += 1
    else:
        # Regex-free path: consume the URL one segment at a time using
        # the parser stored with each segment.
        if len(path) == 0 or path[0] != '/':
            return
        path = path[1:]
        for segment in self.segments:
            if path is None:
                # the pattern has more segments than the path
                return
            arg, path = segment['parser'](path)
            if arg is None:
                return
            if 'name' in segment:
                # Only an empty string means "nothing captured". A plain
                # truthiness test would also reject the integer 0, making
                # URLs such as /users/0 unroutable.
                if arg == '':
                    return
                args[segment['name']] = arg
        if path is not None:
            # the path has more segments than the pattern
            return
    return args
def _static_segment(self, segment):
    """Build a parser for a literal (non-argument) URL segment.

    :param segment: the exact text the URL segment must equal.

    Returns a function that takes the unparsed remainder of a path
    (without its leading slash) and returns a ``(arg, rest)`` tuple:

    - ``('', rest)`` when the first path component equals ``segment``;
      ``rest`` is the text after the next ``'/'``, or ``None`` when no
      further ``'/'`` is present.
    - ``(None, None)`` when the first path component is different.
    """
    def _static(value):
        s = value.split('/', 1)
        if s[0] == segment:
            # An empty string (not None) is returned on success: the
            # segment matched but carries no argument value.
            return '', s[1] if len(s) > 1 else None
        return None, None

    return _static
def _string_segment(self, value):
    """Split the first component off the unparsed remainder of a path.

    :param value: the remaining path text, without its leading slash.

    Returns a ``(arg, rest)`` tuple. ``arg`` is the text up to the first
    ``'/'`` (the empty string when ``value`` is empty or starts with
    ``'/'``). ``rest`` is the text after that ``'/'``, or ``None`` when
    ``value`` contains no ``'/'``.
    """
    parts = value.split('/', 1)
    if len(parts) > 1:
        return parts[0], parts[1]
    return parts[0], None
def _int_segment(self, value):
    """Parse the first component of a path remainder as an integer.

    :param value: the remaining path text, without its leading slash.

    Returns a ``(arg, rest)`` tuple. On success ``arg`` is the integer
    and ``rest`` is the text after the next ``'/'``, or ``None`` when no
    further ``'/'`` is present. When the first component is not an
    optional ``'-'`` followed by digits, ``(None, None)`` is returned.
    """
    s = value.split('/', 1)
    text = s[0]
    digits = text[1:] if text.startswith('-') else text
    # int() on its own also accepts forms such as '+5', ' 5' or '1_0'.
    # Checking the text first keeps this parser in line with the
    # '-?\d+' expression used when the pattern is compiled to a regex.
    if not digits.isdigit():
        return None, None
    try:
        return int(text), s[1] if len(s) > 1 else None
    except ValueError:
        # isdigit() is true for a few characters int() cannot convert
        return None, None
class HTTPException(Exception): class HTTPException(Exception):
def __init__(self, status_code, reason=None): def __init__(self, status_code, reason=None):

View File

@@ -7,6 +7,8 @@ class TestURLPattern(unittest.TestCase):
p = URLPattern('/') p = URLPattern('/')
self.assertEqual(p.match('/'), {}) self.assertEqual(p.match('/'), {})
self.assertIsNone(p.match('/foo')) self.assertIsNone(p.match('/foo'))
self.assertIsNone(p.match('foo'))
self.assertIsNone(p.match(''))
p = URLPattern('/foo/bar') p = URLPattern('/foo/bar')
self.assertEqual(p.match('/foo/bar'), {}) self.assertEqual(p.match('/foo/bar'), {})
@@ -23,6 +25,8 @@ class TestURLPattern(unittest.TestCase):
p = URLPattern('/<arg>') p = URLPattern('/<arg>')
self.assertEqual(p.match('/foo'), {'arg': 'foo'}) self.assertEqual(p.match('/foo'), {'arg': 'foo'})
self.assertIsNone(p.match('/')) self.assertIsNone(p.match('/'))
self.assertIsNone(p.match(''))
self.assertIsNone(p.match('foo/'))
self.assertIsNone(p.match('/foo/')) self.assertIsNone(p.match('/foo/'))
p = URLPattern('/<arg>/') p = URLPattern('/<arg>/')
@@ -82,7 +86,10 @@ class TestURLPattern(unittest.TestCase):
p = URLPattern('/users/<re:[a-c]+:id>') p = URLPattern('/users/<re:[a-c]+:id>')
self.assertEqual(p.match('/users/ab'), {'id': 'ab'}) self.assertEqual(p.match('/users/ab'), {'id': 'ab'})
self.assertEqual(p.match('/users/bca'), {'id': 'bca'}) self.assertEqual(p.match('/users/bca'), {'id': 'bca'})
self.assertIsNone(p.match('/users'))
self.assertIsNone(p.match('/users/'))
self.assertIsNone(p.match('/users/abcd')) self.assertIsNone(p.match('/users/abcd'))
self.assertIsNone(p.match('/users/abcdx'))
def test_many_arguments(self): def test_many_arguments(self):
p = URLPattern('/foo/<path:path>/<int:id>/bar/<name>') p = URLPattern('/foo/<path:path>/<int:id>/bar/<name>')