Redesigned the URL parser to allow for custom path components

This commit is contained in:
Miguel Grinberg
2025-03-02 00:47:54 +00:00
parent 48ce31e699
commit c92b5ae282
3 changed files with 97 additions and 68 deletions

View File

@@ -329,15 +329,51 @@ URL::
async def get_test(request, path):
return 'Test: ' + path
For the most control, the ``re`` type allows the application to provide a
custom regular expression for the dynamic component. The next example defines
a route that only matches usernames that begin with an upper or lower case
letter, followed by a sequence of letters or numbers::
The ``re`` type allows the application to provide a custom regular expression
for the dynamic component. The next example defines a route that only matches
usernames that begin with an upper or lower case letter, followed by a sequence
of letters or numbers::
@app.get('/users/<re:[a-zA-Z][a-zA-Z0-9]*:username>')
async def get_user(request, username):
return 'User: ' + username
The ``re`` type returns the URL component as a string, which may not always be
the most convenient. In such cases, the application can register a custom
URL component type and provide a parser function. In the following example,
a ``hex`` custom type is registered to automatically convert hexadecimal
arguments to integers::
from microdot import URLPattern
URLPattern.register_type('hex', parser=lambda value: int(value, 16))
@app.get('/users/<hex:user_id>')
async def get_user(request, user_id):
user = get_user_by_id(user_id)
# ...
In addition to the parser, a custom URL component type can specify a regular
expression pattern. When a pattern is provided, the URL component will
only match if the regular expression matches the value passed in the URL. The
``hex`` example above can be expanded with a pattern as follows::
URLPattern.register_type('hex', pattern='[0-9a-fA-F]+',
parser=lambda value: int(value, 16))
In cases where a pattern isn't provided, or when the pattern is unable to
filter all invalid values, the parser function can return ``None`` to indicate
a failed match. The next example shows how the ``hex`` type can be expanded to
do that::
def hex_parser(value):
try:
return int(value, 16)
except ValueError:
return None
URLPattern.register_type('hex', pattern='[0-9a-fA-F]+', parser=hex_parser)
.. note::
Dynamic path components are passed to route functions as keyword arguments,
so the names of the function arguments must match the names declared in the

View File

@@ -8,6 +8,7 @@ servers for MicroPython and standard Python.
import asyncio
import io
import json
import re
import time
try:
@@ -805,12 +806,20 @@ class Response:
class URLPattern():
segment_patterns = {
'string': '/([^/]+)',
'int': '/(-?\\d+)',
'path': '/(.+)',
}
segment_parsers = {
'int': lambda value: int(value),
}
def __init__(self, url_pattern):
self.url_pattern = url_pattern
self.segments = []
self.regex = None
pattern = ''
use_regex = False
for segment in url_pattern.lstrip('/').split('/'):
if segment and segment[0] == '<':
if segment[-1] != '>':
@@ -822,82 +831,43 @@ class URLPattern():
type_ = 'string'
name = segment
parser = None
if type_ == 'string':
parser = self._string_segment
pattern += '/([^/]+)'
elif type_ == 'int':
parser = self._int_segment
pattern += '/(-?\\d+)'
elif type_ == 'path':
use_regex = True
pattern += '/(.+)'
elif type_.startswith('re:'):
use_regex = True
if type_.startswith('re:'):
pattern += '/({pattern})'.format(pattern=type_[3:])
else:
raise ValueError('invalid URL segment type')
if type_ not in self.segment_patterns:
raise ValueError('invalid URL segment type')
pattern += self.segment_patterns[type_]
parser = self.segment_parsers.get(type_)
self.segments.append({'parser': parser, 'name': name,
'type': type_})
else:
pattern += '/' + segment
self.segments.append({'parser': self._static_segment(segment)})
if use_regex:
import re
self.regex = re.compile('^' + pattern + '$')
self.segments.append({'parser': None})
self.regex = re.compile('^' + pattern + '$')
@classmethod
def register_type(cls, type_name, pattern='[^/]+', parser=None):
cls.segment_patterns[type_name] = '/({})'.format(pattern)
cls.segment_parsers[type_name] = parser
def match(self, path):
args = {}
if self.regex:
g = self.regex.match(path)
if not g:
return
i = 1
for segment in self.segments:
if 'name' not in segment:
continue
value = g.group(i)
if segment['type'] == 'int':
value = int(value)
args[segment['name']] = value
i += 1
else:
if len(path) == 0 or path[0] != '/':
return
path = path[1:]
args = {}
for segment in self.segments:
if path is None:
return
arg, path = segment['parser'](path)
g = self.regex.match(path)
if not g:
return
i = 1
for segment in self.segments:
if 'name' not in segment:
continue
arg = g.group(i)
if segment['parser']:
arg = self.segment_parsers[segment['type']](arg)
if arg is None:
return
if 'name' in segment:
args[segment['name']] = arg
if path is not None:
return
args[segment['name']] = arg
i += 1
return args
def _static_segment(self, segment):
def _static(value):
s = value.split('/', 1)
if s[0] == segment:
return '', s[1] if len(s) > 1 else None
return None, None
return _static
def _string_segment(self, value):
s = value.split('/', 1)
if len(s[0]) == 0:
return None, None
return s[0], s[1] if len(s) > 1 else None
def _int_segment(self, value):
s = value.split('/', 1)
try:
return int(s[0]), s[1] if len(s) > 1 else None
except ValueError:
return None, None
def __repr__(self): # pragma: no cover
return 'URLPattern: {}'.format(self.url_pattern)

View File

@@ -121,3 +121,26 @@ class TestURLPattern(unittest.TestCase):
def test_invalid_url_patterns(self):
    """URL patterns that must be rejected with ``ValueError``."""
    rejected = (
        '/users/<foo/bar',
        '/users/<badtype:id>',
    )
    for url_pattern in rejected:
        self.assertRaises(ValueError, URLPattern, url_pattern)
def test_custom_url_pattern(self):
    """Custom segment types: pattern only, pattern + parser, parser only."""
    def hex_parser(value):
        try:
            return int(value, 16)
        except ValueError:
            return None

    try:
        # Pattern only: the matched text is returned unchanged.
        URLPattern.register_type('hex', '[0-9a-f]+')
        p = URLPattern('/users/<hex:id>')
        self.assertEqual(p.match('/users/a1'), {'id': 'a1'})
        self.assertIsNone(p.match('/users/ab12z'))

        # Pattern and parser: the pattern filters, the parser converts.
        URLPattern.register_type('hex', '[0-9a-f]+',
                                 parser=lambda value: int(value, 16))
        p = URLPattern('/users/<hex:id>')
        self.assertEqual(p.match('/users/a1'), {'id': 161})
        self.assertIsNone(p.match('/users/ab12z'))

        # Parser only: the default pattern lets 'ab12z' through, so the
        # parser has to reject it by returning None.
        URLPattern.register_type('hex', parser=hex_parser)
        p = URLPattern('/users/<hex:id>')
        self.assertEqual(p.match('/users/a1'), {'id': 161})
        self.assertIsNone(p.match('/users/ab12z'))
    finally:
        # register_type() writes to class-level dictionaries; remove the
        # 'hex' entries so the registration does not leak into other tests.
        URLPattern.segment_patterns.pop('hex', None)
        URLPattern.segment_parsers.pop('hex', None)