From c92b5ae28222af5a1094f5d2f70a45d4d17653d5 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Sun, 2 Mar 2025 00:47:54 +0000 Subject: [PATCH] Redesigned the URL parser to allow for custom path components --- docs/intro.rst | 44 ++++++++++++++++-- src/microdot/microdot.py | 98 ++++++++++++++------------------------- tests/test_url_pattern.py | 23 +++++++++ 3 files changed, 97 insertions(+), 68 deletions(-) diff --git a/docs/intro.rst b/docs/intro.rst index f84122c..b381d93 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -329,15 +329,51 @@ URL:: async def get_test(request, path): return 'Test: ' + path -For the most control, the ``re`` type allows the application to provide a -custom regular expression for the dynamic component. The next example defines -a route that only matches usernames that begin with an upper or lower case -letter, followed by a sequence of letters or numbers:: +The ``re`` type allows the application to provide a custom regular expression +for the dynamic component. The next example defines a route that only matches +usernames that begin with an upper or lower case letter, followed by a sequence +of letters or numbers:: @app.get('/users/<re:[a-zA-Z][a-zA-Z0-9]*:username>') async def get_user(request, username): return 'User: ' + username +The ``re`` type returns the URL component as a string, which sometimes may not +be the most convenient. In such cases, the application can register a custom +URL component type and provide a parser function. In the following example, +a ``hex`` custom type is registered to automatically convert the arguments to +numbers:: + + from microdot import URLPattern + + URLPattern.register_type('hex', parser=lambda value: int(value, 16)) + + @app.get('/users/<hex:user_id>') + async def get_user(request, user_id): + user = get_user_by_id(user_id) + # ... + +In addition to the parser, the custom URL component can include a valid pattern +as a regular expression. 
When a pattern is provided, the URL component will +only match if the regular expression matches the value passed in the URL. The +``hex`` example above can be expanded with a pattern as follows:: + + URLPattern.register_type('hex', pattern='[0-9a-fA-F]+', + parser=lambda value: int(value, 16)) + +In cases where a pattern isn't provided, or when the pattern is unable to +filter all invalid values, the parser function can return ``None`` to indicate +a failed match. The next example shows how the ``hex`` type can be expanded to +do that:: + + def hex_parser(value): + try: + return int(value, 16) + except ValueError: + return None + + URLPattern.register_type('hex', pattern='[0-9a-fA-F]+', parser=hex_parser) + .. note:: Dynamic path components are passed to route functions as keyword arguments, so the names of the function arguments must match the names declared in the diff --git a/src/microdot/microdot.py b/src/microdot/microdot.py index 061e12b..e8bdcaa 100644 --- a/src/microdot/microdot.py +++ b/src/microdot/microdot.py @@ -8,6 +8,7 @@ servers for MicroPython and standard Python. 
import asyncio import io import json +import re import time try: @@ -805,12 +806,20 @@ class Response: class URLPattern(): + segment_patterns = { + 'string': '/([^/]+)', + 'int': '/(-?\\d+)', + 'path': '/(.+)', + } + segment_parsers = { + 'int': lambda value: int(value), + } + def __init__(self, url_pattern): self.url_pattern = url_pattern self.segments = [] self.regex = None pattern = '' - use_regex = False for segment in url_pattern.lstrip('/').split('/'): if segment and segment[0] == '<': if segment[-1] != '>': @@ -822,82 +831,43 @@ class URLPattern(): type_ = 'string' name = segment parser = None - if type_ == 'string': - parser = self._string_segment - pattern += '/([^/]+)' - elif type_ == 'int': - parser = self._int_segment - pattern += '/(-?\\d+)' - elif type_ == 'path': - use_regex = True - pattern += '/(.+)' - elif type_.startswith('re:'): - use_regex = True + if type_.startswith('re:'): pattern += '/({pattern})'.format(pattern=type_[3:]) else: - raise ValueError('invalid URL segment type') + if type_ not in self.segment_patterns: + raise ValueError('invalid URL segment type') + pattern += self.segment_patterns[type_] + parser = self.segment_parsers.get(type_) self.segments.append({'parser': parser, 'name': name, 'type': type_}) else: pattern += '/' + segment - self.segments.append({'parser': self._static_segment(segment)}) - if use_regex: - import re - self.regex = re.compile('^' + pattern + '$') + self.segments.append({'parser': None}) + self.regex = re.compile('^' + pattern + '$') + + @classmethod + def register_type(cls, type_name, pattern='[^/]+', parser=None): + cls.segment_patterns[type_name] = '/({})'.format(pattern) + cls.segment_parsers[type_name] = parser def match(self, path): args = {} - if self.regex: - g = self.regex.match(path) - if not g: - return - i = 1 - for segment in self.segments: - if 'name' not in segment: - continue - value = g.group(i) - if segment['type'] == 'int': - value = int(value) - args[segment['name']] = value - i += 1 - 
else: - if len(path) == 0 or path[0] != '/': - return - path = path[1:] - args = {} - for segment in self.segments: - if path is None: - return - arg, path = segment['parser'](path) + g = self.regex.match(path) + if not g: + return + i = 1 + for segment in self.segments: + if 'name' not in segment: + continue + arg = g.group(i) + if segment['parser']: + arg = self.segment_parsers[segment['type']](arg) if arg is None: return - if 'name' in segment: - args[segment['name']] = arg - if path is not None: - return + args[segment['name']] = arg + i += 1 return args - def _static_segment(self, segment): - def _static(value): - s = value.split('/', 1) - if s[0] == segment: - return '', s[1] if len(s) > 1 else None - return None, None - return _static - - def _string_segment(self, value): - s = value.split('/', 1) - if len(s[0]) == 0: - return None, None - return s[0], s[1] if len(s) > 1 else None - - def _int_segment(self, value): - s = value.split('/', 1) - try: - return int(s[0]), s[1] if len(s) > 1 else None - except ValueError: - return None, None - def __repr__(self): # pragma: no cover return 'URLPattern: {}'.format(self.url_pattern) diff --git a/tests/test_url_pattern.py b/tests/test_url_pattern.py index e9b4a43..c3656bd 100644 --- a/tests/test_url_pattern.py +++ b/tests/test_url_pattern.py @@ -121,3 +121,26 @@ class TestURLPattern(unittest.TestCase): def test_invalid_url_patterns(self): self.assertRaises(ValueError, URLPattern, '/users/') + + def test_custom_url_pattern(self): + URLPattern.register_type('hex', '[0-9a-f]+') + p = URLPattern('/users/<hex:id>') + self.assertEqual(p.match('/users/a1'), {'id': 'a1'}) + self.assertIsNone(p.match('/users/ab12z')) + + URLPattern.register_type('hex', '[0-9a-f]+', + parser=lambda value: int(value, 16)) + p = URLPattern('/users/<hex:id>') + self.assertEqual(p.match('/users/a1'), {'id': 161}) + self.assertIsNone(p.match('/users/ab12z')) + + def hex_parser(value): + try: + return int(value, 16) + except ValueError: + return None + + 
URLPattern.register_type('hex', parser=hex_parser) + p = URLPattern('/users/<hex:id>') + self.assertEqual(p.match('/users/a1'), {'id': 161}) + self.assertIsNone(p.match('/users/ab12z'))