From c92b5ae28222af5a1094f5d2f70a45d4d17653d5 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Sun, 2 Mar 2025 00:47:54 +0000
Subject: [PATCH] Redesigned the URL parser to allow for custom path components

---
 docs/intro.rst            | 44 ++++++++++++++++--
 src/microdot/microdot.py  | 98 ++++++++++++++-------------------------
 tests/test_url_pattern.py | 23 +++++++++
 3 files changed, 97 insertions(+), 68 deletions(-)

diff --git a/docs/intro.rst b/docs/intro.rst
index f84122c..b381d93 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -329,15 +329,51 @@ URL::
     async def get_test(request, path):
         return 'Test: ' + path
 
-For the most control, the ``re`` type allows the application to provide a
-custom regular expression for the dynamic component. The next example defines
-a route that only matches usernames that begin with an upper or lower case
-letter, followed by a sequence of letters or numbers::
+The ``re`` type allows the application to provide a custom regular expression
+for the dynamic component. The next example defines a route that only matches
+usernames that begin with an upper or lower case letter, followed by a sequence
+of letters or numbers::
 
     @app.get('/users/<re:[a-zA-Z][a-zA-Z0-9]*:username>')
     async def get_user(request, username):
         return 'User: ' + username
 
+The ``re`` type returns the URL component as a string, which may not always be
+the most convenient. In such cases, the application can register a custom URL
+component type and provide a parser function for it. In the following example,
+a custom ``hex`` type is registered to automatically convert hexadecimal
+arguments to integers::
+
+    from microdot import URLPattern
+
+    URLPattern.register_type('hex', parser=lambda value: int(value, 16))
+
+    @app.get('/users/<hex:user_id>')
+    async def get_user(request, user_id):
+        user = get_user_by_id(user_id)
+        # ...
+
+In addition to the parser, a custom URL component type can define the pattern
+of valid values as a regular expression. When a pattern is provided, the URL
+component will only match if the regular expression matches the value passed
+in the URL. The ``hex`` example above can be expanded as follows::
+
+    URLPattern.register_type('hex', pattern='[0-9a-fA-F]+',
+                             parser=lambda value: int(value, 16))
+
+When a pattern isn't provided, or when the pattern cannot filter out all
+invalid values, the parser function can return ``None`` to indicate a failed
+match. Exceptions raised by the parser are not handled as a failed match, so
+the next ``hex`` example catches ``ValueError`` and returns ``None`` instead::
+
+    def hex_parser(value):
+        try:
+            return int(value, 16)
+        except ValueError:
+            return None
+
+    URLPattern.register_type('hex', pattern='[0-9a-fA-F]+', parser=hex_parser)
+
 .. note::
    Dynamic path components are passed to route functions as keyword arguments,
    so the names of the function arguments must match the names declared in the
diff --git a/src/microdot/microdot.py b/src/microdot/microdot.py
index 061e12b..e8bdcaa 100644
--- a/src/microdot/microdot.py
+++ b/src/microdot/microdot.py
@@ -8,6 +8,7 @@ servers for MicroPython and standard Python.
 import asyncio
 import io
 import json
+import re
 import time
 
 try:
@@ -805,12 +806,20 @@ class Response:
 
 
 class URLPattern():
+    segment_patterns = {
+        'string': '/([^/]+)',
+        'int': '/(-?\\d+)',
+        'path': '/(.+)',
+    }
+    segment_parsers = {
+        'int': lambda value: int(value),
+    }
+
     def __init__(self, url_pattern):
         self.url_pattern = url_pattern
         self.segments = []
         self.regex = None
         pattern = ''
-        use_regex = False
         for segment in url_pattern.lstrip('/').split('/'):
             if segment and segment[0] == '<':
                 if segment[-1] != '>':
@@ -822,82 +831,43 @@ class URLPattern():
                     type_ = 'string'
                     name = segment
                 parser = None
-                if type_ == 'string':
-                    parser = self._string_segment
-                    pattern += '/([^/]+)'
-                elif type_ == 'int':
-                    parser = self._int_segment
-                    pattern += '/(-?\\d+)'
-                elif type_ == 'path':
-                    use_regex = True
-                    pattern += '/(.+)'
-                elif type_.startswith('re:'):
-                    use_regex = True
+                if type_.startswith('re:'):
                     pattern += '/({pattern})'.format(pattern=type_[3:])
                 else:
-                    raise ValueError('invalid URL segment type')
+                    if type_ not in self.segment_patterns:
+                        raise ValueError('invalid URL segment type')
+                    pattern += self.segment_patterns[type_]
+                    parser = self.segment_parsers.get(type_)
                 self.segments.append({'parser': parser, 'name': name,
                                       'type': type_})
             else:
                 pattern += '/' + segment
-                self.segments.append({'parser': self._static_segment(segment)})
-        if use_regex:
-            import re
-            self.regex = re.compile('^' + pattern + '$')
+                self.segments.append({'parser': None})
+        self.regex = re.compile('^' + pattern + '$')
+
+    @classmethod
+    def register_type(cls, type_name, pattern='[^/]+', parser=None):
+        cls.segment_patterns[type_name] = '/({})'.format(pattern)
+        cls.segment_parsers[type_name] = parser
 
     def match(self, path):
         args = {}
-        if self.regex:
-            g = self.regex.match(path)
-            if not g:
-                return
-            i = 1
-            for segment in self.segments:
-                if 'name' not in segment:
-                    continue
-                value = g.group(i)
-                if segment['type'] == 'int':
-                    value = int(value)
-                args[segment['name']] = value
-                i += 1
-        else:
-            if len(path) == 0 or path[0] != '/':
-                return
-            path = path[1:]
-            args = {}
-            for segment in self.segments:
-                if path is None:
-                    return
-                arg, path = segment['parser'](path)
+        g = self.regex.match(path)
+        if not g:
+            return  # no match
+        i = 1  # index of the next capture group
+        for segment in self.segments:
+            if 'name' not in segment:
+                continue  # static segment, nothing to capture
+            arg = g.group(i)
+            if segment['parser']:
+                arg = segment['parser'](arg)  # parser bound in __init__
                 if arg is None:
                     return
-                if 'name' in segment:
-                    args[segment['name']] = arg
-            if path is not None:
-                return
+            args[segment['name']] = arg
+            i += 1
         return args
 
-    def _static_segment(self, segment):
-        def _static(value):
-            s = value.split('/', 1)
-            if s[0] == segment:
-                return '', s[1] if len(s) > 1 else None
-            return None, None
-        return _static
-
-    def _string_segment(self, value):
-        s = value.split('/', 1)
-        if len(s[0]) == 0:
-            return None, None
-        return s[0], s[1] if len(s) > 1 else None
-
-    def _int_segment(self, value):
-        s = value.split('/', 1)
-        try:
-            return int(s[0]), s[1] if len(s) > 1 else None
-        except ValueError:
-            return None, None
-
     def __repr__(self):  # pragma: no cover
         return 'URLPattern: {}'.format(self.url_pattern)
 
diff --git a/tests/test_url_pattern.py b/tests/test_url_pattern.py
index e9b4a43..c3656bd 100644
--- a/tests/test_url_pattern.py
+++ b/tests/test_url_pattern.py
@@ -121,3 +121,26 @@ class TestURLPattern(unittest.TestCase):
     def test_invalid_url_patterns(self):
         self.assertRaises(ValueError, URLPattern, '/users/<foo/bar')
         self.assertRaises(ValueError, URLPattern, '/users/<badtype:id>')
+
+    def test_custom_url_pattern(self):
+        URLPattern.register_type('hex', '[0-9a-f]+')
+        p = URLPattern('/users/<hex:id>')
+        self.assertEqual(p.match('/users/a1'), {'id': 'a1'})
+        self.assertIsNone(p.match('/users/ab12z'))
+
+        URLPattern.register_type('hex', '[0-9a-f]+',
+                                 parser=lambda value: int(value, 16))
+        p = URLPattern('/users/<hex:id>')
+        self.assertEqual(p.match('/users/a1'), {'id': 161})
+        self.assertIsNone(p.match('/users/ab12z'))
+
+        def hex_parser(value):
+            try:
+                return int(value, 16)
+            except ValueError:
+                return None
+
+        URLPattern.register_type('hex', parser=hex_parser)
+        p = URLPattern('/users/<hex:id>')
+        self.assertEqual(p.match('/users/a1'), {'id': 161})
+        self.assertIsNone(p.match('/users/ab12z'))