diff --git a/microdot.py b/microdot.py index be74d5a..bfd06c9 100644 --- a/microdot.py +++ b/microdot.py @@ -219,18 +219,18 @@ class URLPattern(): raise ValueError('invalid URL pattern') segment = segment[1:-1] if ':' in segment: - type_, name = segment.split(':', 1) + type_, name = segment.rsplit(':', 1) else: type_ = 'string' name = segment if type_ == 'string': - pattern = '[^/]*' + pattern = '[^/]+' elif type_ == 'int': pattern = '\\d+' elif type_ == 'path': - pattern = '.*' - elif type_.startswith('regex'): - pattern = eval(type_[5:]) + pattern = '.+' + elif type_.startswith('re:'): + pattern = type_[3:] else: raise ValueError('invalid URL segment type') use_regex = True @@ -239,7 +239,7 @@ class URLPattern(): else: self.pattern += '/{segment}'.format(segment=segment) if use_regex: - self.pattern = re.compile(self.pattern) + self.pattern = re.compile('^' + self.pattern + '$') def match(self, path): if isinstance(self.pattern, str): diff --git a/tests/libs/datetime.py b/tests/libs/datetime.py index c33b901..cf9167f 100644 --- a/tests/libs/datetime.py +++ b/tests/libs/datetime.py @@ -1,4 +1,5 @@ """Concrete date/time and related types. + See http://www.iana.org/time-zones/repository/tz-link.html for time zone and DST data sources. """ @@ -280,14 +281,18 @@ def _cmperror(x, y): class timedelta: """Represent the difference between two datetime objects. + Supported operators: + - add, subtract timedelta - unary plus, minus, abs - compare to timedelta - multiply, divide by int + In addition, datetime supports subtraction of two datetime objects returning a timedelta, and addition or subtraction of a datetime and a timedelta giving a datetime. + Representation: (days, seconds, microseconds). Why? Because I felt like it. """ @@ -612,22 +617,29 @@ timedelta.resolution = timedelta(microseconds=1) class date: """Concrete date type. 
+ Constructors: + __new__() fromtimestamp() today() fromordinal() + Operators: + __repr__, __str__ __cmp__, __hash__ __add__, __radd__, __sub__ (add/radd only with timedelta arg) + Methods: + timetuple() toordinal() weekday() isoweekday(), isocalendar(), isoformat() ctime() strftime() + Properties (readonly): year, month, day """ @@ -635,7 +647,9 @@ class date: def __new__(cls, year, month=None, day=None): """Constructor. + Arguments: + year, month, day (required, base 1) """ if (isinstance(year, bytes) and len(year) == 4 and @@ -668,6 +682,7 @@ class date: @classmethod def fromordinal(cls, n): """Construct a date from a proleptic Gregorian ordinal. + January 1 of year 1 is day 1. Only the year, month and day are non-zero in the result. """ @@ -678,9 +693,11 @@ class date: def __repr__(self): """Convert to formal string, for repr(). + >>> dt = datetime(2010, 1, 1) >>> repr(dt) 'datetime.datetime(2010, 1, 1, 0, 0)' + >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc) >>> repr(dt) 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)' @@ -714,7 +731,9 @@ class date: def isoformat(self): """Return the date formatted according to ISO. + This is 'YYYY-MM-DD'. + References: - http://www.w3.org/TR/NOTE-datetime - http://www.cl.cam.ac.uk/~mgk25/iso-time.html @@ -748,6 +767,7 @@ class date: def toordinal(self): """Return proleptic Gregorian ordinal for the year, month and day. + January 1 of year 1 is day 1. Only the year, month and day values contribute to the result. """ @@ -842,10 +862,13 @@ class date: def isocalendar(self): """Return a 3-tuple containing ISO year, week number, and weekday. + The first ISO week of the year is the (Mon-Sun) week containing the year's first Thursday; everything else derives from that. + The first week is 1; Monday is 1 ... Sunday is 7. 
+ ISO calendar algorithm taken from http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm """ @@ -887,9 +910,15 @@ date.resolution = timedelta(days=1) class tzinfo: """Abstract base class for time zone info classes. + Subclasses must override the tzname(), utcoffset() and dst() methods. """ __slots__ = () + + def __new__(cls): + self = object.__new__(cls) + return self + def tzname(self, dt): "datetime -> string name of time zone." raise NotImplementedError("tzinfo subclass must override tzname()") @@ -900,6 +929,7 @@ class tzinfo: def dst(self, dt): """datetime -> DST offset in minutes east of UTC. + Return 0 if DST not in effect. utcoffset() must include the DST offset. """ @@ -954,24 +984,33 @@ _tzinfo_class = tzinfo class time: """Time with time zone. + Constructors: + __new__() + Operators: + __repr__, __str__ __cmp__, __hash__ + Methods: + strftime() isoformat() utcoffset() tzname() dst() + Properties (readonly): hour, minute, second, microsecond, tzinfo """ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): """Constructor. + Arguments: + hour, minute (required) second, microsecond (default to zero) tzinfo (default to None) @@ -1132,6 +1171,7 @@ class time: def isoformat(self): """Return the time formatted according to ISO. + This is 'HH:MM:SS.mmmmmm+zz:zz', or 'HH:MM:SS+zz:zz' if self.microsecond == 0. """ @@ -1173,6 +1213,7 @@ class time: def tzname(self): """Return the timezone name. + Note that the name is 100% informational -- there's no requirement that it mean anything in particular. For example, "GMT", "UTC", "-500", "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. @@ -1186,6 +1227,7 @@ class time: def dst(self): """Return 0 if DST is not in effect, or the DST offset (in minutes eastward) if DST is in effect. 
+ This is purely informational; the DST offset has already been added to the UTC offset returned by utcoffset() if applicable, so there's no need to consult dst() unless you're interested in displaying the DST @@ -1254,6 +1296,7 @@ time.resolution = timedelta(microseconds=1) class datetime(date): """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]]) + The year, month and day arguments are required. tzinfo may be None, or an instance of a tzinfo subclass. The remaining arguments may be ints. """ @@ -1307,6 +1350,7 @@ class datetime(date): @classmethod def fromtimestamp(cls, t, tz=None): """Construct a datetime from a POSIX timestamp (like time.time()). + A timezone info object may be passed in as well. """ @@ -1503,10 +1547,13 @@ class datetime(date): def isoformat(self, sep='T'): """Return the time formatted according to ISO. + This is 'YYYY-MM-DD HH:MM:SS.mmmmmm', or 'YYYY-MM-DD HH:MM:SS' if self.microsecond == 0. + If self.tzinfo is not None, the UTC offset is also attached, giving 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM' or 'YYYY-MM-DD HH:MM:SS+HH:MM'. + Optional argument sep specifies the separator between date and time, default 'T'. """ @@ -1563,6 +1610,7 @@ class datetime(date): def tzname(self): """Return the timezone name. + Note that the name is 100% informational -- there's no requirement that it mean anything in particular. For example, "GMT", "UTC", "-500", "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. @@ -1574,6 +1622,7 @@ class datetime(date): def dst(self): """Return 0 if DST is not in effect, or the DST offset (in minutes eastward) if DST is in effect. 
+ This is purely informational; the DST offset has already been added to the UTC offset returned by utcoffset() if applicable, so there's no need to consult dst() unless you're interested in displaying the DST @@ -1789,7 +1838,7 @@ class timezone(tzinfo): @classmethod def _create(cls, offset, name=None): - self = object.__new__(cls) + self = tzinfo.__new__(cls) self._offset = offset self._name = name return self @@ -1810,6 +1859,7 @@ class timezone(tzinfo): def __repr__(self): """Convert to formal string, for repr(). + >>> tz = timezone.utc >>> repr(tz) 'datetime.timezone.utc' @@ -1883,48 +1933,71 @@ Some time zone algebra. For a datetime x, let x.d = x.dst(), and assuming that doesn't raise an exception or return None x.s = x's standard offset, x.o - x.d + Now some derived rules, where k is a duration (timedelta). + 1. x.o = x.s + x.d This follows from the definition of x.s. + 2. If x and y have the same tzinfo member, x.s = y.s. This is actually a requirement, an assumption we need to make about sane tzinfo classes. + 3. The naive UTC time corresponding to x is x.n - x.o. This is again a requirement for a sane tzinfo class. + 4. (x+k).s = x.s This follows from #2, and that datetime+timedelta preserves tzinfo. + 5. (x+k).n = x.n + k Again follows from how arithmetic is defined. + Now we can explain tz.fromutc(x). Let's assume it's an interesting case (meaning that the various tzinfo methods exist, and don't blow up or return None when called). + The function wants to return a datetime y with timezone tz, equivalent to x. x is already in UTC. + By #3, we want + y.n - y.o = x.n [1] + The algorithm starts by attaching tz to x.n, and calling that y. So x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] becomes true; in effect, we want to solve [2] for k: + (y+k).n - (y+k).o = x.n [2] + By #1, this is the same as + (y+k).n - ((y+k).s + (y+k).d) = x.n [3] + By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. 
Substituting that into [3], + x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving k - (y+k).s - (y+k).d = 0; rearranging, k = (y+k).s + (y+k).d; by #4, (y+k).s == y.s, so k = y.s + (y+k).d + On the RHS, (y+k).d can't be computed directly, but y.s can be, and we approximate k by ignoring the (y+k).d term at first. Note that k can't be very large, since all offset-returning methods return a duration of magnitude less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must be 0, so ignoring it has no consequence then. + In any case, the new value is + z = y + y.s [4] + It's helpful to step back and look at [4] from a higher level: it's simply mapping from UTC to tz's standard time. + At this point, if + z.n - z.o = x.n [5] + we have an equivalent time, and are almost done. The insecurity here is at the start of daylight time. Picture US Eastern for concreteness. The wall time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good @@ -1932,10 +2005,13 @@ sense then. The docs ask that an Eastern tzinfo class consider such a time to be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST on the day DST starts. We want to return the 1:MM EST spelling because that's the only spelling that makes sense on the local wall clock. + In fact, if [5] holds at this point, we do have the standard-time spelling, but that takes a bit of proof. We first prove a stronger result. What's the difference between the LHS and RHS of [5]? Let + diff = x.n - (z.n - z.o) [6] + Now z.n = by [4] (y + y.s).n = by #5 @@ -1943,28 +2019,39 @@ Now x.n + y.s = since z and y have the same tzinfo member, y.s = z.s by #2 x.n + z.s + Plugging that back into [6] gives + diff = x.n - ((x.n + z.s) - z.o) = expanding x.n - x.n - z.s + z.o = cancelling - z.s + z.o = by #1 z.d + So diff = z.d. + If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time spelling we wanted in the endcase described above. We're done. 
Contrarily, if z.d = 0, then we have a UTC equivalent, and are also done. + If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to add to z (in effect, z is in tz's standard time, and we need to shift the local clock into tz's daylight time). + Let + z' = z + z.d = z + diff [7] + and we can again ask whether + z'.n - z'.o = x.n [8] + If so, we're done. If not, the tzinfo class is insane, according to the assumptions we've made. This also requires a bit of proof. As before, let's compute the difference between the LHS and RHS of [8] (and skipping some of the justifications for the kinds of substitutions we've done several times already): + diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] x.n - (z.n + diff - z'.o) = replacing diff via [6] x.n - (z.n + x.n - (z.n - z.o) - z'.o) = @@ -1973,13 +2060,16 @@ already): - z.o + z'.o = #1 twice -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo z'.d - z.d + So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, we've found the UTC-equivalent so are done. In fact, we stop with [7] and return z', not bothering to compute z'.d. + How could z.d and z'.d differ? z' = z + z.d [7], so merely moving z' by a dst() offset, and starting *from* a time already in DST (we know z.d != 0), would have to change the result dst() returns: we start in DST, and moving a little further into it takes us out of DST. + There isn't a sane case where this can happen. The closest it gets is at the end of DST, where there's an hour in UTC with no spelling in a hybrid tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During @@ -1990,10 +2080,12 @@ clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in standard time. Since that's what the local clock *does*, we want to map both UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous in local time, but so it goes -- it's the way the local clock works. 
+ When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] (correctly) concludes that z' is not UTC-equivalent to x. + Because we know z.d said z was in daylight time (else [5] would have held and we would have stopped then), and we know z.d != z'.d (else [8] would have held and we would have stopped then), and there are only 2 possible values dst() can @@ -2001,21 +2093,27 @@ return in Eastern, it follows that z'.d must be 0 (which it is in the example, but the reasoning doesn't depend on the example -- it depends on there being two possible dst() outcomes, one zero and the other non-zero). Therefore z' must be in standard time, and is the spelling we want in this case. + Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is concerned (because it takes z' as being in standard time rather than the daylight time we intend here), but returning it gives the real-life "local clock repeats an hour" behavior when mapping the "unspellable" UTC hour into tz. + When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with the 1:MM standard time spelling we want. + So how can this break? One of the assumptions must be violated. Two possibilities: + 1) #2 effectively says that y.s is invariant across all y belonging to a given time zone. This isn't true if, for political reasons or continental drift, a region decides to change its base offset from UTC. + 2) There may be versions of "double daylight" time where the tail end of the analysis gives up a step too early. I haven't thought about that enough to say. 
+ In any case, it's clear that the default fromutc() is strong enough to handle "almost all" time zones: so long as the standard offset is invariant, it doesn't matter if daylight time transition points change from year to year, or diff --git a/tests/test_url_pattern.py b/tests/test_url_pattern.py index 564d992..411ba77 100644 --- a/tests/test_url_pattern.py +++ b/tests/test_url_pattern.py @@ -1,6 +1,98 @@ import unittest +from microdot import URLPattern class TestURLPattern(unittest.TestCase): - def test_foo(self): - pass + def test_static(self): + p = URLPattern('/') + self.assertEqual(p.match('/'), {}) + self.assertIsNone(p.match('/foo')) + + p = URLPattern('/foo/bar') + self.assertEqual(p.match('/foo/bar'), {}) + self.assertIsNone(p.match('/foo')) + self.assertIsNone(p.match('/foo/bar/')) + + p = URLPattern('/foo//bar/baz/') + self.assertEqual(p.match('/foo//bar/baz/'), {}) + self.assertIsNone(p.match('/foo/bar/baz/')) + self.assertIsNone(p.match('/foo')) + self.assertIsNone(p.match('/foo/bar/baz')) + + def test_string_argument(self): + p = URLPattern('/') + self.assertEqual(p.match('/foo'), {'arg': 'foo'}) + self.assertIsNone(p.match('/')) + self.assertIsNone(p.match('/foo/')) + + p = URLPattern('//') + self.assertEqual(p.match('/foo/'), {'arg': 'foo'}) + self.assertIsNone(p.match('/')) + self.assertIsNone(p.match('/foo')) + + p = URLPattern('/') + self.assertEqual(p.match('/foo'), {'arg': 'foo'}) + self.assertIsNone(p.match('/')) + self.assertIsNone(p.match('/foo/')) + + p = URLPattern('//') + self.assertEqual(p.match('/foo/'), {'arg': 'foo'}) + self.assertIsNone(p.match('/')) + self.assertIsNone(p.match('/foo')) + + p = URLPattern('/foo//bar/') + self.assertEqual(p.match('/foo/one/bar/two'), + {'arg1': 'one', 'arg2': 'two'}) + self.assertIsNone(p.match('/')) + self.assertIsNone(p.match('/foo/')) + + def test_int_argument(self): + p = URLPattern('/users/') + self.assertEqual(p.match('/users/123'), {'id': 123}) + self.assertIsNone(p.match('/users/')) + 
self.assertIsNone(p.match('/users/abc')) + self.assertIsNone(p.match('/users/123abc')) + self.assertIsNone(p.match('/users/123/abc')) + + p = URLPattern('/users///') + self.assertEqual(p.match('/users/123/456/'), {'id': 123, 'id2': 456}) + self.assertIsNone(p.match('/users/')) + self.assertIsNone(p.match('/users/123/456')) + self.assertIsNone(p.match('/users/123/abc/')) + self.assertIsNone(p.match('/users/123/456/abc')) + + def test_path_argument(self): + p = URLPattern('/users/') + self.assertEqual(p.match('/users/123'), {'path': '123'}) + self.assertEqual(p.match('/users/123/'), {'path': '123/'}) + self.assertEqual(p.match('/users/abc/def'), {'path': 'abc/def'}) + self.assertIsNone(p.match('/users/')) + + p = URLPattern('/users//foo') + self.assertEqual(p.match('/users/123/foo'), {'path': '123'}) + self.assertEqual(p.match('/users/foo/foo'), {'path': 'foo'}) + self.assertEqual(p.match('/users/abc/def/foo'), {'path': 'abc/def'}) + self.assertIsNone(p.match('/users/')) + self.assertIsNone(p.match('/users/foo')) + self.assertIsNone(p.match('/users/foo/')) + + def test_regex_argument(self): + p = URLPattern('/users/') + print(p.pattern) + self.assertEqual(p.match('/users/ab'), {'id': 'ab'}) + self.assertEqual(p.match('/users/bca'), {'id': 'bca'}) + self.assertIsNone(p.match('/users/abcd')) + + def test_many_arguments(self): + p = URLPattern('/foo///bar/') + self.assertEqual(p.match('/foo/abc/def/123/bar/test'), + {'path': 'abc/def', 'id': 123, 'name': 'test'}) + self.assertIsNone(p.match('/foo/123/bar/test')) + self.assertIsNone(p.match('/foo/abc/def/ghi/bar/test')) + self.assertIsNone(p.match('/foo/abc/def/123/bar')) + self.assertIsNone(p.match('/foo/abc/def/123/bar/')) + self.assertIsNone(p.match('/foo/abc/def/123/test')) + + def test_invalid_url_patterns(self): + self.assertRaises(ValueError, URLPattern, '/users/')