extmod/modure: Convert byte offsets to unicode indices when necessary.

And add a test.

Fixes issue #9202.

Signed-off-by: Jeff Epler <jepler@gmail.com>
This commit is contained in:
Jeff Epler
2022-09-05 07:58:04 -05:00
committed by Damien George
parent 719dbbf563
commit e90b85cc98
2 changed files with 48 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
# test match.span() for unicode strings
# Use the ``ure`` module when it is importable, otherwise fall back to ``re``.
try:
    import ure as re
except ImportError:
    try:
        import re
    except ImportError:
        # Neither regex module is available: print SKIP and exit.
        print("SKIP")
        raise SystemExit

# Skip when match objects have no ``span`` attribute. The bare attribute
# access below is deliberate: it raises AttributeError if ``span`` is missing.
try:
    m = re.match(".", "a")
    m.span
except AttributeError:
    print("SKIP")
    raise SystemExit
def print_spans(match):
    """Print span, start and end for every group of *match*.

    Emits a "----" separator line, then one line per group, starting with
    group 0 and counting upwards. Iteration stops at the first group index
    for which the match object raises IndexError.
    """
    print("----")
    group = 0
    while True:
        # Only the group lookups can raise IndexError, so keep the try
        # body down to this single statement.
        try:
            print(match.span(group), match.start(group), match.end(group))
        except IndexError:
            break
        group += 1
# Subject: ASCII digits, then the non-ASCII character U+2764, then more
# digits. The pattern is matched from the start of the string.
m = re.match(
    r"([0-9]*)(([a-z]*)([0-9]*))",
    "1234\u2764567",
)
print_spans(m)