py/parsenum: Throw an exception for invalid int literals like "01".

This includes making int("01") parse in base 10 like standard Python.
When a base of 0 is specified it means auto-detect based on the prefix, and
literals beginning with 0 (except when the literal is all 0's) like "01" are
then invalid and now throw an exception.

The new error message is different from CPython. It says e.g.,
`SyntaxError: invalid syntax for integer with base 0: '09'`

Additional test cases were added to cover the changed & added code.

Co-authored-by: Damien George <damien@micropython.org>
Signed-off-by: Jeff Epler <jepler@gmail.com>
This commit is contained in:
Jeff Epler
2024-01-03 19:31:35 -06:00
committed by Damien George
parent 7b3f189b17
commit 13b13d1fdd
5 changed files with 37 additions and 21 deletions

View File

@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
return o;
} else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
// a textual representation, parse it
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
#if MICROPY_PY_BUILTINS_FLOAT
} else if (mp_obj_is_float(args[0])) {
return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));

View File

@@ -151,13 +151,13 @@ value_error:
raise_exc(exc, lex);
#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
raise_exc(exc, lex);
#else
vstr_t vstr;
mp_print_t print;
vstr_init_print(&vstr, 50, &print);
mp_printf(&print, "invalid syntax for integer with base %d: ", base);
mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
mp_obj_new_str_from_utf8_vstr(&vstr));

View File

@@ -30,35 +30,28 @@
// find real radix base, and strip preceding '0x', '0o' and '0b'
// puts base in *base, and returns number of bytes to skip the prefix
// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
// ValueError if it's not all-digits-zero.
size_t mp_parse_num_base(const char *str, size_t len, int *base) {
const byte *p = (const byte *)str;
if (len <= 1) {
goto no_prefix;
}
unichar c = *(p++);
if ((*base == 0 || *base == 16) && c == '0') {
c = *(p++);
if ((c | 32) == 'x') {
if (c == '0') {
c = *(p++) | 32;
int b = *base;
if (c == 'x' && !(b & ~16)) {
*base = 16;
} else if (*base == 0 && (c | 32) == 'o') {
} else if (c == 'o' && !(b & ~8)) {
*base = 8;
} else if (*base == 0 && (c | 32) == 'b') {
} else if (c == 'b' && !(b & ~2)) {
*base = 2;
} else {
if (*base == 0) {
*base = 10;
p -= 2;
if (b == 0) {
*base = 1;
}
p -= 2;
}
} else if (*base == 8 && c == '0') {
c = *(p++);
if ((c | 32) != 'o') {
p -= 2;
}
} else if (*base == 2 && c == '0') {
c = *(p++);
if ((c | 32) != 'b') {
p -= 2;
}
} else {
p--;

View File

@@ -13,6 +13,7 @@ print(int('1'))
print(int('+1'))
print(int('-1'))
print(int('01'))
print(int('00'))
print(int('9'))
print(int('10'))
print(int('+10'))
@@ -31,6 +32,7 @@ print(int(' -3 '))
print(int('0', 10))
print(int('1', 10))
print(int(' \t 1 \t ', 10))
print(int(' \t 00 \t ', 10))
print(int('11', 10))
print(int('11', 16))
print(int('11', 8))
@@ -52,6 +54,17 @@ print(int(' \t 0o12', 8))
print(int('0o12 \t ', 8))
print(int(b"12", 10))
print(int(b"12"))
print(int('000 ', 0))
print(int('000 ', 2))
print(int('000 ', 8))
print(int('000 ', 10))
print(int('000 ', 16))
print(int('000 ', 36))
print(int('010 ', 2))
print(int('010 ', 8))
print(int('010 ', 10))
print(int('010 ', 16))
print(int('010 ', 36))
def test(value, base):
@@ -79,6 +92,8 @@ test('0o8', 8)
test('0xg', 16)
test('1 1', 16)
test('123', 37)
test('01', 0)
test('01 ', 0)
# check that we don't parse this as a floating point number
print(0x1e+1)

View File

@@ -83,3 +83,11 @@ try:
exec(r"'\U0000000'")
except SyntaxError:
print("SyntaxError")
# Properly formed integer literals
print(eval("00"))
# badly formed integer literals
try:
eval("01")
except SyntaxError:
print("SyntaxError")