py/parsenum: Throw an exception for invalid int literals like "01".
This includes making int("01") parse in base 10 like standard Python.
When a base of 0 is specified it means auto-detect based on the prefix, and
literals beginning with 0 (except when the literal is all 0's) like "01" are
then invalid and now throw an exception.
The new error message differs from CPython's. For example, it reads:
`SyntaxError: invalid syntax for integer with base 0: '09'`
Additional test cases were added to cover the changed & added code.
Co-authored-by: Damien George <damien@micropython.org>
Signed-off-by: Jeff Epler <jepler@gmail.com>
This commit is contained in:
committed by
Damien George
parent
7b3f189b17
commit
13b13d1fdd
@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
|
||||
return o;
|
||||
} else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
|
||||
// a textual representation, parse it
|
||||
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
|
||||
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
|
||||
#if MICROPY_PY_BUILTINS_FLOAT
|
||||
} else if (mp_obj_is_float(args[0])) {
|
||||
return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));
|
||||
|
||||
@@ -151,13 +151,13 @@ value_error:
|
||||
raise_exc(exc, lex);
|
||||
#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
|
||||
mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
|
||||
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
|
||||
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
|
||||
raise_exc(exc, lex);
|
||||
#else
|
||||
vstr_t vstr;
|
||||
mp_print_t print;
|
||||
vstr_init_print(&vstr, 50, &print);
|
||||
mp_printf(&print, "invalid syntax for integer with base %d: ", base);
|
||||
mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
|
||||
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
|
||||
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
|
||||
mp_obj_new_str_from_utf8_vstr(&vstr));
|
||||
|
||||
@@ -30,35 +30,28 @@
|
||||
|
||||
// find real radix base, and strip preceding '0x', '0o' and '0b'
|
||||
// puts base in *base, and returns number of bytes to skip the prefix
|
||||
// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
|
||||
// ValueError if it's not all-digits-zero.
|
||||
size_t mp_parse_num_base(const char *str, size_t len, int *base) {
|
||||
const byte *p = (const byte *)str;
|
||||
if (len <= 1) {
|
||||
goto no_prefix;
|
||||
}
|
||||
unichar c = *(p++);
|
||||
if ((*base == 0 || *base == 16) && c == '0') {
|
||||
c = *(p++);
|
||||
if ((c | 32) == 'x') {
|
||||
if (c == '0') {
|
||||
c = *(p++) | 32;
|
||||
int b = *base;
|
||||
if (c == 'x' && !(b & ~16)) {
|
||||
*base = 16;
|
||||
} else if (*base == 0 && (c | 32) == 'o') {
|
||||
} else if (c == 'o' && !(b & ~8)) {
|
||||
*base = 8;
|
||||
} else if (*base == 0 && (c | 32) == 'b') {
|
||||
} else if (c == 'b' && !(b & ~2)) {
|
||||
*base = 2;
|
||||
} else {
|
||||
if (*base == 0) {
|
||||
*base = 10;
|
||||
p -= 2;
|
||||
if (b == 0) {
|
||||
*base = 1;
|
||||
}
|
||||
p -= 2;
|
||||
}
|
||||
} else if (*base == 8 && c == '0') {
|
||||
c = *(p++);
|
||||
if ((c | 32) != 'o') {
|
||||
p -= 2;
|
||||
}
|
||||
} else if (*base == 2 && c == '0') {
|
||||
c = *(p++);
|
||||
if ((c | 32) != 'b') {
|
||||
p -= 2;
|
||||
}
|
||||
} else {
|
||||
p--;
|
||||
|
||||
@@ -13,6 +13,7 @@ print(int('1'))
|
||||
print(int('+1'))
|
||||
print(int('-1'))
|
||||
print(int('01'))
|
||||
print(int('00'))
|
||||
print(int('9'))
|
||||
print(int('10'))
|
||||
print(int('+10'))
|
||||
@@ -31,6 +32,7 @@ print(int(' -3 '))
|
||||
print(int('0', 10))
|
||||
print(int('1', 10))
|
||||
print(int(' \t 1 \t ', 10))
|
||||
print(int(' \t 00 \t ', 10))
|
||||
print(int('11', 10))
|
||||
print(int('11', 16))
|
||||
print(int('11', 8))
|
||||
@@ -52,6 +54,17 @@ print(int(' \t 0o12', 8))
|
||||
print(int('0o12 \t ', 8))
|
||||
print(int(b"12", 10))
|
||||
print(int(b"12"))
|
||||
print(int('000 ', 0))
|
||||
print(int('000 ', 2))
|
||||
print(int('000 ', 8))
|
||||
print(int('000 ', 10))
|
||||
print(int('000 ', 16))
|
||||
print(int('000 ', 36))
|
||||
print(int('010 ', 2))
|
||||
print(int('010 ', 8))
|
||||
print(int('010 ', 10))
|
||||
print(int('010 ', 16))
|
||||
print(int('010 ', 36))
|
||||
|
||||
|
||||
def test(value, base):
|
||||
@@ -79,6 +92,8 @@ test('0o8', 8)
|
||||
test('0xg', 16)
|
||||
test('1 1', 16)
|
||||
test('123', 37)
|
||||
test('01', 0)
|
||||
test('01 ', 0)
|
||||
|
||||
# check that we don't parse this as a floating point number
|
||||
print(0x1e+1)
|
||||
|
||||
@@ -83,3 +83,11 @@ try:
|
||||
exec(r"'\U0000000'")
|
||||
except SyntaxError:
|
||||
print("SyntaxError")
|
||||
|
||||
# Properly formed integer literals
|
||||
print(eval("00"))
|
||||
# badly formed integer literals
|
||||
try:
|
||||
eval("01")
|
||||
except SyntaxError:
|
||||
print("SyntaxError")
|
||||
|
||||
Reference in New Issue
Block a user