py/objint_longlong: Add arithmetic overflow checks.
Long long big integer support now raises an exception on overflow rather than returning an undefined result. Also adds an error when shifting by a negative value. The new arithmetic checks are added in the misc.h header. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
This commit is contained in:
committed by
Damien George
parent
d07f103d68
commit
516aa02104
105
py/misc.h
105
py/misc.h
@@ -33,10 +33,15 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
typedef unsigned char byte;
|
typedef unsigned char byte;
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
|
#ifndef __has_builtin
|
||||||
|
#define __has_builtin(x) (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
/** generic ops *************************************************/
|
/** generic ops *************************************************/
|
||||||
|
|
||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
@@ -374,26 +379,23 @@ static inline bool mp_check(bool value) {
|
|||||||
static inline uint32_t mp_popcount(uint32_t x) {
|
static inline uint32_t mp_popcount(uint32_t x) {
|
||||||
return __popcnt(x);
|
return __popcnt(x);
|
||||||
}
|
}
|
||||||
#else
|
#else // _MSC_VER
|
||||||
#define mp_clz(x) __builtin_clz(x)
|
#define mp_clz(x) __builtin_clz(x)
|
||||||
#define mp_clzl(x) __builtin_clzl(x)
|
#define mp_clzl(x) __builtin_clzl(x)
|
||||||
#define mp_clzll(x) __builtin_clzll(x)
|
#define mp_clzll(x) __builtin_clzll(x)
|
||||||
#define mp_ctz(x) __builtin_ctz(x)
|
#define mp_ctz(x) __builtin_ctz(x)
|
||||||
#define mp_check(x) (x)
|
#define mp_check(x) (x)
|
||||||
#if defined __has_builtin
|
|
||||||
#if __has_builtin(__builtin_popcount)
|
#if __has_builtin(__builtin_popcount)
|
||||||
#define mp_popcount(x) __builtin_popcount(x)
|
#define mp_popcount(x) __builtin_popcount(x)
|
||||||
#endif
|
#else
|
||||||
#endif
|
|
||||||
#if !defined(mp_popcount)
|
|
||||||
static inline uint32_t mp_popcount(uint32_t x) {
|
static inline uint32_t mp_popcount(uint32_t x) {
|
||||||
x = x - ((x >> 1) & 0x55555555);
|
x = x - ((x >> 1) & 0x55555555);
|
||||||
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
||||||
x = (x + (x >> 4)) & 0x0F0F0F0F;
|
x = (x + (x >> 4)) & 0x0F0F0F0F;
|
||||||
return (x * 0x01010101) >> 24;
|
return (x * 0x01010101) >> 24;
|
||||||
}
|
}
|
||||||
#endif
|
#endif // __has_builtin(__builtin_popcount)
|
||||||
#endif
|
#endif // _MSC_VER
|
||||||
|
|
||||||
#define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0)
|
#define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0)
|
||||||
#define MP_FIT_SIGNED(bits, value) \
|
#define MP_FIT_SIGNED(bits, value) \
|
||||||
@@ -426,4 +428,93 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overflow-checked operations for long long
|
||||||
|
|
||||||
|
// Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10
|
||||||
|
//
|
||||||
|
// Note that the builtins have a defined result when overflow occurs, whereas the custom
|
||||||
|
// functions below don't update the result if an overflow would occur (to avoid UB).
|
||||||
|
#define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5)
|
||||||
|
|
||||||
|
#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
|
||||||
|
#define mp_mul_ull_overflow __builtin_umulll_overflow
|
||||||
|
#else
|
||||||
|
// Multiply two unsigned long long values with an overflow check.
//
// Returns true if x * y cannot be represented in an unsigned long long; in that
// case *res is not written. Otherwise the product is stored in *res and false
// is returned.
static inline bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) {
    // x * y > ULLONG_MAX is equivalent to x > ULLONG_MAX / y. A zero y can never
    // overflow and is excluded first so the division is always valid.
    if (y > 0 && x > (ULLONG_MAX / y)) {
        return true; // overflow
    }
    *res = x * y;
    return false;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
|
||||||
|
#define mp_mul_ll_overflow __builtin_smulll_overflow
|
||||||
|
#else
|
||||||
|
// Multiply two signed long long values with an overflow check.
//
// Returns true if x * y cannot be represented in a long long; in that case
// *res is not written, so the signed multiplication (which would be undefined
// behaviour) is never executed. Otherwise the product is stored in *res and
// false is returned.
static inline bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
    bool overflow;

    // Check for multiply overflow; see CERT INT32-C.
    // Each branch divides the relevant limit by an operand that is known to be
    // non-zero, and never evaluates LLONG_MIN / -1.
    if (x > 0) {
        if (y > 0) {
            // Both positive: product must not exceed LLONG_MAX.
            overflow = (x > (LLONG_MAX / y));
        } else {
            // x positive, y nonpositive: product must not go below LLONG_MIN.
            overflow = (y < (LLONG_MIN / x));
        }
    } else {
        if (y > 0) {
            // x nonpositive, y positive: product must not go below LLONG_MIN.
            overflow = (x < (LLONG_MIN / y));
        } else {
            // Both nonpositive: product is nonnegative and must not exceed
            // LLONG_MAX. x == 0 cannot overflow and would divide by zero.
            overflow = (x != 0 && y < (LLONG_MAX / x));
        }
    }

    if (!overflow) {
        *res = x * y;
    }

    return overflow;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
|
||||||
|
#define mp_add_ll_overflow __builtin_saddll_overflow
|
||||||
|
#else
|
||||||
|
// Add two signed long long values with an overflow check.
//
// Returns true if lhs + rhs cannot be represented in a long long; in that case
// *res is not written. Otherwise the sum is stored in *res and false is
// returned.
static inline bool mp_add_ll_overflow(long long int lhs, long long int rhs, long long int *res) {
    bool overflow;

    // The limit is adjusted by rhs in the direction that cannot itself
    // overflow, then compared against lhs.
    if (rhs > 0) {
        overflow = (lhs > LLONG_MAX - rhs);
    } else {
        overflow = (lhs < LLONG_MIN - rhs);
    }

    if (!overflow) {
        *res = lhs + rhs;
    }

    return overflow;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __has_builtin(__builtin_ssubll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
|
||||||
|
#define mp_sub_ll_overflow __builtin_ssubll_overflow
|
||||||
|
#else
|
||||||
|
// Subtract two signed long long values with an overflow check.
//
// Returns true if lhs - rhs cannot be represented in a long long; in that case
// *res is not written. Otherwise the difference is stored in *res and false is
// returned.
static inline bool mp_sub_ll_overflow(long long int lhs, long long int rhs, long long int *res) {
    bool overflow;

    // The limit is adjusted by rhs in the direction that cannot itself
    // overflow, then compared against lhs.
    if (rhs > 0) {
        overflow = (lhs < LLONG_MIN + rhs);
    } else {
        overflow = (lhs > LLONG_MAX + rhs);
    }

    if (!overflow) {
        *res = lhs - rhs;
    }

    return overflow;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // MICROPY_INCLUDED_PY_MISC_H
|
#endif // MICROPY_INCLUDED_PY_MISC_H
|
||||||
|
|||||||
@@ -31,6 +31,7 @@
|
|||||||
#include "py/smallint.h"
|
#include "py/smallint.h"
|
||||||
#include "py/objint.h"
|
#include "py/objint.h"
|
||||||
#include "py/runtime.h"
|
#include "py/runtime.h"
|
||||||
|
#include "py/misc.h"
|
||||||
|
|
||||||
#if MICROPY_PY_BUILTINS_FLOAT
|
#if MICROPY_PY_BUILTINS_FLOAT
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
@@ -43,6 +44,10 @@
|
|||||||
const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
|
const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Raise an OverflowError with the message "result overflows long long storage".
static void raise_long_long_overflow(void) {
|
||||||
|
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage"));
|
||||||
|
}
|
||||||
|
|
||||||
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
|
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
|
||||||
int delta = 1;
|
int delta = 1;
|
||||||
if (!big_endian) {
|
if (!big_endian) {
|
||||||
@@ -120,7 +125,6 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) {
|
|||||||
// small int if the value fits without truncation
|
// small int if the value fits without truncation
|
||||||
case MP_UNARY_OP_HASH:
|
case MP_UNARY_OP_HASH:
|
||||||
return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);
|
return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);
|
||||||
|
|
||||||
case MP_UNARY_OP_POSITIVE:
|
case MP_UNARY_OP_POSITIVE:
|
||||||
return o_in;
|
return o_in;
|
||||||
case MP_UNARY_OP_NEGATIVE:
|
case MP_UNARY_OP_NEGATIVE:
|
||||||
@@ -147,6 +151,8 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) {
|
|||||||
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
|
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
|
||||||
long long lhs_val;
|
long long lhs_val;
|
||||||
long long rhs_val;
|
long long rhs_val;
|
||||||
|
bool overflow = false;
|
||||||
|
long long result;
|
||||||
|
|
||||||
if (mp_obj_is_small_int(lhs_in)) {
|
if (mp_obj_is_small_int(lhs_in)) {
|
||||||
lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in);
|
lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in);
|
||||||
@@ -167,13 +173,16 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
|
|||||||
switch (op) {
|
switch (op) {
|
||||||
case MP_BINARY_OP_ADD:
|
case MP_BINARY_OP_ADD:
|
||||||
case MP_BINARY_OP_INPLACE_ADD:
|
case MP_BINARY_OP_INPLACE_ADD:
|
||||||
return mp_obj_new_int_from_ll(lhs_val + rhs_val);
|
overflow = mp_add_ll_overflow(lhs_val, rhs_val, &result);
|
||||||
|
break;
|
||||||
case MP_BINARY_OP_SUBTRACT:
|
case MP_BINARY_OP_SUBTRACT:
|
||||||
case MP_BINARY_OP_INPLACE_SUBTRACT:
|
case MP_BINARY_OP_INPLACE_SUBTRACT:
|
||||||
return mp_obj_new_int_from_ll(lhs_val - rhs_val);
|
overflow = mp_sub_ll_overflow(lhs_val, rhs_val, &result);
|
||||||
|
break;
|
||||||
case MP_BINARY_OP_MULTIPLY:
|
case MP_BINARY_OP_MULTIPLY:
|
||||||
case MP_BINARY_OP_INPLACE_MULTIPLY:
|
case MP_BINARY_OP_INPLACE_MULTIPLY:
|
||||||
return mp_obj_new_int_from_ll(lhs_val * rhs_val);
|
overflow = mp_mul_ll_overflow(lhs_val, rhs_val, &result);
|
||||||
|
break;
|
||||||
case MP_BINARY_OP_FLOOR_DIVIDE:
|
case MP_BINARY_OP_FLOOR_DIVIDE:
|
||||||
case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
|
case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
|
||||||
if (rhs_val == 0) {
|
if (rhs_val == 0) {
|
||||||
@@ -199,9 +208,21 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
|
|||||||
|
|
||||||
case MP_BINARY_OP_LSHIFT:
|
case MP_BINARY_OP_LSHIFT:
|
||||||
case MP_BINARY_OP_INPLACE_LSHIFT:
|
case MP_BINARY_OP_INPLACE_LSHIFT:
|
||||||
return mp_obj_new_int_from_ll(lhs_val << (int)rhs_val);
|
if ((int)rhs_val < 0) {
|
||||||
|
// negative shift not allowed
|
||||||
|
mp_raise_ValueError(MP_ERROR_TEXT("negative shift count"));
|
||||||
|
}
|
||||||
|
result = lhs_val << (int)rhs_val;
|
||||||
|
// Left-shifting of negative values is undefined behaviour in C, but assume compiler
|
||||||
|
// will give us typical 2s complement behaviour unless the value overflows
|
||||||
|
overflow = rhs_val > 0 && ((lhs_val >= 0 && result < lhs_val) || (lhs_val < 0 && result > lhs_val));
|
||||||
|
break;
|
||||||
case MP_BINARY_OP_RSHIFT:
|
case MP_BINARY_OP_RSHIFT:
|
||||||
case MP_BINARY_OP_INPLACE_RSHIFT:
|
case MP_BINARY_OP_INPLACE_RSHIFT:
|
||||||
|
if ((int)rhs_val < 0) {
|
||||||
|
// negative shift not allowed
|
||||||
|
mp_raise_ValueError(MP_ERROR_TEXT("negative shift count"));
|
||||||
|
}
|
||||||
return mp_obj_new_int_from_ll(lhs_val >> (int)rhs_val);
|
return mp_obj_new_int_from_ll(lhs_val >> (int)rhs_val);
|
||||||
|
|
||||||
case MP_BINARY_OP_POWER:
|
case MP_BINARY_OP_POWER:
|
||||||
@@ -213,18 +234,18 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
|
|||||||
mp_raise_ValueError(MP_ERROR_TEXT("negative power with no float support"));
|
mp_raise_ValueError(MP_ERROR_TEXT("negative power with no float support"));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
long long ans = 1;
|
result = 1;
|
||||||
while (rhs_val > 0) {
|
while (rhs_val > 0 && !overflow) {
|
||||||
if (rhs_val & 1) {
|
if (rhs_val & 1) {
|
||||||
ans *= lhs_val;
|
overflow = mp_mul_ll_overflow(result, lhs_val, &result);
|
||||||
}
|
}
|
||||||
if (rhs_val == 1) {
|
if (rhs_val == 1 || overflow) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
rhs_val /= 2;
|
rhs_val /= 2;
|
||||||
lhs_val *= lhs_val;
|
overflow = mp_mul_ll_overflow(lhs_val, lhs_val, &lhs_val);
|
||||||
}
|
}
|
||||||
return mp_obj_new_int_from_ll(ans);
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case MP_BINARY_OP_LESS:
|
case MP_BINARY_OP_LESS:
|
||||||
@@ -242,6 +263,12 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
|
|||||||
return MP_OBJ_NULL; // op not supported
|
return MP_OBJ_NULL; // op not supported
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (overflow) {
|
||||||
|
raise_long_long_overflow();
|
||||||
|
}
|
||||||
|
|
||||||
|
return mp_obj_new_int_from_ll(result);
|
||||||
|
|
||||||
zero_division:
|
zero_division:
|
||||||
mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("divide by zero"));
|
mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("divide by zero"));
|
||||||
}
|
}
|
||||||
@@ -267,7 +294,7 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) {
|
|||||||
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
|
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
|
||||||
// TODO raise an exception if the unsigned long long won't fit
|
// TODO raise an exception if the unsigned long long won't fit
|
||||||
if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) {
|
if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) {
|
||||||
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("ulonglong too large"));
|
raise_long_long_overflow();
|
||||||
}
|
}
|
||||||
return mp_obj_new_int_from_ll(val);
|
return mp_obj_new_int_from_ll(val);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,10 +117,21 @@ x = -4611686018427387904 # big
|
|||||||
# sys.maxsize is a constant bigint, so test it's compatible with dynamic ones
|
# sys.maxsize is a constant bigint, so test it's compatible with dynamic ones
|
||||||
import sys
|
import sys
|
||||||
if hasattr(sys, "maxsize"):
|
if hasattr(sys, "maxsize"):
|
||||||
print(sys.maxsize + 1 - 1 == sys.maxsize)
|
print(sys.maxsize - 1 + 1 == sys.maxsize)
|
||||||
else:
|
else:
|
||||||
print(True) # No maxsize property in this config
|
print(True) # No maxsize property in this config
|
||||||
|
|
||||||
# test extraction of big int value via mp_obj_get_int_maybe
|
# test extraction of big int value via mp_obj_get_int_maybe
|
||||||
x = 1 << 62
|
x = 1 << 62
|
||||||
print('a' * (x + 4 - x))
|
print('a' * (x + 4 - x))
|
||||||
|
|
||||||
|
# negative shifts are invalid
|
||||||
|
try:
|
||||||
|
print((1 << 48) >> -4)
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
try:
|
||||||
|
print((1 << 48) << -6)
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
|||||||
@@ -12,5 +12,8 @@ for i in range(8):
|
|||||||
print(uctypes.addressof(uctypes.bytearray_at(1 << i, 8)))
|
print(uctypes.addressof(uctypes.bytearray_at(1 << i, 8)))
|
||||||
|
|
||||||
# Test address that is bigger than the greatest small-int but still within the address range.
|
# Test address that is bigger than the greatest small-int but still within the address range.
|
||||||
|
try:
|
||||||
large_addr = maxsize + 1
|
large_addr = maxsize + 1
|
||||||
print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr)
|
print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr)
|
||||||
|
except OverflowError:
|
||||||
|
print(True) # systems with 64-bit bigints will overflow on the above operation
|
||||||
|
|||||||
Reference in New Issue
Block a user