py/objstr: Consolidate methods for str/bytes/bytearray/array.
This commit adds the bytes methods to bytearray, matching CPython. The existing implementations of these methods for str/bytes are reused for bytearray with minor updates to match CPython return types. For details on the CPython behaviour see https://docs.python.org/3/library/stdtypes.html#bytes-and-bytearray-operations The work to merge locals tables for str/bytes/bytearray/array was done by @jimmo. Because of this merging of locals the change in code size for this commit is mostly negative: bare-arm: +0 +0.000% minimal x86: +29 +0.018% unix x64: -792 -0.128% standard[incl -448(data)] unix nanbox: -436 -0.078% nanbox[incl -448(data)] stm32: -40 -0.010% PYBV10 cc3200: -32 -0.017% esp8266: -28 -0.004% GENERIC esp32: -72 -0.005% GENERIC[incl -200(data)] mimxrt: -40 -0.011% TEENSY40 renesas-ra: -40 -0.006% RA6M2_EK nrf: -16 -0.009% pca10040 rp2: -64 -0.013% PICO samd: +148 +0.105% ADAFRUIT_ITSYBITSY_M4_EXPRESS
This commit is contained in:
committed by
Damien George
parent
82b3500724
commit
f7f56d4285
157
py/objstr.c
157
py/objstr.c
@@ -41,6 +41,26 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_
|
||||
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
|
||||
STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
|
||||
|
||||
STATIC void str_check_arg_type(const mp_obj_type_t *self_type, const mp_obj_t arg) {
|
||||
// String operations generally need the args type to match the object they're called on,
|
||||
// e.g. str.find(str), byte.startswith(byte)
|
||||
// with the exception that bytes may be used for bytearray and vice versa.
|
||||
const mp_obj_type_t *arg_type = mp_obj_get_type(arg);
|
||||
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
if (arg_type == &mp_type_bytearray) {
|
||||
arg_type = &mp_type_bytes;
|
||||
}
|
||||
if (self_type == &mp_type_bytearray) {
|
||||
self_type = &mp_type_bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (arg_type != self_type) {
|
||||
bad_implicit_conversion(arg);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* str */
|
||||
|
||||
@@ -452,6 +472,7 @@ STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
|
||||
STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
|
||||
mp_check_self(mp_obj_is_str_or_bytes(self_in));
|
||||
const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
|
||||
const mp_obj_type_t *ret_type = self_type;
|
||||
|
||||
// get separation string
|
||||
GET_STR_DATA_LEN(self_in, sep_str, sep_len);
|
||||
@@ -469,8 +490,19 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
|
||||
|
||||
// count required length
|
||||
size_t required_len = 0;
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
if (self_type == &mp_type_bytearray) {
|
||||
self_type = &mp_type_bytes;
|
||||
}
|
||||
#endif
|
||||
for (size_t i = 0; i < seq_len; i++) {
|
||||
if (mp_obj_get_type(seq_items[i]) != self_type) {
|
||||
const mp_obj_type_t *seq_type = mp_obj_get_type(seq_items[i]);
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
if (seq_type == &mp_type_bytearray) {
|
||||
seq_type = &mp_type_bytes;
|
||||
}
|
||||
#endif
|
||||
if (seq_type != self_type) {
|
||||
mp_raise_TypeError(
|
||||
MP_ERROR_TEXT("join expects a list of str/bytes objects consistent with self object"));
|
||||
}
|
||||
@@ -496,7 +528,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
|
||||
}
|
||||
|
||||
// return joined string
|
||||
return mp_obj_new_str_from_vstr(self_type, &vstr);
|
||||
return mp_obj_new_str_from_vstr(ret_type, &vstr);
|
||||
}
|
||||
MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
|
||||
|
||||
@@ -545,9 +577,7 @@ mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
|
||||
|
||||
} else {
|
||||
// sep given
|
||||
if (mp_obj_get_type(sep) != self_type) {
|
||||
bad_implicit_conversion(sep);
|
||||
}
|
||||
str_check_arg_type(self_type, sep);
|
||||
|
||||
size_t sep_len;
|
||||
const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
|
||||
@@ -699,9 +729,7 @@ STATIC mp_obj_t str_finder(size_t n_args, const mp_obj_t *args, int direction, b
|
||||
mp_check_self(mp_obj_is_str_or_bytes(args[0]));
|
||||
|
||||
// check argument type
|
||||
if (mp_obj_get_type(args[1]) != self_type) {
|
||||
bad_implicit_conversion(args[1]);
|
||||
}
|
||||
str_check_arg_type(self_type, args[1]);
|
||||
|
||||
GET_STR_DATA_LEN(args[0], haystack, haystack_len);
|
||||
GET_STR_DATA_LEN(args[1], needle, needle_len);
|
||||
@@ -805,9 +833,7 @@ STATIC mp_obj_t str_uni_strip(int type, size_t n_args, const mp_obj_t *args) {
|
||||
chars_to_del = whitespace;
|
||||
chars_to_del_len = sizeof(whitespace) - 1;
|
||||
} else {
|
||||
if (mp_obj_get_type(args[1]) != self_type) {
|
||||
bad_implicit_conversion(args[1]);
|
||||
}
|
||||
str_check_arg_type(self_type, args[1]);
|
||||
GET_STR_DATA_LEN(args[1], s, l);
|
||||
chars_to_del = s;
|
||||
chars_to_del_len = l;
|
||||
@@ -1633,13 +1659,8 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) {
|
||||
|
||||
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
|
||||
|
||||
if (mp_obj_get_type(args[1]) != self_type) {
|
||||
bad_implicit_conversion(args[1]);
|
||||
}
|
||||
|
||||
if (mp_obj_get_type(args[2]) != self_type) {
|
||||
bad_implicit_conversion(args[2]);
|
||||
}
|
||||
str_check_arg_type(self_type, args[1]);
|
||||
str_check_arg_type(self_type, args[2]);
|
||||
|
||||
// extract string data
|
||||
|
||||
@@ -1726,9 +1747,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
|
||||
mp_check_self(mp_obj_is_str_or_bytes(args[0]));
|
||||
|
||||
// check argument type
|
||||
if (mp_obj_get_type(args[1]) != self_type) {
|
||||
bad_implicit_conversion(args[1]);
|
||||
}
|
||||
str_check_arg_type(self_type, args[1]);
|
||||
|
||||
GET_STR_DATA_LEN(args[0], haystack, haystack_len);
|
||||
GET_STR_DATA_LEN(args[1], needle, needle_len);
|
||||
@@ -1767,9 +1786,7 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
|
||||
STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
|
||||
mp_check_self(mp_obj_is_str_or_bytes(self_in));
|
||||
const mp_obj_type_t *self_type = mp_obj_get_type(self_in);
|
||||
if (self_type != mp_obj_get_type(arg)) {
|
||||
bad_implicit_conversion(arg);
|
||||
}
|
||||
str_check_arg_type(self_type, arg);
|
||||
|
||||
GET_STR_DATA_LEN(self_in, str, str_len);
|
||||
GET_STR_DATA_LEN(arg, sep, sep_len);
|
||||
@@ -1795,6 +1812,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
|
||||
result[2] = self_in;
|
||||
}
|
||||
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
if (mp_obj_get_type(arg) != self_type) {
|
||||
arg = mp_obj_new_str_of_type(self_type, sep, sep_len);
|
||||
}
|
||||
#endif
|
||||
|
||||
const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
|
||||
if (position_ptr != NULL) {
|
||||
size_t position = position_ptr - str;
|
||||
@@ -1940,17 +1963,15 @@ mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_u
|
||||
}
|
||||
}
|
||||
|
||||
STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = {
|
||||
// This locals table is used for the following types: str, bytes, bytearray, array.array.
|
||||
// Each type takes a different section (start to end offset) of this table.
|
||||
STATIC const mp_rom_map_elem_t array_bytearray_str_bytes_locals_table[] = {
|
||||
#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
{ MP_ROM_QSTR(MP_QSTR_append), MP_ROM_PTR(&mp_obj_array_append_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_extend), MP_ROM_PTR(&mp_obj_array_extend_obj) },
|
||||
#endif
|
||||
#if MICROPY_CPYTHON_COMPAT
|
||||
{ MP_ROM_QSTR(MP_QSTR_decode), MP_ROM_PTR(&bytes_decode_obj) },
|
||||
#if !MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
// If we have separate unicode type, then here we have methods only
|
||||
// for bytes type, and it should not have encode() methods. Otherwise,
|
||||
// we have non-compliant-but-practical bytestring type, which shares
|
||||
// method table with bytes, so they both have encode() and decode()
|
||||
// methods (which should do type checking at runtime).
|
||||
{ MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
|
||||
#endif
|
||||
#endif
|
||||
{ MP_ROM_QSTR(MP_QSTR_find), MP_ROM_PTR(&str_find_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_rfind), MP_ROM_PTR(&str_rfind_obj) },
|
||||
@@ -1986,9 +2007,46 @@ STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = {
|
||||
{ MP_ROM_QSTR(MP_QSTR_isdigit), MP_ROM_PTR(&str_isdigit_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_isupper), MP_ROM_PTR(&str_isupper_obj) },
|
||||
{ MP_ROM_QSTR(MP_QSTR_islower), MP_ROM_PTR(&str_islower_obj) },
|
||||
#if MICROPY_CPYTHON_COMPAT
|
||||
{ MP_ROM_QSTR(MP_QSTR_encode), MP_ROM_PTR(&str_encode_obj) },
|
||||
#endif
|
||||
};
|
||||
|
||||
STATIC MP_DEFINE_CONST_DICT(str8_locals_dict, str8_locals_dict_table);
|
||||
#if MICROPY_CPYTHON_COMPAT
|
||||
#define TABLE_ENTRIES_COMPAT 1
|
||||
#else
|
||||
#define TABLE_ENTRIES_COMPAT 0
|
||||
#endif
|
||||
|
||||
#if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
#define TABLE_ENTRIES_ARRAY 2
|
||||
#else
|
||||
#define TABLE_ENTRIES_ARRAY 0
|
||||
#endif
|
||||
|
||||
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_str_locals_dict,
|
||||
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT,
|
||||
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - (TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT));
|
||||
|
||||
#if TABLE_ENTRIES_COMPAT == 0
|
||||
#define mp_obj_bytes_locals_dict mp_obj_str_locals_dict
|
||||
#else
|
||||
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_bytes_locals_dict,
|
||||
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY,
|
||||
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - (TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT));
|
||||
#endif
|
||||
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_bytearray_locals_dict,
|
||||
array_bytearray_str_bytes_locals_table,
|
||||
MP_ARRAY_SIZE(array_bytearray_str_bytes_locals_table) - TABLE_ENTRIES_COMPAT);
|
||||
#endif
|
||||
|
||||
#if MICROPY_PY_ARRAY
|
||||
MP_DEFINE_CONST_DICT_WITH_SIZE(mp_obj_array_locals_dict,
|
||||
array_bytearray_str_bytes_locals_table,
|
||||
TABLE_ENTRIES_ARRAY);
|
||||
#endif
|
||||
|
||||
#if !MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf);
|
||||
@@ -2002,9 +2060,9 @@ const mp_obj_type_t mp_type_str = {
|
||||
.subscr = bytes_subscr,
|
||||
.getiter = mp_obj_new_str_iterator,
|
||||
.buffer_p = { .get_buffer = mp_obj_str_get_buffer },
|
||||
.locals_dict = (mp_obj_dict_t *)&str8_locals_dict,
|
||||
.locals_dict = (mp_obj_dict_t *)&mp_obj_str_locals_dict,
|
||||
};
|
||||
#endif
|
||||
#endif // !MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
|
||||
// Reuses most of methods from str
|
||||
const mp_obj_type_t mp_type_bytes = {
|
||||
@@ -2016,7 +2074,7 @@ const mp_obj_type_t mp_type_bytes = {
|
||||
.subscr = bytes_subscr,
|
||||
.getiter = mp_obj_new_bytes_iterator,
|
||||
.buffer_p = { .get_buffer = mp_obj_str_get_buffer },
|
||||
.locals_dict = (mp_obj_dict_t *)&str8_locals_dict,
|
||||
.locals_dict = (mp_obj_dict_t *)&mp_obj_bytes_locals_dict,
|
||||
};
|
||||
|
||||
// The zero-length bytes object, with data that includes a null-terminating byte
|
||||
@@ -2044,6 +2102,10 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t
|
||||
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len) {
|
||||
if (type == &mp_type_str) {
|
||||
return mp_obj_new_str((const char *)data, len);
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
} else if (type == &mp_type_bytearray) {
|
||||
return mp_obj_new_bytearray(len, data);
|
||||
#endif
|
||||
} else {
|
||||
return mp_obj_new_bytes(data, len);
|
||||
}
|
||||
@@ -2068,18 +2130,24 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) {
|
||||
}
|
||||
}
|
||||
|
||||
// make a new str/bytes object
|
||||
mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type);
|
||||
o->len = vstr->len;
|
||||
o->hash = qstr_compute_hash((byte *)vstr->buf, vstr->len);
|
||||
byte *data;
|
||||
if (vstr->len + 1 == vstr->alloc) {
|
||||
o->data = (byte *)vstr->buf;
|
||||
data = (byte *)vstr->buf;
|
||||
} else {
|
||||
o->data = (byte *)m_renew(char, vstr->buf, vstr->alloc, vstr->len + 1);
|
||||
data = (byte *)m_renew(char, vstr->buf, vstr->alloc, vstr->len + 1);
|
||||
}
|
||||
((byte *)o->data)[o->len] = '\0'; // add null byte
|
||||
data[vstr->len] = '\0'; // add null byte
|
||||
vstr->buf = NULL;
|
||||
vstr->alloc = 0;
|
||||
#if MICROPY_PY_BUILTINS_BYTEARRAY
|
||||
if (type == &mp_type_bytearray) {
|
||||
return mp_obj_new_bytearray_by_ref(vstr->len, data);
|
||||
}
|
||||
#endif
|
||||
mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type);
|
||||
o->len = vstr->len;
|
||||
o->hash = qstr_compute_hash(data, vstr->len);
|
||||
o->data = data;
|
||||
return MP_OBJ_FROM_PTR(o);
|
||||
}
|
||||
|
||||
@@ -2179,6 +2247,7 @@ const byte *mp_obj_str_get_data_no_check(mp_obj_t self_in, size_t *len) {
|
||||
if (mp_obj_is_qstr(self_in)) {
|
||||
return qstr_data(MP_OBJ_QSTR_VALUE(self_in), len);
|
||||
} else {
|
||||
MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE;
|
||||
*len = ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->len;
|
||||
return ((mp_obj_str_t *)MP_OBJ_TO_PTR(self_in))->data;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user