py/unicode: Clean up utf8 funcs and provide non-utf8 inline versions.
This patch provides inline versions of the utf8 helper functions for the case when Unicode is disabled (MICROPY_PY_BUILTINS_STR_UNICODE set to 0). This saves code size. The unichar_charlen function is also renamed to utf8_charlen to match the other utf8 helper functions, and the signature of this function is adjusted for consistency (const char* -> const byte*, mp_uint_t -> size_t).
This commit is contained in:
29
py/unicode.c
29
py/unicode.c
@@ -67,9 +67,9 @@ STATIC const uint8_t attr[] = {
|
||||
AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
|
||||
};
|
||||
|
||||
// TODO: Rename to str_get_char
|
||||
unichar utf8_get_char(const byte *s) {
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
|
||||
unichar utf8_get_char(const byte *s) {
|
||||
unichar ord = *s++;
|
||||
if (!UTF8_IS_NONASCII(ord)) return ord;
|
||||
ord &= 0x7F;
|
||||
@@ -80,22 +80,14 @@ unichar utf8_get_char(const byte *s) {
|
||||
ord = (ord << 6) | (*s++ & 0x3F);
|
||||
}
|
||||
return ord;
|
||||
#else
|
||||
return *s;
|
||||
#endif
|
||||
}
|
||||
|
||||
// TODO: Rename to str_next_char
|
||||
const byte *utf8_next_char(const byte *s) {
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
++s;
|
||||
while (UTF8_IS_CONT(*s)) {
|
||||
++s;
|
||||
}
|
||||
return s;
|
||||
#else
|
||||
return s + 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
|
||||
@@ -109,21 +101,18 @@ mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
|
||||
return i;
|
||||
}
|
||||
|
||||
// TODO: Rename to str_charlen
|
||||
mp_uint_t unichar_charlen(const char *str, mp_uint_t len) {
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
mp_uint_t charlen = 0;
|
||||
for (const char *top = str + len; str < top; ++str) {
|
||||
size_t utf8_charlen(const byte *str, size_t len) {
|
||||
size_t charlen = 0;
|
||||
for (const byte *top = str + len; str < top; ++str) {
|
||||
if (!UTF8_IS_CONT(*str)) {
|
||||
++charlen;
|
||||
}
|
||||
}
|
||||
return charlen;
|
||||
#else
|
||||
return len;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Be aware: These unichar_is* functions are actually ASCII-only!
|
||||
bool unichar_isspace(unichar c) {
|
||||
return c < 128 && (attr[c] & FL_SPACE) != 0;
|
||||
@@ -183,6 +172,8 @@ mp_uint_t unichar_xdigit_value(unichar c) {
|
||||
return n;
|
||||
}
|
||||
|
||||
#if MICROPY_PY_BUILTINS_STR_UNICODE
|
||||
|
||||
bool utf8_check(const byte *p, size_t len) {
|
||||
uint8_t need = 0;
|
||||
const byte *end = p + len;
|
||||
@@ -210,3 +201,5 @@ bool utf8_check(const byte *p, size_t len) {
|
||||
}
|
||||
return need == 0; // no pending fragments allowed
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user