Skip to content

Commit

Permalink
Unicode: remove lookup from valid_utf8()
Browse files: browse the repository at this point in the history
  • Loading branch information
AlekseyCherepanov committed Sep 25, 2024
1 parent d243681 commit cd3d694
Showing 1 changed file with 17 additions and 18 deletions.
35 changes: 17 additions & 18 deletions src/unicode.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -534,7 +534,7 @@ inline size_t strlen_any(const void *source)
int valid_utf8(const UTF8 *source)
{
UTF8 a;
int length, ret = 1;
int ret = 1;
const UTF8 *srcptr;

while (*source) {
Expand All @@ -546,35 +546,34 @@ int valid_utf8(const UTF8 *source)
if (*source < 0xC2)
return 0;

length = opt_trailingBytesUTF8[*source & 0x3f] + 1;
srcptr = source;

switch (length) {
default:
return 0;
/* Everything else falls through when valid */
/* But no fall-through in inner switch statements. */
case 4:
if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0;
if (*source >= 0xE0) { /* 3+ bytes */
if (*source >= 0xF0) { /* 4+ bytes */

if (*source > 0xF4) return 0;
if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0;

if (*source > 0xF4) return 0;

switch (*source) {
case 0xF0: if (a < 0x90) return 0; break;
case 0xF4: if (a > 0x8F) return 0;
}

} /* end of specific handling for 4+ bytes */

switch (*source) {
case 0xF0: if (a < 0x90) return 0; break;
case 0xF4: if (a > 0x8F) return 0;
}
case 3:
if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0;

switch (*source) {
case 0xE0: if (a < 0xA0) return 0; break;
case 0xED: if (a > 0x9F) return 0;
}
case 2:
if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0;
}
/* 2 bytes or "fall-through" with handled beginning of 3-4 bytes */

if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0;

source += length;
source = srcptr + 1;
ret++;
}
return ret;
Expand Down

0 comments on commit cd3d694

Please sign in to comment.