diff --git a/src/unicode.c b/src/unicode.c index 3efbff6ed5..592d567fe9 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -543,33 +543,36 @@ int valid_utf8(const UTF8 *source) continue; } + if (*source < 0xC2) + return 0; + length = opt_trailingBytesUTF8[*source & 0x3f] + 1; - srcptr = source + length; + srcptr = source; switch (length) { default: return 0; /* Everything else falls through when valid */ + /* But no fall-through in inner switch statements. */ case 4: - if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 3: - if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 2: - if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0; + + if (*source > 0xF4) return 0; switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return 0; break; - case 0xED: if (a > 0x9F) return 0; break; case 0xF0: if (a < 0x90) return 0; break; case 0xF4: if (a > 0x8F) return 0; } + case 3: + if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0; - case 1: - if (*source >= 0x80 && *source < 0xC2) return 0; + switch (*source) { + case 0xE0: if (a < 0xA0) return 0; break; + case 0xED: if (a > 0x9F) return 0; + } + case 2: + if ((a = (*++srcptr)) < 0x80 || a > 0xBF) return 0; } - if (*source > 0xF4) - return 0; source += length; ret++;