harec

[hare] Hare compiler, written in C11 for POSIX OSs
Log | Files | Refs | README | LICENSE

commit 223e8235a93e86b6047a4a3d1f1bf253b2f534f8
parent f674e4cac551546cfee5dc1521f6f013e6b44522
Author: Jose Lombera <jose@lombera.dev>
Date:   Sun, 11 Sep 2022 20:03:04 -0500

utf8: utf8_decode: validate multi-byte codepoints

Validate byte mask in multi-byte codepoints.

Signed-off-by: Jose Lombera <jose@lombera.dev>

Diffstat:
M src/utf8.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/utf8.c b/src/utf8.c
@@ -59,9 +59,15 @@ utf8_decode(const char **char_str)
 	cp = **s & mask;
 	++*s;
 	while (--size) {
-		cp <<= 6;
-		cp |= **s & 0x3f;
+		uint8_t c = **s;
+		++*s;
+
+		if ((c >> 6) != 0x02)
+			return UTF8_INVALID;
+
+		cp <<= 6;
+		cp |= c & 0x3f;
 	}
 	return cp;
 }