From 7c85c7c210e3ad417f59038de95b71618783d76c Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 26 Nov 2018 16:13:08 +1100 Subject: [PATCH] py/unicode: Fix check for valid utf8 being stricter about contn chars. --- py/unicode.c | 2 +- tests/unicode/unicode.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/py/unicode.c b/py/unicode.c index 935dc9012..d69b6f56f 100644 --- a/py/unicode.c +++ b/py/unicode.c @@ -180,7 +180,7 @@ bool utf8_check(const byte *p, size_t len) { for (; p < end; p++) { byte c = *p; if (need) { - if (c >= 0x80) { + if (UTF8_IS_CONT(c)) { need--; } else { // mismatch diff --git a/tests/unicode/unicode.py b/tests/unicode/unicode.py index 3a35ce894..b3d4b09ee 100644 --- a/tests/unicode/unicode.py +++ b/tests/unicode/unicode.py @@ -47,3 +47,7 @@ try: str(bytearray(b'ab\xc0a'), 'utf8') except UnicodeError: print('UnicodeError') +try: + str(b'\xf0\xe0\xed\xe8', 'utf8') +except UnicodeError: + print('UnicodeError')