diff --git a/py/unicode.c b/py/unicode.c index 935dc9012..d69b6f56f 100644 --- a/py/unicode.c +++ b/py/unicode.c @@ -180,7 +180,7 @@ bool utf8_check(const byte *p, size_t len) { for (; p < end; p++) { byte c = *p; if (need) { - if (c >= 0x80) { + if (UTF8_IS_CONT(c)) { need--; } else { // mismatch diff --git a/tests/unicode/unicode.py b/tests/unicode/unicode.py index 3a35ce894..b3d4b09ee 100644 --- a/tests/unicode/unicode.py +++ b/tests/unicode/unicode.py @@ -47,3 +47,7 @@ try: str(bytearray(b'ab\xc0a'), 'utf8') except UnicodeError: print('UnicodeError') +try: + str(b'\xf0\xe0\xed\xe8', 'utf8') +except UnicodeError: + print('UnicodeError')