Skip to content

Commit 7665cee

Browse files
AdamKorczStanFromIreland
authored and committed
[3.13] gh-144872: fix heap buffer overflow in _PyTokenizer_ensure_utf8 (GH-144807)
(cherry picked from commit 3fc945d) Co-authored-by: AdamKorcz <44787359+AdamKorcz@users.noreply.github.com>
1 parent d567f45 commit 7665cee

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

Lib/test/test_source_encoding.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,23 @@ def test_issue7820(self):
6464
# two bytes in common with the UTF-8 BOM
6565
self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
6666

67+
def test_truncated_utf8_at_eof(self):
68+
# Regression test for https://issues.oss-fuzz.com/issues/451112368
69+
# Truncated multi-byte UTF-8 sequences at end of input caused an
70+
# out-of-bounds read in Parser/tokenizer/helpers.c:valid_utf8().
71+
truncated = [
72+
b'\xc2', # 2-byte lead, missing 1 continuation
73+
b'\xdf', # 2-byte lead, missing 1 continuation
74+
b'\xe0', # 3-byte lead, missing 2 continuations
75+
b'\xe0\xa0', # 3-byte lead, missing 1 continuation
76+
b'\xf0\x90', # 4-byte lead, missing 2 continuations
77+
b'\xf0\x90\x80', # 4-byte lead, missing 1 continuation
78+
b'\xf3', # 4-byte lead, missing 3 (the oss-fuzz reproducer)
79+
]
80+
for seq in truncated:
81+
with self.subTest(seq=seq):
82+
self.assertRaises(SyntaxError, compile, seq, '<test>', 'exec')
83+
6784
@requires_subprocess()
6885
def test_20731(self):
6986
sub = subprocess.Popen([sys.executable,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
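Fix a heap buffer overflow (out-of-bounds read) in the parser, found by OSS-Fuzz, that was triggered by a truncated multi-byte UTF-8 sequence at the end of the input.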

Parser/tokenizer/helpers.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,9 +481,11 @@ valid_utf8(const unsigned char* s)
481481
return 0;
482482
}
483483
length = expected + 1;
484-
for (; expected; expected--)
485-
if (s[expected] < 0x80 || s[expected] >= 0xC0)
484+
for (int i = 1; i <= expected; i++) {
485+
if (s[i] < 0x80 || s[i] >= 0xC0) {
486486
return 0;
487+
}
488+
}
487489
return length;
488490
}
489491

0 commit comments

Comments
 (0)