fune/testing/web-platform/tests/encoding/textdecoder-eof.any.js
Andreu Botella ebc35c78ef Bug 1739798 [wpt PR 31537] - Fix TextCodecUTF8's error handling in EOF and across buffer boundaries, a=testonly
Automatic update from web-platform-tests
Fix TextCodecUTF8's error handling in EOF and across buffer boundaries

When TextCodecUTF8 found a truncated sequence at EOF, it used to emit
one replacement character per byte in the sequence, even when it was a
prefix of a valid sequence. Additionally, in streaming mode, if it found
a lead byte for which a valid sequence would span longer than the
currently available bytes, any processing of that sequence was deferred
until all such bytes were available, even if errors could be detected
earlier. Both issues are solved by always checking the validity of
partial sequences.

The approach used in this patch uses `DecodeNonASCIISequence` to find
the length of the maximal subpart of a partial sequence, and if the
length is equal to the partial sequence size and we're not at EOF, we
don't emit the error. However, this does not work when a byte in the
0x80 to 0xC1 range is found in a lead position, since
`NonASCIISequenceLength` wrongly returns 2 and `DecodeNonASCIISequence`
isn't enough to determine whether the partial sequence is invalid. This
is fixed by having `NonASCIISequenceLength` return 0 in those cases.

Another issue with this approach is that, since the outer do-while loops
in the `Decode` method take `do_flush && partial_sequence_size` as a
condition, if a non-ASCII lead byte is found whose valid sequences would
span longer than the bytes we have, those bytes would not be processed
until the next call to `Decode` if `do_flush` is false. But as it turns
out, the `do_flush` condition is not in fact needed, and removing it
fixes this issue.

Fixed: 796697
Fixed: 978522
Change-Id: Ic5a78e4eca356fdc2ad4038eba9ffe455fddf3ee
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3263938
Reviewed-by: Jeremy Roman <jbroman@chromium.org>
Reviewed-by: Joshua Bell <jsbell@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Commit-Queue: Jeremy Roman <jbroman@chromium.org>
Cr-Commit-Position: refs/heads/main@{#944572}

--

wpt-commits: 8e786e0fc0da2378b76a26e3dbfd96dccd8fd636
wpt-pr: 31537
2021-11-29 19:40:21 +00:00

40 lines
2 KiB
JavaScript

// Non-streaming decodes: each input is decoded in a single call, so every
// incomplete or invalid sequence has to show up in that call's result.
test(() => {
  // Each entry pairs the input bytes with the expected decoded string.
  const cases = [
    // Truncated sequences: a prefix of a valid sequence yields one U+FFFD,
    // not one per byte.
    [[0xF0], "\uFFFD"],
    [[0xF0, 0x9F], "\uFFFD"],
    [[0xF0, 0x9F, 0x92], "\uFFFD"],

    // Errors near end-of-queue
    [[0xF0, 0x9F, 0x41], "\uFFFDA"],
    [[0xF0, 0x41, 0x42], "\uFFFDAB"],
    [[0xF0, 0x41, 0xF0], "\uFFFDA\uFFFD"],
    [[0xF0, 0x8F, 0x92], "\uFFFD\uFFFD\uFFFD"],
  ];

  for (const [bytes, expected] of cases) {
    // A new decoder per case, so no state carries over between cases.
    assert_equals(new TextDecoder().decode(new Uint8Array(bytes)), expected);
  }
}, "TextDecoder end-of-queue handling");
// Streaming decodes: a single decoder is reused for every sequence of calls
// below, and input is delivered in chunks with `stream: true`.
test(() => {
  const decoder = new TextDecoder();
  // Decodes one chunk while keeping the stream open.
  const feed = (...bytes) => decoder.decode(new Uint8Array(bytes), { stream: true });
  // Calls decode() with no input and no options; the expectations below
  // treat this as the end of the input.
  const flush = () => decoder.decode();

  // Truncated sequence, delivered as one chunk and then ended.
  feed(0xF0);
  assert_equals(flush(), "\uFFFD");

  // Truncated sequence split across two chunks and then ended.
  feed(0xF0);
  feed(0x9F);
  assert_equals(flush(), "\uFFFD");

  // Truncated sequence whose last chunk is passed without `stream: true`.
  feed(0xF0, 0x9F);
  assert_equals(decoder.decode(new Uint8Array([0x92])), "\uFFFD");

  // A partial sequence is held back until a later chunk shows it is invalid.
  assert_equals(feed(0xF0, 0x9F), "");
  assert_equals(feed(0x41), "\uFFFDA");
  assert_equals(flush(), "");

  // An error that is detectable within the chunk is reported right away.
  assert_equals(feed(0xF0, 0x41, 0x42), "\uFFFDAB");
  assert_equals(flush(), "");

  // The trailing lead byte stays pending until the end of the input.
  assert_equals(feed(0xF0, 0x41, 0xF0), "\uFFFDA");
  assert_equals(flush(), "\uFFFD");

  // Byte-at-a-time delivery of a sequence where every byte is an error.
  assert_equals(feed(0xF0), "");
  assert_equals(feed(0x8F), "\uFFFD\uFFFD");
  assert_equals(feed(0x92), "\uFFFD");
  assert_equals(flush(), "");
}, "TextDecoder end-of-queue handling using stream: true");