fune/xpcom/io/nsIUnicharInputStream.idl
Nika Layzell aef919f890 Bug 1170668 - Improve short read handling in nsConverterInputStream, r=hsivonen
This patch changes how nsConverterInputStream handles passing data
through to the underlying unicode converter in order to make it more
reliably handle propagating errors and deal with short reads from the
underlying input stream.

This was done by making the code continuously read within the Fill
method until at least one character has been decoded from the input
stream, so that we don't spuriously communicate an EOF to the caller due
to a short read not producing enough bytes for the decoder to produce a
UTF-16 character.

In addition, while making this change it became easier to signal to
the decoder about the final read from the input stream, meaning that
partial characters at the end of the stream will now generate a
replacement character, rather than being ignored.

Differential Revision: https://phabricator.services.mozilla.com/D152682
2022-08-11 17:37:20 +00:00

97 lines
3.8 KiB
Text

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
interface nsIUnicharInputStream;
interface nsIInputStream;
%{C++
/**
* The signature of the writer function passed to ReadSegments. This
* is the "consumer" of data that gets read from the stream's buffer.
*
* @param aInStream stream being read
* @param aClosure opaque parameter passed to ReadSegments
* @param aFromSegment pointer to memory owned by the input stream
* @param aToOffset number of UTF-16 code units already read
* (since ReadSegments was called)
* @param aCount length of fromSegment
* @param aWriteCount number of UTF-16 code units read
*
* Implementers should return the following:
*
* @throws <any-error> if not interested in consuming any data
*
* Errors are never passed to the caller of ReadSegments.
*
* NOTE: returning NS_OK and (*aWriteCount = 0) has undefined behavior.
*/
typedef nsresult (*nsWriteUnicharSegmentFun)(nsIUnicharInputStream *aInStream,
void *aClosure,
const char16_t *aFromSegment,
uint32_t aToOffset,
uint32_t aCount,
uint32_t *aWriteCount);
%}
native nsWriteUnicharSegmentFun(nsWriteUnicharSegmentFun);
/**
* Abstract UTF-16 input stream
* @see nsIInputStream
*/
[scriptable, uuid(d5e3bd80-6723-4b92-b0c9-22f6162fd94f)]
interface nsIUnicharInputStream : nsISupports {
/**
* Reads into a caller-provided array.
*
* @return The number of utf-16 code units that were successfully read.
* May be less than aCount, even if there is more data in the input
* stream. A return value of 0 means EOF.
*
* @note To read more than 2^32 code units, call this method multiple times.
*/
[noscript] unsigned long read([array, size_is(aCount)] in char16_t aBuf,
in unsigned long aCount);
/**
* Low-level read method that has access to the stream's underlying buffer.
* The writer function may be called multiple times for segmented buffers.
* ReadSegments is expected to keep calling the writer until either there is
* nothing left to read or the writer returns an error. ReadSegments should
* not call the writer with zero UTF-16 code units to consume.
*
* @param aWriter the "consumer" of the data to be read
* @param aClosure opaque parameter passed to writer
* @param aCount the maximum number of UTF-16 code units to be read
*
* @return number of UTF-16 code units read (may be less than aCount)
* @return 0 if reached end of file (or if aWriter refused to consume data)
*
* @throws NS_BASE_STREAM_WOULD_BLOCK if reading from the input stream would
* block the calling thread (non-blocking mode only)
* @throws <other-error> on failure
*
* NOTE: this function may be unimplemented if a stream has no underlying
* buffer
*/
[noscript] unsigned long readSegments(in nsWriteUnicharSegmentFun aWriter,
in voidPtr aClosure,
in unsigned long aCount);
/**
* Read into a string object.
*
* @param aCount The number of UTF-16 code units that should be read
* @return The number of UTF-16 code units that were read.
*/
unsigned long readString(in unsigned long aCount, out AString aString);
/**
* Close the stream and free associated resources. This also closes the
* underlying stream, if any.
*/
void close();
};