forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			2045 lines
		
	
	
	
		
			74 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			2045 lines
		
	
	
	
		
			74 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2000-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ucnvscsu.c
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000nov18
*   created by: Markus W. Scherer
*
*   This is an implementation of the Standard Compression Scheme for Unicode
*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
*   Reserved commands and window settings are treated as illegal sequences and
*   will result in callback calls.
*/
 | 
						|
 | 
						|
#include "unicode/utypes.h"
 | 
						|
 | 
						|
#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
 | 
						|
 | 
						|
#include "unicode/ucnv.h"
 | 
						|
#include "unicode/ucnv_cb.h"
 | 
						|
#include "unicode/utf16.h"
 | 
						|
#include "ucnv_bld.h"
 | 
						|
#include "ucnv_cnv.h"
 | 
						|
#include "cmemory.h"
 | 
						|
 | 
						|
/* SCSU definitions --------------------------------------------------------- */
 | 
						|
 | 
						|
/* SCSU command byte values */
 | 
						|
enum {
    /* --- command bytes recognized while decoding in single-byte mode --- */
    SQ0 = 0x01, /* quote one character from window pair 0 (first SQn) */
    SQ7 = 0x08, /* quote one character from window pair 7 (last SQn) */
    SDX = 0x0B, /* define an extended (supplementary) window */
    Srs = 0x0C, /* reserved command byte */
    SQU = 0x0E, /* quote one Unicode character */
    SCU = 0x0F, /* switch to Unicode mode */
    SC0 = 0x10, /* select dynamic window 0 (first SCn) */
    SC7 = 0x17, /* select dynamic window 7 (last SCn) */
    SD0 = 0x18, /* define and select dynamic window 0 (first SDn) */
    SD7 = 0x1F, /* define and select dynamic window 7 (last SDn) */

    /* --- command bytes recognized while decoding in Unicode mode --- */
    UC0 = 0xE0, /* select dynamic window 0 (first UCn) */
    UC7 = 0xE7, /* select dynamic window 7 (last UCn) */
    UD0 = 0xE8, /* define and select dynamic window 0 (first UDn) */
    UD7 = 0xEF, /* define and select dynamic window 7 (last UDn) */
    UQU = 0xF0, /* quote one Unicode character */
    UDX = 0xF1, /* define an extended (supplementary) window */
    Urs = 0xF2  /* reserved command byte */
};
 | 
						|
 | 
						|
enum {
    /*
     * Code points from U+3400 to U+E000 are not addressable by a dynamic
     * window because no short-run alphabets live there. Window-definition
     * bytes at or above gapThreshold therefore get gapOffset added to the
     * computed window offset.
     */
    gapThreshold   = 0x68,
    gapOffset      = 0xAC00,

    /* definition bytes in [reservedStart, fixedThreshold) are reserved */
    reservedStart  = 0xA8,

    /* definition bytes from fixedThreshold up index the fixedOffsets[] table */
    fixedThreshold = 0xF9
};
 | 
						|
 | 
						|
/* constant offsets for the 8 static windows */
 | 
						|
static const uint32_t staticOffsets[8]={
    /* window 0 */ 0x0000, /* ASCII, for quoted tags */
    /* window 1 */ 0x0080, /* Latin-1 Supplement (gives access to punctuation) */
    /* window 2 */ 0x0100, /* Latin Extended-A */
    /* window 3 */ 0x0300, /* Combining Diacritical Marks */
    /* window 4 */ 0x2000, /* General Punctuation */
    /* window 5 */ 0x2080, /* Currency Symbols */
    /* window 6 */ 0x2100, /* Letterlike Symbols and Number Forms */
    /* window 7 */ 0x3000  /* CJK Symbols and Punctuation */
};
 | 
						|
 | 
						|
/* initial offsets for the 8 dynamic (sliding) windows */
 | 
						|
static const uint32_t initialDynamicOffsets[8]={
    /* window 0 */ 0x0080, /* Latin-1 */
    /* window 1 */ 0x00C0, /* Latin Extended A */
    /* window 2 */ 0x0400, /* Cyrillic */
    /* window 3 */ 0x0600, /* Arabic */
    /* window 4 */ 0x0900, /* Devanagari */
    /* window 5 */ 0x3040, /* Hiragana */
    /* window 6 */ 0x30A0, /* Katakana */
    /* window 7 */ 0xFF00  /* Fullwidth ASCII */
};
 | 
						|
 | 
						|
/* Table of fixed predefined Offsets */
 | 
						|
static const uint32_t fixedOffsets[]={
    0x00C0, /* definition byte 0xF9: Latin-1 letters + half of Latin Extended A */
    0x0250, /* definition byte 0xFA: IPA extensions */
    0x0370, /* definition byte 0xFB: Greek */
    0x0530, /* definition byte 0xFC: Armenian */
    0x3040, /* definition byte 0xFD: Hiragana */
    0x30A0, /* definition byte 0xFE: Katakana */
    0xFF60  /* definition byte 0xFF: Halfwidth Katakana */
};
 | 
						|
 | 
						|
/* state values */
 | 
						|
enum {
    readCommand   = 0, /* idle: the next byte starts a new character or command */
    quotePairOne  = 1, /* expecting the first byte of a quoted 16-bit unit */
    quotePairTwo  = 2, /* expecting the second byte of a 16-bit unit */
    quoteOne      = 3, /* expecting the single byte that follows an SQn command */
    definePairOne = 4, /* expecting the first parameter byte of SDX/UDX */
    definePairTwo = 5, /* expecting the second parameter byte of SDX/UDX */
    defineOne     = 6  /* expecting the parameter byte of an SDn/UDn command */
};
 | 
						|
 | 
						|
typedef struct SCSUData {
 | 
						|
    /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
 | 
						|
    uint32_t toUDynamicOffsets[8];
 | 
						|
    uint32_t fromUDynamicOffsets[8];
 | 
						|
 | 
						|
    /* state machine state - toUnicode */
 | 
						|
    UBool toUIsSingleByteMode;
 | 
						|
    uint8_t toUState;
 | 
						|
    int8_t toUQuoteWindow, toUDynamicWindow;
 | 
						|
    uint8_t toUByteOne;
 | 
						|
    uint8_t toUPadding[3];
 | 
						|
 | 
						|
    /* state machine state - fromUnicode */
 | 
						|
    UBool fromUIsSingleByteMode;
 | 
						|
    int8_t fromUDynamicWindow;
 | 
						|
 | 
						|
    /*
 | 
						|
     * windowUse[] keeps track of the use of the dynamic windows:
 | 
						|
     * At nextWindowUseIndex there is the least recently used window,
 | 
						|
     * and the following windows (in a wrapping manner) are more and more
 | 
						|
     * recently used.
 | 
						|
     * At nextWindowUseIndex-1 there is the most recently used window.
 | 
						|
     */
 | 
						|
    uint8_t locale;
 | 
						|
    int8_t nextWindowUseIndex;
 | 
						|
    int8_t windowUse[8];
 | 
						|
} SCSUData;
 | 
						|
 | 
						|
/* initial windowUse[] ordering for the generic (non-Japanese) locale */
static const int8_t initialWindowUse[8]={
    7, 0, 3, 2, 4, 5, 6, 1
};

/* initial windowUse[] ordering when the converter locale is l_ja */
static const int8_t initialWindowUse_ja[8]={
    3, 2, 4, 1, 0, 7, 5, 6
};
 | 
						|
 | 
						|
/* values stored in SCSUData.locale */
enum {
    lGeneric = 0, /* any locale other than Japanese */
    l_ja     = 1  /* locale "ja" or one starting with "ja_" */
};
 | 
						|
 | 
						|
/* SCSU setup functions ----------------------------------------------------- */
 | 
						|
U_CDECL_BEGIN
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
 | 
						|
    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    if(choice<=UCNV_RESET_TO_UNICODE) {
 | 
						|
        /* reset toUnicode */
 | 
						|
        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
 | 
						|
 | 
						|
        scsu->toUIsSingleByteMode=TRUE;
 | 
						|
        scsu->toUState=readCommand;
 | 
						|
        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
 | 
						|
        scsu->toUByteOne=0;
 | 
						|
 | 
						|
        cnv->toULength=0;
 | 
						|
    }
 | 
						|
    if(choice!=UCNV_RESET_TO_UNICODE) {
 | 
						|
        /* reset fromUnicode */
 | 
						|
        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
 | 
						|
 | 
						|
        scsu->fromUIsSingleByteMode=TRUE;
 | 
						|
        scsu->fromUDynamicWindow=0;
 | 
						|
 | 
						|
        scsu->nextWindowUseIndex=0;
 | 
						|
        switch(scsu->locale) {
 | 
						|
        case l_ja:
 | 
						|
            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
 | 
						|
            break;
 | 
						|
        default:
 | 
						|
            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
 | 
						|
            break;
 | 
						|
        }
 | 
						|
 | 
						|
        cnv->fromUChar32=0;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUOpen(UConverter *cnv,
 | 
						|
          UConverterLoadArgs *pArgs,
 | 
						|
          UErrorCode *pErrorCode) {
 | 
						|
    const char *locale=pArgs->locale;
 | 
						|
    if(pArgs->onlyTestIsLoadable) {
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
 | 
						|
    if(cnv->extraInfo!=NULL) {
 | 
						|
        if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
 | 
						|
            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
 | 
						|
        } else {
 | 
						|
            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
 | 
						|
        }
 | 
						|
        _SCSUReset(cnv, UCNV_RESET_BOTH);
 | 
						|
    } else {
 | 
						|
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
    /* Set the substitution character U+fffd as a Unicode string. */
 | 
						|
    cnv->subUChars[0]=0xfffd;
 | 
						|
    cnv->subCharLen=-1;
 | 
						|
}
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUClose(UConverter *cnv) {
 | 
						|
    if(cnv->extraInfo!=NULL) {
 | 
						|
        if(!cnv->isExtraLocal) {
 | 
						|
            uprv_free(cnv->extraInfo);
 | 
						|
        }
 | 
						|
        cnv->extraInfo=NULL;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* SCSU-to-Unicode conversion functions ------------------------------------- */
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 | 
						|
                          UErrorCode *pErrorCode) {
 | 
						|
    UConverter *cnv;
 | 
						|
    SCSUData *scsu;
 | 
						|
    const uint8_t *source, *sourceLimit;
 | 
						|
    UChar *target;
 | 
						|
    const UChar *targetLimit;
 | 
						|
    int32_t *offsets;
 | 
						|
    UBool isSingleByteMode;
 | 
						|
    uint8_t state, byteOne;
 | 
						|
    int8_t quoteWindow, dynamicWindow;
 | 
						|
 | 
						|
    int32_t sourceIndex, nextSourceIndex;
 | 
						|
 | 
						|
    uint8_t b;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    cnv=pArgs->converter;
 | 
						|
    scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    source=(const uint8_t *)pArgs->source;
 | 
						|
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 | 
						|
    target=pArgs->target;
 | 
						|
    targetLimit=pArgs->targetLimit;
 | 
						|
    offsets=pArgs->offsets;
 | 
						|
 | 
						|
    /* get the state machine state */
 | 
						|
    isSingleByteMode=scsu->toUIsSingleByteMode;
 | 
						|
    state=scsu->toUState;
 | 
						|
    quoteWindow=scsu->toUQuoteWindow;
 | 
						|
    dynamicWindow=scsu->toUDynamicWindow;
 | 
						|
    byteOne=scsu->toUByteOne;
 | 
						|
 | 
						|
    /* sourceIndex=-1 if the current character began in the previous buffer */
 | 
						|
    sourceIndex=state==readCommand ? 0 : -1;
 | 
						|
    nextSourceIndex=0;
 | 
						|
 | 
						|
    /*
 | 
						|
     * conversion "loop"
 | 
						|
     *
 | 
						|
     * For performance, this is not a normal C loop.
 | 
						|
     * Instead, there are two code blocks for the two SCSU modes.
 | 
						|
     * The function branches to either one, and a change of the mode is done with a goto to
 | 
						|
     * the other branch.
 | 
						|
     *
 | 
						|
     * Each branch has two conventional loops:
 | 
						|
     * - a fast-path loop for the most common codes in the mode
 | 
						|
     * - a loop for all other codes in the mode
 | 
						|
     * When the fast-path runs into a code that it cannot handle, its loop ends and it
 | 
						|
     * runs into the following loop to handle the other codes.
 | 
						|
     * The end of the input or output buffer is also handled by the slower loop.
 | 
						|
     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
 | 
						|
     *
 | 
						|
     * The callback handling is done by returning with an error code.
 | 
						|
     * The conversion framework actually calls the callback function.
 | 
						|
     */
 | 
						|
    if(isSingleByteMode) {
 | 
						|
        /* fast path for single-byte mode */
 | 
						|
        if(state==readCommand) {
 | 
						|
fastSingle:
 | 
						|
            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
 | 
						|
                ++source;
 | 
						|
                ++nextSourceIndex;
 | 
						|
                if(b<=0x7f) {
 | 
						|
                    /* write US-ASCII graphic character or DEL */
 | 
						|
                    *target++=(UChar)b;
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* write from dynamic window */
 | 
						|
                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
 | 
						|
                    if(c<=0xffff) {
 | 
						|
                        *target++=(UChar)c;
 | 
						|
                        if(offsets!=NULL) {
 | 
						|
                            *offsets++=sourceIndex;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* output surrogate pair */
 | 
						|
                        *target++=(UChar)(0xd7c0+(c>>10));
 | 
						|
                        if(target<targetLimit) {
 | 
						|
                            *target++=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            if(offsets!=NULL) {
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                            }
 | 
						|
                        } else {
 | 
						|
                            /* target overflow */
 | 
						|
                            if(offsets!=NULL) {
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                            }
 | 
						|
                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            cnv->UCharErrorBufferLength=1;
 | 
						|
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
 | 
						|
singleByteMode:
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(target>=targetLimit) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            b=*source++;
 | 
						|
            ++nextSourceIndex;
 | 
						|
            switch(state) {
 | 
						|
            case readCommand:
 | 
						|
                /* redundant conditions are commented out */
 | 
						|
                /* here: b<0x20 because otherwise we would be in fastSingle */
 | 
						|
                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
 | 
						|
                    /* CR/LF/TAB/NUL */
 | 
						|
                    *target++=(UChar)b;
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                    sourceIndex=nextSourceIndex;
 | 
						|
                    goto fastSingle;
 | 
						|
                } else if(SC0<=b) {
 | 
						|
                    if(b<=SC7) {
 | 
						|
                        dynamicWindow=(int8_t)(b-SC0);
 | 
						|
                        sourceIndex=nextSourceIndex;
 | 
						|
                        goto fastSingle;
 | 
						|
                    } else /* if(SD0<=b && b<=SD7) */ {
 | 
						|
                        dynamicWindow=(int8_t)(b-SD0);
 | 
						|
                        state=defineOne;
 | 
						|
                    }
 | 
						|
                } else if(/* SQ0<=b && */ b<=SQ7) {
 | 
						|
                    quoteWindow=(int8_t)(b-SQ0);
 | 
						|
                    state=quoteOne;
 | 
						|
                } else if(b==SDX) {
 | 
						|
                    state=definePairOne;
 | 
						|
                } else if(b==SQU) {
 | 
						|
                    state=quotePairOne;
 | 
						|
                } else if(b==SCU) {
 | 
						|
                    sourceIndex=nextSourceIndex;
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    goto fastUnicode;
 | 
						|
                } else /* Srs */ {
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* store the first byte of a multibyte sequence in toUBytes[] */
 | 
						|
                cnv->toUBytes[0]=b;
 | 
						|
                cnv->toULength=1;
 | 
						|
                break;
 | 
						|
            case quotePairOne:
 | 
						|
                byteOne=b;
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=quotePairTwo;
 | 
						|
                break;
 | 
						|
            case quotePairTwo:
 | 
						|
                *target++=(UChar)((byteOne<<8)|b);
 | 
						|
                if(offsets!=NULL) {
 | 
						|
                    *offsets++=sourceIndex;
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case quoteOne:
 | 
						|
                if(b<0x80) {
 | 
						|
                    /* all static offsets are in the BMP */
 | 
						|
                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* write from dynamic window */
 | 
						|
                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
 | 
						|
                    if(c<=0xffff) {
 | 
						|
                        *target++=(UChar)c;
 | 
						|
                        if(offsets!=NULL) {
 | 
						|
                            *offsets++=sourceIndex;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* output surrogate pair */
 | 
						|
                        *target++=(UChar)(0xd7c0+(c>>10));
 | 
						|
                        if(target<targetLimit) {
 | 
						|
                            *target++=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            if(offsets!=NULL) {
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                            }
 | 
						|
                        } else {
 | 
						|
                            /* target overflow */
 | 
						|
                            if(offsets!=NULL) {
 | 
						|
                                *offsets++=sourceIndex;
 | 
						|
                            }
 | 
						|
                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            cnv->UCharErrorBufferLength=1;
 | 
						|
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case definePairOne:
 | 
						|
                dynamicWindow=(int8_t)((b>>5)&7);
 | 
						|
                byteOne=(uint8_t)(b&0x1f);
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=definePairTwo;
 | 
						|
                break;
 | 
						|
            case definePairTwo:
 | 
						|
                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case defineOne:
 | 
						|
                if(b==0) {
 | 
						|
                    /* callback(illegal): Reserved window offset value 0 */
 | 
						|
                    cnv->toUBytes[1]=b;
 | 
						|
                    cnv->toULength=2;
 | 
						|
                    goto endloop;
 | 
						|
                } else if(b<gapThreshold) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
 | 
						|
                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
 | 
						|
                } else if(b>=fixedThreshold) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
 | 
						|
                } else {
 | 
						|
                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
 | 
						|
                    cnv->toUBytes[1]=b;
 | 
						|
                    cnv->toULength=2;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        /* fast path for Unicode mode */
 | 
						|
        if(state==readCommand) {
 | 
						|
fastUnicode:
 | 
						|
            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
 | 
						|
                *target++=(UChar)((b<<8)|source[1]);
 | 
						|
                if(offsets!=NULL) {
 | 
						|
                    *offsets++=sourceIndex;
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                nextSourceIndex+=2;
 | 
						|
                source+=2;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        /* normal state machine for Unicode mode */
 | 
						|
/* unicodeByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(target>=targetLimit) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            b=*source++;
 | 
						|
            ++nextSourceIndex;
 | 
						|
            switch(state) {
 | 
						|
            case readCommand:
 | 
						|
                if((uint8_t)(b-UC0)>(Urs-UC0)) {
 | 
						|
                    byteOne=b;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=quotePairTwo;
 | 
						|
                } else if(/* UC0<=b && */ b<=UC7) {
 | 
						|
                    dynamicWindow=(int8_t)(b-UC0);
 | 
						|
                    sourceIndex=nextSourceIndex;
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    goto fastSingle;
 | 
						|
                } else if(/* UD0<=b && */ b<=UD7) {
 | 
						|
                    dynamicWindow=(int8_t)(b-UD0);
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=defineOne;
 | 
						|
                    goto singleByteMode;
 | 
						|
                } else if(b==UDX) {
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=definePairOne;
 | 
						|
                    goto singleByteMode;
 | 
						|
                } else if(b==UQU) {
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=quotePairOne;
 | 
						|
                } else /* Urs */ {
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
                break;
 | 
						|
            case quotePairOne:
 | 
						|
                byteOne=b;
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=quotePairTwo;
 | 
						|
                break;
 | 
						|
            case quotePairTwo:
 | 
						|
                *target++=(UChar)((byteOne<<8)|b);
 | 
						|
                if(offsets!=NULL) {
 | 
						|
                    *offsets++=sourceIndex;
 | 
						|
                }
 | 
						|
                sourceIndex=nextSourceIndex;
 | 
						|
                state=readCommand;
 | 
						|
                goto fastUnicode;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
endloop:
 | 
						|
 | 
						|
    /* set the converter state back into UConverter */
 | 
						|
    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
 | 
						|
        /* reset to deal with the next character */
 | 
						|
        state=readCommand;
 | 
						|
    } else if(state==readCommand) {
 | 
						|
        /* not in a multi-byte sequence, reset toULength */
 | 
						|
        cnv->toULength=0;
 | 
						|
    }
 | 
						|
    scsu->toUIsSingleByteMode=isSingleByteMode;
 | 
						|
    scsu->toUState=state;
 | 
						|
    scsu->toUQuoteWindow=quoteWindow;
 | 
						|
    scsu->toUDynamicWindow=dynamicWindow;
 | 
						|
    scsu->toUByteOne=byteOne;
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=(const char *)source;
 | 
						|
    pArgs->target=target;
 | 
						|
    pArgs->offsets=offsets;
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
 * If a change is made in the original function, then either
 * change this function the same way or
 * re-copy the original function and remove the variables
 * offsets, sourceIndex, and nextSourceIndex.
 */
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
 | 
						|
               UErrorCode *pErrorCode) {
 | 
						|
    UConverter *cnv;
 | 
						|
    SCSUData *scsu;
 | 
						|
    const uint8_t *source, *sourceLimit;
 | 
						|
    UChar *target;
 | 
						|
    const UChar *targetLimit;
 | 
						|
    UBool isSingleByteMode;
 | 
						|
    uint8_t state, byteOne;
 | 
						|
    int8_t quoteWindow, dynamicWindow;
 | 
						|
 | 
						|
    uint8_t b;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    cnv=pArgs->converter;
 | 
						|
    scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    source=(const uint8_t *)pArgs->source;
 | 
						|
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 | 
						|
    target=pArgs->target;
 | 
						|
    targetLimit=pArgs->targetLimit;
 | 
						|
 | 
						|
    /* get the state machine state */
 | 
						|
    isSingleByteMode=scsu->toUIsSingleByteMode;
 | 
						|
    state=scsu->toUState;
 | 
						|
    quoteWindow=scsu->toUQuoteWindow;
 | 
						|
    dynamicWindow=scsu->toUDynamicWindow;
 | 
						|
    byteOne=scsu->toUByteOne;
 | 
						|
 | 
						|
    /*
 | 
						|
     * conversion "loop"
 | 
						|
     *
 | 
						|
     * For performance, this is not a normal C loop.
 | 
						|
     * Instead, there are two code blocks for the two SCSU modes.
 | 
						|
     * The function branches to either one, and a change of the mode is done with a goto to
 | 
						|
     * the other branch.
 | 
						|
     *
 | 
						|
     * Each branch has two conventional loops:
 | 
						|
     * - a fast-path loop for the most common codes in the mode
 | 
						|
     * - a loop for all other codes in the mode
 | 
						|
     * When the fast-path runs into a code that it cannot handle, its loop ends and it
 | 
						|
     * runs into the following loop to handle the other codes.
 | 
						|
     * The end of the input or output buffer is also handled by the slower loop.
 | 
						|
     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
 | 
						|
     *
 | 
						|
     * The callback handling is done by returning with an error code.
 | 
						|
     * The conversion framework actually calls the callback function.
 | 
						|
     */
 | 
						|
    if(isSingleByteMode) {
 | 
						|
        /* fast path for single-byte mode */
 | 
						|
        if(state==readCommand) {
 | 
						|
fastSingle:
 | 
						|
            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
 | 
						|
                ++source;
 | 
						|
                if(b<=0x7f) {
 | 
						|
                    /* write US-ASCII graphic character or DEL */
 | 
						|
                    *target++=(UChar)b;
 | 
						|
                } else {
 | 
						|
                    /* write from dynamic window */
 | 
						|
                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
 | 
						|
                    if(c<=0xffff) {
 | 
						|
                        *target++=(UChar)c;
 | 
						|
                    } else {
 | 
						|
                        /* output surrogate pair */
 | 
						|
                        *target++=(UChar)(0xd7c0+(c>>10));
 | 
						|
                        if(target<targetLimit) {
 | 
						|
                            *target++=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                        } else {
 | 
						|
                            /* target overflow */
 | 
						|
                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            cnv->UCharErrorBufferLength=1;
 | 
						|
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
 | 
						|
singleByteMode:
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(target>=targetLimit) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            b=*source++;
 | 
						|
            switch(state) {
 | 
						|
            case readCommand:
 | 
						|
                /* redundant conditions are commented out */
 | 
						|
                /* here: b<0x20 because otherwise we would be in fastSingle */
 | 
						|
                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
 | 
						|
                    /* CR/LF/TAB/NUL */
 | 
						|
                    *target++=(UChar)b;
 | 
						|
                    goto fastSingle;
 | 
						|
                } else if(SC0<=b) {
 | 
						|
                    if(b<=SC7) {
 | 
						|
                        dynamicWindow=(int8_t)(b-SC0);
 | 
						|
                        goto fastSingle;
 | 
						|
                    } else /* if(SD0<=b && b<=SD7) */ {
 | 
						|
                        dynamicWindow=(int8_t)(b-SD0);
 | 
						|
                        state=defineOne;
 | 
						|
                    }
 | 
						|
                } else if(/* SQ0<=b && */ b<=SQ7) {
 | 
						|
                    quoteWindow=(int8_t)(b-SQ0);
 | 
						|
                    state=quoteOne;
 | 
						|
                } else if(b==SDX) {
 | 
						|
                    state=definePairOne;
 | 
						|
                } else if(b==SQU) {
 | 
						|
                    state=quotePairOne;
 | 
						|
                } else if(b==SCU) {
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    goto fastUnicode;
 | 
						|
                } else /* Srs */ {
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* store the first byte of a multibyte sequence in toUBytes[] */
 | 
						|
                cnv->toUBytes[0]=b;
 | 
						|
                cnv->toULength=1;
 | 
						|
                break;
 | 
						|
            case quotePairOne:
 | 
						|
                byteOne=b;
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=quotePairTwo;
 | 
						|
                break;
 | 
						|
            case quotePairTwo:
 | 
						|
                *target++=(UChar)((byteOne<<8)|b);
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case quoteOne:
 | 
						|
                if(b<0x80) {
 | 
						|
                    /* all static offsets are in the BMP */
 | 
						|
                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
 | 
						|
                } else {
 | 
						|
                    /* write from dynamic window */
 | 
						|
                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
 | 
						|
                    if(c<=0xffff) {
 | 
						|
                        *target++=(UChar)c;
 | 
						|
                    } else {
 | 
						|
                        /* output surrogate pair */
 | 
						|
                        *target++=(UChar)(0xd7c0+(c>>10));
 | 
						|
                        if(target<targetLimit) {
 | 
						|
                            *target++=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                        } else {
 | 
						|
                            /* target overflow */
 | 
						|
                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
 | 
						|
                            cnv->UCharErrorBufferLength=1;
 | 
						|
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case definePairOne:
 | 
						|
                dynamicWindow=(int8_t)((b>>5)&7);
 | 
						|
                byteOne=(uint8_t)(b&0x1f);
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=definePairTwo;
 | 
						|
                break;
 | 
						|
            case definePairTwo:
 | 
						|
                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            case defineOne:
 | 
						|
                if(b==0) {
 | 
						|
                    /* callback(illegal): Reserved window offset value 0 */
 | 
						|
                    cnv->toUBytes[1]=b;
 | 
						|
                    cnv->toULength=2;
 | 
						|
                    goto endloop;
 | 
						|
                } else if(b<gapThreshold) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
 | 
						|
                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
 | 
						|
                } else if(b>=fixedThreshold) {
 | 
						|
                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
 | 
						|
                } else {
 | 
						|
                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
 | 
						|
                    cnv->toUBytes[1]=b;
 | 
						|
                    cnv->toULength=2;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
                state=readCommand;
 | 
						|
                goto fastSingle;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        /* fast path for Unicode mode */
 | 
						|
        if(state==readCommand) {
 | 
						|
fastUnicode:
 | 
						|
            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
 | 
						|
                *target++=(UChar)((b<<8)|source[1]);
 | 
						|
                source+=2;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        /* normal state machine for Unicode mode */
 | 
						|
/* unicodeByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(target>=targetLimit) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            b=*source++;
 | 
						|
            switch(state) {
 | 
						|
            case readCommand:
 | 
						|
                if((uint8_t)(b-UC0)>(Urs-UC0)) {
 | 
						|
                    byteOne=b;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=quotePairTwo;
 | 
						|
                } else if(/* UC0<=b && */ b<=UC7) {
 | 
						|
                    dynamicWindow=(int8_t)(b-UC0);
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    goto fastSingle;
 | 
						|
                } else if(/* UD0<=b && */ b<=UD7) {
 | 
						|
                    dynamicWindow=(int8_t)(b-UD0);
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=defineOne;
 | 
						|
                    goto singleByteMode;
 | 
						|
                } else if(b==UDX) {
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=definePairOne;
 | 
						|
                    goto singleByteMode;
 | 
						|
                } else if(b==UQU) {
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    state=quotePairOne;
 | 
						|
                } else /* Urs */ {
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    cnv->toUBytes[0]=b;
 | 
						|
                    cnv->toULength=1;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
                break;
 | 
						|
            case quotePairOne:
 | 
						|
                byteOne=b;
 | 
						|
                cnv->toUBytes[1]=b;
 | 
						|
                cnv->toULength=2;
 | 
						|
                state=quotePairTwo;
 | 
						|
                break;
 | 
						|
            case quotePairTwo:
 | 
						|
                *target++=(UChar)((byteOne<<8)|b);
 | 
						|
                state=readCommand;
 | 
						|
                goto fastUnicode;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
endloop:
 | 
						|
 | 
						|
    /* set the converter state back into UConverter */
 | 
						|
    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
 | 
						|
        /* reset to deal with the next character */
 | 
						|
        state=readCommand;
 | 
						|
    } else if(state==readCommand) {
 | 
						|
        /* not in a multi-byte sequence, reset toULength */
 | 
						|
        cnv->toULength=0;
 | 
						|
    }
 | 
						|
    scsu->toUIsSingleByteMode=isSingleByteMode;
 | 
						|
    scsu->toUState=state;
 | 
						|
    scsu->toUQuoteWindow=quoteWindow;
 | 
						|
    scsu->toUDynamicWindow=dynamicWindow;
 | 
						|
    scsu->toUByteOne=byteOne;
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=(const char *)source;
 | 
						|
    pArgs->target=target;
 | 
						|
    return;
 | 
						|
}
 | 
						|
U_CDECL_END
 | 
						|
/* SCSU-from-Unicode conversion functions ----------------------------------- */
 | 
						|
 | 
						|
/*
 | 
						|
 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
 | 
						|
 * reasonable results. The lookahead is minimal.
 | 
						|
 * Many cases are simple:
 | 
						|
 * A character fits directly into the current mode, a dynamic or static window,
 | 
						|
 * or is not compressible. These cases are tested first.
 | 
						|
 * Real compression heuristics are applied to the rest, in code branches for
 | 
						|
 * single/Unicode mode and BMP/supplementary code points.
 | 
						|
 * The heuristics used here are extremely simple.
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * Find the first of the 8 windows whose 128-code-point range
 * [offsets[w], offsets[w]+0x7f] contains c.
 * Returns the window number 0..7, or -1 if no window contains c.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window=0;

    while(window<8) {
        /* unsigned subtraction: values below the window start wrap to large numbers */
        uint32_t delta=c-offsets[window];
        if(delta<0x80) {
            return window;
        }
        ++window;
    }
    return -1;
}
 | 
						|
 | 
						|
/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
 | 
						|
static UBool
 | 
						|
isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
 | 
						|
    return (UBool)(c<=offset+0x7f &&
 | 
						|
          (c>=offset || (c<=0x7f &&
 | 
						|
                        (c>=0x20 || (1UL<<c)&0x2601))));
 | 
						|
                                /* binary 0010 0110 0000 0001,
 | 
						|
                                   check for b==0xd || b==0xa || b==9 || b==0 */
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * getNextDynamicWindow returns the next dynamic window to be redefined
 | 
						|
 */
 | 
						|
static int8_t
 | 
						|
getNextDynamicWindow(SCSUData *scsu) {
 | 
						|
    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
 | 
						|
    if(++scsu->nextWindowUseIndex==8) {
 | 
						|
        scsu->nextWindowUseIndex=0;
 | 
						|
    }
 | 
						|
    return window;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * useDynamicWindow() adjusts
 | 
						|
 * windowUse[] and nextWindowUseIndex for the algorithm to choose
 | 
						|
 * the next dynamic window to be defined;
 | 
						|
 * a subclass may override it and provide its own algorithm.
 | 
						|
 */
 | 
						|
static void
 | 
						|
useDynamicWindow(SCSUData *scsu, int8_t window) {
 | 
						|
    /*
 | 
						|
     * move the existing window, which just became the most recently used one,
 | 
						|
     * up in windowUse[] to nextWindowUseIndex-1
 | 
						|
     */
 | 
						|
 | 
						|
    /* first, find the index of the window - backwards to favor the more recently used windows */
 | 
						|
    int i, j;
 | 
						|
 | 
						|
    i=scsu->nextWindowUseIndex;
 | 
						|
    do {
 | 
						|
        if(--i<0) {
 | 
						|
            i=7;
 | 
						|
        }
 | 
						|
    } while(scsu->windowUse[i]!=window);
 | 
						|
 | 
						|
    /* now copy each windowUse[i+1] to [i] */
 | 
						|
    j=i+1;
 | 
						|
    if(j==8) {
 | 
						|
        j=0;
 | 
						|
    }
 | 
						|
    while(j!=scsu->nextWindowUseIndex) {
 | 
						|
        scsu->windowUse[i]=scsu->windowUse[j];
 | 
						|
        i=j;
 | 
						|
        if(++j==8) { j=0; }
 | 
						|
    }
 | 
						|
 | 
						|
    /* finally, set the window into the most recently used index */
 | 
						|
    scsu->windowUse[i]=window;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * calculate the offset and the code for a dynamic window that contains the character
 | 
						|
 * takes fixed offsets into account
 | 
						|
 * the offset of the window is stored in the offset variable,
 | 
						|
 * the code is returned
 | 
						|
 *
 | 
						|
 * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
 | 
						|
 */
 | 
						|
static int
 | 
						|
getDynamicOffset(uint32_t c, uint32_t *pOffset) {
 | 
						|
    int i;
 | 
						|
 | 
						|
    for(i=0; i<7; ++i) {
 | 
						|
        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
 | 
						|
            *pOffset=fixedOffsets[i];
 | 
						|
            return 0xf9+i;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if(c<0x80) {
 | 
						|
        /* No dynamic window for US-ASCII. */
 | 
						|
        return -1;
 | 
						|
    } else if(c<0x3400 ||
 | 
						|
              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
 | 
						|
              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
 | 
						|
    ) {
 | 
						|
        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
 | 
						|
        *pOffset=c&0x7fffff80;
 | 
						|
        return (int)(c>>7);
 | 
						|
    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
 | 
						|
        /* For these characters we need to take the gapOffset into account. */
 | 
						|
        *pOffset=c&0x7fffff80;
 | 
						|
        return (int)((c-gapOffset)>>7);
 | 
						|
    } else {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
}
 | 
						|
U_CDECL_BEGIN
 | 
						|
/*
 | 
						|
 * Idea for compression:
 | 
						|
 *  - save SCSUData and other state before really starting work
 | 
						|
 *  - at endloop, see if compression could be better with just unicode mode
 | 
						|
 *  - don't do this if a callback has been called
 | 
						|
 *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
 | 
						|
 *  - different buffer handling!
 | 
						|
 *
 | 
						|
 * Drawback or need for corrective handling:
 | 
						|
 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
 | 
						|
 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
 | 
						|
 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
 | 
						|
 *
 | 
						|
 * How to achieve both?
 | 
						|
 *  - Only replace the result after an SDX or SCU?
 | 
						|
 */
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 | 
						|
                            UErrorCode *pErrorCode) {
 | 
						|
    UConverter *cnv;
 | 
						|
    SCSUData *scsu;
 | 
						|
    const UChar *source, *sourceLimit;
 | 
						|
    uint8_t *target;
 | 
						|
    int32_t targetCapacity;
 | 
						|
    int32_t *offsets;
 | 
						|
 | 
						|
    UBool isSingleByteMode;
 | 
						|
    uint8_t dynamicWindow;
 | 
						|
    uint32_t currentOffset;
 | 
						|
 | 
						|
    uint32_t c, delta;
 | 
						|
 | 
						|
    int32_t sourceIndex, nextSourceIndex;
 | 
						|
 | 
						|
    int32_t length;
 | 
						|
 | 
						|
    /* variables for compression heuristics */
 | 
						|
    uint32_t offset;
 | 
						|
    UChar lead, trail;
 | 
						|
    int code;
 | 
						|
    int8_t window;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    cnv=pArgs->converter;
 | 
						|
    scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    source=pArgs->source;
 | 
						|
    sourceLimit=pArgs->sourceLimit;
 | 
						|
    target=(uint8_t *)pArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 | 
						|
    offsets=pArgs->offsets;
 | 
						|
 | 
						|
    /* get the state machine state */
 | 
						|
    isSingleByteMode=scsu->fromUIsSingleByteMode;
 | 
						|
    dynamicWindow=scsu->fromUDynamicWindow;
 | 
						|
    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
 | 
						|
    c=cnv->fromUChar32;
 | 
						|
 | 
						|
    /* sourceIndex=-1 if the current character began in the previous buffer */
 | 
						|
    sourceIndex= c==0 ? 0 : -1;
 | 
						|
    nextSourceIndex=0;
 | 
						|
 | 
						|
    /* similar conversion "loop" as in toUnicode */
 | 
						|
loop:
 | 
						|
    if(isSingleByteMode) {
 | 
						|
        if(c!=0 && targetCapacity>0) {
 | 
						|
            goto getTrailSingle;
 | 
						|
        }
 | 
						|
 | 
						|
        /* state machine for single-byte mode */
 | 
						|
/* singleByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(targetCapacity<=0) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            c=*source++;
 | 
						|
            ++nextSourceIndex;
 | 
						|
 | 
						|
            if((c-0x20)<=0x5f) {
 | 
						|
                /* pass US-ASCII graphic character through */
 | 
						|
                *target++=(uint8_t)c;
 | 
						|
                if(offsets!=NULL) {
 | 
						|
                    *offsets++=sourceIndex;
 | 
						|
                }
 | 
						|
                --targetCapacity;
 | 
						|
            } else if(c<0x20) {
 | 
						|
                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
 | 
						|
                    /* CR/LF/TAB/NUL */
 | 
						|
                    *target++=(uint8_t)c;
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                    --targetCapacity;
 | 
						|
                } else {
 | 
						|
                    /* quote C0 control character */
 | 
						|
                    c|=SQ0<<8;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if((delta=c-currentOffset)<=0x7f) {
 | 
						|
                /* use the current dynamic window */
 | 
						|
                *target++=(uint8_t)(delta|0x80);
 | 
						|
                if(offsets!=NULL) {
 | 
						|
                    *offsets++=sourceIndex;
 | 
						|
                }
 | 
						|
                --targetCapacity;
 | 
						|
            } else if(U16_IS_SURROGATE(c)) {
 | 
						|
                if(U16_IS_SURROGATE_LEAD(c)) {
 | 
						|
getTrailSingle:
 | 
						|
                    lead=(UChar)c;
 | 
						|
                    if(source<sourceLimit) {
 | 
						|
                        /* test the following code unit */
 | 
						|
                        trail=*source;
 | 
						|
                        if(U16_IS_TRAIL(trail)) {
 | 
						|
                            ++source;
 | 
						|
                            ++nextSourceIndex;
 | 
						|
                            c=U16_GET_SUPPLEMENTARY(c, trail);
 | 
						|
                            /* convert this surrogate code point */
 | 
						|
                            /* exit this condition tree */
 | 
						|
                        } else {
 | 
						|
                            /* this is an unmatched lead code unit (1st surrogate) */
 | 
						|
                            /* callback(illegal) */
 | 
						|
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* no more input */
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* this is an unmatched trail code unit (2nd surrogate) */
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* compress supplementary character U+10000..U+10ffff */
 | 
						|
                if((delta=c-currentOffset)<=0x7f) {
 | 
						|
                    /* use the current dynamic window */
 | 
						|
                    *target++=(uint8_t)(delta|0x80);
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                    --targetCapacity;
 | 
						|
                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                    /* there is a dynamic window that contains this character, change to it */
 | 
						|
                    dynamicWindow=window;
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                    /* might check if there are more characters in this window to come */
 | 
						|
                    /* define an extended window with this character */
 | 
						|
                    code-=0x200;
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* change to Unicode mode and output this (lead, trail) pair */
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    *target++=(uint8_t)SCU;
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                    --targetCapacity;
 | 
						|
                    c=((uint32_t)lead<<16)|trail;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if(c<0xa0) {
 | 
						|
                /* quote C1 control character */
 | 
						|
                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
 | 
						|
                length=2;
 | 
						|
                goto outputBytes;
 | 
						|
            } else if(c==0xfeff || c>=0xfff0) {
 | 
						|
                /* quote signature character=byte order mark and specials */
 | 
						|
                c|=SQU<<16;
 | 
						|
                length=3;
 | 
						|
                goto outputBytes;
 | 
						|
            } else {
 | 
						|
                /* compress all other BMP characters */
 | 
						|
                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                    /* there is a window defined that contains this character - switch to it or quote from it? */
 | 
						|
                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
 | 
						|
                        /* change to dynamic window */
 | 
						|
                        dynamicWindow=window;
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else {
 | 
						|
                        /* quote from dynamic window */
 | 
						|
                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    }
 | 
						|
                } else if((window=getWindow(staticOffsets, c))>=0) {
 | 
						|
                    /* quote from static window */
 | 
						|
                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                    /* define a dynamic window with this character */
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
 | 
						|
                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
 | 
						|
                ) {
 | 
						|
                    /*
 | 
						|
                     * this character is not compressible (a BMP ideograph or similar);
 | 
						|
                     * switch to Unicode mode if this is the last character in the block
 | 
						|
                     * or there is at least one more ideograph following immediately
 | 
						|
                     */
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    c|=SCU<<16;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* quote Unicode */
 | 
						|
                    c|=SQU<<16;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            /* normal end of conversion: prepare for a new character */
 | 
						|
            c=0;
 | 
						|
            sourceIndex=nextSourceIndex;
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        if(c!=0 && targetCapacity>0) {
 | 
						|
            goto getTrailUnicode;
 | 
						|
        }
 | 
						|
 | 
						|
        /* state machine for Unicode mode */
 | 
						|
/* unicodeByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(targetCapacity<=0) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            c=*source++;
 | 
						|
            ++nextSourceIndex;
 | 
						|
 | 
						|
            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
 | 
						|
                /* not compressible, write character directly */
 | 
						|
                if(targetCapacity>=2) {
 | 
						|
                    *target++=(uint8_t)(c>>8);
 | 
						|
                    *target++=(uint8_t)c;
 | 
						|
                    if(offsets!=NULL) {
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                        *offsets++=sourceIndex;
 | 
						|
                    }
 | 
						|
                    targetCapacity-=2;
 | 
						|
                } else {
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
 | 
						|
                /* compress BMP character if the following one is not an uncompressible ideograph */
 | 
						|
                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
 | 
						|
                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
 | 
						|
                        /* ASCII digit or letter */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                        /* there is a dynamic window that contains this character, change to it */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        dynamicWindow=window;
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                        /* define a dynamic window with this character */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=3;
 | 
						|
                        goto outputBytes;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
 | 
						|
                /* don't know how to compress this character, just write it directly */
 | 
						|
                length=2;
 | 
						|
                goto outputBytes;
 | 
						|
            } else if(c<0xe000) {
 | 
						|
                /* c is a surrogate */
 | 
						|
                if(U16_IS_SURROGATE_LEAD(c)) {
 | 
						|
getTrailUnicode:
 | 
						|
                    lead=(UChar)c;
 | 
						|
                    if(source<sourceLimit) {
 | 
						|
                        /* test the following code unit */
 | 
						|
                        trail=*source;
 | 
						|
                        if(U16_IS_TRAIL(trail)) {
 | 
						|
                            ++source;
 | 
						|
                            ++nextSourceIndex;
 | 
						|
                            c=U16_GET_SUPPLEMENTARY(c, trail);
 | 
						|
                            /* convert this surrogate code point */
 | 
						|
                            /* exit this condition tree */
 | 
						|
                        } else {
 | 
						|
                            /* this is an unmatched lead code unit (1st surrogate) */
 | 
						|
                            /* callback(illegal) */
 | 
						|
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* no more input */
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* this is an unmatched trail code unit (2nd surrogate) */
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* compress supplementary character */
 | 
						|
                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
 | 
						|
                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
 | 
						|
                ) {
 | 
						|
                    /*
 | 
						|
                     * there is a dynamic window that contains this character and
 | 
						|
                     * the following character is not uncompressible,
 | 
						|
                     * change to the window
 | 
						|
                     */
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    dynamicWindow=window;
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
 | 
						|
                          (code=getDynamicOffset(c, &offset))>=0
 | 
						|
                ) {
 | 
						|
                    /* two supplementary characters in (probably) the same window - define an extended one */
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    code-=0x200;
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* don't know how to compress this character, just write it directly */
 | 
						|
                    c=((uint32_t)lead<<16)|trail;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else /* 0xe000<=c<0xf300 */ {
 | 
						|
                /* quote to avoid SCSU tags */
 | 
						|
                c|=UQU<<16;
 | 
						|
                length=3;
 | 
						|
                goto outputBytes;
 | 
						|
            }
 | 
						|
 | 
						|
            /* normal end of conversion: prepare for a new character */
 | 
						|
            c=0;
 | 
						|
            sourceIndex=nextSourceIndex;
 | 
						|
        }
 | 
						|
    }
 | 
						|
endloop:
 | 
						|
 | 
						|
    /* set the converter state back into UConverter */
 | 
						|
    scsu->fromUIsSingleByteMode=isSingleByteMode;
 | 
						|
    scsu->fromUDynamicWindow=dynamicWindow;
 | 
						|
 | 
						|
    cnv->fromUChar32=c;
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=source;
 | 
						|
    pArgs->target=(char *)target;
 | 
						|
    pArgs->offsets=offsets;
 | 
						|
    return;
 | 
						|
 | 
						|
outputBytes:
 | 
						|
    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
 | 
						|
    /* from the first if in the loop we know that targetCapacity>0 */
 | 
						|
    if(length<=targetCapacity) {
 | 
						|
        if(offsets==NULL) {
 | 
						|
            switch(length) {
 | 
						|
                /* each branch falls through to the next one */
 | 
						|
            case 4:
 | 
						|
                *target++=(uint8_t)(c>>24);
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 3:
 | 
						|
                *target++=(uint8_t)(c>>16);
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 2:
 | 
						|
                *target++=(uint8_t)(c>>8);
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 1:
 | 
						|
                *target++=(uint8_t)c;
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            default:
 | 
						|
                /* will never occur */
 | 
						|
                break;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            switch(length) {
 | 
						|
                /* each branch falls through to the next one */
 | 
						|
            case 4:
 | 
						|
                *target++=(uint8_t)(c>>24);
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 3:
 | 
						|
                *target++=(uint8_t)(c>>16);
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 2:
 | 
						|
                *target++=(uint8_t)(c>>8);
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            case 1:
 | 
						|
                *target++=(uint8_t)c;
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
                U_FALLTHROUGH;
 | 
						|
            default:
 | 
						|
                /* will never occur */
 | 
						|
                break;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        targetCapacity-=length;
 | 
						|
 | 
						|
        /* normal end of conversion: prepare for a new character */
 | 
						|
        c=0;
 | 
						|
        sourceIndex=nextSourceIndex;
 | 
						|
        goto loop;
 | 
						|
    } else {
 | 
						|
        uint8_t *p;
 | 
						|
 | 
						|
        /*
 | 
						|
         * We actually do this backwards here:
 | 
						|
         * In order to save an intermediate variable, we output
 | 
						|
         * first to the overflow buffer what does not fit into the
 | 
						|
         * regular target.
 | 
						|
         */
 | 
						|
        /* we know that 0<=targetCapacity<length<=4 */
 | 
						|
        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
 | 
						|
        length-=targetCapacity;
 | 
						|
        p=(uint8_t *)cnv->charErrorBuffer;
 | 
						|
        switch(length) {
 | 
						|
            /* each branch falls through to the next one */
 | 
						|
        case 4:
 | 
						|
            *p++=(uint8_t)(c>>24);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 3:
 | 
						|
            *p++=(uint8_t)(c>>16);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 2:
 | 
						|
            *p++=(uint8_t)(c>>8);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 1:
 | 
						|
            *p=(uint8_t)c;
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        default:
 | 
						|
            /* will never occur */
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        cnv->charErrorBufferLength=(int8_t)length;
 | 
						|
 | 
						|
        /* now output what fits into the regular target */
 | 
						|
        c>>=8*length; /* length was reduced by targetCapacity */
 | 
						|
        switch(targetCapacity) {
 | 
						|
            /* each branch falls through to the next one */
 | 
						|
        case 3:
 | 
						|
            *target++=(uint8_t)(c>>16);
 | 
						|
            if(offsets!=NULL) {
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
            }
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 2:
 | 
						|
            *target++=(uint8_t)(c>>8);
 | 
						|
            if(offsets!=NULL) {
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
            }
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 1:
 | 
						|
            *target++=(uint8_t)c;
 | 
						|
            if(offsets!=NULL) {
 | 
						|
                *offsets++=sourceIndex;
 | 
						|
            }
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        default:
 | 
						|
            break;
 | 
						|
        }
 | 
						|
 | 
						|
        /* target overflow */
 | 
						|
        targetCapacity=0;
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
        c=0;
 | 
						|
        goto endloop;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
 * If a change is made in the original function, then either
 * change this function the same way or
 * re-copy the original function and remove the variables
 * offsets, sourceIndex, and nextSourceIndex.
 */
 | 
						|
static void U_CALLCONV
 | 
						|
_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
 | 
						|
                 UErrorCode *pErrorCode) {
 | 
						|
    UConverter *cnv;
 | 
						|
    SCSUData *scsu;
 | 
						|
    const UChar *source, *sourceLimit;
 | 
						|
    uint8_t *target;
 | 
						|
    int32_t targetCapacity;
 | 
						|
 | 
						|
    UBool isSingleByteMode;
 | 
						|
    uint8_t dynamicWindow;
 | 
						|
    uint32_t currentOffset;
 | 
						|
 | 
						|
    uint32_t c, delta;
 | 
						|
 | 
						|
    int32_t length;
 | 
						|
 | 
						|
    /* variables for compression heuristics */
 | 
						|
    uint32_t offset;
 | 
						|
    UChar lead, trail;
 | 
						|
    int code;
 | 
						|
    int8_t window;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    cnv=pArgs->converter;
 | 
						|
    scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    source=pArgs->source;
 | 
						|
    sourceLimit=pArgs->sourceLimit;
 | 
						|
    target=(uint8_t *)pArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 | 
						|
 | 
						|
    /* get the state machine state */
 | 
						|
    isSingleByteMode=scsu->fromUIsSingleByteMode;
 | 
						|
    dynamicWindow=scsu->fromUDynamicWindow;
 | 
						|
    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
 | 
						|
    c=cnv->fromUChar32;
 | 
						|
 | 
						|
    /* similar conversion "loop" as in toUnicode */
 | 
						|
loop:
 | 
						|
    if(isSingleByteMode) {
 | 
						|
        if(c!=0 && targetCapacity>0) {
 | 
						|
            goto getTrailSingle;
 | 
						|
        }
 | 
						|
 | 
						|
        /* state machine for single-byte mode */
 | 
						|
/* singleByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(targetCapacity<=0) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            c=*source++;
 | 
						|
 | 
						|
            if((c-0x20)<=0x5f) {
 | 
						|
                /* pass US-ASCII graphic character through */
 | 
						|
                *target++=(uint8_t)c;
 | 
						|
                --targetCapacity;
 | 
						|
            } else if(c<0x20) {
 | 
						|
                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
 | 
						|
                    /* CR/LF/TAB/NUL */
 | 
						|
                    *target++=(uint8_t)c;
 | 
						|
                    --targetCapacity;
 | 
						|
                } else {
 | 
						|
                    /* quote C0 control character */
 | 
						|
                    c|=SQ0<<8;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if((delta=c-currentOffset)<=0x7f) {
 | 
						|
                /* use the current dynamic window */
 | 
						|
                *target++=(uint8_t)(delta|0x80);
 | 
						|
                --targetCapacity;
 | 
						|
            } else if(U16_IS_SURROGATE(c)) {
 | 
						|
                if(U16_IS_SURROGATE_LEAD(c)) {
 | 
						|
getTrailSingle:
 | 
						|
                    lead=(UChar)c;
 | 
						|
                    if(source<sourceLimit) {
 | 
						|
                        /* test the following code unit */
 | 
						|
                        trail=*source;
 | 
						|
                        if(U16_IS_TRAIL(trail)) {
 | 
						|
                            ++source;
 | 
						|
                            c=U16_GET_SUPPLEMENTARY(c, trail);
 | 
						|
                            /* convert this surrogate code point */
 | 
						|
                            /* exit this condition tree */
 | 
						|
                        } else {
 | 
						|
                            /* this is an unmatched lead code unit (1st surrogate) */
 | 
						|
                            /* callback(illegal) */
 | 
						|
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* no more input */
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* this is an unmatched trail code unit (2nd surrogate) */
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* compress supplementary character U+10000..U+10ffff */
 | 
						|
                if((delta=c-currentOffset)<=0x7f) {
 | 
						|
                    /* use the current dynamic window */
 | 
						|
                    *target++=(uint8_t)(delta|0x80);
 | 
						|
                    --targetCapacity;
 | 
						|
                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                    /* there is a dynamic window that contains this character, change to it */
 | 
						|
                    dynamicWindow=window;
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                    /* might check if there are more characters in this window to come */
 | 
						|
                    /* define an extended window with this character */
 | 
						|
                    code-=0x200;
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* change to Unicode mode and output this (lead, trail) pair */
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    *target++=(uint8_t)SCU;
 | 
						|
                    --targetCapacity;
 | 
						|
                    c=((uint32_t)lead<<16)|trail;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if(c<0xa0) {
 | 
						|
                /* quote C1 control character */
 | 
						|
                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
 | 
						|
                length=2;
 | 
						|
                goto outputBytes;
 | 
						|
            } else if(c==0xfeff || c>=0xfff0) {
 | 
						|
                /* quote signature character=byte order mark and specials */
 | 
						|
                c|=SQU<<16;
 | 
						|
                length=3;
 | 
						|
                goto outputBytes;
 | 
						|
            } else {
 | 
						|
                /* compress all other BMP characters */
 | 
						|
                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                    /* there is a window defined that contains this character - switch to it or quote from it? */
 | 
						|
                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
 | 
						|
                        /* change to dynamic window */
 | 
						|
                        dynamicWindow=window;
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else {
 | 
						|
                        /* quote from dynamic window */
 | 
						|
                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    }
 | 
						|
                } else if((window=getWindow(staticOffsets, c))>=0) {
 | 
						|
                    /* quote from static window */
 | 
						|
                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                    /* define a dynamic window with this character */
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
 | 
						|
                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
 | 
						|
                ) {
 | 
						|
                    /*
 | 
						|
                     * this character is not compressible (a BMP ideograph or similar);
 | 
						|
                     * switch to Unicode mode if this is the last character in the block
 | 
						|
                     * or there is at least one more ideograph following immediately
 | 
						|
                     */
 | 
						|
                    isSingleByteMode=FALSE;
 | 
						|
                    c|=SCU<<16;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* quote Unicode */
 | 
						|
                    c|=SQU<<16;
 | 
						|
                    length=3;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            /* normal end of conversion: prepare for a new character */
 | 
						|
            c=0;
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        if(c!=0 && targetCapacity>0) {
 | 
						|
            goto getTrailUnicode;
 | 
						|
        }
 | 
						|
 | 
						|
        /* state machine for Unicode mode */
 | 
						|
/* unicodeByteMode: */
 | 
						|
        while(source<sourceLimit) {
 | 
						|
            if(targetCapacity<=0) {
 | 
						|
                /* target is full */
 | 
						|
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            c=*source++;
 | 
						|
 | 
						|
            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
 | 
						|
                /* not compressible, write character directly */
 | 
						|
                if(targetCapacity>=2) {
 | 
						|
                    *target++=(uint8_t)(c>>8);
 | 
						|
                    *target++=(uint8_t)c;
 | 
						|
                    targetCapacity-=2;
 | 
						|
                } else {
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
 | 
						|
                /* compress BMP character if the following one is not an uncompressible ideograph */
 | 
						|
                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
 | 
						|
                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
 | 
						|
                        /* ASCII digit or letter */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
 | 
						|
                        /* there is a dynamic window that contains this character, change to it */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        dynamicWindow=window;
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=2;
 | 
						|
                        goto outputBytes;
 | 
						|
                    } else if((code=getDynamicOffset(c, &offset))>=0) {
 | 
						|
                        /* define a dynamic window with this character */
 | 
						|
                        isSingleByteMode=TRUE;
 | 
						|
                        dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                        useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                        length=3;
 | 
						|
                        goto outputBytes;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
 | 
						|
                /* don't know how to compress this character, just write it directly */
 | 
						|
                length=2;
 | 
						|
                goto outputBytes;
 | 
						|
            } else if(c<0xe000) {
 | 
						|
                /* c is a surrogate */
 | 
						|
                if(U16_IS_SURROGATE_LEAD(c)) {
 | 
						|
getTrailUnicode:
 | 
						|
                    lead=(UChar)c;
 | 
						|
                    if(source<sourceLimit) {
 | 
						|
                        /* test the following code unit */
 | 
						|
                        trail=*source;
 | 
						|
                        if(U16_IS_TRAIL(trail)) {
 | 
						|
                            ++source;
 | 
						|
                            c=U16_GET_SUPPLEMENTARY(c, trail);
 | 
						|
                            /* convert this surrogate code point */
 | 
						|
                            /* exit this condition tree */
 | 
						|
                        } else {
 | 
						|
                            /* this is an unmatched lead code unit (1st surrogate) */
 | 
						|
                            /* callback(illegal) */
 | 
						|
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                            goto endloop;
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        /* no more input */
 | 
						|
                        break;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    /* this is an unmatched trail code unit (2nd surrogate) */
 | 
						|
                    /* callback(illegal) */
 | 
						|
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
                    goto endloop;
 | 
						|
                }
 | 
						|
 | 
						|
                /* compress supplementary character */
 | 
						|
                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
 | 
						|
                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
 | 
						|
                ) {
 | 
						|
                    /*
 | 
						|
                     * there is a dynamic window that contains this character and
 | 
						|
                     * the following character is not uncompressible,
 | 
						|
                     * change to the window
 | 
						|
                     */
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    dynamicWindow=window;
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=2;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
 | 
						|
                          (code=getDynamicOffset(c, &offset))>=0
 | 
						|
                ) {
 | 
						|
                    /* two supplementary characters in (probably) the same window - define an extended one */
 | 
						|
                    isSingleByteMode=TRUE;
 | 
						|
                    code-=0x200;
 | 
						|
                    dynamicWindow=getNextDynamicWindow(scsu);
 | 
						|
                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
 | 
						|
                    useDynamicWindow(scsu, dynamicWindow);
 | 
						|
                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                } else {
 | 
						|
                    /* don't know how to compress this character, just write it directly */
 | 
						|
                    c=((uint32_t)lead<<16)|trail;
 | 
						|
                    length=4;
 | 
						|
                    goto outputBytes;
 | 
						|
                }
 | 
						|
            } else /* 0xe000<=c<0xf300 */ {
 | 
						|
                /* quote to avoid SCSU tags */
 | 
						|
                c|=UQU<<16;
 | 
						|
                length=3;
 | 
						|
                goto outputBytes;
 | 
						|
            }
 | 
						|
 | 
						|
            /* normal end of conversion: prepare for a new character */
 | 
						|
            c=0;
 | 
						|
        }
 | 
						|
    }
 | 
						|
endloop:
 | 
						|
 | 
						|
    /* set the converter state back into UConverter */
 | 
						|
    scsu->fromUIsSingleByteMode=isSingleByteMode;
 | 
						|
    scsu->fromUDynamicWindow=dynamicWindow;
 | 
						|
 | 
						|
    cnv->fromUChar32=c;
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=source;
 | 
						|
    pArgs->target=(char *)target;
 | 
						|
    return;
 | 
						|
 | 
						|
outputBytes:
 | 
						|
    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
 | 
						|
    /* from the first if in the loop we know that targetCapacity>0 */
 | 
						|
    if(length<=targetCapacity) {
 | 
						|
        switch(length) {
 | 
						|
            /* each branch falls through to the next one */
 | 
						|
        case 4:
 | 
						|
            *target++=(uint8_t)(c>>24);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 3:
 | 
						|
            *target++=(uint8_t)(c>>16);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 2:
 | 
						|
            *target++=(uint8_t)(c>>8);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 1:
 | 
						|
            *target++=(uint8_t)c;
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        default:
 | 
						|
            /* will never occur */
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        targetCapacity-=length;
 | 
						|
 | 
						|
        /* normal end of conversion: prepare for a new character */
 | 
						|
        c=0;
 | 
						|
        goto loop;
 | 
						|
    } else {
 | 
						|
        uint8_t *p;
 | 
						|
 | 
						|
        /*
 | 
						|
         * We actually do this backwards here:
 | 
						|
         * In order to save an intermediate variable, we output
 | 
						|
         * first to the overflow buffer what does not fit into the
 | 
						|
         * regular target.
 | 
						|
         */
 | 
						|
        /* we know that 0<=targetCapacity<length<=4 */
 | 
						|
        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
 | 
						|
        length-=targetCapacity;
 | 
						|
        p=(uint8_t *)cnv->charErrorBuffer;
 | 
						|
        switch(length) {
 | 
						|
            /* each branch falls through to the next one */
 | 
						|
        case 4:
 | 
						|
            *p++=(uint8_t)(c>>24);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 3:
 | 
						|
            *p++=(uint8_t)(c>>16);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 2:
 | 
						|
            *p++=(uint8_t)(c>>8);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 1:
 | 
						|
            *p=(uint8_t)c;
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        default:
 | 
						|
            /* will never occur */
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        cnv->charErrorBufferLength=(int8_t)length;
 | 
						|
 | 
						|
        /* now output what fits into the regular target */
 | 
						|
        c>>=8*length; /* length was reduced by targetCapacity */
 | 
						|
        switch(targetCapacity) {
 | 
						|
            /* each branch falls through to the next one */
 | 
						|
        case 3:
 | 
						|
            *target++=(uint8_t)(c>>16);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 2:
 | 
						|
            *target++=(uint8_t)(c>>8);
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        case 1:
 | 
						|
            *target++=(uint8_t)c;
 | 
						|
            U_FALLTHROUGH;
 | 
						|
        default:
 | 
						|
            break;
 | 
						|
        }
 | 
						|
 | 
						|
        /* target overflow */
 | 
						|
        targetCapacity=0;
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
        c=0;
 | 
						|
        goto endloop;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* miscellaneous ------------------------------------------------------------ */
 | 
						|
 | 
						|
static const char *  U_CALLCONV
 | 
						|
_SCSUGetName(const UConverter *cnv) {
 | 
						|
    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
 | 
						|
 | 
						|
    switch(scsu->locale) {
 | 
						|
    case l_ja:
 | 
						|
        return "SCSU,locale=ja";
 | 
						|
    default:
 | 
						|
        return "SCSU";
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* structure for SafeClone calculations */
 | 
						|
struct cloneSCSUStruct
 | 
						|
{
 | 
						|
    UConverter cnv;
 | 
						|
    SCSUData mydata;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * SCSU implementation of the converter clone hook.
 *
 * cnv          converter being cloned; its extraInfo is copied as SCSUData
 * stackBuffer  destination memory, interpreted as a struct cloneSCSUStruct
 * pBufferSize  a value of 0 is a preflighting request: the required size
 *              is stored back through this pointer and nothing is cloned
 * status       in/out error code; the function does nothing if it already
 *              indicates a failure
 *
 * Returns a pointer to the converter embedded in stackBuffer, or NULL when
 * *status was already a failure or when the call was a preflighting request.
 */
static UConverter *  U_CALLCONV
_SCSUSafeClone(const UConverter *cnv,
               void *stackBuffer,
               int32_t *pBufferSize,
               UErrorCode *status)
{
    struct cloneSCSUStruct *clone;

    if(U_FAILURE(*status)) {
        return NULL;
    }

    if(*pBufferSize==0) {
        /* 'preflighting' request - only report the needed size */
        *pBufferSize=(int32_t)sizeof(struct cloneSCSUStruct);
        return NULL;
    }

    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
    clone=(struct cloneSCSUStruct *)stackBuffer;

    /* duplicate the SCSU state into the buffer and attach it to the clone */
    uprv_memcpy(&clone->mydata, cnv->extraInfo, sizeof(SCSUData));
    clone->cnv.extraInfo=&clone->mydata;
    clone->cnv.isExtraLocal=TRUE;

    return &clone->cnv;
}
 | 
						|
U_CDECL_END
 | 
						|
 | 
						|
/*
 * Function table that plugs the SCSU routines of this file into the
 * converter framework. Entries left NULL are not provided by SCSU.
 *
 * NOTE(review): the slot labels in the trailing comments assume the field
 * order of UConverterImpl as declared in ucnv_cnv.h - confirm against the
 * header before relying on them.
 */
static const UConverterImpl _SCSUImpl={
    UCNV_SCSU,                      /* converter type */

    NULL,                           /* load */
    NULL,                           /* unload */

    _SCSUOpen,                      /* open */
    _SCSUClose,                     /* close */
    _SCSUReset,                     /* reset */

    _SCSUToUnicode,                 /* toUnicode */
    _SCSUToUnicodeWithOffsets,      /* toUnicode, with offsets */
    _SCSUFromUnicode,               /* fromUnicode */
    _SCSUFromUnicodeWithOffsets,    /* fromUnicode, with offsets */
    NULL,                           /* getNextUChar */

    NULL,                           /* getStarters */
    _SCSUGetName,                   /* getName */
    NULL,                           /* writeSub */
    _SCSUSafeClone,                 /* safeClone */
    ucnv_getCompleteUnicodeSet,     /* getUnicodeSet */
    NULL,                           /* toUTF8 */
    NULL                            /* fromUTF8 */
};
 | 
						|
 | 
						|
/*
 * Static description of the SCSU converter.
 *
 * NOTE(review): the field labels in the trailing comments assume the member
 * order of UConverterStaticData as declared in ucnv_bld.h - confirm against
 * the header before relying on them.
 */
static const UConverterStaticData _SCSUStaticData={
    sizeof(UConverterStaticData),   /* structure size */
    "SCSU",                         /* converter name */
    1212,                           /* CCSID for SCSU */
    UCNV_IBM,                       /* platform */
    UCNV_SCSU,                      /* conversion type */
    1,                              /* one UChar generates at least 1 byte... */
    3,                              /* ...and at most 3 bytes */
    /*
     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
     * substitution string.
     */
    { 0x0e, 0xff, 0xfd, 0 },        /* substitution bytes */
    3,                              /* number of substitution bytes */
    FALSE,                          /* to-Unicode fallback flag */
    FALSE,                          /* from-Unicode fallback flag */
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
 | 
						|
 | 
						|
/*
 * Shared-data object for the SCSU converter, built from the static
 * description (_SCSUStaticData) and the function table (_SCSUImpl)
 * defined in this file.
 */
const UConverterSharedData _SCSUData=UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(
    &_SCSUStaticData,
    &_SCSUImpl);
 | 
						|
 | 
						|
#endif
 |