forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			756 lines
		
	
	
	
		
			22 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			756 lines
		
	
	
	
		
			22 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2000-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvlat1.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb07
*   created by: Markus W. Scherer
*/
 | 
						|
 | 
						|
#include "unicode/utypes.h"
 | 
						|
 | 
						|
#if !UCONFIG_NO_CONVERSION
 | 
						|
 | 
						|
#include "unicode/ucnv.h"
 | 
						|
#include "unicode/uset.h"
 | 
						|
#include "unicode/utf8.h"
 | 
						|
#include "ucnv_bld.h"
 | 
						|
#include "ucnv_cnv.h"
 | 
						|
#include "ustr_imp.h"
 | 
						|
 | 
						|
/* control optimizations according to the platform */
 | 
						|
#define LATIN1_UNROLL_FROM_UNICODE 1
 | 
						|
 | 
						|
/* ISO 8859-1 --------------------------------------------------------------- */
 | 
						|
 | 
						|
/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
 | 
						|
U_CDECL_BEGIN
 | 
						|
static void U_CALLCONV
 | 
						|
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 | 
						|
                            UErrorCode *pErrorCode) {
 | 
						|
    const uint8_t *source;
 | 
						|
    UChar *target;
 | 
						|
    int32_t targetCapacity, length;
 | 
						|
    int32_t *offsets;
 | 
						|
 | 
						|
    int32_t sourceIndex;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    source=(const uint8_t *)pArgs->source;
 | 
						|
    target=pArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 | 
						|
    offsets=pArgs->offsets;
 | 
						|
 | 
						|
    sourceIndex=0;
 | 
						|
 | 
						|
    /*
 | 
						|
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 | 
						|
     * for the minimum of the sourceLength and targetCapacity
 | 
						|
     */
 | 
						|
    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
 | 
						|
    if(length<=targetCapacity) {
 | 
						|
        targetCapacity=length;
 | 
						|
    } else {
 | 
						|
        /* target will be full */
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
        length=targetCapacity;
 | 
						|
    }
 | 
						|
 | 
						|
    if(targetCapacity>=8) {
 | 
						|
        /* This loop is unrolled for speed and improved pipelining. */
 | 
						|
        int32_t count, loops;
 | 
						|
 | 
						|
        loops=count=targetCapacity>>3;
 | 
						|
        length=targetCapacity&=0x7;
 | 
						|
        do {
 | 
						|
            target[0]=source[0];
 | 
						|
            target[1]=source[1];
 | 
						|
            target[2]=source[2];
 | 
						|
            target[3]=source[3];
 | 
						|
            target[4]=source[4];
 | 
						|
            target[5]=source[5];
 | 
						|
            target[6]=source[6];
 | 
						|
            target[7]=source[7];
 | 
						|
            target+=8;
 | 
						|
            source+=8;
 | 
						|
        } while(--count>0);
 | 
						|
 | 
						|
        if(offsets!=NULL) {
 | 
						|
            do {
 | 
						|
                offsets[0]=sourceIndex++;
 | 
						|
                offsets[1]=sourceIndex++;
 | 
						|
                offsets[2]=sourceIndex++;
 | 
						|
                offsets[3]=sourceIndex++;
 | 
						|
                offsets[4]=sourceIndex++;
 | 
						|
                offsets[5]=sourceIndex++;
 | 
						|
                offsets[6]=sourceIndex++;
 | 
						|
                offsets[7]=sourceIndex++;
 | 
						|
                offsets+=8;
 | 
						|
            } while(--loops>0);
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    while(targetCapacity>0) {
 | 
						|
        *target++=*source++;
 | 
						|
        --targetCapacity;
 | 
						|
    }
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=(const char *)source;
 | 
						|
    pArgs->target=target;
 | 
						|
 | 
						|
    /* set offsets */
 | 
						|
    if(offsets!=NULL) {
 | 
						|
        while(length>0) {
 | 
						|
            *offsets++=sourceIndex++;
 | 
						|
            --length;
 | 
						|
        }
 | 
						|
        pArgs->offsets=offsets;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
 | 
						|
static UChar32 U_CALLCONV
 | 
						|
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
 | 
						|
                    UErrorCode *pErrorCode) {
 | 
						|
    const uint8_t *source=(const uint8_t *)pArgs->source;
 | 
						|
    if(source<(const uint8_t *)pArgs->sourceLimit) {
 | 
						|
        pArgs->source=(const char *)(source+1);
 | 
						|
        return *source;
 | 
						|
    }
 | 
						|
 | 
						|
    /* no output because of empty input */
 | 
						|
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 | 
						|
    return 0xffff;
 | 
						|
}
 | 
						|
 | 
						|
/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
 | 
						|
static void U_CALLCONV
 | 
						|
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 | 
						|
                              UErrorCode *pErrorCode) {
 | 
						|
    UConverter *cnv;
 | 
						|
    const UChar *source, *sourceLimit;
 | 
						|
    uint8_t *target, *oldTarget;
 | 
						|
    int32_t targetCapacity, length;
 | 
						|
    int32_t *offsets;
 | 
						|
 | 
						|
    UChar32 cp;
 | 
						|
    UChar c, max;
 | 
						|
 | 
						|
    int32_t sourceIndex;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    cnv=pArgs->converter;
 | 
						|
    source=pArgs->source;
 | 
						|
    sourceLimit=pArgs->sourceLimit;
 | 
						|
    target=oldTarget=(uint8_t *)pArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 | 
						|
    offsets=pArgs->offsets;
 | 
						|
 | 
						|
    if(cnv->sharedData==&_Latin1Data) {
 | 
						|
        max=0xff; /* Latin-1 */
 | 
						|
    } else {
 | 
						|
        max=0x7f; /* US-ASCII */
 | 
						|
    }
 | 
						|
 | 
						|
    /* get the converter state from UConverter */
 | 
						|
    cp=cnv->fromUChar32;
 | 
						|
 | 
						|
    /* sourceIndex=-1 if the current character began in the previous buffer */
 | 
						|
    sourceIndex= cp==0 ? 0 : -1;
 | 
						|
 | 
						|
    /*
 | 
						|
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 | 
						|
     * for the minimum of the sourceLength and targetCapacity
 | 
						|
     */
 | 
						|
    length=(int32_t)(sourceLimit-source);
 | 
						|
    if(length<targetCapacity) {
 | 
						|
        targetCapacity=length;
 | 
						|
    }
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    if(cp!=0 && targetCapacity>0) {
 | 
						|
        goto getTrail;
 | 
						|
    }
 | 
						|
 | 
						|
#if LATIN1_UNROLL_FROM_UNICODE
 | 
						|
    /* unroll the loop with the most common case */
 | 
						|
    if(targetCapacity>=16) {
 | 
						|
        int32_t count, loops;
 | 
						|
        UChar u, oredChars;
 | 
						|
 | 
						|
        loops=count=targetCapacity>>4;
 | 
						|
        do {
 | 
						|
            oredChars=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
            oredChars|=u=*source++;
 | 
						|
            *target++=(uint8_t)u;
 | 
						|
 | 
						|
            /* were all 16 entries really valid? */
 | 
						|
            if(oredChars>max) {
 | 
						|
                /* no, return to the first of these 16 */
 | 
						|
                source-=16;
 | 
						|
                target-=16;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
        } while(--count>0);
 | 
						|
        count=loops-count;
 | 
						|
        targetCapacity-=16*count;
 | 
						|
 | 
						|
        if(offsets!=NULL) {
 | 
						|
            oldTarget+=16*count;
 | 
						|
            while(count>0) {
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                *offsets++=sourceIndex++;
 | 
						|
                --count;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
#endif
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    c=0;
 | 
						|
    while(targetCapacity>0 && (c=*source++)<=max) {
 | 
						|
        /* convert the Unicode code point */
 | 
						|
        *target++=(uint8_t)c;
 | 
						|
        --targetCapacity;
 | 
						|
    }
 | 
						|
 | 
						|
    if(c>max) {
 | 
						|
        cp=c;
 | 
						|
        if(!U_IS_SURROGATE(cp)) {
 | 
						|
            /* callback(unassigned) */
 | 
						|
        } else if(U_IS_SURROGATE_LEAD(cp)) {
 | 
						|
getTrail:
 | 
						|
            if(source<sourceLimit) {
 | 
						|
                /* test the following code unit */
 | 
						|
                UChar trail=*source;
 | 
						|
                if(U16_IS_TRAIL(trail)) {
 | 
						|
                    ++source;
 | 
						|
                    cp=U16_GET_SUPPLEMENTARY(cp, trail);
 | 
						|
                    /* this codepage does not map supplementary code points */
 | 
						|
                    /* callback(unassigned) */
 | 
						|
                } else {
 | 
						|
                    /* this is an unmatched lead code unit (1st surrogate) */
 | 
						|
                    /* callback(illegal) */
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                /* no more input */
 | 
						|
                cnv->fromUChar32=cp;
 | 
						|
                goto noMoreInput;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            /* this is an unmatched trail code unit (2nd surrogate) */
 | 
						|
            /* callback(illegal) */
 | 
						|
        }
 | 
						|
 | 
						|
        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
 | 
						|
        cnv->fromUChar32=cp;
 | 
						|
    }
 | 
						|
noMoreInput:
 | 
						|
 | 
						|
    /* set offsets since the start */
 | 
						|
    if(offsets!=NULL) {
 | 
						|
        size_t count=target-oldTarget;
 | 
						|
        while(count>0) {
 | 
						|
            *offsets++=sourceIndex++;
 | 
						|
            --count;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
 | 
						|
        /* target is full */
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=source;
 | 
						|
    pArgs->target=(char *)target;
 | 
						|
    pArgs->offsets=offsets;
 | 
						|
}
 | 
						|
 | 
						|
/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
 | 
						|
static void U_CALLCONV
 | 
						|
ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 | 
						|
                    UConverterToUnicodeArgs *pToUArgs,
 | 
						|
                    UErrorCode *pErrorCode) {
 | 
						|
    UConverter *utf8;
 | 
						|
    const uint8_t *source, *sourceLimit;
 | 
						|
    uint8_t *target;
 | 
						|
    int32_t targetCapacity;
 | 
						|
 | 
						|
    UChar32 c;
 | 
						|
    uint8_t b, t1;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    utf8=pToUArgs->converter;
 | 
						|
    source=(uint8_t *)pToUArgs->source;
 | 
						|
    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
 | 
						|
    target=(uint8_t *)pFromUArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 | 
						|
 | 
						|
    /* get the converter state from the UTF-8 UConverter */
 | 
						|
    if (utf8->toULength > 0) {
 | 
						|
        c=(UChar32)utf8->toUnicodeStatus;
 | 
						|
    } else {
 | 
						|
        c = 0;
 | 
						|
    }
 | 
						|
    if(c!=0 && source<sourceLimit) {
 | 
						|
        if(targetCapacity==0) {
 | 
						|
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
            return;
 | 
						|
        } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
 | 
						|
            ++source;
 | 
						|
            *target++=(uint8_t)(((c&3)<<6)|t1);
 | 
						|
            --targetCapacity;
 | 
						|
 | 
						|
            utf8->toUnicodeStatus=0;
 | 
						|
            utf8->toULength=0;
 | 
						|
        } else {
 | 
						|
            /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 | 
						|
            *pErrorCode=U_USING_DEFAULT_WARNING;
 | 
						|
            return;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /*
 | 
						|
     * Make sure that the last byte sequence before sourceLimit is complete
 | 
						|
     * or runs into a lead byte.
 | 
						|
     * In the conversion loop compare source with sourceLimit only once
 | 
						|
     * per multi-byte character.
 | 
						|
     * For Latin-1, adjust sourceLimit only for 1 trail byte because
 | 
						|
     * the conversion loop handles at most 2-byte sequences.
 | 
						|
     */
 | 
						|
    if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
 | 
						|
        --sourceLimit;
 | 
						|
    }
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    while(source<sourceLimit) {
 | 
						|
        if(targetCapacity>0) {
 | 
						|
            b=*source++;
 | 
						|
            if(U8_IS_SINGLE(b)) {
 | 
						|
                /* convert ASCII */
 | 
						|
                *target++=(uint8_t)b;
 | 
						|
                --targetCapacity;
 | 
						|
            } else if( /* handle U+0080..U+00FF inline */
 | 
						|
                       b>=0xc2 && b<=0xc3 &&
 | 
						|
                       (t1=(uint8_t)(*source-0x80)) <= 0x3f
 | 
						|
            ) {
 | 
						|
                ++source;
 | 
						|
                *target++=(uint8_t)(((b&3)<<6)|t1);
 | 
						|
                --targetCapacity;
 | 
						|
            } else {
 | 
						|
                /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 | 
						|
                pToUArgs->source=(char *)(source-1);
 | 
						|
                pFromUArgs->target=(char *)target;
 | 
						|
                *pErrorCode=U_USING_DEFAULT_WARNING;
 | 
						|
                return;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            /* target is full */
 | 
						|
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
            break;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /*
 | 
						|
     * The sourceLimit may have been adjusted before the conversion loop
 | 
						|
     * to stop before a truncated sequence.
 | 
						|
     * If so, then collect the truncated sequence now.
 | 
						|
     * For Latin-1, there is at most exactly one lead byte because of the
 | 
						|
     * smaller sourceLimit adjustment logic.
 | 
						|
     */
 | 
						|
    if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
 | 
						|
        utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
 | 
						|
        utf8->toULength=1;
 | 
						|
        utf8->mode=U8_COUNT_BYTES(b);
 | 
						|
    }
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pToUArgs->source=(char *)source;
 | 
						|
    pFromUArgs->target=(char *)target;
 | 
						|
}
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_Latin1GetUnicodeSet(const UConverter *cnv,
 | 
						|
                     const USetAdder *sa,
 | 
						|
                     UConverterUnicodeSet which,
 | 
						|
                     UErrorCode *pErrorCode) {
 | 
						|
    (void)cnv;
 | 
						|
    (void)which;
 | 
						|
    (void)pErrorCode;
 | 
						|
    sa->addRange(sa->set, 0, 0xff);
 | 
						|
}
 | 
						|
U_CDECL_END
 | 
						|
 | 
						|
 | 
						|
/*
 * Implementation table for the ISO 8859-1 converter.
 * The slot order follows the UConverterImpl declaration, which is not
 * visible in this file; NOTE(review): confirm slot meanings there.
 * The same function is used for the with-offsets and without-offsets slots.
 */
static const UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    _Latin1ToUnicodeWithOffsets,
    _Latin1ToUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1GetNextUChar,

    NULL,
    NULL,
    NULL,
    NULL,
    _Latin1GetUnicodeSet,

    NULL,
    ucnv_Latin1FromUTF8
};
 | 
						|
 | 
						|
/*
 * Static description of the ISO 8859-1 converter.
 * NOTE(review): field meanings follow the UConverterStaticData declaration,
 * which is not visible in this file; confirm there before changing values.
 */
static const UConverterStaticData _Latin1StaticData={
    sizeof(UConverterStaticData),
    "ISO-8859-1",
    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
 | 
						|
 | 
						|
/*
 * Shared data object for ISO 8859-1, combining the static data and the
 * implementation table. _Latin1FromUnicodeWithOffsets() compares against
 * its address to select the Latin-1 maximum (0xff).
 */
const UConverterSharedData _Latin1Data=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
 | 
						|
 | 
						|
/* US-ASCII ----------------------------------------------------------------- */
 | 
						|
 | 
						|
U_CDECL_BEGIN
 | 
						|
/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
 | 
						|
static void U_CALLCONV
 | 
						|
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 | 
						|
                           UErrorCode *pErrorCode) {
 | 
						|
    const uint8_t *source, *sourceLimit;
 | 
						|
    UChar *target, *oldTarget;
 | 
						|
    int32_t targetCapacity, length;
 | 
						|
    int32_t *offsets;
 | 
						|
 | 
						|
    int32_t sourceIndex;
 | 
						|
 | 
						|
    uint8_t c;
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    source=(const uint8_t *)pArgs->source;
 | 
						|
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 | 
						|
    target=oldTarget=pArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 | 
						|
    offsets=pArgs->offsets;
 | 
						|
 | 
						|
    /* sourceIndex=-1 if the current character began in the previous buffer */
 | 
						|
    sourceIndex=0;
 | 
						|
 | 
						|
    /*
 | 
						|
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 | 
						|
     * for the minimum of the sourceLength and targetCapacity
 | 
						|
     */
 | 
						|
    length=(int32_t)(sourceLimit-source);
 | 
						|
    if(length<targetCapacity) {
 | 
						|
        targetCapacity=length;
 | 
						|
    }
 | 
						|
 | 
						|
    if(targetCapacity>=8) {
 | 
						|
        /* This loop is unrolled for speed and improved pipelining. */
 | 
						|
        int32_t count, loops;
 | 
						|
        UChar oredChars;
 | 
						|
 | 
						|
        loops=count=targetCapacity>>3;
 | 
						|
        do {
 | 
						|
            oredChars=target[0]=source[0];
 | 
						|
            oredChars|=target[1]=source[1];
 | 
						|
            oredChars|=target[2]=source[2];
 | 
						|
            oredChars|=target[3]=source[3];
 | 
						|
            oredChars|=target[4]=source[4];
 | 
						|
            oredChars|=target[5]=source[5];
 | 
						|
            oredChars|=target[6]=source[6];
 | 
						|
            oredChars|=target[7]=source[7];
 | 
						|
 | 
						|
            /* were all 16 entries really valid? */
 | 
						|
            if(oredChars>0x7f) {
 | 
						|
                /* no, return to the first of these 16 */
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            source+=8;
 | 
						|
            target+=8;
 | 
						|
        } while(--count>0);
 | 
						|
        count=loops-count;
 | 
						|
        targetCapacity-=count*8;
 | 
						|
 | 
						|
        if(offsets!=NULL) {
 | 
						|
            oldTarget+=count*8;
 | 
						|
            while(count>0) {
 | 
						|
                offsets[0]=sourceIndex++;
 | 
						|
                offsets[1]=sourceIndex++;
 | 
						|
                offsets[2]=sourceIndex++;
 | 
						|
                offsets[3]=sourceIndex++;
 | 
						|
                offsets[4]=sourceIndex++;
 | 
						|
                offsets[5]=sourceIndex++;
 | 
						|
                offsets[6]=sourceIndex++;
 | 
						|
                offsets[7]=sourceIndex++;
 | 
						|
                offsets+=8;
 | 
						|
                --count;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    c=0;
 | 
						|
    while(targetCapacity>0 && (c=*source++)<=0x7f) {
 | 
						|
        *target++=c;
 | 
						|
        --targetCapacity;
 | 
						|
    }
 | 
						|
 | 
						|
    if(c>0x7f) {
 | 
						|
        /* callback(illegal); copy the current bytes to toUBytes[] */
 | 
						|
        UConverter *cnv=pArgs->converter;
 | 
						|
        cnv->toUBytes[0]=c;
 | 
						|
        cnv->toULength=1;
 | 
						|
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
    } else if(source<sourceLimit && target>=pArgs->targetLimit) {
 | 
						|
        /* target is full */
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
    /* set offsets since the start */
 | 
						|
    if(offsets!=NULL) {
 | 
						|
        size_t count=target-oldTarget;
 | 
						|
        while(count>0) {
 | 
						|
            *offsets++=sourceIndex++;
 | 
						|
            --count;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pArgs->source=(const char *)source;
 | 
						|
    pArgs->target=target;
 | 
						|
    pArgs->offsets=offsets;
 | 
						|
}
 | 
						|
 | 
						|
/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
 | 
						|
static UChar32 U_CALLCONV
 | 
						|
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
 | 
						|
                   UErrorCode *pErrorCode) {
 | 
						|
    const uint8_t *source;
 | 
						|
    uint8_t b;
 | 
						|
 | 
						|
    source=(const uint8_t *)pArgs->source;
 | 
						|
    if(source<(const uint8_t *)pArgs->sourceLimit) {
 | 
						|
        b=*source++;
 | 
						|
        pArgs->source=(const char *)source;
 | 
						|
        if(b<=0x7f) {
 | 
						|
            return b;
 | 
						|
        } else {
 | 
						|
            UConverter *cnv=pArgs->converter;
 | 
						|
            cnv->toUBytes[0]=b;
 | 
						|
            cnv->toULength=1;
 | 
						|
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 | 
						|
            return 0xffff;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    /* no output because of empty input */
 | 
						|
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 | 
						|
    return 0xffff;
 | 
						|
}
 | 
						|
 | 
						|
/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
 | 
						|
static void U_CALLCONV
 | 
						|
ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 | 
						|
                   UConverterToUnicodeArgs *pToUArgs,
 | 
						|
                   UErrorCode *pErrorCode) {
 | 
						|
    const uint8_t *source, *sourceLimit;
 | 
						|
    uint8_t *target;
 | 
						|
    int32_t targetCapacity, length;
 | 
						|
 | 
						|
    uint8_t c;
 | 
						|
 | 
						|
    if(pToUArgs->converter->toULength > 0) {
 | 
						|
        /* no handling of partial UTF-8 characters here, fall back to pivoting */
 | 
						|
        *pErrorCode=U_USING_DEFAULT_WARNING;
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    /* set up the local pointers */
 | 
						|
    source=(const uint8_t *)pToUArgs->source;
 | 
						|
    sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
 | 
						|
    target=(uint8_t *)pFromUArgs->target;
 | 
						|
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 | 
						|
 | 
						|
    /*
 | 
						|
     * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
 | 
						|
     * for the minimum of the sourceLength and targetCapacity
 | 
						|
     */
 | 
						|
    length=(int32_t)(sourceLimit-source);
 | 
						|
    if(length<targetCapacity) {
 | 
						|
        targetCapacity=length;
 | 
						|
    }
 | 
						|
 | 
						|
    /* unroll the loop with the most common case */
 | 
						|
    if(targetCapacity>=16) {
 | 
						|
        int32_t count, loops;
 | 
						|
        uint8_t oredChars;
 | 
						|
 | 
						|
        loops=count=targetCapacity>>4;
 | 
						|
        do {
 | 
						|
            oredChars=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
            oredChars|=*target++=*source++;
 | 
						|
 | 
						|
            /* were all 16 entries really valid? */
 | 
						|
            if(oredChars>0x7f) {
 | 
						|
                /* no, return to the first of these 16 */
 | 
						|
                source-=16;
 | 
						|
                target-=16;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
        } while(--count>0);
 | 
						|
        count=loops-count;
 | 
						|
        targetCapacity-=16*count;
 | 
						|
    }
 | 
						|
 | 
						|
    /* conversion loop */
 | 
						|
    c=0;
 | 
						|
    while(targetCapacity>0 && (c=*source)<=0x7f) {
 | 
						|
        ++source;
 | 
						|
        *target++=c;
 | 
						|
        --targetCapacity;
 | 
						|
    }
 | 
						|
 | 
						|
    if(c>0x7f) {
 | 
						|
        /* non-ASCII character, handle in standard converter */
 | 
						|
        *pErrorCode=U_USING_DEFAULT_WARNING;
 | 
						|
    } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
 | 
						|
        /* target is full */
 | 
						|
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
    /* write back the updated pointers */
 | 
						|
    pToUArgs->source=(const char *)source;
 | 
						|
    pFromUArgs->target=(char *)target;
 | 
						|
}
 | 
						|
 | 
						|
static void U_CALLCONV
 | 
						|
_ASCIIGetUnicodeSet(const UConverter *cnv,
 | 
						|
                    const USetAdder *sa,
 | 
						|
                    UConverterUnicodeSet which,
 | 
						|
                    UErrorCode *pErrorCode) {
 | 
						|
    (void)cnv;
 | 
						|
    (void)which;
 | 
						|
    (void)pErrorCode;
 | 
						|
    sa->addRange(sa->set, 0, 0x7f);
 | 
						|
}
 | 
						|
U_CDECL_END
 | 
						|
 | 
						|
/*
 * Implementation table for the US-ASCII converter.
 * The slot order follows the UConverterImpl declaration, which is not
 * visible in this file; NOTE(review): confirm slot meanings there.
 * The from-Unicode slots reuse _Latin1FromUnicodeWithOffsets(), which
 * limits output to 0x7f when the converter's sharedData is not &_Latin1Data.
 */
static const UConverterImpl _ASCIIImpl={
    UCNV_US_ASCII,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    _ASCIIToUnicodeWithOffsets,
    _ASCIIToUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    _ASCIIGetNextUChar,

    NULL,
    NULL,
    NULL,
    NULL,
    _ASCIIGetUnicodeSet,

    NULL,
    ucnv_ASCIIFromUTF8
};
 | 
						|
 | 
						|
/*
 * Static description of the US-ASCII converter.
 * NOTE(review): field meanings follow the UConverterStaticData declaration,
 * which is not visible in this file; confirm there before changing values.
 */
static const UConverterStaticData _ASCIIStaticData={
    sizeof(UConverterStaticData),
    "US-ASCII",
    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
 | 
						|
 | 
						|
/*
 * Shared data object for US-ASCII, combining the static data and the
 * implementation table.
 */
const UConverterSharedData _ASCIIData=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
 | 
						|
 | 
						|
#endif
 |