forked from mirrors/gecko-dev
		
	Update to ICU 69.1 by running "update-icu.sh" with "maint/maint-69" as the target. Differential Revision: https://phabricator.services.mozilla.com/D116968
		
			
				
	
	
		
			405 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			405 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// © 2016 and later: Unicode, Inc. and others.
 | 
						|
// License & terms of use: http://www.unicode.org/copyright.html
 | 
						|
/*
 | 
						|
*******************************************************************************
 | 
						|
* Copyright (C) 2014, International Business Machines
 | 
						|
* Corporation and others.  All Rights Reserved.
 | 
						|
*******************************************************************************
 | 
						|
* norm2allmodes.h
 | 
						|
*
 | 
						|
* created on: 2014sep07
 | 
						|
* created by: Markus W. Scherer
 | 
						|
*/
 | 
						|
 | 
						|
#ifndef __NORM2ALLMODES_H__
 | 
						|
#define __NORM2ALLMODES_H__
 | 
						|
 | 
						|
#include "unicode/utypes.h"
 | 
						|
 | 
						|
#if !UCONFIG_NO_NORMALIZATION
 | 
						|
 | 
						|
#include "unicode/edits.h"
 | 
						|
#include "unicode/normalizer2.h"
 | 
						|
#include "unicode/stringoptions.h"
 | 
						|
#include "unicode/unistr.h"
 | 
						|
#include "cpputils.h"
 | 
						|
#include "normalizer2impl.h"
 | 
						|
 | 
						|
U_NAMESPACE_BEGIN
 | 
						|
 | 
						|
// Intermediate class:
 | 
						|
// Has Normalizer2Impl and does boilerplate argument checking and setup.
 | 
						|
class Normalizer2WithImpl : public Normalizer2 {
 | 
						|
public:
 | 
						|
    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
 | 
						|
    virtual ~Normalizer2WithImpl();
 | 
						|
 | 
						|
    // normalize
 | 
						|
    virtual UnicodeString &
 | 
						|
    normalize(const UnicodeString &src,
 | 
						|
              UnicodeString &dest,
 | 
						|
              UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            dest.setToBogus();
 | 
						|
            return dest;
 | 
						|
        }
 | 
						|
        const UChar *sArray=src.getBuffer();
 | 
						|
        if(&dest==&src || sArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            dest.setToBogus();
 | 
						|
            return dest;
 | 
						|
        }
 | 
						|
        dest.remove();
 | 
						|
        ReorderingBuffer buffer(impl, dest);
 | 
						|
        if(buffer.init(src.length(), errorCode)) {
 | 
						|
            normalize(sArray, sArray+src.length(), buffer, errorCode);
 | 
						|
        }
 | 
						|
        return dest;
 | 
						|
    }
 | 
						|
    virtual void
 | 
						|
    normalize(const UChar *src, const UChar *limit,
 | 
						|
              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
 | 
						|
 | 
						|
    // normalize and append
 | 
						|
    virtual UnicodeString &
 | 
						|
    normalizeSecondAndAppend(UnicodeString &first,
 | 
						|
                             const UnicodeString &second,
 | 
						|
                             UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        return normalizeSecondAndAppend(first, second, true, errorCode);
 | 
						|
    }
 | 
						|
    virtual UnicodeString &
 | 
						|
    append(UnicodeString &first,
 | 
						|
           const UnicodeString &second,
 | 
						|
           UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        return normalizeSecondAndAppend(first, second, false, errorCode);
 | 
						|
    }
 | 
						|
    UnicodeString &
 | 
						|
    normalizeSecondAndAppend(UnicodeString &first,
 | 
						|
                             const UnicodeString &second,
 | 
						|
                             UBool doNormalize,
 | 
						|
                             UErrorCode &errorCode) const {
 | 
						|
        uprv_checkCanGetBuffer(first, errorCode);
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return first;
 | 
						|
        }
 | 
						|
        const UChar *secondArray=second.getBuffer();
 | 
						|
        if(&first==&second || secondArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            return first;
 | 
						|
        }
 | 
						|
        int32_t firstLength=first.length();
 | 
						|
        UnicodeString safeMiddle;
 | 
						|
        {
 | 
						|
            ReorderingBuffer buffer(impl, first);
 | 
						|
            if(buffer.init(firstLength+second.length(), errorCode)) {
 | 
						|
                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
 | 
						|
                                   safeMiddle, buffer, errorCode);
 | 
						|
            }
 | 
						|
        }  // The ReorderingBuffer destructor finalizes the first string.
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            // Restore the modified suffix of the first string.
 | 
						|
            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
 | 
						|
        }
 | 
						|
        return first;
 | 
						|
    }
 | 
						|
    virtual void
 | 
						|
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
 | 
						|
                       UnicodeString &safeMiddle,
 | 
						|
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
 | 
						|
    virtual UBool
 | 
						|
    getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
 | 
						|
        UChar buffer[4];
 | 
						|
        int32_t length;
 | 
						|
        const UChar *d=impl.getDecomposition(c, buffer, length);
 | 
						|
        if(d==NULL) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        if(d==buffer) {
 | 
						|
            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
 | 
						|
        } else {
 | 
						|
            decomposition.setTo(false, d, length);  // read-only alias
 | 
						|
        }
 | 
						|
        return true;
 | 
						|
    }
 | 
						|
    virtual UBool
 | 
						|
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
 | 
						|
        UChar buffer[30];
 | 
						|
        int32_t length;
 | 
						|
        const UChar *d=impl.getRawDecomposition(c, buffer, length);
 | 
						|
        if(d==NULL) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        if(d==buffer) {
 | 
						|
            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
 | 
						|
        } else {
 | 
						|
            decomposition.setTo(false, d, length);  // read-only alias
 | 
						|
        }
 | 
						|
        return true;
 | 
						|
    }
 | 
						|
    virtual UChar32
 | 
						|
    composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
 | 
						|
        return impl.composePair(a, b);
 | 
						|
    }
 | 
						|
 | 
						|
    virtual uint8_t
 | 
						|
    getCombiningClass(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.getCC(impl.getNorm16(c));
 | 
						|
    }
 | 
						|
 | 
						|
    // quick checks
 | 
						|
    virtual UBool
 | 
						|
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        const UChar *sArray=s.getBuffer();
 | 
						|
        if(sArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        const UChar *sLimit=sArray+s.length();
 | 
						|
        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
 | 
						|
    }
 | 
						|
    virtual UNormalizationCheckResult
 | 
						|
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
 | 
						|
    }
 | 
						|
    virtual int32_t
 | 
						|
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return 0;
 | 
						|
        }
 | 
						|
        const UChar *sArray=s.getBuffer();
 | 
						|
        if(sArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            return 0;
 | 
						|
        }
 | 
						|
        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
 | 
						|
    }
 | 
						|
    virtual const UChar *
 | 
						|
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
 | 
						|
 | 
						|
    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
 | 
						|
        return UNORM_YES;
 | 
						|
    }
 | 
						|
 | 
						|
    const Normalizer2Impl &impl;
 | 
						|
};
 | 
						|
 | 
						|
class DecomposeNormalizer2 : public Normalizer2WithImpl {
 | 
						|
public:
 | 
						|
    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
 | 
						|
    virtual ~DecomposeNormalizer2();
 | 
						|
 | 
						|
private:
 | 
						|
    virtual void
 | 
						|
    normalize(const UChar *src, const UChar *limit,
 | 
						|
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.decompose(src, limit, &buffer, errorCode);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
 | 
						|
    virtual void
 | 
						|
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
 | 
						|
                       UnicodeString &safeMiddle,
 | 
						|
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
 | 
						|
    }
 | 
						|
 | 
						|
    void
 | 
						|
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
 | 
						|
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if (U_FAILURE(errorCode)) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
 | 
						|
            edits->reset();
 | 
						|
        }
 | 
						|
        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
 | 
						|
        impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
 | 
						|
        sink.Flush();
 | 
						|
    }
 | 
						|
    virtual UBool
 | 
						|
    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
 | 
						|
        const uint8_t *sLimit = s + sp.length();
 | 
						|
        return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
 | 
						|
    }
 | 
						|
 | 
						|
    virtual const UChar *
 | 
						|
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        return impl.decompose(src, limit, NULL, errorCode);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
 | 
						|
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
 | 
						|
    }
 | 
						|
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasDecompBoundaryBefore(c);
 | 
						|
    }
 | 
						|
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasDecompBoundaryAfter(c);
 | 
						|
    }
 | 
						|
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.isDecompInert(c);
 | 
						|
    }
 | 
						|
};
 | 
						|
 | 
						|
class ComposeNormalizer2 : public Normalizer2WithImpl {
 | 
						|
public:
 | 
						|
    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
 | 
						|
        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
 | 
						|
    virtual ~ComposeNormalizer2();
 | 
						|
 | 
						|
private:
 | 
						|
    virtual void
 | 
						|
    normalize(const UChar *src, const UChar *limit,
 | 
						|
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
 | 
						|
 | 
						|
    void
 | 
						|
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
 | 
						|
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if (U_FAILURE(errorCode)) {
 | 
						|
            return;
 | 
						|
        }
 | 
						|
        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
 | 
						|
            edits->reset();
 | 
						|
        }
 | 
						|
        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
 | 
						|
        impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
 | 
						|
                         &sink, edits, errorCode);
 | 
						|
        sink.Flush();
 | 
						|
    }
 | 
						|
 | 
						|
    virtual void
 | 
						|
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
 | 
						|
                       UnicodeString &safeMiddle,
 | 
						|
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
 | 
						|
    }
 | 
						|
 | 
						|
    virtual UBool
 | 
						|
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        const UChar *sArray=s.getBuffer();
 | 
						|
        if(sArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        UnicodeString temp;
 | 
						|
        ReorderingBuffer buffer(impl, temp);
 | 
						|
        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
 | 
						|
    }
 | 
						|
    virtual UBool
 | 
						|
    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
 | 
						|
        return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
 | 
						|
    }
 | 
						|
    virtual UNormalizationCheckResult
 | 
						|
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        if(U_FAILURE(errorCode)) {
 | 
						|
            return UNORM_MAYBE;
 | 
						|
        }
 | 
						|
        const UChar *sArray=s.getBuffer();
 | 
						|
        if(sArray==NULL) {
 | 
						|
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 | 
						|
            return UNORM_MAYBE;
 | 
						|
        }
 | 
						|
        UNormalizationCheckResult qcResult=UNORM_YES;
 | 
						|
        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
 | 
						|
        return qcResult;
 | 
						|
    }
 | 
						|
    virtual const UChar *
 | 
						|
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
 | 
						|
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
 | 
						|
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.getCompQuickCheck(impl.getNorm16(c));
 | 
						|
    }
 | 
						|
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasCompBoundaryBefore(c);
 | 
						|
    }
 | 
						|
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasCompBoundaryAfter(c, onlyContiguous);
 | 
						|
    }
 | 
						|
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.isCompInert(c, onlyContiguous);
 | 
						|
    }
 | 
						|
 | 
						|
    const UBool onlyContiguous;
 | 
						|
};
 | 
						|
 | 
						|
class FCDNormalizer2 : public Normalizer2WithImpl {
 | 
						|
public:
 | 
						|
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
 | 
						|
    virtual ~FCDNormalizer2();
 | 
						|
 | 
						|
private:
 | 
						|
    virtual void
 | 
						|
    normalize(const UChar *src, const UChar *limit,
 | 
						|
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.makeFCD(src, limit, &buffer, errorCode);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
 | 
						|
    virtual void
 | 
						|
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
 | 
						|
                       UnicodeString &safeMiddle,
 | 
						|
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
 | 
						|
    }
 | 
						|
    virtual const UChar *
 | 
						|
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
 | 
						|
        return impl.makeFCD(src, limit, NULL, errorCode);
 | 
						|
    }
 | 
						|
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
 | 
						|
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasFCDBoundaryBefore(c);
 | 
						|
    }
 | 
						|
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.hasFCDBoundaryAfter(c);
 | 
						|
    }
 | 
						|
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
 | 
						|
        return impl.isFCDInert(c);
 | 
						|
    }
 | 
						|
};
 | 
						|
 | 
						|
struct Norm2AllModes : public UMemory {
 | 
						|
    Norm2AllModes(Normalizer2Impl *i)
 | 
						|
            : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
 | 
						|
    ~Norm2AllModes();
 | 
						|
 | 
						|
    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
 | 
						|
    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
 | 
						|
    static Norm2AllModes *createInstance(const char *packageName,
 | 
						|
                                         const char *name,
 | 
						|
                                         UErrorCode &errorCode);
 | 
						|
 | 
						|
    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
 | 
						|
    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
 | 
						|
    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
 | 
						|
 | 
						|
    Normalizer2Impl *impl;
 | 
						|
    ComposeNormalizer2 comp;
 | 
						|
    DecomposeNormalizer2 decomp;
 | 
						|
    FCDNormalizer2 fcd;
 | 
						|
    ComposeNormalizer2 fcc;
 | 
						|
};
 | 
						|
 | 
						|
U_NAMESPACE_END
 | 
						|
 | 
						|
#endif  // !UCONFIG_NO_NORMALIZATION
 | 
						|
#endif  // __NORM2ALLMODES_H__
 |