forked from mirrors/gecko-dev
		
	Update to ICU 69.1 by running "update-icu.sh" with "maint/maint-69" as the target. Differential Revision: https://phabricator.services.mozilla.com/D116968
		
			
				
	
	
		
			655 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			655 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// Copyright (C) 2016 and later: Unicode, Inc. and others.
 | 
						|
// License & terms of use: http://www.unicode.org/copyright.html
 | 
						|
 | 
						|
// file: rbbi_cache.cpp
 | 
						|
 | 
						|
#include "unicode/utypes.h"
 | 
						|
 | 
						|
#if !UCONFIG_NO_BREAK_ITERATION
 | 
						|
 | 
						|
#include "unicode/ubrk.h"
 | 
						|
#include "unicode/rbbi.h"
 | 
						|
 | 
						|
#include "rbbi_cache.h"
 | 
						|
 | 
						|
#include "brkeng.h"
 | 
						|
#include "cmemory.h"
 | 
						|
#include "rbbidata.h"
 | 
						|
#include "rbbirb.h"
 | 
						|
#include "uassert.h"
 | 
						|
#include "uvectr32.h"
 | 
						|
 | 
						|
U_NAMESPACE_BEGIN
 | 
						|
 | 
						|
/*
 | 
						|
 * DictionaryCache implementation
 | 
						|
 */
 | 
						|
 | 
						|
// Construct an empty dictionary cache attached to the break iterator `bi`.
// `status` is forwarded to the constructor of the fBreaks vector; every other
// field starts in the same state that reset() produces.
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status)
    : fBI(bi),
      fBreaks(status),
      fPositionInCache(-1),
      fStart(0),
      fLimit(0),
      fFirstRuleStatusIndex(0),
      fOtherRuleStatusIndex(0)
{
}
 | 
						|
 | 
						|
// Destructor. Nothing to do explicitly here; members clean up after themselves.
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache()
{
}
 | 
						|
 | 
						|
// Return the dictionary cache to its empty state: no stored boundaries,
// an empty [fStart, fLimit) range, zeroed rule status indexes, and no
// current iteration position (-1).
void RuleBasedBreakIterator::DictionaryCache::reset()
{
    fBreaks.removeAllElements();
    fPositionInCache = -1;
    fStart = 0;
    fLimit = 0;
    fFirstRuleStatusIndex = 0;
    fOtherRuleStatusIndex = 0;
}
 | 
						|
 | 
						|
// Find the cached dictionary boundary that follows fromPos.
//
//   fromPos      text position to start from; only positions in
//                [fStart, fLimit) can produce a result.
//   result       out: the following boundary. Written only when TRUE is returned.
//   statusIndex  out: set to fOtherRuleStatusIndex. Written only when TRUE is returned.
//
// Returns TRUE if a boundary was found. When FALSE is returned the cache
// iteration position has been invalidated (set to -1).
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    const bool outsideCache = (fromPos < fStart) || (fromPos >= fLimit);
    if (outsideCache) {
        fPositionInCache = -1;
        return FALSE;
    }

    const int32_t breakCount = fBreaks.size();

    // Fast path for sequential iteration: the cache position already refers
    // to fromPos, so the answer is simply the next entry.
    const bool resumeFromCachedPosition = fPositionInCache >= 0
            && fPositionInCache < breakCount
            && fBreaks.elementAti(fPositionInCache) == fromPos;
    if (resumeFromCachedPosition) {
        ++fPositionInCache;
        if (fPositionInCache >= breakCount) {
            // fromPos was the last stored boundary; nothing follows it here.
            fPositionInCache = -1;
            return FALSE;
        }
        const int32_t nextBoundary = fBreaks.elementAti(fPositionInCache);
        U_ASSERT(nextBoundary > fromPos);
        *result = nextBoundary;
        *statusIndex = fOtherRuleStatusIndex;
        return TRUE;
    }

    // Slow path for random access: scan from the front for the first stored
    // boundary beyond fromPos. The scan leaves fPositionInCache on the hit.
    fPositionInCache = 0;
    while (fPositionInCache < breakCount) {
        const int32_t candidate = fBreaks.elementAti(fPositionInCache);
        if (candidate > fromPos) {
            *result = candidate;
            *statusIndex = fOtherRuleStatusIndex;
            return TRUE;
        }
        ++fPositionInCache;
    }
    // fromPos < fLimit was checked above, so the scan is expected to succeed.
    UPRV_UNREACHABLE;
}
 | 
						|
 | 
						|
 | 
						|
// Find the cached dictionary boundary that precedes fromPos.
//
//   fromPos      text position to start from; only positions in
//                (fStart, fLimit] can produce a result.
//   result       out: the preceding boundary. Written only when TRUE is returned.
//   statusIndex  out: fFirstRuleStatusIndex when the boundary found is fStart,
//                otherwise fOtherRuleStatusIndex. Written only when TRUE is returned.
//
// Returns TRUE if a boundary was found. When FALSE is returned the cache
// iteration position has been invalidated (set to -1).
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    const bool outsideCache = (fromPos <= fStart) || (fromPos > fLimit);
    if (outsideCache) {
        fPositionInCache = -1;
        return FALSE;
    }

    const int32_t breakCount = fBreaks.size();

    if (fromPos == fLimit) {
        // Starting from the end of the cached range: begin at the last entry.
        fPositionInCache = breakCount - 1;
        U_ASSERT(fPositionInCache < 0 || fBreaks.elementAti(fPositionInCache) == fromPos);
    }

    // Fast path for sequential iteration: the cache position already refers
    // to fromPos and there is an earlier entry to step back to.
    const bool resumeFromCachedPosition = fPositionInCache > 0
            && fPositionInCache < breakCount
            && fBreaks.elementAti(fPositionInCache) == fromPos;
    if (resumeFromCachedPosition) {
        --fPositionInCache;
        const int32_t prevBoundary = fBreaks.elementAti(fPositionInCache);
        U_ASSERT(prevBoundary < fromPos);
        *result = prevBoundary;
        *statusIndex = (prevBoundary == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
        return TRUE;
    }

    if (fPositionInCache == 0) {
        // Already on the first entry; nothing in the cache precedes it.
        fPositionInCache = -1;
        return FALSE;
    }

    // Slow path for random access: scan from the back for the last stored
    // boundary before fromPos. The scan leaves fPositionInCache on the hit.
    fPositionInCache = breakCount - 1;
    while (fPositionInCache >= 0) {
        const int32_t candidate = fBreaks.elementAti(fPositionInCache);
        if (candidate < fromPos) {
            *result = candidate;
            *statusIndex = (candidate == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
            return TRUE;
        }
        --fPositionInCache;
    }
    // fromPos > fStart was checked above, so the scan is expected to succeed.
    UPRV_UNREACHABLE;
}
 | 
						|
 | 
						|
// Fill the dictionary cache with the boundaries that language break engines
// find inside the text range [startPos, endPos).
//
//   startPos, endPos   the range of text to subdivide. Ranges of length <= 1
//                      are ignored and leave the cache untouched.
//   firstRuleStatus    stored as fFirstRuleStatusIndex.
//   otherRuleStatus    stored as fOtherRuleStatusIndex.
//
// If no breaks are found the cache is left empty (as produced by reset()),
// so later lookups in this cache for the range will fail.
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
    if ((endPos - startPos) <= 1) {
        return;
    }

    reset();
    fFirstRuleStatusIndex = firstRuleStatus;
    fOtherRuleStatusIndex = otherRuleStatus;

    UErrorCode  status = U_ZERO_ERROR;
    int32_t     foundBreakCount = 0;
    UText      *text = &fBI->fText;

    // Categories at or above dictStart are treated as dictionary characters.
    const uint32_t dictStart = fBI->fData->fForwardTable->fDictCategoriesStart;

    utext_setNativeIndex(text, startPos);
    UChar32  c = utext_current32(text);
    uint16_t category = ucptrie_get(fBI->fData->fTrie, c);

    // Walk the range. Skip forward to the next dictionary character, hand the
    // text to the matching language break engine, then repeat from wherever
    // the engine left the text position.
    while (U_SUCCESS(status)) {
        int32_t current = (int32_t)UTEXT_GETNATIVEINDEX(text);
        while (current < endPos && category < dictStart) {
            utext_next32(text);
            c = utext_current32(text);
            category = ucptrie_get(fBI->fData->fTrie, c);
            current = (int32_t)UTEXT_GETNATIVEINDEX(text);
        }
        if (current >= endPos) {
            break;
        }

        // c is a dictionary character; find the engine that handles it.
        const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);

        // The engine appends whatever breaks it finds to fBreaks and reports
        // how many it added.
        if (lbe != NULL) {
            foundBreakCount += lbe->findBreaks(text, startPos, endPos, fBreaks);
        }

        // Refresh the loop variables from the current text position.
        c = utext_current32(text);
        category = ucptrie_get(fBI->fData->fTrie, c);
    }

    if (foundBreakCount <= 0) {
        // No language-based breaks, even though the segment contained
        // dictionary characters. Lookups in this cache for the range will
        // fail, and the caller is expected to fall back to the rule based
        // boundaries.
        return;
    }

    U_ASSERT(foundBreakCount == fBreaks.size());

    // Make sure the stored boundaries begin with startPos...
    if (startPos < fBreaks.elementAti(0)) {
        // The engines did not place a boundary at the start of the segment.
        // Not expected to be common, but cover it just in case.
        fBreaks.insertElementAt(startPos, 0, status);
    }
    // ...and reach at least as far as endPos.
    if (endPos > fBreaks.peeki()) {
        fBreaks.push(endPos, status);
    }

    fPositionInCache = 0;
    // Note: dictionary matching may extend beyond the original limit, so the
    // cached range is taken from the stored boundaries, not the arguments.
    fStart = fBreaks.elementAti(0);
    fLimit = fBreaks.peeki();
}
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 *   BreakCache implementation
 | 
						|
 */
 | 
						|
 | 
						|
// Construct a break cache attached to the break iterator `bi`.
// `status` is forwarded to the constructor of the side buffer; the cache
// indexes themselves are initialized by reset() with its default arguments.
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status)
    : fBI(bi),
      fSideBuffer(status)
{
    reset();
}
 | 
						|
 | 
						|
 | 
						|
// Destructor. Nothing to do explicitly here; members clean up after themselves.
RuleBasedBreakIterator::BreakCache::~BreakCache()
{
}
 | 
						|
 | 
						|
 | 
						|
// Reset the cache so that it holds exactly one boundary.
//
//   pos         the text position of that single boundary.
//   ruleStatus  the rule status index stored with it (narrowed to 16 bits).
//
// The boundary goes in slot 0, which becomes the start, the end and the
// current iteration position of the circular buffer.
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus)
{
    fBoundaries[0] = pos;
    fStatuses[0] = static_cast<uint16_t>(ruleStatus);

    fStartBufIdx = 0;
    fEndBufIdx = 0;
    fBufIdx = 0;
    fTextIdx = pos;
}
 | 
						|
 | 
						|
 | 
						|
// Publish the cache's current iteration position to the owning break
// iterator: its position and rule status index are set from the cache and
// its "done" flag is cleared.
// Returns the current text position.
int32_t  RuleBasedBreakIterator::BreakCache::current()
{
    fBI->fDone = FALSE;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
    fBI->fPosition = fTextIdx;
    return fTextIdx;
}
 | 
						|
 | 
						|
 | 
						|
// Move to the boundary following startPos.
//
//   startPos  the text position to start from.
//   status    in/out ICU error code; nothing is done if it already indicates failure.
//
// The cache is first positioned on startPos (it may already be there, seek()
// may find it in the cache, or populateNear() may load it), and then a
// next() is performed from that position.
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    // Evaluated left to right with short-circuiting: each step is only tried
    // when the previous one did not succeed.
    const bool positioned = (startPos == fTextIdx)
            || seek(startPos)
            || populateNear(startPos, status);
    if (!positioned) {
        return;
    }

    // TODO: an awkward set of interactions with bi->fDone
    //       seek() does not clear it; it can't because of interactions with populateNear().
    //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
    //       So clear it here, for the case where seek() succeeded on an iterator
    //       that had previously run off the end.
    fBI->fDone = false;
    next();
}
 | 
						|
 | 
						|
 | 
						|
// Move to the boundary preceding startPos.
//
//   startPos  the text position to start from.
//   status    in/out ICU error code; nothing is done if it already indicates failure.
//
// The cache is first positioned at or before startPos (it may already be
// there, seek() may find it in the cache, or populateNear() may load it).
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    // Evaluated left to right with short-circuiting: each step is only tried
    // when the previous one did not succeed.
    const bool positioned = (startPos == fTextIdx)
            || seek(startPos)
            || populateNear(startPos, status);
    if (!positioned) {
        return;
    }

    if (startPos != fTextIdx) {
        // startPos lies between two boundaries, and seek() has left the cache
        // on the one before it. That boundary is the answer; current() pushes
        // the cache position out to the BreakIterator itself.
        U_ASSERT(startPos > fTextIdx);
        current();
    } else {
        // startPos is itself a boundary; step back once from it.
        previous(status);
    }
}
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * Out-of-line code for BreakCache::next().
 | 
						|
 * Cache does not already contain the boundary
 | 
						|
 */
 | 
						|
void RuleBasedBreakIterator::BreakCache::nextOL() {
 | 
						|
    fBI->fDone = !populateFollowing();
 | 
						|
    fBI->fPosition = fTextIdx;
 | 
						|
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
// Step the cache back by one boundary and publish the result to the break
// iterator.
//
//   status  in/out ICU error code; nothing is done if it already indicates failure.
//
// The iterator's "done" flag is set when the buffer index did not change,
// i.e. no earlier boundary could be reached.
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }

    const int32_t bufIdxBefore = fBufIdx;

    if (fBufIdx != fStartBufIdx) {
        // The preceding boundary is already in the cache; just step back.
        fBufIdx = modChunkSize(fBufIdx - 1);
        fTextIdx = fBoundaries[fBufIdx];
    } else {
        // At the start of the cache. Try to prepend earlier boundaries.
        populatePreceding(status);
    }

    fBI->fDone = (fBufIdx == bufIdxBefore);
    fBI->fPosition = fTextIdx;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
 | 
						|
 | 
						|
 | 
						|
// Position the cache at `pos`, if `pos` lies within the cached range.
//
//   pos  the text position to look for.
//
// Returns FALSE, leaving the cache position untouched, when pos is outside
// the cached boundaries. Otherwise returns TRUE with the iteration position
// set to the boundary at pos, or to the boundary preceding pos when pos is
// not itself a cached boundary.
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos)
{
    const int32_t firstBoundary = fBoundaries[fStartBufIdx];
    const int32_t lastBoundary = fBoundaries[fEndBufIdx];

    if (pos < firstBoundary || pos > lastBoundary) {
        return FALSE;
    }

    if (pos == firstBoundary) {
        // Common case: seek(0), from BreakIterator::first()
        fBufIdx = fStartBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    if (pos == lastBoundary) {
        fBufIdx = fEndBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    // Binary search over the circular buffer for the first boundary beyond
    // pos. When the live region wraps around the end of the array (lo > hi),
    // CACHE_SIZE is added before halving so the midpoint falls inside the
    // live region once reduced by modChunkSize().
    int32_t lo = fStartBufIdx;
    int32_t hi = fEndBufIdx;
    while (lo != hi) {
        const int32_t wrapAdjust = (lo > hi) ? CACHE_SIZE : 0;
        const int32_t probe = modChunkSize((lo + hi + wrapAdjust) / 2);
        if (fBoundaries[probe] > pos) {
            hi = probe;
        } else {
            lo = modChunkSize(probe + 1);
        }
    }
    U_ASSERT(fBoundaries[hi] > pos);

    // The entry just before that one is the boundary at or preceding pos.
    fBufIdx = modChunkSize(hi - 1);
    fTextIdx = fBoundaries[fBufIdx];
    U_ASSERT(fTextIdx <= pos);
    return TRUE;
}
 | 
						|
 | 
						|
 | 
						|
// Load the cache with boundaries around `position`, which must lie outside
// the currently cached range, and leave the iteration position on the
// boundary at or preceding `position`.
//
//   position  the text position of interest.
//   status    in/out ICU error code. If it indicates failure on entry, or
//             becomes a failure while the cache is being filled, FALSE is
//             returned.
//
// Returns true once the cache has been positioned.
//
// If `position` is more than 15 beyond either end of the cached range, the
// existing contents are discarded and the cache is restarted from a boundary
// found near `position` (or from 0). Otherwise the existing contents are
// extended towards `position`.
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);

    // Find a boundary somewhere in the vicinity of the requested position.
    // Depending on the safe rules and the text data, it could be either before, at, or after
    // the requested position.

    // If the requested position is not near already cached positions, clear the existing cache,
    // find a near-by boundary and begin new cache contents there.

    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
        int32_t aBoundary = 0;
        int32_t ruleStatusIndex = 0;
        if (position > 20) {
            int32_t backupPos = fBI->handleSafePrevious(position);

            if (backupPos > 0) {
                // Advance to the boundary following the backup position.
                // There is a complication: the safe reverse rules identify pairs of code points
                // that are safe. If advancing from the safe point moves forwards by less than
                // two code points, we need to advance one more time to ensure that the boundary
                // is good, including a correct rules status value.
                //
                fBI->fPosition = backupPos;
                aBoundary = fBI->handleNext();
                if (aBoundary <= backupPos + 4) {
                    // +4 is a quick test for possibly having advanced only one codepoint.
                    // Four being the length of the longest potential code point, a supplementary in UTF-8
                    utext_setNativeIndex(&fBI->fText, aBoundary);
                    if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
                        // The initial handleNext() only advanced by a single code point. Go again.
                        aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
                    }
                }
                ruleStatusIndex = fBI->fRuleStatusIndex;
            }
        }
        reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
    }

    // Fill in boundaries between existing cache content and the new requested position.

    if (fBoundaries[fEndBufIdx] < position) {
        // The last position in the cache precedes the requested position.
        // Add following position(s) to the cache.
        while (fBoundaries[fEndBufIdx] < position) {
            if (!populateFollowing()) {
                UPRV_UNREACHABLE;
            }
        }
        fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
        fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
        while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
            previous(status);
            if (U_FAILURE(status)) {
                // previous() does nothing once status has failed, so the
                // cache position would never change again. Stop here rather
                // than loop forever.
                return FALSE;
            }
        }
        return true;
    }

    if (fBoundaries[fStartBufIdx] > position) {
        // The first position in the cache is beyond the requested position.
        // back up more until we get a boundary <= the requested position.
        while (fBoundaries[fStartBufIdx] > position) {
            populatePreceding(status);
            if (U_FAILURE(status)) {
                // populatePreceding() does nothing once status has failed, so
                // the start of the cache would never move again. Stop here
                // rather than loop forever.
                return FALSE;
            }
        }
        fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
        fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
        while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
            next();
        }
        if (fTextIdx > position) {
            // If position is not itself a boundary, the next() loop above will overshoot.
            // Back up one, leaving cache position at the boundary preceding the requested position.
            previous(status);
        }
        return true;
    }

    U_ASSERT(fTextIdx == position);
    return true;
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
// Append to the cache the boundary that follows the last cached boundary,
// and make it the current iteration position.
//
// Returns FALSE, leaving the cache unchanged, when the rule-based search
// reports UBRK_DONE; TRUE otherwise.
//
// When the new boundary comes from the rules rather than from the dictionary
// cache, up to six further non-dictionary boundaries are appended as well,
// without moving the iteration position.
UBool RuleBasedBreakIterator::BreakCache::populateFollowing()
{
    const int32_t fromPosition = fBoundaries[fEndBufIdx];
    const int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
    int32_t boundary = 0;
    int32_t boundaryStatusIdx = 0;

    // First choice: the dictionary cache may already know the answer.
    if (fBI->fDictionaryCache->following(fromPosition, &boundary, &boundaryStatusIdx)) {
        addFollowing(boundary, boundaryStatusIdx, UpdateCachePosition);
        return TRUE;
    }

    // Otherwise run the rules forward from the last cached boundary.
    fBI->fPosition = fromPosition;
    boundary = fBI->handleNext();
    if (boundary == UBRK_DONE) {
        return FALSE;
    }
    boundaryStatusIdx = fBI->fRuleStatusIndex;

    if (fBI->fDictionaryCharCount > 0) {
        // The text segment obtained from the rules includes dictionary characters.
        // Subdivide it, with the subdivided results going into the dictionary
        // cache, then ask that cache again.
        fBI->fDictionaryCache->populateDictionary(fromPosition, boundary, fromRuleStatusIdx, boundaryStatusIdx);
        if (fBI->fDictionaryCache->following(fromPosition, &boundary, &boundaryStatusIdx)) {
            addFollowing(boundary, boundaryStatusIdx, UpdateCachePosition);
            return TRUE;
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
            //       But be careful with interactions with populateNear().
        }
    }

    // Either the rule based segment had no dictionary characters, or it had
    // some but the dictionary segmenter did not handle them (so the return
    // above was not taken). Cache the end point of the rule based segment.
    addFollowing(boundary, boundaryStatusIdx, UpdateCachePosition);

    // Read ahead a few non-dictionary boundaries, so that subsequent calls to
    // BreakIterator::next() can take the fast path and get cached results.
    int extraCount = 0;
    while (extraCount < 6) {
        boundary = fBI->handleNext();
        if (boundary == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
            break;
        }
        addFollowing(boundary, fBI->fRuleStatusIndex, RetainCachePosition);
        ++extraCount;
    }

    return TRUE;
}
 | 
						|
 | 
						|
 | 
						|
// Prepend to the cache the boundary that precedes the first cached boundary,
// and make it the current iteration position.
//
//   status  in/out ICU error code; FALSE is returned at once if it already
//           indicates failure. It is also passed to the side buffer when
//           elements are added.
//
// Returns FALSE when the first cached boundary is already 0 (nothing can
// precede it); TRUE when a preceding boundary was added.
//
// Further boundaries found while locating that one are prepended too, without
// moving the iteration position, for as long as the circular buffer has room.
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return FALSE;
    }

    const int32_t fromPosition = fBoundaries[fStartBufIdx];
    if (fromPosition == 0) {
        return FALSE;
    }

    int32_t position = 0;
    int32_t positionStatusIdx = 0;

    // First choice: the dictionary cache may already know the answer.
    if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
        return TRUE;
    }

    // Step 1: find some boundary preceding the first already-cached boundary.
    // Back up in steps of 30 until a boundary before fromPosition turns up.
    int32_t backupPosition = fromPosition;
    do {
        backupPosition -= 30;
        backupPosition = (backupPosition <= 0) ? 0 : fBI->handleSafePrevious(backupPosition);

        if (backupPosition == UBRK_DONE || backupPosition == 0) {
            position = 0;
            positionStatusIdx = 0;
        } else {
            // Advance to the boundary following the backup position.
            // There is a complication: the safe reverse rules identify pairs of code points
            // that are safe. If advancing from the safe point moves forwards by less than
            // two code points, we need to advance one more time to ensure that the boundary
            // is good, including a correct rules status value.
            fBI->fPosition = backupPosition;
            position = fBI->handleNext();
            if (position <= backupPosition + 4) {
                // +4 is a quick test for possibly having advanced only one codepoint.
                // Four being the length of the longest potential code point, a supplementary in UTF-8
                utext_setNativeIndex(&fBI->fText, position);
                if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
                    // The initial handleNext() only advanced by a single code point. Go again.
                    position = fBI->handleNext();   // Safe rules identify safe pairs.
                }
            }
            positionStatusIdx = fBI->fRuleStatusIndex;
        }
    } while (position >= fromPosition);

    // Step 2: walk forward from that boundary up to the first already-cached
    // boundary, collecting every boundary on the way as a
    // (position, status index) pair in the side buffer. The side buffer is
    // needed because it is not yet known where the entries will fall in the
    // circular cache buffer.
    fSideBuffer.removeAllElements();
    fSideBuffer.addElement(position, status);
    fSideBuffer.addElement(positionStatusIdx, status);

    do {
        fBI->fPosition = position;
        int32_t segmentStart = position;
        const int32_t segmentStartStatusIdx = positionStatusIdx;

        position = fBI->handleNext();
        positionStatusIdx = fBI->fRuleStatusIndex;
        if (position == UBRK_DONE) {
            break;
        }

        UBool segmentHandledByDictionary = FALSE;
        if (fBI->fDictionaryCharCount != 0) {
            // Segment from the rules includes dictionary characters.
            // Subdivide it, with subdivided results going into the dictionary cache.
            const int32_t dictSegEndPosition = position;
            fBI->fDictionaryCache->populateDictionary(segmentStart, dictSegEndPosition,
                                                      segmentStartStatusIdx, positionStatusIdx);
            while (fBI->fDictionaryCache->following(segmentStart, &position, &positionStatusIdx)) {
                segmentHandledByDictionary = true;
                U_ASSERT(position > segmentStart);
                if (position >= fromPosition) {
                    break;
                }
                U_ASSERT(position <= dictSegEndPosition);
                fSideBuffer.addElement(position, status);
                fSideBuffer.addElement(positionStatusIdx, status);
                segmentStart = position;
            }
            U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
        }

        if (!segmentHandledByDictionary && position < fromPosition) {
            fSideBuffer.addElement(position, status);
            fSideBuffer.addElement(positionStatusIdx, status);
        }
    } while (position < fromPosition);

    // Step 3: move boundaries from the side buffer to the main circular
    // buffer, nearest to fromPosition first. The pairs were pushed as
    // position then status, so they pop as status then position.
    UBool success = FALSE;
    if (!fSideBuffer.isEmpty()) {
        // The nearest boundary becomes the new iteration position.
        positionStatusIdx = fSideBuffer.popi();
        position = fSideBuffer.popi();
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
        success = TRUE;
    }

    while (!fSideBuffer.isEmpty()) {
        positionStatusIdx = fSideBuffer.popi();
        position = fSideBuffer.popi();
        if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
            // No space in circular buffer to hold a new preceding result while
            // also retaining the current cache (iteration) position.
            // Bailing out is safe; the cache will refill again if needed.
            break;
        }
    }

    return success;
}
 | 
						|
 | 
						|
 | 
						|
// Append a boundary after the last entry of the circular buffer.
//
//   position       the boundary; must be greater than the current last boundary.
//   ruleStatusIdx  its rule status index; stored as a 16 bit value.
//   update         UpdateCachePosition makes the new entry the current
//                  iteration position; any other value leaves the iteration
//                  position where it was.
//
// When the buffer is full (the slot after the end is the start slot), the
// start index is advanced by 6, dropping the oldest entries.
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update)
{
    U_ASSERT(position > fBoundaries[fEndBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t newEndIdx = modChunkSize(fEndBufIdx + 1);
    if (newEndIdx == fStartBufIdx) {
        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
    }

    fBoundaries[newEndIdx] = position;
    fStatuses[newEndIdx] = static_cast<uint16_t>(ruleStatusIdx);
    fEndBufIdx = newEndIdx;

    if (update != UpdateCachePosition) {
        // Retaining the original cache position.
        // Check that the added boundary did not wrap around the buffer and
        // over-write the original position. It is the responsibility of
        // callers of this function to not add too many.
        U_ASSERT(newEndIdx != fBufIdx);
        return;
    }

    // Set current position to the newly added boundary.
    fBufIdx = newEndIdx;
    fTextIdx = position;
}
 | 
						|
 | 
						|
// Prepend a boundary before the first entry of the circular buffer.
//
//   position       the boundary; must be less than the current first boundary.
//   ruleStatusIdx  its rule status index; stored as a 16 bit value.
//   update         UpdateCachePosition makes the new entry the current
//                  iteration position; RetainCachePosition leaves the
//                  iteration position where it was.
//
// Returns false, without changing the cache, when the buffer is full, the
// iteration position is on the last entry, and RetainCachePosition was
// requested. Returns true otherwise. When the buffer is full and the
// insertion proceeds, the last entry is dropped to make room.
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update)
{
    U_ASSERT(position < fBoundaries[fStartBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t newStartIdx = modChunkSize(fStartBufIdx - 1);
    const bool bufferFull = (newStartIdx == fEndBufIdx);
    if (bufferFull) {
        if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
            // Failure. The insertion of the new boundary would claim the buffer position that is the
            // current iteration position. And we also want to retain the current iteration position.
            // (The buffer is already completely full of entries that precede the iteration position.)
            return false;
        }
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
    }

    fBoundaries[newStartIdx] = position;
    fStatuses[newStartIdx] = static_cast<uint16_t>(ruleStatusIdx);
    fStartBufIdx = newStartIdx;

    if (update == UpdateCachePosition) {
        // Set current position to the newly added boundary.
        fBufIdx = newStartIdx;
        fTextIdx = position;
    }
    return true;
}
 | 
						|
 | 
						|
 | 
						|
// Debugging aid. When RBBI_DEBUG is defined, prints the current iteration
// position followed by every (buffer index, boundary) pair from the start of
// the circular buffer through its end. Does nothing otherwise.
void RuleBasedBreakIterator::BreakCache::dumpCache()
{
#ifdef RBBI_DEBUG
    RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
    int32_t idx = fStartBufIdx;
    for (;;) {
        RBBIDebugPrintf("%d  %d\n", idx, fBoundaries[idx]);
        if (idx == fEndBufIdx) {
            break;
        }
        idx = modChunkSize(idx + 1);
    }
#endif
}
 | 
						|
 | 
						|
U_NAMESPACE_END
 | 
						|
 | 
						|
#endif // #if !UCONFIG_NO_BREAK_ITERATION
 |