forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			162 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			162 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// © 2016 and later: Unicode, Inc. and others.
 | 
						|
// License & terms of use: http://www.unicode.org/copyright.html
 | 
						|
/*
 | 
						|
**********************************************************************
 | 
						|
* Copyright (c) 2003-2011, International Business Machines
 | 
						|
* Corporation and others.  All Rights Reserved.
 | 
						|
**********************************************************************
 | 
						|
* Author: Alan Liu
 | 
						|
* Created: September 24 2003
 | 
						|
* Since: ICU 2.8
 | 
						|
**********************************************************************
 | 
						|
*/
 | 
						|
#include "ruleiter.h"
 | 
						|
#include "unicode/parsepos.h"
 | 
						|
#include "unicode/symtable.h"
 | 
						|
#include "unicode/unistr.h"
 | 
						|
#include "unicode/utf16.h"
 | 
						|
#include "patternprops.h"
 | 
						|
 | 
						|
/* \U87654321 or \ud800\udc00 */
 | 
						|
#define MAX_U_NOTATION_LEN 12
 | 
						|
 | 
						|
U_NAMESPACE_BEGIN
 | 
						|
 | 
						|
/**
 * Creates an iterator over a rule string.
 *
 * The text, pos and sym members are initialized from the corresponding
 * arguments.  The iterator starts with no variable buffer selected
 * (buf is null and bufPos is 0), so characters are initially read
 * from the text itself.
 *
 * @param theText the rule text to iterate over
 * @param theSym  symbol table used to resolve variable references;
 *                next() checks it against null before using it
 * @param thePos  parse position supplying the index into theText
 */
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText,
                                             const SymbolTable* theSym,
                                             ParsePosition& thePos)
    : text(theText)
    , pos(thePos)
    , sym(theSym)
    , buf(0)
    , bufPos(0)
{
}
 | 
						|
 | 
						|
/**
 * Reports whether the iterator has run out of input.
 *
 * @return TRUE only when no variable buffer is active and the parse
 *         position index equals the length of the text
 */
UBool RuleCharacterIterator::atEnd() const {
    // An active variable buffer always means there is more to read.
    if (buf != 0) {
        return FALSE;
    }
    return pos.getIndex() == text.length();
}
 | 
						|
 | 
						|
/**
 * Returns the next character according to the given option flags and
 * advances past it.
 *
 * - PARSE_VARIABLES: when a SymbolTable::SYMBOL_REF is read from the text
 *   (not from inside a variable buffer) and a symbol table is present, the
 *   reference is parsed and looked up, and iteration continues inside the
 *   variable's value.
 * - SKIP_WHITESPACE: characters for which PatternProps::isWhiteSpace() is
 *   true are skipped.
 * - PARSE_ESCAPES: a backslash starts an escape sequence that is decoded
 *   with UnicodeString::unescapeAt().
 *
 * @param options   bitmask of PARSE_VARIABLES, SKIP_WHITESPACE, PARSE_ESCAPES
 * @param isEscaped set to TRUE if an escape sequence was processed for the
 *                  returned character, FALSE otherwise
 * @param ec        in/out error code; set to U_UNDEFINED_VARIABLE when a
 *                  variable lookup yields null, or to
 *                  U_MALFORMED_UNICODE_ESCAPE when the escape cannot be
 *                  decoded
 * @return the character read, or DONE if ec indicated failure on entry,
 *         an error occurred, or _current() itself produced DONE
 */
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    isEscaped = FALSE;

    // The option tests do not change while looping, so evaluate them once.
    const bool expandVariables = (options & PARSE_VARIABLES) != 0 && sym != 0;
    const bool dropWhiteSpace = (options & SKIP_WHITESPACE) != 0;
    const bool decodeEscapes = (options & PARSE_ESCAPES) != 0;

    for (;;) {
        UChar32 ch = _current();
        _advance(U16_LENGTH(ch));

        // Variable references are only recognized in the text itself,
        // never while already reading from a variable's value.
        if (expandVariables && buf == 0 && ch == SymbolTable::SYMBOL_REF) {
            UnicodeString varName = sym->parseReference(text, pos, text.length());
            if (varName.length() == 0) {
                // Isolated SYMBOL_REF: hand it back as-is.  The caller
                // must be prepared for this.
                return ch;
            }
            bufPos = 0;
            buf = sym->lookup(varName);
            if (buf == 0) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            if (buf->length() == 0) {
                // An empty variable value contributes nothing; go back
                // to reading from the text.
                buf = 0;
            }
            continue;
        }

        if (dropWhiteSpace && PatternProps::isWhiteSpace(ch)) {
            continue;
        }

        if (ch == 0x5C /*'\\'*/ && decodeEscapes) {
            UnicodeString escapeText;
            int32_t consumed = 0;
            // Only MAX_U_NOTATION_LEN code units are fetched for decoding.
            lookahead(escapeText, MAX_U_NOTATION_LEN);
            ch = escapeText.unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = TRUE;
            if (ch < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        return ch;
    }
}
 | 
						|
 | 
						|
/**
 * Copies the iterator's current state into p: the parse position index,
 * the active variable buffer pointer (possibly null) and the offset
 * within that buffer.
 *
 * @param p receives the state
 */
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos = pos.getIndex();
    p.buf = buf;
    p.bufPos = bufPos;
}
 | 
						|
 | 
						|
/**
 * Overwrites the iterator's state with the values held in p: the parse
 * position index, the variable buffer pointer and the offset within
 * that buffer.
 *
 * @param p the state to install (presumably one filled in by getPos())
 */
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf = p.buf;
    bufPos = p.bufPos;
}
 | 
						|
 | 
						|
/**
 * Advances past any run of pattern white space at the current position.
 * Does nothing unless SKIP_WHITESPACE is set in options.
 *
 * @param options bitmask; only the SKIP_WHITESPACE bit is examined
 */
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }

    UChar32 ch = _current();
    while (PatternProps::isWhiteSpace(ch)) {
        _advance(U16_LENGTH(ch));
        ch = _current();
    }
}
 | 
						|
 | 
						|
/**
 * Extracts upcoming code units, without advancing, into result.
 *
 * When a variable buffer is active the code units come from that buffer
 * starting at bufPos; otherwise they come from the text starting at the
 * parse position index.
 *
 * @param result       receives the extracted code units
 * @param maxLookAhead maximum number of code units requested; a negative
 *                     value is replaced by 0x7FFFFFFF
 * @return a reference to result
 */
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t limit = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;

    if (buf == 0) {
        text.extract(pos.getIndex(), limit, result);
    } else {
        buf->extract(bufPos, limit, result);
    }
    return result;
}
 | 
						|
 | 
						|
/**
 * Moves the current position forward by count; a thin forwarder to
 * _advance().
 *
 * @param count amount to advance, passed unchanged to _advance()
 */
void RuleCharacterIterator::jumpahead(int32_t count) {
    this->_advance(count);
}
 | 
						|
 | 
						|
/*
 | 
						|
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
 | 
						|
    int32_t b = pos.getIndex();
 | 
						|
    text.extract(0, b, result);
 | 
						|
    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
 | 
						|
}
 | 
						|
*/
 | 
						|
 | 
						|
/**
 * Returns the code point at the current position without advancing.
 *
 * @return the code point at bufPos in the active variable buffer, if
 *         there is one; otherwise the code point at the parse position
 *         index in the text, or DONE when that index is not less than
 *         the text length
 */
UChar32 RuleCharacterIterator::_current() const {
    if (buf == 0) {
        const int index = pos.getIndex();
        if (index >= text.length()) {
            return static_cast<UChar32>(DONE);
        }
        return text.char32At(index);
    }
    return buf->char32At(bufPos);
}
 | 
						|
 | 
						|
/**
 * Moves the current position forward by count.
 *
 * When a variable buffer is active, bufPos is advanced and the buffer is
 * released (buf set to null) once bufPos reaches or passes its end, so
 * that reading resumes from the text.  Otherwise the parse position index
 * is advanced and clamped to the length of the text.
 *
 * @param count amount to add to the current position
 */
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf != 0) {
        bufPos += count;
        // Compare with >= rather than ==: if count overshoots the end of
        // the buffer, an equality test would leave buf set with bufPos
        // past its end, and the iterator would never leave the buffer.
        // This mirrors the clamping done for the text below.
        if (bufPos >= buf->length()) {
            buf = 0;
        }
    } else {
        pos.setIndex(pos.getIndex() + count);
        if (pos.getIndex() > text.length()) {
            pos.setIndex(text.length());
        }
    }
}
 | 
						|
 | 
						|
U_NAMESPACE_END
 | 
						|
 | 
						|
//eof
 |