forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Callable, Union
 | |
| from typing_extensions import Literal
 | |
| from .errors import ParseError
 | |
| 
 | |
| 
 | |
class ParserStream:
    """Character cursor over a string with a separate look-ahead offset.

    ``index`` is the committed cursor position and ``peek_offset`` is a
    look-ahead distance relative to it. A CR LF pair is reported as a single
    LF by ``char_at`` and is stepped over as one unit by ``next`` and
    ``peek``.
    """

    def __init__(self, string: str):
        self.string = string
        # Committed cursor position within ``string``.
        self.index = 0
        # Look-ahead distance, relative to ``index``.
        self.peek_offset = 0

    def get(self, offset: int) -> Union[str, None]:
        """Return the raw character at ``offset``, or None if out of range.

        Negative offsets count as out of range. Plain Python indexing would
        wrap them around to the end of the string, which would hand back a
        character the cursor is nowhere near.
        """
        if 0 <= offset < len(self.string):
            return self.string[offset]
        return None

    def char_at(self, offset: int) -> Union[str, None]:
        """Return the character at ``offset``, reading CR LF as a single LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor. The
        # cursor still points to the EOL position, which in this case is the
        # beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        if self.get(offset) == '\r' and self.get(offset + 1) == '\n':
            return '\n'

        return self.get(offset)

    @property
    def current_char(self) -> Union[str, None]:
        """Character at the cursor (CR LF read as LF), or None past the end."""
        return self.char_at(self.index)

    @property
    def current_peek(self) -> Union[str, None]:
        """Character at the look-ahead position (CR LF read as LF), or None."""
        return self.char_at(self.index + self.peek_offset)

    def next(self) -> Union[str, None]:
        """Advance the cursor by one character and clear the look-ahead.

        Returns the raw character at the new cursor position (None past the
        end). The return value is not CRLF-normalised: if the new position is
        the start of a CR LF pair, CR is returned. Use ``current_char`` for
        the normalised value.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.get(self.index)

    def peek(self) -> Union[str, None]:
        """Advance the look-ahead by one character; the cursor stays put.

        Returns the raw character at the new look-ahead position (None past
        the end). As with ``next``, the return value is not CRLF-normalised;
        use ``current_peek`` for that.
        """
        position = self.index + self.peek_offset
        # Skip over CRLF as if it was a single character.
        if self.get(position) == '\r' and self.get(position + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.get(self.index + self.peek_offset)

    def reset_peek(self, offset: int = 0) -> None:
        """Set the look-ahead offset to ``offset`` (default: back to the cursor)."""
        self.peek_offset = offset

    def skip_to_peek(self) -> None:
        """Move the cursor to the look-ahead position and clear the look-ahead."""
        self.index += self.peek_offset
        self.peek_offset = 0
 | |
| 
 | |
| 
 | |
# Line terminator as reported by the stream: ParserStream.char_at reads a
# CR LF pair as this single character.
EOL = '\n'
# End-of-input marker: the stream accessors return None past the last character.
EOF = None
# A line whose first non-blank character is one of these is not treated as a
# continuation of the current pattern (see
# FluentParserStream.is_char_pattern_continuation).
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
 | |
| 
 | |
| 
 | |
class FluentParserStream(ParserStream):
    """ParserStream with Fluent-specific peek/skip/expect/take helpers.

    ``peek_*`` and ``is_*`` methods only move the look-ahead offset;
    ``skip_*``, ``expect_*`` and ``take_*`` methods move the cursor.
    """

    def peek_blank_inline(self) -> str:
        """Peek past a run of spaces and return the spaces that were passed."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self) -> str:
        """Move the cursor past a run of spaces and return those spaces."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self) -> str:
        """Peek past consecutive blank lines.

        Returns one EOL per blank line passed. When a non-blank line is
        reached, the look-ahead is put back at the start of that line. When
        EOF is reached, the look-ahead is left at EOF.
        """
        blank = ""
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()

            if self.current_peek == EOL:
                blank += EOL
                self.peek()
                continue

            if self.current_peek is EOF:
                # Treat the blank line at EOF as a blank block.
                return blank

            # Any other char; reset to column 1 on this line.
            self.reset_peek(line_start)
            return blank

    def skip_blank_block(self) -> str:
        """Move the cursor past consecutive blank lines; return their EOLs."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self) -> None:
        """Peek past any mix of spaces and line ends."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self) -> None:
        """Move the cursor past any mix of spaces and line ends."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch: str) -> Literal[True]:
        """Consume ``ch`` at the cursor.

        Raises:
            ParseError: code 'E0003' with ``ch`` when the current character
                is something else.
        """
        if self.current_char == ch:
            self.next()
            return True

        raise ParseError('E0003', ch)

    def expect_line_end(self) -> Literal[True]:
        """Consume a line end at the cursor; EOF is accepted and not consumed.

        Raises:
            ParseError: code 'E0003' when the current character is neither
                EOL nor EOF.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True

        if self.current_char == EOL:
            self.next()
            return True

        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(self, f: Callable[[str], bool]) -> Union[str, Literal[False], None]:
        """Consume the current character if the predicate ``f`` accepts it.

        Returns the consumed character, False when ``f`` rejects it (the
        cursor does not move), or EOF (None) at the end of input.
        """
        ch = self.current_char
        if ch is None:
            return EOF
        if f(ch):
            self.next()
            return ch
        return False

    def is_char_id_start(self, ch: Union[str, None]) -> bool:
        """Return True when ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is None:
            return False

        cc = ord(ch)
        return 97 <= cc <= 122 or 65 <= cc <= 90  # a-z, A-Z

    def is_identifier_start(self) -> bool:
        """Return True when the look-ahead character is an ASCII letter."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self) -> bool:
        """Return True when a digit, optionally preceded by '-', is at the cursor.

        The look-ahead offset is reset to 0 before returning.
        """
        ch = self.peek() if self.current_char == '-' else self.current_char
        if ch is None:
            self.reset_peek()
            return False

        cc = ord(ch)
        is_digit = 48 <= cc <= 57  # 0-9
        self.reset_peek()
        return is_digit

    def is_char_pattern_continuation(self, ch: Union[str, None]) -> bool:
        """Return True unless ``ch`` is EOF or one of SPECIAL_LINE_START_CHARS."""
        if ch is EOF:
            return False

        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self) -> bool:
        """Return True when the look-ahead character is neither EOF nor EOL."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self) -> bool:
        """Check whether the line at the look-ahead position continues a value.

        Leading spaces are peeked past first. The line continues the value
        when its first non-space character is '{', or when it is indented by
        at least one space and that character passes
        ``is_char_pattern_continuation``. On True the look-ahead is restored
        to where it was on entry; on False it is left where the scan stopped.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        # No indentation at all: not a continuation.
        if self.peek_offset - column1 == 0:
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level: int = -1) -> bool:
        """Check whether the line after the current EOL is a comment line.

        The look-ahead must be at an EOL; otherwise False is returned and the
        look-ahead is left untouched. A comment line starts with its '#'
        marker followed by a space or a line end. With ``level`` 0, 1 or 2
        the marker must be exactly ``level + 1`` '#' characters; with the
        default -1, one to three are accepted. In every other case the
        look-ahead offset is reset to 0 before returning.
        """
        if self.current_peek != EOL:
            return False

        any_level = level == -1
        # Upper bound on the '#' characters a marker may contain.
        max_hashes = 3 if any_level else max(level + 1, 0)

        hashes = 0
        self.peek()  # Step over the EOL onto the first char of the next line.
        while hashes < max_hashes and self.current_peek == '#':
            hashes += 1
            self.peek()

        # The first char after the marker. Read through current_peek rather
        # than peek()'s raw return value so that a CRLF directly after the
        # marker is seen as EOL.
        follower = self.current_peek
        self.reset_peek()

        if any_level:
            if hashes == 0:
                return False
        elif hashes != max_hashes:
            return False

        return follower in (' ', EOL)

    def is_variant_start(self) -> bool:
        """Return True when a variant key opener is at the look-ahead position.

        That is a '[' not followed by another '[', optionally preceded by
        '*'. The look-ahead is restored to its entry value before returning.
        """
        current_peek_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        if self.current_peek == '[' and self.peek() != '[':
            self.reset_peek(current_peek_offset)
            return True

        self.reset_peek(current_peek_offset)
        return False

    def is_attribute_start(self) -> bool:
        """Return True when the look-ahead character is '.'."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start: int) -> None:
        """Advance the cursor to the start of the next line that looks like an entry.

        A line looks like an entry start when its first character is an
        ASCII letter, '-' or '#', or when it begins with '//' or '[['
        (Syntax 0.4). If nothing matches, the cursor ends up at EOF.

        Args:
            junk_start: index where the junk being skipped begins.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first == '-' or first == '#':
                break

            # Syntax 0.4 compatibility
            peek = self.peek()
            self.reset_peek()
            if (first, peek) in (('/', '/'), ('[', '[')):
                break

    def take_id_start(self) -> Union[str, None]:
        """Consume and return an ASCII letter at the cursor.

        Raises:
            ParseError: code 'E0004' when the current character is not an
                ASCII letter.
        """
        if self.is_char_id_start(self.current_char):
            ret = self.current_char
            self.next()
            return ret

        raise ParseError('E0004', 'a-zA-Z')

    def take_id_char(self) -> Union[str, Literal[False], None]:
        """Consume an identifier character (a-z, A-Z, 0-9, '_' or '-').

        Return values are those of ``take_char``.
        """
        def closure(ch: str) -> bool:
            cc = ord(ch)
            return (97 <= cc <= 122       # a-z
                    or 65 <= cc <= 90     # A-Z
                    or 48 <= cc <= 57     # 0-9
                    or cc == 95           # _
                    or cc == 45)          # -
        return self.take_char(closure)

    def take_digit(self) -> Union[str, Literal[False], None]:
        """Consume a decimal digit; return values are those of ``take_char``."""
        def closure(ch: str) -> bool:
            return 48 <= ord(ch) <= 57  # 0-9
        return self.take_char(closure)

    def take_hex_digit(self) -> Union[str, Literal[False], None]:
        """Consume a hexadecimal digit; return values are those of ``take_char``."""
        def closure(ch: str) -> bool:
            cc = ord(ch)
            return (48 <= cc <= 57        # 0-9
                    or 65 <= cc <= 70     # A-F
                    or 97 <= cc <= 102)   # a-f
        return self.take_char(closure)
 | 
