forked from mirrors/gecko-dev
80 lines
2.2 KiB
Python
80 lines
2.2 KiB
Python
"""
|
|
Generate a table of unique items.
|
|
|
|
The `UniqueTable` class collects items into an array, removing duplicates. Each
|
|
item is mapped to its offset in the final array.
|
|
|
|
This is a compression technique for compile-time generated tables.
|
|
"""
|
|
|
|
try:
|
|
from typing import Any, List, Dict, Tuple, Sequence # noqa
|
|
except ImportError:
|
|
pass
|
|
|
|
|
|
class UniqueTable:
    """
    De-duplicating collection of hashable items.

    `table` holds every distinct item in first-insertion order and `index`
    maps each item back to its position in `table`.
    """

    def __init__(self):
        # type: () -> None
        # Distinct items, in the order they were first added.
        self.table = []  # type: List[Any]
        # Reverse lookup: item -> position in `self.table`.
        self.index = {}  # type: Dict[Any, int]

    def add(self, item):
        # type: (Any) -> int
        """
        Insert `item` unless an equal item has already been added.

        Return the position of the item in `self.table`; for an item seen
        before, this is the position assigned when it was first added.
        """
        known = self.index.get(item)
        if known is not None:
            # Positions are ints, so `None` can only mean "not present".
            return known

        position = len(self.table)
        self.table.append(item)
        self.index[item] = position
        return position
|
|
|
|
|
|
class UniqueSeqTable:
    """
    De-duplicating collection of item sequences, stored flattened in `table`.

    Sequences may have different lengths. A sequence is reused only when it
    is equal to a contiguous run inside a single previously added sequence.
    """

    def __init__(self):
        # type: () -> None
        # Flattened items of every sequence that had to be appended.
        self.table = []  # type: List[Any]
        # Reverse lookup: tuple of items -> offset of that run in `table`.
        self.index = {}  # type: Dict[Tuple[Any, ...], int]

    def add(self, seq):
        # type: (Sequence[Any]) -> int
        """
        Store `seq` in the table, reusing an existing run of equal items when
        one has been recorded.

        Return the offset into `self.table` at which the items of `seq`
        start. An empty `seq` leaves the table untouched and yields 0.
        """
        if len(seq) == 0:
            return 0

        items = tuple(seq)
        known = self.index.get(items)
        if known is not None:
            # Offsets are ints, so `None` can only mean "not recorded".
            return known

        start = len(self.table)
        self.table.extend(items)

        # Record `items` and every contiguous sub-sequence of it, shortest
        # first and left to right. When the same run is produced more than
        # once (or was recorded earlier), the latest offset replaces the
        # previous one.
        count = len(items)
        self.index.update(
            (items[lo:lo + width], start + lo)
            for width in range(1, count + 1)
            for lo in range(count - width + 1)
        )

        return start
|