mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	 0d5fd96880
			
		
	
	
		0d5fd96880
		
	
	
	
	
		
			
			The undefined logic is complex and has lots of magic on it. Implement it, using the same algorithm we have at get_abi.pl. Yet, some tweaks to optimize performance and to make the code simpler were added here: - at the perl version, the tree graph had loops, so we had to use BFS to traverse it. On this version, the graph is a tree, so, it simplifies the what group for sysfs aliases; - the logic which splits regular expressions into subgroups was re-written to make it faster; - it may optionally use multiple processes to search for symbol matches; - it has some additional debug levels. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet <corbet@lwn.net> Link: https://lore.kernel.org/r/1529c255845d117696d5af57d8dc05554663afdf.1739182025.git.mchehab+huawei@kernel.org
		
			
				
	
	
		
			234 lines
		
	
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			234 lines
		
	
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| # xxpylint: disable=R0903
 | |
| # Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
 | |
| # SPDX-License-Identifier: GPL-2.0
 | |
| 
 | |
| """
 | |
| Convert ABI what into regular expressions
 | |
| """
 | |
| 
 | |
| import re
 | |
| import sys
 | |
| 
 | |
| from pprint import pformat
 | |
| 
 | |
| from abi_parser import AbiParser
 | |
| from helpers import AbiDebug
 | |
| 
 | |
| class AbiRegex(AbiParser):
 | |
|     """Extends AbiParser to search ABI nodes with regular expressions"""
 | |
| 
 | |
|     # Escape only ASCII visible characters
 | |
|     escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
 | |
|     leave_others = "others"
 | |
| 
 | |
|     # Tuples with regular expressions to be compiled and replacement data
 | |
|     re_whats = [
 | |
|         # Drop escape characters that might exist
 | |
|         (re.compile("\\\\"), ""),
 | |
| 
 | |
|         # Temporarily escape dot characters
 | |
|         (re.compile(r"\."),  "\xf6"),
 | |
| 
 | |
|         # Temporarily change [0-9]+ type of patterns
 | |
|         (re.compile(r"\[0\-9\]\+"),  "\xff"),
 | |
| 
 | |
|         # Temporarily change [\d+-\d+] type of patterns
 | |
|         (re.compile(r"\[0\-\d+\]"),  "\xff"),
 | |
|         (re.compile(r"\[0:\d+\]"),  "\xff"),
 | |
|         (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),
 | |
| 
 | |
|         # Temporarily change [0-9] type of patterns
 | |
|         (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\1-\2\xf5"),
 | |
| 
 | |
|         # Handle multiple option patterns
 | |
|         (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
 | |
| 
 | |
|         # Handle wildcards
 | |
|         (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
 | |
|         (re.compile(r"/\*/"), "/.*/"),
 | |
|         (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
 | |
|         (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
 | |
|         (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
 | |
|         (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
 | |
| 
 | |
|         (re.compile(r"XX+"), "\\\\w\xf7"),
 | |
|         (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
 | |
|         (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
 | |
|         (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
 | |
| 
 | |
|         # Recover [0-9] type of patterns
 | |
|         (re.compile(r"\xf4"), "["),
 | |
|         (re.compile(r"\xf5"),  "]"),
 | |
| 
 | |
|         # Remove duplicated spaces
 | |
|         (re.compile(r"\s+"), r" "),
 | |
| 
 | |
|         # Special case: drop comparison as in:
 | |
|         # What: foo = <something>
 | |
|         # (this happens on a few IIO definitions)
 | |
|         (re.compile(r"\s*\=.*$"), ""),
 | |
| 
 | |
|         # Escape all other symbols
 | |
|         (re.compile(escape_symbols), r"\\\1"),
 | |
|         (re.compile(r"\\\\"), r"\\"),
 | |
|         (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
 | |
|         (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
 | |
| 
 | |
|         (re.compile(r"\xff"), r"\\d+"),
 | |
| 
 | |
|         # Special case: IIO ABI which a parenthesis.
 | |
|         (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
 | |
| 
 | |
|         # Simplify regexes with multiple .*
 | |
|         (re.compile(r"(?:\.\*){2,}"),  ""),
 | |
| 
 | |
|         # Recover dot characters
 | |
|         (re.compile(r"\xf6"), "\\."),
 | |
|         # Recover plus characters
 | |
|         (re.compile(r"\xf7"), "+"),
 | |
|     ]
 | |
|     re_has_num = re.compile(r"\\d")
 | |
| 
 | |
|     # Symbol name after escape_chars that are considered a devnode basename
 | |
|     re_symbol_name =  re.compile(r"(\w|\\[\.\-\:])+$")
 | |
| 
 | |
|     # List of popular group names to be skipped to minimize regex group size
 | |
|     # Use AbiDebug.SUBGROUP_SIZE to detect those
 | |
|     skip_names = set(["devices", "hwmon"])
 | |
| 
 | |
|     def regex_append(self, what, new):
 | |
|         """
 | |
|         Get a search group for a subset of regular expressions.
 | |
| 
 | |
|         As ABI may have thousands of symbols, using a for to search all
 | |
|         regular expressions is at least O(n^2). When there are wildcards,
 | |
|         the complexity increases substantially, eventually becoming exponential.
 | |
| 
 | |
|         To avoid spending too much time on them, use a logic to split
 | |
|         them into groups. The smaller the group, the better, as it would
 | |
|         mean that searches will be confined to a small number of regular
 | |
|         expressions.
 | |
| 
 | |
|         The conversion to a regex subset is tricky, as we need something
 | |
|         that can be easily obtained from the sysfs symbol and from the
 | |
|         regular expression. So, we need to discard nodes that have
 | |
|         wildcards.
 | |
| 
 | |
|         If it can't obtain a subgroup, place the regular expression inside
 | |
|         a special group (self.leave_others).
 | |
|         """
 | |
| 
 | |
|         search_group = None
 | |
| 
 | |
|         for search_group in reversed(new.split("/")):
 | |
|             if not search_group or search_group in self.skip_names:
 | |
|                 continue
 | |
|             if self.re_symbol_name.match(search_group):
 | |
|                 break
 | |
| 
 | |
|         if not search_group:
 | |
|             search_group = self.leave_others
 | |
| 
 | |
|         if self.debug & AbiDebug.SUBGROUP_MAP:
 | |
|             self.log.debug("%s: mapped as %s", what, search_group)
 | |
| 
 | |
|         try:
 | |
|             if search_group not in self.regex_group:
 | |
|                 self.regex_group[search_group] = []
 | |
| 
 | |
|             self.regex_group[search_group].append(re.compile(new))
 | |
|             if self.search_string:
 | |
|                 if what.find(self.search_string) >= 0:
 | |
|                     print(f"What: {what}")
 | |
|         except re.PatternError:
 | |
|             self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
 | |
|                              "           '%s'", what, new)
 | |
| 
 | |
|     def get_regexes(self, what):
 | |
|         """
 | |
|         Given an ABI devnode, return a list of all regular expressions that
 | |
|         may match it, based on the sub-groups created by regex_append()
 | |
|         """
 | |
| 
 | |
|         re_list = []
 | |
| 
 | |
|         patches = what.split("/")
 | |
|         patches.reverse()
 | |
|         patches.append(self.leave_others)
 | |
| 
 | |
|         for search_group in patches:
 | |
|             if search_group in self.regex_group:
 | |
|                 re_list += self.regex_group[search_group]
 | |
| 
 | |
|         return re_list
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|         """
 | |
|         Override init method to get verbose argument
 | |
|         """
 | |
| 
 | |
|         self.regex_group = None
 | |
|         self.search_string = None
 | |
|         self.re_string = None
 | |
| 
 | |
|         if "search_string" in kwargs:
 | |
|             self.search_string = kwargs.get("search_string")
 | |
|             del kwargs["search_string"]
 | |
| 
 | |
|             if self.search_string:
 | |
| 
 | |
|                 try:
 | |
|                     self.re_string = re.compile(self.search_string)
 | |
|                 except re.PatternError as e:
 | |
|                     msg = f"{self.search_string} is not a valid regular expression"
 | |
|                     raise ValueError(msg) from e
 | |
| 
 | |
|         super().__init__(*args, **kwargs)
 | |
| 
 | |
|     def parse_abi(self, *args, **kwargs):
 | |
| 
 | |
|         super().parse_abi(*args, **kwargs)
 | |
| 
 | |
|         self.regex_group = {}
 | |
| 
 | |
|         print("Converting ABI What fields into regexes...", file=sys.stderr)
 | |
| 
 | |
|         for t in sorted(self.data.items(), key=lambda x: x[0]):
 | |
|             v = t[1]
 | |
|             if v.get("type") == "File":
 | |
|                 continue
 | |
| 
 | |
|             v["regex"] = []
 | |
| 
 | |
|             for what in v.get("what", []):
 | |
|                 if not what.startswith("/sys"):
 | |
|                     continue
 | |
| 
 | |
|                 new = what
 | |
|                 for r, s in self.re_whats:
 | |
|                     try:
 | |
|                         new = r.sub(s, new)
 | |
|                     except re.PatternError as e:
 | |
|                         # Help debugging troubles with new regexes
 | |
|                         raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
 | |
| 
 | |
|                 v["regex"].append(new)
 | |
| 
 | |
|                 if self.debug & AbiDebug.REGEX:
 | |
|                     self.log.debug("%-90s <== %s", new, what)
 | |
| 
 | |
|                 # Store regex into a subgroup to speedup searches
 | |
|                 self.regex_append(what, new)
 | |
| 
 | |
|         if self.debug & AbiDebug.SUBGROUP_DICT:
 | |
|             self.log.debug("%s", pformat(self.regex_group))
 | |
| 
 | |
|         if self.debug & AbiDebug.SUBGROUP_SIZE:
 | |
|             biggestd_keys = sorted(self.regex_group.keys(),
 | |
|                                    key= lambda k: len(self.regex_group[k]),
 | |
|                                    reverse=True)
 | |
| 
 | |
|             print("Top regex subgroups:", file=sys.stderr)
 | |
|             for k in biggestd_keys[:10]:
 | |
|                 print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
 |