forked from mirrors/gecko-dev
		
	 02a7b4ebdf
			
		
	
	
		02a7b4ebdf
		
	
	
	
	
		
			
			Allow-list all Python code in tree for use with the black linter, and re-format all code in-tree accordingly. To produce this patch I did all of the following: 1. Make changes to tools/lint/black.yml to remove include: stanza and update list of source extensions. 2. Run ./mach lint --linter black --fix 3. Make some ad-hoc manual updates to python/mozbuild/mozbuild/test/configure/test_configure.py -- it has some hard-coded line numbers that the reformat breaks. 4. Make some ad-hoc manual updates to `testing/marionette/client/setup.py`, `testing/marionette/harness/setup.py`, and `testing/firefox-ui/harness/setup.py`, which have hard-coded regexes that break after the reformat. 5. Add a set of exclusions to black.yml. These will be deleted in a follow-up bug (1672023). # ignore-this-changeset Differential Revision: https://phabricator.services.mozilla.com/D94045
		
			
				
	
	
		
			152 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # This Source Code Form is subject to the terms of the Mozilla Public
 | |
| # License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | |
| 
 | |
| import codecs
 | |
| import encodings.idna
 | |
| import imp
 | |
| import os
 | |
| import re
 | |
| import sys
 | |
| from make_dafsa import words_to_cxx, words_to_bin
 | |
| 
 | |
| """
 | |
| Processes a file containing effective TLD data.  See the following URL for a
 | |
| description of effective TLDs and of the file format that this script
 | |
| processes (although for the latter you're better off just reading this file's
 | |
| short source code).
 | |
| 
 | |
| http://wiki.mozilla.org/Gecko:Effective_TLD_Service
 | |
| """
 | |
| 
 | |
| 
 | |
def getEffectiveTLDs(path):
    """
    Generator yielding one EffectiveTLDEntry per rule line of the file at
    path, in file order.

    The file is decoded as UTF-8.  Lines that start with "//" and lines that
    contain only whitespace are skipped.  For every remaining line, only the
    text before the first space, tab or newline is passed to
    EffectiveTLDEntry.

    An assertion fails if two entries report the same domain().
    """
    domains = set()
    # The with-block closes the file as soon as the generator is exhausted or
    # closed, instead of leaving the handle to the garbage collector.
    with codecs.open(path, "r", "UTF-8") as tld_file:
        for line in tld_file:
            # line always contains a line terminator unless the file is empty
            if len(line) == 0:
                # A bare return is how a generator finishes; raising
                # StopIteration inside a generator body is converted into a
                # RuntimeError (PEP 479).
                return
            line = line.rstrip()
            # comment, empty, or superfluous line for explicitness purposes
            if line.startswith("//") or not line.strip():
                continue
            # Keep only the rule itself; anything after the first whitespace
            # character on the line is ignored.
            line = re.split(r"[ \t\n]", line, maxsplit=1)[0]
            entry = EffectiveTLDEntry(line)
            domain = entry.domain()
            assert domain not in domains, "repeating domain %s makes no sense" % domain
            domains.add(domain)
            yield entry
 | |
| 
 | |
| 
 | |
def _normalizeHostname(domain):
    """
    Returns domain with every dot-separated label normalized: labels made up
    only of ASCII characters are lowercased, and any other label is run
    through the IDNA ToASCII algorithm and decoded back to a string.
    """
    normalized = []
    for label in domain.split("."):
        if _isASCII(label):
            normalized.append(label.lower())
        else:
            # ToASCII returns bytes; decode so that every label is a string
            normalized.append(encodings.idna.ToASCII(label).decode("utf-8"))
    return ".".join(normalized)
 | |
| 
 | |
| 
 | |
| def _isASCII(s):
 | |
|     "True if s consists entirely of ASCII characters, false otherwise."
 | |
|     for c in s:
 | |
|         if ord(c) > 127:
 | |
|             return False
 | |
|     return True
 | |
| 
 | |
| 
 | |
class EffectiveTLDEntry:
    """
    A single rule read from an effective-TLD name file.
    """

    # Class-level defaults; __init__ overrides one of them on the instance
    # only when the line carries the matching prefix.
    _exception = False
    _wild = False

    def __init__(self, line):
        """
        Builds the entry from one line of data.  The line must already have
        had its line ending removed.

        A leading "!" marks an exception rule and a leading "*." marks a
        wildcard rule; the prefix is dropped and the rest is normalized with
        _normalizeHostname.
        """
        rest = line
        if line.startswith("!"):
            self._exception = True
            rest = line[1:]
        elif line.startswith("*."):
            self._wild = True
            rest = line[2:]
        self._domain = _normalizeHostname(rest)

    def domain(self):
        "The normalized domain of this entry, without any rule prefix."
        return self._domain

    def exception(self):
        "True if this entry's domain does not denote an effective TLD."
        return self._exception

    def wild(self):
        "True if this entry stands for a whole class of effective TLDs."
        return self._wild
 | |
| 
 | |
| 
 | |
| #################
 | |
| # DO EVERYTHING #
 | |
| #################
 | |
| 
 | |
| 
 | |
def main(output, effective_tld_filename, output_format="cxx"):
    """
    Parses the effective TLD file named by effective_tld_filename, builds a
    DAFSA from its entries and writes the result to output.

    When output_format is "bin" the result of words_to_bin() is written;
    for any other value the result of words_to_cxx() is written.
    """

    def typeEnum(etld):
        """
        Translates an entry's flags into the DAFSA enum value:
        1 for an exception rule, 2 for a wildcard rule, 0 otherwise.
        """
        if etld.exception():
            return 1
        return 2 if etld.wild() else 0

    # make_dafsa expects lines of the form "<domain_name><enum_value>"
    words = (
        "%s%d" % (etld.domain(), typeEnum(etld))
        for etld in getEffectiveTLDs(effective_tld_filename)
    )

    # words_to_bin() returns a bytes while words_to_cxx() returns string
    serialize = words_to_bin if output_format == "bin" else words_to_cxx
    output.write(serialize(words))
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # This program can output the DAFSA in two formats:
    # as C++ code that will be included and compiled at build time
    # or as a binary file that will be published in Remote Settings.
    #
    # Usage: <script> <effective_tld_filename> [--bin]
    #
    # Format options:
    #   (no flag) -> "cxx", C++ array [default]
    #   --bin     -> "bin", Binary file
    cli_args = sys.argv[1:]
    output_format = "bin" if "--bin" in cli_args else "cxx"

    # The input file is the first argument that is not the --bin flag, so the
    # flag may be given before or after the filename.
    positional = [arg for arg in cli_args if arg != "--bin"]
    if not positional:
        sys.exit("usage: %s <effective_tld_filename> [--bin]" % sys.argv[0])

    # The "bin" format is written as bytes, which a text-mode sys.stdout
    # rejects with TypeError.  Write through the underlying binary buffer
    # when the stream has one, and fall back to the stream itself otherwise.
    if output_format == "bin":
        output = getattr(sys.stdout, "buffer", sys.stdout)
    else:
        output = sys.stdout

    main(output, positional[0], output_format=output_format)
 |