mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	This patch adds the functionality to smartly identify the origin commit of the translation by matching the following patterns in the commit log: 1) update to commit HASH 2) Update the translation through commit HASH If no such pattern is found, the script will follow the original workflow. Signed-off-by: Zhiyu Zhang <zhiyuzhang999@gmail.com> Reviewed-by: Dongliang Mu <dzm91@hust.edu.cn> Signed-off-by: Jonathan Corbet <corbet@lwn.net> Link: https://lore.kernel.org/r/20250713163418.1459-1-zhiyuzhang999@gmail.com
		
			
				
	
	
		
			307 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			307 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/env python3
 | 
						|
# SPDX-License-Identifier: GPL-2.0
 | 
						|
 | 
						|
"""
 | 
						|
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be updated.
 | 
						|
 | 
						|
The usage is as follows:
 | 
						|
- ./scripts/checktransupdate.py -l zh_CN
 | 
						|
This will print all the files that need to be updated or translated in the zh_CN locale.
 | 
						|
- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
 | 
						|
This will only print the status of the specified file.
 | 
						|
 | 
						|
The output is something like:
 | 
						|
Documentation/dev-tools/kfence.rst
 | 
						|
No translation in the locale of zh_CN
 | 
						|
 | 
						|
Documentation/translations/zh_CN/dev-tools/testing-overview.rst
 | 
						|
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
 | 
						|
1 commits needs resolving in total
 | 
						|
"""
 | 
						|
 | 
						|
import os
 | 
						|
import re
 | 
						|
import time
 | 
						|
import logging
 | 
						|
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
 | 
						|
from datetime import datetime
 | 
						|
 | 
						|
 | 
						|
def get_origin_path(file_path):
    """Map a translation path to the path of the document it translates.

    The "translations" component and the component right after it (the
    locale) are removed, so ``Documentation/translations/zh_CN/a/b.rst``
    becomes ``Documentation/a/b.rst``.

    Raises ValueError when no component equals "translations".
    """
    components = file_path.split("/")
    marker = components.index("translations")
    # Drop "translations" itself and the locale directory following it.
    del components[marker : marker + 2]
    return "/".join(components)
 | 
						|
 | 
						|
 | 
						|
def get_latest_commit_from(file_path, commit):
    """Get the latest commit from the specified commit for the specified file.

    Runs ``git log -1`` (in the current working directory) starting at
    `commit` and restricted to `file_path`, then parses its output.

    Returns None when git prints fewer than the three header lines (for
    example: unknown revision, no commit touching the file, or git failing).
    Otherwise returns a dict with the keys:
      - "hash": the full commit hash
      - "author_date", "commit_date": datetime objects parsed with "%z",
        i.e. carrying the UTC offset printed by git
      - "message": list of lines of the raw commit message
    """
    # Output layout: hash, author date, committer date, blank line, raw body.
    command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
    logging.debug(command)
    # The context manager closes the pipe (and reaps the child) right away
    # instead of leaving that to garbage collection.
    with os.popen(command) as pipe:
        result = pipe.read().split("\n")

    # All three header lines must be present before they can be indexed.
    if len(result) < 3:
        return None

    logging.debug("Result: %s", result[0])

    date_format = "%a, %d %b %Y %H:%M:%S %z"
    return {
        "hash": result[0],
        "author_date": datetime.strptime(result[1], date_format),
        "commit_date": datetime.strptime(result[2], date_format),
        # result[3] is the blank separator line emitted by the format string.
        "message": result[4:],
    }
 | 
						|
 | 
						|
 | 
						|
def get_origin_from_trans(origin_path, t_from_head):
    """Find the origin commit the translation commit was presumably based on.

    Starting from the latest commit of `origin_path` reachable from the
    translation commit, step back through the origin file's history until a
    commit is found whose author date is not later than the translation
    commit's author date.

    Returns that commit dict, or None when the history is exhausted first.
    """
    candidate = get_latest_commit_from(origin_path, t_from_head["hash"])
    while True:
        if candidate is None:
            return None
        if candidate["author_date"] <= t_from_head["author_date"]:
            break
        # Authored after the translation: retry from this commit's parent.
        candidate = get_latest_commit_from(origin_path, candidate["hash"] + "^")
    logging.debug("tracked origin commit id: %s", candidate["hash"])
    return candidate
 | 
						|
 | 
						|
 | 
						|
def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Resolve the origin commit named explicitly in the translation commit.

    The translation commit message is scanned line by line for one of:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)
    where HASH is 12 lowercase hexadecimal characters. Matching is
    case-sensitive and the first matching line wins.

    Returns None when no line matches; otherwise the result of
    get_latest_commit_from() for `origin_path` at HASH (which may be None).
    """
    # capture group for a 12-character abbreviated commit hash
    HASH = r'([0-9a-f]{12})'
    # tried in this order on every line
    patterns = (
        re.compile(rf'update to commit {HASH}'),
        re.compile(rf'Update the translation through commit {HASH}'),
    )

    def referenced_hash(line):
        """Return the hash announced on `line`, or None if there is none."""
        for pattern in patterns:
            found = pattern.search(line)
            if found:
                return found.group(1)
        return None

    candidates = (referenced_hash(line) for line in t_from_head["message"])
    origin_commit_hash = next((h for h in candidates if h is not None), None)
    if origin_commit_hash is None:
        return None

    resolved = get_latest_commit_from(origin_path, origin_commit_hash)
    if resolved is None:
        return None
    logging.debug("tracked origin commit id: %s", resolved["hash"])
    return resolved
 | 
						|
 | 
						|
 | 
						|
def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Despite the name, this returns the list of full commit hashes printed by
    ``git log commit1...commit2 -- opath`` (run in the current working
    directory), not a number. The list is empty when git prints nothing.
    """
    command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
    logging.debug(command)
    # The context manager closes the pipe promptly instead of leaking it.
    with os.popen(command) as pipe:
        output = pipe.read()
    # filter out empty lines
    return [line for line in output.split("\n") if line != ""]
 | 
						|
 | 
						|
 | 
						|
def pretty_output(commit):
    """Return the one-line description of `commit`.

    The text is whatever ``git log -1`` prints for the format
    ``%h ("%s")``, i.e. abbreviated hash followed by the quoted subject.
    It is the empty string when git prints nothing on stdout.
    """
    command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
    logging.debug(command)
    # The context manager closes the pipe promptly instead of leaking it.
    with os.popen(command) as pipe:
        return pipe.read()
 | 
						|
 | 
						|
 | 
						|
def valid_commit(commit):
    """Tell whether `commit` should be counted as needing resolution.

    A commit is rejected when its one-line description, as produced by
    pretty_output(), contains the text "Merge tag".
    """
    return "Merge tag" not in pretty_output(commit)
 | 
						|
 | 
						|
def check_per_file(file_path):
    """Check the translation status for the specified file.

    `file_path` is the path of a translated document. The function derives
    the origin document path, determines which origin commit the translation
    corresponds to, and logs every origin commit made since then that still
    needs to be carried over. Nothing is returned; all results and errors
    are reported through the logging module.
    """
    opath = get_origin_path(file_path)

    if not os.path.isfile(opath):
        # Pass the path as a %-argument so it is actually interpolated.
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    # Prefer the origin commit named in the translation commit message;
    # fall back to the author-date heuristic when none is named.
    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(
        opath, o_from_t["hash"], o_from_head["hash"]
    )
    count = 0
    for commit in commits:
        # Commits rejected by valid_commit() are neither listed nor counted.
        if valid_commit(commit):
            logging.info("commit %s", pretty_output(commit))
            count += 1
    logging.info("%d commits needs resolving in total\n", count)
 | 
						|
 | 
						|
 | 
						|
def valid_locales(locale):
    """Check if the locale is valid or not.

    A locale is valid when ``Documentation/translations/<locale>`` is a
    directory under the parent of the directory containing this script.

    Returns `locale` unchanged when valid, so the function can be used as an
    argparse ``type=`` callable.

    Raises ArgumentTypeError naming the rejected locale otherwise.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        # f-string so the message shows the actual value, not "{locale}".
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale
 | 
						|
 | 
						|
 | 
						|
def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """Collect paths under `folder` whose names end with `include_suffix`.

    The tree is walked iteratively with an explicit stack. Any directory
    whose base name appears in `exclude_folders` is skipped together with
    everything below it (this applies to `folder` itself as well).

    Returns a list of paths built by joining onto `folder`.
    """
    collected = []
    pending = [folder]

    while pending:
        current = pending.pop()
        # skip excluded directories entirely
        if os.path.basename(current) in exclude_folders:
            continue
        for name in os.listdir(current):
            entry = os.path.join(current, name)
            if os.path.isdir(entry):
                # visit later
                pending.append(entry)
            elif entry.endswith(include_suffix):
                collected.append(entry)

    return collected
 | 
						|
 | 
						|
 | 
						|
class DmesgFormatter(logging.Formatter):
    """Logging formatter producing dmesg-like "[timestamp] message" lines."""

    def format(self, record):
        """Render `record` as "[<seconds>] <message>".

        The stamp is time.time() sampled when the record is formatted,
        printed with six decimals and right-aligned to a width of 10.
        """
        stamp = time.time()
        return f"[{stamp:>10.6f}] {record.getMessage()}"
 | 
						|
 | 
						|
 | 
						|
def config_logging(log_level, log_file="checktransupdate.log"):
    """Set up the root logger to write to the console and to `log_file`.

    The root logger and both handlers are set to `log_level`, and both
    handlers share one DmesgFormatter instance.
    """
    root = logging.getLogger()
    root.setLevel(log_level)

    # Build both handlers before attaching either: console first, then file.
    handlers = (logging.StreamHandler(), logging.FileHandler(log_file))
    dmesg_style = DmesgFormatter()
    for handler in handlers:
        handler.setLevel(log_level)
        handler.setFormatter(dmesg_style)
    for handler in handlers:
        root.addHandler(handler)
 | 
						|
 | 
						|
 | 
						|
def main():
    """Main function of the script.

    Parses the command line, configures logging, builds the list of
    translation files to inspect (either the ones given on the command line
    or every existing translation of a Documentation ``rst`` file for the
    selected locale), changes into the kernel tree root and checks each file.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    # Forward --logfile; otherwise the option would be silently ignored.
    config_logging(log_level, args.logfile)

    # Get files related to linux path
    files = args.files
    if not files:
        official_files = list_files_with_excluding_folders(
            os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
        )

        for file in official_files:
            # split the path into parts
            path_parts = file.split(os.sep)
            # find the index of the "Documentation" directory
            kindex = path_parts.index("Documentation")
            # insert the translations and locale after the Documentation directory
            new_path_parts = (
                path_parts[: kindex + 1]
                + ["translations", args.locale]
                + path_parts[kindex + 1 :]
            )
            # join the path parts back together
            new_file = os.sep.join(new_path_parts)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)

    # Resolve against the current directory before it is changed below.
    files = [os.path.relpath(os.path.abspath(x), linux_path) for x in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)
 | 
						|
 | 
						|
 | 
						|
# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
 |