Skip to content
Snippets Groups Projects
Commit 459f1275 authored by henry's avatar henry
Browse files

fixup! Bug 42305: Add script to combine translation files across versions.

Bug 43156: Add an option to also include strings from a legacy branch.

Also, instead of using the tagger date to find the highest version
branch, we use version ordering.
parent 217346c0
Branches
Tags
1 merge request!1231Translation CI: Also include strings from the legacy branch
......@@ -67,136 +67,227 @@ def git_lines(git_args: list[str]) -> list[str]:
return [line for line in git_text(git_args).split("\n") if line]
def git_file_paths(git_ref: str) -> list[str]:
"""Get the full list of file paths found under the given tree.
class BrowserBranch:
"""Represents a browser git branch."""
:param git_ref: The git reference for the tree to search.
:returns: The found file paths.
def __init__(self, branch_name: str, is_head: bool = False) -> None:
"""Create a new instance.
:param branch_name: The branch's git name.
:param is_head: Whether the branch matches "HEAD".
"""
return git_lines(["ls-tree", "-r", "--format=%(path)", git_ref])
version_match = re.match(
r"(?P<prefix>[a-z]+\-browser)\-"
r"(?P<firefox>[0-9]+(?:\.[0-9]+){1,2})esr\-"
r"(?P<browser>[0-9]+\.[05])\-"
r"(?P<number>[0-9]+)$",
branch_name,
)
if not version_match:
raise ValueError(f"Unable to parse the version from the ref {branch_name}")
self.name = branch_name
self.prefix = version_match.group("prefix")
self.browser_version = version_match.group("browser")
self._is_head = is_head
self._ref = "HEAD" if is_head else f"origin/{branch_name}"
firefox_nums = [int(n) for n in version_match.group("firefox").split(".")]
if len(firefox_nums) == 2:
firefox_nums.append(0)
browser_nums = [int(n) for n in self.browser_version.split(".")]
branch_number = int(version_match.group("number"))
# Prioritise the firefox ESR version, then the browser version then the
# branch number.
self._ordered = (
firefox_nums[0],
firefox_nums[1],
firefox_nums[2],
browser_nums[0],
browser_nums[1],
branch_number,
)
# Minor version for browser is only ever "0" or "5", so we can convert
# the version to an integer.
self._browser_int_version = int(2 * float(self.browser_version))
self._file_paths: list[str] | None = None
def release_below(self, other: "BrowserBranch", num: int) -> bool:
"""Determine whether another branch is within range of a previous
browser release.
def matching_path(search_paths: list[str], filename: str) -> str | None:
"""Get the matching file path with the given filename, if it exists.
The browser versions are expected to increment by "0.5", and a previous
release branch's version is expected to be `num * 0.5` behind the
current one.
:param search_paths: The file paths to search through.
:param filename: The file name to match.
:returns: The unique file path with the matching name, or None if no such
match was found.
:throws Exception: If multiple paths shared the same file name.
:param other: The branch to compare.
:param num: The number of "0.5" releases behind to test with.
"""
matching = [path for path in search_paths if os.path.basename(path) == filename]
if not matching:
return None
if len(matching) > 1:
raise Exception("Multiple occurrences of {filename}")
return matching[0]
return other._browser_int_version == self._browser_int_version - num
def __lt__(self, other: "BrowserBranch") -> bool:
return self._ordered < other._ordered
def __gt__(self, other: "BrowserBranch") -> bool:
return self._ordered > other._ordered
def git_file_content(git_ref: str, path: str | None) -> str | None:
"""Get the file content of the specified git blob object.
def get_file_content(self, filename: str) -> str | None:
"""Fetch the file content for the named file in this branch.
:param git_ref: The reference for the tree to find the file under.
:param path: The file path for the object, or None if there is no path.
:returns: The file content, or None if no path was given.
:param filename: The name of the file to fetch the content for.
:returns: The file content, or `None` if no file could be found.
"""
if path is None:
if self._file_paths is None:
if not self._is_head:
# Minimal fetch of non-HEAD branch to get the file paths.
# Individual file blobs will be downloaded as needed.
git_run(
["fetch", "--depth=1", "--filter=blob:none", "origin", self._ref]
)
self._file_paths = git_lines(
["ls-tree", "-r", "--format=%(path)", self._ref]
)
matching = [
path for path in self._file_paths if os.path.basename(path) == filename
]
if not matching:
return None
return git_text(["cat-file", "blob", f"{git_ref}:{path}"])
if len(matching) > 1:
raise Exception(f"Multiple occurrences of {filename}")
path = matching[0]
return git_text(["cat-file", "blob", f"{self._ref}:{path}"])
def get_stable_branch(branch_prefix: str) -> str:
def get_stable_branch(
compare_version: BrowserBranch,
) -> tuple[BrowserBranch, BrowserBranch | None]:
"""Find the most recent stable branch in the origin repository.
:param branch_prefix: The prefix that the stable branch should have.
:returns: The branch name.
:param compare_version: The development branch to compare against.
:returns: The stable and legacy branches. If no legacy branch is found,
`None` will be returned instead.
"""
tag_glob = f"{branch_prefix}-*-build1"
# We search for build1 tags. These are added *after* the rebase of browser
# commits, so the corresponding branch should contain our strings.
# Moreover, we *assume* that the branch with the most recent ESR version
# with such a tag will be used in the *next* stable build in
# tor-browser-build.
tag_glob = f"{compare_version.prefix}-*esr-*-*-build1"
# To speed up, only fetch the tags without blobs.
git_run(
["fetch", "--depth=1", "--filter=object:type=tag", "origin", "tag", tag_glob]
)
# Get most recent stable tag.
stable_branches = []
legacy_branches = []
stable_annotation_regex = re.compile(r"\bstable\b")
legacy_annotation_regex = re.compile(r"\blegacy\b")
for build_tag, annotation in (
line.split(" ", 1)
for line in git_lines(["tag", "-n1", "--list", tag_glob, "--sort=-taggerdate"])
line.split(" ", 1) for line in git_lines(["tag", "-n1", "--list", tag_glob])
):
if "stable" in annotation:
is_stable = bool(stable_annotation_regex.search(annotation))
is_legacy = bool(legacy_annotation_regex.search(annotation))
if not is_stable and not is_legacy:
continue
try:
# Branch name is the same as the tag, minus "-build1".
return re.sub(r"-build1$", "", build_tag)
raise Exception("No stable build1 tag found")
branch = BrowserBranch(re.sub(r"-build1$", "", build_tag))
except ValueError:
logger.warning(f"Could not read the version for {build_tag}")
continue
if branch.prefix != compare_version.prefix:
continue
if is_stable:
# Stable can be one release version behind.
# NOTE: In principle, when switching between versions there may be a
# window of time where the development branch has not yet progressed
# to the next "0.5" release, so has the same browser version as the
# stable branch. So we also allow for matching browser versions.
# NOTE:
# 1. The "Will be unused in" message will not make sense, but we do
# not expect string differences in this scenario.
# 2. We do not expect this scenario to last for long.
if not (
compare_version.release_below(branch, 1)
or compare_version.release_below(branch, 0)
):
continue
stable_branches.append(branch)
elif is_legacy:
# Legacy can be two release versions behind.
# We also allow for being just one version behind.
if not (
compare_version.release_below(branch, 2)
or compare_version.release_below(branch, 1)
):
continue
legacy_branches.append(branch)
def get_version_from_branch_name(branch_name: str) -> tuple[str, float]:
"""Get the branch prefix and version from its name.
if not stable_branches:
raise Exception("No stable build1 branch found")
:param branch_name: The branch to extract from.
:returns: The branch prefix and its version number.
"""
version_match = re.match(
r"([a-z-]+)-[^-]*-([0-9]+\.[05])-",
branch_name,
return (
# Return the stable branch with the highest version.
max(stable_branches),
max(legacy_branches) if legacy_branches else None,
)
if not version_match:
raise ValueError(f"Unable to parse the version from the branch {branch_name}")
return (version_match.group(1), float(version_match.group(2)))
branch_prefix, current_version = get_version_from_branch_name(args.current_branch)
current_branch = BrowserBranch(args.current_branch, is_head=True)
stable_branch = get_stable_branch(branch_prefix)
_, stable_version = get_version_from_branch_name(stable_branch)
stable_branch, legacy_branch = get_stable_branch(current_branch)
if stable_version > current_version or stable_version < current_version - 0.5:
raise Exception(
f"Version of stable branch {stable_branch} is not within 0.5 of the "
f"current branch {args.current_branch}"
)
# Minimal fetch of stable_branch.
# Individual file blobs will be downloaded as needed.
git_run(["fetch", "--depth=1", "--filter=blob:none", "origin", stable_branch])
if os.environ.get("TRANSLATION_INCLUDE_LEGACY", "") != "true":
legacy_branch = None
current_file_paths = git_file_paths("HEAD")
old_file_paths = git_file_paths(f"origin/{stable_branch}")
ci_commit = os.environ.get("CI_COMMIT_SHA", "")
ci_url_base = os.environ.get("CI_PROJECT_URL", "")
json_data = {
"commit": ci_commit,
"commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
if (ci_commit and ci_url_base)
else "",
"project-path": os.environ.get("CI_PROJECT_PATH", ""),
"current-branch": args.current_branch,
"stable-branch": stable_branch,
"files": [],
}
files_list = []
for translation_branch, name in (
part.strip().split(":", 1) for part in args.filenames.split(" ") if part.strip()
):
current_path = matching_path(current_file_paths, name)
old_path = matching_path(old_file_paths, name)
current_content = current_branch.get_file_content(name)
stable_content = stable_branch.get_file_content(name)
if current_path is None and old_path is None:
if current_content is None and stable_content is None:
# No file in either branch.
logger.warning(f"{name} does not exist in either the current or stable branch")
elif current_path is None:
elif current_content is None:
logger.warning(f"{name} deleted in the current branch")
elif old_path is None:
elif stable_content is None:
logger.warning(f"{name} does not exist in the stable branch")
content = combine_files(
name,
git_file_content("HEAD", current_path),
git_file_content(f"origin/{stable_branch}", old_path),
f"Will be unused in Tor Browser {current_version}!",
current_content,
stable_content,
f"Will be unused in Tor Browser {current_branch.browser_version}!",
)
json_data["files"].append(
if legacy_branch:
legacy_content = legacy_branch.get_file_content(name)
if (
legacy_content is not None
and current_content is None
and stable_content is None
):
logger.warning(f"{name} still exists in the legacy branch")
elif legacy_content is None:
logger.warning(f"{name} does not exist in the legacy branch")
content = combine_files(
name,
content,
legacy_content,
f"Unused in Tor Browser {stable_branch.browser_version}!",
)
files_list.append(
{
"name": name,
"branch": translation_branch,
......@@ -204,5 +295,23 @@ for translation_branch, name in (
}
)
ci_commit = os.environ.get("CI_COMMIT_SHA", "")
ci_url_base = os.environ.get("CI_PROJECT_URL", "")
json_data = {
"commit": ci_commit,
"commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
if (ci_commit and ci_url_base)
else "",
"project-path": os.environ.get("CI_PROJECT_PATH", ""),
"current-branch": current_branch.name,
"stable-branch": stable_branch.name,
"files": files_list,
}
if legacy_branch:
json_data["legacy-branch"] = legacy_branch.name
with open(args.outname, "w") as file:
json.dump(json_data, file)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment