Bug 42305: Add script to combine translation files across versions.

fb1af12e · henry · Pier Angelo Vendrame · e3c5fb2d · fb1af12e · fb1af12e
Commit fb1af12e authored Nov 30, 2023 by henry Committed by Pier Angelo Vendrame 4 months ago
--- a/tools/torbrowser/l10n/combine-translation-versions.py
+++ b/tools/torbrowser/l10n/combine-translation-versions.py
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+
+from combine import combine_files
+
+arg_parser = argparse.ArgumentParser(
+    description="Combine a translation file across two different versions"
+)
+
+arg_parser.add_argument(
+    "current_branch", metavar="<current-branch>", help="branch for the newest version"
+)
+arg_parser.add_argument(
+    "filenames", metavar="<filenames>", help="name of the translation files"
+)
+arg_parser.add_argument("outname", metavar="<json>", help="name of the json output")
+
+args = arg_parser.parse_args()
+
+logging.basicConfig()
+logger = logging.getLogger("combine-translation-versions")
+logger.setLevel(logging.INFO)
+
+
+def in_pink(msg: str) -> str:
+    """Present a message as pink in the terminal output.
+
+    :param msg: The message to wrap in pink.
+    :returns: The message to print to terminal.
+    """
+    # Pink and bold.
+    return f"\x1b[1;38;5;212m{msg}\x1b[0m"
+
+
+def git_run(git_args: list[str]) -> None:
+    """Run a git command.
+
+    :param git_args: The arguments that should follow "git".
+    """
+    # Add some text to give context to git's stderr appearing in log.
+    logger.info("Running: " + in_pink("git " + " ".join(git_args)))
+    subprocess.run(["git", *git_args], check=True)
+
+
+def git_text(git_args: list[str]) -> str:
+    """Get the text output for a git command.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: The stdout of the command.
+    """
+    logger.info("Running: " + in_pink("git " + " ".join(git_args)))
+    return subprocess.run(
+        ["git", *git_args], text=True, check=True, stdout=subprocess.PIPE
+    ).stdout
+
+
+def git_lines(git_args: list[str]) -> list[str]:
+    """Get the lines from a git command.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: The non-empty lines from stdout of the command.
+    """
+    return [line for line in git_text(git_args).split("\n") if line]
+
+
+class BrowserBranch:
+    """Represents a browser git branch."""
+
+    def __init__(self, branch_name: str, is_head: bool = False) -> None:
+        """Create a new instance.
+
+        :param branch_name: The branch's git name.
+        :param is_head: Whether the branch matches "HEAD".
+        """
+        version_match = re.match(
+            r"(?P<prefix>[a-z]+\-browser)\-"
+            r"(?P<firefox>[0-9]+(?:\.[0-9]+){1,2})esr\-"
+            r"(?P<browser>[0-9]+\.[05])\-"
+            r"(?P<number>[0-9]+)$",
+            branch_name,
+        )
+
+        if not version_match:
+            raise ValueError(f"Unable to parse the version from the ref {branch_name}")
+
+        self.name = branch_name
+        self.prefix = version_match.group("prefix")
+        self.browser_version = version_match.group("browser")
+        self._is_head = is_head
+        self._ref = "HEAD" if is_head else f"origin/{branch_name}"
+
+        firefox_nums = [int(n) for n in version_match.group("firefox").split(".")]
+        if len(firefox_nums) == 2:
+            firefox_nums.append(0)
+        browser_nums = [int(n) for n in self.browser_version.split(".")]
+        branch_number = int(version_match.group("number"))
+        # Prioritise the firefox ESR version, then the browser version then the
+        # branch number.
+        self._ordered = (
+            firefox_nums[0],
+            firefox_nums[1],
+            firefox_nums[2],
+            browser_nums[0],
+            browser_nums[1],
+            branch_number,
+        )
+
+        # Minor version for browser is only ever "0" or "5", so we can convert
+        # the version to an integer.
+        self._browser_int_version = int(2 * float(self.browser_version))
+
+        self._file_paths: list[str] | None = None
+
+    def release_below(self, other: "BrowserBranch", num: int) -> bool:
+        """Determine whether another branch is within range of a previous
+        browser release.
+
+        The browser versions are expected to increment by "0.5", and a previous
+        release branch's version is expected to be `num * 0.5` behind the
+        current one.
+
+        :param other: The branch to compare.
+        :param num: The number of "0.5" releases behind to test with.
+        """
+        return other._browser_int_version == self._browser_int_version - num
+
+    def __lt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered < other._ordered
+
+    def __gt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered > other._ordered
+
+    def get_file_content(self, filename: str) -> str | None:
+        """Fetch the file content for the named file in this branch.
+
+        :param filename: The name of the file to fetch the content for.
+        :returns: The file content, or `None` if no file could be found.
+        """
+        if self._file_paths is None:
+            if not self._is_head:
+                # Minimal fetch of non-HEAD branch to get the file paths.
+                # Individual file blobs will be downloaded as needed.
+                git_run(
+                    ["fetch", "--depth=1", "--filter=blob:none", "origin", self._ref]
+                )
+            self._file_paths = git_lines(
+                ["ls-tree", "-r", "--format=%(path)", self._ref]
+            )
+
+        matching = [
+            path for path in self._file_paths if os.path.basename(path) == filename
+        ]
+        if not matching:
+            return None
+        if len(matching) > 1:
+            raise Exception(f"Multiple occurrences of {filename}")
+
+        path = matching[0]
+
+        return git_text(["cat-file", "blob", f"{self._ref}:{path}"])
+
+
+def get_stable_branch(
+    compare_version: BrowserBranch,
+) -> tuple[BrowserBranch, BrowserBranch | None]:
+    """Find the most recent stable branch in the origin repository.
+
+    :param compare_version: The development branch to compare against.
+    :returns: The stable and legacy branches. If no legacy branch is found,
+      `None` will be returned instead.
+    """
+    # We search for build1 tags. These are added *after* the rebase of browser
+    # commits, so the corresponding branch should contain our strings.
+    # Moreover, we *assume* that the branch with the most recent ESR version
+    # with such a tag will be used in the *next* stable build in
+    # tor-browser-build.
+    tag_glob = f"{compare_version.prefix}-*esr-*-*-build1"
+
+    # To speed up, only fetch the tags without blobs.
+    git_run(
+        ["fetch", "--depth=1", "--filter=object:type=tag", "origin", "tag", tag_glob]
+    )
+    stable_branches = []
+    legacy_branches = []
+    stable_annotation_regex = re.compile(r"\bstable\b")
+    legacy_annotation_regex = re.compile(r"\blegacy\b")
+
+    for build_tag, annotation in (
+        line.split(" ", 1) for line in git_lines(["tag", "-n1", "--list", tag_glob])
+    ):
+        is_stable = bool(stable_annotation_regex.search(annotation))
+        is_legacy = bool(legacy_annotation_regex.search(annotation))
+        if not is_stable and not is_legacy:
+            continue
+        try:
+            # Branch name is the same as the tag, minus "-build1".
+            branch = BrowserBranch(re.sub(r"-build1$", "", build_tag))
+        except ValueError:
+            logger.warning(f"Could not read the version for {build_tag}")
+            continue
+        if branch.prefix != compare_version.prefix:
+            continue
+        if is_stable:
+            # Stable can be one release version behind.
+            # NOTE: In principle, when switching between versions there may be a
+            # window of time where the development branch has not yet progressed
+            # to the next "0.5" release, so has the same browser version as the
+            # stable branch. So we also allow for matching browser versions.
+            # NOTE:
+            # 1. The "Will be unused in" message will not make sense, but we do
+            #    not expect string differences in this scenario.
+            # 2. We do not expect this scenario to last for long.
+            if not (
+                compare_version.release_below(branch, 1)
+                or compare_version.release_below(branch, 0)
+            ):
+                continue
+            stable_branches.append(branch)
+        elif is_legacy:
+            # Legacy can be two release versions behind.
+            # We also allow for being just one version behind.
+            if not (
+                compare_version.release_below(branch, 2)
+                or compare_version.release_below(branch, 1)
+            ):
+                continue
+            legacy_branches.append(branch)
+
+    if not stable_branches:
+        raise Exception("No stable build1 branch found")
+
+    return (
+        # Return the stable branch with the highest version.
+        max(stable_branches),
+        max(legacy_branches) if legacy_branches else None,
+    )
+
+
+current_branch = BrowserBranch(args.current_branch, is_head=True)
+
+stable_branch, legacy_branch = get_stable_branch(current_branch)
+
+if os.environ.get("TRANSLATION_INCLUDE_LEGACY", "") != "true":
+    legacy_branch = None
+
+files_list = []
+
+for translation_branch, name in (
+    part.strip().split(":", 1) for part in args.filenames.split(" ") if part.strip()
+):
+    current_content = current_branch.get_file_content(name)
+    stable_content = stable_branch.get_file_content(name)
+
+    if current_content is None and stable_content is None:
+        # No file in either branch.
+        logger.warning(f"{name} does not exist in either the current or stable branch")
+    elif current_content is None:
+        logger.warning(f"{name} deleted in the current branch")
+    elif stable_content is None:
+        logger.warning(f"{name} does not exist in the stable branch")
+
+    content = combine_files(
+        name,
+        current_content,
+        stable_content,
+        f"Will be unused in Tor Browser {current_branch.browser_version}!",
+    )
+
+    if legacy_branch:
+        legacy_content = legacy_branch.get_file_content(name)
+        if (
+            legacy_content is not None
+            and current_content is None
+            and stable_content is None
+        ):
+            logger.warning(f"{name} still exists in the legacy branch")
+        elif legacy_content is None:
+            logger.warning(f"{name} does not exist in the legacy branch")
+        content = combine_files(
+            name,
+            content,
+            legacy_content,
+            f"Unused in Tor Browser {stable_branch.browser_version}!",
+        )
+
+    files_list.append(
+        {
+            "name": name,
+            "branch": translation_branch,
+            "content": content,
+        }
+    )
+
+
+ci_commit = os.environ.get("CI_COMMIT_SHA", "")
+ci_url_base = os.environ.get("CI_PROJECT_URL", "")
+
+json_data = {
+    "commit": ci_commit,
+    "commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
+    if (ci_commit and ci_url_base)
+    else "",
+    "project-path": os.environ.get("CI_PROJECT_PATH", ""),
+    "current-branch": current_branch.name,
+    "stable-branch": stable_branch.name,
+    "files": files_list,
+}
+
+if legacy_branch:
+    json_data["legacy-branch"] = legacy_branch.name
+
+with open(args.outname, "w") as file:
+    json.dump(json_data, file)
--- a/tools/torbrowser/l10n/combine/__init__.py
+++ b/tools/torbrowser/l10n/combine/__init__.py
+# flake8: noqa
+
+from .combine import combine_files
--- a/tools/torbrowser/l10n/combine/combine.py
+++ b/tools/torbrowser/l10n/combine/combine.py
+import re
+from typing import TYPE_CHECKING, Any
+
+from compare_locales.parser import getParser
+from compare_locales.parser.android import AndroidEntity, DocumentWrapper
+from compare_locales.parser.base import Comment, Entity, Junk, Whitespace
+from compare_locales.parser.dtd import DTDEntity
+from compare_locales.parser.fluent import FluentComment, FluentEntity
+from compare_locales.parser.properties import PropertiesEntity
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
+def combine_files(
+    filename: str,
+    new_content: str | None,
+    old_content: str | None,
+    comment_prefix: str,
+) -> str | None:
+    """Combine two translation files into one to include all strings from both.
+    The new content is presented first, and any strings only found in the old
+    content are placed at the end with an additional comment.
+
+    :param filename: The filename for the file, determines the format.
+    :param new_content: The new content for the file, or None if it has been
+      deleted.
+    :param old_content: The old content for the file, or None if it did not
+      exist before.
+    :comment_prefix: A comment to include for any strings that are only found in
+      the old content. This will be placed before any other comments for the
+      string.
+
+    :returns: The combined content, or None if both given contents are None.
+    """
+    if new_content is None and old_content is None:
+        return None
+
+    # getParser from compare_locale returns the same instance for the same file
+    # extension.
+    parser = getParser(filename)
+
+    is_android = filename.endswith(".xml")
+    if new_content is None:
+        if is_android:
+            # File was deleted, add some document parts.
+            content_start = (
+                '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<resources>\n'
+            )
+            content_end = "</resources>\n"
+        else:
+            # Treat as an empty file.
+            content_start = ""
+            content_end = ""
+        existing_keys = []
+    else:
+        parser.readUnicode(new_content)
+
+        # Start with the same content as the current file.
+        # For android strings, we want to keep the final "</resources>" until after.
+        if is_android:
+            closing_match = re.match(
+                r"^(.*)(</resources>\s*)$", parser.ctx.contents, re.DOTALL
+            )
+            if not closing_match:
+                raise ValueError("Missing a final </resources>")
+            content_start = closing_match.group(1)
+            content_end = closing_match.group(2)
+        else:
+            content_start = parser.ctx.contents
+            content_end = ""
+        existing_keys = [entry.key for entry in parser.walk(only_localizable=True)]
+
+    # For Fluent, we want to prefix the strings using GroupComments.
+    # On weblate this will cause all the strings that fall under the GroupComment's
+    # scope to have the prefix added to their "notes".
+    # We set up an initial GroupComment for the first string we find. This will also
+    # end the scope of the last GroupComment in the new translation file.
+    # This will be replaced with a the next GroupComment when it is found.
+    fluent_group_comment_prefix = f"\n## {comment_prefix}\n"
+    fluent_group_comment: str | None = fluent_group_comment_prefix
+
+    # For other formats, we want to keep all the comment lines that come directly
+    # before the string.
+    # In compare_locales.parser, only the comment line directly before an Entity
+    # counts as the pre_comment for that Entity. I.e. only this line will be
+    # included in Entity.all
+    # However, in weblate every comment line that comes before the Entity is
+    # included as a comment. So we also want to keep these additional comments to
+    # preserve them for weblate.
+    # We gather these extra comments in stacked_comments, and clear them whenever we
+    # reach an Entity or a blank line (Whitespace is more than "\n").
+    stacked_comments: list[str] = []
+
+    additions: list[str] = []
+
+    entry_iter: Iterable[Any] = ()
+    # If the file does not exist in the old branch, don't make any additions.
+    if old_content is not None:
+        parser.readUnicode(old_content)
+        entry_iter = parser.walk(only_localizable=False)
+    for entry in entry_iter:
+        if isinstance(entry, Junk):
+            raise ValueError(f"Unexpected Junk: {entry.all}")
+        if isinstance(entry, Whitespace):
+            # Clear stacked comments if more than one empty line.
+            if entry.all != "\n":
+                stacked_comments.clear()
+            continue
+        if isinstance(entry, Comment):
+            if isinstance(entry, FluentComment):
+                # Don't stack Fluent comments.
+                # Only the comments included in Entity.pre_comment count towards
+                # that Entity's comment.
+                if entry.all.startswith("##"):
+                    # A Fluent GroupComment
+                    if entry.all == "##":
+                        # Empty GroupComment. Used to end the scope of a previous
+                        # GroupComment.
+                        # Replace this with our prefix comment.
+                        fluent_group_comment = fluent_group_comment_prefix
+                    else:
+                        # Prefix the group comment.
+                        fluent_group_comment = (
+                            f"{fluent_group_comment_prefix}{entry.all}\n"
+                        )
+            else:
+                stacked_comments.append(entry.all)
+            continue
+        if isinstance(entry, DocumentWrapper):
+            # Not needed.
+            continue
+
+        if not isinstance(entry, Entity):
+            raise ValueError(f"Unexpected type: {entry.__class__.__name__}")
+
+        if entry.key in existing_keys:
+            # Already included this string in the new translation file.
+            # Drop the gathered comments for this Entity.
+            stacked_comments.clear()
+            continue
+
+        if isinstance(entry, FluentEntity):
+            if fluent_group_comment is not None:
+                # We have a found GroupComment which has not been included yet.
+                # All following Entity's will be under its scope, until the next
+                # GroupComment.
+                additions.append(fluent_group_comment)
+                # Added GroupComment, so don't need to add again.
+                fluent_group_comment = None
+        elif isinstance(entry, DTDEntity):
+            # Include our additional comment before we print the rest for this
+            # Entity.
+            additions.append(f"<!-- LOCALIZATION NOTE: {comment_prefix} -->")
+        elif isinstance(entry, PropertiesEntity):
+            additions.append(f"# {comment_prefix}")
+        elif isinstance(entry, AndroidEntity):
+            additions.append(f"<!-- {comment_prefix} -->")
+        else:
+            raise ValueError(f"Unexpected Entity type: {entry.__class__.__name__}")
+
+        # Add any other comment lines that came directly before this Entity.
+        additions.extend(stacked_comments)
+        stacked_comments.clear()
+        additions.append(entry.all)
+
+    content_middle = ""
+
+    if additions:
+        # New line before and after the additions
+        additions.insert(0, "")
+        additions.append("")
+        if is_android:
+            content_middle = "\n    ".join(additions)
+        else:
+            content_middle = "\n".join(additions)
+
+        # Remove " " in otherwise blank lines.
+        content_middle = re.sub("^ +$", "", content_middle, flags=re.MULTILINE)
+
+    return content_start + content_middle + content_end
--- a/tools/torbrowser/l10n/combine/tests/README
+++ b/tools/torbrowser/l10n/combine/tests/README
+Python tests for the combine package, to be run with pytest.
+Requires the compare-locales package.
--- a/tools/torbrowser/l10n/combine/tests/__init__.py
+++ b/tools/torbrowser/l10n/combine/tests/__init__.py
--- a/tools/torbrowser/l10n/combine/tests/test_android.py
+++ b/tools/torbrowser/l10n/combine/tests/test_android.py
+import textwrap
+
+from combine import combine_files
+
+
+def wrap_in_xml(content):
+    if content is None:
+        return None
+    # Allow for indents to make the tests more readable.
+    content = textwrap.dedent(content)
+    return f"""\
+<?xml version="1.0" encoding="utf-8" standalone="yes"?>
+<resources>
+{textwrap.indent(content, "    ")}</resources>
+"""
+
+
+def assert_result(new_content, old_content, expect):
+    new_content = wrap_in_xml(new_content)
+    old_content = wrap_in_xml(old_content)
+    expect = wrap_in_xml(expect)
+    assert expect == combine_files(
+        "test_strings.xml", new_content, old_content, "REMOVED STRING"
+    )
+
+
+def test_combine_empty():
+    """No content is expected when both the new and old files are missing."""
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    """The new content is expected to be unchanged when there is no old file."""
+    # New file with no old content.
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        None,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_combine_removed_file():
+    """Every old string is expected, each with the comment prefix, when there
+    is no new file.
+    """
+    # Entire file was removed.
+    assert_result(
+        None,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+
+        <!-- REMOVED STRING -->
+        <string name="string_1">First</string>
+        <!-- REMOVED STRING -->
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_no_change():
+    """Identical new and old content is expected to be returned as it is."""
+    content = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    """A string that is only in the new content is expected to be kept in
+    place, with nothing else added.
+    """
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_new">NEW</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_new">NEW</string>
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_removed_string():
+    """A string that is only in the old content is expected at the end, after
+    a blank line and the comment prefix.
+    """
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed">REMOVED</string>
+        """,
+    )
+
+
+def test_removed_and_added():
+    """New strings are expected to stay in place, with all the removed strings
+    placed at the end in their old order, each with the comment prefix.
+    """
+    assert_result(
+        """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <string name="removed_2">Second removed</string>
+        <string name="string_2">Second</string>
+        <string name="removed_3">Third removed</string>
+        """,
+        """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_3">Third removed</string>
+        """,
+    )
+
+
+def test_updated():
+    """Only the new value is expected when a string's value differs between
+    the new and old content.
+    """
+    # String content was updated.
+    assert_result(
+        """\
+        <string name="changed_string">NEW</string>
+        """,
+        """\
+        <string name="changed_string">OLD</string>
+        """,
+        """\
+        <string name="changed_string">NEW</string>
+        """,
+    )
+
+
+def test_updated_comment():
+    """Only the comments of the new content are expected when a comment was
+    changed, added or removed for a string found in both files.
+    """
+    # String comment was updated.
+    assert_result(
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+    # Comment added.
+    assert_result(
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+    # Comment removed.
+    assert_result(
+        """\
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <string name="changed_string">string</string>
+        """,
+    )
+
+    # With file comments
+    assert_result(
+        """\
+        <!-- NEW file comment -->
+
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD file comment -->
+
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW file comment -->
+
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+
+
+def test_reordered():
+    """The order of the new content is expected when the same strings are in a
+    different order in the old content.
+    """
+    # String was re_ordered.
+    assert_result(
+        """\
+        <string name="string_1">value</string>
+        <string name="moved_string">move</string>
+        """,
+        """\
+        <string name="moved_string">move</string>
+        <string name="string_1">value</string>
+        """,
+        """\
+        <string name="string_1">value</string>
+        <string name="moved_string">move</string>
+        """,
+    )
+
+
+def test_removed_string_with_comment():
+    """A removed string is expected to keep the comments that come directly
+    before it, placed after the comment prefix.
+    """
+    assert_result(
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        """,
+    )
+
+    # With file comments and multi-line.
+    # All comments prior to a removed string are moved with it, until another
+    # entity or blank line is reached.
+    assert_result(
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+
+        <!-- Removed file comment -->
+
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="removed_4">Fourth removed</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_4">Fourth removed</string>
+        """,
+    )
--- a/tools/torbrowser/l10n/combine/tests/test_dtd.py
+++ b/tools/torbrowser/l10n/combine/tests/test_dtd.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    # Allow for indents to make the tests more readable.
+    if new_content is not None:
+        new_content = textwrap.dedent(new_content)
+    if old_content is not None:
+        old_content = textwrap.dedent(old_content)
+    if expect is not None:
+        expect = textwrap.dedent(expect)
+    assert expect == combine_files(
+        "test.dtd", new_content, old_content, "REMOVED STRING"
+    )
+
+
+def test_combine_empty():
+    """No content is expected when both the new and old files are missing."""
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    """The new content is expected to be unchanged when there is no old file."""
+    # New file with no old content.
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        None,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_combine_removed_file():
+    """Every old string is expected, each with a LOCALIZATION NOTE that holds
+    the comment prefix, when there is no new file.
+    """
+    # Entire file was removed.
+    assert_result(
+        None,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_no_change():
+    """Identical new and old content is expected to be returned as it is."""
+    content = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    """A string that is only in the new content is expected to be kept in
+    place, with nothing else added.
+    """
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.new "NEW">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.new "NEW">
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_removed_string():
+    """A string that is only in the old content is expected at the end, after
+    a blank line and a LOCALIZATION NOTE that holds the comment prefix.
+    """
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed "REMOVED">
+        """,
+    )
+
+
+def test_removed_and_added():
+    """New strings are expected to stay in place, with all the removed strings
+    placed at the end in their old order, each with its own LOCALIZATION NOTE.
+    """
+    assert_result(
+        """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!ENTITY removed.2 "Second removed">
+        <!ENTITY string.2 "Second">
+        <!ENTITY removed.3 "Third removed">
+        """,
+        """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.3 "Third removed">
+        """,
+    )
+
+
+def test_updated():
+    """When an entity's value differs, the new version's value is expected."""
+    new_dtd = """\
+        <!ENTITY changed.string "NEW">
+        """
+    old_dtd = """\
+        <!ENTITY changed.string "OLD">
+        """
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+
+def test_updated_comment():
+    """Comment differences on a kept entity resolve to the new version.
+
+    In every scenario below the expected result equals the new content.
+    """
+    # Scenario 1: the comment text changed.
+    new_dtd = """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """
+    old_dtd = """\
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+    # Scenario 2: only the new version has a comment.
+    new_dtd = """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """
+    old_dtd = """\
+        <!ENTITY changed.string "string">
+        """
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+    # Scenario 3: only the old version has a comment.
+    new_dtd = """\
+        <!ENTITY changed.string "string">
+        """
+    old_dtd = """\
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+    # Scenario 4: a standalone comment precedes the entity's own comment,
+    # and both differ between the versions.
+    new_dtd = """\
+        <!-- NEW FILE COMMENT -->
+
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """
+    old_dtd = """\
+        <!-- OLD -->
+
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+
+def test_reordered():
+    """A re-ordered entity is expected in the position the new version gives it."""
+    new_dtd = """\
+        <!ENTITY string.1 "value">
+        <!ENTITY moved.string "move">
+        """
+    old_dtd = """\
+        <!ENTITY moved.string "move">
+        <!ENTITY string.1 "value">
+        """
+    # Both entities still exist, so the new ordering is the whole result.
+    assert_result(new_dtd, old_dtd, new_dtd)
+
+
+def test_removed_string_with_comment():
+    """A removed entity is expected to carry its own comment along with it."""
+    # Simple case: one removed entity with a single comment above it.
+    new_dtd = """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    old_dtd = """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """
+    # The removal note comes first, then the entity's original comment.
+    expected = """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        """
+    assert_result(new_dtd, old_dtd, expected)
+
+    # Larger case: standalone comments, stacked comments and a comment that
+    # spans several lines.
+    new_dtd = """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+        """
+    old_dtd = """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+
+        <!-- Removed file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY removed.4 "Fourth removed">
+        <!ENTITY string.2 "Second">
+        """
+    # Comments directly above a removed entity are expected to move with it.
+    # Old comments separated from it by a blank line ("Removed file comment",
+    # and the old "Second file comment" above removed.4) are not copied.
+    expected = """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.4 "Fourth removed">
+        """
+    assert_result(new_dtd, old_dtd, expected)
--- a/tools/torbrowser/l10n/combine/tests/test_fluent.py
+++ b/tools/torbrowser/l10n/combine/tests/test_fluent.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the two contents produces ``expect``.
+
+    Every argument that is not None is passed through ``textwrap.dedent``
+    first, so callers may indent their multi-line literals for readability.
+    The contents are handed to ``combine_files`` under the file name
+    "test.ftl", with "REMOVED STRING" as its final argument.
+
+    :param new_content: Content of the new version, or None.
+    :param old_content: Content of the old version, or None.
+    :param expect: The expected combined content, or None.
+    """
+    new_content, old_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (new_content, old_content, expect)
+    )
+    combined = combine_files(
+        "test.ftl", new_content, old_content, "REMOVED STRING"
+    )
+    assert expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the expected result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A file with new content but no old content is expected back unchanged."""
+    new_ftl = """\
+        string-1 = First
+        string-2 = Second
+        """
+    # There is no old version, so the expected result equals the new content.
+    assert_result(new_content=new_ftl, old_content=None, expect=new_ftl)
+
+
+def test_combine_removed_file():
+    """With no new content, all old messages sit under one removal group."""
+    old_ftl = """\
+        string-1 = First
+        string-2 = Second
+        """
+    # The expected output opens with two blank lines, then a single
+    # "## REMOVED STRING" group comment covering both messages.
+    expected = """\
+
+
+        ## REMOVED STRING
+
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(new_content=None, old_content=old_ftl, expect=expected)
+
+
+def test_no_change():
+    """Identical new and old content is expected back unchanged."""
+    unchanged = """\
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """A message that only the new version has stays where the new file puts it."""
+    new_ftl = """\
+        string-1 = First
+        string-new = NEW
+        string-2 = Second
+        """
+    old_ftl = """\
+        string-1 = First
+        string-2 = Second
+        """
+    # Nothing was removed, so the expected result equals the new content.
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+
+def test_removed_string():
+    """A message missing from the new version is appended in a removal group."""
+    new_ftl = """\
+        string-1 = First
+        string-2 = Second
+        """
+    old_ftl = """\
+        string-1 = First
+        removed = REMOVED
+        string-2 = Second
+        """
+    # Two blank lines separate the new content from the removal group.
+    expected = """\
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        removed = REMOVED
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=expected)
+
+
+def test_removed_and_added():
+    """Additions and removals in one file, including messages with attributes."""
+    new_ftl = """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+        """
+    old_ftl = """\
+        string-1 =
+            .attr = First
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        string-2 = Second
+        removed-3 = Third removed
+        """
+    # All three removed messages share a single removal group and keep the
+    # order they had in the old file.
+    expected = """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+
+
+        ## REMOVED STRING
+
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        removed-3 = Third removed
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=expected)
+
+
+def test_updated():
+    """When a message's value differs, the new version's value is expected."""
+    new_ftl = """\
+        changed-string = NEW
+        """
+    old_ftl = """\
+        changed-string = OLD
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+
+def test_updated_comment():
+    """Comment differences on a kept message resolve to the new version.
+
+    In every scenario below the expected result equals the new content.
+    """
+    # Scenario 1: the comment text changed.
+    new_ftl = """\
+        # NEW
+        changed-string = string
+        """
+    old_ftl = """\
+        # OLD
+        changed-string = string
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+    # Scenario 2: only the new version has a comment.
+    new_ftl = """\
+        # NEW
+        changed-string = string
+        """
+    old_ftl = """\
+        changed-string = string
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+    # Scenario 3: only the old version has a comment.
+    new_ftl = """\
+        changed-string = string
+        """
+    old_ftl = """\
+        # OLD
+        changed-string = string
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+    # Scenario 4: the group comment and the message comment both changed.
+    new_ftl = """\
+        ## GROUP NEW
+
+        # NEW
+        changed-string = string
+        """
+    old_ftl = """\
+        ## GROUP OLD
+
+        # OLD
+        changed-string = string
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+
+def test_reordered():
+    """A re-ordered message is expected in the position the new version gives it."""
+    new_ftl = """\
+        string-1 = value
+        moved-string = move
+        """
+    old_ftl = """\
+        moved-string = move
+        string-1 = value
+        """
+    # Both messages still exist, so the new ordering is the whole result.
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=new_ftl)
+
+
+def test_removed_string_with_comment():
+    """A removed message is expected to keep its comment and its group comment."""
+    # Simple case: one removed message with a single comment above it.
+    new_ftl = """\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+        """
+    old_ftl = """\
+        # Comment for first.
+        string-1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string-2 = Second
+        """
+    expected = """\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        # Comment for removed.
+        removed = REMOVED
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=expected)
+
+    # Grouped case: a removed message's old group comment is expected to be
+    # appended to the "REMOVED STRING" group comment. Removed messages that
+    # had no group comment get a plain "REMOVED STRING" group instead.
+    new_ftl = """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+        """
+    old_ftl = """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ##
+
+        no-group = No group comment
+        removed-3 = Third removed
+
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        string-2 = Second
+        """
+    expected = """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+
+
+        ## REMOVED STRING
+        ## First Group comment
+
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ## REMOVED STRING
+
+        removed-3 = Third removed
+
+        ## REMOVED STRING
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        """
+    assert_result(new_content=new_ftl, old_content=old_ftl, expect=expected)
--- a/tools/torbrowser/l10n/combine/tests/test_properties.py
+++ b/tools/torbrowser/l10n/combine/tests/test_properties.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the two contents produces ``expect``.
+
+    Every argument that is not None is passed through ``textwrap.dedent``
+    first, so callers may indent their multi-line literals for readability.
+    The contents are handed to ``combine_files`` under the file name
+    "test.properties", with "REMOVED STRING" as its final argument.
+
+    :param new_content: Content of the new version, or None.
+    :param old_content: Content of the old version, or None.
+    :param expect: The expected combined content, or None.
+    """
+    new_content, old_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (new_content, old_content, expect)
+    )
+    combined = combine_files(
+        "test.properties", new_content, old_content, "REMOVED STRING"
+    )
+    assert expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the expected result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A file with new content but no old content is expected back unchanged."""
+    new_props = """\
+        string.1 = First
+        string.2 = Second
+        """
+    # There is no old version, so the expected result equals the new content.
+    assert_result(new_content=new_props, old_content=None, expect=new_props)
+
+
+def test_combine_removed_file():
+    """With no new content, every old string is kept and marked as removed."""
+    old_props = """\
+        string.1 = First
+        string.2 = Second
+        """
+    # The expected output opens with a blank line, followed by one
+    # "# REMOVED STRING" comment in front of each old string.
+    expected = """\
+
+        # REMOVED STRING
+        string.1 = First
+        # REMOVED STRING
+        string.2 = Second
+        """
+    assert_result(new_content=None, old_content=old_props, expect=expected)
+
+
+def test_no_change():
+    """Identical new and old content is expected back unchanged."""
+    unchanged = """\
+        string.1 = First
+        string.2 = Second
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """A string that only the new version has stays where the new file puts it."""
+    new_props = """\
+        string.1 = First
+        string.new = NEW
+        string.2 = Second
+        """
+    old_props = """\
+        string.1 = First
+        string.2 = Second
+        """
+    # Nothing was removed, so the expected result equals the new content.
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+
+def test_removed_string():
+    """A string missing from the new version is appended with a removal comment."""
+    new_props = """\
+        string.1 = First
+        string.2 = Second
+        """
+    old_props = """\
+        string.1 = First
+        removed = REMOVED
+        string.2 = Second
+        """
+    # The removed string follows the new content after one blank line.
+    expected = """\
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        removed = REMOVED
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=expected)
+
+
+def test_removed_and_added():
+    """Additions and removals in one file: new content first, removals after."""
+    new_props = """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+        """
+    old_props = """\
+        string.1 = First
+        removed.1 = First removed
+        removed.2 = Second removed
+        string.2 = Second
+        removed.3 = Third removed
+        """
+    # The removed strings are listed in the order they had in the old file,
+    # each with its own removal comment.
+    expected = """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        removed.2 = Second removed
+        # REMOVED STRING
+        removed.3 = Third removed
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=expected)
+
+
+def test_updated():
+    """When a string's value differs, the new version's value is expected."""
+    new_props = """\
+        changed.string = NEW
+        """
+    old_props = """\
+        changed.string = OLD
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+
+def test_updated_comment():
+    """Comment differences on a kept string resolve to the new version.
+
+    In every scenario below the expected result equals the new content.
+    """
+    # Scenario 1: the comment text changed.
+    new_props = """\
+        # NEW
+        changed.string = string
+        """
+    old_props = """\
+        # OLD
+        changed.string = string
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+    # Scenario 2: only the new version has a comment.
+    new_props = """\
+        # NEW
+        changed.string = string
+        """
+    old_props = """\
+        changed.string = string
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+    # Scenario 3: only the old version has a comment.
+    new_props = """\
+        changed.string = string
+        """
+    old_props = """\
+        # OLD
+        changed.string = string
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+    # Scenario 4: the file comment and the string comment both changed.
+    new_props = """\
+        # NEW file comment
+
+        # NEW
+        changed.string = string
+        """
+    old_props = """\
+        # OLD file comment
+
+        # OLD
+        changed.string = string
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+
+def test_reordered():
+    """A re-ordered string is expected in the position the new version gives it."""
+    new_props = """\
+        string.1 = value
+        moved.string = move
+        """
+    old_props = """\
+        moved.string = move
+        string.1 = value
+        """
+    # Both strings still exist, so the new ordering is the whole result.
+    assert_result(new_content=new_props, old_content=old_props, expect=new_props)
+
+
+def test_removed_string_with_comment():
+    """A removed string is expected to carry its own comment along with it."""
+    # Simple case: one removed string with a single comment above it.
+    new_props = """\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+        """
+    old_props = """\
+        # Comment for first.
+        string.1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string.2 = Second
+        """
+    # The removal comment comes first, then the string's original comment.
+    expected = """\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        # Comment for removed.
+        removed = REMOVED
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=expected)
+
+    # Larger case: file comments and multi-line comments.
+    # Every comment line directly above a removed string is expected to move
+    # with it; the run of comments ends at a blank line or another entity.
+    new_props = """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+        """
+    old_props = """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+        removed.1 = First removed
+        # Comment for second removed.
+        removed.2 = Second removed
+
+        # Removed file comment
+
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+
+        # Second
+        # File comment
+
+        removed.4 = Fourth removed
+        string.2 = Second
+        """
+    expected = """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        # Comment for second removed.
+        removed.2 = Second removed
+        # REMOVED STRING
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+        # REMOVED STRING
+        removed.4 = Fourth removed
+        """
+    assert_result(new_content=new_props, old_content=old_props, expect=expected)