Bug 42305: Add script to combine translation files across versions.

7af0448b · henry · Pier Angelo Vendrame · db5e3f0e · 7af0448b · 7af0448b
Verified Commit 7af0448b authored Nov 30, 2023 by henry Committed by Pier Angelo Vendrame 5 months ago
--- a/tools/torbrowser/l10n/combine-translation-versions.py
+++ b/tools/torbrowser/l10n/combine-translation-versions.py
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+
+from combine import combine_files
+
+# Command line interface: three positional arguments, given in this order.
+arg_parser = argparse.ArgumentParser(
+    description="Combine a translation file across two different versions"
+)
+
+arg_parser.add_argument(
+    "current_branch", metavar="<current-branch>", help="branch for the newest version"
+)
+# A single argument that names every file to process. The script splits it on
+# spaces, and splits each part on its first ":" into a translation branch and
+# a file name, so the help text spells that format out.
+arg_parser.add_argument(
+    "filenames",
+    metavar="<filenames>",
+    help=(
+        "space-separated list of <translation-branch>:<filename> pairs naming "
+        "the translation files"
+    ),
+)
+arg_parser.add_argument("outname", metavar="<json>", help="name of the json output")
+
+args = arg_parser.parse_args()
+
+# Log at INFO level so that the git commands being run are reported.
+logging.basicConfig()
+logger = logging.getLogger("combine-translation-versions")
+logger.setLevel(logging.INFO)
+
+
+def in_pink(msg: str) -> str:
+    """Wrap a message in terminal escape codes so it is shown bold and pink.
+
+    :param msg: The text to colour.
+    :returns: The text preceded by the colour sequence and followed by the
+      reset sequence.
+    """
+    # Bold, with pink taken from the 256-colour palette.
+    bold_pink = "\x1b[1;38;5;212m"
+    # Restore the terminal's default attributes after the message.
+    reset = "\x1b[0m"
+    return f"{bold_pink}{msg}{reset}"
+
+
+def git_run(git_args: list[str]) -> None:
+    """Execute a git command, raising if it exits with a non-zero status.
+
+    :param git_args: The arguments that should follow "git".
+    """
+    # Log the command first, so that whatever git writes to stderr has some
+    # context when it appears in the log.
+    shown_command = in_pink(f"git {' '.join(git_args)}")
+    logger.info("Running: " + shown_command)
+    subprocess.run(["git", *git_args], check=True)
+
+
+def git_text(git_args: list[str]) -> str:
+    """Get the text output for a git command.
+
+    The output is always decoded as UTF-8, rather than with the locale's
+    preferred encoding, so that the result does not depend on the environment
+    the script is run in.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: The stdout of the command.
+    :throws subprocess.CalledProcessError: If git exits with a non-zero status.
+    """
+    logger.info("Running: " + in_pink("git " + " ".join(git_args)))
+    # Passing an explicit encoding also switches the pipe to text mode.
+    return subprocess.run(
+        ["git", *git_args], encoding="utf-8", check=True, stdout=subprocess.PIPE
+    ).stdout
+
+
+def git_lines(git_args: list[str]) -> list[str]:
+    """Run a git command and split its output into lines.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: Each line of the command's stdout, with empty lines dropped.
+    """
+    output = git_text(git_args)
+    # filter(None, ...) discards the empty strings, such as the one left after
+    # the final newline.
+    return list(filter(None, output.split("\n")))
+
+
+def git_file_paths(git_ref: str) -> list[str]:
+    """List every file path found under the given tree.
+
+    :param git_ref: The git reference for the tree to search.
+    :returns: The found file paths.
+    """
+    # Recurse into sub-trees and print only the path of each entry.
+    ls_tree_args = ["ls-tree", "-r", "--format=%(path)", git_ref]
+    return git_lines(ls_tree_args)
+
+
+def matching_path(search_paths: list[str], filename: str) -> str | None:
+    """Get the matching file path with the given filename, if it exists.
+
+    Only the final component of each path is compared with the file name.
+
+    :param search_paths: The file paths to search through.
+    :param filename: The file name to match.
+    :returns: The unique file path with the matching name, or None if no such
+      match was found.
+    :throws Exception: If multiple paths shared the same file name.
+    """
+    matching = [path for path in search_paths if os.path.basename(path) == filename]
+    if not matching:
+        return None
+    if len(matching) > 1:
+        # This must be an f-string for the file name to appear in the message.
+        raise Exception(f"Multiple occurrences of {filename}")
+    return matching[0]
+
+
+def git_file_content(git_ref: str, path: str | None) -> str | None:
+    """Read a file's content from a git tree, if a path is given.
+
+    :param git_ref: The reference for the tree to find the file under.
+    :param path: The file path for the object, or None if there is no path.
+    :returns: The file content, or None if no path was given.
+    """
+    if path is not None:
+        # "<ref>:<path>" names the blob at that path under the given tree.
+        blob_name = f"{git_ref}:{path}"
+        return git_text(["cat-file", "blob", blob_name])
+    return None
+
+
+def get_stable_branch(branch_prefix: str) -> str:
+    """Work out the name of the most recent stable branch on origin.
+
+    The branch is found through its "-build1" tag: the newest such tag whose
+    annotation mentions "stable" gives the branch name.
+
+    :param branch_prefix: The prefix that the stable branch should have.
+    :returns: The branch name.
+    :throws Exception: If none of the matching tags is marked as stable.
+    """
+    tag_glob = f"{branch_prefix}-*-build1"
+    # Keep the fetch quick by asking only for the tag objects, without blobs.
+    fetch_args = [
+        "fetch",
+        "--depth=1",
+        "--filter=object:type=tag",
+        "origin",
+        "tag",
+        tag_glob,
+    ]
+    git_run(fetch_args)
+    # One line per tag, most recently tagged first: the tag name followed by
+    # the first line of its annotation.
+    tag_lines = git_lines(["tag", "-n1", "--list", tag_glob, "--sort=-taggerdate"])
+    for line in tag_lines:
+        build_tag, annotation = line.split(" ", 1)
+        if "stable" not in annotation:
+            continue
+        # The branch shares the tag's name, apart from the "-build1" suffix.
+        return re.sub(r"-build1$", "", build_tag)
+    raise Exception("No stable build1 tag found")
+
+
+def get_version_from_branch_name(branch_name: str) -> tuple[str, float]:
+    """Split a branch name into its prefix and version number.
+
+    :param branch_name: The branch to extract from.
+    :returns: The branch prefix and its version number.
+    :throws ValueError: If the branch name does not have the expected form.
+    """
+    # Expected form: "<prefix>-<component without dashes>-<version>-...", where
+    # the version has a single decimal digit that is either 0 or 5.
+    parsed = re.match(r"([a-z-]+)-[^-]*-([0-9]+\.[05])-", branch_name)
+    if parsed is None:
+        raise ValueError(f"Unable to parse the version from the branch {branch_name}")
+
+    prefix, version_text = parsed.groups()
+    return (prefix, float(version_text))
+
+
+# Work out which stable branch the current branch should be combined with.
+branch_prefix, current_version = get_version_from_branch_name(args.current_branch)
+
+stable_branch = get_stable_branch(branch_prefix)
+_, stable_version = get_version_from_branch_name(stable_branch)
+
+# The stable version must either equal the current version or be exactly one
+# 0.5 step behind it.
+if stable_version > current_version or stable_version < current_version - 0.5:
+    raise Exception(
+        f"Version of stable branch {stable_branch} is not within 0.5 of the "
+        f"current branch {args.current_branch}"
+    )
+
+# Minimal fetch of stable_branch.
+# Individual file blobs will be downloaded as needed.
+git_run(["fetch", "--depth=1", "--filter=blob:none", "origin", stable_branch])
+
+# Every file path in the checked out commit and in the stable branch.
+current_file_paths = git_file_paths("HEAD")
+old_file_paths = git_file_paths(f"origin/{stable_branch}")
+
+# Details read from the environment; each falls back to an empty string when
+# the variable is not set.
+ci_commit = os.environ.get("CI_COMMIT_SHA", "")
+ci_url_base = os.environ.get("CI_PROJECT_URL", "")
+
+# The structure written out as JSON. One entry per translation file is added
+# to "files" below.
+json_data = {
+    "commit": ci_commit,
+    "commit-url": f"{ci_url_base}/-/commit/{ci_commit}"
+    if (ci_commit and ci_url_base)
+    else "",
+    "project-path": os.environ.get("CI_PROJECT_PATH", ""),
+    "current-branch": args.current_branch,
+    "stable-branch": stable_branch,
+    "files": [],
+}
+
+# <filenames> is a space-separated list, where each part has the form
+# "<translation-branch>:<file-name>".
+for translation_branch, name in (
+    part.strip().split(":", 1) for part in args.filenames.split(" ") if part.strip()
+):
+    # Files are located by their name alone, anywhere in each tree.
+    current_path = matching_path(current_file_paths, name)
+    old_path = matching_path(old_file_paths, name)
+
+    if current_path is None and old_path is None:
+        # No file in either branch.
+        logger.warning(f"{name} does not exist in either the current or stable branch")
+    elif current_path is None:
+        logger.warning(f"{name} deleted in the current branch")
+    elif old_path is None:
+        logger.warning(f"{name} does not exist in the stable branch")
+
+    # A missing file is passed on as None content. The entry is still added
+    # to the output, even when both are missing.
+    content = combine_files(
+        name,
+        git_file_content("HEAD", current_path),
+        git_file_content(f"origin/{stable_branch}", old_path),
+        f"Will be unused in Tor Browser {current_version}!",
+    )
+    json_data["files"].append(
+        {
+            "name": name,
+            "branch": translation_branch,
+            "content": content,
+        }
+    )
+
+# Write the combined result for all files to the requested output file.
+with open(args.outname, "w") as file:
+    json.dump(json_data, file)
--- a/tools/torbrowser/l10n/combine/__init__.py
+++ b/tools/torbrowser/l10n/combine/__init__.py
+# flake8: noqa
+
+from .combine import combine_files
--- a/tools/torbrowser/l10n/combine/combine.py
+++ b/tools/torbrowser/l10n/combine/combine.py
+import re
+from typing import TYPE_CHECKING, Any
+
+from compare_locales.parser import getParser
+from compare_locales.parser.android import AndroidEntity, DocumentWrapper
+from compare_locales.parser.base import Comment, Entity, Junk, Whitespace
+from compare_locales.parser.dtd import DTDEntity
+from compare_locales.parser.fluent import FluentComment, FluentEntity
+from compare_locales.parser.properties import PropertiesEntity
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
+def combine_files(
+    filename: str,
+    new_content: str | None,
+    old_content: str | None,
+    comment_prefix: str,
+) -> str | None:
+    """Combine two translation files into one to include all strings from both.
+    The new content is presented first, and any strings only found in the old
+    content are placed at the end with an additional comment.
+
+    :param filename: The filename for the file, determines the format.
+    :param new_content: The new content for the file, or None if it has been
+      deleted.
+    :param old_content: The old content for the file, or None if it did not
+      exist before.
+    :param comment_prefix: A comment to include for any strings that are only
+      found in the old content. This will be placed before any other comments
+      for the string.
+
+    :returns: The combined content, or None if both given contents are None.
+    :throws ValueError: If the new android content has no final "</resources>",
+      or if the old content contains Junk or an entry of an unexpected type.
+    """
+    if new_content is None and old_content is None:
+        return None
+
+    # getParser from compare_locales returns the same instance for the same file
+    # extension.
+    parser = getParser(filename)
+
+    is_android = filename.endswith(".xml")
+    if new_content is None:
+        if is_android:
+            # File was deleted, add some document parts.
+            content_start = (
+                '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<resources>\n'
+            )
+            content_end = "</resources>\n"
+        else:
+            # Treat as an empty file.
+            content_start = ""
+            content_end = ""
+        # Only used for membership tests, so a set keeps each lookup cheap.
+        existing_keys: set[str] = set()
+    else:
+        parser.readUnicode(new_content)
+
+        # Start with the same content as the current file.
+        # For android strings, we want to keep the final "</resources>" until after.
+        if is_android:
+            closing_match = re.match(
+                r"^(.*)(</resources>\s*)$", parser.ctx.contents, re.DOTALL
+            )
+            if not closing_match:
+                raise ValueError("Missing a final </resources>")
+            content_start = closing_match.group(1)
+            content_end = closing_match.group(2)
+        else:
+            content_start = parser.ctx.contents
+            content_end = ""
+        existing_keys = {entry.key for entry in parser.walk(only_localizable=True)}
+
+    # For Fluent, we want to prefix the strings using GroupComments.
+    # On weblate this will cause all the strings that fall under the GroupComment's
+    # scope to have the prefix added to their "notes".
+    # We set up an initial GroupComment for the first string we find. This will also
+    # end the scope of the last GroupComment in the new translation file.
+    # This will be replaced with the next GroupComment when it is found.
+    fluent_group_comment_prefix = f"\n## {comment_prefix}\n"
+    fluent_group_comment: str | None = fluent_group_comment_prefix
+
+    # For other formats, we want to keep all the comment lines that come directly
+    # before the string.
+    # In compare_locales.parser, only the comment line directly before an Entity
+    # counts as the pre_comment for that Entity. I.e. only this line will be
+    # included in Entity.all
+    # However, in weblate every comment line that comes before the Entity is
+    # included as a comment. So we also want to keep these additional comments to
+    # preserve them for weblate.
+    # We gather these extra comments in stacked_comments, and clear them whenever we
+    # reach an Entity or a blank line (Whitespace is more than "\n").
+    stacked_comments: list[str] = []
+
+    additions: list[str] = []
+
+    entry_iter: Iterable[Any] = ()
+    # If the file does not exist in the old branch, don't make any additions.
+    if old_content is not None:
+        # The parser is re-used, so from here on it holds the old content.
+        parser.readUnicode(old_content)
+        entry_iter = parser.walk(only_localizable=False)
+    for entry in entry_iter:
+        if isinstance(entry, Junk):
+            raise ValueError(f"Unexpected Junk: {entry.all}")
+        if isinstance(entry, Whitespace):
+            # Clear stacked comments if more than one empty line.
+            if entry.all != "\n":
+                stacked_comments.clear()
+            continue
+        if isinstance(entry, Comment):
+            if isinstance(entry, FluentComment):
+                # Don't stack Fluent comments.
+                # Only the comments included in Entity.pre_comment count towards
+                # that Entity's comment.
+                if entry.all.startswith("##"):
+                    # A Fluent GroupComment
+                    if entry.all == "##":
+                        # Empty GroupComment. Used to end the scope of a previous
+                        # GroupComment.
+                        # Replace this with our prefix comment.
+                        fluent_group_comment = fluent_group_comment_prefix
+                    else:
+                        # Prefix the group comment.
+                        fluent_group_comment = (
+                            f"{fluent_group_comment_prefix}{entry.all}\n"
+                        )
+            else:
+                stacked_comments.append(entry.all)
+            continue
+        if isinstance(entry, DocumentWrapper):
+            # Not needed.
+            continue
+
+        if not isinstance(entry, Entity):
+            raise ValueError(f"Unexpected type: {entry.__class__.__name__}")
+
+        if entry.key in existing_keys:
+            # Already included this string in the new translation file.
+            # Drop the gathered comments for this Entity.
+            stacked_comments.clear()
+            continue
+
+        if isinstance(entry, FluentEntity):
+            if fluent_group_comment is not None:
+                # We have a found GroupComment which has not been included yet.
+                # All following Entity's will be under its scope, until the next
+                # GroupComment.
+                additions.append(fluent_group_comment)
+                # Added GroupComment, so don't need to add again.
+                fluent_group_comment = None
+        elif isinstance(entry, DTDEntity):
+            # Include our additional comment before we print the rest for this
+            # Entity.
+            additions.append(f"<!-- LOCALIZATION NOTE: {comment_prefix} -->")
+        elif isinstance(entry, PropertiesEntity):
+            additions.append(f"# {comment_prefix}")
+        elif isinstance(entry, AndroidEntity):
+            additions.append(f"<!-- {comment_prefix} -->")
+        else:
+            raise ValueError(f"Unexpected Entity type: {entry.__class__.__name__}")
+
+        # Add any other comment lines that came directly before this Entity.
+        additions.extend(stacked_comments)
+        stacked_comments.clear()
+        additions.append(entry.all)
+
+    content_middle = ""
+
+    if additions:
+        # New line before and after the additions
+        additions.insert(0, "")
+        additions.append("")
+        if is_android:
+            # Indent each addition to sit inside the <resources> element.
+            content_middle = "\n    ".join(additions)
+        else:
+            content_middle = "\n".join(additions)
+
+        # Remove " " in otherwise blank lines.
+        content_middle = re.sub("^ +$", "", content_middle, flags=re.MULTILINE)
+
+    return content_start + content_middle + content_end
--- a/tools/torbrowser/l10n/combine/tests/README
+++ b/tools/torbrowser/l10n/combine/tests/README
+Python tests, to be run with pytest.
+Requires the compare-locales package.
--- a/tools/torbrowser/l10n/combine/tests/__init__.py
+++ b/tools/torbrowser/l10n/combine/tests/__init__.py
--- a/tools/torbrowser/l10n/combine/tests/test_android.py
+++ b/tools/torbrowser/l10n/combine/tests/test_android.py
+import textwrap
+
+from combine import combine_files
+
+
+def wrap_in_xml(content):
+    """Place the given strings inside a complete android resources document.
+
+    The content is dedented and then indented by four spaces, so that it can be
+    written with any indent in the tests. None is passed through as None.
+    """
+    if content is None:
+        return None
+    # Drop the indent used for readability in the tests, then apply the indent
+    # used within <resources>.
+    body = textwrap.indent(textwrap.dedent(content), "    ")
+    return (
+        '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n'
+        "<resources>\n"
+        f"{body}</resources>\n"
+    )
+
+
+def assert_result(new_content, old_content, expect):
+    """Check the result of combining two android string files.
+
+    Each argument is the content within <resources>, or None. They are wrapped
+    into full documents before being combined and compared.
+    """
+    wrapped_new, wrapped_old, wrapped_expect = (
+        wrap_in_xml(text) for text in (new_content, old_content, expect)
+    )
+    combined = combine_files(
+        "test_strings.xml", wrapped_new, wrapped_old, "REMOVED STRING"
+    )
+    assert wrapped_expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the combined result is None."""
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    """A file with no old content is returned unchanged."""
+    # New file with no old content.
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        None,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_combine_removed_file():
+    """With no new content, every old string is kept and marked as removed."""
+    # Entire file was removed.
+    assert_result(
+        None,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+
+        <!-- REMOVED STRING -->
+        <string name="string_1">First</string>
+        <!-- REMOVED STRING -->
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_no_change():
+    """Identical new and old content is returned unchanged."""
+    content = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    """A string only found in the new content needs no additions."""
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_new">NEW</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_new">NEW</string>
+        <string name="string_2">Second</string>
+        """,
+    )
+
+
+def test_removed_string():
+    """A string only found in the old content is appended with the comment."""
+    assert_result(
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed">REMOVED</string>
+        """,
+    )
+
+
+def test_removed_and_added():
+    """Removed strings are appended in their old order after the new content."""
+    assert_result(
+        """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+        """,
+        """\
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <string name="removed_2">Second removed</string>
+        <string name="string_2">Second</string>
+        <string name="removed_3">Third removed</string>
+        """,
+        """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_3">Third removed</string>
+        """,
+    )
+
+
+def test_updated():
+    """When a string's value changes, only the new value is kept."""
+    # String content was updated.
+    assert_result(
+        """\
+        <string name="changed_string">NEW</string>
+        """,
+        """\
+        <string name="changed_string">OLD</string>
+        """,
+        """\
+        <string name="changed_string">NEW</string>
+        """,
+    )
+
+
+def test_updated_comment():
+    """For a string found in both, only the new content's comments are kept."""
+    # String comment was updated.
+    assert_result(
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+    # Comment added.
+    assert_result(
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+    # Comment removed.
+    assert_result(
+        """\
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <string name="changed_string">string</string>
+        """,
+    )
+
+    # With file comments
+    assert_result(
+        """\
+        <!-- NEW file comment -->
+
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- OLD file comment -->
+
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """,
+        """\
+        <!-- NEW file comment -->
+
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """,
+    )
+
+
+def test_reordered():
+    """Strings that only changed position follow the new content's order."""
+    # String was re-ordered.
+    assert_result(
+        """\
+        <string name="string_1">value</string>
+        <string name="moved_string">move</string>
+        """,
+        """\
+        <string name="moved_string">move</string>
+        <string name="string_1">value</string>
+        """,
+        """\
+        <string name="string_1">value</string>
+        <string name="moved_string">move</string>
+        """,
+    )
+
+
+def test_removed_string_with_comment():
+    """A removed string keeps the comments that came directly before it.
+
+    These comments are placed after the added "REMOVED STRING" comment.
+    """
+    assert_result(
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        """,
+    )
+
+    # With file comments and multi-line.
+    # All comments prior to a removed string are moved with it, until another
+    # entity or blank line is reached.
+    assert_result(
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+
+        <!-- Removed file comment -->
+
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="removed_4">Fourth removed</string>
+        <string name="string_2">Second</string>
+        """,
+        """\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_4">Fourth removed</string>
+        """,
+    )
--- a/tools/torbrowser/l10n/combine/tests/test_dtd.py
+++ b/tools/torbrowser/l10n/combine/tests/test_dtd.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Check the result of combining two DTD files.
+
+    Each argument is the file content, or None. The content is dedented before
+    being combined and compared.
+    """
+
+    def dedent_or_none(text):
+        # Allow for indents to make the tests more readable.
+        return None if text is None else textwrap.dedent(text)
+
+    new_text, old_text, expected = (
+        dedent_or_none(text) for text in (new_content, old_content, expect)
+    )
+    combined = combine_files("test.dtd", new_text, old_text, "REMOVED STRING")
+    assert expected == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the combined result is None."""
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    """A file with no old content is returned unchanged."""
+    # New file with no old content.
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        None,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_combine_removed_file():
+    """With no new content, every old string is kept and marked as removed."""
+    # Entire file was removed.
+    assert_result(
+        None,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_no_change():
+    """Identical new and old content is returned unchanged."""
+    content = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    """A string only found in the new content needs no additions."""
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.new "NEW">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.new "NEW">
+        <!ENTITY string.2 "Second">
+        """,
+    )
+
+
+def test_removed_string():
+    """A string only found in the old content is appended with the note."""
+    assert_result(
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed "REMOVED">
+        """,
+    )
+
+
+def test_removed_and_added():
+    """Removed strings are appended in their old order after the new content."""
+    assert_result(
+        """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+        """,
+        """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!ENTITY removed.2 "Second removed">
+        <!ENTITY string.2 "Second">
+        <!ENTITY removed.3 "Third removed">
+        """,
+        """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.3 "Third removed">
+        """,
+    )
+
+
+def test_updated():
+    """When a string's value changes, only the new value is kept."""
+    # String content was updated.
+    assert_result(
+        """\
+        <!ENTITY changed.string "NEW">
+        """,
+        """\
+        <!ENTITY changed.string "OLD">
+        """,
+        """\
+        <!ENTITY changed.string "NEW">
+        """,
+    )
+
+
+def test_updated_comment():
+    """For a string found in both, only the new content's comments are kept."""
+    # String comment was updated.
+    assert_result(
+        """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+    )
+    # Comment added.
+    assert_result(
+        """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+    )
+    # Comment removed.
+    assert_result(
+        """\
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!ENTITY changed.string "string">
+        """,
+    )
+
+    # With multiple comments
+    assert_result(
+        """\
+        <!-- NEW FILE COMMENT -->
+
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- OLD -->
+
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """,
+        """\
+        <!-- NEW FILE COMMENT -->
+
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """,
+    )
+
+
+def test_reordered():
+    """Strings that only changed position follow the new content's order."""
+    # String was re-ordered.
+    assert_result(
+        """\
+        <!ENTITY string.1 "value">
+        <!ENTITY moved.string "move">
+        """,
+        """\
+        <!ENTITY moved.string "move">
+        <!ENTITY string.1 "value">
+        """,
+        """\
+        <!ENTITY string.1 "value">
+        <!ENTITY moved.string "move">
+        """,
+    )
+
+
+def test_removed_string_with_comment():
+    """A removed string keeps the comments that came directly before it.
+
+    These comments are placed after the added "REMOVED STRING" note. Comments
+    separated from the string by a blank line are not carried over.
+    """
+    assert_result(
+        """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        """,
+    )
+
+    # With multiple lines of comments.
+
+    assert_result(
+        """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+
+        <!-- Removed file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY removed.4 "Fourth removed">
+        <!ENTITY string.2 "Second">
+        """,
+        """\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.4 "Fourth removed">
+        """,
+    )
--- a/tools/torbrowser/l10n/combine/tests/test_fluent.py
+++ b/tools/torbrowser/l10n/combine/tests/test_fluent.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    # Allow for indents to make the tests more readable.
+    if new_content is not None:
+        new_content = textwrap.dedent(new_content)
+    if old_content is not None:
+        old_content = textwrap.dedent(old_content)
+    if expect is not None:
+        expect = textwrap.dedent(expect)
+    assert expect == combine_files(
+        "test.ftl", new_content, old_content, "REMOVED STRING"
+    )
+
+
+def test_combine_empty():
+    # With neither new nor old content, the combined result is expected to be
+    # None as well.
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    # New file with no old content.
+    # The expected output is the new content, unchanged.
+    assert_result(
+        """\
+        string-1 = First
+        string-2 = Second
+        """,
+        None,
+        """\
+        string-1 = First
+        string-2 = Second
+        """,
+    )
+
+
+def test_combine_removed_file():
+    # Entire file was removed.
+    # All of the old strings are expected under a single "## REMOVED STRING"
+    # group comment, which is itself preceded by two blank lines.
+    assert_result(
+        None,
+        """\
+        string-1 = First
+        string-2 = Second
+        """,
+        """\
+
+
+        ## REMOVED STRING
+
+        string-1 = First
+        string-2 = Second
+        """,
+    )
+
+
+def test_no_change():
+    # Identical new and old content is expected to come back unchanged, so
+    # the same fixture is used for all three arguments.
+    content = """\
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    # A string that only exists in the new content stays where the new
+    # content has it; the expected output equals the new content and nothing
+    # is marked as removed.
+    assert_result(
+        """\
+        string-1 = First
+        string-new = NEW
+        string-2 = Second
+        """,
+        """\
+        string-1 = First
+        string-2 = Second
+        """,
+        """\
+        string-1 = First
+        string-new = NEW
+        string-2 = Second
+        """,
+    )
+
+
+def test_removed_string():
+    # A string that only exists in the old content is expected at the end of
+    # the output, after the new content, under a "## REMOVED STRING" group
+    # comment.
+    assert_result(
+        """\
+        string-1 = First
+        string-2 = Second
+        """,
+        """\
+        string-1 = First
+        removed = REMOVED
+        string-2 = Second
+        """,
+        """\
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        removed = REMOVED
+        """,
+    )
+
+
+def test_removed_and_added():
+    # Mixes additions and removals, including messages that only carry an
+    # attribute. The new content is expected verbatim first, then the three
+    # removed messages, in the order they had in the old content, under a
+    # single "## REMOVED STRING" group comment.
+    assert_result(
+        """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+        """,
+        """\
+        string-1 =
+            .attr = First
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        string-2 = Second
+        removed-3 = Third removed
+        """,
+        """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+
+
+        ## REMOVED STRING
+
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        removed-3 = Third removed
+        """,
+    )
+
+
+def test_updated():
+    # String content was updated.
+    # Only the new value is expected; the old value does not appear in the
+    # output.
+    assert_result(
+        """\
+        changed-string = NEW
+        """,
+        """\
+        changed-string = OLD
+        """,
+        """\
+        changed-string = NEW
+        """,
+    )
+
+
+def test_updated_comment():
+    # In every case below the expected output equals the new content: the
+    # comments of a string present in both versions come from the new
+    # content only.
+
+    # String comment was updated.
+    assert_result(
+        """\
+        # NEW
+        changed-string = string
+        """,
+        """\
+        # OLD
+        changed-string = string
+        """,
+        """\
+        # NEW
+        changed-string = string
+        """,
+    )
+    # Comment added.
+    assert_result(
+        """\
+        # NEW
+        changed-string = string
+        """,
+        """\
+        changed-string = string
+        """,
+        """\
+        # NEW
+        changed-string = string
+        """,
+    )
+    # Comment removed.
+    assert_result(
+        """\
+        changed-string = string
+        """,
+        """\
+        # OLD
+        changed-string = string
+        """,
+        """\
+        changed-string = string
+        """,
+    )
+
+    # With group comments.
+    # Both the "##" group comment and the "#" string comment differ between
+    # the two versions.
+    assert_result(
+        """\
+        ## GROUP NEW
+
+        # NEW
+        changed-string = string
+        """,
+        """\
+        ## GROUP OLD
+
+        # OLD
+        changed-string = string
+        """,
+        """\
+        ## GROUP NEW
+
+        # NEW
+        changed-string = string
+        """,
+    )
+
+
+def test_reordered():
+    # String was re-ordered.
+    # The same two strings appear in opposite order in the new and old
+    # content; the expected output follows the new content's order and marks
+    # nothing as removed.
+    assert_result(
+        """\
+        string-1 = value
+        moved-string = move
+        """,
+        """\
+        moved-string = move
+        string-1 = value
+        """,
+        """\
+        string-1 = value
+        moved-string = move
+        """,
+    )
+
+
+def test_removed_string_with_comment():
+    # A removed string is expected to keep the "#" comment that preceded it
+    # in the old content when it is placed under the "## REMOVED STRING"
+    # group comment.
+    assert_result(
+        """\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+        """,
+        """\
+        # Comment for first.
+        string-1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string-2 = Second
+        """,
+        """\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        # Comment for removed.
+        removed = REMOVED
+        """,
+    )
+
+    # Group comments are combined with the "REMOVED STRING" comments.
+    # If strings have no group comment, then a single "REMOVED STRING" is
+    # included for them.
+    # Here removed-1 and removed-2 share the first group, removed-3 follows
+    # the bare "##" line and so is expected under a plain "## REMOVED STRING",
+    # and removed-4 is expected with the two-line second group comment.
+    assert_result(
+        """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+        """,
+        """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ##
+
+        no-group = No group comment
+        removed-3 = Third removed
+
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        string-2 = Second
+        """,
+        """\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+
+
+        ## REMOVED STRING
+        ## First Group comment
+
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ## REMOVED STRING
+
+        removed-3 = Third removed
+
+        ## REMOVED STRING
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        """,
+    )
--- a/tools/torbrowser/l10n/combine/tests/test_properties.py
+++ b/tools/torbrowser/l10n/combine/tests/test_properties.py
+import textwrap
+
+from combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    # Allow for indents to make the tests more readable.
+    if new_content is not None:
+        new_content = textwrap.dedent(new_content)
+    if old_content is not None:
+        old_content = textwrap.dedent(old_content)
+    if expect is not None:
+        expect = textwrap.dedent(expect)
+    assert expect == combine_files(
+        "test.properties", new_content, old_content, "REMOVED STRING"
+    )
+
+
+def test_combine_empty():
+    # With neither new nor old content, the combined result is expected to be
+    # None as well.
+    assert_result(None, None, None)
+
+
+def test_combine_new_file():
+    # New file with no old content.
+    # The expected output is the new content, unchanged.
+    assert_result(
+        """\
+        string.1 = First
+        string.2 = Second
+        """,
+        None,
+        """\
+        string.1 = First
+        string.2 = Second
+        """,
+    )
+
+
+def test_combine_removed_file():
+    # Entire file was removed.
+    # The expected output starts with a blank line, and every old string is
+    # preceded by its own "# REMOVED STRING" comment.
+    assert_result(
+        None,
+        """\
+        string.1 = First
+        string.2 = Second
+        """,
+        """\
+
+        # REMOVED STRING
+        string.1 = First
+        # REMOVED STRING
+        string.2 = Second
+        """,
+    )
+
+
+def test_no_change():
+    # Identical new and old content is expected to come back unchanged, so
+    # the same fixture is used for all three arguments.
+    content = """\
+        string.1 = First
+        string.2 = Second
+        """
+    assert_result(content, content, content)
+
+
+def test_added_string():
+    # A string that only exists in the new content stays where the new
+    # content has it; the expected output equals the new content and nothing
+    # is marked as removed.
+    assert_result(
+        """\
+        string.1 = First
+        string.new = NEW
+        string.2 = Second
+        """,
+        """\
+        string.1 = First
+        string.2 = Second
+        """,
+        """\
+        string.1 = First
+        string.new = NEW
+        string.2 = Second
+        """,
+    )
+
+
+def test_removed_string():
+    # A string that only exists in the old content is expected at the end of
+    # the output, separated from the new content by a blank line and preceded
+    # by a "# REMOVED STRING" comment.
+    assert_result(
+        """\
+        string.1 = First
+        string.2 = Second
+        """,
+        """\
+        string.1 = First
+        removed = REMOVED
+        string.2 = Second
+        """,
+        """\
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        removed = REMOVED
+        """,
+    )
+
+
+def test_removed_and_added():
+    # Mixes additions and removals. The new content is expected verbatim
+    # first, then the three removed strings in the order they had in the old
+    # content, each with its own "# REMOVED STRING" comment.
+    assert_result(
+        """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+        """,
+        """\
+        string.1 = First
+        removed.1 = First removed
+        removed.2 = Second removed
+        string.2 = Second
+        removed.3 = Third removed
+        """,
+        """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        removed.2 = Second removed
+        # REMOVED STRING
+        removed.3 = Third removed
+        """,
+    )
+
+
+def test_updated():
+    # String content was updated.
+    # Only the new value is expected; the old value does not appear in the
+    # output.
+    assert_result(
+        """\
+        changed.string = NEW
+        """,
+        """\
+        changed.string = OLD
+        """,
+        """\
+        changed.string = NEW
+        """,
+    )
+
+
+def test_updated_comment():
+    # In every case below the expected output equals the new content: the
+    # comments of a string present in both versions come from the new
+    # content only.
+
+    # String comment was updated.
+    assert_result(
+        """\
+        # NEW
+        changed.string = string
+        """,
+        """\
+        # OLD
+        changed.string = string
+        """,
+        """\
+        # NEW
+        changed.string = string
+        """,
+    )
+    # Comment added.
+    assert_result(
+        """\
+        # NEW
+        changed.string = string
+        """,
+        """\
+        changed.string = string
+        """,
+        """\
+        # NEW
+        changed.string = string
+        """,
+    )
+    # Comment removed.
+    assert_result(
+        """\
+        changed.string = string
+        """,
+        """\
+        # OLD
+        changed.string = string
+        """,
+        """\
+        changed.string = string
+        """,
+    )
+
+    # With file comments
+    # Both the free-standing comment at the top of the file and the comment
+    # attached to the string differ between the two versions.
+    assert_result(
+        """\
+        # NEW file comment
+
+        # NEW
+        changed.string = string
+        """,
+        """\
+        # OLD file comment
+
+        # OLD
+        changed.string = string
+        """,
+        """\
+        # NEW file comment
+
+        # NEW
+        changed.string = string
+        """,
+    )
+
+
+def test_reordered():
+    # String was re-ordered.
+    # The same two strings appear in opposite order in the new and old
+    # content; the expected output follows the new content's order and marks
+    # nothing as removed.
+    assert_result(
+        """\
+        string.1 = value
+        moved.string = move
+        """,
+        """\
+        moved.string = move
+        string.1 = value
+        """,
+        """\
+        string.1 = value
+        moved.string = move
+        """,
+    )
+
+
+def test_removed_string_with_comment():
+    # A removed string is expected to keep the comment that preceded it in
+    # the old content, placed after the "# REMOVED STRING" comment.
+    assert_result(
+        """\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+        """,
+        """\
+        # Comment for first.
+        string.1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string.2 = Second
+        """,
+        """\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        # Comment for removed.
+        removed = REMOVED
+        """,
+    )
+
+    # With file comments and multi-line.
+    # All comments prior to a removed string are moved with it, until another
+    # entity or blank line is reached.
+    # Consequently the free-standing "# Removed file comment" of the old
+    # content, which is followed by a blank line, does not appear in the
+    # expected output, and removed.4 is expected without the
+    # "# Second / # File comment" block that sits above it.
+    assert_result(
+        """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+        """,
+        """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+        removed.1 = First removed
+        # Comment for second removed.
+        removed.2 = Second removed
+
+        # Removed file comment
+
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+
+        # Second
+        # File comment
+
+        removed.4 = Fourth removed
+        string.2 = Second
+        """,
+        """\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        # Comment for second removed.
+        removed.2 = Second removed
+        # REMOVED STRING
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+        # REMOVED STRING
+        removed.4 = Fourth removed
+        """,
+    )