Commit a2fdbf3c authored by Karsten Loesing
Browse files

Move lower-level sanitizing code to its own class.

Part of #20542.
parent c0ee1a6c
Loading
Loading
Loading
Loading
+30 −374
Original line number Diff line number Diff line
@@ -3,8 +3,6 @@

package org.torproject.metrics.collector.bridgedescs;

import static java.time.ZoneOffset.UTC;

import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgeServerDescriptor;
@@ -35,18 +33,12 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.security.GeneralSecurityException;
import java.security.SecureRandom;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
@@ -89,26 +81,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {

  private Path inputDirectory;

  private boolean replaceIpAddressesWithHashes;

  private boolean persistenceProblemWithSecrets;

  private SortedMap<String, byte[]> secretsForHashingIpAddresses;

  private String bridgeSanitizingCutOffTimestamp;

  private boolean haveWarnedAboutInterval;

  private Path bridgeIpSecretsFile;

  private SecureRandom secureRandom;

  private Path outputDirectory;

  private Path recentDirectory;

  private Path statsDirectory;

  private SensitivePartsSanitizer sensitivePartsSanitizer;

  @Override
  public String module() {
    return "bridgedescs";
@@ -128,90 +108,30 @@ public class SanitizedBridgesWriter extends CollecTorMain {
        .resolve(BRIDGE_DESCRIPTORS);
    this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
    this.statsDirectory = config.getPath(Key.StatsPath);
    this.replaceIpAddressesWithHashes =
    boolean replaceIpAddressesWithHashes =
        config.getBool(Key.ReplaceIpAddressesWithHashes);
    SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
        "yyyy-MM-dd-HH-mm-ss");
    this.rsyncCatString = rsyncCatFormat.format(
        System.currentTimeMillis());

    /* Initialize secure random number generator if we need it. */
    if (this.replaceIpAddressesWithHashes) {
      try {
        this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
      } catch (GeneralSecurityException e) {
        logger.warn("Could not initialize secure "
            + "random number generator! Not calculating any IP address "
            + "hashes in this execution!", e);
        this.persistenceProblemWithSecrets = true;
      }
    }

    /* Read hex-encoded secrets for replacing IP addresses with hashes
     * from disk. */
    this.secretsForHashingIpAddresses = new TreeMap<>();
    this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
    if (Files.exists(this.bridgeIpSecretsFile)) {
      try {
        for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
          String[] parts = line.split(",");
          if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
              && line.length() != ("yyyy-MM,".length() + 50 * 2)
              && line.length() != ("yyyy-MM,".length() + 83 * 2))
              || parts.length != 2) {
            logger.warn("Invalid line in bridge-ip-secrets file "
                + "starting with '{}'! "
                + "Not calculating any IP address hashes in this "
                + "execution!", line.substring(0, 7));
            this.persistenceProblemWithSecrets = true;
            break;
          }
          String month = parts[0];
          byte[] secret = Hex.decodeHex(parts[1].toCharArray());
          this.secretsForHashingIpAddresses.put(month, secret);
        }
        if (!this.persistenceProblemWithSecrets) {
          logger.debug("Read {} secrets for hashing bridge IP addresses.",
              this.secretsForHashingIpAddresses.size());
        }
      } catch (DecoderException e) {
        logger.warn("Failed to decode hex string in {}! Not calculating any IP "
            + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
        this.persistenceProblemWithSecrets = true;
      } catch (IOException e) {
        logger.warn("Failed to read {}! Not calculating any IP "
            + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
        this.persistenceProblemWithSecrets = true;
      }
    }

    Path bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
    if (replaceIpAddressesWithHashes) {
      long limitBridgeSanitizingIntervalDays
          = config.getInt(Key.BridgeDescriptorMappingsLimit);

    /* If we're configured to keep secrets only for a limited time, define
     * the cut-off day and time. */
    LocalDateTime bridgeSanitizingCutOffDateTime
        = LocalDateTime.of(1999, 12, 31, 23, 59, 59);
    if (limitBridgeSanitizingIntervalDays >= 0L) {
      LocalDateTime configuredBridgeSanitizingCutOffDateTime
          = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays);
      if (configuredBridgeSanitizingCutOffDateTime.isAfter(
          bridgeSanitizingCutOffDateTime)) {
        bridgeSanitizingCutOffDateTime
            = configuredBridgeSanitizingCutOffDateTime;
      }
      this.sensitivePartsSanitizer = new SensitivePartsSanitizer(
          bridgeIpSecretsFile, limitBridgeSanitizingIntervalDays);
    } else {
      this.sensitivePartsSanitizer = new SensitivePartsSanitizer();
    }
    this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime
        .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));

    logger.info("Using cut-off datetime '{}' for secrets.",
        this.bridgeSanitizingCutOffTimestamp);

    // Import bridge descriptors
    this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);

    // Finish writing sanitized bridge descriptors to disk
    this.finishWriting();
    if (replaceIpAddressesWithHashes) {
      this.sensitivePartsSanitizer.finishWriting();
    }

    this.checkStaleDescriptors();

@@ -423,206 +343,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
    }
  }

  /**
   * Sanitizes an {@code address:port} pair by scrubbing the address part and
   * the port part separately.
   *
   * <p>The input is split at its last colon. An address part that starts with
   * {@code [} is handed to the IPv6 scrubber, anything else to the IPv4
   * scrubber.</p>
   *
   * @param orAddress Address and port, separated by a colon.
   * @param fingerprintBytes Fingerprint bytes passed on to the scrubbers.
   * @param published Publication time passed on to the scrubbers.
   * @return Scrubbed address and scrubbed port joined by a colon, or
   *     {@code null} if the input contains no colon or the address scrubber
   *     returned {@code null}.
   * @throws IOException If one of the scrubbers throws it.
   */
  private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
      String published) throws IOException {
    int lastColon = orAddress.lastIndexOf(':');
    if (lastColon < 0) {
      /* Without a colon this cannot be an address:port pair. */
      return null;
    }
    String rawAddress = orAddress.substring(0, lastColon);
    String rawPort = orAddress.substring(lastColon + 1);
    String sanitizedAddress = rawAddress.startsWith("[")
        ? this.scrubIpv6Address(rawAddress, fingerprintBytes, published)
        : this.scrubIpv4Address(rawAddress, fingerprintBytes, published);
    /* The port is scrubbed regardless of whether the address was usable. */
    String sanitizedPort = this.scrubTcpPort(rawPort, fingerprintBytes,
        published);
    if (null == sanitizedAddress) {
      return null;
    }
    return sanitizedAddress + ":" + sanitizedPort;
  }

  /**
   * Sanitizes an IPv4 address.
   *
   * <p>If IP addresses are not replaced with hashes, the constant
   * {@code 127.0.0.1} is returned. Otherwise the result is {@code 10.x.y.z}
   * where x, y and z are the first three bytes of the SHA-256 digest over
   * the four address bytes, the first 20 fingerprint bytes and the first 31
   * bytes of the secret for the month of {@code published}.</p>
   *
   * @param address IPv4 address in dotted-quad notation.
   * @param fingerprintBytes Fingerprint bytes, of which the first 20 are
   *     used as hash input.
   * @param published Publication time starting with {@code yyyy-MM}.
   * @return Sanitized address, or {@code null} if there is a persistence
   *     problem with secrets or if the address does not consist of exactly
   *     four decimal parts between 0 and 255.
   * @throws IOException If obtaining the secret for the month fails.
   */
  private String scrubIpv4Address(String address, byte[] fingerprintBytes,
      String published) throws IOException {
    if (!this.replaceIpAddressesWithHashes) {
      return "127.0.0.1";
    }
    if (this.persistenceProblemWithSecrets) {
      /* There's a persistence problem, so we shouldn't scrub more IP
       * addresses in this execution. */
      return null;
    }
    String[] ipParts = address.split("\\.");
    if (ipParts.length != 4) {
      /* Invalid IPv4 address: report it like the IPv6 code does rather than
       * failing with an unchecked exception. */
      return null;
    }
    byte[] hashInput = new byte[4 + 20 + 31];
    for (int i = 0; i < 4; i++) {
      int octet;
      try {
        octet = Integer.parseInt(ipParts[i]);
      } catch (NumberFormatException e) {
        /* Invalid IPv4 address. */
        return null;
      }
      if (octet < 0 || octet > 255) {
        /* Invalid IPv4 address. */
        return null;
      }
      hashInput[i] = (byte) octet;
    }
    System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
    String month = published.substring(0, "yyyy-MM".length());
    byte[] secret = this.getSecretForMonth(month);
    System.arraycopy(secret, 0, hashInput, 24, 31);
    byte[] hashOutput = DigestUtils.sha256(hashInput);
    /* Mask with 0xff to print each digest byte as an unsigned value. */
    return "10."
        + (hashOutput[0] & 0xff) + "."
        + (hashOutput[1] & 0xff) + "."
        + (hashOutput[2] & 0xff);
  }

  /**
   * Sanitizes an IPv6 address that is enclosed in square brackets.
   *
   * <p>If IP addresses are not replaced with hashes, the constant
   * {@code [fd9f:2e19:3bcf::]} is returned. Otherwise the address is expanded
   * to its 16 bytes and hashed with SHA-256 together with the first 20
   * fingerprint bytes and 19 bytes of the secret for the month of
   * {@code published}, starting at offset 31. The last six hex characters of
   * the digest are appended to the prefix {@code [fd9f:2e19:3bcf::} in the
   * form {@code xx:xxxx}.</p>
   *
   * @param address IPv6 address including the enclosing brackets.
   * @param fingerprintBytes Fingerprint bytes, of which the first 20 are
   *     used as hash input.
   * @param published Publication time starting with {@code yyyy-MM}.
   * @return Sanitized address including brackets, or {@code null} if there
   *     is a persistence problem with secrets or if the address cannot be
   *     parsed.
   * @throws IOException If obtaining the secret for the month fails.
   */
  private String scrubIpv6Address(String address, byte[] fingerprintBytes,
      String published) throws IOException {
    StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
    if (this.replaceIpAddressesWithHashes) {
      if (this.persistenceProblemWithSecrets) {
        /* There's a persistence problem, so we shouldn't scrub more IP
         * addresses in this execution. */
        return null;
      }
      if (address.length() < 2) {
        /* Too short to contain both enclosing brackets. */
        return null;
      }
      String[] doubleColonSeparatedParts = address.substring(1,
          address.length() - 1).split("::", -1);
      if (doubleColonSeparatedParts.length > 2) {
        /* Invalid IPv6 address. */
        return null;
      }
      /* Convert the groups before and after "::" to hex strings. */
      List<String> hexParts = new ArrayList<>();
      for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
        StringBuilder hexPart = new StringBuilder();
        String[] parts = doubleColonSeparatedPart.split(":", -1);
        if (parts.length < 1 || parts.length > 8) {
          /* Invalid IPv6 address. */
          return null;
        }
        for (String part : parts) {
          if (part.contains(".")) {
            String[] ipParts = part.split("\\.");
            if (ipParts.length != 4) {
              /* Invalid IPv4 part in IPv6 address. */
              return null;
            }
            byte[] ipv4Bytes = new byte[4];
            for (int m = 0; m < 4; m++) {
              int octet;
              try {
                octet = Integer.parseInt(ipParts[m]);
              } catch (NumberFormatException e) {
                /* Invalid IPv4 part in IPv6 address. */
                return null;
              }
              if (octet < 0 || octet > 255) {
                /* Invalid IPv4 part in IPv6 address. */
                return null;
              }
              ipv4Bytes[m] = (byte) octet;
            }
            hexPart.append(Hex.encodeHexString(ipv4Bytes));
          } else if (part.length() > 4) {
            /* Invalid IPv6 address. */
            return null;
          } else {
            /* Left-pad the group with zeros to four hex characters. */
            for (int k = part.length(); k < 4; k++) {
              hexPart.append("0");
            }
            hexPart.append(part);
          }
        }
        hexParts.add(hexPart.toString());
      }
      StringBuilder hex = new StringBuilder();
      hex.append(hexParts.get(0));
      if (hexParts.size() == 2) {
        /* Fill the gap left by "::" with zeros up to 32 hex characters. */
        for (int i = 32 - hexParts.get(0).length()
            - hexParts.get(1).length(); i > 0; i--) {
          hex.append("0");
        }
        hex.append(hexParts.get(1));
      }
      byte[] ipBytes;
      try {
        ipBytes = Hex.decodeHex(hex.toString().toCharArray());
      } catch (DecoderException e) {
        /* Invalid IPv6 address. */
        return null;
      }
      if (ipBytes.length != 16) {
        /* Invalid IPv6 address. */
        return null;
      }
      byte[] hashInput = new byte[16 + 20 + 19];
      System.arraycopy(ipBytes, 0, hashInput, 0, 16);
      System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
      String month = published.substring(0, "yyyy-MM".length());
      byte[] secret = this.getSecretForMonth(month);
      System.arraycopy(secret, 31, hashInput, 36, 19);
      String hashOutput = DigestUtils.sha256Hex(hashInput);
      sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4);
      sb.append(":");
      sb.append(hashOutput.substring(hashOutput.length() - 4));
    }
    sb.append("]");
    return sb.toString();
  }

  /**
   * Sanitizes a TCP port.
   *
   * <p>Port {@code 0} is always kept as {@code 0}. If IP addresses are not
   * replaced with hashes, any other port becomes {@code 1}. Otherwise the
   * port is hashed with SHA-256 together with the first 20 fingerprint bytes
   * and 33 bytes of the secret for the month of {@code published}, starting
   * at offset 50, and the result is mapped into the range 49152 to 65535.</p>
   *
   * @param portString Decimal port number; a value that is not a parsable
   *     integer makes the hashing path throw
   *     {@link NumberFormatException}.
   * @param fingerprintBytes Fingerprint bytes, of which the first 20 are
   *     used as hash input.
   * @param published Publication time starting with {@code yyyy-MM}.
   * @return Sanitized port, or {@code null} if there is a persistence
   *     problem with secrets.
   * @throws IOException If obtaining the secret for the month fails.
   */
  private String scrubTcpPort(String portString, byte[] fingerprintBytes,
      String published) throws IOException {
    if (portString.equals("0")) {
      return "0";
    }
    if (!this.replaceIpAddressesWithHashes) {
      return "1";
    }
    if (this.persistenceProblemWithSecrets) {
      /* There's a persistence problem, so we shouldn't scrub more TCP
       * ports in this execution. */
      return null;
    }
    byte[] digestInput = new byte[2 + 20 + 33];
    int port = Integer.parseInt(portString);
    /* Big-endian encoding of the port in the first two input bytes. */
    digestInput[0] = (byte) (port >> 8);
    digestInput[1] = (byte) port;
    System.arraycopy(fingerprintBytes, 0, digestInput, 2, 20);
    String month = published.substring(0, "yyyy-MM".length());
    byte[] monthlySecret = this.getSecretForMonth(month);
    System.arraycopy(monthlySecret, 50, digestInput, 22, 33);
    byte[] digest = DigestUtils.sha256(digestInput);
    /* Take the upper 14 bits of the first two digest bytes and set the two
     * highest bits of the resulting 16-bit port number. */
    int firstTwoBytes = ((digest[0] & 0xff) << 8) | (digest[1] & 0xff);
    int hashedPort = (firstTwoBytes >> 2) | 0xc000;
    return String.valueOf(hashedPort);
  }

  /**
   * Returns the secret used for hashing in the given month, generating or
   * extending it to 83 bytes if necessary.
   *
   * <p>A newly generated or extended secret is appended to the secrets file
   * before it is used, unless the month sorts before the cut-off timestamp,
   * in which case it is only kept in memory.</p>
   *
   * @param month Month in {@code yyyy-MM} format.
   * @return Secret for the given month with at least 83 bytes.
   * @throws IOException If a new secret could not be written to disk; the
   *     persistence problem flag is set in that case.
   */
  private byte[] getSecretForMonth(String month) throws IOException {
    byte[] known = this.secretsForHashingIpAddresses.get(month);
    if (null != known && known.length >= 83) {
      return known;
    }

    /* Start from random bytes and overwrite the beginning with whatever
     * shorter secret we already have for this month. */
    byte[] extended = new byte[83];
    this.secureRandom.nextBytes(extended);
    if (null != known) {
      System.arraycopy(known, 0, extended, 0, known.length);
    }

    if (month.compareTo(this.bridgeSanitizingCutOffTimestamp) >= 0) {
      /* Append secret to file on disk immediately before using it, or
       * we might end with inconsistently sanitized bridges. */
      String secretLine = month + "," + Hex.encodeHexString(extended) + "\n";
      byte[] lineBytes = secretLine.getBytes();
      try {
        if (!Files.exists(this.bridgeIpSecretsFile)) {
          Files.createDirectories(this.bridgeIpSecretsFile.getParent());
          Files.write(this.bridgeIpSecretsFile, lineBytes);
        } else {
          Files.write(this.bridgeIpSecretsFile, lineBytes,
              StandardOpenOption.APPEND);
        }
      } catch (IOException e) {
        logger.warn("Could not store new secret to disk! Not calculating "
            + "any IP address or TCP port hashes in this execution!", e);
        this.persistenceProblemWithSecrets = true;
        throw new IOException(e);
      }
    } else {
      logger.warn("Generated a secret that we won't make persistent, "
          + "because it's outside our bridge descriptor sanitizing "
          + "interval.");
    }

    this.secretsForHashingIpAddresses.put(month, extended);
    return extended;
  }

  private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";

  /**
@@ -631,7 +351,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
  public void sanitizeAndStoreNetworkStatus(byte[] data,
      String publicationTime, String authorityFingerprint) {

    if (this.persistenceProblemWithSecrets) {
    if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
      /* There's a persistence problem, so we shouldn't scrub more IP
       * addresses in this execution. */
      return;
@@ -641,19 +361,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
      maxNetworkStatusPublishedTime = publicationTime;
    }

    if (this.bridgeSanitizingCutOffTimestamp
        .compareTo(publicationTime) > 0) {
      String text = "Sanitizing and storing network status with "
          + "publication time outside our descriptor sanitizing "
          + "interval.";
      if (this.haveWarnedAboutInterval) {
        logger.debug(text);
      } else {
        logger.warn(text);
        this.haveWarnedAboutInterval = true;
      }
    }

    /* Parse the given network status line by line. */
    DescriptorBuilder header = new DescriptorBuilder();
    boolean includesFingerprintLine = false;
@@ -738,14 +445,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
          String hashedDescriptorIdentifier = Base64.encodeBase64String(
              DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
              + "=="))).substring(0, 27);
          String scrubbedAddress = scrubIpv4Address(address,
              fingerprintBytes,
              descPublicationTime);
          String scrubbedAddress = this.sensitivePartsSanitizer
              .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
          String nickname = parts[1];
          String scrubbedOrPort = this.scrubTcpPort(orPort,
              fingerprintBytes, descPublicationTime);
          String scrubbedDirPort = this.scrubTcpPort(dirPort,
              fingerprintBytes, descPublicationTime);
          String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
              orPort, fingerprintBytes, descPublicationTime);
          String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
              dirPort, fingerprintBytes, descPublicationTime);
          scrubbed.append("r ").append(nickname).space()
              .append(hashedBridgeIdentityBase64).space()
              .append(hashedDescriptorIdentifier).space()
@@ -757,8 +463,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
        /* Sanitize any addresses in a lines using the fingerprint and
         * descriptor publication time from the previous r line. */
        } else if (line.startsWith("a ")) {
          String scrubbedOrAddress = scrubOrAddress(
              line.substring("a ".length()), fingerprintBytes,
          String scrubbedOrAddress = this.sensitivePartsSanitizer
              .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
              descPublicationTime);
          if (scrubbedOrAddress != null) {
            scrubbed.append("a ").append(scrubbedOrAddress).newLine();
@@ -856,7 +562,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   */
  public void sanitizeAndStoreServerDescriptor(byte[] data) {

    if (this.persistenceProblemWithSecrets) {
    if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
      /* There's a persistence problem, so we shouldn't scrub more IP
       * addresses in this execution. */
      return;
@@ -922,18 +628,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
          if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
            maxServerDescriptorPublishedTime = published;
          }
          if (this.bridgeSanitizingCutOffTimestamp
              .compareTo(published) > 0) {
            String text = "Sanitizing and storing "
                + "server descriptor with publication time outside our "
                + "descriptor sanitizing interval.";
            if (this.haveWarnedAboutInterval) {
              logger.debug(text);
            } else {
              logger.warn(text);
              this.haveWarnedAboutInterval = true;
            }
          }
          scrubbed.append(line).newLine();

        /* Parse the fingerprint to determine the hashed bridge
@@ -1127,8 +821,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
      return;
    }
    try {
      String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
          published);
      String scrubbedAddressString = this.sensitivePartsSanitizer
          .scrubIpv4Address(address, fingerprintBytes, published);
      if (null == scrubbedAddressString) {
        logger.warn("Invalid IP address in \"router\" line in bridge server "
            + "descriptor. Skipping descriptor.");
@@ -1137,8 +831,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
      scrubbedAddress.append(scrubbedAddressString);
      for (Map.Entry<StringBuilder, String> e
          : scrubbedIpAddressesAndTcpPorts.entrySet()) {
        String scrubbedOrAddress = scrubOrAddress(e.getValue(),
            fingerprintBytes, published);
        String scrubbedOrAddress = this.sensitivePartsSanitizer
            .scrubOrAddress(e.getValue(), fingerprintBytes, published);
        if (null == scrubbedOrAddress) {
          logger.warn("Invalid IP address or TCP port in \"or-address\" line "
              + "in bridge server descriptor. Skipping descriptor.");
@@ -1147,8 +841,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
        e.getKey().append(scrubbedOrAddress);
      }
      for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
        String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
            published);
        String scrubbedTcpPort = this.sensitivePartsSanitizer
            .scrubTcpPort(e.getValue(), fingerprintBytes, published);
        if (null == scrubbedTcpPort) {
          logger.warn("Invalid TCP port in \"router\" line in bridge server "
              + "descriptor. Skipping descriptor.");
@@ -1159,7 +853,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
    } catch (IOException exception) {
      /* There's a persistence problem, so we shouldn't scrub more IP addresses
       * or TCP ports in this execution. */
      this.persistenceProblemWithSecrets = true;
      return;
    }

@@ -1500,43 +1193,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
    }
  }

  /**
   * Removes secrets from the secrets file that are no longer needed.
   *
   * <p>If the oldest known secret sorts before the cut-off timestamp, the
   * secrets file is overwritten with only those secrets whose month does not
   * sort before the cut-off timestamp. A failure to write the file is logged
   * and otherwise ignored.</p>
   */
  public void finishWriting() {
    if (this.secretsForHashingIpAddresses.isEmpty()) {
      return;
    }
    String cutOff = this.bridgeSanitizingCutOffTimestamp;
    if (this.secretsForHashingIpAddresses.firstKey().compareTo(cutOff) >= 0) {
      /* Even the oldest secret is still needed. */
      return;
    }

    /* Delete secrets that we don't need anymore. */
    try {
      List<String> retainedLines = new ArrayList<>();
      for (Map.Entry<String, byte[]> retained
          : this.secretsForHashingIpAddresses.tailMap(cutOff).entrySet()) {
        retainedLines.add(retained.getKey() + ","
            + Hex.encodeHexString(retained.getValue()));
      }
      int deleted = this.secretsForHashingIpAddresses.headMap(cutOff).size();
      int kept = retainedLines.size();
      Files.write(this.bridgeIpSecretsFile, retainedLines);
      logger.info("Deleted {} secrets that we don't need anymore and kept {}.",
          deleted, kept);
    } catch (IOException e) {
      logger.warn("Could not store reduced set of secrets to disk! This is a "
          + "bad sign, better check what's going on!", e);
    }
  }

  private void checkStaleDescriptors() {
    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
        "yyyy-MM-dd HH:mm:ss");
+378 −0

File added.

Preview size limit exceeded, changes collapsed.

+2 −0
Original line number Diff line number Diff line
@@ -821,6 +821,8 @@ public class SanitizedBridgesWriterTest {

  @Test
  public void testBridgeIpSecretsIsDirectory() throws Exception {
    this.configuration.setProperty(Key.ReplaceIpAddressesWithHashes.name(),
        "true");
    Files.createDirectory(Paths.get(statsDirectory, "bridge-ip-secrets"));
    this.runTest();
    assertTrue("Sanitized server descriptors without secrets.",