Commit 2e8cdf7f authored by Karsten Loesing
Browse files

Move sanitizing code to one class per type.

Part of #20542.
parent 47a4c7a9
Loading
Loading
Loading
Loading
+118 −0
Original line number Diff line number Diff line
/* Copyright 2010--2020 The Tor Project
 * See LICENSE for licensing information */

package org.torproject.metrics.collector.bridgedescs;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;

/**
 * Common state and helper methods shared by the sanitized bridge descriptor
 * classes: holds the original and sanitized descriptor bytes and provides
 * helpers for parsing an identity-ed25519 certificate and for computing
 * descriptor digests.
 */
public abstract class SanitizedBridgeDescriptor {

  private static final Logger logger = LoggerFactory.getLogger(
      SanitizedBridgeDescriptor.class);

  /** Descriptor bytes exactly as passed to the constructor. */
  protected byte[] originalBytes;

  /** Sanitizer passed to the constructor, kept for use by subclasses. */
  protected SensitivePartsSanitizer sensitivePartsSanitizer;

  /** Sanitized descriptor bytes; not assigned by this base class. */
  protected byte[] sanitizedBytes;

  /** Publication time string; not assigned by this base class. */
  protected String publishedString;

  /**
   * Stores the original descriptor bytes and the sanitizer to use.
   *
   * @param originalBytes raw descriptor bytes
   * @param sensitivePartsSanitizer sanitizer for sensitive descriptor parts
   */
  SanitizedBridgeDescriptor(byte[] originalBytes,
      SensitivePartsSanitizer sensitivePartsSanitizer) {
    this.originalBytes = originalBytes;
    this.sensitivePartsSanitizer = sensitivePartsSanitizer;
  }

  /**
   * Extracts the master-key-ed25519 from a base64-encoded identity-ed25519
   * certificate.
   *
   * <p>The certificate must be at least 40 bytes long with byte 0 (version)
   * equal to 0x01, byte 1 (cert type) equal to 0x04, byte 6 (certified key
   * type) equal to 0x01 and byte 39 (number of extensions) non-zero.
   * Extensions start at offset 40; each has a 4-byte header consisting of a
   * 2-byte big-endian length, a 1-byte type and one further byte that is
   * skipped here.  The first extension with length 32 and type 4 is taken to
   * contain the master key.</p>
   *
   * @param identityEd25519Base64 base64-encoded certificate contents
   * @return the base64-encoded master key without trailing "=" padding, or
   *     {@code null} if it cannot be located (a warning is logged)
   */
  protected String parseMasterKeyEd25519FromIdentityEd25519(
      String identityEd25519Base64) {
    byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
    if (identityEd25519 == null || identityEd25519.length < 40) {
      logger.warn("Invalid length of identity-ed25519 (in bytes): {}",
          identityEd25519 == null ? 0 : identityEd25519.length);
    } else if (identityEd25519[0] != 0x01) {
      logger.warn("Unknown version in identity-ed25519: {}",
          identityEd25519[0]);
    } else if (identityEd25519[1] != 0x04) {
      logger.warn("Unknown cert type in identity-ed25519: {}",
          identityEd25519[1]);
    } else if (identityEd25519[6] != 0x01) {
      /* Log the byte that was actually checked. */
      logger.warn("Unknown certified key type in identity-ed25519: {}",
          identityEd25519[6]);
    } else if (identityEd25519[39] == 0x00) {
      logger.warn("No extensions in identity-ed25519 (which "
              + "would contain the encoded master-key-ed25519): {}",
          identityEd25519[39]);
    } else {
      /* Java bytes are signed; mask to read count and lengths as unsigned
       * values. */
      int extensionCount = identityEd25519[39] & 0xFF;
      int extensionStart = 40;
      for (int i = 0; i < extensionCount; i++) {
        if (identityEd25519.length < extensionStart + 4) {
          logger.warn("Invalid extension with id {} in identity-ed25519.", i);
          break;
        }
        int extensionLength = ((identityEd25519[extensionStart] & 0xFF) << 8)
            | (identityEd25519[extensionStart + 1] & 0xFF);
        int extensionType = identityEd25519[extensionStart + 2] & 0xFF;
        if (extensionLength == 32 && extensionType == 4) {
          if (identityEd25519.length < extensionStart + 4 + 32) {
            logger.warn("Invalid extension with id {} in identity-ed25519.", i);
            break;
          }
          byte[] masterKeyEd25519 = new byte[32];
          System.arraycopy(identityEd25519, extensionStart + 4,
              masterKeyEd25519, 0, masterKeyEd25519.length);
          String masterKeyEd25519Base64 = Base64.encodeBase64String(
              masterKeyEd25519);
          return masterKeyEd25519Base64.replaceAll("=", "");
        }
        /* Not the extension we're looking for; skip header and data. */
        extensionStart += 4 + extensionLength;
      }
    }
    logger.warn("Unable to locate master-key-ed25519 in identity-ed25519.");
    return null;
  }

  /**
   * Computes the hex-encoded SHA-1 digest of the SHA-1 digest of the
   * descriptor part that starts at the first occurrence of
   * {@code startToken} and ends with the first occurrence of
   * {@code sigToken} (inclusive).
   *
   * @param descriptorBytes descriptor bytes to search and digest
   * @param startToken token marking the start of the digested part
   * @param sigToken token marking the end of the digested part
   * @return the hex digest, or {@code null} if either token is missing or
   *     the end would not lie behind the start (a warning is logged)
   */
  protected String computeDescriptorDigest(byte[] descriptorBytes,
      String startToken, String sigToken) {
    String descriptorDigest = null;
    byte[] forDigest = extractBytesForDigest(descriptorBytes, startToken,
        sigToken);
    if (forDigest != null) {
      descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest));
    }
    if (descriptorDigest == null) {
      logger.warn("Could not calculate extra-info descriptor digest.");
    }
    return descriptorDigest;
  }

  /**
   * Computes the base64-encoded (without "=" padding) SHA-256 digest of the
   * SHA-256 digest of the descriptor part that starts at the first
   * occurrence of {@code startToken} and ends with the first occurrence of
   * {@code sigToken} (inclusive).
   *
   * @param descriptorBytes descriptor bytes to search and digest
   * @param startToken token marking the start of the digested part
   * @param sigToken token marking the end of the digested part
   * @return the unpadded base64 digest, or {@code null} if either token is
   *     missing or the end would not lie behind the start (a warning is
   *     logged)
   */
  protected String computeSha256Base64Digest(byte[] descriptorBytes,
      String startToken, String sigToken) {
    String descriptorDigestSha256Base64 = null;
    byte[] forDigest = extractBytesForDigest(descriptorBytes, startToken,
        sigToken);
    if (forDigest != null) {
      descriptorDigestSha256Base64 = Base64.encodeBase64String(
          DigestUtils.sha256(DigestUtils.sha256(forDigest)))
          .replaceAll("=", "");
    }
    if (descriptorDigestSha256Base64 == null) {
      logger.warn("Could not calculate extra-info "
          + "descriptor SHA256 digest.");
    }
    return descriptorDigestSha256Base64;
  }

  /**
   * Copies the bytes from the start of {@code startToken} through the end of
   * {@code sigToken}, or returns {@code null} if that range cannot be
   * determined.
   */
  private static byte[] extractBytesForDigest(byte[] descriptorBytes,
      String startToken, String sigToken) {
    /* US-ASCII decoding yields one char per byte, so string indexes can be
     * used as byte offsets. */
    String ascii = new String(descriptorBytes, StandardCharsets.US_ASCII);
    int start = ascii.indexOf(startToken);
    int sigTokenStart = ascii.indexOf(sigToken);
    /* Check for a missing token before adding the token length, because the
     * sum would otherwise hide the -1. */
    if (start < 0 || sigTokenStart < 0) {
      return null;
    }
    int end = sigTokenStart + sigToken.length();
    if (end <= start) {
      return null;
    }
    byte[] forDigest = new byte[end - start];
    System.arraycopy(descriptorBytes, start, forDigest, 0, end - start);
    return forDigest;
  }
}
+192 −0
Original line number Diff line number Diff line
/* Copyright 2010--2020 The Tor Project
 * See LICENSE for licensing information */

package org.torproject.metrics.collector.bridgedescs;

import org.torproject.metrics.collector.conf.Annotation;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;

/**
 * Sanitized version of a bridge extra-info descriptor: replaces the bridge
 * identity with its hash, reduces transport lines to the transport name,
 * replaces identity-ed25519 with a hashed master-key-ed25519 line, drops
 * transport-info lines and signatures, and appends digest lines.
 */
public class SanitizedBridgeExtraInfoDescriptor
    extends SanitizedBridgeDescriptor {

  private static final Logger logger = LoggerFactory.getLogger(
      SanitizedBridgeExtraInfoDescriptor.class);

  /** Digest returned by {@code computeDescriptorDigest} for the original
   * descriptor bytes; {@code null} until sanitizing has succeeded or if the
   * digest could not be computed. */
  private String descriptorDigest;

  /**
   * Creates a new instance for the given original descriptor bytes.
   *
   * @param originalBytes raw extra-info descriptor bytes
   * @param sensitivePartsSanitizer sanitizer passed on to the superclass
   */
  SanitizedBridgeExtraInfoDescriptor(byte[] originalBytes,
      SensitivePartsSanitizer sensitivePartsSanitizer) {
    super(originalBytes, sensitivePartsSanitizer);
  }

  /**
   * Parses the original descriptor line by line and builds the sanitized
   * descriptor, which is afterwards available via
   * {@link #getSanitizedBytes()}.
   *
   * @return {@code true} if sanitizing succeeded, {@code false} if the
   *     descriptor contains an illegal or unrecognized line, if the master
   *     key cannot be parsed from identity-ed25519 or does not match
   *     master-key-ed25519, or if the descriptor cannot be parsed
   */
  boolean sanitizeDescriptor() {

    /* Parse descriptor to generate a sanitized version. */
    String masterKeyEd25519FromIdentityEd25519 = null;
    DescriptorBuilder scrubbed = new DescriptorBuilder();
    try (BufferedReader br = new BufferedReader(new StringReader(new String(
        this.originalBytes, StandardCharsets.US_ASCII)))) {
      scrubbed.append(Annotation.BridgeExtraInfo.toString());
      String line;
      String masterKeyEd25519 = null;
      while ((line = br.readLine()) != null) {

        /* Parse bridge identity from extra-info line and replace it with
         * its hash in the sanitized descriptor. */
        String[] parts = line.split(" ");
        if (line.startsWith("extra-info ")) {
          if (parts.length < 3) {
            logger.debug("Illegal line in extra-info descriptor: '{}'.  "
                + "Skipping descriptor.", line);
            return false;
          }
          String hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
              parts[2].toCharArray()));
          scrubbed.append("extra-info ").append(parts[1])
              .space().append(hashedBridgeIdentity.toUpperCase()).newLine();

          /* Parse the publication time to determine the file name. */
        } else if (line.startsWith("published ")) {
          scrubbed.append(line).newLine();
          this.publishedString = line.substring("published ".length());

          /* Remove everything from transport lines except the transport
           * name. */
        } else if (line.startsWith("transport ")) {
          if (parts.length < 3) {
            logger.debug("Illegal line in extra-info descriptor: '{}'.  "
                + "Skipping descriptor.", line);
            return false;
          }
          scrubbed.append("transport ").append(parts[1]).newLine();

          /* Skip transport-info lines entirely. */
        } else if (line.startsWith("transport-info ")) {

          /* Extract master-key-ed25519 from identity-ed25519. */
        } else if (line.equals("identity-ed25519")) {
          StringBuilder sb = new StringBuilder();
          while ((line = br.readLine()) != null
              && !line.equals("-----END ED25519 CERT-----")) {
            if (line.equals("-----BEGIN ED25519 CERT-----")) {
              continue;
            }
            sb.append(line);
          }
          masterKeyEd25519FromIdentityEd25519 =
              this.parseMasterKeyEd25519FromIdentityEd25519(
                  sb.toString());

          /* Without this check, the string concatenation below would turn
           * a null master key into the literal "null=" and we'd write a
           * bogus master-key-ed25519 line. */
          if (masterKeyEd25519FromIdentityEd25519 == null) {
            logger.warn("Could not parse master-key-ed25519 from "
                + "identity-ed25519.  Skipping descriptor.");
            return false;
          }
          String sha256MasterKeyEd25519 = Base64.encodeBase64String(
              DigestUtils.sha256(Base64.decodeBase64(
                  masterKeyEd25519FromIdentityEd25519 + "=")))
              .replaceAll("=", "");
          scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
              .newLine();
          if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
              masterKeyEd25519FromIdentityEd25519)) {
            logger.warn("Mismatch between identity-ed25519 and "
                + "master-key-ed25519.  Skipping.");
            return false;
          }

          /* Verify that identity-ed25519 and master-key-ed25519 match. */
        } else if (line.startsWith("master-key-ed25519 ")) {
          masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
          if (masterKeyEd25519FromIdentityEd25519 != null
              && !masterKeyEd25519FromIdentityEd25519.equals(
              masterKeyEd25519)) {
            logger.warn("Mismatch between identity-ed25519 and "
                + "master-key-ed25519.  Skipping.");
            return false;
          }

          /* Write the following lines unmodified to the sanitized
           * descriptor. */
        } else if (line.startsWith("write-history ")
            || line.startsWith("read-history ")
            || line.startsWith("ipv6-write-history ")
            || line.startsWith("ipv6-read-history ")
            || line.startsWith("geoip-start-time ")
            || line.startsWith("geoip-client-origins ")
            || line.startsWith("geoip-db-digest ")
            || line.startsWith("geoip6-db-digest ")
            || line.startsWith("conn-bi-direct ")
            || line.startsWith("ipv6-conn-bi-direct ")
            || line.startsWith("bridge-")
            || line.startsWith("dirreq-")
            || line.startsWith("cell-")
            || line.startsWith("entry-")
            || line.startsWith("exit-")
            || line.startsWith("hidserv-")
            || line.startsWith("padding-counts ")) {
          scrubbed.append(line).newLine();

          /* When we reach the signature, we're done. Write the sanitized
           * descriptor to disk below. */
        } else if (line.startsWith("router-signature")) {
          break;

          /* Skip the ed25519 signature; we'll include a SHA256 digest of
           * the SHA256 descriptor digest in router-digest-sha256. */
        } else if (line.startsWith("router-sig-ed25519 ")) {
          continue;

          /* If we encounter an unrecognized line, stop parsing and print
           * out a warning. We might have overlooked sensitive information
           * that we need to remove or replace for the sanitized descriptor
           * version. */
        } else {
          logger.warn("Unrecognized line '{}'. Skipping.", line);
          return false;
        }
      }
    } catch (DecoderException | IOException e) {
      logger.warn("Could not parse extra-info descriptor.", e);
      return false;
    }

    /* Determine digest(s) of sanitized extra-info descriptor.  The SHA256
     * digest is only computed for descriptors containing an
     * identity-ed25519 certificate. */
    this.descriptorDigest = this.computeDescriptorDigest(this.originalBytes,
        "extra-info ", "\nrouter-signature\n");
    String descriptorDigestSha256Base64 = null;
    if (masterKeyEd25519FromIdentityEd25519 != null) {
      descriptorDigestSha256Base64 = this.computeSha256Base64Digest(
          this.originalBytes, "extra-info ", "\n-----END SIGNATURE-----\n");
    }
    if (null != descriptorDigestSha256Base64) {
      scrubbed.append("router-digest-sha256 ")
          .append(descriptorDigestSha256Base64).newLine();
    }
    if (null != this.descriptorDigest) {
      scrubbed.append("router-digest ")
          .append(this.descriptorDigest.toUpperCase()).newLine();
    }
    this.sanitizedBytes = scrubbed.toBytes();
    return true;
  }

  /** Returns the sanitized descriptor bytes, or {@code null} if sanitizing
   * has not (successfully) run yet. */
  byte[] getSanitizedBytes() {
    return this.sanitizedBytes;
  }

  /** Returns the contents of the "published" line without the keyword, or
   * {@code null} if no such line has been parsed. */
  public String getPublishedString() {
    return this.publishedString;
  }

  /** Returns the descriptor digest computed while sanitizing, or
   * {@code null} if it has not been or could not be computed. */
  public String getDescriptorDigest() {
    return this.descriptorDigest;
  }
}
+230 −0
Original line number Diff line number Diff line
/* Copyright 2010--2020 The Tor Project
 * See LICENSE for licensing information */

package org.torproject.metrics.collector.bridgedescs;

import org.torproject.metrics.collector.conf.Annotation;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.SortedMap;
import java.util.TimeZone;
import java.util.TreeMap;

/**
 * Sanitized version of a bridge network status: hashes bridge identities
 * and descriptor identifiers, scrubs addresses and ports using the
 * {@code SensitivePartsSanitizer}, and writes entries sorted by hex-encoded
 * hashed bridge identity.
 */
public class SanitizedBridgeNetworkStatus extends SanitizedBridgeDescriptor {

  private static final Logger logger = LoggerFactory.getLogger(
      SanitizedBridgeNetworkStatus.class);

  /** Authority fingerprint that a "fingerprint" line in the status must
   * match and that is written to the header if there is no such line. */
  private final String authorityFingerprint;

  /**
   * Creates a new instance for the given original status bytes.
   *
   * @param originalBytes raw bridge network status bytes
   * @param sensitivePartsSanitizer sanitizer for addresses and ports
   * @param publicationTime publication time to use unless the status
   *     contains a "published" line
   * @param authorityFingerprint expected bridge authority fingerprint
   */
  SanitizedBridgeNetworkStatus(byte[] originalBytes,
      SensitivePartsSanitizer sensitivePartsSanitizer, String publicationTime,
      String authorityFingerprint) {
    super(originalBytes, sensitivePartsSanitizer);
    this.publishedString = publicationTime;
    this.authorityFingerprint = authorityFingerprint;
  }

  /**
   * Parses the original status line by line and builds the sanitized
   * status, which is afterwards available via
   * {@link #getSanitizedBytes()}.
   *
   * @return {@code true} if sanitizing succeeded, {@code false} if the
   *     sanitizer reports a persistence problem with secrets, if the status
   *     contains an illegal, unknown or misplaced line or a mismatching
   *     fingerprint, or if the status cannot be parsed
   */
  boolean sanitizeDescriptor() {

    if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
      /* There's a persistence problem, so we shouldn't scrub more IP
       * addresses in this execution. */
      return false;
    }

    /* Parse the given network status line by line. */
    boolean includesFingerprintLine = false;
    DescriptorBuilder scrubbed = new DescriptorBuilder();
    scrubbed.append(Annotation.Status.toString());
    SortedMap<String, String> scrubbedEntries = new TreeMap<>();

    /* The publication time and header lines are only known after parsing,
     * so we append still-empty builders here and fill them in later.
     * NOTE(review): this relies on DescriptorBuilder evaluating appended
     * builders only when producing the final bytes -- confirm. */
    StringBuilder publishedStringBuilder = new StringBuilder();
    scrubbed.append("published ").append(publishedStringBuilder).newLine();
    DescriptorBuilder header = new DescriptorBuilder();
    scrubbed.append(header);

    try (BufferedReader br = new BufferedReader(new StringReader(new String(
        this.originalBytes, StandardCharsets.US_ASCII)))) {
      String line;
      String mostRecentDescPublished = null;
      byte[] fingerprintBytes = null;
      String descPublicationTime = null;
      String hashedBridgeIdentityHex = null;
      DescriptorBuilder scrubbedEntry = new DescriptorBuilder();
      while ((line = br.readLine()) != null) {

        /* Use publication time from "published" line instead of the
         * file's last-modified time.  Don't copy over the line, because
         * we're going to write a "published" line below. */
        if (line.startsWith("published ")) {
          this.publishedString = line.substring("published ".length());

          /* Additional header lines don't have to be cleaned up. */
        } else if (line.startsWith("flag-thresholds ")) {
          header.append(line).newLine();

          /* The authority fingerprint in the "fingerprint" line can go in
           * unscrubbed. */
        } else if (line.startsWith("fingerprint ")) {
          if (!("fingerprint " + authorityFingerprint).equals(line)) {
            logger.warn("Mismatch between authority fingerprint expected from "
                + "file name ({}) and parsed from \"fingerprint\" "
                + "line (\"{}\").", authorityFingerprint, line);
            return false;
          }
          header.append(line).newLine();
          includesFingerprintLine = true;

          /* r lines contain sensitive information that needs to be removed
           * or replaced. */
        } else if (line.startsWith("r ")) {

          /* Clear buffer from previously scrubbed lines. */
          if (scrubbedEntry.hasContent()) {
            scrubbedEntries.put(hashedBridgeIdentityHex,
                scrubbedEntry.toString());
            scrubbedEntry = new DescriptorBuilder();
          }

          /* Parse the relevant parts of this r line. */
          String[] parts = line.split(" ");
          if (parts.length < 9) {
            logger.warn("Illegal line '{}' in bridge network "
                + "status.  Skipping descriptor.", line);
            return false;
          }
          if (!Base64.isBase64(parts[2])) {
            logger.warn("Illegal base64 character in r line '{}'.  "
                + "Skipping descriptor.", parts[2]);
            return false;
          }
          fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
          descPublicationTime = parts[4] + " " + parts[5];
          String address = parts[6];
          String orPort = parts[7];
          String dirPort = parts[8];

          /* Determine most recent descriptor publication time, ignoring
           * times later than the status publication time. */
          if (descPublicationTime.compareTo(this.publishedString) <= 0
              && (mostRecentDescPublished == null
              || descPublicationTime.compareTo(
              mostRecentDescPublished) > 0)) {
            mostRecentDescPublished = descPublicationTime;
          }

          /* Write scrubbed r line to buffer. */
          byte[] hashedBridgeIdentity = DigestUtils.sha1(fingerprintBytes);
          String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
              hashedBridgeIdentity).substring(0, 27);
          hashedBridgeIdentityHex = Hex.encodeHexString(
              hashedBridgeIdentity);
          String descriptorIdentifier = parts[3];
          String hashedDescriptorIdentifier = Base64.encodeBase64String(
              DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
                  + "=="))).substring(0, 27);
          String scrubbedAddress = this.sensitivePartsSanitizer
              .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
          String nickname = parts[1];
          String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
              orPort, fingerprintBytes, descPublicationTime);
          String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
              dirPort, fingerprintBytes, descPublicationTime);
          scrubbedEntry.append("r ").append(nickname).space()
              .append(hashedBridgeIdentityBase64).space()
              .append(hashedDescriptorIdentifier).space()
              .append(descPublicationTime).space()
              .append(scrubbedAddress).space()
              .append(scrubbedOrPort).space()
              .append(scrubbedDirPort).newLine();

          /* Sanitize any addresses in a lines using the fingerprint and
           * descriptor publication time from the previous r line. */
        } else if (line.startsWith("a ")) {
          if (fingerprintBytes == null) {
            /* Without a preceding r line there's no fingerprint to scrub
             * the address with. */
            logger.warn("Line '{}' precedes the first r line in bridge "
                + "network status.  Skipping descriptor.", line);
            return false;
          }
          String scrubbedOrAddress = this.sensitivePartsSanitizer
              .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
                  descPublicationTime);
          if (scrubbedOrAddress != null) {
            scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine();
          } else {
            logger.warn("Invalid address in line '{}' "
                + "in bridge network status.  Skipping line!", line);
          }

          /* Nothing special about s, w, and p lines; just copy them. */
        } else if (line.startsWith("s ") || line.equals("s")
            || line.startsWith("w ") || line.equals("w")
            || line.startsWith("p ") || line.equals("p")) {
          if (hashedBridgeIdentityHex == null) {
            /* Without a preceding r line this entry would later be stored
             * under a null key, which the sorted map rejects. */
            logger.warn("Line '{}' precedes the first r line in bridge "
                + "network status.  Skipping descriptor.", line);
            return false;
          }
          scrubbedEntry.append(line).newLine();

          /* There should be nothing else but r, a, w, p, and s lines in the
           * network status.  If there is, we should probably learn before
           * writing anything to the sanitized descriptors. */
        } else {
          logger.debug("Unknown line '{}' in bridge "
              + "network status. Not writing to disk!", line);
          return false;
        }
      }
      if (scrubbedEntry.hasContent()) {
        scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString());
      }
      if (!includesFingerprintLine) {
        header.append("fingerprint ").append(authorityFingerprint).newLine();
      }

      /* Check if we can tell from the descriptor publication times
       * whether this status is possibly stale.  Parse both timestamps in
       * UTC, so that daylight saving time changes in the default time zone
       * cannot distort the difference. */
      SimpleDateFormat formatter = new SimpleDateFormat(
          "yyyy-MM-dd HH:mm:ss");
      formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
      if (null == mostRecentDescPublished) {
        logger.warn("The bridge network status published at {}"
            + " does not contain a single entry. Please ask the bridge "
            + "authority operator to check!", this.publishedString);
      } else if (formatter.parse(this.publishedString).getTime()
          - formatter.parse(mostRecentDescPublished).getTime()
          > 60L * 60L * 1000L) {
        logger.warn("The most recent descriptor in the bridge "
                + "network status published at {} was published at {} which is "
                + "more than 1 hour before the status. This is a sign for "
                + "the status being stale. Please check!",
            this.publishedString, mostRecentDescPublished);
      }
    } catch (ParseException e) {
      logger.warn("Could not parse timestamp in bridge network status.", e);
      return false;
    } catch (IOException e) {
      logger.warn("Could not parse bridge network status.", e);
      return false;
    }

    /* Fill in the publication time and add all entries in the order of
     * their hex-encoded hashed bridge identities. */
    publishedStringBuilder.append(this.publishedString);
    for (String scrubbedEntry : scrubbedEntries.values()) {
      scrubbed.append(scrubbedEntry);
    }
    this.sanitizedBytes = scrubbed.toBytes();
    return true;
  }


  /** Returns the sanitized status bytes, or {@code null} if sanitizing has
   * not (successfully) run yet. */
  byte[] getSanitizedBytes() {
    return this.sanitizedBytes;
  }

  /** Returns the publication time passed to the constructor or, once
   * parsed, the contents of the "published" line without the keyword. */
  public String getPublishedString() {
    return this.publishedString;
  }
}
+360 −0

File added.

Preview size limit exceeded, changes collapsed.

+34 −737

File changed.

Preview size limit exceeded, changes collapsed.