Commit 0f5536ed authored by Karsten Loesing's avatar Karsten Loesing
Browse files

Archive OnionPerf analysis .json files.

Implements #34072.
parent a87ce0d0
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
# Changes in version 1.1?.? - 2020-0?-??

 * Medium changes
   - Update to metrics-lib 2.12.1.
   - Download OnionPerf analysis .json files in addition to .tpf
     files.

 * Minor changes
   - Simplify logging configuration.
   - Set default locale `US` and default time zone `UTC` at the
+1 −1
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@
  <property name="release.version" value="1.14.1-dev" />
  <property name="project-main-class" value="org.torproject.metrics.collector.Main" />
  <property name="name" value="collector"/>
  <property name="metricslibversion" value="2.10.0" />
  <property name="metricslibversion" value="2.12.1" />
  <property name="jarincludes" value="collector.properties logback.xml" />

  <patternset id="runtime" >
+181 −37
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.downloader.Downloader;

import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -32,14 +33,16 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.Stack;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Download download .tpf files from OnionPerf hosts. */
/** Download OnionPerf files from OnionPerf hosts. */
public class OnionPerfDownloader extends CollecTorMain {

  private static final Logger logger = LoggerFactory.getLogger(
@@ -47,6 +50,8 @@ public class OnionPerfDownloader extends CollecTorMain {

  private static final String TORPERF = "torperf";

  private static final String ONIONPERF = "onionperf";

  /** Instantiate the OnionPerf module using the given configuration. */
  public OnionPerfDownloader(Configuration config) {
    super(config);
@@ -54,21 +59,25 @@ public class OnionPerfDownloader extends CollecTorMain {
  }

  /** File containing the download history, which is necessary, because
   * OnionPerf does not delete older .tpf files, but which enables us to do
   * so. */
   * OnionPerf does not delete older files, but which enables us to do so. */
  private File onionPerfDownloadedFile;

  /** Full URLs of .tpf files downloaded in the current or in past
   * executions. */
  private SortedSet<String> downloadedTpfFiles = new TreeSet<>();
  /** Full URLs of files downloaded in the current or in past executions. */
  private SortedSet<String> downloadedFiles = new TreeSet<>();

  /** Base URLs of configured OnionPerf hosts. */
  private URL[] onionPerfHosts = null;

  /** Directory for storing archived .tpf files. */
  /** Relative URLs of available .tpf files by base URL. */
  private Map<URL, List<String>> tpfFileUrls = new HashMap<>();

  /** Relative URLs of available OnionPerf analysis files by base URL. */
  private Map<URL, List<String>> onionPerfAnalysisFileUrls = new HashMap<>();

  /** Directory for storing archived files. */
  private File archiveDirectory = null;

  /** Directory for storing recent .tpf files. */
  /** Directory for storing recent files. */
  private File recentDirectory = null;

  @Override
@@ -87,19 +96,17 @@ public class OnionPerfDownloader extends CollecTorMain {
        new File(config.getPath(Key.StatsPath).toFile(),
        "onionperf-downloaded");
    this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts);
    this.readDownloadedOnionPerfTpfFiles();
    this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(),
        TORPERF);
    this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(),
        TORPERF);
    this.readDownloadedOnionPerfFiles();
    this.archiveDirectory = config.getPath(Key.OutputPath).toFile();
    this.recentDirectory = config.getPath(Key.RecentPath).toFile();
    for (URL baseUrl : this.onionPerfHosts) {
      this.downloadFromOnionPerfHost(baseUrl);
    }
    this.writeDownloadedOnionPerfTpfFiles();
    this.writeDownloadedOnionPerfFiles();
    this.cleanUpRsyncDirectory();
  }

  private void readDownloadedOnionPerfTpfFiles() {
  private void readDownloadedOnionPerfFiles() {
    if (!this.onionPerfDownloadedFile.exists()) {
      return;
    }
@@ -107,47 +114,69 @@ public class OnionPerfDownloader extends CollecTorMain {
          this.onionPerfDownloadedFile))) {
      String line;
      while ((line = br.readLine()) != null) {
        this.downloadedTpfFiles.add(line);
        this.downloadedFiles.add(line);
      }
    } catch (IOException e) {
      logger.info("Unable to read download history file '{}'. Ignoring "
          + "download history and downloading all available .tpf files.",
          + "download history and downloading all available files.",
          this.onionPerfDownloadedFile.getAbsolutePath());
      this.downloadedTpfFiles.clear();
      this.downloadedFiles.clear();
    }
  }

  private void downloadFromOnionPerfHost(URL baseUrl) {
    logger.info("Downloading from OnionPerf host {}", baseUrl);
    List<String> tpfFileNames =
    this.downloadOnionPerfDirectoryListing(baseUrl);
    String source = baseUrl.getHost().split("\\.")[0];
    for (String tpfFileName : tpfFileNames) {
    if (this.tpfFileUrls.containsKey(baseUrl)) {
      for (String tpfFileName : this.tpfFileUrls.get(baseUrl)) {
        this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName);
      }
    }
    if (this.onionPerfAnalysisFileUrls.containsKey(baseUrl)) {
      for (String onionPerfAnalysisFileName
          : this.onionPerfAnalysisFileUrls.get(baseUrl)) {
        this.downloadAndParseOnionPerfAnalysisFile(baseUrl, source,
            onionPerfAnalysisFileName);
      }
    }
  }

  /** Pattern for links contained in directory listings. */
  /** Patterns for links contained in directory listings. */
  private static final Pattern TPF_FILE_URL_PATTERN =
      Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*");

  private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) {
    List<String> tpfFileUrls = new ArrayList<>();
  private static final Pattern ONIONPERF_ANALYSIS_FILE_URL_PATTERN =
      Pattern.compile(
      ".*<a href=\"([0-9-]{10}\\.onionperf\\.analysis\\.json\\.xz)\">.*");

  private void downloadOnionPerfDirectoryListing(URL baseUrl) {
    try (BufferedReader br = new BufferedReader(new InputStreamReader(
        baseUrl.openStream()))) {
      String line;
      while ((line = br.readLine()) != null) {
        Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line);
        if (matcher.matches() && !matcher.group(1).startsWith("/")) {
          tpfFileUrls.add(matcher.group(1));
        Matcher tpfFileMatcher = TPF_FILE_URL_PATTERN.matcher(line);
        if (tpfFileMatcher.matches()
            && !tpfFileMatcher.group(1).startsWith("/")) {
          this.tpfFileUrls.putIfAbsent(baseUrl, new ArrayList<>());
          this.tpfFileUrls.get(baseUrl).add(tpfFileMatcher.group(1));
        }
        Matcher onionPerfAnalysisFileMatcher
            = ONIONPERF_ANALYSIS_FILE_URL_PATTERN.matcher(line);
        if (onionPerfAnalysisFileMatcher.matches()
            && !onionPerfAnalysisFileMatcher.group(1).startsWith("/")) {
          this.onionPerfAnalysisFileUrls.putIfAbsent(baseUrl,
              new ArrayList<>());
          this.onionPerfAnalysisFileUrls.get(baseUrl)
              .add(onionPerfAnalysisFileMatcher.group(1));
        }
      }
    } catch (IOException e) {
      logger.warn("Unable to download directory listing from '{}'.  Skipping "
          + "this OnionPerf host.", baseUrl);
      tpfFileUrls.clear();
      this.tpfFileUrls.remove(baseUrl);
      this.onionPerfAnalysisFileUrls.remove(baseUrl);
    }
    return tpfFileUrls;
  }

  private static final DateFormat DATE_FORMAT;
@@ -169,7 +198,7 @@ public class OnionPerfDownloader extends CollecTorMain {
    }

    /* Skip if we successfully downloaded this file before. */
    if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) {
    if (this.downloadedFiles.contains(tpfFileUrl.toString())) {
      return;
    }

@@ -197,7 +226,8 @@ public class OnionPerfDownloader extends CollecTorMain {
    }

    /* Download file contents to temporary file. */
    File tempFile = new File(this.recentDirectory, "." + tpfFileName);
    File tempFile = new File(this.recentDirectory,
        TORPERF + "/." + tpfFileName);
    byte[] downloadedBytes;
    try {
      downloadedBytes = Downloader.downloadFromHttpServer(
@@ -263,7 +293,120 @@ public class OnionPerfDownloader extends CollecTorMain {

    /* Copy/move files in place. */
    File archiveFile = new File(this.archiveDirectory,
         date.replaceAll("-", "/") + "/" + tpfFileName);
         TORPERF + "/" + date.replaceAll("-", "/") + "/" + tpfFileName);
    archiveFile.getParentFile().mkdirs();
    try {
      Files.copy(tempFile.toPath(), archiveFile.toPath(),
          StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException e) {
      logger.warn("Unable to copy OnionPerf file {} to {}.  Skipping.",
          tempFile, archiveFile, e);
      tempFile.delete();
      return;
    }
    File recentFile = new File(this.recentDirectory,
        TORPERF + "/" + tpfFileName);
    tempFile.renameTo(recentFile);

    /* Add to download history to avoid downloading it again. */
    this.downloadedFiles.add(baseUrl + tpfFileName);
  }


  private void downloadAndParseOnionPerfAnalysisFile(URL baseUrl, String source,
      String onionPerfAnalysisFileName) {
    URL onionPerfAnalysisFileUrl;
    try {
      onionPerfAnalysisFileUrl = new URL(baseUrl, onionPerfAnalysisFileName);
    } catch (MalformedURLException e1) {
      logger.warn("Unable to put together base URL '{}' and file path '{}' to "
          + "a URL. Skipping.", baseUrl, onionPerfAnalysisFileName);
      return;
    }

    /* Skip if we successfully downloaded this file before. */
    if (this.downloadedFiles.contains(onionPerfAnalysisFileUrl.toString())) {
      return;
    }

    /* Parse date from file name: yyyy-MM-dd.onionperf.analysis.json.xz */
    String date;
    try {
      date = onionPerfAnalysisFileName.substring(0, 10);
      DATE_FORMAT.parse(date);
    } catch (NumberFormatException | ParseException e) {
      logger.warn("Invalid file name '{}{}'. Skipping.", baseUrl,
          onionPerfAnalysisFileName, e);
      return;
    }

    /* Download file contents to temporary file. */
    File tempFile = new File(this.recentDirectory,
        ONIONPERF + "/." + onionPerfAnalysisFileName);
    byte[] downloadedBytes;
    try {
      downloadedBytes = Downloader.downloadFromHttpServer(
          new URL(baseUrl + onionPerfAnalysisFileName));
    } catch (IOException e) {
      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
          onionPerfAnalysisFileName, e);
      return;
    }
    if (null == downloadedBytes) {
      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
          onionPerfAnalysisFileName);
      return;
    }
    tempFile.getParentFile().mkdirs();
    try {
      Files.write(tempFile.toPath(), downloadedBytes);
    } catch (IOException e) {
      logger.warn("Unable to write previously downloaded '{}{}' to temporary "
          + "file '{}'. Skipping.", baseUrl, onionPerfAnalysisFileName,
          tempFile, e);
      return;
    }

    /* Validate contained descriptors. */
    DescriptorParser descriptorParser =
        DescriptorSourceFactory.createDescriptorParser();
    byte[] rawDescriptorBytes;
    try {
      rawDescriptorBytes = IOUtils.toByteArray(
          Files.newInputStream(tempFile.toPath()));
    } catch (IOException e) {
      logger.warn("OnionPerf file '{}{}' could not be read. Skipping.", baseUrl,
          onionPerfAnalysisFileName, e);
      tempFile.delete();
      return;
    }
    Iterable<Descriptor> descriptors = descriptorParser.parseDescriptors(
        rawDescriptorBytes, null, onionPerfAnalysisFileName);
    String message = null;
    for (Descriptor descriptor : descriptors) {
      if (!(descriptor instanceof TorperfResult)) {
        message = "File contains descriptors other than an OnionPerf analysis "
            + "document: " + descriptor.getClass();
        break;
      }
      TorperfResult torperf = (TorperfResult) descriptor;
      if (!source.equals(torperf.getSource())) {
        message = "File contains transfer from another source: "
            + torperf.getSource();
        break;
      }
    }
    if (null != message) {
      logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. "
          + "Skipping.", baseUrl, onionPerfAnalysisFileName, message);
      tempFile.delete();
      return;
    }

    /* Copy/move files in place. */
    File archiveFile = new File(this.archiveDirectory,
        ONIONPERF + "/" + date.replaceAll("-", "/") + "/" + date + "." + source
        + ".onionperf.analysis.json.xz");
    archiveFile.getParentFile().mkdirs();
    try {
      Files.copy(tempFile.toPath(), archiveFile.toPath(),
@@ -274,18 +417,19 @@ public class OnionPerfDownloader extends CollecTorMain {
      tempFile.delete();
      return;
    }
    File recentFile = new File(this.recentDirectory, tpfFileName);
    File recentFile = new File(this.recentDirectory,
        ONIONPERF + "/" + date + "." + source + ".onionperf.analysis.json.xz");
    tempFile.renameTo(recentFile);

    /* Add to download history to avoid downloading it again. */
    this.downloadedTpfFiles.add(baseUrl + tpfFileName);
    this.downloadedFiles.add(baseUrl + onionPerfAnalysisFileName);
  }

  private void writeDownloadedOnionPerfTpfFiles() {
  private void writeDownloadedOnionPerfFiles() {
    this.onionPerfDownloadedFile.getParentFile().mkdirs();
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(
          this.onionPerfDownloadedFile))) {
      for (String line : this.downloadedTpfFiles) {
      for (String line : this.downloadedFiles) {
        bw.write(line);
        bw.newLine();
      }
+1 −1
Original line number Diff line number Diff line
@@ -175,7 +175,7 @@ ExitlistUrl = https://check.torproject.org/exit-addresses
######## OnionPerf downloader ########
#
## Define descriptor sources
#  possible values: Remote,Sync
#  possible values: Remote,Sync (.tpf files only!)
OnionPerfSources = Remote
#  Retrieve files from the following CollecTor instances.
#  List of URLs separated by comma.
+7 −0
Original line number Diff line number Diff line
@@ -40,6 +40,8 @@ TARBALLS=(
  exit-list-$YEARTWO-$MONTHTWO
  torperf-$YEARONE-$MONTHONE
  torperf-$YEARTWO-$MONTHTWO
  onionperf-$YEARONE-$MONTHONE
  onionperf-$YEARTWO-$MONTHTWO
  certs
  microdescs-$YEARONE-$MONTHONE
  microdescs-$YEARTWO-$MONTHTWO
@@ -73,6 +75,8 @@ DIRECTORIES=(
  $OUTDIR/exit-lists/$YEARTWO/$MONTHTWO/
  $OUTDIR/torperf/$YEARONE/$MONTHONE/
  $OUTDIR/torperf/$YEARTWO/$MONTHTWO/
  $OUTDIR/onionperf/$YEARONE/$MONTHONE/
  $OUTDIR/onionperf/$YEARTWO/$MONTHTWO/
  $OUTDIR/relay-descriptors/certs/
  $OUTDIR/relay-descriptors/microdesc/$YEARONE/$MONTHONE
  $OUTDIR/relay-descriptors/microdesc/$YEARTWO/$MONTHTWO
@@ -178,6 +182,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidt
mkdir -p $ARCHIVEDIR/torperf/
ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz

mkdir -p $ARCHIVEDIR/onionperf/
ln -f -s -t $ARCHIVEDIR/onionperf/ $TARBALLTARGETDIR/onionperf-20??-??.tar.xz

mkdir -p $ARCHIVEDIR/webstats/
ln -f -s -t $ARCHIVEDIR/webstats/ $TARBALLTARGETDIR/webstats-20??-??.tar