Commit dcbac68b authored by Karsten Loesing
Browse files

Archive bandwidth files in relaydescs module.

Also update to metrics-lib 2.6.1.

Implements #30218.
parent 19ae66a1
# Changes in version 1.9.0 - 2019-05-??
* Medium changes
- Archive bandwidth files in relaydescs module.
- Update to metrics-lib 2.6.1.
# Changes in version 1.8.0 - 2018-10-11
* Medium changes
......
......@@ -11,7 +11,7 @@
<property name="release.version" value="1.8.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.4.0" />
<property name="metricslibversion" value="2.6.1" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
......@@ -21,7 +21,7 @@
<include name="jackson-core-2.8.6.jar"/>
<include name="jackson-databind-2.8.6.jar"/>
<include name="xz-1.6.jar"/>
<include name="metrics-lib-${metricslibversion}.jar"/>
<include name="metrics-lib-${metricslibversion}-thin.jar"/>
<include name="logback-core-1.1.9.jar" />
<include name="logback-classic-1.1.9.jar" />
<include name="slf4j-api-1.7.22.jar" />
......
......@@ -6,6 +6,7 @@ package org.torproject.metrics.collector.conf;
/** This enum contains all currently valid descriptor annotations. */
public enum Annotation {
BandwidthFile("@type bandwidth-file 1.0\n"),
BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
BridgeServer("@type bridge-server-descriptor 1.2\n"),
Cert("@type dir-key-certificate-3 1.0\n"),
......
......@@ -3,6 +3,7 @@
package org.torproject.metrics.collector.relaydescs;
import org.torproject.descriptor.BandwidthFile;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorParser;
import org.torproject.descriptor.DescriptorSourceFactory;
......@@ -33,6 +34,10 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
......@@ -44,6 +49,7 @@ import java.util.SortedSet;
import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
public class ArchiveWriter extends CollecTorMain {
......@@ -51,12 +57,15 @@ public class ArchiveWriter extends CollecTorMain {
ArchiveWriter.class);
private long now = System.currentTimeMillis();
private LocalDateTime nowLocalDateTime
= LocalDateTime.ofInstant(Instant.ofEpochMilli(this.now), ZoneOffset.UTC);
private String outputDirectory;
private String rsyncCatString;
private DescriptorParser descriptorParser;
private int storedConsensusesCounter = 0;
private int storedMicrodescConsensusesCounter = 0;
private int storedVotesCounter = 0;
private int storedBandwidthsCounter = 0;
private int storedCertsCounter = 0;
private int storedServerDescriptorsCounter = 0;
private int storedExtraInfoDescriptorsCounter = 0;
......@@ -74,6 +83,8 @@ public class ArchiveWriter extends CollecTorMain {
private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
new TreeMap<>();
private SortedMap<Long, Set<String>> storedMicrodescriptors = new TreeMap<>();
private SortedMap<LocalDateTime, Set<String>> storedBandwidths
= new TreeMap<>();
private File storedServerDescriptorsFile;
private File storedExtraInfoDescriptorsFile;
......@@ -103,6 +114,8 @@ public class ArchiveWriter extends CollecTorMain {
RelayServerDescriptor.class);
this.mapPathDescriptors.put("recent/relay-descriptors/extra-infos",
RelayExtraInfoDescriptor.class);
this.mapPathDescriptors.put("recent/relay-descriptors/bandwidths",
BandwidthFile.class);
}
@Override
......@@ -203,6 +216,7 @@ public class ArchiveWriter extends CollecTorMain {
this.storedConsensuses.clear();
this.storedMicrodescConsensuses.clear();
this.storedVotes.clear();
this.storedBandwidths.clear();
this.storedServerDescriptors.clear();
this.storedExtraInfoDescriptors.clear();
this.storedMicrodescriptors.clear();
......@@ -299,7 +313,8 @@ public class ArchiveWriter extends CollecTorMain {
.append(this.storedConsensusesCounter).append(" consensus(es), ")
.append(this.storedMicrodescConsensusesCounter).append(" microdesc ")
.append("consensus(es), ").append(this.storedVotesCounter)
.append(" vote(s), ").append(this.storedCertsCounter)
.append(" vote(s), ").append(this.storedBandwidthsCounter)
.append(" bandwidth file(s), ").append(this.storedCertsCounter)
.append(" certificate(s), ").append(this.storedServerDescriptorsCounter)
.append(" server descriptor(s), ")
.append(this.storedExtraInfoDescriptorsCounter).append(" extra-info ")
......@@ -309,6 +324,7 @@ public class ArchiveWriter extends CollecTorMain {
this.storedConsensusesCounter = 0;
this.storedMicrodescConsensusesCounter = 0;
this.storedVotesCounter = 0;
this.storedBandwidthsCounter = 0;
this.storedCertsCounter = 0;
this.storedServerDescriptorsCounter = 0;
this.storedExtraInfoDescriptorsCounter = 0;
......@@ -727,6 +743,30 @@ public class ArchiveWriter extends CollecTorMain {
}
}
/**
 * Stores a bandwidth file to disk.
 *
 * <p>The file is written to two places: below the {@code bandwidth}
 * subdirectory of {@code outputDirectory}, in a {@code uuuu/MM/dd/}
 * directory derived from the given time, and under the same file name
 * below {@code recentPathName}/{@code RELAY_DESCRIPTORS}/{@code bandwidths}.
 * The file name has the form
 * {@code uuuu-MM-dd-HH-mm-ss-bandwidth-<digest>}.</p>
 *
 * @param data Raw bandwidth file contents, handed to {@code store} together
 *     with the bandwidth file annotation bytes.
 * @param fileCreatedOrTimestamp Time used to build the directory and file
 *     name, and as key in {@code storedBandwidths}.
 * @param bandwidthFileDigest Digest string appended to the file name and
 *     recorded in {@code storedBandwidths}.
 */
void storeBandwidthFile(byte[] data, LocalDateTime fileCreatedOrTimestamp,
    String bandwidthFileDigest) {
  DateTimeFormatter printFormat = DateTimeFormatter
      .ofPattern("uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC);
  File tarballFile = Paths.get(this.outputDirectory, "bandwidth",
      fileCreatedOrTimestamp.format(printFormat) + "-bandwidth-"
      + bandwidthFileDigest).toFile();
  /* Remember whether the file was already there before we (maybe) write
   * it, because the check further down depends on the state before this
   * call. */
  boolean tarballFileExistedBefore = tarballFile.exists();
  File rsyncFile = Paths.get(recentPathName, RELAY_DESCRIPTORS, "bandwidths",
      tarballFile.getName()).toFile();
  File[] outputFiles = new File[] { tarballFile, rsyncFile };
  if (this.store(Annotation.BandwidthFile.bytes(), data, outputFiles, null)) {
    /* Count this as a bandwidth file, not as a vote: this is the counter
     * that the statistics output reports as "bandwidth file(s)". */
    this.storedBandwidthsCounter++;
  }
  /* NOTE(review): this condition selects files whose time lies more than
   * three days before now; confirm that this direction is intended. */
  if (!tarballFileExistedBefore
      && this.nowLocalDateTime.isAfter(fileCreatedOrTimestamp.plusDays(3L))) {
    this.storedBandwidths
        .computeIfAbsent(fileCreatedOrTimestamp, key -> new TreeSet<>())
        .add(bandwidthFileDigest);
  }
}
/** Stores a key certificate to disk. */
public void storeCertificate(byte[] data, String fingerprint,
long published) {
......
......@@ -258,6 +258,8 @@ public class RelayDescriptorDownloader {
private int requestedVotes = 0;
private int requestedBandwidthFiles = 0;
private int requestedMissingServerDescriptors = 0;
private int requestedAllServerDescriptors = 0;
......@@ -274,6 +276,8 @@ public class RelayDescriptorDownloader {
private int downloadedVotes = 0;
private int downloadedBandwidthFiles = 0;
private int downloadedMissingServerDescriptors = 0;
private int downloadedAllServerDescriptors = 0;
......@@ -729,6 +733,14 @@ public class RelayDescriptorDownloader {
}
}
/* Now try to download the bandwidth file, regardless of whether this
* authority might provide one or when we last downloaded a bandwidth
* file from it. */
this.requestedBandwidthFiles++;
this.downloadedBandwidthFiles +=
this.downloadResourceFromAuthority(authority,
"/tor/status-vote/next/bandwidth");
/* Download either all server and extra-info descriptors or only
* those that we're missing. Start with server descriptors, then
* request extra-info descriptors. Finally, request missing
......@@ -886,7 +898,7 @@ public class RelayDescriptorDownloader {
allData == null ? 0 : allData.length);
int receivedDescriptors = 0;
if (allData != null) {
if (resource.startsWith("/tor/status-vote/current/")) {
if (resource.startsWith("/tor/status-vote/")) {
this.rdp.parse(allData);
receivedDescriptors = 1;
} else if (resource.startsWith("/tor/server/")
......@@ -1067,11 +1079,13 @@ public class RelayDescriptorDownloader {
this.newMissingServerDescriptors, this.newMissingExtraInfoDescriptors,
this.newMissingMicrodescriptors);
logger.info("We requested {} consensus(es), {} microdesc consensus(es), "
+ "{} vote(s), {} missing server descriptor(s), {} times all server "
+ "{} vote(s), {} bandwidth file(s), {} missing server descriptor(s), "
+ "{} times all server "
+ "descriptors, {} missing extra-info descriptor(s), {} times all "
+ "extra-info descriptors, and {} missing microdescriptor(s) from the "
+ "directory authorities.", this.requestedConsensuses,
this.requestedMicrodescConsensuses, this.requestedVotes,
this.requestedBandwidthFiles,
this.requestedMissingServerDescriptors,
this.requestedAllServerDescriptors,
this.requestedMissingExtraInfoDescriptors,
......@@ -1085,12 +1099,14 @@ public class RelayDescriptorDownloader {
logger.info("We sent these numbers of requests to the directory "
+ "authorities:{}", sb.toString());
logger.info("We successfully downloaded {} consensus(es), {} microdesc "
+ "consensus(es), {} vote(s), {} missing server descriptor(s), {} "
+ "consensus(es), {} vote(s), {} bandwidth file(s), "
+ "{} missing server descriptor(s), {} "
+ "server descriptor(s) when downloading all descriptors, {} missing "
+ "extra-info descriptor(s), {} extra-info descriptor(s) when "
+ "downloading all descriptors, and {} missing microdescriptor(s).",
this.downloadedConsensuses, this.downloadedMicrodescConsensuses,
this.downloadedVotes, this.downloadedMissingServerDescriptors,
this.downloadedVotes, this.downloadedBandwidthFiles,
this.downloadedMissingServerDescriptors,
this.downloadedAllServerDescriptors,
this.downloadedMissingExtraInfoDescriptors,
this.downloadedAllExtraInfoDescriptors,
......
......@@ -14,6 +14,10 @@ import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
......@@ -318,6 +322,44 @@ public class RelayDescriptorParser {
* time(s) of microdesc consensuses containing them, because we
* don't know which month directories to put them in. Have to use
* storeMicrodescriptor below. */
} else if (line.matches("[0-9]{10}")) {
/* The following code is a much more lenient version of the parser in
* metrics-lib that we need for storing a bandwidth file even if
* metrics-lib has trouble verifying its format. As in metrics-lib,
* identifying bandwidth files by a 10-digit timestamp in the first line
* breaks with files generated before 2002 or after 2286 and when the
* next descriptor identifier starts with just a timestamp in the first
* line rather than a document type identifier. */
String timestampLine = line;
LocalDateTime fileCreatedOrTimestamp = null;
try {
while ((line = br.readLine()) != null) {
if (line.startsWith("file_created=")) {
fileCreatedOrTimestamp = LocalDateTime.parse(
line.substring("file_created=".length()));
break;
} else if (line.startsWith("bw=") || line.contains(" bw=")
|| "====".equals(line) || "=====".equals(line)) {
break;
}
}
} catch (IOException | DateTimeParseException e) {
/* Fall back to using timestamp in first line. */
}
if (null == fileCreatedOrTimestamp) {
try {
fileCreatedOrTimestamp = LocalDateTime.ofInstant(
Instant.ofEpochSecond(Long.parseLong(timestampLine)),
ZoneOffset.UTC);
} catch (NumberFormatException | DateTimeParseException e) {
logger.warn("Could not parse timestamp or file_created time from "
+ "bandwidth file. Storing with timestamp 2000-01-01 00:00:00");
fileCreatedOrTimestamp = LocalDateTime.of(2000, 1, 1, 0, 0, 0);
}
}
this.aw.storeBandwidthFile(data, fileCreatedOrTimestamp,
DigestUtils.sha256Hex(data).toUpperCase());
stored = true;
}
br.close();
} catch (IOException | ParseException e) {
......
......@@ -47,6 +47,8 @@ TARBALLS=(
consensuses-$YEARTWO-$MONTHTWO
votes-$YEARONE-$MONTHONE
votes-$YEARTWO-$MONTHTWO
bandwidths-$YEARONE-$MONTHONE
bandwidths-$YEARTWO-$MONTHTWO
server-descriptors-$YEARONE-$MONTHONE
server-descriptors-$YEARTWO-$MONTHTWO
extra-infos-$YEARONE-$MONTHONE
......@@ -72,6 +74,8 @@ DIRECTORIES=(
$OUTDIR/relay-descriptors/consensus/$YEARTWO/$MONTHTWO
$OUTDIR/relay-descriptors/vote/$YEARONE/$MONTHONE/
$OUTDIR/relay-descriptors/vote/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/bandwidth/$YEARONE/$MONTHONE/
$OUTDIR/relay-descriptors/bandwidth/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/server-descriptor/$YEARONE/$MONTHONE/
$OUTDIR/relay-descriptors/server-descriptor/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/extra-info/$YEARONE/$MONTHONE/
......@@ -156,6 +160,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/tor/ $TARBALLTARGETDIR/tor-20??-??.tar
mkdir -p $ARCHIVEDIR/relay-descriptors/votes/
ln -f -s -t $ARCHIVEDIR/relay-descriptors/votes/ $TARBALLTARGETDIR/votes-20??-??.tar.xz
mkdir -p $ARCHIVEDIR/relay-descriptors/bandwidths/
ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidths-20??-??.tar.xz
mkdir -p $ARCHIVEDIR/torperf/
ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment