Commit 09d7311d authored by Karsten Loesing's avatar Karsten Loesing
Browse files

Add new BridgedbMetrics descriptor type.

Also extend DescriptorReader#readDescriptors to support .gz-compressed
files which will be necessary to process files rsync'ed from BridgeDB.
And maybe it's useful for other purposes, too.

Implements part of #19332.
parent 8e2f6710
# Changes in version 2.?.? - 2019-??-??
* Medium changes
- Extend DescriptorReader#readDescriptors to support .gz-compressed
files.
- Add new BridgedbMetrics descriptor type.
# Changes in version 2.7.0 - 2019-09-06
* Medium changes
......
/* Copyright 2019 The Tor Project
* See LICENSE for licensing information */
package org.torproject.descriptor;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Map;
import java.util.Optional;
/**
* Contains aggregated information about requests to the BridgeDB service.
*
* @since 2.8.0
*/
public interface BridgedbMetrics extends Descriptor {
/**
* Return the end of the included measurement interval, as given by the date
* and time on the {@code bridgedb-metrics-end} line.
*
* @return End of the included measurement interval.
* @since 2.8.0
*/
LocalDateTime bridgedbMetricsEnd();
/**
* Return the length of the included measurement interval, as given in
* parentheses (in seconds) on the {@code bridgedb-metrics-end} line.
*
* @return Length of the included measurement interval.
* @since 2.8.0
*/
Duration bridgedbMetricsIntervalLength();
/**
* Return the BridgeDB metrics format version, as given on the
* {@code bridgedb-metrics-version} line.
*
* @return BridgeDB metrics format version.
* @since 2.8.0
*/
String bridgedbMetricsVersion();
/**
* Return approximate request numbers to the BridgeDB service in the
* measurement interval broken down by distribution mechanism, obfuscation
* protocol, and country code or email provider.
*
* <p>Keys are formatted as {@code DIST.PROTO.CC/EMAIL.[success|fail].none},
* for example {@code https.obfs3.ru.success.none}, where:</p>
* <ul>
* <li>{@code DIST} is BridgeDB's distribution mechanism, for example,
* {@code http}, {@code email}, or {@code moat};</li>
* <li>{@code PROTO} is the obfuscation protocol, for example, {@code obfs2},
* {@code obfs3}, {@code obfs4}, {@code scramblesuit}, or {@code fte};</li>
* <li>{@code CC/EMAIL} is either a two-letter country code or an email
* provider;</li>
* <li>the second-to-last field is either {@code success} or {@code fail}
* depending on whether the BridgeDB request succeeded or not; and</li>
* <li>the last field is reserved for an anomaly score to be added in the
* future.</li>
* </ul>
*
* <p>Values are approximate request numbers, rounded up to the next multiple
* of 10.</p>
*
* @return Map of approximate request numbers, or an empty {@code Optional}
*     if the descriptor does not contain any {@code bridgedb-metric-count}
*     line.
* @since 2.8.0
*/
Optional<Map<String, Long>> bridgedbMetricCounts();
}
/* Copyright 2019 The Tor Project
* See LICENSE for licensing information */
package org.torproject.descriptor.impl;
import org.torproject.descriptor.BridgedbMetrics;
import org.torproject.descriptor.DescriptorParseException;
import java.io.File;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Scanner;
import java.util.Set;
/**
 * Parser and value holder for a single BridgeDB metrics descriptor, made up of
 * one {@code bridgedb-metrics-end} line, one {@code bridgedb-metrics-version}
 * line and any number of {@code bridgedb-metric-count} lines.
 */
public class BridgedbMetricsImpl extends DescriptorImpl
    implements BridgedbMetrics {

  /** Keywords passed to {@code checkExactlyOnceKeys} after parsing. */
  private static final Set<Key> exactlyOnce = EnumSet.of(
      Key.BRIDGEDB_METRICS_END, Key.BRIDGEDB_METRICS_VERSION);

  /** End of the measurement interval; set from the first descriptor line. */
  private LocalDateTime bridgedbMetricsEnd;

  /** Length of the measurement interval; set from the first descriptor line. */
  private Duration bridgedbMetricsIntervalLength;

  /** Format version string; set from the version line. */
  private String bridgedbMetricsVersion;

  /**
   * Request counts by key in the order they appear in the descriptor, or
   * {@code null} if the descriptor contains no count line.
   */
  private Map<String, Long> bridgedbMetricCounts;

  /**
   * Parse a BridgeDB metrics descriptor from a slice of the given bytes.
   *
   * @param rawDescriptorBytes Raw bytes containing the descriptor.
   * @param offsetAndLength Offset and length of the descriptor within the
   *     raw bytes.
   * @param descriptorFile File the descriptor was read from, may be
   *     {@code null}.
   * @throws DescriptorParseException If a line cannot be parsed or one of the
   *     subsequent key checks fails.
   */
  BridgedbMetricsImpl(byte[] rawDescriptorBytes, int[] offsetAndLength,
      File descriptorFile) throws DescriptorParseException {
    // NOTE(review): the trailing false is presumably the "blank lines
    // allowed" flag of DescriptorImpl -- confirm against the superclass.
    super(rawDescriptorBytes, offsetAndLength, descriptorFile, false);
    this.parseDescriptorBytes();
    this.checkExactlyOnceKeys(exactlyOnce);
    this.checkFirstKey(Key.BRIDGEDB_METRICS_END);
    this.clearParsedKeys();
  }

  /**
   * Parse a BridgeDB metrics descriptor spanning all of the given bytes.
   *
   * @param rawDescriptorBytes Raw bytes of exactly one descriptor.
   * @param descriptorFile File the descriptor was read from, may be
   *     {@code null}.
   * @throws DescriptorParseException If the descriptor cannot be parsed.
   */
  BridgedbMetricsImpl(byte[] rawDescriptorBytes, File descriptorFile)
      throws DescriptorParseException {
    this(rawDescriptorBytes, new int[] { 0, rawDescriptorBytes.length },
        descriptorFile);
  }

  /**
   * Walk over all descriptor lines, skip lines starting with {@code @}, and
   * dispatch every other line by its keyword. Lines with a keyword that is
   * not handled here are validated via {@code ParseHelper.parseKeyword} and
   * then collected as unrecognized lines.
   *
   * @throws DescriptorParseException If a line is malformed.
   */
  private void parseDescriptorBytes() throws DescriptorParseException {
    Scanner scanner = this.newScanner().useDelimiter(NL);
    while (scanner.hasNext()) {
      String line = scanner.next();
      if (line.startsWith("@")) {
        continue;
      }
      String[] parts = line.split("[ \t]+");
      if (parts.length == 0) {
        // String#split drops trailing empty strings, so a line consisting
        // solely of spaces and/or tabs yields an empty array. Report it as a
        // parse error rather than failing on parts[0] with an unchecked
        // ArrayIndexOutOfBoundsException.
        throw new DescriptorParseException("Illegal line '" + line + "'.");
      }
      Key key = Key.get(parts[0]);
      switch (key) {
        case BRIDGEDB_METRICS_END:
          this.parseBridgedbMetricsEnd(line, parts);
          break;
        case BRIDGEDB_METRICS_VERSION:
          this.parseBridgedbMetricsVersion(line, parts);
          break;
        case BRIDGEDB_METRIC_COUNT:
          this.parseBridgedbMetricCount(line, parts);
          break;
        case INVALID:
        default:
          ParseHelper.parseKeyword(line, parts[0]);
          if (this.unrecognizedLines == null) {
            this.unrecognizedLines = new ArrayList<>();
          }
          this.unrecognizedLines.add(line);
      }
    }
  }

  /**
   * Parse a line of the form
   * {@code bridgedb-metrics-end YYYY-MM-DD HH:MM:SS (NSEC s)}.
   *
   * @param line Complete line, used in error messages.
   * @param parts Line split at runs of spaces and tabs.
   * @throws DescriptorParseException If the line has fewer than five parts,
   *     the interval is not written as {@code (NSEC s)}, or the timestamp or
   *     duration cannot be parsed.
   */
  private void parseBridgedbMetricsEnd(String line, String[] parts)
      throws DescriptorParseException {
    if (parts.length < 5 || parts[3].length() < 2 || !parts[3].startsWith("(")
        || !parts[4].equals("s)")) {
      throw new DescriptorParseException("Illegal line '" + line + "'.");
    }
    this.bridgedbMetricsEnd = ParseHelper.parseLocalDateTime(line, parts,
        1, 2);
    // Strip the leading "(" before handing the seconds value to the helper.
    this.bridgedbMetricsIntervalLength = ParseHelper.parseDuration(line,
        parts[3].substring(1));
  }

  /**
   * Parse a line of the form {@code bridgedb-metrics-version VERSION}.
   *
   * @param line Complete line, used in error messages.
   * @param parts Line split at runs of spaces and tabs.
   * @throws DescriptorParseException If the version is missing.
   */
  private void parseBridgedbMetricsVersion(String line, String[] parts)
      throws DescriptorParseException {
    if (parts.length < 2) {
      throw new DescriptorParseException("Illegal line '" + line + "'.");
    }
    this.bridgedbMetricsVersion = parts[1];
  }

  /**
   * Parse a line of the form {@code bridgedb-metric-count KEY VALUE} and
   * store the value under its key.
   *
   * @param line Complete line, used in error messages.
   * @param parts Line split at runs of spaces and tabs.
   * @throws DescriptorParseException If key or value are missing, the key
   *     was already seen in this descriptor, or the value is not a long.
   */
  private void parseBridgedbMetricCount(String line, String[] parts)
      throws DescriptorParseException {
    if (parts.length < 3) {
      throw new DescriptorParseException("Illegal line '" + line + "'.");
    }
    if (null == this.bridgedbMetricCounts) {
      this.bridgedbMetricCounts = new LinkedHashMap<>();
    }
    String key = parts[1];
    if (this.bridgedbMetricCounts.containsKey(key)) {
      throw new DescriptorParseException("Duplicate key '" + key + "' in line '"
          + line + "'.");
    }
    long value = ParseHelper.parseLong(line, parts, 2);
    this.bridgedbMetricCounts.put(key, value);
  }

  @Override
  public LocalDateTime bridgedbMetricsEnd() {
    return this.bridgedbMetricsEnd;
  }

  @Override
  public Duration bridgedbMetricsIntervalLength() {
    return this.bridgedbMetricsIntervalLength;
  }

  @Override
  public String bridgedbMetricsVersion() {
    return this.bridgedbMetricsVersion;
  }

  @Override
  public Optional<Map<String, Long>> bridgedbMetricCounts() {
    return Optional.ofNullable(this.bridgedbMetricCounts);
  }
}
......@@ -135,6 +135,11 @@ public class DescriptorParserImpl implements DescriptorParser {
|| firstLines.contains(NL + Key.SNOWFLAKE_STATS_END.keyword + SP)) {
return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
Key.SNOWFLAKE_STATS_END, SnowflakeStatsImpl.class);
} else if (firstLines.startsWith("@type bridgedb-metrics 1.")
|| firstLines.startsWith(Key.BRIDGEDB_METRICS_END.keyword + SP)
|| firstLines.contains(NL + Key.BRIDGEDB_METRICS_END.keyword + SP)) {
return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
Key.BRIDGEDB_METRICS_END, BridgedbMetricsImpl.class);
} else if (fileName.contains(LogDescriptorImpl.MARKER)) {
return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName);
} else if (firstLines.startsWith("@type bandwidth-file 1.")
......
......@@ -10,7 +10,9 @@ import org.torproject.descriptor.DescriptorReader;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -21,6 +23,7 @@ import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
......@@ -325,10 +328,18 @@ public class DescriptorReaderImpl implements DescriptorReader {
}
private void readDescriptorFile(File file) throws IOException {
byte[] rawDescriptorBytes = Files.readAllBytes(file.toPath());
for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
rawDescriptorBytes, file, file.getName())) {
this.descriptorQueue.add(descriptor);
try (FileInputStream fis = new FileInputStream(file)) {
InputStream is = fis;
if (file.getName().endsWith(".gz")) {
is = new GzipCompressorInputStream(fis);
}
byte[] rawDescriptorBytes = IOUtils.toByteArray(is);
if (rawDescriptorBytes.length > 0) {
for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
rawDescriptorBytes, file, file.getName())) {
this.descriptorQueue.add(descriptor);
}
}
}
}
}
......
......@@ -18,6 +18,9 @@ public enum Key {
ALLOW_SINGLE_HOP_EXITS("allow-single-hop-exits"),
BANDWIDTH("bandwidth"),
BANDWIDTH_WEIGHTS("bandwidth-weights"),
BRIDGEDB_METRICS_END("bridgedb-metrics-end"),
BRIDGEDB_METRICS_VERSION("bridgedb-metrics-version"),
BRIDGEDB_METRIC_COUNT("bridgedb-metric-count"),
BRIDGE_IPS("bridge-ips"),
BRIDGE_IP_TRANSPORTS("bridge-ip-transports"),
BRIDGE_IP_VERSIONS("bridge-ip-versions"),
......
......@@ -115,6 +115,20 @@ public class ParseHelper {
return Duration.ofSeconds(parsedSeconds);
}
/**
 * Parse the part at the given index as a long value.
 *
 * @param line Complete line, only used in error messages.
 * @param parts Parts of the line.
 * @param index Index into {@code parts} of the value to parse.
 * @return Parsed long value.
 * @throws DescriptorParseException If {@code parts} has no element at
 *     {@code index} or that element is not a valid long.
 */
protected static Long parseLong(String line, String[] parts, int index)
    throws DescriptorParseException {
  if (index < parts.length) {
    final String candidate = parts[index];
    try {
      return Long.parseLong(candidate);
    } catch (NumberFormatException e) {
      final String unparseable = String.format(
          "Unable to parse long value '%s' in line '%s'.", candidate, line);
      throw new DescriptorParseException(unparseable);
    }
  }
  // Only reached when the line has too few parts.
  final String missing = String.format(
      "Line '%s' does not contain a long value at index %d.", line, index);
  throw new DescriptorParseException(missing);
}
protected static String parseExitPattern(String line, String exitPattern)
throws DescriptorParseException {
if (!exitPattern.contains(":")) {
......
......@@ -101,36 +101,22 @@ public class SnowflakeStatsImpl extends DescriptorImpl
private void parseSnowflakeIpsTotal(String line, String[] parts)
throws DescriptorParseException {
this.snowflakeIpsTotal = parseLong(line, parts, 1);
this.snowflakeIpsTotal = ParseHelper.parseLong(line, parts, 1);
}
private void parseSnowflakeIdleCount(String line, String[] parts)
throws DescriptorParseException {
this.snowflakeIdleCount = parseLong(line, parts, 1);
this.snowflakeIdleCount = ParseHelper.parseLong(line, parts, 1);
}
private void parseClientDeniedCount(String line, String[] parts)
throws DescriptorParseException {
this.clientDeniedCount = parseLong(line, parts, 1);
this.clientDeniedCount = ParseHelper.parseLong(line, parts, 1);
}
private void parseClientSnowflakeMatchCount(String line, String[] parts)
throws DescriptorParseException {
this.clientSnowflakeMatchCount = parseLong(line, parts, 1);
}
private static Long parseLong(String line, String[] parts, int index)
throws DescriptorParseException {
if (index >= parts.length) {
throw new DescriptorParseException(String.format(
"Line '%s' does not contain a long value at index %d.", line, index));
}
try {
return Long.parseLong(parts[index]);
} catch (NumberFormatException e) {
throw new DescriptorParseException(String.format(
"Unable to parse long value '%s' in line '%s'.", parts[index], line));
}
this.clientSnowflakeMatchCount = ParseHelper.parseLong(line, parts, 1);
}
private LocalDateTime snowflakeStatsEnd;
......
/* Copyright 2019 The Tor Project
* See LICENSE for licensing information */
package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.torproject.descriptor.BridgedbMetrics;
import org.torproject.descriptor.DescriptorParseException;
import org.hamcrest.Matchers;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.time.Duration;
import java.time.LocalDateTime;
/**
 * Unit tests for parsing BridgeDB metrics descriptors with
 * {@link BridgedbMetricsImpl}.
 */
public class BridgedbMetricsImplTest {

  @Rule
  public ExpectedException thrown = ExpectedException.none();

  /**
   * Example taken from BridgeDB metrics from 2019-09-17.
   */
  private static final String[] exampleBridgedbMetricsLog = new String[] {
    "bridgedb-metrics-end 2019-09-17 00:33:44 (86400 s)",
    "bridgedb-metrics-version 1",
    "bridgedb-metric-count https.obfs3.ru.success.none 10",
    "bridgedb-metric-count https.obfs3.sk.success.none 10",
    "bridgedb-metric-count https.fte.de.fail.none 10" };

  /**
   * Build a descriptor from the given lines and parse it without an
   * associated descriptor file.
   */
  private static BridgedbMetrics parse(String... lines)
      throws DescriptorParseException {
    byte[] rawBytes = new TestDescriptorBuilder(lines).build();
    return new BridgedbMetricsImpl(rawBytes, null);
  }

  /** The full example parses and exposes all three request counts. */
  @Test
  public void testExampleMetricsLog() throws DescriptorParseException {
    BridgedbMetrics parsed = parse(exampleBridgedbMetricsLog);
    assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44),
        parsed.bridgedbMetricsEnd());
    assertEquals(Duration.ofDays(1L),
        parsed.bridgedbMetricsIntervalLength());
    assertEquals("1", parsed.bridgedbMetricsVersion());
    assertTrue(parsed.bridgedbMetricCounts().isPresent());
    assertEquals(3, parsed.bridgedbMetricCounts().get().size());
    String[] expectedKeys = new String[] {
      "https.obfs3.ru.success.none",
      "https.obfs3.sk.success.none",
      "https.fte.de.fail.none" };
    for (String expectedKey : expectedKeys) {
      assertEquals((Long) 10L,
          parsed.bridgedbMetricCounts().get().get(expectedKey));
    }
  }

  /** End and version lines alone are valid; counts are then absent. */
  @Test
  public void testMinimalBridgedbMetrics() throws DescriptorParseException {
    BridgedbMetrics parsed = parse(exampleBridgedbMetricsLog[0],
        exampleBridgedbMetricsLog[1]);
    assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44),
        parsed.bridgedbMetricsEnd());
    assertEquals(Duration.ofDays(1L),
        parsed.bridgedbMetricsIntervalLength());
    assertEquals("1", parsed.bridgedbMetricsVersion());
    assertFalse(parsed.bridgedbMetricCounts().isPresent());
  }

  /** An appended empty line is rejected. */
  @Test
  public void testEmptyLine() throws DescriptorParseException {
    this.thrown.expect(DescriptorParseException.class);
    this.thrown.expectMessage(Matchers.containsString(
        "Blank lines are not allowed."));
    TestDescriptorBuilder builder
        = new TestDescriptorBuilder(exampleBridgedbMetricsLog);
    new BridgedbMetricsImpl(builder.appendLines("").build(), null);
  }

  /** A repeated version line violates the exactly-once requirement. */
  @Test
  public void testDuplicateLine() throws DescriptorParseException {
    this.thrown.expect(DescriptorParseException.class);
    this.thrown.expectMessage(Matchers.containsString(
        "must be contained exactly once."));
    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
        exampleBridgedbMetricsLog[1]);
  }

  /** The same count key on two lines is rejected. */
  @Test
  public void testDuplicateKey() throws DescriptorParseException {
    this.thrown.expect(DescriptorParseException.class);
    this.thrown.expectMessage(Matchers.containsString("Duplicate key"));
    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
        exampleBridgedbMetricsLog[2], exampleBridgedbMetricsLog[2]);
  }

  /** A count value that is not a number ("10-ish") is rejected. */
  @Test
  public void testNoValue() throws DescriptorParseException {
    this.thrown.expect(DescriptorParseException.class);
    this.thrown.expectMessage(Matchers.containsString(
        "Unable to parse long value '10-ish' in line"));
    String brokenCountLine = exampleBridgedbMetricsLog[2] + "-ish";
    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
        brokenCountLine);
  }

  /** An interval length of zero seconds is rejected. */
  @Test
  public void testNonPositiveIntervalLength() throws DescriptorParseException {
    this.thrown.expect(DescriptorParseException.class);
    this.thrown.expectMessage(Matchers.containsString(
        "Duration must be positive"));
    parse("bridgedb-metrics-end 2019-09-17 00:33:44 (0 s)",
        exampleBridgedbMetricsLog[1]);
  }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment