diff --git a/CHANGELOG b/CHANGELOG index 10012cec372186a57f30704b05dbe171d5cb5c1d..8939ac494605256cad4a101f5a2ddce629cd9c86 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ + * FIXES https://bugs.torproject.org/31422 + Make BridgeDB report internal metrics, like the median number of users that + bridges were handed out to. + * FIXES https://bugs.torproject.org/34260 Parse bridge blocking information from SQL database. diff --git a/bridgedb/distributors/email/autoresponder.py b/bridgedb/distributors/email/autoresponder.py index 9b0ac53dae5beed796c58b8ce5ea4c6aae8568b2..94a8f75b2478c623ccebcfe616100ca7e0ab1187 100644 --- a/bridgedb/distributors/email/autoresponder.py +++ b/bridgedb/distributors/email/autoresponder.py @@ -62,9 +62,10 @@ from bridgedb.parse.addr import canonicalizeEmailDomain from bridgedb.util import levenshteinDistance from bridgedb import translations -# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.EmailMetrics() +emailMetrix = metrics.EmailMetrics() +internalMetrix = metrics.InternalMetrics() def createResponseBody(lines, context, client, lang='en'): @@ -113,6 +114,9 @@ def createResponseBody(lines, context, client, lang='en'): transport = bridgeRequest.justOnePTType() answer = "".join(" %s\r\n" % b.getBridgeLine( bridgeRequest, context.includeFingerprints) for b in bridges) + internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges) + else: + internalMetrix.recordEmptyEmailResponse() return templates.buildAnswerMessage(translator, client, answer) def generateResponse(fromAddress, client, body, subject=None, @@ -396,9 +400,9 @@ class SMTPAutoresponder(smtp.SMTPClient): # request. 
translator = translations.installTranslations(lang) if body is not None and translator.gettext(strings.EMAIL_MISC_TEXT[1]) in body: - metrix.recordValidEmailRequest(self) + emailMetrix.recordValidEmailRequest(self) else: - metrix.recordInvalidEmailRequest(self) + emailMetrix.recordInvalidEmailRequest(self) if not body: return # The client was already warned. diff --git a/bridgedb/distributors/https/server.py b/bridgedb/distributors/https/server.py index 91757e791461036c4a197a07b9a1852a4462d729..fd3fc7475ffa3bfe021255f5d5ba3c9d42f2f940 100644 --- a/bridgedb/distributors/https/server.py +++ b/bridgedb/distributors/https/server.py @@ -90,9 +90,10 @@ logging.debug("Set template root to %s" % TEMPLATE_DIR) #: A list of supported language tuples. Use getSortedLangList() to read this variable. supported_langs = [] -# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.HTTPSMetrics() +httpsMetrix = metrics.HTTPSMetrics() +internalMetrix = metrics.InternalMetrics() def stringifyRequestArgs(args): @@ -574,7 +575,7 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource): try: if self.checkSolution(request) is True: - metrix.recordValidHTTPSRequest(request) + httpsMetrix.recordValidHTTPSRequest(request) return self.resource.render(request) except ValueError as err: logging.debug(str(err)) @@ -584,14 +585,14 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource): # work of art" as pennance for their sins. 
d = task.deferLater(reactor, 1, lambda: request) d.addCallback(redirectMaliciousRequest) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return NOT_DONE_YET except Exception as err: logging.debug(str(err)) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return replaceErrorPage(request, err) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) logging.debug("Client failed a CAPTCHA; returning redirect to %s" % request.uri) return redirectTo(request.uri, request) @@ -847,12 +848,12 @@ class ReCaptchaProtectedResource(CaptchaProtectedResource): # breaking). Hence, the 'no cover' pragma. if solution.is_valid: # pragma: no cover logging.info("Valid CAPTCHA solution from %r." % clientIP) - metrix.recordValidHTTPSRequest(request) + httpsMetrix.recordValidHTTPSRequest(request) return (True, request) else: logging.info("Invalid CAPTCHA solution from %r: %r" % (clientIP, solution.error_code)) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return (False, request) d = txrecaptcha.submit(challenge, response, self.secretKey, @@ -1000,6 +1001,8 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource): bridgeLines = [replaceControlChars(bridge.getBridgeLine( bridgeRequest, self.includeFingerprints)) for bridge in bridges] + internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges) + if antibot.isRequestFromBot(request): transports = bridgeRequest.transports # Return either a decoy bridge or no bridge. 
@@ -1059,6 +1062,9 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource): rtl = False format = self.getResponseFormat(request) + if not bridgeLines: + internalMetrix.recordEmptyHTTPSResponse() + if format == 'plain': request.setHeader("Content-Type", "text/plain") try: diff --git a/bridgedb/distributors/moat/server.py b/bridgedb/distributors/moat/server.py index cdf2022f33a33f0dd0095e519e7f7850f71fae22..7e83d94cfe778879b0200f7e5b97d679bc70f0a7 100644 --- a/bridgedb/distributors/moat/server.py +++ b/bridgedb/distributors/moat/server.py @@ -51,9 +51,10 @@ from bridgedb.schedule import Unscheduled from bridgedb.schedule import ScheduledInterval from bridgedb.util import replaceControlChars -# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.MoatMetrics() +moatMetrix = metrics.MoatMetrics() +internalMetrix = metrics.InternalMetrics() #: The current version of the moat JSON API that we speak @@ -538,26 +539,37 @@ class CaptchaCheckResource(CaptchaResource): return bridgeRequest - def getBridgeLines(self, bridgeRequest): - """Get bridge lines for a client's HTTP request. + def getBridges(self, bridgeRequest): + """Get bridges for a client's HTTP request. :type bridgeRequest: :class:`MoatBridgeRequest` :param bridgeRequest: A valid bridge request object with pre-generated filters (as returned by :meth:`createBridgeRequest`). :rtype: list - :returns: A list of bridge lines. + :return: A list of :class:`~bridgedb.bridges.Bridge`s. """ - bridgeLines = list() + bridges = list() interval = self.schedule.intervalStart(time.time()) logging.debug("Replying to JSON API request from %s." 
% bridgeRequest.client) if bridgeRequest.isValid(): bridges = self.distributor.getBridges(bridgeRequest, interval) - bridgeLines = [replaceControlChars(bridge.getBridgeLine(bridgeRequest)) - for bridge in bridges] - return bridgeLines + return bridges + + def getBridgeLines(self, bridgeRequest, bridges): + """ + :type bridgeRequest: :class:`MoatBridgeRequest` + :param bridgeRequest: A valid bridge request object with pre-generated + filters (as returned by :meth:`createBridgeRequest`). + :param list bridges: A list of :class:`~bridgedb.bridges.Bridge` + objects. + :rtype: list + :return: A list of bridge lines. + """ + return [replaceControlChars(bridge.getBridgeLine(bridgeRequest)) + for bridge in bridges] def extractClientSolution(self, data): """Extract the client's CAPTCHA solution from a POST request. @@ -686,7 +698,7 @@ class CaptchaCheckResource(CaptchaResource): if error: # pragma: no cover logging.debug("Error while checking moat request headers.") - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return error.render(request) data = { @@ -714,25 +726,30 @@ class CaptchaCheckResource(CaptchaResource): valid = self.checkSolution(challenge, solution, clientIP) except captcha.CaptchaExpired: logging.debug("The challenge had timed out") - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(5, request) except Exception as impossible: logging.warn("Unhandled exception while processing a POST /fetch request!") logging.error(impossible) - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(4, request) if valid: qrcode = None bridgeRequest = self.createBridgeRequest(clientIP, client_data) - bridgeLines = self.getBridgeLines(bridgeRequest) - metrix.recordValidMoatRequest(request) + bridges = self.getBridges(bridgeRequest) + bridgeLines = self.getBridgeLines(bridgeRequest, bridges) + 
moatMetrix.recordValidMoatRequest(request) # If we can only return less than the configured # MOAT_BRIDGES_PER_ANSWER then log a warning. if len(bridgeLines) < self.nBridgesToGive: logging.warn(("Not enough bridges of the type specified to " "fulfill the following request: %s") % bridgeRequest) + if not bridgeLines: + internalMetrix.recordEmptyMoatResponse() + else: + internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges) if antibot.isRequestFromBot(request): ttype = transport or "vanilla" @@ -754,7 +771,7 @@ class CaptchaCheckResource(CaptchaResource): return self.formatDataForResponse(data, request) else: - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(4, request) diff --git a/bridgedb/main.py b/bridgedb/main.py index 8b851a346b2c1387d32b635bff907d592b29703d..8fdec234c3dfd773cc38e138c2675625b95154b8 100644 --- a/bridgedb/main.py +++ b/bridgedb/main.py @@ -483,6 +483,23 @@ def run(options, reactor=reactor): else: logging.warn("No Moat distributor created!") + metrix = metrics.InternalMetrics() + logging.info("Logging bridge ring metrics for %d rings." % + len(hashring.ringsByName)) + for ringName, ring in hashring.ringsByName.items(): + # Ring is of type FilteredBridgeSplitter or UnallocatedHolder. + # FilteredBridgeSplitter splits bridges into subhashrings based on + # filters. + if hasattr(ring, "filterRings"): + for (ringname, (filterFn, subring)) in ring.filterRings.items(): + subRingName = "-".join(ring.extractFilterNames(ringname)) + metrix.recordBridgesInHashring(ringName, + subRingName, + len(subring)) + elif hasattr(ring, "fingerprints"): + metrix.recordBridgesInHashring(ringName, "unallocated", + len(ring.fingerprints)) + # Dump bridge pool assignments to disk. 
writeAssignments(hashring, state.ASSIGNMENTS_FILE) state.save() diff --git a/bridgedb/metrics.py b/bridgedb/metrics.py index 48713f03f83d5c45cb8b2ea7d897ee4d3fb80949..14073ee8c3be66bd900b2f0ae19aa1b13adbf75f 100644 --- a/bridgedb/metrics.py +++ b/bridgedb/metrics.py @@ -18,6 +18,8 @@ import ipaddr import operator import json import datetime +import statistics +import numpy from bridgedb import geo from bridgedb.distributors.common.http import getClientIP @@ -54,7 +56,7 @@ SUPPORTED_TRANSPORTS = None # Version number for our metrics format. We increment the version if our # format changes. -METRICS_VERSION = 1 +METRICS_VERSION = 2 def setProxies(proxies): @@ -106,14 +108,14 @@ def export(fh, measurementInterval): and dump our metrics. """ - httpsMetrix = HTTPSMetrics() - emailMetrix = EmailMetrics() - moatMetrix = MoatMetrics() + metrics = [HTTPSMetrics(), + EmailMetrics(), + MoatMetrics(), + InternalMetrics()] # Rotate our metrics. - httpsMetrix.rotate() - emailMetrix.rotate() - moatMetrix.rotate() + for m in metrics: + m.rotate() numProxies = len(PROXIES) if PROXIES is not None else 0 if numProxies == 0: @@ -127,17 +129,14 @@ def export(fh, measurementInterval): measurementInterval)) fh.write("bridgedb-metrics-version %d\n" % METRICS_VERSION) - httpsLines = httpsMetrix.getMetrics() - for line in httpsLines: - fh.write("bridgedb-metric-count %s\n" % line) + for m in metrics: + distLines = m.getMetrics() + for line in distLines: + fh.write("bridgedb-metric-count %s\n" % line) + logging.debug("Writing metrics line to file: %s" % line) - moatLines = moatMetrix.getMetrics() - for line in moatLines: - fh.write("bridgedb-metric-count %s\n" % line) - - emailLines = emailMetrix.getMetrics() - for line in emailLines: - fh.write("bridgedb-metric-count %s\n" % line) + for m in metrics: + m.reset() def resolveCountryCode(ipAddr): @@ -200,6 +199,7 @@ class Metrics(metaclass=Singleton): # that, our hot metrics turn into cold metrics, and we start over. 
self.hotMetrics = dict() self.coldMetrics = dict() + self.unsanitisedSet = set() def rotate(self): """Rotate our metrics.""" @@ -216,8 +216,15 @@ class Metrics(metaclass=Singleton): return anomaly - def getMetrics(self): - """Get our sanitized current metrics, one per line. + def doNotSanitise(self, key): + """ + :param str key: A key that will not be sanitised when exporting our + metrics. + """ + self.unsanitisedSet.add(key) + + def getMetrics(self, sanitized=True): + """Get our (sanitized) current metrics, one per line. Metrics are of the form: @@ -227,15 +234,19 @@ class Metrics(metaclass=Singleton): ... ] + :param bool sanitized: ``True`` if the metrics must be sanitized. :rtype: list :returns: A list of metric lines. """ lines = [] for key, value in self.coldMetrics.items(): - # Round up our value to the nearest multiple of self.binSize to - # reduce the accuracy of our real values. - if (value % self.binSize) > 0: - value += self.binSize - (value % self.binSize) + + # There's no need to sanitize internal metrics. + if sanitized and not key in self.unsanitisedSet: + # Round up our value to the nearest multiple of self.binSize to + # reduce the accuracy of our real values. + if (value % self.binSize) > 0: + value += self.binSize - (value % self.binSize) lines.append("%s %d" % (key, value)) return lines @@ -290,6 +301,148 @@ class Metrics(metaclass=Singleton): return key + def reset(self): + """Reset internal variables after a metrics interval.""" + pass + + +class InternalMetrics(Metrics): + + def __init__(self): + super(InternalMetrics, self).__init__() + self.keyPrefix = "internal" + # Maps bridges to the number of time they have been handed out. + self.bridgeHandouts = {} + + # There's no reason for the following metrics to be sanitised. 
+ handoutsPrefix = "{}.handouts".format(self.keyPrefix) + self.doNotSanitise("{}.unique-bridges".format(handoutsPrefix)) + self.doNotSanitise("{}.median".format(handoutsPrefix)) + self.doNotSanitise("{}.min".format(handoutsPrefix)) + self.doNotSanitise("{}.max".format(handoutsPrefix)) + self.doNotSanitise("{}.quartile1".format(handoutsPrefix)) + self.doNotSanitise("{}.quartile3".format(handoutsPrefix)) + self.doNotSanitise("{}.lower-whisker".format(handoutsPrefix)) + self.doNotSanitise("{}.upper-whisker".format(handoutsPrefix)) + + def reset(self): + """Reset bridge handouts after each interval.""" + + # Log the bridge that has seen the most handouts. This helps us + # understand BridgeDB better. + items = self.bridgeHandouts.items() + if len(items): + bridgeLine, num = sorted(items, key=lambda x: x[1], + reverse=True)[0] + logging.debug("Bridge line with most handouts (%d): %s" % + (num, bridgeLine)) + + self.bridgeHandouts = {} + + def _recordEmptyResponse(self, distributor): + """ + Record an empty bridge request response for the given distributor. + + :param str distributor: A bridge distributor, e.g., "https". + """ + self.inc("{}.{}.empty-response".format(self.keyPrefix, distributor)) + + def recordEmptyEmailResponse(self): + self._recordEmptyResponse("email") + + def recordEmptyHTTPSResponse(self): + self._recordEmptyResponse("https") + + def recordEmptyMoatResponse(self): + self._recordEmptyResponse("moat") + + def recordHandoutsPerBridge(self, bridgeRequest, bridges): + """ + Record how often a given bridge was handed out. + + Note that bridges that were not handed out will not be part of these + metrics. + + :type bridgeRequest: :api:`bridgerequest.BridgeRequestBase` + :param bridgeRequest: A bridge request for either one of our + distributors. + :param list bridges: A list of :class:`~bridgedb.Bridges.Bridge`s. 
+ """ + + handoutsPrefix = "{}.handouts".format(self.keyPrefix) + + if bridgeRequest is None or bridges is None: + logging.warning("Given bridgeRequest and bridges cannot be None.") + return + + # Keep track of how many IPv4 and IPv6 requests we are seeing. + ipVersion = bridgeRequest.ipVersion + if ipVersion not in [4, 6]: + logging.warning("Got bridge request for unsupported IP version " + "{}.".format(ipVersion)) + return + else: + self.inc("{}.ipv{}".format(handoutsPrefix, ipVersion)) + + # Keep track of how many times we're handing out a given bridge. + for bridge in bridges: + # Use bridge lines as dictionary key. We cannot use the bridge + # objects because BridgeDB reloads its descriptors every 30 + # minutes, at which points the bridge objects change. + key = bridge.getBridgeLine(bridgeRequest) + num = self.bridgeHandouts.get(key, None) + if num is None: + self.bridgeHandouts[key] = 1 + else: + self.bridgeHandouts[key] = num + 1 + + # We need more than two handouts to calculate our statistics. + values = self.bridgeHandouts.values() + if len(values) <= 2: + return + + # Update our statistics. + self.set("{}.median".format(handoutsPrefix), + statistics.median(values)) + self.set("{}.min".format(handoutsPrefix), min(values)) + self.set("{}.max".format(handoutsPrefix), max(values)) + self.set("{}.unique-bridges".format(handoutsPrefix), + len(self.bridgeHandouts)) + # Python 3.8 comes with a statistics.quantiles function, which we + # should use instead of numpy once 3.8 is available in Debian stable. + q1, q3 = numpy.quantile(numpy.array(list(values)), [0.25, 0.75]) + self.set("{}.quartile1".format(handoutsPrefix), q1) + self.set("{}.quartile3".format(handoutsPrefix), q3) + # Determine our inter-quartile range (the difference between quartile 3 + # and quartile 1) and use it to calculate the upper and lower whiskers + # as you would see them in a boxplot. 
+ iqr = q3 - q1 + lowerWhisker = min([x for x in values if x >= q1 - (1.5 * iqr)]) + upperWhisker = max([x for x in values if x <= q3 + (1.5 * iqr)]) + self.set("{}.lower-whisker".format(handoutsPrefix), lowerWhisker) + self.set("{}.upper-whisker".format(handoutsPrefix), upperWhisker) + + def recordBridgesInHashring(self, ringName, subRingName, numBridges): + """ + Record the number of bridges per hashring. + + :param str ringName: The name of the ring, e.g., "https". + :param str subRingName: The name of the subring, e.g., + "byIPv6-bySubring1of4". + :param int numBridges: The number of bridges in the given subring. + """ + + if not ringName or not subRingName: + logging.warning("Ring name ({}) and subring name ({}) cannot be " + "empty.".format(ringName, subRingName)) + return + + logging.info("Recording metrics for bridge (sub)rings: %s/%s/%d." % + (ringName, subRingName, numBridges)) + # E.g, concatenate "https" with "byipv6-bysubring1of4". + key = "{}.{}.{}".format(self.keyPrefix, ringName, subRingName.lower()) + self.set(key, numBridges) + class HTTPSMetrics(Metrics): diff --git a/bridgedb/test/test_distributors_moat_server.py b/bridgedb/test/test_distributors_moat_server.py index 695116b057f87ecab41ed10671d117a8b9ab4cee..3d338237af248f285f55259ef92be1e954521569 100644 --- a/bridgedb/test/test_distributors_moat_server.py +++ b/bridgedb/test/test_distributors_moat_server.py @@ -586,7 +586,7 @@ class MockCaptchaCheckResource(server.CaptchaCheckResource): """A mocked :class:`server.CaptchaCheckResource` whose ``getBridgeLines`` method returns no bridges. 
""" - def getBridgeLines(self, bridgeRequest): + def getBridgeLines(self, bridgeRequest, bridges): return list() @@ -860,7 +860,8 @@ class CaptchaCheckResourceTests(unittest.TestCase): content = json.loads(encoded_content)['data'][0] bridgeRequest = self.resource.createBridgeRequest('3.3.3.3', content) - bridgelines = self.resource.getBridgeLines(bridgeRequest) + bridges = self.resource.getBridges(bridgeRequest) + bridgelines = self.resource.getBridgeLines(bridgeRequest, bridges) self.assertTrue(bridgelines) @@ -869,7 +870,8 @@ class CaptchaCheckResourceTests(unittest.TestCase): request.client = requesthelper.IPv4Address('TCP', '3.3.3.3', 443) bridgeRequest = self.resource.createBridgeRequest('3.3.3.3', None) - bridgelines = self.resource.getBridgeLines(bridgeRequest) + bridges = self.resource.getBridges(bridgeRequest) + bridgelines = self.resource.getBridgeLines(bridgeRequest, bridges) self.assertFalse(bridgeRequest.isValid()) self.assertEqual(len(bridgelines), 0) diff --git a/bridgedb/test/test_metrics.py b/bridgedb/test/test_metrics.py index fcc9e4ae4b450e755bfa625f556424da61ae0c8c..12b3ee10313c90ac899c20dfb045ea4a31bead63 100644 --- a/bridgedb/test/test_metrics.py +++ b/bridgedb/test/test_metrics.py @@ -18,13 +18,17 @@ functioning as expected. 
import io import json import os +import copy from bridgedb import metrics +from bridgedb.test import util from bridgedb.test.https_helpers import DummyRequest from bridgedb.distributors.email.server import SMTPMessage +from bridgedb.distributors.https.server import HTTPSBridgeRequest from bridgedb.test.email_helpers import _createMailServerContext from bridgedb.test.email_helpers import _createConfig from bridgedb.distributors.moat import server +from bridgedb.bridges import Bridge from twisted.trial import unittest from twisted.test import proto_helpers @@ -41,6 +45,7 @@ class StateTest(unittest.TestCase): type(metrics.HTTPSMetrics()).clear() type(metrics.EmailMetrics()).clear() type(metrics.MoatMetrics()).clear() + type(metrics.InternalMetrics()).clear() metrics.setSupportedTransports({ 'obfs2': False, @@ -213,3 +218,130 @@ class StateTest(unittest.TestCase): self.assertTrue(metrics.isBridgeTypeSupported("obfs4")) self.assertTrue(metrics.isBridgeTypeSupported("vanilla")) self.assertFalse(metrics.isBridgeTypeSupported("xxx")) + + def test_bridge_handouts(self): + + metrix = metrics.InternalMetrics() + bridges = copy.deepcopy(util.generateFakeBridges()) + bridge1, bridge2, bridge3 = bridges[0:3] + m = metrix.hotMetrics + + br = HTTPSBridgeRequest() + br.withIPversion({"ipv6": "4"}) + br.isValid(True) + + # Record a number of distribution events for three separate bridges. + for i in range(10): + metrix.recordHandoutsPerBridge(br, [bridge1]) + for i in range(5): + metrix.recordHandoutsPerBridge(br, [bridge2]) + metrix.recordHandoutsPerBridge(br, [bridge3]) + + self.assertEqual(m["internal.handouts.unique-bridges"], 3) + self.assertEqual(m["internal.handouts.min"], 1) + self.assertEqual(m["internal.handouts.max"], 10) + self.assertEqual(m["internal.handouts.median"], 5) + + # Internal metrics must not be sanitized. 
+ metrix.rotate() + lines = metrix.getMetrics() + self.assertIn("internal.handouts.unique-bridges 3", lines) + self.assertIn("internal.handouts.median 5", lines) + self.assertIn("internal.handouts.min 1", lines) + self.assertIn("internal.handouts.max 10", lines) + + def test_empty_responses(self): + + metrix = metrics.InternalMetrics() + + # Unlike all other internal metrics, empty responses are sanitized. + for i in range(10): + metrix.recordEmptyEmailResponse() + for i in range(11): + metrix.recordEmptyMoatResponse() + metrix.recordEmptyHTTPSResponse() + + metrix.rotate() + lines = metrix.getMetrics() + + self.assertEqual(len(lines), 3) + self.assertIn("internal.email.empty-response 10", lines) + self.assertIn("internal.moat.empty-response 20", lines) + self.assertIn("internal.https.empty-response 10", lines) + + def test_rings(self): + + metrix = metrics.InternalMetrics() + + # Empty parameters must not be recorded. + metrix.recordBridgesInHashring("", "", 20) + self.assertEqual(len(metrix.hotMetrics), 0) + + metrix.recordBridgesInHashring("https", "byIPv6-bySubring1of4", 20) + self.assertEqual(len(metrix.hotMetrics), 1) + self.assertEqual(list(metrix.hotMetrics.keys()), + ["internal.https.byipv6-bysubring1of4"]) + + def test_ipv4_ipv6_requests(self): + + metrix = metrics.InternalMetrics() + v6Req = HTTPSBridgeRequest() + v6Req.withIPversion({"ipv6": "4"}) + v4Req = HTTPSBridgeRequest() + v4Req.withIPversion({}) + + bridges = copy.deepcopy(util.generateFakeBridges()) + + for i in range(9): + metrix.recordHandoutsPerBridge(v6Req, [bridges[0]]) + metrix.recordHandoutsPerBridge(v6Req, [bridges[1]]) + + for i in range(11): + metrix.recordHandoutsPerBridge(v4Req, [bridges[0]]) + + metrix.rotate() + lines = metrix.getMetrics() + + self.assertIn("internal.handouts.ipv6 10", lines) + self.assertIn("internal.handouts.ipv4 20", lines) + + def test_handouts(self): + + metrix = metrics.InternalMetrics() + metrix.recordHandoutsPerBridge(None, None) + 
self.assertEqual(len(metrix.hotMetrics), 0) + + req = HTTPSBridgeRequest() + req.withIPversion({}) + req.isValid(True) + bridges = copy.deepcopy(util.generateFakeBridges()) + + metrix.recordHandoutsPerBridge(req, [bridges[0]]) + self.assertNotIn("internal.handouts.median", metrix.hotMetrics.keys()) + metrix.recordHandoutsPerBridge(req, [bridges[1]]) + self.assertNotIn("internal.handouts.median", metrix.hotMetrics.keys()) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + self.assertEqual(metrix.hotMetrics["internal.handouts.median"], 1) + + metrix.recordHandoutsPerBridge(req, [bridges[1]]) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + self.assertEqual(metrix.hotMetrics["internal.handouts.min"], 1) + self.assertEqual(metrix.hotMetrics["internal.handouts.median"], 2) + self.assertEqual(metrix.hotMetrics["internal.handouts.max"], 3) + self.assertEqual(metrix.hotMetrics["internal.handouts.unique-bridges"], 3) + self.assertEqual(metrix.hotMetrics["internal.handouts.quartile1"], 1.5) + self.assertEqual(metrix.hotMetrics["internal.handouts.quartile3"], 2.5) + self.assertEqual(metrix.hotMetrics["internal.handouts.lower-whisker"], 1) + self.assertEqual(metrix.hotMetrics["internal.handouts.upper-whisker"], 3) + + def test_metrics_reset(self): + metrix = metrics.InternalMetrics() + req = HTTPSBridgeRequest() + req.withIPversion({}) + bridges = copy.deepcopy(util.generateFakeBridges()) + metrix.recordHandoutsPerBridge(req, [bridges[0]]) + + self.assertTrue(len(metrix.bridgeHandouts) > 0) + metrix.reset() + self.assertTrue(len(metrix.bridgeHandouts) == 0) diff --git a/doc/bridgedb-metrics-spec.txt b/doc/bridgedb-metrics-spec.txt index 14c38f972e86d26bb6420a88e0fd0a72b2b60a47..aac838a68e5124f0862c34938f9f051475014d1a 100644 --- a/doc/bridgedb-metrics-spec.txt +++ b/doc/bridgedb-metrics-spec.txt @@ -1,4 +1,4 @@ - BridgeDB metrics (version 1) + BridgeDB metrics (version 2) BridgeDB exports usage metrics once every 24 
hours. These metrics encode how many approximate successful/failed requests BridgeDB has seen @@ -33,42 +33,82 @@ file is formatted as follows: "bridgedb-metric-count" METRIC_KEY COUNT NL [Any number.] - METRIC_KEY determines a metrics key, which consists of several - fields, separated by a period: - - DISTRIBUTION "." TRANSPORT "." CC/EMAIL "." "success" | "fail" "." RESERVED - - DISTRIBUTION is BridgeDB's distribution mechanism, which includes - "https", "email", and "moat". These distribution mechanisms may - change in the future. + METRIC_KEY specifies a metrics key, which consists of several fields, + separated by a period. These fields form a hierarchy. At the root of + the hierarchy are our three distribution mechanisms ("https", "email", + and "moat") and "internal", which represents BridgeDB-internal metrics. + The hierarchy is of the following form: + + * "https" + └─* TRANSPORT + └─* CC + └─* "success" | "fail" + └─* RESERVED + + * "email" + └─* TRANSPORT + └─* EMAIL + └─* "success" | "fail" + └─* RESERVED + + * "moat" + └─* TRANSPORT + └─* CC + └─* "success" | "fail" + └─* RESERVED + + * "internal" + ├─* "handouts" + │ ├─* "min" + │ ├─* "max" + │ ├─* "median" + │ ├─* "quartile1" + │ ├─* "quartile3" + │ ├─* "lower-whisker" + │ ├─* "upper-whisker" + │ ├─* "ipv4" + │ └─* "ipv6" + │ + └─* TRANSPORT + ├─* "empty-response" + └─* SUBRING + + Strings in between quotes (e.g., "handouts") are literals and show up in + the hierarchy as is. Upper-case strings (e.g., TRANSPORT) are + placeholders, which are explained below. TRANSPORT refers to a pluggable transport protocol. This includes "obfs2", "obfs3", "obfs4", "scramblesuit", and "fte". These pluggable transports will change in the future. - CC/EMAIL refers to a two-letter country code of the user's IP - address iff DISTRIBUTION is "moat" or "https"; or to an email - provider iff DISTRIBUTION is "email". We use two reserved country - codes, "??" and "zz". "??" 
denotes that we couldn't map an IP - address to its country, e.g., because our geolocation API was - unable to. "zz" denotes a proxy IP address, e.g., Tor exit - relays. The two allowed email providers are "gmail" and "riseup". + CC refers to a two-letter country code of the user's IP address. We use + two reserved country codes, "??" and "zz". "??" denotes that we couldn't + map an IP address to its country, e.g., because our geolocation API was + unable to. "zz" denotes a proxy IP address, e.g., Tor exit relays. - The next field is either "success" or "fail", depending on if the - BridgeDB request was successful or not. A request is successful - if BridgeDB attempts to provide the user with bridges, even if - BridgeDB currently has no bridges available. A request has failed - if BridgeDB won't provide the user with bridges, for example, if - the user could not solve the CAPTCHA. + EMAIL refers to an email provider. The two currently-supported email + providers are "gmail" and "riseup". - The field RESERVED is reserved for an anomaly score. It is - currently set to "none" and should be ignored by implementations. + The field RESERVED is reserved for an anomaly score. It is currently set + to "none" and should be ignored by implementations. COUNT is the approximate number of user requests for the given METRIC_KEY. We round up the number of requests to the next - multiple of 10 to preserve some user privacy. + multiple of 10 to preserve some user privacy. Some metric keys are not + rounded up to the next multiple of ten because they are not sensitive. + + One label takes on either the value "success" or "fail", depending on whether + the BridgeDB request was successful or not. A request is successful if + BridgeDB attempts to provide the user with bridges, even if BridgeDB + currently has no bridges available. A request has failed if BridgeDB + won't provide the user with bridges, for example, if the user could not + solve the CAPTCHA. 
- Examples: + Here are several examples: bridgedb-metric-count https.scramblesuit.zz.fail.none 100 bridgedb-metric-count moat.obfs4.??.success.none 3550 bridgedb-metric-count email.fte.gmail.fail.none 10 + bridgedb-metric-count internal.handouts.ipv4 20 + bridgedb-metric-count internal.moat.empty-response 10 + bridgedb-metric-count internal.handouts.min 23 + bridgedb-metric-count internal.https.byipv6-bysubring1of4 40 diff --git a/requirements.txt b/requirements.txt index fdf1043a61c5f78d3ba910191127f56a7f816a92..08edf9ee5b43ed5b0c8f050ba6e62db704a6a06c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ qrcode==6.1 service_identity==18.1.0 stem==1.8.0 zope.interface==5.1.0 +numpy==1.18.5