Unverified Commit bbf1bdc5 authored by Philipp Winter's avatar Philipp Winter
Browse files

Merge branch 'release-0.8.0'

parents 2d86ae0c 7df8ccc7
......@@ -6,7 +6,6 @@
# $ make coverage
#
coverage==4.2
git+https://git.torproject.org/user/phw/leekspin.git@d34c804cd0f01af5206833e62c0dedec8565b235#egg=leekspin
mechanize==0.2.5
pep8==1.5.7
# pylint must be pinned until pylint bug #203 is fixed. See
......
......@@ -15,7 +15,6 @@
#------------------------------------------------------------------------------
coverage==4.2
coveralls==1.2.0
git+https://git.torproject.org/user/phw/leekspin.git@d34c804cd0f01af5206833e62c0dedec8565b235#egg=leekspin
mechanize==0.2.5
sure==1.2.2
Babel==0.9.6
......
Changes in version 0.8.0 - 2019-08-20
* FIXES https://bugs.torproject.org/9316
Make BridgeDB export usage metrics every 24 hours. At the end of each
24-hour measurement interval, BridgeDB will append usage metrics to the
file METRICS_FILE, which is configured in bridgedb.conf. Our metrics
keep track of the number of (un)successful requests per transport type
per country code (or email provider) per distribution method. This way,
we get to learn that, say, over the last 24 hours there were 31-40 users
in Iran who successfully requested an obfs4 bridge over Moat.
* FIXES #26542 https://bugs.torproject.org/26542
Make BridgeDB distribute vanilla IPv6 bridges again.
* FIXES #22755 https://bugs.torproject.org/22755
Use stem instead of leekspin to create test descriptors. We now don't
need to depend on leekspin anymore.
* FIXES #31252 https://bugs.torproject.org/31252
Add an anti-bot mechanism that allows us to detect bots by matching
HTTP request headers for blacklisted patterns. For example, bots may
have their Accept-Language set to "Klingon". Blacklisted patterns are
configured in BLACKLISTED_REQUEST_HEADERS_FILE. When BridgeDB detects
a bot request, we can answer their request with a decoy bridge that's
only handed out to bots. Decoy bridges are configured in
DECOY_BRIDGES_FILE.
Changes in version 0.7.1 - 2019-06-07
* FIXES #28496 https://bugs.torproject.org/28496
......
......@@ -315,11 +315,7 @@ To create a bunch of fake bridge descriptors to test BridgeDB, do::
bridgedb mock [-n NUMBER_OF_DESCRIPTORS]
Note that you will need to install
`leekspin <https://pypi.python.org/pypi/leekspin>`__ in order to run the
``bridgedb mock`` command. See ``doc/HACKING.md`` for details.
And finally, to run the test suites, do::
To run the test suites, do::
make coverage
......@@ -362,16 +358,6 @@ Or just give it a SIGHUP::
kill -s SIGHUP `cat .../run/bridgedb.pid`
----------------------------------
To extract all bridge assignments:
----------------------------------
To dump all bridge assignments to files, send BridgeDB a ``SIGUSR1``
signal by doing::
kill -s SIGUSR1 `cat .../run/bridgedb.pid`
=========================
Using a BridgeDB Instance
=========================
......
......@@ -177,6 +177,9 @@ MASTER_KEY_FILE = "secret_key"
# File to which we dump bridge pool assignments for statistics.
ASSIGNMENTS_FILE = "assignments.log"
# Name of the file that contains BridgeDB's metrics.
METRICS_FILE = "bridgedb-metrics.log"
#------------------
# Logging Options \
#------------------------------------------------------------------------------
......@@ -260,16 +263,19 @@ FORCE_FLAGS = [("Stable", 1)]
# Only consider routers whose purpose matches this string.
BRIDGE_PURPOSE = "bridge"
# TASKS is a dictionary mapping the names of tasks to the frequency with which
# they should be run (in seconds). If a task's value is set to 0, it will not
# be scheduled to run.
# TASKS is a dictionary mapping the names of tasks to a tuple consisting of the
# frequency with which they should be run (in seconds) and a boolean value
# expressing if the task should be run immediately after start up. If a task's
# frequency is set to 0, it will not be scheduled to run.
TASKS = {
# Download a list of Tor exit relays once every three hours (by running
# scripts/get-exit-list) and add those exit relays to the list of proxies
# loaded from the PROXY_LIST_FILES:
'GET_TOR_EXIT_LIST': 3 * 60 * 60,
'GET_TOR_EXIT_LIST': (3 * 60 * 60, True),
# Delete *.unparseable descriptor files which are more than 24 hours old:
'DELETE_UNPARSEABLE_DESCRIPTORS': 24 * 60 * 60,
'DELETE_UNPARSEABLE_DESCRIPTORS': (24 * 60 * 60, False),
# Export usage metrics every 24 hours:
'EXPORT_METRICS': (24 * 60 * 60, False),
}
# SUPPORTED_TRANSPORTS is a dictionary mapping Pluggable Transport methodnames
......@@ -295,6 +301,25 @@ PROBING_RESISTANT_TRANSPORTS = ['scramblesuit', 'obfs4']
# menu).
DEFAULT_TRANSPORT = 'obfs4'
# HTTP headers that suggest that a request was issued by a bot. The CSV
# file must have the following format:
# <HEADER>,<REGEXP>
# ...
# For example:
# Accept-Language,[Kk]lingon
BLACKLISTED_REQUEST_HEADERS_FILE="blacklisted-request-headers.csv"
# Decoy bridges that we are handing out to bots that we detected using the
# regular expressions in BLACKLISTED_REQUEST_HEADERS_FILE. The CSV file must
# have the following format:
# <TRANSPORT>v<IP_VERSION>,<BRIDGE_LINE>
# ...
# For example:
# vanillav4,1.2.3.4:1234 0123456789ABCDEF0123456789ABCDEF01234567
# vanillav6,[::1]:1234 0123456789ABCDEF0123456789ABCDEF01234567
# obfs4v4,obfs4 1.2.3.4:1234 public-key=... node-id=... iat-mode=...
DECOY_BRIDGES_FILE="decoy-bridges.csv"
#-------------------------------
# Moat Distribution Options \
#------------------------------------------------------------------------------
......
......@@ -344,6 +344,8 @@ class BridgeRing(object):
bridges = [self.bridges[k] for k in keys]
bridges = bridges[:N]
logging.debug("Caller asked for N=%d, filterBySubnet=%s bridges. "
"Returning %d bridges." % (N, filterBySubnet, len(bridges)))
return bridges
......
# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
# _____________________________________________________________________________
#
# This file is part of BridgeDB, a Tor bridge distribution system.
#
# :authors: please see included AUTHORS file
# :copyright: (c) 2019, The Tor Project, Inc.
# (c) 2019, Philipp Winter
# :license: see LICENSE for licensing information
# _____________________________________________________________________________
"""Functions for dealing with bot requests."""
import re
import logging
# Maps transport types and IP version (e.g., "obfs4v4", "vanillav4", or
# "vanillav6") to bridge lines (e.g., "1.2.3.4:1234 ...").  Populated at
# startup/reload by loadDecoyBridges().
DECOY_BRIDGES = {}
# Maps HTTP request headers (e.g., "Accept-Language") to compiled regular
# expressions that suggest that the request was issued by a bot (e.g.,
# "[Kk]lingon").  Populated at startup/reload by
# loadBlacklistedRequestHeaders().
BLACKLISTED_REQUEST_HEADERS = {}
def _loadCSV(filename):
"""Load and return the content of the given CSV file.
:param str filename: The filename to read.
:rtype: dict
:returns: A dictionary mapping keys (first column) to values (second
column).
"""
csv = dict()
try:
with open(filename) as fh:
for line in fh.readlines():
if line.count(",") != 1:
logging.warning("Line must have exactly one comma: %s" %
line)
continue
key, value = line.split(",")
csv[key.strip()] = value.strip()
except IOError as err:
logging.warning("I/O error while reading from file %s: %s" %
(filename, err))
return csv
def loadBlacklistedRequestHeaders(filename):
    """Load and globally set a dictionary of blacklisted request headers.

    :param str filename: The filename to read.
    """
    global BLACKLISTED_REQUEST_HEADERS
    compiled = {}
    # _loadCSV gives us header -> regexp strings; compile each regexp and
    # drop the ones that don't compile.
    for header, regexp in _loadCSV(filename).items():
        try:
            compiled[header] = re.compile(regexp)
        except Exception as err:
            logging.warning("Skipping regexp %s because we couldn't compile "
                            "it: %s" % (regexp, err))
    BLACKLISTED_REQUEST_HEADERS = compiled
def loadDecoyBridges(filename):
    """Load and globally set a dictionary of decoy bridges.

    :param str filename: The filename to read.
    """
    global DECOY_BRIDGES
    loaded = _loadCSV(filename)
    # Each bridge line (a string) gets wrapped in a single-element list so
    # consumers always deal with lists of bridge lines.
    DECOY_BRIDGES = dict((ttype, [line]) for ttype, line in loaded.items())
def getDecoyBridge(transport, ipVersion):
    """Return a decoy bridge or, if none is available, None.

    :param str transport: The desired transport, e.g., "vanilla" or "obfs4".
    :param int ipVersion: The IP version, which must be either 4 or 6.
    :rtype: list
    :returns: Return a list of bridge lines or, if we don't have any, None.
    """
    if ipVersion not in [4, 6]:
        return None
    decoy = DECOY_BRIDGES.get("%sv%d" % (transport, ipVersion), None)
    # Only claim success in the log after the lookup; the original logged
    # "Returning ... decoy bridge" even when none was available.
    if decoy is None:
        logging.info("No IPv%d decoy bridge for transport %s available." %
                     (ipVersion, transport))
    else:
        logging.info("Returning IPv%d decoy bridge for transport %s." %
                     (ipVersion, transport))
    return decoy
def isRequestFromBot(request):
    """Determine if the given request is coming from a bot.

    :type request: :api:`twisted.web.http.Request`
    :param request: A ``Request`` object, including POST arguments which
        should include two key/value pairs.
    :rtype: bool
    :returns: True if the request is coming from a bot and False otherwise.
    """
    # A request counts as a bot request as soon as any blacklisted header
    # pattern matches the corresponding request header.
    for header, badRegexp in BLACKLISTED_REQUEST_HEADERS.items():
        headerValue = request.getHeader(header)
        if headerValue is not None and badRegexp.search(headerValue):
            logging.info("Found bot request. Headers: %s" %
                         request.requestHeaders)
            return True
    return False
......@@ -249,7 +249,7 @@ class BridgeRequestBase(object):
for country in self.notBlockedIn:
logging.info("%s %s bridges not blocked in %s..." %
(msg, pt or "vanilla", country))
self.addFilter(byNotBlockedIn(country, pt, self.ipVersion))
self.addFilter(byNotBlockedIn(country, pt or "vanilla", self.ipVersion))
elif pt:
logging.info("%s %s bridges..." % (msg, pt))
self.addFilter(byTransport(pt, self.ipVersion))
......
......@@ -48,6 +48,8 @@ from twisted.internet import reactor
from twisted.mail import smtp
from twisted.python import failure
from bridgedb import strings
from bridgedb import metrics
from bridgedb import safelog
from bridgedb.crypto import NEW_BUFFER_INTERFACE
from bridgedb.distributors.email import dkim
......@@ -62,6 +64,10 @@ from bridgedb.parse.addr import canonicalizeEmailDomain
from bridgedb.util import levenshteinDistance
from bridgedb import translations
# We use our metrics singleton to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.EmailMetrics()
def createResponseBody(lines, context, client, lang='en'):
"""Parse the **lines** from an incoming email request and determine how to
......@@ -424,6 +430,16 @@ class SMTPAutoresponder(smtp.SMTPClient):
body = createResponseBody(self.incoming.lines,
self.incoming.context,
client, lang)
# The string EMAIL_MISC_TEXT[1] shows up in an email if BridgeDB
# responds with bridges. Everything else we count as an invalid
# request.
translator = translations.installTranslations(lang)
if body is not None and translator.gettext(strings.EMAIL_MISC_TEXT[1]) in body:
metrix.recordValidEmailRequest(self)
else:
metrix.recordInvalidEmailRequest(self)
if not body: return # The client was already warned.
messageID = self.incoming.message.getheader("Message-ID", None)
......
......@@ -52,6 +52,8 @@ from bridgedb import crypto
from bridgedb import strings
from bridgedb import translations
from bridgedb import txrecaptcha
from bridgedb import metrics
from bridgedb import antibot
from bridgedb.distributors.common.http import setFQDN
from bridgedb.distributors.common.http import getFQDN
from bridgedb.distributors.common.http import getClientIP
......@@ -85,6 +87,10 @@ logging.debug("Set template root to %s" % TEMPLATE_DIR)
#: Localisations which BridgeDB supports which should be rendered right-to-left.
rtl_langs = ('ar', 'he', 'fa', 'gu_IN', 'ku')
# We use our metrics singleton to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.HTTPSMetrics()
def replaceErrorPage(request, error, template_name=None, html=True):
"""Create a general error page for displaying in place of tracebacks.
......@@ -495,6 +501,7 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
try:
if self.checkSolution(request) is True:
metrix.recordValidHTTPSRequest(request)
return self.resource.render(request)
except ValueError as err:
logging.debug(err.message)
......@@ -504,11 +511,14 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
# work of art" as pennance for their sins.
d = task.deferLater(reactor, 1, lambda: request)
d.addCallback(redirectMaliciousRequest)
metrix.recordInvalidHTTPSRequest(request)
return NOT_DONE_YET
except Exception as err:
logging.debug(err.message)
metrix.recordInvalidHTTPSRequest(request)
return replaceErrorPage(request, err)
metrix.recordInvalidHTTPSRequest(request)
logging.debug("Client failed a CAPTCHA; returning redirect to %s"
% request.uri)
return redirectTo(request.uri, request)
......@@ -764,10 +774,12 @@ class ReCaptchaProtectedResource(CaptchaProtectedResource):
# breaking). Hence, the 'no cover' pragma.
if solution.is_valid: # pragma: no cover
logging.info("Valid CAPTCHA solution from %r." % clientIP)
metrix.recordValidHTTPSRequest(request)
return (True, request)
else:
logging.info("Invalid CAPTCHA solution from %r: %r"
% (clientIP, solution.error_code))
metrix.recordInvalidHTTPSRequest(request)
return (False, request)
d = txrecaptcha.submit(challenge, response, self.secretKey,
......@@ -905,6 +917,15 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource):
bridgeLines = [replaceControlChars(bridge.getBridgeLine(
bridgeRequest, self.includeFingerprints)) for bridge in bridges]
if antibot.isRequestFromBot(request):
transports = bridgeRequest.transports
# Return either a decoy bridge or no bridge.
if len(transports) > 2:
logging.warning("More than one transport requested")
return self.renderAnswer(request)
ttype = "vanilla" if len(transports) == 0 else transports[0]
return self.renderAnswer(request, antibot.getDecoyBridge(ttype, bridgeRequest.ipVersion))
return self.renderAnswer(request, bridgeLines)
def getResponseFormat(self, request):
......
......@@ -38,8 +38,10 @@ from twisted.internet.error import CannotListenError
from twisted.web import resource
from twisted.web.server import Site
from bridgedb import metrics
from bridgedb import captcha
from bridgedb import crypto
from bridgedb import antibot
from bridgedb.distributors.common.http import setFQDN
from bridgedb.distributors.common.http import getFQDN
from bridgedb.distributors.common.http import getClientIP
......@@ -49,6 +51,10 @@ from bridgedb.schedule import Unscheduled
from bridgedb.schedule import ScheduledInterval
from bridgedb.util import replaceControlChars
# We use our metrics singleton to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.MoatMetrics()
#: The current version of the moat JSON API that we speak
MOAT_API_VERSION = '0.1.0'
......@@ -681,6 +687,8 @@ class CaptchaCheckResource(CaptchaResource):
error = self.checkRequestHeaders(request)
if error: # pragma: no cover
logging.debug("Error while checking moat request headers.")
metrix.recordInvalidMoatRequest(request)
return error.render(request)
data = {
......@@ -694,7 +702,11 @@ class CaptchaCheckResource(CaptchaResource):
}
try:
pos = request.content.tell()
encoded_client_data = request.content.read()
# We rewind the stream to its previous position to allow the
# metrix module to read the request's content too.
request.content.seek(pos)
client_data = json.loads(encoded_client_data)["data"][0]
clientIP = self.getClientIP(request)
......@@ -704,16 +716,19 @@ class CaptchaCheckResource(CaptchaResource):
valid = self.checkSolution(challenge, solution, clientIP)
except captcha.CaptchaExpired:
logging.debug("The challenge had timed out")
metrix.recordInvalidMoatRequest(request)
return self.failureResponse(5, request)
except Exception as impossible:
logging.warn("Unhandled exception while processing a POST /fetch request!")
logging.error(impossible)
metrix.recordInvalidMoatRequest(request)
return self.failureResponse(4, request)
if valid:
qrcode = None
bridgeRequest = self.createBridgeRequest(clientIP, client_data)
bridgeLines = self.getBridgeLines(bridgeRequest)
metrix.recordValidMoatRequest(request)
# If we can only return less than the configured
# MOAT_BRIDGES_PER_ANSWER then log a warning.
......@@ -721,6 +736,11 @@ class CaptchaCheckResource(CaptchaResource):
logging.warn(("Not enough bridges of the type specified to "
"fulfill the following request: %s") % bridgeRequest)
if antibot.isRequestFromBot(request):
ttype = transport or "vanilla"
bridgeLines = antibot.getDecoyBridge(ttype,
bridgeRequest.ipVersion)
# If we have no bridges at all to give to the client, then
# return a JSON API 404 error.
if not bridgeLines:
......@@ -736,6 +756,7 @@ class CaptchaCheckResource(CaptchaResource):
return self.formatDataForResponse(data, request)
else:
metrix.recordInvalidMoatRequest(request)
return self.failureResponse(4, request)
......
......@@ -151,6 +151,13 @@ def byProbingResistance(methodname=None, ipVersion=None):
return _cache[name]
except KeyError:
def _byProbingResistance(bridge):
# If we're dealing with a vanilla bridge, make sure that the bridge
# has the correct IP version.
if methodname == "vanilla":
validVersion = byIPv(ipVersion)
if not validVersion(bridge):
return False
if bridge.hasProbingResistantPT():
return methodname in ('scramblesuit', 'obfs4')
return True
......@@ -262,7 +269,8 @@ def byNotBlockedIn(countryCode=None, methodname=None, ipVersion=4):
if not methodname:
return not bridge.isBlockedIn(countryCode)
elif methodname == "vanilla":
if bridge.address.version == ipVersion:
validVersion = byIPv(ipVersion)
if validVersion(bridge):
if not bridge.addressIsBlockedIn(countryCode,
bridge.address,
bridge.orPort):
......
......@@ -25,6 +25,8 @@ from bridgedb import persistent
from bridgedb import proxy
from bridgedb import runner
from bridgedb import util
from bridgedb import metrics
from bridgedb import antibot
from bridgedb.bridges import MalformedBridgeInfo
from bridgedb.bridges import MissingServerDescriptorDigest
from bridgedb.bridges import ServerDescriptorDigestMismatch
......@@ -72,6 +74,22 @@ def writeAssignments(hashring, filename):
except IOError:
logging.info("I/O error while writing assignments to: '%s'" % filename)
def writeMetrics(filename, measurementInterval):
"""Dump usage metrics to disk.
:param str filename: The filename to write the metrics to.
:param int measurementInterval: The number of seconds after which we rotate
and dump our metrics.
"""
logging.debug("Dumping metrics to file: '%s'" % filename)
try:
with open(filename, 'a') as fh:
metrics.export(fh, measurementInterval)
except IOError as err:
logging.error("Failed to write metrics to '%s': %s" % (filename, err))
def load(state, hashring, clear=False):
"""Read and parse all descriptors, and load into a bridge hashring.
......@@ -359,7 +377,6 @@ def run(options, reactor=reactor):
moatDistributor = None
# Save our state
state.proxies = proxies
state.key = key
state.save()
......@@ -369,10 +386,9 @@ def run(options, reactor=reactor):
State should be saved before calling this method, and will be saved
again at the end of it.
The internal variables, ``cfg``, ``hashring``, ``proxyList``,
``ipDistributor``, and ``emailDistributor`` are all taken from a
:class:`~bridgedb.persistent.State` instance, which has been saved to
a statefile with :meth:`bridgedb.persistent.State.save`.
The internal variables ``cfg`` and ``hashring`` are taken from a
:class:`~bridgedb.persistent.State` instance, which has been saved to a
statefile with :meth:`bridgedb.persistent.State.save`.
:type cfg: :class:`Conf`
:ivar cfg: The current configuration, including any in-memory
......@@ -381,18 +397,6 @@ def run(options, reactor=reactor):
:type hashring: A :class:`~bridgedb.Bridges.BridgeSplitter`
:ivar hashring: A class which takes an HMAC key and splits bridges
into their hashring assignments.
:type proxyList: :class:`~bridgedb.proxy.ProxySet`
:ivar proxyList: The container for the IP addresses of any currently
known open proxies.
:ivar ipDistributor: A
:class:`~bridgedb.distributors.https.distributor.HTTPSDistributor`.
:ivar emailDistributor: A
:class:`~bridgedb.distributors.email.distributor.EmailDistributor`.
:ivar dict tasks: A dictionary of ``{name: task}``, where name is a
string to associate with the ``task``, and ``task`` is some
scheduled event, repetitive or otherwise, for the :class:`reactor
<twisted.internet.epollreactor.EPollReactor>`. See the classes
within the :api:`twisted.internet.tasks` module.
"""
logging.debug("Caught SIGHUP")
logging.info("Reloading...")
......@@ -411,13 +415,19 @@ def run(options, reactor=reactor):
logging.info("Reloading the list of open proxies...")
for proxyfile in cfg.PROXY_LIST_FILES:
logging.info("Loading proxies from: %s" % proxyfile)
proxy.loadProxiesFromFile(proxyfile, state.proxies, removeStale=True)
proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
metrics.setProxies(proxies)
logging.info("Reloading blacklisted request headers...")
antibot.loadBlacklistedRequestHeaders(config.BLACKLISTED_REQUEST_HEADERS_FILE)
logging.info("Reloading decoy bridges...")
antibot.loadDecoyBridges(config.DECOY_BRIDGES_FILE)
logging.info("Reparsing bridge descriptors...")
(hashring,
emailDistributorTmp,
ipDistributorTmp,
moatDistributorTmp) = createBridgeRings(cfg, state.proxies, key)
moatDistributorTmp) = createBridgeRings(cfg, proxies, key)
logging.info("Bridges loaded: %d" % len(hashring))
# Initialize our DB.
......@@ -477,13 +487,15 @@ def run(options, reactor=reactor):
if config.EMAIL_DIST and config.EMAIL_SHARE:
addSMTPServer(config, emailDistributor)
metrics.setSupportedTransports(config.SUPPORTED_TRANSPORTS)
tasks = {}
# Setup all our repeating tasks:
if config.TASKS['GET_TOR_EXIT_LIST']:
tasks['GET_TOR_EXIT_LIST'] = task.LoopingCall(
proxy.downloadTorExits,
state.proxies,
proxies,
config.SERVER_PUBLIC_EXTERNAL_IP)
if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
......@@ -497,14 +509,19 @@ def run(options, reactor=reactor):
runner.cleanupUnparseableDescriptors,
os.path.dirname(config.STATUS_FILE), delUnparseableSecs)
measurementInterval, _ = config.TASKS['EXPORT_METRICS']
tasks['EXPORT_METRICS'] = task.LoopingCall(
writeMetrics, state.METRICS_FILE, measurementInterval)
# Schedule all configured repeating tasks:
for name, seconds in config.TASKS.items():
for name, value in config.TASKS.items():
seconds, startNow = value
if seconds:
try:
# Set now to False to get the servers up and running when
# first started, rather than spend a bunch of time in
# scheduled tasks.
tasks[name].start(abs(seconds), now=False)