Unverified Commit fb3cfa7e authored by Philipp Winter's avatar Philipp Winter
Browse files

Merge branch 'release-0.11.0'

parents c45439ce bd32dd76
Changes in version 0.11.0 - 2020-07-08
* FIXES https://bugs.torproject.org/31422
Make BridgeDB report internal metrics, like the median number of users that
bridges were handed out to.
* FIXES https://bugs.torproject.org/34260
Parse bridge blocking information from SQL database.
* FIXES https://gitlab.torproject.org/tpo/anti-censorship/bridgedb/-/issues/40001
Remove the --reload command line switch. It doesn't actually do anything.
* FIXES https://bugs.torproject.org/29184
Add a new configuration option, BLACKLISTED_TOR_VERSIONS, which contains a
list of Tor versions. BridgeDB won't hand out bridges whose Tor version
is present in this blacklist.
* FIXES https://bugs.torproject.org/19774
Add a favicon to BridgeDB's web UI.
Changes in version 0.10.1 - 2020-05-27
* FIXES https://bugs.torproject.org/33945
......
**********************************************************
BridgeDB |Latest Version| |Build Status| |Coverage Status|
**********************************************************
***********************
BridgeDB |Build Status|
***********************
BridgeDB is a collection of backend servers used to distribute `Tor Bridges
<https://www.torproject.org/docs/bridges>`__. Currently, it mainly consists of
a webserver with `an HTTPS interface <https://bridges.torproject.org>`__,
`an email responder <mailto:bridges@torproject.org>`__, and an SQLite database.
.. |Latest Version| image:: https://pypip.in/version/bridgedb/badge.svg?style=flat
:target: https://pypi.python.org/pypi/bridgedb/
.. |Build Status| image:: https://travis-ci.org/sysrqbci/bridgedb.svg
:target: https://travis-ci.org/sysrqbci/bridgedb
.. |Coverage Status| image:: https://coveralls.io/repos/github/sysrqbci/bridgedb/badge.svg?branch=develop
:target: https://coveralls.io/github/sysrqbci/bridgedb?branch=develop
.. |Build Status| image:: https://travis-ci.org/NullHypothesis/bridgedb.svg?branch=master
:target: https://travis-ci.org/github/NullHypothesis/bridgedb
.. image:: doc/sphinx/source/_static/bay-bridge.jpg
:scale: 80%
......@@ -318,10 +312,6 @@ Reloading Bridges From Their Descriptor Files:
When you have new lists of bridges from the Bridge Authority, replace
the old files and do::
bridgedb --reload
Or just give it a SIGHUP::
kill -s SIGHUP `cat .../run/bridgedb.pid`
......
......@@ -309,6 +309,16 @@ DEFAULT_TRANSPORT = 'obfs4'
# Accept-Language,[Kk]lingon
BLACKLISTED_REQUEST_HEADERS_FILE="blacklisted-request-headers.csv"
# List of tuples that specify blacklisted Tor version ranges. The first
# element marks the start of the range and the second element marks the end.
# Ranges are inclusive: both the start *and* the end version are blocked. If
# you want to block a single version, make the start and end of the range
# identical. BridgeDB won't distribute bridges whose version falls within any
# of these version ranges.
BLACKLISTED_TOR_VERSIONS = [
('0.3.4', '0.3.4.9'), # See <https://bugs.torproject.org/29184>.
('0.3.5', '0.3.5.6')
]
# Decoy bridges that we are handing out to bots that we detected using the
# regular expressions in BLACKLISTED_REQUEST_HEADERS_FILE. The CSV file must
# have the following format:
......
......@@ -12,6 +12,7 @@ from functools import wraps
from ipaddr import IPAddress
from contextlib import contextmanager
import sys
import datetime
from bridgedb.Stability import BridgeHistory
import threading
......@@ -19,6 +20,7 @@ import threading
# Short aliases for hex-encoding and hex-decoding binary data.
toHex = binascii.b2a_hex
fromHex = binascii.a2b_hex
# NOTE(review): presumably the length of a hex-encoded bridge fingerprint --
# confirm against the callers.
HEX_ID_LEN = 40
# Possible values of a bridge measurement's verdict.
BRIDGE_REACHABLE, BRIDGE_BLOCKED = 0, 1
def _escapeValue(v):
return "'%s'" % v.replace("'", "''")
......@@ -68,7 +70,7 @@ SCHEMA2_SCRIPT = """
CREATE INDEX EmailedBridgesWhenMailed on EmailedBridges ( email );
CREATE TABLE BlockedBridges (
CREATE TABLE BridgeMeasurements (
id INTEGER PRIMARY KEY NOT NULL,
hex_key,
bridge_type,
......@@ -77,10 +79,11 @@ SCHEMA2_SCRIPT = """
blocking_country,
blocking_asn,
measured_by,
last_measured
last_measured,
verdict INTEGER
);
CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key);
CREATE INDEX BlockedBridgesBlockingCountry on BridgeMeasurements(hex_key);
CREATE TABLE WarnedEmails (
email PRIMARY KEY NOT NULL,
......@@ -242,6 +245,34 @@ class Database(object):
return retBridges
def getBlockedBridges(self):
    """Return blocking information for all bridges known to be blocked.

    The bridge measurement rows are fetched from the database and then
    reduced by :func:`getBlockedBridgesFromSql`.

    :rtype: dict
    :returns: A dictionary that maps bridge fingerprints (as strings) to a
        list of three-tuples, each of which captures one blocking state:
        (country, address, port).
    """
    return getBlockedBridgesFromSql(self.__fetchBridgeMeasurements())
def __fetchBridgeMeasurements(self):
    """Return all bridge measurement rows from the last three years.

    We limit our search to three years for performance reasons because the
    bridge measurement table keeps growing and therefore slowing down
    queries.

    The cutoff is passed to the database as a bound parameter rather than
    being interpolated into the SQL statement.  It is formatted as
    ``YYYY-MM-DD`` and compared against the ``last_measured`` column.

    :rtype: list
    :returns: A list of tuples, one per row of the ``BridgeMeasurements``
        table, ordered by ``blocking_country`` in descending order.
    """
    cur = self._cur
    old_year = datetime.datetime.utcnow() - datetime.timedelta(days=365*3)
    cur.execute("SELECT * FROM BridgeMeasurements WHERE last_measured > ? "
                "ORDER BY blocking_country DESC",
                (old_year.strftime("%Y-%m-%d"),))
    return cur.fetchall()
def getBridgesForDistributor(self, distributor):
"""Return a list of BridgeData value classes of all bridges in the
database that are allocated to distributor 'distributor'
......@@ -352,6 +383,107 @@ _LOCKED = 0
_OPENED_DB = None
_REFCOUNT = 0
class BridgeMeasurement(object):
    """A single bridge measurement, built from one SQL row.

    Only the fingerprint, country, address, port, measurement date and
    verdict are kept; the remaining constructor arguments (``id``,
    ``bridge_type``, ``asn`` and ``measured_by``) are accepted so that a
    whole row can be passed as ``BridgeMeasurement(*row)`` but are not
    stored.

    Note that ``__contains__`` makes ``other in measurement`` test whether
    both measurements refer to the same (country, address, port).  It is
    *not* consulted by ``measurement in some_list``; list membership uses
    equality, which this class does not override.
    """

    def __init__(self, id, fingerprint, bridge_type, address, port,
                 country, asn, measured_by, last_measured, verdict):
        """Store the fields of a measurement row.

        :param str last_measured: The measurement date, formatted as
            ``YYYY-MM-DD``.  If it cannot be parsed (including when it is
            not a string at all, e.g. ``None``), an error is logged and
            the date falls back to 1970-01-01.
        :param verdict: The outcome of the measurement.
        """
        self.fingerprint = fingerprint
        self.country = country
        self.address = address
        self.port = port
        try:
            self.date = datetime.datetime.strptime(last_measured, "%Y-%m-%d")
        except (TypeError, ValueError):
            # strptime() raises TypeError rather than ValueError when it is
            # handed a non-string, so both must be caught for the fallback
            # to apply.
            logging.error("Could not convert SQL date string '%s' to "
                          "datetime object." % last_measured)
            self.date = datetime.datetime(1970, 1, 1, 0, 0)
        self.verdict = verdict

    def compact(self):
        """Return the measurement as a (country, address, port) tuple."""
        return (self.country, self.address, self.port)

    def __contains__(self, item):
        """Return ``True`` if *item* has the same country, address and port."""
        return (self.country == item.country and
                self.address == item.address and
                self.port == item.port)

    def newerThan(self, other):
        """Return ``True`` if this measurement is more recent than *other*."""
        return self.date > other.date

    def conflicts(self, other):
        """Return ``True`` if *other* refers to the same country, address
        and port but came to a different verdict."""
        return (self.verdict != other.verdict and
                self.country == other.country and
                self.address == other.address and
                self.port == other.port)
def getBlockedBridgesFromSql(sql_rows):
    """Return a dictionary that maps bridge fingerprints to a list of
    places in which the bridge is known to be blocked.

    Rows are processed in order.  When two measurements for the same
    fingerprint and the same (country, address, port) disagree, the newer
    one wins.  When two such measurements agree, only the newer one is
    kept, so every (country, address, port) shows up at most once per
    fingerprint.

    :param list sql_rows: A list of tuples. Each tuple represents an SQL row
        and is passed as-is to :class:`BridgeMeasurement`.
    :rtype: dict
    :returns: A dictionary that maps bridge fingerprints (as strings) to a
        list of three-tuples, each of which captures one blocking state:
        (country, address, port).
    """
    # Separately keep track of measurements that conclude that a bridge is
    # blocked or reachable.
    blocked = {}
    reachable = {}

    def _shouldSkip(m1):
        """Return `True` if we can skip this measurement."""
        # Use our 'reachable' dictionary if our original measurement says that
        # a bridge is blocked, and vice versa.  The purpose is to process
        # measurements that are possibly conflicting with the one at hand.
        d = reachable if m1.verdict == BRIDGE_BLOCKED else blocked
        maybe_conflicting = d.get(m1.fingerprint, None)
        if maybe_conflicting is None:
            # There is no potentially conflicting measurement.
            return False

        for m2 in maybe_conflicting:
            if m1.compact() != m2.compact():
                continue
            # Conflicting measurement.  If m2 is newer than m1, we believe m2.
            if m2.newerThan(m1):
                return True
            # Conflicting measurement.  If m1 is newer than m2, we believe m1,
            # and remove m2.
            if m1.newerThan(m2):
                maybe_conflicting.remove(m2)
                # If we're left with an empty list, get rid of the dictionary
                # key altogether.
                if not maybe_conflicting:
                    del d[m1.fingerprint]
                # We return right away, so mutating the list that we are
                # iterating over is safe here.
                return False
        return False

    for fields in sql_rows:
        m = BridgeMeasurement(*fields)
        if _shouldSkip(m):
            continue

        d = blocked if m.verdict == BRIDGE_BLOCKED else reachable
        other_measurements = d.get(m.fingerprint, None)
        if other_measurements is None:
            # We're dealing with the first "blocked" or "reachable" measurement
            # for the given bridge fingerprint.
            d[m.fingerprint] = [m]
        # Do we have an existing measurement that agrees with the given
        # measurement?  Compare (country, address, port) explicitly: a plain
        # `m in other_measurements` tests list membership by equality, which
        # is never true for a freshly created measurement object.
        elif any(m.compact() == x.compact() for x in other_measurements):
            # Keep whichever of the two agreeing measurements is newer.
            d[m.fingerprint] = [m if m.compact() == x.compact() and
                                m.newerThan(x) else x
                                for x in other_measurements]
        # We're dealing with a new measurement.  Add it to the list.
        else:
            d[m.fingerprint] = other_measurements + [m]

    # Compact-ify the measurements in our dictionary.
    for k, v in blocked.items():
        blocked[k] = [i.compact() for i in v]

    return blocked
def clearGlobalDB():
"""Start from scratch.
......
......@@ -1825,3 +1825,18 @@ class Bridge(BridgeBackwardsCompatibility):
logging.info("Removing dead transport for bridge %s: %s %s:%s %s" %
(self, pt.methodname, pt.address, pt.port, pt.arguments))
self.transports.remove(pt)
def runsVersion(self, version_tuples):
    """Tell whether this bridge's software version lies in any given range.

    Each range is inclusive on both ends.

    :param list version_tuples: A list of tuples that contain a minimum and
        maximum version number (as :class:`stem.version.Version` objects),
        each.
    :rtype: bool
    :returns: ``True`` if this bridge runs any of the given Tor versions
        and ``False`` otherwise.
    """
    return any(lowest <= self.software <= highest
               for lowest, highest in version_tuples)
......@@ -62,9 +62,10 @@ from bridgedb.parse.addr import canonicalizeEmailDomain
from bridgedb.util import levenshteinDistance
from bridgedb import translations
# We use our metrics singleton to keep track of BridgeDB metrics such as
# We use our metrics singletons to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.EmailMetrics()
emailMetrix = metrics.EmailMetrics()
internalMetrix = metrics.InternalMetrics()
def createResponseBody(lines, context, client, lang='en'):
......@@ -113,6 +114,9 @@ def createResponseBody(lines, context, client, lang='en'):
transport = bridgeRequest.justOnePTType()
answer = "".join(" %s\r\n" % b.getBridgeLine(
bridgeRequest, context.includeFingerprints) for b in bridges)
internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges)
else:
internalMetrix.recordEmptyEmailResponse()
return templates.buildAnswerMessage(translator, client, answer)
def generateResponse(fromAddress, client, body, subject=None,
......@@ -396,9 +400,9 @@ class SMTPAutoresponder(smtp.SMTPClient):
# request.
translator = translations.installTranslations(lang)
if body is not None and translator.gettext(strings.EMAIL_MISC_TEXT[1]) in body:
metrix.recordValidEmailRequest(self)
emailMetrix.recordValidEmailRequest(self)
else:
metrix.recordInvalidEmailRequest(self)
emailMetrix.recordInvalidEmailRequest(self)
if not body: return # The client was already warned.
......
......@@ -90,9 +90,10 @@ logging.debug("Set template root to %s" % TEMPLATE_DIR)
#: A list of supported language tuples. Use getSortedLangList() to read this variable.
supported_langs = []
# We use our metrics singleton to keep track of BridgeDB metrics such as
# We use our metrics singletons to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.HTTPSMetrics()
httpsMetrix = metrics.HTTPSMetrics()
internalMetrix = metrics.InternalMetrics()
def stringifyRequestArgs(args):
......@@ -574,7 +575,7 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
try:
if self.checkSolution(request) is True:
metrix.recordValidHTTPSRequest(request)
httpsMetrix.recordValidHTTPSRequest(request)
return self.resource.render(request)
except ValueError as err:
logging.debug(str(err))
......@@ -584,14 +585,14 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
# work of art" as penance for their sins.
d = task.deferLater(reactor, 1, lambda: request)
d.addCallback(redirectMaliciousRequest)
metrix.recordInvalidHTTPSRequest(request)
httpsMetrix.recordInvalidHTTPSRequest(request)
return NOT_DONE_YET
except Exception as err:
logging.debug(str(err))
metrix.recordInvalidHTTPSRequest(request)
httpsMetrix.recordInvalidHTTPSRequest(request)
return replaceErrorPage(request, err)
metrix.recordInvalidHTTPSRequest(request)
httpsMetrix.recordInvalidHTTPSRequest(request)
logging.debug("Client failed a CAPTCHA; returning redirect to %s"
% request.uri)
return redirectTo(request.uri, request)
......@@ -847,12 +848,12 @@ class ReCaptchaProtectedResource(CaptchaProtectedResource):
# breaking). Hence, the 'no cover' pragma.
if solution.is_valid: # pragma: no cover
logging.info("Valid CAPTCHA solution from %r." % clientIP)
metrix.recordValidHTTPSRequest(request)
httpsMetrix.recordValidHTTPSRequest(request)
return (True, request)
else:
logging.info("Invalid CAPTCHA solution from %r: %r"
% (clientIP, solution.error_code))
metrix.recordInvalidHTTPSRequest(request)
httpsMetrix.recordInvalidHTTPSRequest(request)
return (False, request)
d = txrecaptcha.submit(challenge, response, self.secretKey,
......@@ -1000,6 +1001,8 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource):
bridgeLines = [replaceControlChars(bridge.getBridgeLine(
bridgeRequest, self.includeFingerprints)) for bridge in bridges]
internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges)
if antibot.isRequestFromBot(request):
transports = bridgeRequest.transports
# Return either a decoy bridge or no bridge.
......@@ -1059,6 +1062,9 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource):
rtl = False
format = self.getResponseFormat(request)
if not bridgeLines:
internalMetrix.recordEmptyHTTPSResponse()
if format == 'plain':
request.setHeader("Content-Type", "text/plain")
try:
......
......@@ -13,6 +13,7 @@
<meta name="description" content="Tor Bridges">
<meta name="author" content="The Tor Project">
<link rel="icon" type="image/x-icon" href="/assets/images/favicon.ico">
<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
<link rel="stylesheet" href="/assets/css/main.css">
......
......@@ -51,9 +51,10 @@ from bridgedb.schedule import Unscheduled
from bridgedb.schedule import ScheduledInterval
from bridgedb.util import replaceControlChars
# We use our metrics singleton to keep track of BridgeDB metrics such as
# We use our metrics singletons to keep track of BridgeDB metrics such as
# "number of failed HTTPS bridge requests."
metrix = metrics.MoatMetrics()
moatMetrix = metrics.MoatMetrics()
internalMetrix = metrics.InternalMetrics()
#: The current version of the moat JSON API that we speak
......@@ -538,26 +539,37 @@ class CaptchaCheckResource(CaptchaResource):
return bridgeRequest
def getBridgeLines(self, bridgeRequest):
"""Get bridge lines for a client's HTTP request.
def getBridges(self, bridgeRequest):
"""Get bridges for a client's HTTP request.
:type bridgeRequest: :class:`MoatBridgeRequest`
:param bridgeRequest: A valid bridge request object with pre-generated
filters (as returned by :meth:`createBridgeRequest`).
:rtype: list
:returns: A list of bridge lines.
:return: A list of :class:`~bridgedb.bridges.Bridge`s.
"""
bridgeLines = list()
bridges = list()
interval = self.schedule.intervalStart(time.time())
logging.debug("Replying to JSON API request from %s." % bridgeRequest.client)
if bridgeRequest.isValid():
bridges = self.distributor.getBridges(bridgeRequest, interval)
bridgeLines = [replaceControlChars(bridge.getBridgeLine(bridgeRequest))
for bridge in bridges]
return bridgeLines
return bridges
def getBridgeLines(self, bridgeRequest, bridges):
    """Turn bridges into bridge lines for a client's request.

    Each bridge's line is generated for the given request and then passed
    through :func:`replaceControlChars`.

    :type bridgeRequest: :class:`MoatBridgeRequest`
    :param bridgeRequest: A valid bridge request object with pre-generated
        filters (as returned by :meth:`createBridgeRequest`).
    :param list bridges: A list of :class:`~bridgedb.bridges.Bridge`
        objects.
    :rtype: list
    :return: A list of bridge lines.
    """
    bridgeLines = []
    for bridge in bridges:
        rawLine = bridge.getBridgeLine(bridgeRequest)
        bridgeLines.append(replaceControlChars(rawLine))
    return bridgeLines
def extractClientSolution(self, data):
"""Extract the client's CAPTCHA solution from a POST request.
......@@ -686,7 +698,7 @@ class CaptchaCheckResource(CaptchaResource):
if error: # pragma: no cover
logging.debug("Error while checking moat request headers.")
metrix.recordInvalidMoatRequest(request)
moatMetrix.recordInvalidMoatRequest(request)
return error.render(request)
data = {
......@@ -714,25 +726,30 @@ class CaptchaCheckResource(CaptchaResource):
valid = self.checkSolution(challenge, solution, clientIP)
except captcha.CaptchaExpired:
logging.debug("The challenge had timed out")
metrix.recordInvalidMoatRequest(request)
moatMetrix.recordInvalidMoatRequest(request)
return self.failureResponse(5, request)
except Exception as impossible:
logging.warn("Unhandled exception while processing a POST /fetch request!")
logging.error(impossible)
metrix.recordInvalidMoatRequest(request)
moatMetrix.recordInvalidMoatRequest(request)
return self.failureResponse(4, request)
if valid:
qrcode = None
bridgeRequest = self.createBridgeRequest(clientIP, client_data)
bridgeLines = self.getBridgeLines(bridgeRequest)
metrix.recordValidMoatRequest(request)
bridges = self.getBridges(bridgeRequest)
bridgeLines = self.getBridgeLines(bridgeRequest, bridges)
moatMetrix.recordValidMoatRequest(request)
# If we can only return less than the configured
# MOAT_BRIDGES_PER_ANSWER then log a warning.
if len(bridgeLines) < self.nBridgesToGive:
logging.warn(("Not enough bridges of the type specified to "
"fulfill the following request: %s") % bridgeRequest)
if not bridgeLines:
internalMetrix.recordEmptyMoatResponse()
else:
internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges)
if antibot.isRequestFromBot(request):
ttype = transport or "vanilla"
......@@ -754,7 +771,7 @@ class CaptchaCheckResource(CaptchaResource):
return self.formatDataForResponse(data, request)
else:
metrix.recordInvalidMoatRequest(request)
moatMetrix.recordInvalidMoatRequest(request)
return self.failureResponse(4, request)
......
......@@ -38,6 +38,7 @@ from bridgedb.distributors.https.distributor import HTTPSDistributor
from bridgedb.distributors.moat.distributor import MoatDistributor
from bridgedb.parse import descriptors
from bridgedb.parse.blacklist import parseBridgeBlacklistFile
from bridgedb.parse.versions import parseVersionsList
import bridgedb.Storage
......@@ -211,6 +212,10 @@ def load(state, hashring, clear=False):
elif bridge in blacklist.keys():
logging.warn("Not distributing blacklisted Bridge %s %s:%s: %s" %
(bridge, bridge.address, bridge.orPort, blacklist[bridge]))
# Skip bridges that are running a blacklisted version of Tor.
elif bridge.runsVersion(state.BLACKLISTED_TOR_VERSIONS):
logging.warn("Not distributing bridge %s because it runs blacklisted "
"Tor version %s." % (router.fingerprint, bridge.software))
else:
# If the bridge is not running, then it is skipped during the
# insertion process.
......@@ -303,6 +308,31 @@ def createBridgeRings(cfg, proxyList, key):
return hashring, emailDistributor, ipDistributor, moatDistributor
def loadBlockedBridges(hashring):
    """Load bridge blocking info from our SQL database and add it to bridge
    objects.

    Every ring in ``hashring.ringsByName`` except the one named
    "unallocated" is walked.  For each bridge whose fingerprint has blocking
    info in the database, :meth:`setBlockedIn` is called once per
    (country, address, port) entry.

    :param hashring: The hashring whose bridges should be annotated.  It
        must provide a ``ringsByName`` mapping of ring names to rings, each
        of which has a ``bridges`` mapping.
    """
    with bridgedb.Storage.getDB() as db:
        blockedBridges = db.getBlockedBridges()

    num_blocked = 0
    for name, ring in hashring.ringsByName.items():
        if name == "unallocated":
            continue
        for _, bridge in ring.bridges.items():
            try:
                blockings = blockedBridges[bridge.fingerprint]
            except KeyError:
                # No blocking info for this bridge.
                continue
            for blocking_country, address, port in blockings:
                bridge.setBlockedIn(blocking_country, address, port)
            # Count bridges, not individual blocking entries.
            num_blocked += 1

    # Use %-interpolation to match the "%d" placeholder; str.format() would
    # leave the placeholder untouched and log a literal "%d".
    logging.info("Loaded blocking info for %d bridges." % num_blocked)
def run(options, reactor=reactor):
"""This is BridgeDB's main entry point and main runtime loop.
......@@ -418,6 +448,8 @@ def run(options, reactor=reactor):
proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
metrics.setProxies(proxies)
state.BLACKLISTED_TOR_VERSIONS = parseVersionsList(state.BLACKLISTED_TOR_VERSIONS)
logging.info("Reloading blacklisted request headers...")
antibot.loadBlacklistedRequestHeaders(config.BLACKLISTED_REQUEST_HEADERS_FILE)
logging.info("Reloading decoy bridges...")
......@@ -434,6 +466,7 @@ def run(options, reactor=reactor):
logging.info("Reparsing bridge descriptors...")
load(state, hashring, clear=False)
logging.info("Bridges loaded: %d" % len(hashring))
loadBlockedBridges(hashring)
if emailDistributorTmp is not None:
emailDistributorTmp.prepopulateRings() # create default rings
......@@ -450,6 +483,23 @@ def run(options, reactor=reactor):
else:
logging.warn("No Moat distributor created!")
metrix = metrics.InternalMetrics()
logging.info("Logging bridge ring metrics for %d rings." %
len(hashring.ringsByName))
for ringName, ring in hashring.ringsByName.items():
# Ring is of type FilteredBridgeSplitter or UnallocatedHolder.
# FilteredBridgeSplitter splits bridges into subhashrings based on
# filters.
if hasattr(ring, "filterRings"):
for (ringname, (filterFn, subring)) in ring.filterRings.items():
subRingName = "-".join(ring.extractFilterNames(ringname))
metrix.recordBridgesInHashring(ringName,
subRingName,
len(subring))
elif hasattr(ring, "fingerprints"):
metrix.recordBridgesInHashring(ringName, "unallocated",
len(ring.fingerprints))
# Dump bridge pool assignments to disk.
writeAssignments(hashring, state.ASSIGNMENTS_FILE)
state.save()
......
......@@ -18,6 +18,8 @@ import ipaddr
import operator
import json
import datetime
import statistics
import numpy
from bridgedb import geo
from bridgedb.distributors.common.http import getClientIP
......@@ -54,7 +56,7 @@ SUPPORTED_TRANSPORTS = None
# Version number for our metrics format. We increment the version if our
# format changes.
METRICS_VERSION = 1
METRICS_VERSION = 2