Commit ac112c4c authored by Nick Mathewson's avatar Nick Mathewson 🎨
Browse files

r17241@catbus: nickm | 2007-12-18 18:04:43 -0500

 Document the heck out of bridgedb and clean up the code a little.

parent 2438804e
# BridgeDB by Nick Mathewson.
# Copyright (c) 2007, The Tor Project, Inc.
# See LICENSE for licensing informatino
# See LICENSE for licensing information
This module has low-level functionality for parsing bridges and arranging
them in rings.
import binascii
import bisect
......@@ -47,6 +52,9 @@ def is_valid_ip(ip):
return True
def is_valid_fingerprint(fp):
"""Return true iff fp in the right format to be a hex fingerprint
of a Tor server.
if len(fp) != HEX_FP_LEN:
return False
......@@ -60,10 +68,13 @@ toHex = binascii.b2a_hex
fromHex = binascii.a2b_hex
def get_hmac(k, v):
    """Return the hmac of v using the key k.

       k -- the secret key.
       v -- the message to authenticate.

       The result is the raw digest (not hex-encoded), computed with the
       module-level DIGESTMOD hash.
    """
    import hmac  # function-scope import keeps this helper self-contained
    h = hmac.new(k, v, digestmod=DIGESTMOD)
    return h.digest()
def get_hmac_fn(k, hex=True):
"""Return a function that computes the hmac of its input using the key k.
If 'hex' is true, the output of the function will be hex-encoded."""
h =, digestmod=DIGESTMOD)
def hmac_fn(v):
h_tmp = h.copy()
......@@ -75,11 +86,23 @@ def get_hmac_fn(k, hex=True):
return hmac_fn
def chopString(s, size):
    """Generator.  Given a string and a length, divide the string into pieces
       of no more than that length.

       s -- the string to divide.
       size -- the maximum length of each piece; a positive integer.

       Yields consecutive slices of s.  Only the last piece may be shorter
       than size, and an empty s yields nothing.
    """
    # range() behaves the same here on Python 2 and Python 3.
    for pos in range(0, len(s), size):
        yield s[pos:pos+size]
class Bridge:
"""Holds information for a single bridge"""
## Fields:
## nickname -- The bridge's nickname. Not currently used.
## ip -- The bridge's IP address, as a dotted quad.
## orport -- The bridge's OR port.
## fingerprint -- The bridge's identity digest, in lowercase hex, with
## no spaces.
def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None):
"""Create a new Bridge. One of fingerprint and id_digest must be
self.nickname = nickname
self.ip = ip
self.orport = orport
......@@ -97,13 +120,16 @@ class Bridge:
raise TypeError("Bridge with no ID")
def getID(self):
    """Return this bridge's identity digest, decoded from its hex
       fingerprint."""
    hex_fingerprint = self.fingerprint
    return fromHex(hex_fingerprint)
def __repr__(self):
"""Return a piece of python that evaluates to this bridge."""
return "Bridge(%r,%r,%d,%r)"%(
self.nickname, self.ip, self.orport, self.fingerprint)
def getConfigLine(self):
    """Format this bridge as a "bridge" line that can go into a torrc."""
    values = (self.ip, self.orport, self.fingerprint)
    return "bridge %s:%d %s" % values
def assertOK(self):
......@@ -112,6 +138,9 @@ class Bridge:
assert 1 <= self.orport <= 65535
def parseDescFile(f, bridge_purpose='bridge'):
"""Generator. Parses a cached-descriptors file 'f', and yields a Bridge
object for every entry whose purpose matches bridge_purpose.
nickname = ip = orport = fingerprint = purpose = None
for line in f:
......@@ -140,6 +169,7 @@ def parseDescFile(f, bridge_purpose='bridge'):
nickname = ip = orport = fingerprint = purpose = None
class BridgeHolder:
"""Abstract base class for all classes that hold bridges."""
def insert(self, bridge):
    """Add 'bridge' to this holder.  Abstract: subclasses must override
       this method.

       Raises NotImplementedError when called on the base class.
    """
    # NotImplemented is the sentinel returned by comparison methods, not an
    # exception; calling it fails with an unrelated TypeError.
    raise NotImplementedError()
......@@ -147,7 +177,15 @@ class BridgeHolder:
return True
class BridgeRing(BridgeHolder):
"""Arranges bridges in a ring based on an hmac function."""
## Fields:
## bridges: a map from hmac value to Bridge.
## bridgesByID: a map from bridge ID Digest to Bridge.
## isSorted: true iff sortedKeys is currently sorted.
## sortedKeys: a list of all the hmacs, in order.
## name: a string to represent this ring in the logs.
def __init__(self, key):
"""Create a new BridgeRing, using key as its hmac key."""
self.bridges = {}
self.bridgesByID = {}
self.hmac = get_hmac_fn(key, hex=False)
......@@ -159,6 +197,8 @@ class BridgeRing(BridgeHolder):
return len(self.bridges)
def insert(self, bridge):
"""Add a bridge to the ring. If the bridge is already there,
replace the old one."""
ident = bridge.getID()
pos = self.hmac(ident)
if not self.bridges.has_key(pos):
......@@ -168,17 +208,20 @@ class BridgeRing(BridgeHolder):
self.bridgesByID[id] = bridge
logging.debug("Adding %s to %s", bridge.getConfigLine(),
def sort(self):
def _sort(self):
"""Helper: put the keys in sorted order."""
if not self.isSorted:
self.isSorted = True
def _getBridgeKeysAt(self, pos, N=1):
"""Helper: return the N keys appearing in the ring after position
assert len(pos) == DIGEST_LEN
if N >= len(self.sortedKeys):
return self.sortedKeys
if not self.isSorted:
idx = bisect.bisect_left(self.sortedKeys, pos)
r = self.sortedKeys[idx:idx+N]
if len(r) < N:
......@@ -188,17 +231,25 @@ class BridgeRing(BridgeHolder):
return r
def getBridges(self, pos, N=1):
    """Look up the N bridges that follow position pos in the ring and
       return them as a list."""
    found = []
    for key in self._getBridgeKeysAt(pos, N):
        found.append(self.bridges[key])
    return found
def getBridgeByID(self, fp):
    """Find the bridge whose identity digest is fp.  Gives back None when
       the ring holds no such bridge."""
    by_id = self.bridgesByID
    return by_id.get(fp)
class LogDB:
"""Wraps a database object and records all modifications to a
human-readable logfile."""
def __init__(self, kwd, db, logfile):
self._kwd = kwd
if kwd:
self._kwd = "%s: "%kwd
self._kwd = ""
self._db = db
self._logfile = logfile
def __delitem__(self, k):
......@@ -211,7 +262,7 @@ class LogDB:
return self._db[k]
except KeyError:
self._logfile.write("%s: [%r] = [%r]\n"%(self._kwd, k, v))
self._logfile.write("%s[%r] = [%r]\n"%(self._kwd, k, v))
self._db[k] = v
return v
def __len__(self):
......@@ -227,6 +278,9 @@ class LogDB:
class PrefixStore:
"""Wraps a database object and prefixes the keys in all requests with
'prefix'. This is used to multiplex several key->value mappings
onto a single database."""
def __init__(self, store, prefix):
self._d = store
self._p = prefix
......@@ -247,6 +301,9 @@ class PrefixStore:
return [ k[n:] for k in self._d.keys() if k.startswith(self._p) ]
class FixedBridgeSplitter(BridgeHolder):
"""A bridgeholder that splits bridges up based on an hmac and assigns
them to several sub-bridgeholders with equal probability.
def __init__(self, key, rings):
self.hmac = get_hmac_fn(key, hex=True)
self.rings = rings[:]
......@@ -268,6 +325,9 @@ class FixedBridgeSplitter(BridgeHolder):
class UnallocatedHolder(BridgeHolder):
"""A pseudo-bridgeholder that ignores its bridges and leaves them
def insert(self, bridge):
logging.debug("Leaving %s unallocated", bridge.getConfigLine())
......@@ -275,6 +335,9 @@ class UnallocatedHolder(BridgeHolder):
return False
class BridgeTracker:
"""A stats tracker that records when we first saw and most recently
saw each bridge.
def __init__(self, firstSeenStore, lastSeenStore):
self.firstSeenStore = firstSeenStore
self.lastSeenStore = lastSeenStore
......@@ -289,6 +352,10 @@ class BridgeTracker:
self.firstSeenStore.setdefault(bridgeID, now)
class BridgeSplitter(BridgeHolder):
"""A BridgeHolder that splits incoming bridges up based on an hmac,
and assigns them to sub-bridgeholders with different probabilities.
Bridge-to-bridgeholder associations are recorded in a store.
def __init__(self, key, store):
self.hmac = get_hmac_fn(key, hex=True) = store
......@@ -305,6 +372,13 @@ class BridgeSplitter(BridgeHolder):
return n
def addRing(self, ring, ringname, p=1):
"""Add a new bridgeholder.
ring -- the bridgeholder to add.
ringname -- a string representing the bridgeholder. This is used
to record which bridges have been assigned where in the store.
p -- the relative proportion of bridges to assign to this
assert isinstance(ring, BridgeHolder)
self.ringsByName[ringname] = ring
......@@ -312,6 +386,8 @@ class BridgeSplitter(BridgeHolder):
self.totalP += p
def addTracker(self, t):
"""Adds a statistics tracker that gets told about every bridge we see.
def insert(self, bridge):
......@@ -334,11 +410,3 @@ class BridgeSplitter(BridgeHolder):[bridgeID] = ringname
if __name__ == '__main__':
import sys
br = BridgeRing("hello")
for fname in sys.argv[1:]:
f = open(fname)
for bridge in parseDescFile(f):
# BridgeDB by Nick Mathewson.
# Copyright (c) 2007, The Tor Project, Inc.
# See LICENSE for licensing informatino
# See LICENSE for licensing information
This module has functions to decide which bridges to hand out to whom.
import bridgedb.Bridges
......@@ -17,6 +21,18 @@ def uniformMap(ip):
return ".".join( ip.split(".")[:3] )
class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
"""Object that hands out bridges based on the IP address of an incoming
request and the current time period.
## Fields:
## areaMapper -- a function that maps an IP address to a string such
## that addresses mapping to the same string are in the same "area".
## rings -- a list of BridgeRing objects. Every bridge goes into one
## of these rings, and every area is associated with one.
## splitter -- a FixedBridgeSplitter to assign bridges into the
## rings of this distributor.
## areaOrderHmac -- an hmac function used to order areas within rings.
## areaClusterHmac -- an hmac function used to assign areas to rings.
def __init__(self, areaMapper, nClusters, key):
self.areaMapper = areaMapper
......@@ -36,9 +52,16 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
self.areaClusterHmac = bridgedb.Bridges.get_hmac_fn(key4, hex=True)
def insert(self, bridge):
"""Assign a bridge to this distributor."""
def getBridgesForIP(self, ip, epoch, N=1):
"""Return a list of bridges to give to a user.
ip -- the user's IP address, as a dotted quad.
epoch -- the time period when we got this request. This can
be any string, so long as it changes with every period.
N -- the number of bridges to try to give back.
if not len(self.splitter):
return []
......@@ -61,9 +84,9 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
# RFC2822 would allow all of these characters:
#ASPECIAL = '!#$%&*+-/=?^_`{|}~'
#ASPECIAL += "\\\'"
# ...but we only accept the ones we're pretty sure we can handle right.
ASPECIAL = '-_+/=_~'
# One address character: a word character, or one of the specials above
# (each backslash-escaped so it is literal inside the character class).
ACHAR = r'[\w%s]' % "".join("\\%s" % ch for ch in ASPECIAL)
# One or more runs of address characters, separated by single dots.
DOTATOM = r'%s+(?:\.%s+)*' % ((ACHAR,) * 2)
# One or more runs of word characters, separated by single dots.
DOMAIN = r'\w+(?:\.\w+)*'
......@@ -73,14 +96,21 @@ SPACE_PAT = re.compile(r'\s+')
class BadEmail(Exception):
    """Exception raised when we get a bad email address."""
    def __init__(self, msg, email):
        """Create a new BadEmail.

           msg -- a description of the problem; becomes the exception
              message.
           email -- the offending address, kept as the 'email' attribute.
        """
        Exception.__init__(self, msg)
        self.email = email
class UnsupportedDomain(BadEmail):
    """Raised for an email address whose domain is not one we know."""
def extractAddrSpec(addr):
"""Given an email From line, try to extract and parse the addrspec
portion. Returns localpart,domain on success; raises BadEmail
on failure.
orig_addr = addr
addr = SPACE_PAT.sub(' ', addr)
addr = addr.strip()
......@@ -116,6 +146,10 @@ def extractAddrSpec(addr):
return localpart, domain
def normalizeEmail(addr, domainmap):
"""Given the contents of a from line, and a map of supported email
domains (in lowercase), raise BadEmail or return a normalized
email address.
addr = addr.lower()
localpart, domain = extractAddrSpec(addr)
if domainmap is not None:
......@@ -128,21 +162,38 @@ def normalizeEmail(addr, domainmap):
return "%s@%s"%(localpart, domain)
class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
"""Object that hands out bridges based on the email address of an incoming
request and the current time period.
## Fields:
## emailHmac -- an hmac function used to order email addresses within
## a ring.
## ring -- a BridgeRing object to hold all the bridges we hand out.
## store -- a database object to remember what we've given to whom.
## domainmap -- a map from lowercase domains that we support mail from
## to their canonical forms.
def __init__(self, key, store, domainmap):
    """Create a new EmailBasedDistributor.

       key -- the hmac key for this distributor.  Two subkeys are derived
          from it: one for the email-address hmac and one for the
          BridgeRing that holds the bridges.
       store -- a database object to remember what we've given to whom.
       domainmap -- a map from lowercase domains that we support mail from
          to their canonical forms.
    """
    key1 = bridgedb.Bridges.get_hmac(key, "Map-Addresses-To-Ring")
    self.emailHmac = bridgedb.Bridges.get_hmac_fn(key1, hex=False)
    key2 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring")
    self.ring = bridgedb.Bridges.BridgeRing(key2)
    # The ring's name is the string that represents it in the logs.
    self.ring.name = "email ring"
    # XXXX clear the store when the period rolls over!
    self.store = store
    self.domainmap = domainmap
def insert(self, bridge):
"""Assign a bridge to this distributor."""
def getBridgesForEmail(self, emailaddress, epoch, N=1):
"""Return a list of bridges to give to a user.
emailaddress -- the user's email address, as given in a from line.
epoch -- the time period when we got this request. This can
be any string, so long as it changes with every period.
N -- the number of bridges to try to give back.
emailaddress = normalizeEmail(emailaddress, self.domainmap)
if emailaddress is None:
return [] #XXXX raise an exception.
......@@ -163,15 +214,3 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
memo = "".join(b.getID() for b in result)[emailaddress] = memo
return result
if __name__ == '__main__':
import sys
for line in sys.stdin:
line = line.strip()
if line.startswith("From: "):
line = line[6:]
normal = normalizeEmail(line, None)
print normal
except BadEmail, e:
print line, e
# BridgeDB by Nick Mathewson.
# Copyright (c) 2007, The Tor Project, Inc.
# See LICENSE for licensing informatino
# See LICENSE for licensing information
This module sets up a bridgedb and starts the servers running.
import anydbm
import os
......@@ -16,9 +20,13 @@ import bridgedb.Time as Time
import bridgedb.Server as Server
class Conf:
"""A configuration object. Holds unvalidated attributes.
def __init__(self, **attrs):
# An example configuration. Used for testing. See sample
# bridgedb.conf for documentation.
CONFIG = Conf(
RUN_IN_DIR = ".",
......@@ -58,6 +66,8 @@ CONFIG = Conf(
def configureLogging(cfg):
"""Set up Python's logging subsystem based on the configuratino.
level = getattr(cfg, 'LOGLEVEL', 'WARNING')
level = getattr(logging, level)
extra = {}
......@@ -100,17 +110,24 @@ def getKey(fname):
return k
def load(cfg, splitter):
"""Read all the bridge files from cfg, and pass them into a splitter
for fname in cfg.BRIDGE_FILES:
f = open(fname, 'r')
for bridge in Bridges.parseDescFile(f, cfg.BRIDGE_PURPOSE):
_reloadFn = None
_reloadFn = lambda: True
def _handleSIGHUP(*args):
    """Signal handler for SIGHUP: ask the reactor to call _reloadFn."""
    no_delay = 0
    reactor.callLater(no_delay, _reloadFn)
def startup(cfg):
"""Parse bridges,
# Expand any ~ characters in paths in the configuration.
cfg.BRIDGE_FILES = [ os.path.expanduser(fn) for fn in cfg.BRIDGE_FILES ]
......@@ -118,28 +135,36 @@ def startup(cfg):
if v:
setattr(cfg, key, os.path.expanduser(v))
# Change to the directory where we're supposed to run.
if cfg.RUN_IN_DIR:
# Write the pidfile.
if cfg.PIDFILE:
f = open(cfg.PIDFILE, 'w')
# Set up logging.
# Load the master key, or create a new one.
key = getKey(cfg.MASTER_KEY_FILE)
dblogfile = None
emailDistributor = ipDistributor = None
# Initialize our DB file.
dblogfile = None
baseStore = store =, "c", 0600)
if cfg.DB_LOG_FILE:
dblogfile = open(cfg.DB_LOG_FILE, "a+", 0)
store = Bridges.LogDB("db", store, dblogfile)
store = Bridges.LogDB(None, store, dblogfile)
# Create a BridgeSplitter to assign the bridges to the different
# distributors.
splitter = Bridges.BridgeSplitter(Bridges.get_hmac(key, "Splitter-Key"),
Bridges.PrefixStore(store, "sp|"))
emailDistributor = ipDistributor = None
# As appropriate, create an IP-based distributor.
if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
ipDistributor = Dist.IPBasedDistributor(
......@@ -148,6 +173,7 @@ def startup(cfg):
splitter.addRing(ipDistributor, "https", cfg.HTTPS_SHARE)
webSchedule = Time.IntervalSchedule("day", 2)
# As appropriate, create an email-based distributor.
if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
for d in cfg.EMAIL_DOMAINS:
......@@ -158,15 +184,18 @@ def startup(cfg):
splitter.addRing(emailDistributor, "email", cfg.EMAIL_SHARE)
emailSchedule = Time.IntervalSchedule("day", 1)
# As appropriate, tell the splitter to leave some bridges unallocated.
# Add a tracker to tell us how often we've seen various bridges.
stats = Bridges.BridgeTracker(Bridges.PrefixStore(store, "fs|"),
Bridges.PrefixStore(store, "ls|"))
# Parse the bridges and log how many we put where."Loading bridges")
load(cfg, splitter)"%d bridges loaded", len(splitter))
......@@ -177,19 +206,22 @@ def startup(cfg):" by location set: %s",
" ".join(str(len(r)) for r in ipDistributor.rings))
# Configure HTTP and/or HTTPS servers.
if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
Server.addWebServer(cfg, ipDistributor, webSchedule)
# Configure Email servers.
if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
Server.addSMTPServer(cfg, emailDistributor, emailSchedule)
# Make the parse-bridges function get re-called on SIGHUP.
def reload():
load(cfg, splitter)
global _reloadFn
_reloadFn = reload
signal.signal(signal.SIGHUP, _handleSIGHUP)
# Actually run the servers.
try:"Starting reactors.")
......@@ -201,6 +233,9 @@ def startup(cfg):
def run():
"""Parse the command line to determine where the configuration is.
Parse the configuration, and start the servers.
if len(sys.argv) != 2:
print "Syntax: %s [config file]" % sys.argv[0]
# BridgeDB by Nick Mathewson.
# Copyright (c) 2007, The Tor Project, Inc.
# See LICENSE for licensing informatino
# See LICENSE for licensing information
This module implements the web and email interfaces to the bridge database.
from cStringIO import StringIO
import MimeWriter
......@@ -68,9 +72,16 @@ bridge addresses.
class WebResource(twisted.web.resource.Resource):
"""This resource is used by Twisted Web to give a web page with some
bridges in response to a request."""
isLeaf = True
def __init__(self, distributor, schedule, N=1):
"""Create a new WebResource.
distributor -- an IPBasedDistributor object
schedule -- an IntervalSchedule object
N -- the number of bridges to hand out per query.
self.distributor = distributor
self.schedule = schedule
......@@ -90,22 +101,34 @@ class WebResource(twisted.web.resource.Resource):
def addWebServer(cfg, dist, sched):
"""Set up a web server.
cfg -- a configuration object from Main. We use these options:
dist -- an IPBasedDistributor object.
sched -- an IntervalSchedule object.
Site = twisted.web.server.Site
resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER)
site = Site(resource)
ip = cfg.HTTPS_BIND_IP or ""
reactor.listenTCP(cfg.HTTP_UNENCRYPTED_PORT, site, interface=ip)
if cfg.HTTPS_PORT:
from twisted.internet.ssl import DefaultOpenSSLContextFactory
#from OpenSSL.SSL import SSLv3_METHOD
ip = cfg.HTTPS_BIND_IP or ""
factory = DefaultOpenSSLContextFactory(cfg.HTTPS_KEY_FILE,
reactor.listenSSL(cfg.HTTPS_PORT, site, factory, interface=ip)
return site
class MailFile:
"""A file-like object used to hand rfc822.Message a list of lines
as though it were reading them from a file."""
def __init__(self, lines):
self.lines = lines
self.idx = 0
......@@ -113,11 +136,17 @@ class MailFile:
try :
line = self.lines[self.idx]
self.idx += 1
return line #Append a \n? XXXX
return line
except IndexError:
return ""
def getMailResponse(lines, ctx):
"""Given a list of lines from an incoming email message, and a