# BridgeDB by Nick Mathewson.
# Copyright (c) 2007-2013, The Tor Project, Inc.
# See LICENSE for licensing information

"""
This module implements the web (http, https) interfaces to the bridge database.
"""

import base64
import gettext
import logging
import re
import textwrap
import time
15
import os
aagbsn's avatar
aagbsn committed
16
17

from twisted.internet import reactor
18
from twisted.internet.error import CannotListenError
aagbsn's avatar
aagbsn committed
19
import twisted.web.resource
aagbsn's avatar
aagbsn committed
20
from twisted.web.server import Site
21
22
from twisted.web import static
from twisted.web.util import redirectTo
23
from twisted.python import filepath
aagbsn's avatar
aagbsn committed
24
25
26

import bridgedb.Dist
import bridgedb.I18n as I18n
27
import bridgedb.Util as Util
aagbsn's avatar
aagbsn committed
28

29
from recaptcha.client import captcha
aagbsn's avatar
aagbsn committed
30
31
32
33
from bridgedb.Raptcha import Raptcha
from bridgedb.Filters import filterBridgesByIP6, filterBridgesByIP4
from bridgedb.Filters import filterBridgesByTransport
from bridgedb.Filters import filterBridgesByNotBlockedIn
34
from bridgedb.parse import headers
aagbsn's avatar
aagbsn committed
35
36
from ipaddr import IPv4Address, IPv6Address
from random import randint
37
38
39
40
41
42
43
from mako.template import Template
from mako.lookup import TemplateLookup
from zope.interface import Interface, Attribute, implements

# Directory, next to this module, that holds the Mako templates; robots.txt
# and the static assets are also served from beneath it.
template_root = os.path.join(os.path.dirname(__file__), 'templates')

# Shared template lookup used by every resource in this module. Rendered
# output is encoded as UTF-8.
lookup = TemplateLookup(
    directories=[template_root],
    output_encoding='utf-8')

# Language codes whose pages are rendered right-to-left.
# NOTE(review): 'gu_IN' (Gujarati) is normally written left-to-right --
# confirm whether it belongs in this tuple.
rtl_langs = ('ar', 'he', 'fa', 'gu_IN', 'ku')

logging.debug("Set template root to %s" % template_root)

aagbsn's avatar
aagbsn committed
48
# Try to load the GeoIP country database at import time. On any failure
# ``geoip`` is set to ``None``; code that wants a country code must check
# ``geoip`` for truthiness before using it.
try:
    # Make sure we have the database before trying to import the module:
    geoipdb = '/usr/share/GeoIP/GeoIP.dat'
    if not os.path.isfile(geoipdb):
        raise EnvironmentError("Could not find %r. On Debian-based systems, "
                               "please install the geoip-database package."
                               % geoipdb)
    # This is a "pure" python version which interacts with the Maxmind GeoIP
    # API (version 1). It requires, in Debian, the libgeoip-dev and
    # geoip-database packages.
    import pygeoip
    geoip = pygeoip.GeoIP(geoipdb, flags=pygeoip.MEMORY_CACHE)

except Exception as err:
    logging.debug("Error while loading geoip module: %r" % err)
    # Report the real cause: the failure may be a missing pygeoip module or an
    # unreadable database, not only a missing database file.
    logging.warning("GeoIP support disabled: %s" % err)
    geoip = None
else:
    logging.info("GeoIP database loaded")

aagbsn's avatar
aagbsn committed
68

aagbsn's avatar
aagbsn committed
69
class CaptchaProtectedResource(twisted.web.resource.Resource):
    """Wrap another resource so it is only rendered after a solved CAPTCHA.

    A GET request is answered with the ``captcha.html`` template containing a
    reCAPTCHA challenge. A POST request carrying the challenge and the
    client's response is verified with the reCAPTCHA service; if the answer
    is valid the wrapped resource is rendered, otherwise the client is
    redirected back to the same URL.
    """

    def __init__(self, useRecaptcha=False, recaptchaPrivKey='',
                 recaptchaPubKey='', useForwardedHeader=False, resource=None):
        """Create a CAPTCHA gate in front of **resource**.

        :param bool useRecaptcha: Accepted for compatibility with existing
            callers; it is not read by this class.
        :param str recaptchaPrivKey: The reCAPTCHA private key.
        :param str recaptchaPubKey: The reCAPTCHA public key.
        :param bool useForwardedHeader: If ``True``, take the client address
            from the ``X-Forwarded-For`` header (see :meth:`getClientIP`).
        :param resource: The resource to render once the CAPTCHA is solved.
            It must provide ``isLeaf`` and ``render()``; passing ``None``
            raises :exc:`AttributeError`.
        """
        # Initialise the base class, as WebResource does, so that the
        # standard Resource attributes exist on this instance.
        twisted.web.resource.Resource.__init__(self)
        self.isLeaf = resource.isLeaf
        self.useForwardedHeader = useForwardedHeader
        self.recaptchaPrivKey = recaptchaPrivKey
        self.recaptchaPubKey = recaptchaPubKey
        self.resource = resource

    def getClientIP(self, request):
        """Return the client's IP address, or ``None`` if it is unknown.

        When ``useForwardedHeader`` is set, the last entry of the
        ``X-Forwarded-For`` header is used and must pass
        ``bridgedb.Bridges.is_valid_ip``; a missing or invalid header yields
        ``None``. Otherwise the address of the connected peer is returned.

        :type request: :api:`twisted.web.http.Request`
        :param request: The incoming request.
        :rtype: str or None
        """
        if not self.useForwardedHeader:
            return request.getClientIP()

        ip = None
        h = request.getHeader("X-Forwarded-For")
        if h:
            ip = h.split(",")[-1].strip()
            if not bridgedb.Bridges.is_valid_ip(ip):
                logging.warning("Got weird forwarded-for value %r", h)
                ip = None
        return ip

    def render_GET(self, request):
        """Serve a page containing a fresh CAPTCHA challenge.

        :type request: :api:`twisted.web.http.Request`
        :param request: The incoming request.
        :rtype: str
        :returns: The rendered ``captcha.html`` template.
        """
        # get a captcha
        c = Raptcha(self.recaptchaPubKey, self.recaptchaPrivKey)
        try:
            c.get()
        except Exception as error:
            # The original called the undefined name ``log`` here, which
            # turned every fetch failure into a NameError.
            logging.error("Connection to Recaptcha server failed: %s", error)
            # NOTE(review): rendering continues with whatever ``c.image`` and
            # ``c.challenge`` hold after a failed fetch -- confirm that
            # Raptcha leaves usable values in that case.

        # TODO: this does not work for versions of IE < 8.0
        imgstr = 'data:image/jpeg;base64,%s' % base64.b64encode(c.image)
        return lookup.get_template('captcha.html').render(
            imgstr=imgstr, challenge_field=c.challenge)

    def render_POST(self, request):
        """Verify a submitted CAPTCHA solution.

        :type request: :api:`twisted.web.http.Request`
        :param request: A request whose form arguments should include
            ``recaptcha_challenge_field`` and ``recaptcha_response_field``.
        :returns: The wrapped resource's rendering if the solution is valid;
            otherwise a redirect back to the request's own URL.
        """
        try:
            challenge = request.args['recaptcha_challenge_field'][0]
            response = request.args['recaptcha_response_field'][0]
        except (KeyError, IndexError):
            # A required form field is missing or empty: start over.
            return redirectTo(request.URLPath(), request)

        # generate a random IP for the captcha submission
        # NOTE(review): presumably so the real client address is not
        # disclosed to the reCAPTCHA service -- confirm.
        remote_ip = '%d.%d.%d.%d' % (randint(1, 255), randint(1, 255),
                                     randint(1, 255), randint(1, 255))

        recaptcha_response = captcha.submit(challenge, response,
                                            self.recaptchaPrivKey, remote_ip)
        if recaptcha_response.is_valid:
            logging.info("Valid recaptcha from %s. Parameters were %r",
                         Util.logSafely(remote_ip), request.args)
            return self.resource.render(request)

        logging.info("Invalid recaptcha from %s. Parameters were %r",
                     Util.logSafely(remote_ip), request.args)
        logging.info("Recaptcha error code: %s",
                     recaptcha_response.error_code)
        return redirectTo(request.URLPath(), request)
125

aagbsn's avatar
aagbsn committed
126
127
128
129
130
131
class WebResource(twisted.web.resource.Resource):
    """This resource is used by Twisted Web to give a web page with some
       bridges in response to a request."""
    isLeaf = True

    def __init__(self, distributor, schedule, N=1, useForwardedHeader=False,
                 includeFingerprints=True, domains=None):
        """Create a new WebResource.

        :param distributor: An IPBasedDistributor object.
        :param schedule: An IntervalSchedule object.
        :param int N: The number of bridges to hand out per query.
        :param bool useForwardedHeader: If ``True``, take the client address
            from the ``X-Forwarded-For`` header instead of the connection.
        :param bool includeFingerprints: Passed through to each bridge's
            ``getConfigLine()`` as ``includeFingerprint``.
        :type domains: list or None
        :param domains: Stored on the instance as ``self.domains``; a falsy
            value is replaced with a new empty list.
        """
        gettext.install("bridgedb", unicode=True)
        twisted.web.resource.Resource.__init__(self)
        self.distributor = distributor
        self.schedule = schedule
        self.nBridgesToGive = N
        self.useForwardedHeader = useForwardedHeader
        self.includeFingerprints = includeFingerprints

        # do not use mutable types as __init__ defaults!
        self.domains = domains if domains else []

    def _getClientIP(self, request):
        """Return the client's IP address, or ``None`` if it is unknown.

        When ``useForwardedHeader`` is set, the last entry of the
        ``X-Forwarded-For`` header is used and must pass
        ``bridgedb.Bridges.is_valid_ip``; a missing or invalid header yields
        ``None``. Otherwise the address of the connected peer is returned.
        """
        if not self.useForwardedHeader:
            return request.getClientIP()

        ip = None
        h = request.getHeader("X-Forwarded-For")
        if h:
            ip = h.split(",")[-1].strip()
            if not bridgedb.Bridges.is_valid_ip(ip):
                logging.warning("Got weird forwarded-for value %r", h)
                ip = None
        return ip

    def render(self, request):
        """Render a response for a client HTTP request.

        Presently, this method merely wraps :meth:`getBridgeRequestAnswer` to
        catch any unhandled exceptions which occur (otherwise the server will
        display the traceback to the client). If an unhandled exception *does*
        occur, the client will be served the default "No bridges currently
        available" HTML response page.

        :type request: :api:`twisted.web.http.Request`
        :param request: A ``Request`` object containing the HTTP method, full
                        URI, and any URL/POST arguments and headers present.
        :rtype: str
        :returns: A plaintext or HTML response to serve.
        """
        try:
            response = self.getBridgeRequestAnswer(request)
        except Exception as err:
            logging.exception(err)
            response = self.renderAnswer(request)

        return response

    def getBridgeRequestAnswer(self, request):
        """Respond to a client HTTP request for bridges.

        The following URL/POST arguments are honoured:

        * ``format`` -- ``plain`` selects a plaintext response.
        * ``ipv6`` -- any non-empty value requests IPv6 bridges.
        * ``transport`` -- a pluggable transport method name.
        * ``unblocked`` -- a 2-4 letter code; only bridges not blocked there
          are returned.

        :type request: :api:`twisted.web.http.Request`
        :param request: A ``Request`` object containing the HTTP method, full
                        URI, and any URL/POST arguments and headers present.
        :rtype: str
        :returns: A plaintext or HTML response to serve.
        """
        interval = self.schedule.getInterval(time.time())
        ip = self._getClientIP(request)

        countryCode = None
        # Only look up a country when the client address is known; passing
        # ``None`` to the GeoIP lookup would abort the whole request.
        if geoip and ip:
            countryCode = geoip.country_code_by_addr(ip)
            if countryCode:
                logging.debug("Client request from GeoIP CC: %s" % countryCode)

        rtl = usingRTLLang(request)
        if rtl:
            logging.debug("Rendering RTL response.")

        # Choose the first "format" argument, if any was given.
        formatArgs = request.args.get("format", None)
        fmt = formatArgs[0] if formatArgs else None

        # Anything after ?ipv6= enables IPv6.
        ipv6 = bool(request.args.get("ipv6", False))

        try:
            # Validate the method name; only the leading identifier-like
            # part of the argument is kept.
            transport = re.match('[_a-zA-Z][_a-zA-Z0-9]*',
                                 request.args.get("transport")[0]).group()
        except (TypeError, IndexError, AttributeError):
            transport = None

        try:
            unblocked = re.match('[a-zA-Z]{2,4}',
                                 request.args.get("unblocked")[0]).group()
        except (TypeError, IndexError, AttributeError):
            unblocked = False

        logging.info("Replying to web request from %s. Parameters were %r"
                     % (Util.logSafely(ip), request.args))

        rules = []
        bridgeLines = None

        if ip:
            if ipv6:
                rules.append(filterBridgesByIP6)
                addressClass = IPv6Address
            else:
                rules.append(filterBridgesByIP4)
                addressClass = IPv4Address

            if transport:
                #XXX: A cleaner solution would differentiate between
                # addresses by protocol rather than have separate lists
                # Tor to be a transport, and selecting between them.
                # The transport filter replaces the plain address filter.
                rules = [filterBridgesByTransport(transport, addressClass)]

            if unblocked:
                rules.append(filterBridgesByNotBlockedIn(unblocked,
                                                         addressClass,
                                                         transport))

            bridges = self.distributor.getBridgesForIP(ip, interval,
                                                       self.nBridgesToGive,
                                                       countryCode,
                                                       bridgeFilterRules=rules)
            bridgeLines = "".join("  %s\n" % b.getConfigLine(
                includeFingerprint=self.includeFingerprints,
                addressClass=addressClass,
                transport=transport,
                request=bridgedb.Dist.uniformMap(ip)
                ) for b in bridges)

        return self.renderAnswer(request, bridgeLines, rtl, fmt)

    def renderAnswer(self, request, bridgeLines=None, rtl=False, format=None):
        """Generate a response for a client which includes **bridges**.

        The generated response can be plaintext or HTML.

        :type request: :api:`twisted.web.http.Request`
        :param request: A ``Request`` object containing the HTTP method, full
                        URI, and any URL/POST arguments and headers present.
        :type bridgeLines: str or None
        :param bridgeLines: The bridge lines, one per line of text, used to
                            configure a Tor client to use a bridge; ``None``
                            if there are none to give.
        :param bool rtl: If ``True``, the language used for the response to
                         the client should be rendered right-to-left.
        :type format: str or None
        :param format: If ``'plain'``, return a plaintext response. Otherwise,
                       use the :file:`bridgedb/templates/bridges.html`
                       template to render an HTML response page which includes
                       the **bridges**.
        :rtype: str
        :returns: A plaintext or HTML response to serve.
        """
        if format == 'plain':
            request.setHeader("Content-Type", "text/plain")
            # The original returned the undefined name ``answer`` here. An
            # empty body is served when there are no bridge lines, so that a
            # string is always returned.
            return bridgeLines or ""

        request.setHeader("Content-Type", "text/html; charset=utf-8")
        return lookup.get_template('bridges.html').render(answer=bridgeLines,
                                                          rtl=rtl)
aagbsn's avatar
aagbsn committed
293
294

class WebRoot(twisted.web.resource.Resource):
    """Top-level resource under which every other served document lives."""

    isLeaf = True

    def render_GET(self, request):
        """Serve the webserver's root document.

        This is what answers a request for, e.g.,
        https://bridges.torproject.org/.

        If working out the text direction fails, the failure is logged and
        the page is rendered left-to-right.

        :type request: :api:`twisted.web.server.Request`
        :param request: An incoming request.
        :rtype: str
        :returns: The rendered ``index.html`` template.
        """
        try:
            rtl = usingRTLLang(request)
        except Exception as err:
            rtl = False
            logging.exception(err)
            logging.error("The gettext files were not properly installed.")
            logging.info("To install translations, try doing `python "
                         "setup.py compile_catalog`.")

        index = lookup.get_template('index.html')
        return index.render(rtl=rtl)
aagbsn's avatar
aagbsn committed
319

320

aagbsn's avatar
aagbsn committed
321
322
323
324
325
326
327
328
329
330
331
332
def addWebServer(cfg, dist, sched):
    """Build the web site and start listening on the configured ports.

    The site serves the root page at ``/``, ``robots.txt``, the static
    ``assets`` directory, and the bridge distributor at ``/bridges`` (behind
    a CAPTCHA when ``RECAPTCHA_ENABLED`` is set). If a port cannot be bound,
    :exc:`SystemExit` is raised.

    :param cfg: A configuration object from Main. These options are read:
        ``HTTPS_N_BRIDGES_PER_ANSWER``, ``HTTPS_INCLUDE_FINGERPRINTS``,
        ``HTTP_USE_IP_FROM_FORWARDED_HEADER``, ``EMAIL_DOMAINS``,
        ``RECAPTCHA_ENABLED``, ``RECAPTCHA_PUB_KEY``, ``RECAPTCHA_PRIV_KEY``,
        ``HTTP_UNENCRYPTED_PORT``, ``HTTP_UNENCRYPTED_BIND_IP``,
        ``HTTPS_PORT``, ``HTTPS_BIND_IP``, ``HTTPS_KEY_FILE`` and
        ``HTTPS_CERT_FILE``. (``HTTPS_USE_IP_FROM_FORWARDED_HEADER`` is not
        consulted here; the ``HTTP_`` variant is used for all listeners.)
    :param dist: An IPBasedDistributor object.
    :param sched: An IntervalSchedule object.
    :returns: The :api:`twisted.web.server.Site` that was set up.
    """
    root = twisted.web.resource.Resource()
    root.putChild('', WebRoot())

    robotsPath = os.path.join(template_root, 'robots.txt')
    root.putChild('robots.txt', static.File(robotsPath))
    assetsPath = os.path.join(template_root, 'assets/')
    root.putChild('assets', static.File(assetsPath))

    bridgesPage = WebResource(
        dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER,
        cfg.HTTP_USE_IP_FROM_FORWARDED_HEADER,
        includeFingerprints=cfg.HTTPS_INCLUDE_FINGERPRINTS,
        domains=cfg.EMAIL_DOMAINS)

    if cfg.RECAPTCHA_ENABLED:
        # Put the CAPTCHA gate in front of the bridge distributor page.
        bridgesPage = CaptchaProtectedResource(
            recaptchaPrivKey=cfg.RECAPTCHA_PRIV_KEY,
            recaptchaPubKey=cfg.RECAPTCHA_PUB_KEY,
            useForwardedHeader=cfg.HTTP_USE_IP_FROM_FORWARDED_HEADER,
            resource=bridgesPage)
    root.putChild('bridges', bridgesPage)

    site = Site(root)

    if cfg.HTTP_UNENCRYPTED_PORT:
        # An unset bind address means "listen on all interfaces".
        bindTo = cfg.HTTP_UNENCRYPTED_BIND_IP or ""
        try:
            reactor.listenTCP(cfg.HTTP_UNENCRYPTED_PORT, site,
                              interface=bindTo)
        except CannotListenError as error:
            raise SystemExit(error)

    if cfg.HTTPS_PORT:
        from twisted.internet.ssl import DefaultOpenSSLContextFactory
        bindTo = cfg.HTTPS_BIND_IP or ""
        sslContext = DefaultOpenSSLContextFactory(cfg.HTTPS_KEY_FILE,
                                                  cfg.HTTPS_CERT_FILE)
        try:
            reactor.listenSSL(cfg.HTTPS_PORT, site, sslContext,
                              interface=bindTo)
        except CannotListenError as error:
            raise SystemExit(error)

    return site

382
def usingRTLLang(request):
    """Decide whether the response should be laid out right-to-left.

    The client's ``Accept-Language`` header is parsed and the matching
    translations are installed via :func:`setLocaleFromRequestHeader`. The
    language assumed to have been chosen is then picked with
    :func:`getAssumedChosenLang` and compared against ``rtl_langs``.

    :type request: :api:`twisted.web.server.Request`
    :param request: An incoming request.
    :rtype: bool
    :returns: ``True`` if the preferred language is right-to-left; ``False``
              otherwise.
    """
    requested = setLocaleFromRequestHeader(request)
    chosen = getAssumedChosenLang(requested)
    return chosen in rtl_langs

def getAssumedChosenLang(langs):
    """Return the first language in ``langs`` that we support.

    A language counts as supported if the ``i18n`` directory next to this
    module has an entry with that name, or if it is ``'en'``. When none of
    the requested languages is supported, ``'en-US'`` is returned.

    :param list langs: All requested languages, most preferred first.
    :rtype: str
    :returns: The chosen language.
    """
    moduleDir = os.path.dirname(__file__)
    i18nDir = filepath.FilePath(os.path.join(moduleDir, 'i18n'))
    assert i18nDir.isdir()

    supported = i18nDir.listdir() + ['en']
    for candidate in langs:
        if candidate in supported:
            return candidate
    return 'en-US'

426
def setLocaleFromRequestHeader(request):
    """Retrieve the languages from the accept-language header and install them.

    Parse the languages in the header and build a translation for them. Then
    add a translation for each requested language as a fallback, in the order
    the client listed them, and install the resulting chain. A language with
    no catalog contributes a :class:`gettext.NullTranslations`, which simply
    defers to the next fallback.

    If the catalogs cannot be read, the error is logged and whatever chain
    was built so far is installed (untranslated text if nothing was loaded).

    :type request: :api:`twisted.web.server.Request`
    :param request: An incoming request from a client.
    :rtype: list
    :returns: All requested languages.
    """
    logging.debug("Getting client 'Accept-Language' header...")
    header = request.getHeader('accept-language')

    if header is None:
        logging.debug("Client sent no 'Accept-Language' header. Using fallback.")
        header = 'en,en-US'

    localedir = os.path.join(os.path.dirname(__file__), 'i18n/')
    langs = headers.parseAcceptLanguage(header)
    ## XXX the 'Accept-Language' header is potentially identifying
    logging.debug("Client Accept-Language (top 5): %s" % langs[:5])

    # Start from a no-op translation so that ``language`` is always bound,
    # even if loading the very first catalog fails.
    language = gettext.NullTranslations()
    try:
        language = gettext.translation("bridgedb", localedir=localedir,
                                       languages=langs, fallback=True)
        for lang in langs:
            # One fallback per requested language; the original re-requested
            # the whole list on every iteration and ignored ``lang``.
            language.add_fallback(gettext.translation("bridgedb",
                                                      localedir=localedir,
                                                      languages=[lang],
                                                      fallback=True))
    except IOError as error:
        # ``error.message`` is frequently empty for IOError; log the
        # exception itself so the cause is visible.
        logging.error("Could not load translations: %s", error)

    language.install(unicode=True)
    return langs