GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/ https://git.torproject.org/ and git-rw.torproject.org.

email.py 8.86 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <ilv@torproject.org>
#           see also AUTHORS file
#
# :copyright:   (c) 2008-2014, The Tor Project, Inc.
#               (c) 2014-2018, Israel Leiva
#
# :license: This is Free Software. See LICENSE for license information.

from __future__ import absolute_import

import re
import dkim
import hashlib

from datetime import datetime
import configparser

from email import message_from_string
from email.utils import parseaddr

from twisted.python import log
from twisted.internet import defer

28
from ..utils.db import SQLite3
Hiro's avatar
Hiro committed
29
from ..utils import validate_email
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

class AddressError(Exception):
    """Raised when an email address is invalid or can't be normalized."""


class DKIMError(Exception):
    """Raised when DKIM signature verification fails."""


class EmailParser(object):
    """Class for parsing email requests."""

48
    def __init__(self, settings, to_addr=None, dkim=False):
        """
        Set up the parser state and open the database connection.

        :param settings: configuration object; its ``get`` method is used to
        look up the ``platforms`` and ``dbname`` entries.
        :param to_addr: address served by this instance. When set, ``parse``
        drops messages whose ``To`` address is different.
        :param (Boolean) dkim: Set dkim verification to True or False.
        """
        # NOTE: inside this method the ``dkim`` argument shadows the ``dkim``
        # module imported at the top of the file.
        self.settings = settings
        self.to_addr = to_addr
        self.dkim = dkim
        # Filled in later by get_locales().
        self.locales = []
        self.platforms = settings.get("platforms")
        self.conn = SQLite3(settings.get("dbname"))

    def __del__(self):
        del self.conn
63

64
    def normalize(self, msg):
        """
        Extract display names and bare addresses from the From/To headers.

        Normalization turns a header value such as
        ``Alice Wonderland <alice@wonderland.net>`` into the display name
        ``Alice Wonderland`` and the address ``alice@wonderland.net``.

        :param msg: parsed message; it is indexed with ``'From'`` and ``'To'``.

        :return: tuple ``(name, norm_addr, to_name, norm_to_addr)``.
        """
        sender_name, sender_addr = parseaddr(msg['From'])
        rcpt_name, rcpt_addr = parseaddr(msg['To'])
        log.msg(
            "Normalizing and validating FROM email address.",
            system="email parser"
        )
        return sender_name, sender_addr, rcpt_name, rcpt_addr

75

76
    def validate(self, norm_addr, msg):
        """
        Check that the normalized sender address is well formed.

        :param (str) norm_addr: normalized sender address.
        :param msg: parsed message; only its ``From`` header is used, for the
        error text.

        :return: True when the address is accepted.
        :raises AddressError: when the address is empty or rejected by
        ``validate_email``.
        """
        # validate_email runs a bunch of regexps to see if the email address
        # is well formed. Additional options for validate_email are check_mx
        # and verify, which check if the SMTP host and email address exist.
        # See validate_email package for more info.
        if not (norm_addr and validate_email.validate_email(norm_addr)):
            log.err(
                "Error normalizing/validating email address.",
                system="email parser"
            )
            raise AddressError("Invalid email address {}".format(msg['From']))

        log.msg(
            "Email address normalized and validated.",
            system="email parser"
        )
        return True


96
    def dkim_verify(self, msg_str, norm_addr):
97 98 99 100 101 102 103 104
        # DKIM verification. Simply check that the server has verified the
        # message's signature
        if self.dkim:
            log.msg("Checking DKIM signature.", system="email parser")
            # Note: msg.as_string() changes the message to conver it to
            # string, so DKIM will fail. Use the original string instead
            if dkim.verify(msg_str):
                log.msg("Valid DKIM signature.", system="email parser")
105
                return True
106 107 108 109 110 111 112 113
            else:
                log.msg("Invalid DKIM signature.", system="email parser")
                username, domain = norm_addr.split("@")
                raise DkimError(
                    "DKIM failed for {} at {}".format(
                        hid.hexdigest(), domain
                    )
                )
114 115 116 117
        # Is this even useful like this?
        else:
            return True

118 119 120
    def parse_keywords(self, text, request):

        for word in re.split(r"\s+", text.strip()):
121 122 123 124 125 126
            for locale in self.locales:
                if word.lower() == locale.lower():
                    request["language"] = locale
                elif (not request["language"]) and (word.lower()[:2] ==
                        locale.lower()[:2]):
                    request["language"] = locale
127 128 129 130 131 132 133
            if word.lower() in self.platforms:
                request["command"] = "links"
                request["platform"] = word.lower()
            if word.lower() == "help":
                request["command"] = "help"
                break
        return request
134

135
    def build_request(self, msg_str, norm_addr):
136 137 138 139 140 141
        # Search for commands keywords
        subject_re = re.compile(r"Subject: (.*)\r\n")
        subject = subject_re.search(msg_str)

        request = {
            "id": norm_addr,
142 143
            "command": None,
            "platform": None,
Hiro's avatar
Hiro committed
144
            "language": None,
145 146 147 148 149
            "service": "email"
        }

        if subject:
            subject = subject.group(1)
150
            request = self.parse_keywords(subject, request)
151

152 153 154 155 156
        # Always parse the body too, to see if there's more specific information
        request = self.parse_keywords(msg_str, request)

        if not request["language"]:
            request["language"] = "en-US"
157 158 159

        return request

Hiro's avatar
Hiro committed
160

Hiro's avatar
Hiro committed
161
    def too_many_requests(self, hid, test_hid, num_requests, limit):
        """
        Tell whether a sender has reached the request limit.

        :param hid: hashed id of the sender.
        :param test_hid: hashed id that is exempt from the limit.
        :param num_requests: number of requests already recorded.
        :param limit: number of requests at which senders are cut off.

        :return: False for the exempt id or while ``num_requests`` is below
        ``limit``; True otherwise.
        """
        if hid == test_hid:
            # The test identity is never rate limited.
            return False
        return not (num_requests < limit)
Hiro's avatar
Hiro committed
168

169 170 171
    @defer.inlineCallbacks
    def get_locales(self):
        """
        Load the known locales from the database into ``self.locales``.

        The first column of every row returned by the database layer is
        appended to ``self.locales``; entries already in the list are kept.
        """
        rows = yield self.conn.get_locales()
        self.locales.extend(row[0] for row in rows)

Hiro's avatar
Hiro committed
176

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
    def parse(self, msg_str):
        """
        Parse message content. Check if email address is well formed, if DKIM
        signature is valid, and whether the message is addressed to this
        instance. Finally, look for commands to process the request. Current
        commands are:

            - links: request links for download.
            - help: help request.

        :param msg_str (str): incoming message as string.

        :return dict with email address and command (`links` or `help`). An
        empty dict is returned when the message is dropped: invalid sender
        address, different intended recipient, or failed DKIM verification.
        """

        log.msg("Building email message from string.", system="email parser")

        msg = message_from_string(msg_str)

        _, norm_addr, _, norm_to_addr = self.normalize(msg)

        try:
            self.validate(norm_addr, msg)
        except AddressError as e:
            # A request can't be answered without a usable sender address,
            # so the message is dropped rather than processed any further.
            log.msg(
                "Address error: {}".format(e.args), system="email parser"
            )
            return {}

        # Only the hash of the address is written to the log.
        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="email parser"
        )

        if self.to_addr and self.to_addr != norm_to_addr:
            log.msg("Got request for a different instance of gettor")
            log.msg("Intended recipient: {}".format(norm_to_addr))
            return {}

        try:
            self.dkim_verify(msg_str, norm_addr)
        except (DKIMError, ValueError) as e:
            # A message whose signature can't be verified must not produce
            # a request; otherwise the DKIM check would have no effect.
            log.msg("DKIM error: {}".format(e.args), system="email parser")
            return {}

        return self.build_request(msg_str, norm_addr)


223 224 225 226 227 228 229 230 231 232 233 234 235
    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        obtained information in the database for further processing by the
        Sendmail service.

        :param (dict) request: the built request based on message's content.
        It contains the sender ``id`` and the ``command``, ``platform``,
        ``language`` and ``service`` fields. It may be empty when the message
        was dropped while parsing.

        :return: deferred whose callback/errback will log database query
        execution details.
        """
        # parse() hands over an empty dict for messages it drops, so the
        # command is looked up without assuming the key exists.
        if not request or not request.get("command"):
            log.msg(
                "Request not found",
                system="email parser"
            )
            return

        email_requests_limit = self.settings.get("email_requests_limit")
        test_hid = self.settings.get("test_hid")
        now_str = datetime.now().strftime("%Y%m%d%H%M%S")

        # Requests are counted per hashed sender id.
        hid = hashlib.sha256(request['id'].encode('utf-8')).hexdigest()
        request_service = request['service']

        log.msg(
            "Found request for {}.".format(request['command']),
            system="email parser"
        )

        num_requests = yield self.conn.get_num_requests(
            id=hid, service=request_service
        )

        # The count is read from the first column of the first row.
        limit_reached = self.too_many_requests(
            hid, test_hid, num_requests[0][0], email_requests_limit
        )

        if limit_reached:
            log.msg(
                "Discarded. Too many requests from {}.".format(
                    hid
                ), system="email parser"
            )
            return

        # Wait for the insert so that a database failure reaches the
        # errback of the returned deferred instead of being lost.
        yield self.conn.new_request(
            id=request['id'],
            command=request['command'],
            platform=request['platform'],
            language=request['language'],
            service=request['service'],
            date=now_str,
            status="ONHOLD",
        )
280 281 282 283 284 285 286 287 288

    def parse_errback(self, error):
        """
        Errback used when the message's content can't be parsed.

        :param error: whatever the errback is invoked with; it is only
        formatted into the log line.
        """
        details = "Error while parsing email content: {}.".format(error)
        log.msg(details, system="email parser")