email.py 8.72 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <ilv@torproject.org>
#           see also AUTHORS file
#
# :copyright:   (c) 2008-2014, The Tor Project, Inc.
#               (c) 2014-2018, Israel Leiva
#
# :license: This is Free Software. See LICENSE for license information.

from __future__ import absolute_import

import re
import dkim
import hashlib

from datetime import datetime
import configparser

from email import message_from_string
from email.utils import parseaddr

from twisted.python import log
from twisted.internet import defer
from twisted.enterprise import adbapi

29
from ..utils.db import SQLite3
30
from ..utils import strings
Hiro's avatar
Hiro committed
31
from ..utils import validate_email
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

class AddressError(Exception):
    """Raised when an email address is invalid or cannot be normalized."""


class DKIMError(Exception):
    """Raised when verification of a message's DKIM signature fails."""


class EmailParser(object):
    """Class for parsing email requests."""

50
    def __init__(self, settings, to_addr=None, dkim=False):
51
52
53
54
55
        """
        Constructor.

        param (Boolean) dkim: Set dkim verification to True or False.
        """
56
        self.settings = settings
57
58
59
        self.dkim = dkim
        self.to_addr = to_addr

60
    def normalize(self, msg):
        """
        Split the From and To headers into display name and bare address.

        For example, ``Alice Wonderland <alice@wonderland.net>`` becomes the
        pair ``("Alice Wonderland", "alice@wonderland.net")``.

        :param msg: parsed message supporting ``msg['From']`` and
            ``msg['To']`` header lookup.

        :return: tuple ``(name, norm_addr, to_name, norm_to_addr)``.
        """
        sender_name, sender_addr = parseaddr(msg['From'])
        rcpt_name, rcpt_addr = parseaddr(msg['To'])
        log.msg(
            "Normalizing and validating FROM email address.",
            system="email parser"
        )
        return sender_name, sender_addr, rcpt_name, rcpt_addr

71

72
    def validate(self, norm_addr, msg):
        """
        Check that the normalized sender address is acceptable.

        :param norm_addr: bare sender address, as returned by ``normalize``.
        :param msg: the parsed message; only ``msg['From']`` is read, to
            build the error text.

        :return: True when the address is non-empty and accepted by
            ``validate_email.validate_email``.
        :raises AddressError: in every other case.
        """
        # validate_email runs a series of regular expressions to decide
        # whether the address is well formed. It also offers the check_mx and
        # verify options, which check that the SMTP host and the address
        # exist; they are not used here. See the validate_email package for
        # more info.
        if not norm_addr or not validate_email.validate_email(norm_addr):
            log.err(
                "Error normalizing/validating email address.",
                system="email parser"
            )
            raise AddressError("Invalid email address {}".format(msg['From']))

        log.msg(
            "Email address normalized and validated.",
            system="email parser"
        )
        return True


92
    def dkim_verify(self, msg_str, norm_addr):
        """
        Verify the message's DKIM signature when verification is enabled.

        :param msg_str: the original, unmodified message (str or bytes).
        :param norm_addr (str): normalized sender address; only used to
            build the error text.

        :return: True if verification is disabled (``self.dkim`` is falsy)
            or the signature is valid.
        :raises DKIMError: if verification is enabled and the signature does
            not verify. The error text carries the SHA-256 digest of the
            sender address rather than the address itself, plus its domain.
        """
        if not self.dkim:
            # Verification is switched off for this parser: accept as-is.
            return True

        log.msg("Checking DKIM signature.", system="email parser")
        # Note: msg.as_string() re-serializes the message and thereby changes
        # it, so DKIM would fail. Always verify the original text instead.
        # dkimpy parses the message as bytes, so encode a str first.
        if isinstance(msg_str, str):
            raw_msg = msg_str.encode('utf-8')
        else:
            raw_msg = msg_str

        if dkim.verify(raw_msg):
            log.msg("Valid DKIM signature.", system="email parser")
            return True

        log.msg("Invalid DKIM signature.", system="email parser")
        # Same hashing as parse() uses when logging the sender.
        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        # rpartition never fails to unpack, even when "@" is missing or
        # appears more than once.
        domain = norm_addr.rpartition("@")[2]
        raise DKIMError(
            "DKIM failed for {} at {}".format(hid.hexdigest(), domain)
        )

114

115
    def build_request(self, msg_str, norm_addr, languages, platforms):
        """
        Build the request dict from the keywords found in a message.

        The Subject header is scanned first. Only when it does not yield
        both a command and a language is the whole message text (headers
        included) scanned as a fallback.

        Keywords are matched case-insensitively, word by word:

            - a word in ``languages`` sets ``language``;
            - a word in ``platforms`` sets ``platform`` and the ``links``
              command;
            - the word ``help`` sets the ``help`` command and ends the scan.

        :param msg_str (str): the message as a string.
        :param norm_addr: normalized sender address, stored as ``id``.
        :param languages: collection of accepted language codes; None is
            treated as empty.
        :param platforms: collection of accepted platform names; None is
            treated as empty.

        :return: dict with the keys ``id``, ``command``, ``platform``,
            ``language`` and ``service`` (always ``"email"``). Keywords that
            were not found stay None.
        """
        languages = languages or ()
        platforms = platforms or ()

        request = {
            "id": norm_addr,
            "command": None,
            "platform": None,
            "language": None,
            "service": "email"
        }

        def scan(text):
            # Fold the keywords of `text` into `request`.
            for word in text.split():
                keyword = word.lower()
                if keyword in languages:
                    request["language"] = keyword
                if keyword in platforms:
                    request["command"] = "links"
                    request["platform"] = keyword
                if keyword == "help":
                    request["command"] = "help"
                    break

        # Anchor at the start of a line so headers such as "X-Subject:" do
        # not match, and accept both CRLF and bare LF line endings.
        subject = re.search(
            r"^Subject:[ \t]*(.*?)\r?$", msg_str,
            re.MULTILINE | re.IGNORECASE
        )
        if subject:
            scan(subject.group(1))

        if not request["command"] or not request["language"]:
            scan(msg_str)

        return request

Hiro's avatar
Hiro committed
153

154
    @defer.inlineCallbacks
    def too_many_requests(self, request_id, hid, request_service, limit):
        """
        Tell whether a sender has reached the request limit.

        :param request_id: identifier of the requester; not used by the
            lookup, which is keyed on ``hid``.
        :param hid: hash object of the requester's address; its
            ``hexdigest()`` is the key used in the database query.
        :param request_service: name of the service the request came from.
        :param limit: number of stored requests at which further requests
            are refused.

        :return: deferred that fires with 1 when the limit has been reached
            and with 0 otherwise.
        """
        digest = hid.hexdigest()

        if digest == self.settings.get('test_hid'):
            # The configured test sender is never rate limited.
            return 0

        # Same construction as parse_callback uses for its connection.
        conn = SQLite3(self.settings.get("dbname"))
        rows = yield conn.get_num_requests(
            id=digest, service=request_service
        )

        # assumes the first column of the first row holds the count
        # -- TODO confirm against SQLite3.get_num_requests
        if rows[0][0] < limit:
            return 0
        return 1
Hiro's avatar
Hiro committed
169
170


171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
    def parse(self, msg_str):
        """
        Parse message content. Check that the sender address is well formed,
        that the message is addressed to this instance and that its DKIM
        signature is valid. Finally, look for commands to process the
        request. Current commands are:

            - links: request links for download.
            - help: help request.

        :param msg_str (str): incoming message as string.

        :return: dict with the sender address (``id``), ``command``
            (`links`, `help` or None), ``platform``, ``language`` and
            ``service``. An empty dict is returned when the message is
            rejected: invalid sender address, different intended recipient
            or failed DKIM verification.
        """

        log.msg("Building email message from string.", system="email parser")

        platforms = self.settings.get("platforms")
        languages = list(strings.get_locales().keys())
        msg = message_from_string(msg_str)

        _, norm_addr, _, norm_to_addr = self.normalize(msg)

        try:
            self.validate(norm_addr, msg)
        except AddressError as e:
            log.msg("Address error: {}".format(e.args))
            # Do not build a request for a sender we cannot reply to.
            return {}

        # Only the hash of the address is logged, never the address itself.
        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="email parser"
        )

        if self.to_addr and self.to_addr != norm_to_addr:
            log.msg("Got request for a different instance of gettor")
            log.msg("Intended recipient: {}".format(norm_to_addr))
            return {}

        try:
            self.dkim_verify(msg_str, norm_addr)
        except DKIMError as e:
            log.msg("DKIM error: {}".format(e.args))
            # An unverifiable sender must not trigger a reply.
            return {}

        return self.build_request(msg_str, norm_addr, languages, platforms)


219
220
221
222
223
224
225
226
227
228
229
230
231
    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        obtained information in the database for further processing by the
        Sendmail service, unless the sender has made too many requests.

        :param (dict) request: the built request based on message's content.
        It contains the `id`, `command`, `platform`, `language` and
        `service` fields; it may be empty when the message was rejected
        during parsing.

        :return: deferred that fires with None once the request has been
        handled.
        """
        # parse() hands over an empty dict for rejected messages, so look
        # the command up without assuming the key exists.
        if not request or not request.get("command"):
            log.msg(
                "Request not found",
                system="email parser"
            )
            return

        log.msg(
            "Found request for {}.".format(request['command']),
            system="email parser"
        )

        email_requests_limit = self.settings.get("email_requests_limit")
        hid = hashlib.sha256(request['id'].encode('utf-8'))

        check = yield self.too_many_requests(
            request['id'], hid, request['service'], email_requests_limit
        )
        if check:
            log.msg(
                "Discarded. Too many requests from {}.".format(
                    hid.hexdigest()
                ), system="email parser"
            )
            return

        now_str = datetime.now().strftime("%Y%m%d%H%M%S")
        conn = SQLite3(self.settings.get("dbname"))
        # NOTE(review): the result of new_request is not waited on here; if
        # it returns a deferred, its failures will not reach this callback's
        # errback -- confirm against SQLite3.new_request.
        conn.new_request(
            id=request['id'],
            command=request['command'],
            platform=request['platform'],
            language=request['language'],
            service=request['service'],
            date=now_str,
            status="ONHOLD",
        )
265
266
267
268
269
270
271
272
273

    def parse_errback(self, error):
        """
        Errback used when the message's content could not be parsed.

        :param error: the error handed to the errback; it is only formatted
            into the log line.
        """
        details = "Error while parsing email content: {}.".format(error)
        log.msg(details, system="email parser")