email.py 8.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <ilv@torproject.org>
#           see also AUTHORS file
#
# :copyright:   (c) 2008-2014, The Tor Project, Inc.
#               (c) 2014-2018, Israel Leiva
#
# :license: This is Free Software. See LICENSE for license information.

from __future__ import absolute_import

import re
import dkim
import hashlib
import validate_email

from datetime import datetime
import configparser

from email import message_from_string
from email.utils import parseaddr

from twisted.python import log
from twisted.internet import defer
from twisted.enterprise import adbapi

30
from ..utils.db import SQLite3
31
from ..utils import strings
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

class AddressError(Exception):
    """Raised when a sender address is invalid or cannot be normalized."""


class DKIMError(Exception):
    """Raised when the DKIM signature of a message fails verification."""


class EmailParser(object):
    """
    Class for parsing email requests.

    `parse` turns a raw message into a request dict (or an empty dict when
    the message must be discarded); `parse_callback` stores that request in
    the database; `parse_errback` logs parsing failures.
    """

    def __init__(self, settings, to_addr=None, dkim=False):
        """
        Constructor.

        :param settings: configuration object. It is read through ``get()``
            for the ``platforms``, ``email_requests_limit`` and ``dbname``
            keys.
        :param (str) to_addr: when set, only messages whose normalized To
            address equals this value are processed.
        :param (Boolean) dkim: Set dkim verification to True or False.
        """
        self.settings = settings
        self.dkim = dkim
        self.to_addr = to_addr

    def normalize(self, msg):
        """
        Extract the bare sender and recipient addresses from a message.

        Normalization converts ``Alice Wonderland <alice@wonderland.net>``
        into ``alice@wonderland.net``.

        :param msg: parsed email message, indexed by header name.

        :return: tuple ``(name, norm_addr, to_name, norm_to_addr)``. The
            fields of a missing header are empty strings.
        """
        # A missing header is returned as None; hand parseaddr an empty
        # string instead so it reports ('', '') rather than failing.
        name, norm_addr = parseaddr(msg['From'] or "")
        to_name, norm_to_addr = parseaddr(msg['To'] or "")
        log.msg(
            "Normalizing and validating FROM email address.",
            system="email parser"
        )
        return name, norm_addr, to_name, norm_to_addr

    def validate(self, norm_addr, msg):
        """
        Check that the normalized sender address is well formed.

        validate_email runs a set of regular expressions against the
        address. It also offers check_mx and verify options, which check
        that the SMTP host and the mailbox exist; they are not used here.
        See the validate_email package for more info.

        :param (str) norm_addr: normalized sender address.
        :param msg: parsed email message; its From header is quoted in the
            error.

        :return: True if the address is valid.

        :raises AddressError: if the address is empty or not well formed.
        """
        if norm_addr and validate_email.validate_email(norm_addr):
            log.msg(
                "Email address normalized and validated.",
                system="email parser"
            )
            return True

        log.err(
            "Error normalizing/validating email address.",
            system="email parser"
        )
        raise AddressError("Invalid email address {}".format(msg['From']))

    def dkim_verify(self, msg_str, norm_addr):
        """
        Verify the DKIM signature of the message, if verification is on.

        :param msg_str: the message exactly as received. Do not pass
            ``msg.as_string()``: re-serializing changes the message, so the
            signature would no longer match.
        :param (str) norm_addr: normalized sender address; only its hash
            and its domain appear in the error.

        :return: True if the signature is valid or verification is
            disabled.

        :raises DKIMError: if the signature does not verify.
        """
        if not self.dkim:
            return True

        log.msg("Checking DKIM signature.", system="email parser")

        # dkim.verify parses the message as bytes, so encode text input.
        if isinstance(msg_str, str):
            raw_msg = msg_str.encode('utf-8')
        else:
            raw_msg = msg_str

        if dkim.verify(raw_msg):
            log.msg("Valid DKIM signature.", system="email parser")
            return True

        log.msg("Invalid DKIM signature.", system="email parser")

        # Report the hashed address, never the address itself.
        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        # rpartition copes with addresses holding zero or several '@'
        # characters, where split() plus unpacking would raise ValueError.
        _, at_sign, domain = norm_addr.rpartition("@")
        if not at_sign:
            domain = ""
        raise DKIMError(
            "DKIM failed for {} at {}".format(hid.hexdigest(), domain)
        )

    @staticmethod
    def _scan_words(text, request, languages, platforms):
        """
        Update `request` in place from the keywords found in `text`.

        Every whitespace-separated word is lower-cased and matched against
        the known languages and platforms. The word ``help`` selects the
        help command and stops the scan.
        """
        for word in re.split(r"\s+", text.strip()):
            keyword = word.lower()
            if keyword in languages:
                request["language"] = keyword
            if keyword in platforms:
                request["command"] = "links"
                request["platform"] = keyword
            if keyword == "help":
                request["command"] = "help"
                break

    def build_request(self, msg_str, norm_addr, languages, platforms):
        """
        Build the request dict from the keywords found in the message.

        The subject line is scanned first. The whole message is scanned as
        well when the subject did not provide both a command and a
        language.

        :param (str) msg_str: the raw message.
        :param (str) norm_addr: normalized sender address, stored as `id`.
        :param languages: collection of accepted language codes.
        :param platforms: collection of accepted platform names.

        :return: dict with the keys `id`, `command`, `platform`, `language`
            and `service`.
        """
        # Treat a missing collection as "no keywords known".
        languages = languages or ()
        platforms = platforms or ()

        request = {
            "id": norm_addr,
            "command": None,
            "platform": None,
            "language": None,
            "service": "email"
        }

        # Search for commands keywords
        subject = re.search(r"Subject: (.*)\r\n", msg_str)
        if subject:
            self._scan_words(subject.group(1), request, languages, platforms)

        if not request["command"] or not request["language"]:
            self._scan_words(msg_str, request, languages, platforms)

        return request

    def parse(self, msg_str):
        """
        Parse message content. Check if email address is well formed and if
        DKIM signature is valid. Finally, look for commands to process the
        request. Current commands are:

            - links: request links for download.
            - help: help request.

        :param msg_str (str): incoming message as string.

        :return dict with email address and command (`links` or `help`), or
        an empty dict when the message is discarded: invalid sender
        address, message addressed to another instance, or failed DKIM
        verification.
        """

        log.msg("Building email message from string.", system="email parser")

        platforms = self.settings.get("platforms")
        languages = list(strings.get_locales().keys())
        msg = message_from_string(msg_str)

        name, norm_addr, to_name, norm_to_addr = self.normalize(msg)

        try:
            self.validate(norm_addr, msg)
        except AddressError as e:
            log.msg("Address error: {}".format(e.args))
            # Never build a request for an address that failed validation.
            return {}

        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="email parser"
        )

        if self.to_addr and self.to_addr != norm_to_addr:
            log.msg("Got request for a different instance of gettor")
            log.msg("Intended recipient: {}".format(norm_to_addr))
            return {}

        try:
            self.dkim_verify(msg_str, norm_addr)
        except (DKIMError, ValueError) as e:
            log.msg("DKIM error: {}".format(e.args))
            # An unverified message must not produce a request.
            return {}

        return self.build_request(msg_str, norm_addr, languages, platforms)

    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        obtained information in the database for further processing by the
        Sendmail service.

        :param (dict) request: the built request based on message's content.
        It holds the `id`, `command`, `platform`, `language` and `service`
        fields, or is empty when the message was discarded.

        :return: deferred that fires once the database work is done, or
        fails with the database error.
        """
        # `parse` returns an empty dict for discarded messages, so the
        # command key may be absent.
        command = request.get("command")
        log.msg(
            "Found request for {}.".format(command),
            system="email parser"
        )

        if not command:
            return

        email_requests_limit = self.settings.get("email_requests_limit")
        now_str = datetime.now().strftime("%Y%m%d%H%M%S")
        conn = SQLite3(self.settings.get("dbname"))

        hid = hashlib.sha256(request['id'].encode('utf-8'))

        # check limits first
        num_requests = yield conn.get_num_requests(
            id=hid.hexdigest(), service=request['service']
        )

        if num_requests[0][0] > email_requests_limit:
            log.msg(
                "Discarded. Too many requests from {}.".format(
                    hid.hexdigest()
                ), system="email parser"
            )
            return

        # NOTE(review): the limit is looked up by hashed id while the
        # request is stored under the plain address -- presumably the
        # database helper hashes it on insert; confirm.
        # Yield the insert so that a failure reaches this deferred's
        # errback instead of being dropped.
        yield conn.new_request(
            id=request['id'],
            command=request['command'],
            platform=request['platform'],
            language=request['language'],
            service=request['service'],
            date=now_str,
            status="ONHOLD",
        )

    def parse_errback(self, error):
        """
        Errback if we don't/can't parse the message's content.

        :param error: the failure passed down the errback chain; it is
        only logged.
        """
        log.msg(
            "Error while parsing email content: {}.".format(error),
            system="email parser"
        )