email.py 7.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <ilv@torproject.org>
#           see also AUTHORS file
#
# :copyright:   (c) 2008-2014, The Tor Project, Inc.
#               (c) 2014-2018, Israel Leiva
#
# :license: This is Free Software. See LICENSE for license information.

from __future__ import absolute_import

import re
import dkim
import hashlib
import validate_email

from datetime import datetime
import configparser

from email import message_from_string
from email.utils import parseaddr

from twisted.python import log
from twisted.internet import defer
from twisted.enterprise import adbapi

30
from ..utils.db import SQLite3
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


class AddressError(Exception):
    """Raised when an email address is invalid or cannot be normalized."""


class DKIMError(Exception):
    """Raised when the DKIM signature of a message fails verification."""


class EmailParser(object):
    """
    Class for parsing email requests.

    It validates the sender address, optionally verifies the DKIM signature
    and extracts the requested command, platform and language from the
    message, so the request can be stored for later processing.
    """

    # Matches the Subject header line of the raw message. The pattern
    # expects CRLF line endings; when it does not match, `parse` falls back
    # to scanning the whole message for keywords.
    _SUBJECT_RE = re.compile(r"Subject: (.*)\r\n")

    def __init__(self, settings, to_addr=None, dkim=False):
        """
        Constructor.

        :param settings: configuration object, read through its `get`
        method. This class reads the `platforms`, `languages`,
        `email_requests_limit` and `dbname` keys.
        :param (str) to_addr: if set, only messages whose To address equals
        this value are processed.
        :param (Boolean) dkim: Set dkim verification to True or False.
        """
        self.settings = settings
        self.dkim = dkim
        self.to_addr = to_addr

    @staticmethod
    def _scan_keywords(words, request, platforms, languages):
        """
        Update `request` in place with the keywords found in `words`.

        Words are compared in lowercase. A language sets
        `request["language"]`; a platform sets `request["platform"]` and
        the `links` command (the last platform found wins); the word
        `help` sets the `help` command and stops the scan.

        :param words: iterable of strings to inspect.
        :param (dict) request: request being built; modified in place.
        :param platforms: container of known platform names.
        :param languages: container of known language names.
        """
        for word in words:
            keyword = word.lower()
            if keyword in languages:
                request["language"] = keyword
            if keyword in platforms:
                request["command"] = "links"
                request["platform"] = keyword
            if keyword == "help":
                request["command"] = "help"
                break

    def parse(self, msg_str):
        """
        Parse message content. Check if email address is well formed and,
        if enabled, if the DKIM signature is valid. Finally, look for
        commands to process the request. Current commands are:

            - links: request links for download.
            - help: help request.

        :param msg_str (str): incoming message as string.

        :return dict with the keys `id` (normalized sender address),
        `command` (`links`, `help` or None), `platform`, `language` and
        `service`. An empty dict is returned when the message is addressed
        to a recipient other than `to_addr`.

        :raise AddressError: if the From address is missing or invalid.
        :raise DKIMError: if DKIM verification is enabled and fails.
        """
        platforms = self.settings.get("platforms")
        languages = self.settings.get("languages")

        log.msg("Building email message from string.", system="email parser")
        msg = message_from_string(msg_str)

        # Normalization will convert <Alice Wonderland> alice@wonderland.net
        # into alice@wonderland.net. A missing header yields None, which
        # parseaddr does not accept on every Python version, so fall back to
        # an empty string and let validation reject it below.
        norm_addr = parseaddr(msg['From'] or "")[1]
        norm_to_addr = parseaddr(msg['To'] or "")[1]
        log.msg(
            "Normalizing and validating FROM email address.",
            system="email parser"
        )

        # Validate_email will do a bunch of regexp to see if the email address
        # is well formed. Additional options for validate_email are check_mx
        # and verify, which check if the SMTP host and email address exist.
        # See validate_email package for more info.
        if norm_addr and validate_email.validate_email(norm_addr):
            log.msg(
                "Email address normalized and validated.",
                system="email parser"
            )
        else:
            log.err(
                "Error normalizing/validating email address.",
                system="email parser"
            )
            raise AddressError("Invalid email address {}".format(msg['From']))

        # Only the hash of the address is written to the logs.
        hid = hashlib.sha256(norm_addr.encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="email parser"
        )

        if self.to_addr:
            if self.to_addr != norm_to_addr:
                log.msg("Got request for a different instance of gettor")
                log.msg("Intended recipient: {}".format(norm_to_addr))
                return {}

        # DKIM verification. Simply check that the server has verified the
        # message's signature
        if self.dkim:
            log.msg("Checking DKIM signature.", system="email parser")
            # Note: msg.as_string() changes the message to convert it to
            # string, so DKIM will fail. Use the original string instead
            if dkim.verify(msg_str):
                log.msg("Valid DKIM signature.", system="email parser")
            else:
                log.msg("Invalid DKIM signature.", system="email parser")
                # Take everything after the last "@" so an "@" inside the
                # local part cannot break the extraction.
                domain = norm_addr.rpartition("@")[2]
                raise DKIMError(
                    "DKIM failed for {} at {}".format(
                        hid.hexdigest(), domain
                    )
                )

        request = {
            "id": norm_addr,
            "command": None,
            "platform": None,
            "language": None,
            "service": "email"
        }

        # Search for commands keywords, first in the subject line
        subject = self._SUBJECT_RE.search(msg_str)
        if subject:
            self._scan_keywords(
                re.split(r"\s+", subject.group(1).strip()),
                request, platforms, languages
            )

        # Nothing useful in the subject: scan the whole raw message
        if not request["command"] and not request["language"]:
            self._scan_keywords(
                re.split(r"\s+", msg_str.strip()),
                request, platforms, languages
            )

        return request

    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        obtained information in the database for further processing by the
        Sendmail service.

        :param (dict) request: the built request based on message's content,
        as returned by `parse`. It may be an empty dict, in which case
        nothing is stored.

        :return: deferred whose callback/errback will log database query
        execution details.
        """
        email_requests_limit = self.settings.get("email_requests_limit")

        # `parse` returns an empty dict for messages addressed to another
        # instance, so the command must be looked up without indexing.
        command = request.get("command")
        log.msg(
            "Found request for {}.".format(command),
            system="email parser"
        )

        if command:
            now_str = datetime.now().strftime("%Y%m%d%H%M%S")
            dbname = self.settings.get("dbname")
            conn = SQLite3(dbname)

            hid = hashlib.sha256(request['id'].encode('utf-8'))
            # check limits first
            num_requests = yield conn.get_num_requests(
                id=hid.hexdigest(), service=request['service']
            )

            if num_requests[0][0] > email_requests_limit:
                log.msg(
                    "Discarded. Too many requests from {}.".format(
                        hid.hexdigest()
                    ), system="email parser"
                )

            else:
                # NOTE(review): request['language'] is parsed but not stored
                # here; confirm whether new_request should receive it.
                conn.new_request(
                    id=request['id'],
                    command=request['command'],
                    platform=request['platform'],
                    service=request['service'],
                    date=now_str,
                    status="ONHOLD",
                )

    def parse_errback(self, error):
        """
        Errback if we don't/can't parse the message's content.

        :param error: the failure passed to the errback; it is only
        formatted into the log message.
        """
        log.msg(
            "Error while parsing email content: {}.".format(error),
            system="email parser"
        )