twitter.py 4.54 KB
Newer Older
Hiro's avatar
Hiro committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <hiro@torproject.org>
#           see also AUTHORS file
#
# :copyright:   (c) 2008-2014, The Tor Project, Inc.
#               (c) 2019, Hiro
#
# :license: This is Free Software. See LICENSE for license information.

from __future__ import absolute_import

import re
import dkim
import hashlib

from datetime import datetime
import configparser

from twisted.python import log
from twisted.internet import defer
from twisted.enterprise import adbapi

from ..utils.db import SQLite3
from ..utils import strings


class TwitterParser(object):
    """
    Class for parsing twitter message requests.

    The parser reads its configuration exclusively through
    ``settings.get(key)``; the keys used here are ``platforms``,
    ``twitter_requests_limit`` and ``dbname``.
    """

    def __init__(self, settings, twitter_id=None):
        """
        Constructor.

        :param settings: configuration object exposing ``get(key)``.
        :param twitter_id: optional twitter id kept on the instance; the
        methods of this class take the id explicitly and do not read it.
        """
        self.settings = settings
        self.twitter_id = twitter_id


    def build_request(self, msg_text, twitter_id, languages, platforms):
        """
        Build the request dict out of the words found in a message.

        Words are matched case-insensitively. A word found in `languages`
        sets the language, a word found in `platforms` selects the `links`
        command for that platform, and the word `help` selects the `help`
        command and stops the scan (later words are ignored).

        :param msg_text (str): message text; may be empty or None.
        :param twitter_id: id of the sender, stored unchanged under `id`.
        :param languages: container of accepted language codes.
        :param platforms: container of accepted platform names.

        :return: dict with the keys `id`, `command`, `platform`,
        `language` and `service`. `command` and `platform` stay None when
        nothing matched; `language` defaults to "en".
        """
        request = {
            "id": twitter_id,
            "command": None,
            "platform": None,
            "language": "en",
            "service": "twitter"
        }

        if msg_text:
            # str.split() with no argument splits on whitespace runs and
            # yields no empty tokens, even for whitespace-only messages.
            for word in msg_text.split():
                token = word.lower()
                if token in languages:
                    request["language"] = token
                if token in platforms:
                    request["command"] = "links"
                    request["platform"] = token
                if token == "help":
                    # help wins over anything seen so far.
                    request["command"] = "help"
                    break

        return request


    def parse(self, msg, twitter_id):
        """
        Parse message content and look for commands to process the request.
        Current commands are:

            - links: request links for download.
            - help: help request.

        :param msg (str): incoming message as string.
        :param twitter_id: id of the sender. Only its SHA-256 digest is
        written to the log.

        :return: dict built by `build_request` (keys `id`, `command`,
        `platform`, `language`, `service`).
        """

        log.msg("Building twitter message from string.", system="twitter parser")

        platforms = self.settings.get("platforms")
        languages = [*strings.get_locales().keys()]

        # Log a digest instead of the id itself.
        hid = hashlib.sha256(str(twitter_id).encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="twitter parser"
        )

        request = self.build_request(msg, twitter_id, languages, platforms)

        return request


    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        obtained information in the database, with status `ONHOLD`, for
        further processing.

        Nothing is stored when the request has no command, or when the
        sender already has more than `twitter_requests_limit` requests
        recorded for this service.

        :param (dict) request: the built request based on message's content.
        It contains the `id`, `command`, `platform`, `language` and
        `service` fields.

        :return: deferred that fires once the database work is done; a
        failing query is delivered to its errback.
        """
        twitter_requests_limit = self.settings.get("twitter_requests_limit")
        log.msg(
            "Found request for {}.".format(request['command']),
            system="twitter parser"
        )

        if not request["command"]:
            return

        now_str = datetime.now().strftime("%Y%m%d%H%M%S")
        dbname = self.settings.get("dbname")
        conn = SQLite3(dbname)

        raw_id = str(request['id'])
        hid = hashlib.sha256(raw_id.encode('utf-8')).hexdigest()

        # check limits first
        # Requests are looked up under both the hashed id and the plain id.
        # Each result is read as rows[0][0] (presumably a one-row COUNT
        # result -- confirm against SQLite3.get_num_requests). The counts
        # must be added; concatenating the row lists would leave the second
        # count out of the comparison below.
        hashed_rows = yield conn.get_num_requests(
            id=hid, service=request['service']
        )
        plain_rows = yield conn.get_num_requests(
            id=raw_id, service=request['service']
        )
        num_requests = hashed_rows[0][0] + plain_rows[0][0]

        if num_requests > twitter_requests_limit:
            log.msg(
                "Discarded. Too many requests from {}.".format(hid),
                system="twitter parser"
            )
        else:
            # Wait for the insert so a failure propagates to the caller
            # instead of being dropped with the unreferenced result.
            yield conn.new_request(
                id=raw_id,
                command=request['command'],
                platform=request['platform'],
                language=request['language'],
                service=request['service'],
                date=now_str,
                status="ONHOLD",
            )


    def parse_errback(self, error):
        """
        Errback if we don't/can't parse the message's content.

        :param error: the failure to report; it is only formatted into the
        log message.
        """
        log.msg(
            "Error while parsing twitter message content: {}.".format(error),
            system="twitter parser"
        )