generate.py 6.52 KB
Newer Older
1
2
from datetime import datetime

3
from sbws.globals import (fail_hard, is_initted)
juga's avatar
juga committed
4
from sbws.lib.v3bwfile import V3BwHeader
5
from sbws.lib.resultdump import ResultSuccess
6
from sbws.lib.resultdump import load_recent_results_in_datadir
7
from sbws.util.filelock import FileLock
8
9
from argparse import ArgumentDefaultsHelpFormatter
from statistics import median
10
import os
11
12
13
import logging

log = logging.getLogger(__name__)
14
15


Matt Traudt's avatar
Matt Traudt committed
16
class V3BWLine:
    """A single relay line of a v3bw file.

    The constructor normalises the raw measurements it is given and
    ``str()`` renders the line as space separated ``key=value`` pairs.
    """

    def __init__(self, fp, bw, nick, rtts, last_time):
        """Store the relay identity and its normalised measurements.

        :param fp: relay fingerprint, rendered after ``node_id=$``
        :param bw: bandwidth; divided by 1024 and rounded before storing
        :param nick: relay nickname
        :param rtts: iterable of round trip times; each one is multiplied
            by 1000 and rounded before the median is taken
        :param last_time: value rendered as ``time=``
        """
        self.fp = fp
        self.nick = nick
        # Scale down to KiB, but never report less than 1.
        self.bw = max(1, round(bw / 1024))
        # Scale to ms: every sample is rounded first, then the median of
        # the rounded samples is rounded again.
        millis = []
        for sample in rtts:
            millis.append(round(sample * 1000))
        self.rtt = round(median(millis))
        self.time = last_time

    def __str__(self):
        """Return the line in the form written to the v3bw file."""
        template = 'node_id=${fp} bw={sp} nick={n} rtt={rtt} time={t}'
        fields = {
            'fp': self.fp,
            'sp': self.bw,
            'n': self.nick,
            'rtt': self.rtt,
            't': self.time,
        }
        return template.format(**fields)
Matt Traudt's avatar
Matt Traudt committed
31
32


33
34
35
def result_data_to_v3bw_line(data, fingerprint):
    """Summarise the results of one relay into a V3BWLine.

    :param data: mapping from fingerprint to a list of results; every
        result for ``fingerprint`` must be a ResultSuccess
    :param fingerprint: key of the relay to summarise; must be in ``data``
    :returns: V3BWLine built from the first result's nickname, the median
        of ``amount / duration`` over all downloads, all the rtts and the
        rounded most recent result time
    """
    assert fingerprint in data
    relay_results = data[fingerprint]
    assert all(isinstance(res, ResultSuccess) for res in relay_results)
    nick = relay_results[0].nickname
    # One speed sample per download, across every result of the relay.
    speeds = []
    for result in relay_results:
        for download in result.downloads:
            speeds.append(download['amount'] / download['duration'])
    speed = median(speeds)
    rtts = []
    for result in relay_results:
        rtts.extend(result.rtts)
    last_time = round(max(result.time for result in relay_results))
    return V3BWLine(fingerprint, speed, nick, rtts, last_time)
46
47


48
49
50
def warn_if_not_accurate_enough(lines, constant):
    """Log how far the mean ``bw`` of the lines is from ``constant``.

    The deviation is always logged at info level. A warning is logged as
    well when the ratio between the mean and ``constant`` differs from 1
    by more than the allowed margin (0.001).

    :param lines: non-empty sequence of objects with a ``bw`` attribute
    :param constant: value the mean ``bw`` is compared with
    """
    margin = 0.001
    mean_bw = sum([line.bw for line in lines]) / len(lines)
    accuracy_ratio = mean_bw / constant
    error_percent = (1 - accuracy_ratio) * 100
    log.info('The generated lines are within {:.5}% of what they should '
             'be'.format(error_percent))
    too_low = accuracy_ratio < 1 - margin
    too_high = accuracy_ratio > 1 + margin
    if too_low or too_high:
        log.warning('There was %f%% error and only +/- %f%% is '
                    'allowed', error_percent, margin * 100)
56
57


58
def scale_lines(args, v3bw_lines):
    """Rescale, in place, the ``bw`` of every line and return the lines.

    When ``args.scale`` is set, the lines share a budget of
    ``len(v3bw_lines) * args.scale_constant`` in proportion to their
    current ``bw``, and the accuracy of the outcome is checked afterwards.
    Otherwise the budget is the current total, so each ``bw`` is only
    rounded.

    :param args: object with ``scale`` and ``scale_constant`` attributes
    :param v3bw_lines: non-empty list of objects with a ``bw`` attribute
        whose sum is positive
    :returns: the same list that was passed in
    """
    assert len(v3bw_lines) > 0
    total = sum([entry.bw for entry in v3bw_lines])
    # A zero total would make the ratio below divide by zero.
    assert total > 0
    scale = len(v3bw_lines) * args.scale_constant if args.scale else total
    ratio = scale / total
    for entry in v3bw_lines:
        entry.bw = round(entry.bw * ratio)
    if args.scale:
        warn_if_not_accurate_enough(v3bw_lines, args.scale_constant)
    return v3bw_lines


75
def gen_parser(sub):
    """Register the ``generate`` subcommand and its options on ``sub``.

    :param sub: object providing ``add_parser``, such as the one returned
        by ``argparse.ArgumentParser.add_subparsers()``
    """
    d = 'Generate a v3bw file based on recent results. A v3bw file is the '\
        'file Tor directory authorities want to read and base their '\
        'bandwidth votes on.'
    p = sub.add_parser('generate', description=d,
                       formatter_class=ArgumentDefaultsHelpFormatter)
    # Adjacent string literals are joined without a separator, so the
    # first piece has to end with a space.
    p.add_argument('--output', default=None, type=str,
                   help='If specified, write the v3bw here instead of what is '
                   'specified in the configuration')
    # The reason for --scale-constant defaulting to 7500 is because at one
    # time, torflow happened to generate output that averaged to 7500 bw units
    # per relay. We wanted the ability to try to be like torflow. See
    # https://lists.torproject.org/pipermail/tor-dev/2018-March/013049.html
    p.add_argument('--scale-constant', default=7500, type=int,
                   help='When scaling bw weights, scale them using this const '
                   'multiplied by the number of measured relays')
    p.add_argument('--scale', action='store_true',
                   help='If specified, do not use bandwidth values as they '
                   'are, but scale them such that we have a budget of '
                   'scale_constant * num_measured_relays = bandwidth to give '
                   'out, and we do so proportionally')
96
97


Matt Traudt's avatar
Matt Traudt committed
98
def log_stats(data_lines):
    """Log, at info level, the mean ``bw`` of the given lines.

    :param data_lines: non-empty sequence of objects with a ``bw``
        attribute
    """
    assert len(data_lines) > 0
    bw_sum = sum([entry.bw for entry in data_lines])
    log.info('Mean bandwidth per line: %f "KiB"', bw_sum / len(data_lines))
Matt Traudt's avatar
Matt Traudt committed
103
104


juga's avatar
juga committed
105
def read_started_ts(conf):
    """Read the ISO formatted timestamp which represents the date and time
    when the scanner started.

    The file is read while holding a FileLock on its path.

    :param ConfigParser conf: configuration; the path of the file is taken
        from ``conf['paths']['started_filepath']``
    :returns: str, the content of the file as it was read, or an empty
        string when the file does not exist
    """
    filepath = conf['paths']['started_filepath']
    try:
        with FileLock(filepath), open(filepath, 'r') as fd:
            generator_started = fd.read()
    except FileNotFoundError as e:
        # Logger.warn is a deprecated alias of Logger.warning, which is
        # what the rest of this module calls.
        log.warning('File %s not found.%s', filepath, e)
        return ''
    return generator_started
juga's avatar
juga committed
121
122


123
def main(args, conf):
    """Generate a v3bw file from the recent successful results.

    Loads the recent results from the data directory, turns the results of
    every relay into a line, sorts the lines by descending ``bw``, scales
    them, and writes a header followed by the lines to the output file.
    Nothing is written when there are no recent results.

    :param args: parsed command line arguments; ``directory``, ``scale``,
        ``scale_constant`` and ``output`` are read
    :param ConfigParser conf: configuration; ``paths.datadir``,
        ``paths.v3bw_fname``, ``paths.started_filepath`` and
        ``general.data_period`` are read
    """
    if not is_initted(args.directory):
        fail_hard('Sbws isn\'t initialized.  Try sbws init')

    datadir = conf['paths']['datadir']
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
    if args.scale_constant < 1:
        fail_hard('--scale-constant must be positive')

    fresh_days = conf.getint('general', 'data_period')
    results = load_recent_results_in_datadir(
        fresh_days, datadir, success_only=True)
    # Bail out before anything is derived from the results: everything
    # below needs at least one of them.
    if len(results) < 1:
        log.warning('No recent results, so not generating anything. (Have you '
                    'ran sbws scanner recently?)')
        return

    # Collect the result times once; both ends of the range are needed.
    result_times = [r.time for fp in results for r in results[fp]]
    # Using naive datetime object without timezone, assumed utc
    timestamp = datetime.utcfromtimestamp(max(result_times))
    lastest_bandwidth = timestamp.replace(microsecond=0).isoformat()
    earliest_bandwidth = datetime.utcfromtimestamp(min(result_times)) \
        .replace(microsecond=0).isoformat()

    data_lines = [result_data_to_v3bw_line(results, fp) for fp in results]
    data_lines = sorted(data_lines, key=lambda d: d.bw, reverse=True)
    data_lines = scale_lines(args, data_lines)
    generator_started = read_started_ts(conf)
    # `lastest_bandwidth` is the keyword name used by V3BwHeader.
    header = V3BwHeader(timestamp=timestamp,
                        lastest_bandwidth=lastest_bandwidth,
                        earliest_bandwidth=earliest_bandwidth,
                        generator_started=generator_started)
    log_stats(data_lines)

    # The command line argument takes precedence over the configuration.
    output = conf['paths']['v3bw_fname']
    if args.output:
        output = args.output
    log.info('Writing v3bw file to %s', output)
    with open(output, 'wt') as fd:
        fd.write(str(header))
        for line in data_lines:
            fd.write('{}\n'.format(str(line)))