generate.py 4.71 KB
Newer Older
1
from sbws.globals import (fail_hard, is_initted)
juga's avatar
juga committed
2
from sbws.lib.v3bwfile import V3BwHeader, V3BWLine
3
from sbws.lib.resultdump import ResultSuccess
4
from sbws.lib.resultdump import load_recent_results_in_datadir
juga's avatar
juga committed
5
from sbws.util.timestamp import unixts_to_isodt_str
6
7
from argparse import ArgumentDefaultsHelpFormatter
from statistics import median
8
import os
9
10
11
import logging

log = logging.getLogger(__name__)
12
13
14
15
16


def result_data_to_v3bw_line(data, fingerprint):
    """Summarise every result of one relay into a single V3BWLine.

    :param data: mapping from relay fingerprint to a non-empty list of
        ResultSuccess objects (both are asserted / indexed below).
    :param fingerprint: key of ``data`` selecting the relay to summarise.
    :returns: a V3BWLine built from the fingerprint, the median of the
        per-download ``amount / duration`` values, the nickname of the first
        result, all the RTTs and the rounded time of the newest result.
    """
    assert fingerprint in data
    results = data[fingerprint]
    for res in results:
        assert isinstance(res, ResultSuccess)
    # The nickname is taken from the first result only.
    nick = results[0].nickname
    speeds = [dl['amount'] / dl['duration']
              for r in results for dl in r.downloads]
    speed = median(speeds)
    rtts = [rtt for r in results for rtt in r.rtts]
    last_time = round(max(r.time for r in results))
    return V3BWLine(fingerprint, speed, nick, rtts, last_time)
27
28


29
30
31
def warn_if_not_accurate_enough(lines, constant):
    """Report how far the mean ``bw`` of ``lines`` is from ``constant``.

    The deviation is always logged at info level. A warning is logged as
    well when the mean differs from ``constant`` by more than the margin.

    :param lines: non-empty sequence of objects with a numeric ``bw``.
    :param constant: non-zero value the mean ``bw`` is compared against.
    """
    margin = 0.001
    mean_bw = sum(line.bw for line in lines) / len(lines)
    accuracy_ratio = mean_bw / constant
    error_percent = (1-accuracy_ratio)*100
    log.info('The generated lines are within {:.5}% of what they should '
             'be'.format(error_percent))
    # Kept as two separate comparisons so a NaN ratio never triggers the
    # warning.
    too_low = accuracy_ratio < 1 - margin
    too_high = accuracy_ratio > 1 + margin
    if too_low or too_high:
        log.warning('There was %f%% error and only +/- %f%% is '
                    'allowed', error_percent, margin*100)
37
38


39
def scale_lines(args, v3bw_lines):
    """Multiply the ``bw`` of every line by a common ratio and round it.

    With ``args.scale`` set, the ratio is chosen so the lines add up to
    about ``len(v3bw_lines) * args.scale_constant`` (before rounding), and
    the result is checked with warn_if_not_accurate_enough. Otherwise the
    ratio is 1 and the values are only rounded.

    :param args: object providing ``scale`` and ``scale_constant``.
    :param v3bw_lines: non-empty list of objects with a numeric ``bw``;
        the sum of all ``bw`` must be positive.
    :returns: the same ``v3bw_lines`` list; its items are modified in place.
    """
    assert len(v3bw_lines) > 0
    total = sum(line.bw for line in v3bw_lines)
    # A zero total would end in a ZeroDivisionError when computing the ratio.
    assert total > 0
    scale = len(v3bw_lines) * args.scale_constant if args.scale else total
    ratio = scale / total
    for v3bw_line in v3bw_lines:
        v3bw_line.bw = round(v3bw_line.bw * ratio)
    if args.scale:
        warn_if_not_accurate_enough(v3bw_lines, args.scale_constant)
    return v3bw_lines


56
def gen_parser(sub):
    """Register the ``generate`` subcommand and its options on ``sub``.

    :param sub: subparsers action, as returned by
        ``ArgumentParser.add_subparsers()``, to add the command to.
    """
    d = ('Generate a v3bw file based on recent results. A v3bw file is the '
         'file Tor directory authorities want to read and base their '
         'bandwidth votes on.')
    p = sub.add_parser('generate', description=d,
                       formatter_class=ArgumentDefaultsHelpFormatter)
    # Adjacent string literals are concatenated verbatim, so every piece
    # except the last one has to end with a space.
    p.add_argument('--output', default=None, type=str,
                   help='If specified, write the v3bw here instead of what is '
                   'specified in the configuration')
    # The reason for --scale-constant defaulting to 7500 is because at one
    # time, torflow happened to generate output that averaged to 7500 bw units
    # per relay. We wanted the ability to try to be like torflow. See
    # https://lists.torproject.org/pipermail/tor-dev/2018-March/013049.html
    p.add_argument('--scale-constant', default=7500, type=int,
                   help='When scaling bw weights, scale them using this const '
                   'multiplied by the number of measured relays')
    p.add_argument('--scale', action='store_true',
                   help='If specified, do not use bandwidth values as they '
                   'are, but scale them such that we have a budget of '
                   'scale_constant * num_measured_relays = bandwidth to give '
                   'out, and we do so proportionally')
77
78


Matt Traudt's avatar
Matt Traudt committed
79
def log_stats(data_lines):
    """Log, at info level, the mean ``bw`` of the given lines.

    :param data_lines: non-empty sequence of objects with a numeric ``bw``.
    """
    num_lines = len(data_lines)
    assert num_lines > 0
    bw_per_line = sum(line.bw for line in data_lines) / num_lines
    log.info('Mean bandwidth per line: %f "KiB"', bw_per_line)
Matt Traudt's avatar
Matt Traudt committed
84
85


86
def main(args, conf):
    """Write a v3bw file built from the recent successful results.

    Calls fail_hard when sbws is not initialized in ``args.directory``, when
    the configured data directory is missing or when ``args.scale_constant``
    is lower than 1. Logs a warning and returns without writing anything
    when there are no recent results.

    :param args: parsed command line arguments; ``directory``, ``output``,
        ``scale`` and ``scale_constant`` are read.
    :param conf: configuration; ``paths.datadir``, ``paths.v3bw_fname`` and
        ``general.data_period`` are read.
    """
    if not is_initted(args.directory):
        fail_hard('Sbws isn\'t initialized.  Try sbws init')

    datadir = conf['paths']['datadir']
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
    if args.scale_constant < 1:
        fail_hard('--scale-constant must be positive')

    fresh_days = conf.getint('general', 'data_period')
    results = load_recent_results_in_datadir(
        fresh_days, datadir, success_only=True)
    if len(results) == 0:
        log.warning('No recent results, so not generating anything. (Have you '
                    'ran sbws scanner recently?)')
        return

    # process bandwidth lines: one per fingerprint, highest bandwidth first
    data_lines = sorted(
        (result_data_to_v3bw_line(results, fp) for fp in results),
        key=lambda line: line.bw, reverse=True)
    data_lines = scale_lines(args, data_lines)
    log_stats(data_lines)

    # process header lines
    # FIXME: what to move to V3BwHeader?
    header = V3BwHeader.from_results(conf, results)

    # FIXME: move this to V3BwFile class?
    # The command line option, when given, overrides the configured path.
    configured_output = conf['paths']['v3bw_fname']
    output = args.output if args.output else configured_output
    log.info('Writing v3bw file to %s', output)
    with open(output, 'wt') as fd:
        fd.write(str(header))
        fd.writelines('{}\n'.format(str(line)) for line in data_lines)