generate.py 5.02 KB
Newer Older
1
from sbws import version
2
from sbws.globals import (fail_hard, is_initted, time_now)
juga's avatar
juga committed
3
from sbws.lib.v3bwfile import V3BwHeader
4
from sbws.lib.resultdump import ResultSuccess
5
6
from sbws.lib.resultdump import load_recent_results_in_datadir
from sbws.lib.resultdump import group_results_by_relay
7
8
from argparse import ArgumentDefaultsHelpFormatter
from statistics import median
9
import os
10
11
12
import logging

log = logging.getLogger(__name__)
13
14


Matt Traudt's avatar
Matt Traudt committed
15
class V3BWLine:
    """One relay's line of the generated v3bw file.

    Stores the relay fingerprint, nickname, bandwidth, median round-trip
    time and last measurement time, and renders them as a single
    space-separated ``key=value`` line through ``str()``.
    """

    def __init__(self, fp, bw, nick, rtts, last_time):
        """Build a line from raw measurement values.

        :param fp: relay fingerprint; emitted after ``node_id=$``
        :param bw: bandwidth; divided by 1024, rounded, and clamped to a
            minimum of 1
        :param nick: relay nickname
        :param rtts: iterable of round-trip times; each one is multiplied
            by 1000 and rounded before the median is taken
        :param last_time: stored unchanged and emitted as ``time=``
        :raises statistics.StatisticsError: if ``rtts`` is empty, because
            ``median`` refuses empty data
        """
        self.fp = fp
        self.nick = nick
        # convert to KiB and make sure the answer is at least 1
        self.bw = max(round(bw / 1024), 1)
        # convert to ms
        rtts_ms = [round(r * 1000) for r in rtts]
        # The median of an even number of ints may be x.5, so round again.
        self.rtt = round(median(rtts_ms))
        self.time = last_time

    def __str__(self):
        # The '$' is a literal character; only '{fp}' is a format field, so
        # the output reads 'node_id=$<fingerprint> ...'.
        frmt = 'node_id=${fp} bw={sp} nick={n} rtt={rtt} time={t}'
        return frmt.format(fp=self.fp, sp=self.bw, n=self.nick, rtt=self.rtt,
                           t=self.time)
Matt Traudt's avatar
Matt Traudt committed
30
31


32
33
34
def result_data_to_v3bw_line(data, fingerprint):
    """Summarize all results for one relay into a single V3BWLine.

    :param data: mapping of fingerprint -> sequence of results; every
        result for ``fingerprint`` must be a ``ResultSuccess``
    :param fingerprint: key of the relay to summarize; must be in ``data``
    :returns: a ``V3BWLine`` built from the median download speed
        (amount / duration over every download of every result), all RTTs
        of all results, the nickname of the first result, and the rounded
        newest result time
    :raises AssertionError: if ``fingerprint`` is not in ``data`` or a
        result is not a ``ResultSuccess``
    """
    assert fingerprint in data
    results = data[fingerprint]
    for res in results:
        assert isinstance(res, ResultSuccess)
    # The nickname is taken from the first result only.
    nick = results[0].nickname
    speeds = [dl['amount'] / dl['duration']
              for r in results for dl in r.downloads]
    speed = median(speeds)
    rtts = [rtt for r in results for rtt in r.rtts]
    last_time = round(max(r.time for r in results))
    return V3BWLine(fingerprint, speed, nick, rtts, last_time)
45
46


47
48
49
def warn_if_not_accurate_enough(lines, constant):
    """Log how far the mean bandwidth of ``lines`` is from ``constant``.

    The ratio (mean of ``line.bw``) / ``constant`` is compared against 1.
    The deviation is always logged at info level; a warning is logged as
    well when the ratio falls outside 1 +/- ``margin`` (0.001, i.e. 0.1%).

    :param lines: non-empty sequence of objects with a numeric ``bw``
    :param constant: non-zero target mean bandwidth per line
    """
    margin = 0.001
    mean_bw = sum(line.bw for line in lines) / len(lines)
    accuracy_ratio = mean_bw / constant
    log.info('The generated lines are within {:.5}% of what they should '
             'be'.format((1-accuracy_ratio)*100))
    if accuracy_ratio < 1 - margin or accuracy_ratio > 1 + margin:
        log.warning('There was %f%% error and only +/- %f%% is '
                    'allowed', (1-accuracy_ratio)*100, margin*100)
55
56


57
def scale_lines(args, v3bw_lines):
    """Rescale the ``bw`` of every line in place and return the same list.

    With ``args.scale`` set, the bandwidths are scaled proportionally so
    that their sum is about ``len(v3bw_lines) * args.scale_constant``, and
    the accuracy of the result is then checked and logged. Without it the
    ratio is 1, so each ``bw`` is only passed through ``round``.

    :param args: object with ``scale`` (bool) and ``scale_constant``
    :param v3bw_lines: non-empty list of objects with a numeric ``bw``
        whose sum is positive
    :returns: ``v3bw_lines`` itself, with each ``bw`` updated
    :raises AssertionError: if the list is empty or the total is not > 0
    """
    assert len(v3bw_lines) > 0
    total = sum(line.bw for line in v3bw_lines)
    # A zero total would cause a ZeroDivisionError when computing the ratio
    assert total > 0
    if args.scale:
        scale = len(v3bw_lines) * args.scale_constant
    else:
        scale = total
    ratio = scale / total
    for line in v3bw_lines:
        line.bw = round(line.bw * ratio)
    if args.scale:
        warn_if_not_accurate_enough(v3bw_lines, args.scale_constant)
    return v3bw_lines


74
def gen_parser(sub):
    """Register the ``generate`` sub-command and its options.

    :param sub: the sub-parsers object (as returned by
        ``ArgumentParser.add_subparsers``) to add ``generate`` to

    Options added: ``--output`` (default ``/dev/stdout``),
    ``--scale-constant`` (int, default 7500) and the ``--scale`` flag.
    """
    d = 'Generate a v3bw file based on recent results. A v3bw file is the '\
        'file Tor directory authorities want to read and base their '\
        'bandwidth votes on.'
    p = sub.add_parser('generate', description=d,
                       formatter_class=ArgumentDefaultsHelpFormatter)
    p.add_argument('--output', default='/dev/stdout', type=str,
                   help='Where to write v3bw file')
    # The reason for --scale-constant defaulting to 7500 is because at one
    # time, torflow happened to generate output that averaged to 7500 bw units
    # per relay. We wanted the ability to try to be like torflow. See
    # https://lists.torproject.org/pipermail/tor-dev/2018-March/013049.html
    p.add_argument('--scale-constant', default=7500, type=int,
                   help='When scaling bw weights, scale them using this const '
                   'multiplied by the number of measured relays')
    p.add_argument('--scale', action='store_true',
                   help='If specified, do not use bandwidth values as they '
                   'are, but scale them such that we have a budget of '
                   'scale_constant * num_measured_relays = bandwidth to give '
                   'out, and we do so proportionally')
94
95


Matt Traudt's avatar
Matt Traudt committed
96
def log_stats(data_lines):
    """Log the mean ``bw`` of the given lines at info level.

    :param data_lines: non-empty sequence of objects with a numeric ``bw``
    :raises AssertionError: if ``data_lines`` is empty
    """
    assert len(data_lines) > 0
    total_bw = sum(line.bw for line in data_lines)
    bw_per_line = total_bw / len(data_lines)
    log.info('Mean bandwidth per line: %f "KiB"', bw_per_line)
Matt Traudt's avatar
Matt Traudt committed
101
102


103
def main(args, conf):
    """Entry point of ``generate``: write a v3bw file from recent results.

    Loads the successful results of the last ``general.data_period`` days
    from ``paths.datadir``, turns them into one line per relay sorted by
    descending bandwidth, optionally scales them, and writes the header
    followed by the lines to ``args.output``.

    :param args: parsed arguments; reads ``directory``, ``scale_constant``,
        ``scale`` and ``output``
    :param conf: configuration; reads ``paths.datadir`` and
        ``general.data_period``

    NOTE(review): the code below assumes ``fail_hard`` does not return
    (presumably it exits the process) -- confirm against sbws.globals.
    """
    if not is_initted(args.directory):
        fail_hard('Sbws isn\'t initialized.  Try sbws init')

    datadir = conf['paths']['datadir']
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
    if args.scale_constant < 1:
        fail_hard('--scale-constant must be positive')

    fresh_days = conf.getint('general', 'data_period')
    results = load_recent_results_in_datadir(
        fresh_days, datadir, success_only=True)
    if len(results) < 1:
        log.warning('No recent results, so not generating anything. (Have you '
                    'ran sbws scanner recently?)')
        return
    data = group_results_by_relay(results)
    data_lines = [result_data_to_v3bw_line(data, fp) for fp in data]
    data_lines = sorted(data_lines, key=lambda d: d.bw, reverse=True)
    data_lines = scale_lines(args, data_lines)
    header = V3BwHeader()
    log_stats(data_lines)
    log.info('Writing v3bw file to %s', args.output)
    with open(args.output, 'wt') as fd:
        # A text-mode file only accepts str, so render the header object
        # explicitly instead of passing the object itself.
        fd.write(str(header))
        for line in data_lines:
            fd.write('{}\n'.format(str(line)))