generate.py 4.92 KB
Newer Older
1
from sbws import version
2
from sbws.globals import (fail_hard, is_initted, time_now)
3
from sbws.lib.resultdump import ResultSuccess
4
5
from sbws.lib.resultdump import load_recent_results_in_datadir
from sbws.lib.resultdump import group_results_by_relay
6
7
from argparse import ArgumentDefaultsHelpFormatter
from statistics import median
8
import os
9
10
11
import logging

log = logging.getLogger(__name__)
12
13


Matt Traudt's avatar
Matt Traudt committed
14
class V3BWLine:
    """One relay entry of a v3bw file.

    Attributes (all set by the constructor):
      fp   -- relay fingerprint, stored as given
      bw   -- bandwidth rounded to an integer, never lower than 1
      nick -- relay nickname, stored as given
      rtt  -- median round-trip time in whole milliseconds
      time -- value passed as ``last_time``, stored as given
    """

    def __init__(self, fp, bw, nick, rtts, last_time):
        """Build a line from raw measurements.

        ``rtts`` is an iterable of round-trip times in seconds. Each one is
        converted to whole milliseconds before the median is taken, and the
        median itself is rounded again because it may fall between two
        samples.
        """
        self.fp = fp
        self.nick = nick
        self.time = last_time
        # Floor the rounded bandwidth at 1.
        self.bw = max(round(bw), 1)
        # convert to ms
        self.rtt = round(median(round(seconds * 1000) for seconds in rtts))

    def __str__(self):
        """Return the line in ``key=value`` form, fingerprint prefixed by $."""
        template = 'node_id=${} bw={} nick={} rtt={} time={}'
        return template.format(self.fp, self.bw, self.nick, self.rtt,
                               self.time)
Matt Traudt's avatar
Matt Traudt committed
28
29


30
31
32
def result_data_to_v3bw_line(data, fingerprint):
    """Summarise all results for one relay into a single V3BWLine.

    ``data`` maps fingerprints to sequences of ResultSuccess objects and
    must contain ``fingerprint``. The line's bandwidth is the median of
    amount/duration over every download of every result, its nickname is
    taken from the first result, its RTT list is the concatenation of all
    results' RTTs, and its time is the most recent result time, rounded.
    """
    assert fingerprint in data
    relay_results = data[fingerprint]
    assert all(isinstance(result, ResultSuccess) for result in relay_results)
    nickname = relay_results[0].nickname
    download_speeds = [
        download['amount'] / download['duration']
        for result in relay_results
        for download in result.downloads
    ]
    median_speed = median(download_speeds)
    all_rtts = [rtt for result in relay_results for rtt in result.rtts]
    newest = round(max(result.time for result in relay_results))
    return V3BWLine(fingerprint, median_speed, nickname, all_rtts, newest)
43
44


45
46
47
def warn_if_not_accurate_enough(lines, constant):
    """Log how far the mean bandwidth of ``lines`` is from ``constant``.

    The deviation is always logged at info level. A warning is logged as
    well when the mean differs from ``constant`` by more than 0.1% in
    either direction.
    """
    margin = 0.001
    mean_bw = sum(line.bw for line in lines) / len(lines)
    accuracy_ratio = mean_bw / constant
    error_percent = (1 - accuracy_ratio) * 100
    log.info('The generated lines are within {:.5}% of what they should '
             'be'.format(error_percent))
    too_low = accuracy_ratio < 1 - margin
    too_high = accuracy_ratio > 1 + margin
    if too_low or too_high:
        log.warning('There was %f%% error and only +/- %f%% is '
                    'allowed', error_percent, margin * 100)
53
54


55
def scale_lines(args, v3bw_lines):
    """Rescale the bandwidth of every line in place and return the list.

    When ``args.scale`` is true the lines share a budget of
    ``len(v3bw_lines) * args.scale_constant``, handed out in proportion to
    each line's current bandwidth. Otherwise the budget equals the current
    total, so the ratio is 1 and values are only re-rounded.

    Every resulting bandwidth is floored at 1. V3BWLine applies the same
    floor at construction time; without it a slow relay could be rounded
    down to a bandwidth of 0 here.

    ``v3bw_lines`` must be non-empty and its bandwidths must sum to a
    positive number (both are asserted). The same list object is returned.
    """
    assert len(v3bw_lines) > 0
    total = sum(line.bw for line in v3bw_lines)
    # A zero total would make the ratio below divide by zero.
    assert total > 0
    if args.scale:
        scale = len(v3bw_lines) * args.scale_constant
    else:
        scale = total
    ratio = scale / total
    for line in v3bw_lines:
        # Keep the minimum of 1 that V3BWLine.__init__ establishes.
        line.bw = max(round(line.bw * ratio), 1)
    if args.scale:
        warn_if_not_accurate_enough(v3bw_lines, args.scale_constant)
    return v3bw_lines


72
def gen_parser(sub):
    """Register the ``generate`` sub-command and its options on ``sub``.

    ``sub`` is the object returned by ``ArgumentParser.add_subparsers()``.
    Options added: ``--output``, ``--scale-constant`` and ``--scale``.
    """
    description = (
        'Generate a v3bw file based on recent results. A v3bw file is the '
        'file Tor directory authorities want to read and base their '
        'bandwidth votes on.'
    )
    parser = sub.add_parser(
        'generate',
        description=description,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--output', default='/dev/stdout', type=str,
        help='Where to write v3bw file',
    )
    # The reason for --scale-constant defaulting to 7500 is because at one
    # time, torflow happened to generate output that averaged to 7500 bw units
    # per relay. We wanted the ability to try to be like torflow. See
    # https://lists.torproject.org/pipermail/tor-dev/2018-March/013049.html
    parser.add_argument(
        '--scale-constant', default=7500, type=int,
        help=('When scaling bw weights, scale them using this const '
              'multiplied by the number of measured relays'),
    )
    parser.add_argument(
        '--scale', action='store_true',
        help=('If specified, do not use bandwidth values as they '
              'are, but scale them such that we have a budget of '
              'scale_constant * num_measured_relays = bandwidth to give '
              'out, and we do so proportionally'),
    )
92
93


Matt Traudt's avatar
Matt Traudt committed
94
def log_stats(data_lines):
    """Log the mean bandwidth per line, divided by 1024, at info level.

    ``data_lines`` must be non-empty (asserted).
    """
    line_count = len(data_lines)
    assert line_count > 0
    mean_bw = sum(line.bw for line in data_lines) / line_count / 1024
    log.info('Mean bandwidth per line: {:.2f} "KiB"'.format(mean_bw))


101
def main(args, conf):
    """Entry point of the ``generate`` command: write a v3bw file.

    Steps, in order:
      1. Call fail_hard if ``args.directory`` is not initialised, if the
         configured data directory does not exist, or if
         ``args.scale_constant`` is below 1.
      2. Load the successful results from the last
         ``general.data_period`` days; warn and return if there are none.
      3. Build one V3BWLine per relay, order them by descending bandwidth,
         scale them and log statistics.
      4. Write the current timestamp, the sbws version and every line to
         ``args.output``.
    """
    if not is_initted(args.directory):
        fail_hard("Sbws isn't initialized.  Try sbws init")

    datadir = conf['paths']['datadir']
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
    if args.scale_constant < 1:
        fail_hard('--scale-constant must be positive')

    data_period = conf.getint('general', 'data_period')
    recent = load_recent_results_in_datadir(
        data_period, datadir, success_only=True)
    if len(recent) == 0:
        log.warning('No recent results, so not generating anything. (Have you '
                    'ran sbws client recently?)')
        return

    by_relay = group_results_by_relay(recent)
    v3bw_lines = [result_data_to_v3bw_line(by_relay, fingerprint)
                  for fingerprint in by_relay]
    # Highest bandwidth first; ties keep their original relative order.
    v3bw_lines.sort(key=lambda entry: entry.bw, reverse=True)
    v3bw_lines = scale_lines(args, v3bw_lines)
    log_stats(v3bw_lines)

    with open(args.output, 'wt') as out:
        # Header: integer timestamp, then the version line.
        out.write(str(int(time_now())) + '\n')
        out.write('version={}\n'.format(version))
        out.writelines(str(entry) + '\n' for entry in v3bw_lines)