Verified Commit 279874f4 authored by Georg Koppen's avatar Georg Koppen
Browse files

Merge remote-tracking branch 'gitlab/merge-requests/71' into maint-1.1

parents cde89500 fc3d3b99
Pipeline #3914 passed with stage
in 88 minutes and 50 seconds
......@@ -203,6 +203,18 @@ From Torflow's `README.spec.txt`_ (section 1.6)::
In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and
``filt_bw`` is ``filt_sbw``::
for s in rs.router.streams:
if isinstance(s, ClosedStream):
tot_bytes += s.tot_bytes()
tot_duration += s.end_time - s.start_time
tot_bw += s.bandwidth()
s_cnt += 1
# FIXME: Hrmm.. do we want to do weighted avg or pure avg here?
# If files are all the same size, it shouldn't matter..
if s_cnt > 0:
rs.sbw = tot_bw/s_cnt
else: rs.sbw = None
for rs in RouterStats.query.filter(stats_clause).\
options(eagerload_all('router.streams.circuit.routers')).all():
tot_sbw = 0
......@@ -224,6 +236,19 @@ In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and
if sbw_cnt: rs.filt_sbw = tot_sbw/sbw_cnt
else: rs.filt_sbw = None
When it is written to the file, it seems to write the string "None" when
``filt_sbw`` or ``strm_bw`` are None. That would raise an exception when
calculating the network average, so presumably it never happens?::
def cvt(a,b,c=1):
if type(a) == float: return int(round(a/c,b))
elif type(a) == int: return a
elif type(a) == type(None): return "None"
else: return type(a)
f.write(" strm_bw="+str(cvt(s.sbw,0)))
f.write(" filt_bw="+str(cvt(s.filt_sbw,0)))
This is also expressed in pseudocode in the `bandwidth file spec`_, section B.4
step 1.
......
......@@ -187,6 +187,14 @@ MAX_RECENT_PRIORITY_RELAY_COUNT = (
MAX_RECENT_PRIORITY_LIST_COUNT * MAX_RELAYS_PER_PRIORITY_LIST
)
# Relay type identifiers, used by util/stem.py:
#   G  (0): relay with the Guard flag only
#   M  (1): relay with neither flag (also the fallback for unknown relays)
#   E  (2): relay with the Exit flag only
#   GE (3): relay with both the Guard and the Exit flags
G, M, E, GE = range(4)
# Used by lib/scaling.py to calculate network means by relay type
RELAY_TYPES = [G, M, E, GE]
def fail_hard(*a, **kw):
''' Log something ... and then exit as fast as possible '''
......
from statistics import mean
from sbws.globals import RELAY_TYPES
from sbws.util.stem import rs_relay_type
def bw_measurements_from_results(results):
return [
......@@ -14,10 +17,37 @@ def bw_filt(bw_measurements):
It is the equivalent to Torflow's ``filt_sbw``.
``mu`` in this function is the equivalent to Torflow's ``sbw``.
"""
mu = 1
if bw_measurements:
mu = mean(bw_measurements)
# It's safe to return 0 here, because:
# 1. this value will be the numerator when calculating the ratio.
# 2. `kb_round_x_sig_dig` returns a minimum of 1.
# This should never be the case, as the measurements come from successful
# results.
if not bw_measurements:
return 0
# Torflow is rounding to an integer, so is `bw_mean_from_results` in
# `v3bwfile.py`
mu = round(mean(bw_measurements))
bws_gte_mean = list(filter(lambda bw: bw >= mu, bw_measurements))
if bws_gte_mean:
return mean(bws_gte_mean)
return 1
return round(mean(bws_gte_mean))
return mu
def network_means_by_relay_type(bw_lines, router_statuses_d):
    """Calculate the stream and filtered bandwidth means by relay type.

    Every line in ``bw_lines`` is tagged with its relay type (through
    ``set_relay_type``) as a side effect, so that callers can later look up
    the mean that corresponds to each line.

    :param bw_lines: iterable of bandwidth lines having ``node_id``,
        ``bw_mean`` and ``bw_filt`` attributes.
    :param router_statuses_d: mapping from fingerprint (without the ``$``
        prefix) to router status; may be empty or ``None``.
    :returns: tuple ``(mu_type, muf_type)`` of dicts keyed by relay type,
        holding the mean of ``bw_mean`` and of ``bw_filt`` respectively.
        Relay types without any line are absent from both dicts.
    """
    # Temporarily assign the type of relay to calculate network stream and
    # filtered bandwidth by type
    for line in bw_lines:
        rs = None
        if router_statuses_d:
            rs = router_statuses_d.get(line.node_id.replace("$", ""), None)
        line.set_relay_type(rs_relay_type(rs))

    # These must be two distinct dicts: with a chained assignment
    # (``a = b = {}``) both names would refer to the same object and the
    # filtered means would overwrite the stream means.
    mu_type = {}
    muf_type = {}
    for rt in RELAY_TYPES:
        bw_lines_type = [line for line in bw_lines if line.relay_type == rt]
        if bw_lines_type:
            # Torflow does not round these values.
            # Ensure they won't be 0 to avoid division by 0 Exception
            mu_type[rt] = mean([line.bw_mean for line in bw_lines_type]) or 1
            muf_type[rt] = mean([line.bw_filt for line in bw_lines_type]) or 1
    return mu_type, muf_type
......@@ -875,9 +875,12 @@ class V3BWLine(object):
def bw_mean_from_results(results):
    """Return the rounded mean of the download speeds in ``results``.

    :param results: iterable of result objects, each with a ``downloads``
        iterable of dicts having ``amount`` and ``duration`` keys.
    :returns: the mean of ``amount / duration`` over all the downloads,
        rounded to an integer, or 0 when there are no downloads.
    """
    bws = [dl['amount'] / dl['duration']
           for r in results for dl in r.downloads]
    # It's safe to return 0 here, because:
    # 1. this value will be the numerator when calculating the ratio.
    # 2. `kb_round_x_sig_dig` returns a minimum of 1.
    if not bws:
        return 0
    return round(mean(bws))
@staticmethod
def last_time_from_results(results):
......@@ -982,6 +985,11 @@ class V3BWLine(object):
len(bw_line_str), BW_LINE_SIZE)
return bw_line_str
def set_relay_type(self, relay_type):
    """Store ``relay_type`` in the ``relay_type`` attribute of this line."""
    setattr(self, "relay_type", relay_type)
def del_relay_type(self):
    """Remove the ``relay_type`` attribute from this line.

    Raises ``AttributeError`` when the attribute is not set.
    """
    del self.relay_type
class V3BWFile(object):
"""
......@@ -1223,12 +1231,11 @@ class V3BWFile(object):
"""
log.info("Calculating relays' bandwidth using Torflow method.")
bw_lines_tf = copy.deepcopy(bw_lines)
# mean (Torflow's strm_avg)
mu = mean([l.bw_mean for l in bw_lines])
# filtered mean (Torflow's filt_avg)
muf = mean([l.bw_filt for l in bw_lines])
log.debug('mu %s', mu)
log.debug('muf %s', muf)
mu_type, muf_type = scaling.network_means_by_relay_type(
bw_lines_tf, router_statuses_d
)
log.debug('mu %s', mu_type)
log.debug('muf %s', muf_type)
# Torflow's ``tot_net_bw``, sum of the scaled bandwidth for the relays
# that are in the last consensus
......@@ -1289,10 +1296,12 @@ class V3BWFile(object):
continue
# Torflow's scaling
ratio_stream = l.bw_mean / mu
ratio_stream_filtered = l.bw_filt / muf
# relay_type is set in `network_means_by_relay_type` in the lines
# above
ratio_stream = l.bw_mean / mu_type[l.relay_type]
ratio_stream_filtered = l.bw_filt / muf_type[l.relay_type]
l.del_relay_type()
ratio = max(ratio_stream, ratio_stream_filtered)
# Assign it to an attribute, so it's not lost before capping and
# rounding
l.bw = ratio * min_bandwidth
......
......@@ -12,9 +12,11 @@ import logging
import os
from sbws.globals import fail_hard
from sbws.globals import (TORRC_STARTING_POINT, TORRC_RUNTIME_OPTIONS,
TORRC_OPTIONS_CAN_FAIL)
TORRC_OPTIONS_CAN_FAIL, G, M, E, GE)
from sbws import settings
from stem import Flag
log = logging.getLogger(__name__)
stream_building_lock = RLock()
......@@ -325,3 +327,21 @@ def is_torrc_starting_point_set(tor_controller):
if not bad_options:
log.info("Tor is correctly configured to work with sbws.")
return bad_options
def rs_relay_type(rs):
    """Return the relay type constant (G, M, E or GE) for a router status.

    :param rs: router status with a ``flags`` container, or a falsy value
        when the relay is not known.
    :returns: ``GE`` when both the Exit and Guard flags are present, ``G``
        or ``E`` when only one of them is, and ``M`` otherwise.
    """
    # In torflow, the equivalent to the bw_lines is initialized to "", so
    # when the relay is not in the previous consensus and it is not known
    # which flags it has, it would return "Medium", as it's the fail case in
    # Node.node_class().
    # It is not known whether it is a bug or a desired side effect that the
    # relays not in the consensus end up in the Middle class.
    if not rs:
        return M
    is_exit = Flag.EXIT in rs.flags
    is_guard = Flag.GUARD in rs.flags
    if is_exit and is_guard:
        return GE
    if is_guard:
        return G
    return E if is_exit else M
"""Unit tests for scaling.py."""
import os
from statistics import mean
from sbws.lib import scaling
from sbws.lib.resultdump import load_result_file, ResultSuccess
def test_bw_filt():
......@@ -10,4 +13,57 @@ def test_bw_filt():
]
fb = scaling.bw_filt(bw_measurements)
# This is greater than the mean, that is 61422.73714139576
assert fb == 83505.81986994506
assert fb == 83506
# When there are no measurements, which cannot be the case for successful
# results.
bw_measurements = []
assert 0 == scaling.bw_filt(bw_measurements)
bw_measurements = [1, 0]
# Because rounded to int
assert 0 == round(mean(bw_measurements))
# So the filtered bw will be also 0
assert 0 == scaling.bw_filt(bw_measurements)
bw_measurements = [1, 2, 3]
# Because rounded to int
assert 2 == round(mean(bw_measurements))
assert 2 == scaling.bw_filt(bw_measurements)
bw_measurements = [10, 0]
assert 5 == round(mean(bw_measurements))
# Because the value 10 is bigger than the mean
assert 10 == scaling.bw_filt(bw_measurements)
bw_measurements = [0, 10, 20]
assert 10 == round(mean(bw_measurements))
# Because 10 and 20 are greater than or equal to the mean
assert 15 == scaling.bw_filt(bw_measurements)
def test_bw_filt_from_results(root_data_path):
    """Check the mean and filtered mean obtained from a stored results file."""
    results = load_result_file(
        os.path.join(root_data_path, ".sbws", "datadir", "2019-03-25.txt")
    )
    # Map each fingerprint to its (rounded mean, filtered mean) pair,
    # considering only the relays that have successful results.
    bw_filts = {}
    for fp, values in results.items():
        success_results = [r for r in values if isinstance(r, ResultSuccess)]
        if not success_results:
            continue
        bw_measurements = scaling.bw_measurements_from_results(
            success_results
        )
        bw_filts[fp] = (
            round(mean(bw_measurements)),
            scaling.bw_filt(bw_measurements),
        )

    # The filtered mean is expected to be at least the plain mean.
    for mu, muf in bw_filts.values():
        assert mu <= muf

    expected = (
        ('117A456C911114076BEB4E757AC48B16CC0CCC5F', 5526756, 5643086),
        ('693F73187624BE760AAD2A12C5ED89DB1DE044F5', 5664965, 5774274),
        ('270A861ABED22EC2B625198BCCD7B2B9DBFFC93C', 5508279, 5583737),
        ('E894C65997F8EC96558B554176EEEA39C6A43EF6', 5379911, 5485088),
    )
    for fp, mu, muf in expected:
        assert mu == bw_filts[fp][0]
        assert muf == bw_filts[fp][1]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment