Unverified Commit 279874f4 authored by Georg Koppen's avatar Georg Koppen
Browse files

Merge remote-tracking branch 'gitlab/merge-requests/71' into maint-1.1

parents cde89500 fc3d3b99
Pipeline #3906 passed with stage
in 75 minutes and 14 seconds
...@@ -203,6 +203,18 @@ From Torflow's `README.spec.txt`_ (section 1.6):: ...@@ -203,6 +203,18 @@ From Torflow's `README.spec.txt`_ (section 1.6)::
In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and
``filt_bw`` is ``filt_sbws``:: ``filt_bw`` is ``filt_sbws``::
for s in rs.router.streams:
if isinstance(s, ClosedStream):
tot_bytes += s.tot_bytes()
tot_duration += s.end_time - s.start_time
tot_bw += s.bandwidth()
s_cnt += 1
# FIXME: Hrmm.. do we want to do weighted avg or pure avg here?
# If files are all the same size, it shouldn't matter..
if s_cnt > 0:
rs.sbw = tot_bw/s_cnt
else: rs.sbw = None
for rs in RouterStats.query.filter(stats_clause).\ for rs in RouterStats.query.filter(stats_clause).\
options(eagerload_all('router.streams.circuit.routers')).all(): options(eagerload_all('router.streams.circuit.routers')).all():
tot_sbw = 0 tot_sbw = 0
...@@ -224,6 +236,19 @@ In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and ...@@ -224,6 +236,19 @@ In the code, `SQLSupport.py`_, ``strm_bw`` is ``sbw`` and
if sbw_cnt: rs.filt_sbw = tot_sbw/sbw_cnt if sbw_cnt: rs.filt_sbw = tot_sbw/sbw_cnt
else: rs.filt_sbw = None else: rs.filt_sbw = None
When it is written to the file, it seems to write the string "None" when
``filt_sbw`` or ``strm_bw`` is None. That would raise an exception when
calculating the network average, so presumably it never happens?::
def cvt(a,b,c=1):
if type(a) == float: return int(round(a/c,b))
elif type(a) == int: return a
elif type(a) == type(None): return "None"
else: return type(a)
f.write(" strm_bw="+str(cvt(s.sbw,0)))
f.write(" filt_bw="+str(cvt(s.filt_sbw,0)))
This is also expressed in pseudocode in the `bandwidth file spec`_, section B.4 This is also expressed in pseudocode in the `bandwidth file spec`_, section B.4
step 1. step 1.
......
...@@ -187,6 +187,14 @@ MAX_RECENT_PRIORITY_RELAY_COUNT = ( ...@@ -187,6 +187,14 @@ MAX_RECENT_PRIORITY_RELAY_COUNT = (
MAX_RECENT_PRIORITY_LIST_COUNT * MAX_RELAYS_PER_PRIORITY_LIST MAX_RECENT_PRIORITY_LIST_COUNT * MAX_RELAYS_PER_PRIORITY_LIST
) )
# Relay type identifiers, used by util/stem.py to classify a relay from its
# consensus flags:
#   G  - relay with the Guard flag but not the Exit flag
#   M  - relay with neither flag (also used when the flags are unknown)
#   E  - relay with the Exit flag but not the Guard flag
#   GE - relay with both the Guard and the Exit flags
G, M, E, GE = 0, 1, 2, 3
# Used by lib/scaling.py to calculate network means by relay type
RELAY_TYPES = [G, M, E, GE]
def fail_hard(*a, **kw): def fail_hard(*a, **kw):
''' Log something ... and then exit as fast as possible ''' ''' Log something ... and then exit as fast as possible '''
......
from statistics import mean from statistics import mean
from sbws.globals import RELAY_TYPES
from sbws.util.stem import rs_relay_type
def bw_measurements_from_results(results): def bw_measurements_from_results(results):
return [ return [
...@@ -14,10 +17,37 @@ def bw_filt(bw_measurements): ...@@ -14,10 +17,37 @@ def bw_filt(bw_measurements):
It is the equivalent to Torflow's ``filt_sbw``. It is the equivalent to Torflow's ``filt_sbw``.
``mu`` in this function is the equivalent to Torflow's ``sbw``. ``mu`` in this function is the equivalent to Torflow's ``sbw``.
""" """
mu = 1 # It's safe to return 0 here, because:
if bw_measurements: # 1. this value will be the numerator when calculating the ratio.
mu = mean(bw_measurements) # 2. `kb_round_x_sig_dig` returns a minimum of 1.
# This should never be the case, as the measurements come from successful
# results.
if not bw_measurements:
return 0
# Torflow is rounding to an integer, so is `bw_mean_from_results` in
# `v3bwfile.py`
mu = round(mean(bw_measurements))
bws_gte_mean = list(filter(lambda bw: bw >= mu, bw_measurements)) bws_gte_mean = list(filter(lambda bw: bw >= mu, bw_measurements))
if bws_gte_mean: if bws_gte_mean:
return mean(bws_gte_mean) return round(mean(bws_gte_mean))
return 1 return mu
def network_means_by_relay_type(bw_lines, router_statuses_d):
    """Return the network stream and filtered bandwidth means per relay type.

    Every line in ``bw_lines`` is first tagged, through its
    ``set_relay_type`` method, with the type that ``rs_relay_type`` returns
    for its router status.  The router status is looked up in
    ``router_statuses_d`` by the line's ``node_id`` without the ``$``
    character; when there is no such mapping or no entry for the relay,
    ``rs_relay_type(None)`` decides the type.

    :param bw_lines: iterable of bandwidth lines exposing ``node_id``,
        ``bw_mean``, ``bw_filt`` and ``set_relay_type``.
    :param router_statuses_d: mapping from fingerprint to router status; may
        be empty or ``None``.
    :returns: tuple ``(mu_type, muf_type)`` of two independent dicts keyed by
        relay type.  ``mu_type`` holds the mean of ``bw_mean`` and
        ``muf_type`` the mean of ``bw_filt`` for the lines of that type.
        Relay types without any line are absent from both dicts.
    """
    # Temporarily assign the type of relay to calculate network stream and
    # filtered bandwidth by type
    for line in bw_lines:
        rs = None
        if router_statuses_d:
            rs = router_statuses_d.get(line.node_id.replace("$", ""), None)
        line.set_relay_type(rs_relay_type(rs))

    # These must be two distinct dicts.  A chained assignment
    # (``mu_type = muf_type = {}``) would bind both names to the same object,
    # and the filtered means would overwrite the stream means.
    mu_type, muf_type = {}, {}
    for rt in RELAY_TYPES:
        bw_lines_type = [line for line in bw_lines if line.relay_type == rt]
        if bw_lines_type:
            # Torflow does not round these values.
            # Ensure they won't be 0 to avoid division by 0 Exception
            mu_type[rt] = mean([line.bw_mean for line in bw_lines_type]) or 1
            muf_type[rt] = mean([line.bw_filt for line in bw_lines_type]) or 1
    return mu_type, muf_type
...@@ -875,9 +875,12 @@ class V3BWLine(object): ...@@ -875,9 +875,12 @@ class V3BWLine(object):
def bw_mean_from_results(results): def bw_mean_from_results(results):
bws = [dl['amount'] / dl['duration'] bws = [dl['amount'] / dl['duration']
for r in results for dl in r.downloads] for r in results for dl in r.downloads]
# It's safe to return 0 here, because:
# 1. this value will be the numerator when calculating the ratio.
# 2. `kb_round_x_sig_dig` returns a minimum of 1.
if bws: if bws:
return max(round(mean(bws)), 1) return round(mean(bws))
return 1 return 0
@staticmethod @staticmethod
def last_time_from_results(results): def last_time_from_results(results):
...@@ -982,6 +985,11 @@ class V3BWLine(object): ...@@ -982,6 +985,11 @@ class V3BWLine(object):
len(bw_line_str), BW_LINE_SIZE) len(bw_line_str), BW_LINE_SIZE)
return bw_line_str return bw_line_str
def set_relay_type(self, relay_type):
    """Store ``relay_type`` on this object as the ``relay_type`` attribute."""
    setattr(self, "relay_type", relay_type)
def del_relay_type(self):
    """Remove the ``relay_type`` attribute from this object.

    Raises ``AttributeError`` when the attribute is not set.
    """
    del self.relay_type
class V3BWFile(object): class V3BWFile(object):
""" """
...@@ -1223,12 +1231,11 @@ class V3BWFile(object): ...@@ -1223,12 +1231,11 @@ class V3BWFile(object):
""" """
log.info("Calculating relays' bandwidth using Torflow method.") log.info("Calculating relays' bandwidth using Torflow method.")
bw_lines_tf = copy.deepcopy(bw_lines) bw_lines_tf = copy.deepcopy(bw_lines)
# mean (Torflow's strm_avg) mu_type, muf_type = scaling.network_means_by_relay_type(
mu = mean([l.bw_mean for l in bw_lines]) bw_lines_tf, router_statuses_d
# filtered mean (Torflow's filt_avg) )
muf = mean([l.bw_filt for l in bw_lines]) log.debug('mu %s', mu_type)
log.debug('mu %s', mu) log.debug('muf %s', muf_type)
log.debug('muf %s', muf)
# Torflow's ``tot_net_bw``, sum of the scaled bandwidth for the relays # Torflow's ``tot_net_bw``, sum of the scaled bandwidth for the relays
# that are in the last consensus # that are in the last consensus
...@@ -1289,10 +1296,12 @@ class V3BWFile(object): ...@@ -1289,10 +1296,12 @@ class V3BWFile(object):
continue continue
# Torflow's scaling # Torflow's scaling
ratio_stream = l.bw_mean / mu # relay_type is set in `network_means_by_relay_type` in the lines
ratio_stream_filtered = l.bw_filt / muf # above
ratio_stream = l.bw_mean / mu_type[l.relay_type]
ratio_stream_filtered = l.bw_filt / muf_type[l.relay_type]
l.del_relay_type()
ratio = max(ratio_stream, ratio_stream_filtered) ratio = max(ratio_stream, ratio_stream_filtered)
# Assign it to an attribute, so it's not lost before capping and # Assign it to an attribute, so it's not lost before capping and
# rounding # rounding
l.bw = ratio * min_bandwidth l.bw = ratio * min_bandwidth
......
...@@ -12,9 +12,11 @@ import logging ...@@ -12,9 +12,11 @@ import logging
import os import os
from sbws.globals import fail_hard from sbws.globals import fail_hard
from sbws.globals import (TORRC_STARTING_POINT, TORRC_RUNTIME_OPTIONS, from sbws.globals import (TORRC_STARTING_POINT, TORRC_RUNTIME_OPTIONS,
TORRC_OPTIONS_CAN_FAIL) TORRC_OPTIONS_CAN_FAIL, G, M, E, GE)
from sbws import settings from sbws import settings
from stem import Flag
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
stream_building_lock = RLock() stream_building_lock = RLock()
...@@ -325,3 +327,21 @@ def is_torrc_starting_point_set(tor_controller): ...@@ -325,3 +327,21 @@ def is_torrc_starting_point_set(tor_controller):
if not bad_options: if not bad_options:
log.info("Tor is correctly configured to work with sbws.") log.info("Tor is correctly configured to work with sbws.")
return bad_options return bad_options
def rs_relay_type(rs):
    """Return the relay type constant for the router status ``rs``.

    The result is ``GE`` when ``rs.flags`` contains both the Exit and the
    Guard flags, ``G`` when it contains only Guard, ``E`` when it contains
    only Exit and ``M`` otherwise.

    ``M`` is also returned when ``rs`` is falsy.  In Torflow, the equivalent
    to the bw_lines is initialized to "", so a relay that is not in the
    previous consensus, and whose flags are therefore unknown, would be
    classified as "Medium", the fail case in ``Node.node_class()``.  It is
    not known whether that is a bug or a desired side effect, but the same
    behaviour is kept here.
    """
    if not rs:
        return M
    has_exit = Flag.EXIT in rs.flags
    has_guard = Flag.GUARD in rs.flags
    if has_exit and has_guard:
        return GE
    if has_guard:
        return G
    if has_exit:
        return E
    return M
"""Unit tests for scaling.py.""" """Unit tests for scaling.py."""
import os
from statistics import mean
from sbws.lib import scaling from sbws.lib import scaling
from sbws.lib.resultdump import load_result_file, ResultSuccess
def test_bw_filt(): def test_bw_filt():
...@@ -10,4 +13,57 @@ def test_bw_filt(): ...@@ -10,4 +13,57 @@ def test_bw_filt():
] ]
fb = scaling.bw_filt(bw_measurements) fb = scaling.bw_filt(bw_measurements)
# This is greater than the mean, that is 61422.73714139576 # This is greater than the mean, that is 61422.73714139576
assert fb == 83505.81986994506 assert fb == 83506
# When there are no measurements, which cannot be the case for successful
# results.
bw_measurements = []
assert 0 == scaling.bw_filt(bw_measurements)
bw_measurements = [1, 0]
# Because rounded to int
assert 0 == round(mean(bw_measurements))
# So the filtered bw will be also 0
assert 0 == scaling.bw_filt(bw_measurements)
bw_measurements = [1, 2, 3]
# Because rounded to int
assert 2 == round(mean(bw_measurements))
assert 2 == scaling.bw_filt(bw_measurements)
bw_measurements = [10, 0]
assert 5 == round(mean(bw_measurements))
# Because the value 10 is bigger than the mean
assert 10 == scaling.bw_filt(bw_measurements)
bw_measurements = [0, 10, 20]
assert 10 == round(mean(bw_measurements))
# Because 10 and 20 are bigger or equal than the mean
assert 15 == scaling.bw_filt(bw_measurements)
def test_bw_filt_from_results(root_data_path):
    """Check the rounded mean and filtered bandwidth from a stored result file.

    For every relay with at least one successful result, the filtered
    bandwidth must not be lower than the rounded mean, and four known relays
    must yield the exact expected pair of values.
    """
    results_file = os.path.join(
        root_data_path, ".sbws", "datadir", "2019-03-25.txt"
    )
    bw_filts = {}
    for fp, values in load_result_file(results_file).items():
        success_results = [r for r in values if isinstance(r, ResultSuccess)]
        if not success_results:
            continue
        bw_measurements = scaling.bw_measurements_from_results(
            success_results
        )
        # (rounded mean, filtered bandwidth) for this relay
        bw_filts[fp] = (
            round(mean(bw_measurements)),
            scaling.bw_filt(bw_measurements),
        )

    for mu, muf in bw_filts.values():
        assert mu <= muf

    # Expected (rounded mean, filtered bandwidth) for known fingerprints.
    expected = {
        '117A456C911114076BEB4E757AC48B16CC0CCC5F': (5526756, 5643086),
        '693F73187624BE760AAD2A12C5ED89DB1DE044F5': (5664965, 5774274),
        '270A861ABED22EC2B625198BCCD7B2B9DBFFC93C': (5508279, 5583737),
        'E894C65997F8EC96558B554176EEEA39C6A43EF6': (5379911, 5485088),
    }
    for fp, (exp_mu, exp_muf) in expected.items():
        assert exp_mu == bw_filts[fp][0]
        assert exp_muf == bw_filts[fp][1]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment