Loading sbws/config.default.ini +15 −0 Original line number Diff line number Diff line Loading @@ -56,3 +56,18 @@ extra_lines = stale_days = 10 # After this many days, delete data files rotten_days = 90 [relayprioritizer] # The target fraction of best priority relays we would like to return. # 0.05 is 5%. In a 7000 relay network, 5% is 350 relays. # # In a network of ~6500 relays and with a ResultDump containing 1 result per # relay, the best_priority() function takes ~11 seconds to complete on my # home desktop. Using this parameter allows us to balance between calling # best_priority() more often (but wasting more CPU), and calling it less # often (but taking longer to get back to relays with non-successful results). # # Alternatively, we could rewrite best_priority() to not suck so much. fraction_relays = 0.05 # The minimum number of best priority relays we are willing to return min_relays = 50 sbws/lib/relayprioritizer.py +5 −15 Original line number Diff line number Diff line Loading @@ -10,20 +10,6 @@ import logging log = logging.getLogger(__name__) # We want to at least return the MIN_TO_RETURN best priority relays ... MIN_TO_RETURN = 50 # But ideally, we return PERCENT_TO_RETURN of the relays because it will be # larger and we won't have to recalculate priority so much. In a network of # ~6500 relays and with a ResultDump containing 1 result per relay, the # best_priority() function takes ~11 seconds to complete on my home desktop. # Using this parameter allows us to balance between calling best_priority() # more often (but wasting more CPU), and calling it less often (but taking # longer to get back to relays with non-successful results). # # Alternatively, we could rewrite best_priority() to not suck so much. 
PERCENT_TO_RETURN = 0.05 # 5% class RelayPrioritizer: def __init__(self, args, conf, relay_list, result_dump): assert isinstance(relay_list, RelayList) Loading @@ -33,6 +19,9 @@ class RelayPrioritizer: self.result_dump = result_dump self.measure_authorities = conf.getboolean( 'scanner', 'measure_authorities') self.min_to_return = conf.getint('relayprioritizer', 'min_relays') self.fraction_to_return = conf.getfloat( 'relayprioritizer', 'fraction_relays') def best_priority(self): ''' Return a generator containing the best priority relays. Loading Loading @@ -91,7 +80,8 @@ class RelayPrioritizer: # Sort the relays by their priority, with the smallest (best) priority # relays at the front relays = sorted(relays, key=lambda r: r.priority) cutoff = max(int(len(relays) * PERCENT_TO_RETURN), MIN_TO_RETURN) cutoff = max(int(len(relays) * self.fraction_to_return), self.min_to_return) fn_tstop = Decimal(time.time()) fn_tdelta = (fn_tstop - fn_tstart) * 1000 log.info('Spent %f msecs calculating relay best priority', fn_tdelta) Loading sbws/util/config.py +18 −0 Original line number Diff line number Diff line Loading @@ -102,6 +102,7 @@ def validate_config(conf): errors.extend(_validate_tor(conf)) errors.extend(_validate_paths(conf)) errors.extend(_validate_destinations(conf)) errors.extend(_validate_relayprioritizer(conf)) return len(errors) < 1, errors Loading Loading @@ -191,6 +192,23 @@ def _validate_tor(conf): return errors def _validate_relayprioritizer(conf): errors = [] sec = 'relayprioritizer' err_tmpl = Template('$sec/$key ($val): $e') ints = { 'min_relays': {'minimum': 1, 'maximum': None}, } floats = { 'fraction_relays': {'minimum': 0.0, 'maximum': 1.0}, } all_valid_keys = list(ints.keys()) + list(floats.keys()) errors.extend(_validate_section_keys(conf, sec, all_valid_keys, err_tmpl)) errors.extend(_validate_section_ints(conf, sec, ints, err_tmpl)) errors.extend(_validate_section_floats(conf, sec, floats, err_tmpl)) return errors def _validate_destinations(conf): 
errors = [] sec = 'destinations' Loading Loading
sbws/config.default.ini +15 −0
@@ -56,3 +56,18 @@ extra_lines =
stale_days = 10
# After this many days, delete data files
rotten_days = 90

[relayprioritizer]
# The target fraction of best priority relays we would like to return.
# 0.05 is 5%. In a 7000 relay network, 5% is 350 relays.
# Must be between 0.0 and 1.0.
#
# In a network of ~6500 relays and with a ResultDump containing 1 result per
# relay, the best_priority() function takes ~11 seconds to complete on my
# home desktop. Using this parameter allows us to balance between calling
# best_priority() more often (but wasting more CPU), and calling it less
# often (but taking longer to get back to relays with non-successful results).
#
# Alternatively, we could rewrite best_priority() to be less expensive.
fraction_relays = 0.05
# The minimum number of best priority relays we are willing to return.
# Must be at least 1.
min_relays = 50
sbws/lib/relayprioritizer.py +5 −15 Original line number Diff line number Diff line Loading @@ -10,20 +10,6 @@ import logging log = logging.getLogger(__name__) # We want to at least return the MIN_TO_RETURN best priority relays ... MIN_TO_RETURN = 50 # But ideally, we return PERCENT_TO_RETURN of the relays because it will be # larger and we won't have to recalculate priority so much. In a network of # ~6500 relays and with a ResultDump containing 1 result per relay, the # best_priority() function takes ~11 seconds to complete on my home desktop. # Using this parameter allows us to balance between calling best_priority() # more often (but wasting more CPU), and calling it less often (but taking # longer to get back to relays with non-successful results). # # Alternatively, we could rewrite best_priority() to not suck so much. PERCENT_TO_RETURN = 0.05 # 5% class RelayPrioritizer: def __init__(self, args, conf, relay_list, result_dump): assert isinstance(relay_list, RelayList) Loading @@ -33,6 +19,9 @@ class RelayPrioritizer: self.result_dump = result_dump self.measure_authorities = conf.getboolean( 'scanner', 'measure_authorities') self.min_to_return = conf.getint('relayprioritizer', 'min_relays') self.fraction_to_return = conf.getfloat( 'relayprioritizer', 'fraction_relays') def best_priority(self): ''' Return a generator containing the best priority relays. Loading Loading @@ -91,7 +80,8 @@ class RelayPrioritizer: # Sort the relays by their priority, with the smallest (best) priority # relays at the front relays = sorted(relays, key=lambda r: r.priority) cutoff = max(int(len(relays) * PERCENT_TO_RETURN), MIN_TO_RETURN) cutoff = max(int(len(relays) * self.fraction_to_return), self.min_to_return) fn_tstop = Decimal(time.time()) fn_tdelta = (fn_tstop - fn_tstart) * 1000 log.info('Spent %f msecs calculating relay best priority', fn_tdelta) Loading
def _validate_relayprioritizer(conf):
    """Validate the ``[relayprioritizer]`` section of the configuration.

    Called from ``validate_config()``, which merges the returned list into
    its overall list of configuration errors.

    The section may contain only these keys:

    * ``min_relays``: integer, minimum 1, no maximum.
    * ``fraction_relays``: float, minimum 0.0, maximum 1.0.

    :param conf: the configuration object to check; it is handed unchanged
        to the ``_validate_section_*`` helpers.
    :returns: a list holding everything the helpers reported, in the order
        key check, integer check, float check. Empty when nothing was
        reported.
    """
    section = 'relayprioritizer'
    # Message template passed to the helpers (presumably filled in by them
    # with the section, key, offending value and error text).
    template = Template('$sec/$key ($val): $e')
    int_limits = {'min_relays': {'minimum': 1, 'maximum': None}}
    float_limits = {'fraction_relays': {'minimum': 0.0, 'maximum': 1.0}}
    # Every key permitted in this section: integer keys first, then floats.
    known_keys = [*int_limits, *float_limits]

    problems = list(
        _validate_section_keys(conf, section, known_keys, template))
    problems += _validate_section_ints(conf, section, int_limits, template)
    problems += _validate_section_floats(
        conf, section, float_limits, template)
    return problems