Make RelayPrioritizer variables configurable (3a7628a4) · Commits · The Tor Project / Network Health / sbws

sbws/config.default.ini

+15 −0

Original line number	Diff line number	Diff line
		@@ -56,3 +56,18 @@ extra_lines =
		stale_days = 10
		# After this many days, delete data files
		rotten_days = 90

		[relayprioritizer]
		# The target fraction of best priority relays we would like to return.
		# 0.05 is 5%. In a 7000 relay network, 5% is 350 relays.
		#
		# In a network of ~6500 relays and with a ResultDump containing 1 result per
		# relay, the best_priority() function takes ~11 seconds to complete on my
		# home desktop. Using this parameter allows us to balance between calling
		# best_priority() more often (but wasting more CPU), and calling it less
		# often (but taking longer to get back to relays with non-successful results).
		#
		# Alternatively, we could rewrite best_priority() so that it runs faster.
		fraction_relays = 0.05
		# The minimum number of best priority relays we are willing to return
		min_relays = 50

sbws/lib/relayprioritizer.py

+5 −15

Original line number	Diff line number	Diff line
		@@ -10,20 +10,6 @@ import logging
		log = logging.getLogger(__name__)


		# We want to at least return the MIN_TO_RETURN best priority relays ...
		MIN_TO_RETURN = 50
		# But ideally, we return PERCENT_TO_RETURN of the relays because it will be
		# larger and we won't have to recalculate priority so much. In a network of
		# ~6500 relays and with a ResultDump containing 1 result per relay, the
		# best_priority() function takes ~11 seconds to complete on my home desktop.
		# Using this parameter allows us to balance between calling best_priority()
		# more often (but wasting more CPU), and calling it less often (but taking
		# longer to get back to relays with non-successful results).
		#
		# Alternatively, we could rewrite best_priority() to not suck so much.
		PERCENT_TO_RETURN = 0.05 # 5%


		class RelayPrioritizer:
		def __init__(self, args, conf, relay_list, result_dump):
		assert isinstance(relay_list, RelayList)
		@@ -33,6 +19,9 @@ class RelayPrioritizer:
		self.result_dump = result_dump
		self.measure_authorities = conf.getboolean(
		'scanner', 'measure_authorities')
		self.min_to_return = conf.getint('relayprioritizer', 'min_relays')
		self.fraction_to_return = conf.getfloat(
		'relayprioritizer', 'fraction_relays')

		def best_priority(self):
		''' Return a generator containing the best priority relays.
		@@ -91,7 +80,8 @@ class RelayPrioritizer:
		# Sort the relays by their priority, with the smallest (best) priority
		# relays at the front
		relays = sorted(relays, key=lambda r: r.priority)
		cutoff = max(int(len(relays) * PERCENT_TO_RETURN), MIN_TO_RETURN)
		cutoff = max(int(len(relays) * self.fraction_to_return),
		self.min_to_return)
		fn_tstop = Decimal(time.time())
		fn_tdelta = (fn_tstop - fn_tstart) * 1000
		log.info('Spent %f msecs calculating relay best priority', fn_tdelta)

sbws/util/config.py

+18 −0

Original line number	Diff line number	Diff line
		@@ -102,6 +102,7 @@ def validate_config(conf):
		errors.extend(_validate_tor(conf))
		errors.extend(_validate_paths(conf))
		errors.extend(_validate_destinations(conf))
		errors.extend(_validate_relayprioritizer(conf))
		return len(errors) < 1, errors


		@@ -191,6 +192,23 @@ def _validate_tor(conf):
		return errors


		def _validate_relayprioritizer(conf):
		    """Collect validation errors for the ``relayprioritizer`` section.

		    Two keys are declared for the section: ``min_relays`` (int, minimum 1,
		    no maximum) and ``fraction_relays`` (float, minimum 0.0, maximum 1.0).
		    The section is passed in turn to the key, int and float section
		    helpers, and everything they return is gathered into a single list.

		    NOTE(review): the helpers are defined outside this view; presumably
		    they return error strings rendered from the template, and whether the
		    bounds are inclusive is decided there -- confirm against them.

		    :param conf: the configuration object handed to the section helpers
		    :returns: list of everything the three helpers reported, in the order
		        keys, ints, floats (empty when nothing was reported)
		    """
		    section = 'relayprioritizer'
		    template = Template('$sec/$key ($val): $e')
		    # Declared bounds for each typed option in the section.
		    int_limits = {'min_relays': {'minimum': 1, 'maximum': None}}
		    float_limits = {'fraction_relays': {'minimum': 0.0, 'maximum': 1.0}}
		    # Every key that is allowed to appear in the section.
		    known_keys = list(int_limits) + list(float_limits)
		    problems = list(
		        _validate_section_keys(conf, section, known_keys, template))
		    problems += _validate_section_ints(conf, section, int_limits, template)
		    problems += _validate_section_floats(
		        conf, section, float_limits, template)
		    return problems


		def _validate_destinations(conf):
		errors = []
		sec = 'destinations'