Commit 0661b8a2 authored by Matt Traudt
Browse files

Switch to storing results in a dict instead of list

parent afee2647
......@@ -2,7 +2,6 @@ from sbws.globals import (fail_hard, is_initted)
from sbws.lib.v3bwfile import V3BwHeader
from sbws.lib.resultdump import ResultSuccess
from sbws.lib.resultdump import load_recent_results_in_datadir
from sbws.lib.resultdump import group_results_by_relay
from argparse import ArgumentDefaultsHelpFormatter
from statistics import median
import os
......@@ -117,8 +116,7 @@ def main(args, conf):
log.warning('No recent results, so not generating anything. (Have you '
'ran sbws scanner recently?)')
return
data = group_results_by_relay(results)
data_lines = [result_data_to_v3bw_line(data, fp) for fp in data]
data_lines = [result_data_to_v3bw_line(results, fp) for fp in results]
data_lines = sorted(data_lines, key=lambda d: d.bw, reverse=True)
data_lines = scale_lines(args, data_lines)
header = V3BwHeader()
......
......@@ -3,7 +3,6 @@ from sbws.lib.resultdump import Result
from sbws.lib.resultdump import ResultError
from sbws.lib.resultdump import ResultSuccess
from sbws.lib.resultdump import load_recent_results_in_datadir
from sbws.lib.resultdump import group_results_by_relay
from argparse import ArgumentDefaultsHelpFormatter
import os
from datetime import datetime
......@@ -133,5 +132,4 @@ def main(args, conf):
if len(results) < 1:
log.warning('No fresh results')
return
data = group_results_by_relay(results)
print_stats(args, data)
print_stats(args, results)
......@@ -18,29 +18,26 @@ from sbws.util.filelock import DirectoryLock
log = logging.getLogger(__name__)
def group_results_by_relay(results, starting_dict=None):
    ''' Group a list of Results by the fingerprint of the relay each one
    measured, and return the grouping as a dict mapping fingerprint to a
    list of Results. Optionally seed the grouping with an existing dict
    instead of starting from an empty one. '''
    grouped = starting_dict if starting_dict else {}
    assert isinstance(grouped, dict)
    assert isinstance(results, list)
    for res in results:
        assert isinstance(res, Result)
        # setdefault creates the per-relay list on first sight of the fp
        grouped.setdefault(res.fingerprint, []).append(res)
    return grouped
def merge_result_dicts(d1, d2):
    '''
    Merge two result dictionaries. A result dictionary maps relay
    fingerprints to lists of Results for that relay. The lists in d2 are
    appended onto the corresponding lists in d1; note that d1 is updated
    in place and is also the returned dict.
    '''
    for fp, res_list in d2.items():
        d1.setdefault(fp, []).extend(res_list)
    return d1
def load_result_file(fname, success_only=False):
''' Reads in all lines from the given file, and parses them into Result
structures (or subclasses of Result). Optionally only keeps ResultSuccess.
Returns all kept Results as a list. This function does not care about the
age of the results '''
Returns all kept Results as a result dictionary. This function does not
care about the age of the results '''
assert os.path.isfile(fname)
d = []
d = {}
num_ignored = 0
with DirectoryLock(os.path.dirname(fname)):
with open(fname, 'rt') as fd:
......@@ -51,26 +48,35 @@ def load_result_file(fname, success_only=False):
continue
if success_only and isinstance(r, ResultError):
continue
d.append(r)
log.debug('Read %d lines from %s', len(d), fname)
fp = r.fingerprint
if fp not in d:
d[fp] = []
d[fp].append(r)
num_lines = sum([len(d[fp]) for fp in d])
log.debug('Read %d lines from %s', num_lines, fname)
if num_ignored > 0:
log.warning('Had to ignore %d results due to not knowing how to '
'parse them.', num_ignored)
return d
def trim_results(fresh_days, result_dict):
    ''' Given a result dictionary (relay fingerprint -> list of Results),
    remove all Results that are no longer valid and return the filtered
    data as a new dictionary.

    A Result is kept only if its time is within the last fresh_days days.
    Relays whose results are all too old are omitted from the returned
    dict entirely (no empty lists are left behind).

    :param fresh_days: int, maximum allowed age of a Result, in days
    :param result_dict: dict mapping relay fingerprint to list of Results
    :returns: a new dict of the same shape containing only fresh Results
    '''
    assert isinstance(fresh_days, int)
    assert isinstance(result_dict, dict)
    data_period = fresh_days * 24*60*60
    oldest_allowed = time.time() - data_period
    out_results = {}
    for fp in result_dict:
        for result in result_dict[fp]:
            if result.time >= oldest_allowed:
                # Create the relay's list lazily so fully-trimmed relays
                # never get an entry in the output dict.
                out_results.setdefault(fp, []).append(result)
    num_in = sum(len(v) for v in result_dict.values())
    num_out = sum(len(v) for v in out_results.values())
    log.debug('Keeping %d/%d results', num_out, num_in)
    return out_results
......@@ -80,7 +86,7 @@ def load_recent_results_in_datadir(fresh_days, datadir, success_only=False):
Results as a list '''
assert isinstance(fresh_days, int)
assert os.path.isdir(datadir)
results = []
results = {}
today = datetime.utcfromtimestamp(time.time())
data_period = fresh_days + 2
oldest_day = today - timedelta(days=data_period)
......@@ -94,11 +100,13 @@ def load_recent_results_in_datadir(fresh_days, datadir, success_only=False):
os.path.join(datadir, '*', '{}*.txt'.format(d))]
for pattern in patterns:
for fname in glob(pattern):
results.extend(load_result_file(
fname, success_only=success_only))
new_results = load_result_file(
fname, success_only=success_only)
results = merge_result_dicts(results, new_results)
working_day += timedelta(days=1)
results = trim_results(fresh_days, results)
if len(results) == 0:
num_res = sum([len(results[fp]) for fp in results])
if num_res == 0:
log.warning('Results files that are valid not found. '
'Probably sbws scanner was not run first or '
'it ran more than %d days ago or '
......@@ -424,7 +432,10 @@ class ResultDump:
''' Call from ResultDump thread '''
assert isinstance(result, Result)
with self.data_lock:
self.data.append(result)
fp = result.fingerprint
if fp not in self.data:
self.data[fp] = []
self.data[fp].append(result)
self.data = trim_results(self.fresh_days, self.data)
def handle_result(self, result):
......@@ -464,5 +475,8 @@ class ResultDump:
def results_for_relay(self, relay):
    ''' Return the list of stored Results for the given relay, or an
    empty list if we have none for it.

    :param relay: a stem RouterStatusEntryV3 for the relay of interest
    :returns: list of Results measured for that relay (possibly empty)
    '''
    assert isinstance(relay, RouterStatusEntryV3)
    fp = relay.fingerprint
    # Hold the lock so we don't race with the ResultDump thread
    # mutating self.data while we look up this relay's entry.
    with self.data_lock:
        if fp not in self.data:
            return []
        return self.data[fp]
......@@ -94,8 +94,13 @@ def test_generate_single_success_noscale(dotsbws_success_result, caplog,
conf = get_config(args)
sbws.core.generate.main(args, conf)
dd = conf['paths']['datadir']
# Here results is a dict
results = load_recent_results_in_datadir(1, dd, success_only=False)
assert len(results) == 1, 'There should be one result in the datadir'
assert isinstance(results, dict)
res_len = sum([len(results[fp]) for fp in results])
assert res_len == 1, 'There should be one result in the datadir'
# And here we change it to a list
results = [r for fp in results for r in results[fp]]
result = results[0]
assert isinstance(result, ResultSuccess), 'The one existing result '\
'should be a success'
......@@ -120,8 +125,13 @@ def test_generate_single_success_scale(dotsbws_success_result, parser,
conf = get_config(args)
sbws.core.generate.main(args, conf)
dd = conf['paths']['datadir']
# Here results is a dict
results = load_recent_results_in_datadir(1, dd, success_only=False)
assert len(results) == 1, 'There should be one result in the datadir'
assert isinstance(results, dict)
res_len = sum([len(results[fp]) for fp in results])
assert res_len == 1, 'There should be one result in the datadir'
# And here we change it to a list
results = [r for fp in results for r in results[fp]]
result = results[0]
assert isinstance(result, ResultSuccess), 'The one existing result '\
'should be a success'
......@@ -145,8 +155,13 @@ def test_generate_single_relay_success_noscale(
conf = get_config(args)
sbws.core.generate.main(args, conf)
dd = conf['paths']['datadir']
# Here results is a dict
results = load_recent_results_in_datadir(1, dd, success_only=False)
assert len(results) == 2, 'There should be two results in the datadir'
assert isinstance(results, dict)
res_len = sum([len(results[fp]) for fp in results])
assert res_len == 2, 'There should be two results in the datadir'
# And here we change it to a list
results = [r for fp in results for r in results[fp]]
for result in results:
assert isinstance(result, ResultSuccess), 'All existing results '\
'should be a success'
......@@ -172,8 +187,13 @@ def test_generate_single_relay_success_scale(
conf = get_config(args)
sbws.core.generate.main(args, conf)
dd = conf['paths']['datadir']
# Here results is a dict
results = load_recent_results_in_datadir(1, dd, success_only=False)
assert len(results) == 2, 'There should be two results in the datadir'
assert isinstance(results, dict)
res_len = sum([len(results[fp]) for fp in results])
assert res_len == 2, 'There should be two results in the datadir'
# And here we change it to a list
results = [r for fp in results for r in results[fp]]
for result in results:
assert isinstance(result, ResultSuccess), 'All existing results '\
'should be a success'
......@@ -197,8 +217,13 @@ def test_generate_two_relays_success_noscale(
conf = get_config(args)
sbws.core.generate.main(args, conf)
dd = conf['paths']['datadir']
# Here results is a dict
results = load_recent_results_in_datadir(1, dd, success_only=False)
assert len(results) == 4, 'There should be 4 results in the datadir'
assert isinstance(results, dict)
res_len = sum([len(results[fp]) for fp in results])
assert res_len == 4, 'There should be 4 results in the datadir'
# And here we change it to a list
results = [r for fp in results for r in results[fp]]
for result in results:
assert isinstance(result, ResultSuccess), 'All existing results '\
'should be a success'
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment