Unverified Commit 7430b4ef authored by Georg Koppen

Merge remote-tracking branch 'gitlab/merge-requests/56' into maint-1.1

parents 96aadc69 e416547c
Pipeline #2310 passed with stage in 43 minutes and 21 seconds
@@ -137,7 +137,8 @@ Each relay's bandwidth measurements are selected in the following way:
   If they are not, the relay MUST NOT be included in the Bandwidth File.
#. The measurements that are older than an arbitrary number of seconds
   in the past MUST be discarded.
-   Currently this number is the same as ``data_period`` (5 days).
+   Currently this number is the same as ``data_period`` (5 days) when not
+   scaling as Torflow and 28 days when scaling as Torflow.
If the number of relays to include in the Bandwidth File is less than
a percentage (currently 60%) of the number of relays in the consensus,
......
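The spec change above states the selection rule: when scaling as Torflow, measurements up to 28 days old are used when generating the Bandwidth File; otherwise the window is ``data_period`` (5 days). A minimal sketch of that rule, assuming an illustrative `results` list whose items carry a Unix timestamp in `result.time` (not sbws's actual API):

```python
import time

# Hypothetical sketch of the selection rule described above; `results` and
# `filter_recent` are illustrative names, not sbws's actual API.
DATA_PERIOD_SECS = 5 * 24 * 60 * 60       # default when not scaling as Torflow
GENERATE_PERIOD_SECS = 28 * 24 * 60 * 60  # default when scaling as Torflow


def filter_recent(results, scaling_as_torflow, now=None):
    """Discard measurements older than the applicable period."""
    now = now if now is not None else time.time()
    secs_recent = GENERATE_PERIOD_SECS if scaling_as_torflow else DATA_PERIOD_SECS
    oldest_allowed = now - secs_recent
    # Each result is assumed to carry a Unix timestamp in `result.time`.
    return [r for r in results if r.time >= oldest_allowed]
```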
@@ -143,9 +143,9 @@ relayprioritizer
cleanup
data_files_compress_after_days = INT
-    After this many days, compress data files. (Default: 10)
+    After this many days, compress data files. (Default: 29)
data_files_delete_after_days = INT
-    After this many days, delete data files. (Default: 90)
+    After this many days, delete data files. (Default: 57)
v3bw_files_compress_after_days = INT
    After this many days, compress v3bw files. (Default: 1)
v3bw_files_delete_after_days = INT
......
@@ -71,9 +71,14 @@ extra_lines =
[cleanup]
# After this many days, compress data files
-data_files_compress_after_days = 10
-# After this many days, delete data files
-data_files_delete_after_days = 90
+# #40017: To generate files as Torflow does, the result files must be kept
+# for GENERATE_PERIOD seconds. The number of days after which they are
+# compressed or deleted could be added as defaults (currently in globals.py),
+# as a factor of GENERATE_PERIOD.
+data_files_compress_after_days = 29
+# After this many days, delete data files.
+# 57 == 28 * 2 + 1.
+data_files_delete_after_days = 57
# After this many days, compress v3bw files (1d)
v3bw_files_compress_after_days = 1
# After this many days, delete v3bw files (7d)
......
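The new cleanup defaults are tied to the 28-day generation window: 29 is one day more than GENERATE_PERIOD expressed in days, and 57 is 28 * 2 + 1. A small sketch, using only the standard-library ``configparser`` (not sbws's own config loader), that reads the values above and checks those relations:

```python
from configparser import ConfigParser

# Minimal sketch under the assumption that the cleanup defaults follow the
# 28-day GENERATE_PERIOD; the inline config mirrors the snippet above.
GENERATE_PERIOD_DAYS = 28

conf = ConfigParser()
conf.read_string("""
[cleanup]
data_files_compress_after_days = 29
data_files_delete_after_days = 57
""")

compress_days = conf.getint('cleanup', 'data_files_compress_after_days')
delete_days = conf.getint('cleanup', 'data_files_delete_after_days')

# Keep raw results at least one generation window before compressing them,
# and roughly two windows before deleting them.
assert compress_days >= GENERATE_PERIOD_DAYS + 1       # 29 == 28 + 1
assert delete_days == GENERATE_PERIOD_DAYS * 2 + 1     # 57 == 28 * 2 + 1
```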
@@ -99,28 +99,6 @@ def _check_validity_periods_v3bw(compress_after_days, delete_after_days):
"after a bigger number of days.")
def _check_validity_periods_results(
data_period, compress_after_days, delete_after_days):
if compress_after_days - 2 < data_period:
fail_hard(
'For safetly, cleanup/data_files_compress_after_days (%d) must be '
'at least 2 days larger than general/data_period (%d)',
compress_after_days, data_period)
if delete_after_days < compress_after_days:
fail_hard(
'cleanup/data_files_delete_after_days (%d) must be the same or '
'larger than cleanup/data_files_compress_after_days (%d)',
delete_after_days, compress_after_days)
if compress_after_days / 2 < data_period:
log.warning(
'cleanup/data_files_compress_after_days (%d) is less than twice '
'general/data_period (%d). For ease of parsing older results '
'if necessary, it is recommended to make '
'data_files_compress_after_days at least twice the data_period.',
compress_after_days, data_period)
return True
def _clean_v3bw_files(args, conf):
v3bw_dname = conf.getpath('paths', 'v3bw_dname')
if not os.path.isdir(v3bw_dname):
@@ -147,13 +125,10 @@ def _clean_result_files(args, conf):
    datadir = conf.getpath('paths', 'datadir')
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
-    data_period = conf.getint('general', 'data_period')
    compress_after_days = conf.getint(
        'cleanup', 'data_files_compress_after_days')
    delete_after_days = conf.getint(
        'cleanup', 'data_files_delete_after_days')
-    _check_validity_periods_results(
-        data_period, compress_after_days, delete_after_days)
    # first delete so that the files to be deleted are not compressed first
    files_to_delete = _get_files_mtime_older_than(
......
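With the extra validity check removed, ``_clean_result_files`` simply asks for files whose mtime is older than the configured number of days. As an illustration of that kind of selection, here is a hypothetical stand-in for a helper like ``_get_files_mtime_older_than``; the real sbws helper may differ in name, signature, and filtering:

```python
import os
import time

# Illustrative sketch only; not sbws's actual implementation.
def files_mtime_older_than(dname, days, extensions=('.txt',)):
    """Yield paths under `dname` whose mtime is more than `days` days old."""
    oldest_allowed = time.time() - days * 24 * 60 * 60
    for root, _, files in os.walk(dname):
        for name in files:
            if not name.endswith(tuple(extensions)):
                continue
            path = os.path.join(root, name)
            if os.path.getmtime(path) < oldest_allowed:
                yield path
```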
@@ -2,7 +2,7 @@ from math import ceil
from sbws.globals import (fail_hard, SBWS_SCALE_CONSTANT, TORFLOW_SCALING,
                          SBWS_SCALING, TORFLOW_BW_MARGIN, PROP276_ROUND_DIG,
-                          DAY_SECS, NUM_MIN_RESULTS)
+                          DAY_SECS, NUM_MIN_RESULTS, GENERATE_PERIOD)
from sbws.lib.v3bwfile import V3BWFile
from sbws.lib.resultdump import load_recent_results_in_datadir
from argparse import ArgumentDefaultsHelpFormatter
@@ -60,8 +60,9 @@ def gen_parser(sub):
help="Number of most significant digits to round bw.")
p.add_argument('-p', '--secs-recent', default=None, type=int,
help="How many secs in the past are results being "
"still considered. Note this value will supersede "
"data_period in the configuration.")
"still considered. Default is {} secs. If not scaling "
"as Torflow the default is data_period in the "
"configuration.".format(GENERATE_PERIOD))
p.add_argument('-a', '--secs-away', default=DAY_SECS, type=int,
help="How many secs results have to be away from each "
"other.")
@@ -90,6 +91,8 @@ def main(args, conf):
    scaling_method = TORFLOW_SCALING
    if args.secs_recent:
        fresh_days = ceil(args.secs_recent / 24 / 60 / 60)
+    elif scaling_method == TORFLOW_SCALING:
+        fresh_days = ceil(GENERATE_PERIOD / 24 / 60 / 60)
    else:
        fresh_days = conf.getint('general', 'data_period')
    reset_bw_ipv4_changes = conf.getboolean('general', 'reset_bw_ipv4_changes')
......
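The hunk above gives ``--secs-recent`` precedence, falls back to the 28-day GENERATE_PERIOD when scaling as Torflow, and otherwise uses the configured ``data_period``. A self-contained sketch of that precedence, with illustrative parameter names rather than sbws's actual call signature:

```python
from math import ceil

GENERATE_PERIOD = 28 * 24 * 60 * 60  # seconds, as defined in sbws.globals

# Sketch of the precedence shown above: an explicit --secs-recent wins,
# otherwise Torflow scaling implies the 28-day window, otherwise the
# configured data_period (already in days) is used. The parameters
# `secs_recent`, `torflow_scaling` and `data_period_days` are illustrative.
def choose_fresh_days(secs_recent, torflow_scaling, data_period_days):
    if secs_recent:
        return ceil(secs_recent / 24 / 60 / 60)
    if torflow_scaling:
        return ceil(GENERATE_PERIOD / 24 / 60 / 60)  # 28 days
    return data_period_days
```

For example, `choose_fresh_days(None, True, 5)` returns 28, while `choose_fresh_days(None, False, 5)` returns 5.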
@@ -101,16 +101,22 @@ MAX_BW_DIFF_PERC = 50
# Tor already accepts lines of any size, but leaving the limit anyway.
BW_LINE_SIZE = 1022
-# RelayList, ResultDump, v3bwfile
+# RelayList, ResultDump
# For how many seconds in the past the relays and measurements data is kept/
# considered valid.
-# This is currently set by default in config.default.ini as ``date_period``,
-# and used in ResultDump and v3bwfile.
+# This is currently set by default in config.default.ini as ``data_period``,
+# and used in ResultDump.
# In a future refactor, constants in config.default.ini should be moved here,
# or calculated in settings, so that there's no need to pass the configuration
# to all the functions.
MEASUREMENTS_PERIOD = 5 * 24 * 60 * 60
+# #40017: To make sbws behave similarly to Torflow, the number of days of raw
+# past measurements used when generating the Bandwidth File has to be 28, not
+# 5. Note that this is different from the number of days used when measuring,
+# which feed the monitoring values and the stored json results.
+GENERATE_PERIOD = 28 * 24 * 60 * 60
# Metadata to send in every request, so that data servers can know which
# scanners are using them.
# In Requests these keys are case insensitive.
......
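For reference, the two windows defined in the hunk above expressed in seconds, with a quick check of the arithmetic (constants mirror ``sbws.globals``):

```python
DAY_SECS = 24 * 60 * 60

MEASUREMENTS_PERIOD = 5 * DAY_SECS   # window used while measuring
GENERATE_PERIOD = 28 * DAY_SECS      # window used when generating the file

assert MEASUREMENTS_PERIOD == 432_000      # 5 days in seconds
assert GENERATE_PERIOD == 2_419_200        # 28 days in seconds
assert GENERATE_PERIOD // DAY_SECS == 28
```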