Commit 536a9568 authored by juga

chg: generate, cleanup: Use 28 days of measurements

When generating the Bandwidth File as Torflow, use 28 days of past raw
measurements instead of 5, by default.
Also keep the raw measurements for that long before compressing or
deleting them.
Also stop checking whether the compression and deletion
periods are valid: those checks compared against arbitrary
values and did not take the defaults into account.

Closes: #40017
parent 38649f0f
@@ -143,9 +143,9 @@ relayprioritizer
 cleanup
 data_files_compress_after_days = INT
-    After this many days, compress data files. (Default: 10)
+    After this many days, compress data files. (Default: 29)
 data_files_delete_after_days = INT
-    After this many days, delete data files. (Default: 90)
+    After this many days, delete data files. (Default: 57)
 v3bw_files_compress_after_days = INT
     After this many days, compress v3bw files. (Default: 1)
 v3bw_files_delete_after_days = INT
......
@@ -71,9 +71,14 @@ extra_lines =
 [cleanup]
-# After this many days, compress data files
-data_files_compress_after_days = 10
-# After this many days, delete data files
-data_files_delete_after_days = 90
+# #40017: To generate files as Torflow does, the result files must be kept
+# for GENERATE_PERIOD seconds.
+# The number of days after which they are compressed or deleted could be
+# added as defaults (currently in globals.py), simply as a factor of
+# GENERATE_PERIOD.
+data_files_compress_after_days = 29
+# After this many days, delete data files.
+# 57 == 28 * 2 + 1.
+data_files_delete_after_days = 57
 # After this many days, compress v3bw files (1d)
 v3bw_files_compress_after_days = 1
 # After this many days, delete v3bw files (7d)
......
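The comment in the hunk above suggests deriving these day values from GENERATE_PERIOD instead of hard-coding them. A minimal sketch of what that derivation could look like (not part of this commit; the helper name is illustrative):

# Sketch only: derive the cleanup defaults from GENERATE_PERIOD rather than
# hard-coding 29 and 57. Hypothetical helper, not in this commit.
from math import ceil

GENERATE_PERIOD = 28 * 24 * 60 * 60  # seconds; defined in sbws globals

def cleanup_defaults(generate_period=GENERATE_PERIOD):
    days = ceil(generate_period / (24 * 60 * 60))  # 28
    # Compress one day after the generation window ends; delete after twice
    # the window plus one day: 29 and 57 with the 28-day default.
    return days + 1, days * 2 + 1

assert cleanup_defaults() == (29, 57)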
@@ -99,28 +99,6 @@ def _check_validity_periods_v3bw(compress_after_days, delete_after_days):
             "after a bigger number of days.")
 
 
-def _check_validity_periods_results(
-        data_period, compress_after_days, delete_after_days):
-    if compress_after_days - 2 < data_period:
-        fail_hard(
-            'For safety, cleanup/data_files_compress_after_days (%d) must be '
-            'at least 2 days larger than general/data_period (%d)',
-            compress_after_days, data_period)
-    if delete_after_days < compress_after_days:
-        fail_hard(
-            'cleanup/data_files_delete_after_days (%d) must be the same or '
-            'larger than cleanup/data_files_compress_after_days (%d)',
-            delete_after_days, compress_after_days)
-    if compress_after_days / 2 < data_period:
-        log.warning(
-            'cleanup/data_files_compress_after_days (%d) is less than twice '
-            'general/data_period (%d). For ease of parsing older results '
-            'if necessary, it is recommended to make '
-            'data_files_compress_after_days at least twice the data_period.',
-            compress_after_days, data_period)
-    return True
-
-
 def _clean_v3bw_files(args, conf):
     v3bw_dname = conf.getpath('paths', 'v3bw_dname')
     if not os.path.isdir(v3bw_dname):
@@ -147,13 +125,10 @@ def _clean_result_files(args, conf):
     datadir = conf.getpath('paths', 'datadir')
     if not os.path.isdir(datadir):
         fail_hard('%s does not exist', datadir)
-    data_period = conf.getint('general', 'data_period')
     compress_after_days = conf.getint(
         'cleanup', 'data_files_compress_after_days')
     delete_after_days = conf.getint(
        'cleanup', 'data_files_delete_after_days')
-    _check_validity_periods_results(
-        data_period, compress_after_days, delete_after_days)
     # first delete so that the files to be deleted are not compressed first
     files_to_delete = _get_files_mtime_older_than(
......
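The removed check compared the periods against data_period and arbitrary margins. If a sanity check were still wanted, a defaults-aware version might look like this sketch (illustrative only; the commit simply drops the old check):

# Sketch: a hypothetical defaults-aware sanity check for the cleanup periods.
from math import ceil

GENERATE_PERIOD = 28 * 24 * 60 * 60  # from sbws globals

def periods_are_valid(compress_after_days, delete_after_days):
    generate_days = ceil(GENERATE_PERIOD / 24 / 60 / 60)  # 28
    # Keep raw results at least as long as the generation window needs them,
    # and never delete files before they would have been compressed.
    return (compress_after_days > generate_days
            and delete_after_days >= compress_after_days)

assert periods_are_valid(29, 57)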
@@ -2,7 +2,7 @@ from math import ceil
 from sbws.globals import (fail_hard, SBWS_SCALE_CONSTANT, TORFLOW_SCALING,
                           SBWS_SCALING, TORFLOW_BW_MARGIN, PROP276_ROUND_DIG,
-                          DAY_SECS, NUM_MIN_RESULTS)
+                          DAY_SECS, NUM_MIN_RESULTS, GENERATE_PERIOD)
 from sbws.lib.v3bwfile import V3BWFile
 from sbws.lib.resultdump import load_recent_results_in_datadir
 from argparse import ArgumentDefaultsHelpFormatter
@@ -60,8 +60,11 @@ def gen_parser(sub):
                    help="Number of most significant digits to round bw.")
     p.add_argument('-p', '--secs-recent', default=None, type=int,
-                   help="How many secs in the past are results being "
-                        "still considered. Note this value will supersede "
-                        "data_period in the configuration.")
+                   help="How many secs in the past are results being "
+                        "still considered. Default is {}. If not scaling as "
+                        "Torflow the default is data_period in the "
+                        "configuration.".format(
+                            ceil(GENERATE_PERIOD / 24 / 60 / 60))
+                   )
     p.add_argument('-a', '--secs-away', default=DAY_SECS, type=int,
                    help="How many secs results have to be away from each "
                         "other.")
"other.")
@@ -90,6 +93,8 @@ def main(args, conf):
         scaling_method = TORFLOW_SCALING
     if args.secs_recent:
         fresh_days = ceil(args.secs_recent / 24 / 60 / 60)
+    elif scaling_method == TORFLOW_SCALING:
+        fresh_days = ceil(GENERATE_PERIOD / 24 / 60 / 60)
     else:
         fresh_days = conf.getint('general', 'data_period')
     reset_bw_ipv4_changes = conf.getboolean('general', 'reset_bw_ipv4_changes')
......
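Taken together, the two generate.py hunks give --secs-recent this precedence: an explicit CLI value wins, then GENERATE_PERIOD when scaling as Torflow, then the configured data_period. A standalone sketch of that logic with simplified inputs (not the commit's code; the constant value for TORFLOW_SCALING is a stand-in):

# Sketch of the fresh_days precedence implemented above, with simplified
# inputs instead of args/conf objects.
from math import ceil

GENERATE_PERIOD = 28 * 24 * 60 * 60
TORFLOW_SCALING = 'torflow'  # stand-in for the constant in sbws globals

def fresh_days_for(secs_recent, scaling_method, data_period_days):
    if secs_recent:
        return ceil(secs_recent / 24 / 60 / 60)
    if scaling_method == TORFLOW_SCALING:
        return ceil(GENERATE_PERIOD / 24 / 60 / 60)
    return data_period_days

assert fresh_days_for(None, TORFLOW_SCALING, 5) == 28
assert fresh_days_for(3 * 24 * 60 * 60, TORFLOW_SCALING, 5) == 3
assert fresh_days_for(None, 'sbws', 5) == 5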
@@ -101,16 +101,22 @@ MAX_BW_DIFF_PERC = 50
 # Tor already accepts lines of any size, but leaving the limit anyway.
 BW_LINE_SIZE = 1022
-# RelayList, ResultDump, v3bwfile
+# RelayList, ResultDump
 # For how many seconds in the past the relays and measurements data is kept/
 # considered valid.
-# This is currently set by default in config.default.ini as ``date_period``,
-# and used in ResultDump and v3bwfile.
+# This is currently set by default in config.default.ini as ``data_period``,
+# and used in ResultDump.
 # In a future refactor, constants in config.default.ini should be moved here,
 # or calculated in settings, so that there's no need to pass the configuration
 # to all the functions.
 MEASUREMENTS_PERIOD = 5 * 24 * 60 * 60
+# #40017: To make sbws behave similarly to Torflow, the number of days of raw
+# past measurements used when generating the Bandwidth File has to be 28,
+# not 5.
+# Note that this is different from the number of raw past measurements used
+# when measuring, which are used for the monitoring values and storing json.
+GENERATE_PERIOD = 28 * 24 * 60 * 60
 # Metadata to send in every request, so that data servers can know which
 # scanners are using them.
 # In Requests these keys are case insensitive.
......
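For quick reference, the two constants now separate the measuring window from the generating window; an illustrative check of the values in seconds:

# Illustrative check of the two windows defined in the hunk above.
MEASUREMENTS_PERIOD = 5 * 24 * 60 * 60   # window used while measuring
GENERATE_PERIOD = 28 * 24 * 60 * 60      # window used when generating the file

assert MEASUREMENTS_PERIOD == 432_000    # 5 days
assert GENERATE_PERIOD == 2_419_200      # 28 days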