Commit e416547c authored by juga's avatar juga
Browse files

chg: generate, cleanup: Use 28 days of measurements

When generating the Bandwidth File as Torflow, use 28 days of past raw
measurements instead of 5, by default.
Also keep the raw measurements for that long before compressing or
deleting them.
Also stop validating the compression and delete periods, since that
validation did not check the defaults first and was based on
arbitrary values.

Closes: #40017
parent 38649f0f
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -137,7 +137,8 @@ Each relay bandwidth measurements are selected in the following way:
   If they are not, the relay MUST NOT be included in the Bandwidth File.
#. The measurements that are older than an arbitrary number of seconds
   in the past MUST be discarded.
   Currently this number is the same as ``data_period`` (5 days).
   Currently this number is the same as ``data_period`` (5 days) when not
   scaling as Torflow and 28 days when scaling as Torflow.

If the number of relays to include in the Bandwidth File is less than
a percentage (currently 60%) of the number of relays in the consensus,
+2 −2
Original line number Diff line number Diff line
@@ -143,9 +143,9 @@ relayprioritizer

cleanup
  data_files_compress_after_days = INT
    After this many days, compress data files. (Default: 10)
    After this many days, compress data files. (Default: 29)
  data_files_delete_after_days = INT
    After this many days, delete data files. (Default: 90)
    After this many days, delete data files. (Default: 57)
  v3bw_files_compress_after_days = INT
    After this many days, compress v3bw files. (Default: 1)
  v3bw_files_delete_after_days = INT
+8 −3
Original line number Diff line number Diff line
@@ -71,9 +71,14 @@ extra_lines =

[cleanup]
# After this many days, compress data files
data_files_compress_after_days = 10
# After this many days, delete data files
data_files_delete_after_days = 90
# #40017: To generate files as Torflow the result files must be kept for
# GENERATE_PERIOD seconds.
# The number of days after they are compressed or deleted could be added
# as defaults (currently globals.py), and just as a factor of GENERATE_PERIOD.
data_files_compress_after_days = 29
# After this many days, delete data files.
# 57 == 28 * 2 + 1.
data_files_delete_after_days = 57
# After this many days, compress v3bw files (1d)
v3bw_files_compress_after_days = 1
# After this many days, delete v3bw files (7d)
+0 −25
Original line number Diff line number Diff line
@@ -99,28 +99,6 @@ def _check_validity_periods_v3bw(compress_after_days, delete_after_days):
              "after a bigger number of days.")


def _check_validity_periods_results(
        data_period, compress_after_days, delete_after_days):
    if compress_after_days - 2 < data_period:
        fail_hard(
            'For safetly, cleanup/data_files_compress_after_days (%d) must be '
            'at least 2 days larger than general/data_period (%d)',
            compress_after_days, data_period)
    if delete_after_days < compress_after_days:
        fail_hard(
            'cleanup/data_files_delete_after_days (%d) must be the same or '
            'larger than cleanup/data_files_compress_after_days (%d)',
            delete_after_days, compress_after_days)
    if compress_after_days / 2 < data_period:
        log.warning(
            'cleanup/data_files_compress_after_days (%d) is less than twice '
            'general/data_period (%d). For ease of parsing older results '
            'if necessary, it is recommended to make '
            'data_files_compress_after_days at least twice the data_period.',
            compress_after_days, data_period)
    return True


def _clean_v3bw_files(args, conf):
    v3bw_dname = conf.getpath('paths', 'v3bw_dname')
    if not os.path.isdir(v3bw_dname):
@@ -147,13 +125,10 @@ def _clean_result_files(args, conf):
    datadir = conf.getpath('paths', 'datadir')
    if not os.path.isdir(datadir):
        fail_hard('%s does not exist', datadir)
    data_period = conf.getint('general', 'data_period')
    compress_after_days = conf.getint(
        'cleanup', 'data_files_compress_after_days')
    delete_after_days = conf.getint(
        'cleanup', 'data_files_delete_after_days')
    _check_validity_periods_results(
        data_period, compress_after_days, delete_after_days)

    # first delete so that the files to be deleted are not compressed first
    files_to_delete = _get_files_mtime_older_than(
+6 −3
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@ from math import ceil

from sbws.globals import (fail_hard, SBWS_SCALE_CONSTANT, TORFLOW_SCALING,
                          SBWS_SCALING, TORFLOW_BW_MARGIN, PROP276_ROUND_DIG,
                          DAY_SECS, NUM_MIN_RESULTS)
                          DAY_SECS, NUM_MIN_RESULTS, GENERATE_PERIOD)
from sbws.lib.v3bwfile import V3BWFile
from sbws.lib.resultdump import load_recent_results_in_datadir
from argparse import ArgumentDefaultsHelpFormatter
@@ -60,8 +60,9 @@ def gen_parser(sub):
                   help="Number of most significant digits to round bw.")
    p.add_argument('-p', '--secs-recent', default=None, type=int,
                   help="How many secs in the past are results being "
                        "still considered. Note this value will supersede "
                        "data_period in the configuration.")
                        "still considered. Default is {} secs. If not scaling "
                        "as Torflow the default is data_period in the "
                        "configuration.".format(GENERATE_PERIOD))
    p.add_argument('-a', '--secs-away', default=DAY_SECS, type=int,
                   help="How many secs results have to be away from each "
                        "other.")
@@ -90,6 +91,8 @@ def main(args, conf):
        scaling_method = TORFLOW_SCALING
    if args.secs_recent:
        fresh_days = ceil(args.secs_recent / 24 / 60 / 60)
    elif scaling_method == TORFLOW_SCALING:
        fresh_days = ceil(GENERATE_PERIOD / 24 / 60 / 60)
    else:
        fresh_days = conf.getint('general', 'data_period')
    reset_bw_ipv4_changes = conf.getboolean('general', 'reset_bw_ipv4_changes')
Loading