Skip to content
Snippets Groups Projects
Commit 359f87a3 authored by juga's avatar juga Committed by Matt Traudt
Browse files

Revert "Refactor to clean v3bw files too"

This reverts commit 380461e56bd87bd2709abe591b6f4b11e1481e35.
parent e8bc2d71
Branches
Tags
No related merge requests found
......@@ -10,8 +10,6 @@ import shutil
import logging
import time
from sbws.util.timestamp import unixts_to_dt_obj
log = logging.getLogger(__name__)
......@@ -29,21 +27,17 @@ def gen_parser(sub):
p.add_argument('--dry-run', action='store_true',
help='Don\'t actually compress or delete anything')
p.add_argument('--v3bw', action='store_true', help='Clean also v3bw files')
def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False):
def _get_older_files_than(dname, num_days_ago, extensions):
assert os.path.isdir(dname)
assert isinstance(time_delta, int)
assert isinstance(num_days_ago, int)
assert isinstance(extensions, list)
for ext in extensions:
assert isinstance(ext, str)
assert ext[0] == '.'
# Determine oldest allowed date
today = datetime.utcfromtimestamp(time.time())
oldest_day = today - timedelta(days=time_delta)
if is_v3bw:
oldest = today - timedelta(minutes=time_delta)
oldest_day = today - timedelta(days=num_days_ago)
# Compile a regex that can extract a date from a file name that looks like
# /path/to/foo/YYYY-MM-DD*.extension
extensions = [re.escape(e) for e in extensions]
......@@ -56,52 +50,38 @@ def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False):
for root, dirs, files in os.walk(dname):
for f in files:
fname = os.path.join(root, f)
if is_v3bw: # or (v3bw_ext not in fname)
# not forcing files to have correct names just the extension
_, ext = os.path.splitext(fname)
if ext not in ['.v3bw']:
log.debug('Ignoring %s because it doesn\'t have extension '
'%s', fname, ext)
continue
dt = unixts_to_dt_obj(os.path.getmtime(fname))
if dt < oldest and os.path.splitext:
yield fname
else:
match = regex.match(fname)
if not match:
log.debug('Ignoring %s because it doesn\'t look like '
'YYYY-MM-DD', fname)
continue
d = datetime(*[int(n) for n in match.group(1).split('-')])
if d < oldest_day:
yield fname
def _remove_rotten_files(datadir, rotten_days, dry_run=True, is_v3bw=False):
match = regex.match(fname)
if not match:
log.debug('Ignoring %s because it doesn\'t look like '
'YYYY-MM-DD', fname)
continue
d = datetime(*[int(n) for n in match.group(1).split('-')])
if d < oldest_day:
yield fname
def _remove_rotten_files(datadir, rotten_days, dry_run=True):
assert os.path.isdir(datadir)
assert isinstance(rotten_days, int)
# Hold the lock for basically the entire time just in case someone else
# moves files between when we get the list of files and when we try to
# delete them.
exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
with DirectoryLock(datadir):
for fname in _get_older_files_than(datadir, rotten_days, exts,
is_v3bw):
for fname in _get_older_files_than(datadir, rotten_days,
['.txt', '.txt.gz']):
log.info('Deleting %s', fname)
if not dry_run:
os.remove(fname)
def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False):
def _compress_stale_files(datadir, stale_days, dry_run=True):
assert os.path.isdir(datadir)
assert isinstance(stale_days, int)
# Hold the lock for basically the entire time just in case someone else
# moves files between when we get the list of files and when we try to
# compress them.
exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
with DirectoryLock(datadir):
for fname in _get_older_files_than(datadir, stale_days, exts,
is_v3bw):
for fname in _get_older_files_than(datadir, stale_days, ['.txt']):
log.info('Compressing %s', fname)
if dry_run:
continue
......@@ -112,24 +92,6 @@ def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False):
os.remove(fname)
def _check_validity_periods(valid, stale, rotten):
    """Sanity-check the configured valid/stale/rotten cleanup periods.

    Fails hard when the periods would let files be compressed while the
    data in them is still considered valid, or deleted before they are
    compressed, and warns when the stale period is tight enough to make
    parsing older results inconvenient.

    :param int valid: general/data_period or general/valid_* period
    :param int stale: cleanup/stale_* period (when files get compressed)
    :param int rotten: cleanup/rotten_* period (when files get deleted)
    """
    # Compressing must happen comfortably after the data stops being valid.
    if stale - 2 < valid:
        # NOTE: fixed typo "safetly" -> "safety" in the error message.
        fail_hard('For safety, cleanup/stale_* (%d) must be at least 2 '
                  'days larger than general/data_period or general/valid_ * '
                  '(%d)', stale, valid)
    # Deleting must never happen before compressing.
    if rotten < stale:
        fail_hard('cleanup/rotten_* (%d) must be the same or larger than '
                  'cleanup/stale_* (%d)', rotten, stale)
    # Not fatal, but a tight stale period makes older results harder to use.
    if stale / 2 < valid:
        log.warning(
            'cleanup/stale_ (%d) is less than twice '
            'general/data_period or general/valid_*(%d). '
            'For ease of parsing older results '
            'if necessary, it is recommended to make stale at least '
            'twice the data_period.', stale, valid)
def main(args, conf):
'''
Main entry point in to the cleanup command.
......@@ -164,16 +126,3 @@ def main(args, conf):
_remove_rotten_files(datadir, rotten_days, dry_run=args.dry_run)
_compress_stale_files(datadir, stale_days, dry_run=args.dry_run)
if args.v3bw:
v3bw_dir = conf['paths']['v3bw_dname']
if not os.path.isdir(datadir):
fail_hard('%s does not exist', v3bw_dir)
valid = conf.getint('general', 'valid_mins_v3bw_files')
stale = conf.getint('cleanup', 'stale_mins_v3bw_files')
rotten = conf.getint('cleanup', 'rotten_mins_v3bw_files')
_check_validity_periods(valid, stale, rotten)
_remove_rotten_files(v3bw_dir, rotten, dry_run=args.dry_run,
is_v3bw=True)
_compress_stale_files(v3bw_dir, stale, dry_run=args.dry_run,
is_v3bw=True)
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment