#!/bin/bash
# Monthly cleanup script: removes processed files, extracted archives and
# tarballs for one calendar month, then sweeps the archive/descriptor trees.
# All output (stdout + stderr) is appended to logs/clean.log — see exec below.
set -euo pipefail

# Resolve the script's own directory so the config can be found regardless of CWD.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# NOTE(review): config is expected to define PARSER_HOME, DATA_HOME and
# ARCHIVE_PATHS (and optionally RECENT_PATHS / DRY_RUN) — confirm, file not visible here.
source "$SCRIPT_DIR/config"

# --- logging (clean log) ---
# Fall back to the script directory if PARSER_HOME is unset (set -u safe).
LOG_DIR="${PARSER_HOME:-$SCRIPT_DIR}/logs"
mkdir -p "$LOG_DIR"
CLEAN_LOG="$LOG_DIR/clean.log"
# From here on, everything the script prints goes to the clean log.
exec >>"$CLEAN_LOG" 2>&1

# Timestamped log line: "[YYYY-MM-DD HH:MM:SS] message".
log(){ printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"; }

error_trap() {
  local rc=$?
  local line=${BASH_LINENO[0]:-$LINENO}   # line where the error occurred
  local src=${BASH_SOURCE[1]:-${BASH_SOURCE[0]}}
  local cmd=${BASH_COMMAND:-}
  log "ERROR: rc=$rc at ${src}:${line} :: ${cmd}"
  exit "$rc"
}
trap error_trap ERR

# --- helpers ---
# Normalize YYYY-MM or YYYY-MM-DD into "YM|first-day|first-day-of-next-month".
# Prints the triple on stdout; exits 2 on a malformed argument.
parse_month_window(){ # -> echo "YM|START|END"
  local arg="$1" month first_day next_first
  case "$arg" in
    [0-9][0-9][0-9][0-9]-[0-9][0-9])
      month="$arg"
      ;;
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9])
      month="${arg%-*}"   # strip the day, keep YYYY-MM
      ;;
    *)
      echo "ERROR: invalid date/month '$arg' (use YYYY-MM or YYYY-MM-DD)" >&2
      exit 2
      ;;
  esac
  first_day="${month}-01"
  next_first="$(date -d "$first_day +1 month" +%F)"   # exclusive window end (GNU date)
  printf '%s|%s|%s\n' "$month" "$first_day" "$next_first"
}

# rm wrapper honoring DRY_RUN=1: announce instead of deleting.
rm_maybe(){
  case "${DRY_RUN:-0}" in
    1) echo "[DRY_RUN] rm $*" ;;
    *) rm "$@" ;;
  esac
}
# rm -rf wrapper honoring DRY_RUN=1: announce instead of deleting.
rm_rf_maybe(){
  case "${DRY_RUN:-0}" in
    1) echo "[DRY_RUN] rm -rf $*" ;;
    *) rm -rf "$@" ;;
  esac
}

# Remove every entry inside a directory while keeping the directory itself.
# Missing directories are logged and skipped. Honors DRY_RUN=1 (list only).
empty_dir_preserve(){
  local target="$1"
  if [[ ! -d "$target" ]]; then
    log "Skip (missing): $target"
    return 0
  fi
  log "Emptying contents of $target (preserving the directory)"
  if [[ "${DRY_RUN:-0}" == "1" ]]; then
    # Dry run: show what would go away.
    find "$target" -mindepth 1 -depth -print
  else
    # -mindepth 1 keeps the directory; -depth removes children before parents.
    find "$target" -mindepth 1 -depth -delete
  fi
}

# --- main cleanup ---
#######################################
# Delete everything produced for one calendar month, then sweep the
# archive/descriptor trees clean (the directories themselves survive).
# Globals:
#   DATA_HOME, PARSER_HOME, ARCHIVE_PATHS (from sourced config)
#   RECENT_PATHS (optional), DRY_RUN (optional, honored via helpers)
#   ARCHIVE_DIR / ARCHIVES_DIR / DESCRIPTORS_DIR (optional overrides for step 5)
# Arguments:
#   $1 - month as YYYY-MM
#   $2 - window start date (YYYY-MM-01, inclusive)
#   $3 - window end date (first day of next month, exclusive)
#######################################
cleanup_month(){
  local ym="$1" start="$2" end="$3"
  log "Cleanup month=$ym (window: $start .. $end)"

  # 1) Delete moved/processed files for this month from each destination folder.
  #    Selection is by modification time in [start, end) using GNU find's -newermt.
  for p in "${ARCHIVE_PATHS[@]}"; do
    local dest="$DATA_HOME/$p"
    [[ -d "$dest" ]] || continue
    log "Deleting files in $dest for $ym"
    if [[ "${DRY_RUN:-0}" == "1" ]]; then
      find "$dest" -type f -newermt "$start" ! -newermt "$end" -print
    else
      find "$dest" -type f -newermt "$start" ! -newermt "$end" -delete
    fi
  done

  # 2) Remove extracted archive directories and tarballs for this month.
  #    Extracted trees live under $PARSER_HOME/archives/ (plural), tarballs
  #    under $PARSER_HOME/archive/ (singular) — see the NOTE in step 5.
  mkdir -p "$PARSER_HOME/archives"
  for p in "${ARCHIVE_PATHS[@]}"; do
    local q="${p##*/}"                                  # last path component
    local bases=("$q-$ym" "bridge-$q-$ym" "exit-list-$ym")
    for base in "${bases[@]}"; do
      local dir="$PARSER_HOME/archives/$base"
      local tar="$PARSER_HOME/archive/$base.tar.xz"
      if [[ -d "$dir" ]]; then log "Deleting extracted $dir"; rm_rf_maybe "$dir"; fi
      if [[ -f "$tar" ]]; then log "Deleting archive $tar"; rm_maybe -f "$tar"; fi
    done
  done

  # 3) Recreate “recent” dirs if your pipeline expects them.
  # Use a safe 'is set' check that works with 'set -u'.
  if [[ "${RECENT_PATHS+set}" == "set" && ${#RECENT_PATHS[@]} -gt 0 ]]; then
    log "Ensuring recent paths exist"
    for r in "${RECENT_PATHS[@]}"; do
      mkdir -p "$DATA_HOME/$r"
    done
  fi

  # 4) Metrics line (optional; keep for continuity).
  # FIX: create the directory first — under 'set -e' the append redirection
  # would abort the whole script on a fresh install where logs/parser/ is missing.
  mkdir -p "$PARSER_HOME/logs/parser"
  echo "$(date '+%Y-%m-%d %H:%M:%S') cleaned month=$ym" >> "$PARSER_HOME/logs/parser/metrics.log"

  # 5) Final sweep: empty archive(s) and descriptors completely (preserve the folders)
  # NOTE: descriptors live under $PARSER_HOME/descriptors (your DATA_HOME),
  #       tarballs under $PARSER_HOME/archive, extracted under $PARSER_HOME/archives.
  local ARCHIVE_DIR_DEFAULT="$PARSER_HOME/archive"
  local ARCHIVES_DIR_DEFAULT="$PARSER_HOME/archives"
  local DESCRIPTORS_DIR_DEFAULT="$DATA_HOME"

  # 'local X="${X:-default}"' works in bash: the RHS expands (seeing the
  # outer/env value) before the variable becomes local.
  local ARCHIVE_DIR="${ARCHIVE_DIR:-$ARCHIVE_DIR_DEFAULT}"
  local ARCHIVES_DIR="${ARCHIVES_DIR:-$ARCHIVES_DIR_DEFAULT}"
  local DESCRIPTORS_DIR="${DESCRIPTORS_DIR:-$DESCRIPTORS_DIR_DEFAULT}"

  empty_dir_preserve "$ARCHIVE_DIR"
  empty_dir_preserve "$ARCHIVES_DIR"
  empty_dir_preserve "$DESCRIPTORS_DIR"

  log "Cleanup finished for month=$ym"
}

# Entry point: default to the current month when no argument is given.
# FIX: use a plain command substitution instead of 'read < <(...)'. With a
# process substitution, parse_month_window's 'exit 2' on bad input dies in a
# subshell and the script instead fails via 'read' with rc=1, losing both the
# intended exit code and a clean ERR-trap report. Command substitution lets
# 'set -e' propagate rc=2 from the failing assignment.
WINDOW="$(parse_month_window "${1:-$(date +%Y-%m)}")"
IFS='|' read -r YM START END <<<"$WINDOW"
cleanup_month "$YM" "$START" "$END"
