#!/bin/bash
# Download one month of archive tarballs, unpack them and file the extracted
# descriptors under DESCRIPTORS_DIR.
#
# Usage: <script> <YYYY-MM or YYYY-MM-DD>
#
# Settings are loaded from the `config` file that sits next to this script.
# ARCHIVE_URL and ARCHIVE_PATHS are used further down but never assigned in
# this file; presumably config provides them -- confirm.
#
# -E (errtrace): make the ERR trap installed below fire inside functions and
#    subshells too. Without it a failure inside a function aborts the script
#    (because of -e) without writing the "ERROR: failed at line ..." entry.
# -e / -u / pipefail: abort on unhandled errors, unset variables and failing
#    pipeline stages.
set -Eeuo pipefail

# Absolute directory of this script; the config file is expected beside it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/config"

# --- dirs from env or sensible defaults ---
# PARSER_HOME (if set and non-empty) is the base; otherwise the script directory.
PARSER_BASE="${PARSER_HOME:-$SCRIPT_DIR}"
ARCHIVE_DIR="${ARCHIVE_DIR:-$PARSER_BASE/archive}"
DESCRIPTORS_DIR="${DESCRIPTORS_DIR:-$PARSER_BASE/descriptors}"

# PROCESSED_LOG: prefer env/export from config; default to $PARSER_BASE/logs/downloaded.log
PROCESSED_LOG="${PROCESSED_LOG:-$PARSER_BASE/logs/downloaded.log}"
mkdir -p "$(dirname "$PROCESSED_LOG")"

# --- logging ---
LOG_DIR="$PARSER_BASE/logs"
mkdir -p "$LOG_DIR" "$ARCHIVE_DIR" "$DESCRIPTORS_DIR"
LLOG_FILE="$LOG_DIR/download.log"
# From here on, stdout and stderr of the script both append to the log file.
exec >>"$LLOG_FILE" 2>&1

# log MESSAGE...: write a timestamped line to the log.
# It writes to stderr (which the exec above points at the log file) rather
# than stdout, so a message emitted while stdout is captured or piped -- for
# example by the ERR trap firing inside $(...) or <(...) -- never ends up in
# the captured data.
log(){ echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2; }

# On any unhandled command failure: record where it happened, then exit with
# the failing command's status.
trap 'rc=$?; log "ERROR: failed at line $LINENO (rc=$rc)"; exit $rc' ERR

# require_arg "$@"
# Ensure at least one positional argument was supplied.
# Arguments: the caller's positional parameters.
# Outputs:   a usage line on stderr when no argument is present.
# Exits:     status 2 when no argument is present; otherwise returns 0.
require_arg(){
  if [[ $# -lt 1 ]]; then
    # Diagnostics belong on stderr, not stdout.
    printf 'Usage: %s <YYYY-MM or YYYY-MM-DD>\n' "$0" >&2
    exit 2
  fi
}

# parse_month_window INPUT
# Turn YYYY-MM or YYYY-MM-DD into the month it belongs to plus the half-open
# window [start, end) covering that month.
# Arguments: $1 - a month (YYYY-MM) or a date (YYYY-MM-DD); month must be
#                 01-12 and, when present, the day must be 01-31.
# Outputs:   one line "YYYY-MM|YYYY-MM-01|<first day of the following month>"
#            on stdout.
# Exits:     status 2 with a message on stderr when the input is malformed or
#            out of range (note: `exit`, not `return`).
parse_month_window(){
  local input="$1" ym start end year month
  # Range checks live in the pattern so that e.g. 2024-13 is rejected here with
  # a clear message instead of producing a bogus window.
  local pattern='^([0-9]{4})-(0[1-9]|1[0-2])(-(0[1-9]|[12][0-9]|3[01]))?$'

  if [[ "$input" =~ $pattern ]]; then
    year="${BASH_REMATCH[1]}"
    month="${BASH_REMATCH[2]}"
  else
    echo "ERROR: invalid date/month '$input' (use YYYY-MM or YYYY-MM-DD)" >&2
    exit 2
  fi

  ym="${year}-${month}"
  start="${ym}-01"

  # First day of the next month. 10# forces base 10 so that "08"/"09" (and
  # years with leading zeros) are not misread as octal.
  if (( 10#$month == 12 )); then
    printf -v end '%04d-01-01' "$(( 10#$year + 1 ))"
  else
    printf -v end '%s-%02d-01' "$year" "$(( 10#$month + 1 ))"
  fi

  echo "$ym|$start|$end"
}

# process_month YM START END
# Download, unpack and file every configured archive for one month.
#
# For each entry P of ARCHIVE_PATHS (Q = last path component of P):
#   1) fetch the month's tarball into ARCHIVE_DIR unless it is already there,
#   2) extract it into ARCHIVE_DIR unless its directory already exists,
#   3) move every extracted file into DESCRIPTORS_DIR/P, keeping the relative
#      directory layout, then prune the emptied extraction directories.
# A failed download or extraction is logged as WARN and only that archive is
# skipped. Finally PROCESSED_LOG is replaced by a single line holding YM.
#
# Globals:   ARCHIVE_PATHS, ARCHIVE_URL, ARCHIVE_DIR, DESCRIPTORS_DIR,
#            PROCESSED_LOG (all read); uses log().
# Arguments: $1 - month as YYYY-MM
#            $2 - window start, $3 - window end (only used in the log line)
process_month(){
  local ym="$1" start="$2" end="$3"
  local p q tar_url tar_file tar_part extract_root dest_root
  local rel dst_dir last_dir
  local -a rel_paths
  log "Start month=$ym (window: $start .. $end)"

  for p in "${ARCHIVE_PATHS[@]}"; do
    q="${p##*/}"

    # Default tarball name is <Q>-<YM>.tar.xz; two path families differ.
    tar_url="$ARCHIVE_URL/$p/$q-$ym.tar.xz"
    tar_file="$ARCHIVE_DIR/$q-$ym.tar.xz"

    if [[ "$p" == *"bridge-descriptors"* ]]; then
      tar_url="$ARCHIVE_URL/$p/bridge-$q-$ym.tar.xz"
      tar_file="$ARCHIVE_DIR/bridge-$q-$ym.tar.xz"
    elif [[ "$p" == "exit-lists" ]]; then
      tar_url="$ARCHIVE_URL/$p/exit-list-$ym.tar.xz"
      tar_file="$ARCHIVE_DIR/exit-list-$ym.tar.xz"
    fi

    # 1) Download tarball into $ARCHIVE_DIR
    if [[ ! -f "$tar_file" ]]; then
      log "Downloading: $tar_url -> $tar_file"
      # wget -O creates its output file even when the transfer fails. Fetch
      # into a .part file and rename only on success, so a failed or
      # interrupted download is never mistaken for a cached archive later.
      tar_part="${tar_file}.part"
      if wget -q -O "$tar_part" "$tar_url" && mv -f -- "$tar_part" "$tar_file"; then
        log "Downloaded OK: $tar_file ($(du -h "$tar_file" | awk '{print $1}'))"
      else
        rm -f -- "$tar_part"
        log "WARN: Failed to download $tar_url"
        continue
      fi
    else
      log "Using cached archive: $tar_file"
    fi

    # 2) Extract (once) into $ARCHIVE_DIR
    # The tarball is expected to unpack into a directory named like the
    # tarball minus its .tar.xz suffix.
    extract_root="${tar_file%.tar.xz}"
    if [[ ! -d "$extract_root" ]]; then
      log "Extracting: $tar_file -> $ARCHIVE_DIR"
      if tar -xf "$tar_file" -C "$ARCHIVE_DIR"; then
        log "Extracted to: $extract_root"
      else
        log "WARN: Failed to extract $tar_file"
        continue
      fi
    else
      log "Already extracted: $extract_root"
    fi

    if [[ ! -d "$extract_root" ]]; then
      # Extraction succeeded but did not create the expected directory.
      log "WARN: $tar_file did not produce $extract_root; skipping move."
      continue
    fi

    # 3) Move all extracted files into $DESCRIPTORS_DIR/$p, preserving tree.
    # The tarball is already month-scoped by name; CollecTor zeroes file mtimes
    # for reproducibility, so mtime-based windowing matches nothing.
    dest_root="$DESCRIPTORS_DIR/$p"
    mkdir -p -- "$dest_root"

    # Collect paths relative to extract_root (NUL-delimited, so any file name
    # is safe). Held in memory: no temp file to leak if the script aborts.
    rel_paths=()
    while IFS= read -r -d '' rel; do
      rel_paths+=("$rel")
    done < <(find "$extract_root" -type f -printf '%P\0')

    if [[ ${#rel_paths[@]} -eq 0 ]]; then
      log "No files extracted for $p ($ym); skipping move."
      continue
    fi

    log "Moving ${#rel_paths[@]} files to $dest_root (preserving structure)"

    # Move each file. The parent directory comes from parameter expansion
    # (no dirname fork per file) and mkdir runs only when the target directory
    # changes, which matters for archives with very many files.
    last_dir=""
    for rel in "${rel_paths[@]}"; do
      if [[ "$rel" == */* ]]; then
        dst_dir="$dest_root/${rel%/*}"
      else
        dst_dir="$dest_root"
      fi
      if [[ "$dst_dir" != "$last_dir" ]]; then
        mkdir -p -- "$dst_dir"
        last_dir="$dst_dir"
      fi
      mv -- "$extract_root/$rel" "$dst_dir/"
    done

    # Clean empty dirs left behind under extract_root (best effort). Once all
    # files are gone this removes extract_root itself as well.
    find "$extract_root" -type d -empty -delete || true
  done

  # Atomic replace of the processed log: write a temp file, then rename.
  # Kept as two statements (not `a && b`) so that a failed write is a real
  # error under `set -e` instead of being silently skipped.
  printf '%s\n' "$ym" > "${PROCESSED_LOG}.tmp"
  mv -f -- "${PROCESSED_LOG}.tmp" "$PROCESSED_LOG"
  log "Done month=$ym (written PROCESSED_LOG=$PROCESSED_LOG)"
}

# --- entry point ---
# Require at least one argument; with none, require_arg prints a usage line and
# exits with status 2. NOTE(review): output was redirected to the log file
# earlier in this script, so that usage line lands in the log, not on the
# terminal.
require_arg "$@"
# parse_month_window prints "ym|start|end"; split it on '|' into YM, START, END.
# It runs inside a process substitution (a subshell), so its own `exit` only
# ends that subshell. When it prints nothing, `read` hits end-of-input and
# returns non-zero, which aborts the script here because of `set -e`.
IFS='|' read -r YM START END < <(parse_month_window "$1")
# Download, extract and file all archives for the selected month.
process_month "$YM" "$START" "$END"