From cf7a64dc85af237cb2dc6fe308313ee31a3adda2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= <anarcat@debian.org>
Date: Tue, 24 Sep 2024 10:31:16 -0400
Subject: [PATCH] clarify missing base warning

---
 howto/postgresql.md | 68 ++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/howto/postgresql.md b/howto/postgresql.md
index e6553e1d..616bd252 100644
--- a/howto/postgresql.md
+++ b/howto/postgresql.md
@@ -1144,46 +1144,56 @@ untested.
 
 If you get a Nagios warning like this:
 
-    [fasolo, dak] MISSING-BASE: dak.BASE.backuphost.debian.org-20180211-012002-fasolo.debian.org-dak-9.6-backup.tar.gz
+    [weather-01, main] MISSING-BASE: main.BASE.bungei.torproject.org-20240924-141251-weather-01.torproject.org-main-15-backup.tar.gz
 
 This means that we started doing a base backup (as witnessed by a
 .backup file next to a WAL), but for some reason we don't have the
 corresponding base file.
 
-    root@backuphost:/srv/backups/pg/fasolo# ls -l *backup*
-    -rw------- 1 debbackup debbackup 9201093916 Jan 14 06:18 dak.BASE.backuphost.debian.org-20180114-012001-fasolo.debian.org-dak-9.6-backup.tar.gz
-    -rw------- 1 debbackup debbackup 9227651542 Jan 21 06:25 dak.BASE.backuphost.debian.org-20180121-012001-fasolo.debian.org-dak-9.6-backup.tar.gz
-    -rw------- 1 debbackup debbackup 9266306750 Jan 28 07:59 dak.BASE.backuphost.debian.org-20180128-012001-fasolo.debian.org-dak-9.6-backup.tar.gz
-    -rw------- 1 debbackup debbackup 9312602089 Feb  5 11:00 dak.BASE.backuphost.debian.org-20180204-012001-fasolo.debian.org-dak-9.6-backup.tar.gz
-    -rw------- 1 debbackup debbackup 9346830509 Feb 12 10:25 dak.BASE.backuphost.debian.org-20180212-094930-fasolo.debian.org-dak-9.6-backup.tar.gz
-    -rw------- 1 debbackup debbackup        353 Jan 14 06:18 dak.WAL.0000000100000033000000A6.00000028.backup
-    -rw------- 1 debbackup debbackup        350 Jan 20 11:20 dak.WAL.00000001000000350000008C.00000028.backup
-    -rw------- 1 debbackup debbackup        353 Jan 21 06:25 dak.WAL.000000010000003600000068.00000028.backup
-    -rw------- 1 debbackup debbackup        353 Jan 28 07:59 dak.WAL.0000000100000038000000E3.00000028.backup
-    -rw------- 1 debbackup debbackup        353 Feb  5 11:00 dak.WAL.000000010000003B00000090.00000028.backup
-    -rw------- 1 debbackup debbackup        350 Feb  5 15:49 dak.WAL.000000010000003B0000009B.00000108.backup
-    -rw------- 1 debbackup debbackup        353 Feb 11 10:09 dak.WAL.000000010000003D000000AC.00000028.backup
-    -rw------- 1 debbackup debbackup        353 Feb 12 10:25 dak.WAL.000000010000003E00000027.00000178.backup
+```
+root@bungei:/srv/backups/pg/weather-01# ls -al *backup*
+-rw------- 1 torbackup torbackup 7688112 Aug 30 16:22 main.BASE.bungei.torproject.org-20240830-162218-weather-01.torproject.org-main-15-backup.tar.gz
+-rw------- 1 torbackup torbackup 7733484 Sep  8 16:46 main.BASE.bungei.torproject.org-20240908-164631-weather-01.torproject.org-main-15-backup.tar.gz
+-rw------- 1 torbackup torbackup 7774825 Sep 17 05:15 main.BASE.bungei.torproject.org-20240917-051512-weather-01.torproject.org-main-15-backup.tar.gz
+-rw------- 1 torbackup torbackup 7776596 Sep 24 14:26 main.BASE.bungei.torproject.org-20240924-142622-weather-01.torproject.org-main-15-backup.tar.gz
+-rw------- 1 torbackup torbackup     397 Aug 30 16:22 main.WAL.0000000100000017000000B5.00000028.backup
+-rw------- 1 torbackup torbackup     397 Sep  8 16:46 main.WAL.00000001000000180000008B.00000028.backup
+-rw------- 1 torbackup torbackup     397 Sep 17 05:15 main.WAL.000000010000001900000056.00000028.backup
+-rw------- 1 torbackup torbackup     397 Sep 24 14:25 main.WAL.0000000100000019000000F2.00000028.backup
+-rw------- 1 torbackup torbackup     397 Sep 24 14:26 main.WAL.0000000100000019000000F5.00000028.backup
+```
 
 `.backup` files are created on the postgres server and shipped to the
 backup hosts whenever a base backup is initiated. We do some
 labelling, so we know which backup host the corresponding tarball
 should end up with. For example:
 
-    root@backuphost:/srv/backups/pg/fasolo# cat dak.WAL.000000010000003B00000090.00000028.backup
-    START WAL LOCATION: 3B/90000028 (file 000000010000003B00000090)
-    STOP WAL LOCATION: 3B/97CF2138 (file 000000010000003B00000097)
-    CHECKPOINT LOCATION: 3B/90000098
-    BACKUP METHOD: streamed
-    BACKUP FROM: master
-    START TIME: 2018-02-05 10:25:28 UTC
-    LABEL: backuphost.debian.org-20180204-012001-fasolo.debian.org-dak-9.6-backup
-    STOP TIME: 2018-02-05 10:59:50 UTC
-
-To fix this, verify we have a later base tarball, or that we are fine
-for some other reason, and remove the corresponding `.backup` file from
-the backup host. In the case above, we would remove
-`dak.WAL.000000010000003D000000AC.00000028.backup`.
+```
+root@bungei:/srv/backups/pg/weather-01# cat main.WAL.0000000100000017000000B5.00000028.backup
+START WAL LOCATION: 17/B5000028 (file 0000000100000017000000B5)
+STOP WAL LOCATION: 17/B5000100 (file 0000000100000017000000B5)
+CHECKPOINT LOCATION: 17/B5000060
+BACKUP METHOD: streamed
+BACKUP FROM: primary
+START TIME: 2024-08-30 16:22:20 UTC
+LABEL: bungei.torproject.org-20240830-162218-weather-01.torproject.org-main-15-backup
+START TIMELINE: 1
+STOP TIME: 2024-08-30 16:22:21 UTC
+STOP TIMELINE: 1
+```
+
+Here no BASE tarball matches the label in the warning (`20240924-141251`),
+but a later one exists, so we can find the spurious `.backup` file and delete it.
+
+To find the exact file, grep for part of the file name mentioned in the
+warning, like:
+
+    root@bungei:/srv/backups/pg/weather-01# grep bungei.torproject.org-20240924-141251-weather-01.torproject.org *.backup
+    main.WAL.0000000100000019000000F2.00000028.backup:LABEL: bungei.torproject.org-20240924-141251-weather-01.torproject.org-main-15-backup
+
+Then delete it:
+
+    rm /srv/backups/pg/weather-01/main.WAL.0000000100000019000000F2.00000028.backup
 
 ### WAL-MISSING-AFTER
 
-- 
GitLab