Loading rules.d/tpa_textfile.rules +3 −0 Original line number Diff line number Diff line Loading @@ -38,6 +38,7 @@ groups: summary: "Packages pending on {{ $labels.alias }} for a week" description: "There are {{ $value }} pending package upgrades on {{ $labels.alias }} that have not been automatically installed for two days." playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#blocked-upgrades" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-22" - alert: ObsoletePackages expr: apt_packages_obsolete_count + on (instance) group_left (version_codename) (0 * node_os_info) > 0 Loading Loading @@ -107,6 +108,7 @@ groups: Found pending kernel or microcode upgrades on {{ $value }} hosts running {{ $labels.version_codename}}. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" # pint ignore/end - alert: OutdatedLibraries Loading @@ -126,6 +128,7 @@ groups: which are using outdated libraries. Those processes should be restarted. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#special-cases-and-manual-restarts" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-39" # Note: this is using a different metric from the similarly named alert in # tpa_blackbox.rules. 
The metric for local cert expiry is produced by the node Loading tests/tpa_textfile.yml +4 −0 Original line number Diff line number Diff line Loading @@ -58,6 +58,7 @@ tests: summary: "Some bookworm servers need to reboot" description: "Found pending kernel or microcode upgrades on 1\nhosts running bookworm.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - exp_labels: job: "node" severity: "info" Loading @@ -67,6 +68,7 @@ tests: summary: "Some bookworm servers need to reboot" description: "Found pending kernel or microcode upgrades on 2\nhosts running bookworm.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - exp_labels: job: "node" severity: "info" Loading @@ -76,6 +78,7 @@ tests: summary: "Some bullseye servers need to reboot" description: "Found pending kernel or microcode upgrades on 1\nhosts running bullseye.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - interval: 1m input_series: Loading @@ -99,6 +102,7 @@ tests: which are using outdated libraries. Those processes should be restarted. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#special-cases-and-manual-restarts" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-39" - interval: 1m input_series: Loading Loading
rules.d/tpa_textfile.rules +3 −0 Original line number Diff line number Diff line Loading @@ -38,6 +38,7 @@ groups: summary: "Packages pending on {{ $labels.alias }} for a week" description: "There are {{ $value }} pending package upgrades on {{ $labels.alias }} that have not been automatically installed for two days." playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#blocked-upgrades" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-22" - alert: ObsoletePackages expr: apt_packages_obsolete_count + on (instance) group_left (version_codename) (0 * node_os_info) > 0 Loading Loading @@ -107,6 +108,7 @@ groups: Found pending kernel or microcode upgrades on {{ $value }} hosts running {{ $labels.version_codename}}. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" # pint ignore/end - alert: OutdatedLibraries Loading @@ -126,6 +128,7 @@ groups: which are using outdated libraries. Those processes should be restarted. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#special-cases-and-manual-restarts" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-39" # Note: this is using a different metric from the similarly named alert in # tpa_blackbox.rules. The metric for local cert expiry is produced by the node Loading
tests/tpa_textfile.yml +4 −0 Original line number Diff line number Diff line Loading @@ -58,6 +58,7 @@ tests: summary: "Some bookworm servers need to reboot" description: "Found pending kernel or microcode upgrades on 1\nhosts running bookworm.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - exp_labels: job: "node" severity: "info" Loading @@ -67,6 +68,7 @@ tests: summary: "Some bookworm servers need to reboot" description: "Found pending kernel or microcode upgrades on 2\nhosts running bookworm.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - exp_labels: job: "node" severity: "info" Loading @@ -76,6 +78,7 @@ tests: summary: "Some bullseye servers need to reboot" description: "Found pending kernel or microcode upgrades on 1\nhosts running bullseye.\n" playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/reboots" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-21" - interval: 1m input_series: Loading @@ -99,6 +102,7 @@ tests: which are using outdated libraries. Those processes should be restarted. playbook: "https://gitlab.torproject.org/tpo/tpa/team/-/wikis/howto/upgrades#special-cases-and-manual-restarts" dashboard: "https://grafana.torproject.org/d/wUmZB05Zk/tpo-overview?viewPanel=panel-39" - interval: 1m input_series: Loading