Verified Commit 56a7fe90 authored by anarcat's avatar anarcat 💥
Browse files

extend udldap refresh checks

When I have an hour-long downtime at home (which happens), this alert
trips needlessly.

This is just a sanity check; it's fine to wait a day.
parent 524d63a4
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -321,13 +321,12 @@ groups:
      playbook: "TODO"

  - alert: UDLdapLastRefreshTooOld
    expr: (time() - node_path_modification_timestamp_seconds) > 60*60
    expr: (time() - node_path_modification_timestamp_seconds) > 24*60*60
    labels:
      severity: warning
    annotations:
      summary: "userdir-ldap has not refreshed correctly on {{ $labels.alias }} for more than 1h"
      summary: "userdir-ldap has not refreshed correctly on {{ $labels.alias }} for too long"
      description: |
        On {{ $labels.alias }} the file {{ $labels.path }} has
        a last modification date that's one hour or more in the past. The userdir-ldap
        replication should be verified.
        a last modification date that is {{ $value | humanizeDuration }} old. The userdir-ldap replication should be verified.
      playbook: "TODO"
+3 −4
Original line number Diff line number Diff line
@@ -659,7 +659,7 @@ tests:
    input_series:
      # file too old, firing
      - series: 'node_path_modification_timestamp_seconds{alias="submit-01.torproject.org",instance="submit-01.torproject.org:9100",job="node",path="/var/lib/misc/thishost/last_update.trace",team="TPA"}'
        values: '-3601'
        values: '-86401'
      # file is recent enough, not firing
      - series: 'node_path_modification_timestamp_seconds{alias="web-fsn-02.torproject.org",instance="web-fsn-02.torproject.org:9100",job="node",path="/var/lib/misc/thishost/last_update.trace",team="TPA"}'
        values: '-150'
@@ -675,9 +675,8 @@ tests:
              job: "node"
              path: "/var/lib/misc/thishost/last_update.trace"
            exp_annotations:
              summary: "userdir-ldap has not refreshed correctly on submit-01.torproject.org for more than 1h"
              summary: "userdir-ldap has not refreshed correctly on submit-01.torproject.org for too long"
              description: |
                On submit-01.torproject.org the file /var/lib/misc/thishost/last_update.trace has
                a last modification date that's one hour or more in the past. The userdir-ldap
                replication should be verified.
                a last modification date that is 1d 0h 0m 1s old. The userdir-ldap replication should be verified.
              playbook: "TODO"