Loading rules.d/tpa_postgresql.rules +8 −6 Original line number Diff line number Diff line Loading @@ -40,12 +40,12 @@ groups: - alert: PgConnectionsSaturation expr: | ( sum(pg_settings_max_connections) by (alias) sum(pg_settings_max_connections) by (job, instance, alias, team) - ( sum(pg_settings_superuser_reserved_connections) by (alias) + sum(pg_stat_activity_count) by (alias) sum(pg_settings_superuser_reserved_connections) by (job, instance, alias, team) + sum(pg_stat_activity_count) by (job, instance, alias, team) ) ) / sum(pg_settings_max_connections) by (alias) * 100 < 10 ) / sum(pg_settings_max_connections) by (job, instance, alias, team) * 100 < 10 for: 1h labels: severity: warning Loading Loading @@ -91,7 +91,8 @@ groups: # # # matching: metricsdb-01, constantly hitting this # - alert: PgSlowQueries # expr: sum(pg_stat_activity_max_tx_duration) by (alias) > 1 # expr: sum(pg_stat_activity_max_tx_duration) by (job, instance, # alias, team) > 1 # labels: # severity: warning # annotations: Loading @@ -104,7 +105,8 @@ groups: # # # other nice queries to look into: # # deadlocks, missing a playbook, no signal in the last week # sum(rate(pg_stat_database_deadlocks[5m])) by (alias) # sum(rate(pg_stat_database_deadlocks[5m])) by (job, instance, # alias, team) # # # too many rollbacks, no playbook, not sure is useful # rate(pg_stat_database_xact_rollback{datname!~"template.*"}[5m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[5m]) > 0.02 Loading
rules.d/tpa_postgresql.rules +8 −6 Original line number Diff line number Diff line Loading @@ -40,12 +40,12 @@ groups: - alert: PgConnectionsSaturation expr: | ( sum(pg_settings_max_connections) by (alias) sum(pg_settings_max_connections) by (job, instance, alias, team) - ( sum(pg_settings_superuser_reserved_connections) by (alias) + sum(pg_stat_activity_count) by (alias) sum(pg_settings_superuser_reserved_connections) by (job, instance, alias, team) + sum(pg_stat_activity_count) by (job, instance, alias, team) ) ) / sum(pg_settings_max_connections) by (alias) * 100 < 10 ) / sum(pg_settings_max_connections) by (job, instance, alias, team) * 100 < 10 for: 1h labels: severity: warning Loading Loading @@ -91,7 +91,8 @@ groups: # # # matching: metricsdb-01, constantly hitting this # - alert: PgSlowQueries # expr: sum(pg_stat_activity_max_tx_duration) by (alias) > 1 # expr: sum(pg_stat_activity_max_tx_duration) by (job, instance, # alias, team) > 1 # labels: # severity: warning # annotations: Loading @@ -104,7 +105,8 @@ groups: # # # other nice queries to look into: # # deadlocks, missing a playbook, no signal in the last week # sum(rate(pg_stat_database_deadlocks[5m])) by (alias) # sum(rate(pg_stat_database_deadlocks[5m])) by (job, instance, # alias, team) # # # too many rollbacks, no playbook, not sure is useful # rate(pg_stat_database_xact_rollback{datname!~"template.*"}[5m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[5m]) > 0.02