Files
ilnmors-homelab/config/services/containers/infra/prometheus/etc/rules.yaml.j2
2026-03-15 04:41:02 +09:00

39 lines
1.4 KiB
Django/Jinja

groups:
# Heartbeat alerts: one rule is rendered per host listed in the loop below.
#
# The expression evaluates to 0 (and therefore fires) in two situations:
#   * no `up` sample for the instance was received during the last 5 minutes
#     (the `or on() vector(0)` fallback supplies the 0), or
#   * samples were received, but none of them was 1 during the last 5 minutes
#     (max_over_time, then max across all matching series, yields 0).
# The outer max() drops all labels so both situations produce the same
# label-less result and therefore the same alert identity.
#
# The loop tags are kept at column 0 so that they do not inject extra
# indentation into the rendered YAML.
- name: node_exporters_heartbeat
  rules:
{% for instance in ['vmm', 'fw', 'infra', 'auth', 'app'] %}
  - alert: {{ instance }}_node_exporter_down
    expr: |
      (max(max_over_time(up{instance="{{ instance }}"}[5m])) or on() vector(0)) == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      summary: "Exporter heartbeat is down: {{ instance }}"
      description: "{{ instance }} exporter is down for 5 mins"
{% endfor %}
# PostgreSQL availability alert.
#
# The expression evaluates to 0 (and therefore fires) when either
#   * no pg_up sample for instance="infra", job="postgres" was received during
#     the last 5 minutes (the `or on() vector(0)` fallback supplies the 0), or
#   * samples were received, but pg_up was never 1 during the last 5 minutes.
# max_over_time is used instead of present_over_time because the latter
# returns 1 for any existing sample, including pg_up == 0, which would keep
# the alert silent while the exporter reports the database as unreachable.
# The outer max() drops all labels so both situations share one alert identity.
- name: postgresql_heartbeat
  rules:
  - alert: Postgresql_Down
    expr: |
      (max(max_over_time(pg_up{instance="infra", job="postgres"}[5m])) or on() vector(0)) == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      summary: "Postgresql Heartbeat Lost: postgresql"
      description: "postgresql node is down for 5 mins."
# Certificate expiry alerts: one rule is rendered per certificate file name
# listed in the loop below. Dots in the file name are replaced by underscores
# to build the alert name.
#
# A rule fires when the remaining lifetime (not-after timestamp minus the
# current time) is below 2592000 seconds, i.e. 30 days, and has stayed below
# it for one day.
# min() is used so that, when several series share the same file name, the
# certificate that expires first decides; max() would let a longer-lived
# certificate hide one that is about to expire.
# NOTE(review): if the metric is missing for a file name, the expression
# returns nothing and the rule stays silent - confirm that is acceptable.
#
# The loop tags are kept at column 0 so that they do not inject extra
# indentation into the rendered YAML.
- name: Certificate_expiry_check
  rules:
{% for filename in ['root.crt', 'intermediate.crt', 'crowdsec.crt', 'blocky.crt', 'postgresql.crt', 'ldap.crt', 'prometheus.crt', 'loki.crt', 'dsm.crt'] %}
  - alert: {{ filename | replace('.', '_') }}_is_expired_soon
    expr: |
      min(x509_cert_not_after{filename="{{ filename }}"}) - time() < 2592000
    for: 1d
    labels:
      severity: critical
    annotations:
      summary: "{{ filename }} is expired in 30 days"
      description: "{{ filename }} is expired in 30 days."
{% endfor %}