---
# Prometheus alerting rules, rendered from a Jinja2 template.
#
# Groups defined here:
#   node_exporters_heartbeat - one alert per listed instance, based on `up`.
#   postgresql_heartbeat     - one alert based on `pg_up` for job "postgres".
#   Certificate_expiry_check - one alert per listed certificate file, based
#                              on `x509_cert_not_after`.
#
# Template tags are kept at column 0 on purpose: the rendered indentation is
# then correct whether or not the renderer strips block-tag lines.
groups:
  - name: node_exporters_heartbeat
    rules:
      # How the heartbeat expression works:
      #   present_over_time(...[5m]) yields 1 for every matching series that
      #   has at least one sample in the last 5 minutes. `or on() vector(0)`
      #   supplies a single 0 only when nothing matched, and `== 0` keeps just
      #   that fallback. The alert therefore becomes pending when NO sample
      #   exists for the instance in the 5m window, and fires 30s later.
      # NOTE(review): a series that is present with value 0 does not trigger
      #   this rule. That is fine if samples stop arriving when the host is
      #   down; confirm this matches how these targets are collected.
{% for instance in ['vmm', 'fw', 'infra', 'auth', 'app'] %}
      - alert: "{{ instance }}_node_exporter_down"
        expr: |
          (present_over_time(up{instance="{{ instance }}"}[5m]) or on() vector(0)) == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "Exporter heartbeat is down: {{ instance }}"
          description: "{{ instance }} exporter is down for 5 mins"
{% endfor %}

  - name: postgresql_heartbeat
    rules:
      # Same absence check as the node exporter heartbeat, applied to the
      # `pg_up` series of instance "infra", job "postgres".
      # NOTE(review): as above, only missing samples are detected; a present
      #   `pg_up` sample with value 0 would not fire - verify that is intended.
      - alert: Postgresql_Down
        expr: |
          (present_over_time(pg_up{instance="infra", job="postgres"}[5m]) or on() vector(0)) == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "Postgresql Heartbeat Lost: postgresql"
          description: "postgresql node is down for 5 mins."

# Warning horizon for certificate expiry, in days. The PromQL threshold (in
# seconds) and the alert texts below are both derived from this one value so
# they cannot drift apart. 30 days renders as 2592000 seconds.
{% set expiry_warn_days = 30 %}
  - name: Certificate_expiry_check
    rules:
      # Per certificate file: take the largest `x509_cert_not_after` value
      # among series with that filename, subtract the current time, and alert
      # once the remainder has stayed below the warning horizon for one day.
      # Dots in the file name are replaced with underscores to build the
      # alert name.
{% for filename in [
     'root.crt', 'intermediate.crt', 'crowdsec.crt', 'blocky.crt',
     'postgresql.crt', 'ldap.crt', 'prometheus.crt', 'loki.crt', 'dsm.crt'
   ] %}
      - alert: "{{ filename | replace('.', '_') }}_is_expired_soon"
        expr: |
          max(x509_cert_not_after{filename="{{ filename }}"}) - time() < {{ expiry_warn_days * 86400 }}
        for: 1d
        labels:
          severity: critical
        annotations:
          summary: "{{ filename }} expires in less than {{ expiry_warn_days }} days"
          description: "{{ filename }} expires in less than {{ expiry_warn_days }} days."
{% endfor %}