25 lines
990 B
YAML
25 lines
990 B
YAML
# vim: ts=2 sw=2 :
# Prometheus alerting rules for the "service" group.
# Each rule must hold continuously for its `for:` duration before it fires.
# The `severity` label values are kept exactly as authored; their meaning is
# defined by whatever consumes these alerts (presumably Alertmanager routing
# -- TODO confirm).
groups:
  - name: service
    rules:
      # Per-series error ratio: the 15m rate of status="500" requests divided
      # by the 15m rate of all requests with the `status` label summed away.
      # Fires above 25%, but only while the same series also averaged more
      # than 1/60 requests per second (one per minute) over the last hour, so
      # a handful of failures on a nearly idle series does not trip it.
      # The summary assumes the series carry `instance` and `route` labels.
      - alert: High rate of 500 errors on specific route
        # Folded scalar: the lines below are joined with single spaces, which
        # yields the same one-line PromQL expression as before.
        expr: >-
          rate(request_time_count{status="500"}[15m])
          / ignoring(status) sum without(status) (rate(request_time_count[15m]))
          > 0.25
          and (sum without (status) (rate(request_time_count[1h])) > 1/60)
        for: 5m
        labels:
          severity: important
        annotations:
          summary: "{{ $labels.instance }} has a high rate of 500 errors on route {{ $labels.route }}"

      # Same 25% ratio, aggregated per instance across all other labels.
      # NOTE(review): unlike the route-level rule above there is no
      # minimum-traffic guard here, so a low-traffic instance can fire on very
      # few failed requests -- confirm this is intended.
      - alert: High rate of 500 errors on an instance
        expr: sum by(instance) (rate(request_time_count{status="500"}[15m])) / sum by(instance) (rate(request_time_count[15m])) > 0.25
        for: 5m
        labels:
          severity: urgent
        annotations:
          summary: "{{ $labels.instance }} has a high rate of 500 errors"

      # Fires when srht_webhooks_queue_length stays above 5 for 5 minutes.
      # Presumably this metric is the number of pending webhook deliveries
      # -- verify against the exporter.
      - alert: Webhook queue queued up
        expr: srht_webhooks_queue_length > 5
        for: 5m
        labels:
          severity: important
        annotations:
          summary: "{{ $labels.instance }} webhooks have queued up"