From bd106ed9845ec4e308bbf5eb451fe396d5396e34 Mon Sep 17 00:00:00 2001
From: Ilya Mashchenko
Date: Thu, 2 Feb 2023 17:01:43 +0200
Subject: [PATCH] fix kubelet alarms (#14414)

---
 health/health.d/kubelet.conf | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/health/health.d/kubelet.conf b/health/health.d/kubelet.conf
index c2778cc5ee..428b6ee917 100644
--- a/health/health.d/kubelet.conf
+++ b/health/health.d/kubelet.conf
@@ -9,7 +9,7 @@
     class: Errors
      type: Kubernetes
 component: Kubelet
-     calc: $kubelet_node_config_error
+     calc: $experiencing_error
     units: bool
     every: 10s
      warn: $this == 1
@@ -20,12 +20,12 @@ component: Kubelet
 # Failed Token() requests to the alternate token source
 
  template: kubelet_token_requests
-   lookup: sum -10s of token_fail_count
        on: k8s_kubelet.kubelet_token_requests
     class: Errors
      type: Kubernetes
 component: Kubelet
-    units: failed requests
+   lookup: sum -10s of failed
+    units: requests
     every: 10s
      warn: $this > 0
     delay: down 1m multiplier 1.5 max 2h
@@ -35,11 +35,11 @@ component: Kubelet
 # Docker and runtime operation errors
 
  template: kubelet_operations_error
-   lookup: sum -1m
        on: k8s_kubelet.kubelet_operations_errors
     class: Errors
      type: Kubernetes
 component: Kubelet
+   lookup: sum -1m
     units: errors
     every: 10s
      warn: $this > (($status >= $WARNING) ? (0) : (20))
@@ -67,7 +67,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -1m unaligned of kubelet_pleg_relist_latency_05
+   lookup: average -1m unaligned of 0.5
     units: microseconds
     every: 10s
      info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.5)
@@ -77,7 +77,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -10s unaligned of kubelet_pleg_relist_latency_05
+   lookup: average -10s unaligned of 0.5
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_05 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_05))
     every: 10s
     units: %
@@ -95,7 +95,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -1m unaligned of kubelet_pleg_relist_latency_09
+   lookup: average -1m unaligned of 0.9
     units: microseconds
     every: 10s
      info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.9)
@@ -105,7 +105,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -10s unaligned of kubelet_pleg_relist_latency_09
+   lookup: average -10s unaligned of 0.9
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_09 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_09))
     every: 10s
     units: %
@@ -123,7 +123,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -1m unaligned of kubelet_pleg_relist_latency_099
+   lookup: average -1m unaligned of 0.99
     units: microseconds
     every: 10s
      info: average Pod Lifecycle Event Generator relisting latency over the last minute (quantile 0.99)
@@ -133,7 +133,7 @@ component: Kubelet
     class: Latency
      type: Kubernetes
 component: Kubelet
-   lookup: average -10s unaligned of kubelet_pleg_relist_latency_099
+   lookup: average -10s unaligned of 0.99
      calc: $this * 100 / (($kubelet_1m_pleg_relist_latency_quantile_099 < 1000)?(1000):($kubelet_1m_pleg_relist_latency_quantile_099))
     every: 10s
     units: %