fix(packaging): fix cpu/memory metrics when running inside LXC container as systemd service (#14255)
Fixes https://github.com/netdata/netdata/issues/14238
This commit is contained in:
parent
7d0ca0b83e
commit
a14a21f90f
|
@ -98,6 +98,41 @@ static void proc_main_cleanup(void *ptr)
|
|||
worker_unregister();
|
||||
}
|
||||
|
||||
// Assigned in proc_main() from is_lxcfs_proc_mounted() before the collection
// loop starts; do_proc_meminfo() reads it to decide whether to apply the
// ZFS ARC adjustment to the memory figures.
bool inside_lxc_container = false;
|
||||
|
||||
static bool is_lxcfs_proc_mounted() {
|
||||
procfile *ff = NULL;
|
||||
|
||||
if (unlikely(!ff)) {
|
||||
char filename[FILENAME_MAX + 1];
|
||||
snprintfz(filename, FILENAME_MAX, "/proc/self/mounts");
|
||||
ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
|
||||
if (unlikely(!ff))
|
||||
return false;
|
||||
}
|
||||
|
||||
ff = procfile_readall(ff);
|
||||
if (unlikely(!ff))
|
||||
return false;
|
||||
|
||||
unsigned long l, lines = procfile_lines(ff);
|
||||
|
||||
for (l = 0; l < lines; l++) {
|
||||
size_t words = procfile_linewords(ff, l);
|
||||
if (words < 2) {
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(procfile_lineword(ff, l, 0), "lxcfs") && !strncmp(procfile_lineword(ff, l, 1), "/proc", 5)) {
|
||||
procfile_close(ff);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
procfile_close(ff);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void *proc_main(void *ptr)
|
||||
{
|
||||
worker_register("PROC");
|
||||
|
@ -128,6 +163,8 @@ void *proc_main(void *ptr)
|
|||
heartbeat_t hb;
|
||||
heartbeat_init(&hb);
|
||||
|
||||
inside_lxc_container = is_lxcfs_proc_mounted();
|
||||
|
||||
while (service_running(SERVICE_COLLECTORS)) {
|
||||
worker_is_idle();
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
|
|
|
@ -48,6 +48,7 @@ int get_numa_node_count(void);
|
|||
|
||||
// metrics that need to be shared among data collectors
|
||||
extern unsigned long long zfs_arcstats_shrinkable_cache_size_bytes;
|
||||
extern bool inside_lxc_container;
|
||||
|
||||
// netdev renames
|
||||
void netdev_rename_device_add(
|
||||
|
|
|
@ -158,9 +158,11 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
unsigned long long MemCached = Cached + SReclaimable - Shmem;
|
||||
unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers;
|
||||
// The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory)
|
||||
MemCached += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
MemUsed -= (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
MemAvailable += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
if (!inside_lxc_container) {
|
||||
MemCached += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
MemUsed -= (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
MemAvailable += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
|
||||
}
|
||||
|
||||
if(do_ram) {
|
||||
{
|
||||
|
|
|
@ -217,6 +217,9 @@ if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then
|
|||
LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
|
||||
CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
|
||||
CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
|
||||
if grep -q "^lxcfs /proc" /proc/self/mounts 2>/dev/null && count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then
|
||||
LCPU_COUNT="$count"
|
||||
fi
|
||||
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
|
||||
if [ -z "$possible_cpu_freq" ]; then
|
||||
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
|
||||
|
@ -437,7 +440,7 @@ CLOUD_INSTANCE_TYPE="unknown"
|
|||
CLOUD_INSTANCE_REGION="unknown"
|
||||
|
||||
if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then
|
||||
# Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
|
||||
# Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
|
||||
curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1
|
||||
ret=$?
|
||||
# anything but operation timeout.
|
||||
|
|
|
@ -71,6 +71,10 @@ ProtectControlGroups=on
|
|||
ReadWriteDirectories=/run/netdata
|
||||
# This is needed to make email-based alert delivery work if Postfix is the email provider on the system.
|
||||
ReadWriteDirectories=-/var/spool/postfix/maildrop
|
||||
# LXCFS directories (https://github.com/lxc/lxcfs#lxcfs)
|
||||
# If we don't set them explicitly, systemd mounts procfs from the host. See https://github.com/netdata/netdata/issues/14238.
|
||||
BindReadOnlyPaths=-/proc/cpuinfo -/proc/diskstats -/proc/loadavg -/proc/meminfo
|
||||
BindReadOnlyPaths=-/proc/stat -/proc/swaps -/proc/uptime -/proc/slabinfo
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
Loading…
Reference in New Issue