Workers utilization charts (#12807)
* initial version of worker utilization * working example * without mutexes * monitoring DBENGINE, ACLKSYNC, WEB workers * added charts to monitor worker usage * fixed charts units * updated contexts * updated priorities * added documentation * converted threads to stacked chart * One query per query thread * Revert "One query per query thread" This reverts commit 6aeb391f5987c3c6ba2864b559fd7f0cd64b14d3. * fixed priority for web charts * read worker cpu utilization from proc * read workers cpu utilization via /proc/self/task/PID/stat, so that we have cpu utilization even when the jobs are too long to finish within our update_every frequency * disabled web server cpu utilization monitoring - it is now monitored by worker utilization * tight integration of worker utilization to web server * monitoring statsd worker threads * code cleanup and renaming of variables * constrained worker and statistics conflict to just one variable * support for rendering jobs per type * better priorities and removed the total jobs chart * added busy time in ms per job type * added proc.plugin monitoring, switch clock to MONOTONIC_RAW if available, global statistics now cleans up old worker threads * isolated worker thread families * added cgroups.plugin workers * remove unneeded dimensions when the expected worker is just one * plugins.d and streaming monitoring * rebased; support worker_is_busy() to be called one after another * added diskspace plugin monitoring * added tc.plugin monitoring * added ML threads monitoring * don't create dimensions and charts that are not needed * fix crash when job types are added on the fly * added timex and idlejitter plugins; collected heartbeat statistics; reworked heartbeat according to the POSIX * the right name is heartbeat for this chart * monitor streaming senders * added streaming senders to global stats * prevent division by zero * added clocks_init() to external C plugins * added freebsd and macos plugins * added freebsd and macos to global 
statistics * dont use new as a variable; address compiler warnings on FreeBSD and MacOS * refactored contexts to be unique; added health threads monitoring Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
This commit is contained in:
parent
0b3ee50c76
commit
eb216a1f4b
|
@ -410,6 +410,8 @@ set(LIBNETDATA_FILES
|
|||
libnetdata/string/utf8.h
|
||||
libnetdata/socket/security.c
|
||||
libnetdata/socket/security.h
|
||||
libnetdata/worker_utilization/worker_utilization.c
|
||||
libnetdata/worker_utilization/worker_utilization.h
|
||||
libnetdata/circular_buffer/circular_buffer.c
|
||||
libnetdata/circular_buffer/circular_buffer.h)
|
||||
|
||||
|
|
|
@ -187,6 +187,8 @@ LIBNETDATA_FILES = \
|
|||
libnetdata/health/health.c \
|
||||
libnetdata/health/health.h \
|
||||
libnetdata/string/utf8.h \
|
||||
libnetdata/worker_utilization/worker_utilization.c \
|
||||
libnetdata/worker_utilization/worker_utilization.h \
|
||||
$(NULL)
|
||||
|
||||
if ENABLE_PLUGIN_EBPF
|
||||
|
|
|
@ -351,6 +351,8 @@ static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_que
|
|||
{
|
||||
for (int i = 0; aclk_query_handlers[i].type != UNKNOWN; i++) {
|
||||
if (aclk_query_handlers[i].type == query->type) {
|
||||
worker_is_busy(i);
|
||||
|
||||
debug(D_ACLK, "Processing Queued Message of type: \"%s\"", aclk_query_handlers[i].name);
|
||||
aclk_query_handlers[i].fnc(query_thr, query);
|
||||
if (aclk_stats_enabled) {
|
||||
|
@ -361,6 +363,8 @@ static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_que
|
|||
ACLK_STATS_UNLOCK;
|
||||
}
|
||||
aclk_query_free(query);
|
||||
|
||||
worker_is_idle();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -378,21 +382,33 @@ int aclk_query_process_msgs(struct aclk_query_thread *query_thr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Registers a worker named "ACLKQUERY" and assigns a job name to every entry of
// aclk_query_handlers (iterating until the UNKNOWN terminator). The job id is the
// entry's array index, which is the same index aclk_query_process_msg() passes
// to worker_is_busy() when it dispatches a query of that type.
static void worker_aclk_register(void) {
|
||||
worker_register("ACLKQUERY");
|
||||
for (int i = 0; aclk_query_handlers[i].type != UNKNOWN; i++) {
|
||||
worker_register_job_name(i, aclk_query_handlers[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Main query processing thread
|
||||
*/
|
||||
// Thread entry point: ptr is used as the struct aclk_query_thread for this thread.
// Loops until netdata_exit is set, processing queued messages and then waiting on
// query_cond_wait. Always returns NULL.
void *aclk_query_main_thread(void *ptr)
|
||||
{
|
||||
// register the worker and its job names before doing any work
worker_aclk_register();
|
||||

||||
struct aclk_query_thread *query_thr = ptr;
|
||||

||||
while (!netdata_exit) {
|
||||
aclk_query_process_msgs(query_thr);
|
||||

||||
// report the worker as idle before blocking on the condition variable
worker_is_idle();
|
||||
QUERY_THREAD_LOCK;
|
||||
// a non-zero return from pthread_cond_wait() is treated as unlikely; sleep one second in that case
if (unlikely(pthread_cond_wait(&query_cond_wait, &query_lock_wait)))
|
||||
sleep_usec(USEC_PER_SEC * 1);
|
||||
QUERY_THREAD_UNLOCK;
|
||||
}
|
||||

||||
worker_unregister();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -360,10 +360,8 @@
|
|||
|
||||
#define NETDATA_CHART_PRIO_CHECKS 99999
|
||||
|
||||
#define NETDATA_CHART_PRIO_NETDATA_DISKSPACE 132020
|
||||
#define NETDATA_CHART_PRIO_NETDATA_TIMEX 132030
|
||||
#define NETDATA_CHART_PRIO_NETDATA_TC_CPU 135000
|
||||
#define NETDATA_CHART_PRIO_NETDATA_TC_TIME 135001
|
||||
#define NETDATA_CHART_PRIO_NETDATA_TC_TIME 1000100
|
||||
|
||||
|
||||
#endif //NETDATA_ALL_H
|
||||
|
|
|
@ -4124,6 +4124,8 @@ static int check_capabilities() {
|
|||
int main(int argc, char **argv) {
|
||||
// debug_flags = D_PROCFILE;
|
||||
|
||||
clocks_init();
|
||||
|
||||
pagesize = (size_t)sysconf(_SC_PAGESIZE);
|
||||
|
||||
// set the name for logging
|
||||
|
|
|
@ -2646,11 +2646,26 @@ static inline void discovery_process_cgroup(struct cgroup *cg) {
|
|||
read_cgroup_network_interfaces(cg);
|
||||
}
|
||||
|
||||
#define WORKER_DISCOVERY_INIT 0
|
||||
#define WORKER_DISCOVERY_FIND 1
|
||||
#define WORKER_DISCOVERY_PROCESS 2
|
||||
#define WORKER_DISCOVERY_UPDATE 3
|
||||
#define WORKER_DISCOVERY_CLEANUP 4
|
||||
#define WORKER_DISCOVERY_COPY 5
|
||||
#define WORKER_DISCOVERY_SHARE 6
|
||||
#define WORKER_DISCOVERY_LOCK 7
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8
|
||||
#endif
|
||||
|
||||
static inline void discovery_find_all_cgroups() {
|
||||
debug(D_CGROUP, "searching for cgroups");
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_INIT);
|
||||
discovery_mark_all_cgroups_as_unavailable();
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_FIND);
|
||||
if (!cgroup_use_unified_cgroups) {
|
||||
discovery_find_all_cgroups_v1();
|
||||
} else {
|
||||
|
@ -2659,16 +2674,25 @@ static inline void discovery_find_all_cgroups() {
|
|||
|
||||
struct cgroup *cg;
|
||||
for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
|
||||
worker_is_busy(WORKER_DISCOVERY_PROCESS);
|
||||
discovery_process_cgroup(cg);
|
||||
}
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_UPDATE);
|
||||
discovery_update_filenames();
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_LOCK);
|
||||
uv_mutex_lock(&cgroup_root_mutex);
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_CLEANUP);
|
||||
discovery_cleanup_all_cgroups();
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_COPY);
|
||||
discovery_copy_discovered_cgroups_to_reader();
|
||||
|
||||
uv_mutex_unlock(&cgroup_root_mutex);
|
||||
|
||||
worker_is_busy(WORKER_DISCOVERY_SHARE);
|
||||
discovery_share_cgroups_with_ebpf();
|
||||
|
||||
debug(D_CGROUP, "done searching for cgroups");
|
||||
|
@ -2678,7 +2702,19 @@ void cgroup_discovery_worker(void *ptr)
|
|||
{
|
||||
UNUSED(ptr);
|
||||
|
||||
worker_register("CGROUPSDISC");
|
||||
worker_register_job_name(WORKER_DISCOVERY_INIT, "init");
|
||||
worker_register_job_name(WORKER_DISCOVERY_FIND, "find");
|
||||
worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process");
|
||||
worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update");
|
||||
worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup");
|
||||
worker_register_job_name(WORKER_DISCOVERY_COPY, "copy");
|
||||
worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
|
||||
worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
|
||||
|
||||
while (!netdata_exit) {
|
||||
worker_is_idle();
|
||||
|
||||
uv_mutex_lock(&discovery_thread.mutex);
|
||||
while (!discovery_thread.start_discovery)
|
||||
uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex);
|
||||
|
@ -2692,6 +2728,7 @@ void cgroup_discovery_worker(void *ptr)
|
|||
}
|
||||
|
||||
discovery_thread.exited = 1;
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -4650,6 +4687,8 @@ void update_cgroup_charts(int update_every) {
|
|||
// cgroups main
|
||||
|
||||
static void cgroup_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -4687,24 +4726,30 @@ static void cgroup_main_cleanup(void *ptr) {
|
|||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
}
|
||||
|
||||
void *cgroups_main(void *ptr) {
|
||||
netdata_thread_cleanup_push(cgroup_main_cleanup, ptr);
|
||||
#define WORKER_CGROUPS_LOCK 0
|
||||
#define WORKER_CGROUPS_READ 1
|
||||
#define WORKER_CGROUPS_CHART 2
|
||||
|
||||
struct rusage thread;
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 3
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3
|
||||
#endif
|
||||
|
||||
void *cgroups_main(void *ptr) {
|
||||
worker_register("CGROUPS");
|
||||
worker_register_job_name(WORKER_CGROUPS_LOCK, "lock");
|
||||
worker_register_job_name(WORKER_CGROUPS_READ, "read");
|
||||
// name job id WORKER_CGROUPS_CHART (reported while update_cgroup_charts() runs);
// registering "chart" under WORKER_CGROUPS_READ would overwrite the "read" job name
worker_register_job_name(WORKER_CGROUPS_CHART, "chart");
|
||||
|
||||
netdata_thread_cleanup_push(cgroup_main_cleanup, ptr);
|
||||
|
||||
if (getenv("KUBERNETES_SERVICE_HOST") != NULL && getenv("KUBERNETES_SERVICE_PORT") != NULL) {
|
||||
is_inside_k8s = 1;
|
||||
cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_YES;
|
||||
}
|
||||
|
||||
// when ZERO, attempt to do it
|
||||
int vdo_cpu_netdata = config_get_boolean("plugin:cgroups", "cgroups plugin resource charts", 1);
|
||||
|
||||
read_cgroup_plugin_configuration();
|
||||
netdata_cgroup_ebpf_initialize_shm();
|
||||
|
||||
RRDSET *stcpu_thread = NULL;
|
||||
|
||||
if (uv_mutex_init(&cgroup_root_mutex)) {
|
||||
error("CGROUP: cannot initialize mutex for the main cgroup list");
|
||||
goto exit;
|
||||
|
@ -4736,6 +4781,8 @@ void *cgroups_main(void *ptr) {
|
|||
usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0;
|
||||
|
||||
while(!netdata_exit) {
|
||||
worker_is_idle();
|
||||
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
if(unlikely(netdata_exit)) break;
|
||||
|
||||
|
@ -4747,46 +4794,21 @@ void *cgroups_main(void *ptr) {
|
|||
cgroups_check = 0;
|
||||
}
|
||||
|
||||
worker_is_busy(WORKER_CGROUPS_LOCK);
|
||||
uv_mutex_lock(&cgroup_root_mutex);
|
||||
|
||||
worker_is_busy(WORKER_CGROUPS_READ);
|
||||
read_all_discovered_cgroups(cgroup_root);
|
||||
|
||||
worker_is_busy(WORKER_CGROUPS_CHART);
|
||||
update_cgroup_charts(cgroup_update_every);
|
||||
|
||||
worker_is_idle();
|
||||
uv_mutex_unlock(&cgroup_root_mutex);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
if(vdo_cpu_netdata) {
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if(unlikely(!stcpu_thread)) {
|
||||
|
||||
stcpu_thread = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, "plugin_cgroups_cpu"
|
||||
, NULL
|
||||
, "cgroups"
|
||||
, NULL
|
||||
, "Netdata CGroups Plugin CPU usage"
|
||||
, "milliseconds/s"
|
||||
, PLUGIN_CGROUPS_NAME
|
||||
, "stats"
|
||||
, 132000
|
||||
, cgroup_update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rrddim_add(stcpu_thread, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
else
|
||||
rrdset_next(stcpu_thread);
|
||||
|
||||
rrddim_set(stcpu_thread, "user" , thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec);
|
||||
rrddim_set(stcpu_thread, "system", thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec);
|
||||
rrdset_done(stcpu_thread);
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
worker_unregister();
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -224,6 +224,7 @@ void reset_metrics() {
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// initialization of netdata plugin
|
||||
|
|
|
@ -365,6 +365,8 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) {
|
|||
}
|
||||
|
||||
static void diskspace_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -373,10 +375,21 @@ static void diskspace_main_cleanup(void *ptr) {
|
|||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
}
|
||||
|
||||
void *diskspace_main(void *ptr) {
|
||||
netdata_thread_cleanup_push(diskspace_main_cleanup, ptr);
|
||||
#define WORKER_JOB_MOUNTINFO 0
|
||||
#define WORKER_JOB_MOUNTPOINT 1
|
||||
#define WORKER_JOB_CLEANUP 2
|
||||
|
||||
int vdo_cpu_netdata = config_get_boolean("plugin:proc", "netdata server resources", 1);
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 3
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3
|
||||
#endif
|
||||
|
||||
void *diskspace_main(void *ptr) {
|
||||
worker_register("DISKSPACE");
|
||||
worker_register_job_name(WORKER_JOB_MOUNTINFO, "mountinfo");
|
||||
worker_register_job_name(WORKER_JOB_MOUNTPOINT, "mountpoint");
|
||||
worker_register_job_name(WORKER_JOB_CLEANUP, "cleanup");
|
||||
|
||||
netdata_thread_cleanup_push(diskspace_main_cleanup, ptr);
|
||||
|
||||
cleanup_mount_points = config_get_boolean(CONFIG_SECTION_DISKSPACE, "remove charts of unmounted disks" , cleanup_mount_points);
|
||||
|
||||
|
@ -388,14 +401,11 @@ void *diskspace_main(void *ptr) {
|
|||
if(check_for_new_mountpoints_every < update_every)
|
||||
check_for_new_mountpoints_every = update_every;
|
||||
|
||||
struct rusage thread;
|
||||
|
||||
usec_t duration = 0;
|
||||
usec_t step = update_every * USEC_PER_SEC;
|
||||
heartbeat_t hb;
|
||||
heartbeat_init(&hb);
|
||||
while(!netdata_exit) {
|
||||
duration = heartbeat_monotonic_dt_to_now_usec(&hb);
|
||||
worker_is_idle();
|
||||
/* usec_t hb_dt = */ heartbeat_next(&hb, step);
|
||||
|
||||
if(unlikely(netdata_exit)) break;
|
||||
|
@ -404,9 +414,9 @@ void *diskspace_main(void *ptr) {
|
|||
// --------------------------------------------------------------------------
|
||||
// this is smart enough not to reload it every time
|
||||
|
||||
worker_is_busy(WORKER_JOB_MOUNTINFO);
|
||||
mountinfo_reload(0);
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// disk space metrics
|
||||
|
||||
|
@ -420,80 +430,20 @@ void *diskspace_main(void *ptr) {
|
|||
if(mi->flags & MOUNTINFO_READONLY && !strcmp(mi->root, mi->mount_point))
|
||||
continue;
|
||||
|
||||
worker_is_busy(WORKER_JOB_MOUNTPOINT);
|
||||
do_disk_space_stats(mi, update_every);
|
||||
if(unlikely(netdata_exit)) break;
|
||||
}
|
||||
|
||||
if(unlikely(netdata_exit)) break;
|
||||
|
||||
if(dict_mountpoints)
|
||||
if(dict_mountpoints) {
|
||||
worker_is_busy(WORKER_JOB_CLEANUP);
|
||||
dictionary_get_all(dict_mountpoints, mount_point_cleanup, NULL);
|
||||
|
||||
if(vdo_cpu_netdata) {
|
||||
static RRDSET *stcpu_thread = NULL, *st_duration = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL, *rd_duration = NULL;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if(unlikely(!stcpu_thread)) {
|
||||
stcpu_thread = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, "plugin_diskspace"
|
||||
, NULL
|
||||
, "diskspace"
|
||||
, NULL
|
||||
, "Netdata Disk Space Plugin CPU usage"
|
||||
, "milliseconds/s"
|
||||
, PLUGIN_DISKSPACE_NAME
|
||||
, NULL
|
||||
, NETDATA_CHART_PRIO_NETDATA_DISKSPACE
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rd_user = rrddim_add(stcpu_thread, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
else
|
||||
rrdset_next(stcpu_thread);
|
||||
|
||||
rrddim_set_by_pointer(stcpu_thread, rd_user, thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(stcpu_thread, rd_system, thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec);
|
||||
rrdset_done(stcpu_thread);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if(unlikely(!st_duration)) {
|
||||
st_duration = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, "plugin_diskspace_dt"
|
||||
, NULL
|
||||
, "diskspace"
|
||||
, NULL
|
||||
, "Netdata Disk Space Plugin Duration"
|
||||
, "milliseconds/run"
|
||||
, PLUGIN_DISKSPACE_NAME
|
||||
, NULL
|
||||
, 132021
|
||||
, update_every
|
||||
, RRDSET_TYPE_AREA
|
||||
);
|
||||
|
||||
rd_duration = rrddim_add(st_duration, "duration", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
else
|
||||
rrdset_next(st_duration);
|
||||
|
||||
rrddim_set_by_pointer(st_duration, rd_duration, duration);
|
||||
rrdset_done(st_duration);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if(unlikely(netdata_exit)) break;
|
||||
}
|
||||
|
||||
}
|
||||
worker_unregister();
|
||||
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
|
|
|
@ -1864,6 +1864,8 @@ static void ebpf_manage_pid(pid_t pid)
|
|||
*/
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
clocks_init();
|
||||
|
||||
set_global_variables();
|
||||
ebpf_parse_args(argc, argv);
|
||||
ebpf_manage_pid(getpid());
|
||||
|
|
|
@ -9,7 +9,6 @@ static struct freebsd_module {
|
|||
int enabled;
|
||||
|
||||
int (*func)(int update_every, usec_t dt);
|
||||
usec_t duration;
|
||||
|
||||
RRDDIM *rd;
|
||||
|
||||
|
@ -68,8 +67,14 @@ static struct freebsd_module {
|
|||
{.name = NULL, .dim = NULL, .enabled = 0, .func = NULL}
|
||||
};
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 33
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 33
|
||||
#endif
|
||||
|
||||
static void freebsd_main_cleanup(void *ptr)
|
||||
{
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -80,9 +85,9 @@ static void freebsd_main_cleanup(void *ptr)
|
|||
|
||||
void *freebsd_main(void *ptr)
|
||||
{
|
||||
netdata_thread_cleanup_push(freebsd_main_cleanup, ptr);
|
||||
worker_register("FREEBSD");
|
||||
|
||||
int vdo_cpu_netdata = config_get_boolean("plugin:freebsd", "netdata server resources", 1);
|
||||
netdata_thread_cleanup_push(freebsd_main_cleanup, ptr);
|
||||
|
||||
// initialize FreeBSD plugin
|
||||
if (freebsd_plugin_init())
|
||||
|
@ -94,8 +99,9 @@ void *freebsd_main(void *ptr)
|
|||
struct freebsd_module *pm = &freebsd_modules[i];
|
||||
|
||||
pm->enabled = config_get_boolean("plugin:freebsd", pm->name, pm->enabled);
|
||||
pm->duration = 0ULL;
|
||||
pm->rd = NULL;
|
||||
|
||||
worker_register_job_name(i, freebsd_modules[i].dim);
|
||||
}
|
||||
|
||||
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
|
||||
|
@ -103,14 +109,13 @@ void *freebsd_main(void *ptr)
|
|||
heartbeat_init(&hb);
|
||||
|
||||
while (!netdata_exit) {
|
||||
worker_is_idle();
|
||||
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
usec_t duration = 0ULL;
|
||||
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
|
||||
// BEGIN -- the job to be done
|
||||
|
||||
for (i = 0; freebsd_modules[i].name; i++) {
|
||||
struct freebsd_module *pm = &freebsd_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
|
@ -118,92 +123,12 @@ void *freebsd_main(void *ptr)
|
|||
|
||||
debug(D_PROCNETDEV_LOOP, "FREEBSD calling %s.", pm->name);
|
||||
|
||||
worker_is_busy(i);
|
||||
pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
|
||||
pm->duration = heartbeat_monotonic_dt_to_now_usec(&hb) - duration;
|
||||
duration += pm->duration;
|
||||
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
}
|
||||
|
||||
// END -- the job is done
|
||||
|
||||
if (vdo_cpu_netdata) {
|
||||
static RRDSET *st_cpu_thread = NULL, *st_duration = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
struct rusage thread;
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if (unlikely(!st_cpu_thread)) {
|
||||
st_cpu_thread = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_freebsd_cpu",
|
||||
NULL,
|
||||
"freebsd",
|
||||
NULL,
|
||||
"Netdata FreeBSD plugin CPU usage",
|
||||
"milliseconds/s",
|
||||
"freebsd.plugin",
|
||||
"stats",
|
||||
132000,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
rd_user = rrddim_add(st_cpu_thread, "user", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(st_cpu_thread, "system", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
} else {
|
||||
rrdset_next(st_cpu_thread);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_user, thread.ru_utime.tv_sec * USEC_PER_SEC + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_system, thread.ru_stime.tv_sec * USEC_PER_SEC + thread.ru_stime.tv_usec);
|
||||
rrdset_done(st_cpu_thread);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if (unlikely(!st_duration)) {
|
||||
st_duration = rrdset_find_active_bytype_localhost("netdata", "plugin_freebsd_modules");
|
||||
|
||||
if (!st_duration) {
|
||||
st_duration = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_freebsd_modules",
|
||||
NULL,
|
||||
"freebsd",
|
||||
NULL,
|
||||
"Netdata FreeBSD plugin modules durations",
|
||||
"milliseconds/run",
|
||||
"freebsd.plugin",
|
||||
"stats",
|
||||
132001,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
for (i = 0; freebsd_modules[i].name; i++) {
|
||||
struct freebsd_module *pm = &freebsd_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
pm->rd = rrddim_add(st_duration, pm->dim, NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
}
|
||||
} else
|
||||
rrdset_next(st_duration);
|
||||
|
||||
for (i = 0; freebsd_modules[i].name; i++) {
|
||||
struct freebsd_module *pm = &freebsd_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
rrddim_set_by_pointer(st_duration, pm->rd, pm->duration);
|
||||
}
|
||||
rrdset_done(st_duration);
|
||||
}
|
||||
}
|
||||
|
||||
netdata_thread_cleanup_pop(1);
|
||||
|
|
|
@ -1596,6 +1596,7 @@ int host_is_local(const char *host)
|
|||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// initialization of netdata plugin
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#define CPU_IDLEJITTER_SLEEP_TIME_MS 20
|
||||
|
||||
static void cpuidlejitter_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -14,6 +16,9 @@ static void cpuidlejitter_main_cleanup(void *ptr) {
|
|||
}
|
||||
|
||||
void *cpuidlejitter_main(void *ptr) {
|
||||
worker_register("IDLEJITTER");
|
||||
worker_register_job_name(0, "measurements");
|
||||
|
||||
netdata_thread_cleanup_push(cpuidlejitter_main_cleanup, ptr);
|
||||
|
||||
usec_t sleep_ut = config_get_number("plugin:idlejitter", "loop time in ms", CPU_IDLEJITTER_SLEEP_TIME_MS) * USEC_PER_MS;
|
||||
|
@ -55,7 +60,9 @@ void *cpuidlejitter_main(void *ptr) {
|
|||
|
||||
while(elapsed < update_every_ut) {
|
||||
now_monotonic_high_precision_timeval(&before);
|
||||
worker_is_idle();
|
||||
sleep_usec(sleep_ut);
|
||||
worker_is_busy(0);
|
||||
now_monotonic_high_precision_timeval(&after);
|
||||
|
||||
usec_t dt = dt_usec(&after, &before);
|
||||
|
|
|
@ -9,7 +9,6 @@ static struct macos_module {
|
|||
int enabled;
|
||||
|
||||
int (*func)(int update_every, usec_t dt);
|
||||
usec_t duration;
|
||||
|
||||
RRDDIM *rd;
|
||||
|
||||
|
@ -22,8 +21,14 @@ static struct macos_module {
|
|||
{.name = NULL, .dim = NULL, .enabled = 0, .func = NULL}
|
||||
};
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 3
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3
|
||||
#endif
|
||||
|
||||
static void macos_main_cleanup(void *ptr)
|
||||
{
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -34,17 +39,18 @@ static void macos_main_cleanup(void *ptr)
|
|||
|
||||
void *macos_main(void *ptr)
|
||||
{
|
||||
netdata_thread_cleanup_push(macos_main_cleanup, ptr);
|
||||
worker_register("MACOS");
|
||||
|
||||
int vdo_cpu_netdata = config_get_boolean("plugin:macos", "netdata server resources", CONFIG_BOOLEAN_YES);
|
||||
netdata_thread_cleanup_push(macos_main_cleanup, ptr);
|
||||
|
||||
// check the enabled status for each module
|
||||
for (int i = 0; macos_modules[i].name; i++) {
|
||||
struct macos_module *pm = &macos_modules[i];
|
||||
|
||||
pm->enabled = config_get_boolean("plugin:macos", pm->name, pm->enabled);
|
||||
pm->duration = 0ULL;
|
||||
pm->rd = NULL;
|
||||
|
||||
worker_register_job_name(i, macos_modules[i].dim);
|
||||
}
|
||||
|
||||
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
|
||||
|
@ -52,10 +58,8 @@ void *macos_main(void *ptr)
|
|||
heartbeat_init(&hb);
|
||||
|
||||
while (!netdata_exit) {
|
||||
worker_is_idle();
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
usec_t duration = 0ULL;
|
||||
|
||||
// BEGIN -- the job to be done
|
||||
|
||||
for (int i = 0; macos_modules[i].name; i++) {
|
||||
struct macos_module *pm = &macos_modules[i];
|
||||
|
@ -64,92 +68,12 @@ void *macos_main(void *ptr)
|
|||
|
||||
debug(D_PROCNETDEV_LOOP, "macos calling %s.", pm->name);
|
||||
|
||||
worker_is_busy(i);
|
||||
pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
|
||||
pm->duration = heartbeat_monotonic_dt_to_now_usec(&hb) - duration;
|
||||
duration += pm->duration;
|
||||
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
}
|
||||
|
||||
// END -- the job is done
|
||||
|
||||
if (vdo_cpu_netdata) {
|
||||
static RRDSET *st_cpu_thread = NULL, *st_duration = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
struct rusage thread;
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if (unlikely(!st_cpu_thread)) {
|
||||
st_cpu_thread = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_macos_cpu",
|
||||
NULL,
|
||||
"macos",
|
||||
NULL,
|
||||
"Netdata macOS plugin CPU usage",
|
||||
"milliseconds/s",
|
||||
"macos.plugin",
|
||||
"stats",
|
||||
132000,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
rd_user = rrddim_add(st_cpu_thread, "user", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(st_cpu_thread, "system", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
} else {
|
||||
rrdset_next(st_cpu_thread);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_user, thread.ru_utime.tv_sec * USEC_PER_SEC + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_system, thread.ru_stime.tv_sec * USEC_PER_SEC + thread.ru_stime.tv_usec);
|
||||
rrdset_done(st_cpu_thread);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if (unlikely(!st_duration)) {
|
||||
st_duration = rrdset_find_active_bytype_localhost("netdata", "plugin_macos_modules");
|
||||
|
||||
if (!st_duration) {
|
||||
st_duration = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_macos_modules",
|
||||
NULL,
|
||||
"macos",
|
||||
NULL,
|
||||
"Netdata macOS plugin modules durations",
|
||||
"milliseconds/run",
|
||||
"macos.plugin",
|
||||
"stats",
|
||||
132001,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
for (int i = 0; macos_modules[i].name; i++) {
|
||||
struct macos_module *pm = &macos_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
pm->rd = rrddim_add(st_duration, pm->dim, NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
}
|
||||
} else
|
||||
rrdset_next(st_duration);
|
||||
|
||||
for (int i = 0; macos_modules[i].name; i++) {
|
||||
struct macos_module *pm = &macos_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
rrddim_set_by_pointer(st_duration, pm->rd, pm->duration);
|
||||
}
|
||||
rrdset_done(st_duration);
|
||||
}
|
||||
}
|
||||
|
||||
netdata_thread_cleanup_pop(1);
|
||||
|
|
|
@ -745,6 +745,7 @@ void nfacct_signals()
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// initialization of netdata plugin
|
||||
|
|
|
@ -1283,6 +1283,7 @@ void parse_command_line(int argc, char **argv) {
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// initialization of netdata plugin
|
||||
|
|
|
@ -230,6 +230,8 @@ static void pluginsd_worker_thread_handle_error(struct plugind *cd, int worker_r
|
|||
|
||||
void *pluginsd_worker_thread(void *arg)
|
||||
{
|
||||
worker_register("PLUGINSD");
|
||||
|
||||
netdata_thread_cleanup_push(pluginsd_worker_thread_cleanup, arg);
|
||||
|
||||
struct plugind *cd = (struct plugind *)arg;
|
||||
|
@ -260,6 +262,7 @@ void *pluginsd_worker_thread(void *arg)
|
|||
if (unlikely(!cd->enabled))
|
||||
break;
|
||||
}
|
||||
worker_unregister();
|
||||
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
|
@ -281,6 +284,8 @@ static void pluginsd_main_cleanup(void *data)
|
|||
|
||||
info("cleanup completed.");
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
void *pluginsd_main(void *ptr)
|
||||
|
|
|
@ -9,7 +9,6 @@ static struct proc_module {
|
|||
int enabled;
|
||||
|
||||
int (*func)(int update_every, usec_t dt);
|
||||
usec_t duration;
|
||||
|
||||
RRDDIM *rd;
|
||||
|
||||
|
@ -66,9 +65,7 @@ static struct proc_module {
|
|||
|
||||
// ZFS metrics
|
||||
{.name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats},
|
||||
{.name = "/proc/spl/kstat/zfs/pool/state",
|
||||
.dim = "zfs_pool_state",
|
||||
.func = do_proc_spl_kstat_zfs_pool_state},
|
||||
{.name = "/proc/spl/kstat/zfs/pool/state",.dim = "zfs_pool_state",.func = do_proc_spl_kstat_zfs_pool_state},
|
||||
|
||||
// BTRFS metrics
|
||||
{.name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs},
|
||||
|
@ -83,6 +80,10 @@ static struct proc_module {
|
|||
{.name = NULL, .dim = NULL, .func = NULL}
|
||||
};
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 36
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 36
|
||||
#endif
|
||||
|
||||
static void proc_main_cleanup(void *ptr)
|
||||
{
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
|
@ -91,13 +92,15 @@ static void proc_main_cleanup(void *ptr)
|
|||
info("cleaning up...");
|
||||
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
void *proc_main(void *ptr)
|
||||
{
|
||||
netdata_thread_cleanup_push(proc_main_cleanup, ptr);
|
||||
worker_register("PROC");
|
||||
|
||||
int vdo_cpu_netdata = config_get_boolean("plugin:proc", "netdata server resources", CONFIG_BOOLEAN_YES);
|
||||
netdata_thread_cleanup_push(proc_main_cleanup, ptr);
|
||||
|
||||
config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO);
|
||||
|
||||
|
@ -107,128 +110,34 @@ void *proc_main(void *ptr)
|
|||
struct proc_module *pm = &proc_modules[i];
|
||||
|
||||
pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES);
|
||||
pm->duration = 0ULL;
|
||||
pm->rd = NULL;
|
||||
|
||||
worker_register_job_name(i, proc_modules[i].dim);
|
||||
}
|
||||
|
||||
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
|
||||
heartbeat_t hb;
|
||||
heartbeat_init(&hb);
|
||||
size_t iterations = 0;
|
||||
|
||||
while (!netdata_exit) {
|
||||
iterations++;
|
||||
(void)iterations;
|
||||
|
||||
worker_is_idle();
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
usec_t duration = 0ULL;
|
||||
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
|
||||
// BEGIN -- the job to be done
|
||||
|
||||
for (i = 0; proc_modules[i].name; i++) {
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
|
||||
struct proc_module *pm = &proc_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name);
|
||||
|
||||
//#ifdef NETDATA_LOG_ALLOCATIONS
|
||||
// if(pm->func == do_proc_interrupts)
|
||||
// log_thread_memory_allocations = iterations;
|
||||
//#endif
|
||||
worker_is_busy(i);
|
||||
pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
|
||||
pm->duration = heartbeat_monotonic_dt_to_now_usec(&hb) - duration;
|
||||
duration += pm->duration;
|
||||
|
||||
//#ifdef NETDATA_LOG_ALLOCATIONS
|
||||
// if(pm->func == do_proc_interrupts)
|
||||
// log_thread_memory_allocations = 0;
|
||||
//#endif
|
||||
|
||||
if (unlikely(netdata_exit))
|
||||
break;
|
||||
}
|
||||
|
||||
// END -- the job is done
|
||||
|
||||
if (vdo_cpu_netdata) {
|
||||
static RRDSET *st_cpu_thread = NULL, *st_duration = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
struct rusage thread;
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if (unlikely(!st_cpu_thread)) {
|
||||
st_cpu_thread = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_proc_cpu",
|
||||
NULL,
|
||||
"proc",
|
||||
NULL,
|
||||
"Netdata proc plugin CPU usage",
|
||||
"milliseconds/s",
|
||||
"proc",
|
||||
"stats",
|
||||
132000,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
rd_user = rrddim_add(st_cpu_thread, "user", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(st_cpu_thread, "system", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
} else {
|
||||
rrdset_next(st_cpu_thread);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_user, thread.ru_utime.tv_sec * USEC_PER_SEC + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(
|
||||
st_cpu_thread, rd_system, thread.ru_stime.tv_sec * USEC_PER_SEC + thread.ru_stime.tv_usec);
|
||||
rrdset_done(st_cpu_thread);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if (unlikely(!st_duration)) {
|
||||
st_duration = rrdset_find_active_bytype_localhost("netdata", "plugin_proc_modules");
|
||||
|
||||
if (!st_duration) {
|
||||
st_duration = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_proc_modules",
|
||||
NULL,
|
||||
"proc",
|
||||
NULL,
|
||||
"Netdata proc plugin modules durations",
|
||||
"milliseconds/run",
|
||||
"proc",
|
||||
"stats",
|
||||
132001,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
for (i = 0; proc_modules[i].name; i++) {
|
||||
struct proc_module *pm = &proc_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
pm->rd = rrddim_add(st_duration, pm->dim, NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
}
|
||||
} else
|
||||
rrdset_next(st_duration);
|
||||
|
||||
for (i = 0; proc_modules[i].name; i++) {
|
||||
struct proc_module *pm = &proc_modules[i];
|
||||
if (unlikely(!pm->enabled))
|
||||
continue;
|
||||
|
||||
rrddim_set_by_pointer(st_duration, pm->rd, pm->duration);
|
||||
}
|
||||
rrdset_done(st_duration);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -336,6 +336,7 @@ void usage(void) {
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
program_name = argv[0];
|
||||
program_version = "0.1";
|
||||
|
|
|
@ -9,6 +9,15 @@
|
|||
#define STATSD_LISTEN_PORT 8125
|
||||
#define STATSD_LISTEN_BACKLOG 4096
|
||||
|
||||
#define WORKER_JOB_TYPE_TCP_CONNECTED 0
|
||||
#define WORKER_JOB_TYPE_TCP_DISCONNECTED 1
|
||||
#define WORKER_JOB_TYPE_RCV_DATA 2
|
||||
#define WORKER_JOB_TYPE_SND_DATA 3
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 4
|
||||
#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least 4
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
// #define STATSD_MULTITHREADED 1
|
||||
|
@ -237,10 +246,6 @@ struct collection_thread_status {
|
|||
size_t max_sockets;
|
||||
|
||||
netdata_thread_t thread;
|
||||
struct rusage rusage;
|
||||
RRDSET *st_cpu;
|
||||
RRDDIM *rd_user;
|
||||
RRDDIM *rd_system;
|
||||
};
|
||||
|
||||
static struct statsd {
|
||||
|
@ -788,6 +793,7 @@ static void *statsd_add_callback(POLLINFO *pi, short int *events, void *data) {
|
|||
(void)pi;
|
||||
(void)data;
|
||||
|
||||
worker_is_busy(WORKER_JOB_TYPE_TCP_CONNECTED);
|
||||
*events = POLLIN;
|
||||
|
||||
struct statsd_tcp *t = (struct statsd_tcp *)callocz(sizeof(struct statsd_tcp) + STATSD_TCP_BUFFER_SIZE, 1);
|
||||
|
@ -796,11 +802,14 @@ static void *statsd_add_callback(POLLINFO *pi, short int *events, void *data) {
|
|||
statsd.tcp_socket_connects++;
|
||||
statsd.tcp_socket_connected++;
|
||||
|
||||
worker_is_idle();
|
||||
return t;
|
||||
}
|
||||
|
||||
// TCP client disconnected
|
||||
static void statsd_del_callback(POLLINFO *pi) {
|
||||
worker_is_busy(WORKER_JOB_TYPE_TCP_DISCONNECTED);
|
||||
|
||||
struct statsd_tcp *t = pi->data;
|
||||
|
||||
if(likely(t)) {
|
||||
|
@ -818,10 +827,15 @@ static void statsd_del_callback(POLLINFO *pi) {
|
|||
|
||||
freez(t);
|
||||
}
|
||||
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
// Receive data
|
||||
static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
||||
int retval = -1;
|
||||
worker_is_busy(WORKER_JOB_TYPE_RCV_DATA);
|
||||
|
||||
*events = POLLIN;
|
||||
|
||||
int fd = pi->fd;
|
||||
|
@ -832,14 +846,16 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
if(unlikely(!d)) {
|
||||
error("STATSD: internal error: expected TCP data pointer is NULL");
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_TCP)) {
|
||||
error("STATSD: internal error: socket data type should be %d, but it is %d", (int)STATSD_SOCKET_DATA_TYPE_TCP, (int)d->type);
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -872,8 +888,10 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
d->len = statsd_process(d->buffer, d->len, 1);
|
||||
}
|
||||
|
||||
if(unlikely(ret == -1))
|
||||
return -1;
|
||||
if(unlikely(ret == -1)) {
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
} while (rc != -1);
|
||||
break;
|
||||
|
@ -884,14 +902,16 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
if(unlikely(!d)) {
|
||||
error("STATSD: internal error: expected UDP data pointer is NULL");
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_UDP)) {
|
||||
error("STATSD: internal error: socket data should be %d, but it is %d", (int)d->type, (int)STATSD_SOCKET_DATA_TYPE_UDP);
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -904,7 +924,8 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) {
|
||||
error("STATSD: recvmmsg() on UDP socket %d failed.", fd);
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
} else if (rc) {
|
||||
// data received
|
||||
|
@ -929,7 +950,8 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) {
|
||||
error("STATSD: recv() on UDP socket %d failed.", fd);
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
} else if (rc) {
|
||||
// data received
|
||||
|
@ -947,24 +969,26 @@ static int statsd_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
default: {
|
||||
error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd);
|
||||
statsd.socket_errors++;
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
retval = 0;
|
||||
cleanup:
|
||||
worker_is_idle();
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int statsd_snd_callback(POLLINFO *pi, short int *events) {
|
||||
(void)pi;
|
||||
(void)events;
|
||||
|
||||
worker_is_busy(WORKER_JOB_TYPE_SND_DATA);
|
||||
error("STATSD: snd_callback() called, but we never requested to send data to statsd clients.");
|
||||
return -1;
|
||||
}
|
||||
worker_is_idle();
|
||||
|
||||
static void statsd_timer_callback(void *timer_data) {
|
||||
struct collection_thread_status *status = timer_data;
|
||||
getrusage(RUSAGE_THREAD, &status->rusage);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------------
|
||||
|
@ -986,12 +1010,19 @@ void statsd_collector_thread_cleanup(void *data) {
|
|||
#endif
|
||||
|
||||
freez(d);
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
void *statsd_collector_thread(void *ptr) {
|
||||
struct collection_thread_status *status = ptr;
|
||||
status->status = 1;
|
||||
|
||||
worker_register("STATSD");
|
||||
worker_register_job_name(WORKER_JOB_TYPE_TCP_CONNECTED, "tcp connect");
|
||||
worker_register_job_name(WORKER_JOB_TYPE_TCP_DISCONNECTED, "tcp disconnect");
|
||||
worker_register_job_name(WORKER_JOB_TYPE_RCV_DATA, "receive");
|
||||
worker_register_job_name(WORKER_JOB_TYPE_SND_DATA, "send");
|
||||
|
||||
info("STATSD collector thread started with taskid %d", gettid());
|
||||
|
||||
struct statsd_udp *d = callocz(sizeof(struct statsd_udp), 1);
|
||||
|
@ -1019,7 +1050,7 @@ void *statsd_collector_thread(void *ptr) {
|
|||
, statsd_del_callback
|
||||
, statsd_rcv_callback
|
||||
, statsd_snd_callback
|
||||
, statsd_timer_callback
|
||||
, NULL
|
||||
, NULL // No access control pattern
|
||||
, 0 // No dns lookups for access control pattern
|
||||
, (void *)d
|
||||
|
@ -2147,9 +2178,32 @@ static void statsd_main_cleanup(void *data) {
|
|||
|
||||
info("STATSD: cleanup completed.");
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
#define WORKER_STATSD_FLUSH_GAUGES 0
|
||||
#define WORKER_STATSD_FLUSH_COUNTERS 1
|
||||
#define WORKER_STATSD_FLUSH_METERS 2
|
||||
#define WORKER_STATSD_FLUSH_TIMERS 3
|
||||
#define WORKER_STATSD_FLUSH_HISTOGRAMS 4
|
||||
#define WORKER_STATSD_FLUSH_SETS 5
|
||||
#define WORKER_STATSD_FLUSH_STATS 6
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 7
|
||||
// the WORKER_STATSD_FLUSH_* job type ids run from 0 to 6, so 7 slots are needed
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 7
|
||||
#endif
|
||||
|
||||
void *statsd_main(void *ptr) {
|
||||
worker_register("STATSDFLUSH");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_GAUGES, "gauges");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_COUNTERS, "counters");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_METERS, "meters");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_TIMERS, "timers");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_HISTOGRAMS, "histograms");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_SETS, "sets");
|
||||
worker_register_job_name(WORKER_STATSD_FLUSH_STATS, "statistics");
|
||||
|
||||
netdata_thread_cleanup_push(statsd_main_cleanup, ptr);
|
||||
|
||||
// ----------------------------------------------------------------------------------------------------------------
|
||||
|
@ -2420,71 +2474,37 @@ void *statsd_main(void *ptr) {
|
|||
);
|
||||
RRDDIM *rd_pcharts = rrddim_add(st_pcharts, "charts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
|
||||
RRDSET *stcpu_thread = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, "plugin_statsd_charting_cpu"
|
||||
, NULL
|
||||
, "statsd"
|
||||
, "netdata.statsd_cpu"
|
||||
, "Netdata statsd charting thread CPU usage"
|
||||
, "milliseconds/s"
|
||||
, PLUGIN_STATSD_NAME
|
||||
, "stats"
|
||||
, 132001
|
||||
, statsd.update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
RRDDIM *rd_user = rrddim_add(stcpu_thread, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
RRDDIM *rd_system = rrddim_add(stcpu_thread, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
struct rusage thread;
|
||||
|
||||
for(i = 0; i < statsd.threads ;i++) {
|
||||
char id[100 + 1];
|
||||
char title[100 + 1];
|
||||
|
||||
snprintfz(id, 100, "plugin_statsd_collector%d_cpu", i + 1);
|
||||
snprintfz(title, 100, "Netdata statsd collector thread No %d CPU usage", i + 1);
|
||||
|
||||
statsd.collection_threads_status[i].st_cpu = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, id
|
||||
, NULL
|
||||
, "statsd"
|
||||
, "netdata.statsd_cpu"
|
||||
, title
|
||||
, "milliseconds/s"
|
||||
, PLUGIN_STATSD_NAME
|
||||
, "stats"
|
||||
, 132002 + i
|
||||
, statsd.update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
statsd.collection_threads_status[i].rd_user = rrddim_add(statsd.collection_threads_status[i].st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
statsd.collection_threads_status[i].rd_system = rrddim_add(statsd.collection_threads_status[i].st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------------------------------
|
||||
// statsd thread to turn metrics into charts
|
||||
|
||||
usec_t step = statsd.update_every * USEC_PER_SEC;
|
||||
heartbeat_t hb;
|
||||
heartbeat_init(&hb);
|
||||
while(!netdata_exit) {
|
||||
worker_is_idle();
|
||||
usec_t hb_dt = heartbeat_next(&hb, step);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_GAUGES);
|
||||
statsd_flush_index_metrics(&statsd.gauges, statsd_flush_gauge);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_COUNTERS);
|
||||
statsd_flush_index_metrics(&statsd.counters, statsd_flush_counter);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_METERS);
|
||||
statsd_flush_index_metrics(&statsd.meters, statsd_flush_meter);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_TIMERS);
|
||||
statsd_flush_index_metrics(&statsd.timers, statsd_flush_timer);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_HISTOGRAMS);
|
||||
statsd_flush_index_metrics(&statsd.histograms, statsd_flush_histogram);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_SETS);
|
||||
statsd_flush_index_metrics(&statsd.sets, statsd_flush_set);
|
||||
|
||||
worker_is_busy(WORKER_STATSD_FLUSH_STATS);
|
||||
statsd_update_all_app_charts();
|
||||
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if(unlikely(netdata_exit))
|
||||
break;
|
||||
|
||||
|
@ -2498,9 +2518,6 @@ void *statsd_main(void *ptr) {
|
|||
rrdset_next(st_tcp_connects);
|
||||
rrdset_next(st_tcp_connected);
|
||||
rrdset_next(st_pcharts);
|
||||
rrdset_next(stcpu_thread);
|
||||
for(i = 0; i < statsd.threads ;i++)
|
||||
rrdset_next(statsd.collection_threads_status[i].st_cpu);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_metrics, rd_metrics_gauge, (collected_number)statsd.gauges.metrics);
|
||||
|
@ -2550,16 +2567,6 @@ void *statsd_main(void *ptr) {
|
|||
|
||||
rrddim_set_by_pointer(st_pcharts, rd_pcharts, (collected_number)statsd.private_charts);
|
||||
rrdset_done(st_pcharts);
|
||||
|
||||
rrddim_set_by_pointer(stcpu_thread, rd_user, thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(stcpu_thread, rd_system, thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec);
|
||||
rrdset_done(stcpu_thread);
|
||||
|
||||
for(i = 0; i < statsd.threads ;i++) {
|
||||
rrddim_set_by_pointer(statsd.collection_threads_status[i].st_cpu, statsd.collection_threads_status[i].rd_user, statsd.collection_threads_status[i].rusage.ru_utime.tv_sec * 1000000ULL + statsd.collection_threads_status[i].rusage.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(statsd.collection_threads_status[i].st_cpu, statsd.collection_threads_status[i].rd_system, statsd.collection_threads_status[i].rusage.ru_stime.tv_sec * 1000000ULL + statsd.collection_threads_status[i].rusage.ru_stime.tv_usec);
|
||||
rrdset_done(statsd.collection_threads_status[i].st_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement
|
||||
|
|
|
@ -844,6 +844,8 @@ static inline void tc_split_words(char *str, char **words, int max_words) {
|
|||
static pid_t tc_child_pid = 0;
|
||||
|
||||
static void tc_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -864,10 +866,35 @@ static void tc_main_cleanup(void *ptr) {
|
|||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
}
|
||||
|
||||
void *tc_main(void *ptr) {
|
||||
netdata_thread_cleanup_push(tc_main_cleanup, ptr);
|
||||
#define WORKER_TC_CLASS 0
|
||||
#define WORKER_TC_BEGIN 1
|
||||
#define WORKER_TC_END 2
|
||||
#define WORKER_TC_SENT 3
|
||||
#define WORKER_TC_LENDED 4
|
||||
#define WORKER_TC_TOKENS 5
|
||||
#define WORKER_TC_SETDEVICENAME 6
|
||||
#define WORKER_TC_SETDEVICEGROUP 7
|
||||
#define WORKER_TC_SETCLASSNAME 8
|
||||
#define WORKER_TC_WORKTIME 9
|
||||
|
||||
struct rusage thread;
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 10
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 10
|
||||
#endif
|
||||
|
||||
void *tc_main(void *ptr) {
|
||||
worker_register("TC");
|
||||
worker_register_job_name(WORKER_TC_CLASS, "class");
|
||||
worker_register_job_name(WORKER_TC_BEGIN, "begin");
|
||||
worker_register_job_name(WORKER_TC_END, "end");
|
||||
worker_register_job_name(WORKER_TC_SENT, "sent");
|
||||
worker_register_job_name(WORKER_TC_LENDED, "lended");
|
||||
worker_register_job_name(WORKER_TC_TOKENS, "tokens");
|
||||
worker_register_job_name(WORKER_TC_SETDEVICENAME, "devicename");
|
||||
worker_register_job_name(WORKER_TC_SETDEVICEGROUP, "devicegroup");
|
||||
worker_register_job_name(WORKER_TC_SETCLASSNAME, "classname");
|
||||
worker_register_job_name(WORKER_TC_WORKTIME, "worktime");
|
||||
|
||||
netdata_thread_cleanup_push(tc_main_cleanup, ptr);
|
||||
|
||||
char command[FILENAME_MAX + 1];
|
||||
char *words[PLUGINSD_MAX_WORDS] = { NULL };
|
||||
|
@ -913,6 +940,7 @@ void *tc_main(void *ptr) {
|
|||
|
||||
if(unlikely(!words[0] || !*words[0])) {
|
||||
// debug(D_TC_LOOP, "empty line");
|
||||
worker_is_idle();
|
||||
continue;
|
||||
}
|
||||
// else debug(D_TC_LOOP, "First word is '%s'", words[0]);
|
||||
|
@ -920,6 +948,8 @@ void *tc_main(void *ptr) {
|
|||
first_hash = simple_hash(words[0]);
|
||||
|
||||
if(unlikely(device && ((first_hash == CLASS_HASH && strcmp(words[0], "class") == 0) || (first_hash == QDISC_HASH && strcmp(words[0], "qdisc") == 0)))) {
|
||||
worker_is_busy(WORKER_TC_CLASS);
|
||||
|
||||
// debug(D_TC_LOOP, "CLASS line on class id='%s', parent='%s', parentid='%s', leaf='%s', leafid='%s'", words[2], words[3], words[4], words[5], words[6]);
|
||||
|
||||
char *type = words[1]; // the class/qdisc type: htb, fq_codel, etc
|
||||
|
@ -949,6 +979,7 @@ void *tc_main(void *ptr) {
|
|||
// there should be an IFB interface for this
|
||||
|
||||
class = NULL;
|
||||
worker_is_idle();
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -985,6 +1016,8 @@ void *tc_main(void *ptr) {
|
|||
}
|
||||
}
|
||||
else if(unlikely(first_hash == END_HASH && strcmp(words[0], "END") == 0)) {
|
||||
worker_is_busy(WORKER_TC_END);
|
||||
|
||||
// debug(D_TC_LOOP, "END line");
|
||||
|
||||
if(likely(device)) {
|
||||
|
@ -998,6 +1031,8 @@ void *tc_main(void *ptr) {
|
|||
class = NULL;
|
||||
}
|
||||
else if(unlikely(first_hash == BEGIN_HASH && strcmp(words[0], "BEGIN") == 0)) {
|
||||
worker_is_busy(WORKER_TC_BEGIN);
|
||||
|
||||
// debug(D_TC_LOOP, "BEGIN line on device '%s'", words[1]);
|
||||
|
||||
if(likely(words[1] && *words[1])) {
|
||||
|
@ -1011,6 +1046,8 @@ void *tc_main(void *ptr) {
|
|||
class = NULL;
|
||||
}
|
||||
else if(unlikely(device && class && first_hash == SENT_HASH && strcmp(words[0], "Sent") == 0)) {
|
||||
worker_is_busy(WORKER_TC_SENT);
|
||||
|
||||
// debug(D_TC_LOOP, "SENT line '%s'", words[1]);
|
||||
if(likely(words[1] && *words[1])) {
|
||||
class->bytes = str2ull(words[1]);
|
||||
|
@ -1033,6 +1070,8 @@ void *tc_main(void *ptr) {
|
|||
class->requeues = str2ull(words[8]);
|
||||
}
|
||||
else if(unlikely(device && class && class->updated && first_hash == LENDED_HASH && strcmp(words[0], "lended:") == 0)) {
|
||||
worker_is_busy(WORKER_TC_LENDED);
|
||||
|
||||
// debug(D_TC_LOOP, "LENDED line '%s'", words[1]);
|
||||
if(likely(words[1] && *words[1]))
|
||||
class->lended = str2ull(words[1]);
|
||||
|
@ -1044,6 +1083,8 @@ void *tc_main(void *ptr) {
|
|||
class->giants = str2ull(words[5]);
|
||||
}
|
||||
else if(unlikely(device && class && class->updated && first_hash == TOKENS_HASH && strcmp(words[0], "tokens:") == 0)) {
|
||||
worker_is_busy(WORKER_TC_TOKENS);
|
||||
|
||||
// debug(D_TC_LOOP, "TOKENS line '%s'", words[1]);
|
||||
if(likely(words[1] && *words[1]))
|
||||
class->tokens = str2ull(words[1]);
|
||||
|
@ -1052,16 +1093,22 @@ void *tc_main(void *ptr) {
|
|||
class->ctokens = str2ull(words[3]);
|
||||
}
|
||||
else if(unlikely(device && first_hash == SETDEVICENAME_HASH && strcmp(words[0], "SETDEVICENAME") == 0)) {
|
||||
worker_is_busy(WORKER_TC_SETDEVICENAME);
|
||||
|
||||
// debug(D_TC_LOOP, "SETDEVICENAME line '%s'", words[1]);
|
||||
if(likely(words[1] && *words[1]))
|
||||
tc_device_set_device_name(device, words[1]);
|
||||
}
|
||||
else if(unlikely(device && first_hash == SETDEVICEGROUP_HASH && strcmp(words[0], "SETDEVICEGROUP") == 0)) {
|
||||
worker_is_busy(WORKER_TC_SETDEVICEGROUP);
|
||||
|
||||
// debug(D_TC_LOOP, "SETDEVICEGROUP line '%s'", words[1]);
|
||||
if(likely(words[1] && *words[1]))
|
||||
tc_device_set_device_family(device, words[1]);
|
||||
}
|
||||
else if(unlikely(device && first_hash == SETCLASSNAME_HASH && strcmp(words[0], "SETCLASSNAME") == 0)) {
|
||||
worker_is_busy(WORKER_TC_SETCLASSNAME);
|
||||
|
||||
// debug(D_TC_LOOP, "SETCLASSNAME line '%s' '%s'", words[1], words[2]);
|
||||
char *id = words[1];
|
||||
char *path = words[2];
|
||||
|
@ -1069,36 +1116,9 @@ void *tc_main(void *ptr) {
|
|||
tc_device_set_class_name(device, id, path);
|
||||
}
|
||||
else if(unlikely(first_hash == WORKTIME_HASH && strcmp(words[0], "WORKTIME") == 0)) {
|
||||
worker_is_busy(WORKER_TC_WORKTIME);
|
||||
|
||||
// debug(D_TC_LOOP, "WORKTIME line '%s' '%s'", words[1], words[2]);
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
static RRDSET *stcpu = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL;
|
||||
|
||||
if(unlikely(!stcpu)) {
|
||||
stcpu = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, "plugin_tc_cpu"
|
||||
, NULL
|
||||
, "tc.helper"
|
||||
, NULL
|
||||
, "Netdata TC CPU usage"
|
||||
, "milliseconds/s"
|
||||
, PLUGIN_TC_NAME
|
||||
, NULL
|
||||
, NETDATA_CHART_PRIO_NETDATA_TC_CPU
|
||||
, localhost->rrd_update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
rd_user = rrddim_add(stcpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(stcpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
else rrdset_next(stcpu);
|
||||
|
||||
rrddim_set_by_pointer(stcpu, rd_user , thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(stcpu, rd_system, thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec);
|
||||
rrdset_done(stcpu);
|
||||
|
||||
static RRDSET *sttime = NULL;
|
||||
static RRDDIM *rd_run_time = NULL;
|
||||
|
||||
|
@ -1107,8 +1127,8 @@ void *tc_main(void *ptr) {
|
|||
"netdata"
|
||||
, "plugin_tc_time"
|
||||
, NULL
|
||||
, "tc.helper"
|
||||
, NULL
|
||||
, "workers plugin tc"
|
||||
, "netdata.workers.tc.script_time"
|
||||
, "Netdata TC script execution"
|
||||
, "milliseconds/run"
|
||||
, PLUGIN_TC_NAME
|
||||
|
@ -1128,6 +1148,8 @@ void *tc_main(void *ptr) {
|
|||
//else {
|
||||
// debug(D_TC_LOOP, "IGNORED line");
|
||||
//}
|
||||
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
// fgets() failed or loop broke
|
||||
|
@ -1158,6 +1180,7 @@ void *tc_main(void *ptr) {
|
|||
}
|
||||
|
||||
cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement
|
||||
worker_unregister();
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@ struct status_codes {
|
|||
|
||||
static void timex_main_cleanup(void *ptr)
|
||||
{
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -42,9 +44,10 @@ static void timex_main_cleanup(void *ptr)
|
|||
|
||||
void *timex_main(void *ptr)
|
||||
{
|
||||
netdata_thread_cleanup_push(timex_main_cleanup, ptr);
|
||||
worker_register("TIMEX");
|
||||
worker_register_job_name(0, "clock check");
|
||||
|
||||
int vdo_cpu_netdata = config_get_boolean(CONFIG_SECTION_TIMEX, "timex plugin resource charts", CONFIG_BOOLEAN_YES);
|
||||
netdata_thread_cleanup_push(timex_main_cleanup, ptr);
|
||||
|
||||
int update_every = (int)config_get_number(CONFIG_SECTION_TIMEX, "update every", 10);
|
||||
if (update_every < localhost->rrd_update_every)
|
||||
|
@ -62,8 +65,9 @@ void *timex_main(void *ptr)
|
|||
heartbeat_t hb;
|
||||
heartbeat_init(&hb);
|
||||
while (!netdata_exit) {
|
||||
usec_t duration = heartbeat_monotonic_dt_to_now_usec(&hb);
|
||||
worker_is_idle();
|
||||
heartbeat_next(&hb, step);
|
||||
worker_is_busy(0);
|
||||
|
||||
struct timex timex_buf = {};
|
||||
int sync_state = 0;
|
||||
|
@ -170,68 +174,6 @@ void *timex_main(void *ptr)
|
|||
rrddim_set_by_pointer(st_offset, rd_offset, timex_buf.offset);
|
||||
rrdset_done(st_offset);
|
||||
}
|
||||
|
||||
if (vdo_cpu_netdata) {
|
||||
static RRDSET *stcpu_thread = NULL, *st_duration = NULL;
|
||||
static RRDDIM *rd_user = NULL, *rd_system = NULL, *rd_duration = NULL;
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
struct rusage thread;
|
||||
getrusage(RUSAGE_THREAD, &thread);
|
||||
|
||||
if (unlikely(!stcpu_thread)) {
|
||||
stcpu_thread = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_timex",
|
||||
NULL,
|
||||
"timex",
|
||||
NULL,
|
||||
"Netdata Timex Plugin CPU usage",
|
||||
"milliseconds/s",
|
||||
PLUGIN_TIMEX_NAME,
|
||||
NULL,
|
||||
NETDATA_CHART_PRIO_NETDATA_TIMEX,
|
||||
update_every,
|
||||
RRDSET_TYPE_STACKED);
|
||||
|
||||
rd_user = rrddim_add(stcpu_thread, "user", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(stcpu_thread, "system", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_INCREMENTAL);
|
||||
} else {
|
||||
rrdset_next(stcpu_thread);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(
|
||||
stcpu_thread, rd_user, thread.ru_utime.tv_sec * USEC_PER_SEC + thread.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(
|
||||
stcpu_thread, rd_system, thread.ru_stime.tv_sec * USEC_PER_SEC + thread.ru_stime.tv_usec);
|
||||
rrdset_done(stcpu_thread);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
if (unlikely(!st_duration)) {
|
||||
st_duration = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"plugin_timex_dt",
|
||||
NULL,
|
||||
"timex",
|
||||
NULL,
|
||||
"Netdata Timex Plugin Duration",
|
||||
"milliseconds/run",
|
||||
PLUGIN_TIMEX_NAME,
|
||||
NULL,
|
||||
NETDATA_CHART_PRIO_NETDATA_TIMEX + 1,
|
||||
update_every,
|
||||
RRDSET_TYPE_AREA);
|
||||
|
||||
rd_duration = rrddim_add(st_duration, "duration", NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE);
|
||||
} else {
|
||||
rrdset_next(st_duration);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_duration, rd_duration, duration);
|
||||
rrdset_done(st_duration);
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
|
|
|
@ -920,6 +920,7 @@ static void xenstat_send_domain_metrics() {
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
clocks_init();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// initialization of netdata plugin
|
||||
|
|
|
@ -1767,6 +1767,7 @@ AC_CONFIG_FILES([
|
|||
libnetdata/url/Makefile
|
||||
libnetdata/json/Makefile
|
||||
libnetdata/health/Makefile
|
||||
libnetdata/worker_utilization/Makefile
|
||||
registry/Makefile
|
||||
streaming/Makefile
|
||||
system/Makefile
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,6 +11,10 @@ rrdeng_stats_t global_flushing_pressure_page_deletions = 0;
|
|||
|
||||
static unsigned pages_per_extent = MAX_PAGES_PER_EXTENT;
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2)
|
||||
#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least (RRDENG_MAX_OPCODE + 2)
|
||||
#endif
|
||||
|
||||
void *dbengine_page_alloc() {
|
||||
void *page = netdata_mmap(NULL, RRDENG_BLOCK_SIZE, MAP_PRIVATE, enable_ksm);
|
||||
if(!page) fatal("Cannot allocate dbengine page cache page, with mmap()");
|
||||
|
@ -23,6 +27,8 @@ void dbengine_page_free(void *page) {
|
|||
|
||||
static void sanity_check(void)
|
||||
{
|
||||
BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2));
|
||||
|
||||
/* Magic numbers must fit in the super-blocks */
|
||||
BUILD_BUG_ON(strlen(RRDENG_DF_MAGIC) > RRDENG_MAGIC_SZ);
|
||||
BUILD_BUG_ON(strlen(RRDENG_JF_MAGIC) > RRDENG_MAGIC_SZ);
|
||||
|
@ -1085,13 +1091,17 @@ void async_cb(uv_async_t *handle)
|
|||
|
||||
void timer_cb(uv_timer_t* handle)
|
||||
{
|
||||
worker_is_busy(RRDENG_MAX_OPCODE + 1);
|
||||
|
||||
struct rrdengine_worker_config* wc = handle->data;
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
|
||||
uv_stop(handle->loop);
|
||||
uv_update_time(handle->loop);
|
||||
if (unlikely(!ctx->metalog_ctx->initialized))
|
||||
if (unlikely(!ctx->metalog_ctx->initialized)) {
|
||||
worker_is_idle();
|
||||
return; /* Wait for the metadata log to initialize */
|
||||
}
|
||||
rrdeng_test_quota(wc);
|
||||
debug(D_RRDENGINE, "%s: timeout reached.", __func__);
|
||||
if (likely(!wc->now_deleting_files && !wc->now_invalidating_dirty_pages)) {
|
||||
|
@ -1133,12 +1143,26 @@ void timer_cb(uv_timer_t* handle)
|
|||
debug(D_RRDENGINE, "%s", get_rrdeng_statistics(wc->ctx, buf, sizeof(buf)));
|
||||
}
|
||||
#endif
|
||||
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
#define MAX_CMD_BATCH_SIZE (256)
|
||||
|
||||
void rrdeng_worker(void* arg)
|
||||
{
|
||||
worker_register("DBENGINE");
|
||||
worker_register_job_name(RRDENG_NOOP, "noop");
|
||||
worker_register_job_name(RRDENG_READ_PAGE, "page read");
|
||||
worker_register_job_name(RRDENG_READ_EXTENT, "extent read");
|
||||
worker_register_job_name(RRDENG_COMMIT_PAGE, "commit");
|
||||
worker_register_job_name(RRDENG_FLUSH_PAGES, "flush");
|
||||
worker_register_job_name(RRDENG_SHUTDOWN, "shutdown");
|
||||
worker_register_job_name(RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE, "page lru");
|
||||
worker_register_job_name(RRDENG_QUIESCE, "quiesce");
|
||||
worker_register_job_name(RRDENG_MAX_OPCODE, "cleanup");
|
||||
worker_register_job_name(RRDENG_MAX_OPCODE + 1, "timer");
|
||||
|
||||
struct rrdengine_worker_config* wc = arg;
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
uv_loop_t* loop;
|
||||
|
@ -1188,7 +1212,9 @@ void rrdeng_worker(void* arg)
|
|||
shutdown = 0;
|
||||
int set_name = 0;
|
||||
while (likely(shutdown == 0 || rrdeng_threads_alive(wc))) {
|
||||
worker_is_idle();
|
||||
uv_run(loop, UV_RUN_DEFAULT);
|
||||
worker_is_busy(RRDENG_MAX_OPCODE);
|
||||
rrdeng_cleanup_finished_threads(wc);
|
||||
|
||||
/* wait for commands */
|
||||
|
@ -1205,6 +1231,9 @@ void rrdeng_worker(void* arg)
|
|||
opcode = cmd.opcode;
|
||||
++cmd_batch_size;
|
||||
|
||||
if(likely(opcode != RRDENG_NOOP))
|
||||
worker_is_busy(opcode);
|
||||
|
||||
switch (opcode) {
|
||||
case RRDENG_NOOP:
|
||||
/* the command queue was empty, do nothing */
|
||||
|
@ -1281,6 +1310,7 @@ void rrdeng_worker(void* arg)
|
|||
fatal_assert(0 == uv_loop_close(loop));
|
||||
freez(loop);
|
||||
|
||||
worker_unregister();
|
||||
return;
|
||||
|
||||
error_after_timer_init:
|
||||
|
@ -1293,6 +1323,7 @@ error_after_loop_init:
|
|||
wc->error = UV_EAGAIN;
|
||||
/* wake up initialization thread */
|
||||
completion_mark_complete(&ctx->rrdengine_completion);
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
/* C entry point for development purposes
|
||||
|
|
|
@ -10,6 +10,11 @@
|
|||
#include "../../aclk/aclk.h"
|
||||
#endif
|
||||
|
||||
void sanity_check(void) {
|
||||
// make sure the compiler will stop on misconfigurations
|
||||
BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < ACLK_MAX_ENUMERATIONS_DEFINED);
|
||||
}
|
||||
|
||||
const char *aclk_sync_config[] = {
|
||||
"CREATE TABLE IF NOT EXISTS dimension_delete (dimension_id blob, dimension_name text, chart_type_id text, "
|
||||
"dim_id blob, chart_id blob, host_id blob, date_created);",
|
||||
|
@ -352,6 +357,29 @@ static void timer_cb(uv_timer_t* handle)
|
|||
|
||||
void aclk_database_worker(void *arg)
|
||||
{
|
||||
worker_register("ACLKSYNC");
|
||||
worker_register_job_name(ACLK_DATABASE_NOOP, "noop");
|
||||
#ifdef ENABLE_NEW_CLOUD_PROTOCOL
|
||||
worker_register_job_name(ACLK_DATABASE_ADD_CHART, "chart add");
|
||||
worker_register_job_name(ACLK_DATABASE_ADD_DIMENSION, "dimension add");
|
||||
worker_register_job_name(ACLK_DATABASE_PUSH_CHART, "chart push");
|
||||
worker_register_job_name(ACLK_DATABASE_PUSH_CHART_CONFIG, "chart conf push");
|
||||
worker_register_job_name(ACLK_DATABASE_RESET_CHART, "chart reset");
|
||||
worker_register_job_name(ACLK_DATABASE_CHART_ACK, "chart ack");
|
||||
worker_register_job_name(ACLK_DATABASE_UPD_RETENTION, "retention check");
|
||||
worker_register_job_name(ACLK_DATABASE_DIM_DELETION, "dimension delete");
|
||||
worker_register_job_name(ACLK_DATABASE_ORPHAN_HOST, "node orphan");
|
||||
#endif
|
||||
worker_register_job_name(ACLK_DATABASE_ALARM_HEALTH_LOG, "alert log");
|
||||
worker_register_job_name(ACLK_DATABASE_CLEANUP, "cleanup");
|
||||
worker_register_job_name(ACLK_DATABASE_DELETE_HOST, "node delete");
|
||||
worker_register_job_name(ACLK_DATABASE_NODE_INFO, "node info");
|
||||
worker_register_job_name(ACLK_DATABASE_PUSH_ALERT, "alert push");
|
||||
worker_register_job_name(ACLK_DATABASE_PUSH_ALERT_CONFIG, "alert conf push");
|
||||
worker_register_job_name(ACLK_DATABASE_PUSH_ALERT_SNAPSHOT, "alert snapshot");
|
||||
worker_register_job_name(ACLK_DATABASE_QUEUE_REMOVED_ALERTS, "alerts check");
|
||||
worker_register_job_name(ACLK_DATABASE_TIMER, "timer");
|
||||
|
||||
struct aclk_database_worker_config *wc = arg;
|
||||
uv_loop_t *loop;
|
||||
int ret;
|
||||
|
@ -413,6 +441,7 @@ void aclk_database_worker(void *arg)
|
|||
|
||||
debug(D_ACLK_SYNC,"Node %s reports pending message count = %u", wc->node_id, wc->chart_payload_count);
|
||||
while (likely(!netdata_exit)) {
|
||||
worker_is_idle();
|
||||
uv_run(loop, UV_RUN_DEFAULT);
|
||||
|
||||
/* wait for commands */
|
||||
|
@ -427,6 +456,10 @@ void aclk_database_worker(void *arg)
|
|||
|
||||
opcode = cmd.opcode;
|
||||
++cmd_batch_size;
|
||||
|
||||
if(likely(opcode != ACLK_DATABASE_NOOP))
|
||||
worker_is_busy(opcode);
|
||||
|
||||
switch (opcode) {
|
||||
case ACLK_DATABASE_NOOP:
|
||||
/* the command queue was empty, do nothing */
|
||||
|
@ -439,6 +472,7 @@ void aclk_database_worker(void *arg)
|
|||
if (wc->host == localhost)
|
||||
sql_check_aclk_table_list(wc);
|
||||
break;
|
||||
|
||||
case ACLK_DATABASE_DELETE_HOST:
|
||||
debug(D_ACLK_SYNC,"Cleaning ACLK tables for %s", (char *) cmd.data);
|
||||
sql_delete_aclk_table_list(wc, cmd);
|
||||
|
@ -577,6 +611,8 @@ void aclk_database_worker(void *arg)
|
|||
wc->host->dbsync_worker = NULL;
|
||||
freez(wc);
|
||||
rrd_unlock();
|
||||
|
||||
worker_unregister();
|
||||
return;
|
||||
|
||||
error_after_timer_init:
|
||||
|
@ -585,6 +621,7 @@ error_after_async_init:
|
|||
fatal_assert(0 == uv_loop_close(loop));
|
||||
error_after_loop_init:
|
||||
freez(loop);
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------
|
||||
|
|
|
@ -133,7 +133,11 @@ enum aclk_database_opcode {
|
|||
ACLK_DATABASE_PUSH_ALERT_CONFIG,
|
||||
ACLK_DATABASE_PUSH_ALERT_SNAPSHOT,
|
||||
ACLK_DATABASE_QUEUE_REMOVED_ALERTS,
|
||||
ACLK_DATABASE_TIMER
|
||||
ACLK_DATABASE_TIMER,
|
||||
|
||||
// leave this last
|
||||
// we need it to check for worker utilization
|
||||
ACLK_MAX_ENUMERATIONS_DEFINED
|
||||
};
|
||||
|
||||
struct aclk_chart_payload_t {
|
||||
|
|
|
@ -573,6 +573,8 @@ static inline int check_if_resumed_from_suspension(void) {
|
|||
}
|
||||
|
||||
static void health_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
|
||||
|
@ -695,7 +697,31 @@ static void init_pending_foreach_alarms(RRDHOST *host) {
|
|||
*
|
||||
* @return It always returns NULL
|
||||
*/
|
||||
|
||||
#define WORKER_HEALTH_JOB_RRD_LOCK 0
|
||||
#define WORKER_HEALTH_JOB_HOST_LOCK 1
|
||||
#define WORKER_HEALTH_JOB_DB_QUERY 2
|
||||
#define WORKER_HEALTH_JOB_CALC_EVAL 3
|
||||
#define WORKER_HEALTH_JOB_WARNING_EVAL 4
|
||||
#define WORKER_HEALTH_JOB_CRITICAL_EVAL 5
|
||||
#define WORKER_HEALTH_JOB_ALARM_LOG_ENTRY 6
|
||||
#define WORKER_HEALTH_JOB_ALARM_LOG_PROCESS 7
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8
|
||||
#endif
|
||||
|
||||
void *health_main(void *ptr) {
|
||||
worker_register("HEALTH");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_RRD_LOCK, "rrd lock");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_HOST_LOCK, "host lock");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_DB_QUERY, "db lookup");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_CALC_EVAL, "calc eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_WARNING_EVAL, "warning eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_CRITICAL_EVAL, "critical eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY, "alarm log entry");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS, "alarm log process");
|
||||
|
||||
netdata_thread_cleanup_push(health_main_cleanup, ptr);
|
||||
|
||||
int min_run_every = (int)config_get_number(CONFIG_SECTION_HEALTH, "run at least every seconds", 10);
|
||||
|
@ -743,6 +769,7 @@ void *health_main(void *ptr) {
|
|||
marked_aclk_reload_loop = loop;
|
||||
#endif
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_RRD_LOCK);
|
||||
rrd_rdlock();
|
||||
|
||||
RRDHOST *host;
|
||||
|
@ -772,6 +799,7 @@ void *health_main(void *ptr) {
|
|||
|
||||
init_pending_foreach_alarms(host);
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK);
|
||||
rrdhost_rdlock(host);
|
||||
|
||||
// the first loop is to lookup values from the db
|
||||
|
@ -786,6 +814,7 @@ void *health_main(void *ptr) {
|
|||
rrdset_flag_check(rc->rrdset, RRDSET_FLAG_OBSOLETE) &&
|
||||
now > (rc->rrdset->last_collected_time.tv_sec + 60))) {
|
||||
if (!rrdcalc_isrepeating(rc)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
time_t now = now_realtime_sec();
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
|
||||
|
@ -820,6 +849,8 @@ void *health_main(void *ptr) {
|
|||
// if there is database lookup, do it
|
||||
|
||||
if (unlikely(RRDCALC_HAS_DB_LOOKUP(rc))) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_DB_QUERY);
|
||||
|
||||
/* time_t old_db_timestamp = rc->db_before; */
|
||||
int value_is_null = 0;
|
||||
|
||||
|
@ -876,6 +907,8 @@ void *health_main(void *ptr) {
|
|||
// if there is calculation expression, run it
|
||||
|
||||
if (unlikely(rc->calculation)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_CALC_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->calculation))) {
|
||||
// calculation failed
|
||||
rc->value = NAN;
|
||||
|
@ -924,6 +957,8 @@ void *health_main(void *ptr) {
|
|||
// check the warning expression
|
||||
|
||||
if (likely(rc->warning)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_WARNING_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->warning))) {
|
||||
// calculation failed
|
||||
rc->rrdcalc_flags |= RRDCALC_FLAG_WARN_ERROR;
|
||||
|
@ -948,6 +983,8 @@ void *health_main(void *ptr) {
|
|||
// check the critical expression
|
||||
|
||||
if (likely(rc->critical)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_CRITICAL_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->critical))) {
|
||||
// calculation failed
|
||||
rc->rrdcalc_flags |= RRDCALC_FLAG_CRIT_ERROR;
|
||||
|
@ -1005,6 +1042,7 @@ void *health_main(void *ptr) {
|
|||
// check if the new status and the old differ
|
||||
|
||||
if (status != rc->status) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
int delay = 0;
|
||||
|
||||
// apply trigger hysteresis
|
||||
|
@ -1086,6 +1124,7 @@ void *health_main(void *ptr) {
|
|||
}
|
||||
|
||||
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
rc->last_repeat = now;
|
||||
if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
|
@ -1118,6 +1157,7 @@ void *health_main(void *ptr) {
|
|||
|
||||
// execute notifications
|
||||
// and cleanup
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS);
|
||||
health_alarm_log_process(host);
|
||||
|
||||
if (unlikely(netdata_exit)) {
|
||||
|
@ -1156,6 +1196,7 @@ void *health_main(void *ptr) {
|
|||
|
||||
now = now_realtime_sec();
|
||||
if(now < next_run) {
|
||||
worker_is_idle();
|
||||
debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs", loop, (int) (next_run - now));
|
||||
sleep_usec(USEC_PER_SEC * (usec_t) (next_run - now));
|
||||
now = now_realtime_sec();
|
||||
|
|
|
@ -26,6 +26,7 @@ SUBDIRS = \
|
|||
storage_number \
|
||||
threads \
|
||||
url \
|
||||
worker_utilization \
|
||||
tests \
|
||||
$(NULL)
|
||||
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
static clockid_t clock_boottime_to_use = CLOCK_MONOTONIC;
|
||||
static clockid_t clock_monotonic_to_use = CLOCK_MONOTONIC;
|
||||
|
||||
usec_t clock_monotonic_resolution = 1000;
|
||||
usec_t clock_realtime_resolution = 1000;
|
||||
|
||||
#ifndef HAVE_CLOCK_GETTIME
|
||||
inline int clock_gettime(clockid_t clk_id, struct timespec *ts) {
|
||||
struct timeval tv;
|
||||
|
@ -20,15 +23,19 @@ inline int clock_gettime(clockid_t clk_id, struct timespec *ts) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// When running a binary with CLOCK_MONOTONIC_COARSE defined on a system with a linux kernel older than Linux 2.6.32 the
|
||||
// clock_gettime(2) system call fails with EINVAL. In that case it must fall-back to CLOCK_MONOTONIC.
|
||||
// Similar to CLOCK_MONOTONIC, but provides access to a raw hardware-based time that is not subject to NTP adjustments
|
||||
// or the incremental adjustments performed by adjtime(3). This clock does not count time that the system is suspended
|
||||
|
||||
static void test_clock_monotonic_coarse(void) {
|
||||
static void test_clock_monotonic_raw(void) {
|
||||
#ifdef CLOCK_MONOTONIC_RAW
|
||||
struct timespec ts;
|
||||
if(clock_gettime(CLOCK_MONOTONIC_COARSE, &ts) == -1 && errno == EINVAL)
|
||||
if(clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == -1 && errno == EINVAL)
|
||||
clock_monotonic_to_use = CLOCK_MONOTONIC;
|
||||
else
|
||||
clock_monotonic_to_use = CLOCK_MONOTONIC_COARSE;
|
||||
clock_monotonic_to_use = CLOCK_MONOTONIC_RAW;
|
||||
#else
|
||||
clock_monotonic_to_use = CLOCK_MONOTONIC;
|
||||
#endif
|
||||
}
|
||||
|
||||
// When running a binary with CLOCK_BOOTTIME defined on a system with a linux kernel older than Linux 2.6.39 the
|
||||
|
@ -42,14 +49,31 @@ static void test_clock_boottime(void) {
|
|||
clock_boottime_to_use = CLOCK_BOOTTIME;
|
||||
}
|
||||
|
||||
// Returns the resolution of the given clock, in microseconds.
//
// clock_getres() reports the resolution as seconds + nanoseconds, so the
// nanoseconds part is divided (not multiplied) to convert it to microseconds.
// A non-zero resolution finer than one microsecond is reported as 1, since
// one microsecond is the smallest unit usec_t can express.
// Returns 0 when the resolution cannot be determined.
static usec_t get_clock_resolution(clockid_t clock) {
    struct timespec ts = { 0 };

    // on failure ts carries no usable data - report "unknown"
    if(clock_getres(clock, &ts) != 0)
        return 0;

    usec_t resolution = (usec_t)ts.tv_sec * USEC_PER_SEC + (usec_t)ts.tv_nsec / NSEC_PER_USEC;

    // sub-microsecond resolution: round up to the smallest representable value
    if(!resolution && ts.tv_nsec > 0)
        resolution = 1;

    return resolution;
}
|
||||
|
||||
// perform any initializations required for clocks
|
||||
|
||||
void clocks_init(void) {
|
||||
// monotonic coarse has to be tested before boottime
|
||||
test_clock_monotonic_coarse();
|
||||
// monotonic raw has to be tested before boottime
|
||||
test_clock_monotonic_raw();
|
||||
|
||||
// boottime has to be tested after monotonic coarse
|
||||
test_clock_boottime();
|
||||
|
||||
clock_monotonic_resolution = get_clock_resolution(clock_monotonic_to_use);
|
||||
clock_realtime_resolution = get_clock_resolution(CLOCK_REALTIME);
|
||||
|
||||
// if for any reason these are zero, netdata will crash
|
||||
// since we use them as modulo to calculations
|
||||
if(!clock_realtime_resolution)
|
||||
clock_realtime_resolution = 1000;
|
||||
|
||||
if(!clock_monotonic_resolution)
|
||||
clock_monotonic_resolution = 1000;
|
||||
}
|
||||
|
||||
inline time_t now_sec(clockid_t clk_id) {
|
||||
|
@ -155,8 +179,110 @@ inline usec_t dt_usec(struct timeval *now, struct timeval *old) {
|
|||
return (ts1 > ts2) ? (ts1 - ts2) : (ts2 - ts1);
|
||||
}
|
||||
|
||||
// Sleeps until the absolute CLOCK_REALTIME timestamp 'usec' (in microseconds).
//
// An interrupted sleep (EINTR) is retried as is, since the deadline is absolute.
// Any other failure of clock_nanosleep() is logged the first time it is seen
// (one flag per error class). The function then falls back to a single relative
// sleep_usec() for the time still remaining until 'usec', and returns.
void sleep_to_absolute_time(usec_t usec) {
    static int einval_printed = 0, enotsup_printed = 0, eunknown_printed = 0;
    const clockid_t clock = CLOCK_REALTIME;

    struct timespec req = {
        .tv_sec = (time_t)(usec / USEC_PER_SEC),
        .tv_nsec = (long)((usec % USEC_PER_SEC) * NSEC_PER_USEC)
    };

    int ret;
    while((ret = clock_nanosleep(clock, TIMER_ABSTIME, &req, NULL)) != 0) {
        if(ret == EINTR)
            continue;

        if(ret == EINVAL) {
            if(!einval_printed) {
                einval_printed++;
                error(
                    "Invalid time given to clock_nanosleep(): clockid = %d, tv_sec = %ld, tv_nsec = %ld",
                    (int)clock,
                    (long)req.tv_sec,
                    (long)req.tv_nsec);
            }
        }
        else if(ret == ENOTSUP) {
            if(!enotsup_printed) {
                enotsup_printed++;
                error(
                    "Invalid clock id given to clock_nanosleep(): clockid = %d, tv_sec = %ld, tv_nsec = %ld",
                    (int)clock,
                    (long)req.tv_sec,
                    (long)req.tv_nsec);
            }
        }
        else {
            if(!eunknown_printed) {
                eunknown_printed++;
                error(
                    "Unknown return value %d from clock_nanosleep(): clockid = %d, tv_sec = %ld, tv_nsec = %ld",
                    ret,
                    (int)clock,
                    (long)req.tv_sec,
                    (long)req.tv_nsec);
            }
        }

        // The absolute sleep is not usable. 'usec' is a point in time, not a
        // duration, so convert it to the time remaining before handing it to
        // the relative sleep - and do not retry the call that just failed.
        usec_t now = now_realtime_usec();
        if(now < usec)
            sleep_usec(usec - now);

        return;
    }
}
|
||||
|
||||
#define HEARTBEAT_ALIGNMENT_STATISTICS_SIZE 10
|
||||
netdata_mutex_t heartbeat_alignment_mutex = NETDATA_MUTEX_INITIALIZER;
|
||||
static size_t heartbeat_alignment_id = 0;
|
||||
|
||||
struct heartbeat_thread_statistics {
|
||||
size_t sequence;
|
||||
usec_t dt;
|
||||
};
|
||||
static struct heartbeat_thread_statistics heartbeat_alignment_values[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 };
|
||||
|
||||
// Reports heartbeat alignment statistics accumulated since the previous call.
//
// For every statistics slot whose sequence number changed since the last call,
// the increase of its 'dt' counter is taken as one sample. The minimum, maximum
// and average of these samples and the number of samples are stored through the
// corresponding pointers; any of the pointers may be NULL.
// When no slot changed, all reported values are zero.
void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr) {
    struct heartbeat_thread_statistics current[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE];
    static struct heartbeat_thread_statistics old[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 };

    // work on a private snapshot of the shared counters
    memcpy(current, heartbeat_alignment_values, sizeof(current));

    usec_t min = 0, max = 0, total = 0, average = 0;
    size_t i, count = 0;
    for(i = 0; i < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE; i++) {
        // this slot did not advance since the last call
        if(current[i].sequence == old[i].sequence) continue;

        usec_t value = current[i].dt - old[i].dt;

        if(!count) {
            min = max = total = value;
            count = 1;
        }
        else {
            total += value;
            if(value < min) min = value;
            if(value > max) max = value;
            count++;
        }
    }

    // no slot advanced: keep the average at zero instead of dividing by zero
    if(count)
        average = total / count;

    if(min_ptr) *min_ptr = min;
    if(max_ptr) *max_ptr = max;
    if(average_ptr) *average_ptr = average;
    if(count_ptr) *count_ptr = count;

    // remember this snapshot as the baseline for the next call
    memcpy(old, current, sizeof(old));
}
|
||||
|
||||
inline void heartbeat_init(heartbeat_t *hb) {
|
||||
hb->monotonic = hb->realtime = 0ULL;
|
||||
hb->realtime = 0ULL;
|
||||
hb->randomness = 250 * USEC_PER_MS + ((now_realtime_usec() * clock_realtime_resolution) % (250 * USEC_PER_MS));
|
||||
hb->randomness -= (hb->randomness % clock_realtime_resolution);
|
||||
|
||||
netdata_mutex_lock(&heartbeat_alignment_mutex);
|
||||
hb->statistics_id = heartbeat_alignment_id;
|
||||
heartbeat_alignment_id++;
|
||||
netdata_mutex_unlock(&heartbeat_alignment_mutex);
|
||||
|
||||
if(hb->statistics_id < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE) {
|
||||
heartbeat_alignment_values[hb->statistics_id].dt = 0;
|
||||
heartbeat_alignment_values[hb->statistics_id].sequence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// waits for the next heartbeat
|
||||
|
@ -164,96 +290,73 @@ inline void heartbeat_init(heartbeat_t *hb) {
|
|||
// it returns the dt using the realtime clock
|
||||
|
||||
usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) {
|
||||
heartbeat_t now;
|
||||
now.monotonic = now_monotonic_usec();
|
||||
now.realtime = now_realtime_usec();
|
||||
|
||||
usec_t next_monotonic = now.monotonic - (now.monotonic % tick) + tick;
|
||||
|
||||
while(now.monotonic < next_monotonic) {
|
||||
sleep_usec(next_monotonic - now.monotonic);
|
||||
now.monotonic = now_monotonic_usec();
|
||||
now.realtime = now_realtime_usec();
|
||||
if(unlikely(hb->randomness > tick / 2)) {
|
||||
// TODO: The heartbeat tick should be specified at the heartbeat_init() function
|
||||
usec_t tmp = (now_realtime_usec() * clock_realtime_resolution) % (tick / 2);
|
||||
info("heartbeat randomness of %llu is too big for a tick of %llu - setting it to %llu", hb->randomness, tick, tmp);
|
||||
hb->randomness = tmp;
|
||||
}
|
||||
|
||||
if(likely(hb->realtime != 0ULL)) {
|
||||
usec_t dt_monotonic = now.monotonic - hb->monotonic;
|
||||
usec_t dt_realtime = now.realtime - hb->realtime;
|
||||
usec_t dt;
|
||||
usec_t now = now_realtime_usec();
|
||||
usec_t next = now - (now % tick) + tick + hb->randomness;
|
||||
|
||||
hb->monotonic = now.monotonic;
|
||||
hb->realtime = now.realtime;
|
||||
// align the next time we want to the clock resolution
|
||||
if(next % clock_realtime_resolution)
|
||||
next = next - (next % clock_realtime_resolution) + clock_realtime_resolution;
|
||||
|
||||
if(unlikely(dt_monotonic >= tick + tick / 2)) {
|
||||
errno = 0;
|
||||
error("heartbeat missed %llu monotonic microseconds", dt_monotonic - tick);
|
||||
}
|
||||
// sleep_usec() has a loop to guarantee we will sleep for at least the requested time.
|
||||
// According the specs, when we sleep for a relative time, clock adjustments should not affect the duration
|
||||
// we sleep.
|
||||
sleep_usec(next - now);
|
||||
now = now_realtime_usec();
|
||||
dt = now - hb->realtime;
|
||||
|
||||
return dt_realtime;
|
||||
if(hb->statistics_id < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE) {
|
||||
heartbeat_alignment_values[hb->statistics_id].dt += now - next;
|
||||
heartbeat_alignment_values[hb->statistics_id].sequence++;
|
||||
}
|
||||
else {
|
||||
hb->monotonic = now.monotonic;
|
||||
hb->realtime = now.realtime;
|
||||
return 0ULL;
|
||||
|
||||
if(unlikely(now < next)) {
|
||||
errno = 0;
|
||||
error("heartbeat clock: woke up %llu microseconds earlier than expected (can be due to the CLOCK_REALTIME set to the past).", next - now);
|
||||
}
|
||||
else if(unlikely(now - next > tick / 2)) {
|
||||
errno = 0;
|
||||
error("heartbeat clock: woke up %llu microseconds later than expected (can be due to system load or the CLOCK_REALTIME set to the future).", now - next);
|
||||
}
|
||||
|
||||
if(unlikely(!hb->realtime)) {
|
||||
// the first time return zero
|
||||
dt = 0;
|
||||
}
|
||||
|
||||
hb->realtime = now;
|
||||
return dt;
|
||||
}
|
||||
|
||||
// returned the elapsed time, since the last heartbeat
|
||||
// using the monotonic clock
|
||||
|
||||
inline usec_t heartbeat_monotonic_dt_to_now_usec(heartbeat_t *hb) {
|
||||
if(!hb || !hb->monotonic) return 0ULL;
|
||||
return now_monotonic_usec() - hb->monotonic;
|
||||
}
|
||||
|
||||
int sleep_usec(usec_t usec) {
|
||||
|
||||
#ifndef NETDATA_WITH_USLEEP
|
||||
void sleep_usec(usec_t usec) {
|
||||
// we expect microseconds (1.000.000 per second)
|
||||
// but timespec is nanoseconds (1.000.000.000 per second)
|
||||
struct timespec rem, req = {
|
||||
.tv_sec = (time_t) (usec / 1000000),
|
||||
.tv_nsec = (suseconds_t) ((usec % 1000000) * 1000)
|
||||
.tv_sec = (time_t) (usec / USEC_PER_SEC),
|
||||
.tv_nsec = (suseconds_t) ((usec % USEC_PER_SEC) * NSEC_PER_USEC)
|
||||
};
|
||||
|
||||
while (nanosleep(&req, &rem) == -1) {
|
||||
while ((errno = clock_nanosleep(CLOCK_REALTIME, 0, &req, &rem)) != 0) {
|
||||
if (likely(errno == EINTR)) {
|
||||
debug(D_SYSTEM, "nanosleep() interrupted (while sleeping for %llu microseconds).", usec);
|
||||
req.tv_sec = rem.tv_sec;
|
||||
req.tv_nsec = rem.tv_nsec;
|
||||
} else {
|
||||
error("Cannot nanosleep() for %llu microseconds.", usec);
|
||||
error("Cannot clock_nanosleep(CLOCK_REALTIME) for %llu microseconds.", usec);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
#else
|
||||
int ret = usleep(usec);
|
||||
if(unlikely(ret == -1 && errno == EINVAL)) {
|
||||
// on certain systems, usec has to be up to 999999
|
||||
if(usec > 999999) {
|
||||
int counter = usec / 999999;
|
||||
while(counter--)
|
||||
usleep(999999);
|
||||
|
||||
usleep(usec % 999999);
|
||||
}
|
||||
else {
|
||||
error("Cannot usleep() for %llu microseconds.", usec);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if(ret != 0)
|
||||
error("usleep() failed for %llu microseconds.", usec);
|
||||
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline collected_number uptime_from_boottime(void) {
|
||||
#ifdef CLOCK_BOOTTIME_IS_AVAILABLE
|
||||
return now_boottime_usec() / 1000;
|
||||
return (collected_number)(now_boottime_usec() / USEC_PER_MS);
|
||||
#else
|
||||
error("uptime cannot be read from CLOCK_BOOTTIME on this system.");
|
||||
return 0;
|
||||
|
|
|
@ -22,8 +22,9 @@ typedef unsigned long long usec_t;
|
|||
typedef long long susec_t;
|
||||
|
||||
typedef struct heartbeat {
|
||||
usec_t monotonic;
|
||||
usec_t realtime;
|
||||
usec_t randomness;
|
||||
size_t statistics_id;
|
||||
} heartbeat_t;
|
||||
|
||||
/* Linux value is as good as any other */
|
||||
|
@ -36,20 +37,14 @@ typedef struct heartbeat {
|
|||
#define CLOCK_MONOTONIC CLOCK_REALTIME
|
||||
#endif
|
||||
|
||||
/* Prefer CLOCK_MONOTONIC_COARSE where available to reduce overhead. It has the same semantics as CLOCK_MONOTONIC */
|
||||
#ifndef CLOCK_MONOTONIC_COARSE
|
||||
/* fallback to CLOCK_MONOTONIC if not available */
|
||||
#define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
|
||||
#endif
|
||||
|
||||
#ifndef CLOCK_BOOTTIME
|
||||
|
||||
#ifdef CLOCK_UPTIME
|
||||
/* CLOCK_BOOTTIME falls back to CLOCK_UPTIME on FreeBSD */
|
||||
#define CLOCK_BOOTTIME CLOCK_UPTIME
|
||||
#else // CLOCK_UPTIME
|
||||
/* CLOCK_BOOTTIME falls back to CLOCK_MONOTONIC */
|
||||
#define CLOCK_BOOTTIME CLOCK_MONOTONIC_COARSE
|
||||
/* CLOCK_BOOTTIME falls back to CLOCK_REALTIME */
|
||||
#define CLOCK_BOOTTIME CLOCK_REALTIME
|
||||
#endif // CLOCK_UPTIME
|
||||
|
||||
#else // CLOCK_BOOTTIME
|
||||
|
@ -115,8 +110,6 @@ extern int clock_gettime(clockid_t clk_id, struct timespec *ts);
|
|||
* All now_*_sec() functions return the time in seconds from the appropriate clock, or 0 on error.
|
||||
* All now_*_usec() functions return the time in microseconds from the appropriate clock, or 0 on error.
|
||||
*
|
||||
* Most functions will attempt to use CLOCK_MONOTONIC_COARSE if available to reduce contention overhead and improve
|
||||
* performance scaling. If high precision is required please use one of the available now_*_high_precision_* functions.
|
||||
*/
|
||||
extern int now_realtime_timeval(struct timeval *tv);
|
||||
extern time_t now_realtime_sec(void);
|
||||
|
@ -146,10 +139,9 @@ extern void heartbeat_init(heartbeat_t *hb);
|
|||
*/
|
||||
extern usec_t heartbeat_next(heartbeat_t *hb, usec_t tick);
|
||||
|
||||
/* Returns elapsed time in microseconds since last heartbeat */
|
||||
extern usec_t heartbeat_monotonic_dt_to_now_usec(heartbeat_t *hb);
|
||||
extern void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr);
|
||||
|
||||
extern int sleep_usec(usec_t usec);
|
||||
extern void sleep_usec(usec_t usec);
|
||||
|
||||
extern void clocks_init(void);
|
||||
|
||||
|
@ -160,4 +152,9 @@ extern int now_timeval(clockid_t clk_id, struct timeval *tv);
|
|||
|
||||
extern collected_number uptime_msec(char *filename);
|
||||
|
||||
extern usec_t clock_monotonic_resolution;
|
||||
extern usec_t clock_realtime_resolution;
|
||||
|
||||
extern void sleep_to_absolute_time(usec_t usec);
|
||||
|
||||
#endif /* NETDATA_CLOCKS_H */
|
||||
|
|
|
@ -346,6 +346,7 @@ extern char *netdata_configured_host_prefix;
|
|||
#include "health/health.h"
|
||||
#include "string/utf8.h"
|
||||
#include "onewayalloc/onewayalloc.h"
|
||||
#include "worker_utilization/worker_utilization.h"
|
||||
|
||||
// BEWARE: Outside of the C code this also exists in alarm-notify.sh
|
||||
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
|
||||
|
||||
dist_noinst_DATA = \
|
||||
README.md \
|
||||
$(NULL)
|
|
@ -0,0 +1,58 @@
|
|||
<!--
|
||||
title: "Worker Utilization"
|
||||
custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/worker_utilization/README.md
|
||||
-->
|
||||
|
||||
# Worker Utilization
|
||||
|
||||
This library is to be used when there are 1 or more worker threads accepting requests of some kind and servicing them.
|
||||
The goal is to provide a very simple way to monitor worker threads utilization, as a percentage of the time they are busy and the amount of requests served.
|
||||
|
||||
## How to use
|
||||
|
||||
When a working thread starts, call:
|
||||
|
||||
```c
|
||||
void worker_register(const char *name);
|
||||
```
|
||||
|
||||
This will create the necessary structures for the library to work.
|
||||
No need to keep a pointer to them. They are allocated as `__thread` variables.
|
||||
|
||||
When the thread stops, call:
|
||||
|
||||
```c
|
||||
void worker_unregister(void)
|
||||
```
|
||||
|
||||
It takes no parameters and returns no value.
|
||||
|
||||
When you are about to do some work in the working thread, call:
|
||||
|
||||
```c
|
||||
void worker_is_busy(size_t job_id)
|
||||
```
|
||||
|
||||
When you finish doing the job, call:
|
||||
|
||||
```c
|
||||
void worker_is_idle(void)
|
||||
```
|
||||
|
||||
Calls to `worker_is_busy()` can be made one after another (without calling
|
||||
`worker_is_idle()` between them) to switch jobs without losing any time between
|
||||
them and eliminating one of the 2 clock calls involved.
|
||||
|
||||
## Implementation details
|
||||
|
||||
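Marking a worker busy or idle is totally lockless and extremely fast (a lock is taken only while a worker registers or unregisters), so it should not introduce any kind of problems to the workers.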
|
||||
Every time `worker_is_busy()` or `worker_is_idle()` are called, a call to `now_realtime_usec()`
|
||||
is done and a couple of variables are updated. That's it!
|
||||
|
||||
The worker does not need to update the variables regularly. Based on the last status of the worker,
|
||||
the statistics collector of netdata will calculate if the thread is busy or idle all the time or
|
||||
part of the time. Works well for both thousands of jobs per second and unlimited working time
|
||||
(being totally busy with a single request for ages).
|
||||
|
||||
The statistics collector is called by the global statistics thread of netdata. So, even if the workers
|
||||
are extremely busy with their jobs, netdata will be able to know how busy they are.
|
|
@ -0,0 +1,201 @@
|
|||
#include "worker_utilization.h"
|
||||
|
||||
#define WORKER_IDLE 'I'
|
||||
#define WORKER_BUSY 'B'
|
||||
|
||||
struct worker_job_type {
|
||||
char name[WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH + 1];
|
||||
size_t worker_jobs_started;
|
||||
usec_t worker_busy_time;
|
||||
|
||||
size_t statistics_jobs_started;
|
||||
usec_t statistics_busy_time;
|
||||
};
|
||||
|
||||
struct worker {
|
||||
pid_t pid;
|
||||
const char *tag;
|
||||
const char *workname;
|
||||
uint32_t workname_hash;
|
||||
|
||||
// only one variable is set by our statistics callers
|
||||
usec_t statistics_last_checkpoint;
|
||||
size_t statistics_last_jobs_started;
|
||||
usec_t statistics_last_busy_time;
|
||||
|
||||
// the worker controlled variables
|
||||
size_t job_id;
|
||||
volatile size_t jobs_started;
|
||||
volatile usec_t busy_time;
|
||||
volatile usec_t last_action_timestamp;
|
||||
volatile char last_action;
|
||||
|
||||
struct worker_job_type per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES];
|
||||
|
||||
struct worker *next;
|
||||
};
|
||||
|
||||
static netdata_mutex_t base_lock = NETDATA_MUTEX_INITIALIZER;
|
||||
static struct worker *base = NULL;
|
||||
static __thread struct worker *worker = NULL;
|
||||
|
||||
void worker_register(const char *workname) {
|
||||
if(unlikely(worker)) return;
|
||||
|
||||
worker = callocz(1, sizeof(struct worker));
|
||||
worker->pid = gettid();
|
||||
worker->tag = strdupz(netdata_thread_tag());
|
||||
worker->workname = strdupz(workname);
|
||||
worker->workname_hash = simple_hash(worker->workname);
|
||||
|
||||
usec_t now = now_realtime_usec();
|
||||
worker->statistics_last_checkpoint = now;
|
||||
worker->last_action_timestamp = now;
|
||||
worker->last_action = WORKER_IDLE;
|
||||
|
||||
netdata_mutex_lock(&base_lock);
|
||||
worker->next = base;
|
||||
base = worker;
|
||||
netdata_mutex_unlock(&base_lock);
|
||||
}
|
||||
|
||||
void worker_register_job_name(size_t job_id, const char *name) {
|
||||
if(unlikely(!worker)) return;
|
||||
|
||||
if(unlikely(job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) {
|
||||
error("WORKER_UTILIZATION: job_id %zu is too big. Max is %zu", job_id, (size_t)(WORKER_UTILIZATION_MAX_JOB_TYPES - 1));
|
||||
return;
|
||||
}
|
||||
|
||||
strncpy(worker->per_job_type[job_id].name, name, WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH);
|
||||
}
|
||||
|
||||
void worker_unregister(void) {
|
||||
if(unlikely(!worker)) return;
|
||||
|
||||
netdata_mutex_lock(&base_lock);
|
||||
if(base == worker)
|
||||
base = worker->next;
|
||||
else {
|
||||
struct worker *p;
|
||||
for(p = base; p && p->next && p->next != worker ;p = p->next);
|
||||
if(p && p->next == worker)
|
||||
p->next = worker->next;
|
||||
}
|
||||
netdata_mutex_unlock(&base_lock);
|
||||
|
||||
freez((void *)worker->tag);
|
||||
freez((void *)worker->workname);
|
||||
freez(worker);
|
||||
|
||||
worker = NULL;
|
||||
}
|
||||
|
||||
// Close the job the calling worker is running: add the time since
// last_action_timestamp to the total and to the per job type busy time,
// then mark the worker idle.
// `now` is the time the job ended, as sampled by the caller.
// The callers have already checked that the thread is registered.
static inline void worker_is_idle_with_time(usec_t now) {
    // workers_foreach() rewrites last_action_timestamp while we are busy and
    // the realtime clock may step backwards, so the timestamp can be later
    // than `now`; subtracting it then would wrap around and account a huge
    // busy time - account nothing instead
    usec_t started = worker->last_action_timestamp;
    usec_t delta = (likely(now > started)) ? now - started : 0;

    worker->busy_time += delta;
    worker->per_job_type[worker->job_id].worker_busy_time += delta;

    // the worker was busy
    // set it to idle before we set the timestamp

    worker->last_action = WORKER_IDLE;
    if(likely(worker->last_action_timestamp < now))
        worker->last_action_timestamp = now;
}
|
||||
|
||||
void worker_is_idle(void) {
|
||||
if(unlikely(!worker)) return;
|
||||
if(unlikely(worker->last_action != WORKER_BUSY)) return;
|
||||
|
||||
worker_is_idle_with_time(now_realtime_usec());
|
||||
}
|
||||
|
||||
void worker_is_busy(size_t job_id) {
|
||||
if(unlikely(!worker)) return;
|
||||
if(unlikely(job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES))
|
||||
job_id = 0;
|
||||
|
||||
usec_t now = now_realtime_usec();
|
||||
|
||||
if(worker->last_action == WORKER_BUSY)
|
||||
worker_is_idle_with_time(now);
|
||||
|
||||
// the worker was idle
|
||||
// set the timestamp and then set it to busy
|
||||
|
||||
worker->job_id = job_id;
|
||||
worker->per_job_type[job_id].worker_jobs_started++;
|
||||
worker->jobs_started++;
|
||||
worker->last_action_timestamp = now;
|
||||
worker->last_action = WORKER_BUSY;
|
||||
}
|
||||
|
||||
|
||||
// statistics interface
|
||||
|
||||
// Report, through `callback`, what every worker registered as `workname` did
// since the previous call of this function for it.
//
// The callback receives, in this order: `data`, the worker pid and thread tag,
// the busy time and the wall clock time since the previous call (both in
// microseconds), the number of jobs started, 1 if a job is running right now
// (0 otherwise), and three arrays of WORKER_UTILIZATION_MAX_JOB_TYPES entries
// with the name, the jobs started and the busy time of each job type.
// The arrays live on the stack of this function and are valid only during
// the callback.
//
// The callback runs with the workers list locked, so it must not call
// worker_register() or worker_unregister().
// Does nothing when workname or callback is NULL.
void workers_foreach(const char *workname, void (*callback)(void *data, pid_t pid, const char *thread_tag, size_t utilization_usec, size_t duration_usec, size_t jobs_started, size_t is_running, const char **job_types_names, size_t *job_types_jobs_started, usec_t *job_types_busy_time), void *data) {
    if(unlikely(!workname || !callback)) return;

    // hashing the name does not need the lock
    uint32_t hash = simple_hash(workname);
    usec_t busy_time, delta;
    size_t i, jobs_started, jobs_running;

    netdata_mutex_lock(&base_lock);

    struct worker *p;
    for(p = base; p ; p = p->next) {
        if(hash != p->workname_hash || strcmp(workname, p->workname)) continue;

        usec_t now = now_realtime_usec();

        // find per job type statistics
        const char *per_job_type_name[WORKER_UTILIZATION_MAX_JOB_TYPES];
        size_t per_job_type_jobs_started[WORKER_UTILIZATION_MAX_JOB_TYPES];
        usec_t per_job_type_busy_time[WORKER_UTILIZATION_MAX_JOB_TYPES];
        for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
            per_job_type_name[i] = p->per_job_type[i].name;

            // read each worker counter once, so that the value reported and
            // the value remembered for the next call are the same
            size_t tmp_jobs_started = p->per_job_type[i].worker_jobs_started;
            per_job_type_jobs_started[i] = tmp_jobs_started - p->per_job_type[i].statistics_jobs_started;
            p->per_job_type[i].statistics_jobs_started = tmp_jobs_started;

            usec_t tmp_busy_time = p->per_job_type[i].worker_busy_time;
            per_job_type_busy_time[i] = tmp_busy_time - p->per_job_type[i].statistics_busy_time;
            p->per_job_type[i].statistics_busy_time = tmp_busy_time;
        }

        // get a copy of the worker variables
        usec_t worker_busy_time = p->busy_time;
        size_t worker_jobs_started = p->jobs_started;
        char worker_last_action = p->last_action;
        usec_t worker_last_action_timestamp = p->last_action_timestamp;

        // the worker may have started its job after we sampled `now`;
        // in that case there is no in-progress time to report yet
        int started_before_now = (worker_last_action_timestamp < now);

        // this is the only variable both the worker thread and the statistics thread are writing
        // we set this only when the worker is busy, so that worker will not
        // accumulate all the busy time, but only the time after the point we collected statistics
        // (and we only ever move it forward, never back in time)
        if(worker_last_action == WORKER_BUSY && started_before_now && p->last_action_timestamp == worker_last_action_timestamp && p->last_action == WORKER_BUSY)
            p->last_action_timestamp = now;

        // calculate delta busy time
        busy_time = worker_busy_time - p->statistics_last_busy_time;
        p->statistics_last_busy_time = worker_busy_time;

        // calculate delta jobs done
        jobs_started = worker_jobs_started - p->statistics_last_jobs_started;
        p->statistics_last_jobs_started = worker_jobs_started;

        jobs_running = 0;
        if(worker_last_action == WORKER_BUSY) {
            // the worker is still busy with something
            // let's add that busy time to the reported one
            // (guarded, because subtracting a later timestamp would wrap around)
            if(likely(started_before_now))
                busy_time += now - worker_last_action_timestamp;

            jobs_running = 1;
        }

        delta = now - p->statistics_last_checkpoint;

        p->statistics_last_checkpoint = now;

        callback(data, p->pid, p->tag, busy_time, delta, jobs_started, jobs_running, per_job_type_name, per_job_type_jobs_started, per_job_type_busy_time);
    }

    netdata_mutex_unlock(&base_lock);
}
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef WORKER_UTILIZATION_H
#define WORKER_UTILIZATION_H 1

#include "../libnetdata.h"

// workers interfaces

// size of the job types table of each worker; valid job ids are
// 0 to WORKER_UTILIZATION_MAX_JOB_TYPES - 1
#define WORKER_UTILIZATION_MAX_JOB_TYPES 50

// job names longer than this are truncated
#define WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH 25

// register the calling thread as a worker named workname;
// has no effect if the thread is already registered
extern void worker_register(const char *workname);

// name the job type job_id of the calling worker
extern void worker_register_job_name(size_t job_id, const char *name);

// remove the calling thread from the workers and free its resources
extern void worker_unregister(void);

// the calling worker finished the job it was running
extern void worker_is_idle(void);

// the calling worker starts a job of type job_id;
// if another job is running, that job is finished first
extern void worker_is_busy(size_t job_id);

// statistics interface

// call callback once for every worker registered as workname, passing what
// the worker did since the previous call of this function
extern void workers_foreach(const char *workname, void (*callback)(void *data, pid_t pid, const char *thread_tag, size_t utilization_usec, size_t duration_usec, size_t jobs_started, size_t is_running, const char **job_types_names, size_t *job_types_jobs_started, usec_t *job_types_busy_time), void *data);

#endif // WORKER_UTILIZATION_H
|
32
ml/Host.cc
32
ml/Host.cc
|
@ -358,6 +358,10 @@ void TrainableHost::trainDimension(Dimension *D, const TimePoint &NowTP) {
|
|||
void TrainableHost::train() {
|
||||
Duration<double> MaxSleepFor = Seconds{10 * updateEvery()};
|
||||
|
||||
worker_register("MLTRAIN");
|
||||
worker_register_job_name(0, "dimensions");
|
||||
|
||||
worker_is_busy(0);
|
||||
while (!netdata_exit) {
|
||||
netdata_thread_testcancel();
|
||||
netdata_thread_disable_cancelability();
|
||||
|
@ -378,11 +382,23 @@ void TrainableHost::train() {
|
|||
if (RealDuration >= AllottedDuration)
|
||||
continue;
|
||||
|
||||
worker_is_idle();
|
||||
SleepFor = std::min(AllottedDuration - RealDuration, MaxSleepFor);
|
||||
std::this_thread::sleep_for(SleepFor);
|
||||
worker_is_busy(0);
|
||||
}
|
||||
}
|
||||
|
||||
#define WORKER_JOB_DETECT_DIMENSION 0
|
||||
#define WORKER_JOB_UPDATE_DETECTION_CHART 1
|
||||
#define WORKER_JOB_UPDATE_ANOMALY_RATES 2
|
||||
#define WORKER_JOB_UPDATE_CHARTS 3
|
||||
#define WORKER_JOB_SAVE_ANOMALY_EVENT 4
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 5
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 5
|
||||
#endif
|
||||
|
||||
void DetectableHost::detectOnce() {
|
||||
auto P = BRW.insert(WindowAnomalyRate >= Cfg.HostAnomalyRateThreshold);
|
||||
BitRateWindow::Edge Edge = P.first;
|
||||
|
@ -408,6 +424,8 @@ void DetectableHost::detectOnce() {
|
|||
DimsOverThreshold.reserve(DimensionsMap.size());
|
||||
|
||||
for (auto &DP : DimensionsMap) {
|
||||
worker_is_busy(WORKER_JOB_DETECT_DIMENSION);
|
||||
|
||||
Dimension *D = DP.second;
|
||||
|
||||
auto P = D->detect(WindowLength, ResetBitCounter);
|
||||
|
@ -434,6 +452,7 @@ void DetectableHost::detectOnce() {
|
|||
}
|
||||
|
||||
if (CollectAnomalyRates) {
|
||||
worker_is_busy(WORKER_JOB_UPDATE_ANOMALY_RATES);
|
||||
AnomalyRateTimer = 0;
|
||||
rrdset_done(AnomalyRateRS);
|
||||
}
|
||||
|
@ -442,6 +461,7 @@ void DetectableHost::detectOnce() {
|
|||
this->NumNormalDimensions = NumNormalDimensions;
|
||||
this->NumTrainedDimensions = NumTrainedDimensions;
|
||||
|
||||
worker_is_busy(WORKER_JOB_UPDATE_CHARTS);
|
||||
updateDimensionsChart(getRH(), NumTrainedDimensions, NumNormalDimensions, NumAnomalousDimensions);
|
||||
updateRateChart(getRH(), WindowAnomalyRate * 10000.0);
|
||||
updateWindowLengthChart(getRH(), WindowLength);
|
||||
|
@ -454,6 +474,8 @@ void DetectableHost::detectOnce() {
|
|||
if (!NewAnomalyEvent || (DimsOverThreshold.size() == 0))
|
||||
return;
|
||||
|
||||
worker_is_busy(WORKER_JOB_SAVE_ANOMALY_EVENT);
|
||||
|
||||
std::sort(DimsOverThreshold.begin(), DimsOverThreshold.end());
|
||||
std::reverse(DimsOverThreshold.begin(), DimsOverThreshold.end());
|
||||
|
||||
|
@ -476,6 +498,13 @@ void DetectableHost::detectOnce() {
|
|||
}
|
||||
|
||||
void DetectableHost::detect() {
|
||||
worker_register("MLDETECT");
|
||||
worker_register_job_name(WORKER_JOB_DETECT_DIMENSION, "dimensions");
|
||||
worker_register_job_name(WORKER_JOB_UPDATE_DETECTION_CHART, "detection chart");
|
||||
worker_register_job_name(WORKER_JOB_UPDATE_ANOMALY_RATES, "anomaly rates");
|
||||
worker_register_job_name(WORKER_JOB_UPDATE_CHARTS, "charts");
|
||||
worker_register_job_name(WORKER_JOB_SAVE_ANOMALY_EVENT, "anomaly event");
|
||||
|
||||
std::this_thread::sleep_for(Seconds{10});
|
||||
|
||||
heartbeat_t HB;
|
||||
|
@ -483,10 +512,13 @@ void DetectableHost::detect() {
|
|||
|
||||
while (!netdata_exit) {
|
||||
netdata_thread_testcancel();
|
||||
worker_is_idle();
|
||||
heartbeat_next(&HB, updateEvery() * USEC_PER_SEC);
|
||||
|
||||
netdata_thread_disable_cancelability();
|
||||
detectOnce();
|
||||
|
||||
worker_is_busy(WORKER_JOB_UPDATE_DETECTION_CHART);
|
||||
updateDetectionChart(getRH());
|
||||
netdata_thread_enable_cancelability();
|
||||
}
|
||||
|
|
|
@ -133,10 +133,13 @@ int parser_add_keyword(PARSER *parser, char *keyword, keyword_function func)
|
|||
|
||||
tmp_keyword = callocz(1, sizeof(*tmp_keyword));
|
||||
|
||||
tmp_keyword->worker_job_id = parser->worker_job_ids++;
|
||||
tmp_keyword->keyword = strdupz(keyword);
|
||||
tmp_keyword->keyword_hash = keyword_hash;
|
||||
tmp_keyword->func[tmp_keyword->func_no++] = (void *) func;
|
||||
|
||||
worker_register_job_name(tmp_keyword->worker_job_id, tmp_keyword->keyword);
|
||||
|
||||
tmp_keyword->next = parser->keyword;
|
||||
parser->keyword = tmp_keyword;
|
||||
return tmp_keyword->func_no;
|
||||
|
@ -265,10 +268,12 @@ inline int parser_action(PARSER *parser, char *input)
|
|||
|
||||
uint32_t command_hash = simple_hash(command);
|
||||
|
||||
size_t worker_job_id;
|
||||
while(tmp_keyword) {
|
||||
if (command_hash == tmp_keyword->keyword_hash &&
|
||||
(!strcmp(command, tmp_keyword->keyword))) {
|
||||
action_function_list = &tmp_keyword->func[0];
|
||||
worker_job_id = tmp_keyword->worker_job_id;
|
||||
break;
|
||||
}
|
||||
tmp_keyword = tmp_keyword->next;
|
||||
|
@ -284,12 +289,14 @@ inline int parser_action(PARSER *parser, char *input)
|
|||
#endif
|
||||
}
|
||||
else {
|
||||
worker_is_busy(worker_job_id);
|
||||
while ((action_function = *action_function_list) != NULL) {
|
||||
rc = action_function(words, parser->user, parser->plugins_action);
|
||||
if (unlikely(rc == PARSER_RC_ERROR || rc == PARSER_RC_STOP))
|
||||
break;
|
||||
action_function_list++;
|
||||
}
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
if (likely(input == parser->buffer))
|
||||
|
|
|
@ -54,6 +54,7 @@ typedef enum parser_input_type {
|
|||
typedef PARSER_RC (*keyword_function)(char **, void *, PLUGINSD_ACTION *plugins_action);
|
||||
|
||||
typedef struct parser_keyword {
|
||||
size_t worker_job_id;
|
||||
char *keyword;
|
||||
uint32_t keyword_hash;
|
||||
int func_no;
|
||||
|
@ -67,6 +68,7 @@ typedef struct parser_data {
|
|||
} PARSER_DATA;
|
||||
|
||||
typedef struct parser {
|
||||
size_t worker_job_ids;
|
||||
uint8_t version; // Parser version
|
||||
RRDHOST *host;
|
||||
void *input; // Input source e.g. stream
|
||||
|
|
|
@ -30,6 +30,8 @@ void destroy_receiver_state(struct receiver_state *rpt) {
|
|||
}
|
||||
|
||||
static void rrdpush_receiver_thread_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
static __thread int executed = 0;
|
||||
if(!executed) {
|
||||
executed = 1;
|
||||
|
@ -716,7 +718,9 @@ void *rrdpush_receiver_thread(void *ptr) {
|
|||
struct receiver_state *rpt = (struct receiver_state *)ptr;
|
||||
info("STREAM %s [%s]:%s: receive thread created (task id %d)", rpt->hostname, rpt->client_ip, rpt->client_port, gettid());
|
||||
|
||||
worker_register("STREAMRCV");
|
||||
rrdpush_receive(rpt);
|
||||
worker_unregister();
|
||||
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
|
|
|
@ -2,6 +2,26 @@
|
|||
|
||||
#include "rrdpush.h"
|
||||
|
||||
#define WORKER_SENDER_JOB_CONNECT 0
|
||||
#define WORKER_SENDER_JOB_PIPE_READ 1
|
||||
#define WORKER_SENDER_JOB_SOCKET_RECEIVE 2
|
||||
#define WORKER_SENDER_JOB_EXECUTE 3
|
||||
#define WORKER_SENDER_JOB_SOCKET_SEND 4
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE 5
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_OVERFLOW 6
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_TIMEOUT 7
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR 8
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_SOCKER_ERROR 9
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR 10
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_PARENT_CLOSED 11
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR 12
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR 13
|
||||
#define WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION 14
|
||||
|
||||
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 15
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 15
|
||||
#endif
|
||||
|
||||
extern struct config stream_config;
|
||||
extern int netdata_use_ssl_on_stream;
|
||||
extern char *netdata_ssl_ca_path;
|
||||
|
@ -21,8 +41,8 @@ static inline void rrdpush_sender_thread_close_socket(RRDHOST *host);
|
|||
* Inform the user through the error log file and
|
||||
* deactivate compression by downgrading the stream protocol.
|
||||
*/
|
||||
static inline void deactivate_compression(struct sender_state *s)
|
||||
{
|
||||
static inline void deactivate_compression(struct sender_state *s) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION);
|
||||
error("STREAM_COMPRESSION: Deactivating compression to avoid stream corruption");
|
||||
default_compression_enabled = 0;
|
||||
s->rrdpush_compression = 0;
|
||||
|
@ -389,6 +409,7 @@ if(!s->rrdpush_compression)
|
|||
err = SSL_get_error(host->ssl.conn, err);
|
||||
error("SSL cannot connect with the server: %s ",ERR_error_string((long)SSL_get_error(host->ssl.conn,err),NULL));
|
||||
if (netdata_use_ssl_on_stream == NETDATA_SSL_FORCE) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR);
|
||||
rrdpush_sender_thread_close_socket(host);
|
||||
return 0;
|
||||
}else {
|
||||
|
@ -399,6 +420,7 @@ if(!s->rrdpush_compression)
|
|||
if (netdata_use_ssl_on_stream == NETDATA_SSL_FORCE) {
|
||||
if (netdata_validate_server == NETDATA_SSL_VALID_CERTIFICATE) {
|
||||
if ( security_test_certificate(host->ssl.conn)) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR);
|
||||
error("Closing the stream connection, because the server SSL certificate is not valid.");
|
||||
rrdpush_sender_thread_close_socket(host);
|
||||
return 0;
|
||||
|
@ -411,6 +433,7 @@ if(!s->rrdpush_compression)
|
|||
#else
|
||||
if(send_timeout(host->rrdpush_sender_socket, http, strlen(http), 0, timeout) == -1) {
|
||||
#endif
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT);
|
||||
error("STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", host->hostname, s->connected_to);
|
||||
rrdpush_sender_thread_close_socket(host);
|
||||
return 0;
|
||||
|
@ -426,6 +449,7 @@ if(!s->rrdpush_compression)
|
|||
received = recv_timeout(host->rrdpush_sender_socket, http, HTTP_HEADER_SIZE, 0, timeout);
|
||||
if(received == -1) {
|
||||
#endif
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT);
|
||||
error("STREAM %s [send to %s]: remote netdata does not respond.", host->hostname, s->connected_to);
|
||||
rrdpush_sender_thread_close_socket(host);
|
||||
return 0;
|
||||
|
@ -435,6 +459,7 @@ if(!s->rrdpush_compression)
|
|||
debug(D_STREAM, "Response to sender from far end: %s", http);
|
||||
int32_t version = (int32_t)parse_stream_version(host, http);
|
||||
if(version == -1) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE);
|
||||
error("STREAM %s [send to %s]: server is not replying properly (is it a netdata?).", host->hostname, s->connected_to);
|
||||
rrdpush_sender_thread_close_socket(host);
|
||||
return 0;
|
||||
|
@ -541,9 +566,9 @@ void attempt_to_send(struct sender_state *s) {
|
|||
s->last_sent_t = now_monotonic_sec();
|
||||
}
|
||||
else if (ret == -1 && (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK))
|
||||
debug(D_STREAM, "STREAM %s [send to %s]: unavailable after polling POLLOUT", s->host->hostname,
|
||||
s->connected_to);
|
||||
debug(D_STREAM, "STREAM %s [send to %s]: unavailable after polling POLLOUT", s->host->hostname, s->connected_to);
|
||||
else if (ret == -1) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR);
|
||||
debug(D_STREAM, "STREAM: Send failed - closing socket...");
|
||||
error("STREAM %s [send to %s]: failed to send metrics - closing connection - we have sent %zu bytes on this connection.", s->host->hostname, s->connected_to, s->sent_bytes_on_this_connection);
|
||||
rrdpush_sender_thread_close_socket(s->host);
|
||||
|
@ -570,6 +595,8 @@ int ret;
|
|||
int sslerrno = SSL_get_error(s->host->ssl.conn, desired);
|
||||
if (sslerrno == SSL_ERROR_WANT_READ || sslerrno == SSL_ERROR_WANT_WRITE)
|
||||
return;
|
||||
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR);
|
||||
u_long err;
|
||||
char buf[256];
|
||||
while ((err = ERR_get_error()) != 0) {
|
||||
|
@ -581,20 +608,25 @@ int ret;
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
ret = recv(s->host->rrdpush_sender_socket, s->read_buffer + s->read_len, sizeof(s->read_buffer) - s->read_len - 1,
|
||||
MSG_DONTWAIT);
|
||||
ret = recv(s->host->rrdpush_sender_socket, s->read_buffer + s->read_len, sizeof(s->read_buffer) - s->read_len - 1,MSG_DONTWAIT);
|
||||
if (ret>0) {
|
||||
s->read_len += ret;
|
||||
return;
|
||||
}
|
||||
|
||||
debug(D_STREAM, "Socket was POLLIN, but req %zu bytes gave %d", sizeof(s->read_buffer) - s->read_len - 1, ret);
|
||||
|
||||
if (ret<0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR))
|
||||
return;
|
||||
if (ret==0)
|
||||
|
||||
if (ret==0) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_PARENT_CLOSED);
|
||||
error("STREAM %s [send to %s]: connection closed by far end. Restarting connection", s->host->hostname, s->connected_to);
|
||||
else
|
||||
error("STREAM %s [send to %s]: error during read (%d). Restarting connection", s->host->hostname, s->connected_to,
|
||||
ret);
|
||||
}
|
||||
else {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR);
|
||||
error("STREAM %s [send to %s]: error during receive (%d). Restarting connection", s->host->hostname, s->connected_to, ret);
|
||||
}
|
||||
rrdpush_sender_thread_close_socket(s->host);
|
||||
}
|
||||
|
||||
|
@ -615,6 +647,8 @@ void execute_commands(struct sender_state *s) {
|
|||
|
||||
|
||||
static void rrdpush_sender_thread_cleanup_callback(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
RRDHOST *host = (RRDHOST *)ptr;
|
||||
|
||||
netdata_mutex_lock(&host->sender->mutex);
|
||||
|
@ -707,6 +741,25 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
fds[Collector].fd = s->host->rrdpush_sender_pipe[PIPE_READ];
|
||||
fds[Collector].events = POLLIN;
|
||||
|
||||
worker_register("STREAMSND");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_CONNECT, "connect");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_PIPE_READ, "pipe read");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_SOCKET_RECEIVE, "receive");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_EXECUTE, "execute");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_SOCKET_SEND, "send");
|
||||
|
||||
// disconnection reasons
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT, "disconnect timeout");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR, "disconnect poll error");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_SOCKER_ERROR, "disconnect socket error");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_OVERFLOW, "disconnect overflow");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_SSL_ERROR, "disconnect ssl error");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_PARENT_CLOSED, "disconnect parent closed");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR, "disconnect receive error");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR, "disconnect send error");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_NO_COMPRESSION, "disconnect no compression");
|
||||
worker_register_job_name(WORKER_SENDER_JOB_DISCONNECT_BAD_HANDSHAKE, "disconnect bad handshake");
|
||||
|
||||
netdata_thread_cleanup_push(rrdpush_sender_thread_cleanup_callback, s->host);
|
||||
for(; s->host->rrdpush_send_enabled && !netdata_exit ;) {
|
||||
// check for outstanding cancellation requests
|
||||
|
@ -714,6 +767,7 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
|
||||
// The connection attempt blocks (after which we use the socket in nonblocking)
|
||||
if(unlikely(s->host->rrdpush_sender_socket == -1)) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_CONNECT);
|
||||
s->overflow = 0;
|
||||
s->read_len = 0;
|
||||
s->buffer->read = 0;
|
||||
|
@ -731,11 +785,14 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
|
||||
// If the TCP window never opened then something is wrong, restart connection
|
||||
if(unlikely(now_monotonic_sec() - s->last_sent_t > s->timeout)) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_TIMEOUT);
|
||||
error("STREAM %s [send to %s]: could not send metrics for %d seconds - closing connection - we have sent %zu bytes on this connection via %zu send attempts.", s->host->hostname, s->connected_to, s->timeout, s->sent_bytes_on_this_connection, s->send_attempts);
|
||||
rrdpush_sender_thread_close_socket(s->host);
|
||||
continue;
|
||||
}
|
||||
|
||||
worker_is_idle();
|
||||
|
||||
// Wait until buffer opens in the socket or a rrdset_done_push wakes us
|
||||
fds[Collector].revents = 0;
|
||||
fds[Socket].revents = 0;
|
||||
|
@ -757,16 +814,18 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
int retval = poll(fds, 2, 1000);
|
||||
debug(D_STREAM, "STREAM: poll() finished collector=%d socket=%d (current chunk %zu bytes)...",
|
||||
fds[Collector].revents, fds[Socket].revents, outstanding);
|
||||
|
||||
if(unlikely(netdata_exit)) break;
|
||||
|
||||
// Spurious wake-ups without error - loop again
|
||||
if (retval == 0 || ((retval == -1) && (errno == EAGAIN || errno == EINTR)))
|
||||
{
|
||||
if (retval == 0 || ((retval == -1) && (errno == EAGAIN || errno == EINTR))) {
|
||||
debug(D_STREAM, "Spurious wakeup");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only errors from poll() are internal, but try restarting the connection
|
||||
if(unlikely(retval == -1)) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_POLL_ERROR);
|
||||
error("STREAM %s [send to %s]: failed to poll(). Closing socket.", s->host->hostname, s->connected_to);
|
||||
rrdpush_sender_thread_close_socket(s->host);
|
||||
continue;
|
||||
|
@ -774,6 +833,7 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
|
||||
// If the collector woke us up then empty the pipe to remove the signal
|
||||
if (fds[Collector].revents & POLLIN || fds[Collector].revents & POLLPRI) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_PIPE_READ);
|
||||
debug(D_STREAM, "STREAM: Data added to send buffer (current buffer chunk %zu bytes)...", outstanding);
|
||||
|
||||
char buffer[1000 + 1];
|
||||
|
@ -782,13 +842,19 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
}
|
||||
|
||||
// Read as much as possible to fill the buffer, split into full lines for execution.
|
||||
if (fds[Socket].revents & POLLIN)
|
||||
if (fds[Socket].revents & POLLIN) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_SOCKET_RECEIVE);
|
||||
attempt_read(s);
|
||||
}
|
||||
|
||||
worker_is_busy(WORKER_SENDER_JOB_EXECUTE);
|
||||
execute_commands(s);
|
||||
|
||||
// If we have data and have seen the TCP window open then try to close it by a transmission.
|
||||
if (outstanding && fds[Socket].revents & POLLOUT)
|
||||
if (outstanding && fds[Socket].revents & POLLOUT) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_SOCKET_SEND);
|
||||
attempt_to_send(s);
|
||||
}
|
||||
|
||||
// TODO-GAPS - why do we only check this on the socket, not the pipe?
|
||||
if (outstanding) {
|
||||
|
@ -800,6 +866,7 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
else if (unlikely(fds[Socket].revents & POLLNVAL))
|
||||
error = "connection is invalid (POLLNVAL)";
|
||||
if(unlikely(error)) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SOCKER_ERROR);
|
||||
error("STREAM %s [send to %s]: restart stream because %s - %zu bytes transmitted.", s->host->hostname,
|
||||
s->connected_to, error, s->sent_bytes_on_this_connection);
|
||||
rrdpush_sender_thread_close_socket(s->host);
|
||||
|
@ -808,6 +875,7 @@ void *rrdpush_sender_thread(void *ptr) {
|
|||
|
||||
// protection from overflow
|
||||
if (s->overflow) {
|
||||
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_OVERFLOW);
|
||||
errno = 0;
|
||||
error("STREAM %s [send to %s]: buffer full (%zu-bytes) after %zu bytes. Restarting connection",
|
||||
s->host->hostname, s->connected_to, s->buffer->size, s->sent_bytes_on_this_connection);
|
||||
|
|
|
@ -7,6 +7,20 @@ int web_client_timeout = DEFAULT_DISCONNECT_IDLE_WEB_CLIENTS_AFTER_SECONDS;
|
|||
int web_client_first_request_timeout = DEFAULT_TIMEOUT_TO_RECEIVE_FIRST_WEB_REQUEST;
|
||||
long web_client_streaming_rate_t = 0L;
|
||||
|
||||
#define WORKER_JOB_ADD_CONNECTION 0
|
||||
#define WORKER_JOB_DEL_COLLECTION 1
|
||||
#define WORKER_JOB_ADD_FILE 2
|
||||
#define WORKER_JOB_DEL_FILE 3
|
||||
#define WORKER_JOB_READ_FILE 4
|
||||
#define WORKER_JOB_WRITE_FILE 5
|
||||
#define WORKER_JOB_RCV_DATA 6
|
||||
#define WORKER_JOB_SND_DATA 7
|
||||
#define WORKER_JOB_PROCESS 8
|
||||
|
||||
// the job ids of the web server go up to 8 (WORKER_JOB_PROCESS),
// so the job types table needs room for 9 entries
#if (WORKER_UTILIZATION_MAX_JOB_TYPES < 9)
#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least 9
#endif
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------------------------------------------------------
|
||||
* Build web_client state from the pollinfo that describes an accepted connection.
|
||||
|
@ -71,11 +85,15 @@ static inline int web_server_check_client_status(struct web_client *w) {
|
|||
// Callback for a file descriptor added for reading a file on behalf of a web
// client (passed in `data`). Counts the file, asks for POLLIN events, links
// the client to the POLLINFO and returns the client.
static void *web_server_file_add_callback(POLLINFO *pi, short int *events, void *data) {
    struct web_client *w = (struct web_client *)data;

    worker_is_busy(WORKER_JOB_ADD_FILE);

    worker_private->files_read++;
    debug(D_WEB_CLIENT, "%llu: ADDED FILE READ ON FD %d", w->id, pi->fd);

    // we only want to be notified when the file has data to read
    *events = POLLIN;
    pi->data = w;

    worker_is_idle();

    return w;
}
|
||||
|
||||
|
@ -83,27 +101,36 @@ static void web_server_file_del_callback(POLLINFO *pi) {
|
|||
struct web_client *w = (struct web_client *)pi->data;
|
||||
debug(D_WEB_CLIENT, "%llu: RELEASE FILE READ ON FD %d", w->id, pi->fd);
|
||||
|
||||
worker_is_busy(WORKER_JOB_DEL_FILE);
|
||||
|
||||
w->pollinfo_filecopy_slot = 0;
|
||||
|
||||
if(unlikely(!w->pollinfo_slot)) {
|
||||
debug(D_WEB_CLIENT, "%llu: CROSS WEB CLIENT CLEANUP (iFD %d, oFD %d)", w->id, pi->fd, w->ofd);
|
||||
web_client_release(w);
|
||||
}
|
||||
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
static int web_server_file_read_callback(POLLINFO *pi, short int *events) {
|
||||
int retval = -1;
|
||||
struct web_client *w = (struct web_client *)pi->data;
|
||||
|
||||
worker_is_busy(WORKER_JOB_READ_FILE);
|
||||
|
||||
// if there is no POLLINFO linked to this, it means the client disconnected
|
||||
// stop the file reading too
|
||||
if(unlikely(!w->pollinfo_slot)) {
|
||||
debug(D_WEB_CLIENT, "%llu: PREVENTED ATTEMPT TO READ FILE ON FD %d, ON CLOSED WEB CLIENT", w->id, pi->fd);
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if(unlikely(w->mode != WEB_CLIENT_MODE_FILECOPY || w->ifd == w->ofd)) {
|
||||
debug(D_WEB_CLIENT, "%llu: PREVENTED ATTEMPT TO READ FILE ON FD %d, ON NON-FILECOPY WEB CLIENT", w->id, pi->fd);
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
debug(D_WEB_CLIENT, "%llu: READING FILE ON FD %d", w->id, pi->fd);
|
||||
|
@ -121,18 +148,25 @@ static int web_server_file_read_callback(POLLINFO *pi, short int *events) {
|
|||
|
||||
if(unlikely(ret <= 0 || w->ifd == w->ofd)) {
|
||||
debug(D_WEB_CLIENT, "%llu: DONE READING FILE ON FD %d", w->id, pi->fd);
|
||||
return -1;
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*events = POLLIN;
|
||||
return 0;
|
||||
retval = 0;
|
||||
|
||||
cleanup:
|
||||
worker_is_idle();
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Write callback for the files served to web clients.
// Writing is not supported: it only logs an error and returns -1.
static int web_server_file_write_callback(POLLINFO *pi, short int *events) {
    // neither argument is used
    (void)events;
    (void)pi;

    worker_is_busy(WORKER_JOB_WRITE_FILE);
    error("Writing to web files is not supported!");
    worker_is_idle();

    return -1;
}
|
||||
|
@ -143,6 +177,7 @@ static int web_server_file_write_callback(POLLINFO *pi, short int *events) {
|
|||
static void *web_server_add_callback(POLLINFO *pi, short int *events, void *data) {
|
||||
(void)data; // Suppress warning on unused argument
|
||||
|
||||
worker_is_busy(WORKER_JOB_ADD_CONNECTION);
|
||||
worker_private->connected++;
|
||||
|
||||
size_t concurrent = worker_private->connected - worker_private->disconnected;
|
||||
|
@ -177,7 +212,7 @@ static void *web_server_add_callback(POLLINFO *pi, short int *events, void *data
|
|||
//this means that the mensage was not completely read, so
|
||||
//I cannot identify it yet.
|
||||
sock_setnonblock(w->ifd);
|
||||
return w;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
//The next two ifs are not together because I am reusing SSL structure
|
||||
|
@ -191,7 +226,7 @@ static void *web_server_add_callback(POLLINFO *pi, short int *events, void *data
|
|||
if (test[0] < 0x18){
|
||||
WEB_CLIENT_IS_DEAD(w);
|
||||
sock_setnonblock(w->ifd);
|
||||
return w;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -217,11 +252,16 @@ static void *web_server_add_callback(POLLINFO *pi, short int *events, void *data
|
|||
#endif
|
||||
|
||||
debug(D_WEB_CLIENT, "%llu: ADDED CLIENT FD %d", w->id, pi->fd);
|
||||
|
||||
cleanup:
|
||||
worker_is_idle();
|
||||
return w;
|
||||
}
|
||||
|
||||
// TCP client disconnected
|
||||
static void web_server_del_callback(POLLINFO *pi) {
|
||||
worker_is_busy(WORKER_JOB_DEL_COLLECTION);
|
||||
|
||||
worker_private->disconnected++;
|
||||
|
||||
struct web_client *w = (struct web_client *)pi->data;
|
||||
|
@ -240,18 +280,27 @@ static void web_server_del_callback(POLLINFO *pi) {
|
|||
debug(D_WEB_CLIENT, "%llu: CLOSING CLIENT FD %d", w->id, pi->fd);
|
||||
web_client_release(w);
|
||||
}
|
||||
|
||||
worker_is_idle();
|
||||
}
|
||||
|
||||
static int web_server_rcv_callback(POLLINFO *pi, short int *events) {
|
||||
int ret = -1;
|
||||
worker_is_busy(WORKER_JOB_RCV_DATA);
|
||||
|
||||
worker_private->receptions++;
|
||||
|
||||
struct web_client *w = (struct web_client *)pi->data;
|
||||
int fd = pi->fd;
|
||||
|
||||
if(unlikely(web_client_receive(w) < 0))
|
||||
return -1;
|
||||
if(unlikely(web_client_receive(w) < 0)) {
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
debug(D_WEB_CLIENT, "%llu: processing received data on fd %d.", w->id, fd);
|
||||
worker_is_idle();
|
||||
worker_is_busy(WORKER_JOB_PROCESS);
|
||||
web_client_process_request(w);
|
||||
|
||||
if(unlikely(w->mode == WEB_CLIENT_MODE_FILECOPY)) {
|
||||
|
@ -282,7 +331,8 @@ static int web_server_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
w->pollinfo_filecopy_slot = fpi->slot;
|
||||
else {
|
||||
error("Failed to add filecopy fd. Closing client.");
|
||||
return -1;
|
||||
ret = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -295,10 +345,17 @@ static int web_server_rcv_callback(POLLINFO *pi, short int *events) {
|
|||
if(unlikely(w->ofd == fd && web_client_has_wait_send(w)))
|
||||
*events |= POLLOUT;
|
||||
|
||||
return web_server_check_client_status(w);
|
||||
ret = web_server_check_client_status(w);
|
||||
|
||||
cleanup:
|
||||
worker_is_idle();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int web_server_snd_callback(POLLINFO *pi, short int *events) {
|
||||
int retval = -1;
|
||||
worker_is_busy(WORKER_JOB_SND_DATA);
|
||||
|
||||
worker_private->sends++;
|
||||
|
||||
struct web_client *w = (struct web_client *)pi->data;
|
||||
|
@ -306,8 +363,12 @@ static int web_server_snd_callback(POLLINFO *pi, short int *events) {
|
|||
|
||||
debug(D_WEB_CLIENT, "%llu: sending data on fd %d.", w->id, fd);
|
||||
|
||||
if(unlikely(web_client_send(w) < 0))
|
||||
return -1;
|
||||
int ret = web_client_send(w);
|
||||
|
||||
if(unlikely(ret < 0)) {
|
||||
retval = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if(unlikely(w->ifd == fd && web_client_has_wait_receive(w)))
|
||||
*events |= POLLIN;
|
||||
|
@ -315,50 +376,11 @@ static int web_server_snd_callback(POLLINFO *pi, short int *events) {
|
|||
if(unlikely(w->ofd == fd && web_client_has_wait_send(w)))
|
||||
*events |= POLLOUT;
|
||||
|
||||
return web_server_check_client_status(w);
|
||||
}
|
||||
retval = web_server_check_client_status(w);
|
||||
|
||||
static void web_server_tmr_callback(void *timer_data) {
|
||||
worker_private = (struct web_server_static_threaded_worker *)timer_data;
|
||||
|
||||
static __thread RRDSET *st = NULL;
|
||||
static __thread RRDDIM *rd_user = NULL, *rd_system = NULL;
|
||||
|
||||
if(unlikely(netdata_exit)) return;
|
||||
|
||||
if(unlikely(!st)) {
|
||||
char id[100 + 1];
|
||||
char title[100 + 1];
|
||||
|
||||
snprintfz(id, 100, "web_thread%d_cpu", worker_private->id + 1);
|
||||
snprintfz(title, 100, "Netdata web server thread CPU usage");
|
||||
|
||||
st = rrdset_create_localhost(
|
||||
"netdata"
|
||||
, id
|
||||
, NULL
|
||||
, "web"
|
||||
, "netdata.web_cpu"
|
||||
, title
|
||||
, "milliseconds/s"
|
||||
, "web"
|
||||
, "stats"
|
||||
, 132000 + worker_private->id
|
||||
, default_rrd_update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rd_user = rrddim_add(st, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_system = rrddim_add(st, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
else
|
||||
rrdset_next(st);
|
||||
|
||||
struct rusage rusage;
|
||||
getrusage(RUSAGE_THREAD, &rusage);
|
||||
rrddim_set_by_pointer(st, rd_user, rusage.ru_utime.tv_sec * 1000000ULL + rusage.ru_utime.tv_usec);
|
||||
rrddim_set_by_pointer(st, rd_system, rusage.ru_stime.tv_sec * 1000000ULL + rusage.ru_stime.tv_usec);
|
||||
rrdset_done(st);
|
||||
cleanup:
|
||||
worker_is_idle();
|
||||
return retval;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -379,11 +401,22 @@ static void socket_listen_main_static_threaded_worker_cleanup(void *ptr) {
|
|||
);
|
||||
|
||||
worker_private->running = 0;
|
||||
worker_unregister();
|
||||
}
|
||||
|
||||
void *socket_listen_main_static_threaded_worker(void *ptr) {
|
||||
worker_private = (struct web_server_static_threaded_worker *)ptr;
|
||||
worker_private->running = 1;
|
||||
worker_register("WEB");
|
||||
worker_register_job_name(WORKER_JOB_ADD_CONNECTION, "connect");
|
||||
worker_register_job_name(WORKER_JOB_DEL_COLLECTION, "disconnect");
|
||||
worker_register_job_name(WORKER_JOB_ADD_FILE, "file start");
|
||||
worker_register_job_name(WORKER_JOB_DEL_FILE, "file end");
|
||||
worker_register_job_name(WORKER_JOB_READ_FILE, "file read");
|
||||
worker_register_job_name(WORKER_JOB_WRITE_FILE, "file write");
|
||||
worker_register_job_name(WORKER_JOB_RCV_DATA, "receive");
|
||||
worker_register_job_name(WORKER_JOB_SND_DATA, "send");
|
||||
worker_register_job_name(WORKER_JOB_PROCESS, "process");
|
||||
|
||||
netdata_thread_cleanup_push(socket_listen_main_static_threaded_worker_cleanup, ptr);
|
||||
|
||||
|
@ -392,7 +425,7 @@ void *socket_listen_main_static_threaded_worker(void *ptr) {
|
|||
, web_server_del_callback
|
||||
, web_server_rcv_callback
|
||||
, web_server_snd_callback
|
||||
, web_server_tmr_callback
|
||||
, NULL
|
||||
, web_allow_connections_from
|
||||
, web_allow_connections_dns
|
||||
, NULL
|
||||
|
|
Loading…
Reference in New Issue