Backtrace info when modifying refcount of metrics.

Keep the last 128 backtraces per metric, recorded whenever the
metric's reference count is increased/decreased.

To keep CPU and memory consumption low we enable
this only for UUIDs starting with 0x0A.
This commit is contained in:
vkalintiris 2024-03-04 12:22:01 +02:00
parent a890cfaa51
commit 39568ed90f
10 changed files with 289 additions and 4 deletions

View File

@ -125,6 +125,7 @@ option(ENABLE_BUNDLED_PROTOBUF "enable bundled protobuf" False)
option(ENABLE_LOGS_MANAGEMENT_TESTS "enable logs management tests" True)
option(ENABLE_LIBBACKTRACE "enable libbacktrace" False)
option(ENABLE_SENTRY "enable sentry" False)
option(ENABLE_WEBRTC "enable webrtc" False)
@ -136,6 +137,23 @@ if(ENABLE_PLUGIN_GO)
find_package(Go "${MIN_GO_VERSION}" REQUIRED)
endif()
# Optionally build libbacktrace from source as an external project and
# expose its include directory and static library to the rest of the build.
if(ENABLE_LIBBACKTRACE)
include(ExternalProject)
# NOTE(review): GIT_TAG names a branch (master), not a fixed revision, so the
# fetched sources may differ between builds — consider pinning a commit.
ExternalProject_Add(libbacktrace
GIT_REPOSITORY https://github.com/ianlancetaylor/libbacktrace.git
GIT_TAG master
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
BUILD_COMMAND make
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libbacktrace.a
UPDATE_DISCONNECTED True
INSTALL_COMMAND make install)
# Derive the header and static-library locations from the install prefix of
# the external project; they are consumed where libnetdata is configured.
ExternalProject_Get_Property(libbacktrace INSTALL_DIR)
set(LIBBACKTRACE_INCLUDE_DIR ${INSTALL_DIR}/include)
set(LIBBACKTRACE_LIBRARY ${INSTALL_DIR}/lib/libbacktrace.a)
endif()
if(ENABLE_SENTRY)
include(FetchContent)
@ -670,6 +688,12 @@ if(ENABLE_PLUGIN_EBPF)
)
endif()
# Add the backtrace helper sources to libnetdata only when libbacktrace
# support is enabled.
if(ENABLE_LIBBACKTRACE)
list(APPEND LIBNETDATA_FILES
src/libnetdata/bt/bt.cc
src/libnetdata/bt/bt.h)
endif()
set(LIBH2O_FILES
src/web/server/h2o/libh2o/deps/cloexec/cloexec.c
src/web/server/h2o/libh2o/deps/libgkc/gkc.c
@ -1506,6 +1530,12 @@ if(ENABLE_PLUGIN_EBPF)
target_link_libraries(libnetdata PUBLIC ${ELF_LIBRARIES})
endif()
# Make libnetdata depend on the libbacktrace external project so it is built
# first, then add its headers and static library to libnetdata (PRIVATE).
if(ENABLE_LIBBACKTRACE)
add_dependencies(libnetdata libbacktrace)
target_include_directories(libnetdata PRIVATE ${LIBBACKTRACE_INCLUDE_DIR})
target_link_libraries(libnetdata PRIVATE ${LIBBACKTRACE_LIBRARY})
endif()
# judy
target_link_libraries(libnetdata PUBLIC judy)

View File

@ -46,7 +46,8 @@ ifeq ($(ENABLE_SENTRY),true)
-DNETDATA_SENTRY_ENVIRONMENT=$(RELEASE_PIPELINE) \
-DNETDATA_SENTRY_RELEASE=$(VERSION) \
-DNETDATA_SENTRY_DIST=$(BUILD_DESTINATION) \
-DNETDATA_SENTRY_DSN=$(SENTRY_DSN)
-DNETDATA_SENTRY_DSN=$(SENTRY_DSN) \
-DENABLE_LIBBACKTRACE=On
else
SENTRY_CONFIG := -DENABLE_SENTRY=Off
endif
@ -69,7 +70,7 @@ override_dh_auto_configure:
packaging/bundle-ebpf-co-re.sh . ${TOP}/usr/libexec/netdata/plugins.d; \
fi
dh_auto_configure -- -G Ninja \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=/ \
-DWEB_DIR=/var/lib/netdata/www \
-DCMAKE_C_FLAGS='-ffile-prefix-map=${SRC_DIR}=${SRC_DIR}' \
@ -105,7 +106,6 @@ override_dh_strip:
if [ "${ENABLE_SENTRY}" = "true" ] && [ "${UPLOAD_SENTRY}" = "true" ]; then \
sentry-cli debug-files upload -o netdata-inc -p netdata-agent --force-foreground --log-level=debug --wait --include-sources /usr/src/netdata/debian/netdata/usr/sbin/netdata; \
fi
dh_strip
override_dh_install:
cp -v $(BASE_CONFIG) debian/netdata.conf

View File

@ -116,6 +116,11 @@
#cmakedefine NETDATA_SENTRY_RELEASE "@NETDATA_SENTRY_RELEASE@"
#cmakedefine NETDATA_SENTRY_DIST "@NETDATA_SENTRY_DIST@"
#cmakedefine NETDATA_SENTRY_DSN "@NETDATA_SENTRY_DSN@"
// enabled libbacktrace
#cmakedefine ENABLE_LIBBACKTRACE
// enabled bundling
#cmakedefine ENABLE_BUNDLED_JSONC

View File

@ -26,6 +26,9 @@ void get_netdata_execution_path(void) {
}
netdata_exe_file[exepath_size] = '\0';
#ifdef ENABLE_LIBBACKTRACE
bt_init(netdata_exe_file, netdata_configured_cache_dir);
#endif
// macOS's dirname(3) does not modify passed string
char *tmpdir = strdupz(netdata_exe_file);

View File

@ -1400,6 +1400,8 @@ int unittest_prepare_rrd(char **user) {
}
int main(int argc, char **argv) {
uv_setup_args(argc, argv);
// initialize the system clocks
clocks_init();
netdata_start_time = now_realtime_sec();

View File

@ -38,6 +38,10 @@ void sentry_native_init(void)
sentry_options_set_debug(options, 1);
#endif
#ifdef ENABLE_LIBBACKTRACE
sentry_options_add_attachment(options, bt_path);
#endif
sentry_init(options);
}

View File

@ -134,6 +134,10 @@ static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused,
}
static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
#ifdef ENABLE_LIBBACKTRACE
bt_collect(&metric->uuid);
#endif
spinlock_lock(&metric->refcount_spinlock);
if (metric->refcount >= 0)
@ -154,10 +158,18 @@ static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
}
static inline void metric_release(MRG *mrg __maybe_unused, METRIC *metric) {
#ifdef ENABLE_LIBBACKTRACE
bt_collect(&metric->uuid);
#endif
spinlock_lock(&metric->refcount_spinlock);
if (metric->refcount <= 0)
if (metric->refcount <= 0) {
#ifdef ENABLE_LIBBACKTRACE
bt_dump(&metric->uuid);
#endif
fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
}
metric->refcount -= 1;
REFCOUNT refcount = metric->refcount;

208
src/libnetdata/bt/bt.cc Normal file
View File

@ -0,0 +1,208 @@
#include "bt.h"
#include <backtrace.h>
#include <backtrace-supported.h>
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <sstream>
#include <unordered_map>
#include <queue>
static backtrace_state *State = nullptr;
// libbacktrace "full" callback: appends one line describing a resolved frame.
//
// data     - must point to a std::ostream (StackTrace::dump passes the address
//            of its std::ostream& argument), so it is cast back to exactly
//            that type rather than to a more derived stream class.
// pc       - program counter of the frame; printed only when no file name
//            is available.
// filename - source file of the frame, or nullptr when unknown.
// lineno   - source line of the frame (meaningful only with a filename).
// function - function name of the frame, or nullptr when unknown.
//
// Always returns 0 so that libbacktrace keeps reporting further entries.
static int pcinfo_callback(void *data, uintptr_t pc, const char *filename, int lineno, const char *function)
{
    std::ostream *OS = static_cast<std::ostream*>(data);

    if (function)
        *OS << function << "() @ ";

    if (filename)
        *OS << filename << ":" << lineno;
    else
        // Addresses are easier to match against symbol tables in hex; restore
        // decimal afterwards so later line numbers are not printed in hex.
        *OS << "0x" << std::hex << pc << std::dec << " (information not available)";

    *OS << "\n";
    return 0;
}
// libbacktrace error callback: appends a one-line error description to a stream.
//
// data   - must point to a std::ostream (this is what StackTrace::dump hands
//          to libbacktrace); it must NOT be used with any other kind of
//          callback data.
// msg    - error text supplied by libbacktrace; a placeholder is printed if
//          it is null, because streaming a null C string would put the stream
//          into a failed state and drop all later output.
// errnum - error number supplied by libbacktrace.
static void error_callback(void *data, const char *msg, int errnum)
{
    std::ostream *OS = static_cast<std::ostream*>(data);
    *OS << "Backtrace error: " << (msg ? msg : "(no message)") << " (error number " << errnum << ")\n";
}
struct UuidKey
{
const uuid_t *Inner;
bool operator==(const UuidKey& Other) const
{
return uuid_compare(*Inner, *Other.Inner) == 0;
}
};
namespace std
{
template<>
struct hash<UuidKey>
{
size_t operator()(const UuidKey& Key) const
{
return XXH64(*Key.Inner, sizeof(uuid_t), 0);
}
};
}
// A captured call stack: up to MAX_ITEMS program counters stored inline.
class StackTrace
{
public:
    // Capacity of the inline program-counter buffer.
    static const size_t MAX_ITEMS = 128;

    uintptr_t PCs[MAX_ITEMS] = { 0 };
    size_t Items = 0;

    // Stores PC in the next free slot; the caller must ensure there is room.
    void append(uintptr_t PC)
    {
        assert(Items < MAX_ITEMS);
        PCs[Items] = PC;
        Items += 1;
    }

    // Traces are equal when they hold the same program counters in the same order.
    bool operator==(const StackTrace& Other) const
    {
        return Items == Other.Items && std::equal(PCs, PCs + Items, Other.PCs);
    }

    // Resolves every stored program counter through libbacktrace, writing the
    // result to OS, and terminates the trace with an empty line.
    void dump(std::ostream &OS) const
    {
        size_t Idx = 0;
        while (Idx != Items)
        {
            backtrace_pcinfo(State, PCs[Idx], pcinfo_callback, error_callback, &OS);
            ++Idx;
        }

        OS << std::endl;
    }
};
namespace std
{
    // Hash of a StackTrace: covers only the program counters actually in
    // use (the first ST.Items entries), not the whole fixed-size buffer.
    template<>
    struct hash<StackTrace>
    {
        size_t operator()(const StackTrace& ST) const
        {
            const size_t UsedBytes = sizeof(uintptr_t) * ST.Items;
            return XXH64(ST.PCs, UsedBytes, 0);
        }
    };
}
static std::vector<std::pair<uint64_t, StackTrace>> InternedStackTraces;
static size_t stackTraceID(const StackTrace &ST)
{
std::hash<StackTrace> hasher;
uint64_t K = hasher(ST);
auto Pred = [](const std::pair<uint64_t, StackTrace>& a, const std::pair<uint64_t, StackTrace>& b) {
return a.first < b.first;
};
std::pair<uint64_t, StackTrace> P(K, ST);
auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), P, Pred);
if (It != InternedStackTraces.end() && It->first == K)
return K;
InternedStackTraces.insert(It, {K, ST});
return K;
}
// Returns the interned stack trace registered under ID.
//
// When no trace with that ID exists, a reference to an empty trace is
// returned instead of dereferencing an iterator that is past the end or
// that points at a different entry.
//
// Performs no locking of its own; the caller must serialize access to
// InternedStackTraces.
static const StackTrace &lookupStackTrace(uint64_t ID)
{
    auto Pred = [](const std::pair<uint64_t, StackTrace>& element, uint64_t value) {
        return element.first < value;
    };

    auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), ID, Pred);
    if (It == InternedStackTraces.end() || It->first != ID)
    {
        static const StackTrace Unknown{};
        return Unknown;
    }

    return It->second;
}
// Per-UUID history: for every tracked UUID, a FIFO of identifiers of interned
// stack traces (as returned by stackTraceID), oldest first.
static std::unordered_map<UuidKey, std::queue<uint64_t>> USTs;
// Held by bt_collect() and bt_dump() while they access the structures above.
static std::mutex Mutex;
static int simple_callback(void *data, uintptr_t pc)
{
StackTrace *ST = static_cast<StackTrace*>(data);
if (ST->Items == StackTrace::MAX_ITEMS)
fatal("StackTrace too big...");
ST->append(pc);
return 0;
}
// Path of the file bt_dump() writes to; nullptr until bt_init() has run.
const char *bt_path = nullptr;

// Error callback used only while creating the libbacktrace state: reports
// the failure on stderr. libbacktrace invokes the callback it is given when
// the state cannot be created, so a null callback must not be passed.
static void bt_init_error_callback(void *data, const char *msg, int errnum)
{
    (void) data;
    fprintf(stderr, "libbacktrace: %s (error number %d)\n", msg ? msg : "unknown error", errnum);
}

// Initializes backtrace collection.
//
// exepath   - path of the running executable, handed to libbacktrace.
// cache_dir - directory in which the dump file "bt.log" is placed.
//
// Creates the (thread-safe) libbacktrace state and sets bt_path to
// "<cache_dir>/bt.log". Calling it again after a successful first call is a
// no-op, so neither the state nor the path string is allocated twice.
void bt_init(const char *exepath, const char *cache_dir)
{
    if (bt_path)
        return;

    State = backtrace_create_state(exepath, 1, bt_init_error_callback, nullptr);

    char buf[FILENAME_MAX + 1];
    snprintfz(buf, FILENAME_MAX, "%s/%s", cache_dir, "bt.log");
    bt_path = strdupz(buf);
}
void bt_collect(const uuid_t *uuid)
{
// Enable collection on 1/16th of UUIDs to save on CPU and RAM consumption
if (*uuid[0] != 0x0A)
return;
{
std::lock_guard<std::mutex> lock(Mutex);
UuidKey UK = { uuid };
auto& Q = USTs[UK];
if (Q.size() == 128)
Q.pop();
StackTrace ST;
backtrace_simple(State, 1, simple_callback, error_callback, &ST);
Q.push(stackTraceID(ST));
}
}
void bt_dump(const uuid_t *uuid)
{
std::lock_guard<std::mutex> lock(Mutex);
UuidKey UK = { uuid };
auto It = USTs.find(UK);
if (It == USTs.end())
return;
std::queue<uint64_t> Q = It->second;
std::ostringstream OS;
size_t Idx = 0;
while (!Q.empty())
{
OS << "Stack trace " << ++Idx << "/" << It->second.size() << ":\n";
const StackTrace& ST = lookupStackTrace(Q.front());
ST.dump(OS);
Q.pop();
}
std::ofstream OF{bt_path};
if (OF.is_open())
{
OF << OS.str();
OF.close();
}
}

20
src/libnetdata/bt/bt.h Normal file
View File

@ -0,0 +1,20 @@
// Backtrace collection helpers (implemented in bt.cc), callable from C and C++.
#ifndef LIBNETDATA_BT
#define LIBNETDATA_BT
#include "../libnetdata.h"
#ifdef __cplusplus
extern "C" {
#endif
// Creates the libbacktrace state for the executable at exepath and sets
// bt_path to "<cache_dir>/bt.log".
void bt_init(const char *exepath, const char *cache_dir);
// Records the caller's stack trace for uuid. Only UUIDs whose first byte is
// 0x0A are tracked, and at most 128 traces are kept per UUID.
void bt_collect(const uuid_t *uuid);
// Writes the stack traces recorded for uuid to the file named by bt_path.
void bt_dump(const uuid_t *uuid);
// Path of the dump file; set by bt_init().
extern const char *bt_path;
#ifdef __cplusplus
}
#endif
#endif /* LIBNETDATA_BT */

View File

@ -753,6 +753,7 @@ extern char *netdata_configured_host_prefix;
#include "facets/facets.h"
#include "functions_evloop/functions_evloop.h"
#include "query_progress/progress.h"
#include "bt/bt.h"
// BEWARE: this exists in alarm-notify.sh
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"