Backtrace info when modifying refcount of metrics.
Keep the last 128 backtraces per metric whenever the metric's reference count is increased or decreased. To keep CPU and memory consumption low, this is enabled only for metrics whose UUID's first byte is 0x0A.
This commit is contained in:
parent
a890cfaa51
commit
39568ed90f
|
@ -125,6 +125,7 @@ option(ENABLE_BUNDLED_PROTOBUF "enable bundled protobuf" False)
|
|||
|
||||
option(ENABLE_LOGS_MANAGEMENT_TESTS "enable logs management tests" True)
|
||||
|
||||
option(ENABLE_LIBBACKTRACE "enable libbacktrace" False)
|
||||
option(ENABLE_SENTRY "enable sentry" False)
|
||||
option(ENABLE_WEBRTC "enable webrtc" False)
|
||||
|
||||
|
@ -136,6 +137,23 @@ if(ENABLE_PLUGIN_GO)
|
|||
find_package(Go "${MIN_GO_VERSION}" REQUIRED)
|
||||
endif()
|
||||
|
||||
if(ENABLE_LIBBACKTRACE)
|
||||
include(ExternalProject)
|
||||
|
||||
ExternalProject_Add(libbacktrace
|
||||
GIT_REPOSITORY https://github.com/ianlancetaylor/libbacktrace.git
|
||||
GIT_TAG master
|
||||
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
|
||||
BUILD_COMMAND make
|
||||
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libbacktrace.a
|
||||
UPDATE_DISCONNECTED True
|
||||
INSTALL_COMMAND make install)
|
||||
|
||||
ExternalProject_Get_Property(libbacktrace INSTALL_DIR)
|
||||
set(LIBBACKTRACE_INCLUDE_DIR ${INSTALL_DIR}/include)
|
||||
set(LIBBACKTRACE_LIBRARY ${INSTALL_DIR}/lib/libbacktrace.a)
|
||||
endif()
|
||||
|
||||
if(ENABLE_SENTRY)
|
||||
include(FetchContent)
|
||||
|
||||
|
@ -670,6 +688,12 @@ if(ENABLE_PLUGIN_EBPF)
|
|||
)
|
||||
endif()
|
||||
|
||||
if(ENABLE_LIBBACKTRACE)
|
||||
list(APPEND LIBNETDATA_FILES
|
||||
src/libnetdata/bt/bt.cc
|
||||
src/libnetdata/bt/bt.h)
|
||||
endif()
|
||||
|
||||
set(LIBH2O_FILES
|
||||
src/web/server/h2o/libh2o/deps/cloexec/cloexec.c
|
||||
src/web/server/h2o/libh2o/deps/libgkc/gkc.c
|
||||
|
@ -1506,6 +1530,12 @@ if(ENABLE_PLUGIN_EBPF)
|
|||
target_link_libraries(libnetdata PUBLIC ${ELF_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_LIBBACKTRACE)
|
||||
add_dependencies(libnetdata libbacktrace)
|
||||
target_include_directories(libnetdata PRIVATE ${LIBBACKTRACE_INCLUDE_DIR})
|
||||
target_link_libraries(libnetdata PRIVATE ${LIBBACKTRACE_LIBRARY})
|
||||
endif()
|
||||
|
||||
# judy
|
||||
target_link_libraries(libnetdata PUBLIC judy)
|
||||
|
||||
|
|
|
@ -46,7 +46,8 @@ ifeq ($(ENABLE_SENTRY),true)
|
|||
-DNETDATA_SENTRY_ENVIRONMENT=$(RELEASE_PIPELINE) \
|
||||
-DNETDATA_SENTRY_RELEASE=$(VERSION) \
|
||||
-DNETDATA_SENTRY_DIST=$(BUILD_DESTINATION) \
|
||||
-DNETDATA_SENTRY_DSN=$(SENTRY_DSN)
|
||||
-DNETDATA_SENTRY_DSN=$(SENTRY_DSN) \
|
||||
-DENABLE_LIBBACKTRACE=On
|
||||
else
|
||||
SENTRY_CONFIG := -DENABLE_SENTRY=Off
|
||||
endif
|
||||
|
@ -69,7 +70,7 @@ override_dh_auto_configure:
|
|||
packaging/bundle-ebpf-co-re.sh . ${TOP}/usr/libexec/netdata/plugins.d; \
|
||||
fi
|
||||
dh_auto_configure -- -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=/ \
|
||||
-DWEB_DIR=/var/lib/netdata/www \
|
||||
-DCMAKE_C_FLAGS='-ffile-prefix-map=${SRC_DIR}=${SRC_DIR}' \
|
||||
|
@ -105,7 +106,6 @@ override_dh_strip:
|
|||
if [ "${ENABLE_SENTRY}" = "true" ] && [ "${UPLOAD_SENTRY}" = "true" ]; then \
|
||||
sentry-cli debug-files upload -o netdata-inc -p netdata-agent --force-foreground --log-level=debug --wait --include-sources /usr/src/netdata/debian/netdata/usr/sbin/netdata; \
|
||||
fi
|
||||
dh_strip
|
||||
|
||||
override_dh_install:
|
||||
cp -v $(BASE_CONFIG) debian/netdata.conf
|
||||
|
|
|
@ -116,6 +116,11 @@
|
|||
#cmakedefine NETDATA_SENTRY_RELEASE "@NETDATA_SENTRY_RELEASE@"
|
||||
#cmakedefine NETDATA_SENTRY_DIST "@NETDATA_SENTRY_DIST@"
|
||||
#cmakedefine NETDATA_SENTRY_DSN "@NETDATA_SENTRY_DSN@"
|
||||
|
||||
// enabled libbacktrace
|
||||
|
||||
#cmakedefine ENABLE_LIBBACKTRACE
|
||||
|
||||
// enabled bundling
|
||||
|
||||
#cmakedefine ENABLE_BUNDLED_JSONC
|
||||
|
|
|
@ -26,6 +26,9 @@ void get_netdata_execution_path(void) {
|
|||
}
|
||||
|
||||
netdata_exe_file[exepath_size] = '\0';
|
||||
#ifdef ENABLE_LIBBACKTRACE
|
||||
bt_init(netdata_exe_file, netdata_configured_cache_dir);
|
||||
#endif
|
||||
|
||||
// macOS's dirname(3) does not modify passed string
|
||||
char *tmpdir = strdupz(netdata_exe_file);
|
||||
|
|
|
@ -1400,6 +1400,8 @@ int unittest_prepare_rrd(char **user) {
|
|||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
uv_setup_args(argc, argv);
|
||||
|
||||
// initialize the system clocks
|
||||
clocks_init();
|
||||
netdata_start_time = now_realtime_sec();
|
||||
|
|
|
@ -38,6 +38,10 @@ void sentry_native_init(void)
|
|||
sentry_options_set_debug(options, 1);
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_LIBBACKTRACE
|
||||
sentry_options_add_attachment(options, bt_path);
|
||||
#endif
|
||||
|
||||
sentry_init(options);
|
||||
}
|
||||
|
||||
|
|
|
@ -134,6 +134,10 @@ static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused,
|
|||
}
|
||||
|
||||
static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
#ifdef ENABLE_LIBBACKTRACE
|
||||
bt_collect(&metric->uuid);
|
||||
#endif
|
||||
|
||||
spinlock_lock(&metric->refcount_spinlock);
|
||||
|
||||
if (metric->refcount >= 0)
|
||||
|
@ -154,10 +158,18 @@ static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
|
|||
}
|
||||
|
||||
static inline void metric_release(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
#ifdef ENABLE_LIBBACKTRACE
|
||||
bt_collect(&metric->uuid);
|
||||
#endif
|
||||
|
||||
spinlock_lock(&metric->refcount_spinlock);
|
||||
|
||||
if (metric->refcount <= 0)
|
||||
if (metric->refcount <= 0) {
|
||||
#ifdef ENABLE_LIBBACKTRACE
|
||||
bt_dump(&metric->uuid);
|
||||
#endif
|
||||
fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
|
||||
}
|
||||
|
||||
metric->refcount -= 1;
|
||||
REFCOUNT refcount = metric->refcount;
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
#include "bt.h"
|
||||
|
||||
#include <backtrace.h>
|
||||
#include <backtrace-supported.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <queue>
|
||||
|
||||
// Process-wide libbacktrace state; stays null until bt_init() assigns it.
static backtrace_state *State = nullptr;
|
||||
|
||||
// libbacktrace pcinfo callback: appends one human-readable line describing a
// resolved frame to the output stream passed through `data`.
//
//   data     - the `std::ostream *` that StackTrace::dump() hands to
//              backtrace_pcinfo()
//   pc       - program counter of the frame
//   filename - source file name, or null when it could not be resolved
//   lineno   - line number inside `filename`
//   function - function name, or null when it could not be resolved
//
// Always returns 0.
static int pcinfo_callback(void *data, uintptr_t pc, const char *filename, int lineno, const char *function)
{
    // The pointer was created from a `std::ostream *`, so cast back to exactly
    // that type (casting a void* to a different class type is not valid).
    std::ostream *OS = static_cast<std::ostream *>(data);

    if (function)
        *OS << function << "() @ ";

    if (filename) {
        *OS << filename << ":" << lineno;
    } else {
        // Addresses are only useful in hex; restore the caller's formatting
        // flags afterwards so later output is unaffected.
        const std::ios_base::fmtflags Saved = OS->flags();
        *OS << "0x" << std::hex << pc;
        OS->flags(Saved);
        *OS << " (information not available)";
    }

    *OS << "\n";
    return 0;
}
|
||||
|
||||
// libbacktrace error callback for symbolization: writes the error message and
// error number to the output stream passed through `data`.
//
// `data` must point to a `std::ostream` (this is what StackTrace::dump()
// passes); it must not be used with any other kind of user data.
static void error_callback(void *data, const char *msg, int errnum)
{
    // Cast back to the exact type the pointer was created from.
    std::ostream *OS = static_cast<std::ostream *>(data);
    *OS << "Backtrace error: " << msg << " (error number " << errnum << ")\n";
}
|
||||
|
||||
struct UuidKey
|
||||
{
|
||||
const uuid_t *Inner;
|
||||
|
||||
bool operator==(const UuidKey& Other) const
|
||||
{
|
||||
return uuid_compare(*Inner, *Other.Inner) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<UuidKey>
|
||||
{
|
||||
size_t operator()(const UuidKey& Key) const
|
||||
{
|
||||
return XXH64(*Key.Inner, sizeof(uuid_t), 0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
class StackTrace
|
||||
{
|
||||
public:
|
||||
static const size_t MAX_ITEMS = 128;
|
||||
uintptr_t PCs[MAX_ITEMS] = { 0 };
|
||||
size_t Items = 0;
|
||||
|
||||
void append(uintptr_t PC)
|
||||
{
|
||||
assert(Items < MAX_ITEMS);
|
||||
PCs[Items++] = PC;
|
||||
}
|
||||
|
||||
bool operator==(const StackTrace& Other) const
|
||||
{
|
||||
if (Items != Other.Items)
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < Items; i++)
|
||||
if (PCs[i] != Other.PCs[i])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void dump(std::ostream &OS) const
|
||||
{
|
||||
for (size_t i = 0; i < Items; ++i)
|
||||
backtrace_pcinfo(State, PCs[i], pcinfo_callback, error_callback, &OS);
|
||||
OS << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
namespace std
|
||||
{
|
||||
template<>
|
||||
struct hash<StackTrace>
|
||||
{
|
||||
size_t operator()(const StackTrace& ST) const
|
||||
{
|
||||
return XXH64(ST.PCs, ST.Items * sizeof(uintptr_t), 0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Every distinct stack trace seen so far, paired with its hash; new entries are inserted at their lower_bound position, which keeps the vector sorted by hash.
static std::vector<std::pair<uint64_t, StackTrace>> InternedStackTraces;
|
||||
|
||||
static size_t stackTraceID(const StackTrace &ST)
|
||||
{
|
||||
std::hash<StackTrace> hasher;
|
||||
uint64_t K = hasher(ST);
|
||||
|
||||
auto Pred = [](const std::pair<uint64_t, StackTrace>& a, const std::pair<uint64_t, StackTrace>& b) {
|
||||
return a.first < b.first;
|
||||
};
|
||||
|
||||
std::pair<uint64_t, StackTrace> P(K, ST);
|
||||
auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), P, Pred);
|
||||
if (It != InternedStackTraces.end() && It->first == K)
|
||||
return K;
|
||||
|
||||
InternedStackTraces.insert(It, {K, ST});
|
||||
return K;
|
||||
}
|
||||
|
||||
// Returns the interned stack trace registered under ID.
//
// If ID is unknown, a reference to an empty StackTrace is returned instead of
// dereferencing an iterator that does not point at a matching entry.
static const StackTrace &lookupStackTrace(uint64_t ID)
{
    static const StackTrace Unknown;

    auto KeyLess = [](const std::pair<uint64_t, StackTrace> &Entry, uint64_t Key) {
        return Entry.first < Key;
    };

    auto It = std::lower_bound(InternedStackTraces.begin(), InternedStackTraces.end(), ID, KeyLess);
    if (It == InternedStackTraces.end() || It->first != ID)
        return Unknown;

    return It->second;
}
|
||||
|
||||
// Per-UUID history: maps a metric UUID to a FIFO queue of interned stack-trace IDs.
static std::unordered_map<UuidKey, std::queue<uint64_t>> USTs;
|
||||
// Locked by both bt_collect() and bt_dump() while they access USTs.
static std::mutex Mutex;
|
||||
|
||||
static int simple_callback(void *data, uintptr_t pc)
|
||||
{
|
||||
StackTrace *ST = static_cast<StackTrace*>(data);
|
||||
if (ST->Items == StackTrace::MAX_ITEMS)
|
||||
fatal("StackTrace too big...");
|
||||
|
||||
ST->append(pc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Path of the file bt_dump() writes to; null until bt_init() has run.
const char *bt_path = NULL;
|
||||
|
||||
void bt_init(const char *exepath, const char *cache_dir)
|
||||
{
|
||||
State = backtrace_create_state(exepath, 1, nullptr, nullptr);
|
||||
|
||||
char buf[FILENAME_MAX + 1];
|
||||
snprintfz(buf, FILENAME_MAX, "%s/%s", cache_dir, "bt.log");
|
||||
bt_path = strdupz(buf);
|
||||
}
|
||||
|
||||
void bt_collect(const uuid_t *uuid)
|
||||
{
|
||||
// Enable collection on 1/16th of UUIDs to save on CPU and RAM consumption
|
||||
if (*uuid[0] != 0x0A)
|
||||
return;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(Mutex);
|
||||
|
||||
UuidKey UK = { uuid };
|
||||
|
||||
auto& Q = USTs[UK];
|
||||
if (Q.size() == 128)
|
||||
Q.pop();
|
||||
|
||||
StackTrace ST;
|
||||
backtrace_simple(State, 1, simple_callback, error_callback, &ST);
|
||||
Q.push(stackTraceID(ST));
|
||||
}
|
||||
}
|
||||
|
||||
void bt_dump(const uuid_t *uuid)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(Mutex);
|
||||
|
||||
UuidKey UK = { uuid };
|
||||
|
||||
auto It = USTs.find(UK);
|
||||
if (It == USTs.end())
|
||||
return;
|
||||
|
||||
std::queue<uint64_t> Q = It->second;
|
||||
std::ostringstream OS;
|
||||
|
||||
size_t Idx = 0;
|
||||
while (!Q.empty())
|
||||
{
|
||||
OS << "Stack trace " << ++Idx << "/" << It->second.size() << ":\n";
|
||||
const StackTrace& ST = lookupStackTrace(Q.front());
|
||||
ST.dump(OS);
|
||||
Q.pop();
|
||||
}
|
||||
|
||||
std::ofstream OF{bt_path};
|
||||
if (OF.is_open())
|
||||
{
|
||||
OF << OS.str();
|
||||
OF.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef LIBNETDATA_BT
|
||||
#define LIBNETDATA_BT
|
||||
|
||||
#include "../libnetdata.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Creates the backtrace state for the executable at `exepath` and sets bt_path to "<cache_dir>/bt.log".
void bt_init(const char *exepath, const char *cache_dir);
|
||||
// Records the current call stack for `uuid`; only UUIDs whose first byte is 0x0A are tracked, and at most 128 traces are kept per UUID.
void bt_collect(const uuid_t *uuid);
|
||||
// Writes the stack traces recorded for `uuid` to the file at bt_path; does nothing if none were recorded.
void bt_dump(const uuid_t *uuid);
|
||||
|
||||
// Path of the file written by bt_dump(); assigned by bt_init().
extern const char *bt_path;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LIBNETDATA_BT */
|
|
@ -753,6 +753,7 @@ extern char *netdata_configured_host_prefix;
|
|||
#include "facets/facets.h"
|
||||
#include "functions_evloop/functions_evloop.h"
|
||||
#include "query_progress/progress.h"
|
||||
#include "bt/bt.h"
|
||||
|
||||
// BEWARE: this exists in alarm-notify.sh
|
||||
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"
|
||||
|
|
Loading…
Reference in New Issue