DBENGINE v2 - improvements part 11 (#14337)
* acquiring / releasing interface for metrics * metrics registry statistics * cleanup metrics registry by deleting metrics when they dont have retention anymore; do not double copy the data of pages to be flushed * print the tier in retention summary * Open files with buffered instead of direct I/O (test) * added more metrics stats and fixed unittest * rename writer functions to avoid confusion with refcounting * do not release a metric that is not acquired * Revert to use direct I/O on write -- use direct I/O on read as well * keep track of ARAL overhead and add it to the memory chart * aral full check via api * Cleanup * give names to ARALs and PGCs * aral improvements * restore query expansion to the future * prefer higher resolution tier when switching plans * added extent read statistics * smoother joining of tiers at query engine * fine tune aral max allocation size * aral restructuring to hide its internals from the rest of netdata * aral restructuring; addtion of defrag option to aral to keep the linked list sorted - enabled by default to test it * fully async aral * some statistics and cleanup * fix infinite loop while calculating retention * aral docs and defragmenting disabled by default * fix bug and add optimization when defragmenter is not enabled * aral stress test * aral speed report and documentation * added internal checks that all pages are full * improve internal log about metrics deletion * metrics registry uses one aral per partition * metrics registry aral max size to 512 elements per page * remove data_structures/README.md dependency --------- Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
This commit is contained in:
parent
fd7f39a744
commit
7f8f11eb37
|
@ -453,8 +453,8 @@ set(LIBNETDATA_FILES
|
|||
libnetdata/adaptive_resortable_list/adaptive_resortable_list.h
|
||||
libnetdata/config/appconfig.c
|
||||
libnetdata/config/appconfig.h
|
||||
libnetdata/arrayalloc/arrayalloc.c
|
||||
libnetdata/arrayalloc/arrayalloc.h
|
||||
libnetdata/aral/aral.c
|
||||
libnetdata/aral/aral.h
|
||||
libnetdata/avl/avl.c
|
||||
libnetdata/avl/avl.h
|
||||
libnetdata/buffer/buffer.c
|
||||
|
|
|
@ -131,8 +131,8 @@ LIBNETDATA_FILES = \
|
|||
libnetdata/adaptive_resortable_list/adaptive_resortable_list.h \
|
||||
libnetdata/config/appconfig.c \
|
||||
libnetdata/config/appconfig.h \
|
||||
libnetdata/arrayalloc/arrayalloc.c \
|
||||
libnetdata/arrayalloc/arrayalloc.h \
|
||||
libnetdata/aral/aral.c \
|
||||
libnetdata/aral/aral.h \
|
||||
libnetdata/avl/avl.c \
|
||||
libnetdata/avl/avl.h \
|
||||
libnetdata/buffer/buffer.c \
|
||||
|
|
|
@ -975,7 +975,7 @@ static inline struct pid_stat *get_pid_entry(pid_t pid) {
|
|||
init_pid_fds(p, 0, p->fds_size);
|
||||
p->pid = pid;
|
||||
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(root_of_pids, p, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(root_of_pids, p, prev, next);
|
||||
|
||||
all_pids[pid] = p;
|
||||
all_pids_count++;
|
||||
|
@ -993,7 +993,7 @@ static inline void del_pid_entry(pid_t pid) {
|
|||
|
||||
debug_log("process %d %s exited, deleting it.", pid, p->comm);
|
||||
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(root_of_pids, p, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(root_of_pids, p, prev, next);
|
||||
|
||||
// free the filename
|
||||
#ifndef __FreeBSD__
|
||||
|
|
|
@ -1730,7 +1730,7 @@ AC_CONFIG_FILES([
|
|||
libnetdata/Makefile
|
||||
libnetdata/tests/Makefile
|
||||
libnetdata/adaptive_resortable_list/Makefile
|
||||
libnetdata/arrayalloc/Makefile
|
||||
libnetdata/aral/Makefile
|
||||
libnetdata/avl/Makefile
|
||||
libnetdata/buffer/Makefile
|
||||
libnetdata/clocks/Makefile
|
||||
|
|
|
@ -243,6 +243,9 @@ static void global_statistics_charts(void) {
|
|||
global_statistics_copy(&gs, GLOBAL_STATS_RESET_WEB_USEC_MAX);
|
||||
getrusage(RUSAGE_SELF, &me);
|
||||
|
||||
size_t aral_structures, aral_malloc_allocated, aral_malloc_used, aral_mmap_allocated, aral_mmap_used;
|
||||
aral_get_size_statistics(&aral_structures, &aral_malloc_allocated, &aral_malloc_used, &aral_mmap_allocated, &aral_mmap_used);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
|
||||
|
@ -292,6 +295,7 @@ static void global_statistics_charts(void) {
|
|||
static RRDDIM *rd_replication = NULL;
|
||||
static RRDDIM *rd_buffers = NULL;
|
||||
static RRDDIM *rd_workers = NULL;
|
||||
static RRDDIM *rd_aral = NULL;
|
||||
static RRDDIM *rd_other = NULL;
|
||||
|
||||
if (unlikely(!st_memory)) {
|
||||
|
@ -322,6 +326,7 @@ static void global_statistics_charts(void) {
|
|||
rd_replication = rrddim_add(st_memory, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_buffers = rrddim_add(st_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_workers = rrddim_add(st_memory, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_aral = rrddim_add(st_memory, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_other = rrddim_add(st_memory, "other", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
|
@ -355,6 +360,7 @@ static void global_statistics_charts(void) {
|
|||
rrddim_set_by_pointer(st_memory, rd_replication, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_replication) + (collected_number)replication_allocated_memory());
|
||||
rrddim_set_by_pointer(st_memory, rd_buffers, (collected_number)buffers);
|
||||
rrddim_set_by_pointer(st_memory, rd_workers, (collected_number) workers_allocated_memory());
|
||||
rrddim_set_by_pointer(st_memory, rd_aral, (collected_number) aral_structures);
|
||||
rrddim_set_by_pointer(st_memory, rd_other, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_other));
|
||||
|
||||
rrdset_done(st_memory);
|
||||
|
@ -374,6 +380,7 @@ static void global_statistics_charts(void) {
|
|||
static RRDDIM *rd_cbuffers_streaming = NULL;
|
||||
static RRDDIM *rd_buffers_replication = NULL;
|
||||
static RRDDIM *rd_buffers_web = NULL;
|
||||
static RRDDIM *rd_buffers_aral = NULL;
|
||||
|
||||
if (unlikely(!st_memory_buffers)) {
|
||||
st_memory_buffers = rrdset_create_localhost(
|
||||
|
@ -402,6 +409,7 @@ static void global_statistics_charts(void) {
|
|||
rd_cbuffers_streaming = rrddim_add(st_memory_buffers, "streaming cbuf", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_buffers_replication = rrddim_add(st_memory_buffers, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_buffers_web = rrddim_add(st_memory_buffers, "web", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_buffers_aral = rrddim_add(st_memory_buffers, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_memory_buffers, rd_queries, (collected_number)netdata_buffers_statistics.query_targets_size + (collected_number) onewayalloc_allocated_memory());
|
||||
|
@ -416,6 +424,7 @@ static void global_statistics_charts(void) {
|
|||
rrddim_set_by_pointer(st_memory_buffers, rd_cbuffers_streaming, (collected_number)netdata_buffers_statistics.cbuffers_streaming);
|
||||
rrddim_set_by_pointer(st_memory_buffers, rd_buffers_replication, (collected_number)replication_allocated_buffers());
|
||||
rrddim_set_by_pointer(st_memory_buffers, rd_buffers_web, (collected_number)netdata_buffers_statistics.buffers_web);
|
||||
rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)(aral_malloc_allocated + aral_mmap_allocated) - (collected_number)(aral_malloc_used + aral_mmap_used));
|
||||
|
||||
rrdset_done(st_memory_buffers);
|
||||
}
|
||||
|
@ -1885,6 +1894,111 @@ static void dbengine2_statistics_charts(void) {
|
|||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
static RRDSET *st_mrg_metrics = NULL;
|
||||
static RRDDIM *rd_mrg_metrics = NULL;
|
||||
static RRDDIM *rd_mrg_acquired = NULL;
|
||||
static RRDDIM *rd_mrg_collected = NULL;
|
||||
static RRDDIM *rd_mrg_with_retention = NULL;
|
||||
static RRDDIM *rd_mrg_without_retention = NULL;
|
||||
static RRDDIM *rd_mrg_multiple_writers = NULL;
|
||||
|
||||
if (unlikely(!st_mrg_metrics)) {
|
||||
st_mrg_metrics = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"dbengine_metrics",
|
||||
NULL,
|
||||
"dbengine metrics",
|
||||
NULL,
|
||||
"Netdata Metrics in Metrics Registry",
|
||||
"metrics",
|
||||
"netdata",
|
||||
"stats",
|
||||
priority,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_LINE);
|
||||
|
||||
rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
priority++;
|
||||
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries);
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced);
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers);
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention);
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention);
|
||||
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts);
|
||||
|
||||
rrdset_done(st_mrg_metrics);
|
||||
}
|
||||
|
||||
{
|
||||
static RRDSET *st_mrg_ops = NULL;
|
||||
static RRDDIM *rd_mrg_add = NULL;
|
||||
static RRDDIM *rd_mrg_del = NULL;
|
||||
static RRDDIM *rd_mrg_search = NULL;
|
||||
|
||||
if (unlikely(!st_mrg_ops)) {
|
||||
st_mrg_ops = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"dbengine_metrics_registry_operations",
|
||||
NULL,
|
||||
"dbengine metrics",
|
||||
NULL,
|
||||
"Netdata Metrics Registry Operations",
|
||||
"metrics",
|
||||
"netdata",
|
||||
"stats",
|
||||
priority,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_LINE);
|
||||
|
||||
rd_mrg_add = rrddim_add(st_mrg_ops, "add", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_mrg_del = rrddim_add(st_mrg_ops, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
rd_mrg_search = rrddim_add(st_mrg_ops, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
priority++;
|
||||
|
||||
rrddim_set_by_pointer(st_mrg_ops, rd_mrg_add, (collected_number)mrg_stats.additions);
|
||||
rrddim_set_by_pointer(st_mrg_ops, rd_mrg_del, (collected_number)mrg_stats.deletions);
|
||||
rrddim_set_by_pointer(st_mrg_ops, rd_mrg_search, (collected_number)mrg_stats.search_hits + (collected_number)mrg_stats.search_misses);
|
||||
|
||||
rrdset_done(st_mrg_ops);
|
||||
}
|
||||
|
||||
{
|
||||
static RRDSET *st_mrg_references = NULL;
|
||||
static RRDDIM *rd_mrg_references = NULL;
|
||||
|
||||
if (unlikely(!st_mrg_references)) {
|
||||
st_mrg_references = rrdset_create_localhost(
|
||||
"netdata",
|
||||
"dbengine_metrics_registry_references",
|
||||
NULL,
|
||||
"dbengine metrics",
|
||||
NULL,
|
||||
"Netdata Metrics Registry References",
|
||||
"references",
|
||||
"netdata",
|
||||
"stats",
|
||||
priority,
|
||||
localhost->rrd_update_every,
|
||||
RRDSET_TYPE_LINE);
|
||||
|
||||
rd_mrg_references = rrddim_add(st_mrg_references, "references", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
priority++;
|
||||
|
||||
rrddim_set_by_pointer(st_mrg_references, rd_mrg_references, (collected_number)mrg_stats.current_references);
|
||||
|
||||
rrdset_done(st_mrg_references);
|
||||
}
|
||||
|
||||
{
|
||||
static RRDSET *st_cache_hit_ratio = NULL;
|
||||
static RRDDIM *rd_hit_ratio = NULL;
|
||||
|
@ -3817,7 +3931,7 @@ static void workers_threads_cleanup(struct worker_utilization *wu) {
|
|||
|
||||
if(!t->enabled) {
|
||||
JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0);
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wu->threads, t, prev, next);
|
||||
freez(t);
|
||||
}
|
||||
t = next;
|
||||
|
@ -3844,7 +3958,7 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu,
|
|||
*PValue = wt;
|
||||
|
||||
// link it
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wu->threads, wt, prev, next);
|
||||
|
||||
return wt;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
typedef int32_t REFCOUNT;
|
||||
#define REFCOUNT_DELETING (-100)
|
||||
|
||||
// to use arrayalloc uncomment the following line:
|
||||
// to use ARAL uncomment the following line:
|
||||
#define PGC_WITH_ARAL 1
|
||||
|
||||
typedef enum __attribute__ ((__packed__)) {
|
||||
|
@ -82,6 +82,8 @@ struct pgc_linked_list {
|
|||
|
||||
struct pgc {
|
||||
struct {
|
||||
char name[PGC_NAME_MAX + 1];
|
||||
|
||||
size_t partitions;
|
||||
size_t clean_size;
|
||||
size_t max_dirty_pages_per_call;
|
||||
|
@ -415,13 +417,25 @@ struct section_pages {
|
|||
PGC_PAGE *base;
|
||||
};
|
||||
|
||||
static ARAL section_pages_aral = {
|
||||
.filename = NULL,
|
||||
.cache_dir = NULL,
|
||||
.use_mmap = false,
|
||||
.initial_elements = 16384 / sizeof(struct section_pages),
|
||||
.requested_element_size = sizeof(struct section_pages),
|
||||
};
|
||||
static ARAL *pgc_section_pages_aral = NULL;
|
||||
static void pgc_section_pages_static_aral_init(void) {
|
||||
static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
|
||||
|
||||
if(unlikely(!pgc_section_pages_aral)) {
|
||||
netdata_spinlock_lock(&spinlock);
|
||||
|
||||
// we have to check again
|
||||
if(!pgc_section_pages_aral)
|
||||
pgc_section_pages_aral = aral_create(
|
||||
"pgc_section",
|
||||
sizeof(struct section_pages),
|
||||
0,
|
||||
4096,
|
||||
NULL, NULL, false, false);
|
||||
|
||||
netdata_spinlock_unlock(&spinlock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pgc_stats_ll_judy_change(PGC *cache, struct pgc_linked_list *ll, size_t mem_before_judyl, size_t mem_after_judyl) {
|
||||
if(mem_after_judyl > mem_before_judyl) {
|
||||
|
@ -462,7 +476,7 @@ static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PG
|
|||
struct section_pages *sp = *section_pages_pptr;
|
||||
if(!sp) {
|
||||
// sp = callocz(1, sizeof(struct section_pages));
|
||||
sp = arrayalloc_mallocz(§ion_pages_aral);
|
||||
sp = aral_mallocz(pgc_section_pages_aral);
|
||||
memset(sp, 0, sizeof(struct section_pages));
|
||||
|
||||
*section_pages_pptr = sp;
|
||||
|
@ -473,7 +487,7 @@ static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PG
|
|||
|
||||
sp->entries++;
|
||||
sp->size += page->assumed_size;
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(sp->base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
|
||||
|
||||
if((sp->entries % cache->config.max_dirty_pages_per_call) == 0)
|
||||
ll->version++;
|
||||
|
@ -484,11 +498,11 @@ static void pgc_ll_add(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PG
|
|||
// - DIRTY pages made CLEAN, depending on their accesses may be appended (accesses > 0) or prepended (accesses = 0).
|
||||
|
||||
if(page->accesses || page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED) {
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
|
||||
}
|
||||
else
|
||||
DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
|
||||
ll->version++;
|
||||
}
|
||||
|
@ -530,7 +544,7 @@ static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PG
|
|||
struct section_pages *sp = *section_pages_pptr;
|
||||
sp->entries--;
|
||||
sp->size -= page->assumed_size;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(sp->base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(sp->base, page, link.prev, link.next);
|
||||
|
||||
if(!sp->base) {
|
||||
size_t mem_before_judyl, mem_after_judyl;
|
||||
|
@ -543,13 +557,13 @@ static void pgc_ll_del(PGC *cache __maybe_unused, struct pgc_linked_list *ll, PG
|
|||
fatal("DBENGINE CACHE: cannot delete section from Judy LL");
|
||||
|
||||
// freez(sp);
|
||||
arrayalloc_freez(§ion_pages_aral, sp);
|
||||
aral_freez(pgc_section_pages_aral, sp);
|
||||
mem_after_judyl -= sizeof(struct section_pages);
|
||||
pgc_stats_ll_judy_change(cache, ll, mem_before_judyl, mem_after_judyl);
|
||||
}
|
||||
}
|
||||
else {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ll->base, page, link.prev, link.next);
|
||||
ll->version++;
|
||||
}
|
||||
|
||||
|
@ -565,8 +579,8 @@ static inline void page_has_been_accessed(PGC *cache, PGC_PAGE *page) {
|
|||
|
||||
if (flags & PGC_PAGE_CLEAN) {
|
||||
if(pgc_ll_trylock(cache, &cache->clean)) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
pgc_ll_unlock(cache, &cache->clean);
|
||||
page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
|
||||
}
|
||||
|
@ -860,7 +874,7 @@ static inline void free_this_page(PGC *cache, PGC_PAGE *page) {
|
|||
|
||||
// free our memory
|
||||
#ifdef PGC_WITH_ARAL
|
||||
arrayalloc_freez(cache->aral, page);
|
||||
aral_freez(cache->aral, page);
|
||||
#else
|
||||
freez(page);
|
||||
#endif
|
||||
|
@ -1038,8 +1052,8 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic
|
|||
break;
|
||||
|
||||
if(unlikely(page_flag_check(page, PGC_PAGE_HAS_BEEN_ACCESSED | PGC_PAGE_HAS_NO_DATA_IGNORE_ACCESSES) == PGC_PAGE_HAS_BEEN_ACCESSED)) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
page_flag_clear(page, PGC_PAGE_HAS_BEEN_ACCESSED);
|
||||
continue;
|
||||
}
|
||||
|
@ -1056,7 +1070,7 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic
|
|||
__atomic_add_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED);
|
||||
__atomic_add_fetch(&cache->stats.evicting_size, page->assumed_size, __ATOMIC_RELAXED);
|
||||
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(pages_to_evict, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
|
||||
|
||||
pages_to_evict_size += page->assumed_size;
|
||||
|
||||
|
@ -1073,8 +1087,8 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic
|
|||
if(!first_page_we_relocated)
|
||||
first_page_we_relocated = page;
|
||||
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(cache->clean.base, page, link.prev, link.next);
|
||||
|
||||
// check if we have to stop
|
||||
if(unlikely(++total_pages_skipped >= max_skip && !all_of_them)) {
|
||||
|
@ -1099,8 +1113,8 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic
|
|||
next = page->link.next;
|
||||
|
||||
size_t partition = pgc_indexing_partition(cache, page->metric_id);
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(pages_to_evict, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pages_to_evict, page, link.prev, link.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pages_per_partition[partition], page, link.prev, link.next);
|
||||
}
|
||||
|
||||
// remove them from the index
|
||||
|
@ -1178,7 +1192,7 @@ static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
|
|||
__atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
|
||||
|
||||
#ifdef PGC_WITH_ARAL
|
||||
PGC_PAGE *allocation = arrayalloc_mallocz(cache->aral);
|
||||
PGC_PAGE *allocation = aral_mallocz(cache->aral);
|
||||
#endif
|
||||
PGC_PAGE *page;
|
||||
size_t spins = 0;
|
||||
|
@ -1285,7 +1299,7 @@ static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) {
|
|||
|
||||
#ifdef PGC_WITH_ARAL
|
||||
if(allocation)
|
||||
arrayalloc_freez(cache->aral, allocation);
|
||||
aral_freez(cache->aral, allocation);
|
||||
#endif
|
||||
|
||||
__atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED);
|
||||
|
@ -1713,7 +1727,8 @@ void free_all_unreferenced_clean_pages(PGC *cache) {
|
|||
// ----------------------------------------------------------------------------
|
||||
// public API
|
||||
|
||||
PGC *pgc_create(size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
|
||||
PGC *pgc_create(const char *name,
|
||||
size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
|
||||
size_t max_dirty_pages_per_flush,
|
||||
save_dirty_init_callback pgc_save_init_cb,
|
||||
save_dirty_page_callback pgc_save_dirty_cb,
|
||||
|
@ -1732,6 +1747,7 @@ PGC *pgc_create(size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
|
|||
max_flushes_inline = 2;
|
||||
|
||||
PGC *cache = callocz(1, sizeof(PGC));
|
||||
strncpyz(cache->config.name, name, PGC_NAME_MAX);
|
||||
cache->config.options = options;
|
||||
cache->config.clean_size = (clean_size_bytes < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : clean_size_bytes;
|
||||
cache->config.pgc_free_clean_cb = pgc_free_cb;
|
||||
|
@ -1772,10 +1788,14 @@ PGC *pgc_create(size_t clean_size_bytes, free_clean_page_callback pgc_free_cb,
|
|||
cache->clean.stats = &cache->stats.queues.clean;
|
||||
|
||||
#ifdef PGC_WITH_ARAL
|
||||
cache->aral = arrayalloc_create(sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page, 65536 / sizeof(PGC_PAGE),
|
||||
NULL, NULL, false, false);
|
||||
cache->aral = aral_create(name,
|
||||
sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
|
||||
0,
|
||||
4096,
|
||||
NULL, NULL, false, false);
|
||||
#endif
|
||||
|
||||
pgc_section_pages_static_aral_init();
|
||||
pointer_index_init(cache);
|
||||
|
||||
return cache;
|
||||
|
@ -1803,7 +1823,7 @@ void pgc_destroy(PGC *cache) {
|
|||
else {
|
||||
pointer_destroy_index(cache);
|
||||
#ifdef PGC_WITH_ARAL
|
||||
arrayalloc_destroy(cache->aral);
|
||||
aral_destroy(cache->aral);
|
||||
#endif
|
||||
freez(cache);
|
||||
}
|
||||
|
@ -2602,7 +2622,8 @@ void unittest_stress_test(void) {
|
|||
#endif
|
||||
|
||||
int pgc_unittest(void) {
|
||||
PGC *cache = pgc_create(32 * 1024 * 1024, unittest_free_clean_page_callback,
|
||||
PGC *cache = pgc_create("test",
|
||||
32 * 1024 * 1024, unittest_free_clean_page_callback,
|
||||
64, NULL, unittest_save_dirty_page_callback,
|
||||
10, 10, 1000, 10,
|
||||
PGC_OPTIONS_DEFAULT, 1, 11);
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
typedef struct pgc PGC;
|
||||
typedef struct pgc_page PGC_PAGE;
|
||||
#define PGC_NAME_MAX 23
|
||||
|
||||
typedef enum __attribute__ ((__packed__)) {
|
||||
PGC_OPTIONS_NONE = 0,
|
||||
|
@ -165,7 +166,8 @@ typedef void (*free_clean_page_callback)(PGC *cache, PGC_ENTRY entry);
|
|||
typedef void (*save_dirty_page_callback)(PGC *cache, PGC_ENTRY *entries_array, PGC_PAGE **pages_array, size_t entries);
|
||||
typedef void (*save_dirty_init_callback)(PGC *cache, Word_t section);
|
||||
// create a cache
|
||||
PGC *pgc_create(size_t clean_size_bytes, free_clean_page_callback pgc_free_clean_cb,
|
||||
PGC *pgc_create(const char *name,
|
||||
size_t clean_size_bytes, free_clean_page_callback pgc_free_clean_cb,
|
||||
size_t max_dirty_pages_per_flush, save_dirty_init_callback pgc_save_init_cb, save_dirty_page_callback pgc_save_dirty_cb,
|
||||
size_t max_pages_per_inline_eviction, size_t max_inline_evictors,
|
||||
size_t max_skip_pages_per_inline_eviction,
|
||||
|
|
|
@ -4,13 +4,13 @@
|
|||
void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
uv_rwlock_wrlock(&ctx->datafiles.rwlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(ctx->datafiles.first, datafile, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ctx->datafiles.first, datafile, prev, next);
|
||||
uv_rwlock_wrunlock(&ctx->datafiles.rwlock);
|
||||
}
|
||||
|
||||
void datafile_list_delete_unsafe(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ctx->datafiles.first, datafile, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ctx->datafiles.first, datafile, prev, next);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -605,7 +605,9 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx,
|
|||
false);
|
||||
|
||||
if(!vd.is_valid) {
|
||||
mrg_metric_release(main_mrg, metric);
|
||||
if(metric)
|
||||
mrg_metric_release(main_mrg, metric);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
typedef int32_t REFCOUNT;
|
||||
#define REFCOUNT_DELETING (-100)
|
||||
|
||||
typedef enum __attribute__ ((__packed__)) {
|
||||
METRIC_FLAG_HAS_RETENTION = (1 << 0),
|
||||
} METRIC_FLAGS;
|
||||
|
||||
struct metric {
|
||||
uuid_t uuid; // never changes
|
||||
Word_t section; // never changes
|
||||
|
@ -12,6 +16,8 @@ struct metric {
|
|||
time_t latest_time_s_hot; // latest time of the currently collected page
|
||||
uint32_t latest_update_every_s; //
|
||||
pid_t writer;
|
||||
METRIC_FLAGS flags;
|
||||
REFCOUNT refcount;
|
||||
SPINLOCK spinlock; // protects all variable members
|
||||
|
||||
// THIS IS allocated with malloc()
|
||||
|
@ -19,8 +25,9 @@ struct metric {
|
|||
};
|
||||
|
||||
struct mrg {
|
||||
ARAL *aral[MRG_PARTITIONS];
|
||||
|
||||
struct pgc_index {
|
||||
ARAL *aral;
|
||||
netdata_rwlock_t rwlock;
|
||||
Pvoid_t uuid_judy; // each UUID has a JudyL of sections (tiers)
|
||||
} index[MRG_PARTITIONS];
|
||||
|
@ -95,9 +102,75 @@ static inline size_t uuid_partition(MRG *mrg __maybe_unused, uuid_t *uuid) {
|
|||
return u[UUID_SZ - 1] % MRG_PARTITIONS;
|
||||
}
|
||||
|
||||
static METRIC *metric_add(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
|
||||
static inline bool metric_has_retention_unsafe(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
bool has_retention = (metric->first_time_s || metric->latest_time_s_clean || metric->latest_time_s_hot);
|
||||
|
||||
if(has_retention && !(metric->flags & METRIC_FLAG_HAS_RETENTION)) {
|
||||
metric->flags |= METRIC_FLAG_HAS_RETENTION;
|
||||
__atomic_add_fetch(&mrg->stats.entries_with_retention, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
else if(!has_retention && (metric->flags & METRIC_FLAG_HAS_RETENTION)) {
|
||||
metric->flags &= ~METRIC_FLAG_HAS_RETENTION;
|
||||
__atomic_sub_fetch(&mrg->stats.entries_with_retention, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
return has_retention;
|
||||
}
|
||||
|
||||
static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric, bool having_spinlock) {
|
||||
REFCOUNT refcount;
|
||||
|
||||
if(!having_spinlock)
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
|
||||
if(unlikely(metric->refcount < 0))
|
||||
fatal("METRIC: refcount is %d (negative) during acquire", metric->refcount);
|
||||
|
||||
refcount = ++metric->refcount;
|
||||
|
||||
// update its retention flags
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
|
||||
if(!having_spinlock)
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
|
||||
if(refcount == 1)
|
||||
__atomic_add_fetch(&mrg->stats.entries_referenced, 1, __ATOMIC_RELAXED);
|
||||
|
||||
__atomic_add_fetch(&mrg->stats.current_references, 1, __ATOMIC_RELAXED);
|
||||
|
||||
return refcount;
|
||||
}
|
||||
|
||||
static inline bool metric_release_and_can_be_deleted(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
bool ret = true;
|
||||
REFCOUNT refcount;
|
||||
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
|
||||
if(unlikely(metric->refcount <= 0))
|
||||
fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
|
||||
|
||||
refcount = --metric->refcount;
|
||||
|
||||
if(likely(metric_has_retention_unsafe(mrg, metric) || refcount != 0))
|
||||
ret = false;
|
||||
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
|
||||
if(unlikely(!refcount))
|
||||
__atomic_sub_fetch(&mrg->stats.entries_referenced, 1, __ATOMIC_RELAXED);
|
||||
|
||||
__atomic_sub_fetch(&mrg->stats.current_references, 1, __ATOMIC_RELAXED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
|
||||
size_t partition = uuid_partition(mrg, &entry->uuid);
|
||||
|
||||
METRIC *allocation = aral_mallocz(mrg->aral[partition]);
|
||||
|
||||
mrg_index_write_lock(mrg, partition);
|
||||
|
||||
size_t mem_before_judyl, mem_after_judyl;
|
||||
|
@ -117,18 +190,22 @@ static METRIC *metric_add(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
|
|||
if(unlikely(!PValue || PValue == PJERR))
|
||||
fatal("DBENGINE METRIC: corrupted section JudyL array");
|
||||
|
||||
if(*PValue != NULL) {
|
||||
if(unlikely(*PValue != NULL)) {
|
||||
METRIC *metric = *PValue;
|
||||
|
||||
metric_acquire(mrg, metric, false);
|
||||
mrg_index_write_unlock(mrg, partition);
|
||||
|
||||
if(ret)
|
||||
*ret = false;
|
||||
|
||||
aral_freez(mrg->aral[partition], allocation);
|
||||
|
||||
MRG_STATS_DUPLICATE_ADD(mrg);
|
||||
return metric;
|
||||
}
|
||||
|
||||
METRIC *metric = arrayalloc_mallocz(mrg->index[partition].aral);
|
||||
METRIC *metric = allocation;
|
||||
uuid_copy(metric->uuid, entry->uuid);
|
||||
metric->section = entry->section;
|
||||
metric->first_time_s = entry->first_time_s;
|
||||
|
@ -136,7 +213,10 @@ static METRIC *metric_add(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
|
|||
metric->latest_time_s_hot = 0;
|
||||
metric->latest_update_every_s = entry->latest_update_every_s;
|
||||
metric->writer = 0;
|
||||
metric->refcount = 0;
|
||||
metric->flags = 0;
|
||||
netdata_spinlock_init(&metric->spinlock);
|
||||
metric_acquire(mrg, metric, true); // no spinlock use required here
|
||||
*PValue = metric;
|
||||
|
||||
mrg_index_write_unlock(mrg, partition);
|
||||
|
@ -149,7 +229,7 @@ static METRIC *metric_add(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
|
|||
return metric;
|
||||
}
|
||||
|
||||
static METRIC *metric_get(MRG *mrg, uuid_t *uuid, Word_t section) {
|
||||
static METRIC *metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section) {
|
||||
size_t partition = uuid_partition(mrg, uuid);
|
||||
|
||||
mrg_index_read_lock(mrg, partition);
|
||||
|
@ -170,19 +250,27 @@ static METRIC *metric_get(MRG *mrg, uuid_t *uuid, Word_t section) {
|
|||
|
||||
METRIC *metric = *PValue;
|
||||
|
||||
metric_acquire(mrg, metric, false);
|
||||
|
||||
mrg_index_read_unlock(mrg, partition);
|
||||
|
||||
MRG_STATS_SEARCH_HIT(mrg);
|
||||
return metric;
|
||||
}
|
||||
|
||||
static bool metric_del(MRG *mrg, METRIC *metric) {
|
||||
static bool acquired_metric_del(MRG *mrg, METRIC *metric) {
|
||||
size_t partition = uuid_partition(mrg, &metric->uuid);
|
||||
|
||||
size_t mem_before_judyl, mem_after_judyl;
|
||||
|
||||
mrg_index_write_lock(mrg, partition);
|
||||
|
||||
if(!metric_release_and_can_be_deleted(mrg, metric)) {
|
||||
mrg_index_write_unlock(mrg, partition);
|
||||
__atomic_add_fetch(&mrg->stats.delete_having_retention_or_referenced, 1, __ATOMIC_RELAXED);
|
||||
return false;
|
||||
}
|
||||
|
||||
Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t));
|
||||
if(unlikely(!sections_judy_pptr || !*sections_judy_pptr)) {
|
||||
mrg_index_write_unlock(mrg, partition);
|
||||
|
@ -208,11 +296,10 @@ static bool metric_del(MRG *mrg, METRIC *metric) {
|
|||
mrg_stats_size_judyhs_removed_uuid(mrg);
|
||||
}
|
||||
|
||||
// arrayalloc is running lockless here
|
||||
arrayalloc_freez(mrg->index[partition].aral, metric);
|
||||
|
||||
mrg_index_write_unlock(mrg, partition);
|
||||
|
||||
aral_freez(mrg->aral[partition], metric);
|
||||
|
||||
MRG_STATS_DELETED_METRIC(mrg, partition);
|
||||
|
||||
return true;
|
||||
|
@ -223,11 +310,22 @@ static bool metric_del(MRG *mrg, METRIC *metric) {
|
|||
|
||||
MRG *mrg_create(void) {
|
||||
MRG *mrg = callocz(1, sizeof(MRG));
|
||||
|
||||
for(size_t i = 0; i < MRG_PARTITIONS ; i++) {
|
||||
char buf[ARAL_MAX_NAME + 1];
|
||||
snprintfz(buf, ARAL_MAX_NAME, "mrg[%zu]", i);
|
||||
netdata_rwlock_init(&mrg->index[i].rwlock);
|
||||
mrg->index[i].aral = arrayalloc_create(sizeof(METRIC), 32768 / sizeof(METRIC), NULL, NULL, false, true);
|
||||
|
||||
mrg->aral[i] = aral_create("mrg",
|
||||
sizeof(METRIC),
|
||||
0,
|
||||
512,
|
||||
NULL, NULL, false,
|
||||
false);
|
||||
}
|
||||
|
||||
mrg->stats.size = sizeof(MRG);
|
||||
|
||||
return mrg;
|
||||
}
|
||||
|
||||
|
@ -242,32 +340,27 @@ void mrg_destroy(MRG *mrg __maybe_unused) {
|
|||
}
|
||||
|
||||
METRIC *mrg_metric_add_and_acquire(MRG *mrg, MRG_ENTRY entry, bool *ret) {
|
||||
// FIXME - support refcount
|
||||
|
||||
// internal_fatal(entry.latest_time_s > max_acceptable_collected_time(),
|
||||
// "DBENGINE METRIC: metric latest time is in the future");
|
||||
|
||||
return metric_add(mrg, &entry, ret);
|
||||
return metric_add_and_acquire(mrg, &entry, ret);
|
||||
}
|
||||
|
||||
METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section) {
|
||||
// FIXME - support refcount
|
||||
return metric_get(mrg, uuid, section);
|
||||
return metric_get_and_acquire(mrg, uuid, section);
|
||||
}
|
||||
|
||||
bool mrg_metric_release_and_delete(MRG *mrg, METRIC *metric) {
|
||||
// FIXME - support refcount
|
||||
return metric_del(mrg, metric);
|
||||
return acquired_metric_del(mrg, metric);
|
||||
}
|
||||
|
||||
METRIC *mrg_metric_dup(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
// FIXME - duplicate refcount
|
||||
METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric) {
|
||||
metric_acquire(mrg, metric, false);
|
||||
return metric;
|
||||
}
|
||||
|
||||
void mrg_metric_release(MRG *mrg __maybe_unused, METRIC *metric __maybe_unused) {
|
||||
// FIXME - release refcount
|
||||
|
||||
bool mrg_metric_release(MRG *mrg, METRIC *metric) {
|
||||
return metric_release_and_can_be_deleted(mrg, metric);
|
||||
}
|
||||
|
||||
Word_t mrg_metric_id(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
|
@ -285,6 +378,7 @@ Word_t mrg_metric_section(MRG *mrg __maybe_unused, METRIC *metric) {
|
|||
bool mrg_metric_set_first_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s) {
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
metric->first_time_s = first_time_s;
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
|
||||
return true;
|
||||
|
@ -311,6 +405,7 @@ void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t
|
|||
else if(unlikely(!metric->latest_update_every_s && update_every_s))
|
||||
metric->latest_update_every_s = update_every_s;
|
||||
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
}
|
||||
|
||||
|
@ -322,6 +417,7 @@ bool mrg_metric_set_first_time_s_if_bigger(MRG *mrg __maybe_unused, METRIC *metr
|
|||
metric->first_time_s = first_time_s;
|
||||
ret = true;
|
||||
}
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
|
||||
return ret;
|
||||
|
@ -382,10 +478,63 @@ bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric,
|
|||
// if(unlikely(metric->first_time_s > latest_time_s))
|
||||
// metric->first_time_s = latest_time_s;
|
||||
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true when metric still has retention
|
||||
bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metric) {
|
||||
Word_t section = mrg_metric_section(mrg, metric);
|
||||
bool do_again = false;
|
||||
size_t countdown = 5;
|
||||
bool ret = true;
|
||||
|
||||
do {
|
||||
time_t min_first_time_s = LONG_MAX;
|
||||
time_t max_end_time_s = 0;
|
||||
PGC_PAGE *page;
|
||||
PGC_SEARCH method = PGC_SEARCH_FIRST;
|
||||
time_t page_first_time_s = 0;
|
||||
time_t page_end_time_s = 0;
|
||||
while ((page = pgc_page_get_and_acquire(main_cache, section, (Word_t)metric, page_first_time_s, method))) {
|
||||
method = PGC_SEARCH_NEXT;
|
||||
|
||||
bool is_hot = pgc_is_page_hot(page);
|
||||
bool is_dirty = pgc_is_page_dirty(page);
|
||||
page_first_time_s = pgc_page_start_time_s(page);
|
||||
page_end_time_s = pgc_page_end_time_s(page);
|
||||
|
||||
if ((is_hot || is_dirty) && page_first_time_s < min_first_time_s)
|
||||
min_first_time_s = page_first_time_s;
|
||||
|
||||
if (is_dirty && page_end_time_s > max_end_time_s)
|
||||
max_end_time_s = page_end_time_s;
|
||||
|
||||
pgc_page_release(main_cache, page);
|
||||
}
|
||||
|
||||
if (min_first_time_s == LONG_MAX)
|
||||
min_first_time_s = 0;
|
||||
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
if (--countdown && !min_first_time_s && metric->latest_time_s_hot)
|
||||
do_again = true;
|
||||
else {
|
||||
internal_error(!countdown, "METRIC: giving up on updating the retention of metric without disk retention");
|
||||
|
||||
do_again = false;
|
||||
metric->first_time_s = min_first_time_s;
|
||||
metric->latest_time_s_clean = max_end_time_s;
|
||||
|
||||
ret = metric_has_retention_unsafe(mrg, metric);
|
||||
}
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
} while(do_again);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
|
||||
// internal_fatal(latest_time_s > max_acceptable_collected_time(),
|
||||
// "DBENGINE METRIC: metric latest time is in the future");
|
||||
|
@ -399,6 +548,7 @@ bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, t
|
|||
// if(unlikely(metric->first_time_s > latest_time_s))
|
||||
// metric->first_time_s = latest_time_s;
|
||||
|
||||
metric_has_retention_unsafe(mrg, metric);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
return true;
|
||||
}
|
||||
|
@ -444,7 +594,7 @@ time_t mrg_metric_get_update_every_s(MRG *mrg __maybe_unused, METRIC *metric) {
|
|||
return update_every_s;
|
||||
}
|
||||
|
||||
bool mrg_metric_writer_acquire(MRG *mrg, METRIC *metric) {
|
||||
bool mrg_metric_set_writer(MRG *mrg, METRIC *metric) {
|
||||
bool done = false;
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
if(!metric->writer) {
|
||||
|
@ -452,11 +602,13 @@ bool mrg_metric_writer_acquire(MRG *mrg, METRIC *metric) {
|
|||
__atomic_add_fetch(&mrg->stats.writers, 1, __ATOMIC_RELAXED);
|
||||
done = true;
|
||||
}
|
||||
else
|
||||
__atomic_add_fetch(&mrg->stats.writers_conflicts, 1, __ATOMIC_RELAXED);
|
||||
netdata_spinlock_unlock(&metric->spinlock);
|
||||
return done;
|
||||
}
|
||||
|
||||
bool mrg_metric_writer_release(MRG *mrg, METRIC *metric) {
|
||||
bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric) {
|
||||
bool done = false;
|
||||
netdata_spinlock_lock(&metric->spinlock);
|
||||
if(metric->writer) {
|
||||
|
@ -584,73 +736,80 @@ static void *mrg_stress_test_thread3(void *ptr) {
|
|||
|
||||
int mrg_unittest(void) {
|
||||
MRG *mrg = mrg_create();
|
||||
METRIC *metric1, *metric2;
|
||||
METRIC *m1_t0, *m2_t0, *m3_t0, *m4_t0;
|
||||
METRIC *m1_t1, *m2_t1, *m3_t1, *m4_t1;
|
||||
bool ret;
|
||||
|
||||
MRG_ENTRY entry = {
|
||||
.section = 1,
|
||||
.section = 0,
|
||||
.first_time_s = 2,
|
||||
.last_time_s = 3,
|
||||
.latest_update_every_s = 4,
|
||||
};
|
||||
uuid_generate(entry.uuid);
|
||||
metric1 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
m1_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
if(!ret)
|
||||
fatal("DBENGINE METRIC: failed to add metric");
|
||||
|
||||
// add the same metric again
|
||||
if(mrg_metric_add_and_acquire(mrg, entry, &ret) != metric1)
|
||||
m2_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
if(m2_t0 != m1_t0)
|
||||
fatal("DBENGINE METRIC: adding the same metric twice, does not return the same pointer");
|
||||
if(ret)
|
||||
fatal("DBENGINE METRIC: managed to add the same metric twice");
|
||||
|
||||
if(mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section) != metric1)
|
||||
m3_t0 = mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section);
|
||||
if(m3_t0 != m1_t0)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added");
|
||||
|
||||
// add the same metric again
|
||||
if(mrg_metric_add_and_acquire(mrg, entry, &ret) != metric1)
|
||||
m4_t0 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
if(m4_t0 != m1_t0)
|
||||
fatal("DBENGINE METRIC: adding the same metric twice, does not return the same pointer");
|
||||
if(ret)
|
||||
fatal("DBENGINE METRIC: managed to add the same metric twice");
|
||||
|
||||
// add the same metric in another section
|
||||
entry.section = 0;
|
||||
metric2 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
entry.section = 1;
|
||||
m1_t1 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
if(!ret)
|
||||
fatal("DBENGINE METRIC: failed to add metric in different section");
|
||||
fatal("DBENGINE METRIC: failed to add metric in section %zu", (size_t)entry.section);
|
||||
|
||||
// add the same metric again
|
||||
if(mrg_metric_add_and_acquire(mrg, entry, &ret) != metric2)
|
||||
fatal("DBENGINE METRIC: adding the same metric twice (section 0), does not return the same pointer");
|
||||
m2_t1 = mrg_metric_add_and_acquire(mrg, entry, &ret);
|
||||
if(m2_t1 != m1_t1)
|
||||
fatal("DBENGINE METRIC: adding the same metric twice (section %zu), does not return the same pointer", (size_t)entry.section);
|
||||
if(ret)
|
||||
fatal("DBENGINE METRIC: managed to add the same metric twice in (section 0)");
|
||||
|
||||
if(mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section) != metric2)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added (section 0)");
|
||||
m3_t1 = mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section);
|
||||
if(m3_t1 != m1_t1)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added (section %zu)", (size_t)entry.section);
|
||||
|
||||
// delete the first metric
|
||||
if(!mrg_metric_release_and_delete(mrg, metric1))
|
||||
mrg_metric_release(mrg, m2_t0);
|
||||
mrg_metric_release(mrg, m3_t0);
|
||||
mrg_metric_release(mrg, m4_t0);
|
||||
mrg_metric_set_first_time_s(mrg, m1_t0, 0);
|
||||
mrg_metric_set_clean_latest_time_s(mrg, m1_t0, 0);
|
||||
mrg_metric_set_hot_latest_time_s(mrg, m1_t0, 0);
|
||||
if(!mrg_metric_release_and_delete(mrg, m1_t0))
|
||||
fatal("DBENGINE METRIC: cannot delete the first metric");
|
||||
|
||||
if(mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section) != metric2)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added (section 0), after deleting the first one");
|
||||
|
||||
// delete the first metric again - metric1 pointer is invalid now
|
||||
if(mrg_metric_release_and_delete(mrg, metric1))
|
||||
fatal("DBENGINE METRIC: deleted again an already deleted metric");
|
||||
|
||||
// find the section 0 metric again
|
||||
if(mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section) != metric2)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added (section 0), after deleting the first one twice");
|
||||
m4_t1 = mrg_metric_get_and_acquire(mrg, &entry.uuid, entry.section);
|
||||
if(m4_t1 != m1_t1)
|
||||
fatal("DBENGINE METRIC: cannot find the metric added (section %zu), after deleting the first one", (size_t)entry.section);
|
||||
|
||||
// delete the second metric
|
||||
if(!mrg_metric_release_and_delete(mrg, metric2))
|
||||
mrg_metric_release(mrg, m2_t1);
|
||||
mrg_metric_release(mrg, m3_t1);
|
||||
mrg_metric_release(mrg, m4_t1);
|
||||
mrg_metric_set_first_time_s(mrg, m1_t1, 0);
|
||||
mrg_metric_set_clean_latest_time_s(mrg, m1_t1, 0);
|
||||
mrg_metric_set_hot_latest_time_s(mrg, m1_t1, 0);
|
||||
if(!mrg_metric_release_and_delete(mrg, m1_t1))
|
||||
fatal("DBENGINE METRIC: cannot delete the second metric");
|
||||
|
||||
// delete the second metric again
|
||||
if(mrg_metric_release_and_delete(mrg, metric2))
|
||||
fatal("DBENGINE METRIC: managed to delete an already deleted metric");
|
||||
|
||||
if(mrg->stats.entries != 0)
|
||||
fatal("DBENGINE METRIC: invalid entries counter");
|
||||
|
||||
|
|
|
@ -18,23 +18,32 @@ typedef struct mrg_entry {
|
|||
|
||||
struct mrg_statistics {
|
||||
size_t entries;
|
||||
size_t size; // memory without indexing
|
||||
size_t entries_referenced;
|
||||
size_t entries_with_retention;
|
||||
|
||||
size_t size; // total memory used, with indexing
|
||||
|
||||
size_t current_references;
|
||||
|
||||
size_t additions;
|
||||
size_t additions_duplicate;
|
||||
|
||||
size_t deletions;
|
||||
size_t delete_having_retention_or_referenced;
|
||||
size_t delete_misses;
|
||||
|
||||
size_t search_hits;
|
||||
size_t search_misses;
|
||||
size_t pointer_validation_hits;
|
||||
size_t pointer_validation_misses;
|
||||
|
||||
size_t writers;
|
||||
size_t writers_conflicts;
|
||||
};
|
||||
|
||||
MRG *mrg_create(void);
|
||||
void mrg_destroy(MRG *mrg);
|
||||
|
||||
METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric);
|
||||
void mrg_metric_release(MRG *mrg, METRIC *metric);
|
||||
bool mrg_metric_release(MRG *mrg, METRIC *metric);
|
||||
|
||||
METRIC *mrg_metric_add_and_acquire(MRG *mrg, MRG_ENTRY entry, bool *ret);
|
||||
METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section);
|
||||
|
@ -58,9 +67,10 @@ time_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric);
|
|||
|
||||
void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s);
|
||||
void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s);
|
||||
bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metric);
|
||||
|
||||
bool mrg_metric_writer_acquire(MRG *mrg, METRIC *metric);
|
||||
bool mrg_metric_writer_release(MRG *mrg, METRIC *metric);
|
||||
bool mrg_metric_set_writer(MRG *mrg, METRIC *metric);
|
||||
bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric);
|
||||
|
||||
struct mrg_statistics mrg_get_statistics(MRG *mrg);
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ static void main_cache_flush_dirty_page_init_callback(PGC *cache __maybe_unused,
|
|||
|
||||
static void main_cache_flush_dirty_page_callback(PGC *cache __maybe_unused, PGC_ENTRY *entries_array __maybe_unused, PGC_PAGE **pages_array __maybe_unused, size_t entries __maybe_unused)
|
||||
{
|
||||
if(!entries)
|
||||
return;
|
||||
|
||||
struct rrdengine_instance *ctx = (struct rrdengine_instance *) entries_array[0].section;
|
||||
|
||||
size_t bytes_per_point = CTX_POINT_SIZE_BYTES(ctx);
|
||||
|
@ -50,8 +53,8 @@ static void main_cache_flush_dirty_page_callback(PGC *cache __maybe_unused, PGC_
|
|||
error_limit(&erl, "DBENGINE: page exceeds the maximum size, adjusting it to max.");
|
||||
}
|
||||
|
||||
memcpy(descr->page, pgc_page_data(pages_array[Index]), descr->page_length);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(base, descr, link.prev, link.next);
|
||||
descr->page = pgc_page_data(pages_array[Index]);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base, descr, link.prev, link.next);
|
||||
|
||||
internal_fatal(descr->page_length > RRDENG_BLOCK_SIZE, "DBENGINE: faulty page length calculation");
|
||||
}
|
||||
|
@ -1074,6 +1077,7 @@ void init_page_cache(void)
|
|||
}
|
||||
|
||||
main_cache = pgc_create(
|
||||
"main_cache",
|
||||
main_cache_size,
|
||||
main_cache_free_clean_page_callback,
|
||||
(size_t) rrdeng_pages_per_extent,
|
||||
|
@ -1089,6 +1093,7 @@ void init_page_cache(void)
|
|||
);
|
||||
|
||||
open_cache = pgc_create(
|
||||
"open_cache",
|
||||
open_cache_size, // the default is 1MB
|
||||
open_cache_free_clean_page_callback,
|
||||
1,
|
||||
|
@ -1105,6 +1110,7 @@ void init_page_cache(void)
|
|||
pgc_set_dynamic_target_cache_size_callback(open_cache, dynamic_open_cache_size);
|
||||
|
||||
extent_cache = pgc_create(
|
||||
"extent_cache",
|
||||
extent_cache_size,
|
||||
extent_cache_free_clean_page_callback,
|
||||
1,
|
||||
|
|
|
@ -27,7 +27,7 @@ struct page_descr_with_data {
|
|||
uint8_t type;
|
||||
uint32_t update_every_s;
|
||||
uint32_t page_length;
|
||||
uint8_t page[RRDENG_BLOCK_SIZE];
|
||||
uint8_t *page;
|
||||
|
||||
struct {
|
||||
struct page_descr_with_data *prev;
|
||||
|
|
|
@ -68,7 +68,7 @@ void pdc_cleanup1(void) {
|
|||
|
||||
if(pdc_globals.protected.available_items && pdc_globals.protected.available > (size_t)libuv_worker_threads) {
|
||||
item = pdc_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(pdc_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pdc_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
pdc_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -87,7 +87,7 @@ PDC *pdc_get(void) {
|
|||
|
||||
if(likely(pdc_globals.protected.available_items)) {
|
||||
pdc = pdc_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(pdc_globals.protected.available_items, pdc, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pdc_globals.protected.available_items, pdc, cache.prev, cache.next);
|
||||
pdc_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ static void pdc_release(PDC *pdc) {
|
|||
if(unlikely(!pdc)) return;
|
||||
|
||||
netdata_spinlock_lock(&pdc_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(pdc_globals.protected.available_items, pdc, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pdc_globals.protected.available_items, pdc, cache.prev, cache.next);
|
||||
pdc_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&pdc_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -147,7 +147,7 @@ void page_details_cleanup1(void) {
|
|||
|
||||
if(page_details_globals.protected.available_items && page_details_globals.protected.available > (size_t)libuv_worker_threads * 2) {
|
||||
item = page_details_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(page_details_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(page_details_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
page_details_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -166,7 +166,7 @@ struct page_details *page_details_get(void) {
|
|||
|
||||
if(likely(page_details_globals.protected.available_items)) {
|
||||
pd = page_details_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(page_details_globals.protected.available_items, pd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(page_details_globals.protected.available_items, pd, cache.prev, cache.next);
|
||||
page_details_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -185,7 +185,7 @@ static void page_details_release(struct page_details *pd) {
|
|||
if(unlikely(!pd)) return;
|
||||
|
||||
netdata_spinlock_lock(&page_details_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(page_details_globals.protected.available_items, pd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(page_details_globals.protected.available_items, pd, cache.prev, cache.next);
|
||||
page_details_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&page_details_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -226,7 +226,7 @@ void epdl_cleanup1(void) {
|
|||
|
||||
if(epdl_globals.protected.available_items && epdl_globals.protected.available > 100) {
|
||||
item = epdl_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(epdl_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(epdl_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
epdl_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -245,7 +245,7 @@ static EPDL *epdl_get(void) {
|
|||
|
||||
if(likely(epdl_globals.protected.available_items)) {
|
||||
epdl = epdl_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(epdl_globals.protected.available_items, epdl, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(epdl_globals.protected.available_items, epdl, cache.prev, cache.next);
|
||||
epdl_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -264,7 +264,7 @@ static void epdl_release(EPDL *epdl) {
|
|||
if(unlikely(!epdl)) return;
|
||||
|
||||
netdata_spinlock_lock(&epdl_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(epdl_globals.protected.available_items, epdl, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(epdl_globals.protected.available_items, epdl, cache.prev, cache.next);
|
||||
epdl_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&epdl_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -305,7 +305,7 @@ void deol_cleanup1(void) {
|
|||
|
||||
if(deol_globals.protected.available_items && deol_globals.protected.available > 100) {
|
||||
item = deol_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(deol_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(deol_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
deol_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -324,7 +324,7 @@ static DEOL *deol_get(void) {
|
|||
|
||||
if(likely(deol_globals.protected.available_items)) {
|
||||
deol = deol_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(deol_globals.protected.available_items, deol, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(deol_globals.protected.available_items, deol, cache.prev, cache.next);
|
||||
deol_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -343,7 +343,7 @@ static void deol_release(DEOL *deol) {
|
|||
if(unlikely(!deol)) return;
|
||||
|
||||
netdata_spinlock_lock(&deol_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(deol_globals.protected.available_items, deol, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(deol_globals.protected.available_items, deol, cache.prev, cache.next);
|
||||
deol_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&deol_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -399,7 +399,7 @@ void extent_buffer_cleanup1(void) {
|
|||
|
||||
if(extent_buffer_globals.protected.available_items && extent_buffer_globals.protected.available > 1) {
|
||||
item = extent_buffer_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(extent_buffer_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(extent_buffer_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
extent_buffer_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -424,7 +424,7 @@ struct extent_buffer *extent_buffer_get(size_t size) {
|
|||
netdata_spinlock_lock(&extent_buffer_globals.protected.spinlock);
|
||||
if(likely(extent_buffer_globals.protected.available_items)) {
|
||||
eb = extent_buffer_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(extent_buffer_globals.protected.available_items, eb, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(extent_buffer_globals.protected.available_items, eb, cache.prev, cache.next);
|
||||
extent_buffer_globals.protected.available--;
|
||||
}
|
||||
netdata_spinlock_unlock(&extent_buffer_globals.protected.spinlock);
|
||||
|
@ -452,7 +452,7 @@ void extent_buffer_release(struct extent_buffer *eb) {
|
|||
if(unlikely(!eb)) return;
|
||||
|
||||
netdata_spinlock_lock(&extent_buffer_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(extent_buffer_globals.protected.available_items, eb, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(extent_buffer_globals.protected.available_items, eb, cache.prev, cache.next);
|
||||
extent_buffer_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&extent_buffer_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -671,7 +671,7 @@ static bool epdl_pending_add(EPDL *epdl) {
|
|||
rrdeng_req_cmd(epdl_get_cmd, base, epdl->pdc->priority);
|
||||
}
|
||||
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(base, epdl, query.prev, query.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base, epdl, query.prev, query.next);
|
||||
*PValue = base;
|
||||
|
||||
netdata_spinlock_unlock(&epdl->datafile->extent_queries.spinlock);
|
||||
|
@ -1009,7 +1009,7 @@ static inline struct page_details *epdl_get_pd_load_link_list_from_metric_start_
|
|||
if (unlikely(__atomic_load_n(&ep->pdc->workers_should_stop, __ATOMIC_RELAXED)))
|
||||
pdc_page_status_set(pd, PDC_PAGE_FAILED | PDC_PAGE_CANCELLED);
|
||||
else
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(pd_list, pd, load.prev, load.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pd_list, pd, load.prev, load.next);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1259,6 +1259,35 @@ static bool epdl_populate_pages_from_extent_data(
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline void *datafile_extent_read(struct rrdengine_instance *ctx, uv_file file, unsigned pos, unsigned size_bytes)
|
||||
{
|
||||
void *buffer;
|
||||
uv_fs_t request;
|
||||
|
||||
unsigned real_io_size = ALIGN_BYTES_CEILING(size_bytes);
|
||||
int ret = posix_memalign(&buffer, RRDFILE_ALIGNMENT, real_io_size);
|
||||
if (unlikely(ret))
|
||||
fatal("DBENGINE: posix_memalign(): %s", strerror(ret));
|
||||
|
||||
uv_buf_t iov = uv_buf_init(buffer, real_io_size);
|
||||
ret = uv_fs_read(NULL, &request, file, &iov, 1, pos, NULL);
|
||||
if (unlikely(-1 == ret)) {
|
||||
ctx_io_error(ctx);
|
||||
posix_memfree(buffer);
|
||||
buffer = NULL;
|
||||
}
|
||||
else
|
||||
ctx_io_read_op_bytes(ctx, real_io_size);
|
||||
|
||||
uv_fs_req_cleanup(&request);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static inline void datafile_extent_read_free(void *buffer) {
|
||||
posix_memfree(buffer);
|
||||
}
|
||||
|
||||
void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *epdl, bool worker) {
|
||||
size_t *statistics_counter = NULL;
|
||||
PDC_PAGE_STATUS not_loaded_pages_tag = 0, loaded_pages_tag = 0;
|
||||
|
@ -1306,18 +1335,12 @@ void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *e
|
|||
if(worker)
|
||||
worker_is_busy(UV_EVENT_DBENGINE_EXTENT_MMAP);
|
||||
|
||||
off_t map_start = ALIGN_BYTES_FLOOR(epdl->extent_offset);
|
||||
size_t length = ALIGN_BYTES_CEILING(epdl->extent_offset + epdl->extent_size) - map_start;
|
||||
|
||||
void *mmap_data = mmap(NULL, length, PROT_READ, MAP_SHARED, epdl->file, map_start);
|
||||
if(mmap_data != MAP_FAILED) {
|
||||
extent_compressed_data = mmap_data + (epdl->extent_offset - map_start);
|
||||
void *extent_data = datafile_extent_read(ctx, epdl->file, epdl->extent_offset, epdl->extent_size);
|
||||
if(extent_data != NULL) {
|
||||
|
||||
void *copied_extent_compressed_data = dbengine_extent_alloc(epdl->extent_size);
|
||||
memcpy(copied_extent_compressed_data, extent_compressed_data, epdl->extent_size);
|
||||
|
||||
int ret = munmap(mmap_data, length);
|
||||
fatal_assert(0 == ret);
|
||||
memcpy(copied_extent_compressed_data, extent_data, epdl->extent_size);
|
||||
datafile_extent_read_free(extent_data);
|
||||
|
||||
if(worker)
|
||||
worker_is_busy(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP);
|
||||
|
|
|
@ -124,7 +124,7 @@ static void work_request_cleanup1(void) {
|
|||
|
||||
if(work_request_globals.protected.available_items && work_request_globals.protected.available > (size_t)libuv_worker_threads) {
|
||||
item = work_request_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(work_request_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(work_request_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
work_request_globals.protected.available--;
|
||||
}
|
||||
netdata_spinlock_unlock(&work_request_globals.protected.spinlock);
|
||||
|
@ -137,7 +137,7 @@ static void work_request_cleanup1(void) {
|
|||
|
||||
static inline void work_done(struct rrdeng_work *work_request) {
|
||||
netdata_spinlock_lock(&work_request_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(work_request_globals.protected.available_items, work_request, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(work_request_globals.protected.available_items, work_request, cache.prev, cache.next);
|
||||
work_request_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&work_request_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -183,7 +183,7 @@ static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct com
|
|||
|
||||
if(likely(work_request_globals.protected.available_items)) {
|
||||
work_request = work_request_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(work_request_globals.protected.available_items, work_request, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(work_request_globals.protected.available_items, work_request, cache.prev, cache.next);
|
||||
work_request_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -246,7 +246,7 @@ static void page_descriptor_cleanup1(void) {
|
|||
|
||||
if(page_descriptor_globals.protected.available_items && page_descriptor_globals.protected.available > MAX_PAGES_PER_EXTENT) {
|
||||
item = page_descriptor_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(page_descriptor_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(page_descriptor_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
page_descriptor_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -265,7 +265,7 @@ struct page_descr_with_data *page_descriptor_get(void) {
|
|||
|
||||
if(likely(page_descriptor_globals.protected.available_items)) {
|
||||
descr = page_descriptor_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(page_descriptor_globals.protected.available_items, descr, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(page_descriptor_globals.protected.available_items, descr, cache.prev, cache.next);
|
||||
page_descriptor_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -284,7 +284,7 @@ static inline void page_descriptor_release(struct page_descr_with_data *descr) {
|
|||
if(unlikely(!descr)) return;
|
||||
|
||||
netdata_spinlock_lock(&page_descriptor_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(page_descriptor_globals.protected.available_items, descr, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(page_descriptor_globals.protected.available_items, descr, cache.prev, cache.next);
|
||||
page_descriptor_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&page_descriptor_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -322,7 +322,7 @@ static void extent_io_descriptor_cleanup1(void) {
|
|||
|
||||
if(extent_io_descriptor_globals.protected.available_items && extent_io_descriptor_globals.protected.available > (size_t)libuv_worker_threads) {
|
||||
item = extent_io_descriptor_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(extent_io_descriptor_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(extent_io_descriptor_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
extent_io_descriptor_globals.protected.available--;
|
||||
}
|
||||
netdata_spinlock_unlock(&extent_io_descriptor_globals.protected.spinlock);
|
||||
|
@ -340,7 +340,7 @@ static struct extent_io_descriptor *extent_io_descriptor_get(void) {
|
|||
|
||||
if(likely(extent_io_descriptor_globals.protected.available_items)) {
|
||||
xt_io_descr = extent_io_descriptor_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(extent_io_descriptor_globals.protected.available_items, xt_io_descr, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(extent_io_descriptor_globals.protected.available_items, xt_io_descr, cache.prev, cache.next);
|
||||
extent_io_descriptor_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -359,7 +359,7 @@ static inline void extent_io_descriptor_release(struct extent_io_descriptor *xt_
|
|||
if(unlikely(!xt_io_descr)) return;
|
||||
|
||||
netdata_spinlock_lock(&extent_io_descriptor_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(extent_io_descriptor_globals.protected.available_items, xt_io_descr, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(extent_io_descriptor_globals.protected.available_items, xt_io_descr, cache.prev, cache.next);
|
||||
extent_io_descriptor_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&extent_io_descriptor_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -396,7 +396,7 @@ static void rrdeng_query_handle_cleanup1(void) {
|
|||
|
||||
if(rrdeng_query_handle_globals.protected.available_items && rrdeng_query_handle_globals.protected.available > 10) {
|
||||
item = rrdeng_query_handle_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_query_handle_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_query_handle_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
rrdeng_query_handle_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -415,7 +415,7 @@ struct rrdeng_query_handle *rrdeng_query_handle_get(void) {
|
|||
|
||||
if(likely(rrdeng_query_handle_globals.protected.available_items)) {
|
||||
handle = rrdeng_query_handle_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_query_handle_globals.protected.available_items, handle, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_query_handle_globals.protected.available_items, handle, cache.prev, cache.next);
|
||||
rrdeng_query_handle_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -434,7 +434,7 @@ void rrdeng_query_handle_release(struct rrdeng_query_handle *handle) {
|
|||
if(unlikely(!handle)) return;
|
||||
|
||||
netdata_spinlock_lock(&rrdeng_query_handle_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(rrdeng_query_handle_globals.protected.available_items, handle, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(rrdeng_query_handle_globals.protected.available_items, handle, cache.prev, cache.next);
|
||||
rrdeng_query_handle_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&rrdeng_query_handle_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -471,7 +471,7 @@ static void wal_cleanup1(void) {
|
|||
|
||||
if(wal_globals.protected.available_items && wal_globals.protected.available > storage_tiers) {
|
||||
wal = wal_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
wal_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -494,7 +494,7 @@ WAL *wal_get(struct rrdengine_instance *ctx, unsigned size) {
|
|||
|
||||
if(likely(wal_globals.protected.available_items)) {
|
||||
wal = wal_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
wal_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -532,7 +532,7 @@ void wal_release(WAL *wal) {
|
|||
if(unlikely(!wal)) return;
|
||||
|
||||
netdata_spinlock_lock(&wal_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wal_globals.protected.available_items, wal, cache.prev, cache.next);
|
||||
wal_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&wal_globals.protected.spinlock);
|
||||
}
|
||||
|
@ -596,7 +596,7 @@ static void rrdeng_cmd_cleanup1(void) {
|
|||
|
||||
if(rrdeng_cmd_globals.cache.available_items && rrdeng_cmd_globals.cache.available > 100) {
|
||||
item = rrdeng_cmd_globals.cache.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_cmd_globals.cache.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_cmd_globals.cache.available_items, item, cache.prev, cache.next);
|
||||
rrdeng_cmd_globals.cache.available--;
|
||||
}
|
||||
netdata_spinlock_unlock(&rrdeng_cmd_globals.cache.spinlock);
|
||||
|
@ -639,8 +639,8 @@ void rrdeng_req_cmd(requeue_callback_t get_cmd_cb, void *data, STORAGE_PRIORITY
|
|||
priority = rrdeng_enq_cmd_map_opcode_to_priority(cmd->opcode, priority);
|
||||
|
||||
if (cmd->priority > priority) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[cmd->priority], cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[cmd->priority], cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
cmd->priority = priority;
|
||||
}
|
||||
}
|
||||
|
@ -657,7 +657,7 @@ void rrdeng_enq_cmd(struct rrdengine_instance *ctx, enum rrdeng_opcode opcode, v
|
|||
netdata_spinlock_lock(&rrdeng_cmd_globals.cache.spinlock);
|
||||
if(likely(rrdeng_cmd_globals.cache.available_items)) {
|
||||
cmd = rrdeng_cmd_globals.cache.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_cmd_globals.cache.available_items, cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_cmd_globals.cache.available_items, cmd, cache.prev, cache.next);
|
||||
rrdeng_cmd_globals.cache.available--;
|
||||
}
|
||||
netdata_spinlock_unlock(&rrdeng_cmd_globals.cache.spinlock);
|
||||
|
@ -676,7 +676,7 @@ void rrdeng_enq_cmd(struct rrdengine_instance *ctx, enum rrdeng_opcode opcode, v
|
|||
cmd->dequeue_cb = dequeue_cb;
|
||||
|
||||
netdata_spinlock_lock(&rrdeng_cmd_globals.queue.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
rrdeng_cmd_globals.queue.waiting++;
|
||||
if(enqueue_cb)
|
||||
enqueue_cb(cmd);
|
||||
|
@ -715,7 +715,7 @@ static inline struct rrdeng_cmd rrdeng_deq_cmd(void) {
|
|||
}
|
||||
|
||||
// remove it from the queue
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(rrdeng_cmd_globals.queue.waiting_items_by_priority[priority], cmd, cache.prev, cache.next);
|
||||
rrdeng_cmd_globals.queue.waiting--;
|
||||
break;
|
||||
}
|
||||
|
@ -735,7 +735,7 @@ static inline struct rrdeng_cmd rrdeng_deq_cmd(void) {
|
|||
|
||||
// put it in the cache
|
||||
netdata_spinlock_lock(&rrdeng_cmd_globals.cache.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(rrdeng_cmd_globals.cache.available_items, cmd, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(rrdeng_cmd_globals.cache.available_items, cmd, cache.prev, cache.next);
|
||||
rrdeng_cmd_globals.cache.available++;
|
||||
netdata_spinlock_unlock(&rrdeng_cmd_globals.cache.spinlock);
|
||||
}
|
||||
|
@ -968,10 +968,6 @@ static void *extent_flushed_to_open_tp_worker(struct rrdengine_instance *ctx __m
|
|||
struct rrdengine_datafile *datafile;
|
||||
unsigned i;
|
||||
|
||||
if (uv_fs_request->result < 0) {
|
||||
ctx_io_error(ctx);
|
||||
error("DBENGINE: %s: uv_fs_write: %s", __func__, uv_strerror((int)uv_fs_request->result));
|
||||
}
|
||||
datafile = xt_io_descr->datafile;
|
||||
|
||||
bool still_running = ctx_is_available_for_queries(ctx);
|
||||
|
@ -1014,6 +1010,11 @@ static void after_extent_write_datafile_io(uv_fs_t *uv_fs_request) {
|
|||
struct rrdengine_datafile *datafile = xt_io_descr->datafile;
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
|
||||
if (uv_fs_request->result < 0) {
|
||||
ctx_io_error(ctx);
|
||||
error("DBENGINE: %s: uv_fs_write(): %s", __func__, uv_strerror((int)uv_fs_request->result));
|
||||
}
|
||||
|
||||
journalfile_v1_extent_write(ctx, xt_io_descr->datafile, xt_io_descr->wal, &rrdeng_main.loop);
|
||||
|
||||
netdata_spinlock_lock(&datafile->writers.spinlock);
|
||||
|
@ -1398,7 +1399,7 @@ void find_uuid_first_time(
|
|||
}
|
||||
}
|
||||
internal_error(true,
|
||||
"DBENGINE: analyzed the retention of %zu rotated metrics, "
|
||||
"DBENGINE: analyzed the retention of %zu rotated metrics of tier %d, "
|
||||
"did %zu jv2 matching binary searches (%zu not matching, %zu overflown) in %u journal files, "
|
||||
"%zu metrics with entries in open cache, "
|
||||
"metrics first time found per datafile index ([not in jv2]:%zu, [1]:%zu, [2]:%zu, [3]:%zu, [4]:%zu, [5]:%zu, [6]:%zu, [7]:%zu, [8]:%zu, [bigger]: %zu), "
|
||||
|
@ -1406,6 +1407,7 @@ void find_uuid_first_time(
|
|||
"metrics without any remaining retention %zu, "
|
||||
"metrics not in MRG %zu",
|
||||
metric_count,
|
||||
ctx->config.tier,
|
||||
binary_match,
|
||||
not_matching_bsearches,
|
||||
not_needed_bsearches,
|
||||
|
@ -1446,7 +1448,8 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r
|
|||
added++;
|
||||
}
|
||||
|
||||
info("DBENGINE: recalculating retention for %zu metrics starting with datafile %u", count, first_datafile_remaining->fileno);
|
||||
info("DBENGINE: recalculating tier %d retention for %zu metrics starting with datafile %u",
|
||||
ctx->config.tier, count, first_datafile_remaining->fileno);
|
||||
|
||||
journalfile_v2_data_release(journalfile);
|
||||
|
||||
|
@ -1460,18 +1463,40 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r
|
|||
if(worker)
|
||||
worker_is_busy(UV_EVENT_DBENGINE_POPULATE_MRG);
|
||||
|
||||
info("DBENGINE: updating metric registry retention for %zu metrics", added);
|
||||
info("DBENGINE: updating tier %d metrics registry retention for %zu metrics",
|
||||
ctx->config.tier, added);
|
||||
|
||||
size_t deleted_metrics = 0, zero_retention_referenced = 0, zero_disk_retention = 0, zero_disk_but_live = 0;
|
||||
for (size_t index = 0; index < added; ++index) {
|
||||
uuid_first_t_entry = &uuid_first_entry_list[index];
|
||||
if (likely(uuid_first_t_entry->first_time_s != LONG_MAX))
|
||||
if (likely(uuid_first_t_entry->first_time_s != LONG_MAX)) {
|
||||
mrg_metric_set_first_time_s_if_bigger(main_mrg, uuid_first_t_entry->metric, uuid_first_t_entry->first_time_s);
|
||||
else
|
||||
mrg_metric_set_first_time_s(main_mrg, uuid_first_t_entry->metric, 0);
|
||||
mrg_metric_release(main_mrg, uuid_first_t_entry->metric);
|
||||
mrg_metric_release(main_mrg, uuid_first_t_entry->metric);
|
||||
}
|
||||
else {
|
||||
zero_disk_retention++;
|
||||
|
||||
// there is no retention for this metric
|
||||
bool has_retention = mrg_metric_zero_disk_retention(main_mrg, uuid_first_t_entry->metric);
|
||||
if (!has_retention) {
|
||||
bool deleted = mrg_metric_release_and_delete(main_mrg, uuid_first_t_entry->metric);
|
||||
if(deleted)
|
||||
deleted_metrics++;
|
||||
else
|
||||
zero_retention_referenced++;
|
||||
}
|
||||
else {
|
||||
zero_disk_but_live++;
|
||||
mrg_metric_release(main_mrg, uuid_first_t_entry->metric);
|
||||
}
|
||||
}
|
||||
}
|
||||
freez(uuid_first_entry_list);
|
||||
|
||||
internal_error(zero_disk_retention,
|
||||
"DBENGINE: deleted %zu metrics, zero retention but referenced %zu (out of %zu total, of which %zu have main cache retention) zero on-disk retention tier %d metrics from metrics registry",
|
||||
deleted_metrics, zero_retention_referenced, zero_disk_retention, zero_disk_but_live, ctx->config.tier);
|
||||
|
||||
if(worker)
|
||||
worker_is_idle();
|
||||
}
|
||||
|
|
|
@ -243,7 +243,7 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri
|
|||
struct rrdengine_instance *ctx = mrg_metric_ctx(metric);
|
||||
|
||||
bool is_1st_metric_writer = true;
|
||||
if(!mrg_metric_writer_acquire(main_mrg, metric)) {
|
||||
if(!mrg_metric_set_writer(main_mrg, metric)) {
|
||||
is_1st_metric_writer = false;
|
||||
char uuid[UUID_STR_LEN + 1];
|
||||
uuid_unparse(*mrg_metric_uuid(main_mrg, metric), uuid);
|
||||
|
@ -696,7 +696,7 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
|
|||
if(!(handle->options & RRDENG_1ST_METRIC_WRITER))
|
||||
__atomic_sub_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);
|
||||
|
||||
if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_writer_release(main_mrg, handle->metric))
|
||||
if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_clear_writer(main_mrg, handle->metric))
|
||||
internal_fatal(true, "DBENGINE: metric is already released");
|
||||
|
||||
time_t first_time_s, last_time_s, update_every_s;
|
||||
|
@ -738,12 +738,12 @@ static void register_query_handle(struct rrdeng_query_handle *handle) {
|
|||
handle->started_time_s = now_realtime_sec();
|
||||
|
||||
netdata_spinlock_lock(&global_query_handle_spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(global_query_handle_ll, handle, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
|
||||
netdata_spinlock_unlock(&global_query_handle_spinlock);
|
||||
}
|
||||
static void unregister_query_handle(struct rrdeng_query_handle *handle) {
|
||||
netdata_spinlock_lock(&global_query_handle_spinlock);
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(global_query_handle_ll, handle, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(global_query_handle_ll, handle, prev, next);
|
||||
netdata_spinlock_unlock(&global_query_handle_spinlock);
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -436,8 +436,10 @@ void rrddim_memory_file_save(RRDDIM *rd);
|
|||
(x).end_time_s = end_s; \
|
||||
} while(0)
|
||||
|
||||
#define STORAGE_POINT_UNSET { .min = NAN, .max = NAN, .sum = NAN, .count = 0, .anomaly_count = 0, .flags = SN_FLAG_NONE, .start_time_s = 0, .end_time_s = 0 }
|
||||
|
||||
#define storage_point_is_unset(x) (!(x).count)
|
||||
#define storage_point_is_empty(x) (!netdata_double_isnumber((x).sum))
|
||||
#define storage_point_is_gap(x) (!netdata_double_isnumber((x).sum))
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// function pointers that handle data collection
|
||||
|
|
|
@ -181,7 +181,7 @@ static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) {
|
|||
rc->rrdset = st;
|
||||
|
||||
netdata_rwlock_wrlock(&st->alerts.rwlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(st->alerts.base, rc, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
|
||||
netdata_rwlock_unlock(&st->alerts.rwlock);
|
||||
|
||||
if(rc->update_every < rc->rrdset->update_every) {
|
||||
|
@ -328,7 +328,7 @@ static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
|
|||
if(!having_ll_wrlock)
|
||||
netdata_rwlock_wrlock(&st->alerts.rwlock);
|
||||
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(st->alerts.base, rc, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
|
||||
|
||||
if(!having_ll_wrlock)
|
||||
netdata_rwlock_unlock(&st->alerts.rwlock);
|
||||
|
|
|
@ -122,6 +122,8 @@ typedef struct query_plan_entry {
|
|||
size_t tier;
|
||||
time_t after;
|
||||
time_t before;
|
||||
time_t expanded_after;
|
||||
time_t expanded_before;
|
||||
struct storage_engine_query_handle handle;
|
||||
STORAGE_POINT (*next_metric)(struct storage_engine_query_handle *handle);
|
||||
int (*is_finished)(struct storage_engine_query_handle *handle);
|
||||
|
|
|
@ -477,9 +477,9 @@ int is_legacy = 1;
|
|||
rrdhost_index_add_hostname(host);
|
||||
|
||||
if(is_localhost)
|
||||
DOUBLE_LINKED_LIST_PREPEND_UNSAFE(localhost, host, prev, next);
|
||||
DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(localhost, host, prev, next);
|
||||
else
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(localhost, host, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(localhost, host, prev, next);
|
||||
|
||||
rrd_unlock();
|
||||
|
||||
|
@ -1092,7 +1092,7 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
|
|||
rrdhost_index_del_by_guid(host);
|
||||
|
||||
if (host->prev)
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(localhost, host, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(localhost, host, prev, next);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
|
|
@ -1149,7 +1149,7 @@ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAG
|
|||
t->virtual_point.end_time_s = sp.end_time_s;
|
||||
|
||||
// merge the values into our virtual point
|
||||
if (likely(!storage_point_is_empty(sp))) {
|
||||
if (likely(!storage_point_is_gap(sp))) {
|
||||
// we aggregate only non NULLs into higher tiers
|
||||
|
||||
if (likely(!storage_point_is_unset(t->virtual_point))) {
|
||||
|
|
|
@ -130,7 +130,7 @@ NETDATA_DOUBLE exporting_calculate_value_from_stored_data(
|
|||
STORAGE_POINT sp = rd->tiers[0].query_ops->next_metric(&handle);
|
||||
points_read++;
|
||||
|
||||
if (unlikely(storage_point_is_empty(sp))) {
|
||||
if (unlikely(storage_point_is_gap(sp))) {
|
||||
// not collected
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
|
|||
|
||||
SUBDIRS = \
|
||||
adaptive_resortable_list \
|
||||
arrayalloc \
|
||||
aral \
|
||||
avl \
|
||||
buffer \
|
||||
clocks \
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
<!--
|
||||
title: "Array Allocator"
|
||||
custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/aral/README.md
|
||||
-->
|
||||
|
||||
# Array Allocator
|
||||
|
||||
Come on! Array allocators are embedded in libc! Why do we need such a thing in Netdata?
|
||||
|
||||
Well, we have a couple of problems to solve:
|
||||
|
||||
1. **Fragmentation** - It is important for Netdata to keeps its overall memory footprint as low as possible. libc does an amazing job when the same thread allocates and frees some memory. But it simply cannot do better without knowing the specifics of the application when memory is allocated and freed randomly between threads.
|
||||
2. **Speed** - Especially when allocations and de-allocations happen across threads, the speed penalty is tremendous.
|
||||
|
||||
In Netdata we have a few moments that are very tough. Imagine collecting 1 million metrics per second. You have a buffer for each metric and put append new points there. This works beautifully, of course! But then, when the buffers get full, imagine the situation. You suddenly need 1 million buffers, at once!
|
||||
|
||||
To solve this problem we first spread out the buffers. So, the first time each metric asks for a buffer, it gets a smaller one. We added logic there to spread them as evenly as possible across time. Solved? Not exactly!
|
||||
|
||||
We have 3 tiers for each metric. For the metrics of tier 0 (per second resolution) we have a max buffer for 1024 points and every new metrics gets a random size between 3 points and 1024. So they are distributed across time. For 1 million metrics, we have about 1000 buffers beings created every second.
|
||||
|
||||
But at some point, the end of the minute will come, and suddenly all the metrics will need a new buffer for tier 1 (per minute). Oops! We will spread tier 1 buffers across time too, but the first minute is a tough one. We really need 1 million buffers instantly.
|
||||
|
||||
And if that minute happens to also be the beginning of an hour... tier 2 (per hour) kicks in. For that instant we are going to need 2 million buffers instantly.
|
||||
|
||||
The problem becomes even bigger when we collect 2, or even 10 million metrics...
|
||||
|
||||
So solve it, Netdata uses a special implementation of an array allocator that is tightly integrated with the structures we need.
|
||||
|
||||
## Features
|
||||
|
||||
1. Malloc, or MMAP modes. File based MMAP is also supported to put the data in file backed up shared memory.
|
||||
2. Fully asynchronous operations. There are just a couple of points where spin-locks protect a few counters and pointers.
|
||||
3. Optional defragmenter, that once enabled it will make free operation slower while trying to maintain a sorted list of fragments to offer first during allocations. The defragmenter can be enabled / disabled at run time. The defragmenter can hurt performance on application with intense turn-around of allocation, like Netdata dbengine caches. So, it is disabled by default.
|
||||
4. Without the defragmenter enabled, ARAL still tries to keep pages full, but the depth of the search is limited to 3 pages (so, a page with a free slot will either become 1st, 2nd, or 3rd). At the same time, during allocations, ARAL will evaluate the first 2 pages to find the one that is more full than the other, to use it for the new allocation.
|
||||
|
||||
## How it works
|
||||
|
||||
Allocations are organized in pages. Pages have a minimum size (a system page, usually 4KB) and a maximum defined by for each different kind of object.
|
||||
|
||||
Initially every page is free. When an allocation request is made, the free space is split, and the first element is reserved. Free space is now considered there rest.
|
||||
|
||||
This continuous until the page gets full, where a new page is allocated and the process is repeated.
|
||||
|
||||
Each allocation returned has a pointer appended to it. The pointer points to the page the allocation belongs to.
|
||||
|
||||
When a pointer is freed, the page it belongs is identified, its space is marked free, and it is prepended in a single linked list that resides in the page itself. So, each page has its own list of free slots to use.
|
||||
|
||||
Pages are then on another linked list. This is a double linked list and at its beginning has the pages with free space and at the end the pages that are full.
|
||||
|
||||
When the defragmenter is enabled the pages double linked list is also sorted, like this: the fewer the free slots on a page, the earlier in the linked list the page will be, except if it does not have any free slot, in which case it will be at the end. So, the defragmenter tries to have pages full.
|
||||
|
||||
When a page is entirerly free, it is given back to the system immediately. There is no caching of free pages.
|
||||
|
||||
|
||||
Parallelism is achieved like this:
|
||||
|
||||
When some threads are waiting for a page to be allocated, free operations are allowed. If a free operation happens before a new page is allocated, any waiting thread will get the slot that is freed on another page.
|
||||
|
||||
Free operations happen in parallel, even for the same page. There is a spin-lock on each page to protect the base pointer of the page's free slots single linked list. But, this is instant. All preparative work happens lockless, then to add the free slot to the page, the page spinlock is acquired, the free slot is prepended to the linked list on the page, the spinlock is released. Such free operations on different pages are totally parallel.
|
||||
|
||||
Once the free operation on a page has finished, the pages double linked list spinlock is acquired to put the page first on that linked list. If the defragmenter is enabled, the spinlock is retained for a little longer, to find the exact position of the page in the linked list.
|
||||
|
||||
During allocations, the reverse order is used. First get the pages double linked list spinlock, get the first page and decrement its free slots counter, then release the spinlock. If the first page does not have any free slots, a page allocation is spawn, without any locks acquired. All threads are spinning waiting for a page with free slots, either from the newly allocated one or from a free operation that may happen in parallel.
|
||||
|
||||
Once a page is acquired, each thread locks its own page to get the first free slot and releases the lock immediately. This is guaranteed to succeed, because when the page was given to that thread its free slots counter was decremented. So, there is a free slot for every thread that got that page. All preparative work to return a pointer to the caller is done lock free. Allocations on different pages are done in parallel, without any intervention between them.
|
||||
|
||||
|
||||
## What to expect
|
||||
|
||||
Systems not designed for parallelism achieve their top performance single threaded. The single threaded speed is the baseline. Adding more threads makes them slower.
|
||||
|
||||
The baseline for ARAL is the following, the included stress test when running single threaded:
|
||||
|
||||
```
|
||||
Running stress test of 1 threads, with 10000 elements each, for 5 seconds...
|
||||
2023-01-29 17:04:50: netdata INFO : TH[0] : set name of thread 1314983 to TH[0]
|
||||
ARAL executes 12.27 M malloc and 12.26 M free operations/s
|
||||
ARAL executes 12.29 M malloc and 12.29 M free operations/s
|
||||
ARAL executes 12.30 M malloc and 12.30 M free operations/s
|
||||
ARAL executes 12.30 M malloc and 12.29 M free operations/s
|
||||
ARAL executes 12.29 M malloc and 12.29 M free operations/s
|
||||
Waiting the threads to finish...
|
||||
2023-01-29 17:04:55: netdata INFO : MAIN : ARAL: did 61487356 malloc, 61487356 free, using 1 threads, in 5003808 usecs
|
||||
```
|
||||
|
||||
The same test with 2 threads, both threads on the same ARAL of course. As you see performance improved:
|
||||
|
||||
```
|
||||
Running stress test of 2 threads, with 10000 elements each, for 5 seconds...
|
||||
2023-01-29 17:05:25: netdata INFO : TH[0] : set name of thread 1315537 to TH[0]
|
||||
2023-01-29 17:05:25: netdata INFO : TH[1] : set name of thread 1315538 to TH[1]
|
||||
ARAL executes 17.75 M malloc and 17.73 M free operations/s
|
||||
ARAL executes 17.93 M malloc and 17.93 M free operations/s
|
||||
ARAL executes 18.17 M malloc and 18.18 M free operations/s
|
||||
ARAL executes 18.33 M malloc and 18.32 M free operations/s
|
||||
ARAL executes 18.36 M malloc and 18.36 M free operations/s
|
||||
Waiting the threads to finish...
|
||||
2023-01-29 17:05:30: netdata INFO : MAIN : ARAL: did 90976190 malloc, 90976190 free, using 2 threads, in 5029462 usecs
|
||||
```
|
||||
|
||||
The same test with 4 threads:
|
||||
|
||||
```
|
||||
Running stress test of 4 threads, with 10000 elements each, for 5 seconds...
|
||||
2023-01-29 17:10:12: netdata INFO : TH[0] : set name of thread 1319552 to TH[0]
|
||||
2023-01-29 17:10:12: netdata INFO : TH[1] : set name of thread 1319553 to TH[1]
|
||||
2023-01-29 17:10:12: netdata INFO : TH[2] : set name of thread 1319554 to TH[2]
|
||||
2023-01-29 17:10:12: netdata INFO : TH[3] : set name of thread 1319555 to TH[3]
|
||||
ARAL executes 19.95 M malloc and 19.91 M free operations/s
|
||||
ARAL executes 20.08 M malloc and 20.08 M free operations/s
|
||||
ARAL executes 20.85 M malloc and 20.85 M free operations/s
|
||||
ARAL executes 20.84 M malloc and 20.84 M free operations/s
|
||||
ARAL executes 21.37 M malloc and 21.37 M free operations/s
|
||||
Waiting the threads to finish...
|
||||
2023-01-29 17:10:17: netdata INFO : MAIN : ARAL: did 103549747 malloc, 103549747 free, using 4 threads, in 5023325 usecs
|
||||
```
|
||||
|
||||
The same with 8 threads:
|
||||
|
||||
```
|
||||
Running stress test of 8 threads, with 10000 elements each, for 5 seconds...
|
||||
2023-01-29 17:07:06: netdata INFO : TH[0] : set name of thread 1317608 to TH[0]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[1] : set name of thread 1317609 to TH[1]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[2] : set name of thread 1317610 to TH[2]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[3] : set name of thread 1317611 to TH[3]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[4] : set name of thread 1317612 to TH[4]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[5] : set name of thread 1317613 to TH[5]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[6] : set name of thread 1317614 to TH[6]
|
||||
2023-01-29 17:07:06: netdata INFO : TH[7] : set name of thread 1317615 to TH[7]
|
||||
ARAL executes 15.73 M malloc and 15.66 M free operations/s
|
||||
ARAL executes 13.95 M malloc and 13.94 M free operations/s
|
||||
ARAL executes 15.59 M malloc and 15.58 M free operations/s
|
||||
ARAL executes 15.49 M malloc and 15.49 M free operations/s
|
||||
ARAL executes 16.16 M malloc and 16.16 M free operations/s
|
||||
Waiting the threads to finish...
|
||||
2023-01-29 17:07:11: netdata INFO : MAIN : ARAL: did 78427750 malloc, 78427750 free, using 8 threads, in 5088591 usecs
|
||||
```
|
||||
|
||||
The same with 16 threads:
|
||||
|
||||
```
|
||||
Running stress test of 16 threads, with 10000 elements each, for 5 seconds...
|
||||
2023-01-29 17:08:04: netdata INFO : TH[0] : set name of thread 1318663 to TH[0]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[1] : set name of thread 1318664 to TH[1]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[2] : set name of thread 1318665 to TH[2]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[3] : set name of thread 1318666 to TH[3]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[4] : set name of thread 1318667 to TH[4]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[5] : set name of thread 1318668 to TH[5]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[6] : set name of thread 1318669 to TH[6]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[7] : set name of thread 1318670 to TH[7]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[8] : set name of thread 1318671 to TH[8]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[9] : set name of thread 1318672 to TH[9]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[10] : set name of thread 1318673 to TH[10]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[11] : set name of thread 1318674 to TH[11]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[12] : set name of thread 1318675 to TH[12]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[13] : set name of thread 1318676 to TH[13]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[14] : set name of thread 1318677 to TH[14]
|
||||
2023-01-29 17:08:04: netdata INFO : TH[15] : set name of thread 1318678 to TH[15]
|
||||
ARAL executes 11.77 M malloc and 11.62 M free operations/s
|
||||
ARAL executes 12.80 M malloc and 12.81 M free operations/s
|
||||
ARAL executes 13.26 M malloc and 13.25 M free operations/s
|
||||
ARAL executes 13.30 M malloc and 13.29 M free operations/s
|
||||
ARAL executes 13.23 M malloc and 13.25 M free operations/s
|
||||
Waiting the threads to finish...
|
||||
2023-01-29 17:08:09: netdata INFO : MAIN : ARAL: did 65302122 malloc, 65302122 free, using 16 threads, in 5066009 usecs
|
||||
```
|
||||
|
||||
As you can see, the top performance is with 4 threads, almost double the single thread speed.
|
||||
16 threads performance is still better than single threaded, despite the intense concurrency.
|
|
@ -0,0 +1,918 @@
|
|||
#include "../libnetdata.h"
|
||||
#include "aral.h"
|
||||
|
||||
#ifdef NETDATA_TRACE_ALLOCATIONS
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS , const char *file, const char *function, size_t line
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS , file, function, line
|
||||
#else
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS
|
||||
#endif
|
||||
|
||||
#define ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST 5
|
||||
|
||||
// max file size
|
||||
#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024)
|
||||
|
||||
// max malloc size
|
||||
// optimal at current versions of libc is up to 256k
|
||||
// ideal to have the same overhead as libc is 4k
|
||||
#define ARAL_MAX_PAGE_SIZE_MALLOC (65*1024)
|
||||
|
||||
// A free slot inside an ARAL page.
// The structure is overlaid on the free memory itself (no extra allocation),
// which is why every ARAL element must be at least sizeof(ARAL_FREE) bytes.
typedef struct aral_free {
    size_t size;            // bytes covered by this free region (a multiple of the element size)
    struct aral_free *next; // next free region on the same page (singly linked list)
} ARAL_FREE;
|
||||
|
||||
// One contiguous allocation (a malloc'd buffer or an mmap'ed file)
// that is sliced into fixed-size elements by the allocator.
typedef struct aral_page {
    size_t size;          // the allocation size of the page
    const char *filename; // backing file path (mmap mode only, NULL otherwise)
    uint8_t *data;        // the page memory itself

    uint32_t free_elements_to_move_first; // threshold used when deciding to re-insert this page first in the list
    uint32_t max_elements;                // the number of elements that can fit on this page

    struct {
        // protected by the owning ARAL's aral_lock spinlock
        uint32_t used_elements; // the number of used elements on this page
        uint32_t free_elements; // the number of free elements on this page
    } aral_lock;

    struct {
        // per-page free list, protected by its own spinlock
        // (so allocations can proceed without the global aral_lock)
        SPINLOCK spinlock;
        ARAL_FREE *list;
    } free;

    struct aral_page *prev; // the prev page on the list
    struct aral_page *next; // the next page on the list
} ARAL_PAGE;
|
||||
|
||||
// An array allocator instance.
// Members are grouped by the lock (or access discipline) that protects them.
struct aral {
    struct {
        // configuration - written once at aral_create(), read-only afterwards
        char name[ARAL_MAX_NAME + 1];

        bool lockless;   // when true, no locking is performed (single-threaded use)
        bool defragment; // when true, the pages list is kept sorted by free elements

        size_t element_size; // calculated to take into account ARAL overheads
        size_t max_allocation_size; // calculated in bytes
        size_t page_ptr_offset; // calculated
        size_t natural_page_size; // calculated

        size_t requested_element_size;  // the element size the caller asked for
        size_t initial_page_elements;   // elements of the first page (grows from here)
        size_t max_page_elements;       // cap on elements per page (0 = use size cap)

        struct {
            bool enabled;
            const char *filename; // prefix for the backing files
            char **cache_dir;     // pointer to the cache directory path
        } mmap;
    } config;

    struct {
        // members below are protected by this spinlock
        SPINLOCK spinlock;
        size_t file_number; // for mmap
        struct aral_page *pages; // linked list of pages

        // statistics, also under the spinlock
        size_t user_malloc_operations;
        size_t user_free_operations;
        size_t defragment_operations;
        size_t defragment_linked_list_traversals;
    } aral_lock;

    struct {
        // serializes page creation and protects allocation_size
        SPINLOCK spinlock;
        size_t allocation_size; // current allocation size
    } adders;

    struct {
        // NOTE(review): empty struct is a GNU extension, not standard C;
        // presumably a placeholder for future atomic counters - confirm.
    } atomic;
};
|
||||
|
||||
// Global (cross-ARAL) memory accounting, updated only with
// relaxed atomics - no lock is ever taken for these counters.
struct {
    struct {
        struct {
            // ARAL bookkeeping structures (ARAL and ARAL_PAGE)
            size_t allocations; // number of live structures
            size_t allocated;   // bytes used by them
        } structures;

        struct {
            // pages backed by malloc()
            size_t allocations; // number of live pages
            size_t allocated;   // total bytes of those pages
            size_t used;        // bytes handed out to callers
        } malloc;

        struct {
            // pages backed by mmap()ed files
            size_t allocations; // number of live pages
            size_t allocated;   // total bytes of those pages
            size_t used;        // bytes handed out to callers
        } mmap;
    } atomic;
} aral_globals = {}; // NOTE(review): empty initializer {} is GNU/C23; C11 would need {0}
|
||||
|
||||
// Report the global ARAL memory accounting to the caller.
// All five output pointers must be non-NULL; values are read with
// relaxed atomics, so the set is not guaranteed to be a consistent snapshot.
void aral_get_size_statistics(size_t *structures, size_t *malloc_allocated, size_t *malloc_used, size_t *mmap_allocated, size_t *mmap_used) {
    *structures = __atomic_load_n(&aral_globals.atomic.structures.allocated, __ATOMIC_RELAXED);
    *malloc_allocated = __atomic_load_n(&aral_globals.atomic.malloc.allocated, __ATOMIC_RELAXED);
    *malloc_used = __atomic_load_n(&aral_globals.atomic.malloc.used, __ATOMIC_RELAXED);
    *mmap_allocated = __atomic_load_n(&aral_globals.atomic.mmap.allocated, __ATOMIC_RELAXED);
    *mmap_used = __atomic_load_n(&aral_globals.atomic.mmap.used, __ATOMIC_RELAXED);
}
|
||||
|
||||
#define ARAL_NATURAL_ALIGNMENT  (sizeof(uintptr_t) * 2)

// Round `size` up to the next multiple of `alignment`.
// A size that is already aligned is returned unchanged.
static inline size_t natural_alignment(size_t size, size_t alignment) {
    return ((size + alignment - 1) / alignment) * alignment;
}
|
||||
|
||||
// Normalize an allocation size for this ARAL:
// round it up to whole hardware pages, then trim it down to a whole
// number of elements so the tail of the page is never wasted.
static size_t aral_align_alloc_size(ARAL *ar, uint64_t size) {
    uint64_t page_size = ar->config.natural_page_size;
    uint64_t element_size = ar->config.element_size;

    uint64_t rem = size % page_size;
    if(rem)
        size += page_size - rem;

    rem = size % element_size;
    if(rem)
        size -= rem;

    return size;
}
|
||||
|
||||
// Take the ARAL-wide lock - a no-op when the allocator was created lockless.
static inline void aral_lock(ARAL *ar) {
    if(unlikely(ar->config.lockless))
        return;

    netdata_spinlock_lock(&ar->aral_lock.spinlock);
}
|
||||
|
||||
// Release the ARAL-wide lock - a no-op when the allocator was created lockless.
static inline void aral_unlock(ARAL *ar) {
    if(unlikely(ar->config.lockless))
        return;

    netdata_spinlock_unlock(&ar->aral_lock.spinlock);
}
|
||||
|
||||
static void aral_delete_leftover_files(const char *name, const char *path, const char *required_prefix) {
|
||||
DIR *dir = opendir(path);
|
||||
if(!dir) return;
|
||||
|
||||
char full_path[FILENAME_MAX + 1];
|
||||
size_t len = strlen(required_prefix);
|
||||
|
||||
struct dirent *de = NULL;
|
||||
while((de = readdir(dir))) {
|
||||
if(de->d_type == DT_DIR)
|
||||
continue;
|
||||
|
||||
if(strncmp(de->d_name, required_prefix, len) != 0)
|
||||
continue;
|
||||
|
||||
snprintfz(full_path, FILENAME_MAX, "%s/%s", path, de->d_name);
|
||||
info("ARAL: '%s' removing left-over file '%s'", name, full_path);
|
||||
if(unlikely(unlink(full_path) == -1))
|
||||
error("ARAL: '%s' cannot delete file '%s'", name, full_path);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// check a free slot
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
// Sanity-check a free-list entry: its size must be at least one element
// and an exact multiple of the element size. Compiled away in production.
static inline void aral_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) {
    if(unlikely(fr->size < ar->config.element_size))
        fatal("ARAL: '%s' free item of size %zu, less than the expected element size %zu",
              ar->config.name, fr->size, ar->config.element_size);

    if(unlikely(fr->size % ar->config.element_size))
        fatal("ARAL: '%s' free item of size %zu is not multiple to element size %zu",
              ar->config.name, fr->size, ar->config.element_size);
}
#else
#define aral_free_validate_internal_check(ar, fr) debug_dummy()
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// find the page a pointer belongs to
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) {
|
||||
aral_lock(ar);
|
||||
|
||||
uintptr_t seeking = (uintptr_t)ptr;
|
||||
ARAL_PAGE *page;
|
||||
|
||||
for(page = ar->aral_lock.pages; page ; page = page->next) {
|
||||
if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size))
|
||||
break;
|
||||
}
|
||||
|
||||
aral_unlock(ar);
|
||||
|
||||
return page;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// find a page with a free slot (there shouldn't be any)
|
||||
|
||||
#ifdef NETDATA_ARAL_INTERNAL_CHECKS
|
||||
static inline ARAL_PAGE *find_page_with_free_slots_internal_check___with_aral_lock(ARAL *ar) {
|
||||
ARAL_PAGE *page;
|
||||
|
||||
for(page = ar->aral_lock.pages; page ; page = page->next) {
|
||||
if(page->aral_lock.free_elements)
|
||||
break;
|
||||
|
||||
internal_fatal(page->size - page->aral_lock.used_elements * ar->config.element_size >= ar->config.element_size,
|
||||
"ARAL: '%s' a page is marked full, but it is not!", ar->config.name);
|
||||
|
||||
internal_fatal(page->size < page->aral_lock.used_elements * ar->config.element_size,
|
||||
"ARAL: '%s' a page has been overflown!", ar->config.name);
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Allocate and initialize a new ARAL page (malloc'd or mmap'ed).
// "No lock needed" refers to the aral lock; the caller is expected to hold
// the adders spinlock, which serializes page creation and protects
// ar->adders.allocation_size.
static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE));
    netdata_spinlock_init(&page->free.spinlock);
    page->size = ar->adders.allocation_size;

    // grow the next page size by 4/3 each time, until the configured maximum
    if(page->size > ar->config.max_allocation_size)
        page->size = ar->config.max_allocation_size;
    else
        ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->adders.allocation_size * 4 / 3);

    page->max_elements = page->aral_lock.free_elements = page->size / ar->config.element_size;

    // a page is promoted to the head of the list only when at least a
    // quarter of its elements are free (minimum 1)
    page->free_elements_to_move_first = page->max_elements / 4;
    if(unlikely(page->free_elements_to_move_first < 1))
        page->free_elements_to_move_first = 1;

    __atomic_add_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);

    if(unlikely(ar->config.mmap.enabled)) {
        // NOTE(review): aral_lock.file_number is incremented here without the
        // aral lock - presumably serialized by the adders spinlock alone;
        // verify no other writer exists.
        ar->aral_lock.file_number++;
        char filename[FILENAME_MAX + 1];
        snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->config.mmap.cache_dir, ar->config.mmap.filename, ar->aral_lock.file_number);
        page->filename = strdupz(filename);
        page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0, false, NULL);
        if (unlikely(!page->data))
            fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'",
                  ar->config.name, page->size, page->filename);
        __atomic_add_fetch(&aral_globals.atomic.mmap.allocations, 1, __ATOMIC_RELAXED);
        __atomic_add_fetch(&aral_globals.atomic.mmap.allocated, page->size, __ATOMIC_RELAXED);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        page->data = mallocz_int(page->size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        page->data = mallocz(page->size);
#endif
        __atomic_add_fetch(&aral_globals.atomic.malloc.allocations, 1, __ATOMIC_RELAXED);
        __atomic_add_fetch(&aral_globals.atomic.malloc.allocated, page->size, __ATOMIC_RELAXED);
    }

    // link the free space to its page:
    // the whole page starts as one big free region
    ARAL_FREE *fr = (ARAL_FREE *)page->data;
    fr->size = page->size;
    fr->next = NULL;
    page->free.list = fr;

    aral_free_validate_internal_check(ar, fr);

    return page;
}
|
||||
|
||||
// Release an ARAL page and its backing storage (mmap'ed file or malloc'd
// buffer) and update the global accounting counters.
// "No lock needed": the page must already be unlinked from the pages list.
void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {

    // free it
    if (ar->config.mmap.enabled) {
        netdata_munmap(page->data, page->size);

        // FIX: unlink() returns 0 on success and -1 on failure (it never
        // returns 1), so the previous check `== 1` could never report an
        // error; compare against -1.
        if (unlikely(unlink(page->filename) == -1))
            error("Cannot delete file '%s'", page->filename);

        freez((void *)page->filename);

        __atomic_sub_fetch(&aral_globals.atomic.mmap.allocations, 1, __ATOMIC_RELAXED);
        __atomic_sub_fetch(&aral_globals.atomic.mmap.allocated, page->size, __ATOMIC_RELAXED);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        freez_int(page->data TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        freez(page->data);
#endif
        __atomic_sub_fetch(&aral_globals.atomic.malloc.allocations, 1, __ATOMIC_RELAXED);
        __atomic_sub_fetch(&aral_globals.atomic.malloc.allocated, page->size, __ATOMIC_RELAXED);
    }

    freez(page);

    __atomic_sub_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_sub_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
}
|
||||
|
||||
// Insert an unlinked page (that has free items) into the pages list,
// in one of the first three positions, so that pages with free slots
// are always near the head. Caller must hold the aral lock.
static inline void aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    ARAL_PAGE *first = ar->aral_lock.pages;

    // go first when the page is almost full (few free items), the list is
    // empty, the current head is full, or the page has roughly the same
    // amount of free items as the head (within the rearrange delta)
    if (page->aral_lock.free_elements <= page->free_elements_to_move_first ||
        !first ||
        !first->aral_lock.free_elements ||
        page->aral_lock.free_elements <= first->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST) {
        // first position
        DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
    }
    else {
        ARAL_PAGE *second = first->next;

        if (!second ||
            !second->aral_lock.free_elements ||
            page->aral_lock.free_elements <= second->aral_lock.free_elements)
            // second position
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, first, page, prev, next);
        else
            // third position
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, second, page, prev, next);
    }
}
|
||||
|
||||
// Pick (or create) a page that has at least one free slot, reserve one
// slot on it (updating the used/free counters), and return the page.
// Returns with the aral lock RELEASED; the caller then pops the actual
// free-list entry under the page's own free.spinlock.
static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    aral_lock(ar);

    ARAL_PAGE *page = ar->aral_lock.pages;

    // loop until the head page has a free slot; if none does, one thread
    // (the holder of the adders spinlock) creates a new page while the
    // others retry against the list head
    while(!page || !page->aral_lock.free_elements) {
#ifdef NETDATA_ARAL_INTERNAL_CHECKS
        internal_fatal(find_page_with_free_slots_internal_check___with_aral_lock(ar), "ARAL: '%s' found page with free slot!", ar->config.name);
#endif
        aral_unlock(ar);

        if(netdata_spinlock_trylock(&ar->adders.spinlock)) {
            // we won the race - create the page without the aral lock,
            // then link it under the aral lock
            page = aral_create_page___no_lock_needed(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);

            aral_lock(ar);
            aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
            netdata_spinlock_unlock(&ar->adders.spinlock);
            break;
        }
        else {
            // someone else is adding a page - re-check the list head
            aral_lock(ar);
            page = ar->aral_lock.pages;
        }
    }

    // we have a page
    // and aral locked

    {
        // prefer the second page when it has clearly fewer free items than
        // the first (fills pages up faster so empty ones can be released)
        ARAL_PAGE *first = ar->aral_lock.pages;
        ARAL_PAGE *second = first->next;

        if (!second ||
            !second->aral_lock.free_elements ||
            first->aral_lock.free_elements <= second->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST)
            page = first;
        else {
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, second, prev, next);
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, second, prev, next);
            page = second;
        }
    }

    internal_fatal(!page || !page->aral_lock.free_elements,
                   "ARAL: '%s' selected page does not have a free slot in it",
                   ar->config.name);

    internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements,
                   "ARAL: '%s' page element counters do not match, "
                   "page says it can handle %zu elements, "
                   "but there are %zu used and %zu free items, "
                   "total %zu items",
                   ar->config.name,
                   (size_t)page->max_elements,
                   (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements,
                   (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements
                   );

    ar->aral_lock.user_malloc_operations++;

    // acquire a slot for the caller
    page->aral_lock.used_elements++;
    if(--page->aral_lock.free_elements == 0) {
        // we are done with this page
        // move the full page last
        // so that pages with free items remain first in the list
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
    }

    aral_unlock(ar);

    return page;
}
|
||||
|
||||
// Allocate one element from the ARAL.
// First reserves a slot on some page (under the aral lock), then carves
// the actual memory out of that page's free list (under the page lock).
// The owning page pointer is stored just after the element, so that
// aral_freez() can find the page in O(1).
void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {

    ARAL_PAGE *page = aral_acquire_a_free_slot(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);

    netdata_spinlock_lock(&page->free.spinlock);

    internal_fatal(!page->free.list,
                   "ARAL: '%s' free item to use, cannot be NULL.", ar->config.name);

    internal_fatal(page->free.list->size < ar->config.element_size,
                   "ARAL: '%s' free item size %zu, cannot be smaller than %zu",
                   ar->config.name, page->free.list->size, ar->config.element_size);

    ARAL_FREE *found_fr = page->free.list;

    // check if the remaining size (after we use this slot) is not enough for another element
    if(unlikely(found_fr->size - ar->config.element_size < ar->config.element_size)) {
        // we can use the entire free space entry

        page->free.list = found_fr->next;
    }
    else {
        // we can split the free space entry

        uint8_t *data = (uint8_t *)found_fr;
        ARAL_FREE *fr = (ARAL_FREE *)&data[ar->config.element_size];
        fr->size = found_fr->size - ar->config.element_size;

        // link the free slot first in the page
        fr->next = found_fr->next;
        page->free.list = fr;

        aral_free_validate_internal_check(ar, fr);
    }

    netdata_spinlock_unlock(&page->free.spinlock);

    // put the page pointer after the element
    uint8_t *data = (uint8_t *)found_fr;
    ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset];
    *page_ptr = page;

    if(unlikely(ar->config.mmap.enabled))
        __atomic_add_fetch(&aral_globals.atomic.mmap.used, ar->config.element_size, __ATOMIC_RELAXED);
    else
        __atomic_add_fetch(&aral_globals.atomic.malloc.used, ar->config.element_size, __ATOMIC_RELAXED);

    return (void *)found_fr;
}
|
||||
|
||||
// Map an element pointer previously returned by aral_mallocz() back to the
// ARAL_PAGE it belongs to, by reading the page pointer stored just after
// the element. Must be called WITHOUT the aral lock (the debug helper
// below takes it itself).
static inline ARAL_PAGE *aral_ptr_to_page___must_NOT_have_aral_lock(ARAL *ar, void *ptr) {
    // given a data pointer we returned before,
    // find the ARAL_PAGE it belongs to

    uint8_t *data = (uint8_t *)ptr;
    ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset];
    ARAL_PAGE *page = *page_ptr;

#ifdef NETDATA_INTERNAL_CHECKS
    // make it NULL so that we will fail on double free
    // do not enable this on production, because the MMAP file
    // will need to be saved again!
    *page_ptr = NULL;
#endif

#ifdef NETDATA_ARAL_INTERNAL_CHECKS
    {
        // find the page ptr belongs
        ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr);

        // FIX: these messages referenced ar->name, which does not exist
        // (the field is ar->config.name) - a compile error whenever
        // NETDATA_ARAL_INTERNAL_CHECKS is enabled.
        internal_fatal(page != page2,
                       "ARAL: '%s' page pointers do not match!",
                       ar->config.name);

        internal_fatal(!page2,
                       "ARAL: '%s' free of pointer %p is not in ARAL address space.",
                       ar->config.name, ptr);
    }
#endif

    internal_fatal(!page,
                   "ARAL: '%s' possible corruption or double free of pointer %p",
                   ar->config.name, ptr);

    return page;
}
|
||||
|
||||
// Move `page` to its sorted position in the pages list, keeping pages
// ordered by ascending free_elements (full pages last). Caller must hold
// the aral lock. Walks forward first, then backward; the traversal counts
// are accumulated into the defragment statistics.
static void aral_defrag_sorted_page_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    ARAL_PAGE *tmp;

    int action = 0; (void)action; // which branch moved the page - debug aid only
    size_t move_later = 0, move_earlier = 0;

    // walk forward while the next pages have free slots but fewer than ours
    for(tmp = page->next ;
        tmp && tmp->aral_lock.free_elements && tmp->aral_lock.free_elements < page->aral_lock.free_elements ;
        tmp = tmp->next)
        move_later++;

    if(!tmp && page->next) {
        // reached the tail - page becomes last
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        action = 1;
    }
    else if(tmp != page->next) {
        // found a page with more (or no) free slots - insert right before it
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(ar->aral_lock.pages, tmp, page, prev, next);
        action = 2;
    }
    else {
        // no forward move - walk backward while previous pages are full
        // or have more free slots than ours
        for(tmp = (page == ar->aral_lock.pages) ? NULL : page->prev ;
            tmp && (!tmp->aral_lock.free_elements || tmp->aral_lock.free_elements > page->aral_lock.free_elements);
            tmp = (tmp == ar->aral_lock.pages) ? NULL : tmp->prev)
            move_earlier++;

        if(!tmp) {
            // reached the head - page becomes first
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
            action = 3;
        }
        else if(tmp != page->prev){
            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, tmp, page, prev, next);
            action = 4;
        }
    }

    ar->aral_lock.defragment_operations++;
    ar->aral_lock.defragment_linked_list_traversals += move_earlier + move_later;

    // post-conditions: the page is neither too early nor too late in the list
    internal_fatal(page->next && page->next->aral_lock.free_elements && page->next->aral_lock.free_elements < page->aral_lock.free_elements,
                   "ARAL: '%s' item should be later in the list", ar->config.name);

    internal_fatal(page != ar->aral_lock.pages && (!page->prev->aral_lock.free_elements || page->prev->aral_lock.free_elements > page->aral_lock.free_elements),
                   "ARAL: '%s' item should be earlier in the list", ar->config.name);
}
|
||||
|
||||
// Re-position a page that just gained a free slot.
// Without defragmentation (the default) the page is re-inserted near the
// head; with defragmentation it is moved to its fully sorted position.
// Caller must hold the aral lock.
static inline void aral_move_page_with_free_list___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
    if(unlikely(page == ar->aral_lock.pages))
        // we are the first already
        return;

    if(likely(!ar->config.defragment)) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
    }
    else
        aral_defrag_sorted_page_position___aral_lock_needed(ar, page);
}
|
||||
|
||||
// Return one element to the ARAL. NULL is a no-op.
// The element is pushed onto its page's free list (under the page lock),
// then the page counters are updated (under the aral lock); a page whose
// last element is freed is unlinked and released entirely.
void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    if(unlikely(!ptr)) return;

    // get the page pointer
    ARAL_PAGE *page = aral_ptr_to_page___must_NOT_have_aral_lock(ar, ptr);

    if(unlikely(ar->config.mmap.enabled))
        __atomic_sub_fetch(&aral_globals.atomic.mmap.used, ar->config.element_size, __ATOMIC_RELAXED);
    else
        __atomic_sub_fetch(&aral_globals.atomic.malloc.used, ar->config.element_size, __ATOMIC_RELAXED);

    // make this element available
    // (the ARAL_FREE header is written over the element's memory)
    ARAL_FREE *fr = (ARAL_FREE *)ptr;
    fr->size = ar->config.element_size;

    netdata_spinlock_lock(&page->free.spinlock);
    fr->next = page->free.list;
    page->free.list = fr;
    netdata_spinlock_unlock(&page->free.spinlock);

    aral_lock(ar);

    internal_fatal(!page->aral_lock.used_elements,
                   "ARAL: '%s' pointer %p is inside a page without any active allocations.",
                   ar->config.name, ptr);

    internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements,
                   "ARAL: '%s' page element counters do not match, "
                   "page says it can handle %zu elements, "
                   "but there are %zu used and %zu free items, "
                   "total %zu items",
                   ar->config.name,
                   (size_t)page->max_elements,
                   (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements,
                   (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements
                   );

    page->aral_lock.used_elements--;
    page->aral_lock.free_elements++;

    ar->aral_lock.user_free_operations++;

    // if the page is empty, release it
    if(unlikely(!page->aral_lock.used_elements)) {
        // unlink under the lock, free the page outside of it
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        aral_unlock(ar);
        aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
    }
    else {
        aral_move_page_with_free_list___aral_lock_needed(ar, page);
        aral_unlock(ar);
    }
}
|
||||
|
||||
// Destroy an ARAL: release every page (and its backing storage),
// then free the ARAL structure itself. Callers must guarantee no other
// thread is still using the allocator.
void aral_destroy_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    aral_lock(ar);

    for(ARAL_PAGE *page = ar->aral_lock.pages; page ; page = ar->aral_lock.pages) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
        aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
    }

    aral_unlock(ar);
    freez(ar);
}
|
||||
|
||||
// Create a new array allocator.
//   name                   - short identifier used in logs
//   element_size           - payload bytes per element (overhead is added)
//   initial_page_elements  - elements of the first page (minimum 2)
//   max_page_elements      - cap on elements per page (0 = use byte-size caps)
//   filename, cache_dir    - backing file prefix and directory (mmap mode)
//   mmap                   - back pages with mmap'ed files instead of malloc
//   lockless               - skip all locking (single-threaded use only)
// Returns a fully initialized ARAL; aborts (fatal) on unrecoverable errors.
ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_elements, const char *filename, char **cache_dir, bool mmap, bool lockless) {
    ARAL *ar = callocz(1, sizeof(ARAL));
    ar->config.requested_element_size = element_size;
    ar->config.initial_page_elements = initial_page_elements;
    ar->config.max_page_elements = max_page_elements;
    // NOTE(review): filename and cache_dir pointers are stored as-is -
    // the caller must keep them valid for the lifetime of the ARAL.
    ar->config.mmap.filename = filename;
    ar->config.mmap.cache_dir = cache_dir;
    ar->config.mmap.enabled = mmap;
    ar->config.lockless = lockless;
    ar->config.defragment = false;
    strncpyz(ar->config.name, name, ARAL_MAX_NAME);
    netdata_spinlock_init(&ar->aral_lock.spinlock);

    long int page_size = sysconf(_SC_PAGE_SIZE);
    if (unlikely(page_size == -1))
        ar->config.natural_page_size = 4096; // sensible fallback
    else
        ar->config.natural_page_size = page_size;

    // we need to add a page pointer after the element
    // so, first align the element size to the pointer size
    ar->config.element_size = natural_alignment(ar->config.requested_element_size, sizeof(uintptr_t));

    // then add the size of a pointer to it
    ar->config.element_size += sizeof(uintptr_t);

    // make sure it is at least what we need for an ARAL_FREE slot
    if (ar->config.element_size < sizeof(ARAL_FREE))
        ar->config.element_size = sizeof(ARAL_FREE);

    // and finally align it to the natural alignment
    ar->config.element_size = natural_alignment(ar->config.element_size, ARAL_NATURAL_ALIGNMENT);

    // we write the page pointer just after each element
    ar->config.page_ptr_offset = ar->config.element_size - sizeof(uintptr_t);

    // sanity: the payload plus the page pointer must fit in the final element
    if(ar->config.requested_element_size + sizeof(uintptr_t) > ar->config.element_size)
        fatal("ARAL: '%s' failed to calculate properly page_ptr_offset: "
              "element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, "
              "final element size %zu, page_ptr_offset %zu",
              ar->config.name, ar->config.requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT,
              ar->config.element_size, ar->config.page_ptr_offset);

    //info("ARAL: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu",
    //      ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset);


    if (ar->config.initial_page_elements < 2)
        ar->config.initial_page_elements = 2;

    if(ar->config.mmap.enabled && (!ar->config.mmap.cache_dir || !*ar->config.mmap.cache_dir)) {
        // misconfigured mmap - degrade to malloc mode in production,
        // abort in debug builds
        error("ARAL: '%s' mmap cache directory is not configured properly, disabling mmap.", ar->config.name);
        ar->config.mmap.enabled = false;
        internal_fatal(true, "ARAL: '%s' mmap cache directory is not configured properly", ar->config.name);
    }

    // derive the maximum page size either from the element cap or from
    // the mode-specific byte-size defaults
    uint64_t max_alloc_size;
    if(!ar->config.max_page_elements)
        max_alloc_size = ar->config.mmap.enabled ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC;
    else
        max_alloc_size = ar->config.max_page_elements * ar->config.element_size;

    ar->config.max_allocation_size = aral_align_alloc_size(ar, max_alloc_size);
    ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->config.element_size * ar->config.initial_page_elements);
    ar->aral_lock.pages = NULL;
    ar->aral_lock.file_number = 0;

    if(ar->config.mmap.enabled) {
        // prepare the mmap directory and clean up files from a previous run
        char directory_name[FILENAME_MAX + 1];
        snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->config.mmap.cache_dir);
        int r = mkdir(directory_name, 0775);
        if (r != 0 && errno != EEXIST)
            fatal("Cannot create directory '%s'", directory_name);

        char file[FILENAME_MAX + 1];
        snprintfz(file, FILENAME_MAX, "%s.", ar->config.mmap.filename);
        aral_delete_leftover_files(ar->config.name, directory_name, file);
    }

    internal_error(true,
                   "ARAL: '%s' "
                   "element size %zu (requested %zu bytes), "
                   "min elements per page %zu (requested %zu), "
                   "max elements per page %zu (requested %zu), "
                   "max page size %zu bytes, "
                   , ar->config.name
                   , ar->config.element_size, ar->config.requested_element_size
                   , ar->adders.allocation_size / ar->config.element_size, ar->config.initial_page_elements
                   , ar->config.max_allocation_size / ar->config.element_size, ar->config.max_page_elements
                   , ar->config.max_allocation_size
                   );

    __atomic_add_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
    __atomic_add_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL), __ATOMIC_RELAXED);
    return ar;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// unittest
|
||||
|
||||
// Shared state for the unit/stress test threads.
struct aral_unittest_config {
    bool single_threaded; // when true, leftover-page checks are performed
    bool stop;            // set by the controller (atomically) to stop workers
    ARAL *ar;             // the allocator under test
    size_t elements;      // elements each thread allocates per round
    size_t threads;       // number of worker threads
    int errors;           // error counter, incremented atomically by workers
};
|
||||
|
||||
static void *aral_test_thread(void *ptr) {
|
||||
struct aral_unittest_config *auc = ptr;
|
||||
ARAL *ar = auc->ar;
|
||||
size_t elements = auc->elements;
|
||||
|
||||
void **pointers = callocz(elements, sizeof(void *));
|
||||
|
||||
do {
|
||||
for (size_t i = 0; i < elements; i++) {
|
||||
pointers[i] = aral_mallocz(ar);
|
||||
}
|
||||
|
||||
for (size_t div = 5; div >= 2; div--) {
|
||||
for (size_t i = 0; i < elements / div; i++) {
|
||||
aral_freez(ar, pointers[i]);
|
||||
pointers[i] = NULL;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < elements / div; i++) {
|
||||
pointers[i] = aral_mallocz(ar);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t step = 50; step >= 10; step -= 10) {
|
||||
for (size_t i = 0; i < elements; i += step) {
|
||||
aral_freez(ar, pointers[i]);
|
||||
pointers[i] = NULL;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < elements; i += step) {
|
||||
pointers[i] = aral_mallocz(ar);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < elements; i++) {
|
||||
aral_freez(ar, pointers[i]);
|
||||
pointers[i] = NULL;
|
||||
}
|
||||
|
||||
if (auc->single_threaded && ar->aral_lock.pages) {
|
||||
fprintf(stderr, "\n\nARAL leftovers detected (1)\n\n");
|
||||
__atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
if(!auc->single_threaded && __atomic_load_n(&auc->stop, __ATOMIC_RELAXED))
|
||||
break;
|
||||
|
||||
for (size_t i = 0; i < elements; i++) {
|
||||
pointers[i] = aral_mallocz(ar);
|
||||
}
|
||||
|
||||
size_t increment = elements / ar->config.max_page_elements;
|
||||
for (size_t all = increment; all <= elements / 2; all += increment) {
|
||||
|
||||
size_t to_free = all % ar->config.max_page_elements;
|
||||
size_t step = elements / to_free;
|
||||
if(!step) step = 1;
|
||||
|
||||
// fprintf(stderr, "all %zu, to free %zu, step %zu\n", all, to_free, step);
|
||||
|
||||
size_t free_list[to_free];
|
||||
for (size_t i = 0; i < to_free; i++) {
|
||||
size_t pos = step * i;
|
||||
aral_freez(ar, pointers[pos]);
|
||||
pointers[pos] = NULL;
|
||||
free_list[i] = pos;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < to_free; i++) {
|
||||
size_t pos = free_list[i];
|
||||
pointers[pos] = aral_mallocz(ar);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < elements; i++) {
|
||||
aral_freez(ar, pointers[i]);
|
||||
pointers[i] = NULL;
|
||||
}
|
||||
|
||||
if (auc->single_threaded && ar->aral_lock.pages) {
|
||||
fprintf(stderr, "\n\nARAL leftovers detected (2)\n\n");
|
||||
__atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
} while(!auc->single_threaded && !__atomic_load_n(&auc->stop, __ATOMIC_RELAXED));
|
||||
|
||||
freez(pointers);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Multi-threaded ARAL stress test: spawns `threads` workers that each
// allocate/free `elements` items in a loop, prints the malloc/free rate
// once per second for `seconds` seconds, then stops the workers and
// checks that no pages were leaked.
// Returns the number of errors detected (0 on success).
int aral_stress_test(size_t threads, size_t elements, size_t seconds) {
    fprintf(stderr, "Running stress test of %zu threads, with %zu elements each, for %zu seconds...\n",
            threads, elements, seconds);

    // reset global ARAL statistics so the report reflects only this run
    memset(&aral_globals, 0, sizeof(aral_globals));

    struct aral_unittest_config auc = {
            .single_threaded = false,
            .threads = threads,
            .ar = aral_create("aral-test", 20, 10, 1024, "test-aral", NULL, false, false),
            .elements = elements,
            .errors = 0,
    };

    usec_t started_ut = now_monotonic_usec();
    netdata_thread_t thread_ptrs[threads];

    for(size_t i = 0; i < threads ; i++) {
        char tag[NETDATA_THREAD_NAME_MAX + 1];
        snprintfz(tag, NETDATA_THREAD_NAME_MAX, "TH[%zu]", i);
        netdata_thread_create(&thread_ptrs[i], tag,
                              NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
                              aral_test_thread, &auc);
    }

    // once per second, sample the operation counters under the aral lock
    // and print the delta as a rate
    size_t malloc_done = 0;
    size_t free_done = 0;
    size_t countdown = seconds;
    while(countdown-- > 0) {
        sleep_usec(1 * USEC_PER_SEC);
        aral_lock(auc.ar);
        size_t m = auc.ar->aral_lock.user_malloc_operations;
        size_t f = auc.ar->aral_lock.user_free_operations;
        aral_unlock(auc.ar);
        fprintf(stderr, "ARAL executes %0.2f M malloc and %0.2f M free operations/s\n",
                (double)(m - malloc_done) / 1000000.0, (double)(f - free_done) / 1000000.0);
        malloc_done = m;
        free_done = f;
    }

    // signal the workers to stop; they poll this flag with relaxed atomics
    __atomic_store_n(&auc.stop, true, __ATOMIC_RELAXED);

//    fprintf(stderr, "Cancelling the threads...\n");
//    for(size_t i = 0; i < threads ; i++) {
//        netdata_thread_cancel(thread_ptrs[i]);
//    }

    fprintf(stderr, "Waiting the threads to finish...\n");
    for(size_t i = 0; i < threads ; i++) {
        netdata_thread_join(thread_ptrs[i], NULL);
    }

    usec_t ended_ut = now_monotonic_usec();

    // the workers are expected to free everything before exiting, so any
    // remaining page indicates a leak
    if (auc.ar->aral_lock.pages) {
        fprintf(stderr, "\n\nARAL leftovers detected (3)\n\n");
        __atomic_add_fetch(&auc.errors, 1, __ATOMIC_RELAXED);
    }

    info("ARAL: did %zu malloc, %zu free, "
         "using %zu threads, in %llu usecs",
         auc.ar->aral_lock.user_malloc_operations,
         auc.ar->aral_lock.user_free_operations,
         threads,
         ended_ut - started_ut);

    aral_destroy(auc.ar);

    return auc.errors;
}
|
||||
|
||||
// Run the ARAL unit test single-threaded on the calling thread, then a
// short 2-thread / 5-second stress test.
// Returns the total number of errors found (0 == success).
int aral_unittest(size_t elements) {
    char *cache_dir = "/tmp/";

    struct aral_unittest_config auc = {
            .single_threaded = true,
            .threads = 1,
            .ar = aral_create("aral-test", 20, 10, 1024, "test-aral", &cache_dir, false, false),
            .elements = elements,
            .errors = 0,
    };

    // single_threaded == true: the test body runs inline on this thread
    aral_test_thread(&auc);
    aral_destroy(auc.ar);

    int stress_errors = aral_stress_test(2, elements, 5);
    return auc.errors + stress_errors;
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
#ifndef ARAL_H
#define ARAL_H 1

#include "../libnetdata.h"

// maximum length of an ARAL name, excluding the terminating NUL
// (presumably enforced by aral.c - TODO confirm)
#define ARAL_MAX_NAME 23

// opaque handle - the internals live in aral.c only
typedef struct aral ARAL;

// Create an array allocator that hands out fixed-size elements of
// `element_size` bytes, starting with pages of `initial_page_elements`
// and growing up to `max_page_elements` per page. With `mmap` true and a
// valid `cache_dir`, pages are file-backed under `filename`-prefixed files.
// `lockless` skips internal locking for strictly single-threaded users.
ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_elements, const char *filename, char **cache_dir, bool mmap, bool lockless);
int aral_unittest(size_t elements);
void aral_get_size_statistics(size_t *structures, size_t *malloc_allocated, size_t *malloc_used, size_t *mmap_allocated, size_t *mmap_used);

#ifdef NETDATA_TRACE_ALLOCATIONS

// with allocation tracing enabled, the caller's source location is passed
// through to the allocator
#define aral_mallocz(ar) aral_mallocz_internal(ar, __FILE__, __FUNCTION__, __LINE__)
#define aral_freez(ar, ptr) aral_freez_internal(ar, ptr, __FILE__, __FUNCTION__, __LINE__)
#define aral_destroy(ar) aral_destroy_internal(ar, __FILE__, __FUNCTION__, __LINE__)

void *aral_mallocz_internal(ARAL *ar, const char *file, const char *function, size_t line);
void aral_freez_internal(ARAL *ar, void *ptr, const char *file, const char *function, size_t line);
void aral_destroy_internal(ARAL *ar, const char *file, const char *function, size_t line);

#else // NETDATA_TRACE_ALLOCATIONS

#define aral_mallocz(ar) aral_mallocz_internal(ar)
#define aral_freez(ar, ptr) aral_freez_internal(ar, ptr)
#define aral_destroy(ar) aral_destroy_internal(ar)

void *aral_mallocz_internal(ARAL *ar);
void aral_freez_internal(ARAL *ar, void *ptr);
void aral_destroy_internal(ARAL *ar);

#endif // NETDATA_TRACE_ALLOCATIONS

#endif // ARAL_H
|
|
@ -1,7 +0,0 @@
|
|||
<!--
|
||||
title: "Array Allocator"
|
||||
custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/arrayalloc/README.md
|
||||
-->
|
||||
|
||||
# Array Allocator
|
||||
|
|
@ -1,501 +0,0 @@
|
|||
#include "../libnetdata.h"
|
||||
#include "arrayalloc.h"
|
||||
|
||||
#ifdef NETDATA_TRACE_ALLOCATIONS
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS , const char *file, const char *function, size_t line
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS , file, function, line
|
||||
#else
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS
|
||||
#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS
|
||||
#endif
|
||||
|
||||
// max file size
|
||||
#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024)
|
||||
|
||||
// max malloc size
|
||||
// optimal at current versions of libc is up to 256k
|
||||
// ideal to have the same overhead as libc is 4k
|
||||
#define ARAL_MAX_PAGE_SIZE_MALLOC (64*1024)
|
||||
|
||||
typedef struct arrayalloc_free {
|
||||
size_t size;
|
||||
struct arrayalloc_page *page;
|
||||
struct arrayalloc_free *next;
|
||||
} ARAL_FREE;
|
||||
|
||||
typedef struct arrayalloc_page {
|
||||
const char *filename;
|
||||
size_t size; // the total size of the page
|
||||
size_t used_elements; // the total number of used elements on this page
|
||||
uint8_t *data;
|
||||
ARAL_FREE *free_list;
|
||||
struct arrayalloc_page *prev; // the prev page on the list
|
||||
struct arrayalloc_page *next; // the next page on the list
|
||||
} ARAL_PAGE;
|
||||
|
||||
#define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2)
|
||||
// Round `size` up to the next multiple of `alignment`.
// Returns `size` unchanged when it is already aligned.
static inline size_t natural_alignment(size_t size, size_t alignment) {
    size_t remainder = size % alignment;

    if(remainder != 0)
        size += alignment - remainder;

    return size;
}
|
||||
|
||||
static void arrayalloc_delete_leftover_files(const char *path, const char *required_prefix) {
|
||||
DIR *dir = opendir(path);
|
||||
if(!dir) return;
|
||||
|
||||
char full_path[FILENAME_MAX + 1];
|
||||
size_t len = strlen(required_prefix);
|
||||
|
||||
struct dirent *de = NULL;
|
||||
while((de = readdir(dir))) {
|
||||
if(de->d_type == DT_DIR)
|
||||
continue;
|
||||
|
||||
if(strncmp(de->d_name, required_prefix, len) != 0)
|
||||
continue;
|
||||
|
||||
snprintfz(full_path, FILENAME_MAX, "%s/%s", path, de->d_name);
|
||||
info("ARRAYALLOC: removing left-over file '%s'", full_path);
|
||||
if(unlikely(unlink(full_path) == -1))
|
||||
error("Cannot delete file '%s'", full_path);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// arrayalloc_init()
|
||||
|
||||
// Lazy, one-time initialization of an ARAL: computes the real element
// size (requested size + a trailing page back-pointer, aligned), the
// maximum page allocation size, and - in mmap mode - prepares the cache
// directory. Serialized by a process-wide mutex so concurrent first
// allocations initialize the structure only once.
static void arrayalloc_init(ARAL *ar) {
    static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER;
    netdata_mutex_lock(&mutex);

    if(!ar->internal.initialized) {
        netdata_spinlock_init(&ar->internal.spinlock);

        long int page_size = sysconf(_SC_PAGE_SIZE);
        if (unlikely(page_size == -1))
            ar->internal.natural_page_size = 4096;  // fallback when the OS cannot tell us
        else
            ar->internal.natural_page_size = page_size;

        // we need to add a page pointer after the element
        // so, first align the element size to the pointer size
        ar->internal.element_size = natural_alignment(ar->requested_element_size, sizeof(uintptr_t));

        // then add the size of a pointer to it
        ar->internal.element_size += sizeof(uintptr_t);

        // make sure it is at least what we need for an ARAL_FREE slot
        // (free elements are reused as free-list nodes in place)
        if (ar->internal.element_size < sizeof(ARAL_FREE))
            ar->internal.element_size = sizeof(ARAL_FREE);

        // and finally align it to the natural alignment
        ar->internal.element_size = natural_alignment(ar->internal.element_size, ARAL_NATURAL_ALIGNMENT);

        // we write the page pointer just after each element
        ar->internal.page_ptr_offset = ar->internal.element_size - sizeof(uintptr_t);

        // sanity check: the user payload plus the back-pointer must fit
        if(ar->requested_element_size + sizeof(uintptr_t) > ar->internal.element_size)
            fatal("ARRAYALLOC: failed to calculate properly page_ptr_offset: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu",
                  ar->requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset);

        //info("ARRAYALLOC: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu",
        //      ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset);

        // enforce a minimum first-page size
        if (ar->initial_elements < 10)
            ar->initial_elements = 10;

        // mmap mode requires a usable cache directory
        ar->internal.mmap = (ar->use_mmap && ar->cache_dir && *ar->cache_dir) ? true : false;
        ar->internal.max_alloc_size = ar->internal.mmap ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC;

        // round the max allocation up to a whole number of OS pages...
        if(ar->internal.max_alloc_size % ar->internal.natural_page_size)
            ar->internal.max_alloc_size += ar->internal.natural_page_size - (ar->internal.max_alloc_size % ar->internal.natural_page_size) ;

        // ...then down to a whole number of elements
        if(ar->internal.max_alloc_size % ar->internal.element_size)
            ar->internal.max_alloc_size -= ar->internal.max_alloc_size % ar->internal.element_size;

        ar->internal.pages = NULL;
        ar->internal.allocation_multiplier = 1;
        ar->internal.file_number = 0;

        if(ar->internal.mmap) {
            char directory_name[FILENAME_MAX + 1];
            snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->cache_dir);
            int r = mkdir(directory_name, 0775);
            if (r != 0 && errno != EEXIST)
                fatal("Cannot create directory '%s'", directory_name);

            // wipe stale backing files from a previous run
            char filename[FILENAME_MAX + 1];
            snprintfz(filename, FILENAME_MAX, "%s.", ar->filename);
            arrayalloc_delete_leftover_files(directory_name, filename);
        }

        ar->internal.initialized = true;
    }

    netdata_mutex_unlock(&mutex);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// check a free slot
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
// Debug-only sanity check on a free-list entry: its size must be at least
// one element and an exact multiple of the element size. Aborts otherwise.
static inline void arrayalloc_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) {
    if(unlikely(fr->size < ar->internal.element_size))
        fatal("ARRAYALLOC: free item of size %zu, less than the expected element size %zu", fr->size, ar->internal.element_size);

    if(unlikely(fr->size % ar->internal.element_size))
        fatal("ARRAYALLOC: free item of size %zu is not multiple to element size %zu", fr->size, ar->internal.element_size);
}
#else
// no-op in production builds
#define arrayalloc_free_validate_internal_check(ar, fr) debug_dummy()
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// find the page a pointer belongs to
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) {
|
||||
uintptr_t seeking = (uintptr_t)ptr;
|
||||
ARAL_PAGE *page;
|
||||
|
||||
for(page = ar->internal.pages; page ; page = page->next) {
|
||||
if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size))
|
||||
break;
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// find a page with a free slot (there shouldn't be any)
|
||||
|
||||
#ifdef NETDATA_ARRAYALLOC_INTERNAL_CHECKS
|
||||
static inline ARAL_PAGE *find_page_with_free_slots_internal_check(ARAL *ar) {
|
||||
ARAL_PAGE *page;
|
||||
|
||||
for(page = ar->internal.pages; page ; page = page->next) {
|
||||
if(page->free_list)
|
||||
break;
|
||||
|
||||
internal_fatal(page->size - page->used_elements * ar->internal.element_size >= ar->internal.element_size,
|
||||
"ARRAYALLOC: a page is marked full, but it is not!");
|
||||
|
||||
internal_fatal(page->size < page->used_elements * ar->internal.element_size,
|
||||
"ARRAYALLOC: a page has been overflown!");
|
||||
}
|
||||
|
||||
return page;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Allocate a new page for this ARAL and link it at the front of the page
// list. Page size starts at initial_elements * element_size and doubles on
// every subsequent page (via allocation_multiplier) until max_alloc_size
// is reached. The whole page initially forms a single free-list entry.
// Caller must hold the aral lock.
static void arrayalloc_add_page(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    if(unlikely(!ar->internal.initialized))
        arrayalloc_init(ar);

    ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE));
    page->size = ar->initial_elements * ar->internal.element_size * ar->internal.allocation_multiplier;
    if(page->size > ar->internal.max_alloc_size)
        page->size = ar->internal.max_alloc_size;
    else
        ar->internal.allocation_multiplier *= 2;  // grow the next page

    if(ar->internal.mmap) {
        // file-backed page: one numbered file per page under cache_dir
        ar->internal.file_number++;
        char filename[FILENAME_MAX + 1];
        snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->cache_dir, ar->filename, ar->internal.file_number);
        page->filename = strdupz(filename);
        page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0, false, NULL);
        if (unlikely(!page->data))
            fatal("Cannot allocate arrayalloc buffer of size %zu on filename '%s'", page->size, page->filename);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        page->data = mallocz_int(page->size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        page->data = mallocz(page->size);
#endif
    }

    // link the free space to its page
    // (the entire page is one big free slot to be split on demand)
    ARAL_FREE *fr = (ARAL_FREE *)page->data;
    fr->size = page->size;
    fr->page = page;
    fr->next = NULL;
    page->free_list = fr;

    // link the new page at the front of the list of pages
    DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next);

    arrayalloc_free_validate_internal_check(ar, fr);
}
|
||||
|
||||
// Acquire the ARAL spinlock; a no-op for lockless (single-threaded) ARALs.
static inline void arrayalloc_lock(ARAL *ar) {
    if(ar->internal.lockless)
        return;

    netdata_spinlock_lock(&ar->internal.spinlock);
}
|
||||
|
||||
// Release the ARAL spinlock; a no-op for lockless (single-threaded) ARALs.
static inline void arrayalloc_unlock(ARAL *ar) {
    if(ar->internal.lockless)
        return;

    netdata_spinlock_unlock(&ar->internal.spinlock);
}
|
||||
|
||||
ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap, bool lockless) {
|
||||
ARAL *ar = callocz(1, sizeof(ARAL));
|
||||
ar->requested_element_size = element_size;
|
||||
ar->initial_elements = elements;
|
||||
ar->filename = filename;
|
||||
ar->cache_dir = cache_dir;
|
||||
ar->use_mmap = mmap;
|
||||
ar->internal.lockless = lockless;
|
||||
return ar;
|
||||
}
|
||||
|
||||
// Unlink `page` from the ARAL's page list and release its memory:
// munmap + delete the backing file in mmap mode, free otherwise.
// Caller must hold the aral lock.
void arrayalloc_del_page(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {

    DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next);

    // free it
    if (ar->internal.mmap) {
        netdata_munmap(page->data, page->size);

        // BUGFIX: unlink() returns 0 on success and -1 on error - the old
        // check for `== 1` could never fire, hiding unlink failures
        if (unlikely(unlink(page->filename) == -1))
            error("Cannot delete file '%s'", page->filename);

        freez((void *)page->filename);
    }
    else {
#ifdef NETDATA_TRACE_ALLOCATIONS
        freez_int(page->data TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
#else
        freez(page->data);
#endif
    }

    freez(page);
}
|
||||
|
||||
// Tear down an ARAL: release every remaining page, then the handle itself.
// Any still-allocated elements become invalid.
void arrayalloc_destroy_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
    arrayalloc_lock(ar);

    ARAL_PAGE *page;
    while((page = ar->internal.pages) != NULL)
        arrayalloc_del_page(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);

    arrayalloc_unlock(ar);
    freez(ar);
}
|
||||
|
||||
// Allocate one element from the ARAL. Takes the free slot at the head of
// the first page's free list (pages with free slots are kept first), adds
// a page when none is available, and stores the owning page's pointer
// right after the element so freez() can find it in O(1).
void *arrayalloc_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {

    if(unlikely(!ar->internal.initialized))
        arrayalloc_init(ar);

    arrayalloc_lock(ar);

    if(unlikely(!ar->internal.pages || !ar->internal.pages->free_list)) {
#ifdef NETDATA_ARRAYALLOC_INTERNAL_CHECKS
        internal_fatal(find_page_with_free_slots_internal_check(ar) != NULL,
                       "ARRAYALLOC: first page does not have any free slots, but there is another that has!");
#endif
        arrayalloc_add_page(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
    }

    ARAL_PAGE *page = ar->internal.pages;
    ARAL_FREE *found_fr = page->free_list;

    internal_fatal(!found_fr,
                   "ARRAYALLOC: free item to use, cannot be NULL.");

    internal_fatal(found_fr->size < ar->internal.element_size,
                   "ARRAYALLOC: free item size %zu, cannot be smaller than %zu",
                   found_fr->size, ar->internal.element_size);

    // decide whether the free-list entry is consumed whole, or split:
    // it is consumed whole when what would remain is smaller than one element
    if(unlikely(found_fr->size - ar->internal.element_size < ar->internal.element_size)) {
        // we can use the entire free space entry

        page->free_list = found_fr->next;

        if(unlikely(!page->free_list)) {
            // we are done with this page
            // move the full page last
            // so that pages with free items remain first in the list
            DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next);
            DOUBLE_LINKED_LIST_APPEND_UNSAFE(ar->internal.pages, page, prev, next);
        }
    }
    else {
        // we can split the free space entry
        // the remainder becomes a new (smaller) free-list entry just past
        // the element we are handing out

        uint8_t *data = (uint8_t *)found_fr;
        ARAL_FREE *fr = (ARAL_FREE *)&data[ar->internal.element_size];
        fr->page = page;
        fr->size = found_fr->size - ar->internal.element_size;

        // link the free slot first in the page
        fr->next = found_fr->next;
        page->free_list = fr;

        arrayalloc_free_validate_internal_check(ar, fr);
    }

    page->used_elements++;

    // put the page pointer after the element
    uint8_t *data = (uint8_t *)found_fr;
    ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset];
    *page_ptr = page;

    arrayalloc_unlock(ar);
    return (void *)found_fr;
}
|
||||
|
||||
// Return an element to its ARAL. Locates the owning page via the pointer
// stored right after the element, pushes the element onto that page's free
// list, deletes the page when it becomes empty, and otherwise moves the
// page to the front so the next allocation reuses it. NULL is a no-op.
void arrayalloc_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {

    if(unlikely(!ptr)) return;
    arrayalloc_lock(ar);

    // get the page pointer
    ARAL_PAGE *page;
    {
        uint8_t *data = (uint8_t *)ptr;
        ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset];
        page = *page_ptr;

#ifdef NETDATA_INTERNAL_CHECKS
        // make it NULL so that we will fail on double free
        // do not enable this on production, because the MMAP file
        // will need to be saved again!
        *page_ptr = NULL;
#endif
    }

#ifdef NETDATA_ARRAYALLOC_INTERNAL_CHECKS
    {
        // find the page ptr belongs
        // (cross-check the stored back-pointer against a full page scan)
        ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr);

        if(unlikely(page != page2))
            fatal("ARRAYALLOC: page pointers do not match!");

        if (unlikely(!page2))
            fatal("ARRAYALLOC: free of pointer %p is not in arrayalloc address space.", ptr);
    }
#endif

    if(unlikely(!page))
        fatal("ARRAYALLOC: possible corruption or double free of pointer %p", ptr);

    if (unlikely(!page->used_elements))
        fatal("ARRAYALLOC: free of pointer %p is inside a page without any active allocations.", ptr);

    page->used_elements--;

    // make this element available
    // (reuse the element's memory as a free-list node)
    ARAL_FREE *fr = (ARAL_FREE *)ptr;
    fr->page = page;
    fr->size = ar->internal.element_size;
    fr->next = page->free_list;
    page->free_list = fr;

    // if the page is empty, release it
    if(!page->used_elements)
        arrayalloc_del_page(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);

    else if(page != ar->internal.pages) {
        // move the page with free item first
        // so that the next allocation will use this page
        DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next);
        DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next);
    }

    arrayalloc_unlock(ar);
}
|
||||
|
||||
// ARAL unit test: allocates/frees `elements` items in several patterns
// (partial frees, strided frees, random churn) and verifies that no pages
// remain after everything is freed. Returns 0 on success, 1 on leftovers.
int aral_unittest(size_t elements) {
    // BUGFIX: with elements < 10, `increment` below would be 0 and the
    // churn loop (all += increment) would never terminate; also the VLAs
    // here must never have zero length (undefined behavior in C)
    if (elements < 10)
        elements = 10;

    char *cache_dir = "/tmp/";
    ARAL *ar = arrayalloc_create(20, 10, "test-aral", &cache_dir, false, false);

    void *pointers[elements];

    // fill completely
    for(size_t i = 0; i < elements ;i++) {
        pointers[i] = arrayalloc_mallocz(ar);
    }

    // free and re-allocate growing prefixes
    for(size_t div = 5; div >= 2 ;div--) {
        for (size_t i = 0; i < elements / div; i++) {
            arrayalloc_freez(ar, pointers[i]);
        }

        for (size_t i = 0; i < elements / div; i++) {
            pointers[i] = arrayalloc_mallocz(ar);
        }
    }

    // free and re-allocate strided subsets
    for(size_t step = 50; step >= 10 ;step -= 10) {
        for (size_t i = 0; i < elements; i += step) {
            arrayalloc_freez(ar, pointers[i]);
        }

        for (size_t i = 0; i < elements; i += step) {
            pointers[i] = arrayalloc_mallocz(ar);
        }
    }

    // free everything - no pages should remain
    for(size_t i = 0; i < elements ;i++) {
        arrayalloc_freez(ar, pointers[i]);
    }

    if(ar->internal.pages) {
        fprintf(stderr, "ARAL leftovers detected (1)");
        return 1;
    }

    // random churn: grow the working set in 10 steps, freeing and
    // re-allocating a pseudo-random subset at each step
    size_t ops = 0; (void)ops;
    size_t increment = elements / 10;
    size_t allocated = 0;
    for(size_t all = increment; all <= elements ; all += increment) {

        for(; allocated < all ; allocated++) {
            pointers[allocated] = arrayalloc_mallocz(ar);
            ops++;
        }

        size_t to_free = now_realtime_usec() % all;
        // BUGFIX: a zero-length VLA is undefined behavior - always free
        // at least one element
        if(!to_free) to_free = 1;
        size_t free_list[to_free];
        for(size_t i = 0; i < to_free ;i++) {
            size_t pos;
            do {
                pos = now_realtime_usec() % all;
            } while(!pointers[pos]);

            arrayalloc_freez(ar, pointers[pos]);
            pointers[pos] = NULL;
            free_list[i] = pos;
            ops++;
        }

        for(size_t i = 0; i < to_free ;i++) {
            size_t pos = free_list[i];
            pointers[pos] = arrayalloc_mallocz(ar);
            ops++;
        }
    }

    // free everything again - no pages should remain
    for(size_t i = 0; i < allocated - 1 ;i++) {
        arrayalloc_freez(ar, pointers[i]);
        ops++;
    }

    arrayalloc_freez(ar, pointers[allocated - 1]);

    if(ar->internal.pages) {
        fprintf(stderr, "ARAL leftovers detected (2)");
        return 1;
    }

    return 0;
}
|
|
@ -1,55 +0,0 @@
|
|||
|
||||
#ifndef ARRAYALLOC_H
#define ARRAYALLOC_H 1

#include "../libnetdata.h"

// Array allocator: hands out fixed-size elements carved from larger pages,
// which are either malloc'd or mmap'd to files under cache_dir.
typedef struct arrayalloc {
    size_t requested_element_size;  // element size as requested by the caller
    size_t initial_elements;        // elements of the first page (later pages grow)
    const char *filename;           // prefix for the mmap backing files
    char **cache_dir;               // directory for mmap files (indirect - may be set after create)
    bool use_mmap;                  // prefer file-backed pages over malloc

    // private members - do not touch
    struct {
        bool mmap;                      // true when mmap mode is actually enabled
        bool lockless;                  // skip the spinlock (single-threaded users only)
        bool initialized;               // lazy initialization has run
        size_t element_size;            // real element size: aligned + trailing page pointer
        size_t page_ptr_offset;         // offset of the page back-pointer inside each element
        size_t file_number;             // counter used to name mmap backing files
        size_t natural_page_size;       // OS page size
        size_t allocation_multiplier;   // doubles per new page, bounded by max_alloc_size
        size_t max_alloc_size;          // upper limit of a single page allocation
        SPINLOCK spinlock;
        struct arrayalloc_page *pages;  // page list; pages with free slots kept first
    } internal;
} ARAL;

ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap, bool lockless);
int aral_unittest(size_t elements);

#ifdef NETDATA_TRACE_ALLOCATIONS

// with allocation tracing enabled, pass the caller's source location through
#define arrayalloc_mallocz(ar) arrayalloc_mallocz_internal(ar, __FILE__, __FUNCTION__, __LINE__)
#define arrayalloc_freez(ar, ptr) arrayalloc_freez_internal(ar, ptr, __FILE__, __FUNCTION__, __LINE__)
#define arrayalloc_destroy(ar) arrayalloc_destroy_internal(ar, __FILE__, __FUNCTION__, __LINE__)

void *arrayalloc_mallocz_internal(ARAL *ar, const char *file, const char *function, size_t line);
void arrayalloc_freez_internal(ARAL *ar, void *ptr, const char *file, const char *function, size_t line);
void arrayalloc_destroy_internal(ARAL *ar, const char *file, const char *function, size_t line);

#else // NETDATA_TRACE_ALLOCATIONS

#define arrayalloc_mallocz(ar) arrayalloc_mallocz_internal(ar)
#define arrayalloc_freez(ar, ptr) arrayalloc_freez_internal(ar, ptr)
#define arrayalloc_destroy(ar) arrayalloc_destroy_internal(ar)

void *arrayalloc_mallocz_internal(ARAL *ar);
void arrayalloc_freez_internal(ARAL *ar, void *ptr);
void arrayalloc_destroy_internal(ARAL *ar);

#endif // NETDATA_TRACE_ALLOCATIONS

#endif // ARRAYALLOC_H
|
|
@ -789,7 +789,7 @@ static void garbage_collect_pending_deletes(DICTIONARY *dict) {
|
|||
// we didn't get a reference
|
||||
|
||||
if(item_is_not_referenced_and_can_be_removed(dict, item)) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next);
|
||||
dict_item_free_with_hooks(dict, item);
|
||||
deleted++;
|
||||
|
||||
|
@ -1167,9 +1167,9 @@ static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item)
|
|||
ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE);
|
||||
|
||||
if(dict->options & DICT_OPTION_ADD_IN_FRONT)
|
||||
DOUBLE_LINKED_LIST_PREPEND_UNSAFE(dict->items.list, item, prev, next);
|
||||
DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(dict->items.list, item, prev, next);
|
||||
else
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(dict->items.list, item, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dict->items.list, item, prev, next);
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
item->ll_adder_pid = gettid();
|
||||
|
@ -1186,7 +1186,7 @@ static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item)
|
|||
static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item) {
|
||||
ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE);
|
||||
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next);
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
item->ll_remover_pid = gettid();
|
||||
|
@ -1234,28 +1234,43 @@ static inline size_t item_get_name_len(const DICTIONARY_ITEM *item) {
|
|||
return strlen(item->caller_name);
|
||||
}
|
||||
|
||||
static ARAL dict_items_aral = {
|
||||
.filename = NULL,
|
||||
.cache_dir = NULL,
|
||||
.use_mmap = false,
|
||||
.initial_elements = 65536 / sizeof(DICTIONARY_ITEM),
|
||||
.requested_element_size = sizeof(DICTIONARY_ITEM),
|
||||
};
|
||||
static ARAL *dict_items_aral = NULL;
|
||||
static ARAL *dict_shared_items_aral = NULL;
|
||||
|
||||
static ARAL dict_shared_items_aral = {
|
||||
.filename = NULL,
|
||||
.cache_dir = NULL,
|
||||
.use_mmap = false,
|
||||
.initial_elements = 65536 / sizeof(DICTIONARY_ITEM_SHARED),
|
||||
.requested_element_size = sizeof(DICTIONARY_ITEM_SHARED),
|
||||
};
|
||||
// Lazily create the two process-wide ARALs shared by all dictionaries:
// one for DICTIONARY_ITEM and one for DICTIONARY_ITEM_SHARED.
// Safe to call many times; creation happens only once.
// NOTE(review): the unlocked first check is a double-checked pattern with
// plain (non-atomic) loads/stores of the pointers - presumably acceptable
// here because creation is idempotent under the spinlock; confirm this is
// race-free on the target platforms.
void dictionary_static_items_aral_init(void) {
    static SPINLOCK spinlock;   // zero-initialized static - assumed to be a valid unlocked state, TODO confirm

    if(unlikely(!dict_items_aral || !dict_shared_items_aral)) {
        netdata_spinlock_lock(&spinlock);

        // we have to check again
        if(!dict_items_aral)
            dict_items_aral = aral_create(
                    "dict-items",
                    sizeof(DICTIONARY_ITEM),
                    0,
                    4096,
                    NULL, NULL, false, false);

        // we have to check again
        if(!dict_shared_items_aral)
            dict_shared_items_aral = aral_create(
                    "dict-shared-items",
                    sizeof(DICTIONARY_ITEM_SHARED),
                    0,
                    4096,
                    NULL, NULL, false, false);

        netdata_spinlock_unlock(&spinlock);
    }
}
|
||||
|
||||
static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t *allocated_bytes, DICTIONARY_ITEM *master_item) {
|
||||
DICTIONARY_ITEM *item;
|
||||
|
||||
size_t size = sizeof(DICTIONARY_ITEM);
|
||||
// item = callocz(1, size);
|
||||
item = arrayalloc_mallocz(&dict_items_aral);
|
||||
item = aral_mallocz(dict_items_aral);
|
||||
memset(item, 0, sizeof(DICTIONARY_ITEM));
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
|
@ -1276,7 +1291,7 @@ static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t
|
|||
else {
|
||||
size = sizeof(DICTIONARY_ITEM_SHARED);
|
||||
// item->shared = callocz(1, size);
|
||||
item->shared = arrayalloc_mallocz(&dict_shared_items_aral);
|
||||
item->shared = aral_mallocz(dict_shared_items_aral);
|
||||
memset(item->shared, 0, sizeof(DICTIONARY_ITEM_SHARED));
|
||||
|
||||
item->shared->links = 1;
|
||||
|
@ -1418,13 +1433,13 @@ static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item)
|
|||
value_size += item->shared->value_len;
|
||||
|
||||
// freez(item->shared);
|
||||
arrayalloc_freez(&dict_shared_items_aral, item->shared);
|
||||
aral_freez(dict_shared_items_aral, item->shared);
|
||||
item->shared = NULL;
|
||||
item_size += sizeof(DICTIONARY_ITEM_SHARED);
|
||||
}
|
||||
|
||||
// freez(item);
|
||||
arrayalloc_freez(&dict_items_aral, item);
|
||||
aral_freez(dict_items_aral, item);
|
||||
|
||||
item_size += sizeof(DICTIONARY_ITEM);
|
||||
|
||||
|
@ -1971,6 +1986,7 @@ static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dicti
|
|||
dict_size += reference_counter_init(dict);
|
||||
dict_size += hashtable_init_unsafe(dict);
|
||||
|
||||
dictionary_static_items_aral_init();
|
||||
pointer_index_init(dict);
|
||||
|
||||
DICTIONARY_STATS_PLUS_MEMORY(dict, 0, dict_size, 0);
|
||||
|
|
|
@ -64,7 +64,7 @@ void julyl_cleanup1(void) {
|
|||
|
||||
if(julyl_globals.protected.available_items && julyl_globals.protected.available > 10) {
|
||||
item = julyl_globals.protected.available_items;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(julyl_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(julyl_globals.protected.available_items, item, cache.prev, cache.next);
|
||||
julyl_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -85,7 +85,7 @@ struct JulyL *julyl_get(void) {
|
|||
|
||||
j = julyl_globals.protected.available_items;
|
||||
if(likely(j)) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next);
|
||||
julyl_globals.protected.available--;
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ static void julyl_release(struct JulyL *j) {
|
|||
__atomic_add_fetch(&julyl_globals.atomics.reallocs, j->reallocs, __ATOMIC_RELAXED);
|
||||
|
||||
netdata_spinlock_lock(&julyl_globals.protected.spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next);
|
||||
julyl_globals.protected.available++;
|
||||
netdata_spinlock_unlock(&julyl_globals.protected.spinlock);
|
||||
}
|
||||
|
|
|
@ -233,8 +233,9 @@ extern "C" {
|
|||
|
||||
// ---------------------------------------------------------------------------------------------
|
||||
// double linked list management
|
||||
// inspired by https://github.com/troydhanson/uthash/blob/master/src/utlist.h
|
||||
|
||||
#define DOUBLE_LINKED_LIST_PREPEND_UNSAFE(head, item, prev, next) \
|
||||
#define DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next) \
|
||||
do { \
|
||||
(item)->next = (head); \
|
||||
\
|
||||
|
@ -248,7 +249,7 @@ extern "C" {
|
|||
(head) = (item); \
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_APPEND_UNSAFE(head, item, prev, next) \
|
||||
#define DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next) \
|
||||
do { \
|
||||
if(likely(head)) { \
|
||||
(item)->prev = (head)->prev; \
|
||||
|
@ -264,39 +265,97 @@ extern "C" {
|
|||
\
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_REMOVE_UNSAFE(head, item, prev, next) \
|
||||
#define DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(head, item, prev, next) \
|
||||
do { \
|
||||
fatal_assert((head) != NULL); \
|
||||
fatal_assert((item)->prev != NULL); \
|
||||
\
|
||||
if((item)->prev == (item)) { \
|
||||
if((item)->prev == (item)) \
|
||||
/* it is the only item in the list */ \
|
||||
(head) = NULL; \
|
||||
} \
|
||||
\
|
||||
else if((item) == (head)) { \
|
||||
/* it is the first item */ \
|
||||
fatal_assert((item)->next != NULL); \
|
||||
(item)->next->prev = (item)->prev; \
|
||||
(head) = (item)->next; \
|
||||
} \
|
||||
else { \
|
||||
/* it is any other item */ \
|
||||
(item)->prev->next = (item)->next; \
|
||||
if ((item)->next) { \
|
||||
\
|
||||
if ((item)->next) \
|
||||
(item)->next->prev = (item)->prev; \
|
||||
} \
|
||||
else { \
|
||||
else \
|
||||
(head)->prev = (item)->prev; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
(item)->next = NULL; \
|
||||
(item)->prev = NULL; \
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(head, existing, item, prev, next) \
|
||||
do { \
|
||||
if (existing) { \
|
||||
fatal_assert((head) != NULL); \
|
||||
fatal_assert((item) != NULL); \
|
||||
\
|
||||
(item)->next = (existing); \
|
||||
(item)->prev = (existing)->prev; \
|
||||
(existing)->prev = (item); \
|
||||
\
|
||||
if ((head) == (existing)) \
|
||||
(head) = (item); \
|
||||
else \
|
||||
(item)->prev->next = (item); \
|
||||
\
|
||||
} \
|
||||
else \
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(head, existing, item, prev, next) \
|
||||
do { \
|
||||
if (existing) { \
|
||||
fatal_assert((head) != NULL); \
|
||||
fatal_assert((item) != NULL); \
|
||||
\
|
||||
(item)->next = (existing)->next; \
|
||||
(item)->prev = (existing); \
|
||||
(existing)->next = (item); \
|
||||
\
|
||||
if ((item)->next) \
|
||||
(item)->next->prev = (item); \
|
||||
else \
|
||||
(head)->prev = (item); \
|
||||
} \
|
||||
else \
|
||||
DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next); \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_APPEND_LIST_UNSAFE(head, head2, prev, next) \
|
||||
do { \
|
||||
if (head2) { \
|
||||
if (head) { \
|
||||
__typeof(head2) _head2_last_item = (head2)->prev; \
|
||||
\
|
||||
(head2)->prev = (head)->prev; \
|
||||
(head)->prev->next = (head2); \
|
||||
\
|
||||
(head)->prev = _head2_last_item; \
|
||||
} \
|
||||
else \
|
||||
(head) = (head2); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_FOREACH_FORWARD(head, var, prev, next) \
|
||||
for ((var) = (head); (var) ; (var) = (var)->next)
|
||||
|
||||
#define DOUBLE_LINKED_LIST_FOREACH_BACKWARD(head, var, prev, next) \
|
||||
for ((var) = (head)?(head)->prev:NULL; (var) && (var) != (head)->prev ; (var) = (var)->prev)
|
||||
for ((var) = (head) ? (head)->prev : NULL ; (var) ; (var) = ((var) == (head)) ? NULL : (var)->prev)
|
||||
|
||||
// ---------------------------------------------------------------------------------------------
|
||||
|
||||
|
@ -481,7 +540,7 @@ extern char *netdata_configured_host_prefix;
|
|||
#include "json/json.h"
|
||||
#include "health/health.h"
|
||||
#include "string/utf8.h"
|
||||
#include "arrayalloc/arrayalloc.h"
|
||||
#include "libnetdata/aral/aral.h"
|
||||
#include "onewayalloc/onewayalloc.h"
|
||||
#include "worker_utilization/worker_utilization.h"
|
||||
|
||||
|
|
|
@ -12,8 +12,8 @@ typedef pthread_mutex_t netdata_mutex_t;
|
|||
typedef struct netdata_spinlock {
|
||||
bool locked;
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
size_t spins;
|
||||
pid_t locker_pid;
|
||||
size_t spins;
|
||||
#endif
|
||||
} SPINLOCK;
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) {
|
|||
mp = mallocz(sizeof(struct netdata_popen));
|
||||
mp->pid = pid;
|
||||
|
||||
DOUBLE_LINKED_LIST_PREPEND_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
}
|
||||
|
||||
// myp_del deletes pid if we're tracking.
|
||||
|
@ -61,7 +61,7 @@ static void netdata_popen_tracking_del_pid(pid_t pid) {
|
|||
}
|
||||
|
||||
if(mp) {
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
freez(mp);
|
||||
}
|
||||
else
|
||||
|
@ -96,7 +96,7 @@ void netdata_popen_tracking_cleanup(void) {
|
|||
|
||||
while(netdata_popen_root) {
|
||||
struct netdata_popen *mp = netdata_popen_root;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next);
|
||||
freez(mp);
|
||||
}
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ void worker_register(const char *name) {
|
|||
}
|
||||
|
||||
netdata_spinlock_lock(&workname->spinlock);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(workname->base, worker, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(workname->base, worker, prev, next);
|
||||
netdata_spinlock_unlock(&workname->spinlock);
|
||||
|
||||
netdata_spinlock_unlock(&workers_globals.spinlock);
|
||||
|
@ -141,7 +141,7 @@ void worker_unregister(void) {
|
|||
if(PValue) {
|
||||
struct workers_workname *workname = *PValue;
|
||||
netdata_spinlock_lock(&workname->spinlock);
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(workname->base, worker, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(workname->base, worker, prev, next);
|
||||
netdata_spinlock_unlock(&workname->spinlock);
|
||||
|
||||
if(!workname->base) {
|
||||
|
|
|
@ -392,7 +392,7 @@ static void replication_query_execute(BUFFER *wb, struct replication_query *q, s
|
|||
if (likely( d->sp.start_time_s <= min_end_time &&
|
||||
d->sp.end_time_s >= min_end_time &&
|
||||
!storage_point_is_unset(d->sp) &&
|
||||
!storage_point_is_empty(d->sp))) {
|
||||
!storage_point_is_gap(d->sp))) {
|
||||
|
||||
buffer_sprintf(wb, PLUGINSD_KEYWORD_REPLAY_SET " \"%s\" " NETDATA_DOUBLE_FORMAT " \"%s\"\n",
|
||||
rrddim_id(d->rd), d->sp.sum, d->sp.flags & SN_FLAG_RESET ? "R" : "");
|
||||
|
|
|
@ -498,8 +498,8 @@ int connect_to_one_of_destinations(
|
|||
// move the current item to the end of the list
|
||||
// without this, this destination will break the loop again and again
|
||||
// not advancing the destinations to find one that may work
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->destinations, d, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(host->destinations, d, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, d, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(host->destinations, d, prev, next);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -522,7 +522,7 @@ bool destinations_init_add_one(char *entry, void *data) {
|
|||
|
||||
__atomic_add_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED);
|
||||
|
||||
DOUBLE_LINKED_LIST_APPEND_UNSAFE(t->list, d, prev, next);
|
||||
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(t->list, d, prev, next);
|
||||
|
||||
t->count++;
|
||||
info("STREAM: added streaming destination No %d: '%s' to host '%s'", t->count, string2str(d->destination), rrdhost_hostname(t->host));
|
||||
|
@ -549,7 +549,7 @@ void rrdpush_destinations_init(RRDHOST *host) {
|
|||
void rrdpush_destinations_free(RRDHOST *host) {
|
||||
while (host->destinations) {
|
||||
struct rrdpush_destinations *tmp = host->destinations;
|
||||
DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->destinations, tmp, prev, next);
|
||||
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(host->destinations, tmp, prev, next);
|
||||
string_freez(tmp->destination);
|
||||
freez(tmp);
|
||||
__atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_senders, sizeof(struct rrdpush_destinations), __ATOMIC_RELAXED);
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "percentile/percentile.h"
|
||||
#include "trimmed_mean/trimmed_mean.h"
|
||||
|
||||
#define POINTS_TO_EXPAND_QUERY 0
|
||||
#define POINTS_TO_EXPAND_QUERY 5
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
|
@ -927,6 +927,8 @@ typedef struct query_engine_ops {
|
|||
// query planer
|
||||
size_t current_plan;
|
||||
time_t current_plan_expire_time;
|
||||
time_t plan_expanded_after;
|
||||
time_t plan_expanded_before;
|
||||
|
||||
// storage queries
|
||||
size_t tier;
|
||||
|
@ -955,6 +957,20 @@ typedef struct query_engine_ops {
|
|||
|
||||
#define query_plan_should_switch_plan(ops, now) ((now) >= (ops)->current_plan_expire_time)
|
||||
|
||||
static size_t query_planer_expand_duration_in_points(time_t this_update_every, time_t next_update_every) {
|
||||
|
||||
time_t delta = this_update_every - next_update_every;
|
||||
if(delta < 0) delta = -delta;
|
||||
|
||||
size_t points;
|
||||
if(delta < this_update_every * POINTS_TO_EXPAND_QUERY)
|
||||
points = POINTS_TO_EXPAND_QUERY;
|
||||
else
|
||||
points = (delta + this_update_every - 1) / this_update_every;
|
||||
|
||||
return points;
|
||||
}
|
||||
|
||||
static void query_planer_initialize_plans(QUERY_ENGINE_OPS *ops) {
|
||||
QUERY_METRIC *qm = ops->qm;
|
||||
|
||||
|
@ -962,8 +978,35 @@ static void query_planer_initialize_plans(QUERY_ENGINE_OPS *ops) {
|
|||
size_t tier = qm->plan.array[p].tier;
|
||||
time_t update_every = qm->tiers[tier].db_update_every_s;
|
||||
|
||||
time_t after = qm->plan.array[p].after - (update_every * POINTS_TO_EXPAND_QUERY);
|
||||
time_t before = qm->plan.array[p].before + (update_every * POINTS_TO_EXPAND_QUERY);
|
||||
size_t points_to_add_to_after;
|
||||
if(p > 0) {
|
||||
// there is another plan before to this
|
||||
|
||||
size_t tier0 = qm->plan.array[p - 1].tier;
|
||||
time_t update_every0 = qm->tiers[tier0].db_update_every_s;
|
||||
|
||||
points_to_add_to_after = query_planer_expand_duration_in_points(update_every, update_every0);
|
||||
}
|
||||
else
|
||||
points_to_add_to_after = (tier == 0) ? 0 : POINTS_TO_EXPAND_QUERY;
|
||||
|
||||
size_t points_to_add_to_before;
|
||||
if(p + 1 < qm->plan.used) {
|
||||
// there is another plan after to this
|
||||
|
||||
size_t tier1 = qm->plan.array[p+1].tier;
|
||||
time_t update_every1 = qm->tiers[tier1].db_update_every_s;
|
||||
|
||||
points_to_add_to_before = query_planer_expand_duration_in_points(update_every, update_every1);
|
||||
}
|
||||
else
|
||||
points_to_add_to_before = POINTS_TO_EXPAND_QUERY;
|
||||
|
||||
time_t after = qm->plan.array[p].after - (time_t)(update_every * points_to_add_to_after);
|
||||
time_t before = qm->plan.array[p].before + (time_t)(update_every * points_to_add_to_before);
|
||||
|
||||
qm->plan.array[p].expanded_after = after;
|
||||
qm->plan.array[p].expanded_before = before;
|
||||
|
||||
struct query_metric_tier *tier_ptr = &qm->tiers[tier];
|
||||
tier_ptr->eng->api.query_ops.init(
|
||||
|
@ -1027,9 +1070,12 @@ static void query_planer_activate_plan(QUERY_ENGINE_OPS *ops, size_t plan_id, ti
|
|||
ops->current_plan_expire_time = qm->plan.array[plan_id + 1].after;
|
||||
else
|
||||
ops->current_plan_expire_time = qm->plan.array[plan_id].before;
|
||||
|
||||
ops->plan_expanded_after = qm->plan.array[plan_id].expanded_after;
|
||||
ops->plan_expanded_before = qm->plan.array[plan_id].expanded_before;
|
||||
}
|
||||
|
||||
static void query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t last_point_end_time) {
|
||||
static bool query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t last_point_end_time) {
|
||||
QUERY_METRIC *qm = ops->qm;
|
||||
|
||||
size_t old_plan = ops->current_plan;
|
||||
|
@ -1043,7 +1089,7 @@ static void query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t las
|
|||
ops->current_plan_expire_time = ops->r->internal.qt->window.before;
|
||||
// let the query run with current plan
|
||||
// we will not switch it
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
next_plan_before_time = qm->plan.array[ops->current_plan].before;
|
||||
|
@ -1052,11 +1098,12 @@ static void query_planer_next_plan(QUERY_ENGINE_OPS *ops, time_t now, time_t las
|
|||
if(!query_metric_is_valid_tier(qm, qm->plan.array[ops->current_plan].tier)) {
|
||||
ops->current_plan = old_plan;
|
||||
ops->current_plan_expire_time = ops->r->internal.qt->window.before;
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
query_planer_finalize_plan(ops, old_plan);
|
||||
query_planer_activate_plan(ops, ops->current_plan, MIN(now, last_point_end_time));
|
||||
return true;
|
||||
}
|
||||
|
||||
static int compare_query_plan_entries_on_start_time(const void *a, const void *b) {
|
||||
|
@ -1273,6 +1320,11 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
QUERY_POINT last1_point = QUERY_POINT_EMPTY;
|
||||
QUERY_POINT new_point = QUERY_POINT_EMPTY;
|
||||
|
||||
// ONE POINT READ-AHEAD
|
||||
// when we switch plans, we read-ahead a point from the next plan
|
||||
// to join them smoothly at the exact time the next plan begins
|
||||
STORAGE_POINT next1_point = STORAGE_POINT_UNSET;
|
||||
|
||||
time_t now_start_time = after_wanted - ops->query_granularity;
|
||||
time_t now_end_time = after_wanted + ops->view_update_every - ops->query_granularity;
|
||||
|
||||
|
@ -1311,8 +1363,41 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
|
||||
// fetch the new point
|
||||
{
|
||||
db_points_read_since_plan_switch++;
|
||||
STORAGE_POINT sp = ops->next_metric(ops->handle);
|
||||
STORAGE_POINT sp;
|
||||
if(likely(storage_point_is_unset(next1_point))) {
|
||||
db_points_read_since_plan_switch++;
|
||||
sp = ops->next_metric(ops->handle);
|
||||
}
|
||||
else {
|
||||
// ONE POINT READ-AHEAD
|
||||
sp = next1_point;
|
||||
storage_point_unset(next1_point);
|
||||
db_points_read_since_plan_switch = 1;
|
||||
}
|
||||
|
||||
// ONE POINT READ-AHEAD
|
||||
if(unlikely(query_plan_should_switch_plan(ops, sp.end_time_s) &&
|
||||
query_planer_next_plan(ops, now_end_time, new_point.end_time))) {
|
||||
|
||||
// The end time of the current point, crosses our plans (tiers)
|
||||
// so, we switched plan (tier)
|
||||
//
|
||||
// There are 2 cases now:
|
||||
//
|
||||
// A. the entire point of the previous plan is to the future of point from the next plan
|
||||
// B. part of the point of the previous plan overlaps with the point from the next plan
|
||||
|
||||
STORAGE_POINT sp2 = ops->next_metric(ops->handle);
|
||||
|
||||
if(sp.start_time_s > sp2.start_time_s)
|
||||
// the point from the previous plan is useless
|
||||
sp = sp2;
|
||||
else
|
||||
// let the query run from the previous plan
|
||||
// but setting this will also cut off the interpolation
|
||||
// of the point from the previous plan
|
||||
next1_point = sp2;
|
||||
}
|
||||
|
||||
ops->db_points_read_per_tier[ops->tier]++;
|
||||
ops->db_total_points_read++;
|
||||
|
@ -1326,8 +1411,8 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
// info("QUERY: got point %zu, from time %ld to %ld // now from %ld to %ld // query from %ld to %ld",
|
||||
// new_point.id, new_point.start_time, new_point.end_time, now_start_time, now_end_time, after_wanted, before_wanted);
|
||||
//
|
||||
// set the right value to the point we got
|
||||
if(likely(!storage_point_is_unset(sp) && !storage_point_is_empty(sp))) {
|
||||
// get the right value from the point we got
|
||||
if(likely(!storage_point_is_unset(sp) && !storage_point_is_gap(sp))) {
|
||||
|
||||
if(unlikely(use_anomaly_bit_as_value))
|
||||
new_point.value = new_point.anomaly;
|
||||
|
@ -1391,7 +1476,7 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
count_same_end_time = 0;
|
||||
|
||||
// decide how to use this point
|
||||
if(likely(new_point.end_time < now_end_time)) { // likely to favor tier0
|
||||
if(likely(new_point.end_time <= now_end_time)) { // likely to favor tier0
|
||||
// this db point ends before our now_end_time
|
||||
|
||||
if(likely(new_point.end_time >= now_start_time)) { // likely to favor tier0
|
||||
|
@ -1408,8 +1493,8 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
// at exactly the time we will want
|
||||
|
||||
// we only log if this is not point 1
|
||||
internal_error(new_point.end_time < after_wanted &&
|
||||
new_point.id > POINTS_TO_EXPAND_QUERY + 1,
|
||||
internal_error(new_point.end_time < ops->plan_expanded_after &&
|
||||
db_points_read_since_plan_switch > 1,
|
||||
"QUERY: '%s', dimension '%s' next_metric() "
|
||||
"returned point %zu from %ld time %ld, "
|
||||
"which is entirely before our current timeframe %ld to %ld "
|
||||
|
@ -1417,7 +1502,7 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
qt->id, string2str(qm->dimension.id),
|
||||
new_point.id, new_point.start_time, new_point.end_time,
|
||||
now_start_time, now_end_time,
|
||||
after_wanted, before_wanted);
|
||||
ops->plan_expanded_after, ops->plan_expanded_before);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1430,19 +1515,30 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_
|
|||
|
||||
if(unlikely(count_same_end_time)) {
|
||||
internal_error(true,
|
||||
"QUERY: '%s', dimension '%s', the database does not advance the query, it returned an end time less or equal to the end time of the last point we got %ld, %zu times",
|
||||
qt->id, string2str(qm->dimension.id), last1_point.end_time, count_same_end_time);
|
||||
"QUERY: '%s', dimension '%s', the database does not advance the query,"
|
||||
" it returned an end time less or equal to the end time of the last "
|
||||
"point we got %ld, %zu times",
|
||||
qt->id, string2str(qm->dimension.id),
|
||||
last1_point.end_time, count_same_end_time);
|
||||
|
||||
if(unlikely(new_point.end_time <= last1_point.end_time))
|
||||
new_point.end_time = now_end_time;
|
||||
}
|
||||
|
||||
time_t stop_time = new_point.end_time;
|
||||
if(unlikely(!storage_point_is_unset(next1_point))) {
|
||||
// ONE POINT READ-AHEAD
|
||||
// the point crosses the start time of the
|
||||
// read ahead storage point we have read
|
||||
stop_time = next1_point.start_time_s;
|
||||
}
|
||||
|
||||
// the inner loop
|
||||
// we have 3 points in memory: last2, last1, new
|
||||
// we select the one to use based on their timestamps
|
||||
|
||||
size_t iterations = 0;
|
||||
for ( ; now_end_time <= new_point.end_time && points_added < points_wanted ;
|
||||
for ( ; now_end_time <= stop_time && points_added < points_wanted ;
|
||||
now_end_time += ops->view_update_every, iterations++) {
|
||||
|
||||
// now_start_time is wrong in this loop
|
||||
|
|
|
@ -129,10 +129,10 @@ void web_client_request_done(struct web_client *w) {
|
|||
, mode
|
||||
, sent
|
||||
, size
|
||||
, -((size > 0) ? ((size - sent) / (double) size * 100.0) : 0.0)
|
||||
, dt_usec(&w->tv_ready, &w->tv_in) / 1000.0
|
||||
, dt_usec(&tv, &w->tv_ready) / 1000.0
|
||||
, dt_usec(&tv, &w->tv_in) / 1000.0
|
||||
, -((size > 0) ? ((double)(size - sent) / (double) size * 100.0) : 0.0)
|
||||
, (double)dt_usec(&w->tv_ready, &w->tv_in) / 1000.0
|
||||
, (double)dt_usec(&tv, &w->tv_ready) / 1000.0
|
||||
, (double)dt_usec(&tv, &w->tv_in) / 1000.0
|
||||
, w->response.code
|
||||
, strip_control_characters(w->last_url)
|
||||
);
|
||||
|
@ -302,7 +302,7 @@ int mysendfile(struct web_client *w, char *filename) {
|
|||
}
|
||||
}
|
||||
|
||||
// if the filename contains a .. refuse to serve it
|
||||
// if the filename contains a double dot refuse to serve it
|
||||
if(strstr(filename, "..") != 0) {
|
||||
debug(D_WEB_CLIENT_ACCESS, "%llu: File '%s' is not acceptable.", w->id, filename);
|
||||
w->response.data->contenttype = CT_TEXT_HTML;
|
||||
|
@ -831,9 +831,8 @@ static inline char *web_client_valid_method(struct web_client *w, char *s) {
|
|||
* @param s is the first address of the string.
|
||||
* @param ptr is the address of the separator.
|
||||
*/
|
||||
static void web_client_set_path_query(struct web_client *w, char *s, char *ptr) {
|
||||
static void web_client_set_path_query(struct web_client *w, const char *s, char *ptr) {
|
||||
w->url_path_length = (size_t)(ptr -s);
|
||||
|
||||
w->url_search_path = ptr;
|
||||
}
|
||||
|
||||
|
@ -1429,7 +1428,7 @@ static inline int web_client_process_url(RRDHOST *host, struct web_client *w, ch
|
|||
// replace the zero bytes with spaces
|
||||
buffer_char_replace(w->response.data, '\0', ' ');
|
||||
|
||||
// just leave the buffer as is
|
||||
// just leave the buffer as-is
|
||||
// it will be copied back to the client
|
||||
|
||||
return HTTP_RESP_OK;
|
||||
|
@ -1546,7 +1545,7 @@ void web_client_process_request(struct web_client *w) {
|
|||
break;
|
||||
}
|
||||
|
||||
// keep track of the time we done processing
|
||||
// keep track of the processing time
|
||||
now_realtime_timeval(&w->tv_ready);
|
||||
|
||||
w->response.sent = 0;
|
||||
|
@ -1847,7 +1846,7 @@ ssize_t web_client_read_file(struct web_client *w)
|
|||
if(unlikely(w->response.rlen <= w->response.data->len))
|
||||
return 0;
|
||||
|
||||
ssize_t left = w->response.rlen - w->response.data->len;
|
||||
ssize_t left = (ssize_t)(w->response.rlen - w->response.data->len);
|
||||
ssize_t bytes = read(w->ifd, &w->response.data->buffer[w->response.data->len], (size_t)left);
|
||||
if(likely(bytes > 0)) {
|
||||
size_t old = w->response.data->len;
|
||||
|
@ -1897,7 +1896,7 @@ ssize_t web_client_receive(struct web_client *w)
|
|||
return web_client_read_file(w);
|
||||
|
||||
ssize_t bytes;
|
||||
ssize_t left = w->response.data->size - w->response.data->len;
|
||||
ssize_t left = (ssize_t)(w->response.data->size - w->response.data->len);
|
||||
|
||||
// do we have any space for more data?
|
||||
buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_RECEIVE_SIZE);
|
||||
|
|
Loading…
Reference in New Issue