Hyperloglog avoid allocate more than 'server.hll_sparse_max_bytes' bytes of memory for sparse representation (#11438)

Before this PR, we use sdsMakeRoomFor() to expand the size of hyperloglog
string (sparse representation). And because sdsMakeRoomFor() uses a greedy
strategy (allocate about twice what we need), the memory we allocated for the
hyperloglog may be more than `server.hll_sparse_max_bytes` bytes.
The memory more than` server.hll_sparse_max_bytes` will be wasted.

In this pull request, tone down the greediness of the allocation growth, and also
make sure it'll never request more than `server.hll_sparse_max_bytes`.

This could in theory mean the size of the hyperloglog string is insufficient for the
increment we need, should be ok since in this case we promote the hyperloglog
to dense representation, an assertion was added to make sure.

This PR also add some tests and fixes some typo and indentation issues.
This commit is contained in:
Mingyi Kang 2022-11-28 23:35:31 +08:00 committed by GitHub
parent f0005b5328
commit f8ac5a6503
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 16 deletions

View File

@ -662,12 +662,22 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
* switch to dense representation. */
if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;
/* When updating a sparse representation, sometimes we may need to
* enlarge the buffer for up to 3 bytes in the worst case (XZERO split
* into XZERO-VAL-XZERO). Make sure there is enough space right now
* so that the pointers we take during the execution of the function
* will be valid all the time. */
o->ptr = sdsMakeRoomFor(o->ptr,3);
/* When updating a sparse representation, sometimes we may need to enlarge the
* buffer for up to 3 bytes in the worst case (XZERO split into XZERO-VAL-XZERO),
* and the following code does the enlarge job.
* Actually, we use a greedy strategy, enlarge more than 3 bytes to avoid the need
* for future reallocates on incremental growth. But we do not allocate more than
* 'server.hll_sparse_max_bytes' bytes for the sparse representation.
* If the available size of hyperloglog sds string is not enough for the increment
* we need, we promote the hypreloglog to dense representation in 'step 3'.
*/
if (sdsalloc(o->ptr) < server.hll_sparse_max_bytes && sdsavail(o->ptr) < 3) {
size_t newlen = sdslen(o->ptr) + 3;
newlen += min(newlen, 300); /* Greediness: double 'newlen' if it is smaller than 300, or add 300 to it when it exceeds 300 */
if (newlen > server.hll_sparse_max_bytes)
newlen = server.hll_sparse_max_bytes;
o->ptr = sdsResize(o->ptr, newlen);
}
/* Step 1: we need to locate the opcode we need to modify to check
* if a value update is actually needed. */
@ -824,17 +834,18 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
/* Step 3: substitute the new sequence with the old one.
*
* Note that we already allocated space on the sds string
* calling sdsMakeRoomFor(). */
int seqlen = n-seq;
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen-oldlen;
* calling sdsResize(). */
int seqlen = n-seq;
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen-oldlen;
if (deltalen > 0 &&
sdslen(o->ptr)+deltalen > server.hll_sparse_max_bytes) goto promote;
if (deltalen && next) memmove(next+deltalen,next,end-next);
sdsIncrLen(o->ptr,deltalen);
memcpy(p,seq,seqlen);
end += deltalen;
if (deltalen > 0 &&
sdslen(o->ptr) + deltalen > server.hll_sparse_max_bytes) goto promote;
serverAssert(sdslen(o->ptr) + deltalen <= sdsalloc(o->ptr));
if (deltalen && next) memmove(next+deltalen,next,end-next);
sdsIncrLen(o->ptr,deltalen);
memcpy(p,seq,seqlen);
end += deltalen;
updated:
/* Step 4: Merge adjacent values if possible.

View File

@ -59,6 +59,36 @@ start_server {tags {"hll"}} {
}
} {} {needs:pfdebug}
test {Change hll-sparse-max-bytes} {
r config set hll-sparse-max-bytes 3000
r del hll
r pfadd hll a b c d e d g h i j k
assert {[r pfdebug encoding hll] eq {sparse}}
r config set hll-sparse-max-bytes 30
r pfadd hll new_element
assert {[r pfdebug encoding hll] eq {dense}}
} {} {needs:pfdebug}
test {Hyperloglog promote to dense well in different hll-sparse-max-bytes} {
set max(0) 100
set max(1) 500
set max(2) 3000
for {set i 0} {$i < [array size max]} {incr i} {
r config set hll-sparse-max-bytes $max($i)
r del hll
r pfadd hll
set len [r strlen hll]
while {$len <= $max($i)} {
assert {[r pfdebug encoding hll] eq {sparse}}
set elements {}
for {set j 0} {$j < 10} {incr j} { lappend elements [expr rand()]}
r pfadd hll {*}$elements
set len [r strlen hll]
}
assert {[r pfdebug encoding hll] eq {dense}}
}
} {} {needs:pfdebug}
test {HyperLogLog sparse encoding stress test} {
for {set x 0} {$x < 1000} {incr x} {
r del hll1