Change lzf to handle values larger than UINT32_MAX (#9776)

Redis supports inserting data over 4GB into string (and recently for lists too, see #9357),
But LZF compression used in RDB files (see `rdbcompression` config), and in quicklist
(see `list-compress-depth` config) does not support compress/decompress data over
UINT32_MAX, which will result in corrupting the rdb after compression.

Internal changes:
1. Modify the `unsigned int` parameter of `lzf_compress/lzf_decompress` to `size_t`.
2. Modify the variable types in `lzf_compress` involving offsets and lengths to `size_t`.
3. Set LZF_USE_OFFSETS to 0.
    When LZF_USE_OFFSETS is 1, lzf store offset into `LZF_HSLOT`(32bit). 
    Even in 64-bit, `LZF_USE_OFFSETS` defaults to 1, because lzf assumes that it only
    compresses and decompresses data smaller than UINT32_MAX.
    But now we need to make lzf support 64-bit, turning on `LZF_USE_OFFSETS` will make
    it impossible to store 64-bit offsets or pointers.
    BTW, disable LZF_USE_OFFSETS also brings a few performance improvements.

Tests:
1. Add test for compress/decompress string large than UINT32_MAX.
2. Add unittest for compress/decompress quicklistNode.
This commit is contained in:
sundb 2021-11-16 19:12:25 +08:00 committed by GitHub
parent e725d737fb
commit 985430b4fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 17 deletions

View File

@ -73,9 +73,9 @@
* and lzf_c.c.
*
*/
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
size_t
lzf_compress (const void *const in_data, size_t in_len,
void *out_data, size_t out_len);
/*
* Decompress data compressed with some version of the lzf_compress
@ -92,9 +92,9 @@ lzf_compress (const void *const in_data, unsigned int in_len,
*
* This function is very fast, about as fast as a copying loop.
*/
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
size_t
lzf_decompress (const void *const in_data, size_t in_len,
void *out_data, size_t out_len);
#endif

View File

@ -129,8 +129,9 @@
* Whether to store pointers or offsets inside the hash table. On
* 64 bit architectures, pointers take up twice as much space,
* and might also be slower. Default is to autodetect.
*/
/*#define LZF_USER_OFFSETS autodetect */
* Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'
* not being able to store offset above UINT32_MAX in 64bit. */
#define LZF_USE_OFFSETS 0
/*****************************************************************************/
/* nothing should be changed below */

View File

@ -105,9 +105,9 @@
*
*/
NO_SANITIZE("alignment")
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len
size_t
lzf_compress (const void *const in_data, size_t in_len,
void *out_data, size_t out_len
#if LZF_STATE_ARG
, LZF_STATE htab
#endif
@ -132,7 +132,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if defined (WIN32) && defined (_M_X64)
unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
unsigned long off;
size_t off;
#endif
unsigned int hval;
int lit;
@ -153,7 +153,8 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
ref = *hslot ? (*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */
*hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
@ -171,7 +172,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
{
/* match found at *ref++ */
unsigned int len = 2;
unsigned int maxlen = in_end - ip - len;
size_t maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */

View File

@ -56,9 +56,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len)
size_t
lzf_decompress (const void *const in_data, size_t in_len,
void *out_data, size_t out_len)
{
u8 const *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;

View File

@ -1721,6 +1721,7 @@ void quicklistBookmarksClear(quicklist *ql) {
#include <stdint.h>
#include <sys/time.h>
#include "testhelp.h"
#include <stdlib.h>
#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
@ -1902,6 +1903,30 @@ static char *genstr(char *prefix, int i) {
return result;
}
static void randstring(unsigned char *target, size_t sz) {
size_t p = 0;
int minval, maxval;
switch(rand() % 3) {
case 0:
minval = 'a';
maxval = 'z';
break;
case 1:
minval = '0';
maxval = '9';
break;
case 2:
minval = 'A';
maxval = 'Z';
break;
default:
assert(NULL);
}
while(p < sz)
target[p++] = minval+rand()%(maxval-minval+1);
}
/* main test, but callable from other files */
int quicklistTest(int argc, char *argv[], int flags) {
UNUSED(argc);
@ -3125,6 +3150,57 @@ int quicklistTest(int argc, char *argv[], int flags) {
quicklistRelease(ql);
}
if (flags & REDIS_TEST_LARGE_MEMORY) {
TEST("compress and decompress quicklist ziplist node") {
quicklistNode *node = quicklistCreateNode();
node->entry = ziplistNew();
/* Create a rand string */
size_t sz = (1 << 25); /* 32MB per one entry */
unsigned char *s = zmalloc(sz);
randstring(s, sz);
/* Keep filling the node, until it reaches 1GB */
for (int i = 0; i < 32; i++) {
node->entry = ziplistPush(node->entry, s, sz, ZIPLIST_TAIL);
quicklistNodeUpdateSz(node);
long long start = mstime();
assert(__quicklistCompressNode(node));
assert(__quicklistDecompressNode(node));
printf("Compress and decompress: %zu MB in %.2f seconds.\n",
node->sz/1024/1024, (float)(mstime() - start) / 1000);
}
zfree(s);
zfree(node->entry);
zfree(node);
}
#if ULONG_MAX >= 0xffffffffffffffff
TEST("compress and decomress quicklist plain node large than UINT32_MAX") {
size_t sz = (1ull << 32);
unsigned char *s = zmalloc(sz);
randstring(s, sz);
memcpy(s, "helloworld", 10);
memcpy(s + sz - 10, "1234567890", 10);
quicklistNode *node = __quicklistCreatePlainNode(s, sz);
long long start = mstime();
assert(__quicklistCompressNode(node));
assert(__quicklistDecompressNode(node));
printf("Compress and decompress: %zu MB in %.2f seconds.\n",
node->sz/1024/1024, (float)(mstime() - start) / 1000);
assert(memcmp(node->entry, "helloworld", 10) == 0);
assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0);
zfree(node->entry);
zfree(node);
}
#endif
}
if (!err)
printf("ALL TESTS PASSED!\n");
else

View File

@ -566,4 +566,18 @@ start_server {tags {"bitops"}} {
r config set proto-max-bulk-len $oldval
r del mykey
} {1} {large-memory}
test "SETBIT values larger than UINT32_MAX and lzf_compress/lzf_decompress correctly" {
set bytes [expr (1 << 32) + 1]
set bitpos [expr (1 << 35)]
set oldval [lindex [r config get proto-max-bulk-len] 1]
r config set proto-max-bulk-len $bytes
r setbit mykey $bitpos 1
assert_equal $bytes [r strlen mykey]
assert_equal 1 [r getbit mykey $bitpos]
r debug reload ;# lzf_compress/lzf_decompress when RDB saving/loading.
assert_equal 1 [r getbit mykey $bitpos]
r config set proto-max-bulk-len $oldval
r del mykey
} {1} {large-memory needs:debug}
}