Hash Field Expiration - Basic support

- Add ebuckets & mstr data structures
- Integrate active & lazy expiration
- Add most of the commands 
- Add support for dict (listpack is missing)
TODOs:  RDB, notification, listpack, HSET, HGETF, defrag, aof
This commit is contained in:
Moti Cohen 2024-04-18 16:06:30 +03:00 committed by GitHub
parent 4581d43230
commit c18ff05665
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 6617 additions and 209 deletions

View File

@ -354,7 +354,7 @@ endif
REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)

View File

@ -1950,8 +1950,10 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) {
else
return rioWriteBulkLongLong(r, vll);
} else if (hi->encoding == OBJ_ENCODING_HT) {
sds value = hashTypeCurrentFromHashTable(hi, what);
return rioWriteBulkString(r, value, sdslen(value));
char *str;
size_t len;
hashTypeCurrentFromHashTable(hi, what, &str, &len, NULL);
return rioWriteBulkString(r, str, len);
}
serverPanic("Unknown hash encoding");
@ -1962,10 +1964,10 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) {
* The function returns 0 on error, 1 on success. */
int rewriteHashObject(rio *r, robj *key, robj *o) {
hashTypeIterator *hi;
long long count = 0, items = hashTypeLength(o);
long long count = 0, items = hashTypeLength(o, 0);
hi = hashTypeInitIterator(o);
while (hashTypeNext(hi) != C_ERR) {
while (hashTypeNext(hi, 0) != C_ERR) {
if (count == 0) {
int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
AOF_REWRITE_ITEMS_PER_CMD : items;

View File

@ -3303,6 +3303,104 @@ struct COMMAND_ARG HEXISTS_Args[] = {
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/********** HEXPIRE ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HEXPIRE history */
#define HEXPIRE_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HEXPIRE tips */
#define HEXPIRE_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HEXPIRE key specs */
keySpec HEXPIRE_Keyspecs[1] = {
{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HEXPIRE condition argument table */
struct COMMAND_ARG HEXPIRE_condition_Subargs[] = {
{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/* HEXPIRE argument table */
struct COMMAND_ARG HEXPIRE_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIRE_condition_Subargs},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HEXPIREAT ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HEXPIREAT history */
#define HEXPIREAT_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HEXPIREAT tips */
#define HEXPIREAT_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HEXPIREAT key specs */
keySpec HEXPIREAT_Keyspecs[1] = {
{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HEXPIREAT condition argument table */
struct COMMAND_ARG HEXPIREAT_condition_Subargs[] = {
{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/* HEXPIREAT argument table */
struct COMMAND_ARG HEXPIREAT_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIREAT_condition_Subargs},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HEXPIRETIME ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HEXPIRETIME history */
#define HEXPIRETIME_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HEXPIRETIME tips */
#define HEXPIRETIME_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HEXPIRETIME key specs */
keySpec HEXPIRETIME_Keyspecs[1] = {
{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HEXPIRETIME argument table */
struct COMMAND_ARG HEXPIRETIME_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HGET ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
@ -3512,6 +3610,156 @@ struct COMMAND_ARG HMSET_Args[] = {
{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HMSET_data_Subargs},
};
/********** HPERSIST ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HPERSIST history */
#define HPERSIST_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HPERSIST tips */
#define HPERSIST_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HPERSIST key specs */
keySpec HPERSIST_Keyspecs[1] = {
{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HPERSIST argument table */
struct COMMAND_ARG HPERSIST_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HPEXPIRE ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HPEXPIRE history */
#define HPEXPIRE_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HPEXPIRE tips */
#define HPEXPIRE_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HPEXPIRE key specs */
keySpec HPEXPIRE_Keyspecs[1] = {
{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HPEXPIRE condition argument table */
struct COMMAND_ARG HPEXPIRE_condition_Subargs[] = {
{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/* HPEXPIRE argument table */
struct COMMAND_ARG HPEXPIRE_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIRE_condition_Subargs},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HPEXPIREAT ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HPEXPIREAT history */
#define HPEXPIREAT_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HPEXPIREAT tips */
#define HPEXPIREAT_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HPEXPIREAT key specs */
keySpec HPEXPIREAT_Keyspecs[1] = {
{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HPEXPIREAT condition argument table */
struct COMMAND_ARG HPEXPIREAT_condition_Subargs[] = {
{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/* HPEXPIREAT argument table */
struct COMMAND_ARG HPEXPIREAT_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIREAT_condition_Subargs},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HPEXPIRETIME ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HPEXPIRETIME history */
#define HPEXPIRETIME_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HPEXPIRETIME tips */
#define HPEXPIRETIME_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HPEXPIRETIME key specs */
keySpec HPEXPIRETIME_Keyspecs[1] = {
{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HPEXPIRETIME argument table */
struct COMMAND_ARG HPEXPIRETIME_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HPTTL ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HPTTL history */
#define HPTTL_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HPTTL tips */
#define HPTTL_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HPTTL key specs */
keySpec HPTTL_Keyspecs[1] = {
{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HPTTL argument table */
struct COMMAND_ARG HPTTL_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HRANDFIELD ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
@ -3659,6 +3907,32 @@ struct COMMAND_ARG HSTRLEN_Args[] = {
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
};
/********** HTTL ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
/* HTTL history */
#define HTTL_History NULL
#endif
#ifndef SKIP_CMD_TIPS_TABLE
/* HTTL tips */
#define HTTL_Tips NULL
#endif
#ifndef SKIP_CMD_KEY_SPECS_TABLE
/* HTTL key specs */
keySpec HTTL_Keyspecs[1] = {
{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}}
};
#endif
/* HTTL argument table */
struct COMMAND_ARG HTTL_Args[] = {
{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
};
/********** HVALS ********************/
#ifndef SKIP_CMD_HISTORY_TABLE
@ -10710,6 +10984,9 @@ struct COMMAND_STRUCT redisCommandTable[] = {
/* hash */
{MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args},
{MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args},
{MAKE_CMD("hexpire","Set expiry for hash field using relative time to expire (seconds)","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,5),.args=HEXPIRE_Args},
{MAKE_CMD("hexpireat","Set expiry for hash field using an absolute Unix timestamp (seconds)","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireatCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,5),.args=HEXPIREAT_Args},
{MAKE_CMD("hexpiretime","Returns the expiration time of a hash field as a Unix timestamp, in seconds.","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,3),.args=HEXPIRETIME_Args},
{MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args},
{MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args},
{MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args},
@ -10718,11 +10995,17 @@ struct COMMAND_STRUCT redisCommandTable[] = {
{MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args},
{MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args},
{MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args},
{MAKE_CMD("hpersist","Removes the expiration time for each specified field","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,3),.args=HPERSIST_Args},
{MAKE_CMD("hpexpire","Set expiry for hash field using relative time to expire (milliseconds)","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,5),.args=HPEXPIRE_Args},
{MAKE_CMD("hpexpireat","Set expiry for hash field using an absolute Unix timestamp (milliseconds)","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireatCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,5),.args=HPEXPIREAT_Args},
{MAKE_CMD("hpexpiretime","Returns the expiration time of a hash field as a Unix timestamp, in msec.","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,3),.args=HPEXPIRETIME_Args},
{MAKE_CMD("hpttl","Returns the TTL in milliseconds of a hash field.","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,3),.args=HPTTL_Args},
{MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args},
{MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args},
{MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args},
{MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args},
{MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args},
{MAKE_CMD("httl","Returns the TTL in seconds of a hash field.","O(N) where N is the number of arguments to the command","8.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HTTL_History,0,HTTL_Tips,0,httlCommand,-4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HTTL_Keyspecs,1,NULL,3),.args=HTTL_Args},
{MAKE_CMD("hvals","Returns all values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HVALS_History,0,HVALS_Tips,1,hvalsCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HVALS_Keyspecs,1,NULL,1),.args=HVALS_Args},
/* hyperloglog */
{MAKE_CMD("pfadd","Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.","O(1) to add every element.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFADD_History,0,PFADD_Tips,0,pfaddCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HYPERLOGLOG,PFADD_Keyspecs,1,NULL,2),.args=PFADD_Args},

118
src/commands/hexpire.json Normal file
View File

@ -0,0 +1,118 @@
{
"HEXPIRE": {
"summary": "Set expiry for hash field using relative time to expire (seconds)",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -5,
"function": "hexpireCommand",
"history": [],
"command_flags": [
"WRITE",
"DENYOOM",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RW",
"UPDATE"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "Specified NX | XX | GT | LT condition not met",
"const": 0
},
{
"description": "Expiration time was set or updated.",
"const": 1
},
{
"description": "Field deleted because the specified expiration time is in the past.",
"const": 2
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "seconds",
"type": "integer"
},
{
"name": "condition",
"type": "oneof",
"optional": true,
"arguments": [
{
"name": "nx",
"type": "pure-token",
"token": "NX"
},
{
"name": "xx",
"type": "pure-token",
"token": "XX"
},
{
"name": "gt",
"type": "pure-token",
"token": "GT"
},
{
"name": "lt",
"type": "pure-token",
"token": "LT"
}
]
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

118
src/commands/hexpireat.json Normal file
View File

@ -0,0 +1,118 @@
{
"HEXPIREAT": {
"summary": "Set expiry for hash field using an absolute Unix timestamp (seconds)",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -5,
"function": "hexpireatCommand",
"history": [],
"command_flags": [
"WRITE",
"DENYOOM",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RW",
"UPDATE"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "Specified NX | XX | GT | LT condition not met",
"const": 0
},
{
"description": "Expiration time was set or updated.",
"const": 1
},
{
"description": "Field deleted because the specified expiration time is in the past.",
"const": 2
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "unix-time-seconds",
"type": "unix-time"
},
{
"name": "condition",
"type": "oneof",
"optional": true,
"arguments": [
{
"name": "nx",
"type": "pure-token",
"token": "NX"
},
{
"name": "xx",
"type": "pure-token",
"token": "XX"
},
{
"name": "gt",
"type": "pure-token",
"token": "GT"
},
{
"name": "lt",
"type": "pure-token",
"token": "LT"
}
]
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

View File

@ -0,0 +1,83 @@
{
"HEXPIRETIME": {
"summary": "Returns the expiration time of a hash field as a Unix timestamp, in seconds.",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -4,
"function": "hexpiretimeCommand",
"history": [],
"command_flags": [
"READONLY",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RO",
"ACCESS"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "The field exists but has no associated expire.",
"const": -1
},
{
"description": "Expiration Unix timestamp in seconds.",
"type": "integer",
"minimum": 1
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

View File

@ -0,0 +1,82 @@
{
"HPERSIST": {
"summary": "Removes the expiration time for each specified field",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -4,
"function": "hpersistCommand",
"history": [],
"command_flags": [
"READONLY",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RO",
"ACCESS"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "The field exists but has no associated expire.",
"const": -1
},
{
"description": "Expiration time was removed",
"const": 1
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

118
src/commands/hpexpire.json Normal file
View File

@ -0,0 +1,118 @@
{
"HPEXPIRE": {
"summary": "Set expiry for hash field using relative time to expire (milliseconds)",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -5,
"function": "hpexpireCommand",
"history": [],
"command_flags": [
"WRITE",
"DENYOOM",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RW",
"UPDATE"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "Specified NX | XX | GT | LT condition not met",
"const": 0
},
{
"description": "Expiration time was set or updated.",
"const": 1
},
{
"description": "Field deleted because the specified expiration time is in the past.",
"const": 2
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "milliseconds",
"type": "integer"
},
{
"name": "condition",
"type": "oneof",
"optional": true,
"arguments": [
{
"name": "nx",
"type": "pure-token",
"token": "NX"
},
{
"name": "xx",
"type": "pure-token",
"token": "XX"
},
{
"name": "gt",
"type": "pure-token",
"token": "GT"
},
{
"name": "lt",
"type": "pure-token",
"token": "LT"
}
]
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

View File

@ -0,0 +1,118 @@
{
"HPEXPIREAT": {
"summary": "Set expiry for hash field using an absolute Unix timestamp (milliseconds)",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -5,
"function": "hpexpireatCommand",
"history": [],
"command_flags": [
"WRITE",
"DENYOOM",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RW",
"UPDATE"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "Specified NX | XX | GT | LT condition not met",
"const": 0
},
{
"description": "Expiration time was set or updated.",
"const": 1
},
{
"description": "Field deleted because the specified expiration time is in the past.",
"const": 2
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "unix-time-milliseconds",
"type": "unix-time"
},
{
"name": "condition",
"type": "oneof",
"optional": true,
"arguments": [
{
"name": "nx",
"type": "pure-token",
"token": "NX"
},
{
"name": "xx",
"type": "pure-token",
"token": "XX"
},
{
"name": "gt",
"type": "pure-token",
"token": "GT"
},
{
"name": "lt",
"type": "pure-token",
"token": "LT"
}
]
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

View File

@ -0,0 +1,83 @@
{
"HPEXPIRETIME": {
"summary": "Returns the expiration time of a hash field as a Unix timestamp, in msec.",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -4,
"function": "hpexpiretimeCommand",
"history": [],
"command_flags": [
"READONLY",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RO",
"ACCESS"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "The field exists but has no associated expire.",
"const": -1
},
{
"description": "Expiration Unix timestamp in milliseconds.",
"type": "integer",
"minimum": 1
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

83
src/commands/hpttl.json Normal file
View File

@ -0,0 +1,83 @@
{
"HPTTL": {
"summary": "Returns the TTL in milliseconds of a hash field.",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -4,
"function": "hpttlCommand",
"history": [],
"command_flags": [
"READONLY",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RO",
"ACCESS"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "The field exists but has no associated expire.",
"const": -1
},
{
"description": "TTL in milliseconds.",
"type": "integer",
"minimum": 1
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

83
src/commands/httl.json Normal file
View File

@ -0,0 +1,83 @@
{
"HTTL": {
"summary": "Returns the TTL in seconds of a hash field.",
"complexity": "O(N) where N is the number of arguments to the command",
"group": "hash",
"since": "8.0.0",
"arity": -4,
"function": "httlCommand",
"history": [],
"command_flags": [
"READONLY",
"FAST"
],
"acl_categories": [
"HASH"
],
"key_specs": [
{
"flags": [
"RO",
"ACCESS"
],
"begin_search": {
"index": {
"pos": 1
}
},
"find_keys": {
"range": {
"lastkey": 0,
"step": 1,
"limit": 0
}
}
}
],
"reply_schema": {
"oneOf": [
{
"description": "Key does not exist.",
"type": "null"
},
{
"description": "Array of results",
"type": "array",
"minItems": 1,
"maxItems": 4294967295,
"items": [
{
"description": "The field does not exist.",
"const": -2
},
{
"description": "The field exists but has no associated expire.",
"const": -1
},
{
"description": "TTL in seconds.",
"type": "integer",
"minimum": 1
}
]
}
]
},
"arguments": [
{
"name": "key",
"type": "key",
"key_spec_index": 0
},
{
"name": "numfields",
"type": "integer"
},
{
"name": "field",
"type": "string",
"multiple": true
}
]
}
}

101
src/db.c
View File

@ -177,13 +177,13 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
*
* If the update_if_existing argument is false, the program is aborted
* if the key already exists, otherwise, it can fall back to dbOverwrite. */
static void dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_existing) {
static dictEntry *dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_existing) {
dictEntry *existing;
int slot = getKeySlot(key->ptr);
dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key->ptr, &existing);
if (update_if_existing && existing) {
dbSetValue(db, key, val, 1, existing);
return;
return existing;
}
serverAssertWithInfo(NULL, key, de != NULL);
kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr));
@ -191,10 +191,11 @@ static void dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_exist
kvstoreDictSetVal(db->keys, slot, de, val);
signalKeyAsReady(db, key, val->type);
notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id);
return de;
}
void dbAdd(redisDb *db, robj *key, robj *val) {
dbAddInternal(db, key, val, 0);
dictEntry *dbAdd(redisDb *db, robj *key, robj *val) {
return dbAddInternal(db, key, val, 0);
}
/* Returns key's hash slot when cluster mode is enabled, or 0 when disabled.
@ -370,6 +371,11 @@ int dbGenericDelete(redisDb *db, robj *key, int async, int flags) {
dictEntry *de = kvstoreDictTwoPhaseUnlinkFind(db->keys, slot, key->ptr, &plink, &table);
if (de) {
robj *val = dictGetVal(de);
/* If hash object with expiry on fields, remove it from HFE DS of DB */
if (val->type == OBJ_HASH)
hashTypeRemoveFromExpires(&db->hexpires, val);
/* RM_StringDMA may call dbUnshareStringValue which may free val, so we
* need to incr to retain val */
incrRefCount(val);
@ -475,6 +481,9 @@ long long emptyDbStructure(redisDb *dbarray, int dbnum, int async,
if (async) {
emptyDbAsync(&dbarray[j]);
} else {
/* Destroy global HFE DS before deleting the hashes since ebuckets
* DS is embedded in the stored objects. */
ebDestroy(&dbarray[j].hexpires, &hashExpireBucketsType, NULL);
kvstoreEmpty(dbarray[j].keys, callback);
kvstoreEmpty(dbarray[j].expires, callback);
}
@ -554,6 +563,7 @@ redisDb *initTempDb(void) {
tempDb[i].id = i;
tempDb[i].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
tempDb[i].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
tempDb[i].hexpires = ebCreate();
}
return tempDb;
@ -566,6 +576,9 @@ void discardTempDb(redisDb *tempDb, void(callback)(dict*)) {
/* Release temp DBs. */
emptyDbStructure(tempDb, -1, async, callback);
for (int i=0; i<server.dbnum; i++) {
/* Destroy global HFE DS before deleting the hashes since ebuckets DS is
* embedded in the stored objects. */
ebDestroy(&tempDb[i].hexpires, &hashExpireBucketsType, NULL);
kvstoreRelease(tempDb[i].keys);
kvstoreRelease(tempDb[i].expires);
}
@ -894,6 +907,7 @@ typedef struct {
sds pattern; /* pattern string, NULL means no pattern */
long sampled; /* cumulative number of keys sampled */
int no_values; /* set to 1 means to return keys only */
size_t (*strlen)(char *s); /* (o->type == OBJ_HASH) ? hfieldlen : sdslen */
} scanData;
/* Helper function to compare key type in scan commands */
@ -918,7 +932,7 @@ void scanCallback(void *privdata, const dictEntry *de) {
list *keys = data->keys;
robj *o = data->o;
sds val = NULL;
sds key = NULL;
void *key = NULL; /* if OBJ_HASH then key is of type `hfield`. Otherwise, `sds` */
data->sampled++;
/* o and typename can not have values at the same time. */
@ -932,24 +946,29 @@ void scanCallback(void *privdata, const dictEntry *de) {
}*/
/* Filter element if it does not match the pattern. */
sds keysds = dictGetKey(de);
void *keyStr = dictGetKey(de);
if (data->pattern) {
if (!stringmatchlen(data->pattern, sdslen(data->pattern), keysds, sdslen(keysds), 0)) {
if (!stringmatchlen(data->pattern, sdslen(data->pattern), keyStr, data->strlen(keyStr), 0)) {
return;
}
}
if (o == NULL) {
key = keysds;
key = keyStr;
} else if (o->type == OBJ_SET) {
key = keysds;
key = keyStr;
} else if (o->type == OBJ_HASH) {
key = keysds;
key = keyStr;
val = dictGetVal(de);
/* If field is expired, then ignore */
if (hfieldIsExpired(key))
return;
} else if (o->type == OBJ_ZSET) {
char buf[MAX_LONG_DOUBLE_CHARS];
int len = ld2string(buf, sizeof(buf), *(double *)dictGetVal(de), LD_STR_AUTO);
key = sdsdup(keysds);
key = sdsdup(keyStr);
val = sdsnewlen(buf, len);
} else {
serverPanic("Type not handled in SCAN callback.");
@ -1023,6 +1042,7 @@ char *getObjectTypeName(robj *o) {
* In the case of a Hash object the function returns both the field and value
* of every element on the Hash. */
void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
int isKeysHfield = 0;
int i, j;
listNode *node;
long count = 10;
@ -1103,6 +1123,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
} else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
} else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
isKeysHfield = 1;
ht = o->ptr;
} else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
@ -1141,7 +1162,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
* working on an empty dict, one with a lot of empty buckets, and
* for the buckets that are not empty, we need to limit the sampled number
* to prevent a long hang time caused by filtering too many keys;
* 6. data.no_values: to control whether values will be returned or
* 6. data.no_values: to control whether values will be returned or
* only keys are returned. */
scanData data = {
.keys = keys,
@ -1150,6 +1171,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
.pattern = use_pattern ? pat : NULL,
.sampled = 0,
.no_values = no_values,
.strlen = (isKeysHfield) ? hfieldlen : sdslen,
};
/* A pattern may restrict all matching keys to one cluster slot. */
@ -1245,8 +1267,8 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
addReplyArrayLen(c, listLength(keys));
while ((node = listFirst(keys)) != NULL) {
sds key = listNodeValue(node);
addReplyBulkCBuffer(c, key, sdslen(key));
void *key = listNodeValue(node);
addReplyBulkCBuffer(c, key, (isKeysHfield) ? mstrlen(key) : sdslen(key));
listDelNode(keys, node);
}
@ -1339,6 +1361,7 @@ void renameGenericCommand(client *c, int nx) {
robj *o;
long long expire;
int samekey = 0;
uint64_t minHashExpireTime = EB_EXPIRE_TIME_INVALID;
/* When source and dest key is the same, no operation is performed,
* if the key exists, however we still return an error on unexisting key. */
@ -1364,9 +1387,21 @@ void renameGenericCommand(client *c, int nx) {
* with the same name. */
dbDelete(c->db,c->argv[2]);
}
dbAdd(c->db,c->argv[2],o);
dictEntry *de = dbAdd(c->db, c->argv[2], o);
if (expire != -1) setExpire(c,c->db,c->argv[2],expire);
/* If hash with expiration on fields then remove it from global HFE DS and
* keep next expiration time. Otherwise, dbDelete() will remove it from the
* global HFE DS and we will lose the expiration time. */
if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT)
minHashExpireTime = hashTypeRemoveFromExpires(&c->db->hexpires, o);
dbDelete(c->db,c->argv[1]);
/* If hash with HFEs, register in db->hexpires */
if (minHashExpireTime != EB_EXPIRE_TIME_INVALID)
hashTypeAddToExpires(c->db, dictGetKey(de), o, minHashExpireTime);
signalModifiedKey(c,c->db,c->argv[1]);
signalModifiedKey(c,c->db,c->argv[2]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",
@ -1390,6 +1425,7 @@ void moveCommand(client *c) {
redisDb *src, *dst;
int srcid, dbid;
long long expire;
uint64_t hashExpireTime = EB_EXPIRE_TIME_INVALID;
if (server.cluster_enabled) {
addReplyError(c,"MOVE is not allowed in cluster mode");
@ -1430,12 +1466,25 @@ void moveCommand(client *c) {
addReply(c,shared.czero);
return;
}
dbAdd(dst,c->argv[1],o);
dictEntry *dstDictEntry = dbAdd(dst,c->argv[1],o);
if (expire != -1) setExpire(c,dst,c->argv[1],expire);
/* If hash with expiration on fields, remove it from global HFE DS and keep
* aside registered expiration time. Must be before deletion of the object.
* hexpires (ebuckets) embed in stored items its structure. */
if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT)
hashExpireTime = hashTypeRemoveFromExpires(&src->hexpires, o);
incrRefCount(o);
/* OK! key moved, free the entry in the source DB */
dbDelete(src,c->argv[1]);
/* If object of type hash with expiration on fields. Taken care to add the
* hash to hexpires of `dst` only after dbDelete(). */
if (hashExpireTime != EB_EXPIRE_TIME_INVALID)
hashTypeAddToExpires(dst, dictGetKey(dstDictEntry), o, hashExpireTime);
signalModifiedKey(c,src,c->argv[1]);
signalModifiedKey(c,dst,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,
@ -1518,12 +1567,13 @@ void copyCommand(client *c) {
/* Duplicate object according to object's type. */
robj *newobj;
uint64_t minHashExpire = EB_EXPIRE_TIME_INVALID; /* HFE feature */
switch(o->type) {
case OBJ_STRING: newobj = dupStringObject(o); break;
case OBJ_LIST: newobj = listTypeDup(o); break;
case OBJ_SET: newobj = setTypeDup(o); break;
case OBJ_ZSET: newobj = zsetDup(o); break;
case OBJ_HASH: newobj = hashTypeDup(o); break;
case OBJ_HASH: newobj = hashTypeDup(o, newkey->ptr, &minHashExpire); break;
case OBJ_STREAM: newobj = streamDup(o); break;
case OBJ_MODULE:
newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o);
@ -1538,8 +1588,15 @@ void copyCommand(client *c) {
dbDelete(dst,newkey);
}
dbAdd(dst,newkey,newobj);
if (expire != -1) setExpire(c, dst, newkey, expire);
dictEntry *deCopy = dbAdd(dst,newkey,newobj);
/* if key with expiration then set it */
if (expire != -1)
setExpire(c, dst, newkey, expire);
/* If hash with expiration on fields then add it to 'dst' global HFE DS */
if (minHashExpire != EB_EXPIRE_TIME_INVALID)
hashTypeAddToExpires(dst, dictGetKey(deCopy), newobj, minHashExpire);
/* OK! key copied */
signalModifiedKey(c,dst,c->argv[2]);
@ -1629,11 +1686,13 @@ int dbSwapDatabases(int id1, int id2) {
* remain in the same DB they were. */
db1->keys = db2->keys;
db1->expires = db2->expires;
db1->hexpires = db2->hexpires;
db1->avg_ttl = db2->avg_ttl;
db1->expires_cursor = db2->expires_cursor;
db2->keys = aux.keys;
db2->expires = aux.expires;
db2->hexpires = aux.hexpires;
db2->avg_ttl = aux.avg_ttl;
db2->expires_cursor = aux.expires_cursor;
@ -1864,7 +1923,7 @@ int keyIsExpired(redisDb *db, robj *key) {
* EXPIRE_AVOID_DELETE_EXPIRED flag.
*
* The return value of the function is KEY_VALID if the key is still valid.
* The function returns KEY_EXPIRED if the key is expired BUT not deleted,
* The function returns KEY_EXPIRED if the key is expired BUT not deleted,
* or returns KEY_DELETED if the key is expired and deleted. */
keyStatus expireIfNeeded(redisDb *db, robj *key, int flags) {
if (server.lazy_expire_disabled) return KEY_VALID;
@ -1878,7 +1937,7 @@ keyStatus expireIfNeeded(redisDb *db, robj *key, int flags) {
* replicas.
*
* Still we try to return the right information to the caller,
* that is, KEY_VALID if we think the key should still be valid,
* that is, KEY_VALID if we think the key should still be valid,
* KEY_EXPIRED if we think the key is expired but don't want to delete it at this time.
*
* When replicating commands from the master, keys are never considered

View File

@ -200,7 +200,7 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o)
}
} else if (o->type == OBJ_HASH) {
hashTypeIterator *hi = hashTypeInitIterator(o);
while (hashTypeNext(hi) != C_ERR) {
while (hashTypeNext(hi, 0) != C_ERR) {
unsigned char eledigest[20];
sds sdsele;
@ -445,9 +445,9 @@ void debugCommand(client *c) {
"SEGFAULT",
" Crash the server with sigsegv.",
"SET-ACTIVE-EXPIRE <0|1>",
" Setting it to 0 disables expiring keys in background when they are not",
" accessed (otherwise the Redis behavior). Setting it to 1 reenables back the",
" default.",
" Setting it to 0 disables expiring keys (and hash-fields) in background ",
" when they are not accessed (otherwise the Redis behavior). Setting it",
" to 1 reenables back the default.",
"QUICKLIST-PACKED-THRESHOLD <size>",
" Sets the threshold for elements to be inserted as plain vs packed nodes",
" Default value is 1GB, allows values up to 4GB. Setting to 0 restores to default.",
@ -1081,7 +1081,7 @@ void serverLogObjectDebugInfo(const robj *o) {
} else if (o->type == OBJ_SET) {
serverLog(LL_WARNING,"Set size: %d", (int) setTypeSize(o));
} else if (o->type == OBJ_HASH) {
serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o));
serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o, 0));
} else if (o->type == OBJ_ZSET) {
serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o));
if (o->encoding == OBJ_ENCODING_SKIPLIST)

View File

@ -67,6 +67,25 @@ static int _dictInit(dict *d, dictType *type);
static dictEntry *dictGetNext(const dictEntry *de);
static dictEntry **dictGetNextRef(dictEntry *de);
static void dictSetNext(dictEntry *de, dictEntry *next);
static int dictDefaultCompare(dict *d, const void *key1, const void *key2);
/* -------------------------- misc inline functions -------------------------------- */
typedef int (*keyCmpFunc)(dict *d, const void *key1, const void *key2);
static inline keyCmpFunc dictGetKeyCmpFunc(dict *d) {
if (d->useStoredKeyApi && d->type->storedKeyCompare)
return d->type->storedKeyCompare;
if (d->type->keyCompare)
return d->type->keyCompare;
return dictDefaultCompare;
}
static inline uint64_t dictHashKey(dict *d, const void *key, int isStoredKey) {
if (isStoredKey && d->type->storedHashFunction)
return d->type->storedHashFunction(key);
else
return d->type->hashFunction(key);
}
/* -------------------------- hash functions -------------------------------- */
@ -173,6 +192,19 @@ dict *dictCreate(dictType *type)
return d;
}
/* Change dictType of dict to another one with metadata support
* Rest of dictType's values must stay the same */
void dictTypeAddMeta(dict **d, dictType *typeWithMeta) {
/* Verify new dictType is compatible with the old one */
dictType toCmp = *typeWithMeta;
toCmp.dictMetadataBytes = NULL; /* Expected old one not to have metadata */
toCmp.onDictRelease = (*d)->type->onDictRelease; /* Ignore 'onDictRelease' in comparison */
assert(memcmp((*d)->type, &toCmp, sizeof(dictType)) == 0); /* The rest of the dictType fields must be the same */
*d = zrealloc(*d, sizeof(dict) + typeWithMeta->dictMetadataBytes(*d));
(*d)->type = typeWithMeta;
}
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type)
{
@ -182,6 +214,7 @@ int _dictInit(dict *d, dictType *type)
d->rehashidx = -1;
d->pauserehash = 0;
d->pauseAutoResize = 0;
d->useStoredKeyApi = 0;
return DICT_OK;
}
@ -285,7 +318,7 @@ static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) {
void *key = dictGetKey(de);
/* Get the index in the new hash table */
if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
h = dictHashKey(d, key, 1) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
} else {
/* We're shrinking the table. The tables sizes are powers of
* two, so we simply mask the bucket index in the larger table
@ -572,7 +605,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
/* dict is empty */
if (dictSize(d) == 0) return NULL;
h = dictHashKey(d, key);
h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
@ -587,6 +620,8 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
}
}
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
@ -594,7 +629,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
prevHe = NULL;
while(he) {
void *he_key = dictGetKey(he);
if (key == he_key || dictCompareKeys(d, key, he_key)) {
if (key == he_key || cmpFunc(d, key, he_key)) {
/* Unlink the element from the list */
if (prevHe)
dictSetNext(prevHe, dictGetNext(he));
@ -689,6 +724,10 @@ void dictRelease(dict *d)
* destroying the dict fake completion. */
if (dictIsRehashing(d) && d->type->rehashingCompleted)
d->type->rehashingCompleted(d);
if (d->type->onDictRelease)
d->type->onDictRelease(d);
_dictClear(d,0,NULL);
_dictClear(d,1,NULL);
zfree(d);
@ -701,8 +740,9 @@ dictEntry *dictFind(dict *d, const void *key)
if (dictSize(d) == 0) return NULL; /* dict is empty */
h = dictHashKey(d, key);
h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
@ -722,7 +762,7 @@ dictEntry *dictFind(dict *d, const void *key)
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
if (key == he_key || dictCompareKeys(d, key, he_key))
if (key == he_key || cmpFunc(d, key, he_key))
return he;
he = dictGetNext(he);
}
@ -759,7 +799,9 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
if (dictSize(d) == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
h = dictHashKey(d, key, d->useStoredKeyApi);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
for (table = 0; table <= 1; table++) {
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
@ -767,7 +809,7 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
dictEntry **ref = &d->ht_table[table][idx];
while (ref && *ref) {
void *de_key = dictGetKey(*ref);
if (key == de_key || dictCompareKeys(d, key, de_key)) {
if (key == de_key || cmpFunc(d, key, de_key)) {
*table_index = table;
*plink = ref;
dictPauseRehashing(d);
@ -1530,8 +1572,8 @@ static signed char _dictNextExp(unsigned long size)
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
unsigned long idx, table;
dictEntry *he;
uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
if (existing) *existing = NULL;
uint64_t hash = dictHashKey(d, key);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
@ -1548,6 +1590,8 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
@ -1555,7 +1599,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
if (key == he_key || dictCompareKeys(d, key, he_key)) {
if (key == he_key || cmpFunc(d, key, he_key)) {
if (existing) *existing = he;
return NULL;
}
@ -1587,7 +1631,7 @@ void dictSetResizeEnabled(dictResizeEnable enable) {
}
uint64_t dictGetHash(dict *d, const void *key) {
return dictHashKey(d, key);
return dictHashKey(d, key, d->useStoredKeyApi);
}
/* Finds the dictEntry using pointer and pre-calculated hash.
@ -1732,6 +1776,11 @@ void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
orig_buf[orig_bufsize-1] = '\0';
}
static int dictDefaultCompare(dict *d, const void *key1, const void *key2) {
(void)(d); /*unused*/
return key1 == key2;
}
/* ------------------------------- Benchmark ---------------------------------*/
#ifdef REDIS_TEST

View File

@ -62,6 +62,32 @@ typedef struct dictType {
unsigned int keys_are_odd:1;
/* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
* flag is set. */
/* Sometimes we want the ability to store a key in a given way inside the hash
* function, and lookup it in some other way without resorting to any kind of
* conversion. For instance the key may be stored as a structure also
* representing other things, but the lookup happens via just a pointer to a
* null terminated string. Optionally providing additional hash/cmp functions,
* dict supports such usage. In that case we'll have a hashFunction() that will
* expect a null terminated C string, and a storedHashFunction() that will
* instead expect the structure. Similarly, the two comparison functions will
* work differently. The keyCompare() will treat the first argument as a pointer
* to a C string and the other as a structure (this way we can directly lookup
* the structure key using the C string). While the storedKeyCompare() will
* check if two pointers to the key in structure form are the same.
*
* However, functions of dict that gets key as argument (void *key) don't get
* any indication whether it is a lookup or stored key. To indicate that
* you intend to use key of type stored-key, and, consequently, use
* dedicated compare and hash functions of stored-key, is by calling
* dictUseStoredKeyApi(1) before using any of the dict functions that gets
* key as a parameter and then call again dictUseStoredKeyApi(0) once done.
*
* Set to NULL both functions, if you don't want to support this feature. */
uint64_t (*storedHashFunction)(const void *key);
int (*storedKeyCompare)(dict *d, const void *key1, const void *key2);
/* Optional callback called when the dict is destroyed. */
void (*onDictRelease)(dict *d);
} dictType;
#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp))
@ -76,7 +102,9 @@ struct dict {
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
/* Keep small vars at end for optimal (minimal) struct padding */
int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
unsigned pauserehash : 15; /* If >0 rehashing is paused */
unsigned useStoredKeyApi : 1; /* See comment of storedHashFunction above */
signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
int16_t pauseAutoResize; /* If >0 automatic resizing is disallowed (<0 indicates coding error) */
void *metadata[];
@ -136,7 +164,6 @@ typedef struct {
#define dictMetadataSize(d) ((d)->type->dictMetadataBytes \
? (d)->type->dictMetadataBytes(d) : 0)
#define dictHashKey(d, key) ((d)->type->hashFunction(key))
#define dictBuckets(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1]))
#define dictSize(d) ((d)->ht_used[0]+(d)->ht_used[1])
#define dictIsEmpty(d) ((d)->ht_used[0] == 0 && (d)->ht_used[1] == 0)
@ -146,6 +173,7 @@ typedef struct {
#define dictIsRehashingPaused(d) ((d)->pauserehash > 0)
#define dictPauseAutoResize(d) ((d)->pauseAutoResize++)
#define dictResumeAutoResize(d) ((d)->pauseAutoResize--)
#define dictUseStoredKeyApi(d, flag) ((d)->useStoredKeyApi = (flag))
/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
#if ULONG_MAX >= 0xffffffffffffffff
@ -162,6 +190,7 @@ typedef enum {
/* API */
dict *dictCreate(dictType *type);
void dictTypeAddMeta(dict **d, dictType *typeWithMeta);
int dictExpand(dict *d, unsigned long size);
int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);

2254
src/ebuckets.c Normal file

File diff suppressed because it is too large Load Diff

302
src/ebuckets.h Normal file
View File

@ -0,0 +1,302 @@
/*
* Copyright Redis Ltd. 2024 - present
*
* Licensed under your choice of the Redis Source Available License 2.0 (RSALv2)
* or the Server Side Public License v1 (SSPLv1).
*
*
* WHAT IS EBUCKETS?
* -----------------
* ebuckets is being used to store items that are set with expiration-time. It
* supports the basic API of add, remove and active expiration. The implementation
* of it is based on rax-tree, or plain linked-list when small. The expiration time
 * of the items is used as the key to traverse the rax-tree.
*
* Instead of holding a distinct item in each leaf of the rax-tree we can aggregate
* items into small segments and hold it in each leaf. This way we can avoid
* frequent modification of the rax-tree, since many of the modifications
* will be done only at the segment level. It will also save memory because
* rax-tree can be costly, around 40 bytes per leaf (with rax-key limited to 6
* bytes). Whereas each additional item in the segment will cost the size of the
* 'next' pointer in a list (8 bytes) and few more bytes for maintenance of the
* segment.
*
* EBUCKETS STRUCTURE
* ------------------
* The ebuckets data structure is organized in a hierarchical manner as follows:
*
* 1. ebuckets: This is the top-level data structure. It can be either a rax tree
* or a plain linked list. It contains one or more buckets, each representing
* an interval in time.
*
* 2. bucket: Each bucket represents an interval in time and contains one or more
* segments. The key in the rax-tree for each bucket represents low
* bound expiration-time for the items within this bucket. The key of the
* following bucket represents the upper bound expiration-time.
*
* 3. segment: Each segment within a bucket can hold up to `EB_SEG_MAX_ITEMS`
* items as a linked list. If there are more, the segment will try to
* split the bucket. To avoid wasting memory, it is a singly linked list (only
* next-item pointer). It is a cyclic linked-list to allow efficient removal of
* items from the middle of the segment without traversing the rax tree.
*
* 4. item: Each item that is stored in ebuckets should embed the ExpireMeta
* struct and supply getter function (see EbucketsType.getExpireMeta). This
* struct holds the expire-time of the item and few more fields that are used
* to maintain the segments data-structure.
*
* SPLITTING BUCKET
* ----------------
* Each segment can hold up-to `EB_SEG_MAX_ITEMS` items. On insertion of new
 * item, it will try to split the segment. Here is an example of adding an item
 * with expiration of 42 to a segment that already reached its maximum capacity,
 * which causes a split of the segment and, in turn, a split of the bucket as
 * well into finer-grained ranges:
*
* BUCKETS BUCKETS
* [ 00-10 ] -> size(Seg0) = 11 ==> [ 00-10 ] -> size(Seg0) = 11
* [ 11-76 ] -> size(Seg1) = 16 [ 11-36 ] -> size(Seg1) = 9
* [ 37-76 ] -> size(Seg2) = 7
*
* EXTENDING BUCKET
* ----------------
* In the example above, the reason it wasn't split evenly is that Seg1 must have
* been holding items with same TTL and they must reside together in the same
* bucket after the split. Which brings us to another important point. If there
* is a segment that reached its maximum capacity and all the items have same
* expiration-time key, then we cannot split the bucket but aggregate all the
* items, with same expiration time key, by allocating an extended-segment and
* chain it to the first segment in visited bucket. In that sense, extended
* segments will only hold items with same expiration-time key.
*
* BUCKETS BUCKETS
* [ 00-10 ] -> size(Seg0)=11 ==> [ 00-10 ] -> size(Seg0)=11
* [ 11-12 ] -> size(Seg1)=16 [ 11-12 ] -> size(Seg1)=1 -> size(Seg2)=16
*
* LIMITING RAX TREE DEPTH
* -----------------------
* The rax tree is basically a B-tree and its depth is bounded by the sizeof of
* the key. Holding 6 bytes for expiration-time key is more than enough to represent
* unix-time in msec, and in turn the depth of the tree is limited to 6 levels.
 * At a first glance it might look sufficient but we need to take into consideration
* the heavyweight maintenance and traversal of each node in the B-tree.
*
* And so, we can further prune the tree such that holding keys with msec precision
* in the tree doesn't bring with it much value. The active-expiration operation can
* live with deletion of expired items, say, older than 1 sec, which means the size
* of time-expiration keys to the rax tree become no more than ~4.5 bytes and we
* also get rid of the "noisy" bits which most probably will cause to yet another
* branching and modification of the rax tree in case of items with time-expiration
* difference of less than 1 second. The lazy expiration will still be precise and
* without compromise on accuracy because the exact expiration-time is kept
* attached as well to each item, in `ExpireMeta`, and each traversal of item with
* expiration will behave as expected down to the msec. Take care to configure
* `EB_BUCKET_KEY_PRECISION` according to your needs.
*
* EBUCKET KEY
* -----------
* Taking into account configured value of `EB_BUCKET_KEY_PRECISION`, two items
* with expiration-time t1 and t2 will be considered to have the same key in the
* rax-tree/buckets if and only if:
*
* EB_BUCKET_KEY(t1) == EB_BUCKET_KEY(t2)
*
* EBUCKETS CREATION
* -----------------
* To avoid the cost of allocating rax data-structure for only few elements,
* ebuckets will start as a simple linked-list and only when it reaches some
* threshold, it will be converted to rax.
*
* TODO
* ----
* - ebRemove() optimize to merge small segments into one segment.
* - ebAdd() Fix pathological case of cascade addition of items into rax such
* that their values are smaller/bigger than visited extended-segment which ends
* up with multiple segments with a single item in each segment.
*/
#ifndef __EBUCKETS_H
#define __EBUCKETS_H
#include <stdlib.h>
#include <sys/types.h>
#include <stdarg.h>
#include <stdint.h>
#include "rax.h"
/*
* EB_BUCKET_KEY_PRECISION - Defines the number of bits to ignore from the
* expiration-time when mapping to buckets. The higher the value, the more items
* with similar expiration-time will be aggregated into the same bucket. The lower
* the value, the more "accurate" the active expiration of buckets will be.
*
* Note that the accurate time expiration of each item is preserved anyway and
* enforced by lazy expiration. It only impacts the active expiration that will
* be able to work on buckets older than (1<<EB_BUCKET_KEY_PRECISION) msec ago.
* For example if EB_BUCKET_KEY_PRECISION is 10, then active expiration
* will work only on buckets that already got expired at least 1sec ago.
*
* The idea of it is to trim the rax tree depth, avoid having too many branches,
* and reduce frequent modifications of the tree to the minimum.
*/
#define EB_BUCKET_KEY_PRECISION 0 /* 1024msec */
/* From expiration time to bucket-key */
#define EB_BUCKET_KEY(exptime) ((exptime) >> EB_BUCKET_KEY_PRECISION)
#define EB_EXPIRE_TIME_MAX ((uint64_t)0x0000FFFFFFFFFFFF) /* Maximum expire-time. */
#define EB_EXPIRE_TIME_INVALID (EB_EXPIRE_TIME_MAX+1) /* assumed bigger than max */
/* Handler to ebuckets DS. Pointer to a list, rax or NULL (empty DS). See also ebIsList(). */
typedef void *ebuckets;
/* Users of ebuckets will store `eItem` which is just a void pointer to their
* element. In addition, eItem should embed the ExpireMeta struct and supply
* getter function (see EbucketsType.getExpireMeta).
*/
typedef void *eItem;
/* This struct should be embedded inside `eItem` and must be aligned in memory.
 * It holds the item's expiration time and the linkage of the item inside its
 * bucket's segment(s). */
typedef struct ExpireMeta {
    /* 48bits of unix-time in msec. This value is sufficient to represent, in
     * unix-time, until the date of 02 August, 10889
     */
    uint32_t expireTimeLo;               /* Low 32 bits of expireTime. */
    uint16_t expireTimeHi;               /* High 16 bits of expireTime. */

    unsigned int lastInSegment    : 1;   /* Last item in segment. If set, then 'next' will
                                            point to the NextSegHdr, unless lastItemBucket=1
                                            then it will point to segment header of the
                                            current segment. */
    unsigned int firstItemBucket  : 1;   /* First item in bucket. This flag assist
                                            to manipulate segments directly without
                                            the need to traverse from start the
                                            rax tree */
    unsigned int lastItemBucket   : 1;   /* Last item in bucket. This flag assist
                                            to manipulate segments directly without
                                            the need to traverse from start the
                                            rax tree */
    unsigned int numItems         : 5;   /* Only first item in segment will maintain
                                            this value (5 bits => up to 31 items per
                                            segment — TODO confirm limit elsewhere). */
    unsigned int trash            : 1;   /* This flag indicates whether the ExpireMeta
                                            associated with the item is leftover.
                                            There is always a potential to reuse the
                                            item after removal/deletion. Note that,
                                            the user can still safely O(1) TTL lookup
                                            a given item and verify whether attached
                                            TTL is valid or leftover. See function
                                            ebGetExpireTime(). */
    unsigned int userData         : 3;   /* ebuckets can be used to store in same
                                            instance few different types of items,
                                            such as, listpack and hash. This field
                                            is reserved to store such identification
                                            associated with the item and can help
                                            to distinct on delete or expire callback.
                                            It is not used by ebuckets internally and
                                            should be maintained by the user */
    unsigned int reserved         : 4;   /* Unused; pads the bitfields. */

    void *next;                          /* - If not last item in segment then next
                                            points to next eItem (lastInSegment=0).
                                            - If last in segment but not last in
                                            bucket (lastItemBucket=0) then it
                                            points to next segment header.
                                            - If last in bucket then it points to
                                            current segment header (Can be either
                                            of type FirstSegHdr or NextSegHdr). */
} ExpireMeta;
/* Each instance of ebuckets needs a corresponding EbucketsType that holds
 * the necessary callbacks and configuration to operate correctly on the type
 * of items that are stored in it. Conceptually ebuckets should have held a
 * reference from the instance to this type, but to save memory we pass it as
 * an argument to each API call instead. */
typedef struct EbucketsType {
    /* getter to extract the ExpireMeta from the item */
    ExpireMeta* (*getExpireMeta)(const eItem item);

    /* Called during ebDestroy(). Set to NULL if not needed. */
    void (*onDeleteItem)(eItem item, void *ctx);

    /* Set to 1 if item addresses are guaranteed to be odd in memory. ebuckets
     * takes it into consideration to distinguish an ebuckets pointer to a rax
     * from a pointer to an item that is the head of a list. */
    unsigned int itemsAddrAreOdd;
} EbucketsType;
/* Returned value by the `onExpireItem` callback to indicate the action to be
 * taken by ebExpire() for the item just visited. */
typedef enum ExpireAction {
    ACT_REMOVE_EXP_ITEM=0,    /* Remove the item from ebuckets. */
    ACT_UPDATE_EXP_ITEM,      /* Re-insert the item with updated expiration-time.
                                 Before returning this value, the cb need to
                                 update expiration time of the item by assisting
                                 function ebSetMetaExpTime(). The item will be
                                 kept aside and will be added again to ebuckets
                                 at the end of ebExpire() */
    ACT_STOP_ACTIVE_EXP       /* Stop active-expiration. It will assume that
                                 provided 'item' wasn't deleted by the callback. */
} ExpireAction;
/* ExpireInfo is used to pass input and output parameters to ebExpire(). */
typedef struct ExpireInfo {
    /* onExpireItem - Called for each expired item during active-expiration by
     * ebExpire(). Its ExpireAction return value controls what happens next. */
    ExpireAction (*onExpireItem)(eItem item, void *ctx);

    uint64_t maxToExpire;          /* [INPUT ] Limit of number expired items to scan */
    void *ctx;                     /* [INPUT ] context to pass to onExpireItem */
    uint64_t now;                  /* [INPUT ] Current time in msec. */
    uint64_t nextExpireTime;       /* [OUTPUT] Next expiration time. Return 0, if none left. */
    uint64_t itemsExpired;         /* [OUTPUT] Returns the number of expired items. */
} ExpireInfo;
/* ebuckets API */

/* An empty ebuckets is simply a NULL pointer (see ebIsEmpty()). */
static inline ebuckets ebCreate(void) { return NULL; } /* Empty ebuckets */

/* Destroy ebuckets, invoking type->onDeleteItem (if set) with deletedItemsCbCtx
 * for each contained item. */
void ebDestroy(ebuckets *eb, EbucketsType *type, void *deletedItemsCbCtx);

/* Active expiration. Inputs and outputs are carried via 'info' (see ExpireInfo). */
void ebExpire(ebuckets *eb, EbucketsType *type, ExpireInfo *info);

/* NOTE(review): presumably returns the count of items that would expire by
 * 'now' without modifying the structure — confirm against implementation. */
uint64_t ebExpireDryRun(ebuckets eb, EbucketsType *type, uint64_t now);

static inline int ebIsEmpty(ebuckets eb) { return eb == NULL; }

uint64_t ebGetNextTimeToExpire(ebuckets eb, EbucketsType *type);
uint64_t ebGetMaxExpireTime(ebuckets eb, EbucketsType *type, int accurate);
uint64_t ebGetTotalItems(ebuckets eb, EbucketsType *type);

/* Item related API */
int ebRemove(ebuckets *eb, EbucketsType *type, eItem item);
int ebAdd(ebuckets *eb, EbucketsType *type, eItem item, uint64_t expireTime);
uint64_t ebGetExpireTime(EbucketsType *type, eItem item);

/* Recompose the 48-bit expiration time (msec) from the split hi/lo fields. */
static inline uint64_t ebGetMetaExpTime(ExpireMeta *expMeta) {
    return (((uint64_t)(expMeta)->expireTimeHi << 32) | (expMeta)->expireTimeLo);
}

/* Store a 48-bit expiration time (msec) into the split hi/lo fields. */
static inline void ebSetMetaExpTime(ExpireMeta *expMeta, uint64_t t) {
    expMeta->expireTimeLo = (uint32_t)(t&0xFFFFFFFF);
    expMeta->expireTimeHi = (uint16_t)((t) >> 32);
}

/* Debug API */
void ebValidate(ebuckets eb, EbucketsType *type);
void ebPrint(ebuckets eb, EbucketsType *type);
#ifdef REDIS_TEST
int ebucketsTest(int argc, char *argv[], int flags);
#endif
#endif /* __EBUCKETS_H */

View File

@ -94,6 +94,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which
we do extra efforts. */
#define HFE_ACTIVE_EXPIRE_CYCLE_FIELDS 1000
/* Data used by the expire dict scan callback. */
typedef struct {
@ -134,6 +135,53 @@ static inline int isExpiryDictValidForSamplingCb(dict *d) {
return C_OK;
}
/* Active expiration cycle for hash-fields.
 *
 * Note that releasing fields is expected to be more predictable and rewarding
 * than releasing keys because fields are stored in the `ebuckets` DS, which is
 * optimized for active expiration, and deleting a field is simple to handle. */
static inline void activeExpireHashFieldCycle(int type) {
    /* Database index to resume from on the next invocation */
    static unsigned int dbIdx = 0;

    /* Count of fields actively expired, in a row, for the current database
     * without ever draining it. A significant value hints that the per-call
     * budget is too small and should be scaled up. */
    static uint64_t seqExpired = 0;

    /* Threshold above which the budget gets scaled up */
    const uint32_t SEQ_EXPIRED_TH = 1000000;

    /* Maximum number of fields to actively expire in a single call */
    uint32_t budget = HFE_ACTIVE_EXPIRE_CYCLE_FIELDS;
    redisDb *db = server.db + dbIdx;

    /* Nothing to expire in this db. Advance to the next one. */
    if (ebIsEmpty(db->hexpires)) {
        seqExpired = 0;
        dbIdx = (dbIdx + 1) % server.dbnum;
        return;
    }

    /* If running for a while without draining currentDb (i.e. seqExpired became
     * significant), scale the budget by x1..x32 — but only on slow cycles. */
    if (type == ACTIVE_EXPIRE_CYCLE_SLOW && seqExpired > SEQ_EXPIRED_TH) {
        uint64_t factor = seqExpired / SEQ_EXPIRED_TH;
        budget *= (factor < 32) ? factor : 32;
    }

    if (hashTypeDbActiveExpire(db, budget) == budget) {
        /* Budget exhausted: this db likely still holds expired fields */
        seqExpired += budget;
    } else {
        /* Managed to actively expire all expired fields of this db */
        seqExpired = 0;
        dbIdx = (dbIdx + 1) % server.dbnum;
    }
}
void activeExpireCycle(int type) {
/* Adjust the running parameters according to the configured expire
* effort. The default effort is 1, and the maximum configurable effort
@ -232,6 +280,11 @@ void activeExpireCycle(int type) {
* distribute the time evenly across DBs. */
current_db++;
/* Interleaving hash-field expiration with key expiration. Better
* call it before handling expired keys because HFE DS is optimized for
* active expiration */
activeExpireHashFieldCycle(type);
if (kvstoreSize(db->expires))
dbs_performed++;

View File

@ -3,6 +3,7 @@
#include "atomicvar.h"
#include "functions.h"
#include "cluster.h"
#include "ebuckets.h"
static redisAtomic size_t lazyfree_objects = 0;
static redisAtomic size_t lazyfreed_objects = 0;
@ -22,7 +23,8 @@ void lazyfreeFreeObject(void *args[]) {
void lazyfreeFreeDatabase(void *args[]) {
kvstore *da1 = args[0];
kvstore *da2 = args[1];
ebuckets oldHfe = args[2];
ebDestroy(&oldHfe, &hashExpireBucketsType, NULL);
size_t numkeys = kvstoreSize(da1);
kvstoreRelease(da1);
kvstoreRelease(da2);
@ -201,10 +203,12 @@ void emptyDbAsync(redisDb *db) {
flags |= KVSTORE_FREE_EMPTY_DICTS;
}
kvstore *oldkeys = db->keys, *oldexpires = db->expires;
ebuckets oldHfe = db->hexpires;
db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
db->expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
db->hexpires = ebCreate();
atomicIncr(lazyfree_objects, kvstoreSize(oldkeys));
bioCreateLazyFreeJob(lazyfreeFreeDatabase, 2, oldkeys, oldexpires);
bioCreateLazyFreeJob(lazyfreeFreeDatabase, 3, oldkeys, oldexpires, oldHfe);
}
/* Free the key tracking table.

View File

@ -745,7 +745,7 @@ int moduleDelKeyIfEmpty(RedisModuleKey *key) {
case OBJ_LIST: isempty = listTypeLength(o) == 0; break;
case OBJ_SET: isempty = setTypeSize(o) == 0; break;
case OBJ_ZSET: isempty = zsetLength(o) == 0; break;
case OBJ_HASH: isempty = hashTypeLength(o) == 0; break;
case OBJ_HASH: isempty = hashTypeLength(o, 0) == 0; break;
case OBJ_STREAM: isempty = streamLength(o) == 0; break;
default: isempty = 0;
}
@ -4168,7 +4168,7 @@ size_t RM_ValueLength(RedisModuleKey *key) {
case OBJ_LIST: return listTypeLength(key->value);
case OBJ_SET: return setTypeSize(key->value);
case OBJ_ZSET: return zsetLength(key->value);
case OBJ_HASH: return hashTypeLength(key->value);
case OBJ_HASH: return hashTypeLength(key->value, 0); /* OPEN: To subtract expired fields? */
case OBJ_STREAM: return streamLength(key->value);
default: return 0;
}
@ -5296,7 +5296,7 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
robj *argv[2] = {field,value};
hashTypeTryConversion(key->value,argv,0,1);
int updated = hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
int updated = hashTypeSet(key->db, key->value, field->ptr, value->ptr, low_flags);
count += (flags & REDISMODULE_HASH_COUNT_ALL) ? 1 : updated;
/* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
@ -11071,18 +11071,22 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
ScanKeyCBData *data = privdata;
sds key = dictGetKey(de);
robj *o = data->key->value;
robj *field = createStringObject(key, sdslen(key));
robj *field = NULL;
robj *value = NULL;
if (o->type == OBJ_SET) {
value = NULL;
} else if (o->type == OBJ_HASH) {
sds val = dictGetVal(de);
field = createStringObject(key, hfieldlen(key));
value = createStringObject(val, sdslen(val));
} else if (o->type == OBJ_ZSET) {
double *val = (double*)dictGetVal(de);
value = createStringObjectFromLongDouble(*val, 0);
}
/* if type is OBJ_HASH then key is of type hfield. Otherwise sds. */
if (!field) field = createStringObject(key, sdslen(key));
data->fn(data->key, field, value, data->user_data);
decrRefCount(field);
if (value) decrRefCount(value);

524
src/mstr.c Normal file
View File

@ -0,0 +1,524 @@
/*
* Copyright Redis Ltd. 2024 - present
*
* Licensed under your choice of the Redis Source Available License 2.0 (RSALv2)
* or the Server Side Public License v1 (SSPLv1).
*/
#include <string.h>
#include <assert.h>
#include "sdsalloc.h"
#include "mstr.h"
#include "stdio.h"
#define NULL_SIZE 1
static inline char mstrReqType(size_t string_size);
static inline int mstrHdrSize(char type);
static inline int mstrSumMetaLen(mstrKind *k, mstrFlags flags);
static inline size_t mstrAllocLen(const mstr s, struct mstrKind *kind);
/*** mstr API ***/
/* Create mstr without any metadata attached, based on string 'initStr'.
* - If initStr equals NULL, then only allocation will be made.
* - string of mstr is always null-terminated.
*/
mstr mstrNew(const char *initStr, size_t lenStr, int trymalloc) {
unsigned char *pInfo; /* pointer to mstr info field */
void *sh;
mstr s;
char type = mstrReqType(lenStr);
int mstrHdr = mstrHdrSize(type);
assert(lenStr + mstrHdr + 1 > lenStr); /* Catch size_t overflow */
size_t len = mstrHdr + lenStr + NULL_SIZE;
sh = trymalloc? s_trymalloc(len) : s_malloc(len);
if (sh == NULL) return NULL;
s = (char*)sh + mstrHdr;
pInfo = ((unsigned char*)s) - 1;
switch(type) {
case MSTR_TYPE_5: {
*pInfo = CREATE_MSTR_INFO(lenStr, 0 /*ismeta*/, type);
break;
}
case MSTR_TYPE_8: {
MSTR_HDR_VAR(8,s);
*pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type);
sh->len = lenStr;
break;
}
case MSTR_TYPE_16: {
MSTR_HDR_VAR(16,s);
*pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type);
sh->len = lenStr;
break;
}
case MSTR_TYPE_64: {
MSTR_HDR_VAR(64,s);
*pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type);
sh->len = lenStr;
break;
}
}
if (initStr && lenStr)
memcpy(s, initStr, lenStr);
s[lenStr] = '\0';
return s;
}
/* Creates mstr with given string. Reserve space for metadata.
 *
 * Layout: [meta-data#N]...[meta-data#0][mstrFlags][mstrhdr][string][null]
 *
 * Note: mstrNew(s,l) and mstrNewWithMeta(s,l,0) are not the same. The first allocates
 * just string. The second allocates a string with flags (yet without any metadata
 * structures allocated).
 *
 * Returns NULL only when trymalloc is set and allocation fails. */
mstr mstrNewWithMeta(struct mstrKind *kind, const char *initStr, size_t lenStr, mstrFlags metaFlags, int trymalloc) {
    unsigned char *pInfo; /* pointer to mstr info field */
    char *allocMstr;
    mstr mstrPtr;

    char type = mstrReqType(lenStr);
    int mstrHdr = mstrHdrSize(type);
    int sumMetaLen = mstrSumMetaLen(kind, metaFlags);

    /* Catch size_t overflow (same guard as in mstrNew()) */
    assert(lenStr + sumMetaLen + sizeof(mstrFlags) + mstrHdr + NULL_SIZE > lenStr);

    /* mstrSumMetaLen() + sizeof(mstrFlags) + sizeof(mstrhdrX) + lenStr */
    size_t allocLen = sumMetaLen + sizeof(mstrFlags) + mstrHdr + lenStr + NULL_SIZE;
    allocMstr = trymalloc? s_trymalloc(allocLen) : s_malloc(allocLen);
    if (allocMstr == NULL) return NULL;

    /* metadata is located at the beginning of the allocation, then meta-flags and lastly the string */
    mstrFlags *pMetaFlags = (mstrFlags *) (allocMstr + sumMetaLen) ;
    mstrPtr = ((char*) pMetaFlags) + sizeof(mstrFlags) + mstrHdr;
    pInfo = ((unsigned char*)mstrPtr) - 1;

    switch(type) {
        case MSTR_TYPE_5: {
            /* TYPE_5 embeds the length inside the info byte itself */
            *pInfo = CREATE_MSTR_INFO(lenStr, 1 /*ismeta*/, type);
            break;
        }
        case MSTR_TYPE_8: {
            MSTR_HDR_VAR(8, mstrPtr);
            sh->len = lenStr;
            *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type);
            break;
        }
        case MSTR_TYPE_16: {
            MSTR_HDR_VAR(16, mstrPtr);
            sh->len = lenStr;
            *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type);
            break;
        }
        case MSTR_TYPE_64: {
            MSTR_HDR_VAR(64, mstrPtr);
            sh->len = lenStr;
            *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type);
            break;
        }
    }
    *pMetaFlags = metaFlags;
    if (initStr != NULL) memcpy(mstrPtr, initStr, lenStr);
    mstrPtr[lenStr] = '\0';
    return mstrPtr;
}
/* Create copy of mstr. Flags can be modified. For each metadata flag, if
 * same flag is set on both, then copy its metadata.
 * Caller owns the returned mstr and must release it with mstrFree(). */
mstr mstrNewCopy(struct mstrKind *kind, mstr src, mstrFlags newFlags) {
    mstr dst;

    /* if no flags are set, then just copy the string */
    if (newFlags == 0) return mstrNew(src, mstrlen(src), 0);

    /* mstrNewWithMeta() already copies the string payload and null-terminates
     * it, so no extra copy of the string is needed here. */
    dst = mstrNewWithMeta(kind, src, mstrlen(src), newFlags, 0);

    /* if metadata is attached to src, then selectively copy metadata */
    if (mstrIsMetaAttached(src)) {
        mstrFlags *pFlags1 = mstrFlagsRef(src),
                  *pFlags2 = mstrFlagsRef(dst);

        mstrFlags flags1Shift = *pFlags1,
                  flags2Shift = *pFlags2;

        /* Metadata lies before the flags word; walk backward from it. */
        unsigned char *at1 = ((unsigned char *) pFlags1),
                      *at2 = ((unsigned char *) pFlags2);

        /* if the flag is set on both, then copy the metadata */
        for (int i = 0; flags1Shift != 0; ++i) {
            int isFlag1Set = flags1Shift & 0x1;
            int isFlag2Set = flags2Shift & 0x1;
            if (isFlag1Set) at1 -= kind->metaSize[i];
            if (isFlag2Set) at2 -= kind->metaSize[i];
            if (isFlag1Set && isFlag2Set)
                memcpy(at2, at1, kind->metaSize[i]);
            flags1Shift >>= 1;
            flags2Shift >>= 1;
        }
    }
    return dst;
}
/* Free mstring. Note, mstrKind is required to eval sizeof metadata and find start
 * of allocation but if mstrIsMetaAttached(s) is false, you can pass NULL as well. */
void mstrFree(struct mstrKind *kind, mstr s) {
    if (s == NULL) return;
    s_free(mstrGetAllocPtr(kind, s));
}
/* Return ref to metadata flags. Useful to modify directly flags which doesn't
 * include metadata payload. The flags word sits immediately before the header. */
mstrFlags *mstrFlagsRef(mstr s) {
    size_t hdrSize;

    switch (s[-1] & MSTR_TYPE_MASK) {
        case MSTR_TYPE_5:  hdrSize = sizeof(struct mstrhdr5);  break;
        case MSTR_TYPE_8:  hdrSize = sizeof(struct mstrhdr8);  break;
        case MSTR_TYPE_16: hdrSize = sizeof(struct mstrhdr16); break;
        default:           hdrSize = sizeof(struct mstrhdr64); break; /* MSTR_TYPE_64 */
    }
    return ((mstrFlags *)(s - hdrSize)) - 1;
}
/* Return a reference to corresponding metadata of the specified metadata flag
 * index (flagIdx). If the metadata doesn't exist, it still returns a reference
 * to the starting location where it would have been written among other metadatas.
 * To verify if `flagIdx` of some metadata is attached, use `mstrGetFlag(s, flagIdx)`.
 */
void *mstrMetaRef(mstr s, struct mstrKind *kind, int flagIdx) {
    /* Metadata fields precede the flags word, laid out in reverse enum order;
     * accumulate sizes of the attached fields up to (and including) flagIdx. */
    mstrFlags *pFlags = mstrFlagsRef(s);
    mstrFlags remaining = *pFlags;
    int offset = 0;

    for (int i = 0; i <= flagIdx; ++i, remaining >>= 1) {
        if (remaining & 0x1)
            offset += kind->metaSize[i];
    }
    return ((char *)pFlags) - offset;
}
/* mstr layout: [meta-data#N]...[meta-data#0][mstrFlags][mstrhdr][string][null]
 * Return pointer to the start of the allocation. */
void *mstrGetAllocPtr(struct mstrKind *kind, mstr str) {
    char *base = (char *)str - mstrHdrSize(str[-1]);
    if (!mstrIsMetaAttached(str))
        return base;
    return base - sizeof(mstrFlags) - mstrSumMetaLen(kind, *mstrFlagsRef(str));
}
/* Prints in the following fashion:
 * [0x7f8bd8816017] my_mstr: foo (strLen=3, mstrLen=11, isMeta=1, metaFlags=0x1)
 * [0x7f8bd8816010] >> meta[0]: 0x78 0x56 0x34 0x12 (metaLen=4)
 */
void mstrPrint(mstr s, struct mstrKind *kind, int verbose) {
    int isMeta = mstrIsMetaAttached(s);

    if (!isMeta) {
        printf("[%p] %s: %s (strLen=%zu, mstrLen=%zu, isMeta=0)\n",
               (void *)s, kind->name, s, mstrlen(s), mstrAllocLen(s, kind));
        return;
    }

    mstrFlags mflags = *mstrFlagsRef(s);
    printf("[%p] %s: %s (strLen=%zu, mstrLen=%zu, isMeta=1, metaFlags=0x%x)\n",
           (void *)s, kind->name, s, mstrlen(s), mstrAllocLen(s, kind), mflags);

    if (!verbose) return;

    /* Dump each attached metadata field byte-by-byte */
    mstrFlags tmp = mflags;
    for (unsigned int i = 0; i < NUM_MSTR_FLAGS; ++i, tmp >>= 1) {
        if (!(tmp & 0x1)) continue;
        int mSize = kind->metaSize[i];
        unsigned char *mRef = mstrMetaRef(s, kind, i);
        printf("[%p] >> meta[%d]:", (void *)mRef, i);
        for (int j = 0; j < mSize; ++j)
            printf(" 0x%02x", mRef[j]);
        printf(" (metaLen=%d)\n", mSize);
    }
}
/* Return length of the string (ignoring metadata attached). */
size_t mstrlen(const mstr s) {
    unsigned char info = s[-1];

    /* TYPE_5 embeds its length in the info byte; the rest keep a len field. */
    switch (info & MSTR_TYPE_MASK) {
        case MSTR_TYPE_5:  return MSTR_TYPE_5_LEN(info);
        case MSTR_TYPE_8:  return MSTR_HDR(8, s)->len;
        case MSTR_TYPE_16: return MSTR_HDR(16, s)->len;
        default:           return MSTR_HDR(64, s)->len; /* MSTR_TYPE_64 */
    }
}
/*** mstr internals ***/

/* Sum sizes of all metadata fields whose bit is set in 'flags'. */
static inline int mstrSumMetaLen(mstrKind *k, mstrFlags flags) {
    int total = 0;
    for (int i = 0; flags != 0; ++i, flags >>= 1) {
        if (flags & 0x1)
            total += k->metaSize[i];
    }
    return total;
}
/* mstrSumMetaLen() + sizeof(mstrFlags) + sizeof(mstrhdrX) + strlen + '\0' */
static inline size_t mstrAllocLen(const mstr s, struct mstrKind *kind) {
int hdrlen;
mstrFlags *pMetaFlags;
size_t strlen = 0;
int isMeta = mstrIsMetaAttached(s);
unsigned char info = s[-1];
switch(info & MSTR_TYPE_MASK) {
case MSTR_TYPE_5:
strlen = MSTR_TYPE_5_LEN(info);
hdrlen = sizeof(struct mstrhdr5);
pMetaFlags = ((mstrFlags *) MSTR_HDR(5, s)) - 1;
break;
case MSTR_TYPE_8:
strlen = MSTR_HDR(8,s)->len;
hdrlen = sizeof(struct mstrhdr8);
pMetaFlags = ((mstrFlags *) MSTR_HDR(8, s)) - 1;
break;
case MSTR_TYPE_16:
strlen = MSTR_HDR(16,s)->len;
hdrlen = sizeof(struct mstrhdr16);
pMetaFlags = ((mstrFlags *) MSTR_HDR(16, s)) - 1;
break;
default: /* MSTR_TYPE_64: */
strlen = MSTR_HDR(64,s)->len;
hdrlen = sizeof(struct mstrhdr64);
pMetaFlags = ((mstrFlags *) MSTR_HDR(64, s)) - 1;
break;
}
return hdrlen + strlen + NULL_SIZE + ((isMeta) ? (mstrSumMetaLen(kind, *pMetaFlags) + sizeof(mstrFlags)) : 0);
}
/* Returns pointer to the beginning of malloc() of mstr. */
void *mstrGetStartAlloc(mstr s, struct mstrKind *kind) {
    char *p = (char *)s - mstrHdrSize(s[-1]);

    /* Metadata (if any) precedes the header: flags word first, then fields */
    if (mstrIsMetaAttached(s))
        p -= sizeof(mstrFlags) + mstrSumMetaLen(kind, *mstrFlagsRef(s));
    return p;
}
/* Map an mstr type to the size of its header struct. */
static inline int mstrHdrSize(char type) {
    switch (type & MSTR_TYPE_MASK) {
        case MSTR_TYPE_5:  return sizeof(struct mstrhdr5);
        case MSTR_TYPE_8:  return sizeof(struct mstrhdr8);
        case MSTR_TYPE_16: return sizeof(struct mstrhdr16);
        default:           return sizeof(struct mstrhdr64); /* MSTR_TYPE_64 */
    }
}
/* Pick the smallest header type capable of holding 'string_size'. */
static inline char mstrReqType(size_t string_size) {
    if (string_size >= 1<<16) return MSTR_TYPE_64;
    if (string_size >= 1<<8)  return MSTR_TYPE_16;
    if (string_size >= 1<<5)  return MSTR_TYPE_8;
    return MSTR_TYPE_5;
}
#ifdef REDIS_TEST
#include <stdlib.h>
#include <assert.h>
#include "testhelp.h"
#include "limits.h"
#ifndef UNUSED
#define UNUSED(x) (void)(x)
#endif
/* Challenge mstr with metadata interesting enough that can include the case of hfield and hkey and more */
#define B(idx) (1<<(idx))
#define META_IDX_MYMSTR_TTL4 0
#define META_IDX_MYMSTR_TTL8 1
#define META_IDX_MYMSTR_TYPE_ENC_LRU 2 // 4Bbit type, 4bit encoding, 24bits lru
#define META_IDX_MYMSTR_VALUE_PTR 3
#define META_IDX_MYMSTR_FLAG_NO_META 4
#define TEST_CONTEXT(context) printf("\nContext: %s \n", context);
/* Unit tests for mstr: creation, metadata attachment, copy and layout checks.
 * Exercised only when compiled with REDIS_TEST. */
int mstrTest(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    struct mstrKind kind_mymstr = {
        .name = "my_mstr",
        .metaSize[META_IDX_MYMSTR_TTL4] = 4,
        .metaSize[META_IDX_MYMSTR_TTL8] = 8,
        .metaSize[META_IDX_MYMSTR_TYPE_ENC_LRU] = 4,
        .metaSize[META_IDX_MYMSTR_VALUE_PTR] = 8,
        .metaSize[META_IDX_MYMSTR_FLAG_NO_META] = 0,
    };

    TEST_CONTEXT("Create simple short mstr")
    {
        char *str = "foo";
        mstr s = mstrNew(str, strlen(str), 0);
        size_t expStrLen = strlen(str);

        test_cond("Verify str length and alloc length",
                  mstrAllocLen(s, NULL) == (1 + expStrLen + 1) && /* mstrhdr5 + str + null */
                  mstrlen(s) == expStrLen &&  /* expected strlen(str) */
                  memcmp(s, str, expStrLen + 1) == 0);
        mstrFree(&kind_mymstr, s);
    }

    TEST_CONTEXT("Create simple 40 bytes mstr")
    {
        /* 40 bytes exceeds TYPE_5 capacity (31), so an mstrhdr8 is expected */
        char *str = "0123456789012345678901234567890123456789"; // 40 bytes
        mstr s = mstrNew(str, strlen(str), 0);

        test_cond("Verify str length and alloc length",
                  mstrAllocLen(s, NULL) == (3 + 40 + 1) && /* mstrhdr8 + str + null */
                  mstrlen(s) == 40 &&
                  memcmp(s,str,40) == 0);
        mstrFree(&kind_mymstr, s);
    }

    TEST_CONTEXT("Create mstr with random characters")
    {
        /* Lengths straddle each header-type boundary (31/32, 255/256, 65535/65536) */
        long unsigned int i;
        char str[66000];
        for (i = 0 ; i < sizeof(str) ; ++i) str[i] = rand() % 256;

        size_t len[] = { 31, 32, 33, 255, 256, 257, 65535, 65536, 65537, 66000};

        for (i = 0 ; i < sizeof(len) / sizeof(len[0]) ; ++i) {
            char title[100];
            mstr s = mstrNew(str, len[i], 0);
            size_t mstrhdrSize = (len[i] < 1<<5) ? sizeof(struct mstrhdr5) :
                                 (len[i] < 1<<8) ? sizeof(struct mstrhdr8) :
                                 (len[i] < 1<<16) ? sizeof(struct mstrhdr16) :
                                 sizeof(struct mstrhdr64);

            snprintf(title, sizeof(title), "Verify string of length %zu", len[i]);
            test_cond(title,
                      mstrAllocLen(s, NULL) == (mstrhdrSize + len[i] + 1) && /* mstrhdrX + str + null */
                      mstrlen(s) == len[i] &&
                      memcmp(s,str,len[i]) == 0);
            mstrFree(&kind_mymstr, s);
        }
    }

    TEST_CONTEXT("Create short mstr with TTL4")
    {
        uint32_t *ttl;
        mstr s = mstrNewWithMeta(&kind_mymstr,
                                 "foo",
                                 strlen("foo"),
                                 B(META_IDX_MYMSTR_TTL4), /* allocate with TTL4 metadata */
                                 0);

        ttl = mstrMetaRef(s, &kind_mymstr, META_IDX_MYMSTR_TTL4);
        *ttl = 0x12345678;

        test_cond("Verify memory-allocation and string lengths",
                  mstrAllocLen(s, &kind_mymstr) == (1 + 3 + 2 + 1 + 4) && /* mstrhdr5 + str + null + mstrFlags + TLL */
                  mstrlen(s) == 3);

        /* Expected raw layout: TTL4 payload, flags word, info byte, string, null */
        unsigned char expMem[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x1c, 'f', 'o', 'o', '\0' };
        uint32_t value = 0x12345678;
        memcpy(expMem, &value, sizeof(uint32_t));

        test_cond("Verify string and TTL4 payload", memcmp(
                  mstrMetaRef(s, &kind_mymstr, 0) , expMem, sizeof(expMem)) == 0);
        test_cond("Verify mstrIsMetaAttached() function works", mstrIsMetaAttached(s) != 0);
        mstrFree(&kind_mymstr, s);
    }

    TEST_CONTEXT("Create short mstr with TTL4 and value ptr ")
    {
        mstr s = mstrNewWithMeta(&kind_mymstr, "foo", strlen("foo"),
                                 B(META_IDX_MYMSTR_TTL4) | B(META_IDX_MYMSTR_VALUE_PTR), 0);
        *((uint32_t *) (mstrMetaRef(s, &kind_mymstr,
                                    META_IDX_MYMSTR_TTL4))) = 0x12345678;

        test_cond("Verify length and alloc length",
                  mstrAllocLen(s, &kind_mymstr) == (1 + 3 + 1 + 2 + 4 + 8) && /* mstrhdr5 + str + null + mstrFlags + TLL + PTR */
                  mstrlen(s) == 3);
        mstrFree(&kind_mymstr, s);
    }

    TEST_CONTEXT("Copy mstr and add it TTL4")
    {
        mstr s1 = mstrNew("foo", strlen("foo"), 0);
        mstr s2 = mstrNewCopy(&kind_mymstr, s1, B(META_IDX_MYMSTR_TTL4));
        *((uint32_t *) (mstrMetaRef(s2, &kind_mymstr, META_IDX_MYMSTR_TTL4))) = 0x12345678;

        test_cond("Verify new mstr includes TTL4",
                  mstrAllocLen(s2, &kind_mymstr) == (1 + 3 + 1 + 2 + 4) && /* mstrhdr5 + str + null + mstrFlags + TTL4 */
                  mstrlen(s2) == 3 && /* 'foo' = 3bytes */
                  memcmp(s2, "foo\0", 4) == 0);

        /* Copy a copy: metadata flagged on both sides must be carried over */
        mstr s3 = mstrNewCopy(&kind_mymstr, s2, B(META_IDX_MYMSTR_TTL4));
        unsigned char expMem[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x1, 0x0, 0x1c, 'f', 'o', 'o', '\0' };
        uint32_t value = 0x12345678;
        memcpy(expMem, &value, sizeof(uint32_t));
        char *ppp = mstrGetStartAlloc(s3, &kind_mymstr);
        test_cond("Verify string and TTL4 payload",
                  memcmp(ppp, expMem, sizeof(expMem)) == 0);

        mstrPrint(s3, &kind_mymstr, 1);
        mstrFree(&kind_mymstr, s1);
        mstrFree(&kind_mymstr, s2);
        mstrFree(&kind_mymstr, s3);
    }
    return 0;
}
#endif

223
src/mstr.h Normal file
View File

@ -0,0 +1,223 @@
/*
* Copyright Redis Ltd. 2024 - present
*
* Licensed under your choice of the Redis Source Available License 2.0 (RSALv2)
* or the Server Side Public License v1 (SSPLv1).
*
*
* WHAT IS MSTR (M-STRING)?
* ------------------------
* mstr stands for immutable string with optional metadata attached.
*
* sds string is widely used across the system and serves as a general purpose
* container to hold data. The need to optimize memory and aggregate strings
* along with metadata and store it into Redis data-structures as single bulk keep
* reoccur. One thought might be, why not to extend sds to support metadata. The
* answer is that sds is mutable string in its nature, with wide API (split, join,
* etc.). Pushing metadata logic into sds will make it very fragile, and complex
* to maintain.
*
* Another idea involved using a simple struct with flags and a dynamic buf[] at the
* end. While this could be viable, it introduces considerable complexity and would
* need maintenance across different contexts.
*
* As an alternative, we introduce a new implementation of immutable strings,
* with limited API, and with the option to attach metadata. The representation
* of the string, without any metadata, in its basic form, resembles SDS but
* without the API to manipulate the string. Only to attach metadata to it. The
* following diagram shows the memory layout of mstring (mstrhdr8) when no
* metadata is attached:
*
* +----------------------------------------------+
* | mstrhdr8 | c-string | |
* +--------------------------------+-------------+
* |8b |2b |1b |5b |?bytes |8b|
* | Len | Type |m-bit=0 | Unused | String |\0|
* +----------------------------------------------+
* ^
* |
* mstrNew() returns pointer to here --+
*
* If metadata-flag is set, depicted in diagram above as m-bit in the diagram,
* then the header will be preceded with additional 16 bits of metadata flags such
* that if i'th bit is set, then the i'th metadata structure is attached to the
* mstring. The metadata layout and their sizes are defined by mstrKind structure
* (More below).
*
* The following diagram shows the memory layout of mstr (mstrhdr8) when 3 bits in mFlags
* are set to indicate that 3 fields of metadata are attached to the mstring at the
* beginning.
*
* +-------------------------------------------------------------------------------+
* | METADATA FIELDS | mflags | mstrhdr8 | c-string | |
* +-----------------------+--------+--------------------------------+-------------+
* |?bytes |?bytes |?bytes |16b |8b |2b |1b |5b |?bytes |8b|
* | Meta3 | Meta2 | Meta0 | 0x1101 | Len | Type |m-bit=1 | Unused | String |\0|
* +-------------------------------------------------------------------------------+
* ^
* |
* mstrNewWithMeta() returns pointer to here --+
*
* mstr allows to define different kinds (groups) of mstrings, each with its
* own unique metadata layout. For example, in case of hash-fields, all instances of
* it can optionally have TTL metadata attached to it. This is achieved by first
* prototyping a single mstrKind structure that defines the metadata layout and sizes
* of this specific kind. Now each hash-field instance has still the freedom to
* attach or not attach the metadata to it, and metadata flags (mFlags) of the
* instance will reflect this decision.
*
* In the future, the keys of Redis keyspace can be another kind of mstring that
* has TTL, LRU or even dictEntry metadata embedded into. Unlike vptr in c++, this
* struct won't be attached to mstring but will be passed as yet another argument
* to API, to save memory. In addition, each instance of a given mstrkind can hold
* any subset of metadata and the 8 bits of metadata-flags will reflect it.
*
* The following example shows how to define mstrKind for possible future keyspace
* that aggregates several keyspace related metadata into one compact, singly
* allocated, mstring.
*
* typedef enum HkeyMetaFlags {
* HKEY_META_VAL_REF_COUNT = 0, // refcount
* HKEY_META_VAL_REF = 1, // Val referenced
* HKEY_META_EXPIRE = 2, // TTL and more
* HKEY_META_TYPE_ENC_LRU = 3, // TYPE + LRU + ENC
* HKEY_META_DICT_ENT_NEXT = 4, // Next dict entry
* // Following two must be together and in this order
* HKEY_META_VAL_EMBED8 = 5, // Val embedded, max 7 bytes
* HKEY_META_VAL_EMBED16 = 6, // Val embedded, max 15 bytes (23 with EMBED8)
* } HkeyMetaFlags;
*
* mstrKind hkeyKind = {
* .name = "hkey",
* .metaSize[HKEY_META_VAL_REF_COUNT] = 4,
* .metaSize[HKEY_META_VAL_REF] = 8,
* .metaSize[HKEY_META_EXPIRE] = sizeof(ExpireMeta),
* .metaSize[HKEY_META_TYPE_ENC_LRU] = 8,
* .metaSize[HKEY_META_DICT_ENT_NEXT] = 8,
* .metaSize[HKEY_META_VAL_EMBED8] = 8,
* .metaSize[HKEY_META_VAL_EMBED16] = 16,
* };
*
* MSTR-ALIGNMENT
* --------------
* There are two types of alignments to take into consideration:
* 1. Alignment of the metadata.
* 2. Alignment of returned mstr pointer
*
* 1) As the metadatas layout are reversed to their enumeration, it is recommended
* to put metadata with "better" alignment first in memory layout (enumerated
* last) and the worst, or those that simply don't require any alignment will be
* last in memory layout (enumerated first). This is similar the to the applied
* consideration when defining new struct in C. Note also that each metadata
* might either be attached to mstr or not which complicates the design phase
* of a new mstrKind a little.
*
* In the example above, HKEY_META_VAL_REF_COUNT, with worst alignment of 4
* bytes, is enumerated first, and therefore, will be last in memory layout.
*
* 2) Few optimizations in Redis rely on the fact that sds address is always an odd
* pointer. We can achieve the same with a little effort. It was already taken
* care that all headers of type mstrhdrX has odd size. With that in mind, if
* a new kind of mstr is required to be limited to odd addresses, then we must
* make sure that sizes of all related metadatas that are defined in mstrKind
* are even in size.
*/
#ifndef __MSTR_H
#define __MSTR_H
#include <sys/types.h>
#include <stdarg.h>
#include <stdint.h>
/* Selective copy of ifndef from server.h instead of including it */
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif

/* mstr type encoding, stored in the 2 least significant bits of the info byte
 * that precedes the string buffer. The type selects which mstrhdrX header
 * variant precedes the string. */
#define MSTR_TYPE_5 0
#define MSTR_TYPE_8 1
#define MSTR_TYPE_16 2
#define MSTR_TYPE_64 3
#define MSTR_TYPE_MASK 3
#define MSTR_TYPE_BITS 2

/* Bit 2 of the info byte: set when metadata is attached to the mstring. */
#define MSTR_META_MASK 4

/* Access the mstrhdrX header located right before the string buffer. */
#define MSTR_HDR(T,s) ((struct mstrhdr##T *)((s)-(sizeof(struct mstrhdr##T))))
#define MSTR_HDR_VAR(T,s) struct mstrhdr##T *sh = (void*)((s)-(sizeof(struct mstrhdr##T)));

#define MSTR_META_BITS 1 /* is metadata attached? */

/* Extract the 5-bit string length out of a type-5 info byte. */
#define MSTR_TYPE_5_LEN(f) ((f) >> (MSTR_TYPE_BITS + MSTR_META_BITS))

/* Pack length, metadata bit and type into a single info value. Every macro
 * parameter is parenthesized so that expression arguments (e.g. "a+b")
 * expand with the intended precedence. */
#define CREATE_MSTR_INFO(len, ismeta, type) ( ((((len)<<MSTR_META_BITS) + (ismeta)) << (MSTR_TYPE_BITS)) | (type) )

/* mimic plain c-string */
typedef char *mstr;

/* Flags that can be set on an mstring to indicate which metadata items are
 * attached to it. Each set bit at flag-index i corresponds to the metadata
 * registered at mstrKind.metaSize[i]. */
typedef uint16_t mstrFlags;
/* Header for short mstrings: the 5 most significant bits of the single info
 * byte hold the length (0..31), so no separate length field is needed. */
struct __attribute__ ((__packed__)) mstrhdr5 {
    unsigned char info; /* 2 lsb of type, 1 metadata, and 5 msb of string length */
    char buf[];
};
/* Header with an 8-bit length field. The pad byte keeps the header size odd
 * (see MSTR-ALIGNMENT(2) comment above). */
struct __attribute__ ((__packed__)) mstrhdr8 {
    uint8_t unused; /* To achieve odd size header (See comment above) */
    uint8_t len;
    unsigned char info; /* 2 lsb of type, 6 unused bits */
    char buf[];
};
/* Header with a 16-bit length field (size is already odd: 3 bytes). */
struct __attribute__ ((__packed__)) mstrhdr16 {
    uint16_t len;
    unsigned char info; /* 2 lsb of type, 6 unused bits */
    char buf[];
};
/* Header with a 64-bit length field (size is already odd: 9 bytes). */
struct __attribute__ ((__packed__)) mstrhdr64 {
    uint64_t len;
    unsigned char info; /* 2 lsb of type, 6 unused bits */
    char buf[];
};
/* Number of assignable flag indexes: one per bit of mstrFlags. */
#define NUM_MSTR_FLAGS (sizeof(mstrFlags)*8)
/* mstrKind is used to define a kind (a group) of mstring with its own metadata layout */
typedef struct mstrKind {
    const char *name;             /* Name of this mstr kind. */
    int metaSize[NUM_MSTR_FLAGS]; /* Byte size of the metadata behind each flag index. */
} mstrKind;
/* Constructors. 'trymalloc' selects the try-variant of allocation
 * (NOTE(review): presumably may return NULL on OOM — confirm in mstr.c). */
mstr mstrNew(const char *initStr, size_t lenStr, int trymalloc);
mstr mstrNewWithMeta(struct mstrKind *kind, const char *initStr, size_t lenStr, mstrFlags flags, int trymalloc);
mstr mstrNewCopy(struct mstrKind *kind, mstr src, mstrFlags newFlags);
/* Start of the underlying allocation (e.g. what zmalloc_size() expects). */
void *mstrGetAllocPtr(struct mstrKind *kind, mstr str);
void mstrFree(struct mstrKind *kind, mstr s);
/* Access the flags word of an mstring that has metadata attached. */
mstrFlags *mstrFlagsRef(mstr s);
/* Access the metadata attached at 'flagIdx', per the kind's metaSize[] layout. */
void *mstrMetaRef(mstr s, struct mstrKind *kind, int flagIdx);
size_t mstrlen(const mstr s);
/* Return non-zero if metadata is attached to the mstring. */
static inline int mstrIsMetaAttached(mstr s) {
    unsigned char info = s[-1];
    return info & MSTR_META_MASK;
}
/* Return whether the flag at index 'flagIdx' is set on the mstring. */
static inline int mstrGetFlag(mstr s, int flagIdx) {
    int bit = 1 << flagIdx;
    return *mstrFlagsRef(s) & bit;
}
/* See comment above about MSTR-ALIGNMENT(2) */
/* All header sizes must be odd so that, with even-sized flags/metadata in
 * front, the returned mstr pointer stays odd (like sds). */
static_assert(sizeof(struct mstrhdr5 ) % 2 == 1, "must be odd");
static_assert(sizeof(struct mstrhdr8 ) % 2 == 1, "must be odd");
static_assert(sizeof(struct mstrhdr16 ) % 2 == 1, "must be odd");
static_assert(sizeof(struct mstrhdr64 ) % 2 == 1, "must be odd");
static_assert(sizeof(mstrFlags ) % 2 == 0, "must be even to keep mstr pointer odd");
#ifdef REDIS_TEST
int mstrTest(int argc, char *argv[], int flags);
#endif
#endif

View File

@ -31,6 +31,14 @@ size_t sdsZmallocSize(sds s) {
return zmalloc_size(sh);
}
/* Return the size consumed from the allocator for the specified hfield with
 * metadata (mstr), including internal fragmentation. Used when computing the
 * client output buffer size. */
size_t hfieldZmallocSize(hfield s) {
    return zmalloc_size(hfieldGetAllocPtr(s));
}
/* Return the amount of memory used by the sds string at object->ptr
* for a string object. This includes internal fragmentation. */
size_t getStringObjectSdsUsedMemory(robj *o) {

View File

@ -80,7 +80,7 @@ sds keyspaceEventsFlagsToString(int flags) {
* 'event' is a C string representing the event name.
* 'key' is a Redis object representing the key name.
* 'dbid' is the database ID where the key lives. */
void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid) {
void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) {
sds chan;
robj *chanobj, *eventobj;
int len = -1;

View File

@ -979,7 +979,6 @@ size_t streamRadixTreeMemoryUsage(rax *rax) {
* are checked and averaged to estimate the total size. */
#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
sds ele, ele2;
dict *d;
dictIterator *di;
struct dictEntry *de;
@ -1016,7 +1015,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
di = dictGetIterator(d);
asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d));
while((de = dictNext(di)) != NULL && samples < sample_size) {
ele = dictGetKey(de);
sds ele = dictGetKey(de);
elesize += dictEntryMemUsage() + sdsZmallocSize(ele);
samples++;
}
@ -1057,9 +1056,9 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
di = dictGetIterator(d);
asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d));
while((de = dictNext(di)) != NULL && samples < sample_size) {
ele = dictGetKey(de);
ele2 = dictGetVal(de);
elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
hfield ele = dictGetKey(de);
sds ele2 = dictGetVal(de);
elesize += hfieldZmallocSize(ele) + sdsZmallocSize(ele2);
elesize += dictEntryMemUsage();
samples++;
}

View File

@ -173,11 +173,16 @@ raxNode *raxNewNode(size_t children, int datafield) {
/* Allocate a new rax and return its pointer. On out of memory the function
* returns NULL. */
rax *raxNew(void) {
rax *rax = rax_malloc(sizeof(*rax));
return raxNewWithMetadata(0);
}
/* Allocate a new rax with metadata */
rax *raxNewWithMetadata(int metaSize) {
rax *rax = rax_malloc(sizeof(*rax) + metaSize);
if (rax == NULL) return NULL;
rax->numele = 0;
rax->numnodes = 1;
rax->head = raxNewNode(0,0);
rax->head = raxNewNode(0, 0);
if (rax->head == NULL) {
rax_free(rax);
return NULL;
@ -1210,6 +1215,25 @@ void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
rax->numnodes--;
}
/* Same as raxRecursiveFree() with context argument.
 *
 * Depth-first post-order walk: recursively free every child of 'n', then
 * invoke 'free_callback' on the node's data (if any) passing 'ctx' through
 * verbatim, and finally release the node itself. */
void raxRecursiveFreeWithCtx(rax *rax, raxNode *n,
                             void (*free_callback)(void *item, void *ctx), void *ctx) {
    debugnode("free traversing",n);
    /* A compressed node has exactly one child regardless of its size. */
    int numchildren = n->iscompr ? 1 : n->size;
    raxNode **cp = raxNodeLastChildPtr(n);
    while(numchildren--) {
        raxNode *child;
        /* Copy via memcpy rather than dereferencing: child pointers inside
         * the node are not guaranteed to be aligned. */
        memcpy(&child,cp,sizeof(child));
        raxRecursiveFreeWithCtx(rax,child,free_callback, ctx);
        cp--;
    }
    debugnode("free depth-first",n);
    /* Only key nodes that actually carry data get the callback. */
    if (free_callback && n->iskey && !n->isnull)
        free_callback(raxGetData(n), ctx);
    rax_free(n);
    rax->numnodes--;
}
/* Free a whole radix tree, calling the specified callback in order to
* free the auxiliary data. */
void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
@ -1218,6 +1242,15 @@ void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
rax_free(rax);
}
/* Free a whole radix tree, calling the specified callback — together with
 * the opaque 'ctx' pointer — on each key's auxiliary data in order to
 * free it. */
void raxFreeWithCbAndContext(rax *rax,
                             void (*free_callback)(void *item, void *ctx), void *ctx) {
    raxRecursiveFreeWithCtx(rax,rax->head,free_callback,ctx);
    assert(rax->numnodes == 0);
    rax_free(rax);
}
/* Free a whole radix tree. */
void raxFree(rax *rax) {
raxFreeWithCallback(rax,NULL);

View File

@ -113,6 +113,7 @@ typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
void *metadata[];
} rax;
/* Stack data structure used by raxLowWalk() in order to, optionally, return
@ -166,12 +167,16 @@ typedef struct raxIterator {
/* Exported API. */
rax *raxNew(void);
rax *raxNewWithMetadata(int metaSize);
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
int raxFind(rax *rax, unsigned char *s, size_t len, void **value);
void raxFree(rax *rax);
void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
void raxFreeWithCbAndContext(rax *rax,
void (*free_callback)(void *item, void *ctx),
void *ctx);
void raxStart(raxIterator *it, rax *rt);
int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
int raxNext(raxIterator *it);

186
src/rdb.c
View File

@ -268,8 +268,9 @@ int rdbEncodeInteger(long long value, unsigned char *enc) {
* The returned value changes according to the flags, see
* rdbGenericLoadStringObject() for more info. */
void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
int plain = flags & RDB_LOAD_PLAIN;
int sds = flags & RDB_LOAD_SDS;
int plainFlag = flags & RDB_LOAD_PLAIN;
int sdsFlag = flags & RDB_LOAD_SDS;
int hfldFlag = flags & RDB_LOAD_HFLD;
int encode = flags & RDB_LOAD_ENC;
unsigned char enc[4];
long long val;
@ -295,11 +296,17 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
rdbReportCorruptRDB("Unknown RDB integer encoding type %d",enctype);
return NULL; /* Never reached. */
}
if (plain || sds) {
if (plainFlag || sdsFlag || hfldFlag) {
char buf[LONG_STR_SIZE], *p;
int len = ll2string(buf,sizeof(buf),val);
if (lenptr) *lenptr = len;
p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len);
if (plainFlag) {
p = zmalloc(len);
} else if (sdsFlag) {
p = sdsnewlen(SDS_NOINIT,len);
} else { /* hfldFlag */
p = hfieldNew(NULL, len, 0);
}
memcpy(p,buf,len);
return p;
} else if (encode) {
@ -368,8 +375,11 @@ ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
* changes according to 'flags'. For more info check the
* rdbGenericLoadStringObject() function. */
void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
int plain = flags & RDB_LOAD_PLAIN;
int sds = flags & RDB_LOAD_SDS;
int plainFlag = flags & RDB_LOAD_PLAIN;
int sdsFlag = flags & RDB_LOAD_SDS;
int hfldFlag = flags & RDB_LOAD_HFLD;
int robjFlag = (!(plainFlag || sdsFlag || hfldFlag)); /* not plain/sds/hfld */
uint64_t len, clen;
unsigned char *c = NULL;
char *val = NULL;
@ -382,11 +392,14 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
}
/* Allocate our target according to the uncompressed size. */
if (plain) {
if (plainFlag) {
val = ztrymalloc(len);
} else {
} else if (sdsFlag || robjFlag) {
val = sdstrynewlen(SDS_NOINIT,len);
} else { /* hfldFlag */
val = hfieldTryNew(NULL, len, 0);
}
if (!val) {
serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)len);
goto err;
@ -402,17 +415,17 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
}
zfree(c);
if (plain || sds) {
return val;
} else {
return createObject(OBJ_STRING,val);
}
return (robjFlag) ? createObject(OBJ_STRING,val) : (void *) val;
err:
zfree(c);
if (plain)
if (plainFlag) {
zfree(val);
else
} else if (sdsFlag || robjFlag) {
sdsfree(val);
} else { /* hfldFlag*/
hfieldFree(val);
}
return NULL;
}
@ -495,8 +508,12 @@ ssize_t rdbSaveStringObject(rio *rdb, robj *obj) {
* On I/O error NULL is returned.
*/
void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
int plain = flags & RDB_LOAD_PLAIN;
int sds = flags & RDB_LOAD_SDS;
void *buf;
int plainFlag = flags & RDB_LOAD_PLAIN;
int sdsFlag = flags & RDB_LOAD_SDS;
int hfldFlag = flags & RDB_LOAD_HFLD;
int robjFlag = (!(plainFlag || sdsFlag || hfldFlag)); /* not plain/sds/hfld */
int isencoded;
unsigned long long len;
@ -517,22 +534,8 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
}
}
if (plain || sds) {
void *buf = plain ? ztrymalloc(len) : sdstrynewlen(SDS_NOINIT,len);
if (!buf) {
serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
return NULL;
}
if (lenptr) *lenptr = len;
if (len && rioRead(rdb,buf,len) == 0) {
if (plain)
zfree(buf);
else
sdsfree(buf);
return NULL;
}
return buf;
} else {
/* return robj */
if (robjFlag) {
robj *o = tryCreateStringObject(SDS_NOINIT,len);
if (!o) {
serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
@ -544,6 +547,32 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
}
return o;
}
/* plain/sds/hfld */
if (plainFlag) {
buf = ztrymalloc(len);
} else if (sdsFlag) {
buf = sdstrynewlen(SDS_NOINIT,len);
} else { /* hfldFlag */
buf = hfieldTryNew(NULL, len, 0);
}
if (!buf) {
serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len);
return NULL;
}
if (lenptr) *lenptr = len;
if (len && rioRead(rdb,buf,len) == 0) {
if (plainFlag)
zfree(buf);
else if (sdsFlag) {
sdsfree(buf);
} else { /* hfldFlag */
hfieldFree(buf);
}
return NULL;
}
return buf;
}
robj *rdbLoadStringObject(rio *rdb) {
@ -924,11 +953,11 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
nwritten += n;
while((de = dictNext(di)) != NULL) {
sds field = dictGetKey(de);
hfield field = dictGetKey(de);
sds value = dictGetVal(de);
if ((n = rdbSaveRawString(rdb,(unsigned char*)field,
sdslen(field))) == -1)
hfieldlen(field))) == -1)
{
dictReleaseIterator(di);
return -1;
@ -1403,7 +1432,7 @@ werr:
return C_ERR;
}
/* This helper function is only used for diskless replication.
/* This helper function is only used for diskless replication.
* This is just a wrapper to rdbSaveRio() that additionally adds a prefix
* and a suffix to the generated RDB dump. The prefix is:
*
@ -1856,7 +1885,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
decrRefCount(ele);
}
listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
listTypeTryConversion(o, LIST_CONV_AUTO, NULL, NULL);
} else if (rdbtype == RDB_TYPE_SET) {
/* Read Set value */
if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
@ -1869,7 +1898,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
o = createSetObject();
/* It's faster to expand the dict to the right size asap in order
* to avoid rehashing */
if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr,len) != DICT_OK) {
if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr, len) != DICT_OK) {
rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
decrRefCount(o);
return NULL;
@ -1896,7 +1925,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
/* Fetch integer value from element. */
if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) {
uint8_t success;
o->ptr = intsetAdd(o->ptr,llval,&success);
o->ptr = intsetAdd(o->ptr, llval, &success);
if (!success) {
rdbReportCorruptRDB("Duplicate set members detected");
decrRefCount(o);
@ -1946,7 +1975,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
/* This will also be called when the set was just converted
* to a regular hash table encoded set. */
if (o->encoding == OBJ_ENCODING_HT) {
if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) {
if (dictAdd((dict*)o->ptr, sdsele, NULL) != DICT_OK) {
rdbReportCorruptRDB("Duplicate set members detected");
decrRefCount(o);
sdsfree(sdsele);
@ -2024,12 +2053,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
maxelelen <= server.zset_max_listpack_value &&
lpSafeToAdd(NULL, totelelen))
{
zsetConvert(o,OBJ_ENCODING_LISTPACK);
zsetConvert(o, OBJ_ENCODING_LISTPACK);
}
} else if (rdbtype == RDB_TYPE_HASH) {
uint64_t len;
int ret;
sds field, value;
sds value;
hfield field;
dict *dupSearchDict = NULL;
len = rdbLoadLen(rdb, NULL);
@ -2054,43 +2084,46 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
while (o->encoding == OBJ_ENCODING_LISTPACK && len > 0) {
len--;
/* Load raw strings */
if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_HFLD,NULL)) == NULL) {
decrRefCount(o);
if (dupSearchDict) dictRelease(dupSearchDict);
return NULL;
}
if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
sdsfree(field);
hfieldFree(field);
decrRefCount(o);
if (dupSearchDict) dictRelease(dupSearchDict);
return NULL;
}
if (dupSearchDict) {
sds field_dup = sdsdup(field);
sds field_dup = sdsnewlen(field, hfieldlen(field));
if (dictAdd(dupSearchDict, field_dup, NULL) != DICT_OK) {
rdbReportCorruptRDB("Hash with dup elements");
dictRelease(dupSearchDict);
decrRefCount(o);
sdsfree(field_dup);
sdsfree(field);
hfieldFree(field);
sdsfree(value);
return NULL;
}
}
/* Convert to hash table if size threshold is exceeded */
if (sdslen(field) > server.hash_max_listpack_value ||
if (hfieldlen(field) > server.hash_max_listpack_value ||
sdslen(value) > server.hash_max_listpack_value ||
!lpSafeToAdd(o->ptr, sdslen(field)+sdslen(value)))
!lpSafeToAdd(o->ptr, hfieldlen(field) + sdslen(value)))
{
hashTypeConvert(o, OBJ_ENCODING_HT);
dictUseStoredKeyApi((dict *)o->ptr, 1);
ret = dictAdd((dict*)o->ptr, field, value);
dictUseStoredKeyApi((dict *)o->ptr, 0);
if (ret == DICT_ERR) {
rdbReportCorruptRDB("Duplicate hash fields detected");
if (dupSearchDict) dictRelease(dupSearchDict);
sdsfree(value);
sdsfree(field);
hfieldFree(field);
decrRefCount(o);
return NULL;
}
@ -2098,10 +2131,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
/* Add pair to listpack */
o->ptr = lpAppend(o->ptr, (unsigned char*)field, sdslen(field));
o->ptr = lpAppend(o->ptr, (unsigned char*)field, hfieldlen(field));
o->ptr = lpAppend(o->ptr, (unsigned char*)value, sdslen(value));
sdsfree(field);
hfieldFree(field);
sdsfree(value);
}
@ -2113,7 +2146,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE) {
if (dictTryExpand(o->ptr,len) != DICT_OK) {
if (dictTryExpand(o->ptr, len) != DICT_OK) {
rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
decrRefCount(o);
return NULL;
@ -2124,22 +2157,25 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
while (o->encoding == OBJ_ENCODING_HT && len > 0) {
len--;
/* Load encoded strings */
if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_HFLD,NULL)) == NULL) {
decrRefCount(o);
return NULL;
}
if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) {
sdsfree(field);
hfieldFree(field);
decrRefCount(o);
return NULL;
}
/* Add pair to hash table */
ret = dictAdd((dict*)o->ptr, field, value);
dict *d = o->ptr;
dictUseStoredKeyApi(d, 1);
ret = dictAdd(d, field, value);
dictUseStoredKeyApi(d, 0);
if (ret == DICT_ERR) {
rdbReportCorruptRDB("Duplicate hash fields detected");
sdsfree(value);
sdsfree(field);
hfieldFree(field);
decrRefCount(o);
return NULL;
}
@ -2221,7 +2257,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
goto emptykey;
}
listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL);
listTypeTryConversion(o, LIST_CONV_AUTO, NULL, NULL);
} else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
rdbtype == RDB_TYPE_LIST_ZIPLIST ||
rdbtype == RDB_TYPE_SET_INTSET ||
@ -2236,7 +2272,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len);
if (encoded == NULL) return NULL;
o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */
o = createObject(OBJ_STRING, encoded); /* Obj type fixed below. */
/* Fix the object encoding, and make sure to convert the encoded
* data type into the base type if accordingly to the current
@ -2292,14 +2328,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
o->type = OBJ_HASH;
o->encoding = OBJ_ENCODING_LISTPACK;
if (hashTypeLength(o) > server.hash_max_listpack_entries ||
if (hashTypeLength(o, 0) > server.hash_max_listpack_entries ||
maxlen > server.hash_max_listpack_value)
{
hashTypeConvert(o, OBJ_ENCODING_HT);
}
}
break;
case RDB_TYPE_LIST_ZIPLIST:
case RDB_TYPE_LIST_ZIPLIST:
{
quicklist *ql = quicklistNew(server.list_max_listpack_size,
server.list_compress_depth);
@ -2341,7 +2377,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
o->type = OBJ_SET;
o->encoding = OBJ_ENCODING_INTSET;
if (intsetLen(o->ptr) > server.set_max_intset_entries)
setTypeConvert(o,OBJ_ENCODING_HT);
setTypeConvert(o, OBJ_ENCODING_HT);
break;
case RDB_TYPE_SET_LISTPACK:
if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
@ -2386,7 +2422,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
if (zsetLength(o) > server.zset_max_listpack_entries)
zsetConvert(o,OBJ_ENCODING_SKIPLIST);
zsetConvert(o, OBJ_ENCODING_SKIPLIST);
else
o->ptr = lpShrinkToFit(o->ptr);
break;
@ -2408,7 +2444,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
if (zsetLength(o) > server.zset_max_listpack_entries)
zsetConvert(o,OBJ_ENCODING_SKIPLIST);
zsetConvert(o, OBJ_ENCODING_SKIPLIST);
break;
case RDB_TYPE_HASH_ZIPLIST:
{
@ -2426,12 +2462,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
o->ptr = lp;
o->type = OBJ_HASH;
o->encoding = OBJ_ENCODING_LISTPACK;
if (hashTypeLength(o) == 0) {
if (hashTypeLength(o, 0) == 0) {
decrRefCount(o);
goto emptykey;
}
if (hashTypeLength(o) > server.hash_max_listpack_entries)
if (hashTypeLength(o, 0) > server.hash_max_listpack_entries)
hashTypeConvert(o, OBJ_ENCODING_HT);
else
o->ptr = lpShrinkToFit(o->ptr);
@ -2448,12 +2484,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
o->type = OBJ_HASH;
o->encoding = OBJ_ENCODING_LISTPACK;
if (hashTypeLength(o) == 0) {
if (hashTypeLength(o, 0) == 0) {
decrRefCount(o);
goto emptykey;
}
if (hashTypeLength(o) > server.hash_max_listpack_entries)
if (hashTypeLength(o, 0) > server.hash_max_listpack_entries)
hashTypeConvert(o, OBJ_ENCODING_HT);
break;
default:
@ -2540,7 +2576,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
/* Load the last entry ID. */
s->last_id.ms = rdbLoadLen(rdb,NULL);
s->last_id.seq = rdbLoadLen(rdb,NULL);
if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {
/* Load the first entry ID. */
s->first_id.ms = rdbLoadLen(rdb,NULL);
@ -2559,9 +2595,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
s->max_deleted_entry_id.ms = 0;
s->max_deleted_entry_id.seq = 0;
s->entries_added = s->length;
/* Since the rax is already loaded, we can find the first entry's
* ID. */
* ID. */
streamGetEdgeID(s,1,1,&s->first_id);
}
@ -2807,7 +2843,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
uint64_t eof = rdbLoadLen(rdb,NULL);
if (eof == RDB_LENERR) {
if (ptr) {
o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
o = createModuleObject(mt, ptr); /* creating just in order to easily destroy */
decrRefCount(o);
}
return NULL;
@ -2816,7 +2852,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
rdbReportCorruptRDB("The RDB file contains module data for the module '%s' that is not terminated by "
"the proper module value EOF marker", moduleTypeModuleName(mt));
if (ptr) {
o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */
o = createModuleObject(mt, ptr); /* creating just in order to easily destroy */
decrRefCount(o);
}
return NULL;
@ -2828,7 +2864,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
moduleTypeModuleName(mt));
return NULL;
}
o = createModuleObject(mt,ptr);
o = createModuleObject(mt, ptr);
} else {
rdbReportReadError("Unknown RDB encoding type %d",rdbtype);
return NULL;
@ -3256,8 +3292,8 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
* received from the master. In the latter case, the master is
* responsible for key expiry. If we would expire keys here, the
* snapshot taken by the master may not be reflected on the slave.
* Similarly, if the base AOF is RDB format, we want to load all
* the keys they are, since the log of operations in the incr AOF
* Similarly, if the base AOF is RDB format, we want to load all
* the keys they are, since the log of operations in the incr AOF
* is assumed to work in the exact keyspace state. */
if (val == NULL) {
/* Since we used to have bug that could lead to empty keys

View File

@ -105,6 +105,7 @@
#define RDB_LOAD_ENC (1<<0)
#define RDB_LOAD_PLAIN (1<<1)
#define RDB_LOAD_SDS (1<<2)
#define RDB_LOAD_HFLD (1<<3)
/* flags on the purpose of rdb save or load */
#define RDBFLAGS_NONE 0 /* No special RDB loading or saving. */

View File

@ -19,6 +19,8 @@
#include "syscheck.h"
#include "threads_mngr.h"
#include "fmtargs.h"
#include "mstr.h"
#include "ebuckets.h"
#include <time.h>
#include <signal.h>
@ -281,6 +283,18 @@ int dictSdsKeyCompare(dict *d, const void *key1,
return memcmp(key1, key2, l1) == 0;
}
/* Compare a lookup key of type sds against a stored key of type mstr
 * (hfield). Used by hash dict types whose stored fields are mstrings but
 * that are still queried with plain sds keys. Returns 1 if equal, else 0. */
int dictSdsMstrKeyCompare(dict *d, const void *sdsLookup, const void *mstrStored)
{
    UNUSED(d);
    /* Use size_t: sdslen()/hfieldlen() return size_t, and narrowing to int
     * would truncate lengths above INT_MAX, potentially making two
     * different-length keys compare as equal. */
    size_t l1 = sdslen((sds)sdsLookup);
    size_t l2 = hfieldlen((hfield)mstrStored);
    if (l1 != l2) return 0;
    return memcmp(sdsLookup, mstrStored, l1) == 0;
}
/* A case insensitive version used for the command lookup table and other
* places where case insensitive non binary-safe comparison is needed. */
int dictSdsKeyCaseCompare(dict *d, const void *key1,
@ -2500,6 +2514,7 @@ void resetServerStats(void) {
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
server.stat_expired_hash_fields = 0;
server.stat_expired_stale_perc = 0;
server.stat_expired_time_cap_reached_count = 0;
server.stat_expire_cycle_time_used = 0;
@ -2648,6 +2663,7 @@ void initServer(void) {
for (j = 0; j < server.dbnum; j++) {
server.db[j].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
server.db[j].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
server.db[j].hexpires = ebCreate();
server.db[j].expires_cursor = 0;
server.db[j].blocking_keys = dictCreate(&keylistDictType);
server.db[j].blocking_keys_unblock_on_nokey = dictCreate(&objectKeyPointerValueDictType);
@ -5849,6 +5865,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"sync_full:%lld\r\n", server.stat_sync_full,
"sync_partial_ok:%lld\r\n", server.stat_sync_partial_ok,
"sync_partial_err:%lld\r\n", server.stat_sync_partial_err,
"expired_hash_fields:%lld\r\n", server.stat_expired_hash_fields,
"expired_keys:%lld\r\n", server.stat_expiredkeys,
"expired_stale_perc:%.2f\r\n", server.stat_expired_stale_perc*100,
"expired_time_cap_reached_count:%lld\r\n", server.stat_expired_time_cap_reached_count,
@ -6862,9 +6879,11 @@ struct redisTest {
{"crc64", crc64Test},
{"zmalloc", zmalloc_test},
{"sds", sdsTest},
{"mstr", mstrTest},
{"dict", dictTest},
{"listpack", listpackTest},
{"kvstore", kvstoreTest},
{"ebuckets", ebucketsTest},
};
redisTestProc *getTestProcByName(const char *name) {
int numtests = sizeof(redisTests)/sizeof(struct redisTest);
@ -6891,6 +6910,7 @@ int main(int argc, char **argv) {
if (!strcasecmp(arg, "--accurate")) flags |= REDIS_TEST_ACCURATE;
else if (!strcasecmp(arg, "--large-memory")) flags |= REDIS_TEST_LARGE_MEMORY;
else if (!strcasecmp(arg, "--valgrind")) flags |= REDIS_TEST_VALGRIND;
else if (!strcasecmp(arg, "--verbose")) flags |= REDIS_TEST_VERBOSE;
}
if (!strcasecmp(argv[2], "all")) {

View File

@ -45,6 +45,8 @@ typedef long long ustime_t; /* microsecond time type. */
#include "ae.h" /* Event driven programming library */
#include "sds.h" /* Dynamic safe strings */
#include "mstr.h" /* Immutable strings with optional metadata attached */
#include "ebuckets.h" /* expiry data structure */
#include "dict.h" /* Hash tables */
#include "kvstore.h" /* Slot-based hash table */
#include "adlist.h" /* Linked lists */
@ -960,6 +962,7 @@ typedef struct replBufBlock {
typedef struct redisDb {
kvstore *keys; /* The keyspace for this DB */
kvstore *expires; /* Timeout of keys with a timeout set */
ebuckets hexpires; /* Hash expiration DS. Single TTL per hash (of next min field to expire) */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
dict *blocking_keys_unblock_on_nokey; /* Keys with clients waiting for
* data, and should be unblocked if key is deleted (XREADEDGROUP).
@ -1642,6 +1645,7 @@ struct redisServer {
long long stat_numcommands; /* Number of processed commands */
long long stat_numconnections; /* Number of connections received */
long long stat_expiredkeys; /* Number of expired keys */
long long stat_expired_hash_fields; /* Number of expired hash-fields */
double stat_expired_stale_perc; /* Percentage of keys probably expired */
long long stat_expired_time_cap_reached_count; /* Early expire cycle stops.*/
long long stat_expire_cycle_time_used; /* Cumulative microseconds used. */
@ -2444,6 +2448,10 @@ typedef struct {
#define IO_THREADS_OP_WRITE 2
extern int io_threads_op;
/* Hash-field data type (of t_hash.c) */
typedef mstr hfield;
extern mstrKind mstrFieldKind;
/*-----------------------------------------------------------------------------
* Extern declarations
*----------------------------------------------------------------------------*/
@ -2458,6 +2466,8 @@ extern dictType zsetDictType;
extern dictType dbDictType;
extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
extern dictType hashDictType;
extern dictType mstrHashDictType;
extern dictType mstrHashDictTypeWithHFE;
extern dictType stringSetDictType;
extern dictType externalStringType;
extern dictType sdsHashDictType;
@ -2469,6 +2479,9 @@ extern dictType sdsReplyDictType;
extern dictType keylistDictType;
extern dict *modules;
extern EbucketsType hashExpireBucketsType; /* global expires */
extern EbucketsType hashFieldExpiresBucketType; /* local per hash */
/*-----------------------------------------------------------------------------
* Functions prototypes
*----------------------------------------------------------------------------*/
@ -2611,6 +2624,7 @@ void copyReplicaOutputBuffer(client *dst, client *src);
void addListRangeReply(client *c, robj *o, long start, long end, int reverse);
void deferredAfterErrorReply(client *c, list *errors);
size_t sdsZmallocSize(sds s);
size_t hfieldZmallocSize(hfield s);
size_t getStringObjectSdsUsedMemory(robj *o);
void freeClientReplyValue(void *o);
void *dupClientReplyValue(void *o);
@ -3144,21 +3158,35 @@ void hashTypeConvert(robj *o, int enc);
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
int hashTypeExists(robj *o, sds key);
int hashTypeDelete(robj *o, sds key);
unsigned long hashTypeLength(const robj *o);
unsigned long hashTypeLength(const robj *o, int subtractExpiredFields);
hashTypeIterator *hashTypeInitIterator(robj *subject);
void hashTypeReleaseIterator(hashTypeIterator *hi);
int hashTypeNext(hashTypeIterator *hi);
int hashTypeNext(hashTypeIterator *hi, int skipExpiredFields);
void hashTypeCurrentFromListpack(hashTypeIterator *hi, int what,
unsigned char **vstr,
unsigned int *vlen,
long long *vll);
sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what);
void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll);
void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, char **str,
size_t *len, uint64_t *expireTime);
void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr,
unsigned int *vlen, long long *vll, uint64_t *expireTime);
sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
robj *hashTypeLookupWriteOrCreate(client *c, robj *key);
hfield hashTypeCurrentObjectNewHfield(hashTypeIterator *hi);
robj *hashTypeGetValueObject(robj *o, sds field);
int hashTypeSet(robj *o, sds field, sds value, int flags);
robj *hashTypeDup(robj *o);
int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags);
robj *hashTypeDup(robj *o, sds newkey, uint64_t *minHashExpire);
uint64_t hashTypeRemoveFromExpires(ebuckets *hexpires, robj *o);
void hashTypeAddToExpires(redisDb *db, sds key, robj *hashObj, uint64_t expireTime);
int64_t hashTypeGetMinExpire(robj *keyObj);
/* Hash-Field data type (of t_hash.c) */
hfield hfieldNew(const void *field, size_t fieldlen, int withExpireMeta);
hfield hfieldTryNew(const void *field, size_t fieldlen, int withExpireMeta);
int hfieldIsExpireAttached(hfield field);
int hfieldIsExpired(hfield field);
/* Release an hfield (an mstr of kind mstrFieldKind). */
static inline void hfieldFree(hfield field) { mstrFree(&mstrFieldKind, field); }
/* Return the raw allocation pointer behind the hfield -- presumably the start
 * of the mstr allocation including any metadata header (see mstr.c). */
static inline void *hfieldGetAllocPtr(hfield field) { return mstrGetAllocPtr(&mstrFieldKind, field); }
/* Length of the field name, in bytes. */
static inline size_t hfieldlen(hfield field) { return mstrlen(field);}
/* Pub / Sub */
int pubsubUnsubscribeAllChannels(client *c, int notify);
@ -3177,7 +3205,7 @@ dict *getClientPubSubChannels(client *c);
dict *getClientPubSubShardChannels(client *c);
/* Keyspace events notification */
void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid);
int keyspaceEventsStringToFlags(char *classes);
sds keyspaceEventsFlagsToString(int flags);
@ -3261,6 +3289,7 @@ int keyIsExpired(redisDb *db, robj *key);
long long getExpire(redisDb *db, robj *key);
void setExpire(client *c, redisDb *db, robj *key, long long when);
int checkAlreadyExpired(long long when);
int parseExtendedExpireArgumentsOrReply(client *c, int *flags);
robj *lookupKeyRead(redisDb *db, robj *key);
robj *lookupKeyWrite(redisDb *db, robj *key);
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply);
@ -3279,7 +3308,7 @@ int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
#define LOOKUP_NOEXPIRE (1<<4) /* Avoid deleting lazy expired keys. */
#define LOOKUP_NOEFFECTS (LOOKUP_NONOTIFY | LOOKUP_NOSTATS | LOOKUP_NOTOUCH | LOOKUP_NOEXPIRE) /* Avoid any effects from fetching the key */
void dbAdd(redisDb *db, robj *key, robj *val);
dictEntry *dbAdd(redisDb *db, robj *key, robj *val);
int dbAddRDBLoad(redisDb *db, sds key, robj *val);
void dbReplaceValue(redisDb *db, robj *key, robj *val);
@ -3434,6 +3463,7 @@ void expireSlaveKeys(void);
void rememberSlaveKeyWithExpire(redisDb *db, robj *key);
void flushSlaveKeysWithExpireList(void);
size_t getSlaveKeyWithExpireCount(void);
uint64_t hashTypeDbActiveExpire(redisDb *db, uint32_t maxFieldsToExpire);
/* evict.c -- maxmemory handling and LRU eviction. */
void evictionPoolAlloc(void);
@ -3451,6 +3481,7 @@ void startEvictionTimeProc(void);
uint64_t dictSdsHash(const void *key);
uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCompare(dict *d, const void *key1, const void *key2);
int dictSdsMstrKeyCompare(dict *d, const void *sdsLookup, const void *mstrStored);
int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2);
void dictSdsDestructor(dict *d, void *val);
void dictListDestructor(dict *d, void *val);
@ -3606,6 +3637,15 @@ void strlenCommand(client *c);
void zrankCommand(client *c);
void zrevrankCommand(client *c);
void hsetCommand(client *c);
void hpexpireCommand(client *c);
void hexpireCommand(client *c);
void hpexpireatCommand(client *c);
void hexpireatCommand(client *c);
void httlCommand(client *c);
void hpttlCommand(client *c);
void hexpiretimeCommand(client *c);
void hpexpiretimeCommand(client *c);
void hpersistCommand(client *c);
void hsetnxCommand(client *c);
void hgetCommand(client *c);
void hmgetCommand(client *c);

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,8 @@
#define REDIS_TEST_ACCURATE (1<<0)
#define REDIS_TEST_LARGE_MEMORY (1<<1)
#define REDIS_TEST_VALGRIND (1<<2)
#define REDIS_TEST_VERBOSE (1<<3)
extern int __failed_tests;
extern int __test_num;

View File

@ -34,6 +34,7 @@ set ::all_tests {
unit/type/set
unit/type/zset
unit/type/hash
unit/type/hash-field-expire
unit/type/stream
unit/type/stream-cgroups
unit/sort

View File

@ -0,0 +1,616 @@
######## HEXPIRE family commands
# Field does not exist
set E_NO_FIELD -2
# Specified NX | XX | GT | LT condition not met
set E_FAIL 0
# expiration time set/updated
set E_OK 1
# Field deleted because the specified expiration time is in the past
set E_DELETED 2
######## HTTL family commands
# Field does not exist
set T_NO_FIELD -2
# Field exists but has no associated expiration
set T_NO_EXPIRY -1
######## HPERSIST
# Field does not exist
set P_NO_FIELD -2
# Field exists but has no expiration to remove
set P_NO_EXPIRY -1
# Expiration removed; field is now persistent
set P_OK 1
############################### AUX FUNCS ######################################
# Reset $key and populate it as a hash from a list of {field value} pairs.
proc create_hash {key entries} {
    r del $key
    foreach pair $entries {
        lassign $pair field value
        r hset $key $field $value
    }
}
# Return the keys (first element of each {key value} entry) of list l,
# preserving the original order.
proc get_keys {l} {
    set keys {}
    foreach pair $l {
        lappend keys [lindex $pair 0]
    }
    return $keys
}
# Sample HRANDFIELD on hash_name 100 times and compare the set of distinct
# fields observed (sorted) against expected_result. Returns 1 on a match;
# otherwise returns the actual sorted field list, which makes the proc
# convenient inside wait_for_condition and for failure diagnostics.
proc cmp_hrandfield_result {hash_name expected_result} {
    # Accumulate distinct hrandfield results
    set seen [dict create]
    for {set i 0} {$i < 100} {incr i} {
        dict set seen [r hrandfield $hash_name] 1
    }
    set actual [lsort [dict keys $seen]]
    if {$actual eq $expected_result} {
        return 1
    }
    return $actual
}
# Verify HRANDFIELD behavior versus lazy field expiration, with active
# expiration enabled (1) or disabled (0) according to activeExpireConfig.
proc hrandfieldTest {activeExpireConfig} {
    r debug set-active-expire $activeExpireConfig
    r del myhash
    set contents {{field1 1} {field2 2} }
    create_hash myhash $contents
    # Valgrind runs are much slower; allow twice as many retries there.
    # (Fix: previously this factor was computed but never applied.)
    set factorValgrind [expr {$::valgrind ? 2 : 1}]
    # Set expiration time for field1 and field2 such that field1 expires first
    r hpexpire myhash 1 NX 1 field1
    r hpexpire myhash 100 NX 1 field2
    # On call hrandfield command lazy expire deletes field1 first
    wait_for_condition [expr {8 * $factorValgrind}] 10 {
        [cmp_hrandfield_result myhash "field2"] == 1
    } else {
        fail "Expected field2 to be returned by HRANDFIELD."
    }
    # On call hrandfield command lazy expire deletes field2 as well
    wait_for_condition [expr {8 * $factorValgrind}] 20 {
        [cmp_hrandfield_result myhash "{}"] == 1
    } else {
        fail "Expected {} to be returned by HRANDFIELD."
    }
    # restore the default value
    r debug set-active-expire 1
}
############################### TESTS #########################################
start_server {tags {"external:skip needs:debug"}} {
    # Currently listpack doesn't support HFE; force the dict encoding.
    r config set hash-max-listpack-entries 0
    test {HPEXPIRE - Test 'NX' flag} {
        r del myhash
        r hset myhash field1 value1 field2 value2 field3 value3
        # NX succeeds only for fields with no TTL yet
        assert_equal [r hpexpire myhash 1000 NX 1 field1] [list $E_OK]
        assert_equal [r hpexpire myhash 1000 NX 2 field1 field2] [list $E_FAIL $E_OK]
    }
    test {HPEXPIRE - Test 'XX' flag} {
        r del myhash
        r hset myhash field1 value1 field2 value2 field3 value3
        # XX succeeds only for fields that already have a TTL
        assert_equal [r hpexpire myhash 1000 NX 2 field1 field2] [list $E_OK $E_OK]
        assert_equal [r hpexpire myhash 1000 XX 2 field1 field3] [list $E_OK $E_FAIL]
    }
    test {HPEXPIRE - Test 'GT' flag} {
        r del myhash
        r hset myhash field1 value1 field2 value2
        assert_equal [r hpexpire myhash 1000 NX 1 field1] [list $E_OK]
        assert_equal [r hpexpire myhash 2000 NX 1 field2] [list $E_OK]
        # GT updates only when the new TTL is greater than the current one
        assert_equal [r hpexpire myhash 1500 GT 2 field1 field2] [list $E_OK $E_FAIL]
    }
    test {HPEXPIRE - Test 'LT' flag} {
        r del myhash
        r hset myhash field1 value1 field2 value2
        assert_equal [r hpexpire myhash 1000 NX 1 field1] [list $E_OK]
        assert_equal [r hpexpire myhash 2000 NX 1 field2] [list $E_OK]
        # LT updates only when the new TTL is less than the current one
        assert_equal [r hpexpire myhash 1500 LT 2 field1 field2] [list $E_FAIL $E_OK]
    }
    test {HPEXPIREAT - field not exists or TTL is in the past} {
        r del myhash
        r hset myhash f1 v1 f2 v2 f4 v4
        r hexpire myhash 1000 NX 1 f4
        # Past timestamp deletes fields immediately; missing field -> E_NO_FIELD;
        # NX on f4 fails since f4 already has a TTL.
        assert_equal [r hexpireat myhash [expr {[clock seconds] - 1}] NX 4 f1 f2 f3 f4] "$E_DELETED $E_DELETED $E_NO_FIELD $E_FAIL"
        # NOTE(review): `field1` was never set in this hash (fields are f1..f4),
        # so this assertion passes trivially -- `f1` was probably intended.
        assert_equal [r hexists myhash field1] 0
    }
    test {HPEXPIRE - wrong number of arguments} {
        r del myhash
        r hset myhash f1 v1
        # (the `numFileds` typo below matches the server's error string verbatim)
        assert_error {*Parameter `numFields` should be greater than 0} {r hpexpire myhash 1000 NX 0 f1 f2 f3}
        assert_error {*Parameter `numFileds` is more than number of arguments} {r hpexpire myhash 1000 NX 4 f1 f2 f3}
    }
    test {HPEXPIRE - parameter expire-time near limit of 2^48} {
        r del myhash
        r hset myhash f1 v1
        # below & above the 2^48-msec absolute-time limit
        assert_equal [r hpexpire myhash [expr (1<<48) - [clock milliseconds] - 1000 ] 1 f1] [list $E_OK]
        assert_error {*invalid expire time*} {r hpexpire myhash [expr (1<<48) - [clock milliseconds] + 100 ] 1 f1}
    }
    test {Lazy - doesn't delete hash that all its fields got expired} {
        r debug set-active-expire 0
        r flushall
        # Sizes straddle 16 and 32 -- presumably ebuckets structure boundaries
        set hash_sizes {1 15 16 17 31 32 33 40}
        foreach h $hash_sizes {
            for {set i 1} {$i <= $h} {incr i} {
                # random expiration time
                r hset hrand$h f$i v$i
                r hpexpire hrand$h [expr {50 + int(rand() * 50)}] 1 f$i
                assert_equal 1 [r HEXISTS hrand$h f$i]
                # same expiration time
                r hset same$h f$i v$i
                r hpexpire same$h 100 1 f$i
                assert_equal 1 [r HEXISTS same$h f$i]
                # same expiration time, mixed with fields that never expire
                r hset mix$h f$i v$i fieldWithoutExpire$i v$i
                r hpexpire mix$h 100 1 f$i
                assert_equal 1 [r HEXISTS mix$h f$i]
            }
        }
        after 150
        # Verify that all fields got expired but the keys weren't lazily deleted
        foreach h $hash_sizes {
            for {set i 1} {$i <= $h} {incr i} {
                assert_equal 0 [r HEXISTS mix$h f$i]
            }
            assert_equal 1 [r EXISTS hrand$h]
            assert_equal 1 [r EXISTS same$h]
            # HLEN still counts expired-but-not-yet-reclaimed fields
            assert_equal [expr $h * 2] [r HLEN mix$h]
        }
        # Restore default
        r debug set-active-expire 1
    }
    test {Active - deletes hash that all its fields got expired} {
        r flushall
        set hash_sizes {1 15 16 17 31 32 33 40}
        foreach h $hash_sizes {
            for {set i 1} {$i <= $h} {incr i} {
                # random expiration time
                r hset hrand$h f$i v$i
                r hpexpire hrand$h [expr {50 + int(rand() * 50)}] 1 f$i
                assert_equal 1 [r HEXISTS hrand$h f$i]
                # same expiration time
                r hset same$h f$i v$i
                r hpexpire same$h 100 1 f$i
                assert_equal 1 [r HEXISTS same$h f$i]
                # same expiration time, mixed with fields that never expire
                r hset mix$h f$i v$i fieldWithoutExpire$i v$i
                r hpexpire mix$h 100 1 f$i
                assert_equal 1 [r HEXISTS mix$h f$i]
            }
        }
        # Wait for active expire
        wait_for_condition 50 20 { [r EXISTS same40] == 0 } else { fail "hash `same40` should be expired" }
        # Verify that all fields got expired and keys got deleted
        foreach h $hash_sizes {
            for {set i 1} {$i <= $h} {incr i} {
                assert_equal 0 [r HEXISTS mix$h f$i]
            }
            assert_equal 0 [r EXISTS hrand$h]
            assert_equal 0 [r EXISTS same$h]
            # Only the never-expiring fields remain in the mixed hashes
            assert_equal $h [r HLEN mix$h]
        }
    }
    test {HPEXPIRE - Flushall deletes all pending expired fields} {
        r del myhash
        r hset myhash field1 value1 field2 value2
        r hpexpire myhash 10000 NX 1 field1
        r hpexpire myhash 10000 NX 1 field2
        r flushall
        # Repeat with the async flavor of FLUSHALL
        r del myhash
        r hset myhash field1 value1 field2 value2
        r hpexpire myhash 10000 NX 1 field1
        r hpexpire myhash 10000 NX 1 field2
        r flushall async
    }
    test {HTTL/HPTTL - Input validation gets failed on nonexists field or field without expire} {
        r del myhash
        r HSET myhash field1 value1 field2 value2
        r HPEXPIRE myhash 1000 NX 1 field1
        foreach cmd {HTTL HPTTL} {
            # Nonexistent key replies with an empty array
            assert_equal [r $cmd non_exists_key 1 f] {}
            assert_equal [r $cmd myhash 2 field2 non_exists_field] "$T_NO_EXPIRY $T_NO_FIELD"
            # Set numFields less than actual number of fields. Fine.
            assert_equal [r $cmd myhash 1 non_exists_field1 non_exists_field2] "$T_NO_FIELD"
        }
    }
    test {HTTL/HPTTL - returns time to live in seconds/msillisec} {
        r del myhash
        r HSET myhash field1 value1 field2 value2
        r HPEXPIRE myhash 2000 NX 2 field1 field2
        set ttlArray [r HTTL myhash 2 field1 field2]
        assert_range [lindex $ttlArray 0] 1 2
        set ttl [r HPTTL myhash 1 field1]
        assert_range $ttl 1000 2000
    }
    test {HEXPIRETIME - returns TTL in Unix timestamp} {
        r del myhash
        r HSET myhash field1 value1
        r HPEXPIRE myhash 1000 NX 1 field1
        # The absolute expiry should land 1-2 seconds from now
        set lo [expr {[clock seconds] + 1}]
        set hi [expr {[clock seconds] + 2}]
        assert_range [r HEXPIRETIME myhash 1 field1] $lo $hi
        assert_range [r HPEXPIRETIME myhash 1 field1] [expr $lo*1000] [expr $hi*1000]
    }
    test {HTTL/HPTTL - Verify TTL progress until expiration} {
        r del myhash
        r hset myhash field1 value1 field2 value2
        r hpexpire myhash 200 NX 1 field1
        assert_range [r HPTTL myhash 1 field1] 100 200
        # HTTL reports whole seconds, so a 200ms TTL is 0 or 1
        assert_range [r HTTL myhash 1 field1] 0 1
        after 100
        assert_range [r HPTTL myhash 1 field1] 1 101
        after 110
        # Once past expiry the field is reported as nonexistent
        assert_equal [r HPTTL myhash 1 field1] $T_NO_FIELD
        assert_equal [r HTTL myhash 1 field1] $T_NO_FIELD
    }
    test {HPEXPIRE - DEL hash with non expired fields (valgrind test)} {
        # Mainly checks no leak of expiration metadata when deleting the hash
        r del myhash
        r hset myhash field1 value1 field2 value2
        r hpexpire myhash 10000 NX 1 field1
        r del myhash
    }
    test {HEXPIREAT - Set time in the past} {
        r del myhash
        r hset myhash field1 value1
        assert_equal [r hexpireat myhash [expr {[clock seconds] - 1}] NX 1 field1] $E_DELETED
        assert_equal [r hexists myhash field1] 0
    }
    test {HEXPIREAT - Set time and then get TTL} {
        r del myhash
        r hset myhash field1 value1
        r hexpireat myhash [expr {[clock seconds] + 2}] NX 1 field1
        assert_range [r hpttl myhash 1 field1] 1000 2000
        assert_range [r httl myhash 1 field1] 1 2
        r hexpireat myhash [expr {[clock seconds] + 5}] XX 1 field1
        assert_range [r httl myhash 1 field1] 4 5
    }
    test {Lazy expire - delete hash with expired fields} {
        r del myhash
        r debug set-active-expire 0
        r hset myhash k v
        r hpexpire myhash 1 NX 1 k
        after 5
        # DEL must reclaim the hash even though its only field already expired
        r del myhash
        r debug set-active-expire 1
    }
    # OPEN: To decide if to delete expired fields at start of HRANDFIELD.
    # test {Test HRANDFIELD does not return expired fields} {
    #    hrandfieldTest 0
    #    hrandfieldTest 1
    # }
    test {Test HRANDFIELD can return expired fields} {
        r debug set-active-expire 0
        r del myhash
        r hset myhash f1 v1 f2 v2 f3 v3 f4 v4 f5 v5
        r hpexpire myhash 1 NX 4 f1 f2 f3 f4
        after 5
        # HRANDFIELD intentionally does not filter out expired fields
        set res [cmp_hrandfield_result myhash "f1 f2 f3 f4 f5"]
        assert {$res == 1}
        r debug set-active-expire 1
    }
    test {Lazy expire - HLEN does count expired fields} {
        # Enforce only lazy expire
        r debug set-active-expire 0
        r del h1 h4 h18 h20
        r hset h1 k1 v1
        r hpexpire h1 1 NX 1 k1
        r hset h4 k1 v1 k2 v2 k3 v3 k4 v4
        r hpexpire h4 1 NX 3 k1 k3 k4
        # beyond 16 fields: HFE DS (ebuckets) converts from list to rax
        r hset h18 k1 v1 k2 v2 k3 v3 k4 v4 k5 v5 k6 v6 k7 v7 k8 v8 k9 v9 k10 v10 k11 v11 k12 v12 k13 v13 k14 v14 k15 v15 k16 v16 k17 v17 k18 v18
        r hpexpire h18 1 NX 18 k1 k2 k3 k4 k5 k6 k7 k8 k9 k10 k11 k12 k13 k14 k15 k16 k17 k18
        r hset h20 k1 v1 k2 v2 k3 v3 k4 v4 k5 v5 k6 v6 k7 v7 k8 v8 k9 v9 k10 v10 k11 v11 k12 v12 k13 v13 k14 v14 k15 v15 k16 v16 k17 v17 k18 v18 k19 v19 k20 v20
        r hpexpire h20 1 NX 2 k1 k2
        after 10
        # HLEN deliberately does not subtract fields that expired lazily
        assert_equal [r hlen h1] 1
        assert_equal [r hlen h4] 4
        assert_equal [r hlen h18] 18
        assert_equal [r hlen h20] 20
        # Restore to support active expire
        r debug set-active-expire 1
    }
    test {Lazy expire - HSCAN does not report expired fields} {
        # Enforce only lazy expire
        r debug set-active-expire 0
        r del h1 h20 h4 h18 h20
        r hset h1 01 01
        r hpexpire h1 1 NX 1 01
        r hset h4 01 01 02 02 03 03 04 04
        r hpexpire h4 1 NX 3 01 03 04
        # beyond 16 fields hash-field expiration DS (ebuckets) converts from list to rax
        r hset h18 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18
        r hpexpire h18 1 NX 18 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18
        r hset h20 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20
        r hpexpire h20 1 NX 2 01 02
        after 10
        # Verify SCAN does not report expired fields
        assert_equal [lsort -unique [lindex [r hscan h1 0 COUNT 10] 1]] ""
        assert_equal [lsort -unique [lindex [r hscan h4 0 COUNT 10] 1]] "02"
        assert_equal [lsort -unique [lindex [r hscan h18 0 COUNT 10] 1]] ""
        assert_equal [lsort -unique [lindex [r hscan h20 0 COUNT 100] 1]] "03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20"
        # Restore to support active expire
        r debug set-active-expire 1
    }
    test {Test HSCAN with mostly expired fields return empty result} {
        r debug set-active-expire 0
        # Create hash with 1000 fields and 999 of them will be expired
        r del myhash
        for {set i 1} {$i <= 1000} {incr i} {
            r hset myhash field$i value$i
            if {$i > 1} {
                r hpexpire myhash 1 NX 1 field$i
            }
        }
        after 3
        # Verify iterative HSCAN returns either empty result or only the first field
        set countEmptyResult 0
        set cur 0
        while 1 {
            set res [r hscan myhash $cur]
            set cur [lindex $res 0]
            # if the result is not empty, it should contain only the first field
            if {[llength [lindex $res 1]] > 0} {
                assert_equal [lindex $res 1] "field1 value1"
            } else {
                incr countEmptyResult
            }
            if {$cur == 0} break
        }
        assert {$countEmptyResult > 0}
        r debug set-active-expire 1
    }
    test {Lazy expire - verify various HASH commands handling expired fields} {
        # Enforce only lazy expire
        r debug set-active-expire 0
        # NOTE(review): h6 is populated below but missing from this DEL; the
        # test could be affected by leftover state on a repeated run -- confirm.
        r del h1 h2 h3 h4 h5 h18
        r hset h1 01 01
        r hset h2 01 01 02 02
        r hset h3 01 01 02 02 03 03
        r hset h4 1 99 2 99 3 99 4 99
        r hset h5 1 1 2 22 3 333 4 4444 5 55555
        r hset h6 01 01 02 02 03 03 04 04 05 05 06 06
        r hset h18 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18
        r hpexpire h1 100 NX 1 01
        r hpexpire h2 100 NX 1 01
        r hpexpire h2 100 NX 1 02
        r hpexpire h3 100 NX 1 01
        r hpexpire h4 100 NX 1 2
        r hpexpire h5 100 NX 1 3
        r hpexpire h6 100 NX 1 05
        r hpexpire h18 100 NX 17 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17
        after 150
        # Verify HDEL not ignore expired field. It is too much overhead to check
        # if the field is expired before deletion.
        assert_equal [r HDEL h1 01] "1"
        # Verify HGET ignore expired field
        assert_equal [r HGET h2 01] ""
        assert_equal [r HGET h2 02] ""
        assert_equal [r HGET h3 01] ""
        assert_equal [r HGET h3 02] "02"
        assert_equal [r HGET h3 03] "03"
        # Verify HINCRBY ignore expired field (the expired field restarts from 0)
        assert_equal [r HINCRBY h4 2 1] "1"
        assert_equal [r HINCRBY h4 3 1] "100"
        # Verify HSTRLEN ignore expired field
        assert_equal [r HSTRLEN h5 3] "0"
        assert_equal [r HSTRLEN h5 4] "4"
        # Verify HKEYS ignore expired field
        assert_equal [lsort [r HKEYS h6]] "01 02 03 04 06"
        # Verify HEXISTS ignore expired field
        assert_equal [r HEXISTS h18 07] "0"
        assert_equal [r HEXISTS h18 18] "1"
        # Verify HVALS ignore expired field
        assert_equal [lsort [r HVALS h18]] "18"
        # Restore to support active expire
        r debug set-active-expire 1
    }
    test {A field with TTL overridden with another value (TTL discarded)} {
        r del myhash
        r hset myhash field1 value1
        r hpexpire myhash 1 NX 1 field1
        # Overwriting the field's value drops its pending TTL
        r hset myhash field1 value2
        after 5
        # Expected TTL will be discarded
        assert_equal [r hget myhash field1] "value2"
    }
    test {Modify TTL of a field} {
        r del myhash
        r hset myhash field1 value1
        r hpexpire myhash 200 NX 1 field1
        # XX extends the existing TTL before it fires
        r hpexpire myhash 1000 XX 1 field1
        after 15
        assert_equal [r hget myhash field1] "value1"
        assert_range [r hpttl myhash 1 field1] 900 1000
    }
    test {Test HGETALL not return expired fields} {
        # Test with small hash
        r debug set-active-expire 0
        # NOTE(review): this deletes `myhash` but the small-hash part populates
        # `myhash1` -- likely a typo in the DEL target; confirm.
        r del myhash
        r hset myhash1 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5
        r hpexpire myhash1 1 NX 2 f2 f4
        after 10
        assert_equal [lsort [r hgetall myhash1]] "f1 f3 f5 v1 v3 v5"
        # Test with large hash
        r del myhash
        for {set i 1} {$i <= 600} {incr i} {
            r hset myhash f$i v$i
            if {$i > 3} { r hpexpire myhash 1 NX 1 f$i }
        }
        after 10
        assert_equal [lsort [r hgetall myhash]] [lsort "f1 f2 f3 v1 v2 v3"]
        r debug set-active-expire 1
    }
    test {Test RENAME hash with fields to be expired} {
        r debug set-active-expire 0
        r del myhash
        r hset myhash field1 value1
        r hpexpire myhash 20 NX 1 field1
        # The pending field expiration must follow the key through RENAME
        r rename myhash myhash2
        assert_equal [r exists myhash] 0
        assert_range [r hpttl myhash2 1 field1] 1 20
        after 25
        # Verify the renamed key exists
        assert_equal [r exists myhash2] 1
        r debug set-active-expire 1
        # Only active expire will delete the key
        wait_for_condition 30 10 { [r exists myhash2] == 0 } else { fail "`myhash2` should be expired" }
    }
    test {MOVE to another DB hash with fields to be expired} {
        r select 9
        r flushall
        r hset myhash field1 value1
        r hpexpire myhash 100 NX 1 field1
        r move myhash 10
        assert_equal [r exists myhash] 0
        assert_equal [r dbsize] 0
        # Verify the key and its field exists in the target DB
        r select 10
        assert_equal [r hget myhash field1] "value1"
        assert_equal [r exists myhash] 1
        # Eventually the field will be expired and the key will be deleted
        wait_for_condition 40 10 { [r hget myhash field1] == "" } else { fail "`field1` should be expired" }
        wait_for_condition 40 10 { [r exists myhash] == 0 } else { fail "db should be empty" }
    } {} {singledb:skip}
    test {Test COPY hash with fields to be expired} {
        r flushall
        r hset h1 f1 v1 f2 v2
        r hset h2 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5 f6 v6 f7 v7 f8 v8 f9 v9 f10 v10 f11 v11 f12 v12 f13 v13 f14 v14 f15 v15 f16 v16 f17 v17 f18 v18
        r hpexpire h1 100 NX 1 f1
        r hpexpire h2 100 NX 18 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18
        # COPY must duplicate the pending expirations, not just the values
        r COPY h1 h1copy
        r COPY h2 h2copy
        assert_equal [r hget h1 f1] "v1"
        assert_equal [r hget h1copy f1] "v1"
        assert_equal [r exists h2] 1
        assert_equal [r exists h2copy] 1
        after 105
        # Verify lazy expire of field in h1 and its copy
        assert_equal [r hget h1 f1] ""
        assert_equal [r hget h1copy f1] ""
        # Verify lazy expire of field in h2 and its copy. Verify the key deleted as well.
        wait_for_condition 40 10 { [r exists h2] == 0 } else { fail "`h2` should be expired" }
        wait_for_condition 40 10 { [r exists h2copy] == 0 } else { fail "`h2copy` should be expired" }
    } {} {singledb:skip}
    test {Test SWAPDB hash-fields to be expired} {
        r select 9
        r flushall
        r hset myhash field1 value1
        r hpexpire myhash 50 NX 1 field1
        r swapdb 9 10
        # Verify the key and its field doesn't exist in the source DB
        assert_equal [r exists myhash] 0
        assert_equal [r dbsize] 0
        # Verify the key and its field exists in the target DB
        r select 10
        assert_equal [r hget myhash field1] "value1"
        assert_equal [r dbsize] 1
        # Eventually the field will be expired and the key will be deleted
        wait_for_condition 20 10 { [r exists myhash] == 0 } else { fail "'myhash' should be expired" }
    } {} {singledb:skip}
    test {HPERSIST - input validation} {
        # HPERSIST key <num-fields> <field [field ...]>
        r del myhash
        r hset myhash f1 v1 f2 v2
        r hexpire myhash 1000 NX 1 f1
        assert_error {*wrong number of arguments*} {r hpersist myhash}
        assert_error {*wrong number of arguments*} {r hpersist myhash 1}
        # Nonexistent key replies with an empty array
        assert_equal [r hpersist not-exists-key 1 f1] {}
        assert_equal [r hpersist myhash 2 f1 not-exists-field] "$P_OK $P_NO_FIELD"
        assert_equal [r hpersist myhash 1 f2] "$P_NO_EXPIRY"
    }
    test {HPERSIST - verify fields with TTL are persisted} {
        r del myhash
        r hset myhash f1 v1 f2 v2
        r hexpire myhash 20 NX 2 f1 f2
        r hpersist myhash 2 f1 f2
        after 25
        # The fields survived past their (now-removed) expiration time
        assert_equal [r hget myhash f1] "v1"
        assert_equal [r hget myhash f2] "v2"
        assert_equal [r HTTL myhash 2 f1 f2] "$T_NO_EXPIRY $T_NO_EXPIRY"
    }
    # NOTE(review): sets 1 rather than restoring the default (128) -- presumably
    # deliberate to keep exercising non-listpack hashes; confirm intent.
    r config set hash-max-listpack-entries 1
}