Listpack encoding for sets (#11290)

Small sets that contain non-integer elements are now listpack encoded, by default up to 128 elements of at most 64 bytes each, controlled by the new configs `set-max-listpack-entries` and `set-max-listpack-value`. This saves memory for small sets compared to using a hashtable. Sets with only integers, even very small sets, are still intset encoded (up to 1G limit, etc.). Larger sets are hashtable encoded. This PR increments the RDB version, and has an effect on the output of OBJECT ENCODING. Possible conversions when elements are added: intset -> listpack, listpack -> hashtable, intset -> hashtable. Note: No conversion happens when elements are deleted. If all elements are deleted and then added again, the set is deleted and recreated, thus implicitly converted to a smaller encoding.
2022-11-09 18:50:07 +01:00 · 2022-11-09 18:50:07 +01:00 · 4e472a1a7f
parent 07d187066a
commit 4e472a1a7f
19 changed files with 1132 additions and 344 deletions
--- a/redis.conf
+++ b/redis.conf
@ -1951,13 +1951,20 @@ list-max-listpack-size -2
 # etc.
 list-compress-depth 0

-# Sets have a special encoding in just one case: when a set is composed
+# Sets have a special encoding when a set is composed
 # of just strings that happen to be integers in radix 10 in the range
 # of 64 bit signed integers.
 # The following configuration setting sets the limit in the size of the
 # set in order to use this special memory saving encoding.
 set-max-intset-entries 512

+# Sets containing non-integer values are also encoded using a memory efficient
+# data structure when they have a small number of entries, and the biggest entry
+# does not exceed a given threshold. These thresholds can be configured using
+# the following directives.
+set-max-listpack-entries 128
+set-max-listpack-value 64
+
 # Similarly to hashes and lists, sorted sets are also specially encoded in
 # order to save a lot of space. This encoding is only used when the length and
 # elements of a sorted set are below the following limits:
--- a/src/aof.c
+++ b/src/aof.c
@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
 * The function returns 0 on error, 1 on success. */
 int rewriteSetObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = setTypeSize(o);
-
-    if (o->encoding == OBJ_ENCODING_INTSET) {
-        int ii = 0;
-        int64_t llval;
-
-        while(intsetGet(o->ptr,ii++,&llval)) {
-            if (count == 0) {
-                int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
-                    AOF_REWRITE_ITEMS_PER_CMD : items;
-
-                if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
-                    !rioWriteBulkString(r,"SADD",4) ||
-                    !rioWriteBulkObject(r,key)) 
-                {
-                    return 0;
-                }
+    setTypeIterator *si = setTypeInitIterator(o);
+    char *str;
+    size_t len;
+    int64_t llval;
+    /* Emit the members as SADD commands carrying at most
+     * AOF_REWRITE_ITEMS_PER_CMD members each. */
+    while (setTypeNext(si, &str, &len, &llval) != -1) {
+        if (count == 0) {
+            /* Start a new SADD command sized for the members still to write. */
+            int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+                AOF_REWRITE_ITEMS_PER_CMD : items;
+            if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
+                !rioWriteBulkString(r,"SADD",4) ||
+                !rioWriteBulkObject(r,key))
+            {
+                /* Release the iterator on this error path too, otherwise it
+                 * leaks whenever writing the command header fails. */
+                setTypeReleaseIterator(si);
+                return 0;
            }
-            if (!rioWriteBulkLongLong(r,llval)) return 0;
-            if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
-            items--;
        }
-    } else if (o->encoding == OBJ_ENCODING_HT) {
-        dictIterator *di = dictGetIterator(o->ptr);
-        dictEntry *de;
-
-        while((de = dictNext(di)) != NULL) {
-            sds ele = dictGetKey(de);
-            if (count == 0) {
-                int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
-                    AOF_REWRITE_ITEMS_PER_CMD : items;
-
-                if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
-                    !rioWriteBulkString(r,"SADD",4) ||
-                    !rioWriteBulkObject(r,key)) 
-                {
-                    dictReleaseIterator(di);
-                    return 0;
-                }
-            }
-            if (!rioWriteBulkString(r,ele,sdslen(ele))) {
-                dictReleaseIterator(di);
-                return 0;          
-            }
-            if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
-            items--;
+        /* A non-NULL 'str' is written as a string of 'len' bytes, otherwise
+         * the member is written as the integer 'llval'. */
+        size_t written = str ?
+            rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
+        if (!written) {
+            setTypeReleaseIterator(si);
+            return 0;
        }
-        dictReleaseIterator(di);
-    } else {
-        serverPanic("Unknown set encoding");
+        /* Close the current command once it is full. */
+        if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+        items--;
    }
+    setTypeReleaseIterator(si);
    return 1;
 }

--- a/src/config.c
+++ b/src/config.c
@ -3130,6 +3130,8 @@ standardConfig static_configs[] = {
    /* Size_t configs */
    createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
    createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
+    createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
+    createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
    createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
    createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
    createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
--- a/src/db.c
+++ b/src/db.c
@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
        } while (cursor &&
              maxiterations-- &&
              listLength(keys) < (unsigned long)count);
-    } else if (o->type == OBJ_SET) {
+    } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) {
        int pos = 0;
        int64_t ll;

        while(intsetGet(o->ptr,pos++,&ll))
            listAddNodeTail(keys,createStringObjectFromLongLong(ll));
        cursor = 0;
-    } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
+    } else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) &&
+               o->encoding == OBJ_ENCODING_LISTPACK)
+    {
        unsigned char *p = lpFirst(o->ptr);
        unsigned char *vstr;
        int64_t vlen;
--- a/src/defrag.c
+++ b/src/defrag.c
@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) {
    } else if (ob->type == OBJ_SET) {
        if (ob->encoding == OBJ_ENCODING_HT) {
            defragged += defragSet(db, de);
-        } else if (ob->encoding == OBJ_ENCODING_INTSET) {
-            intset *newis, *is = ob->ptr;
-            if ((newis = activeDefragAlloc(is)))
-                defragged++, ob->ptr = newis;
+        } else if (ob->encoding == OBJ_ENCODING_INTSET ||
+                   ob->encoding == OBJ_ENCODING_LISTPACK)
+        {
+            void *newptr, *ptr = ob->ptr;
+            if ((newptr = activeDefragAlloc(ptr)))
+                defragged++, ob->ptr = newptr;
        } else {
            serverPanic("Unknown set encoding");
        }
--- a/src/intset.c
+++ b/src/intset.c
@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) {
    return _intsetGet(is,rand()%len);
 }

+/* Return the largest member of the set, which is the value stored at the
+ * last position. */
+int64_t intsetMax(intset *is) {
+    uint32_t count = intrev32ifbe(is->length);
+    uint32_t last = count - 1;
+    return _intsetGet(is, last);
+}
+
+/* Return the smallest member of the set, which is the value stored at the
+ * first position. */
+int64_t intsetMin(intset *is) {
+    const uint32_t first = 0;
+    return _intsetGet(is, first);
+}
+
 /* Get the value at the given position. When this position is
 * out of range the function returns 0, when in range it returns 1. */
 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) {
        is = intsetAdd(is,6,&success); assert(success);
        is = intsetAdd(is,4,&success); assert(success);
        is = intsetAdd(is,4,&success); assert(!success);
+        assert(6 == intsetMax(is));
+        assert(4 == intsetMin(is));
        ok();
        zfree(is);
    }
--- a/src/intset.h
+++ b/src/intset.h
@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
 intset *intsetRemove(intset *is, int64_t value, int *success);
 uint8_t intsetFind(intset *is, int64_t value);
 int64_t intsetRandom(intset *is);
+int64_t intsetMax(intset *is);
+int64_t intsetMin(intset *is);
 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
 uint32_t intsetLen(const intset *is);
 size_t intsetBlobLen(intset *is);
--- a/src/listpack.c
+++ b/src/listpack.c
@ -1063,6 +1063,55 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {
    return lp;
 }

+/* Delete the elements 'ps' passed as an array of 'count' element pointers and
+ * return the resulting listpack. The elements must be given in the same order
+ * as they appear in the listpack.
+ *
+ * When 'count' is 0 the listpack is returned untouched. Otherwise the bytes
+ * to keep are compacted in place, the header is updated (total bytes and,
+ * when it is known, the number of elements) and the value returned by
+ * lpShrinkToFit() is handed back; callers should continue with the returned
+ * pointer (presumably the listpack may have been reallocated).
+ *
+ * Every pointer in 'ps' is asserted to be non-NULL and to not point at the
+ * EOF marker. */
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count) {
+    if (count == 0) return lp;
+    /* Write position: compaction starts at the first deleted element. */
+    unsigned char *dst = ps[0];
+    size_t total_bytes = lpGetTotalBytes(lp);
+    unsigned char *lp_end = lp + total_bytes; /* After the EOF element. */
+    assert(lp_end[-1] == LP_EOF);
+    /*
+     * ----+--------+-----------+--------+---------+-----+---+
+     * ... | Delete | Keep      | Delete | Keep    | ... |EOF|
+     * ... |xxxxxxxx|           |xxxxxxxx|         | ... |   |
+     * ----+--------+-----------+--------+---------+-----+---+
+     *     ^        ^           ^                            ^
+     *     |        |           |                            |
+     *     ps[i]    |           ps[i+1]                      |
+     *     skip     keep_start  keep_end                     lp_end
+     *
+     * The loop memmoves the bytes between keep_start and keep_end to dst.
+     */
+    for (unsigned long i = 0; i < count; i++) {
+        unsigned char *skip = ps[i];
+        assert(skip != NULL && skip[0] != LP_EOF);
+        unsigned char *keep_start = lpSkip(skip);
+        unsigned char *keep_end;
+        if (i + 1 < count) {
+            keep_end = ps[i + 1];
+            /* Deleting consecutive elements. Nothing to keep between them. */
+            if (keep_start == keep_end) continue;
+        } else {
+            /* Keep the rest of the listpack including the EOF marker. */
+            keep_end = lp_end;
+        }
+        assert(keep_end > keep_start);
+        size_t bytes_to_keep = keep_end - keep_start;
+        /* Source and destination may overlap, hence memmove. */
+        memmove(dst, keep_start, bytes_to_keep);
+        dst += bytes_to_keep;
+    }
+    /* Update total size and num elements. After the loop 'dst' points right
+     * after the relocated EOF marker, so the gap up to the old end is exactly
+     * the number of bytes removed. */
+    size_t deleted_bytes = lp_end - dst;
+    total_bytes -= deleted_bytes;
+    assert(lp[total_bytes - 1] == LP_EOF);
+    lpSetTotalBytes(lp, total_bytes);
+    uint32_t numele = lpGetNumElements(lp);
+    /* An unknown element count in the header is left as it is. */
+    if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - count);
+    return lpShrinkToFit(lp);
+}
+
 /* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.
 *
 * NOTE: The larger listpack is reallocated to contain the new merged listpack.
@ -1383,6 +1432,43 @@ void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *k
    val->sval = lpGetValue(p, &(val->slen), &(val->lval));
 }

+/* Pick 'count' random entries from the listpack and copy them into 'entries',
+ * an array the caller provides with room for 'count' listpackEntry structs.
+ * Each pick is drawn independently, so the same entry can show up more than
+ * once, and the output order is random. The listpack is asserted to be
+ * non-empty. */
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries) {
+    /* NOTE(review): 'index' is kept as the first field because the array is
+     * sorted with uintCompare(), which presumably compares the leading
+     * unsigned int of each item. */
+    struct pick {
+        unsigned int index; /* Position of the chosen entry in the listpack. */
+        unsigned int order; /* Slot of 'entries' the value is stored into. */
+    };
+    struct pick *chosen = lp_malloc(count * sizeof(struct pick));
+    unsigned int nelems = lpLength(lp);
+    assert(nelems);
+
+    /* Draw the random positions, remembering the order they were drawn in. */
+    unsigned int n = 0;
+    while (n < count) {
+        chosen[n].index = rand() % nelems;
+        chosen[n].order = n;
+        n++;
+    }
+
+    /* Sort by index, so that one forward scan reaches every pick. */
+    qsort(chosen, count, sizeof(struct pick), uintCompare);
+
+    /* Scan the listpack once in index order and write each value to the slot
+     * recorded when it was drawn, which restores the random order. */
+    unsigned char *cur = lpFirst(lp);
+    unsigned int pos = 0; /* Zero-based index of 'cur' in the listpack. */
+    for (unsigned int k = 0; k < count; k++) {
+        /* Move forward until 'cur' is the element this pick refers to. */
+        for (; pos < chosen[k].index; pos++) cur = lpNext(lp, cur);
+        int slot = chosen[k].order;
+        unsigned int slen = 0;
+        long long lval = 0;
+        unsigned char *sval = lpGetValue(cur, &slen, &lval);
+        lpSaveValue(sval, slen, lval, &entries[slot]);
+    }
+    lp_free(chosen);
+}
+
 /* Randomly select count of key value pairs and store into 'keys' and
 * 'vals' args. The order of the picked entries is random, and the selections
 * are non-unique (repetitions are possible).
@ -1449,34 +1535,83 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack
    if (count > total_size)
        count = total_size;

-    /* To only iterate once, every time we try to pick a member, the probability
-     * we pick it is the quotient of the count left we want to pick and the
-     * count still we haven't visited in the dict, this way, we could make every
-     * member be equally picked.*/
    p = lpFirst(lp);
    unsigned int picked = 0, remaining = count;
    while (picked < count && p) {
-        double randomDouble = ((double)rand()) / RAND_MAX;
-        double threshold = ((double)remaining) / (total_size - index);
-        if (randomDouble <= threshold) {
+        assert((p = lpNextRandom(lp, p, &index, remaining, 1)));
+        key = lpGetValue(p, &klen, &klval);
+        lpSaveValue(key, klen, klval, &keys[picked]);
+        assert((p = lpNext(lp, p)));
+        index++;
+        if (vals) {
            key = lpGetValue(p, &klen, &klval);
-            lpSaveValue(key, klen, klval, &keys[picked]);
-            assert((p = lpNext(lp, p)));
-            if (vals) {
-                key = lpGetValue(p, &klen, &klval);
-                lpSaveValue(key, klen, klval, &vals[picked]);
-            }
-            remaining--;
-            picked++;
-        } else {
-            assert((p = lpNext(lp, p)));
+            lpSaveValue(key, klen, klval, &vals[picked]);
        }
        p = lpNext(lp, p);
+        remaining--;
+        picked++;
        index++;
    }
    return picked;
 }

+/* Iterates forward to the "next random" element, given we are yet to pick
+ * 'remaining' unique elements between the starting element 'p' (inclusive) and
+ * the end of the list. The 'index' needs to be initialized according to the
+ * current zero-based index matching the position of the starting element 'p'
+ * and is updated to match the returned element's zero-based index. If
+ * 'even_only' is nonzero, an element with an even index is picked, which is
+ * useful if the listpack represents a key-value pair sequence.
+ *
+ * Returns the picked element, or NULL (leaving 'index' untouched) when
+ * 'remaining' is zero or when the end of the listpack is reached without
+ * picking an element, which includes 'p' being NULL and 'index' not being
+ * smaller than the listpack length.
+ *
+ * Note that this function can return p. In order to skip the previously
+ * returned element, you need to call lpNext() or lpDelete() after each call to
+ * lpNextRandom(). Idea:
+ *
+ *     assert(remaining <= lpLength(lp));
+ *     p = lpFirst(lp);
+ *     i = 0;
+ *     while (remaining > 0) {
+ *         p = lpNextRandom(lp, p, &i, remaining--, 0);
+ *
+ *         // ... Do stuff with p ...
+ *
+ *         p = lpNext(lp, p);
+ *         i++;
+ *     }
+ */
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+                            unsigned int remaining, int even_only)
+{
+    /* Nothing left to pick. Without this guard a zero 'remaining' yields a
+     * threshold of 0, which a rand() result of 0 would still satisfy. */
+    if (remaining == 0) return NULL;
+
+    /* To only iterate once, every time we try to pick a member, the probability
+     * we pick it is the quotient of the count left we want to pick and the
+     * count still we haven't visited. This way, we could make every member be
+     * equally likely to be picked. */
+    unsigned int i = *index;
+    unsigned int total_size = lpLength(lp);
+    while (i < total_size && p != NULL) {
+        if (even_only && i % 2 != 0) {
+            p = lpNext(lp, p);
+            i++;
+            continue;
+        }
+
+        /* Do we pick this element? Count the candidates from 'i' to the end.
+         * With 'even_only' only the even indexes are candidates; since 'i' is
+         * even here, round the half up so that the element at 'i' is always
+         * counted. This keeps 'available' >= 1, so the division below never
+         * divides by zero, also for a listpack with an odd number of
+         * elements. */
+        unsigned int available = total_size - i;
+        if (even_only) available = available / 2 + available % 2;
+        double randomDouble = ((double)rand()) / RAND_MAX;
+        double threshold = ((double)remaining) / available;
+        if (randomDouble <= threshold) {
+            *index = i;
+            return p;
+        }
+
+        p = lpNext(lp, p);
+        i++;
+    }
+
+    return NULL;
+}
+
 /* Print info of listpack which is used in debugCommand */
 void lpRepr(unsigned char *lp) {
    unsigned char *p, *vstr;
@ -1902,6 +2037,21 @@ int listpackTest(int argc, char *argv[], int flags) {
        zfree(lp);
    }

+    TEST("Batch delete") {
+        unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */
+        assert(lpLength(lp) == 4); /* Pre-condition */
+        /* Collect pointers to every element except the third one. */
+        unsigned char *victims[3];
+        unsigned char *cur = lpFirst(lp);
+        victims[0] = cur;
+        cur = lpNext(lp, cur);
+        victims[1] = cur;
+        cur = lpNext(lp, cur); /* Third element: not deleted. */
+        victims[2] = lpNext(lp, cur);
+        lp = lpBatchDelete(lp, victims, sizeof(victims)/sizeof(victims[0]));
+        /* Only the third element must be left, in a still valid listpack. */
+        assert(lpLength(lp) == 1);
+        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+        assert(lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL) == 1);
+        lpFree(lp);
+    }
+
    TEST("Delete foo while iterating") {
        lp = createList();
        p = lpFirst(lp);
@ -2048,6 +2198,82 @@ int listpackTest(int argc, char *argv[], int flags) {
        zfree(lp3);
    }

+    TEST("lpNextRandom normal usage") {
+        /* Create some data: 'size' elements, the i-th one being the first i
+         * bytes of 'buf'. */
+        unsigned char *lp = lpNew(0);
+        unsigned char buf[100] = "asdf";
+        unsigned int size = 100;
+        for (size_t i = 0; i < size; i++) {
+            lp = lpAppend(lp, buf, i);
+        }
+        assert(lpLength(lp) == size);
+
+        /* Pick a subset of the elements of every possible subset size */
+        for (unsigned int count = 0; count <= size; count++) {
+            unsigned int remaining = count;
+            unsigned char *p = lpFirst(lp);
+            unsigned char *prev = NULL;
+            unsigned index = 0;
+            while (remaining > 0) {
+                assert(p != NULL);
+                p = lpNextRandom(lp, p, &index, remaining--, 0);
+                /* Every pick must succeed and differ from the previous one. */
+                assert(p != NULL);
+                assert(p != prev);
+                prev = p;
+                p = lpNext(lp, p);
+                index++;
+            }
+        }
+
+        /* Free the listpack created by this test so that it does not leak. */
+        lpFree(lp);
+    }
+
+    TEST("lpNextRandom corner cases") {
+        unsigned char *lp = lpNew(0);
+        unsigned i = 0;
+
+        /* Pick from empty listpack returns NULL. */
+        assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL);
+
+        /* Add some elements and find their pointers within the listpack. */
+        lp = lpAppend(lp, (unsigned char *)"abc", 3);
+        lp = lpAppend(lp, (unsigned char *)"def", 3);
+        lp = lpAppend(lp, (unsigned char *)"ghi", 3);
+        assert(lpLength(lp) == 3);
+        unsigned char *p0 = lpFirst(lp);
+        unsigned char *p1 = lpNext(lp, p0);
+        unsigned char *p2 = lpNext(lp, p1);
+        assert(lpNext(lp, p2) == NULL);
+
+        /* Pick zero elements returns NULL. */
+        i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL);
+
+        /* Pick all returns all. */
+        i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0);
+        i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1);
+        i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2);
+
+        /* Pick more than one when there's only one left returns the last one. */
+        i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2);
+
+        /* Pick all even elements returns p0 and p2. */
+        i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0);
+        i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2);
+
+        /* Don't crash even for bad index. */
+        for (int j = 0; j < 100; j++) {
+            /* Initialized so that 'p' never looks unset to the compiler; the
+             * switch below covers every value of j % 4. */
+            unsigned char *p = NULL;
+            switch (j % 4) {
+            case 0: p = p0; break;
+            case 1: p = p1; break;
+            case 2: p = p2; break;
+            case 3: p = NULL; break;
+            }
+            i = j % 7;
+            unsigned int remaining = j % 5;
+            p = lpNextRandom(lp, p, &i, remaining, 0);
+            assert(p == p0 || p == p1 || p == p2 || p == NULL);
+        }
+
+        /* Free the listpack created by this test so that it does not leak. */
+        lpFree(lp);
+    }
+
    TEST("Random pair with one element") {
        listpackEntry key, val;
        unsigned char *lp = lpNew(0);
--- a/src/listpack.h
+++ b/src/listpack.h
@ -70,6 +70,7 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long
 unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
 unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
 unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
 unsigned char *lpMerge(unsigned char **first, unsigned char **second);
 unsigned long lpLength(unsigned char *lp);
 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
@ -90,6 +91,9 @@ unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
 void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
 void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
 unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+                            unsigned int remaining, int even_only);
 int lpSafeToAdd(unsigned char* lp, size_t add);
 void lpRepr(unsigned char *lp);

--- a/src/object.c
+++ b/src/object.c
@ -247,6 +247,13 @@ robj *createIntsetObject(void) {
    return o;
 }

+/* Create a set object whose payload is a new listpack obtained from
+ * lpNew(0), and mark it as listpack encoded. */
+robj *createSetListpackObject(void) {
+    robj *set = createObject(OBJ_SET, lpNew(0));
+    set->encoding = OBJ_ENCODING_LISTPACK;
+    return set;
+}
+
 robj *createHashObject(void) {
    unsigned char *zl = lpNew(0);
    robj *o = createObject(OBJ_HASH, zl);
@ -306,6 +313,7 @@ void freeSetObject(robj *o) {
        dictRelease((dict*) o->ptr);
        break;
    case OBJ_ENCODING_INTSET:
+    case OBJ_ENCODING_LISTPACK:
        zfree(o->ptr);
        break;
    default:
@ -441,6 +449,8 @@ void dismissSetObject(robj *o, size_t size_hint) {
        dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*));
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr));
+    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+        dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
    } else {
        serverPanic("Unknown set encoding type");
    }
--- a/src/rdb.c
+++ b/src/rdb.c
@ -665,6 +665,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
            return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
        else if (o->encoding == OBJ_ENCODING_HT)
            return rdbSaveType(rdb,RDB_TYPE_SET);
+        else if (o->encoding == OBJ_ENCODING_LISTPACK)
+            return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK);
        else
            serverPanic("Unknown set encoding");
    case OBJ_ZSET:
@ -858,6 +860,10 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {

            if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
            nwritten += n;
+        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+            size_t l = lpBytes((unsigned char *)o->ptr);
+            if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1;
+            nwritten += n;
        } else {
            serverPanic("Unknown set encoding");
        }
@ -1690,19 +1696,21 @@ static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int he
 }

 /* callback for to check the listpack doesn't have duplicate records */
-static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
    struct {
+        int pairs;
        long count;
        dict *fields;
    } *data = userdata;

    if (data->fields == NULL) {
        data->fields = dictCreate(&hashDictType);
-        dictExpand(data->fields, head_count/2);
+        dictExpand(data->fields, data->pairs ? head_count/2 : head_count);
    }

-    /* Even records are field names, add to dict and check that's not a dup */
-    if (((data->count) & 1) == 0) {
+    /* If we're checking pairs, then even records are field names. Otherwise
+     * we're checking all elements. Add to dict and check that's not a dup */
+    if (!data->pairs || ((data->count) & 1) == 0) {
        unsigned char *str;
        int64_t slen;
        unsigned char buf[LP_INTBUF_SIZE];
@ -1722,21 +1730,24 @@ static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, vo

 /* Validate the integrity of the listpack structure.
 * when `deep` is 0, only the integrity of the header is validated.
- * when `deep` is 1, we scan all the entries one by one. */
-int lpPairsValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep) {
+ * when `deep` is 1, we scan all the entries one by one.
+ * when `pairs` is 0, all elements need to be unique (it's a set)
+ * when `pairs` is 1, only the keys (every other element, starting with the
+ * first one) need to be unique (it's a key-value map) */
+int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) {
    if (!deep)
        return lpValidateIntegrity(lp, size, 0, NULL, NULL);

    /* Keep track of the field names to locate duplicate ones */
    struct {
+        int pairs;
        long count;
        dict *fields; /* Initialisation at the first callback. */
-    } data = {0, NULL};
+    } data = {pairs, 0, NULL};

-    int ret = lpValidateIntegrity(lp, size, 1, _lpPairsEntryValidation, &data);
+    int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data);

    /* make sure we have an even number of records. */
-    if (data.count & 1)
+    if (pairs && data.count & 1)
        ret = 0;

    if (data.fields) dictRelease(data.fields);
@ -1813,6 +1824,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
        }

        /* Load every single element of the set */
+        size_t maxelelen = 0, sumelelen = 0;
        for (i = 0; i < len; i++) {
            long long llval;
            sds sdsele;
@ -1821,6 +1833,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                decrRefCount(o);
                return NULL;
            }
+            size_t elelen = sdslen(sdsele);
+            sumelelen += elelen;
+            if (elelen > maxelelen) maxelelen = elelen;

            if (o->encoding == OBJ_ENCODING_INTSET) {
                /* Fetch integer value from element. */
@ -1833,6 +1848,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                        sdsfree(sdsele);
                        return NULL;
                    }
+                } else if (setTypeSize(o) < server.set_max_listpack_entries &&
+                           maxelelen <= server.set_max_listpack_value &&
+                           lpSafeToAdd(NULL, sumelelen))
+                {
+                    /* We checked if it's safe to add one large element instead
+                     * of many small ones. It's OK since lpSafeToAdd doesn't
+                     * care about individual elements, only the total size. */
+                    setTypeConvert(o, OBJ_ENCODING_LISTPACK);
                } else {
                    setTypeConvert(o,OBJ_ENCODING_HT);
                    if (dictTryExpand(o->ptr,len) != DICT_OK) {
@ -1844,6 +1867,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                }
            }

+            /* This will also be called when the set was just converted
+             * to a listpack encoded set. */
+            if (o->encoding == OBJ_ENCODING_LISTPACK) {
+                if (setTypeSize(o) < server.set_max_listpack_entries &&
+                    elelen <= server.set_max_listpack_value &&
+                    lpSafeToAdd(o->ptr, elelen))
+                {
+                    unsigned char *p = lpFirst(o->ptr);
+                    if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) {
+                        rdbReportCorruptRDB("Duplicate set members detected");
+                        decrRefCount(o);
+                        sdsfree(sdsele);
+                        return NULL;
+                    }
+                    o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
+                } else {
+                    setTypeConvert(o, OBJ_ENCODING_HT);
+                    if (dictTryExpand(o->ptr, len) != DICT_OK) {
+                        rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+                                            (unsigned long long)len);
+                        sdsfree(sdsele);
+                        decrRefCount(o);
+                        return NULL;
+                    }
+                }
+            }
+
            /* This will also be called when the set was just converted
             * to a regular hash table encoded set. */
            if (o->encoding == OBJ_ENCODING_HT) {
@ -2126,6 +2176,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
    } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP  ||
               rdbtype == RDB_TYPE_LIST_ZIPLIST ||
               rdbtype == RDB_TYPE_SET_INTSET   ||
+               rdbtype == RDB_TYPE_SET_LISTPACK ||
               rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
               rdbtype == RDB_TYPE_ZSET_LISTPACK ||
               rdbtype == RDB_TYPE_HASH_ZIPLIST ||
@ -2243,6 +2294,20 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                if (intsetLen(o->ptr) > server.set_max_intset_entries)
                    setTypeConvert(o,OBJ_ENCODING_HT);
                break;
+            case RDB_TYPE_SET_LISTPACK:
+                if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+                if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) {
+                    rdbReportCorruptRDB("Set listpack integrity check failed.");
+                    zfree(encoded);
+                    o->ptr = NULL;
+                    decrRefCount(o);
+                    return NULL;
+                }
+                o->type = OBJ_SET;
+                o->encoding = OBJ_ENCODING_LISTPACK;
+                if (setTypeSize(o) > server.set_max_listpack_entries)
+                    setTypeConvert(o, OBJ_ENCODING_HT);
+                break;
            case RDB_TYPE_ZSET_ZIPLIST:
                {
                    unsigned char *lp = lpNew(encoded_len);
@ -2272,7 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                }
            case RDB_TYPE_ZSET_LISTPACK:
                if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
-                if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+                if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
                    rdbReportCorruptRDB("Zset listpack integrity check failed.");
                    zfree(encoded);
                    o->ptr = NULL;
@ -2318,7 +2383,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                }
            case RDB_TYPE_HASH_LISTPACK:
                if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
-                if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+                if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
                    rdbReportCorruptRDB("Hash listpack integrity check failed.");
                    zfree(encoded);
                    o->ptr = NULL;
--- a/src/rdb.h
+++ b/src/rdb.h
@ -38,7 +38,7 @@

 /* The current RDB version. When the format changes in a way that is no longer
 * backward compatible this number gets incremented. */
-#define RDB_VERSION 10
+#define RDB_VERSION 11

 /* Defines related to the dump file format. To store 32 bits lengths for short
 * keys requires a lot of space, so we check the most significant 2 bits of
@ -95,10 +95,11 @@
 #define RDB_TYPE_ZSET_LISTPACK 17
 #define RDB_TYPE_LIST_QUICKLIST_2   18
 #define RDB_TYPE_STREAM_LISTPACKS_2 19
+#define RDB_TYPE_SET_LISTPACK  20
 /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */

 /* Test if a type is an object type. */
-#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 19))
+#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 20))

 /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
 #define RDB_OPCODE_FUNCTION2  245   /* function library data */
--- a/src/redis-check-rdb.c
+++ b/src/redis-check-rdb.c
@ -97,7 +97,9 @@ char *rdb_type_string[] = {
    "stream",
    "hash-listpack",
    "zset-listpack",
-    "quicklist-v2"
+    "quicklist-v2",
+    "stream-v2",      /* RDB_TYPE_STREAM_LISTPACKS_2 (19); keeps the table aligned with rdb.h */
+    "set-listpack",   /* RDB_TYPE_SET_LISTPACK (20) */
 };

 /* Show a few stats collected into 'rdbstate' */
--- a/src/server.h
+++ b/src/server.h
@ -1850,6 +1850,8 @@ struct redisServer {
    size_t hash_max_listpack_entries;
    size_t hash_max_listpack_value;
    size_t set_max_intset_entries;
+    size_t set_max_listpack_entries;
+    size_t set_max_listpack_value;
    size_t zset_max_listpack_entries;
    size_t zset_max_listpack_value;
    size_t hll_sparse_max_bytes;
@ -2331,6 +2333,7 @@ typedef struct {
    int encoding;
    int ii; /* intset iterator */
    dictIterator *di;
+    unsigned char *lpi; /* listpack iterator */
 } setTypeIterator;

 /* Structure to hold hash iteration abstraction. Note that iteration over
@ -2655,6 +2658,7 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
 robj *createQuicklistObject(void);
 robj *createSetObject(void);
 robj *createIntsetObject(void);
+robj *createSetListpackObject(void);
 robj *createHashObject(void);
 robj *createZsetObject(void);
 robj *createZsetListpackObject(void);
@ -2980,9 +2984,9 @@ int setTypeRemove(robj *subject, sds value);
 int setTypeIsMember(robj *subject, sds value);
 setTypeIterator *setTypeInitIterator(robj *subject);
 void setTypeReleaseIterator(setTypeIterator *si);
-int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele);
 sds setTypeNextObject(setTypeIterator *si);
-int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
 unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
 unsigned long setTypeSize(const robj *subject);
 void setTypeConvert(robj *subject, int enc);
--- a/src/t_set.c
+++ b/src/t_set.c
--- a/src/t_zset.c
+++ b/src/t_zset.c
@ -1971,6 +1971,10 @@ typedef struct {
                dictIterator *di;
                dictEntry *de;
            } ht;
+            struct {
+                unsigned char *lp;
+                unsigned char *p;
+            } lp;
        } set;

        /* Sorted set iterators. */
@ -2025,6 +2029,9 @@ void zuiInitIterator(zsetopsrc *op) {
            it->ht.dict = op->subject->ptr;
            it->ht.di = dictGetIterator(op->subject->ptr);
            it->ht.de = dictNext(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            it->lp.lp = op->subject->ptr;
+            it->lp.p = lpFirst(it->lp.lp);
        } else {
            serverPanic("Unknown set encoding");
        }
@ -2061,6 +2068,8 @@ void zuiClearIterator(zsetopsrc *op) {
            UNUSED(it); /* skip */
        } else if (op->encoding == OBJ_ENCODING_HT) {
            dictReleaseIterator(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            UNUSED(it);
        } else {
            serverPanic("Unknown set encoding");
        }
@ -2091,14 +2100,7 @@ unsigned long zuiLength(zsetopsrc *op) {
        return 0;

    if (op->type == OBJ_SET) {
-        if (op->encoding == OBJ_ENCODING_INTSET) {
-            return intsetLen(op->subject->ptr);
-        } else if (op->encoding == OBJ_ENCODING_HT) {
-            dict *ht = op->subject->ptr;
-            return dictSize(ht);
-        } else {
-            serverPanic("Unknown set encoding");
-        }
+        return setTypeSize(op->subject);
    } else if (op->type == OBJ_ZSET) {
        if (op->encoding == OBJ_ENCODING_LISTPACK) {
            return zzlLength(op->subject->ptr);
@ -2144,6 +2146,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {

            /* Move to next element. */
            it->ht.de = dictNext(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+            if (it->lp.p == NULL)
+                return 0;
+            val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell);
+            val->score = 1.0;
+
+            /* Move to next element. */
+            it->lp.p = lpNext(it->lp.lp, it->lp.p);
        } else {
            serverPanic("Unknown set encoding");
        }
--- a/tests/unit/keyspace.tcl
+++ b/tests/unit/keyspace.tcl
@ -256,30 +256,27 @@ start_server {tags {"keyspace"}} {
        assert_equal $digest [debug_digest_value mynewlist{t}]
    }

-    test {COPY basic usage for intset set} {
-        r del set1{t} newset1{t}
-        r sadd set1{t} 1 2 3
-        assert_encoding intset set1{t}
-        r copy set1{t} newset1{t}
-        set digest [debug_digest_value set1{t}]
-        assert_equal $digest [debug_digest_value newset1{t}]
-        assert_refcount 1 set1{t}
-        assert_refcount 1 newset1{t}
-        r del set1{t}
-        assert_equal $digest [debug_digest_value newset1{t}]
-    }
-
-    test {COPY basic usage for hashtable set} {
-        r del set2{t} newset2{t}
-        r sadd set2{t} 1 2 3 a
-        assert_encoding hashtable set2{t}
-        r copy set2{t} newset2{t}
-        set digest [debug_digest_value set2{t}]
-        assert_equal $digest [debug_digest_value newset2{t}]
-        assert_refcount 1 set2{t}
-        assert_refcount 1 newset2{t}
-        r del set2{t}
-        assert_equal $digest [debug_digest_value newset2{t}]
+    foreach type {intset listpack hashtable} {
+        test "COPY basic usage for $type set" {
+            r del set1{t} newset1{t}
+            r sadd set1{t} 1 2 3
+            if {$type ne "intset"} {
+                r sadd set1{t} a
+            }
+            if {$type eq "hashtable"} {
+                for {set i 4} {$i < 200} {incr i} {
+                    r sadd set1{t} $i
+                }
+            }
+            assert_encoding $type set1{t}
+            r copy set1{t} newset1{t}
+            set digest [debug_digest_value set1{t}]
+            assert_equal $digest [debug_digest_value newset1{t}]
+            assert_refcount 1 set1{t}
+            assert_refcount 1 newset1{t}
+            r del set1{t}
+            assert_equal $digest [debug_digest_value newset1{t}]
+        }
    }

    test {COPY basic usage for listpack sorted set} {
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@ -98,7 +98,7 @@ start_server {tags {"scan network"}} {
        assert_equal 1000 [llength $keys]
    }

-    foreach enc {intset hashtable} {
+    foreach enc {intset listpack hashtable} {
        test "SSCAN with encoding $enc" {
            # Create the Set
            r del set
@ -107,8 +107,9 @@ start_server {tags {"scan network"}} {
            } else {
                set prefix "ele:"
            }
+            set count [expr {$enc eq "hashtable" ? 200 : 100}]
            set elements {}
-            for {set j 0} {$j < 100} {incr j} {
+            for {set j 0} {$j < $count} {incr j} {
                lappend elements ${prefix}${j}
            }
            r sadd set {*}$elements
@ -128,7 +129,7 @@ start_server {tags {"scan network"}} {
            }

            set keys [lsort -unique $keys]
-            assert_equal 100 [llength $keys]
+            assert_equal $count [llength $keys]
        }
    }

--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@ -2,6 +2,8 @@ start_server {
    tags {"set"}
    overrides {
        "set-max-intset-entries" 512
+        "set-max-listpack-entries" 128
+        "set-max-listpack-value" 32
    }
 } {
    proc create_set {key entries} {
@ -9,12 +11,19 @@ start_server {
        foreach entry $entries { r sadd $key $entry }
    }

-    test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - regular set} {
-        create_set myset {foo}
-        assert_encoding hashtable myset
+    # Values for initializing sets, per encoding.
+    array set initelems {listpack {foo} hashtable {foo}}
+    for {set i 0} {$i < 130} {incr i} {
+        lappend initelems(hashtable) [format "i%03d" $i]
+    }
+
+    foreach type {listpack hashtable} {
+    test "SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - $type" {
+        create_set myset $initelems($type)
+        assert_encoding $type myset
        assert_equal 1 [r sadd myset bar]
        assert_equal 0 [r sadd myset bar]
-        assert_equal 2 [r scard myset]
+        assert_equal [expr [llength $initelems($type)] + 1] [r scard myset]
        assert_equal 1 [r sismember myset foo]
        assert_equal 1 [r sismember myset bar]
        assert_equal 0 [r sismember myset bla]
@ -23,7 +32,8 @@ start_server {
        assert_equal {1 0} [r smismember myset foo bla]
        assert_equal {0 1} [r smismember myset bla foo]
        assert_equal {0} [r smismember myset bla]
-        assert_equal {bar foo} [lsort [r smembers myset]]
+        assert_equal "bar $initelems($type)" [lsort [r smembers myset]]
+    }
    }

    test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - intset} {
@ -67,15 +77,33 @@ start_server {
        assert_error WRONGTYPE* {r sadd mylist bar}
    }

-    test "SADD a non-integer against an intset" {
+    test "SADD a non-integer against a small intset" {
        create_set myset {1 2 3}
        assert_encoding intset myset
        assert_equal 1 [r sadd myset a]
+        assert_encoding listpack myset
+    }
+
+    test "SADD a non-integer against a large intset" {
+        create_set myset {0}
+        for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+        assert_encoding intset myset
+        assert_equal 1 [r sadd myset a]
        assert_encoding hashtable myset
    }

    test "SADD an integer larger than 64 bits" {
        create_set myset {213244124402402314402033402}
+        assert_encoding listpack myset
+        assert_equal 1 [r sismember myset 213244124402402314402033402]
+        assert_equal {1} [r smismember myset 213244124402402314402033402]
+    }
+
+    test "SADD an integer larger than 64 bits to a large intset" {
+        create_set myset {0}
+        for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+        assert_encoding intset myset
+        r sadd myset 213244124402402314402033402
        assert_encoding hashtable myset
        assert_equal 1 [r sismember myset 213244124402402314402033402]
        assert_equal {1} [r smismember myset 213244124402402314402033402]
@ -100,25 +128,32 @@ start_server {
        r del myintset
        r del myhashset
        r del mylargeintset
+        r del mysmallset
        for {set i 0} {$i <  100} {incr i} { r sadd myintset $i }
        for {set i 0} {$i < 1280} {incr i} { r sadd mylargeintset $i }
+        for {set i 0} {$i <   50} {incr i} { r sadd mysmallset [format "i%03d" $i] }
        for {set i 0} {$i <  256} {incr i} { r sadd myhashset [format "i%03d" $i] }
        assert_encoding intset myintset
        assert_encoding hashtable mylargeintset
+        assert_encoding listpack mysmallset
        assert_encoding hashtable myhashset

        r debug reload
        assert_encoding intset myintset
        assert_encoding hashtable mylargeintset
+        assert_encoding listpack mysmallset
        assert_encoding hashtable myhashset
    } {} {needs:debug}

-    test {SREM basics - regular set} {
-        create_set myset {foo bar ciao}
-        assert_encoding hashtable myset
-        assert_equal 0 [r srem myset qux]
-        assert_equal 1 [r srem myset foo]
-        assert_equal {bar ciao} [lsort [r smembers myset]]
+    foreach type {listpack hashtable} {
+        test "SREM basics - $type" {
+            create_set myset $initelems($type)
+            r sadd myset ciao
+            assert_encoding $type myset
+            assert_equal 0 [r srem myset qux]
+            assert_equal 1 [r srem myset ciao]
+            assert_equal $initelems($type) [lsort [r smembers myset]]
+        }
    }

    test {SREM basics - intset} {
@ -177,7 +212,18 @@ start_server {
        assert_equal 0 [r sintercard 1 non-existing-key limit 10]
    }

-    foreach {type} {hashtable intset} {
+    foreach {type} {regular intset} {
+        # Create sets setN{t} where N = 1..5
+        if {$type eq "regular"} {
+            set smallenc listpack
+            set bigenc hashtable
+        } else {
+            set smallenc intset
+            set bigenc intset
+        }
+        # Sets 1, 2 and 4 are big; sets 3 and 5 are small.
+        array set encoding "1 $bigenc 2 $bigenc 3 $smallenc 4 $bigenc 5 $smallenc"
+
        for {set i 1} {$i <= 5} {incr i} {
            r del [format "set%d{t}" $i]
        }
@ -198,7 +244,7 @@ start_server {
        # while the tests are running -- an extra element is added to every
        # set that determines its encoding.
        set large 200
-        if {$type eq "hashtable"} {
+        if {$type eq "regular"} {
            set large foo
        }

@ -206,9 +252,9 @@ start_server {
            r sadd [format "set%d{t}" $i] $large
        }

-        test "Generated sets must be encoded as $type" {
+        test "Generated sets must be encoded correctly - $type" {
            for {set i 1} {$i <= 5} {incr i} {
-                assert_encoding $type [format "set%d{t}" $i]
+                assert_encoding $encoding($i) [format "set%d{t}" $i]
            }
        }

@ -225,14 +271,14 @@ start_server {

        test "SINTERSTORE with two sets - $type" {
            r sinterstore setres{t} set1{t} set2{t}
-            assert_encoding $type setres{t}
+            assert_encoding $smallenc setres{t}
            assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
        }

        test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" {
            r debug reload
            r sinterstore setres{t} set1{t} set2{t}
-            assert_encoding $type setres{t}
+            assert_encoding $smallenc setres{t}
            assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
        } {} {needs:debug}

@ -243,7 +289,7 @@ start_server {

        test "SUNIONSTORE with two sets - $type" {
            r sunionstore setres{t} set1{t} set2{t}
-            assert_encoding $type setres{t}
+            assert_encoding $bigenc setres{t}
            set expected [lsort -uniq "[r smembers set1{t}] [r smembers set2{t}]"]
            assert_equal $expected [lsort [r smembers setres{t}]]
        }
@ -294,6 +340,46 @@ start_server {
        }
    }

+    test "SINTERSTORE with two listpack sets where result is intset" {
+        r del setres{t} set1{t} set2{t}
+        r sadd set1{t} a b c 1 3 6 x y z
+        r sadd set2{t} e f g 1 2 3 u v w
+        assert_encoding listpack set1{t}
+        assert_encoding listpack set2{t}
+        r sinterstore setres{t} set1{t} set2{t}
+        assert_equal [list 1 3] [lsort [r smembers setres{t}]]
+        assert_encoding intset setres{t}
+    }
+
+    test "SINTERSTORE with two hashtable sets where result is intset" {
+        r del setres{t} set1{t} set2{t}
+        r sadd set1{t} a b c 444 555 666
+        r sadd set2{t} e f g 111 222 333
+        set expected {}
+        for {set i 1} {$i < 130} {incr i} {
+            r sadd set1{t} $i
+            r sadd set2{t} $i
+            lappend expected $i
+        }
+        assert_encoding hashtable set1{t}
+        assert_encoding hashtable set2{t}
+        r sinterstore setres{t} set1{t} set2{t}
+        assert_equal [lsort $expected] [lsort [r smembers setres{t}]]
+        assert_encoding intset setres{t}
+    }
+
+    test "SUNION hashtable and listpack" {
+        # This adds code coverage for adding a non-sds string to a hashtable set
+        # which already contains the string.
+        r del set1{t} set2{t}
+        set union {abcdefghijklmnopqrstuvwxyz1234567890 a b c 1 2 3}
+        create_set set1{t} $union
+        create_set set2{t} {a b c}
+        assert_encoding hashtable set1{t}
+        assert_encoding listpack set2{t}
+        assert_equal [lsort $union] [lsort [r sunion set1{t} set2{t}]]
+    }
+
    test "SDIFF with first set empty" {
        r del set1{t} set2{t} set3{t}
        r sadd set2{t} 1 2 3 4
@ -428,7 +514,7 @@ start_server {
        r sadd set2{t} 1 2 3 a
        r srem set2{t} a
        assert_encoding intset set1{t}
-        assert_encoding hashtable set2{t}
+        assert_encoding listpack set2{t}
        lsort [r sinter set1{t} set2{t}]
    } {1 2 3}

@ -549,7 +635,7 @@ start_server {
        assert_equal 0 [r exists setres{t}]
    }

-    foreach {type contents} {hashtable {a b c} intset {1 2 3}} {
+    foreach {type contents} {listpack {a b c} intset {1 2 3}} {
        test "SPOP basics - $type" {
            create_set myset $contents
            assert_encoding $type myset
@ -575,11 +661,20 @@ start_server {
        }
    }

+    test "SPOP integer from listpack set" {
+        create_set myset {a 1 2 3 4 5 6 7}
+        assert_encoding listpack myset
+        set a [r spop myset]
+        set b [r spop myset]
+        assert {[string is digit $a] || [string is digit $b]}
+    }
+
    foreach {type contents} {
-        hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z} 
+        listpack {a b c d e f g h i j k l m n o p q r s t u v w x y z}
        intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+        hashtable {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 b c d e f g h i j k l m n o p q r s t u v w x y z}
    } {
-        test "SPOP with <count>" {
+        test "SPOP with <count> - $type" {
            create_set myset $contents
            assert_encoding $type myset
            assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
@ -610,16 +705,20 @@ start_server {
        r spop nonexisting_key 100
    } {}

-    test "SPOP new implementation: code path #1" {
-        set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+    foreach {type content} {
+        intset   {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+        listpack {a 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+    } {
+    test "SPOP new implementation: code path #1 $type" {
        create_set myset $content
+        assert_encoding $type myset
        set res [r spop myset 30]
        assert {[lsort $content] eq [lsort $res]}
    }

-    test "SPOP new implementation: code path #2" {
-        set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+    test "SPOP new implementation: code path #2 $type" {
        create_set myset $content
+        assert_encoding $type myset
        set res [r spop myset 2]
        assert {[llength $res] == 2}
        assert {[r scard myset] == 18}
@ -627,15 +726,16 @@ start_server {
        assert {[lsort $union] eq [lsort $content]}
    }

-    test "SPOP new implementation: code path #3" {
-        set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+    test "SPOP new implementation: code path #3 $type" {
        create_set myset $content
+        assert_encoding $type myset
        set res [r spop myset 18]
        assert {[llength $res] == 18}
        assert {[r scard myset] == 2}
        set union [concat [r smembers myset] $res]
        assert {[lsort $union] eq [lsort $content]}
    }
+    }

    test "SRANDMEMBER count of 0 is handled correctly" {
        r srandmember myset 0
@ -659,7 +759,7 @@ start_server {
    r readraw 0

    foreach {type contents} {
-        hashtable {
+        listpack {
            1 5 10 50 125 50000 33959417 4775547 65434162
            12098459 427716 483706 2726473884 72615637475
            MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
@ -674,9 +774,20 @@ start_server {
            30 31 32 33 34 35 36 37 38 39
            40 41 42 43 44 45 46 47 48 49
        }
+        hashtable {
+            ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+            1 5 10 50 125 50000 33959417 4775547 65434162
+            12098459 427716 483706 2726473884 72615637475
+            MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
+            SUSAN MARGARET DOROTHY LISA NANCY KAREN BETTY HELEN
+            SANDRA DONNA CAROL RUTH SHARON MICHELLE LAURA SARAH
+            KIMBERLY DEBORAH JESSICA SHIRLEY CYNTHIA ANGELA MELISSA
+            BRENDA AMY ANNA REBECCA VIRGINIA
+        }
    } {
        test "SRANDMEMBER with <count> - $type" {
            create_set myset $contents
+            assert_encoding $type myset
            unset -nocomplain myset
            array set myset {}
            foreach ele [r smembers myset] {
@ -767,16 +878,22 @@ start_server {
    }

    foreach {type contents} {
-        hashtable {
+        listpack {
            1 5 10 50 125
            MARY PATRICIA LINDA BARBARA ELIZABETH
        }
        intset {
            0 1 2 3 4 5 6 7 8 9
        }
+        hashtable {
+            ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+            1 5 10 50 125
+            MARY PATRICIA LINDA BARBARA
+        }
    } {
        test "SRANDMEMBER histogram distribution - $type" {
            create_set myset $contents
+            assert_encoding $type myset
            unset -nocomplain myset
            array set myset {}
            foreach ele [r smembers myset] {
@ -809,7 +926,7 @@ start_server {
        r del myset3{t} myset4{t}
        create_set myset1{t} {1 a b}
        create_set myset2{t} {2 3 4}
-        assert_encoding hashtable myset1{t}
+        assert_encoding listpack myset1{t}
        assert_encoding intset myset2{t}
    }

@ -819,7 +936,7 @@ start_server {
        assert_equal 1 [r smove myset1{t} myset2{t} a]
        assert_equal {1 b} [lsort [r smembers myset1{t}]]
        assert_equal {2 3 4 a} [lsort [r smembers myset2{t}]]
-        assert_encoding hashtable myset2{t}
+        assert_encoding listpack myset2{t}

        # move an integer element should not convert the encoding
        setup_move
@ -855,7 +972,7 @@ start_server {
        assert_equal 1 [r smove myset1{t} myset3{t} a]
        assert_equal {1 b} [lsort [r smembers myset1{t}]]
        assert_equal {a} [lsort [r smembers myset3{t}]]
-        assert_encoding hashtable myset3{t}
+        assert_encoding listpack myset3{t}
    }

    test "SMOVE from intset to non existing destination set" {