3053 lines
79 KiB
C
3053 lines
79 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* radixtree.h
|
|
* Template for adaptive radix tree.
|
|
*
|
|
* A template to generate an "adaptive radix tree", specialized for value
|
|
* types and for local/shared memory.
|
|
*
|
|
* The concept originates from the paper "The Adaptive Radix Tree: ARTful
|
|
* Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper,
|
|
* and Thomas Neumann, 2013.
|
|
*
|
|
* Radix trees have some advantages over hash tables:
|
|
* - The keys are logically ordered, allowing efficient sorted iteration
|
|
* and range queries
|
|
* - Operations using keys that are lexicographically close together
|
|
* will have favorable memory locality
|
|
* - Memory use grows gradually rather than by doubling
|
|
* - The key does not need to be stored with the value, since the key
|
|
* is implicitly contained in the path to the value
|
|
*
|
|
* Some disadvantages are:
|
|
* - Point queries (along with insertion and deletion) are slower than
|
|
* a linear probing hash table as in simplehash.h
|
|
* - Memory usage varies by key distribution, so is difficult to predict
|
|
*
|
|
* A classic radix tree consists of nodes, each containing an array of
|
|
* pointers to child nodes. The size of the array is determined by the
|
|
* "span" of the tree, which is the number of bits of the key used to
|
|
* index into the array. For example, with a span of 6, a "chunk"
|
|
* of 6 bits is extracted from the key at each node traversal, and
|
|
* the arrays thus have a "fanout" of 2^6 or 64 entries. A large span
|
|
* allows a shorter tree, but requires larger arrays that may be mostly
|
|
* wasted space.
|
|
*
|
|
* The key idea of the adaptive radix tree is to choose different
|
|
* data structures based on the number of child nodes. A node will
|
|
* start out small when it is first populated, and when it is full,
|
|
* it is replaced by the next larger size. Conversely, when a node
|
|
* becomes mostly empty, it is replaced by the next smaller node. The
|
|
* bulk of the code complexity in this module stems from this dynamic
|
|
* switching. One mitigating factor is using a span of 8, since bytes
|
|
* are directly addressable.
|
|
*
|
|
* The ART paper mentions three ways to implement leaves:
|
|
*
|
|
* "- Single-value leaves: The values are stored using an addi-
|
|
* tional leaf node type which stores one value.
|
|
* - Multi-value leaves: The values are stored in one of four
|
|
* different leaf node types, which mirror the structure of
|
|
* inner nodes, but contain values instead of pointers.
|
|
* - Combined pointer/value slots: If values fit into point-
|
|
* ers, no separate node types are necessary. Instead, each
|
|
* pointer storage location in an inner node can either
|
|
* store a pointer or a value."
|
|
*
|
|
* We use a form of "combined pointer/value slots", as recommended. Values
|
|
* of size (if fixed at compile time) equal or smaller than the platform's
|
|
* pointer type are stored in the child slots of the last level node,
|
|
* while larger values are the same as "single-value" leaves above. This
|
|
* offers flexibility and efficiency. Variable-length types are currently
|
|
* treated as single-value leaves for simplicity, but future work may
|
|
* allow those to be stored in the child pointer arrays, when they're
|
|
* small enough.
|
|
*
|
|
* There are two other techniques described in the paper that are not
|
|
* implemented here:
|
|
* - path compression "...removes all inner nodes that have only a single child."
|
|
* - lazy path expansion "...inner nodes are only created if they are required
|
|
* to distinguish at least two leaf nodes."
|
|
*
|
|
* We do have a form of "poor man's path compression", however, enabled by
|
|
* only supporting unsigned integer keys (for now assumed to be 64-bit):
|
|
* A tree doesn't contain paths where the highest bytes of all keys are
|
|
* zero. That way, the tree's height adapts to the distribution of keys.
|
|
*
|
|
* To handle concurrency, we use a single reader-writer lock for the
|
|
* radix tree. If concurrent write operations are possible, the tree
|
|
* must be exclusively locked during write operations such as RT_SET()
|
|
* and RT_DELETE(), and share locked during read operations such as
|
|
* RT_FIND() and RT_BEGIN_ITERATE().
|
|
*
|
|
* TODO: The current locking mechanism is not optimized for high
|
|
* concurrency with mixed read-write workloads. In the future it might
|
|
* be worthwhile to replace it with the Optimistic Lock Coupling or
|
|
* ROWEX mentioned in the paper "The ART of Practical Synchronization"
|
|
* by the same authors as the ART paper, 2016.
|
|
*
|
|
* To generate a radix tree and associated functions for a use case
|
|
* several macros have to be #define'ed before this file is included.
|
|
* Including the file #undef's all those, so a new radix tree can be
|
|
* generated afterwards.
|
|
*
|
|
* The relevant parameters are:
|
|
* - RT_PREFIX - prefix for all symbol names generated. A prefix of "foo"
|
|
* will result in radix tree type "foo_radix_tree" and functions like
|
|
* "foo_create"/"foo_free" and so forth.
|
|
* - RT_DECLARE - if defined function prototypes and type declarations are
|
|
* generated
|
|
* - RT_DEFINE - if defined function definitions are generated
|
|
* - RT_SCOPE - in which scope (e.g. extern, static inline) do function
|
|
* declarations reside
|
|
* - RT_VALUE_TYPE - the type of the value.
|
|
* - RT_VARLEN_VALUE_SIZE() - for variable length values, an expression
|
|
* involving a pointer to the value type, to calculate size.
|
|
* NOTE: implies that the value is in fact variable-length,
|
|
* so do not set for fixed-length values.
|
|
* - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
|
|
* storing the value in a child pointer slot, rather than as a single-
|
|
* value leaf, if small enough. This requires that the value, when
|
|
* read as a child pointer, can be tagged in the lowest bit.
|
|
*
|
|
* Optional parameters:
|
|
* - RT_SHMEM - if defined, the radix tree is created in the DSA area
|
|
* so that multiple processes can access it simultaneously.
|
|
* - RT_DEBUG - if defined add stats tracking and debugging functions
|
|
*
|
|
* Interface
|
|
* ---------
|
|
*
|
|
* RT_CREATE - Create a new, empty radix tree
|
|
* RT_FREE - Free the radix tree
|
|
* RT_FIND - Lookup the value for a given key
|
|
* RT_SET - Set a key-value pair
|
|
* RT_BEGIN_ITERATE - Begin iterating through all key-value pairs
|
|
* RT_ITERATE_NEXT - Return next key-value pair, if any
|
|
* RT_END_ITERATE - End iteration
|
|
* RT_MEMORY_USAGE - Get the memory as measured by space in memory context blocks
|
|
*
|
|
* Interface for Shared Memory
|
|
* ---------
|
|
*
|
|
* RT_ATTACH - Attach to the radix tree
|
|
* RT_DETACH - Detach from the radix tree
|
|
* RT_LOCK_EXCLUSIVE - Lock the radix tree in exclusive mode
|
|
* RT_LOCK_SHARE - Lock the radix tree in share mode
|
|
* RT_UNLOCK - Unlock the radix tree
|
|
* RT_GET_HANDLE - Return the handle of the radix tree
|
|
*
|
|
* Optional Interface
|
|
* ---------
|
|
*
|
|
* RT_DELETE - Delete a key-value pair. Declared/defined if RT_USE_DELETE is defined
|
|
*
|
|
*
|
|
* Copyright (c) 2024, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/include/lib/radixtree.h
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "nodes/bitmapset.h"
|
|
#include "port/pg_bitutils.h"
|
|
#include "port/simd.h"
|
|
#include "utils/dsa.h"
|
|
#include "utils/memutils.h"
|
|
|
|
/* helpers */
|
|
#define RT_MAKE_PREFIX(a) CppConcat(a,_)
|
|
#define RT_MAKE_NAME(name) RT_MAKE_NAME_(RT_MAKE_PREFIX(RT_PREFIX),name)
|
|
#define RT_MAKE_NAME_(a,b) CppConcat(a,b)
|
|
/*
|
|
* stringify a macro constant, from https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html
|
|
*/
|
|
#define RT_STR(s) RT_STR_(s)
|
|
#define RT_STR_(s) #s
|
|
|
|
/* function declarations */
|
|
#define RT_CREATE RT_MAKE_NAME(create)
|
|
#define RT_FREE RT_MAKE_NAME(free)
|
|
#define RT_FIND RT_MAKE_NAME(find)
|
|
#ifdef RT_SHMEM
|
|
#define RT_ATTACH RT_MAKE_NAME(attach)
|
|
#define RT_DETACH RT_MAKE_NAME(detach)
|
|
#define RT_GET_HANDLE RT_MAKE_NAME(get_handle)
|
|
#define RT_LOCK_EXCLUSIVE RT_MAKE_NAME(lock_exclusive)
|
|
#define RT_LOCK_SHARE RT_MAKE_NAME(lock_share)
|
|
#define RT_UNLOCK RT_MAKE_NAME(unlock)
|
|
#endif
|
|
#define RT_SET RT_MAKE_NAME(set)
|
|
#define RT_BEGIN_ITERATE RT_MAKE_NAME(begin_iterate)
|
|
#define RT_ITERATE_NEXT RT_MAKE_NAME(iterate_next)
|
|
#define RT_END_ITERATE RT_MAKE_NAME(end_iterate)
|
|
#ifdef RT_USE_DELETE
|
|
#define RT_DELETE RT_MAKE_NAME(delete)
|
|
#endif
|
|
#define RT_MEMORY_USAGE RT_MAKE_NAME(memory_usage)
|
|
#define RT_DUMP_NODE RT_MAKE_NAME(dump_node)
|
|
#define RT_STATS RT_MAKE_NAME(stats)
|
|
|
|
/* internal helper functions (no externally visible prototypes) */
|
|
#define RT_CHILDPTR_IS_VALUE RT_MAKE_NAME(childptr_is_value)
|
|
#define RT_VALUE_IS_EMBEDDABLE RT_MAKE_NAME(value_is_embeddable)
|
|
#define RT_GET_SLOT_RECURSIVE RT_MAKE_NAME(get_slot_recursive)
|
|
#define RT_DELETE_RECURSIVE RT_MAKE_NAME(delete_recursive)
|
|
#define RT_ALLOC_NODE RT_MAKE_NAME(alloc_node)
|
|
#define RT_ALLOC_LEAF RT_MAKE_NAME(alloc_leaf)
|
|
#define RT_FREE_NODE RT_MAKE_NAME(free_node)
|
|
#define RT_FREE_LEAF RT_MAKE_NAME(free_leaf)
|
|
#define RT_FREE_RECURSE RT_MAKE_NAME(free_recurse)
|
|
#define RT_EXTEND_UP RT_MAKE_NAME(extend_up)
|
|
#define RT_EXTEND_DOWN RT_MAKE_NAME(extend_down)
|
|
#define RT_COPY_COMMON RT_MAKE_NAME(copy_common)
|
|
#define RT_PTR_SET_LOCAL RT_MAKE_NAME(ptr_set_local)
|
|
#define RT_NODE_16_SEARCH_EQ RT_MAKE_NAME(node_16_search_eq)
|
|
#define RT_NODE_4_GET_INSERTPOS RT_MAKE_NAME(node_4_get_insertpos)
|
|
#define RT_NODE_16_GET_INSERTPOS RT_MAKE_NAME(node_16_get_insertpos)
|
|
#define RT_SHIFT_ARRAYS_FOR_INSERT RT_MAKE_NAME(shift_arrays_for_insert)
|
|
#define RT_SHIFT_ARRAYS_AND_DELETE RT_MAKE_NAME(shift_arrays_and_delete)
|
|
#define RT_COPY_ARRAYS_FOR_INSERT RT_MAKE_NAME(copy_arrays_for_insert)
|
|
#define RT_COPY_ARRAYS_AND_DELETE RT_MAKE_NAME(copy_arrays_and_delete)
|
|
#define RT_NODE_48_IS_CHUNK_USED RT_MAKE_NAME(node_48_is_chunk_used)
|
|
#define RT_NODE_48_GET_CHILD RT_MAKE_NAME(node_48_get_child)
|
|
#define RT_NODE_256_IS_CHUNK_USED RT_MAKE_NAME(node_256_is_chunk_used)
|
|
#define RT_NODE_256_GET_CHILD RT_MAKE_NAME(node_256_get_child)
|
|
#define RT_KEY_GET_SHIFT RT_MAKE_NAME(key_get_shift)
|
|
#define RT_SHIFT_GET_MAX_VAL RT_MAKE_NAME(shift_get_max_val)
|
|
#define RT_NODE_SEARCH RT_MAKE_NAME(node_search)
|
|
#define RT_NODE_DELETE RT_MAKE_NAME(node_delete)
|
|
#define RT_NODE_INSERT RT_MAKE_NAME(node_insert)
|
|
#define RT_ADD_CHILD_4 RT_MAKE_NAME(add_child_4)
|
|
#define RT_ADD_CHILD_16 RT_MAKE_NAME(add_child_16)
|
|
#define RT_ADD_CHILD_48 RT_MAKE_NAME(add_child_48)
|
|
#define RT_ADD_CHILD_256 RT_MAKE_NAME(add_child_256)
|
|
#define RT_GROW_NODE_4 RT_MAKE_NAME(grow_node_4)
|
|
#define RT_GROW_NODE_16 RT_MAKE_NAME(grow_node_16)
|
|
#define RT_GROW_NODE_48 RT_MAKE_NAME(grow_node_48)
|
|
#define RT_REMOVE_CHILD_4 RT_MAKE_NAME(remove_child_4)
|
|
#define RT_REMOVE_CHILD_16 RT_MAKE_NAME(remove_child_16)
|
|
#define RT_REMOVE_CHILD_48 RT_MAKE_NAME(remove_child_48)
|
|
#define RT_REMOVE_CHILD_256 RT_MAKE_NAME(remove_child_256)
|
|
#define RT_SHRINK_NODE_16 RT_MAKE_NAME(shrink_child_16)
|
|
#define RT_SHRINK_NODE_48 RT_MAKE_NAME(shrink_child_48)
|
|
#define RT_SHRINK_NODE_256 RT_MAKE_NAME(shrink_child_256)
|
|
#define RT_NODE_ITERATE_NEXT RT_MAKE_NAME(node_iterate_next)
|
|
#define RT_VERIFY_NODE RT_MAKE_NAME(verify_node)
|
|
|
|
/* type declarations */
|
|
#define RT_RADIX_TREE RT_MAKE_NAME(radix_tree)
|
|
#define RT_RADIX_TREE_CONTROL RT_MAKE_NAME(radix_tree_control)
|
|
#define RT_ITER RT_MAKE_NAME(iter)
|
|
#ifdef RT_SHMEM
|
|
#define RT_HANDLE RT_MAKE_NAME(handle)
|
|
#endif
|
|
#define RT_NODE RT_MAKE_NAME(node)
|
|
#define RT_CHILD_PTR RT_MAKE_NAME(child_ptr)
|
|
#define RT_NODE_ITER RT_MAKE_NAME(node_iter)
|
|
#define RT_NODE_4 RT_MAKE_NAME(node_4)
|
|
#define RT_NODE_16 RT_MAKE_NAME(node_16)
|
|
#define RT_NODE_48 RT_MAKE_NAME(node_48)
|
|
#define RT_NODE_256 RT_MAKE_NAME(node_256)
|
|
#define RT_SIZE_CLASS RT_MAKE_NAME(size_class)
|
|
#define RT_SIZE_CLASS_ELEM RT_MAKE_NAME(size_class_elem)
|
|
#define RT_SIZE_CLASS_INFO RT_MAKE_NAME(size_class_info)
|
|
#define RT_CLASS_4 RT_MAKE_NAME(class_4)
|
|
#define RT_CLASS_16_LO RT_MAKE_NAME(class_32_min)
|
|
#define RT_CLASS_16_HI RT_MAKE_NAME(class_32_max)
|
|
#define RT_CLASS_48 RT_MAKE_NAME(class_48)
|
|
#define RT_CLASS_256 RT_MAKE_NAME(class_256)
|
|
|
|
/* generate forward declarations necessary to use the radix tree */
|
|
#ifdef RT_DECLARE
|
|
|
|
typedef struct RT_RADIX_TREE RT_RADIX_TREE;
|
|
typedef struct RT_ITER RT_ITER;
|
|
|
|
#ifdef RT_SHMEM
|
|
typedef dsa_pointer RT_HANDLE;
|
|
#endif
|
|
|
|
#ifdef RT_SHMEM
|
|
RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id);
|
|
RT_SCOPE RT_RADIX_TREE *RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
|
|
RT_SCOPE void RT_DETACH(RT_RADIX_TREE * tree);
|
|
RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE * tree);
|
|
RT_SCOPE void RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree);
|
|
RT_SCOPE void RT_LOCK_SHARE(RT_RADIX_TREE * tree);
|
|
RT_SCOPE void RT_UNLOCK(RT_RADIX_TREE * tree);
|
|
#else
|
|
RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx);
|
|
#endif
|
|
RT_SCOPE void RT_FREE(RT_RADIX_TREE * tree);
|
|
|
|
RT_SCOPE RT_VALUE_TYPE *RT_FIND(RT_RADIX_TREE * tree, uint64 key);
|
|
RT_SCOPE bool RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p);
|
|
|
|
#ifdef RT_USE_DELETE
|
|
RT_SCOPE bool RT_DELETE(RT_RADIX_TREE * tree, uint64 key);
|
|
#endif
|
|
|
|
RT_SCOPE RT_ITER *RT_BEGIN_ITERATE(RT_RADIX_TREE * tree);
|
|
RT_SCOPE RT_VALUE_TYPE *RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p);
|
|
RT_SCOPE void RT_END_ITERATE(RT_ITER * iter);
|
|
|
|
RT_SCOPE uint64 RT_MEMORY_USAGE(RT_RADIX_TREE * tree);
|
|
|
|
#ifdef RT_DEBUG
|
|
RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree);
|
|
#endif
|
|
|
|
#endif /* RT_DECLARE */
|
|
|
|
|
|
/* generate implementation of the radix tree */
|
|
#ifdef RT_DEFINE
|
|
|
|
/* The number of bits encoded in one tree level */
|
|
#define RT_SPAN BITS_PER_BYTE
|
|
|
|
/*
|
|
* The number of possible partial keys, and thus the maximum number of
|
|
* child pointers, for a node.
|
|
*/
|
|
#define RT_NODE_MAX_SLOTS (1 << RT_SPAN)
|
|
|
|
/* Mask for extracting a chunk from a key */
|
|
#define RT_CHUNK_MASK ((1 << RT_SPAN) - 1)
|
|
|
|
/* Maximum shift needed to extract a chunk from a key */
|
|
#define RT_MAX_SHIFT RT_KEY_GET_SHIFT(UINT64_MAX)
|
|
|
|
/* Maximum level a tree can reach for a key */
|
|
#define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
|
|
|
|
/* Get a chunk from the key */
|
|
#define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
|
|
|
|
/* For accessing bitmaps */
|
|
#define RT_BM_IDX(x) ((x) / BITS_PER_BITMAPWORD)
|
|
#define RT_BM_BIT(x) ((x) % BITS_PER_BITMAPWORD)
|
|
|
|
/*
|
|
* Node kinds
|
|
*
|
|
* The different node kinds are what make the tree "adaptive".
|
|
*
|
|
* Each node kind is associated with a different datatype and different
|
|
* search/set/delete/iterate algorithms adapted for its size. The largest
|
|
* kind, node256 is basically the same as a traditional radix tree,
|
|
* and would be most wasteful of memory when sparsely populated. The
|
|
* smaller nodes expend some additional CPU time to enable a smaller
|
|
* memory footprint.
|
|
*
|
|
* NOTE: There are 4 node kinds, and this should never be increased,
|
|
* for several reasons:
|
|
* 1. With 5 or more kinds, gcc tends to use a jump table for switch
|
|
* statements.
|
|
* 2. The 4 kinds can be represented with 2 bits, so we have the option
|
|
* in the future to tag the node pointer with the kind, even on
|
|
* platforms with 32-bit pointers. That would touch fewer cache lines
|
|
* during traversal and allow faster recovery from branch mispredicts.
|
|
* 3. We can have multiple size classes per node kind.
|
|
*/
|
|
#define RT_NODE_KIND_4 0x00
|
|
#define RT_NODE_KIND_16 0x01
|
|
#define RT_NODE_KIND_48 0x02
|
|
#define RT_NODE_KIND_256 0x03
|
|
#define RT_NODE_KIND_COUNT 4
|
|
|
|
/*
|
|
* Calculate the slab block size so that we can allocate at least 32 chunks
|
|
* from the block.
|
|
*/
|
|
#define RT_SLAB_BLOCK_SIZE(size) \
|
|
Max(SLAB_DEFAULT_BLOCK_SIZE, pg_nextpower2_32(size * 32))
|
|
|
|
/* Common header for all nodes */
|
|
typedef struct RT_NODE
|
|
{
|
|
/* Node kind, one per search/set algorithm */
|
|
uint8 kind;
|
|
|
|
/*
|
|
* Max capacity for the current size class. Storing this in the node
|
|
* enables multiple size classes per node kind. uint8 is sufficient for
|
|
* all node kinds, because we only use this number to test if the node
|
|
* needs to grow. Since node256 never needs to grow, we let this overflow
|
|
* to zero.
|
|
*/
|
|
uint8 fanout;
|
|
|
|
/*
|
|
* Number of children. uint8 is sufficient for all node kinds, because
|
|
* nodes shrink when this number gets lower than some threshold. Since
|
|
* node256 cannot possibly have zero children, we let the counter overflow
|
|
* and we interpret zero as "256" for this node kind.
|
|
*/
|
|
uint8 count;
|
|
} RT_NODE;
|
|
|
|
|
|
/* pointer returned by allocation */
|
|
#ifdef RT_SHMEM
|
|
#define RT_PTR_ALLOC dsa_pointer
|
|
#define RT_INVALID_PTR_ALLOC InvalidDsaPointer
|
|
#define RT_PTR_ALLOC_IS_VALID(ptr) DsaPointerIsValid(ptr)
|
|
#else
|
|
#define RT_PTR_ALLOC RT_NODE *
|
|
#define RT_INVALID_PTR_ALLOC NULL
|
|
#define RT_PTR_ALLOC_IS_VALID(ptr) PointerIsValid(ptr)
|
|
#endif
|
|
|
|
/*
|
|
* A convenience type used when we need to work with a DSA pointer as well
|
|
* as its local pointer. For local memory, both members are the same, so
|
|
* we use a union.
|
|
*/
|
|
#ifdef RT_SHMEM
|
|
typedef struct RT_CHILD_PTR
|
|
#else
|
|
typedef union RT_CHILD_PTR
|
|
#endif
|
|
{
|
|
RT_PTR_ALLOC alloc;
|
|
RT_NODE *local;
|
|
} RT_CHILD_PTR;
|
|
|
|
|
|
/*
|
|
* Helper macros and functions for value storage.
|
|
* We either embed values in the child slots of the last level
|
|
* node or store pointers to values to the child slots,
|
|
* depending on the value size.
|
|
*/
|
|
|
|
#ifdef RT_VARLEN_VALUE_SIZE
|
|
#define RT_GET_VALUE_SIZE(v) RT_VARLEN_VALUE_SIZE(v)
|
|
#else
|
|
#define RT_GET_VALUE_SIZE(v) sizeof(RT_VALUE_TYPE)
|
|
#endif
|
|
|
|
/*
|
|
* Return true if the value can be stored in the child array
|
|
* of the lowest-level node, false otherwise.
|
|
*/
|
|
static inline bool
|
|
RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
|
|
{
|
|
#ifdef RT_VARLEN_VALUE_SIZE
|
|
|
|
#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
|
|
return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
|
|
#else
|
|
return false;
|
|
#endif
|
|
|
|
#else
|
|
return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Return true if the child pointer contains the value, false
|
|
* if the child pointer is a leaf pointer.
|
|
*/
|
|
static inline bool
|
|
RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
|
|
{
|
|
#ifdef RT_VARLEN_VALUE_SIZE
|
|
|
|
#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
|
|
/* check for pointer tag */
|
|
#ifdef RT_SHMEM
|
|
return child & 1;
|
|
#else
|
|
return ((uintptr_t) child) & 1;
|
|
#endif
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
|
|
#else
|
|
return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Symbols for maximum possible fanout are declared first as they are
|
|
* required to declare each node kind. The declarations of other fanout
|
|
* values are followed as they need the struct sizes of each node kind.
|
|
*/
|
|
|
|
/* max possible key chunks without struct padding */
|
|
#define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))
|
|
|
|
/* equal to two 128-bit SIMD registers, regardless of availability */
|
|
#define RT_FANOUT_16_MAX 32
|
|
|
|
/*
|
|
* This also determines the number of bits necessary for the isset array,
|
|
* so we need to be mindful of the size of bitmapword. Since bitmapword
|
|
* can be 64 bits, the only values that make sense here are 64 and 128.
|
|
* The ART paper uses at most 64 for this node kind, and one advantage
|
|
* for us is that "isset" is a single bitmapword on most platforms,
|
|
* rather than an array, allowing the compiler to get rid of loops.
|
|
*/
|
|
#define RT_FANOUT_48_MAX 64
|
|
|
|
#define RT_FANOUT_256 RT_NODE_MAX_SLOTS
|
|
|
|
/*
|
|
* Node structs, one for each "kind"
|
|
*/
|
|
|
|
/*
|
|
* node4 and node16 use one array for key chunks and another
|
|
* array of the same length for children. The keys and children
|
|
* are stored at corresponding positions, sorted by chunk.
|
|
*/
|
|
|
|
typedef struct RT_NODE_4
|
|
{
|
|
RT_NODE base;
|
|
|
|
uint8 chunks[RT_FANOUT_4_MAX];
|
|
|
|
/* number of children depends on size class */
|
|
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
|
|
} RT_NODE_4;
|
|
|
|
typedef struct RT_NODE_16
|
|
{
|
|
RT_NODE base;
|
|
|
|
uint8 chunks[RT_FANOUT_16_MAX];
|
|
|
|
/* number of children depends on size class */
|
|
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
|
|
} RT_NODE_16;
|
|
|
|
/*
|
|
* node48 uses a 256-element array indexed by key chunks. This array
|
|
* stores indexes into a second array containing the children.
|
|
*/
|
|
typedef struct RT_NODE_48
|
|
{
|
|
RT_NODE base;
|
|
|
|
/* The index of slots for each fanout */
|
|
uint8 slot_idxs[RT_NODE_MAX_SLOTS];
|
|
|
|
/* Invalid index */
|
|
#define RT_INVALID_SLOT_IDX 0xFF
|
|
|
|
/* bitmap to track which slots are in use */
|
|
bitmapword isset[RT_BM_IDX(RT_FANOUT_48_MAX)];
|
|
|
|
/* number of children depends on size class */
|
|
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
|
|
} RT_NODE_48;
|
|
|
|
/*
|
|
* node256 is the largest node type. This node has an array of
|
|
* children directly indexed by chunk. Unlike other node kinds,
|
|
* its array size is by definition fixed.
|
|
*/
|
|
typedef struct RT_NODE_256
|
|
{
|
|
RT_NODE base;
|
|
|
|
/* bitmap to track which slots are in use */
|
|
bitmapword isset[RT_BM_IDX(RT_FANOUT_256)];
|
|
|
|
/* slots for 256 children */
|
|
RT_PTR_ALLOC children[RT_FANOUT_256];
|
|
} RT_NODE_256;
|
|
|
|
#if defined(RT_SHMEM)
|
|
/*
|
|
* Make sure the all nodes (except for node256) fit neatly into a DSA
|
|
* size class. We assume the RT_FANOUT_4 is in the range where DSA size
|
|
* classes increment by 8 (as of PG17 up to 64 bytes), so we just hard
|
|
* code that one.
|
|
*/
|
|
|
|
#if SIZEOF_DSA_POINTER < 8
|
|
#define RT_FANOUT_16_LO ((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
|
|
#define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
|
|
#define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
|
|
#else
|
|
#define RT_FANOUT_16_LO ((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
|
|
#define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
|
|
#define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
|
|
#endif /* SIZEOF_DSA_POINTER < 8 */
|
|
|
|
#else /* ! RT_SHMEM */
|
|
|
|
/* doesn't really matter, but may as well use the namesake */
|
|
#define RT_FANOUT_16_LO 16
|
|
/* use maximum possible */
|
|
#define RT_FANOUT_16_HI RT_FANOUT_16_MAX
|
|
#define RT_FANOUT_48 RT_FANOUT_48_MAX
|
|
|
|
#endif /* RT_SHMEM */
|
|
|
|
/*
|
|
* To save memory in trees with sparse keys, it would make sense to have two
|
|
* size classes for the smallest kind (perhaps a high class of 5 and a low class
|
|
* of 2), but it would be more effective to utilize lazy expansion and
|
|
* path compression.
|
|
*/
|
|
#define RT_FANOUT_4 4
|
|
|
|
StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
|
|
StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI");
|
|
StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits");
|
|
|
|
/*
|
|
* Node size classes
|
|
*
|
|
* Nodes of different kinds necessarily belong to different size classes.
|
|
* One innovation in our implementation compared to the ART paper is
|
|
* decoupling the notion of size class from kind.
|
|
*
|
|
* The size classes within a given node kind have the same underlying
|
|
* type, but a variable number of children/values. This is possible
|
|
* because each type (except node256) contains metadata that work the
|
|
* same way regardless of how many child slots there are. The nodes
|
|
* can introspect their allocated capacity at runtime.
|
|
*/
|
|
typedef enum RT_SIZE_CLASS
|
|
{
|
|
RT_CLASS_4 = 0,
|
|
RT_CLASS_16_LO,
|
|
RT_CLASS_16_HI,
|
|
RT_CLASS_48,
|
|
RT_CLASS_256
|
|
} RT_SIZE_CLASS;
|
|
|
|
/* Information for each size class */
|
|
typedef struct RT_SIZE_CLASS_ELEM
|
|
{
|
|
const char *name;
|
|
int fanout;
|
|
size_t allocsize;
|
|
} RT_SIZE_CLASS_ELEM;
|
|
|
|
|
|
static const RT_SIZE_CLASS_ELEM RT_SIZE_CLASS_INFO[] = {
|
|
[RT_CLASS_4] = {
|
|
.name = RT_STR(RT_PREFIX) "_radix_tree node4",
|
|
.fanout = RT_FANOUT_4,
|
|
.allocsize = sizeof(RT_NODE_4) + RT_FANOUT_4 * sizeof(RT_PTR_ALLOC),
|
|
},
|
|
[RT_CLASS_16_LO] = {
|
|
.name = RT_STR(RT_PREFIX) "_radix_tree node16_lo",
|
|
.fanout = RT_FANOUT_16_LO,
|
|
.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_LO * sizeof(RT_PTR_ALLOC),
|
|
},
|
|
[RT_CLASS_16_HI] = {
|
|
.name = RT_STR(RT_PREFIX) "_radix_tree node16_hi",
|
|
.fanout = RT_FANOUT_16_HI,
|
|
.allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_HI * sizeof(RT_PTR_ALLOC),
|
|
},
|
|
[RT_CLASS_48] = {
|
|
.name = RT_STR(RT_PREFIX) "_radix_tree node48",
|
|
.fanout = RT_FANOUT_48,
|
|
.allocsize = sizeof(RT_NODE_48) + RT_FANOUT_48 * sizeof(RT_PTR_ALLOC),
|
|
},
|
|
[RT_CLASS_256] = {
|
|
.name = RT_STR(RT_PREFIX) "_radix_tree node256",
|
|
.fanout = RT_FANOUT_256,
|
|
.allocsize = sizeof(RT_NODE_256),
|
|
},
|
|
};
|
|
|
|
#define RT_NUM_SIZE_CLASSES lengthof(RT_SIZE_CLASS_INFO)
|
|
|
|
#ifdef RT_SHMEM
|
|
/* A magic value used to identify our radix tree */
|
|
#define RT_RADIX_TREE_MAGIC 0x54A48167
|
|
#endif
|
|
|
|
/* Contains the actual tree, plus ancillary info */
|
|
typedef struct RT_RADIX_TREE_CONTROL
|
|
{
|
|
#ifdef RT_SHMEM
|
|
RT_HANDLE handle;
|
|
uint32 magic;
|
|
LWLock lock;
|
|
#endif
|
|
|
|
RT_PTR_ALLOC root;
|
|
uint64 max_val;
|
|
int64 num_keys;
|
|
int start_shift;
|
|
|
|
/* statistics */
|
|
#ifdef RT_DEBUG
|
|
int64 num_nodes[RT_NUM_SIZE_CLASSES];
|
|
int64 num_leaves;
|
|
#endif
|
|
} RT_RADIX_TREE_CONTROL;
|
|
|
|
/* Entry point for allocating and accessing the tree */
|
|
struct RT_RADIX_TREE
|
|
{
|
|
MemoryContext context;
|
|
|
|
/* pointing to either local memory or DSA */
|
|
RT_RADIX_TREE_CONTROL *ctl;
|
|
|
|
#ifdef RT_SHMEM
|
|
dsa_area *dsa;
|
|
#else
|
|
MemoryContextData *node_slabs[RT_NUM_SIZE_CLASSES];
|
|
|
|
/* leaf_context is used only for single-value leaves */
|
|
MemoryContextData *leaf_context;
|
|
#endif
|
|
MemoryContextData *iter_context;
|
|
};
|
|
|
|
/*
|
|
* Iteration support.
|
|
*
|
|
* Iterating over the radix tree produces each key/value pair in ascending
|
|
* order of the key.
|
|
*/
|
|
|
|
/* state for iterating over a single node */
|
|
typedef struct RT_NODE_ITER
|
|
{
|
|
RT_CHILD_PTR node;
|
|
|
|
/*
|
|
* The next index of the chunk array in RT_NODE_KIND_4 and RT_NODE_KIND_16
|
|
* nodes, or the next chunk in RT_NODE_KIND_48 and RT_NODE_KIND_256 nodes.
|
|
* 0 for the initial value.
|
|
*/
|
|
int idx;
|
|
} RT_NODE_ITER;
|
|
|
|
/* state for iterating over the whole radix tree */
|
|
struct RT_ITER
|
|
{
|
|
RT_RADIX_TREE *tree;
|
|
|
|
/*
|
|
* A stack to track iteration for each level. Level 0 is the lowest (or
|
|
* leaf) level
|
|
*/
|
|
RT_NODE_ITER node_iters[RT_MAX_LEVEL];
|
|
int top_level;
|
|
int cur_level;
|
|
|
|
/* The key constructed during iteration */
|
|
uint64 key;
|
|
};
|
|
|
|
|
|
/* verification (available only in assert-enabled builds) */
|
|
static void RT_VERIFY_NODE(RT_NODE * node);
|
|
|
|
static inline void
|
|
RT_PTR_SET_LOCAL(RT_RADIX_TREE * tree, RT_CHILD_PTR * node)
|
|
{
|
|
#ifdef RT_SHMEM
|
|
node->local = dsa_get_address(tree->dsa, node->alloc);
|
|
#endif
|
|
}
|
|
|
|
/* Convenience functions for node48 and node256 */
|
|
|
|
/* Return true if there is an entry for "chunk" */
|
|
static inline bool
|
|
RT_NODE_48_IS_CHUNK_USED(RT_NODE_48 * node, uint8 chunk)
|
|
{
|
|
return node->slot_idxs[chunk] != RT_INVALID_SLOT_IDX;
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_48_GET_CHILD(RT_NODE_48 * node, uint8 chunk)
|
|
{
|
|
return &node->children[node->slot_idxs[chunk]];
|
|
}
|
|
|
|
/* Return true if there is an entry for "chunk" */
|
|
static inline bool
|
|
RT_NODE_256_IS_CHUNK_USED(RT_NODE_256 * node, uint8 chunk)
|
|
{
|
|
int idx = RT_BM_IDX(chunk);
|
|
int bitnum = RT_BM_BIT(chunk);
|
|
|
|
return (node->isset[idx] & ((bitmapword) 1 << bitnum)) != 0;
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_256_GET_CHILD(RT_NODE_256 * node, uint8 chunk)
|
|
{
|
|
Assert(RT_NODE_256_IS_CHUNK_USED(node, chunk));
|
|
return &node->children[chunk];
|
|
}
|
|
|
|
/*
|
|
* Return the smallest shift that will allowing storing the given key.
|
|
*/
|
|
static inline int
|
|
RT_KEY_GET_SHIFT(uint64 key)
|
|
{
|
|
if (key == 0)
|
|
return 0;
|
|
else
|
|
return (pg_leftmost_one_pos64(key) / RT_SPAN) * RT_SPAN;
|
|
}
|
|
|
|
/*
|
|
* Return the max value that can be stored in the tree with the given shift.
|
|
*/
|
|
static uint64
|
|
RT_SHIFT_GET_MAX_VAL(int shift)
|
|
{
|
|
if (shift == RT_MAX_SHIFT)
|
|
return UINT64_MAX;
|
|
else
|
|
return (UINT64CONST(1) << (shift + RT_SPAN)) - 1;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new node with the given node kind and size class.
|
|
*/
|
|
static inline RT_CHILD_PTR
|
|
RT_ALLOC_NODE(RT_RADIX_TREE * tree, const uint8 kind, const RT_SIZE_CLASS size_class)
|
|
{
|
|
RT_CHILD_PTR allocnode;
|
|
RT_NODE *node;
|
|
size_t allocsize;
|
|
|
|
allocsize = RT_SIZE_CLASS_INFO[size_class].allocsize;
|
|
|
|
#ifdef RT_SHMEM
|
|
allocnode.alloc = dsa_allocate(tree->dsa, allocsize);
|
|
#else
|
|
allocnode.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->node_slabs[size_class],
|
|
allocsize);
|
|
#endif
|
|
|
|
RT_PTR_SET_LOCAL(tree, &allocnode);
|
|
node = allocnode.local;
|
|
|
|
/* initialize contents */
|
|
|
|
memset(node, 0, sizeof(RT_NODE));
|
|
switch (kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
case RT_NODE_KIND_16:
|
|
break;
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node;
|
|
|
|
memset(n48->isset, 0, sizeof(n48->isset));
|
|
memset(n48->slot_idxs, RT_INVALID_SLOT_IDX, sizeof(n48->slot_idxs));
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node;
|
|
|
|
memset(n256->isset, 0, sizeof(n256->isset));
|
|
break;
|
|
}
|
|
default:
|
|
pg_unreachable();
|
|
}
|
|
|
|
node->kind = kind;
|
|
|
|
/*
|
|
* For node256, this will actually overflow to zero, but that's okay
|
|
* because that node doesn't need to introspect this value.
|
|
*/
|
|
node->fanout = RT_SIZE_CLASS_INFO[size_class].fanout;
|
|
|
|
#ifdef RT_DEBUG
|
|
/* update the statistics */
|
|
tree->ctl->num_nodes[size_class]++;
|
|
#endif
|
|
|
|
return allocnode;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new leaf.
|
|
*/
|
|
static RT_CHILD_PTR
|
|
RT_ALLOC_LEAF(RT_RADIX_TREE * tree, size_t allocsize)
|
|
{
|
|
RT_CHILD_PTR leaf;
|
|
|
|
#ifdef RT_SHMEM
|
|
leaf.alloc = dsa_allocate(tree->dsa, allocsize);
|
|
RT_PTR_SET_LOCAL(tree, &leaf);
|
|
#else
|
|
leaf.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->leaf_context, allocsize);
|
|
#endif
|
|
|
|
#ifdef RT_DEBUG
|
|
tree->ctl->num_leaves++;
|
|
#endif
|
|
|
|
return leaf;
|
|
}
|
|
|
|
/*
|
|
* Copy relevant members of the node header.
|
|
* This is a separate function in case other fields are added.
|
|
*/
|
|
static inline void
|
|
RT_COPY_COMMON(RT_CHILD_PTR newnode, RT_CHILD_PTR oldnode)
|
|
{
|
|
(newnode.local)->count = (oldnode.local)->count;
|
|
}
|
|
|
|
/* Free the given node */
|
|
static void
|
|
RT_FREE_NODE(RT_RADIX_TREE * tree, RT_CHILD_PTR node)
|
|
{
|
|
#ifdef RT_DEBUG
|
|
int i;
|
|
|
|
/* update the statistics */
|
|
|
|
for (i = 0; i < RT_NUM_SIZE_CLASSES; i++)
|
|
{
|
|
if ((node.local)->fanout == RT_SIZE_CLASS_INFO[i].fanout)
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* The fanout of node256 will appear to be zero within the node header
|
|
* because of overflow, so we need an extra check here.
|
|
*/
|
|
if (i == RT_NUM_SIZE_CLASSES)
|
|
i = RT_CLASS_256;
|
|
|
|
tree->ctl->num_nodes[i]--;
|
|
Assert(tree->ctl->num_nodes[i] >= 0);
|
|
#endif
|
|
|
|
#ifdef RT_SHMEM
|
|
dsa_free(tree->dsa, node.alloc);
|
|
#else
|
|
pfree(node.alloc);
|
|
#endif
|
|
}
|
|
|
|
static inline void
|
|
RT_FREE_LEAF(RT_RADIX_TREE * tree, RT_PTR_ALLOC leaf)
|
|
{
|
|
Assert(leaf != tree->ctl->root);
|
|
|
|
#ifdef RT_DEBUG
|
|
/* update the statistics */
|
|
tree->ctl->num_leaves--;
|
|
Assert(tree->ctl->num_leaves >= 0);
|
|
#endif
|
|
|
|
#ifdef RT_SHMEM
|
|
dsa_free(tree->dsa, leaf);
|
|
#else
|
|
pfree(leaf);
|
|
#endif
|
|
}
|
|
|
|
/***************** SEARCH *****************/
|
|
|
|
/*
|
|
* Return the address of the child corresponding to "chunk",
|
|
* or NULL if there is no such element.
|
|
*/
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_16_SEARCH_EQ(RT_NODE_16 * node, uint8 chunk)
|
|
{
|
|
int count = node->base.count;
|
|
#ifndef USE_NO_SIMD
|
|
Vector8 spread_chunk;
|
|
Vector8 haystack1;
|
|
Vector8 haystack2;
|
|
Vector8 cmp1;
|
|
Vector8 cmp2;
|
|
uint32 bitfield;
|
|
RT_PTR_ALLOC *slot_simd = NULL;
|
|
#endif
|
|
|
|
#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
|
|
RT_PTR_ALLOC *slot = NULL;
|
|
|
|
for (int i = 0; i < count; i++)
|
|
{
|
|
if (node->chunks[i] == chunk)
|
|
{
|
|
slot = &node->children[i];
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifndef USE_NO_SIMD
|
|
/* replicate the search key */
|
|
spread_chunk = vector8_broadcast(chunk);
|
|
|
|
/* compare to all 32 keys stored in the node */
|
|
vector8_load(&haystack1, &node->chunks[0]);
|
|
vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
|
|
cmp1 = vector8_eq(spread_chunk, haystack1);
|
|
cmp2 = vector8_eq(spread_chunk, haystack2);
|
|
|
|
/* convert comparison to a bitfield */
|
|
bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
|
|
|
|
/* mask off invalid entries */
|
|
bitfield &= ((UINT64CONST(1) << count) - 1);
|
|
|
|
/* convert bitfield to index by counting trailing zeros */
|
|
if (bitfield)
|
|
slot_simd = &node->children[pg_rightmost_one_pos32(bitfield)];
|
|
|
|
Assert(slot_simd == slot);
|
|
return slot_simd;
|
|
#else
|
|
return slot;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Search for the child pointer corresponding to "key" in the given node.
|
|
*
|
|
* Return child if the key is found, otherwise return NULL.
|
|
*/
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_SEARCH(RT_NODE * node, uint8 chunk)
|
|
{
|
|
/* Make sure we already converted to local pointer */
|
|
Assert(node != NULL);
|
|
|
|
switch (node->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) node;
|
|
|
|
for (int i = 0; i < n4->base.count; i++)
|
|
{
|
|
if (n4->chunks[i] == chunk)
|
|
return &n4->children[i];
|
|
}
|
|
return NULL;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
return RT_NODE_16_SEARCH_EQ((RT_NODE_16 *) node, chunk);
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node;
|
|
int slotpos = n48->slot_idxs[chunk];
|
|
|
|
if (slotpos == RT_INVALID_SLOT_IDX)
|
|
return NULL;
|
|
|
|
return RT_NODE_48_GET_CHILD(n48, chunk);
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node;
|
|
|
|
if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
|
|
return NULL;
|
|
|
|
return RT_NODE_256_GET_CHILD(n256, chunk);
|
|
}
|
|
default:
|
|
pg_unreachable();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Search the given key in the radix tree. Return the pointer to the value if found,
|
|
* otherwise return NULL.
|
|
*
|
|
* Since the function returns a pointer (to support variable-length values),
|
|
* the caller is responsible for locking until it's finished with the value.
|
|
*/
|
|
RT_SCOPE RT_VALUE_TYPE *
|
|
RT_FIND(RT_RADIX_TREE * tree, uint64 key)
|
|
{
|
|
RT_CHILD_PTR node;
|
|
RT_PTR_ALLOC *slot = NULL;
|
|
int shift;
|
|
|
|
#ifdef RT_SHMEM
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
#endif
|
|
|
|
if (key > tree->ctl->max_val)
|
|
return NULL;
|
|
|
|
Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
|
|
node.alloc = tree->ctl->root;
|
|
shift = tree->ctl->start_shift;
|
|
|
|
/* Descend the tree */
|
|
while (shift >= 0)
|
|
{
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
slot = RT_NODE_SEARCH(node.local, RT_GET_KEY_CHUNK(key, shift));
|
|
if (slot == NULL)
|
|
return NULL;
|
|
|
|
node.alloc = *slot;
|
|
shift -= RT_SPAN;
|
|
}
|
|
|
|
if (RT_CHILDPTR_IS_VALUE(*slot))
|
|
return (RT_VALUE_TYPE *) slot;
|
|
else
|
|
{
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
return (RT_VALUE_TYPE *) node.local;
|
|
}
|
|
}
|
|
|
|
/***************** INSERTION *****************/
|
|
|
|
#define RT_NODE_MUST_GROW(node) \
|
|
((node)->count == (node)->fanout)
|
|
|
|
/*
|
|
* Return index of the chunk and slot arrays for inserting into the node,
|
|
* such that the arrays remain ordered.
|
|
*/
|
|
static inline int
|
|
RT_NODE_4_GET_INSERTPOS(RT_NODE_4 * node, uint8 chunk, int count)
|
|
{
|
|
int idx;
|
|
|
|
for (idx = 0; idx < count; idx++)
|
|
{
|
|
if (node->chunks[idx] >= chunk)
|
|
break;
|
|
}
|
|
|
|
return idx;
|
|
}
|
|
|
|
/*
|
|
* Return index of the chunk and slot arrays for inserting into the node,
|
|
* such that the arrays remain ordered.
|
|
*/
|
|
static inline int
|
|
RT_NODE_16_GET_INSERTPOS(RT_NODE_16 * node, uint8 chunk)
|
|
{
|
|
int count = node->base.count;
|
|
#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
|
|
int index;
|
|
#endif
|
|
|
|
#ifndef USE_NO_SIMD
|
|
Vector8 spread_chunk;
|
|
Vector8 haystack1;
|
|
Vector8 haystack2;
|
|
Vector8 cmp1;
|
|
Vector8 cmp2;
|
|
Vector8 min1;
|
|
Vector8 min2;
|
|
uint32 bitfield;
|
|
int index_simd;
|
|
#endif
|
|
|
|
/*
|
|
* First compare the last element. There are two reasons to branch here:
|
|
*
|
|
* 1) A realistic pattern is inserting ordered keys. In that case,
|
|
* non-SIMD platforms must do a linear search to the last chunk to find
|
|
* the insert position. This will get slower as the node fills up.
|
|
*
|
|
* 2) On SIMD platforms, we must branch anyway to make sure we don't bit
|
|
* scan an empty bitfield. Doing the branch here eliminates some work that
|
|
* we might otherwise throw away.
|
|
*/
|
|
Assert(count > 0);
|
|
if (node->chunks[count - 1] < chunk)
|
|
return count;
|
|
|
|
#if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
|
|
|
|
for (index = 0; index < count; index++)
|
|
{
|
|
if (node->chunks[index] > chunk)
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#ifndef USE_NO_SIMD
|
|
|
|
/*
|
|
* This is a bit more complicated than RT_NODE_16_SEARCH_EQ(), because no
|
|
* unsigned uint8 comparison instruction exists, at least for SSE2. So we
|
|
* need to play some trickery using vector8_min() to effectively get >=.
|
|
* There'll never be any equal elements in current uses, but that's what
|
|
* we get here...
|
|
*/
|
|
spread_chunk = vector8_broadcast(chunk);
|
|
vector8_load(&haystack1, &node->chunks[0]);
|
|
vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
|
|
min1 = vector8_min(spread_chunk, haystack1);
|
|
min2 = vector8_min(spread_chunk, haystack2);
|
|
cmp1 = vector8_eq(spread_chunk, min1);
|
|
cmp2 = vector8_eq(spread_chunk, min2);
|
|
bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
|
|
|
|
Assert((bitfield & ((UINT64CONST(1) << count) - 1)) != 0);
|
|
index_simd = pg_rightmost_one_pos32(bitfield);
|
|
|
|
Assert(index_simd == index);
|
|
return index_simd;
|
|
#else
|
|
return index;
|
|
#endif
|
|
}
|
|
|
|
/* Shift the elements right at "insertpos" by one */
|
|
static inline void
|
|
RT_SHIFT_ARRAYS_FOR_INSERT(uint8 *chunks, RT_PTR_ALLOC * children, int count, int insertpos)
|
|
{
|
|
/*
|
|
* This is basically a memmove, but written in a simple loop for speed on
|
|
* small inputs.
|
|
*/
|
|
for (int i = count - 1; i >= insertpos; i--)
|
|
{
|
|
/* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
|
|
#ifdef __GNUC__
|
|
__asm__("");
|
|
#endif
|
|
chunks[i + 1] = chunks[i];
|
|
children[i + 1] = children[i];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Copy both chunk and slot arrays into the right
|
|
* place. The caller is responsible for inserting the new element.
|
|
*/
|
|
static inline void
|
|
RT_COPY_ARRAYS_FOR_INSERT(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
|
|
uint8 *src_chunks, RT_PTR_ALLOC * src_children,
|
|
int count, int insertpos)
|
|
{
|
|
for (int i = 0; i < count; i++)
|
|
{
|
|
int sourceidx = i;
|
|
|
|
/* use a branch-free computation to skip the index of the new element */
|
|
int destidx = i + (i >= insertpos);
|
|
|
|
dst_chunks[destidx] = src_chunks[sourceidx];
|
|
dst_children[destidx] = src_children[sourceidx];
|
|
}
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_ADD_CHILD_256(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
|
|
int idx = RT_BM_IDX(chunk);
|
|
int bitnum = RT_BM_BIT(chunk);
|
|
|
|
/* Mark the slot used for "chunk" */
|
|
n256->isset[idx] |= ((bitmapword) 1 << bitnum);
|
|
|
|
n256->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) n256);
|
|
|
|
return RT_NODE_256_GET_CHILD(n256, chunk);
|
|
}
|
|
|
|
static pg_noinline RT_PTR_ALLOC *
|
|
RT_GROW_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
|
|
uint8 chunk)
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_256 *new256;
|
|
int i = 0;
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_256, RT_CLASS_256);
|
|
new256 = (RT_NODE_256 *) newnode.local;
|
|
|
|
/* copy over the entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
for (int word_num = 0; word_num < RT_BM_IDX(RT_NODE_MAX_SLOTS); word_num++)
|
|
{
|
|
bitmapword bitmap = 0;
|
|
|
|
/*
|
|
* Bit manipulation is a surprisingly large portion of the overhead in
|
|
* the naive implementation. Doing stores word-at-a-time removes a lot
|
|
* of that overhead.
|
|
*/
|
|
for (int bit = 0; bit < BITS_PER_BITMAPWORD; bit++)
|
|
{
|
|
uint8 offset = n48->slot_idxs[i];
|
|
|
|
if (offset != RT_INVALID_SLOT_IDX)
|
|
{
|
|
bitmap |= ((bitmapword) 1 << bit);
|
|
new256->children[i] = n48->children[offset];
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
new256->isset[word_num] = bitmap;
|
|
}
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
|
|
return RT_ADD_CHILD_256(tree, newnode, chunk);
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
|
|
int insertpos;
|
|
int idx = 0;
|
|
bitmapword w,
|
|
inverse;
|
|
|
|
/* get the first word with at least one bit not set */
|
|
for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
|
|
{
|
|
w = n48->isset[i];
|
|
if (w < ~((bitmapword) 0))
|
|
{
|
|
idx = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* To get the first unset bit in w, get the first set bit in ~w */
|
|
inverse = ~w;
|
|
insertpos = idx * BITS_PER_BITMAPWORD;
|
|
insertpos += bmw_rightmost_one_pos(inverse);
|
|
Assert(insertpos < n48->base.fanout);
|
|
|
|
/* mark the slot used by setting the rightmost zero bit */
|
|
n48->isset[idx] |= w + 1;
|
|
|
|
/* insert new chunk into place */
|
|
n48->slot_idxs[chunk] = insertpos;
|
|
|
|
n48->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) n48);
|
|
|
|
return &n48->children[insertpos];
|
|
}
|
|
|
|
static pg_noinline RT_PTR_ALLOC *
|
|
RT_GROW_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
|
|
uint8 chunk)
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
|
|
int insertpos;
|
|
|
|
if (n16->base.fanout < RT_FANOUT_16_HI)
|
|
{
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_16 *new16;
|
|
|
|
Assert(n16->base.fanout == RT_FANOUT_16_LO);
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_HI);
|
|
new16 = (RT_NODE_16 *) newnode.local;
|
|
|
|
/* copy over existing entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
Assert(n16->base.count == RT_FANOUT_16_LO);
|
|
insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
|
|
RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
|
|
n16->chunks, n16->children,
|
|
RT_FANOUT_16_LO, insertpos);
|
|
|
|
/* insert new chunk into place */
|
|
new16->chunks[insertpos] = chunk;
|
|
|
|
new16->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) new16);
|
|
|
|
/* free old node and update references */
|
|
RT_FREE_NODE(tree, node);
|
|
*parent_slot = newnode.alloc;
|
|
|
|
return &new16->children[insertpos];
|
|
}
|
|
else
|
|
{
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_48 *new48;
|
|
int idx,
|
|
bit;
|
|
|
|
Assert(n16->base.fanout == RT_FANOUT_16_HI);
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
|
|
new48 = (RT_NODE_48 *) newnode.local;
|
|
|
|
/* copy over the entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
for (int i = 0; i < RT_FANOUT_16_HI; i++)
|
|
new48->slot_idxs[n16->chunks[i]] = i;
|
|
memcpy(&new48->children[0], &n16->children[0], RT_FANOUT_16_HI * sizeof(new48->children[0]));
|
|
|
|
/*
|
|
* Since we just copied a dense array, we can fill "isset" using a
|
|
* single store, provided the length of that array is at most the
|
|
* number of bits in a bitmapword.
|
|
*/
|
|
Assert(RT_FANOUT_16_HI <= BITS_PER_BITMAPWORD);
|
|
new48->isset[0] = (bitmapword) (((uint64) 1 << RT_FANOUT_16_HI) - 1);
|
|
|
|
/* put the new child at the end of the copied entries */
|
|
insertpos = RT_FANOUT_16_HI;
|
|
idx = RT_BM_IDX(insertpos);
|
|
bit = RT_BM_BIT(insertpos);
|
|
|
|
/* mark the slot used */
|
|
new48->isset[idx] |= ((bitmapword) 1 << bit);
|
|
|
|
/* insert new chunk into place */
|
|
new48->slot_idxs[chunk] = insertpos;
|
|
|
|
new48->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) new48);
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
|
|
return &new48->children[insertpos];
|
|
}
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_ADD_CHILD_16(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
|
|
int insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
|
|
|
|
/* shift chunks and children */
|
|
RT_SHIFT_ARRAYS_FOR_INSERT(n16->chunks, n16->children,
|
|
n16->base.count, insertpos);
|
|
|
|
/* insert new chunk into place */
|
|
n16->chunks[insertpos] = chunk;
|
|
|
|
n16->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) n16);
|
|
|
|
return &n16->children[insertpos];
|
|
}
|
|
|
|
static pg_noinline RT_PTR_ALLOC *
|
|
RT_GROW_NODE_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
|
|
uint8 chunk)
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_16 *new16;
|
|
int insertpos;
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
|
|
new16 = (RT_NODE_16 *) newnode.local;
|
|
|
|
/* copy over existing entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
Assert(n4->base.count == RT_FANOUT_4);
|
|
insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, RT_FANOUT_4);
|
|
RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
|
|
n4->chunks, n4->children,
|
|
RT_FANOUT_4, insertpos);
|
|
|
|
/* insert new chunk into place */
|
|
new16->chunks[insertpos] = chunk;
|
|
|
|
new16->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) new16);
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
|
|
return &new16->children[insertpos];
|
|
}
|
|
|
|
static inline RT_PTR_ALLOC *
|
|
RT_ADD_CHILD_4(RT_RADIX_TREE * tree, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
|
|
int count = n4->base.count;
|
|
int insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, count);
|
|
|
|
/* shift chunks and children */
|
|
RT_SHIFT_ARRAYS_FOR_INSERT(n4->chunks, n4->children,
|
|
count, insertpos);
|
|
|
|
/* insert new chunk into place */
|
|
n4->chunks[insertpos] = chunk;
|
|
|
|
n4->base.count++;
|
|
RT_VERIFY_NODE((RT_NODE *) n4);
|
|
|
|
return &n4->children[insertpos];
|
|
}
|
|
|
|
/*
|
|
* Reserve slot in "node"'s child array. The caller will populate it
|
|
* with the actual child pointer.
|
|
*
|
|
* "parent_slot" is the address of the parent's child pointer to "node".
|
|
* If the node we're inserting into needs to grow, we update the parent's
|
|
* child pointer with the pointer to the new larger node.
|
|
*/
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_INSERT(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node,
|
|
uint8 chunk)
|
|
{
|
|
RT_NODE *n = node.local;
|
|
|
|
switch (n->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
if (unlikely(RT_NODE_MUST_GROW(n)))
|
|
return RT_GROW_NODE_4(tree, parent_slot, node, chunk);
|
|
|
|
return RT_ADD_CHILD_4(tree, node, chunk);
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
if (unlikely(RT_NODE_MUST_GROW(n)))
|
|
return RT_GROW_NODE_16(tree, parent_slot, node, chunk);
|
|
|
|
return RT_ADD_CHILD_16(tree, node, chunk);
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
if (unlikely(RT_NODE_MUST_GROW(n)))
|
|
return RT_GROW_NODE_48(tree, parent_slot, node, chunk);
|
|
|
|
return RT_ADD_CHILD_48(tree, node, chunk);
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
return RT_ADD_CHILD_256(tree, node, chunk);
|
|
default:
|
|
pg_unreachable();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The radix tree doesn't have sufficient height. Put new node(s) on top,
|
|
* and move the old node below it.
|
|
*/
|
|
static pg_noinline void
|
|
RT_EXTEND_UP(RT_RADIX_TREE * tree, uint64 key)
|
|
{
|
|
int target_shift = RT_KEY_GET_SHIFT(key);
|
|
int shift = tree->ctl->start_shift;
|
|
|
|
Assert(shift < target_shift);
|
|
|
|
/* Grow tree upwards until start shift can accommodate the key */
|
|
while (shift < target_shift)
|
|
{
|
|
RT_CHILD_PTR node;
|
|
RT_NODE_4 *n4;
|
|
|
|
node = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
|
|
n4 = (RT_NODE_4 *) node.local;
|
|
n4->base.count = 1;
|
|
n4->chunks[0] = 0;
|
|
n4->children[0] = tree->ctl->root;
|
|
|
|
/* Update the root */
|
|
tree->ctl->root = node.alloc;
|
|
|
|
shift += RT_SPAN;
|
|
}
|
|
|
|
tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(target_shift);
|
|
tree->ctl->start_shift = target_shift;
|
|
}
|
|
|
|
/*
|
|
* Insert a chain of nodes until we reach the lowest level,
|
|
* and return the address of a slot to be filled further up
|
|
* the call stack.
|
|
*/
|
|
static pg_noinline RT_PTR_ALLOC *
|
|
RT_EXTEND_DOWN(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
|
|
{
|
|
RT_CHILD_PTR node,
|
|
child;
|
|
RT_NODE_4 *n4;
|
|
|
|
/*
|
|
* The child pointer of the first node in the chain goes in the
|
|
* caller-provided slot.
|
|
*/
|
|
child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
|
|
*parent_slot = child.alloc;
|
|
|
|
node = child;
|
|
shift -= RT_SPAN;
|
|
|
|
while (shift > 0)
|
|
{
|
|
child = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
|
|
|
|
/* We open-code the insertion ourselves, for speed. */
|
|
n4 = (RT_NODE_4 *) node.local;
|
|
n4->base.count = 1;
|
|
n4->chunks[0] = RT_GET_KEY_CHUNK(key, shift);
|
|
n4->children[0] = child.alloc;
|
|
|
|
node = child;
|
|
shift -= RT_SPAN;
|
|
}
|
|
Assert(shift == 0);
|
|
|
|
/* Reserve slot for the value. */
|
|
n4 = (RT_NODE_4 *) node.local;
|
|
n4->chunks[0] = RT_GET_KEY_CHUNK(key, 0);
|
|
n4->base.count = 1;
|
|
|
|
return &n4->children[0];
|
|
}
|
|
|
|
/*
|
|
* Workhorse for RT_SET
|
|
*
|
|
* "parent_slot" is the address of the child pointer we just followed,
|
|
* in the parent's array of children, needed if inserting into the
|
|
* current node causes it to grow.
|
|
*/
|
|
static RT_PTR_ALLOC *
|
|
RT_GET_SLOT_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift, bool *found)
|
|
{
|
|
RT_PTR_ALLOC *slot;
|
|
RT_CHILD_PTR node;
|
|
uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
|
|
|
|
node.alloc = *parent_slot;
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
slot = RT_NODE_SEARCH(node.local, chunk);
|
|
|
|
if (slot == NULL)
|
|
{
|
|
*found = false;
|
|
|
|
/* reserve slot for the caller to populate */
|
|
|
|
slot = RT_NODE_INSERT(tree, parent_slot, node, chunk);
|
|
|
|
if (shift == 0)
|
|
return slot;
|
|
else
|
|
return RT_EXTEND_DOWN(tree, slot, key, shift);
|
|
}
|
|
else
|
|
{
|
|
if (shift == 0)
|
|
{
|
|
*found = true;
|
|
return slot;
|
|
}
|
|
else
|
|
return RT_GET_SLOT_RECURSIVE(tree, slot, key, shift - RT_SPAN, found);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set key to value that value_p points to. If the entry already exists, we
|
|
* update its value and return true. Returns false if entry doesn't yet exist.
|
|
*
|
|
* Taking a lock in exclusive mode is the caller's responsibility.
|
|
*/
|
|
RT_SCOPE bool
|
|
RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p)
|
|
{
|
|
bool found;
|
|
RT_PTR_ALLOC *slot;
|
|
size_t value_sz = RT_GET_VALUE_SIZE(value_p);
|
|
|
|
#ifdef RT_SHMEM
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
#endif
|
|
|
|
Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
|
|
|
|
/* Extend the tree if necessary */
|
|
if (unlikely(key > tree->ctl->max_val))
|
|
{
|
|
if (tree->ctl->num_keys == 0)
|
|
{
|
|
RT_CHILD_PTR node;
|
|
RT_NODE_4 *n4;
|
|
int start_shift = RT_KEY_GET_SHIFT(key);
|
|
|
|
/*
|
|
* With an empty root node, we don't extend the tree upwards,
|
|
* since that would result in orphan empty nodes. Instead we open
|
|
* code inserting into the root node and extend downward from
|
|
* there.
|
|
*/
|
|
node.alloc = tree->ctl->root;
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
n4 = (RT_NODE_4 *) node.local;
|
|
n4->base.count = 1;
|
|
n4->chunks[0] = RT_GET_KEY_CHUNK(key, start_shift);
|
|
|
|
slot = RT_EXTEND_DOWN(tree, &n4->children[0], key, start_shift);
|
|
found = false;
|
|
tree->ctl->start_shift = start_shift;
|
|
tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(start_shift);
|
|
goto have_slot;
|
|
}
|
|
else
|
|
RT_EXTEND_UP(tree, key);
|
|
}
|
|
|
|
slot = RT_GET_SLOT_RECURSIVE(tree, &tree->ctl->root,
|
|
key, tree->ctl->start_shift, &found);
|
|
|
|
have_slot:
|
|
Assert(slot != NULL);
|
|
|
|
if (RT_VALUE_IS_EMBEDDABLE(value_p))
|
|
{
|
|
/* free the existing leaf */
|
|
if (found && !RT_CHILDPTR_IS_VALUE(*slot))
|
|
RT_FREE_LEAF(tree, *slot);
|
|
|
|
/* store value directly in child pointer slot */
|
|
memcpy(slot, value_p, value_sz);
|
|
|
|
#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
|
|
/* tag child pointer */
|
|
#ifdef RT_SHMEM
|
|
*slot |= 1;
|
|
#else
|
|
*((uintptr_t *) slot) |= 1;
|
|
#endif
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
RT_CHILD_PTR leaf;
|
|
|
|
if (found && !RT_CHILDPTR_IS_VALUE(*slot))
|
|
{
|
|
Assert(RT_PTR_ALLOC_IS_VALID(*slot));
|
|
leaf.alloc = *slot;
|
|
RT_PTR_SET_LOCAL(tree, &leaf);
|
|
|
|
if (RT_GET_VALUE_SIZE((RT_VALUE_TYPE *) leaf.local) != value_sz)
|
|
{
|
|
/*
|
|
* different sizes, so first free the existing leaf before
|
|
* allocating a new one
|
|
*/
|
|
RT_FREE_LEAF(tree, *slot);
|
|
leaf = RT_ALLOC_LEAF(tree, value_sz);
|
|
*slot = leaf.alloc;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* allocate new leaf and store it in the child array */
|
|
leaf = RT_ALLOC_LEAF(tree, value_sz);
|
|
*slot = leaf.alloc;
|
|
}
|
|
|
|
memcpy(leaf.local, value_p, value_sz);
|
|
}
|
|
|
|
/* Update the statistics */
|
|
if (!found)
|
|
tree->ctl->num_keys++;
|
|
|
|
return found;
|
|
}
|
|
|
|
/***************** SETUP / TEARDOWN *****************/
|
|
|
|
/*
|
|
* Create the radix tree in the given memory context and return it.
|
|
*
|
|
* All local memory required for a radix tree is allocated in the given
|
|
* memory context and its children. Note that RT_FREE() will delete all
|
|
* allocated space within the given memory context, so the dsa_area should
|
|
* be created in a different context.
|
|
*/
|
|
RT_SCOPE RT_RADIX_TREE *
|
|
#ifdef RT_SHMEM
|
|
RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id)
|
|
#else
|
|
RT_CREATE(MemoryContext ctx)
|
|
#endif
|
|
{
|
|
RT_RADIX_TREE *tree;
|
|
MemoryContext old_ctx;
|
|
RT_CHILD_PTR rootnode;
|
|
#ifdef RT_SHMEM
|
|
dsa_pointer dp;
|
|
#endif
|
|
|
|
old_ctx = MemoryContextSwitchTo(ctx);
|
|
|
|
tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
|
|
tree->context = ctx;
|
|
|
|
/*
|
|
* Separate context for iteration in case the tree context doesn't support
|
|
* pfree
|
|
*/
|
|
tree->iter_context = AllocSetContextCreate(ctx,
|
|
RT_STR(RT_PREFIX) "_radix_tree iter context",
|
|
ALLOCSET_SMALL_SIZES);
|
|
|
|
#ifdef RT_SHMEM
|
|
tree->dsa = dsa;
|
|
dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
|
|
tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
|
|
tree->ctl->handle = dp;
|
|
tree->ctl->magic = RT_RADIX_TREE_MAGIC;
|
|
LWLockInitialize(&tree->ctl->lock, tranche_id);
|
|
#else
|
|
tree->ctl = (RT_RADIX_TREE_CONTROL *) palloc0(sizeof(RT_RADIX_TREE_CONTROL));
|
|
|
|
/* Create a slab context for each size class */
|
|
for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
|
|
{
|
|
RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
|
|
size_t inner_blocksize = RT_SLAB_BLOCK_SIZE(size_class.allocsize);
|
|
|
|
tree->node_slabs[i] = SlabContextCreate(ctx,
|
|
size_class.name,
|
|
inner_blocksize,
|
|
size_class.allocsize);
|
|
}
|
|
|
|
/* By default we use the passed context for leaves. */
|
|
tree->leaf_context = tree->context;
|
|
|
|
#ifndef RT_VARLEN_VALUE_SIZE
|
|
|
|
/*
|
|
* For leaves storing fixed-length values, we use a slab context to avoid
|
|
* the possibility of space wastage by power-of-2 rounding up.
|
|
*/
|
|
if (sizeof(RT_VALUE_TYPE) > sizeof(RT_PTR_ALLOC))
|
|
tree->leaf_context = SlabContextCreate(ctx,
|
|
RT_STR(RT_PREFIX) "_radix_tree leaf context",
|
|
RT_SLAB_BLOCK_SIZE(sizeof(RT_VALUE_TYPE)),
|
|
sizeof(RT_VALUE_TYPE));
|
|
#endif /* !RT_VARLEN_VALUE_SIZE */
|
|
#endif /* RT_SHMEM */
|
|
|
|
/* add root node now so that RT_SET can assume it exists */
|
|
rootnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
|
|
tree->ctl->root = rootnode.alloc;
|
|
tree->ctl->start_shift = 0;
|
|
tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
|
|
|
|
MemoryContextSwitchTo(old_ctx);
|
|
|
|
return tree;
|
|
}
|
|
|
|
#ifdef RT_SHMEM
|
|
RT_SCOPE RT_RADIX_TREE *
|
|
RT_ATTACH(dsa_area *dsa, RT_HANDLE handle)
|
|
{
|
|
RT_RADIX_TREE *tree;
|
|
dsa_pointer control;
|
|
|
|
tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
|
|
|
|
/* Find the control object in shared memory */
|
|
control = handle;
|
|
|
|
tree->dsa = dsa;
|
|
tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, control);
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
|
|
return tree;
|
|
}
|
|
|
|
RT_SCOPE void
|
|
RT_DETACH(RT_RADIX_TREE * tree)
|
|
{
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
pfree(tree);
|
|
}
|
|
|
|
RT_SCOPE RT_HANDLE
|
|
RT_GET_HANDLE(RT_RADIX_TREE * tree)
|
|
{
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
return tree->ctl->handle;
|
|
}
|
|
|
|
RT_SCOPE void
|
|
RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree)
|
|
{
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
LWLockAcquire(&tree->ctl->lock, LW_EXCLUSIVE);
|
|
}
|
|
|
|
RT_SCOPE void
|
|
RT_LOCK_SHARE(RT_RADIX_TREE * tree)
|
|
{
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
LWLockAcquire(&tree->ctl->lock, LW_SHARED);
|
|
}
|
|
|
|
RT_SCOPE void
|
|
RT_UNLOCK(RT_RADIX_TREE * tree)
|
|
{
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
LWLockRelease(&tree->ctl->lock);
|
|
}
|
|
|
|
/*
|
|
* Recursively free all nodes allocated in the DSA area.
|
|
*/
|
|
static void
|
|
RT_FREE_RECURSE(RT_RADIX_TREE * tree, RT_PTR_ALLOC ptr, int shift)
|
|
{
|
|
RT_CHILD_PTR node;
|
|
|
|
check_stack_depth();
|
|
|
|
node.alloc = ptr;
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
|
|
switch (node.local->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
|
|
|
|
for (int i = 0; i < n4->base.count; i++)
|
|
{
|
|
RT_PTR_ALLOC child = n4->children[i];
|
|
|
|
if (shift > 0)
|
|
RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
|
|
else if (!RT_CHILDPTR_IS_VALUE(child))
|
|
dsa_free(tree->dsa, child);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
|
|
|
|
for (int i = 0; i < n16->base.count; i++)
|
|
{
|
|
RT_PTR_ALLOC child = n16->children[i];
|
|
|
|
if (shift > 0)
|
|
RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
|
|
else if (!RT_CHILDPTR_IS_VALUE(child))
|
|
dsa_free(tree->dsa, child);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
|
|
|
|
for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
|
|
{
|
|
RT_PTR_ALLOC child;
|
|
|
|
if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
|
|
continue;
|
|
|
|
child = *RT_NODE_48_GET_CHILD(n48, i);
|
|
|
|
if (shift > 0)
|
|
RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
|
|
else if (!RT_CHILDPTR_IS_VALUE(child))
|
|
dsa_free(tree->dsa, child);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
|
|
|
|
for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
|
|
{
|
|
RT_PTR_ALLOC child;
|
|
|
|
if (!RT_NODE_256_IS_CHUNK_USED(n256, i))
|
|
continue;
|
|
|
|
child = *RT_NODE_256_GET_CHILD(n256, i);
|
|
|
|
if (shift > 0)
|
|
RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
|
|
else if (!RT_CHILDPTR_IS_VALUE(child))
|
|
dsa_free(tree->dsa, child);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Free the inner node */
|
|
dsa_free(tree->dsa, ptr);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Free the radix tree, including all nodes and leaves.
|
|
*/
|
|
RT_SCOPE void
|
|
RT_FREE(RT_RADIX_TREE * tree)
|
|
{
|
|
#ifdef RT_SHMEM
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
|
|
/* Free all memory used for radix tree nodes */
|
|
Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
|
|
RT_FREE_RECURSE(tree, tree->ctl->root, tree->ctl->start_shift);
|
|
|
|
/*
|
|
* Vandalize the control block to help catch programming error where other
|
|
* backends access the memory formerly occupied by this radix tree.
|
|
*/
|
|
tree->ctl->magic = 0;
|
|
dsa_free(tree->dsa, tree->ctl->handle);
|
|
#endif
|
|
|
|
/*
|
|
* Free all space allocated within the tree's context and delete all child
|
|
* contexts such as those used for nodes.
|
|
*/
|
|
MemoryContextReset(tree->context);
|
|
}
|
|
|
|
/***************** ITERATION *****************/
|
|
|
|
/*
|
|
* Create and return the iterator for the given radix tree.
|
|
*
|
|
* Taking a lock in shared mode during the iteration is the caller's
|
|
* responsibility.
|
|
*/
|
|
RT_SCOPE RT_ITER *
|
|
RT_BEGIN_ITERATE(RT_RADIX_TREE * tree)
|
|
{
|
|
RT_ITER *iter;
|
|
RT_CHILD_PTR root;
|
|
|
|
iter = (RT_ITER *) MemoryContextAllocZero(tree->iter_context,
|
|
sizeof(RT_ITER));
|
|
iter->tree = tree;
|
|
|
|
Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
|
|
root.alloc = iter->tree->ctl->root;
|
|
RT_PTR_SET_LOCAL(tree, &root);
|
|
|
|
iter->top_level = iter->tree->ctl->start_shift / RT_SPAN;
|
|
|
|
/* Set the root to start */
|
|
iter->cur_level = iter->top_level;
|
|
iter->node_iters[iter->cur_level].node = root;
|
|
iter->node_iters[iter->cur_level].idx = 0;
|
|
|
|
return iter;
|
|
}
|
|
|
|
/*
|
|
* Scan the inner node and return the next child pointer if one exists, otherwise
|
|
* return NULL.
|
|
*/
|
|
static inline RT_PTR_ALLOC *
|
|
RT_NODE_ITERATE_NEXT(RT_ITER * iter, int level)
|
|
{
|
|
uint8 key_chunk = 0;
|
|
RT_NODE_ITER *node_iter;
|
|
RT_CHILD_PTR node;
|
|
RT_PTR_ALLOC *slot = NULL;
|
|
|
|
#ifdef RT_SHMEM
|
|
Assert(iter->tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
#endif
|
|
|
|
node_iter = &(iter->node_iters[level]);
|
|
node = node_iter->node;
|
|
|
|
Assert(node.local != NULL);
|
|
|
|
switch ((node.local)->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
|
|
|
|
if (node_iter->idx >= n4->base.count)
|
|
return NULL;
|
|
|
|
slot = &n4->children[node_iter->idx];
|
|
key_chunk = n4->chunks[node_iter->idx];
|
|
node_iter->idx++;
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
|
|
|
|
if (node_iter->idx >= n16->base.count)
|
|
return NULL;
|
|
|
|
slot = &n16->children[node_iter->idx];
|
|
key_chunk = n16->chunks[node_iter->idx];
|
|
node_iter->idx++;
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
|
|
int chunk;
|
|
|
|
for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (RT_NODE_48_IS_CHUNK_USED(n48, chunk))
|
|
break;
|
|
}
|
|
|
|
if (chunk >= RT_NODE_MAX_SLOTS)
|
|
return NULL;
|
|
|
|
slot = RT_NODE_48_GET_CHILD(n48, chunk);
|
|
|
|
key_chunk = chunk;
|
|
node_iter->idx = chunk + 1;
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) (node.local);
|
|
int chunk;
|
|
|
|
for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (RT_NODE_256_IS_CHUNK_USED(n256, chunk))
|
|
break;
|
|
}
|
|
|
|
if (chunk >= RT_NODE_MAX_SLOTS)
|
|
return NULL;
|
|
|
|
slot = RT_NODE_256_GET_CHILD(n256, chunk);
|
|
|
|
key_chunk = chunk;
|
|
node_iter->idx = chunk + 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Update the key */
|
|
iter->key &= ~(((uint64) RT_CHUNK_MASK) << (level * RT_SPAN));
|
|
iter->key |= (((uint64) key_chunk) << (level * RT_SPAN));
|
|
|
|
return slot;
|
|
}
|
|
|
|
/*
|
|
* Return pointer to value and set key_p as long as there is a key. Otherwise
|
|
* return NULL.
|
|
*/
|
|
RT_SCOPE RT_VALUE_TYPE *
|
|
RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p)
|
|
{
|
|
RT_PTR_ALLOC *slot = NULL;
|
|
|
|
while (iter->cur_level <= iter->top_level)
|
|
{
|
|
RT_CHILD_PTR node;
|
|
|
|
slot = RT_NODE_ITERATE_NEXT(iter, iter->cur_level);
|
|
|
|
if (iter->cur_level == 0 && slot != NULL)
|
|
{
|
|
/* Found a value at the leaf node */
|
|
*key_p = iter->key;
|
|
node.alloc = *slot;
|
|
|
|
if (RT_CHILDPTR_IS_VALUE(*slot))
|
|
return (RT_VALUE_TYPE *) slot;
|
|
else
|
|
{
|
|
RT_PTR_SET_LOCAL(iter->tree, &node);
|
|
return (RT_VALUE_TYPE *) node.local;
|
|
}
|
|
}
|
|
|
|
if (slot != NULL)
|
|
{
|
|
/* Found the child slot, move down the tree */
|
|
node.alloc = *slot;
|
|
RT_PTR_SET_LOCAL(iter->tree, &node);
|
|
|
|
iter->cur_level--;
|
|
iter->node_iters[iter->cur_level].node = node;
|
|
iter->node_iters[iter->cur_level].idx = 0;
|
|
}
|
|
else
|
|
{
|
|
/* Not found the child slot, move up the tree */
|
|
iter->cur_level++;
|
|
}
|
|
}
|
|
|
|
/* We've visited all nodes, so the iteration finished */
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Terminate the iteration. The caller is responsible for releasing any locks.
|
|
*/
|
|
RT_SCOPE void
|
|
RT_END_ITERATE(RT_ITER * iter)
|
|
{
|
|
pfree(iter);
|
|
}
|
|
|
|
/***************** DELETION *****************/
|
|
|
|
#ifdef RT_USE_DELETE
|
|
|
|
/* Delete the element at "deletepos" */
|
|
static inline void
|
|
RT_SHIFT_ARRAYS_AND_DELETE(uint8 *chunks, RT_PTR_ALLOC * children, int count, int deletepos)
|
|
{
|
|
/*
|
|
* This is basically a memmove, but written in a simple loop for speed on
|
|
* small inputs.
|
|
*/
|
|
for (int i = deletepos; i < count - 1; i++)
|
|
{
|
|
/* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
|
|
#ifdef __GNUC__
|
|
__asm__("");
|
|
#endif
|
|
chunks[i] = chunks[i + 1];
|
|
children[i] = children[i + 1];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Copy both chunk and slot arrays into the right
|
|
* place. The element at "deletepos" is deleted by skipping it.
|
|
*/
|
|
static inline void
|
|
RT_COPY_ARRAYS_AND_DELETE(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
|
|
uint8 *src_chunks, RT_PTR_ALLOC * src_children,
|
|
int count, int deletepos)
|
|
{
|
|
for (int i = 0; i < count - 1; i++)
|
|
{
|
|
/*
|
|
* use a branch-free computation to skip the index of the deleted
|
|
* element
|
|
*/
|
|
int sourceidx = i + (i >= deletepos);
|
|
int destidx = i;
|
|
|
|
dst_chunks[destidx] = src_chunks[sourceidx];
|
|
dst_children[destidx] = src_children[sourceidx];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Note: While all node-growing functions are called to perform an insertion
|
|
* when no more space is available, shrinking is not a hard-and-fast requirement.
|
|
* When shrinking nodes, we generally wait until the count is about 3/4 of
|
|
* the next lower node's fanout. This prevents ping-ponging between different
|
|
* node sizes.
|
|
*
|
|
* Some shrinking functions delete first and then shrink, either because we
|
|
* must or because it's fast and simple that way. Sometimes it's faster to
|
|
* delete while shrinking.
|
|
*/
|
|
|
|
/*
|
|
* Move contents of a node256 to a node48. Any deletion should have happened
|
|
* in the caller.
|
|
*/
|
|
static void pg_noinline
|
|
RT_SHRINK_NODE_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_48 *new48;
|
|
int slot_idx = 0;
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_48, RT_CLASS_48);
|
|
new48 = (RT_NODE_48 *) newnode.local;
|
|
|
|
/* copy over the entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
|
|
{
|
|
if (RT_NODE_256_IS_CHUNK_USED(n256, i))
|
|
{
|
|
new48->slot_idxs[i] = slot_idx;
|
|
new48->children[slot_idx] = n256->children[i];
|
|
slot_idx++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Since we just copied a dense array, we can fill "isset" using a single
|
|
* store, provided the length of that array is at most the number of bits
|
|
* in a bitmapword.
|
|
*/
|
|
Assert(n256->base.count <= BITS_PER_BITMAPWORD);
|
|
new48->isset[0] = (bitmapword) (((uint64) 1 << n256->base.count) - 1);
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
}
|
|
|
|
static inline void
|
|
RT_REMOVE_CHILD_256(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
int shrink_threshold;
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
|
|
int idx = RT_BM_IDX(chunk);
|
|
int bitnum = RT_BM_BIT(chunk);
|
|
|
|
/* Mark the slot free for "chunk" */
|
|
n256->isset[idx] &= ~((bitmapword) 1 << bitnum);
|
|
|
|
n256->base.count--;
|
|
|
|
/*
|
|
* A full node256 will have a count of zero because of overflow, so we
|
|
* delete first before checking the shrink threshold.
|
|
*/
|
|
Assert(n256->base.count > 0);
|
|
|
|
/* This simplifies RT_SHRINK_NODE_256() */
|
|
shrink_threshold = BITS_PER_BITMAPWORD;
|
|
shrink_threshold = Min(RT_FANOUT_48 / 4 * 3, shrink_threshold);
|
|
|
|
if (n256->base.count <= shrink_threshold)
|
|
RT_SHRINK_NODE_256(tree, parent_slot, node, chunk);
|
|
}
|
|
|
|
/*
|
|
* Move contents of a node48 to a node16. Any deletion should have happened
|
|
* in the caller.
|
|
*/
|
|
static void pg_noinline
|
|
RT_SHRINK_NODE_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_16 *new16;
|
|
int destidx = 0;
|
|
|
|
/*
|
|
* Initialize new node. For now we skip the larger node16 size class for
|
|
* simplicity.
|
|
*/
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_16, RT_CLASS_16_LO);
|
|
new16 = (RT_NODE_16 *) newnode.local;
|
|
|
|
/* copy over all existing entries */
|
|
RT_COPY_COMMON(newnode, node);
|
|
for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (n48->slot_idxs[chunk] != RT_INVALID_SLOT_IDX)
|
|
{
|
|
new16->chunks[destidx] = chunk;
|
|
new16->children[destidx] = n48->children[n48->slot_idxs[chunk]];
|
|
destidx++;
|
|
}
|
|
}
|
|
|
|
Assert(destidx < new16->base.fanout);
|
|
|
|
RT_VERIFY_NODE((RT_NODE *) new16);
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
}
|
|
|
|
static inline void
|
|
RT_REMOVE_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk)
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
|
|
int deletepos = n48->slot_idxs[chunk];
|
|
|
|
/* For now we skip the larger node16 size class for simplicity */
|
|
int shrink_threshold = RT_FANOUT_16_LO / 4 * 3;
|
|
int idx;
|
|
int bitnum;
|
|
|
|
Assert(deletepos != RT_INVALID_SLOT_IDX);
|
|
|
|
idx = RT_BM_IDX(deletepos);
|
|
bitnum = RT_BM_BIT(deletepos);
|
|
n48->isset[idx] &= ~((bitmapword) 1 << bitnum);
|
|
n48->slot_idxs[chunk] = RT_INVALID_SLOT_IDX;
|
|
|
|
n48->base.count--;
|
|
|
|
/*
|
|
* To keep shrinking simple, do it after deleting, which is fast for
|
|
* node48 anyway.
|
|
*/
|
|
if (n48->base.count <= shrink_threshold)
|
|
RT_SHRINK_NODE_48(tree, parent_slot, node, chunk);
|
|
}
|
|
|
|
/*
|
|
* Move contents of a node16 to a node4, and delete the one at "deletepos".
|
|
* By deleting as we move, we can avoid memmove operations in the new
|
|
* node.
|
|
*/
|
|
static void pg_noinline
|
|
RT_SHRINK_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 deletepos)
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
|
|
RT_CHILD_PTR newnode;
|
|
RT_NODE_4 *new4;
|
|
|
|
/* initialize new node */
|
|
newnode = RT_ALLOC_NODE(tree, RT_NODE_KIND_4, RT_CLASS_4);
|
|
new4 = (RT_NODE_4 *) newnode.local;
|
|
|
|
/* copy over existing entries, except for the one at "deletepos" */
|
|
RT_COPY_COMMON(newnode, node);
|
|
RT_COPY_ARRAYS_AND_DELETE(new4->chunks, new4->children,
|
|
n16->chunks, n16->children,
|
|
n16->base.count, deletepos);
|
|
|
|
new4->base.count--;
|
|
RT_VERIFY_NODE((RT_NODE *) new4);
|
|
|
|
/* free old node and update reference in parent */
|
|
*parent_slot = newnode.alloc;
|
|
RT_FREE_NODE(tree, node);
|
|
}
|
|
|
|
static inline void
|
|
RT_REMOVE_CHILD_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
|
|
int deletepos = slot - n16->children;
|
|
|
|
/*
|
|
* When shrinking to node4, 4 is hard-coded. After shrinking, the new node
|
|
* will end up with 3 elements and 3 is the largest count where linear
|
|
* search is faster than SIMD, at least on x86-64.
|
|
*/
|
|
if (n16->base.count <= 4)
|
|
{
|
|
RT_SHRINK_NODE_16(tree, parent_slot, node, deletepos);
|
|
return;
|
|
}
|
|
|
|
Assert(deletepos >= 0);
|
|
Assert(n16->chunks[deletepos] == chunk);
|
|
|
|
RT_SHIFT_ARRAYS_AND_DELETE(n16->chunks, n16->children,
|
|
n16->base.count, deletepos);
|
|
n16->base.count--;
|
|
}
|
|
|
|
static inline void
|
|
RT_REMOVE_CHILD_4(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
|
|
|
|
if (n4->base.count == 1)
|
|
{
|
|
Assert(n4->chunks[0] == chunk);
|
|
|
|
/*
|
|
* If we're deleting the last entry from the root child node don't
|
|
* free it, but mark both the tree and the root child node empty. That
|
|
* way, RT_SET can assume it exists.
|
|
*/
|
|
if (parent_slot == &tree->ctl->root)
|
|
{
|
|
n4->base.count = 0;
|
|
tree->ctl->start_shift = 0;
|
|
tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Deleting last entry, so just free the entire node.
|
|
* RT_DELETE_RECURSIVE has already freed the value and lower-level
|
|
* children.
|
|
*/
|
|
RT_FREE_NODE(tree, node);
|
|
|
|
/*
|
|
* Also null out the parent's slot -- this tells the next higher
|
|
* level to delete its child pointer
|
|
*/
|
|
*parent_slot = RT_INVALID_PTR_ALLOC;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int deletepos = slot - n4->children;
|
|
|
|
Assert(deletepos >= 0);
|
|
Assert(n4->chunks[deletepos] == chunk);
|
|
|
|
RT_SHIFT_ARRAYS_AND_DELETE(n4->chunks, n4->children,
|
|
n4->base.count, deletepos);
|
|
|
|
n4->base.count--;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Delete the child pointer corresponding to "key" in the given node.
|
|
*/
|
|
static inline void
|
|
RT_NODE_DELETE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 chunk, RT_PTR_ALLOC * slot)
|
|
{
|
|
switch ((node.local)->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_REMOVE_CHILD_4(tree, parent_slot, node, chunk, slot);
|
|
return;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
RT_REMOVE_CHILD_16(tree, parent_slot, node, chunk, slot);
|
|
return;
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_REMOVE_CHILD_48(tree, parent_slot, node, chunk);
|
|
return;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_REMOVE_CHILD_256(tree, parent_slot, node, chunk);
|
|
return;
|
|
}
|
|
default:
|
|
pg_unreachable();
|
|
}
|
|
}
|
|
|
|
/* workhorse for RT_DELETE */
|
|
static bool
|
|
RT_DELETE_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
|
|
{
|
|
RT_PTR_ALLOC *slot;
|
|
RT_CHILD_PTR node;
|
|
uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
|
|
|
|
node.alloc = *parent_slot;
|
|
RT_PTR_SET_LOCAL(tree, &node);
|
|
slot = RT_NODE_SEARCH(node.local, chunk);
|
|
|
|
if (slot == NULL)
|
|
return false;
|
|
|
|
if (shift == 0)
|
|
{
|
|
if (!RT_CHILDPTR_IS_VALUE(*slot))
|
|
RT_FREE_LEAF(tree, *slot);
|
|
|
|
RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
bool deleted;
|
|
|
|
deleted = RT_DELETE_RECURSIVE(tree, slot, key, shift - RT_SPAN);
|
|
|
|
/* Child node was freed, so delete its slot now */
|
|
if (*slot == RT_INVALID_PTR_ALLOC)
|
|
{
|
|
Assert(deleted);
|
|
RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
|
|
}
|
|
|
|
return deleted;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Delete the given key from the radix tree. If the key is found delete it
|
|
* and return true, otherwise do nothing and return false.
|
|
*
|
|
* Taking a lock in exclusive mode is the caller's responsibility.
|
|
*/
|
|
RT_SCOPE bool
|
|
RT_DELETE(RT_RADIX_TREE * tree, uint64 key)
|
|
{
|
|
bool deleted;
|
|
|
|
#ifdef RT_SHMEM
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
#endif
|
|
|
|
if (key > tree->ctl->max_val)
|
|
return false;
|
|
|
|
Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
|
|
deleted = RT_DELETE_RECURSIVE(tree, &tree->ctl->root,
|
|
key, tree->ctl->start_shift);
|
|
|
|
/* Found the key to delete. Update the statistics */
|
|
if (deleted)
|
|
{
|
|
tree->ctl->num_keys--;
|
|
Assert(tree->ctl->num_keys >= 0);
|
|
}
|
|
|
|
return deleted;
|
|
}
|
|
|
|
#endif /* RT_USE_DELETE */
|
|
|
|
/***************** UTILITY FUNCTIONS *****************/
|
|
|
|
/*
|
|
* Return the statistics of the amount of memory used by the radix tree.
|
|
*
|
|
* Since dsa_get_total_size() does appropriate locking, the caller doesn't
|
|
* need to take a lock.
|
|
*/
|
|
RT_SCOPE uint64
|
|
RT_MEMORY_USAGE(RT_RADIX_TREE * tree)
|
|
{
|
|
size_t total = 0;
|
|
|
|
#ifdef RT_SHMEM
|
|
Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
|
|
total = dsa_get_total_size(tree->dsa);
|
|
#else
|
|
total = MemoryContextMemAllocated(tree->context, true);
|
|
#endif
|
|
|
|
return total;
|
|
}
|
|
|
|
/*
|
|
* Perform some sanity checks on the given node.
|
|
*/
|
|
static void
|
|
RT_VERIFY_NODE(RT_NODE * node)
|
|
{
|
|
#ifdef USE_ASSERT_CHECKING
|
|
|
|
switch (node->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) node;
|
|
|
|
/* RT_DUMP_NODE(node); */
|
|
|
|
for (int i = 1; i < n4->base.count; i++)
|
|
Assert(n4->chunks[i - 1] < n4->chunks[i]);
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node;
|
|
|
|
/* RT_DUMP_NODE(node); */
|
|
|
|
for (int i = 1; i < n16->base.count; i++)
|
|
Assert(n16->chunks[i - 1] < n16->chunks[i]);
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node;
|
|
int cnt = 0;
|
|
|
|
/* RT_DUMP_NODE(node); */
|
|
|
|
for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
|
|
{
|
|
uint8 slot = n48->slot_idxs[i];
|
|
int idx = RT_BM_IDX(slot);
|
|
int bitnum = RT_BM_BIT(slot);
|
|
|
|
if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
|
|
continue;
|
|
|
|
/* Check if the corresponding slot is used */
|
|
Assert(slot < node->fanout);
|
|
Assert((n48->isset[idx] & ((bitmapword) 1 << bitnum)) != 0);
|
|
|
|
cnt++;
|
|
}
|
|
|
|
Assert(n48->base.count == cnt);
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node;
|
|
int cnt = 0;
|
|
|
|
/* RT_DUMP_NODE(node); */
|
|
|
|
for (int i = 0; i < RT_BM_IDX(RT_NODE_MAX_SLOTS); i++)
|
|
cnt += bmw_popcount(n256->isset[i]);
|
|
|
|
/*
|
|
* Check if the number of used chunk matches, accounting for
|
|
* overflow
|
|
*/
|
|
if (cnt == RT_FANOUT_256)
|
|
Assert(n256->base.count == 0);
|
|
else
|
|
Assert(n256->base.count == cnt);
|
|
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************** DEBUG FUNCTIONS *****************/
|
|
|
|
#ifdef RT_DEBUG
|
|
|
|
/*
|
|
* Print out tree stats, some of which are only collected in debugging builds.
|
|
*/
|
|
RT_SCOPE void
|
|
RT_STATS(RT_RADIX_TREE * tree)
|
|
{
|
|
fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
|
|
fprintf(stderr, "num_keys = %lld\n", (long long) tree->ctl->num_keys);
|
|
|
|
#ifdef RT_SHMEM
|
|
fprintf(stderr, "handle = " DSA_POINTER_FORMAT "\n", tree->ctl->handle);
|
|
#endif
|
|
|
|
fprintf(stderr, "height = %d", tree->ctl->start_shift / RT_SPAN);
|
|
|
|
for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
|
|
{
|
|
RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
|
|
|
|
fprintf(stderr, ", n%d = %lld", size_class.fanout, (long long) tree->ctl->num_nodes[i]);
|
|
}
|
|
|
|
fprintf(stderr, ", leaves = %lld", (long long) tree->ctl->num_leaves);
|
|
|
|
fprintf(stderr, "\n");
|
|
}
|
|
|
|
/*
|
|
* Print out debugging information about the given node.
|
|
*/
|
|
static void
|
|
pg_attribute_unused()
|
|
RT_DUMP_NODE(RT_NODE * node)
|
|
{
|
|
#ifdef RT_SHMEM
|
|
#define RT_CHILD_PTR_FORMAT DSA_POINTER_FORMAT
|
|
#else
|
|
#define RT_CHILD_PTR_FORMAT "%p"
|
|
#endif
|
|
|
|
fprintf(stderr, "kind %d, fanout %d, count %u\n",
|
|
(node->kind == RT_NODE_KIND_4) ? 4 :
|
|
(node->kind == RT_NODE_KIND_16) ? 16 :
|
|
(node->kind == RT_NODE_KIND_48) ? 48 : 256,
|
|
node->fanout == 0 ? 256 : node->fanout,
|
|
node->count == 0 ? 256 : node->count);
|
|
|
|
switch (node->kind)
|
|
{
|
|
case RT_NODE_KIND_4:
|
|
{
|
|
RT_NODE_4 *n4 = (RT_NODE_4 *) node;
|
|
|
|
fprintf(stderr, "chunks and slots:\n");
|
|
for (int i = 0; i < n4->base.count; i++)
|
|
{
|
|
fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
|
|
i, n4->chunks[i], n4->children[i]);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_16:
|
|
{
|
|
RT_NODE_16 *n16 = (RT_NODE_16 *) node;
|
|
|
|
fprintf(stderr, "chunks and slots:\n");
|
|
for (int i = 0; i < n16->base.count; i++)
|
|
{
|
|
fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
|
|
i, n16->chunks[i], n16->children[i]);
|
|
}
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_48:
|
|
{
|
|
RT_NODE_48 *n48 = (RT_NODE_48 *) node;
|
|
char *sep = "";
|
|
|
|
fprintf(stderr, "slot_idxs: \n");
|
|
for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
|
|
continue;
|
|
|
|
fprintf(stderr, " idx[%d] = %d\n",
|
|
chunk, n48->slot_idxs[chunk]);
|
|
}
|
|
|
|
fprintf(stderr, "isset-bitmap: ");
|
|
for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++)
|
|
{
|
|
fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]);
|
|
sep = " ";
|
|
}
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, "chunks and slots:\n");
|
|
for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
|
|
continue;
|
|
|
|
fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
|
|
chunk,
|
|
*RT_NODE_48_GET_CHILD(n48, chunk));
|
|
}
|
|
break;
|
|
}
|
|
case RT_NODE_KIND_256:
|
|
{
|
|
RT_NODE_256 *n256 = (RT_NODE_256 *) node;
|
|
char *sep = "";
|
|
|
|
fprintf(stderr, "isset-bitmap: ");
|
|
for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++)
|
|
{
|
|
fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]);
|
|
sep = " ";
|
|
}
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, "chunks and slots:\n");
|
|
for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
|
|
{
|
|
if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
|
|
continue;
|
|
|
|
fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
|
|
chunk,
|
|
*RT_NODE_256_GET_CHILD(n256, chunk));
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif /* RT_DEBUG */
|
|
|
|
#endif /* RT_DEFINE */
|
|
|
|
|
|
/* undefine external parameters, so next radix tree can be defined */
|
|
#undef RT_PREFIX
|
|
#undef RT_SCOPE
|
|
#undef RT_DECLARE
|
|
#undef RT_DEFINE
|
|
#undef RT_VALUE_TYPE
|
|
#undef RT_VARLEN_VALUE_SIZE
|
|
#undef RT_RUNTIME_EMBEDDABLE_VALUE
|
|
#undef RT_SHMEM
|
|
#undef RT_USE_DELETE
|
|
#undef RT_DEBUG
|
|
|
|
/* locally declared macros */
|
|
#undef RT_MAKE_PREFIX
|
|
#undef RT_MAKE_NAME
|
|
#undef RT_MAKE_NAME_
|
|
#undef RT_STR
|
|
#undef RT_STR_
|
|
#undef RT_SPAN
|
|
#undef RT_NODE_MAX_SLOTS
|
|
#undef RT_CHUNK_MASK
|
|
#undef RT_MAX_SHIFT
|
|
#undef RT_MAX_LEVEL
|
|
#undef RT_GET_KEY_CHUNK
|
|
#undef RT_BM_IDX
|
|
#undef RT_BM_BIT
|
|
#undef RT_NODE_MUST_GROW
|
|
#undef RT_NODE_KIND_COUNT
|
|
#undef RT_NUM_SIZE_CLASSES
|
|
#undef RT_INVALID_SLOT_IDX
|
|
#undef RT_SLAB_BLOCK_SIZE
|
|
#undef RT_RADIX_TREE_MAGIC
|
|
#undef RT_CHILD_PTR_FORMAT
|
|
|
|
/* type declarations */
|
|
#undef RT_RADIX_TREE
|
|
#undef RT_RADIX_TREE_CONTROL
|
|
#undef RT_CHILD_PTR
|
|
#undef RT_PTR_ALLOC
|
|
#undef RT_INVALID_PTR_ALLOC
|
|
#undef RT_HANDLE
|
|
#undef RT_ITER
|
|
#undef RT_NODE
|
|
#undef RT_NODE_ITER
|
|
#undef RT_NODE_KIND_4
|
|
#undef RT_NODE_KIND_16
|
|
#undef RT_NODE_KIND_48
|
|
#undef RT_NODE_KIND_256
|
|
#undef RT_NODE_4
|
|
#undef RT_NODE_16
|
|
#undef RT_NODE_48
|
|
#undef RT_NODE_256
|
|
#undef RT_SIZE_CLASS
|
|
#undef RT_SIZE_CLASS_ELEM
|
|
#undef RT_SIZE_CLASS_INFO
|
|
#undef RT_CLASS_4
|
|
#undef RT_CLASS_16_LO
|
|
#undef RT_CLASS_16_HI
|
|
#undef RT_CLASS_48
|
|
#undef RT_CLASS_256
|
|
#undef RT_FANOUT_4
|
|
#undef RT_FANOUT_4_MAX
|
|
#undef RT_FANOUT_16_LO
|
|
#undef RT_FANOUT_16_HI
|
|
#undef RT_FANOUT_16_MAX
|
|
#undef RT_FANOUT_48
|
|
#undef RT_FANOUT_48_MAX
|
|
#undef RT_FANOUT_256
|
|
|
|
/* function declarations */
|
|
#undef RT_CREATE
|
|
#undef RT_FREE
|
|
#undef RT_ATTACH
|
|
#undef RT_DETACH
|
|
#undef RT_LOCK_EXCLUSIVE
|
|
#undef RT_LOCK_SHARE
|
|
#undef RT_UNLOCK
|
|
#undef RT_GET_HANDLE
|
|
#undef RT_FIND
|
|
#undef RT_SET
|
|
#undef RT_BEGIN_ITERATE
|
|
#undef RT_ITERATE_NEXT
|
|
#undef RT_END_ITERATE
|
|
#undef RT_DELETE
|
|
#undef RT_MEMORY_USAGE
|
|
#undef RT_DUMP_NODE
|
|
#undef RT_STATS
|
|
|
|
/* internal helper functions */
|
|
#undef RT_GET_VALUE_SIZE
|
|
#undef RT_VALUE_IS_EMBEDDABLE
|
|
#undef RT_CHILDPTR_IS_VALUE
|
|
#undef RT_GET_SLOT_RECURSIVE
|
|
#undef RT_DELETE_RECURSIVE
|
|
#undef RT_ALLOC_NODE
|
|
#undef RT_ALLOC_LEAF
|
|
#undef RT_FREE_NODE
|
|
#undef RT_FREE_LEAF
|
|
#undef RT_FREE_RECURSE
|
|
#undef RT_EXTEND_UP
|
|
#undef RT_EXTEND_DOWN
|
|
#undef RT_COPY_COMMON
|
|
#undef RT_PTR_SET_LOCAL
|
|
#undef RT_PTR_ALLOC_IS_VALID
|
|
#undef RT_NODE_16_SEARCH_EQ
|
|
#undef RT_NODE_4_GET_INSERTPOS
|
|
#undef RT_NODE_16_GET_INSERTPOS
|
|
#undef RT_SHIFT_ARRAYS_FOR_INSERT
|
|
#undef RT_SHIFT_ARRAYS_AND_DELETE
|
|
#undef RT_COPY_ARRAYS_FOR_INSERT
|
|
#undef RT_COPY_ARRAYS_AND_DELETE
|
|
#undef RT_NODE_48_IS_CHUNK_USED
|
|
#undef RT_NODE_48_GET_CHILD
|
|
#undef RT_NODE_256_IS_CHUNK_USED
|
|
#undef RT_NODE_256_GET_CHILD
|
|
#undef RT_KEY_GET_SHIFT
|
|
#undef RT_SHIFT_GET_MAX_VAL
|
|
#undef RT_NODE_SEARCH
|
|
#undef RT_ADD_CHILD_4
|
|
#undef RT_ADD_CHILD_16
|
|
#undef RT_ADD_CHILD_48
|
|
#undef RT_ADD_CHILD_256
|
|
#undef RT_GROW_NODE_4
|
|
#undef RT_GROW_NODE_16
|
|
#undef RT_GROW_NODE_48
|
|
#undef RT_REMOVE_CHILD_4
|
|
#undef RT_REMOVE_CHILD_16
|
|
#undef RT_REMOVE_CHILD_48
|
|
#undef RT_REMOVE_CHILD_256
|
|
#undef RT_SHRINK_NODE_16
|
|
#undef RT_SHRINK_NODE_48
|
|
#undef RT_SHRINK_NODE_256
|
|
#undef RT_NODE_DELETE
|
|
#undef RT_NODE_INSERT
|
|
#undef RT_NODE_ITERATE_NEXT
|
|
#undef RT_VERIFY_NODE
|