295 lines
9.3 KiB
C
295 lines
9.3 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* bloomfilter.c
|
|
* Space-efficient set membership testing
|
|
*
|
|
* A Bloom filter is a probabilistic data structure that is used to test an
|
|
* element's membership of a set. False positives are possible, but false
|
|
* negatives are not; a test of membership of the set returns either "possibly
|
|
* in set" or "definitely not in set". This is typically very space efficient,
|
|
* which can be a decisive advantage.
|
|
*
|
|
* Elements can be added to the set, but not removed. The more elements that
|
|
* are added, the larger the probability of false positives. Caller must hint
|
|
* an estimated total size of the set when the Bloom filter is initialized.
|
|
* This is used to balance the use of memory against the final false positive
|
|
* rate.
|
|
*
|
|
* The implementation is well suited to data synchronization problems between
|
|
* unordered sets, especially where predictable performance is important and
|
|
* some false positives are acceptable. It's also well suited to cache
|
|
* filtering problems where a relatively small and/or low cardinality set is
|
|
* fingerprinted, especially when many subsequent membership tests end up
|
|
* indicating that values of interest are not present. That should save the
|
|
* caller many authoritative lookups, such as expensive probes of a much larger
|
|
* on-disk structure.
|
|
*
|
|
* Copyright (c) 2018-2024, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/lib/bloomfilter.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <math.h>
|
|
|
|
#include "common/hashfn.h"
|
|
#include "lib/bloomfilter.h"
|
|
#include "port/pg_bitutils.h"
|
|
|
|
#define MAX_HASH_FUNCS 10
|
|
|
|
struct bloom_filter
|
|
{
|
|
/* K hash functions are used, seeded by caller's seed */
|
|
int k_hash_funcs;
|
|
uint64 seed;
|
|
/* m is bitset size, in bits. Must be a power of two <= 2^32. */
|
|
uint64 m;
|
|
unsigned char bitset[FLEXIBLE_ARRAY_MEMBER];
|
|
};
|
|
|
|
static int my_bloom_power(uint64 target_bitset_bits);
|
|
static int optimal_k(uint64 bitset_bits, int64 total_elems);
|
|
static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem,
|
|
size_t len);
|
|
static inline uint32 mod_m(uint32 val, uint64 m);
|
|
|
|
/*
|
|
* Create Bloom filter in caller's memory context. We aim for a false positive
|
|
* rate of between 1% and 2% when bitset size is not constrained by memory
|
|
* availability.
|
|
*
|
|
* total_elems is an estimate of the final size of the set. It should be
|
|
* approximately correct, but the implementation can cope well with it being
|
|
* off by perhaps a factor of five or more. See "Bloom Filters in
|
|
* Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why
|
|
* this is the case.
|
|
*
|
|
* bloom_work_mem is sized in KB, in line with the general work_mem convention.
|
|
* This determines the size of the underlying bitset (trivial bookkeeping space
|
|
* isn't counted). The bitset is always sized as a power of two number of
|
|
* bits, and the largest possible bitset is 512MB (2^32 bits). The
|
|
* implementation allocates only enough memory to target its standard false
|
|
* positive rate, using a simple formula with caller's total_elems estimate as
|
|
* an input. The bitset might be as small as 1MB, even when bloom_work_mem is
|
|
* much higher.
|
|
*
|
|
* The Bloom filter is seeded using a value provided by the caller. Using a
|
|
* distinct seed value on every call makes it unlikely that the same false
|
|
* positives will reoccur when the same set is fingerprinted a second time.
|
|
* Callers that don't care about this pass a constant as their seed, typically
|
|
* 0. Callers can also use a pseudo-random seed, eg from pg_prng_uint64().
|
|
*/
|
|
bloom_filter *
|
|
bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
|
|
{
|
|
bloom_filter *filter;
|
|
int bloom_power;
|
|
uint64 bitset_bytes;
|
|
uint64 bitset_bits;
|
|
|
|
/*
|
|
* Aim for two bytes per element; this is sufficient to get a false
|
|
* positive rate below 1%, independent of the size of the bitset or total
|
|
* number of elements. Also, if rounding down the size of the bitset to
|
|
* the next lowest power of two turns out to be a significant drop, the
|
|
* false positive rate still won't exceed 2% in almost all cases.
|
|
*/
|
|
bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2);
|
|
bitset_bytes = Max(1024 * 1024, bitset_bytes);
|
|
|
|
/*
|
|
* Size in bits should be the highest power of two <= target. bitset_bits
|
|
* is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
|
|
*/
|
|
bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE);
|
|
bitset_bits = UINT64CONST(1) << bloom_power;
|
|
bitset_bytes = bitset_bits / BITS_PER_BYTE;
|
|
|
|
/* Allocate bloom filter with unset bitset */
|
|
filter = palloc0(offsetof(bloom_filter, bitset) +
|
|
sizeof(unsigned char) * bitset_bytes);
|
|
filter->k_hash_funcs = optimal_k(bitset_bits, total_elems);
|
|
filter->seed = seed;
|
|
filter->m = bitset_bits;
|
|
|
|
return filter;
|
|
}
|
|
|
|
/*
|
|
* Free Bloom filter
|
|
*/
|
|
void
|
|
bloom_free(bloom_filter *filter)
|
|
{
|
|
pfree(filter);
|
|
}
|
|
|
|
/*
|
|
* Add element to Bloom filter
|
|
*/
|
|
void
|
|
bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
|
|
{
|
|
uint32 hashes[MAX_HASH_FUNCS];
|
|
int i;
|
|
|
|
k_hashes(filter, hashes, elem, len);
|
|
|
|
/* Map a bit-wise address to a byte-wise address + bit offset */
|
|
for (i = 0; i < filter->k_hash_funcs; i++)
|
|
{
|
|
filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Test if Bloom filter definitely lacks element.
|
|
*
|
|
* Returns true if the element is definitely not in the set of elements
|
|
* observed by bloom_add_element(). Otherwise, returns false, indicating that
|
|
* element is probably present in set.
|
|
*/
|
|
bool
|
|
bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
|
|
{
|
|
uint32 hashes[MAX_HASH_FUNCS];
|
|
int i;
|
|
|
|
k_hashes(filter, hashes, elem, len);
|
|
|
|
/* Map a bit-wise address to a byte-wise address + bit offset */
|
|
for (i = 0; i < filter->k_hash_funcs; i++)
|
|
{
|
|
if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7))))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* What proportion of bits are currently set?
|
|
*
|
|
* Returns proportion, expressed as a multiplier of filter size. That should
|
|
* generally be close to 0.5, even when we have more than enough memory to
|
|
* ensure a false positive rate within target 1% to 2% band, since more hash
|
|
* functions are used as more memory is available per element.
|
|
*
|
|
* This is the only instrumentation that is low overhead enough to appear in
|
|
* debug traces. When debugging Bloom filter code, it's likely to be far more
|
|
* interesting to directly test the false positive rate.
|
|
*/
|
|
double
|
|
bloom_prop_bits_set(bloom_filter *filter)
|
|
{
|
|
int bitset_bytes = filter->m / BITS_PER_BYTE;
|
|
uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
|
|
|
|
return bits_set / (double) filter->m;
|
|
}
|
|
|
|
/*
|
|
* Which element in the sequence of powers of two is less than or equal to
|
|
* target_bitset_bits?
|
|
*
|
|
* Value returned here must be generally safe as the basis for actual bitset
|
|
* size.
|
|
*
|
|
* Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient
|
|
* for the needs of all current callers, and allows us to use 32-bit hash
|
|
* functions. It also makes it easy to stay under the MaxAllocSize restriction
|
|
* (caller needs to leave room for non-bitset fields that appear before
|
|
* flexible array member, so a 1GB bitset would use an allocation that just
|
|
* exceeds MaxAllocSize).
|
|
*/
|
|
static int
|
|
my_bloom_power(uint64 target_bitset_bits)
|
|
{
|
|
int bloom_power = -1;
|
|
|
|
while (target_bitset_bits > 0 && bloom_power < 32)
|
|
{
|
|
bloom_power++;
|
|
target_bitset_bits >>= 1;
|
|
}
|
|
|
|
return bloom_power;
|
|
}
|
|
|
|
/*
|
|
* Determine optimal number of hash functions based on size of filter in bits,
|
|
* and projected total number of elements. The optimal number is the number
|
|
* that minimizes the false positive rate.
|
|
*/
|
|
static int
|
|
optimal_k(uint64 bitset_bits, int64 total_elems)
|
|
{
|
|
int k = rint(log(2.0) * bitset_bits / total_elems);
|
|
|
|
return Max(1, Min(k, MAX_HASH_FUNCS));
|
|
}
|
|
|
|
/*
|
|
* Generate k hash values for element.
|
|
*
|
|
* Caller passes array, which is filled-in with k values determined by hashing
|
|
* caller's element.
|
|
*
|
|
* Only 2 real independent hash functions are actually used to support an
|
|
* interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is
|
|
* used to make this work. The main reason we prefer enhanced double hashing
|
|
* to classic double hashing is that the latter has an issue with collisions
|
|
* when using power of two sized bitsets. See Dillinger & Manolios for full
|
|
* details.
|
|
*/
|
|
static void
|
|
k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
|
|
{
|
|
uint64 hash;
|
|
uint32 x,
|
|
y;
|
|
uint64 m;
|
|
int i;
|
|
|
|
/* Use 64-bit hashing to get two independent 32-bit hashes */
|
|
hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed));
|
|
x = (uint32) hash;
|
|
y = (uint32) (hash >> 32);
|
|
m = filter->m;
|
|
|
|
x = mod_m(x, m);
|
|
y = mod_m(y, m);
|
|
|
|
/* Accumulate hashes */
|
|
hashes[0] = x;
|
|
for (i = 1; i < filter->k_hash_funcs; i++)
|
|
{
|
|
x = mod_m(x + y, m);
|
|
y = mod_m(y + i, m);
|
|
|
|
hashes[i] = x;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Calculate "val MOD m" inexpensively.
|
|
*
|
|
* Assumes that m (which is bitset size) is a power of two.
|
|
*
|
|
* Using a power of two number of bits for bitset size allows us to use bitwise
|
|
* AND operations to calculate the modulo of a hash value. It's also a simple
|
|
* way of avoiding the modulo bias effect.
|
|
*/
|
|
static inline uint32
|
|
mod_m(uint32 val, uint64 m)
|
|
{
|
|
Assert(m <= PG_UINT32_MAX + UINT64CONST(1));
|
|
Assert(((m - 1) & m) == 0);
|
|
|
|
return val & (m - 1);
|
|
}
|