Add hash support functions and hash opclass for contrib/ltree.

This also enables hash join and hash aggregation on ltree columns.

Tommy Pavlicek, reviewed by jian he

Discussion: https://postgr.es/m/CAEhP-W9ZEoHeaP_nKnPCVd_o1c3BAUvq1gWHrq8EbkNRiS9CvQ@mail.gmail.com
This commit is contained in:
Tom Lane 2024-03-21 18:27:49 -04:00
parent 0997e0af27
commit 485f0aa859
9 changed files with 221 additions and 3 deletions

View File

@ -14,7 +14,8 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql \
ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h

View File

@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
-- Check that the hash_ltree() and hash_ltree_extended() function's lower
-- 32 bits match when the seed is 0 and do not match when the seed != 0
SELECT v as value, hash_ltree(v)::bit(32) as standard,
hash_ltree_extended(v, 0)::bit(32) as extended0,
hash_ltree_extended(v, 1)::bit(32) as extended1
FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
value | standard | extended0 | extended1
-------+----------+-----------+-----------
(0 rows)
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
SELECT count(*) from ltreetest;
count
-------
1006
(1 row)
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@ -7833,6 +7852,55 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
(4 rows)
drop index tstidx;
--- test hash index
create index tstidx on ltreetest using hash (t);
set enable_seqscan=off;
set enable_bitmapscan=off;
EXPLAIN (COSTS OFF)
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
QUERY PLAN
--------------------------------------
Index Scan using tstidx on ltreetest
Index Cond: (t = '12.3'::ltree)
(2 rows)
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
t
------
12.3
(1 row)
reset enable_seqscan;
reset enable_bitmapscan;
-- test hash aggregate
set enable_hashagg=on;
set enable_sort=off;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM (
SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
) t2;
QUERY PLAN
-----------------------------------------------------
Aggregate
-> HashAggregate
Group Key: ltreetest.t
-> Append
-> Seq Scan on ltreetest
-> Seq Scan on ltreetest ltreetest_1
(6 rows)
SELECT count(*) FROM (
SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
) t2;
count
-------
1006
(1 row)
reset enable_hashagg;
reset enable_sort;
drop index tstidx;
-- test gist index
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
DETAIL: Valid values are between "4" and "2024".

View File

@ -0,0 +1,23 @@
/* contrib/ltree/ltree--1.2--1.3.sql */
-- Upgrade script for ltree 1.2 -> 1.3: adds the two hash support functions,
-- a default hash operator class for ltree, and marks ltree equality as
-- hashable.
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
-- 32-bit hash of an ltree value, implemented in C in the extension's shared
-- library (MODULE_PATHNAME).  Declared STRICT, so it is not called for NULL
-- input.
CREATE FUNCTION hash_ltree(ltree)
RETURNS integer
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-- 64-bit hash of an ltree value; the second argument is the seed.
CREATE FUNCTION hash_ltree_extended(ltree, bigint)
RETURNS bigint
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
-- Default hash operator class for ltree: strategy 1 is the equality operator,
-- support function 1 is the 32-bit hash and support function 2 is the seeded
-- 64-bit hash.
CREATE OPERATOR CLASS hash_ltree_ops
DEFAULT FOR TYPE ltree USING hash
AS
OPERATOR 1 = ,
FUNCTION 1 hash_ltree(ltree),
FUNCTION 2 hash_ltree_extended(ltree, bigint);
-- Flag the existing ltree = operator as hashable so that it can be used for
-- hash joins.
ALTER OPERATOR =(ltree, ltree) SET (HASHES);

View File

@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
default_version = '1.2'
default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true

View File

@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@ -24,6 +25,8 @@ PG_FUNCTION_INFO_V1(ltree_eq);
PG_FUNCTION_INFO_V1(ltree_ne);
PG_FUNCTION_INFO_V1(ltree_ge);
PG_FUNCTION_INFO_V1(ltree_gt);
PG_FUNCTION_INFO_V1(hash_ltree);
PG_FUNCTION_INFO_V1(hash_ltree_extended);
PG_FUNCTION_INFO_V1(nlevel);
PG_FUNCTION_INFO_V1(ltree_isparent);
PG_FUNCTION_INFO_V1(ltree_risparent);
@ -129,6 +132,72 @@ ltree_ne(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res != 0);
}
/*
 * hash_ltree
 *		Produce a 32-bit hash of an ltree value.
 *
 * Every label of the path is hashed separately with hash_any(), and the
 * per-label hashes are folded into a running value that starts at 1.  A path
 * without any labels therefore hashes to 1.
 */
Datum
hash_ltree(PG_FUNCTION_ARGS)
{
	ltree	   *path = PG_GETARG_LTREE_P(0);
	ltree_level *label = LTREE_FIRST(path);
	uint32		hash = 1;
	int			remaining;

	for (remaining = path->numlevel; remaining > 0; remaining--)
	{
		uint32		labelHash;

		labelHash = DatumGetUInt32(hash_any((unsigned char *) label->name,
											 label->len));

		/*
		 * Fold this label into the running hash: multiply the value so far
		 * by 31 and add the label's hash, with unsigned wraparound.  The
		 * combining rule is the one used by hash_array(); see there for the
		 * rationale.
		 */
		hash = 31 * hash + labelHash;

		label = LEVEL_NEXT(label);
	}

	PG_FREE_IF_COPY(path, 0);
	PG_RETURN_UINT32(hash);
}
/*
 * hash_ltree_extended
 *		Produce a 64-bit, seeded hash of an ltree value.
 *
 * Argument 0 is the ltree, argument 1 is the seed.  Every label is hashed
 * with hash_any_extended() using that seed, and the per-label hashes are
 * folded together with the same "times 31 plus next" rule as hash_ltree().
 */
Datum
hash_ltree_extended(PG_FUNCTION_ARGS)
{
	ltree	   *path = PG_GETARG_LTREE_P(0);
	const uint64 seed = PG_GETARG_INT64(1);
	ltree_level *label = LTREE_FIRST(path);
	uint64		hash = 1;
	int			remaining = path->numlevel;

	/*
	 * An empty path has no labels to mix the seed into.  Return 1 + seed so
	 * that the low 32 bits still agree with hash_ltree() when the seed is 0
	 * (a requirement on hash index support functions), while a nonzero seed
	 * still changes the result.
	 */
	if (remaining == 0)
	{
		PG_FREE_IF_COPY(path, 0);
		PG_RETURN_UINT64(hash + seed);
	}

	for (; remaining > 0; remaining--)
	{
		uint64		labelHash;

		labelHash = DatumGetUInt64(hash_any_extended((unsigned char *) label->name,
													  label->len,
													  seed));

		/* Multiply the running value by 31 and add this label's hash. */
		hash = 31 * hash + labelHash;

		label = LEVEL_NEXT(label);
	}

	PG_FREE_IF_COPY(path, 0);
	PG_RETURN_UINT64(hash);
}
Datum
nlevel(PG_FUNCTION_ARGS)
{

View File

@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
CREATE INDEX path_hash_idx ON test USING hash(path);

View File

@ -30,8 +30,9 @@ contrib_targets += ltree
install_data(
'ltree.control',
'ltree--1.0--1.1.sql',
'ltree--1.1--1.2.sql',
'ltree--1.1.sql',
'ltree--1.1--1.2.sql',
'ltree--1.2--1.3.sql',
kwargs: contrib_data_args,
)

View File

@ -282,9 +282,21 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
-- Check that the hash_ltree() and hash_ltree_extended() function's lower
-- 32 bits match when the seed is 0 and do not match when the seed != 0
SELECT v as value, hash_ltree(v)::bit(32) as standard,
hash_ltree_extended(v, 0)::bit(32) as extended0,
hash_ltree_extended(v, 1)::bit(32) as extended1
FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
SELECT count(*) from ltreetest;
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@ -329,6 +341,41 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
drop index tstidx;
--- test hash index
create index tstidx on ltreetest using hash (t);
set enable_seqscan=off;
set enable_bitmapscan=off;
EXPLAIN (COSTS OFF)
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
reset enable_seqscan;
reset enable_bitmapscan;
-- test hash aggregate
set enable_hashagg=on;
set enable_sort=off;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM (
SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
) t2;
SELECT count(*) FROM (
SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
) t2;
reset enable_hashagg;
reset enable_sort;
drop index tstidx;
-- test gist index
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2028));

View File

@ -623,6 +623,13 @@ Europe &amp; Russia*@ &amp; !Transportation
<literal>&gt;=</literal>, <literal>&gt;</literal>
</para>
</listitem>
<listitem>
<para>
Hash index over <type>ltree</type>:
<literal>=</literal>
</para>
</listitem>
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>