From 4b754d6c16e16cc1a1adf12ab0f48603069a0efd Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 18 Jan 2020 01:11:39 +0300 Subject: [PATCH] Avoid full scan of GIN indexes when possible The strategy of GIN index scan is driven by opclass-specific extract_query method. This method that needed search mode is GIN_SEARCH_MODE_ALL. This mode means that matching tuple may contain none of extracted entries. Simple example is '!term' tsquery, which doesn't need any term to exist in matching tsvector. In order to handle such scan key GIN calculates virtual entry, which contains all TIDs of all entries of attribute. In fact this is full scan of index attribute. And typically this is very slow, but allows to handle some queries correctly in GIN. However, current algorithm calculate such virtual entry for each GIN_SEARCH_MODE_ALL scan key even if they are multiple for the same attribute. This is clearly not optimal. This commit improves the situation by introduction of "exclude only" scan keys. Such scan keys are not capable to return set of matching TIDs. Instead, they are capable only to filter TIDs produced by normal scan keys. Therefore, each attribute should contain at least one normal scan key, while rest of them may be "exclude only" if search mode is GIN_SEARCH_MODE_ALL. The same optimization might be applied to the whole scan, not per-attribute. But that leads to NULL values elimination problem. There is trade-off between multiple possible ways to do this. We probably want to do this later using some cost-based decision algorithm. Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane --- contrib/pg_trgm/expected/pg_trgm.out | 101 ++++++++++++++++++++ contrib/pg_trgm/sql/pg_trgm.sql | 27 ++++++ src/backend/access/gin/ginget.c | 84 +++++++++++++---- src/backend/access/gin/ginscan.c | 127 +++++++++++++++---------- src/backend/utils/adt/selfuncs.c | 37 ++++++-- src/include/access/gin_private.h | 14 +++ src/test/regress/expected/gin.out | 131 +++++++++++++++++++++++++- src/test/regress/expected/tsearch.out | 35 +++++++ src/test/regress/sql/gin.sql | 91 +++++++++++++++++- src/test/regress/sql/tsearch.sql | 9 ++ 10 files changed, 579 insertions(+), 77 deletions(-) diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index b3e709f496..91596f8645 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -3498,6 +3498,107 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; 1000 (1 row) +-- check handling of indexquals that generate no searchable conditions +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) +(5 rows) + +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 19 +(1 row) + +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) +(5 rows) + +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 19 +(1 row) + +-- ensure that pending-list items are handled correctly, too +create temp table t_test_trgm(t text COLLATE "C"); +create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); +insert into t_test_trgm values ('qwerty99'), ('qwerty01'); +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) +(5 rows) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 1 +(1 row) + +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) +(5 rows) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 1 +(1 row) + +-- run the same queries with sequential scan to check the results +set enable_bitmapscan=off; +set enable_seqscan=on; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 19 +(1 row) + +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 19 +(1 row) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 1 +(1 row) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 1 +(1 row) + +reset enable_bitmapscan; create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index 08459e64c3..2019d1f6be 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -55,6 +55,33 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098 select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; +-- check handling of indexquals that generate no searchable conditions +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +-- ensure that pending-list items are handled correctly, too +create temp table t_test_trgm(t text COLLATE "C"); +create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); +insert into t_test_trgm values ('qwerty99'), ('qwerty01'); +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + +-- run the same queries with sequential scan to check the results +set enable_bitmapscan=off; +set enable_seqscan=on; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +reset enable_bitmapscan; + create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 7ae4ef05c2..50fe38b711 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -528,8 +528,20 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key) * order, until the consistent function says that none of the remaining * entries can form a match, without any items from the required set. The * rest go to the additional set. + * + * Exclude-only scan keys are known to have no required entries. */ - if (key->nentries > 1) + if (key->excludeOnly) + { + MemoryContextSwitchTo(so->keyCtx); + + key->nrequired = 0; + key->nadditional = key->nentries; + key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry)); + for (i = 0; i < key->nadditional; i++) + key->additionalEntries[i] = key->scanEntry[i]; + } + else if (key->nentries > 1) { MemoryContextSwitchTo(so->tempCtx); @@ -1008,37 +1020,52 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, minItem = entry->curItem; } - if (allFinished) + if (allFinished && !key->excludeOnly) { /* all entries are finished */ key->isFinished = true; return; } - /* - * Ok, we now know that there are no matches < minItem. - * - * If minItem is lossy, it means that there were no exact items on the - * page among requiredEntries, because lossy pointers sort after exact - * items. However, there might be exact items for the same page among - * additionalEntries, so we mustn't advance past them. - */ - if (ItemPointerIsLossyPage(&minItem)) + if (!key->excludeOnly) { - if (GinItemPointerGetBlockNumber(&advancePast) < - GinItemPointerGetBlockNumber(&minItem)) + /* + * For a normal scan key, we now know there are no matches < minItem. + * + * If minItem is lossy, it means that there were no exact items on the + * page among requiredEntries, because lossy pointers sort after exact + * items. However, there might be exact items for the same page among + * additionalEntries, so we mustn't advance past them. + */ + if (ItemPointerIsLossyPage(&minItem)) { + if (GinItemPointerGetBlockNumber(&advancePast) < + GinItemPointerGetBlockNumber(&minItem)) + { + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&minItem), + InvalidOffsetNumber); + } + } + else + { + Assert(GinItemPointerGetOffsetNumber(&minItem) > 0); ItemPointerSet(&advancePast, GinItemPointerGetBlockNumber(&minItem), - InvalidOffsetNumber); + OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem))); } } else { - Assert(GinItemPointerGetOffsetNumber(&minItem) > 0); - ItemPointerSet(&advancePast, - GinItemPointerGetBlockNumber(&minItem), - OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem))); + /* + * excludeOnly scan keys don't have any entries that are necessarily + * present in matching items. So, we consider the item just after + * advancePast. + */ + Assert(key->nrequired == 0); + ItemPointerSet(&minItem, + GinItemPointerGetBlockNumber(&advancePast), + OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast))); } /* @@ -1266,6 +1293,20 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, { GinScanKey key = so->keys + i; + /* + * If we're considering a lossy page, skip excludeOnly keys, They + * can't exclude the whole page anyway. + */ + if (ItemPointerIsLossyPage(item) && key->excludeOnly) + { + /* + * ginNewScanKey() should never mark the first key as + * excludeOnly. + */ + Assert(i > 0); + continue; + } + /* Fetch the next item for this key that is > advancePast. */ keyGetItem(&so->ginstate, so->tempCtx, key, advancePast, scan->xs_snapshot); @@ -1736,11 +1777,14 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) } /* - * Now return "true" if all scan keys have at least one matching datum + * All scan keys except excludeOnly require at least one entry to match. + * excludeOnly keys are an exception, because their implied + * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all + * non-excludeOnly scan keys have at least one match. */ for (i = 0; i < so->nkeys; i++) { - if (pos->hasMatchKey[i] == false) + if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly) return false; } diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index c15d06ceba..0a685bdbfc 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -125,6 +125,27 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, return scanEntry; } +/* + * Append hidden scan entry of given category to the scan key. + * + * NB: this had better be called at most once per scan key, since + * ginFillScanKey leaves room for only one hidden entry. Currently, + * it seems sufficiently clear that this is true that we don't bother + * with any cross-check logic. + */ +static void +ginScanKeyAddHiddenEntry(GinScanOpaque so, GinScanKey key, + GinNullCategory queryCategory) +{ + int i = key->nentries++; + + /* strategy is of no interest because this is not a partial-match item */ + key->scanEntry[i] = ginFillScanEntry(so, key->attnum, + InvalidStrategy, key->searchMode, + (Datum) 0, queryCategory, + false, NULL); +} + /* * Initialize the next GinScanKey using the output from the extractQueryFn */ @@ -137,17 +158,16 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, { GinScanKey key = &(so->keys[so->nkeys++]); GinState *ginstate = &so->ginstate; - uint32 nUserQueryValues = nQueryValues; uint32 i; - /* Non-default search modes add one "hidden" entry to each key */ - if (searchMode != GIN_SEARCH_MODE_DEFAULT) - nQueryValues++; key->nentries = nQueryValues; - key->nuserentries = nUserQueryValues; + key->nuserentries = nQueryValues; - key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * nQueryValues); - key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * nQueryValues); + /* Allocate one extra array slot for possible "hidden" entry */ + key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * + (nQueryValues + 1)); + key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * + (nQueryValues + 1)); key->query = query; key->queryValues = queryValues; @@ -157,6 +177,12 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, key->searchMode = searchMode; key->attnum = attnum; + /* + * Initially, scan keys of GIN_SEARCH_MODE_ALL mode are marked + * excludeOnly. This might get changed later. + */ + key->excludeOnly = (searchMode == GIN_SEARCH_MODE_ALL); + ItemPointerSetMin(&key->curItem); key->curItemMatches = false; key->recheckCurItem = false; @@ -168,6 +194,7 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, ginInitConsistentFunction(ginstate, key); + /* Set up normal scan entries using extractQueryFn's outputs */ for (i = 0; i < nQueryValues; i++) { Datum queryKey; @@ -175,54 +202,28 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, bool isPartialMatch; Pointer this_extra; - if (i < nUserQueryValues) - { - /* set up normal entry using extractQueryFn's outputs */ - queryKey = queryValues[i]; - queryCategory = queryCategories[i]; - isPartialMatch = - (ginstate->canPartialMatch[attnum - 1] && partial_matches) - ? partial_matches[i] : false; - this_extra = (extra_data) ? extra_data[i] : NULL; - } - else - { - /* set up hidden entry */ - queryKey = (Datum) 0; - switch (searchMode) - { - case GIN_SEARCH_MODE_INCLUDE_EMPTY: - queryCategory = GIN_CAT_EMPTY_ITEM; - break; - case GIN_SEARCH_MODE_ALL: - queryCategory = GIN_CAT_EMPTY_QUERY; - break; - case GIN_SEARCH_MODE_EVERYTHING: - queryCategory = GIN_CAT_EMPTY_QUERY; - break; - default: - elog(ERROR, "unexpected searchMode: %d", searchMode); - queryCategory = 0; /* keep compiler quiet */ - break; - } - isPartialMatch = false; - this_extra = NULL; - - /* - * We set the strategy to a fixed value so that ginFillScanEntry - * can combine these entries for different scan keys. This is - * safe because the strategy value in the entry struct is only - * used for partial-match cases. It's OK to overwrite our local - * variable here because this is the last loop iteration. - */ - strategy = InvalidStrategy; - } + queryKey = queryValues[i]; + queryCategory = queryCategories[i]; + isPartialMatch = + (ginstate->canPartialMatch[attnum - 1] && partial_matches) + ? partial_matches[i] : false; + this_extra = (extra_data) ? extra_data[i] : NULL; key->scanEntry[i] = ginFillScanEntry(so, attnum, strategy, searchMode, queryKey, queryCategory, isPartialMatch, this_extra); } + + /* + * For GIN_SEARCH_MODE_INCLUDE_EMPTY and GIN_SEARCH_MODE_EVERYTHING search + * modes, we add the "hidden" entry immediately. GIN_SEARCH_MODE_ALL is + * handled later, since we might be able to omit the hidden entry for it. + */ + if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_ITEM); + else if (searchMode == GIN_SEARCH_MODE_EVERYTHING) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); } /* @@ -265,6 +266,7 @@ ginNewScanKey(IndexScanDesc scan) GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; bool hasNullQuery = false; + bool attrHasNormalScan[INDEX_MAX_KEYS] = {false}; MemoryContext oldCtx; /* @@ -371,6 +373,33 @@ ginNewScanKey(IndexScanDesc scan) skey->sk_argument, nQueryValues, queryValues, categories, partial_matches, extra_data); + + /* Remember if we had any non-excludeOnly keys */ + if (searchMode != GIN_SEARCH_MODE_ALL) + attrHasNormalScan[skey->sk_attno - 1] = true; + } + + /* + * Processing GIN_SEARCH_MODE_ALL scan keys requires us to make a second + * pass over the scan keys. Above we marked each such scan key as + * excludeOnly. If the involved column has any normal (not excludeOnly) + * scan key as well, then we can leave it like that. Otherwise, one + * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry + * and be set to normal (excludeOnly = false). + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->searchMode != GIN_SEARCH_MODE_ALL) + continue; + + if (!attrHasNormalScan[key->attnum - 1]) + { + key->excludeOnly = false; + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); + attrHasNormalScan[key->attnum - 1] = true; + } } /* diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 18d77ac0b7..7c6f0574b3 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6356,7 +6356,8 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, typedef struct { - bool haveFullScan; + bool attHasFullScan[INDEX_MAX_KEYS]; + bool attHasNormalScan[INDEX_MAX_KEYS]; double partialEntries; double exactEntries; double searchEntries; @@ -6452,16 +6453,21 @@ gincost_pattern(IndexOptInfo *index, int indexcol, counts->searchEntries++; } - if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + if (searchMode == GIN_SEARCH_MODE_DEFAULT) + { + counts->attHasNormalScan[indexcol] = true; + } + else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) { /* Treat "include empty" like an exact-match item */ + counts->attHasNormalScan[indexcol] = true; counts->exactEntries++; counts->searchEntries++; } - else if (searchMode != GIN_SEARCH_MODE_DEFAULT) + else { /* It's GIN_SEARCH_MODE_ALL */ - counts->haveFullScan = true; + counts->attHasFullScan[indexcol] = true; } return true; @@ -6597,7 +6603,8 @@ gincost_scalararrayopexpr(PlannerInfo *root, /* We ignore array elements that are unsatisfiable patterns */ numPossible++; - if (elemcounts.haveFullScan) + if (elemcounts.attHasFullScan[indexcol] && + !elemcounts.attHasNormalScan[indexcol]) { /* * Full index scan will be required. We treat this as if @@ -6654,6 +6661,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, numEntries; GinQualCounts counts; bool matchPossible; + bool fullIndexScan; double partialScale; double entryPagesFetched, dataPagesFetched, @@ -6665,6 +6673,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Relation indexRel; GinStatsData ginStats; ListCell *lc; + int i; /* * Obtain statistical information from the meta page, if possible. Else @@ -6821,7 +6830,23 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, return; } - if (counts.haveFullScan || indexQuals == NIL) + /* + * If attribute has a full scan and at the same time doesn't have normal + * scan, then we'll have to scan all non-null entries of that attribute. + * Currently, we don't have per-attribute statistics for GIN. Thus, we + * must assume the whole GIN index has to be scanned in this case. + */ + fullIndexScan = false; + for (i = 0; i < index->nkeycolumns; i++) + { + if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i]) + { + fullIndexScan = true; + break; + } + } + + if (fullIndexScan || indexQuals == NIL) { /* * Full index scan will be required. We treat this as if every key in diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index a136f7f918..71eeac205c 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -303,6 +303,20 @@ typedef struct GinScanKeyData int32 searchMode; OffsetNumber attnum; + /* + * An excludeOnly scan key is not able to enumerate all matching tuples. + * That is, to be semantically correct on its own, it would need to have a + * GIN_CAT_EMPTY_QUERY scanEntry, but it doesn't. Such a key can still be + * used to filter tuples returned by other scan keys, so we will get the + * right answers as long as there's at least one non-excludeOnly scan key + * for each index attribute considered by the search. For efficiency + * reasons we don't want to have unnecessary GIN_CAT_EMPTY_QUERY entries, + * so we will convert an excludeOnly scan key to non-excludeOnly (by + * adding a GIN_CAT_EMPTY_QUERY scanEntry) only if there are no other + * non-excludeOnly scan keys. + */ + bool excludeOnly; + /* * Match status data. curItem is the TID most recently tested (could be a * lossy-page pointer). curItemMatches is true if it passes the diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out index a3911a6c6c..e3c4805c23 100644 --- a/src/test/regress/expected/gin.out +++ b/src/test/regress/expected/gin.out @@ -1,7 +1,7 @@ -- -- Test GIN indexes. -- --- There are other tests to test different GIN opclassed. This is for testing +-- There are other tests to test different GIN opclasses. This is for testing -- GIN itself. -- Create and populate a test table with a GIN index. create table gin_test_tbl(i int4[]) with (autovacuum_enabled = off); @@ -35,3 +35,132 @@ insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 1000) g; insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl; +-- Test optimization of empty queries +create temp table t_gin_test_tbl(i int4[], j int4[]); +create index on t_gin_test_tbl using gin (i, j); +insert into t_gin_test_tbl +values + (null, null), + ('{}', null), + ('{1}', null), + ('{1,2}', null), + (null, '{}'), + (null, '{10}'), + ('{1,2}', '{10}'), + ('{2}', '{10}'), + ('{1,3}', '{}'), + ('{1,1}', '{10}'); +set enable_seqscan = off; +explain (costs off) +select * from t_gin_test_tbl where array[0] <@ i; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on t_gin_test_tbl + Recheck Cond: ('{0}'::integer[] <@ i) + -> Bitmap Index Scan on t_gin_test_tbl_i_j_idx + Index Cond: (i @> '{0}'::integer[]) +(4 rows) + +select * from t_gin_test_tbl where array[0] <@ i; + i | j +---+--- +(0 rows) + +select * from t_gin_test_tbl where array[0] <@ i and '{}'::int4[] <@ j; + i | j +---+--- +(0 rows) + +explain (costs off) +select * from t_gin_test_tbl where i @> '{}'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on t_gin_test_tbl + Recheck Cond: (i @> '{}'::integer[]) + -> Bitmap Index Scan on t_gin_test_tbl_i_j_idx + Index Cond: (i @> '{}'::integer[]) +(4 rows) + +select * from t_gin_test_tbl where i @> '{}'; + i | j +-------+------ + {} | + {1} | + {1,2} | + {1,2} | {10} + {2} | {10} + {1,3} | {} + {1,1} | {10} +(7 rows) + +create function explain_query_json(query_sql text) +returns table (explain_line json) +language plpgsql as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute 'EXPLAIN (ANALYZE, FORMAT json) ' || query_sql; +end; +$$; +create function execute_text_query_index(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute query_sql; +end; +$$; +create function execute_text_query_heap(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = on; + set enable_bitmapscan = off; + return query execute query_sql; +end; +$$; +-- check number of rows returned by index and removed by recheck +select + query, + js->0->'Plan'->'Plans'->0->'Actual Rows' as "return by index", + js->0->'Plan'->'Rows Removed by Index Recheck' as "removed by recheck", + (res_index = res_heap) as "match" +from + (values + ($$ i @> '{}' $$), + ($$ j @> '{}' $$), + ($$ i @> '{}' and j @> '{}' $$), + ($$ i @> '{1}' $$), + ($$ i @> '{1}' and j @> '{}' $$), + ($$ i @> '{1}' and i @> '{}' and j @> '{}' $$), + ($$ j @> '{10}' $$), + ($$ j @> '{10}' and i @> '{}' $$), + ($$ j @> '{10}' and j @> '{}' and i @> '{}' $$), + ($$ i @> '{1}' and j @> '{10}' $$) + ) q(query), + lateral explain_query_json($$select * from t_gin_test_tbl where $$ || query) js, + lateral execute_text_query_index($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_index, + lateral execute_text_query_heap($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_heap; + query | return by index | removed by recheck | match +-------------------------------------------+-----------------+--------------------+------- + i @> '{}' | 7 | 0 | t + j @> '{}' | 6 | 0 | t + i @> '{}' and j @> '{}' | 4 | 0 | t + i @> '{1}' | 5 | 0 | t + i @> '{1}' and j @> '{}' | 3 | 0 | t + i @> '{1}' and i @> '{}' and j @> '{}' | 3 | 0 | t + j @> '{10}' | 4 | 0 | t + j @> '{10}' and i @> '{}' | 3 | 0 | t + j @> '{10}' and j @> '{}' and i @> '{}' | 3 | 0 | t + i @> '{1}' and j @> '{10}' | 2 | 0 | t +(10 rows) + +reset enable_seqscan; +reset enable_bitmapscan; +drop table t_gin_test_tbl; diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 7af289927c..fe1cd9deb0 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -337,6 +337,41 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; 508 (1 row) +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + QUERY PLAN +----------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_tsvector + Recheck Cond: (a @@ '!''qh'''::tsquery) + -> Bitmap Index Scan on wowidx + Index Cond: (a @@ '!''qh'''::tsquery) +(5 rows) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + count +------- + 410 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on test_tsvector + Recheck Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) + -> Bitmap Index Scan on wowidx + Index Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) +(5 rows) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + count +------- + 60 +(1 row) + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10; diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql index c566e9b58c..836717c996 100644 --- a/src/test/regress/sql/gin.sql +++ b/src/test/regress/sql/gin.sql @@ -1,7 +1,7 @@ -- -- Test GIN indexes. -- --- There are other tests to test different GIN opclassed. This is for testing +-- There are other tests to test different GIN opclasses. This is for testing -- GIN itself. -- Create and populate a test table with a GIN index. @@ -34,3 +34,92 @@ insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl; + +-- Test optimization of empty queries +create temp table t_gin_test_tbl(i int4[], j int4[]); +create index on t_gin_test_tbl using gin (i, j); +insert into t_gin_test_tbl +values + (null, null), + ('{}', null), + ('{1}', null), + ('{1,2}', null), + (null, '{}'), + (null, '{10}'), + ('{1,2}', '{10}'), + ('{2}', '{10}'), + ('{1,3}', '{}'), + ('{1,1}', '{10}'); + +set enable_seqscan = off; +explain (costs off) +select * from t_gin_test_tbl where array[0] <@ i; +select * from t_gin_test_tbl where array[0] <@ i; +select * from t_gin_test_tbl where array[0] <@ i and '{}'::int4[] <@ j; + +explain (costs off) +select * from t_gin_test_tbl where i @> '{}'; +select * from t_gin_test_tbl where i @> '{}'; + +create function explain_query_json(query_sql text) +returns table (explain_line json) +language plpgsql as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute 'EXPLAIN (ANALYZE, FORMAT json) ' || query_sql; +end; +$$; + +create function execute_text_query_index(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute query_sql; +end; +$$; + +create function execute_text_query_heap(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = on; + set enable_bitmapscan = off; + return query execute query_sql; +end; +$$; + +-- check number of rows returned by index and removed by recheck +select + query, + js->0->'Plan'->'Plans'->0->'Actual Rows' as "return by index", + js->0->'Plan'->'Rows Removed by Index Recheck' as "removed by recheck", + (res_index = res_heap) as "match" +from + (values + ($$ i @> '{}' $$), + ($$ j @> '{}' $$), + ($$ i @> '{}' and j @> '{}' $$), + ($$ i @> '{1}' $$), + ($$ i @> '{1}' and j @> '{}' $$), + ($$ i @> '{1}' and i @> '{}' and j @> '{}' $$), + ($$ j @> '{10}' $$), + ($$ j @> '{10}' and i @> '{}' $$), + ($$ j @> '{10}' and j @> '{}' and i @> '{}' $$), + ($$ i @> '{1}' and j @> '{10}' $$) + ) q(query), + lateral explain_query_json($$select * from t_gin_test_tbl where $$ || query) js, + lateral execute_text_query_index($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_index, + lateral execute_text_query_heap($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_heap; + +reset enable_seqscan; +reset enable_bitmapscan; + +drop table t_gin_test_tbl; diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index ece80b983c..14da7edd84 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -111,6 +111,15 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme'; SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');