diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index b3e709f496..91596f8645 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -3498,6 +3498,107 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; 1000 (1 row) +-- check handling of indexquals that generate no searchable conditions +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) +(5 rows) + +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 19 +(1 row) + +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) +(5 rows) + +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 19 +(1 row) + +-- ensure that pending-list items are handled correctly, too +create temp table t_test_trgm(t text COLLATE "C"); +create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); +insert into t_test_trgm values ('qwerty99'), ('qwerty01'); +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text)) +(5 rows) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 1 +(1 row) + +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text)) +(5 rows) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 1 +(1 row) + +-- run the same queries with sequential scan to check the results +set enable_bitmapscan=off; +set enable_seqscan=on; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 19 +(1 row) + +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 19 +(1 row) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; + count +------- + 1 +(1 row) + +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + count +------- + 1 +(1 row) + +reset enable_bitmapscan; create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index 08459e64c3..2019d1f6be 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -55,6 +55,33 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098 select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}'; +-- check handling of indexquals that generate no searchable conditions +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +explain (costs off) +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +-- ensure that pending-list items are handled correctly, too +create temp table t_test_trgm(t text COLLATE "C"); +create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); +insert into t_test_trgm values ('qwerty99'), ('qwerty01'); +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +explain (costs off) +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; + +-- run the same queries with sequential scan to check the results +set enable_bitmapscan=off; +set enable_seqscan=on; +select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; +select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +reset enable_bitmapscan; + create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); insert into test2 values ('quark'); diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 7ae4ef05c2..50fe38b711 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -528,8 +528,20 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key) * order, until the consistent function says that none of the remaining * entries can form a match, without any items from the required set. The * rest go to the additional set. + * + * Exclude-only scan keys are known to have no required entries. */ - if (key->nentries > 1) + if (key->excludeOnly) + { + MemoryContextSwitchTo(so->keyCtx); + + key->nrequired = 0; + key->nadditional = key->nentries; + key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry)); + for (i = 0; i < key->nadditional; i++) + key->additionalEntries[i] = key->scanEntry[i]; + } + else if (key->nentries > 1) { MemoryContextSwitchTo(so->tempCtx); @@ -1008,37 +1020,52 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, minItem = entry->curItem; } - if (allFinished) + if (allFinished && !key->excludeOnly) { /* all entries are finished */ key->isFinished = true; return; } - /* - * Ok, we now know that there are no matches < minItem. - * - * If minItem is lossy, it means that there were no exact items on the - * page among requiredEntries, because lossy pointers sort after exact - * items. However, there might be exact items for the same page among - * additionalEntries, so we mustn't advance past them. - */ - if (ItemPointerIsLossyPage(&minItem)) + if (!key->excludeOnly) { - if (GinItemPointerGetBlockNumber(&advancePast) < - GinItemPointerGetBlockNumber(&minItem)) + /* + * For a normal scan key, we now know there are no matches < minItem. + * + * If minItem is lossy, it means that there were no exact items on the + * page among requiredEntries, because lossy pointers sort after exact + * items. However, there might be exact items for the same page among + * additionalEntries, so we mustn't advance past them. + */ + if (ItemPointerIsLossyPage(&minItem)) { + if (GinItemPointerGetBlockNumber(&advancePast) < + GinItemPointerGetBlockNumber(&minItem)) + { + ItemPointerSet(&advancePast, + GinItemPointerGetBlockNumber(&minItem), + InvalidOffsetNumber); + } + } + else + { + Assert(GinItemPointerGetOffsetNumber(&minItem) > 0); ItemPointerSet(&advancePast, GinItemPointerGetBlockNumber(&minItem), - InvalidOffsetNumber); + OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem))); } } else { - Assert(GinItemPointerGetOffsetNumber(&minItem) > 0); - ItemPointerSet(&advancePast, - GinItemPointerGetBlockNumber(&minItem), - OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem))); + /* + * excludeOnly scan keys don't have any entries that are necessarily + * present in matching items. So, we consider the item just after + * advancePast. + */ + Assert(key->nrequired == 0); + ItemPointerSet(&minItem, + GinItemPointerGetBlockNumber(&advancePast), + OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast))); } /* @@ -1266,6 +1293,20 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, { GinScanKey key = so->keys + i; + /* + * If we're considering a lossy page, skip excludeOnly keys, They + * can't exclude the whole page anyway. + */ + if (ItemPointerIsLossyPage(item) && key->excludeOnly) + { + /* + * ginNewScanKey() should never mark the first key as + * excludeOnly. + */ + Assert(i > 0); + continue; + } + /* Fetch the next item for this key that is > advancePast. */ keyGetItem(&so->ginstate, so->tempCtx, key, advancePast, scan->xs_snapshot); @@ -1736,11 +1777,14 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) } /* - * Now return "true" if all scan keys have at least one matching datum + * All scan keys except excludeOnly require at least one entry to match. + * excludeOnly keys are an exception, because their implied + * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all + * non-excludeOnly scan keys have at least one match. */ for (i = 0; i < so->nkeys; i++) { - if (pos->hasMatchKey[i] == false) + if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly) return false; } diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index c15d06ceba..0a685bdbfc 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -125,6 +125,27 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, return scanEntry; } +/* + * Append hidden scan entry of given category to the scan key. + * + * NB: this had better be called at most once per scan key, since + * ginFillScanKey leaves room for only one hidden entry. Currently, + * it seems sufficiently clear that this is true that we don't bother + * with any cross-check logic. + */ +static void +ginScanKeyAddHiddenEntry(GinScanOpaque so, GinScanKey key, + GinNullCategory queryCategory) +{ + int i = key->nentries++; + + /* strategy is of no interest because this is not a partial-match item */ + key->scanEntry[i] = ginFillScanEntry(so, key->attnum, + InvalidStrategy, key->searchMode, + (Datum) 0, queryCategory, + false, NULL); +} + /* * Initialize the next GinScanKey using the output from the extractQueryFn */ @@ -137,17 +158,16 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, { GinScanKey key = &(so->keys[so->nkeys++]); GinState *ginstate = &so->ginstate; - uint32 nUserQueryValues = nQueryValues; uint32 i; - /* Non-default search modes add one "hidden" entry to each key */ - if (searchMode != GIN_SEARCH_MODE_DEFAULT) - nQueryValues++; key->nentries = nQueryValues; - key->nuserentries = nUserQueryValues; + key->nuserentries = nQueryValues; - key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * nQueryValues); - key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * nQueryValues); + /* Allocate one extra array slot for possible "hidden" entry */ + key->scanEntry = (GinScanEntry *) palloc(sizeof(GinScanEntry) * + (nQueryValues + 1)); + key->entryRes = (GinTernaryValue *) palloc0(sizeof(GinTernaryValue) * + (nQueryValues + 1)); key->query = query; key->queryValues = queryValues; @@ -157,6 +177,12 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, key->searchMode = searchMode; key->attnum = attnum; + /* + * Initially, scan keys of GIN_SEARCH_MODE_ALL mode are marked + * excludeOnly. This might get changed later. + */ + key->excludeOnly = (searchMode == GIN_SEARCH_MODE_ALL); + ItemPointerSetMin(&key->curItem); key->curItemMatches = false; key->recheckCurItem = false; @@ -168,6 +194,7 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, ginInitConsistentFunction(ginstate, key); + /* Set up normal scan entries using extractQueryFn's outputs */ for (i = 0; i < nQueryValues; i++) { Datum queryKey; @@ -175,54 +202,28 @@ ginFillScanKey(GinScanOpaque so, OffsetNumber attnum, bool isPartialMatch; Pointer this_extra; - if (i < nUserQueryValues) - { - /* set up normal entry using extractQueryFn's outputs */ - queryKey = queryValues[i]; - queryCategory = queryCategories[i]; - isPartialMatch = - (ginstate->canPartialMatch[attnum - 1] && partial_matches) - ? partial_matches[i] : false; - this_extra = (extra_data) ? extra_data[i] : NULL; - } - else - { - /* set up hidden entry */ - queryKey = (Datum) 0; - switch (searchMode) - { - case GIN_SEARCH_MODE_INCLUDE_EMPTY: - queryCategory = GIN_CAT_EMPTY_ITEM; - break; - case GIN_SEARCH_MODE_ALL: - queryCategory = GIN_CAT_EMPTY_QUERY; - break; - case GIN_SEARCH_MODE_EVERYTHING: - queryCategory = GIN_CAT_EMPTY_QUERY; - break; - default: - elog(ERROR, "unexpected searchMode: %d", searchMode); - queryCategory = 0; /* keep compiler quiet */ - break; - } - isPartialMatch = false; - this_extra = NULL; - - /* - * We set the strategy to a fixed value so that ginFillScanEntry - * can combine these entries for different scan keys. This is - * safe because the strategy value in the entry struct is only - * used for partial-match cases. It's OK to overwrite our local - * variable here because this is the last loop iteration. - */ - strategy = InvalidStrategy; - } + queryKey = queryValues[i]; + queryCategory = queryCategories[i]; + isPartialMatch = + (ginstate->canPartialMatch[attnum - 1] && partial_matches) + ? partial_matches[i] : false; + this_extra = (extra_data) ? extra_data[i] : NULL; key->scanEntry[i] = ginFillScanEntry(so, attnum, strategy, searchMode, queryKey, queryCategory, isPartialMatch, this_extra); } + + /* + * For GIN_SEARCH_MODE_INCLUDE_EMPTY and GIN_SEARCH_MODE_EVERYTHING search + * modes, we add the "hidden" entry immediately. GIN_SEARCH_MODE_ALL is + * handled later, since we might be able to omit the hidden entry for it. + */ + if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_ITEM); + else if (searchMode == GIN_SEARCH_MODE_EVERYTHING) + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); } /* @@ -265,6 +266,7 @@ ginNewScanKey(IndexScanDesc scan) GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; bool hasNullQuery = false; + bool attrHasNormalScan[INDEX_MAX_KEYS] = {false}; MemoryContext oldCtx; /* @@ -371,6 +373,33 @@ ginNewScanKey(IndexScanDesc scan) skey->sk_argument, nQueryValues, queryValues, categories, partial_matches, extra_data); + + /* Remember if we had any non-excludeOnly keys */ + if (searchMode != GIN_SEARCH_MODE_ALL) + attrHasNormalScan[skey->sk_attno - 1] = true; + } + + /* + * Processing GIN_SEARCH_MODE_ALL scan keys requires us to make a second + * pass over the scan keys. Above we marked each such scan key as + * excludeOnly. If the involved column has any normal (not excludeOnly) + * scan key as well, then we can leave it like that. Otherwise, one + * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry + * and be set to normal (excludeOnly = false). + */ + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->searchMode != GIN_SEARCH_MODE_ALL) + continue; + + if (!attrHasNormalScan[key->attnum - 1]) + { + key->excludeOnly = false; + ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); + attrHasNormalScan[key->attnum - 1] = true; + } } /* diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 18d77ac0b7..7c6f0574b3 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6356,7 +6356,8 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, typedef struct { - bool haveFullScan; + bool attHasFullScan[INDEX_MAX_KEYS]; + bool attHasNormalScan[INDEX_MAX_KEYS]; double partialEntries; double exactEntries; double searchEntries; @@ -6452,16 +6453,21 @@ gincost_pattern(IndexOptInfo *index, int indexcol, counts->searchEntries++; } - if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) + if (searchMode == GIN_SEARCH_MODE_DEFAULT) + { + counts->attHasNormalScan[indexcol] = true; + } + else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY) { /* Treat "include empty" like an exact-match item */ + counts->attHasNormalScan[indexcol] = true; counts->exactEntries++; counts->searchEntries++; } - else if (searchMode != GIN_SEARCH_MODE_DEFAULT) + else { /* It's GIN_SEARCH_MODE_ALL */ - counts->haveFullScan = true; + counts->attHasFullScan[indexcol] = true; } return true; @@ -6597,7 +6603,8 @@ gincost_scalararrayopexpr(PlannerInfo *root, /* We ignore array elements that are unsatisfiable patterns */ numPossible++; - if (elemcounts.haveFullScan) + if (elemcounts.attHasFullScan[indexcol] && + !elemcounts.attHasNormalScan[indexcol]) { /* * Full index scan will be required. We treat this as if @@ -6654,6 +6661,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, numEntries; GinQualCounts counts; bool matchPossible; + bool fullIndexScan; double partialScale; double entryPagesFetched, dataPagesFetched, @@ -6665,6 +6673,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Relation indexRel; GinStatsData ginStats; ListCell *lc; + int i; /* * Obtain statistical information from the meta page, if possible. Else @@ -6821,7 +6830,23 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, return; } - if (counts.haveFullScan || indexQuals == NIL) + /* + * If attribute has a full scan and at the same time doesn't have normal + * scan, then we'll have to scan all non-null entries of that attribute. + * Currently, we don't have per-attribute statistics for GIN. Thus, we + * must assume the whole GIN index has to be scanned in this case. + */ + fullIndexScan = false; + for (i = 0; i < index->nkeycolumns; i++) + { + if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i]) + { + fullIndexScan = true; + break; + } + } + + if (fullIndexScan || indexQuals == NIL) { /* * Full index scan will be required. We treat this as if every key in diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index a136f7f918..71eeac205c 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -303,6 +303,20 @@ typedef struct GinScanKeyData int32 searchMode; OffsetNumber attnum; + /* + * An excludeOnly scan key is not able to enumerate all matching tuples. + * That is, to be semantically correct on its own, it would need to have a + * GIN_CAT_EMPTY_QUERY scanEntry, but it doesn't. Such a key can still be + * used to filter tuples returned by other scan keys, so we will get the + * right answers as long as there's at least one non-excludeOnly scan key + * for each index attribute considered by the search. For efficiency + * reasons we don't want to have unnecessary GIN_CAT_EMPTY_QUERY entries, + * so we will convert an excludeOnly scan key to non-excludeOnly (by + * adding a GIN_CAT_EMPTY_QUERY scanEntry) only if there are no other + * non-excludeOnly scan keys. + */ + bool excludeOnly; + /* * Match status data. curItem is the TID most recently tested (could be a * lossy-page pointer). curItemMatches is true if it passes the diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out index a3911a6c6c..e3c4805c23 100644 --- a/src/test/regress/expected/gin.out +++ b/src/test/regress/expected/gin.out @@ -1,7 +1,7 @@ -- -- Test GIN indexes. -- --- There are other tests to test different GIN opclassed. This is for testing +-- There are other tests to test different GIN opclasses. This is for testing -- GIN itself. -- Create and populate a test table with a GIN index. create table gin_test_tbl(i int4[]) with (autovacuum_enabled = off); @@ -35,3 +35,132 @@ insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 1000) g; insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl; +-- Test optimization of empty queries +create temp table t_gin_test_tbl(i int4[], j int4[]); +create index on t_gin_test_tbl using gin (i, j); +insert into t_gin_test_tbl +values + (null, null), + ('{}', null), + ('{1}', null), + ('{1,2}', null), + (null, '{}'), + (null, '{10}'), + ('{1,2}', '{10}'), + ('{2}', '{10}'), + ('{1,3}', '{}'), + ('{1,1}', '{10}'); +set enable_seqscan = off; +explain (costs off) +select * from t_gin_test_tbl where array[0] <@ i; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on t_gin_test_tbl + Recheck Cond: ('{0}'::integer[] <@ i) + -> Bitmap Index Scan on t_gin_test_tbl_i_j_idx + Index Cond: (i @> '{0}'::integer[]) +(4 rows) + +select * from t_gin_test_tbl where array[0] <@ i; + i | j +---+--- +(0 rows) + +select * from t_gin_test_tbl where array[0] <@ i and '{}'::int4[] <@ j; + i | j +---+--- +(0 rows) + +explain (costs off) +select * from t_gin_test_tbl where i @> '{}'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on t_gin_test_tbl + Recheck Cond: (i @> '{}'::integer[]) + -> Bitmap Index Scan on t_gin_test_tbl_i_j_idx + Index Cond: (i @> '{}'::integer[]) +(4 rows) + +select * from t_gin_test_tbl where i @> '{}'; + i | j +-------+------ + {} | + {1} | + {1,2} | + {1,2} | {10} + {2} | {10} + {1,3} | {} + {1,1} | {10} +(7 rows) + +create function explain_query_json(query_sql text) +returns table (explain_line json) +language plpgsql as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute 'EXPLAIN (ANALYZE, FORMAT json) ' || query_sql; +end; +$$; +create function execute_text_query_index(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute query_sql; +end; +$$; +create function execute_text_query_heap(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = on; + set enable_bitmapscan = off; + return query execute query_sql; +end; +$$; +-- check number of rows returned by index and removed by recheck +select + query, + js->0->'Plan'->'Plans'->0->'Actual Rows' as "return by index", + js->0->'Plan'->'Rows Removed by Index Recheck' as "removed by recheck", + (res_index = res_heap) as "match" +from + (values + ($$ i @> '{}' $$), + ($$ j @> '{}' $$), + ($$ i @> '{}' and j @> '{}' $$), + ($$ i @> '{1}' $$), + ($$ i @> '{1}' and j @> '{}' $$), + ($$ i @> '{1}' and i @> '{}' and j @> '{}' $$), + ($$ j @> '{10}' $$), + ($$ j @> '{10}' and i @> '{}' $$), + ($$ j @> '{10}' and j @> '{}' and i @> '{}' $$), + ($$ i @> '{1}' and j @> '{10}' $$) + ) q(query), + lateral explain_query_json($$select * from t_gin_test_tbl where $$ || query) js, + lateral execute_text_query_index($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_index, + lateral execute_text_query_heap($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_heap; + query | return by index | removed by recheck | match +-------------------------------------------+-----------------+--------------------+------- + i @> '{}' | 7 | 0 | t + j @> '{}' | 6 | 0 | t + i @> '{}' and j @> '{}' | 4 | 0 | t + i @> '{1}' | 5 | 0 | t + i @> '{1}' and j @> '{}' | 3 | 0 | t + i @> '{1}' and i @> '{}' and j @> '{}' | 3 | 0 | t + j @> '{10}' | 4 | 0 | t + j @> '{10}' and i @> '{}' | 3 | 0 | t + j @> '{10}' and j @> '{}' and i @> '{}' | 3 | 0 | t + i @> '{1}' and j @> '{10}' | 2 | 0 | t +(10 rows) + +reset enable_seqscan; +reset enable_bitmapscan; +drop table t_gin_test_tbl; diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 7af289927c..fe1cd9deb0 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -337,6 +337,41 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; 508 (1 row) +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + QUERY PLAN +----------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_tsvector + Recheck Cond: (a @@ '!''qh'''::tsquery) + -> Bitmap Index Scan on wowidx + Index Cond: (a @@ '!''qh'''::tsquery) +(5 rows) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + count +------- + 410 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on test_tsvector + Recheck Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) + -> Bitmap Index Scan on wowidx + Index Cond: ((a @@ '''wr'''::tsquery) AND (a @@ '!''qh'''::tsquery)) +(5 rows) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + count +------- + 60 +(1 row) + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); SELECT * FROM ts_stat('SELECT a FROM test_tsvector') ORDER BY ndoc DESC, nentry DESC, word LIMIT 10; diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql index c566e9b58c..836717c996 100644 --- a/src/test/regress/sql/gin.sql +++ b/src/test/regress/sql/gin.sql @@ -1,7 +1,7 @@ -- -- Test GIN indexes. -- --- There are other tests to test different GIN opclassed. This is for testing +-- There are other tests to test different GIN opclasses. This is for testing -- GIN itself. -- Create and populate a test table with a GIN index. @@ -34,3 +34,92 @@ insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g; delete from gin_test_tbl where i @> array[2]; vacuum gin_test_tbl; + +-- Test optimization of empty queries +create temp table t_gin_test_tbl(i int4[], j int4[]); +create index on t_gin_test_tbl using gin (i, j); +insert into t_gin_test_tbl +values + (null, null), + ('{}', null), + ('{1}', null), + ('{1,2}', null), + (null, '{}'), + (null, '{10}'), + ('{1,2}', '{10}'), + ('{2}', '{10}'), + ('{1,3}', '{}'), + ('{1,1}', '{10}'); + +set enable_seqscan = off; +explain (costs off) +select * from t_gin_test_tbl where array[0] <@ i; +select * from t_gin_test_tbl where array[0] <@ i; +select * from t_gin_test_tbl where array[0] <@ i and '{}'::int4[] <@ j; + +explain (costs off) +select * from t_gin_test_tbl where i @> '{}'; +select * from t_gin_test_tbl where i @> '{}'; + +create function explain_query_json(query_sql text) +returns table (explain_line json) +language plpgsql as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute 'EXPLAIN (ANALYZE, FORMAT json) ' || query_sql; +end; +$$; + +create function execute_text_query_index(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = off; + set enable_bitmapscan = on; + return query execute query_sql; +end; +$$; + +create function execute_text_query_heap(query_sql text) +returns setof text +language plpgsql +as +$$ +begin + set enable_seqscan = on; + set enable_bitmapscan = off; + return query execute query_sql; +end; +$$; + +-- check number of rows returned by index and removed by recheck +select + query, + js->0->'Plan'->'Plans'->0->'Actual Rows' as "return by index", + js->0->'Plan'->'Rows Removed by Index Recheck' as "removed by recheck", + (res_index = res_heap) as "match" +from + (values + ($$ i @> '{}' $$), + ($$ j @> '{}' $$), + ($$ i @> '{}' and j @> '{}' $$), + ($$ i @> '{1}' $$), + ($$ i @> '{1}' and j @> '{}' $$), + ($$ i @> '{1}' and i @> '{}' and j @> '{}' $$), + ($$ j @> '{10}' $$), + ($$ j @> '{10}' and i @> '{}' $$), + ($$ j @> '{10}' and j @> '{}' and i @> '{}' $$), + ($$ i @> '{1}' and j @> '{10}' $$) + ) q(query), + lateral explain_query_json($$select * from t_gin_test_tbl where $$ || query) js, + lateral execute_text_query_index($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_index, + lateral execute_text_query_heap($$select string_agg((i, j)::text, ' ') from t_gin_test_tbl where $$ || query) res_heap; + +reset enable_seqscan; +reset enable_bitmapscan; + +drop table t_gin_test_tbl; diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index ece80b983c..14da7edd84 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -111,6 +111,15 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}'); SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme'; SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme'; +-- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr' AND a @@ '!qh'; + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');