Add reusable routine for making arrays unique.

Introduce qunique() and qunique_arg(), which can be used after qsort()
and qsort_arg() respectively to remove duplicate values.  Use it where
appropriate.

Author: Thomas Munro
Reviewed-by: Tom Lane (in an earlier version)
Discussion: https://postgr.es/m/CAEepm%3D2vmFTNpAmwbGGD2WaryM6T3hSDVKQPfUwjdD_5XY6vAA%40mail.gmail.com
This commit is contained in:
Thomas Munro 2019-11-07 16:51:04 +13:00
parent 3feb6ace7c
commit 7815e7efdb
13 changed files with 115 additions and 208 deletions

View File

@ -322,6 +322,11 @@ hstoreUniquePairs(Pairs *a, int32 l, int32 *buflen)
}
qsort((void *) a, l, sizeof(Pairs), comparePairs);
/*
* We can't use qunique here because we have some clean-up code to run on
* removed elements.
*/
ptr = a + 1;
res = a;
while (ptr - a < l)

View File

@ -7,6 +7,7 @@
#include "_int.h"
#include "catalog/pg_type.h"
#include "lib/qunique.h"
/* arguments are assumed sorted & unique-ified */
bool
@ -308,23 +309,13 @@ internal_size(int *a, int len)
ArrayType *
_int_unique(ArrayType *r)
{
int *tmp,
*dr,
*data;
int num = ARRNELEMS(r);
bool duplicates_found; /* not used */
if (num < 2)
return r;
num = qunique_arg(ARRPTR(r), num, sizeof(int), isort_cmp,
&duplicates_found);
data = tmp = dr = ARRPTR(r);
while (tmp - data < num)
{
if (*tmp != *dr)
*(++dr) = *tmp++;
else
tmp++;
}
return resize_intArrayType(r, dr + 1 - ARRPTR(r));
return resize_intArrayType(r, num);
}
void

View File

@ -6,6 +6,7 @@
#include <ctype.h>
#include "catalog/pg_type.h"
#include "lib/qunique.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
#include "utils/lsyscache.h"
@ -162,26 +163,6 @@ comp_trgm(const void *a, const void *b)
return CMPTRGM(a, b);
}
static int
unique_array(trgm *a, int len)
{
trgm *curend,
*tmp;
curend = tmp = a;
while (tmp - a < len)
if (CMPTRGM(tmp, curend))
{
curend++;
CPTRGM(curend, tmp);
tmp++;
}
else
tmp++;
return curend + 1 - a;
}
/*
* Finds first word in string, returns pointer to the word,
* endword points to the character after word
@ -394,7 +375,7 @@ generate_trgm(char *str, int slen)
if (len > 1)
{
qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
len = unique_array(GETARR(trg), len);
len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@ -942,7 +923,7 @@ generate_wildcard_trgm(const char *str, int slen)
if (len > 1)
{
qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
len = unique_array(GETARR(trg), len);
len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));

View File

@ -21,6 +21,7 @@
#include "access/reloptions.h"
#include "access/relscan.h"
#include "commands/progress.h"
#include "lib/qunique.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/datum.h"
@ -435,8 +436,6 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
Oid elemtype;
RegProcedure cmp_proc;
BTSortArrayContext cxt;
int last_non_dup;
int i;
if (nelems <= 1)
return nelems; /* no work to do */
@ -475,20 +474,8 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
_bt_compare_array_elements, (void *) &cxt);
/* Now scan the sorted elements and remove duplicates */
last_non_dup = 0;
for (i = 1; i < nelems; i++)
{
int32 compare;
compare = DatumGetInt32(FunctionCall2Coll(&cxt.flinfo,
cxt.collation,
elems[last_non_dup],
elems[i]));
if (compare != 0)
elems[++last_non_dup] = elems[i];
}
return last_non_dup + 1;
return qunique_arg(elems, nelems, sizeof(Datum),
_bt_compare_array_elements, &cxt);
}
/*

View File

@ -27,6 +27,7 @@
#include "catalog/pg_type.h"
#include "executor/execdebug.h"
#include "executor/nodeTidscan.h"
#include "lib/qunique.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
@ -260,21 +261,13 @@ TidListEval(TidScanState *tidstate)
*/
if (numTids > 1)
{
int lastTid;
int i;
/* CurrentOfExpr could never appear OR'd with something else */
Assert(!tidstate->tss_isCurrentOf);
qsort((void *) tidList, numTids, sizeof(ItemPointerData),
itemptr_comparator);
lastTid = 0;
for (i = 1; i < numTids; i++)
{
if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
tidList[++lastTid] = tidList[i];
}
numTids = lastTid + 1;
numTids = qunique(tidList, numTids, sizeof(ItemPointerData),
itemptr_comparator);
}
tidstate->tss_TidList = tidList;

View File

@ -28,6 +28,7 @@
#include "commands/tablespace.h"
#include "foreign/foreign.h"
#include "funcapi.h"
#include "lib/qunique.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/array.h"
@ -1475,8 +1476,7 @@ aclmembers(const Acl *acl, Oid **roleids)
Oid *list;
const AclItem *acldat;
int i,
j,
k;
j;
if (acl == NULL || ACL_NUM(acl) == 0)
{
@ -1508,21 +1508,14 @@ aclmembers(const Acl *acl, Oid **roleids)
/* Sort the array */
qsort(list, j, sizeof(Oid), oid_cmp);
/* Remove duplicates from the array */
k = 0;
for (i = 1; i < j; i++)
{
if (list[k] != list[i])
list[++k] = list[i];
}
/*
* We could repalloc the array down to minimum size, but it's hardly worth
* it since it's only transient memory.
*/
*roleids = list;
return k + 1;
/* Remove duplicates from the array */
return qunique(list, j, sizeof(Oid), oid_cmp);
}

View File

@ -16,6 +16,7 @@
#include "access/gist.h"
#include "access/heaptoast.h"
#include "lib/qunique.h"
#include "port/pg_bitutils.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
@ -122,31 +123,6 @@ compareint(const void *va, const void *vb)
return (a > b) ? 1 : -1;
}
/*
* Removes duplicates from an array of int32. 'l' is
* size of the input array. Returns the new size of the array.
*/
static int
uniqueint(int32 *a, int32 l)
{
int32 *ptr,
*res;
if (l <= 1)
return l;
ptr = res = a;
qsort((void *) a, l, sizeof(int32), compareint);
while (ptr - a < l)
if (*ptr != *res)
*(++res) = *ptr++;
else
ptr++;
return res + 1 - a;
}
static void
makesign(BITVECP sign, SignTSVector *a)
{
@ -193,7 +169,8 @@ gtsvector_compress(PG_FUNCTION_ARGS)
ptr++;
}
len = uniqueint(GETARR(res), val->size);
qsort(GETARR(res), val->size, sizeof(int), compareint);
len = qunique(GETARR(res), val->size, sizeof(int), compareint);
if (len != val->size)
{
/*

View File

@ -14,6 +14,7 @@
#include "postgres.h"
#include "lib/qunique.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
@ -302,29 +303,6 @@ cmp_string(const void *a, const void *b)
return strcmp(sa, sb);
}
static int
remove_duplicates(char **strings, int n)
{
if (n <= 1)
return n;
else
{
int i;
char *prev = strings[0];
int new_n = 1;
for (i = 1; i < n; i++)
{
if (strcmp(strings[i], prev) != 0)
{
strings[new_n++] = strings[i];
prev = strings[i];
}
}
return new_n;
}
}
Datum
tsq_mcontains(PG_FUNCTION_ARGS)
{
@ -342,9 +320,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
/* Sort and remove duplicates from both arrays */
qsort(query_values, query_nvalues, sizeof(char *), cmp_string);
query_nvalues = remove_duplicates(query_values, query_nvalues);
query_nvalues = qunique(query_values, query_nvalues, sizeof(char *),
cmp_string);
qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string);
ex_nvalues = remove_duplicates(ex_values, ex_nvalues);
ex_nvalues = qunique(ex_values, ex_nvalues, sizeof(char *), cmp_string);
if (ex_nvalues > query_nvalues)
result = false;

View File

@ -41,8 +41,9 @@ compareWordEntryPos(const void *a, const void *b)
}
/*
* Removes duplicate pos entries. If there's two entries with same pos
* but different weight, the higher weight is retained.
* Removes duplicate pos entries. If there's two entries with same pos but
* different weight, the higher weight is retained, so we can't use
* qunique here.
*
* Returns new length.
*/

View File

@ -21,6 +21,7 @@
#include "commands/trigger.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "lib/qunique.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "parser/parse_coerce.h"
@ -475,16 +476,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
*/
if (indices_count > 1)
{
int kp;
qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
kp = 0;
for (k = 1; k < indices_count; k++)
{
if (indices_to_delete[k] != indices_to_delete[kp])
indices_to_delete[++kp] = indices_to_delete[k];
}
indices_count = ++kp;
indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
compare_int);
}
/*
@ -761,7 +755,6 @@ array_to_tsvector(PG_FUNCTION_ARGS)
bool *nulls;
int nitems,
i,
j,
tslen,
datalen = 0;
char *cur;
@ -781,13 +774,8 @@ array_to_tsvector(PG_FUNCTION_ARGS)
if (nitems > 1)
{
qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
j = 0;
for (i = 1; i < nitems; i++)
{
if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
dlexemes[++j] = dlexemes[i];
}
nitems = ++j;
nitems = qunique(dlexemes, nitems, sizeof(Datum),
compare_text_lexemes);
}
/* Calculate space needed for surviving lexemes. */
@ -1270,39 +1258,6 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
return result;
}
/*
* Removes duplicate pos entries. We can't use uniquePos() from
* tsvector.c because array might be longer than MAXENTRYPOS
*
* Returns new length.
*/
static int
uniqueLongPos(WordEntryPos *pos, int npos)
{
WordEntryPos *pos_iter,
*result;
if (npos <= 1)
return npos;
qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
result = pos;
pos_iter = pos + 1;
while (pos_iter < pos + npos)
{
if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
{
result++;
*result = WEP_GETPOS(*pos_iter);
}
pos_iter++;
}
return result + 1 - pos;
}
/*
* is there value 'val' in array or not ?
*/
@ -1397,7 +1352,9 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
/* Sort and make unique array of found positions */
data->pos = allpos;
data->npos = uniqueLongPos(allpos, npos);
qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
compareWordEntryPos);
data->allocated = true;
}
}

View File

@ -27,6 +27,7 @@
#include "access/xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "lib/qunique.h"
#include "libpq/pqformat.h"
#include "postmaster/postmaster.h"
#include "storage/lwlock.h"
@ -213,26 +214,10 @@ cmp_txid(const void *aa, const void *bb)
static void
sort_snapshot(TxidSnapshot *snap)
{
txid last = 0;
int nxip,
idx1,
idx2;
if (snap->nxip > 1)
{
qsort(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
/* remove duplicates */
nxip = snap->nxip;
idx1 = idx2 = 0;
while (idx1 < nxip)
{
if (snap->xip[idx1] != last)
last = snap->xip[idx2++] = snap->xip[idx1];
else
snap->nxip--;
idx1++;
}
snap->nxip = qunique(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
}
}

View File

@ -74,6 +74,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "catalog/pg_user_mapping.h"
#include "lib/qunique.h"
#include "utils/rel.h"
#include "utils/catcache.h"
#include "utils/syscache.h"
@ -1010,8 +1011,6 @@ void
InitCatalogCache(void)
{
int cacheId;
int i,
j;
StaticAssertStmt(SysCacheSize == (int) lengthof(cacheinfo),
"SysCacheSize does not match syscache.c's array");
@ -1048,21 +1047,15 @@ InitCatalogCache(void)
/* Sort and de-dup OID arrays, so we can use binary search. */
pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
sizeof(Oid), oid_compare);
for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
{
if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
SysCacheRelationOid[++j] = SysCacheRelationOid[i];
}
SysCacheRelationOidSize = j + 1;
SysCacheRelationOidSize =
qunique(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid),
oid_compare);
pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
sizeof(Oid), oid_compare);
for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
{
if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
}
SysCacheSupportingRelOidSize = j + 1;
SysCacheSupportingRelOidSize =
qunique(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
sizeof(Oid), oid_compare);
CacheInitialized = true;
}

65
src/include/lib/qunique.h Normal file
View File

@ -0,0 +1,65 @@
/*-------------------------------------------------------------------------
*
* qunique.h
* inline array unique functions
* Portions Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/lib/qunique.h
*-------------------------------------------------------------------------
*/
#ifndef QUNIQUE_H
#define QUNIQUE_H
/*
* Remove duplicates from a pre-sorted array, according to a user-supplied
* comparator. Usually the array should have been sorted with qsort() using
* the same arguments. Return the new size.
*/
static inline size_t
qunique(void *array, size_t elements, size_t width,
int (*compare) (const void *, const void *))
{
char *bytes = (char *) array;
size_t i,
j;
if (elements <= 1)
return elements;
for (i = 1, j = 0; i < elements; ++i)
{
if (compare(bytes + i * width, bytes + j * width) != 0)
memcpy(bytes + ++j * width, bytes + i * width, width);
}
return j + 1;
}
/*
* Like qunique(), but takes a comparator with an extra user data argument
* which is passed through, for compatibility with qsort_arg().
*/
static inline size_t
qunique_arg(void *array, size_t elements, size_t width,
int (*compare) (const void *, const void *, void *),
void *arg)
{
char *bytes = (char *) array;
size_t i,
j;
if (elements <= 1)
return elements;
for (i = 1, j = 0; i < elements; ++i)
{
if (compare(bytes + i * width, bytes + j * width, arg) != 0)
memcpy(bytes + ++j * width, bytes + i * width, width);
}
return j + 1;
}
#endif /* QUNIQUE_H */