First step in fixing the selectivity-estimation code. eqsel and
neqsel now behave as per my suggestions on pghackers a few days ago. Selectivity for < > <= >= should work OK for integral types as well, but still needs work for nonintegral types. Since these routines have never actually executed before :-(, this may result in some significant changes in the optimizer's choices of execution plans. Let me know if you see any serious misbehavior. CAUTION: THESE CHANGES REQUIRE INITDB. The pg_statistic table has changed.
This commit is contained in:
parent
f851c6b07d
commit
44878506d8
|
@ -7,7 +7,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.115 1999/07/19 07:07:20 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.116 1999/08/01 04:54:24 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -78,7 +78,7 @@ static void vc_vacpage(Page page, VPageDescr vpd);
|
|||
static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples);
|
||||
static void vc_scanoneind(Relation indrel, int num_tuples);
|
||||
static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple);
|
||||
static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len);
|
||||
static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
|
||||
static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
|
||||
static void vc_delhilowstats(Oid relid, int attcnt, int *attnums);
|
||||
static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl);
|
||||
|
@ -473,9 +473,13 @@ vc_vacone(Oid relid, bool analyze, List *va_cols)
|
|||
{
|
||||
pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
|
||||
fmgr_info(pgopform->oprcode, &(stats->f_cmplt));
|
||||
stats->op_cmplt = oprid(func_operator);
|
||||
}
|
||||
else
|
||||
{
|
||||
stats->f_cmplt.fn_addr = NULL;
|
||||
stats->op_cmplt = InvalidOid;
|
||||
}
|
||||
|
||||
func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true);
|
||||
if (func_operator != NULL)
|
||||
|
@ -2200,8 +2204,8 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
|
|||
{
|
||||
swapDatum(stats->guess1, stats->guess2);
|
||||
swapInt(stats->guess1_len, stats->guess2_len);
|
||||
stats->guess1_cnt = stats->guess2_hits;
|
||||
swapLong(stats->guess1_hits, stats->guess2_hits);
|
||||
stats->guess1_cnt = stats->guess1_hits;
|
||||
}
|
||||
if (stats->guess1_cnt > stats->best_cnt)
|
||||
{
|
||||
|
@ -2227,7 +2231,7 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
|
|||
*
|
||||
*/
|
||||
static void
|
||||
vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len)
|
||||
vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
|
||||
{
|
||||
if (attr->attbyval && attr->attlen != -1)
|
||||
*bucket = value;
|
||||
|
@ -2340,13 +2344,14 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
|
|||
selratio = 0;
|
||||
else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
|
||||
{
|
||||
/* exact result when there are just 1 or 2 values... */
|
||||
double min_cnt_d = stats->min_cnt,
|
||||
max_cnt_d = stats->max_cnt,
|
||||
null_cnt_d = stats->null_cnt,
|
||||
nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */
|
||||
nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
|
||||
|
||||
selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) /
|
||||
(nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d);
|
||||
(nonnull_cnt_d + null_cnt_d) / (nonnull_cnt_d + null_cnt_d);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2359,7 +2364,9 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
|
|||
*/
|
||||
selratio = (most * most + 0.20 * most * (total - most)) / total / total;
|
||||
}
|
||||
if (selratio > 1.0)
|
||||
if (selratio < 0.0)
|
||||
selratio = 0.0;
|
||||
else if (selratio > 1.0)
|
||||
selratio = 1.0;
|
||||
attp->attdisbursion = selratio;
|
||||
|
||||
|
@ -2375,13 +2382,22 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
|
|||
* doing system relations, especially pg_statistic is a
|
||||
* problem
|
||||
*/
|
||||
if (VacAttrStatsLtGtValid(stats) && stats->initialized /* &&
|
||||
* !IsSystemRelationName(
|
||||
*
|
||||
pgcform->relname.data) */ )
|
||||
if (VacAttrStatsLtGtValid(stats) && stats->initialized
|
||||
/* && !IsSystemRelationName(pgcform->relname.data)
|
||||
*/ )
|
||||
{
|
||||
float32data nullratio;
|
||||
float32data bestratio;
|
||||
FmgrInfo out_function;
|
||||
char *out_string;
|
||||
double best_cnt_d = stats->best_cnt,
|
||||
null_cnt_d = stats->null_cnt,
|
||||
nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
|
||||
|
||||
nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
|
||||
bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
|
||||
|
||||
fmgr_info(stats->outfunc, &out_function);
|
||||
|
||||
for (i = 0; i < Natts_pg_statistic; ++i)
|
||||
nulls[i] = ' ';
|
||||
|
@ -2391,26 +2407,34 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
|
|||
* ----------------
|
||||
*/
|
||||
i = 0;
|
||||
values[i++] = (Datum) relid; /* 1 */
|
||||
values[i++] = (Datum) attp->attnum; /* 2 */
|
||||
values[i++] = (Datum) InvalidOid; /* 3 */
|
||||
fmgr_info(stats->outfunc, &out_function);
|
||||
out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid);
|
||||
values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
|
||||
values[i++] = (Datum) relid; /* starelid */
|
||||
values[i++] = (Datum) attp->attnum; /* staattnum */
|
||||
values[i++] = (Datum) stats->op_cmplt; /* staop */
|
||||
/* hack: this code knows float4 is pass-by-ref */
|
||||
values[i++] = PointerGetDatum(&nullratio); /* stanullfrac */
|
||||
values[i++] = PointerGetDatum(&bestratio); /* stacommonfrac */
|
||||
out_string = (*fmgr_faddr(&out_function)) (stats->best, stats->attr->atttypid, stats->attr->atttypmod);
|
||||
values[i++] = PointerGetDatum(textin(out_string)); /* stacommonval */
|
||||
pfree(out_string);
|
||||
out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid);
|
||||
values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
|
||||
out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid, stats->attr->atttypmod);
|
||||
values[i++] = PointerGetDatum(textin(out_string)); /* staloval */
|
||||
pfree(out_string);
|
||||
out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid, stats->attr->atttypmod);
|
||||
values[i++] = PointerGetDatum(textin(out_string)); /* stahival */
|
||||
pfree(out_string);
|
||||
|
||||
stup = heap_formtuple(sd->rd_att, values, nulls);
|
||||
|
||||
/* ----------------
|
||||
* insert the tuple in the relation and get the tuple's oid.
|
||||
* insert the tuple in the relation.
|
||||
* ----------------
|
||||
*/
|
||||
heap_insert(sd, stup);
|
||||
pfree(DatumGetPointer(values[3]));
|
||||
pfree(DatumGetPointer(values[4]));
|
||||
|
||||
/* release allocated space */
|
||||
pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval-1]));
|
||||
pfree(DatumGetPointer(values[Anum_pg_statistic_staloval-1]));
|
||||
pfree(DatumGetPointer(values[Anum_pg_statistic_stahival-1]));
|
||||
pfree(stup);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,13 +6,11 @@
|
|||
* These routines are registered in the operator catalog in the
|
||||
* "oprrest" and "oprjoin" attributes.
|
||||
*
|
||||
* XXX check all the functions--I suspect them to be 1-based.
|
||||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.35 1999/07/17 20:17:59 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.36 1999/08/01 04:54:22 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -21,7 +19,10 @@
|
|||
|
||||
#include "access/heapam.h"
|
||||
#include "catalog/catname.h"
|
||||
#include "catalog/pg_operator.h"
|
||||
#include "catalog/pg_statistic.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "parser/parse_oper.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/syscache.h"
|
||||
|
@ -29,24 +30,35 @@
|
|||
/* N is not a valid var/constant or relation id */
|
||||
#define NONVALUE(N) ((N) == -1)
|
||||
|
||||
/*
|
||||
* generalize the test for functional index selectivity request
|
||||
*/
|
||||
#define FunctionalSelectivity(nIndKeys,attNum) (attNum==InvalidAttrNumber)
|
||||
/* are we looking at a functional index selectivity request? */
|
||||
#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
|
||||
|
||||
static float32data getattdisbursion(Oid relid, AttrNumber attnum);
|
||||
static void gethilokey(Oid relid, AttrNumber attnum, Oid opid,
|
||||
char **high, char **low);
|
||||
/* default selectivity estimate for inequalities such as "A < b" */
|
||||
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
|
||||
|
||||
static void getattproperties(Oid relid, AttrNumber attnum,
|
||||
Oid *typid,
|
||||
int *typlen,
|
||||
bool *typbyval,
|
||||
int32 *typmod);
|
||||
static bool getattstatistics(Oid relid, AttrNumber attnum,
|
||||
Oid typid, int32 typmod,
|
||||
double *nullfrac,
|
||||
double *commonfrac,
|
||||
Datum *commonval,
|
||||
Datum *loval,
|
||||
Datum *hival);
|
||||
static double getattdisbursion(Oid relid, AttrNumber attnum);
|
||||
|
||||
|
||||
/*
|
||||
* eqsel - Selectivity of "=" for any data type.
|
||||
* eqsel - Selectivity of "=" for any data types.
|
||||
*/
|
||||
float64
|
||||
eqsel(Oid opid,
|
||||
Oid relid,
|
||||
AttrNumber attno,
|
||||
char *value,
|
||||
Datum value,
|
||||
int32 flag)
|
||||
{
|
||||
float64 result;
|
||||
|
@ -55,18 +67,124 @@ eqsel(Oid opid,
|
|||
if (NONVALUE(attno) || NONVALUE(relid))
|
||||
*result = 0.1;
|
||||
else
|
||||
*result = (float64data) getattdisbursion(relid, (int) attno);
|
||||
{
|
||||
Oid typid;
|
||||
int typlen;
|
||||
bool typbyval;
|
||||
int32 typmod;
|
||||
double nullfrac;
|
||||
double commonfrac;
|
||||
Datum commonval;
|
||||
double selec;
|
||||
|
||||
/* get info about the attribute */
|
||||
getattproperties(relid, attno,
|
||||
&typid, &typlen, &typbyval, &typmod);
|
||||
|
||||
if (getattstatistics(relid, attno, typid, typmod,
|
||||
&nullfrac, &commonfrac, &commonval,
|
||||
NULL, NULL))
|
||||
{
|
||||
if (flag & SEL_CONSTANT)
|
||||
{
|
||||
/* Is the constant the same as the most common value? */
|
||||
HeapTuple oprtuple;
|
||||
Oid ltype,
|
||||
rtype;
|
||||
Operator func_operator;
|
||||
bool mostcommon = false;
|
||||
|
||||
/* get left and right datatypes of the operator */
|
||||
oprtuple = get_operator_tuple(opid);
|
||||
if (! HeapTupleIsValid(oprtuple))
|
||||
elog(ERROR, "eqsel: no tuple for operator %u", opid);
|
||||
ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
|
||||
rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
|
||||
|
||||
/* and find appropriate equality operator (no, it ain't
|
||||
* necessarily opid itself...)
|
||||
*/
|
||||
func_operator = oper("=", ltype, rtype, true);
|
||||
|
||||
if (func_operator != NULL)
|
||||
{
|
||||
RegProcedure eqproc = ((Form_pg_operator) GETSTRUCT(func_operator))->oprcode;
|
||||
if (flag & SEL_RIGHT) /* given value on the right? */
|
||||
mostcommon = (bool)
|
||||
DatumGetUInt8(fmgr(eqproc, commonval, value));
|
||||
else
|
||||
mostcommon = (bool)
|
||||
DatumGetUInt8(fmgr(eqproc, value, commonval));
|
||||
}
|
||||
|
||||
if (mostcommon)
|
||||
{
|
||||
/* Search is for the most common value. We know the
|
||||
* selectivity exactly (or as exactly as VACUUM could
|
||||
* calculate it, anyway).
|
||||
*/
|
||||
selec = commonfrac;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Comparison is against a constant that is neither the
|
||||
* most common value nor null. Its selectivity cannot
|
||||
* be more than this:
|
||||
*/
|
||||
selec = 1.0 - commonfrac - nullfrac;
|
||||
if (selec > commonfrac)
|
||||
selec = commonfrac;
|
||||
/* and in fact it's probably less, so apply a fudge
|
||||
* factor.
|
||||
*/
|
||||
selec *= 0.5;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Search is for a value that we do not know a priori,
|
||||
* but we will assume it is not NULL. Selectivity
|
||||
* cannot be more than this:
|
||||
*/
|
||||
selec = 1.0 - nullfrac;
|
||||
if (selec > commonfrac)
|
||||
selec = commonfrac;
|
||||
/* and in fact it's probably less, so apply a fudge
|
||||
* factor.
|
||||
*/
|
||||
selec *= 0.5;
|
||||
}
|
||||
|
||||
/* result should be in range, but make sure... */
|
||||
if (selec < 0.0)
|
||||
selec = 0.0;
|
||||
else if (selec > 1.0)
|
||||
selec = 1.0;
|
||||
|
||||
if (! typbyval)
|
||||
pfree(DatumGetPointer(commonval));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No VACUUM ANALYZE stats available, so make a guess using
|
||||
* the disbursion stat (if we have that, which is unlikely...)
|
||||
*/
|
||||
selec = getattdisbursion(relid, attno);
|
||||
}
|
||||
|
||||
*result = (float64data) selec;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* neqsel - Selectivity of "!=" for any data type.
|
||||
* neqsel - Selectivity of "!=" for any data types.
|
||||
*/
|
||||
float64
|
||||
neqsel(Oid opid,
|
||||
Oid relid,
|
||||
AttrNumber attno,
|
||||
char *value,
|
||||
Datum value,
|
||||
int32 flag)
|
||||
{
|
||||
float64 result;
|
||||
|
@ -77,96 +195,164 @@ neqsel(Oid opid,
|
|||
}
|
||||
|
||||
/*
|
||||
* intltsel - Selectivity of "<" for integers.
|
||||
* intltsel - Selectivity of "<" (also "<=") for integers.
|
||||
* Should work for both longs and shorts.
|
||||
*/
|
||||
float64
|
||||
intltsel(Oid opid,
|
||||
Oid relid,
|
||||
AttrNumber attno,
|
||||
int32 value,
|
||||
Datum value,
|
||||
int32 flag)
|
||||
{
|
||||
float64 result;
|
||||
char *highchar,
|
||||
*lowchar;
|
||||
long val,
|
||||
high,
|
||||
low,
|
||||
top,
|
||||
bottom;
|
||||
|
||||
result = (float64) palloc(sizeof(float64data));
|
||||
if (NONVALUE(attno) || NONVALUE(relid))
|
||||
*result = 1.0 / 3;
|
||||
if (! (flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
else
|
||||
{
|
||||
/* XXX val = atol(value); */
|
||||
val = value;
|
||||
gethilokey(relid, (int) attno, opid, &highchar, &lowchar);
|
||||
if (*highchar == 'n' || *lowchar == 'n')
|
||||
HeapTuple oprtuple;
|
||||
Oid ltype,
|
||||
rtype;
|
||||
Oid typid;
|
||||
int typlen;
|
||||
bool typbyval;
|
||||
int32 typmod;
|
||||
Datum hival,
|
||||
loval;
|
||||
long val,
|
||||
high,
|
||||
low,
|
||||
numerator,
|
||||
denominator;
|
||||
|
||||
/* get left and right datatypes of the operator */
|
||||
oprtuple = get_operator_tuple(opid);
|
||||
if (! HeapTupleIsValid(oprtuple))
|
||||
elog(ERROR, "intltsel: no tuple for operator %u", opid);
|
||||
ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
|
||||
rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
|
||||
|
||||
/*
|
||||
* TEMPORARY HACK: this code is currently getting called for
|
||||
* a bunch of non-integral types. Give a default estimate if
|
||||
* either side is not pass-by-val. Need better solution.
|
||||
*/
|
||||
if (! get_typbyval(ltype) || ! get_typbyval(rtype))
|
||||
{
|
||||
*result = 1.0 / 3.0;
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
high = atol(highchar);
|
||||
low = atol(lowchar);
|
||||
if ((flag & SEL_RIGHT && val < low) ||
|
||||
(!(flag & SEL_RIGHT) && val > high))
|
||||
{
|
||||
float32data nvals;
|
||||
|
||||
nvals = getattdisbursion(relid, (int) attno);
|
||||
if (nvals == 0)
|
||||
*result = 1.0 / 3.0;
|
||||
else
|
||||
{
|
||||
*result = 3.0 * (float64data) nvals;
|
||||
if (*result > 1.0)
|
||||
*result = 1;
|
||||
}
|
||||
/* Deduce type of the constant, and convert to uniform "long" format.
|
||||
* Note that constant might well be a different type than attribute.
|
||||
* XXX this ought to use a type-specific "convert to double" op.
|
||||
*/
|
||||
typid = (flag & SEL_RIGHT) ? rtype : ltype;
|
||||
switch (get_typlen(typid))
|
||||
{
|
||||
case 1:
|
||||
val = (long) DatumGetUInt8(value);
|
||||
break;
|
||||
case 2:
|
||||
val = (long) DatumGetInt16(value);
|
||||
break;
|
||||
case 4:
|
||||
val = (long) DatumGetInt32(value);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "intltsel: unsupported type %u", typid);
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Now get info about the attribute */
|
||||
getattproperties(relid, attno,
|
||||
&typid, &typlen, &typbyval, &typmod);
|
||||
|
||||
if (! getattstatistics(relid, attno, typid, typmod,
|
||||
NULL, NULL, NULL,
|
||||
&loval, &hival))
|
||||
{
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
/*
|
||||
* Convert loval/hival to common "long int" representation.
|
||||
*/
|
||||
switch (typlen)
|
||||
{
|
||||
case 1:
|
||||
low = (long) DatumGetUInt8(loval);
|
||||
high = (long) DatumGetUInt8(hival);
|
||||
break;
|
||||
case 2:
|
||||
low = (long) DatumGetInt16(loval);
|
||||
high = (long) DatumGetInt16(hival);
|
||||
break;
|
||||
case 4:
|
||||
low = (long) DatumGetInt32(loval);
|
||||
high = (long) DatumGetInt32(hival);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "intltsel: unsupported type %u", typid);
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
if (val < low || val > high)
|
||||
{
|
||||
/* If given value is outside the statistical range,
|
||||
* assume we have out-of-date stats and return a default guess.
|
||||
* We could return a small or large value if we trusted the stats
|
||||
* more. XXX change this eventually.
|
||||
*/
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
}
|
||||
else
|
||||
{
|
||||
bottom = high - low;
|
||||
if (bottom == 0)
|
||||
++bottom;
|
||||
denominator = high - low;
|
||||
if (denominator <= 0)
|
||||
denominator = 1;
|
||||
if (flag & SEL_RIGHT)
|
||||
top = val - low;
|
||||
numerator = val - low;
|
||||
else
|
||||
top = high - val;
|
||||
if (top > bottom)
|
||||
numerator = high - val;
|
||||
if (numerator <= 0) /* never return a zero estimate! */
|
||||
numerator = 1;
|
||||
if (numerator >= denominator)
|
||||
*result = 1.0;
|
||||
else
|
||||
{
|
||||
if (top == 0)
|
||||
++top;
|
||||
*result = ((1.0 * top) / bottom);
|
||||
}
|
||||
*result = (double) numerator / (double) denominator;
|
||||
}
|
||||
if (! typbyval)
|
||||
{
|
||||
pfree(DatumGetPointer(hival));
|
||||
pfree(DatumGetPointer(loval));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* intgtsel - Selectivity of ">" for integers.
|
||||
* intgtsel - Selectivity of ">" (also ">=") for integers.
|
||||
* Should work for both longs and shorts.
|
||||
*/
|
||||
float64
|
||||
intgtsel(Oid opid,
|
||||
Oid relid,
|
||||
AttrNumber attno,
|
||||
int32 value,
|
||||
Datum value,
|
||||
int32 flag)
|
||||
{
|
||||
float64 result;
|
||||
int notflag;
|
||||
|
||||
if (flag & 0)
|
||||
notflag = flag & ~SEL_RIGHT;
|
||||
else
|
||||
notflag = flag | SEL_RIGHT;
|
||||
result = intltsel(opid, relid, attno, value, (int32) notflag);
|
||||
/* Compute selectivity of "<", then invert --- but only if we
|
||||
* were able to produce a non-default estimate.
|
||||
*/
|
||||
result = intltsel(opid, relid, attno, value, flag);
|
||||
if (*result != DEFAULT_INEQ_SEL)
|
||||
*result = 1.0 - *result;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -181,7 +367,7 @@ eqjoinsel(Oid opid,
|
|||
AttrNumber attno2)
|
||||
{
|
||||
float64 result;
|
||||
float32data num1,
|
||||
float64data num1,
|
||||
num2,
|
||||
max;
|
||||
|
||||
|
@ -191,13 +377,13 @@ eqjoinsel(Oid opid,
|
|||
*result = 0.1;
|
||||
else
|
||||
{
|
||||
num1 = getattdisbursion(relid1, (int) attno1);
|
||||
num2 = getattdisbursion(relid2, (int) attno2);
|
||||
num1 = getattdisbursion(relid1, attno1);
|
||||
num2 = getattdisbursion(relid2, attno2);
|
||||
max = (num1 > num2) ? num1 : num2;
|
||||
if (max == 0)
|
||||
if (max <= 0)
|
||||
*result = 1.0;
|
||||
else
|
||||
*result = (float64data) max;
|
||||
*result = max;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -220,7 +406,7 @@ neqjoinsel(Oid opid,
|
|||
}
|
||||
|
||||
/*
|
||||
* intltjoinsel - Join selectivity of "<"
|
||||
* intltjoinsel - Join selectivity of "<" and "<="
|
||||
*/
|
||||
float64
|
||||
intltjoinsel(Oid opid,
|
||||
|
@ -232,12 +418,12 @@ intltjoinsel(Oid opid,
|
|||
float64 result;
|
||||
|
||||
result = (float64) palloc(sizeof(float64data));
|
||||
*result = 1.0 / 3.0;
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* intgtjoinsel - Join selectivity of ">"
|
||||
* intgtjoinsel - Join selectivity of ">" and ">="
|
||||
*/
|
||||
float64
|
||||
intgtjoinsel(Oid opid,
|
||||
|
@ -249,29 +435,188 @@ intgtjoinsel(Oid opid,
|
|||
float64 result;
|
||||
|
||||
result = (float64) palloc(sizeof(float64data));
|
||||
*result = 1.0 / 3.0;
|
||||
*result = DEFAULT_INEQ_SEL;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* getattdisbursion - Retrieves the number of values within an attribute.
|
||||
*
|
||||
* Note:
|
||||
* getattdisbursion and gethilokey both currently use keyed
|
||||
* relation scans and amgetattr. Alternatively,
|
||||
* the relation scan could be non-keyed and the tuple
|
||||
* returned could be cast (struct X *) tuple + tuple->t_hoff.
|
||||
* The first method is good for testing the implementation,
|
||||
* but the second may ultimately be faster?!? In any case,
|
||||
* using the cast instead of amgetattr would be
|
||||
* more efficient. However, the cast will not work
|
||||
* for gethilokey which accesses stahikey in struct statistic.
|
||||
* getattproperties
|
||||
* Retrieve pg_attribute properties for an attribute,
|
||||
* including type OID, type len, type byval flag, typmod.
|
||||
*/
|
||||
static float32data
|
||||
static void
|
||||
getattproperties(Oid relid, AttrNumber attnum,
|
||||
Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
|
||||
{
|
||||
HeapTuple atp;
|
||||
Form_pg_attribute att_tup;
|
||||
|
||||
atp = SearchSysCacheTuple(ATTNUM,
|
||||
ObjectIdGetDatum(relid),
|
||||
Int16GetDatum(attnum),
|
||||
0, 0);
|
||||
if (! HeapTupleIsValid(atp))
|
||||
elog(ERROR, "getattproperties: no attribute tuple %u %d",
|
||||
relid, (int) attnum);
|
||||
att_tup = (Form_pg_attribute) GETSTRUCT(atp);
|
||||
|
||||
*typid = att_tup->atttypid;
|
||||
*typlen = att_tup->attlen;
|
||||
*typbyval = att_tup->attbyval;
|
||||
*typmod = att_tup->atttypmod;
|
||||
}
|
||||
|
||||
/*
|
||||
* getattstatistics
|
||||
* Retrieve the pg_statistic data for an attribute.
|
||||
* Returns 'false' if no stats are available.
|
||||
*
|
||||
* Inputs:
|
||||
* 'relid' and 'attnum' are the relation and attribute number.
|
||||
* 'typid' and 'typmod' are the type and typmod of the column,
|
||||
* which the caller must already have looked up.
|
||||
*
|
||||
* Outputs:
|
||||
* The available stats are nullfrac, commonfrac, commonval, loval, hival.
|
||||
* The caller need not retrieve all five --- pass NULL pointers for the
|
||||
* unwanted values.
|
||||
*
|
||||
* commonval, loval, hival are returned as Datums holding the internal
|
||||
* representation of the values. (Note that these should be pfree'd
|
||||
* after use if the data type is not by-value.)
|
||||
*
|
||||
* XXX currently, this does a linear search of pg_statistic because there
|
||||
* is no index nor syscache for pg_statistic. FIX THIS!
|
||||
*/
|
||||
static bool
|
||||
getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod,
|
||||
double *nullfrac,
|
||||
double *commonfrac,
|
||||
Datum *commonval,
|
||||
Datum *loval,
|
||||
Datum *hival)
|
||||
{
|
||||
Relation rel;
|
||||
HeapScanDesc scan;
|
||||
static ScanKeyData key[2] = {
|
||||
{0, Anum_pg_statistic_starelid, F_OIDEQ, {0, 0, F_OIDEQ}},
|
||||
{0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}}
|
||||
};
|
||||
bool isnull;
|
||||
HeapTuple tuple;
|
||||
HeapTuple typeTuple;
|
||||
FmgrInfo inputproc;
|
||||
|
||||
rel = heap_openr(StatisticRelationName);
|
||||
|
||||
key[0].sk_argument = ObjectIdGetDatum(relid);
|
||||
key[1].sk_argument = Int16GetDatum((int16) attnum);
|
||||
|
||||
scan = heap_beginscan(rel, 0, SnapshotNow, 2, key);
|
||||
tuple = heap_getnext(scan, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
{
|
||||
/* no such stats entry */
|
||||
heap_endscan(scan);
|
||||
heap_close(rel);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We assume that there will only be one entry in pg_statistic
|
||||
* for the given rel/att. Someday, VACUUM might store more than one...
|
||||
*/
|
||||
if (nullfrac)
|
||||
*nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
|
||||
if (commonfrac)
|
||||
*commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
|
||||
|
||||
/* Get the type input proc for the column datatype */
|
||||
typeTuple = SearchSysCacheTuple(TYPOID,
|
||||
ObjectIdGetDatum(typid),
|
||||
0, 0, 0);
|
||||
if (! HeapTupleIsValid(typeTuple))
|
||||
elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
|
||||
typid);
|
||||
fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
|
||||
|
||||
/* Values are variable-length fields, so cannot access as struct fields.
|
||||
* Must do it the hard way with heap_getattr.
|
||||
*/
|
||||
if (commonval)
|
||||
{
|
||||
text *val = (text *) heap_getattr(tuple,
|
||||
Anum_pg_statistic_stacommonval,
|
||||
RelationGetDescr(rel),
|
||||
&isnull);
|
||||
if (isnull)
|
||||
{
|
||||
elog(DEBUG, "getattstatistics: stacommonval is null");
|
||||
*commonval = PointerGetDatum(NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *strval = textout(val);
|
||||
*commonval = (Datum)
|
||||
(*fmgr_faddr(&inputproc)) (strval, typid, typmod);
|
||||
pfree(strval);
|
||||
}
|
||||
}
|
||||
|
||||
if (loval)
|
||||
{
|
||||
text *val = (text *) heap_getattr(tuple,
|
||||
Anum_pg_statistic_staloval,
|
||||
RelationGetDescr(rel),
|
||||
&isnull);
|
||||
if (isnull)
|
||||
{
|
||||
elog(DEBUG, "getattstatistics: staloval is null");
|
||||
*loval = PointerGetDatum(NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *strval = textout(val);
|
||||
*loval = (Datum)
|
||||
(*fmgr_faddr(&inputproc)) (strval, typid, typmod);
|
||||
pfree(strval);
|
||||
}
|
||||
}
|
||||
|
||||
if (hival)
|
||||
{
|
||||
text *val = (text *) heap_getattr(tuple,
|
||||
Anum_pg_statistic_stahival,
|
||||
RelationGetDescr(rel),
|
||||
&isnull);
|
||||
if (isnull)
|
||||
{
|
||||
elog(DEBUG, "getattstatistics: stahival is null");
|
||||
*hival = PointerGetDatum(NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *strval = textout(val);
|
||||
*hival = (Datum)
|
||||
(*fmgr_faddr(&inputproc)) (strval, typid, typmod);
|
||||
pfree(strval);
|
||||
}
|
||||
}
|
||||
|
||||
heap_endscan(scan);
|
||||
heap_close(rel);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* getattdisbursion
|
||||
* Retrieve the disbursion statistic for an attribute,
|
||||
* or produce an estimate if no info is available.
|
||||
*/
|
||||
static double
|
||||
getattdisbursion(Oid relid, AttrNumber attnum)
|
||||
{
|
||||
HeapTuple atp;
|
||||
float32data nvals;
|
||||
double disbursion;
|
||||
int32 ntuples;
|
||||
|
||||
atp = SearchSysCacheTuple(ATTNUM,
|
||||
|
@ -280,98 +625,40 @@ getattdisbursion(Oid relid, AttrNumber attnum)
|
|||
0, 0);
|
||||
if (!HeapTupleIsValid(atp))
|
||||
{
|
||||
/* this should not happen */
|
||||
elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
|
||||
relid, attnum);
|
||||
return 0;
|
||||
return 0.1;
|
||||
}
|
||||
nvals = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
|
||||
if (nvals > 0)
|
||||
return nvals;
|
||||
|
||||
disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
|
||||
if (disbursion > 0.0)
|
||||
return disbursion;
|
||||
|
||||
/* VACUUM ANALYZE has not stored a disbursion statistic for us.
|
||||
* Produce an estimate = 1/numtuples. This may produce
|
||||
* unreasonably small estimates for large tables, so limit
|
||||
* the estimate to no less than 0.01.
|
||||
*/
|
||||
atp = SearchSysCacheTuple(RELOID,
|
||||
ObjectIdGetDatum(relid),
|
||||
0, 0, 0);
|
||||
|
||||
/*
|
||||
* XXX -- use number of tuples as number of distinctive values just
|
||||
* for now, in case number of distinctive values is not cached
|
||||
*/
|
||||
if (!HeapTupleIsValid(atp))
|
||||
{
|
||||
/* this should not happen */
|
||||
elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
|
||||
return 0;
|
||||
return 0.1;
|
||||
}
|
||||
|
||||
ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
|
||||
/* Look above how nvals is used. - vadim 04/09/97 */
|
||||
|
||||
if (ntuples > 0)
|
||||
nvals = 1.0 / ntuples;
|
||||
disbursion = 1.0 / (double) ntuples;
|
||||
|
||||
return nvals;
|
||||
}
|
||||
if (disbursion < 0.01)
|
||||
disbursion = 0.01;
|
||||
|
||||
/*
|
||||
* gethilokey - Returns a pointer to strings containing
|
||||
* the high and low keys within an attribute.
|
||||
*
|
||||
* Currently returns "0", and "0" in high and low if the statistic
|
||||
* catalog does not contain the proper tuple. Eventually, the
|
||||
* statistic demon should have the tuple maintained, and it should
|
||||
* elog() if the tuple is missing.
|
||||
*
|
||||
* XXX Question: is this worth sticking in the catalog caches,
|
||||
* or will this get invalidated too often?
|
||||
*/
|
||||
static void
|
||||
gethilokey(Oid relid,
|
||||
AttrNumber attnum,
|
||||
Oid opid,
|
||||
char **high,
|
||||
char **low)
|
||||
{
|
||||
Relation rel;
|
||||
HeapScanDesc scan;
|
||||
static ScanKeyData key[3] = {
|
||||
{0, Anum_pg_statistic_starelid, F_OIDEQ, {0, 0, F_OIDEQ}},
|
||||
{0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}},
|
||||
{0, Anum_pg_statistic_staop, F_OIDEQ, {0, 0, F_OIDEQ}}
|
||||
};
|
||||
bool isnull;
|
||||
HeapTuple tuple;
|
||||
|
||||
rel = heap_openr(StatisticRelationName);
|
||||
|
||||
key[0].sk_argument = ObjectIdGetDatum(relid);
|
||||
key[1].sk_argument = Int16GetDatum((int16) attnum);
|
||||
key[2].sk_argument = ObjectIdGetDatum(opid);
|
||||
scan = heap_beginscan(rel, 0, SnapshotNow, 3, key);
|
||||
tuple = heap_getnext(scan, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
{
|
||||
*high = "n";
|
||||
*low = "n";
|
||||
|
||||
/*
|
||||
* XXX elog(ERROR, "gethilokey: statistic tuple not
|
||||
* found");
|
||||
*/
|
||||
return;
|
||||
}
|
||||
*high = textout((struct varlena *)
|
||||
heap_getattr(tuple,
|
||||
Anum_pg_statistic_stahikey,
|
||||
RelationGetDescr(rel),
|
||||
&isnull));
|
||||
if (isnull)
|
||||
elog(DEBUG, "gethilokey: high key is null");
|
||||
*low = textout((struct varlena *)
|
||||
heap_getattr(tuple,
|
||||
Anum_pg_statistic_stalokey,
|
||||
RelationGetDescr(rel),
|
||||
&isnull));
|
||||
if (isnull)
|
||||
elog(DEBUG, "gethilokey: low key is null");
|
||||
heap_endscan(scan);
|
||||
heap_close(rel);
|
||||
return disbursion;
|
||||
}
|
||||
|
||||
float64
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_statistic.h,v 1.6 1999/02/13 23:21:15 momjian Exp $
|
||||
* $Id: pg_statistic.h,v 1.7 1999/08/01 04:54:21 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* the genbki.sh script reads this file and generates .bki
|
||||
|
@ -32,11 +32,32 @@
|
|||
*/
|
||||
CATALOG(pg_statistic)
|
||||
{
|
||||
/*
 * NOTE(review): starelid, staattnum and staop are each declared twice in
 * this span.  The first, uncommented group together with stalokey and
 * stahikey presumably shows the removed side of the diff, and the
 * commented declarations that follow the added side -- confirm against
 * the checked-in header before relying on this layout.
 */
Oid starelid;
|
||||
int2 staattnum;
|
||||
Oid staop;
|
||||
text stalokey; /* VARIABLE LENGTH FIELD */
|
||||
text stahikey; /* VARIABLE LENGTH FIELD */
|
||||
/* These fields form the unique key for the entry: */
|
||||
Oid starelid; /* relation containing attribute */
|
||||
int2 staattnum; /* attribute (column) stats are for */
|
||||
Oid staop; /* '<' comparison op used for lo/hi vals */
|
||||
/* Note: the current VACUUM code will never produce more than one entry
|
||||
* per column, but in theory there could be multiple entries if a datatype
|
||||
* has more than one useful ordering operator. Also, the current code
|
||||
* will not write an entry unless it found at least one non-NULL value
|
||||
* in the column; so the remaining fields will never be NULL.
|
||||
*/
|
||||
|
||||
/* These fields contain the stats about the column indicated by the key */
|
||||
float4 stanullfrac; /* the fraction of the entries that are NULL */
|
||||
float4 stacommonfrac; /* the fraction that are the most common val */
|
||||
|
||||
/* THE REST OF THESE ARE VARIABLE LENGTH FIELDS.
|
||||
* They cannot be accessed as C struct entries; you have to use the
|
||||
* full field access machinery (heap_getattr) for them.
|
||||
*
|
||||
* All three of these are text representations of data values of the
|
||||
* column's data type. To re-create the actual Datum, do
|
||||
* datatypein(textout(givenvalue)).
|
||||
*/
|
||||
text stacommonval; /* most common non-null value in column */
|
||||
text staloval; /* smallest non-null value in column */
|
||||
text stahival; /* largest non-null value in column */
|
||||
} FormData_pg_statistic;
|
||||
|
||||
/* ----------------
|
||||
|
@ -50,11 +71,14 @@ typedef FormData_pg_statistic *Form_pg_statistic;
|
|||
* compiler constants for pg_statistic
|
||||
* ----------------
|
||||
*/
|
||||
#define Natts_pg_statistic 5
|
||||
#define Natts_pg_statistic 8
|
||||
#define Anum_pg_statistic_starelid 1
|
||||
#define Anum_pg_statistic_staattnum 2
|
||||
#define Anum_pg_statistic_staop 3
|
||||
#define Anum_pg_statistic_stalokey 4
|
||||
#define Anum_pg_statistic_stahikey 5
|
||||
#define Anum_pg_statistic_stanullfrac 4
|
||||
#define Anum_pg_statistic_stacommonfrac 5
|
||||
#define Anum_pg_statistic_stacommonval 6
|
||||
#define Anum_pg_statistic_staloval 7
|
||||
#define Anum_pg_statistic_stahival 8
|
||||
|
||||
#endif /* PG_STATISTIC_H */
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: vacuum.h,v 1.22 1999/07/15 15:21:03 momjian Exp $
|
||||
* $Id: vacuum.h,v 1.23 1999/08/01 04:54:25 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -67,22 +67,23 @@ typedef struct
|
|||
guess2,
|
||||
max,
|
||||
min;
|
||||
int16 best_len,
|
||||
int best_len,
|
||||
guess1_len,
|
||||
guess2_len,
|
||||
max_len,
|
||||
min_len;
|
||||
int32 best_cnt,
|
||||
long best_cnt,
|
||||
guess1_cnt,
|
||||
guess1_hits,
|
||||
guess2_hits,
|
||||
null_cnt,
|
||||
nonnull_cnt;
|
||||
int32 max_cnt,
|
||||
nonnull_cnt,
|
||||
max_cnt,
|
||||
min_cnt;
|
||||
FmgrInfo f_cmpeq,
|
||||
f_cmplt,
|
||||
f_cmpgt;
|
||||
Oid op_cmplt;
|
||||
regproc outfunc;
|
||||
bool initialized;
|
||||
} VacAttrStats;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: builtins.h,v 1.84 1999/07/16 17:07:39 momjian Exp $
|
||||
* $Id: builtins.h,v 1.85 1999/08/01 04:54:20 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This should normally only be included by fmgr.h.
|
||||
|
@ -372,10 +372,10 @@ extern Oid regproctooid(RegProcedure rp);
|
|||
#define RegprocToOid(rp) regproctooid(rp)
|
||||
|
||||
/* selfuncs.c */
|
||||
extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag);
|
||||
extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag);
|
||||
extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag);
|
||||
extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag);
|
||||
extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
|
||||
extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
|
||||
extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
|
||||
extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
|
||||
extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
|
||||
extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
|
||||
extern float64 intltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
|
||||
|
|
Loading…
Reference in New Issue