Clean up EXPLAIN's handling of per-worker details.

Previously, it was possible for EXPLAIN ANALYZE of a parallel query
to produce several different "Workers" fields for a single plan node,
because different portions of explain.c independently generated
per-worker data and wrapped that output in separate fields.  This
is pretty bogus, especially for the structured output formats: even
if it's not technically illegal, most programs would have a hard time
dealing with such data.

To improve matters, add infrastructure that allows redirecting
per-worker values into a side data structure, and then collect that
data into a single "Workers" field after we've finished running all
the relevant code for a given plan node.

There are a few visible side-effects:

* In text format, instead of something like

  Sort Method: external merge  Disk: 4920kB
  Worker 0:  Sort Method: external merge  Disk: 5880kB
  Worker 1:  Sort Method: external merge  Disk: 5920kB
  Buffers: shared hit=682 read=10188, temp read=1415 written=2101
  Worker 0:  actual time=130.058..130.324 rows=1324 loops=1
    Buffers: shared hit=337 read=3489, temp read=505 written=739
  Worker 1:  actual time=130.273..130.512 rows=1297 loops=1
    Buffers: shared hit=345 read=3507, temp read=505 written=744

you get

  Sort Method: external merge  Disk: 4920kB
  Buffers: shared hit=682 read=10188, temp read=1415 written=2101
  Worker 0:  actual time=130.058..130.324 rows=1324 loops=1
    Sort Method: external merge  Disk: 5880kB
    Buffers: shared hit=337 read=3489, temp read=505 written=739
  Worker 1:  actual time=130.273..130.512 rows=1297 loops=1
    Sort Method: external merge  Disk: 5920kB
    Buffers: shared hit=345 read=3507, temp read=505 written=744

* When JIT is enabled, any relevant per-worker JIT stats are attached
to the child node of the Gather or Gather Merge node, which is where
the other per-worker output has always been.  Previously, that info
was attached directly to a Gather node, or missed entirely for Gather
Merge.

* A query's summary JIT data no longer includes a bogus
"Worker Number: -1" field.

A notable code-level change is that indenting for lines of text-format
output should now be handled by calling "ExplainIndentText(es)",
instead of hard-wiring how much space to emit.  This seems a good deal
cleaner anyway.

This patch also adds a new "explain.sql" regression test script that's
dedicated to testing EXPLAIN.  There is more that can be done in that
line, certainly, but for now it just adds some coverage of the XML and
YAML output formats, which had been completely untested.

Although this is surely a bug fix, it's not clear that people would
be happy with rearranging EXPLAIN output in a minor release, so apply
to HEAD only.

Maciek Sakrejda and Tom Lane, based on an idea of Andres Freund's;
reviewed by Georgios Kokolatos

Discussion: https://postgr.es/m/CAOtHd0AvAA8CLB9Xz0wnxu1U=zJCKrr1r4QwwXi_kcQsHDVU=Q@mail.gmail.com
This commit is contained in:
Tom Lane 2020-01-25 18:16:42 -05:00
parent 13661ddd7e
commit 1001368497
6 changed files with 876 additions and 130 deletions

View File

@ -57,6 +57,8 @@ static void ExplainOneQuery(Query *query, int cursorOptions,
IntoClause *into, ExplainState *es,
const char *queryString, ParamListInfo params,
QueryEnvironment *queryEnv);
static void ExplainPrintJIT(ExplainState *es, int jit_flags,
JitInstrumentation *ji);
static void report_triggers(ResultRelInfo *rInfo, bool show_relname,
ExplainState *es);
static double elapsed_time(instr_time *starttime);
@ -123,11 +125,20 @@ static void ExplainSubPlans(List *plans, List *ancestors,
const char *relationship, ExplainState *es);
static void ExplainCustomChildren(CustomScanState *css,
List *ancestors, ExplainState *es);
static ExplainWorkersState *ExplainCreateWorkersState(int num_workers);
static void ExplainOpenWorker(int n, ExplainState *es);
static void ExplainCloseWorker(int n, ExplainState *es);
static void ExplainFlushWorkersState(ExplainState *es);
static void ExplainProperty(const char *qlabel, const char *unit,
const char *value, bool numeric, ExplainState *es);
static void ExplainOpenSetAsideGroup(const char *objtype, const char *labelname,
bool labeled, int depth, ExplainState *es);
static void ExplainSaveGroup(ExplainState *es, int depth, int *state_save);
static void ExplainRestoreGroup(ExplainState *es, int depth, int *state_save);
static void ExplainDummyGroup(const char *objtype, const char *labelname,
ExplainState *es);
static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es);
static void ExplainIndentText(ExplainState *es);
static void ExplainJSONLineEnding(ExplainState *es);
static void ExplainYAMLLineStarting(ExplainState *es);
static void escape_yaml(StringInfo buf, const char *str);
@ -790,31 +801,22 @@ ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc)
if (queryDesc->estate->es_jit_worker_instr)
InstrJitAgg(&ji, queryDesc->estate->es_jit_worker_instr);
ExplainPrintJIT(es, queryDesc->estate->es_jit_flags, &ji, -1);
ExplainPrintJIT(es, queryDesc->estate->es_jit_flags, &ji);
}
/*
* ExplainPrintJIT -
* Append information about JITing to es->str.
*
* Can be used to print the JIT instrumentation of the backend (worker_num =
* -1) or that of a specific worker (worker_num = ...).
*/
void
ExplainPrintJIT(ExplainState *es, int jit_flags,
JitInstrumentation *ji, int worker_num)
static void
ExplainPrintJIT(ExplainState *es, int jit_flags, JitInstrumentation *ji)
{
instr_time total_time;
bool for_workers = (worker_num >= 0);
/* don't print information if no JITing happened */
if (!ji || ji->created_functions == 0)
return;
/* don't print per-worker info if we're supposed to hide that */
if (for_workers && es->hide_workers)
return;
/* calculate total time */
INSTR_TIME_SET_ZERO(total_time);
INSTR_TIME_ADD(total_time, ji->generation_counter);
@ -827,16 +829,13 @@ ExplainPrintJIT(ExplainState *es, int jit_flags,
/* for higher density, open code the text output format */
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
if (for_workers)
appendStringInfo(es->str, "JIT for worker %u:\n", worker_num);
else
appendStringInfoString(es->str, "JIT:\n");
es->indent += 1;
ExplainIndentText(es);
appendStringInfoString(es->str, "JIT:\n");
es->indent++;
ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
"Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
"Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
@ -845,7 +844,7 @@ ExplainPrintJIT(ExplainState *es, int jit_flags,
if (es->analyze && es->timing)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str,
"Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
"Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
@ -855,11 +854,10 @@ ExplainPrintJIT(ExplainState *es, int jit_flags,
"Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
}
es->indent -= 1;
es->indent--;
}
else
{
ExplainPropertyInteger("Worker Number", NULL, worker_num, es);
ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);
ExplainOpenGroup("Options", "Options", true, es);
@ -1074,9 +1072,10 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
* optional name to be attached to the node.
*
* In text format, es->indent is controlled in this function since we only
* want it to change at plan-node boundaries. In non-text formats, es->indent
* corresponds to the nesting depth of logical output groups, and therefore
* is controlled by ExplainOpenGroup/ExplainCloseGroup.
* want it to change at plan-node boundaries (but a few subroutines will
* transiently increment it). In non-text formats, es->indent corresponds
* to the nesting depth of logical output groups, and therefore is controlled
* by ExplainOpenGroup/ExplainCloseGroup.
*/
static void
ExplainNode(PlanState *planstate, List *ancestors,
@ -1090,9 +1089,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
const char *partialmode = NULL;
const char *operation = NULL;
const char *custom_name = NULL;
ExplainWorkersState *save_workers_state = es->workers_state;
int save_indent = es->indent;
bool haschildren;
/*
* Prepare per-worker output buffers, if needed. We'll append the data in
* these to the main output string further down.
*/
if (planstate->worker_instrument && es->analyze && !es->hide_workers)
es->workers_state = ExplainCreateWorkersState(planstate->worker_instrument->num_workers);
else
es->workers_state = NULL;
/* Identify plan node type, and print generic details */
switch (nodeTag(plan))
{
case T_Result:
@ -1324,13 +1334,13 @@ ExplainNode(PlanState *planstate, List *ancestors,
{
if (plan_name)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str, "%s\n", plan_name);
es->indent++;
}
if (es->indent)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfoString(es->str, "-> ");
es->indent += 2;
}
@ -1574,6 +1584,56 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->format == EXPLAIN_FORMAT_TEXT)
appendStringInfoChar(es->str, '\n');
/* prepare per-worker general execution details */
if (es->workers_state && es->verbose)
{
WorkerInstrumentation *w = planstate->worker_instrument;
for (int n = 0; n < w->num_workers; n++)
{
Instrumentation *instrument = &w->instrument[n];
double nloops = instrument->nloops;
double startup_ms;
double total_ms;
double rows;
if (nloops <= 0)
continue;
startup_ms = 1000.0 * instrument->startup / nloops;
total_ms = 1000.0 * instrument->total / nloops;
rows = instrument->ntuples / nloops;
ExplainOpenWorker(n, es);
if (es->format == EXPLAIN_FORMAT_TEXT)
{
ExplainIndentText(es);
if (es->timing)
appendStringInfo(es->str,
"actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
startup_ms, total_ms, rows, nloops);
else
appendStringInfo(es->str,
"actual rows=%.0f loops=%.0f\n",
rows, nloops);
}
else
{
if (es->timing)
{
ExplainPropertyFloat("Actual Startup Time", "ms",
startup_ms, 3, es);
ExplainPropertyFloat("Actual Total Time", "ms",
total_ms, 3, es);
}
ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
}
ExplainCloseWorker(n, es);
}
}
/* target list */
if (es->verbose)
show_plan_tlist(planstate, ancestors, es);
@ -1684,24 +1744,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
nworkers, es);
}
/*
* Print per-worker Jit instrumentation. Use same conditions
* as for the leader's JIT instrumentation, see comment there.
*/
if (es->costs && es->verbose &&
outerPlanState(planstate)->worker_jit_instrument)
{
PlanState *child = outerPlanState(planstate);
int n;
SharedJitInstrumentation *w = child->worker_jit_instrument;
for (n = 0; n < w->num_workers; ++n)
{
ExplainPrintJIT(es, child->state->es_jit_flags,
&w->jit_instr[n], n);
}
}
if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT)
ExplainPropertyBool("Single Copy", gather->single_copy, es);
}
@ -1881,80 +1923,54 @@ ExplainNode(PlanState *planstate, List *ancestors,
break;
}
/*
* Prepare per-worker JIT instrumentation. As with the overall JIT
* summary, this is printed only if printing costs is enabled.
*/
if (es->workers_state && es->costs && es->verbose)
{
SharedJitInstrumentation *w = planstate->worker_jit_instrument;
if (w)
{
for (int n = 0; n < w->num_workers; n++)
{
ExplainOpenWorker(n, es);
ExplainPrintJIT(es, planstate->state->es_jit_flags,
&w->jit_instr[n]);
ExplainCloseWorker(n, es);
}
}
}
/* Show buffer usage */
if (es->buffers && planstate->instrument)
show_buffer_usage(es, &planstate->instrument->bufusage);
/* Show worker detail */
if (es->analyze && es->verbose && !es->hide_workers &&
planstate->worker_instrument)
/* Prepare per-worker buffer usage */
if (es->workers_state && es->buffers && es->verbose)
{
WorkerInstrumentation *w = planstate->worker_instrument;
bool opened_group = false;
int n;
for (n = 0; n < w->num_workers; ++n)
for (int n = 0; n < w->num_workers; n++)
{
Instrumentation *instrument = &w->instrument[n];
double nloops = instrument->nloops;
double startup_ms;
double total_ms;
double rows;
if (nloops <= 0)
continue;
startup_ms = 1000.0 * instrument->startup / nloops;
total_ms = 1000.0 * instrument->total / nloops;
rows = instrument->ntuples / nloops;
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfo(es->str, "Worker %d: ", n);
if (es->timing)
appendStringInfo(es->str,
"actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
startup_ms, total_ms, rows, nloops);
else
appendStringInfo(es->str,
"actual rows=%.0f loops=%.0f\n",
rows, nloops);
es->indent++;
if (es->buffers)
show_buffer_usage(es, &instrument->bufusage);
es->indent--;
}
else
{
if (!opened_group)
{
ExplainOpenGroup("Workers", "Workers", false, es);
opened_group = true;
}
ExplainOpenGroup("Worker", NULL, true, es);
ExplainPropertyInteger("Worker Number", NULL, n, es);
if (es->timing)
{
ExplainPropertyFloat("Actual Startup Time", "ms",
startup_ms, 3, es);
ExplainPropertyFloat("Actual Total Time", "ms",
total_ms, 3, es);
}
ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
if (es->buffers)
show_buffer_usage(es, &instrument->bufusage);
ExplainCloseGroup("Worker", NULL, true, es);
}
ExplainOpenWorker(n, es);
show_buffer_usage(es, &instrument->bufusage);
ExplainCloseWorker(n, es);
}
if (opened_group)
ExplainCloseGroup("Workers", "Workers", false, es);
}
/* Show per-worker details for this plan node, then pop that stack */
if (es->workers_state)
ExplainFlushWorkersState(es);
es->workers_state = save_workers_state;
/* Get ready to display the child plans */
haschildren = planstate->initPlan ||
outerPlanState(planstate) ||
@ -2525,7 +2541,7 @@ show_tablesample(TableSampleClause *tsc, PlanState *planstate,
{
bool first = true;
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str, "Sampling: %s (", method_name);
foreach(lc, params)
{
@ -2572,7 +2588,7 @@ show_sort_info(SortState *sortstate, ExplainState *es)
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n",
sortMethod, spaceType, spaceUsed);
}
@ -2588,12 +2604,14 @@ show_sort_info(SortState *sortstate, ExplainState *es)
* You might think we should just skip this stanza entirely when
* es->hide_workers is true, but then we'd get no sort-method output at
* all. We have to make it look like worker 0's data is top-level data.
* Currently, we only bother with that for text-format output.
* This is easily done by just skipping the OpenWorker/CloseWorker calls.
* Currently, we don't worry about the possibility that there are multiple
* workers in such a case; if there are, duplicate output fields will be
* emitted.
*/
if (sortstate->shared_info != NULL)
{
int n;
bool opened_group = false;
for (n = 0; n < sortstate->shared_info->num_workers; n++)
{
@ -2609,32 +2627,26 @@ show_sort_info(SortState *sortstate, ExplainState *es)
spaceType = tuplesort_space_type_name(sinstrument->spaceType);
spaceUsed = sinstrument->spaceUsed;
if (es->workers_state)
ExplainOpenWorker(n, es);
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
if (n > 0 || !es->hide_workers)
appendStringInfo(es->str, "Worker %d: ", n);
ExplainIndentText(es);
appendStringInfo(es->str,
"Sort Method: %s %s: %ldkB\n",
sortMethod, spaceType, spaceUsed);
}
else
{
if (!opened_group)
{
ExplainOpenGroup("Workers", "Workers", false, es);
opened_group = true;
}
ExplainOpenGroup("Worker", NULL, true, es);
ExplainPropertyInteger("Worker Number", NULL, n, es);
ExplainPropertyText("Sort Method", sortMethod, es);
ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
ExplainPropertyText("Sort Space Type", spaceType, es);
ExplainCloseGroup("Worker", NULL, true, es);
}
if (es->workers_state)
ExplainCloseWorker(n, es);
}
if (opened_group)
ExplainCloseGroup("Workers", "Workers", false, es);
}
}
@ -2721,7 +2733,7 @@ show_hash_info(HashState *hashstate, ExplainState *es)
else if (hinstrument.nbatch_original != hinstrument.nbatch ||
hinstrument.nbuckets_original != hinstrument.nbuckets)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str,
"Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n",
hinstrument.nbuckets,
@ -2732,7 +2744,7 @@ show_hash_info(HashState *hashstate, ExplainState *es)
}
else
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str,
"Buckets: %d Batches: %d Memory Usage: %ldkB\n",
hinstrument.nbuckets, hinstrument.nbatch,
@ -2758,7 +2770,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfoString(es->str, "Heap Blocks:");
if (planstate->exact_pages > 0)
appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
@ -2894,7 +2906,7 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
/* Show only positive counter values. */
if (has_shared || has_local || has_temp)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfoString(es->str, "Buffers:");
if (has_shared)
@ -2949,7 +2961,7 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
/* As above, show only positive counter values. */
if (has_timing)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfoString(es->str, "I/O Timings:");
if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
appendStringInfo(es->str, " read=%0.3f",
@ -3237,7 +3249,7 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
*/
if (es->format == EXPLAIN_FORMAT_TEXT)
{
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfoString(es->str,
fdwroutine ? foperation : operation);
}
@ -3426,6 +3438,158 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
}
/*
* Create a per-plan-node workspace for collecting per-worker data.
*
* Output related to each worker will be temporarily "set aside" into a
* separate buffer, which we'll merge into the main output stream once
* we've processed all data for the plan node. This makes it feasible to
* generate a coherent sub-group of fields for each worker, even though the
* code that produces the fields is in several different places in this file.
* Formatting of such a set-aside field group is managed by
* ExplainOpenSetAsideGroup and ExplainSaveGroup/ExplainRestoreGroup.
*/
static ExplainWorkersState *
ExplainCreateWorkersState(int num_workers)
{
ExplainWorkersState *wstate;
wstate = (ExplainWorkersState *) palloc(sizeof(ExplainWorkersState));
wstate->num_workers = num_workers;
wstate->worker_inited = (bool *) palloc0(num_workers * sizeof(bool));
wstate->worker_str = (StringInfoData *)
palloc0(num_workers * sizeof(StringInfoData));
wstate->worker_state_save = (int *) palloc(num_workers * sizeof(int));
return wstate;
}
/*
* Begin or resume output into the set-aside group for worker N.
*/
static void
ExplainOpenWorker(int n, ExplainState *es)
{
ExplainWorkersState *wstate = es->workers_state;
Assert(wstate);
Assert(n >= 0 && n < wstate->num_workers);
/* Save prior output buffer pointer */
wstate->prev_str = es->str;
if (!wstate->worker_inited[n])
{
/* First time through, so create the buffer for this worker */
initStringInfo(&wstate->worker_str[n]);
es->str = &wstate->worker_str[n];
/*
* Push suitable initial formatting state for this worker's field
* group. We allow one extra logical nesting level, since this group
* will eventually be wrapped in an outer "Workers" group.
*/
ExplainOpenSetAsideGroup("Worker", NULL, true, 2, es);
/*
* In non-TEXT formats we always emit a "Worker Number" field, even if
* there's no other data for this worker.
*/
if (es->format != EXPLAIN_FORMAT_TEXT)
ExplainPropertyInteger("Worker Number", NULL, n, es);
wstate->worker_inited[n] = true;
}
else
{
/* Resuming output for a worker we've already emitted some data for */
es->str = &wstate->worker_str[n];
/* Restore formatting state saved by last ExplainCloseWorker() */
ExplainRestoreGroup(es, 2, &wstate->worker_state_save[n]);
}
/*
* In TEXT format, prefix the first output line for this worker with
* "Worker N:". Then, any additional lines should be indented one more
* stop than the "Worker N" line is.
*/
if (es->format == EXPLAIN_FORMAT_TEXT)
{
if (es->str->len == 0)
{
ExplainIndentText(es);
appendStringInfo(es->str, "Worker %d: ", n);
}
es->indent++;
}
}
/*
* End output for worker N --- must pair with previous ExplainOpenWorker call
*/
static void
ExplainCloseWorker(int n, ExplainState *es)
{
ExplainWorkersState *wstate = es->workers_state;
Assert(wstate);
Assert(n >= 0 && n < wstate->num_workers);
Assert(wstate->worker_inited[n]);
/*
* Save formatting state in case we do another ExplainOpenWorker(), then
* pop the formatting stack.
*/
ExplainSaveGroup(es, 2, &wstate->worker_state_save[n]);
/*
* In TEXT format, if we didn't actually produce any output line(s) then
* truncate off the partial line emitted by ExplainOpenWorker. (This is
* to avoid bogus output if, say, show_buffer_usage chooses not to print
* anything for the worker.) Also fix up the indent level.
*/
if (es->format == EXPLAIN_FORMAT_TEXT)
{
while (es->str->len > 0 && es->str->data[es->str->len - 1] != '\n')
es->str->data[--(es->str->len)] = '\0';
es->indent--;
}
/* Restore prior output buffer pointer */
es->str = wstate->prev_str;
}
/*
* Print per-worker info for current node, then free the ExplainWorkersState.
*/
static void
ExplainFlushWorkersState(ExplainState *es)
{
ExplainWorkersState *wstate = es->workers_state;
ExplainOpenGroup("Workers", "Workers", false, es);
for (int i = 0; i < wstate->num_workers; i++)
{
if (wstate->worker_inited[i])
{
/* This must match previous ExplainOpenSetAsideGroup call */
ExplainOpenGroup("Worker", NULL, true, es);
appendStringInfoString(es->str, wstate->worker_str[i].data);
ExplainCloseGroup("Worker", NULL, true, es);
pfree(wstate->worker_str[i].data);
}
}
ExplainCloseGroup("Workers", "Workers", false, es);
pfree(wstate->worker_inited);
pfree(wstate->worker_str);
pfree(wstate->worker_state_save);
pfree(wstate);
}
/*
* Explain a property, such as sort keys or targets, that takes the form of
* a list of unlabeled items. "data" is a list of C strings.
@ -3439,7 +3603,7 @@ ExplainPropertyList(const char *qlabel, List *data, ExplainState *es)
switch (es->format)
{
case EXPLAIN_FORMAT_TEXT:
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
appendStringInfo(es->str, "%s: ", qlabel);
foreach(lc, data)
{
@ -3560,7 +3724,7 @@ ExplainProperty(const char *qlabel, const char *unit, const char *value,
switch (es->format)
{
case EXPLAIN_FORMAT_TEXT:
appendStringInfoSpaces(es->str, es->indent * 2);
ExplainIndentText(es);
if (unit)
appendStringInfo(es->str, "%s: %s %s\n", qlabel, value, unit);
else
@ -3751,6 +3915,117 @@ ExplainCloseGroup(const char *objtype, const char *labelname,
}
}
/*
* Open a group of related objects, without emitting actual data.
*
* Prepare the formatting state as though we were beginning a group with
* the identified properties, but don't actually emit anything. Output
* subsequent to this call can be redirected into a separate output buffer,
* and then eventually appended to the main output buffer after doing a
* regular ExplainOpenGroup call (with the same parameters).
*
* The extra "depth" parameter is the new group's depth compared to current.
* It could be more than one, in case the eventual output will be enclosed
* in additional nesting group levels. We assume we don't need to track
* formatting state for those levels while preparing this group's output.
*
* There is no ExplainCloseSetAsideGroup --- in current usage, we always
* pop this state with ExplainSaveGroup.
*/
static void
ExplainOpenSetAsideGroup(const char *objtype, const char *labelname,
bool labeled, int depth, ExplainState *es)
{
switch (es->format)
{
case EXPLAIN_FORMAT_TEXT:
/* nothing to do */
break;
case EXPLAIN_FORMAT_XML:
es->indent += depth;
break;
case EXPLAIN_FORMAT_JSON:
es->grouping_stack = lcons_int(0, es->grouping_stack);
es->indent += depth;
break;
case EXPLAIN_FORMAT_YAML:
if (labelname)
es->grouping_stack = lcons_int(1, es->grouping_stack);
else
es->grouping_stack = lcons_int(0, es->grouping_stack);
es->indent += depth;
break;
}
}
/*
* Pop one level of grouping state, allowing for a re-push later.
*
* This is typically used after ExplainOpenSetAsideGroup; pass the
* same "depth" used for that.
*
* This should not emit any output. If state needs to be saved,
* save it at *state_save. Currently, an integer save area is sufficient
* for all formats, but we might need to revisit that someday.
*/
static void
ExplainSaveGroup(ExplainState *es, int depth, int *state_save)
{
switch (es->format)
{
case EXPLAIN_FORMAT_TEXT:
/* nothing to do */
break;
case EXPLAIN_FORMAT_XML:
es->indent -= depth;
break;
case EXPLAIN_FORMAT_JSON:
es->indent -= depth;
*state_save = linitial_int(es->grouping_stack);
es->grouping_stack = list_delete_first(es->grouping_stack);
break;
case EXPLAIN_FORMAT_YAML:
es->indent -= depth;
*state_save = linitial_int(es->grouping_stack);
es->grouping_stack = list_delete_first(es->grouping_stack);
break;
}
}
/*
* Re-push one level of grouping state, undoing the effects of ExplainSaveGroup.
*/
static void
ExplainRestoreGroup(ExplainState *es, int depth, int *state_save)
{
switch (es->format)
{
case EXPLAIN_FORMAT_TEXT:
/* nothing to do */
break;
case EXPLAIN_FORMAT_XML:
es->indent += depth;
break;
case EXPLAIN_FORMAT_JSON:
es->grouping_stack = lcons_int(*state_save, es->grouping_stack);
es->indent += depth;
break;
case EXPLAIN_FORMAT_YAML:
es->grouping_stack = lcons_int(*state_save, es->grouping_stack);
es->indent += depth;
break;
}
}
/*
* Emit a "dummy" group that never has any members.
*
@ -3912,6 +4187,21 @@ ExplainXMLTag(const char *tagname, int flags, ExplainState *es)
appendStringInfoCharMacro(es->str, '\n');
}
/*
* Indent a text-format line.
*
* We indent by two spaces per indentation level. However, when emitting
* data for a parallel worker there might already be data on the current line
* (cf. ExplainOpenWorker); in that case, don't indent any more.
*/
static void
ExplainIndentText(ExplainState *es)
{
Assert(es->format == EXPLAIN_FORMAT_TEXT);
if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n')
appendStringInfoSpaces(es->str, es->indent * 2);
}
/*
* Emit a JSON line ending.
*

View File

@ -25,6 +25,15 @@ typedef enum ExplainFormat
EXPLAIN_FORMAT_YAML
} ExplainFormat;
typedef struct ExplainWorkersState
{
int num_workers; /* # of worker processes the plan used */
bool *worker_inited; /* per-worker state-initialized flags */
StringInfoData *worker_str; /* per-worker transient output buffers */
int *worker_state_save; /* per-worker grouping state save areas */
StringInfo prev_str; /* saved output buffer while redirecting */
} ExplainWorkersState;
typedef struct ExplainState
{
StringInfo str; /* output buffer */
@ -47,6 +56,8 @@ typedef struct ExplainState
List *deparse_cxt; /* context list for deparsing expressions */
Bitmapset *printed_subplans; /* ids of SubPlans we've printed */
bool hide_workers; /* set if we find an invisible Gather */
/* state related to the current plan node */
ExplainWorkersState *workers_state; /* needed if parallel plan */
} ExplainState;
/* Hook for plugins to get control in ExplainOneQuery() */
@ -84,8 +95,6 @@ extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);
extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc);
extern void ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc);
extern void ExplainPrintJIT(ExplainState *es, int jit_flags,
struct JitInstrumentation *jit_instr, int worker_num);
extern void ExplainQueryText(ExplainState *es, QueryDesc *queryDesc);

View File

@ -0,0 +1,357 @@
--
-- EXPLAIN
--
-- There are many test cases elsewhere that use EXPLAIN as a vehicle for
-- checking something else (usually planner behavior). This file is
-- concerned with testing EXPLAIN in its own right.
--
-- To produce stable regression test output, it's usually necessary to
-- ignore details such as exact costs or row counts. These filter
-- functions replace changeable output details with fixed strings.
create function explain_filter(text) returns setof text
language plpgsql as
$$
declare
ln text;
begin
for ln in execute $1
loop
-- Replace any numeric word with just 'N'
ln := regexp_replace(ln, '\m\d+\M', 'N', 'g');
-- In sort output, the above won't match units-suffixed numbers
ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
-- Text-mode buffers output varies depending on the system state
ln := regexp_replace(ln, '^( +Buffers: shared)( hit=N)?( read=N)?', '\1 [read]');
return next ln;
end loop;
end;
$$;
-- To produce valid JSON output, replace numbers with "0" or "0.0" not "N"
create function explain_filter_to_json(text) returns jsonb
language plpgsql as
$$
declare
data text := '';
ln text;
begin
for ln in execute $1
loop
-- Replace any numeric word with just '0'
ln := regexp_replace(ln, '\m\d+\M', '0', 'g');
data := data || ln;
end loop;
return data::jsonb;
end;
$$;
-- Simple cases
select explain_filter('explain select * from int8_tbl i8');
explain_filter
---------------------------------------------------------
Seq Scan on int8_tbl i8 (cost=N.N..N.N rows=N width=N)
(1 row)
select explain_filter('explain (analyze) select * from int8_tbl i8');
explain_filter
-----------------------------------------------------------------------------------------------
Seq Scan on int8_tbl i8 (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
Planning Time: N.N ms
Execution Time: N.N ms
(3 rows)
select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
explain_filter
------------------------------------------------------------------------------------------------------
Seq Scan on public.int8_tbl i8 (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
Output: q1, q2
Planning Time: N.N ms
Execution Time: N.N ms
(4 rows)
select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
explain_filter
-----------------------------------------------------------------------------------------------
Seq Scan on int8_tbl i8 (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
Buffers: shared [read]
Planning Time: N.N ms
Execution Time: N.N ms
(4 rows)
select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
explain_filter
------------------------------------
[ +
{ +
"Plan": { +
"Node Type": "Seq Scan", +
"Parallel Aware": false, +
"Relation Name": "int8_tbl",+
"Alias": "i8", +
"Startup Cost": N.N, +
"Total Cost": N.N, +
"Plan Rows": N, +
"Plan Width": N, +
"Actual Startup Time": N.N, +
"Actual Total Time": N.N, +
"Actual Rows": N, +
"Actual Loops": N, +
"Shared Hit Blocks": N, +
"Shared Read Blocks": N, +
"Shared Dirtied Blocks": N, +
"Shared Written Blocks": N, +
"Local Hit Blocks": N, +
"Local Read Blocks": N, +
"Local Dirtied Blocks": N, +
"Local Written Blocks": N, +
"Temp Read Blocks": N, +
"Temp Written Blocks": N +
}, +
"Planning Time": N.N, +
"Triggers": [ +
], +
"Execution Time": N.N +
} +
]
(1 row)
select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
explain_filter
--------------------------------------------------------
<explain xmlns="http://www.postgresql.org/N/explain"> +
<Query> +
<Plan> +
<Node-Type>Seq Scan</Node-Type> +
<Parallel-Aware>false</Parallel-Aware> +
<Relation-Name>int8_tbl</Relation-Name> +
<Alias>i8</Alias> +
<Startup-Cost>N.N</Startup-Cost> +
<Total-Cost>N.N</Total-Cost> +
<Plan-Rows>N</Plan-Rows> +
<Plan-Width>N</Plan-Width> +
<Actual-Startup-Time>N.N</Actual-Startup-Time> +
<Actual-Total-Time>N.N</Actual-Total-Time> +
<Actual-Rows>N</Actual-Rows> +
<Actual-Loops>N</Actual-Loops> +
<Shared-Hit-Blocks>N</Shared-Hit-Blocks> +
<Shared-Read-Blocks>N</Shared-Read-Blocks> +
<Shared-Dirtied-Blocks>N</Shared-Dirtied-Blocks>+
<Shared-Written-Blocks>N</Shared-Written-Blocks>+
<Local-Hit-Blocks>N</Local-Hit-Blocks> +
<Local-Read-Blocks>N</Local-Read-Blocks> +
<Local-Dirtied-Blocks>N</Local-Dirtied-Blocks> +
<Local-Written-Blocks>N</Local-Written-Blocks> +
<Temp-Read-Blocks>N</Temp-Read-Blocks> +
<Temp-Written-Blocks>N</Temp-Written-Blocks> +
</Plan> +
<Planning-Time>N.N</Planning-Time> +
<Triggers> +
</Triggers> +
<Execution-Time>N.N</Execution-Time> +
</Query> +
</explain>
(1 row)
select explain_filter('explain (analyze, buffers, format yaml) select * from int8_tbl i8');
explain_filter
-------------------------------
- Plan: +
Node Type: "Seq Scan" +
Parallel Aware: false +
Relation Name: "int8_tbl"+
Alias: "i8" +
Startup Cost: N.N +
Total Cost: N.N +
Plan Rows: N +
Plan Width: N +
Actual Startup Time: N.N +
Actual Total Time: N.N +
Actual Rows: N +
Actual Loops: N +
Shared Hit Blocks: N +
Shared Read Blocks: N +
Shared Dirtied Blocks: N +
Shared Written Blocks: N +
Local Hit Blocks: N +
Local Read Blocks: N +
Local Dirtied Blocks: N +
Local Written Blocks: N +
Temp Read Blocks: N +
Temp Written Blocks: N +
Planning Time: N.N +
Triggers: +
Execution Time: N.N
(1 row)
--
-- Test production of per-worker data
--
-- Unfortunately, because we don't know how many worker processes we'll
-- actually get (maybe none at all), we can't examine the "Workers" output
-- in any detail. We can check that it parses correctly as JSON, and then
-- remove it from the displayed results.
-- Serializable isolation would disable parallel query, so explicitly use an
-- arbitrary other level.
begin isolation level repeatable read;
-- encourage use of parallel plans
set parallel_setup_cost=0;
set parallel_tuple_cost=0;
set min_parallel_table_scan_size=0;
set max_parallel_workers_per_gather=4;
select jsonb_pretty(
explain_filter_to_json('explain (analyze, verbose, buffers, format json)
select * from tenk1 order by tenthous')
-- remove "Workers" node of the Seq Scan plan node
#- '{0,Plan,Plans,0,Plans,0,Workers}'
-- remove "Workers" node of the Sort plan node
#- '{0,Plan,Plans,0,Workers}'
-- Also remove its sort-type fields, as those aren't 100% stable
#- '{0,Plan,Plans,0,Sort Method}'
#- '{0,Plan,Plans,0,Sort Space Type}'
);
jsonb_pretty
-------------------------------------------------------------
[ +
{ +
"Plan": { +
"Plans": [ +
{ +
"Plans": [ +
{ +
"Alias": "tenk1", +
"Output": [ +
"unique1", +
"unique2", +
"two", +
"four", +
"ten", +
"twenty", +
"hundred", +
"thousand", +
"twothousand", +
"fivethous", +
"tenthous", +
"odd", +
"even", +
"stringu1", +
"stringu2", +
"string4" +
], +
"Schema": "public", +
"Node Type": "Seq Scan", +
"Plan Rows": 0, +
"Plan Width": 0, +
"Total Cost": 0.0, +
"Actual Rows": 0, +
"Actual Loops": 0, +
"Startup Cost": 0.0, +
"Relation Name": "tenk1", +
"Parallel Aware": true, +
"Local Hit Blocks": 0, +
"Temp Read Blocks": 0, +
"Actual Total Time": 0.0, +
"Local Read Blocks": 0, +
"Shared Hit Blocks": 0, +
"Shared Read Blocks": 0, +
"Actual Startup Time": 0.0, +
"Parent Relationship": "Outer",+
"Temp Written Blocks": 0, +
"Local Dirtied Blocks": 0, +
"Local Written Blocks": 0, +
"Shared Dirtied Blocks": 0, +
"Shared Written Blocks": 0 +
} +
], +
"Output": [ +
"unique1", +
"unique2", +
"two", +
"four", +
"ten", +
"twenty", +
"hundred", +
"thousand", +
"twothousand", +
"fivethous", +
"tenthous", +
"odd", +
"even", +
"stringu1", +
"stringu2", +
"string4" +
], +
"Sort Key": [ +
"tenk1.tenthous" +
], +
"Node Type": "Sort", +
"Plan Rows": 0, +
"Plan Width": 0, +
"Total Cost": 0.0, +
"Actual Rows": 0, +
"Actual Loops": 0, +
"Startup Cost": 0.0, +
"Parallel Aware": false, +
"Sort Space Used": 0, +
"Local Hit Blocks": 0, +
"Temp Read Blocks": 0, +
"Actual Total Time": 0.0, +
"Local Read Blocks": 0, +
"Shared Hit Blocks": 0, +
"Shared Read Blocks": 0, +
"Actual Startup Time": 0.0, +
"Parent Relationship": "Outer", +
"Temp Written Blocks": 0, +
"Local Dirtied Blocks": 0, +
"Local Written Blocks": 0, +
"Shared Dirtied Blocks": 0, +
"Shared Written Blocks": 0 +
} +
], +
"Output": [ +
"unique1", +
"unique2", +
"two", +
"four", +
"ten", +
"twenty", +
"hundred", +
"thousand", +
"twothousand", +
"fivethous", +
"tenthous", +
"odd", +
"even", +
"stringu1", +
"stringu2", +
"string4" +
], +
"Node Type": "Gather Merge", +
"Plan Rows": 0, +
"Plan Width": 0, +
"Total Cost": 0.0, +
"Actual Rows": 0, +
"Actual Loops": 0, +
"Startup Cost": 0.0, +
"Parallel Aware": false, +
"Workers Planned": 0, +
"Local Hit Blocks": 0, +
"Temp Read Blocks": 0, +
"Workers Launched": 0, +
"Actual Total Time": 0.0, +
"Local Read Blocks": 0, +
"Shared Hit Blocks": 0, +
"Shared Read Blocks": 0, +
"Actual Startup Time": 0.0, +
"Temp Written Blocks": 0, +
"Local Dirtied Blocks": 0, +
"Local Written Blocks": 0, +
"Shared Dirtied Blocks": 0, +
"Shared Written Blocks": 0 +
}, +
"Triggers": [ +
], +
"Planning Time": 0.0, +
"Execution Time": 0.0 +
} +
]
(1 row)
rollback;

View File

@ -112,7 +112,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
# Another group of parallel tests
# ----------
test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort
test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain
# event triggers cannot run concurrently with any test that runs DDL
test: event_trigger

View File

@ -193,6 +193,7 @@ test: indexing
test: partition_aggregate
test: partition_info
test: tuplesort
test: explain
test: event_trigger
test: fast_default
test: stats

View File

@ -0,0 +1,89 @@
--
-- EXPLAIN
--
-- There are many test cases elsewhere that use EXPLAIN as a vehicle for
-- checking something else (usually planner behavior). This file is
-- concerned with testing EXPLAIN in its own right.
--
-- To produce stable regression test output, it's usually necessary to
-- ignore details such as exact costs or row counts. These filter
-- functions replace changeable output details with fixed strings.
create function explain_filter(text) returns setof text
language plpgsql as
$$
declare
ln text;
begin
for ln in execute $1
loop
-- Replace any numeric word with just 'N'
ln := regexp_replace(ln, '\m\d+\M', 'N', 'g');
-- In sort output, the above won't match units-suffixed numbers
ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
-- Text-mode buffers output varies depending on the system state
ln := regexp_replace(ln, '^( +Buffers: shared)( hit=N)?( read=N)?', '\1 [read]');
return next ln;
end loop;
end;
$$;
-- To produce valid JSON output, replace numbers with "0" or "0.0" not "N"
create function explain_filter_to_json(text) returns jsonb
language plpgsql as
$$
declare
data text := '';
ln text;
begin
for ln in execute $1
loop
-- Replace any numeric word with just '0'
ln := regexp_replace(ln, '\m\d+\M', '0', 'g');
data := data || ln;
end loop;
return data::jsonb;
end;
$$;
-- Simple cases
select explain_filter('explain select * from int8_tbl i8');
select explain_filter('explain (analyze) select * from int8_tbl i8');
select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format yaml) select * from int8_tbl i8');
--
-- Test production of per-worker data
--
-- Unfortunately, because we don't know how many worker processes we'll
-- actually get (maybe none at all), we can't examine the "Workers" output
-- in any detail. We can check that it parses correctly as JSON, and then
-- remove it from the displayed results.
-- Serializable isolation would disable parallel query, so explicitly use an
-- arbitrary other level.
begin isolation level repeatable read;
-- encourage use of parallel plans
set parallel_setup_cost=0;
set parallel_tuple_cost=0;
set min_parallel_table_scan_size=0;
set max_parallel_workers_per_gather=4;
select jsonb_pretty(
explain_filter_to_json('explain (analyze, verbose, buffers, format json)
select * from tenk1 order by tenthous')
-- remove "Workers" node of the Seq Scan plan node
#- '{0,Plan,Plans,0,Plans,0,Workers}'
-- remove "Workers" node of the Sort plan node
#- '{0,Plan,Plans,0,Workers}'
-- Also remove its sort-type fields, as those aren't 100% stable
#- '{0,Plan,Plans,0,Sort Method}'
#- '{0,Plan,Plans,0,Sort Space Type}'
);
rollback;