Add new COPY option LOG_VERBOSITY.

This commit adds a new COPY option LOG_VERBOSITY, which controls the
amount of messages emitted during processing. Valid values are
'default' and 'verbose'.

This is currently used in COPY FROM when ON_ERROR option is set to
ignore. If 'verbose' is specified, a NOTICE message is emitted for
each discarded row, providing additional information such as line
number, column name, and the malformed value. This helps users to
identify problematic rows that failed to load.

Author: Bharath Rupireddy
Reviewed-by: Michael Paquier, Atsushi Torikoshi, Masahiko Sawada
Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com
This commit is contained in:
Masahiko Sawada 2024-04-01 15:25:25 +09:00
parent f4ad0021af
commit f5a227895e
10 changed files with 175 additions and 11 deletions

View File

@ -45,6 +45,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
ON_ERROR '<replaceable class="parameter">error_action</replaceable>'
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
LOG_VERBOSITY <replaceable class="parameter">mode</replaceable>
</synopsis>
</refsynopsisdiv>
@ -400,8 +401,12 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
</para>
<para>
A <literal>NOTICE</literal> message containing the ignored row count is emitted at the end
of the <command>COPY FROM</command> if at least one row was discarded.
A <literal>NOTICE</literal> message containing the ignored row count is
emitted at the end of the <command>COPY FROM</command> if at least one
row was discarded. When <literal>LOG_VERBOSITY</literal> option is set to
<literal>verbose</literal>, a <literal>NOTICE</literal> message
containing the line of the input file and the column name whose input
conversion has failed is emitted for each discarded row.
</para>
</listitem>
</varlistentry>
@ -418,6 +423,22 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
<varlistentry>
<term><literal>LOG_VERBOSITY</literal></term>
<listitem>
<para>
Specify the amount of messages emitted by a <command>COPY</command>
command: <literal>default</literal> or <literal>verbose</literal>. If
<literal>verbose</literal> is specified, additional messages are emitted
during processing.
</para>
<para>
This is currently used in <command>COPY FROM</command> command when
<literal>ON_ERROR</literal> option is set to <literal>ignore</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>WHERE</literal></term>
<listitem>

View File

@ -422,6 +422,30 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
return COPY_ON_ERROR_STOP; /* keep compiler quiet */
}
/*
* Extract a CopyLogVerbosityChoice value from a DefElem.
*/
static CopyLogVerbosityChoice
defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate)
{
char *sval;
/*
* Allow "default", or "verbose" values.
*/
sval = defGetString(def);
if (pg_strcasecmp(sval, "default") == 0)
return COPY_LOG_VERBOSITY_DEFAULT;
if (pg_strcasecmp(sval, "verbose") == 0)
return COPY_LOG_VERBOSITY_VERBOSE;
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY LOG_VERBOSITY \"%s\" not recognized", sval),
parser_errposition(pstate, def->location)));
return COPY_LOG_VERBOSITY_DEFAULT; /* keep compiler quiet */
}
/*
* Process the statement option list for COPY.
*
@ -448,6 +472,7 @@ ProcessCopyOptions(ParseState *pstate,
bool freeze_specified = false;
bool header_specified = false;
bool on_error_specified = false;
bool log_verbosity_specified = false;
ListCell *option;
/* Support external use for option sanity checking */
@ -607,6 +632,13 @@ ProcessCopyOptions(ParseState *pstate,
on_error_specified = true;
opts_out->on_error = defGetCopyOnErrorChoice(defel, pstate, is_from);
}
else if (strcmp(defel->defname, "log_verbosity") == 0)
{
if (log_verbosity_specified)
errorConflictingDefElem(defel, pstate);
log_verbosity_specified = true;
opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate);
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),

View File

@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo
/* non-export function prototypes */
static char *limit_printout_length(const char *str);
static void ClosePipeFromProgram(CopyFromState cstate);
/*
@ -141,7 +139,7 @@ CopyFromErrorCallback(void *arg)
/* error is relevant to a particular column */
char *attval;
attval = limit_printout_length(cstate->cur_attval);
attval = CopyLimitPrintoutLength(cstate->cur_attval);
errcontext("COPY %s, line %llu, column %s: \"%s\"",
cstate->cur_relname,
(unsigned long long) cstate->cur_lineno,
@ -168,7 +166,7 @@ CopyFromErrorCallback(void *arg)
{
char *lineval;
lineval = limit_printout_length(cstate->line_buf.data);
lineval = CopyLimitPrintoutLength(cstate->line_buf.data);
errcontext("COPY %s, line %llu: \"%s\"",
cstate->cur_relname,
(unsigned long long) cstate->cur_lineno, lineval);
@ -189,8 +187,8 @@ CopyFromErrorCallback(void *arg)
*
* Returns a pstrdup'd copy of the input.
*/
static char *
limit_printout_length(const char *str)
char *
CopyLimitPrintoutLength(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

View File

@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
(Node *) cstate->escontext,
&values[m]))
{
Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
cstate->num_errors++;
if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
{
/*
* Since we emit line number and column info in the below
* notice message, we suppress error context information
* other than the relation name.
*/
Assert(!cstate->relname_only);
cstate->relname_only = true;
if (cstate->cur_attval)
{
char *attval;
attval = CopyLimitPrintoutLength(cstate->cur_attval);
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %llu for column %s: \"%s\"",
(unsigned long long) cstate->cur_lineno,
cstate->cur_attname,
attval));
pfree(attval);
}
else
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %llu for column %s: null input",
(unsigned long long) cstate->cur_lineno,
cstate->cur_attname));
/* reset relname_only */
cstate->relname_only = false;
}
return true;
}

View File

@ -3530,6 +3530,7 @@ copy_generic_opt_arg:
opt_boolean_or_string { $$ = (Node *) makeString($1); }
| NumericOnly { $$ = (Node *) $1; }
| '*' { $$ = (Node *) makeNode(A_Star); }
| DEFAULT { $$ = (Node *) makeString("default"); }
| '(' copy_generic_opt_arg_list ')' { $$ = (Node *) $2; }
| /* EMPTY */ { $$ = NULL; }
;

View File

@ -2904,7 +2904,7 @@ psql_completion(const char *text, int start, int end)
COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
"HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
"ON_ERROR");
"ON_ERROR", "LOG_VERBOSITY");
/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
@ -2914,6 +2914,10 @@ psql_completion(const char *text, int start, int end)
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR"))
COMPLETE_WITH("stop", "ignore");
/* Complete COPY <sth> FROM filename WITH (LOG_VERBOSITY */
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "LOG_VERBOSITY"))
COMPLETE_WITH("default", "verbose");
/* Complete COPY <sth> FROM <sth> WITH (<options>) */
else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny))
COMPLETE_WITH("WHERE");

View File

@ -40,6 +40,15 @@ typedef enum CopyOnErrorChoice
COPY_ON_ERROR_IGNORE, /* ignore errors */
} CopyOnErrorChoice;
/*
* Represents verbosity of logged messages by COPY command.
*/
typedef enum CopyLogVerbosityChoice
{
COPY_LOG_VERBOSITY_DEFAULT = 0, /* logs no additional messages, default */
COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */
} CopyLogVerbosityChoice;
/*
* A struct to hold COPY options, in a parsed form. All of these are related
* to formatting, except for 'freeze', which doesn't really belong here, but
@ -73,6 +82,7 @@ typedef struct CopyFormatOptions
bool *force_null_flags; /* per-column CSV FN flags */
bool convert_selectively; /* do selective binary conversion? */
CopyOnErrorChoice on_error; /* what to do when error happened */
CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */
List *convert_select; /* list of column names (can be NIL) */
} CopyFormatOptions;
@ -97,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
extern bool NextCopyFromRawFields(CopyFromState cstate,
char ***fields, int *nfields);
extern void CopyFromErrorCallback(void *arg);
extern char *CopyLimitPrintoutLength(const char *str);
extern uint64 CopyFrom(CopyFromState cstate);

View File

@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
^
COPY x from stdin (log_verbosity default, log_verbosity verbose);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
^
-- incorrect options
COPY x to stdin (format BINARY, delimiter ',');
ERROR: cannot specify DELIMITER in BINARY mode
@ -108,6 +112,10 @@ COPY x to stdin (format BINARY, on_error unsupported);
ERROR: COPY ON_ERROR cannot be used with COPY TO
LINE 1: COPY x to stdin (format BINARY, on_error unsupported);
^
COPY x to stdout (log_verbosity unsupported);
ERROR: COPY LOG_VERBOSITY "unsupported" not recognized
LINE 1: COPY x to stdout (log_verbosity unsupported);
^
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
ERROR: column "d" specified more than once
@ -729,8 +737,31 @@ CREATE TABLE check_ign_err (n int, m int[], k int);
COPY check_ign_err FROM STDIN WITH (on_error stop);
ERROR: invalid input syntax for type integer: "a"
CONTEXT: COPY check_ign_err, line 2, column n: "a"
COPY check_ign_err FROM STDIN WITH (on_error ignore);
-- want context for notices
\set SHOW_CONTEXT always
COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
NOTICE: skipping row due to data type incompatibility at line 2 for column n: "a"
CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 3 for column k: "3333333333"
CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 4 for column m: "{a, 4}"
CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 5 for column n: ""
CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 7 for column m: "a"
CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 8 for column k: "a"
CONTEXT: COPY check_ign_err
NOTICE: 6 rows were skipped due to data type incompatibility
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
NOTICE: skipping row due to data type incompatibility at line 2 for column l: null input
CONTEXT: COPY check_ign_err2
NOTICE: 1 row was skipped due to data type incompatibility
-- reset context choice
\set SHOW_CONTEXT errors
SELECT * FROM check_ign_err;
n | m | k
---+-----+---
@ -739,6 +770,12 @@ SELECT * FROM check_ign_err;
8 | {8} | 8
(3 rows)
SELECT * FROM check_ign_err2;
n | m | k | l
---+-----+---+-------
1 | {1} | 1 | 'foo'
(1 row)
-- test datatype error that can't be handled as soft: should fail
CREATE TABLE hard_err(foo widget);
COPY hard_err FROM STDIN WITH (on_error ignore);
@ -767,6 +804,8 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;
--
-- COPY FROM ... DEFAULT

View File

@ -67,6 +67,7 @@ COPY x from stdin (force_null (a), force_null (b));
COPY x from stdin (convert_selectively (a), convert_selectively (b));
COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
COPY x from stdin (on_error ignore, on_error ignore);
COPY x from stdin (log_verbosity default, log_verbosity verbose);
-- incorrect options
COPY x to stdin (format BINARY, delimiter ',');
@ -80,6 +81,7 @@ COPY x to stdin (format CSV, force_not_null(a));
COPY x to stdout (format TEXT, force_null(a));
COPY x to stdin (format CSV, force_null(a));
COPY x to stdin (format BINARY, on_error unsupported);
COPY x to stdout (log_verbosity unsupported);
-- too many columns in column list: should fail
COPY x (a, b, c, d, e, d, c) from stdin;
@ -508,7 +510,11 @@ a {2} 2
5 {5} 5
\.
COPY check_ign_err FROM STDIN WITH (on_error ignore);
-- want context for notices
\set SHOW_CONTEXT always
COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose);
1 {1} 1
a {2} 2
3 {3} 3333333333
@ -519,8 +525,22 @@ a {2} 2
7 {7} a
8 {8} 8
\.
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose);
1 {1} 1 'foo'
2 {2} 2 \N
\.
-- reset context choice
\set SHOW_CONTEXT errors
SELECT * FROM check_ign_err;
SELECT * FROM check_ign_err2;
-- test datatype error that can't be handled as soft: should fail
CREATE TABLE hard_err(foo widget);
COPY hard_err FROM STDIN WITH (on_error ignore);
@ -552,6 +572,8 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;
--

View File

@ -480,6 +480,7 @@ CopyFromState
CopyFromStateData
CopyHeaderChoice
CopyInsertMethod
CopyLogVerbosityChoice
CopyMultiInsertBuffer
CopyMultiInsertInfo
CopyOnErrorChoice