Add contrib/file_fdw foreign-data wrapper for reading files via COPY.

This is both very useful in its own right, and an important test case
for the core FDW support.

This commit includes a small refactoring of copy.c to expose its option
checking code as a separately callable function.  The original patch
submission duplicated hundreds of lines of that code, which seemed pretty
unmaintainable.

Shigeru Hanada, reviewed by Itagaki Takahiro and Tom Lane
This commit is contained in:
Tom Lane 2011-02-20 14:06:59 -05:00
parent bb74240794
commit 7c5d0ae707
19 changed files with 1171 additions and 55 deletions

View File

@ -18,6 +18,7 @@ SUBDIRS = \
dict_xsyn \
dummy_seclabel \
earthdistance \
file_fdw \
fuzzystrmatch \
hstore \
intagg \

View File

@ -73,6 +73,10 @@ earthdistance -
Functions for computing distances between two points on Earth
by Bruno Wolff III <bruno@wolff.to> and Hal Snyder <hal@vailsys.com>
file_fdw
Foreign-data wrapper for server-side CSV/TEXT files
by Shigeru Hanada <hanada@metrosystems.co.jp>
fuzzystrmatch -
Levenshtein, metaphone, and soundex fuzzy string matching
by Joe Conway <mail@joeconway.com> and Joel Burton <jburton@scw.org>

2
contrib/file_fdw/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Generated subdirectories
/results/

21
contrib/file_fdw/Makefile Normal file
View File

@ -0,0 +1,21 @@
# contrib/file_fdw/Makefile
MODULES = file_fdw
EXTENSION = file_fdw
DATA = file_fdw--1.0.sql
REGRESS = file_fdw
EXTRA_CLEAN = sql/file_fdw.sql expected/file_fdw.out
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/file_fdw
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@ -0,0 +1,4 @@
56;@7.8@
100;@99.097@
0;@aaa@
42;@324.78@

View File

@ -0,0 +1,4 @@
56;@7.8@
100;@99.097@
0;@0.09561@
42;@324.78@
1 56; 7.8
2 100; 99.097
3 0; 0.09561
4 42; 324.78

View File

@ -0,0 +1,4 @@
56 7.8
100 99.097
0 0.09561
42 324.78

1
contrib/file_fdw/expected/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/file_fdw.out

View File

@ -0,0 +1,15 @@
/* contrib/file_fdw/file_fdw--1.0.sql */
CREATE FUNCTION file_fdw_handler()
RETURNS fdw_handler
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
CREATE FUNCTION file_fdw_validator(text[], oid)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
CREATE FOREIGN DATA WRAPPER file_fdw
HANDLER file_fdw_handler
VALIDATOR file_fdw_validator;

536
contrib/file_fdw/file_fdw.c Normal file
View File

@ -0,0 +1,536 @@
/*-------------------------------------------------------------------------
*
* file_fdw.c
* foreign-data wrapper for server-side flat files.
*
* Copyright (c) 2010-2011, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/file_fdw/file_fdw.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include "access/reloptions.h"
#include "catalog/pg_foreign_table.h"
#include "commands/copy.h"
#include "commands/defrem.h"
#include "commands/explain.h"
#include "foreign/fdwapi.h"
#include "foreign/foreign.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
PG_MODULE_MAGIC;
/*
* Describes the valid options for objects that use this wrapper.
*/
struct FileFdwOption
{
const char *optname;
Oid optcontext; /* Oid of catalog in which option may appear */
};
/*
* Valid options for file_fdw.
* These options are based on the options for COPY FROM command.
*
* Note: If you are adding new option for user mapping, you need to modify
* fileGetOptions(), which currently doesn't bother to look at user mappings.
*/
static struct FileFdwOption valid_options[] = {
/* File options */
{ "filename", ForeignTableRelationId },
/* Format options */
/* oids option is not supported */
{ "format", ForeignTableRelationId },
{ "header", ForeignTableRelationId },
{ "delimiter", ForeignTableRelationId },
{ "quote", ForeignTableRelationId },
{ "escape", ForeignTableRelationId },
{ "null", ForeignTableRelationId },
/*
* force_quote is not supported by file_fdw because it's for COPY TO.
*/
/*
* force_not_null is not supported by file_fdw. It would need a parser
* for list of columns, not to mention a way to check the column list
* against the table.
*/
/* Sentinel */
{ NULL, InvalidOid }
};
/*
* FDW-specific information for ForeignScanState.fdw_state.
*/
typedef struct FileFdwExecutionState
{
char *filename; /* file to read */
List *options; /* merged COPY options, excluding filename */
CopyState cstate; /* state of reading file */
} FileFdwExecutionState;
/*
* SQL functions
*/
extern Datum file_fdw_handler(PG_FUNCTION_ARGS);
extern Datum file_fdw_validator(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(file_fdw_handler);
PG_FUNCTION_INFO_V1(file_fdw_validator);
/*
* FDW callback routines
*/
static FdwPlan *filePlanForeignScan(Oid foreigntableid,
PlannerInfo *root,
RelOptInfo *baserel);
static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es);
static void fileBeginForeignScan(ForeignScanState *node, int eflags);
static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
static void fileReScanForeignScan(ForeignScanState *node);
static void fileEndForeignScan(ForeignScanState *node);
/*
* Helper functions
*/
static bool is_valid_option(const char *option, Oid context);
static void fileGetOptions(Oid foreigntableid,
char **filename, List **other_options);
static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
const char *filename,
Cost *startup_cost, Cost *total_cost);
/*
* Foreign-data wrapper handler function: return a struct with pointers
* to my callback routines.
*/
Datum
file_fdw_handler(PG_FUNCTION_ARGS)
{
FdwRoutine *fdwroutine = makeNode(FdwRoutine);
fdwroutine->PlanForeignScan = filePlanForeignScan;
fdwroutine->ExplainForeignScan = fileExplainForeignScan;
fdwroutine->BeginForeignScan = fileBeginForeignScan;
fdwroutine->IterateForeignScan = fileIterateForeignScan;
fdwroutine->ReScanForeignScan = fileReScanForeignScan;
fdwroutine->EndForeignScan = fileEndForeignScan;
PG_RETURN_POINTER(fdwroutine);
}
/*
* Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER,
* USER MAPPING or FOREIGN TABLE that uses file_fdw.
*
* Raise an ERROR if the option or its value is considered invalid.
*/
Datum
file_fdw_validator(PG_FUNCTION_ARGS)
{
List *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
Oid catalog = PG_GETARG_OID(1);
char *filename = NULL;
List *other_options = NIL;
ListCell *cell;
/*
* Only superusers are allowed to set options of a file_fdw foreign table.
* This is because the filename is one of those options, and we don't
* want non-superusers to be able to determine which file gets read.
*
* Putting this sort of permissions check in a validator is a bit of a
* crock, but there doesn't seem to be any other place that can enforce
* the check more cleanly.
*
* Note that the valid_options[] array disallows setting filename at
* any options level other than foreign table --- otherwise there'd
* still be a security hole.
*/
if (catalog == ForeignTableRelationId && !superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("only superuser can change options of a file_fdw foreign table")));
/*
* Check that only options supported by file_fdw, and allowed for the
* current object type, are given.
*/
foreach(cell, options_list)
{
DefElem *def = (DefElem *) lfirst(cell);
if (!is_valid_option(def->defname, catalog))
{
struct FileFdwOption *opt;
StringInfoData buf;
/*
* Unknown option specified, complain about it. Provide a hint
* with list of valid options for the object.
*/
initStringInfo(&buf);
for (opt = valid_options; opt->optname; opt++)
{
if (catalog == opt->optcontext)
appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "",
opt->optname);
}
ereport(ERROR,
(errcode(ERRCODE_FDW_INVALID_OPTION_NAME),
errmsg("invalid option \"%s\"", def->defname),
errhint("Valid options in this context are: %s",
buf.data)));
}
/* Separate out filename, since ProcessCopyOptions won't allow it */
if (strcmp(def->defname, "filename") == 0)
{
if (filename)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
filename = defGetString(def);
}
else
other_options = lappend(other_options, def);
}
/*
* Now apply the core COPY code's validation logic for more checks.
*/
ProcessCopyOptions(NULL, true, other_options);
PG_RETURN_VOID();
}
/*
* Check if the provided option is one of the valid options.
* context is the Oid of the catalog holding the object the option is for.
*/
static bool
is_valid_option(const char *option, Oid context)
{
struct FileFdwOption *opt;
for (opt = valid_options; opt->optname; opt++)
{
if (context == opt->optcontext && strcmp(opt->optname, option) == 0)
return true;
}
return false;
}
/*
* Fetch the options for a file_fdw foreign table.
*
* We have to separate out "filename" from the other options because
* it must not appear in the options list passed to the core COPY code.
*/
static void
fileGetOptions(Oid foreigntableid,
char **filename, List **other_options)
{
ForeignTable *table;
ForeignServer *server;
ForeignDataWrapper *wrapper;
List *options;
ListCell *lc,
*prev;
/*
* Extract options from FDW objects. We ignore user mappings because
* file_fdw doesn't have any options that can be specified there.
*
* (XXX Actually, given the current contents of valid_options[], there's
* no point in examining anything except the foreign table's own options.
* Simplify?)
*/
table = GetForeignTable(foreigntableid);
server = GetForeignServer(table->serverid);
wrapper = GetForeignDataWrapper(server->fdwid);
options = NIL;
options = list_concat(options, wrapper->options);
options = list_concat(options, server->options);
options = list_concat(options, table->options);
/*
* Separate out the filename.
*/
*filename = NULL;
prev = NULL;
foreach(lc, options)
{
DefElem *def = (DefElem *) lfirst(lc);
if (strcmp(def->defname, "filename") == 0)
{
*filename = defGetString(def);
options = list_delete_cell(options, lc, prev);
break;
}
prev = lc;
}
if (*filename == NULL)
ereport(ERROR,
(errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY),
errmsg("filename is required for file_fdw foreign tables")));
*other_options = options;
}
/*
* filePlanForeignScan
* Create a FdwPlan for a scan on the foreign table
*/
static FdwPlan *
filePlanForeignScan(Oid foreigntableid,
PlannerInfo *root,
RelOptInfo *baserel)
{
FdwPlan *fdwplan;
char *filename;
List *options;
/* Fetch options --- we only need filename at this point */
fileGetOptions(foreigntableid, &filename, &options);
/* Construct FdwPlan with cost estimates */
fdwplan = makeNode(FdwPlan);
estimate_costs(root, baserel, filename,
&fdwplan->startup_cost, &fdwplan->total_cost);
fdwplan->fdw_private = NIL; /* not used */
return fdwplan;
}
/*
* fileExplainForeignScan
* Produce extra output for EXPLAIN
*/
static void
fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
{
char *filename;
List *options;
/* Fetch options --- we only need filename at this point */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
&filename, &options);
ExplainPropertyText("Foreign File", filename, es);
/* Suppress file size if we're not showing cost details */
if (es->costs)
{
struct stat stat_buf;
if (stat(filename, &stat_buf) == 0)
ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
es);
}
}
/*
* fileBeginForeignScan
* Initiate access to the file by creating CopyState
*/
static void
fileBeginForeignScan(ForeignScanState *node, int eflags)
{
char *filename;
List *options;
CopyState cstate;
FileFdwExecutionState *festate;
/*
* Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
*/
if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
return;
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
&filename, &options);
/*
* Create CopyState from FDW options. We always acquire all columns,
* so as to match the expected ScanTupleSlot signature.
*/
cstate = BeginCopyFrom(node->ss.ss_currentRelation,
filename,
NIL,
options);
/*
* Save state in node->fdw_state. We must save enough information to call
* BeginCopyFrom() again.
*/
festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
festate->filename = filename;
festate->options = options;
festate->cstate = cstate;
node->fdw_state = (void *) festate;
}
/*
* fileIterateForeignScan
* Read next record from the data file and store it into the
* ScanTupleSlot as a virtual tuple
*/
static TupleTableSlot *
fileIterateForeignScan(ForeignScanState *node)
{
FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
bool found;
ErrorContextCallback errcontext;
/* Set up callback to identify error line number. */
errcontext.callback = CopyFromErrorCallback;
errcontext.arg = (void *) festate->cstate;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
/*
* The protocol for loading a virtual tuple into a slot is first
* ExecClearTuple, then fill the values/isnull arrays, then
* ExecStoreVirtualTuple. If we don't find another row in the file,
* we just skip the last step, leaving the slot empty as required.
*
* We can pass ExprContext = NULL because we read all columns from the
* file, so no need to evaluate default expressions.
*
* We can also pass tupleOid = NULL because we don't allow oids for
* foreign tables.
*/
ExecClearTuple(slot);
found = NextCopyFrom(festate->cstate, NULL,
slot->tts_values, slot->tts_isnull,
NULL);
if (found)
ExecStoreVirtualTuple(slot);
/* Remove error callback. */
error_context_stack = errcontext.previous;
return slot;
}
/*
* fileEndForeignScan
* Finish scanning foreign table and dispose objects used for this scan
*/
static void
fileEndForeignScan(ForeignScanState *node)
{
FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
/* if festate is NULL, we are in EXPLAIN; nothing to do */
if (festate)
EndCopyFrom(festate->cstate);
}
/*
* fileReScanForeignScan
* Rescan table, possibly with new parameters
*/
static void
fileReScanForeignScan(ForeignScanState *node)
{
FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
EndCopyFrom(festate->cstate);
festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
festate->filename,
NIL,
festate->options);
}
/*
* Estimate costs of scanning a foreign table.
*/
static void
estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
const char *filename,
Cost *startup_cost, Cost *total_cost)
{
struct stat stat_buf;
BlockNumber pages;
int tuple_width;
double ntuples;
double nrows;
Cost run_cost = 0;
Cost cpu_per_tuple;
/*
* Get size of the file. It might not be there at plan time, though,
* in which case we have to use a default estimate.
*/
if (stat(filename, &stat_buf) < 0)
stat_buf.st_size = 10 * BLCKSZ;
/*
* Convert size to pages for use in I/O cost estimate below.
*/
pages = (stat_buf.st_size + (BLCKSZ-1)) / BLCKSZ;
if (pages < 1)
pages = 1;
/*
* Estimate the number of tuples in the file. We back into this estimate
* using the planner's idea of the relation width; which is bogus if not
* all columns are being read, not to mention that the text representation
* of a row probably isn't the same size as its internal representation.
* FIXME later.
*/
tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
/*
* Now estimate the number of rows returned by the scan after applying
* the baserestrictinfo quals. This is pretty bogus too, since the
* planner will have no stats about the relation, but it's better than
* nothing.
*/
nrows = ntuples *
clauselist_selectivity(root,
baserel->baserestrictinfo,
0,
JOIN_INNER,
NULL);
nrows = clamp_row_est(nrows);
/* Save the output-rows estimate for the planner */
baserel->rows = nrows;
/*
* Now estimate costs. We estimate costs almost the same way as
* cost_seqscan(), thus assuming that I/O costs are equivalent to a
* regular table file of the same size. However, we take per-tuple CPU
* costs as 10x of a seqscan, to account for the cost of parsing records.
*/
run_cost += seq_page_cost * pages;
*startup_cost = baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
*total_cost = *startup_cost + run_cost;
}

View File

@ -0,0 +1,5 @@
# file_fdw extension
comment = 'foreign-data wrapper for flat file access'
default_version = '1.0'
module_pathname = '$libdir/file_fdw'
relocatable = true

View File

@ -0,0 +1,131 @@
--
-- Test foreign-data wrapper file_fdw.
--
-- Clean up in case a prior regression run failed
SET client_min_messages TO 'error';
DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user;
RESET client_min_messages;
CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser
CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping
CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping
-- Install file_fdw
CREATE EXTENSION file_fdw;
-- file_fdw_superuser owns fdw-related objects
SET ROLE file_fdw_superuser;
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
-- privilege tests
SET ROLE file_fdw_user;
CREATE FOREIGN DATA WRAPPER file_fdw2 HANDLER file_fdw_handler VALIDATOR file_fdw_validator; -- ERROR
CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR
CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR
SET ROLE file_fdw_superuser;
GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user;
SET ROLE file_fdw_user;
CREATE USER MAPPING FOR file_fdw_user SERVER file_server;
-- create user mappings and grant privilege to test users
SET ROLE file_fdw_superuser;
CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server;
CREATE USER MAPPING FOR no_priv_user SERVER file_server;
-- validator tests
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', quote ':'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', escape ':'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '---'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '---'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', escape '---'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '\'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '.'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '1'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '
'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
CREATE FOREIGN TABLE agg_text (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N');
GRANT SELECT ON agg_text TO file_fdw_user;
CREATE FOREIGN TABLE agg_csv (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
SELECT * FROM agg_csv ORDER BY a;
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
-- misc query tests
\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv;
\t off
PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1;
EXECUTE st(100);
EXECUTE st(100);
DEALLOCATE st;
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
-- updates aren't supported
INSERT INTO agg_csv VALUES(1,2.0);
UPDATE agg_csv SET a = 1;
DELETE FROM agg_csv WHERE a = 100;
SELECT * FROM agg_csv FOR UPDATE OF agg_csv;
-- but this should be ignored
SELECT * FROM agg_csv FOR UPDATE;
-- privilege tests
SET ROLE file_fdw_superuser;
SELECT * FROM agg_text ORDER BY a;
SET ROLE file_fdw_user;
SELECT * FROM agg_text ORDER BY a;
SET ROLE no_priv_user;
SELECT * FROM agg_text ORDER BY a; -- ERROR
SET ROLE file_fdw_user;
\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text WHERE a > 0;
\t off
-- privilege tests for object
SET ROLE file_fdw_superuser;
ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user;
ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text');
SET ROLE file_fdw_user;
ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text');
SET ROLE file_fdw_superuser;
-- cleanup
RESET ROLE;
DROP EXTENSION file_fdw CASCADE;
DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user;

View File

@ -0,0 +1,225 @@
--
-- Test foreign-data wrapper file_fdw.
--
-- Clean up in case a prior regression run failed
SET client_min_messages TO 'error';
DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user;
RESET client_min_messages;
CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser
CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping
CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping
-- Install file_fdw
CREATE EXTENSION file_fdw;
-- file_fdw_superuser owns fdw-related objects
SET ROLE file_fdw_superuser;
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
-- privilege tests
SET ROLE file_fdw_user;
CREATE FOREIGN DATA WRAPPER file_fdw2 HANDLER file_fdw_handler VALIDATOR file_fdw_validator; -- ERROR
ERROR: permission denied to create foreign-data wrapper "file_fdw2"
HINT: Must be superuser to create a foreign-data wrapper.
CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR
ERROR: permission denied for foreign-data wrapper file_fdw
CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR
ERROR: permission denied for foreign server file_server
SET ROLE file_fdw_superuser;
GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user;
SET ROLE file_fdw_user;
CREATE USER MAPPING FOR file_fdw_user SERVER file_server;
-- create user mappings and grant privilege to test users
SET ROLE file_fdw_superuser;
CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server;
CREATE USER MAPPING FOR no_priv_user SERVER file_server;
-- validator tests
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR
ERROR: COPY format "xml" not recognized
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR
ERROR: COPY HEADER available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR
ERROR: COPY quote available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR
ERROR: COPY escape available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR
ERROR: COPY HEADER available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', quote ':'); -- ERROR
ERROR: COPY quote available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', escape ':'); -- ERROR
ERROR: COPY escape available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR
ERROR: COPY delimiter cannot be "a"
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR
ERROR: COPY escape available only in CSV mode
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR
ERROR: CSV quote character must not appear in the NULL specification
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR
ERROR: COPY delimiter must not appear in the NULL specification
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR
ERROR: COPY delimiter and quote must be different
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '---'); -- ERROR
ERROR: COPY delimiter must be a single one-byte character
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '---'); -- ERROR
ERROR: COPY quote must be a single one-byte character
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', escape '---'); -- ERROR
ERROR: COPY escape must be a single one-byte character
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '\'); -- ERROR
ERROR: COPY delimiter cannot be "\"
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '.'); -- ERROR
ERROR: COPY delimiter cannot be "."
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '1'); -- ERROR
ERROR: COPY delimiter cannot be "1"
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR
ERROR: COPY delimiter cannot be "a"
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '
'); -- ERROR
ERROR: COPY delimiter cannot be newline or carriage return
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
ERROR: COPY null representation cannot use newline or carriage return
CREATE FOREIGN TABLE agg_text (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N');
GRANT SELECT ON agg_text TO file_fdw_user;
CREATE FOREIGN TABLE agg_csv (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b
-----+--------
42 | 324.78
100 | 99.097
(2 rows)
SELECT * FROM agg_csv ORDER BY a;
a | b
-----+---------
0 | 0.09561
42 | 324.78
100 | 99.097
(3 rows)
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
a | b | a | b
-----+---------+-----+---------
0 | 0.09561 | 0 | 0.09561
42 | 324.78 | 42 | 324.78
100 | 99.097 | 100 | 99.097
(3 rows)
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
CONTEXT: COPY agg_bad, line 3, column b: "aaa"
-- misc query tests
\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv;
Foreign Scan on public.agg_csv
Output: a, b
Foreign File: @abs_srcdir@/data/agg.csv
\t off
PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1;
EXECUTE st(100);
a | b
-----+--------
100 | 99.097
(1 row)
EXECUTE st(100);
a | b
-----+--------
100 | 99.097
(1 row)
DEALLOCATE st;
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
tableoid | b
----------+---------
agg_csv | 99.097
agg_csv | 0.09561
agg_csv | 324.78
(3 rows)
-- updates aren't supported
INSERT INTO agg_csv VALUES(1,2.0);
ERROR: cannot change foreign table "agg_csv"
UPDATE agg_csv SET a = 1;
ERROR: cannot change foreign table "agg_csv"
DELETE FROM agg_csv WHERE a = 100;
ERROR: cannot change foreign table "agg_csv"
SELECT * FROM agg_csv FOR UPDATE OF agg_csv;
ERROR: SELECT FOR UPDATE/SHARE cannot be used with foreign table "agg_csv"
LINE 1: SELECT * FROM agg_csv FOR UPDATE OF agg_csv;
^
-- but this should be ignored
SELECT * FROM agg_csv FOR UPDATE;
a | b
-----+---------
100 | 99.097
0 | 0.09561
42 | 324.78
(3 rows)
-- privilege tests
SET ROLE file_fdw_superuser;
SELECT * FROM agg_text ORDER BY a;
a | b
-----+---------
0 | 0.09561
42 | 324.78
56 | 7.8
100 | 99.097
(4 rows)
SET ROLE file_fdw_user;
SELECT * FROM agg_text ORDER BY a;
a | b
-----+---------
0 | 0.09561
42 | 324.78
56 | 7.8
100 | 99.097
(4 rows)
SET ROLE no_priv_user;
SELECT * FROM agg_text ORDER BY a; -- ERROR
ERROR: permission denied for relation agg_text
SET ROLE file_fdw_user;
\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text WHERE a > 0;
Foreign Scan on public.agg_text
Output: a, b
Filter: (agg_text.a > 0)
Foreign File: @abs_srcdir@/data/agg.data
\t off
-- privilege tests for object
SET ROLE file_fdw_superuser;
ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user;
ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text');
SET ROLE file_fdw_user;
ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text');
ERROR: only superuser can change options of a file_fdw foreign table
SET ROLE file_fdw_superuser;
-- cleanup
RESET ROLE;
DROP EXTENSION file_fdw CASCADE;
NOTICE: drop cascades to 7 other objects
DETAIL: drop cascades to server file_server
drop cascades to user mapping for file_fdw_user
drop cascades to user mapping for file_fdw_superuser
drop cascades to user mapping for no_priv_user
drop cascades to foreign table agg_text
drop cascades to foreign table agg_csv
drop cascades to foreign table agg_bad
DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user;

1
contrib/file_fdw/sql/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/file_fdw.sql

View File

@ -100,6 +100,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
&dict-xsyn;
&dummy-seclabel;
&earthdistance;
&file-fdw;
&fuzzystrmatch;
&hstore;
&intagg;

127
doc/src/sgml/file-fdw.sgml Normal file
View File

@ -0,0 +1,127 @@
<!-- doc/src/sgml/file-fdw.sgml -->
<sect1 id="file-fdw">
<title>file_fdw</title>
<indexterm zone="file-fdw">
<primary>file_fdw</primary>
</indexterm>
<para>
The <filename>file_fdw</> module provides the foreign-data wrapper
<function>file_fdw</function>, which can be used to access data
files in the server's filesystem. Data files must be in a format
that can be read by <command>COPY FROM</command>;
see <xref linkend="sql-copy"> for details.
</para>
<para>
A foreign table created using this wrapper can have the following options:
</para>
<variablelist>
<varlistentry>
<term><literal>filename</literal></term>
<listitem>
<para>
Specifies the file to be read. Required. Must be an absolute path name.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>format</literal></term>
<listitem>
<para>
Specifies the file's format,
the same as <command>COPY</>'s <literal>FORMAT</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>header</literal></term>
<listitem>
<para>
Specifies whether the file has a header line,
the same as <command>COPY</>'s <literal>HEADER</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>delimiter</literal></term>
<listitem>
<para>
Specifies the file's delimiter character,
the same as <command>COPY</>'s <literal>DELIMITER</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>quote</literal></term>
<listitem>
<para>
Specifies the file's quote character,
the same as <command>COPY</>'s <literal>QUOTE</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>escape</literal></term>
<listitem>
<para>
Specifies the file's escape character,
the same as <command>COPY</>'s <literal>ESCAPE</literal> option.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>null</literal></term>
<listitem>
<para>
Specifies the file's null string,
the same as <command>COPY</>'s <literal>NULL</literal> option.
</para>
</listitem>
</varlistentry>
</variablelist>
<para>
<command>COPY</>'s <literal>OIDS</literal>, <literal>FORCE_QUOTE</literal>,
and <literal>FORCE_NOT_NULL</literal> options are currently not supported by
<literal>file_fdw</>.
</para>
<para>
These options can only be specified for a foreign table, not in the
options of the <literal>file_fdw</> foreign-data wrapper, nor in the
options of a server or user mapping using the wrapper.
</para>
<para>
Changing table-level options requires superuser privileges, for security
reasons: only a superuser should be able to determine which file is read.
In principle non-superusers could be allowed to change the other options,
but that's not supported at present.
</para>
<para>
For a foreign table using <literal>file_fdw</>, <command>EXPLAIN</> shows
the name of the file to be read. Unless <literal>COSTS OFF</> is
specified, the file size (in bytes) is shown as well.
</para>
</sect1>

View File

@ -106,6 +106,7 @@
<!entity dict-xsyn SYSTEM "dict-xsyn.sgml">
<!entity dummy-seclabel SYSTEM "dummy-seclabel.sgml">
<!entity earthdistance SYSTEM "earthdistance.sgml">
<!entity file-fdw SYSTEM "file-fdw.sgml">
<!entity fuzzystrmatch SYSTEM "fuzzystrmatch.sgml">
<!entity hstore SYSTEM "hstore.sgml">
<!entity intagg SYSTEM "intagg.sgml">

View File

@ -114,7 +114,10 @@ typedef struct CopyStateData
char *delim; /* column delimiter (must be 1 byte) */
char *quote; /* CSV quote char (must be 1 byte) */
char *escape; /* CSV escape char (must be 1 byte) */
List *force_quote; /* list of column names */
bool force_quote_all; /* FORCE QUOTE *? */
bool *force_quote_flags; /* per-column CSV FQ flags */
List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column CSV FNN flags */
/* these are just for error messages, see CopyFromErrorCallback */
@ -764,7 +767,7 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
tupDesc = RelationGetDescr(rel);
attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
foreach (cur, attnums)
foreach(cur, attnums)
{
int attno = lfirst_int(cur) -
FirstLowInvalidHeapAttributeNumber;
@ -814,52 +817,33 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
}
/*
* Common setup routines used by BeginCopyFrom and BeginCopyTo.
* Process the statement option list for COPY.
*
* Iff <binary>, unload or reload in the binary format, as opposed to the
* more wasteful but more robust and portable text format.
* Scan the options list (a list of DefElem) and transpose the information
* into cstate, applying appropriate error checking.
*
* Iff <oids>, unload or reload the format that includes OID information.
* On input, we accept OIDs whether or not the table has an OID column,
* but silently drop them if it does not. On output, we report an error
* if the user asks for OIDs in a table that has none (not providing an
* OID column might seem friendlier, but could seriously confuse programs).
* cstate is assumed to be filled with zeroes initially.
*
* If in the text format, delimit columns with delimiter <delim> and print
* NULL values as <null_print>.
* This is exported so that external users of the COPY API can sanity-check
* a list of options. In that usage, cstate should be passed as NULL
* (since external users don't know sizeof(CopyStateData)) and the collected
* data is just leaked until CurrentMemoryContext is reset.
*
* Note that additional checking, such as whether column names listed in FORCE
* QUOTE actually exist, has to be applied later. This just checks for
* self-consistency of the options list.
*/
static CopyState
BeginCopy(bool is_from,
Relation rel,
Node *raw_query,
const char *queryString,
List *attnamelist,
List *options)
void
ProcessCopyOptions(CopyState cstate,
bool is_from,
List *options)
{
CopyState cstate;
List *force_quote = NIL;
List *force_notnull = NIL;
bool force_quote_all = false;
bool format_specified = false;
ListCell *option;
TupleDesc tupDesc;
int num_phys_attrs;
MemoryContext oldcontext;
/* Allocate workspace and zero all fields */
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
/*
* We allocate everything used by a cstate in a new memory context.
* This would avoid memory leaks repeated uses of COPY in a query.
*/
cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
"COPY",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(cstate->copycontext);
/* Support external use for option sanity checking */
if (cstate == NULL)
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
/* Extract options from the statement node tree */
foreach(option, options)
@ -936,14 +920,14 @@ BeginCopy(bool is_from,
}
else if (strcmp(defel->defname, "force_quote") == 0)
{
if (force_quote || force_quote_all)
if (cstate->force_quote || cstate->force_quote_all)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (defel->arg && IsA(defel->arg, A_Star))
force_quote_all = true;
cstate->force_quote_all = true;
else if (defel->arg && IsA(defel->arg, List))
force_quote = (List *) defel->arg;
cstate->force_quote = (List *) defel->arg;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@ -952,12 +936,12 @@ BeginCopy(bool is_from,
}
else if (strcmp(defel->defname, "force_not_null") == 0)
{
if (force_notnull)
if (cstate->force_notnull)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (defel->arg && IsA(defel->arg, List))
force_notnull = (List *) defel->arg;
cstate->force_notnull = (List *) defel->arg;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@ -1071,21 +1055,21 @@ BeginCopy(bool is_from,
errmsg("COPY escape must be a single one-byte character")));
/* Check force_quote */
if (!cstate->csv_mode && (force_quote != NIL || force_quote_all))
if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force quote available only in CSV mode")));
if ((force_quote != NIL || force_quote_all) && is_from)
if ((cstate->force_quote || cstate->force_quote_all) && is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force quote only available using COPY TO")));
/* Check force_notnull */
if (!cstate->csv_mode && force_notnull != NIL)
if (!cstate->csv_mode && cstate->force_notnull != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null available only in CSV mode")));
if (force_notnull != NIL && !is_from)
if (cstate->force_notnull != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null only available using COPY FROM")));
@ -1102,7 +1086,55 @@ BeginCopy(bool is_from,
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
}
/*
* Common setup routines used by BeginCopyFrom and BeginCopyTo.
*
* Iff <binary>, unload or reload in the binary format, as opposed to the
* more wasteful but more robust and portable text format.
*
* Iff <oids>, unload or reload the format that includes OID information.
* On input, we accept OIDs whether or not the table has an OID column,
* but silently drop them if it does not. On output, we report an error
* if the user asks for OIDs in a table that has none (not providing an
* OID column might seem friendlier, but could seriously confuse programs).
*
* If in the text format, delimit columns with delimiter <delim> and print
* NULL values as <null_print>.
*/
static CopyState
BeginCopy(bool is_from,
Relation rel,
Node *raw_query,
const char *queryString,
List *attnamelist,
List *options)
{
CopyState cstate;
TupleDesc tupDesc;
int num_phys_attrs;
MemoryContext oldcontext;
/* Allocate workspace and zero all fields */
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
/*
* We allocate everything used by a cstate in a new memory context.
* This avoids memory leaks during repeated use of COPY in a query.
*/
cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
"COPY",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcontext = MemoryContextSwitchTo(cstate->copycontext);
/* Extract options from the statement node tree */
ProcessCopyOptions(cstate, is_from, options);
/* Process the source/target relation or query */
if (rel)
{
Assert(!raw_query);
@ -1197,19 +1229,19 @@ BeginCopy(bool is_from,
/* Convert FORCE QUOTE name list to per-column flags, check validity */
cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
if (force_quote_all)
if (cstate->force_quote_all)
{
int i;
for (i = 0; i < num_phys_attrs; i++)
cstate->force_quote_flags[i] = true;
}
else if (force_quote)
else if (cstate->force_quote)
{
List *attnums;
ListCell *cur;
attnums = CopyGetAttnums(tupDesc, cstate->rel, force_quote);
attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
foreach(cur, attnums)
{
@ -1226,12 +1258,12 @@ BeginCopy(bool is_from,
/* Convert FORCE NOT NULL name list to per-column flags, check validity */
cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
if (force_notnull)
if (cstate->force_notnull)
{
List *attnums;
ListCell *cur;
attnums = CopyGetAttnums(tupDesc, cstate->rel, force_notnull);
attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
foreach(cur, attnums)
{

View File

@ -18,11 +18,12 @@
#include "nodes/parsenodes.h"
#include "tcop/dest.h"
typedef struct CopyStateData *CopyState;
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
extern uint64 DoCopy(const CopyStmt *stmt, const char *queryString);
extern void ProcessCopyOptions(CopyState cstate, bool is_from, List *options);
extern CopyState BeginCopyFrom(Relation rel, const char *filename,
List *attnamelist, List *options);
extern void EndCopyFrom(CopyState cstate);