Introduce logical decoding.

This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables.  The output format is controlled by a
so-called "output plugin"; an example is included.  To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.

Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.

Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Michael
Paquier, Simon Riggs, Craig Ringer, and Steve Singer.
This commit is contained in:
Robert Haas 2014-03-03 16:32:18 -05:00
parent de94b47c0a
commit b89e151054
89 changed files with 12998 additions and 194 deletions

View File

@ -50,6 +50,7 @@ SUBDIRS = \
spi \
tablefunc \
tcn \
test_decoding \
test_parser \
test_shm_mq \
tsearch2 \

4
contrib/test_decoding/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

View File

@ -0,0 +1,69 @@
# contrib/test_decoding/Makefile
# Module and object names consumed by the build rules included below
# (PGXS or contrib-global.mk).
MODULES = test_decoding
OBJS = test_decoding.o
# Note: because we don't tell the Makefile there are any regression tests,
# we have to clean those result files explicitly
# pg_regress_clean_files is not defined in this file; presumably it comes
# from the makefiles included below, which works because "=" defers expansion.
EXTRA_CLEAN = -r $(pg_regress_clean_files)
# With USE_PGXS set, locate the PGXS makefile via "pg_config --pgxs" and
# include it.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
# Otherwise include the tree's own makefiles, found relative to
# top_builddir two directory levels up.
subdir = contrib/test_decoding
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
# Disabled because these tests require "wal_level=logical", which
# typical installcheck users do not have (e.g. buildfarm clients).
# The lone ";" gives installcheck an empty recipe, so it does nothing.
installcheck:;
# Running the tests against a preexisting installation can nonetheless be
# very helpful, so allow it -- but only when requested explicitly.
installcheck-force: regresscheck-install-force isolationcheck-install-force
# check runs both suites: regresscheck and isolationcheck.
check: regresscheck isolationcheck
# Helper targets used as order-only prerequisites by the check rules.
# Each one recurses with $(MAKE) into another directory of the build tree.

# Make the "all" goal in src/test/regress.
submake-regress:
	$(MAKE) -C $(top_builddir)/src/test/regress all

# Make the "all" goal in src/test/isolation.
submake-isolation:
	$(MAKE) -C $(top_builddir)/src/test/isolation all

# Make the default goal in contrib/test_decoding.
submake-test_decoding:
	$(MAKE) -C $(top_builddir)/contrib/test_decoding
# Names of the regression tests passed to the pg_regress invocations below.
REGRESSCHECKS = ddl rewrite toast permissions decoding_in_xact binary

# Run the regression tests against a temporary installation in ./tmp_check.
# logical.conf is passed as the temporary server configuration (presumably
# it sets the wal_level=logical that these tests require).  The submake-*
# targets are order-only prerequisites: they are made first but never force
# this rule to be considered out of date.
regresscheck: all | submake-regress submake-test_decoding
	$(pg_regress_check) \
	    --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \
	    --temp-install=./tmp_check \
	    --extra-install=contrib/test_decoding \
	    $(REGRESSCHECKS)

# Run the same regression tests through $(pg_regress_installcheck), without
# the temporary-install options; reached via installcheck-force.
regresscheck-install-force: | submake-regress submake-test_decoding
	$(pg_regress_installcheck) \
	    --extra-install=contrib/test_decoding \
	    $(REGRESSCHECKS)
# Names of the isolation test specs passed to the isolation tester below.
ISOLATIONCHECKS = mxact delayed_startup concurrent_ddl_dml

# Run the isolation tests with logical.conf as the temporary server
# configuration (presumably it sets the wal_level=logical that these tests
# require).  The submake-* targets are order-only prerequisites.
isolationcheck: all | submake-isolation submake-test_decoding
	$(pg_isolation_regress_check) \
	    --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \
	    --extra-install=contrib/test_decoding \
	    $(ISOLATIONCHECKS)

# Run the same isolation tests through $(pg_isolation_regress_installcheck),
# without the temporary configuration; reached via installcheck-force.
isolationcheck-install-force: all | submake-isolation submake-test_decoding
	$(pg_isolation_regress_installcheck) \
	    --extra-install=contrib/test_decoding \
	    $(ISOLATIONCHECKS)
# Every target listed here names a command, not a file.  The special target
# must be spelled ".PHONY" (with the leading dot); a plain "PHONY:" merely
# defines an ordinary, never-built target and leaves the names below
# non-phony, so a stray file called e.g. "check" would suppress the rule.
# submake-isolation and installcheck-force are listed too, since they are
# command-only targets just like their siblings.
.PHONY: submake-test_decoding submake-regress submake-isolation check \
    installcheck-force \
    regresscheck regresscheck-install-force \
    isolationcheck isolationcheck-install-force

View File

@ -0,0 +1,35 @@
-- predictability
SET synchronous_commit = on;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
-- succeeds, textual plugin, textual consumer
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0');
data
------
(0 rows)
-- fails, binary plugin, textual consumer
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1');
ERROR: output plugin cannot produce text output
-- succeeds, textual plugin, binary consumer
SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0');
data
------
(0 rows)
-- succeeds, binary plugin, binary consumer
SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1');
data
------
(0 rows)
SELECT 'init' FROM pg_drop_replication_slot('regression_slot');
?column?
----------
init
(1 row)

View File

@ -0,0 +1,733 @@
Parsed test spec with 2 sessions
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_float s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl1_float: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_char s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl1_char: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_float: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_char: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_float: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_char: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_char: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1'
COMMIT
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean;
ERROR: column "val2" cannot be cast automatically to type boolean
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s2_alter_tbl1_boolean s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean;
ERROR: column "val2" cannot be cast automatically to type boolean
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_alter_tbl1_boolean: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl1_boolean: <... completed>
error in steps s1_commit s2_alter_tbl1_boolean: ERROR: column "val2" cannot be cast automatically to type boolean
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s1_begin: BEGIN;
step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s1_begin: BEGIN;
step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s1_begin: BEGIN;
step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
step s1_begin: BEGIN;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl2_drop_3rd_col: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
BEGIN
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s1_commit s1_insert_tbl2 s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
step s1_begin: BEGIN;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_drop_3rd_col: <... completed>
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes s2_alter_tbl2_add_text s1_begin s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_get_changes s2_alter_tbl2_3rd_int s1_insert_tbl2_3col s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
step s1_begin: BEGIN;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
step s1_commit: COMMIT;
step s2_alter_tbl2_drop_3rd_col: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
BEGIN
COMMIT
step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
step s1_begin: BEGIN;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; <waiting ...>
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_3rd_char: <... completed>
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
COMMIT
BEGIN
COMMIT
step s2_alter_tbl2_3rd_int: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null
table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; <waiting ...>
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_3rd_text: <... completed>
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; <waiting ...>
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_3rd_char: <... completed>
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying;
step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
step s1_commit: COMMIT;
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop
starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_drop_3rd_col s1_insert_tbl1 s1_commit s2_get_changes
step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
step s1_begin: BEGIN;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
step s1_commit: COMMIT;
step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
data
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
COMMIT
?column?
stop

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,89 @@
-- predictability
SET synchronous_commit = on;
-- fail because we're creating a slot while in an xact with xid
BEGIN;
SELECT txid_current() = 0;
?column?
----------
f
(1 row)
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
ERROR: cannot create logical replication slot in transaction that has performed writes
ROLLBACK;
-- fail because we're creating a slot while in an subxact whose topxact has a xid
BEGIN;
SELECT txid_current() = 0;
?column?
----------
f
(1 row)
SAVEPOINT barf;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
ERROR: cannot create logical replication slot in transaction that has performed writes
ROLLBACK TO SAVEPOINT barf;
ROLLBACK;
-- succeed, outside tx.
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
?column?
----------
stop
(1 row)
-- succeed, in tx without xid.
BEGIN;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
COMMIT;
CREATE TABLE nobarf(id serial primary key, data text);
INSERT INTO nobarf(data) VALUES('1');
-- decoding works in transaction with xid
BEGIN;
SELECT txid_current() = 0;
?column?
----------
f
(1 row)
-- don't show yet, haven't committed
INSERT INTO nobarf(data) VALUES('2');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
-----------------------------------------------------------
BEGIN
COMMIT
BEGIN
table public.nobarf: INSERT: id[integer]:1 data[text]:'1'
COMMIT
(5 rows)
COMMIT;
INSERT INTO nobarf(data) VALUES('3');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
-----------------------------------------------------------
BEGIN
table public.nobarf: INSERT: id[integer]:2 data[text]:'2'
COMMIT
BEGIN
table public.nobarf: INSERT: id[integer]:3 data[text]:'3'
COMMIT
(6 rows)
SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
?column?
----------
stop
(1 row)

View File

@ -0,0 +1,38 @@
Parsed test spec with 2 sessions
starting permutation: s1b s1w s2init s1c s2start s1b s1w s1c s2start s1b s1w s2start s1c s2start
step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1w: INSERT INTO do_write DEFAULT VALUES;
step s2init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
step s1c: COMMIT;
step s2init: <... completed>
?column?
init
step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1w: INSERT INTO do_write DEFAULT VALUES;
step s1c: COMMIT;
step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
BEGIN
table public.do_write: INSERT: id[integer]:2
COMMIT
step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1w: INSERT INTO do_write DEFAULT VALUES;
step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
step s1c: COMMIT;
step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
BEGIN
table public.do_write: INSERT: id[integer]:3
COMMIT
?column?
stop

View File

@ -0,0 +1,66 @@
Parsed test spec with 3 sessions
starting permutation: s0init s0start s1begin s1sharepgclass s2begin s2sharepgclass s0w s0start s2commit s1commit
step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
step s1begin: BEGIN;
step s1sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s;
?column?
t
step s2begin: BEGIN;
step s2sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s;
?column?
t
step s0w: INSERT INTO do_write DEFAULT VALUES;
step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
BEGIN
table public.do_write: INSERT: id[integer]:1
COMMIT
step s2commit: COMMIT;
step s1commit: COMMIT;
?column?
stop
starting permutation: s0init s0start s1begin s1keysharepgclass s2begin s2keysharepgclass s0alter s0w s0start s2commit s1commit
step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
?column?
init
step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
step s1begin: BEGIN;
step s1keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s;
?column?
t
step s2begin: BEGIN;
step s2keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s;
?column?
t
step s0alter: ALTER TABLE do_write ADD column ts timestamptz;
step s0w: INSERT INTO do_write DEFAULT VALUES;
step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
data
BEGIN
COMMIT
BEGIN
table public.do_write: INSERT: id[integer]:1 ts[timestamp with time zone]:null
COMMIT
step s2commit: COMMIT;
step s1commit: COMMIT;
?column?
stop

View File

@ -0,0 +1,130 @@
-- predictability
SET synchronous_commit = on;
-- setup
CREATE ROLE lr_normal;
CREATE ROLE lr_superuser SUPERUSER;
CREATE ROLE lr_replication REPLICATION;
CREATE TABLE lr_test(data text);
-- superuser can control replication
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
INSERT INTO lr_test VALUES('lr_superuser_init');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
--------------------------------------------------------------
BEGIN
table public.lr_test: INSERT: data[text]:'lr_superuser_init'
COMMIT
(3 rows)
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)
RESET ROLE;
-- replication user can control replication
SET ROLE lr_replication;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
INSERT INTO lr_test VALUES('lr_superuser_init');
ERROR: permission denied for relation lr_test
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
------
(0 rows)
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)
RESET ROLE;
-- plain user *can't* can control replication
SET ROLE lr_normal;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
ERROR: must be superuser or replication role to use replication slots
INSERT INTO lr_test VALUES('lr_superuser_init');
ERROR: permission denied for relation lr_test
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
ERROR: must be superuser or replication role to use replication slots
SELECT pg_drop_replication_slot('regression_slot');
ERROR: must be superuser or replication role to use replication slots
RESET ROLE;
-- replication users can drop superuser created slots
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
RESET ROLE;
SET ROLE lr_replication;
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)
RESET ROLE;
-- normal users can't drop existing slots
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
RESET ROLE;
SET ROLE lr_normal;
SELECT pg_drop_replication_slot('regression_slot');
ERROR: must be superuser or replication role to use replication slots
RESET ROLE;
-- all users can see existing slots
SET ROLE lr_superuser;
SELECT slot_name, plugin FROM pg_replication_slots;
slot_name | plugin
-----------------+---------------
regression_slot | test_decoding
(1 row)
RESET ROLE;
SET ROLE lr_replication;
SELECT slot_name, plugin FROM pg_replication_slots;
slot_name | plugin
-----------------+---------------
regression_slot | test_decoding
(1 row)
RESET ROLE;
SET ROLE lr_normal;
SELECT slot_name, plugin FROM pg_replication_slots;
slot_name | plugin
-----------------+---------------
regression_slot | test_decoding
(1 row)
RESET ROLE;
-- cleanup
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)
DROP ROLE lr_normal;
DROP ROLE lr_superuser;
DROP ROLE lr_replication;
DROP TABLE lr_test;

View File

@ -0,0 +1,107 @@
-- predictability
SET synchronous_commit = on;
DROP TABLE IF EXISTS replication_example;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
INSERT INTO replication_example(somedata) VALUES (1);
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
----------------------------------------------------------------------------------------------------------
BEGIN
COMMIT
BEGIN
table public.replication_example: INSERT: id[integer]:1 somedata[integer]:1 text[character varying]:null
COMMIT
(5 rows)
BEGIN;
INSERT INTO replication_example(somedata) VALUES (2);
ALTER TABLE replication_example ADD COLUMN testcolumn1 int;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1);
COMMIT;
BEGIN;
INSERT INTO replication_example(somedata) VALUES (3);
ALTER TABLE replication_example ADD COLUMN testcolumn2 int;
INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1);
COMMIT;
VACUUM FULL pg_am;
VACUUM FULL pg_amop;
VACUUM FULL pg_proc;
VACUUM FULL pg_opclass;
VACUUM FULL pg_type;
VACUUM FULL pg_index;
VACUUM FULL pg_database;
-- repeated rewrites that fail
BEGIN;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
ROLLBACK;
-- repeated rewrites that succeed
BEGIN;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
COMMIT;
-- repeated rewrites in different transactions
VACUUM FULL pg_class;
VACUUM FULL pg_class;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3);
BEGIN;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4);
ALTER TABLE replication_example ADD COLUMN testcolumn3 int;
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1);
COMMIT;
-- make old files go away
CHECKPOINT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
data
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BEGIN
table public.replication_example: INSERT: id[integer]:2 somedata[integer]:2 text[character varying]:null
table public.replication_example: INSERT: id[integer]:3 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:1
COMMIT
BEGIN
table public.replication_example: INSERT: id[integer]:4 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:null
table public.replication_example: INSERT: id[integer]:5 somedata[integer]:4 text[character varying]:null testcolumn1[integer]:2 testcolumn2[integer]:1
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.replication_example: INSERT: id[integer]:6 somedata[integer]:5 text[character varying]:null testcolumn1[integer]:3 testcolumn2[integer]:null
COMMIT
BEGIN
table public.replication_example: INSERT: id[integer]:7 somedata[integer]:6 text[character varying]:null testcolumn1[integer]:4 testcolumn2[integer]:null
table public.replication_example: INSERT: id[integer]:8 somedata[integer]:7 text[character varying]:null testcolumn1[integer]:5 testcolumn2[integer]:null testcolumn3[integer]:1
COMMIT
(35 rows)
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)
DROP TABLE IF EXISTS replication_example;

View File

@ -0,0 +1,90 @@
-- predictability
SET synchronous_commit = on;
DROP TABLE IF EXISTS xpto;
NOTICE: table "xpto" does not exist, skipping
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
?column?
----------
init
(1 row)
CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random"
CREATE TABLE xpto (
id serial primary key,
toasted_col1 text,
rand1 float8 DEFAULT nextval('xpto_rand_seq'),
toasted_col2 text,
rand2 float8 DEFAULT nextval('xpto_rand_seq')
);
-- uncompressed external toast data
INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i);
-- compressed external toast data
INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
-- update of existing column
UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1;
UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
DELETE FROM xpto WHERE id = 1;
DROP TABLE IF EXISTS toasted_key;
NOTICE: table "toasted_key" does not exist, skipping
CREATE TABLE toasted_key (
id serial,
toasted_key text PRIMARY KEY,
toasted_col1 text,
toasted_col2 text
);
ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200));
-- test update of a toasted key without changing it
UPDATE toasted_key SET toasted_col2 = toasted_col1;
-- test update of a toasted key, changing it
UPDATE toasted_key SET toasted_key = toasted_key || '1';
DELETE FROM toasted_key;
SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
substr
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.xpto: INSERT: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
COMMIT
BEGIN
table public.xpto: INSERT: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'00010002000300040005000600070008000900100011001200130014001500160017001800190020002100
COMMIT
BEGIN
table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
COMMIT
BEGIN
table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:123.456 toasted_col2[text]:unchanged-toast-datum rand2[double precision]:1578
COMMIT
BEGIN
table public.xpto: DELETE: id[integer]:1
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
COMMIT
BEGIN
table public.toasted_key: INSERT: id[integer]:1 toasted_key[text]:'1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123
COMMIT
BEGIN
table public.toasted_key: UPDATE: id[integer]:1 toasted_key[text]:unchanged-toast-datum toasted_col1[text]:unchanged-toast-datum toasted_col2[text]:'987654321098765432109876543210987654321098765432109
COMMIT
BEGIN
table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
COMMIT
BEGIN
table public.toasted_key: DELETE: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
COMMIT
(37 rows)
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
--------------------------
(1 row)

View File

@ -0,0 +1,2 @@
# Settings the test_decoding tests rely on.
# The tests create and read logical replication slots, which need
# wal_level = logical.
wal_level = logical
# The test scripts create replication slots, so slots must be enabled.
# NOTE(review): 4 looks like simple headroom; the scripts visible here
# only ever hold one or two slots at a time -- confirm before lowering.
max_replication_slots = 4

View File

@ -0,0 +1,94 @@
# Two plain two-column tables; the sessions below insert into them while
# the other session changes their definition.
setup
{
DROP TABLE IF EXISTS tbl1;
DROP TABLE IF EXISTS tbl2;
CREATE TABLE tbl1(val1 integer, val2 integer);
CREATE TABLE tbl2(val1 integer, val2 integer);
}
# Drop the tables and the slot created by step s1_init.  The 'stop' row
# selected here is what shows up at the end of each permutation in the
# expected output.
teardown
{
DROP TABLE tbl1;
DROP TABLE tbl2;
SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
}
# Session s1: creates the logical slot and performs the inserts whose
# decoded form is later read back by s2_get_changes.
session "s1"
step "s1_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
step "s1_begin" { BEGIN; }
step "s1_insert_tbl1" { INSERT INTO tbl1 (val1, val2) VALUES (1, 1); }
# The *_3col steps reference val3, which exists only after one of the
# ADD COLUMN steps of session s2 has run.
# NOTE(review): s1_insert_tbl1_3col is not used by any permutation in this
# file.
step "s1_insert_tbl1_3col" { INSERT INTO tbl1 (val1, val2, val3) VALUES (1, 1, 1); }
step "s1_insert_tbl2" { INSERT INTO tbl2 (val1, val2) VALUES (1, 1); }
step "s1_insert_tbl2_3col" { INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); }
step "s1_commit" { COMMIT; }
# Session s2: runs DDL against tbl1/tbl2 concurrently with session s1 and
# finally reads the decoded changes from the slot.
# Every step name must be unique; permutations refer to steps by name.
session "s2"
# Change the type of the existing column val2 of tbl1.
step "s2_alter_tbl1_float" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; }
step "s2_alter_tbl1_char" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; }
step "s2_alter_tbl1_text" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE text; }
step "s2_alter_tbl1_boolean" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; }
# Add a third column val3 to tbl1.
step "s2_alter_tbl1_add_int" { ALTER TABLE tbl1 ADD COLUMN val3 INTEGER; }
step "s2_alter_tbl1_add_float" { ALTER TABLE tbl1 ADD COLUMN val3 FLOAT; }
step "s2_alter_tbl1_add_char" { ALTER TABLE tbl1 ADD COLUMN val3 character varying; }
step "s2_alter_tbl1_add_boolean" { ALTER TABLE tbl1 ADD COLUMN val3 BOOLEAN; }
step "s2_alter_tbl1_add_text" { ALTER TABLE tbl1 ADD COLUMN val3 TEXT; }
# Change the type of the existing column val2 of tbl2.
step "s2_alter_tbl2_float" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float; }
step "s2_alter_tbl2_char" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; }
step "s2_alter_tbl2_text" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text; }
step "s2_alter_tbl2_boolean" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; }
# Add a third column val3 to tbl2.
step "s2_alter_tbl2_add_int" { ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; }
step "s2_alter_tbl2_add_float" { ALTER TABLE tbl2 ADD COLUMN val3 FLOAT; }
step "s2_alter_tbl2_add_char" { ALTER TABLE tbl2 ADD COLUMN val3 character varying; }
step "s2_alter_tbl2_add_boolean" { ALTER TABLE tbl2 ADD COLUMN val3 BOOLEAN; }
step "s2_alter_tbl2_add_text" { ALTER TABLE tbl2 ADD COLUMN val3 TEXT; }
# Drop, or change the type of, the added column val3 of tbl2.
step "s2_alter_tbl2_drop_3rd_col" { ALTER TABLE tbl2 DROP COLUMN val3; }
step "s2_alter_tbl2_3rd_char" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; }
step "s2_alter_tbl2_3rd_text" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; }
step "s2_alter_tbl2_3rd_int" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer; }
# Consume everything decoded so far.  The regexp_replace rewrites any
# temp_<digits> in the output to plain temp; presumably this hides
# run-specific names of temporary relations -- confirm.
step "s2_get_changes" { SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); }
# Every permutation creates the slot first and ends by reading the decoded
# changes with s2_get_changes.

# val2 of one or both tables changes type (float / character varying)
# while s1 has an open transaction that inserts into tbl1 and tbl2.
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
# tbl2.val2 becomes character varying before the transaction starts and
# text while it is open.
permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
# Same pattern with a change to boolean.
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s2_alter_tbl1_boolean" "s1_commit" "s2_get_changes"
# A third column (int / float / character varying) is added to tbl2 while
# s1 has a transaction open, followed by a three-column insert; each type
# is tried with the ADD COLUMN in s1's first and in its second transaction.
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
# The third column is added before the transaction and dropped, or changed
# in type, while rows that include it are being inserted.
permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s1_commit" "s1_insert_tbl2" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_3rd_int" "s1_insert_tbl2_3col" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes"
permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes"
# Column added to and dropped from tbl2 while the open transaction only
# writes to tbl1.
permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl1" "s1_commit" "s2_get_changes"

View File

@ -0,0 +1,24 @@
# Checks slot creation and decoding around a transaction that has already
# written data when the slot is created.
setup
{
DROP TABLE IF EXISTS do_write;
CREATE TABLE do_write(id serial primary key);
}
# Drop the table and the slot created by step s2init.
teardown
{
DROP TABLE do_write;
SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
}
# s1: a serializable transaction that inserts one row into do_write.
session "s1"
setup { SET synchronous_commit=on; }
step "s1b" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
step "s1w" { INSERT INTO do_write DEFAULT VALUES; }
step "s1c" { COMMIT; }
# s2: creates the slot and reads the decoded changes from it.
session "s2"
setup { SET synchronous_commit=on; }
step "s2init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');}
step "s2start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');}
# Expected behaviour, per the expected output of this test:
#  - s2init waits until the already-writing transaction commits (s1c);
#  - the s2start right after that returns no rows;
#  - a transaction run entirely after slot creation is returned by s2start;
#  - s2start issued while a transaction is still open returns nothing for
#    it, and returns its insert once it has committed.
permutation "s1b" "s1w" "s2init" "s1c" "s2start" "s1b" "s1w" "s1c" "s2start" "s1b" "s1w" "s2start" "s1c" "s2start"

View File

@ -0,0 +1,38 @@
# Checks decoding while two other sessions hold row locks on all rows of
# pg_class at the same time.
setup
{
DROP TABLE IF EXISTS do_write;
CREATE TABLE do_write(id serial primary key);
}
# Drop the table and the slot created by step s0init.
teardown
{
DROP TABLE IF EXISTS do_write;
SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
}
# s0: owns the slot, writes to (and optionally alters) do_write, and reads
# the decoded changes.
session "s0"
setup { SET synchronous_commit=on; }
step "s0init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');}
step "s0start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');}
step "s0alter" {ALTER TABLE do_write ADD column ts timestamptz; }
step "s0w" { INSERT INTO do_write DEFAULT VALUES; }
# s1 and s2 are identical: each opens a transaction and locks every row of
# pg_class, either FOR SHARE or FOR KEY SHARE.
session "s1"
setup { SET synchronous_commit=on; }
step "s1begin" {BEGIN;}
step "s1sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; }
step "s1keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; }
step "s1commit" {COMMIT;}
session "s2"
setup { SET synchronous_commit=on; }
step "s2begin" {BEGIN;}
step "s2sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; }
step "s2keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; }
step "s2commit" {COMMIT;}
# test that we're handling an update-only mxact xmax correctly
# Both sessions hold FOR SHARE locks on pg_class while s0 inserts and
# decodes; nothing modifies pg_class itself in this permutation.
# NOTE(review): with only lockers involved, "update-only" above presumably
# means "lock-only" -- confirm.
permutation "s0init" "s0start" "s1begin" "s1sharepgclass" "s2begin" "s2sharepgclass" "s0w" "s0start" "s2commit" "s1commit"
# Same scenario with FOR KEY SHARE locks, plus an ALTER TABLE on do_write
# (s0alter) before the insert.
# NOTE(review): presumably this covers a multixact xmax that contains an
# updater in addition to the key-share lockers -- confirm.
permutation "s0init" "s0start" "s1begin" "s1keysharepgclass" "s2begin" "s2keysharepgclass" "s0alter" "s0w" "s0start" "s2commit" "s1commit"

View File

@ -0,0 +1,14 @@
-- predictability
SET synchronous_commit = on;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
-- succeeds, textual plugin, textual consumer
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0');
-- fails, binary plugin, textual consumer
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1');
-- succeeds, textual plugin, binary consumer
SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0');
-- succeeds, binary plugin, binary consumer
SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1');
SELECT 'init' FROM pg_drop_replication_slot('regression_slot');

View File

@ -0,0 +1,337 @@
-- predictability
SET synchronous_commit = on;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
-- fail because of an already existing slot
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
-- fail because of an invalid name
SELECT 'init' FROM pg_create_logical_replication_slot('Invalid Name', 'test_decoding');
-- fail twice because of an invalid parameter values
SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'nonexistant-option', 'frakbar');
SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
-- succeed once
SELECT pg_drop_replication_slot('regression_slot');
-- fail
SELECT pg_drop_replication_slot('regression_slot');
-- check that we're detecting a streaming rep slot used for logical decoding
SELECT 'init' FROM pg_create_physical_replication_slot('repl');
SELECT data FROM pg_logical_slot_get_changes('repl', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('repl');
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
/* check whether status function reports us, only reproduceable columns */
SELECT slot_name, plugin, slot_type, active,
NOT catalog_xmin IS NULL AS catalog_xmin_set,
xmin IS NULl AS data_xmin_not_set,
pg_xlog_location_diff(restart_lsn, '0/01000000') > 0 AS some_wal
FROM pg_replication_slots;
/*
* Check that changes are handled correctly when interleaved with ddl
*/
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
BEGIN;
INSERT INTO replication_example(somedata, text) VALUES (1, 1);
INSERT INTO replication_example(somedata, text) VALUES (1, 2);
COMMIT;
ALTER TABLE replication_example ADD COLUMN bar int;
INSERT INTO replication_example(somedata, text, bar) VALUES (2, 1, 4);
BEGIN;
INSERT INTO replication_example(somedata, text, bar) VALUES (2, 2, 4);
INSERT INTO replication_example(somedata, text, bar) VALUES (2, 3, 4);
INSERT INTO replication_example(somedata, text, bar) VALUES (2, 4, NULL);
COMMIT;
ALTER TABLE replication_example DROP COLUMN bar;
INSERT INTO replication_example(somedata, text) VALUES (3, 1);
BEGIN;
INSERT INTO replication_example(somedata, text) VALUES (3, 2);
INSERT INTO replication_example(somedata, text) VALUES (3, 3);
COMMIT;
ALTER TABLE replication_example RENAME COLUMN text TO somenum;
INSERT INTO replication_example(somedata, somenum) VALUES (4, 1);
-- collect all changes
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
ALTER TABLE replication_example ALTER COLUMN somenum TYPE int4 USING (somenum::int4);
-- throw away changes, they contain oids
SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
INSERT INTO replication_example(somedata, somenum) VALUES (5, 1);
BEGIN;
INSERT INTO replication_example(somedata, somenum) VALUES (6, 1);
ALTER TABLE replication_example ADD COLUMN zaphod1 int;
INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 2, 1);
ALTER TABLE replication_example ADD COLUMN zaphod2 int;
INSERT INTO replication_example(somedata, somenum, zaphod2) VALUES (6, 3, 1);
INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 4, 2);
COMMIT;
-- show changes
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
-- hide changes bc of oid visible in full table rewrites
CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int);
INSERT INTO tr_unique(data) VALUES(10);
ALTER TABLE tr_unique RENAME TO tr_pkey;
ALTER TABLE tr_pkey ADD COLUMN id serial primary key;
SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
INSERT INTO tr_pkey(data) VALUES(1);
--show deletion with primary key
DELETE FROM tr_pkey;
/* display results */
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
/*
* check that disk spooling works
*/
BEGIN;
CREATE TABLE tr_etoomuch (id serial primary key, data int);
INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i);
DELETE FROM tr_etoomuch WHERE id < 5000;
UPDATE tr_etoomuch SET data = - data WHERE id > 5000;
COMMIT;
/* display results, but hide most of the output */
SELECT count(*), min(data), max(data)
FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0')
GROUP BY substring(data, 1, 24)
ORDER BY 1;
/*
* check whether we decode subtransactions correctly in relation with each
* other
*/
CREATE TABLE tr_sub (id serial primary key, path text);
-- toplevel, subtxn, toplevel, subtxn, subtxn
BEGIN;
INSERT INTO tr_sub(path) VALUES ('1-top-#1');
SAVEPOINT a;
INSERT INTO tr_sub(path) VALUES ('1-top-1-#1');
INSERT INTO tr_sub(path) VALUES ('1-top-1-#2');
RELEASE SAVEPOINT a;
SAVEPOINT b;
SAVEPOINT c;
INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#1');
INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#2');
RELEASE SAVEPOINT c;
INSERT INTO tr_sub(path) VALUES ('1-top-2-#1');
RELEASE SAVEPOINT b;
COMMIT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
-- check that we handle xlog assignments correctly
BEGIN;
-- nest 80 subtxns
SAVEPOINT subtop;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
-- assign xid by inserting
INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1');
INSERT INTO tr_sub(path) VALUES ('2-top-1...--#2');
INSERT INTO tr_sub(path) VALUES ('2-top-1...--#3');
RELEASE SAVEPOINT subtop;
INSERT INTO tr_sub(path) VALUES ('2-top-#1');
COMMIT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
-- make sure rollbacked subtransactions aren't decoded
BEGIN;
INSERT INTO tr_sub(path) VALUES ('3-top-2-#1');
SAVEPOINT a;
INSERT INTO tr_sub(path) VALUES ('3-top-2-1-#1');
SAVEPOINT b;
INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1');
ROLLBACK TO SAVEPOINT b;
INSERT INTO tr_sub(path) VALUES ('3-top-2-#2');
COMMIT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
-- test whether a known, but not yet logged toplevel xact, followed by a
-- subxact commit is handled correctly
BEGIN;
SELECT txid_current() != 0; -- so no fixed xid apears in the outfile
SAVEPOINT a;
INSERT INTO tr_sub(path) VALUES ('4-top-1-#1');
RELEASE SAVEPOINT a;
COMMIT;
-- test whether a change in a subtransaction, in an unknown toplevel
-- xact is handled correctly.
BEGIN;
SAVEPOINT a;
INSERT INTO tr_sub(path) VALUES ('5-top-1-#1');
COMMIT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
/*
* Check whether treating a table as a catalog table works somewhat
*/
CREATE TABLE replication_metadata (
id serial primary key,
relation name NOT NULL,
options text[]
)
WITH (user_catalog_table = true)
;
\d+ replication_metadata
INSERT INTO replication_metadata(relation, options)
VALUES ('foo', ARRAY['a', 'b']);
ALTER TABLE replication_metadata RESET (user_catalog_table);
\d+ replication_metadata
INSERT INTO replication_metadata(relation, options)
VALUES ('bar', ARRAY['a', 'b']);
ALTER TABLE replication_metadata SET (user_catalog_table = true);
\d+ replication_metadata
INSERT INTO replication_metadata(relation, options)
VALUES ('blub', NULL);
-- make sure rewrites don't work
ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int;
ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text;
ALTER TABLE replication_metadata SET (user_catalog_table = false);
\d+ replication_metadata
INSERT INTO replication_metadata(relation, options)
VALUES ('zaphod', NULL);
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
/*
* check whether we handle updates/deletes correct with & without a pkey
*/
/* we should handle the case without a key at all more gracefully */
CREATE TABLE table_without_key(id serial, data int);
INSERT INTO table_without_key(data) VALUES(1),(2);
DELETE FROM table_without_key WHERE data = 1;
-- won't log old keys
UPDATE table_without_key SET data = 3 WHERE data = 2;
UPDATE table_without_key SET id = -id;
UPDATE table_without_key SET id = -id;
-- should log the full old row now
ALTER TABLE table_without_key REPLICA IDENTITY FULL;
UPDATE table_without_key SET data = 3 WHERE data = 2;
UPDATE table_without_key SET id = -id;
UPDATE table_without_key SET id = -id;
DELETE FROM table_without_key WHERE data = 3;
CREATE TABLE table_with_pkey(id serial primary key, data int);
INSERT INTO table_with_pkey(data) VALUES(1), (2);
DELETE FROM table_with_pkey WHERE data = 1;
-- should log the old pkey
UPDATE table_with_pkey SET data = 3 WHERE data = 2;
UPDATE table_with_pkey SET id = -id;
UPDATE table_with_pkey SET id = -id;
-- check that we log nothing despite having a pkey
ALTER TABLE table_without_key REPLICA IDENTITY NOTHING;
UPDATE table_with_pkey SET id = -id;
-- check that we log everything despite having a pkey
ALTER TABLE table_without_key REPLICA IDENTITY FULL;
UPDATE table_with_pkey SET id = -id;
DELETE FROM table_with_pkey WHERE data = 3;
CREATE TABLE table_with_unique_not_null(id serial unique, data int);
ALTER TABLE table_with_unique_not_null ALTER COLUMN id SET NOT NULL; --already set
-- won't log anything, replica identity not setup
INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
DELETE FROM table_with_unique_not_null WHERE data = 1;
UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
UPDATE table_with_unique_not_null SET id = -id;
UPDATE table_with_unique_not_null SET id = -id;
DELETE FROM table_with_unique_not_null WHERE data = 3;
-- should log old key
ALTER TABLE table_with_unique_not_null REPLICA IDENTITY USING INDEX table_with_unique_not_null_id_key;
INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
DELETE FROM table_with_unique_not_null WHERE data = 1;
UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
UPDATE table_with_unique_not_null SET id = -id;
UPDATE table_with_unique_not_null SET id = -id;
DELETE FROM table_with_unique_not_null WHERE data = 3;
-- check toast support
BEGIN;
CREATE SEQUENCE toasttable_rand_seq START 79 INCREMENT 1499; -- portable "random"
CREATE TABLE toasttable(
id serial primary key,
toasted_col1 text,
rand1 float8 DEFAULT nextval('toasttable_rand_seq'),
toasted_col2 text,
rand2 float8 DEFAULT nextval('toasttable_rand_seq')
);
COMMIT;
-- uncompressed external toast data
INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
-- compressed external toast data
INSERT INTO toasttable(toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
-- update of existing column
UPDATE toasttable
SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
WHERE id = 1;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
-- update of second column, first column unchanged
UPDATE toasttable
SET toasted_col2 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
WHERE id = 1;
-- make sure we decode correctly even if the toast table is gone
DROP TABLE toasttable;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
-- done, free logical replication slot
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');
/* check that we aren't visible anymore now */
SELECT * FROM pg_stat_replication;

View File

@ -0,0 +1,41 @@
-- predictability
SET synchronous_commit = on;
-- fail because we're creating a slot while in an xact with xid
BEGIN;
SELECT txid_current() = 0;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
ROLLBACK;
-- fail because we're creating a slot while in an subxact whose topxact has a xid
BEGIN;
SELECT txid_current() = 0;
SAVEPOINT barf;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
ROLLBACK TO SAVEPOINT barf;
ROLLBACK;
-- succeed, outside tx.
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
-- succeed, in tx without xid.
BEGIN;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
COMMIT;
CREATE TABLE nobarf(id serial primary key, data text);
INSERT INTO nobarf(data) VALUES('1');
-- decoding works in transaction with xid
BEGIN;
SELECT txid_current() = 0;
-- don't show yet, haven't committed
INSERT INTO nobarf(data) VALUES('2');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
COMMIT;
INSERT INTO nobarf(data) VALUES('3');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');

View File

@ -0,0 +1,69 @@
-- predictability
SET synchronous_commit = on;
-- setup
CREATE ROLE lr_normal;
CREATE ROLE lr_superuser SUPERUSER;
CREATE ROLE lr_replication REPLICATION;
CREATE TABLE lr_test(data text);
-- superuser can control replication
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
INSERT INTO lr_test VALUES('lr_superuser_init');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');
RESET ROLE;
-- replication user can control replication
SET ROLE lr_replication;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
INSERT INTO lr_test VALUES('lr_superuser_init');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');
RESET ROLE;
-- plain user *can't* can control replication
SET ROLE lr_normal;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
INSERT INTO lr_test VALUES('lr_superuser_init');
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');
RESET ROLE;
-- replication users can drop superuser created slots
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
RESET ROLE;
SET ROLE lr_replication;
SELECT pg_drop_replication_slot('regression_slot');
RESET ROLE;
-- normal users can't drop existing slots
SET ROLE lr_superuser;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
RESET ROLE;
SET ROLE lr_normal;
SELECT pg_drop_replication_slot('regression_slot');
RESET ROLE;
-- all users can see existing slots
SET ROLE lr_superuser;
SELECT slot_name, plugin FROM pg_replication_slots;
RESET ROLE;
SET ROLE lr_replication;
SELECT slot_name, plugin FROM pg_replication_slots;
RESET ROLE;
SET ROLE lr_normal;
SELECT slot_name, plugin FROM pg_replication_slots;
RESET ROLE;
-- cleanup
SELECT pg_drop_replication_slot('regression_slot');
DROP ROLE lr_normal;
DROP ROLE lr_superuser;
DROP ROLE lr_replication;
DROP TABLE lr_test;

View File

@ -0,0 +1,62 @@
-- predictability
SET synchronous_commit = on;
DROP TABLE IF EXISTS replication_example;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
INSERT INTO replication_example(somedata) VALUES (1);
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
BEGIN;
INSERT INTO replication_example(somedata) VALUES (2);
ALTER TABLE replication_example ADD COLUMN testcolumn1 int;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1);
COMMIT;
BEGIN;
INSERT INTO replication_example(somedata) VALUES (3);
ALTER TABLE replication_example ADD COLUMN testcolumn2 int;
INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1);
COMMIT;
VACUUM FULL pg_am;
VACUUM FULL pg_amop;
VACUUM FULL pg_proc;
VACUUM FULL pg_opclass;
VACUUM FULL pg_type;
VACUUM FULL pg_index;
VACUUM FULL pg_database;
-- repeated rewrites that fail
BEGIN;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
ROLLBACK;
-- repeated rewrites that succeed
BEGIN;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
CLUSTER pg_class USING pg_class_oid_index;
COMMIT;
-- repeated rewrites in different transactions
VACUUM FULL pg_class;
VACUUM FULL pg_class;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3);
BEGIN;
INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4);
ALTER TABLE replication_example ADD COLUMN testcolumn3 int;
INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1);
COMMIT;
-- make old files go away
CHECKPOINT;
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');
DROP TABLE IF EXISTS replication_example;

View File

@ -0,0 +1,51 @@
-- predictability
SET synchronous_commit = on;
DROP TABLE IF EXISTS xpto;
SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random"
CREATE TABLE xpto (
id serial primary key,
toasted_col1 text,
rand1 float8 DEFAULT nextval('xpto_rand_seq'),
toasted_col2 text,
rand2 float8 DEFAULT nextval('xpto_rand_seq')
);
-- uncompressed external toast data
INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i);
-- compressed external toast data
INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
-- update of existing column
UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1;
UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
DELETE FROM xpto WHERE id = 1;
DROP TABLE IF EXISTS toasted_key;
CREATE TABLE toasted_key (
id serial,
toasted_key text PRIMARY KEY,
toasted_col1 text,
toasted_col2 text
);
ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200));
-- test update of a toasted key without changing it
UPDATE toasted_key SET toasted_col2 = toasted_col1;
-- test update of a toasted key, changing it
UPDATE toasted_key SET toasted_key = toasted_key || '1';
DELETE FROM toasted_key;
SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
SELECT pg_drop_replication_slot('regression_slot');

View File

@ -0,0 +1,404 @@
/*-------------------------------------------------------------------------
*
* test_decoding.c
* example logical decoding output plugin
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/test_decoding/test_decoding.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/sysattr.h"
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
#include "nodes/parsenodes.h"
#include "replication/output_plugin.h"
#include "replication/logical.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relcache.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
PG_MODULE_MAGIC;
/* These must be available to pg_dlsym() */
extern void _PG_init(void);
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
/* Per-decoding-session plugin state, stored in ctx->output_plugin_private */
typedef struct
{
/* scratch context used while formatting a change; reset after each one */
MemoryContext context;
/* append the xid to BEGIN/COMMIT lines? (startup default: true) */
bool include_xids;
/* append the commit time to COMMIT lines? (startup default: false) */
bool include_timestamp;
} TestDecodingData;
/*
 * Output plugin callbacks.  These are static; the core reaches them through
 * the function pointers filled in by _PG_output_plugin_init().
 */
static void pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
bool is_init);
static void pg_decode_shutdown(LogicalDecodingContext *ctx);
static void pg_decode_begin_txn(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn);
static void pg_decode_commit_txn(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
static void pg_decode_change(LogicalDecodingContext *ctx,
ReorderBufferTXN *txn, Relation rel,
ReorderBufferChange *change);
/*
 * Module initialization hook.  This example plugin has nothing to set up,
 * so the body is intentionally empty.
 */
void
_PG_init(void)
{
/* other plugins can perform things here */
}
/*
 * Fill in the callback table `cb' with this plugin's handlers.
 */
void
_PG_output_plugin_init(OutputPluginCallbacks *cb)
{
	/* make sure our signature matches the LogicalOutputPluginInit typedef */
	AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);

	/* session lifecycle */
	cb->startup_cb = pg_decode_startup;
	cb->shutdown_cb = pg_decode_shutdown;

	/* transaction boundaries and the individual changes in between */
	cb->begin_cb = pg_decode_begin_txn;
	cb->commit_cb = pg_decode_commit_txn;
	cb->change_cb = pg_decode_change;
}
/* initialize this plugin */
static void
pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
bool is_init)
{
ListCell *option;
TestDecodingData *data;
data = palloc(sizeof(TestDecodingData));
data->context = AllocSetContextCreate(ctx->context,
"text conversion context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
data->include_xids = true;
data->include_timestamp = false;
ctx->output_plugin_private = data;
opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT;
foreach(option, ctx->output_plugin_options)
{
DefElem *elem = lfirst(option);
Assert(elem->arg == NULL || IsA(elem->arg, String));
if (strcmp(elem->defname, "include-xids") == 0)
{
/* if option does not provide a value, it means its value is true */
if (elem->arg == NULL)
data->include_xids = true;
else if (!parse_bool(strVal(elem->arg), &data->include_xids))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not parse value \"%s\" for parameter \"%s\"",
strVal(elem->arg), elem->defname)));
}
else if (strcmp(elem->defname, "include-timestamp") == 0)
{
if (elem->arg == NULL)
data->include_timestamp = true;
else if (!parse_bool(strVal(elem->arg), &data->include_timestamp))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not parse value \"%s\" for parameter \"%s\"",
strVal(elem->arg), elem->defname)));
}
else if (strcmp(elem->defname, "force-binary") == 0)
{
bool force_binary;
if (elem->arg == NULL)
continue;
else if (!parse_bool(strVal(elem->arg), &force_binary))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not parse value \"%s\" for parameter \"%s\"",
strVal(elem->arg), elem->defname)));
if (force_binary)
opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("option \"%s\" = \"%s\" is unknown",
elem->defname,
elem->arg ? strVal(elem->arg) : "(null)")));
}
}
}
/*
 * Shutdown callback: release the resources this plugin allocated in
 * pg_decode_startup().
 */
static void
pg_decode_shutdown(LogicalDecodingContext *ctx)
{
TestDecodingData *data = ctx->output_plugin_private;
/* delete (not merely reset) our private memory context and its contents */
MemoryContextDelete(data->context);
}
/*
 * BEGIN callback: emit "BEGIN", followed by the transaction's xid when the
 * include-xids option is enabled.
 */
static void
pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
{
	TestDecodingData *state = ctx->output_plugin_private;

	OutputPluginPrepareWrite(ctx, true);

	appendStringInfoString(ctx->out, "BEGIN");
	if (state->include_xids)
		appendStringInfo(ctx->out, " %u", txn->xid);

	OutputPluginWrite(ctx, true);
}
/*
 * COMMIT callback: emit "COMMIT", optionally followed by the xid
 * (include-xids) and by the commit timestamp (include-timestamp).
 */
static void
pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
					 XLogRecPtr commit_lsn)
{
	TestDecodingData *state = ctx->output_plugin_private;

	OutputPluginPrepareWrite(ctx, true);

	appendStringInfoString(ctx->out, "COMMIT");
	if (state->include_xids)
		appendStringInfo(ctx->out, " %u", txn->xid);
	if (state->include_timestamp)
		appendStringInfo(ctx->out, " (at %s)",
						 timestamptz_to_str(txn->commit_time));

	OutputPluginWrite(ctx, true);
}
/*
 * Append the value `outputstr', which is the text output of a datum of type
 * `typid', to the string buffer `s' as a literal.
 *
 * Numeric builtin types are emitted bare, bit strings get a B'...' wrapper,
 * booleans are spelled out as true/false, and everything else is wrapped in
 * single quotes.  Quoting is done as if standard_conforming_strings were
 * enabled.
 */
static void
print_literal(StringInfo s, Oid typid, char *outputstr)
{
	switch (typid)
	{
		case BOOLOID:
			/* the boolean output function yields "t" for true */
			appendStringInfoString(s,
								   strcmp(outputstr, "t") == 0 ? "true" : "false");
			break;

		case BITOID:
		case VARBITOID:
			appendStringInfo(s, "B'%s'", outputstr);
			break;

		case INT2OID:
		case INT4OID:
		case INT8OID:
		case OIDOID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
			/* NB: We don't care about Inf, NaN et al. */
			appendStringInfoString(s, outputstr);
			break;

		default:
			{
				const char *cursor = outputstr;

				appendStringInfoChar(s, '\'');
				while (*cursor != '\0')
				{
					char		c = *cursor++;

					/* emit characters that need doubling twice */
					if (SQL_STR_DOUBLE(c, false))
						appendStringInfoChar(s, c);
					appendStringInfoChar(s, c);
				}
				appendStringInfoChar(s, '\'');
			}
			break;
	}
}
/*
 * Append a textual rendering of `tuple' to `s'.
 *
 * Each printed column has the form " name[type]:value".  Dropped and system
 * columns are never printed; NULL columns are omitted when `skip_nulls' is
 * set and printed as "null" otherwise.  A varlena value that is still an
 * on-disk external toast pointer is printed as "unchanged-toast-datum".
 */
static void
tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_nulls)
{
	Oid			tupoid = HeapTupleHeaderGetOid(tuple->t_data);
	int			colno;

	/* the tuple's oid is not part of the TupleDesc, so print it up front */
	if (tupoid != InvalidOid)
		appendStringInfo(s, " oid[oid]:%u", tupoid);

	for (colno = 0; colno < tupdesc->natts; colno++)
	{
		Form_pg_attribute att = tupdesc->attrs[colno];
		Oid			coltype;
		Oid			outfunc;	/* type's output function */
		bool		is_varlena;
		Datum		value;		/* possibly toasted */
		bool		isnull;

		/*
		 * Skip dropped columns (we can't be sure everything is available
		 * for them) and system columns (oid was handled above if present).
		 */
		if (att->attisdropped || att->attnum < 0)
			continue;

		coltype = att->atttypid;

		value = fastgetattr(tuple, colno + 1, tupdesc, &isnull);
		if (isnull && skip_nulls)
			continue;

		/* " name[type]" */
		appendStringInfo(s, " %s[%s]",
						 quote_identifier(NameStr(att->attname)),
						 format_type_be(coltype));

		getTypeOutputInfo(coltype, &outfunc, &is_varlena);

		appendStringInfoChar(s, ':');

		if (isnull)
			appendStringInfoString(s, "null");
		else if (is_varlena && VARATT_IS_EXTERNAL_ONDISK(value))
			appendStringInfoString(s, "unchanged-toast-datum");
		else
		{
			/* varlena values have to be detoasted before being output */
			if (is_varlena)
				value = PointerGetDatum(PG_DETOAST_DATUM(value));

			print_literal(s, coltype, OidOutputFunctionCall(outfunc, value));
		}
	}
}
/*
* callback for individual changed tuples
*/
static void
pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change)
{
TestDecodingData *data;
Form_pg_class class_form;
TupleDesc tupdesc;
MemoryContext old;
data = ctx->output_plugin_private;
class_form = RelationGetForm(relation);
tupdesc = RelationGetDescr(relation);
/* Avoid leaking memory by using and resetting our own context */
old = MemoryContextSwitchTo(data->context);
OutputPluginPrepareWrite(ctx, true);
appendStringInfoString(ctx->out, "table ");
appendStringInfoString(ctx->out,
quote_qualified_identifier(
get_namespace_name(
get_rel_namespace(RelationGetRelid(relation))),
NameStr(class_form->relname)));
appendStringInfoString(ctx->out, ":");
switch (change->action)
{
case REORDER_BUFFER_CHANGE_INSERT:
appendStringInfoString(ctx->out, " INSERT:");
if (change->tp.newtuple == NULL)
appendStringInfoString(ctx->out, " (no-tuple-data)");
else
tuple_to_stringinfo(ctx->out, tupdesc,
&change->tp.newtuple->tuple,
false);
break;
case REORDER_BUFFER_CHANGE_UPDATE:
appendStringInfoString(ctx->out, " UPDATE:");
if (change->tp.oldtuple != NULL)
{
appendStringInfoString(ctx->out, " old-key:");
tuple_to_stringinfo(ctx->out, tupdesc,
&change->tp.oldtuple->tuple,
true);
appendStringInfoString(ctx->out, " new-tuple:");
}
if (change->tp.newtuple == NULL)
appendStringInfoString(ctx->out, " (no-tuple-data)");
else
tuple_to_stringinfo(ctx->out, tupdesc,
&change->tp.newtuple->tuple,
false);
break;
case REORDER_BUFFER_CHANGE_DELETE:
appendStringInfoString(ctx->out, " DELETE:");
/* if there was no PK, we only know that a delete happened */
if (change->tp.oldtuple == NULL)
appendStringInfoString(ctx->out, " (no-tuple-data)");
/* In DELETE, only the replica identity is present; display that */
else
tuple_to_stringinfo(ctx->out, tupdesc,
&change->tp.oldtuple->tuple,
true);
break;
}
MemoryContextSwitchTo(old);
MemoryContextReset(data->context);
OutputPluginWrite(ctx, true);
}

View File

@ -140,6 +140,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
&sslinfo;
&tablefunc;
&tcn;
&test-decoding;
&test-parser;
&test-shm-mq;
&tsearch2;

View File

@ -143,6 +143,7 @@
<!ENTITY sslinfo SYSTEM "sslinfo.sgml">
<!ENTITY tablefunc SYSTEM "tablefunc.sgml">
<!ENTITY tcn SYSTEM "tcn.sgml">
<!ENTITY test-decoding SYSTEM "test-decoding.sgml">
<!ENTITY test-parser SYSTEM "test-parser.sgml">
<!ENTITY test-shm-mq SYSTEM "test-shm-mq.sgml">
<!ENTITY tsearch2 SYSTEM "tsearch2.sgml">

View File

@ -0,0 +1,42 @@
<!-- doc/src/sgml/test-decoding.sgml -->
<sect1 id="test-decoding" xreflabel="test_decoding">
<title>test_decoding</title>
<indexterm zone="test-decoding">
<primary>test_decoding</primary>
</indexterm>
<para>
<filename>test_decoding</> is an example of a logical decoding
output plugin. It doesn't do anything especially useful, but can serve as
a starting point for developing your own decoder.
</para>
<para>
<filename>test_decoding</> receives WAL through the logical decoding
mechanism and decodes it into text representations of the operations
performed.
</para>
<para>
Typical output from this plugin, used over the SQL logical decoding
interface, might be:
<programlisting>
postgres=# SELECT * FROM pg_logical_slot_get_changes('test_slot', NULL, NULL, 'include-xids', '0');
location | xid | data
-----------+-----+--------------------------------------------------
0/16D30F8 | 691 | BEGIN
0/16D32A0 | 691 | table public.data: INSERT: id[int4]:2 data[text]:'arg'
0/16D32A0 | 691 | table public.data: INSERT: id[int4]:3 data[text]:'demo'
0/16D32A0 | 691 | COMMIT
0/16D32D8 | 692 | BEGIN
0/16D3398 | 692 | table public.data: DELETE: id[int4]:2
0/16D3398 | 692 | table public.data: DELETE: id[int4]:3
0/16D3398 | 692 | COMMIT
(8 rows)
</programlisting>
</para>
</sect1>

View File

@ -468,6 +468,8 @@ pg_regress_installcheck = $(top_builddir)/src/test/regress/pg_regress --inputdir
pg_regress_clean_files = results/ regression.diffs regression.out tmp_check/ log/
pg_isolation_regress_check = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --temp-install=./tmp_check --top-builddir=$(top_builddir) $(pg_regress_locale_flags)
pg_isolation_regress_installcheck = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --top-builddir=$(top_builddir) $(pg_regress_locale_flags)
##########################################################################
#

View File

@ -347,8 +347,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
Assert(TransactionIdIsValid(RecentGlobalXmin));
heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
heap_page_prune_opt(scan->rs_rd, buffer);
/*
* We must hold share lock on the buffer content while examining tuple
@ -1750,10 +1749,22 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
*/
if (!skip)
{
/*
* For the benefit of logical decoding, have t_self point at the
* element of the HOT chain we're currently investigating instead
* of the root tuple of the HOT chain. This is important because
* the *Satisfies routine for historical mvcc snapshots needs the
* correct tid to decide about the visibility in some cases.
*/
ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum);
/* If it's visible per the snapshot, we must return it */
valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
CheckForSerializableConflictOut(valid, relation, heapTuple,
buffer, snapshot);
/* reset to original, non-redirected, tid */
heapTuple->t_self = *tid;
if (valid)
{
ItemPointerSetOffsetNumber(tid, offnum);
@ -8207,6 +8218,9 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
* decoding.
*/
break;
case XLOG_HEAP2_REWRITE:
heap_xlog_logical_rewrite(lsn, record);
break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
}

View File

@ -18,13 +18,14 @@
#include "access/heapam_xlog.h"
#include "access/transam.h"
#include "access/htup_details.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"
#include "utils/rel.h"
#include "utils/tqual.h"
/* Working data for heap_page_prune and subroutines */
typedef struct
{
@ -70,10 +71,34 @@ static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
* or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
*/
void
heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
heap_page_prune_opt(Relation relation, Buffer buffer)
{
Page page = BufferGetPage(buffer);
Size minfree;
TransactionId OldestXmin;
/*
* We can't write WAL in recovery mode, so there's no point trying to
* clean the page. The master will likely issue a cleaning WAL record soon
* anyway, so this is no particular loss.
*/
if (RecoveryInProgress())
return;
/*
* Use the appropriate xmin horizon for this relation. If it's a proper
* catalog relation or a user defined, additional, catalog relation, we
* need to use the horizon that includes slots, otherwise the data-only
* horizon can be used. Note that the toast relations of user defined
* relations are *not* considered catalog relations.
*/
if (IsCatalogRelation(relation) ||
RelationIsAccessibleInLogicalDecoding(relation))
OldestXmin = RecentGlobalXmin;
else
OldestXmin = RecentGlobalDataXmin;
Assert(TransactionIdIsValid(OldestXmin));
/*
* Let's see if we really need pruning.
@ -84,14 +109,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
if (!PageIsPrunable(page, OldestXmin))
return;
/*
* We can't write WAL in recovery mode, so there's no point trying to
* clean the page. The master will likely issue a cleaning WAL record soon
* anyway, so this is no particular loss.
*/
if (RecoveryInProgress())
return;
/*
* We prune when a previous UPDATE failed to find enough space on the page
* for a new tuple version, or when free space falls below the relation's

View File

@ -102,17 +102,34 @@
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include "miscadmin.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "lib/ilist.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"
#include "storage/procarray.h"
/*
* State associated with a rewrite operation. This is opaque to the user
@ -120,21 +137,28 @@
*/
typedef struct RewriteStateData
{
Relation rs_old_rel; /* source heap */
Relation rs_new_rel; /* destination heap */
Page rs_buffer; /* page currently being built */
BlockNumber rs_blockno; /* block where page will go */
bool rs_buffer_valid; /* T if any tuples in buffer */
bool rs_use_wal; /* must we WAL-log inserts? */
bool rs_logical_rewrite; /* T if old->new tid mappings have to be
* logged for logical decoding; all logical
* rewrite routines are no-ops otherwise */
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
* determine tuple visibility */
TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff
* point */
TransactionId rs_logical_xmin; /* Xid that will be used as cutoff
* point for logical rewrites; tuples whose
* xmin/xmax precede it are not mapped */
MultiXactId rs_cutoff_multi;/* MultiXactId that will be used as cutoff
* point for multixacts */
MemoryContext rs_cxt; /* for hash tables and entries and tuples in
* them */
XLogRecPtr rs_begin_lsn; /* WAL insert position when the rewrite
* started; part of every mapping file's
* name */
HTAB *rs_unresolved_tups; /* unmatched A tuples */
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
HTAB *rs_logical_mappings; /* logical remapping files, keyed by the
* xid the mappings are logged for */
uint32 rs_num_rewrite_mappings; /* # of in-memory mappings, summed
* over all mapping files */
} RewriteStateData;
/*
@ -169,14 +193,45 @@ typedef struct
typedef OldToNewMappingData *OldToNewMapping;
/*
* In-memory data for an xid that might need logical remapping entries
* to be logged.
*
* One of these exists per "mapped" xid in RewriteStateData->rs_logical_mappings;
* xid has to stay the first member since it is the hash key.
*/
typedef struct RewriteMappingFile
{
TransactionId xid; /* xid that might need to see the row */
int vfd; /* fd of mappings file */
off_t off; /* how far have we written yet */
uint32 num_mappings; /* number of in-memory mappings */
dlist_head mappings; /* list of in-memory mappings */
char path[MAXPGPATH]; /* path, for error messages */
} RewriteMappingFile;
/*
* A single in-memory logical rewrite mapping, hanging off
* RewriteMappingFile->mappings.
*/
typedef struct RewriteMappingDataEntry
{
LogicalRewriteMappingData map; /* map between old and new location of
* the tuple */
dlist_node node; /* link in RewriteMappingFile->mappings */
} RewriteMappingDataEntry;
/* prototypes for internal functions */
static void raw_heap_insert(RewriteState state, HeapTuple tup);
/* internal logical remapping prototypes */
static void logical_begin_heap_rewrite(RewriteState state);
static void logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, HeapTuple new_tuple);
static void logical_end_heap_rewrite(RewriteState state);
/*
* Begin a rewrite of a table
*
* old_heap old, locked heap relation tuples will be read from
* new_heap new, locked heap relation to insert tuples to
* oldest_xmin xid used by the caller to determine which tuples are dead
* freeze_xid xid before which tuples will be frozen
@ -187,7 +242,7 @@ static void raw_heap_insert(RewriteState state, HeapTuple tup);
* to be used in subsequent calls to the other functions.
*/
RewriteState
begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
TransactionId freeze_xid, MultiXactId cutoff_multi,
bool use_wal)
{
@ -210,6 +265,7 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
/* Create and fill in the state struct */
state = palloc0(sizeof(RewriteStateData));
state->rs_old_rel = old_heap;
state->rs_new_rel = new_heap;
state->rs_buffer = (Page) palloc(BLCKSZ);
/* new_heap needn't be empty, just locked */
@ -244,6 +300,8 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
MemoryContextSwitchTo(old_cxt);
logical_begin_heap_rewrite(state);
return state;
}
@ -301,6 +359,8 @@ end_heap_rewrite(RewriteState state)
if (RelationNeedsWAL(state->rs_new_rel))
heap_sync(state->rs_new_rel);
logical_end_heap_rewrite(state);
/* Deleting the context frees everything */
MemoryContextDelete(state->rs_cxt);
}
@ -429,6 +489,8 @@ rewrite_heap_tuple(RewriteState state,
raw_heap_insert(state, new_tuple);
new_tid = new_tuple->t_self;
logical_rewrite_heap_tuple(state, old_tid, new_tuple);
/*
* If the tuple is the updated version of a row, and the prior version
* wouldn't be DEAD yet, then we need to either resolve the prior
@ -678,3 +740,545 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
if (heaptup != tup)
heap_freetuple(heaptup);
}
/* ------------------------------------------------------------------------
* Logical rewrite support
*
* When doing logical decoding - which relies on using cmin/cmax of catalog
* tuples, via xl_heap_new_cid records - heap rewrites have to log enough
* information to allow the decoding backend to update its internal mapping
* of (relfilenode,ctid) => (cmin, cmax) to be correct for the rewritten heap.
*
* For that, every time we find a tuple that's been modified in a catalog
* relation within the xmin horizon of any decoding slot, we log a mapping
* from the old to the new location.
*
* To deal with rewrites that abort, the filename of a mapping file contains
* the xid of the transaction performing the rewrite, which then can be
* checked before being read in.
*
* For efficiency we don't immediately spill every single mapping for a
* row to disk but only do so in batches when we've collected several of them
* in memory or when end_heap_rewrite() has been called.
*
* Crash-Safety: This module diverts from the usual patterns of doing WAL
* since it cannot rely on checkpoint flushing out all buffers and thus
* waiting for exclusive locks on buffers. Usually the XLogInsert() covering
* buffer modifications is performed while the buffer(s) that are being
* modified are exclusively locked, guaranteeing that both the WAL record and
* the modified heap are on either side of the checkpoint. But since the
* mapping files we log aren't in shared_buffers that interlock doesn't work.
*
* Instead we simply write the mapping files out to disk, *before* the
* XLogInsert() is performed. That guarantees that either the XLogInsert() is
* inserted after the checkpoint's redo pointer or that the checkpoint (via
* CheckPointLogicalRewriteHeap()) has flushed the (partial) mapping file to
* disk. That leaves the tail end that has not yet been flushed open to
* corruption, which is solved by including the current offset in the
* xl_heap_rewrite_mapping records and truncating the mapping file to it
* during replay. Every time a rewrite is finished all generated mapping files
* are synced to disk.
*
* Note that if we were only concerned about crash safety we wouldn't have to
* deal with WAL logging at all - an fsync() at the end of a rewrite would be
* sufficient for crash safety. Any mapping that hasn't been safely flushed to
* disk has to be by an aborted (explicitly or via a crash) transaction and is
* ignored by virtue of the xid in its name being subject to a
* TransactionDidCommit() check. But we want to support having standbys via
* physical replication, both for availability and to do logical decoding
* there.
* ------------------------------------------------------------------------
*/
/*
 * Set up the logical-remapping part of a heap rewrite.
 *
 * Decides whether old->new tuple location mappings have to be logged at all
 * and records the answer in state->rs_logical_rewrite.  When they do, the
 * cutoff xid, the starting WAL position and the per-xid mapping-file hash
 * table are initialized; when they don't, every other logical rewrite
 * routine turns into a no-op.
 */
static void
logical_begin_heap_rewrite(RewriteState state)
{
	TransactionId slot_xmin;
	HASHCTL		mapping_ctl;
	int			mapping_flags = HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT;

	/*
	 * Mappings only have to be persisted if the rewritten table can be
	 * accessed during logical decoding; for any other table there is nothing
	 * to do.
	 */
	state->rs_logical_rewrite =
		RelationIsAccessibleInLogicalDecoding(state->rs_old_rel);
	if (state->rs_logical_rewrite)
	{
		Assert(ReplicationSlotCtl != NULL);
		ProcArrayGetReplicationSlotXmin(NULL, &slot_xmin);

		/*
		 * Without any logical slot in progress there cannot be remappings
		 * for relevant rows yet.  The relation's lock protects us against
		 * races.
		 */
		if (slot_xmin == InvalidTransactionId)
			state->rs_logical_rewrite = false;
	}

	if (!state->rs_logical_rewrite)
		return;

	/* remember the cutoff and where in the WAL this rewrite started */
	state->rs_num_rewrite_mappings = 0;
	state->rs_begin_lsn = GetXLogInsertRecPtr();
	state->rs_logical_xmin = slot_xmin;

	/* one hash entry (and thus one mapping file) per mapped xid */
	memset(&mapping_ctl, 0, sizeof(mapping_ctl));
	mapping_ctl.hash = tag_hash;
	mapping_ctl.hcxt = state->rs_cxt;
	mapping_ctl.entrysize = sizeof(RewriteMappingFile);
	mapping_ctl.keysize = sizeof(TransactionId);

	state->rs_logical_mappings = hash_create("Logical rewrite mapping",
											 128,	/* arbitrary initial size */
											 &mapping_ctl,
											 mapping_flags);
}
/*
 * Flush all logical in-memory mappings to disk, but don't fsync them yet.
 *
 * For every mapping file that has pending in-memory entries, the entries are
 * serialized into one contiguous buffer, appended to the file, and then
 * logged in a single XLOG_HEAP2_REWRITE record that also carries the file
 * offset the batch was written at.  The in-memory entries are freed as they
 * are serialized, so on return state->rs_num_rewrite_mappings is zero.
 *
 * Must only be called while a logical rewrite is in progress.  Raises an
 * ERROR if the mapping file cannot be written completely.
 */
static void
logical_heap_rewrite_flush_mappings(RewriteState state)
{
	HASH_SEQ_STATUS seq_status;
	RewriteMappingFile *src;
	dlist_mutable_iter iter;

	Assert(state->rs_logical_rewrite);

	/* no in-memory mappings pending, no need to iterate over the files */
	if (state->rs_num_rewrite_mappings == 0)
		return;

	elog(DEBUG1, "flushing %u logical rewrite mapping entries",
		 state->rs_num_rewrite_mappings);

	hash_seq_init(&seq_status, state->rs_logical_mappings);
	while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
	{
		XLogRecData rdata[2];
		char	   *waldata;
		char	   *waldata_start;
		xl_heap_rewrite_mapping xlrec;
		Oid			dboid;
		uint32		len;
		int			written;

		/* this file hasn't got any new mappings */
		if (src->num_mappings == 0)
			continue;

		/* shared relations are logged with an invalid database oid */
		if (state->rs_old_rel->rd_rel->relisshared)
			dboid = InvalidOid;
		else
			dboid = MyDatabaseId;

		xlrec.num_mappings = src->num_mappings;
		xlrec.mapped_rel = RelationGetRelid(state->rs_old_rel);
		xlrec.mapped_xid = src->xid;
		xlrec.mapped_db = dboid;
		/* offset of this batch; replay truncates the file to it */
		xlrec.offset = src->off;
		xlrec.start_lsn = state->rs_begin_lsn;

		rdata[0].data = (char *) (&xlrec);
		rdata[0].len = sizeof(xlrec);
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		/* write all mappings consecutively */
		len = src->num_mappings * sizeof(LogicalRewriteMappingData);
		waldata = palloc(len);
		waldata_start = waldata;

		/*
		 * collect data we need to write out, but don't modify ondisk data yet
		 */
		dlist_foreach_modify(iter, &src->mappings)
		{
			RewriteMappingDataEntry *pmap;

			pmap = dlist_container(RewriteMappingDataEntry, node, iter.cur);

			memcpy(waldata, &pmap->map, sizeof(pmap->map));
			waldata += sizeof(pmap->map);

			/* remove from the list and free */
			dlist_delete(&pmap->node);
			pfree(pmap);

			/* update bookkeeping */
			state->rs_num_rewrite_mappings--;
			src->num_mappings--;
		}

		/*
		 * Note that we deviate from the usual WAL coding practices here,
		 * check the above "Logical rewrite support" comment for reasoning.
		 */
		written = FileWrite(src->vfd, waldata_start, len);
		if (written != len)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write to file \"%s\", wrote %d of %d: %m", src->path,
							written, len)));
		src->off += len;

		Assert(src->num_mappings == 0);

		rdata[1].data = waldata_start;
		rdata[1].len = len;
		rdata[1].buffer = InvalidBuffer;
		rdata[1].next = NULL;

		/* write xlog record */
		XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);

		/*
		 * The record has been inserted, so the serialization buffer is not
		 * needed anymore.  Release it right away: a rewrite can flush many
		 * batches for many files, and each one would otherwise stay
		 * allocated in the caller's memory context.
		 */
		pfree(waldata_start);
	}
	Assert(state->rs_num_rewrite_mappings == 0);
}
/*
 * Logical remapping part of end_heap_rewrite().
 *
 * Writes out whatever mappings are still buffered in memory, then fsyncs and
 * closes every mapping file created during this rewrite.  Does nothing when
 * no logical rewrite is in progress.
 */
static void
logical_end_heap_rewrite(RewriteState state)
{
	HASH_SEQ_STATUS scan;
	RewriteMappingFile *mapfile;

	if (state->rs_logical_rewrite)
	{
		/* push remaining in-memory entries out to their files */
		if (state->rs_num_rewrite_mappings > 0)
			logical_heap_rewrite_flush_mappings(state);

		/* make every mapping file we have written durable, then close it */
		hash_seq_init(&scan, state->rs_logical_mappings);
		for (;;)
		{
			mapfile = (RewriteMappingFile *) hash_seq_search(&scan);
			if (mapfile == NULL)
				break;

			if (FileSync(mapfile->vfd) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not fsync file \"%s\": %m", mapfile->path)));
			FileClose(mapfile->vfd);
		}

		/* the hash table and its entries go away with the memory context */
	}
}
/*
 * Queue a single (old->new) tuple location mapping on behalf of 'xid'.
 *
 * The first mapping for an xid creates that xid's mapping file; the mapping
 * itself is only appended to an in-memory list.  Once enough mappings have
 * piled up across all files they are flushed to disk in one go.
 */
static void
logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
							LogicalRewriteMappingData *map)
{
	RewriteMappingFile *mapfile;
	RewriteMappingDataEntry *entry;
	bool		known_xid;

	/* find, or create, the per-xid bookkeeping entry */
	mapfile = hash_search(state->rs_logical_mappings, &xid,
						  HASH_ENTER, &known_xid);

	if (!known_xid)
	{
		/*
		 * Nothing has been mapped for this xid so far, set up its mapping
		 * file and the in-memory list.
		 */
		char		path[MAXPGPATH];
		Oid			relid = RelationGetRelid(state->rs_old_rel);
		Oid			dboid;

		dboid = state->rs_old_rel->rd_rel->relisshared ?
			InvalidOid : MyDatabaseId;

		snprintf(path, MAXPGPATH,
				 "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT,
				 dboid, relid,
				 (uint32) (state->rs_begin_lsn >> 32),
				 (uint32) state->rs_begin_lsn,
				 xid, GetCurrentTransactionId());

		memcpy(mapfile->path, path, sizeof(path));
		mapfile->off = 0;
		mapfile->num_mappings = 0;
		dlist_init(&mapfile->mappings);

		/* the file must not exist yet, hence O_EXCL */
		mapfile->vfd = PathNameOpenFile(path,
										O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
										S_IRUSR | S_IWUSR);
		if (mapfile->vfd < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create file \"%s\": %m", path)));
	}

	/* remember the mapping in memory, in the rewrite's own context */
	entry = MemoryContextAlloc(state->rs_cxt,
							   sizeof(RewriteMappingDataEntry));
	memcpy(&entry->map, map, sizeof(LogicalRewriteMappingData));
	dlist_push_tail(&mapfile->mappings, &entry->node);

	mapfile->num_mappings++;
	state->rs_num_rewrite_mappings++;

	/*
	 * Spill to disk whenever too many entries, counted across all mapping
	 * files, are held in memory.
	 */
	if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */)
		logical_heap_rewrite_flush_mappings(state);
}
/*
 * Log the old_tid -> new_tuple->t_self relocation done by
 * rewrite_heap_tuple(), iff the tuple needs it.
 *
 * A mapping is needed for xmin when the tuple was created by a normal xid
 * that does not precede the logical cutoff, and for xmax when the tuple was
 * really updated/deleted (not merely locked) by such an xid.
 */
static void
logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
						   HeapTuple new_tuple)
{
	TransactionId horizon;
	TransactionId creator;
	TransactionId updater;
	bool		map_for_creator;
	bool		map_for_updater;
	LogicalRewriteMappingData map;

	/* no logical rewrite in progress, we don't need to log anything */
	if (!state->rs_logical_rewrite)
		return;

	horizon = state->rs_logical_xmin;
	creator = HeapTupleHeaderGetXmin(new_tuple->t_data);
	/* use *GetUpdateXid to correctly deal with multixacts */
	updater = HeapTupleHeaderGetUpdateXid(new_tuple->t_data);

	/* the tuple has been created recently */
	map_for_creator = TransactionIdIsNormal(creator) &&
		!TransactionIdPrecedes(creator, horizon);

	/*
	 * The tuple has been deleted recently.  Without a normal xmax there can't
	 * be a permanent one, and an xmax that only locks the row is of no
	 * interest either.
	 */
	map_for_updater = TransactionIdIsNormal(updater) &&
		!HEAP_XMAX_IS_LOCKED_ONLY(new_tuple->t_data->t_infomask) &&
		!TransactionIdPrecedes(updater, horizon);

	/* if neither needs to be logged, we're done */
	if (!(map_for_creator || map_for_updater))
		return;

	/* fill out mapping information */
	map.old_node = state->rs_old_rel->rd_node;
	map.new_node = state->rs_new_rel->rd_node;
	map.old_tid = old_tid;
	map.new_tid = new_tuple->t_self;

	/* ---
	 * Persist the mapping for each affected xid; the mapping files are kept
	 * per "affected" xid, so xmin and xmax both need an entry unless they
	 * are the same transaction.
	 * Only the plain xids are compared, subtransactions are disregarded:
	 * logging too much is relatively harmless, and the check could never be
	 * done fully since subtransaction data is thrown away during restarts.
	 * ---
	 */
	if (map_for_creator)
		logical_rewrite_log_mapping(state, creator, &map);

	/* separately log mapping for xmax unless it'd be redundant */
	if (map_for_updater && !TransactionIdEquals(creator, updater))
		logical_rewrite_log_mapping(state, updater, &map);
}
/*
 * Replay XLOG_HEAP2_REWRITE records
 *
 * Recreates (or reopens) the mapping file named by the record, cuts it back
 * to the offset stored in the record, appends the mappings carried by the
 * record at that offset and fsyncs the result.  Any failure is reported as
 * an ERROR.
 */
void
heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
{
	char		path[MAXPGPATH];
	int			fd;
	xl_heap_rewrite_mapping *xlrec;
	uint32		len;
	char	   *data;

	xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r);

	snprintf(path, MAXPGPATH,
			 "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT,
			 xlrec->mapped_db, xlrec->mapped_rel,
			 (uint32) (xlrec->start_lsn >> 32),
			 (uint32) xlrec->start_lsn,
			 xlrec->mapped_xid, r->xl_xid);

	/* no O_EXCL: earlier records for the same file may have created it */
	fd = OpenTransientFile(path,
						   O_CREAT | O_WRONLY | PG_BINARY,
						   S_IRUSR | S_IWUSR);
	if (fd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", path)));

	/*
	 * Truncate all data that's not guaranteed to have been safely fsynced (by
	 * previous record or by the last checkpoint).
	 */
	if (ftruncate(fd, xlrec->offset) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not truncate file \"%s\" to %u: %m",
						path, (uint32) xlrec->offset)));

	/* now seek to the position we want to write our data to */
	if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not seek to the end of file \"%s\": %m",
						path)));

	/* the mappings directly follow the fixed-size record header */
	data = XLogRecGetData(r) + sizeof(*xlrec);

	len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);

	/* write out tail end of mapping file (again) */
	errno = 0;
	if (write(fd, data, len) != len)
	{
		/*
		 * A short write does not set errno, which would make %m report a
		 * stale, unrelated error.  Assume the problem is lack of disk space
		 * in that case.
		 */
		if (errno == 0)
			errno = ENOSPC;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", path)));
	}

	/*
	 * Now fsync all previously written data. We could improve things and only
	 * do this for the last write to a file, but the required bookkeeping
	 * doesn't seem worth the trouble.
	 */
	if (pg_fsync(fd) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not fsync file \"%s\": %m", path)));

	CloseTransientFile(fd);
}
/* ---
* Perform a checkpoint for logical rewrite mappings
*
* This serves two tasks:
* 1) Remove all mappings not needed anymore based on the logical restart LSN
* 2) Flush all remaining mappings to disk, so that replay after a checkpoint
* only has to deal with the parts of a mapping that have been written out
* after the checkpoint started.
*
* Scans pg_llog/mappings and, for every regular file named "map-...", either
* unlinks it or fsyncs it. Any failure to parse, unlink, open or fsync such a
* file is raised as an ERROR.
* ---
*/
void
CheckPointLogicalRewriteHeap(void)
{
XLogRecPtr cutoff;
XLogRecPtr redo;
DIR *mappings_dir;
struct dirent *mapping_de;
char path[MAXPGPATH];
/*
* We start off with a minimum of the last redo pointer. No new decoding
* slot will start before that, so that's a safe upper bound for removal.
*/
redo = GetRedoRecPtr();
/* now check for the restart ptrs from existing slots */
cutoff = ReplicationSlotsComputeLogicalRestartLSN();
/*
* If there is a valid restart LSN, use the smaller of it and the redo
* pointer as the removal cutoff. An invalid cutoff is left alone and makes
* every mapping file removable below.
*/
if (cutoff != InvalidXLogRecPtr && redo < cutoff)
cutoff = redo;
mappings_dir = AllocateDir("pg_llog/mappings");
while ((mapping_de = ReadDir(mappings_dir, "pg_llog/mappings")) != NULL)
{
struct stat statbuf;
Oid dboid;
Oid relid;
XLogRecPtr lsn;
TransactionId rewrite_xid;
TransactionId create_xid;
uint32 hi, lo;
if (strcmp(mapping_de->d_name, ".") == 0 ||
strcmp(mapping_de->d_name, "..") == 0)
continue;
snprintf(path, MAXPGPATH, "pg_llog/mappings/%s", mapping_de->d_name);
/*
* Skip anything known not to be a regular file. If lstat() itself fails
* the entry is not skipped here.
*/
if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
continue;
/* Skip over files that cannot be ours. */
if (strncmp(mapping_de->d_name, "map-", 4) != 0)
continue;
/* all six components of the file name have to be present */
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
&dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name);
/* reassemble the LSN that is stored in the name as two 32 bit halves */
lsn = ((uint64) hi) << 32 | lo;
/* remove files from before the cutoff, or all of them without a cutoff */
if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
{
elog(DEBUG1, "removing logical rewrite file \"%s\"", path);
if (unlink(path) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not unlink file \"%s\": %m", path)));
}
else
{
int fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
/*
* The file cannot vanish due to concurrency since this function
* is the only one removing logical mappings and it's run while
* CheckpointLock is held exclusively.
*/
if (fd < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", path)));
/*
* We could try to avoid fsyncing files that either haven't
* changed or have only been created since the checkpoint's start,
* but it's currently not deemed worth the effort.
*/
else if (pg_fsync(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
CloseTransientFile(fd);
}
}
FreeDir(mappings_dir);
}

View File

@ -44,32 +44,6 @@
#undef TOAST_DEBUG
/*
* Testing whether an externally-stored value is compressed now requires
* comparing extsize (the actual length of the external data) to rawsize
* (the original uncompressed datum's size). The latter includes VARHDRSZ
* overhead, the former doesn't. We never use compression unless it actually
* saves space, so we expect either equality or less-than.
*/
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
/*
* Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
* into a local "struct varatt_external" toast pointer. This should be
* just a memcpy, but some versions of gcc seem to produce broken code
* that assumes the datum contents are aligned. Introducing an explicit
* intermediate "varattrib_1b_e *" variable seems to fix it.
*/
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
do { \
varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
Assert(VARATT_IS_EXTERNAL(attre)); \
Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
struct varlena * oldexternal, int options);

View File

@ -67,7 +67,10 @@
#include "access/relscan.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/index.h"
#include "catalog/catalog.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
@ -520,8 +523,7 @@ index_fetch_heap(IndexScanDesc scan)
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != scan->xs_cbuf)
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
RecentGlobalXmin);
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
}
/* Obtain share-lock on the buffer so we can examine visibility */

View File

@ -149,6 +149,10 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
xlrec->node.relNode, xlrec->block,
xlrec->cutoff_xid, xlrec->ntuples);
}
else if (info == XLOG_HEAP2_REWRITE)
{
appendStringInfoString(buf, "heap rewrite:");
}
else if (info == XLOG_HEAP2_CLEANUP_INFO)
{
xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec;

View File

@ -1074,8 +1074,16 @@ RecordTransactionCommit(void)
/*
* Do we need the long commit record? If not, use the compact format.
*
* For now always use the non-compact version if wal_level=logical, so
* we can hide commits from other databases. TODO: In the future we
* should merge compact and non-compact commits and use a flags
* variable to determine if it contains subxacts, relations or
* invalidation messages, that's more extensible and degrades more
* gracefully. Till then, it's just 20 bytes of overhead.
*/
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit)
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
XLogLogicalInfoActive())
{
XLogRecData rdata[4];
int lastrdata = 0;

View File

@ -23,6 +23,7 @@
#include "access/clog.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
@ -39,7 +40,9 @@
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/barrier.h"
@ -4015,6 +4018,27 @@ CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
}
}
/*
* Return the last WAL segment removed, or 0 if no segment has been removed
* since startup.
*
* The value is read from shared memory (XLogCtl->lastRemovedSegNo) while
* holding info_lck.
*
* NB: the result can be out of date arbitrarily fast, the caller has to deal
* with that.
*/
XLogSegNo
XLogGetLastRemovedSegno(void)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
XLogSegNo lastRemovedSegNo;
/* copy the value out under the spinlock, return the local copy */
SpinLockAcquire(&xlogctl->info_lck);
lastRemovedSegNo = xlogctl->lastRemovedSegNo;
SpinLockRelease(&xlogctl->info_lck);
return lastRemovedSegNo;
}
/*
* Update the last removed segno pointer in shared memory, to reflect
* that the given XLOG file has been removed.
@ -6558,6 +6582,12 @@ StartupXLOG(void)
*/
StartupReplicationSlots(checkPoint.redo);
/*
* Startup logical state, needs to be setup now so we have proper data
* during crash recovery.
*/
StartupReorderBuffer();
/*
* Startup MultiXact. We need to do this early for two reasons: one
* is that we might try to access multixacts when we do tuple freezing,
@ -8589,7 +8619,7 @@ CreateCheckPoint(int flags)
* StartupSUBTRANS hasn't been called yet.
*/
if (!RecoveryInProgress())
TruncateSUBTRANS(GetOldestXmin(true, false));
TruncateSUBTRANS(GetOldestXmin(NULL, false));
/* Real work is done, but log and update stats before releasing lock. */
LogCheckpointEnd(false);
@ -8674,6 +8704,8 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
CheckPointPredicate();
CheckPointRelationMap();
CheckPointReplicationSlots();
CheckPointSnapBuild();
CheckPointLogicalRewriteHeap();
CheckPointBuffers(flags); /* performs all required fsyncs */
/* We deliberately delay 2PC checkpointing as long as possible */
CheckPointTwoPhase(checkPointRedo);
@ -8965,7 +8997,7 @@ CreateRestartPoint(int flags)
* this because StartupSUBTRANS hasn't been called yet.
*/
if (EnableHotStandby)
TruncateSUBTRANS(GetOldestXmin(true, false));
TruncateSUBTRANS(GetOldestXmin(NULL, false));
/* Real work is done, but log and update before releasing lock. */
LogCheckpointEnd(true);

View File

@ -2156,7 +2156,7 @@ IndexBuildHeapScan(Relation heapRelation,
{
snapshot = SnapshotAny;
/* okay to ignore lazy VACUUMs here */
OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
OldestXmin = GetOldestXmin(heapRelation, true);
}
scan = heap_beginscan_strat(heapRelation, /* relation */

View File

@ -619,11 +619,13 @@ CREATE VIEW pg_stat_replication AS
CREATE VIEW pg_replication_slots AS
SELECT
L.slot_name,
L.plugin,
L.slot_type,
L.datoid,
D.datname AS database,
L.active,
L.xmin,
L.catalog_xmin,
L.restart_lsn
FROM pg_get_replication_slots() AS L
LEFT JOIN pg_database D ON (L.datoid = D.oid);
@ -822,3 +824,35 @@ CREATE OR REPLACE FUNCTION
CREATE OR REPLACE FUNCTION
json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset';
-- SQL interface to logical decoding.
--
-- All four functions take a slot name, an upper LSN bound, an upper bound on
-- the number of changes and a variadic list of text options, and return a set
-- of (location, xid, data) rows. The plain variants return data as text, the
-- *_binary_* variants as bytea.
-- NOTE(review): presumably the "get" variants consume the returned changes
-- while the "peek" variants leave them in place, and the options are handed
-- to the slot's output plugin -- confirm against the C implementations.

-- changes as text
CREATE OR REPLACE FUNCTION pg_logical_slot_get_changes(
IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
OUT location pg_lsn, OUT xid xid, OUT data text)
RETURNS SETOF RECORD
LANGUAGE INTERNAL
VOLATILE ROWS 1000 COST 1000
AS 'pg_logical_slot_get_changes';
-- changes as text, "peek" variant
CREATE OR REPLACE FUNCTION pg_logical_slot_peek_changes(
IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
OUT location pg_lsn, OUT xid xid, OUT data text)
RETURNS SETOF RECORD
LANGUAGE INTERNAL
VOLATILE ROWS 1000 COST 1000
AS 'pg_logical_slot_peek_changes';
-- changes as bytea
CREATE OR REPLACE FUNCTION pg_logical_slot_get_binary_changes(
IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
OUT location pg_lsn, OUT xid xid, OUT data bytea)
RETURNS SETOF RECORD
LANGUAGE INTERNAL
VOLATILE ROWS 1000 COST 1000
AS 'pg_logical_slot_get_binary_changes';
-- changes as bytea, "peek" variant
CREATE OR REPLACE FUNCTION pg_logical_slot_peek_binary_changes(
IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
OUT location pg_lsn, OUT xid xid, OUT data bytea)
RETURNS SETOF RECORD
LANGUAGE INTERNAL
VOLATILE ROWS 1000 COST 1000
AS 'pg_logical_slot_peek_binary_changes';

View File

@ -22,6 +22,7 @@
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
@ -1081,7 +1082,7 @@ acquire_sample_rows(Relation onerel, int elevel,
totalblocks = RelationGetNumberOfBlocks(onerel);
/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
OldestXmin = GetOldestXmin(onerel, true);
/* Prepare for sampling block numbers */
BlockSampler_Init(&bs, totalblocks, targrows);

View File

@ -850,7 +850,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
* Since we're going to rewrite the whole table anyway, there's no reason
* not to be aggressive about this.
*/
vacuum_set_xid_limits(0, 0, 0, 0, OldHeap->rd_rel->relisshared,
vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
&OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
NULL);
@ -869,7 +869,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid,
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
MultiXactCutoff, use_wal);
/*

View File

@ -45,6 +45,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "replication/slot.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/lmgr.h"
@ -750,6 +751,7 @@ dropdb(const char *dbname, bool missing_ok)
HeapTuple tup;
int notherbackends;
int npreparedxacts;
int nslots, nslots_active;
/*
* Look up the target database's OID, and get exclusive lock on it. We
@ -806,6 +808,19 @@ dropdb(const char *dbname, bool missing_ok)
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("cannot drop the currently open database")));
/*
* Check whether there are, possibly unconnected, logical slots that refer
* to the to-be-dropped database. The database lock we are holding
* prevents the creation of new slots using the database.
*/
if (ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("database \"%s\" is used by a logical decoding slot",
dbname),
errdetail("There are %d slot(s), %d of them active",
nslots, nslots_active)));
/*
* Check for other backends in the target database. (Because we hold the
* database lock, no new ones can start after this.)

View File

@ -398,11 +398,11 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
* not interested.
*/
void
vacuum_set_xid_limits(int freeze_min_age,
vacuum_set_xid_limits(Relation rel,
int freeze_min_age,
int freeze_table_age,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
TransactionId *xidFullScanLimit,
@ -425,7 +425,7 @@ vacuum_set_xid_limits(int freeze_min_age,
* working on a particular table at any time, and that each vacuum is
* always an independent transaction.
*/
*oldestXmin = GetOldestXmin(sharedRel, true);
*oldestXmin = GetOldestXmin(rel, true);
Assert(TransactionIdIsNormal(*oldestXmin));
@ -795,7 +795,7 @@ vac_update_datfrozenxid(void)
* committed pg_class entries for new tables; see AddNewRelationTuple().
* So we cannot produce a wrong minimum by starting with this.
*/
newFrozenXid = GetOldestXmin(true, true);
newFrozenXid = GetOldestXmin(NULL, true);
/*
* Similarly, initialize the MultiXact "min" with the value that would be

View File

@ -44,6 +44,7 @@
#include "access/multixact.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
@ -204,10 +205,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
vac_strategy = bstrategy;
vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
vacuum_set_xid_limits(onerel,
vacstmt->freeze_min_age, vacstmt->freeze_table_age,
vacstmt->multixact_freeze_min_age,
vacstmt->multixact_freeze_table_age,
onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit, &xidFullScanLimit,
&MultiXactCutoff, &mxactFullScanLimit);

View File

@ -336,8 +336,7 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
Assert(TransactionIdIsValid(RecentGlobalXmin));
heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
heap_page_prune_opt(scan->rs_rd, buffer);
/*
* We must hold share lock on the buffer content while examining tuple

View File

@ -17,6 +17,8 @@ override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS = walsender.o walreceiverfuncs.o walreceiver.o basebackup.o \
repl_gram.o slot.o slotfuncs.o syncrep.o
SUBDIRS = logical
include $(top_srcdir)/src/backend/common.mk
# repl_scanner is compiled as part of repl_gram

View File

@ -0,0 +1,19 @@
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for src/backend/replication/logical
#
# IDENTIFICATION
# src/backend/replication/logical/Makefile
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the build tree.
subdir = src/backend/replication/logical
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
# Put this directory's sources first on the preprocessor include path;
# "override" keeps the addition even if CPPFLAGS is set on the command line.
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
# Object files built in this directory.
OBJS = decode.o logical.o logicalfuncs.o reorderbuffer.o snapbuild.o
# Shared backend build rules.
include $(top_srcdir)/src/backend/common.mk

View File

@ -0,0 +1,826 @@
/* -------------------------------------------------------------------------
*
* decode.c
* This module decodes WAL records read using xlogreader.h's APIs for the
* purpose of logical decoding by passing information to the
* reorderbuffer module (containing the actual changes) and to the
* snapbuild module to build a fitting catalog snapshot (to be able to
* properly decode the changes in the reorderbuffer).
*
* NOTE:
* This basically tries to handle all low level xlog stuff for
* reorderbuffer.c and snapbuild.c. There's some minor leakage where a
* specific record's struct is used to pass data along, but those just
* happen to contain the right amount of data in a convenient
* format. There isn't and shouldn't be much intelligence about the
* contents of records in here except turning them into a more usable
* format.
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/replication/logical/decode.c
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogreader.h"
#include "catalog/pg_control.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/snapbuild.h"
#include "storage/standby.h"
typedef struct XLogRecordBuffer
{
XLogRecPtr origptr;
XLogRecPtr endptr;
XLogRecord record;
char *record_data;
} XLogRecordBuffer;
/* RMGR Handlers */
static void DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
/* individual record(group)'s handlers */
static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
TransactionId xid, Oid dboid,
TimestampTz commit_time,
int nsubxacts, TransactionId *sub_xids,
int ninval_msgs, SharedInvalidationMessage *msg);
static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn,
TransactionId xid, TransactionId *sub_xids, int nsubxacts);
/* common function to decode tuples */
static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
/*
 * Entry point of the decoding machinery: take one record obtained via
 * XLogReadRecord() and hand it to the handler for its resource manager, so it
 * gets decoded using the output plugin already set up in the logical decoding
 * context.
 *
 * The record header is copied into a local XLogRecordBuffer together with
 * the reader's start/end positions of the record and a pointer to the
 * record's data.
 */
void
LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
{
	XLogRecordBuffer recbuf;

	recbuf.record = *record;
	recbuf.record_data = XLogRecGetData(record);
	recbuf.origptr = ctx->reader->ReadRecPtr;
	recbuf.endptr = ctx->reader->EndRecPtr;

	/* cast so we get a warning when new rmgrs are added */
	switch ((RmgrIds) recbuf.record.xl_rmid)
	{
			/*
			 * Rmgrs irrelevant for logical decoding; they describe stuff not
			 * represented in logical decoding. Add new rmgrs in rmgrlist.h's
			 * order.
			 */
		case RM_SMGR_ID:
		case RM_CLOG_ID:
		case RM_DBASE_ID:
		case RM_TBLSPC_ID:
		case RM_MULTIXACT_ID:
		case RM_RELMAP_ID:
		case RM_BTREE_ID:
		case RM_HASH_ID:
		case RM_GIN_ID:
		case RM_GIST_ID:
		case RM_SEQ_ID:
		case RM_SPGIST_ID:
			break;

			/*
			 * Rmgrs we care about for logical decoding. Add new rmgrs in
			 * rmgrlist.h's order.
			 */
		case RM_XLOG_ID:
			DecodeXLogOp(ctx, &recbuf);
			break;

		case RM_XACT_ID:
			DecodeXactOp(ctx, &recbuf);
			break;

		case RM_STANDBY_ID:
			DecodeStandbyOp(ctx, &recbuf);
			break;

		case RM_HEAP2_ID:
			DecodeHeap2Op(ctx, &recbuf);
			break;

		case RM_HEAP_ID:
			DecodeHeapOp(ctx, &recbuf);
			break;

			/* not a real resource manager, just the end-of-list marker */
		case RM_NEXT_ID:
			elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) recbuf.record.xl_rmid);
	}
}
/*
* Handle rmgr XLOG_ID records for LogicalDecodingProcessRecord().
*
* Shutdown checkpoints and end-of-recovery records are reported to the
* snapshot builder via SnapBuildSerializationPoint(). All other known
* record types are ignored; an unknown record type raises an error.
*/
static void
DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
/* strip the generic xlog flag bits, leaving the rmgr-specific record type */
uint8 info = buf->record.xl_info & ~XLR_INFO_MASK;
switch (info)
{
/* this is also used in END_OF_RECOVERY checkpoints */
case XLOG_CHECKPOINT_SHUTDOWN:
case XLOG_END_OF_RECOVERY:
SnapBuildSerializationPoint(builder, buf->origptr);
break;
case XLOG_CHECKPOINT_ONLINE:
/*
* a RUNNING_XACTS record will have been logged near to this, we
* can restart from there.
*/
break;
/* record types that carry nothing of interest for decoding */
case XLOG_NOOP:
case XLOG_NEXTOID:
case XLOG_SWITCH:
case XLOG_BACKUP_END:
case XLOG_PARAMETER_CHANGE:
case XLOG_RESTORE_POINT:
case XLOG_FPW_CHANGE:
case XLOG_FPI:
break;
default:
elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info);
}
}
/*
 * Handle rmgr XACT_ID records for LogicalDecodingProcessRecord().
 *
 * Commit records (plain, compact and COMMIT PREPARED) are unpacked and
 * forwarded to DecodeCommit(); abort records (plain and ROLLBACK PREPARED)
 * to DecodeAbort().  Subtransaction assignment records are registered with
 * the reorderbuffer.  PREPARE records are ignored.  Any other record type
 * raises an error.
 *
 * Nothing is done until the snapshot builder has reached at least
 * SNAPBUILD_FULL_SNAPSHOT.
 */
static void
DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	SnapBuild  *builder = ctx->snapshot_builder;
	ReorderBuffer *reorder = ctx->reorder;
	XLogRecord *r = &buf->record;
	uint8		info = r->xl_info & ~XLR_INFO_MASK;

	/* no point in doing anything yet, data could not be decoded anyway */
	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
		return;

	switch (info)
	{
		case XLOG_XACT_COMMIT:
			{
				xl_xact_commit *xlrec;
				TransactionId *subxacts = NULL;
				SharedInvalidationMessage *invals = NULL;

				xlrec = (xl_xact_commit *) buf->record_data;

				/*
				 * The variable-length arrays follow each other in the
				 * record: relfilenodes, then subxact xids, then
				 * invalidation messages.
				 */
				subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
				invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);

				DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId,
							 xlrec->xact_time,
							 xlrec->nsubxacts, subxacts,
							 xlrec->nmsgs, invals);

				break;
			}
		case XLOG_XACT_COMMIT_PREPARED:
			{
				xl_xact_commit_prepared *prec;
				xl_xact_commit *xlrec;
				TransactionId *subxacts;
				SharedInvalidationMessage *invals = NULL;

				/* Prepared commits contain a normal commit record... */
				prec = (xl_xact_commit_prepared *) buf->record_data;
				xlrec = &prec->crec;

				subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
				invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);

				/*
				 * The transaction being committed is the prepared one,
				 * identified by prec->xid.  r->xl_xid belongs to the
				 * transaction that issued COMMIT PREPARED, which is a
				 * different one (cf. the ABORT_PREPARED case below).
				 */
				DecodeCommit(ctx, buf, prec->xid, xlrec->dbId,
							 xlrec->xact_time,
							 xlrec->nsubxacts, subxacts,
							 xlrec->nmsgs, invals);

				break;
			}
		case XLOG_XACT_COMMIT_COMPACT:
			{
				xl_xact_commit_compact *xlrec;

				xlrec = (xl_xact_commit_compact *) buf->record_data;

				/*
				 * Compact commit records carry neither a database oid nor
				 * invalidation messages.
				 */
				DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
							 xlrec->xact_time,
							 xlrec->nsubxacts, xlrec->subxacts,
							 0, NULL);
				break;
			}
		case XLOG_XACT_ABORT:
			{
				xl_xact_abort *xlrec;
				TransactionId *sub_xids;

				xlrec = (xl_xact_abort *) buf->record_data;

				/* subxact xids follow the relfilenode array */
				sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);

				DecodeAbort(ctx, buf->origptr, r->xl_xid,
							sub_xids, xlrec->nsubxacts);
				break;
			}
		case XLOG_XACT_ABORT_PREPARED:
			{
				xl_xact_abort_prepared *prec;
				xl_xact_abort *xlrec;
				TransactionId *sub_xids;

				/* prepared aborts contain a normal abort record... */
				prec = (xl_xact_abort_prepared *) buf->record_data;
				xlrec = &prec->arec;

				sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);

				/* r->xl_xid is committed in a separate record */
				DecodeAbort(ctx, buf->origptr, prec->xid,
							sub_xids, xlrec->nsubxacts);
				break;
			}

		case XLOG_XACT_ASSIGNMENT:
			{
				xl_xact_assignment *xlrec;
				int			i;
				TransactionId *sub_xid;

				xlrec = (xl_xact_assignment *) buf->record_data;

				/* tie every listed subtransaction to its toplevel xid */
				sub_xid = &xlrec->xsub[0];

				for (i = 0; i < xlrec->nsubxacts; i++)
				{
					ReorderBufferAssignChild(reorder, xlrec->xtop,
											 *(sub_xid++), buf->origptr);
				}
				break;
			}
		case XLOG_XACT_PREPARE:

			/*
			 * Currently decoding ignores PREPARE TRANSACTION and will just
			 * decode the transaction when the COMMIT PREPARED is sent or
			 * throw away the transaction's contents when a ROLLBACK PREPARED
			 * is received. In the future we could add code to expose prepared
			 * transactions in the changestream allowing for a kind of
			 * distributed 2PC.
			 */
			break;
		default:
			elog(ERROR, "unexpected RM_XACT_ID record type: %u", info);
	}
}
/*
* Handle rmgr STANDBY_ID records for LogicalDecodingProcessRecord().
*
* RUNNING_XACTS records are passed to the snapshot builder and are then used
* to abort old transactions in the reorderbuffer. Lock records are ignored;
* any other record type raises an error.
*/
static void
DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
XLogRecord *r = &buf->record;
/* strip the generic xlog flag bits, leaving the rmgr-specific record type */
uint8 info = r->xl_info & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_RUNNING_XACTS:
{
xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
SnapBuildProcessRunningXacts(builder, buf->origptr, running);
/*
* Abort all transactions that we keep track of, that are
* older than the record's oldestRunningXid. This is the most
* convenient spot for doing so since, in contrast to shutdown
* or end-of-recovery checkpoints, we have information about
* all running transactions which includes prepared ones,
* while shutdown checkpoints just know that no non-prepared
* transactions are in progress.
*/
ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid);
}
break;
case XLOG_STANDBY_LOCK:
break;
default:
elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info);
}
}
/*
* Handle rmgr HEAP2_ID records for LogicalDecodingProcessRecord().
*
* Multi-inserts are decoded if SnapBuildProcessChange() says so, and NEW_CID
* records are handed to the snapshot builder. The remaining known record
* types are ignored; an unknown record type raises an error.
*
* Nothing is done until the snapshot builder has reached at least
* SNAPBUILD_FULL_SNAPSHOT.
*/
static void
DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
/* XLOG_HEAP_OPMASK isolates the heap operation code from the info bits */
uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
TransactionId xid = buf->record.xl_xid;
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
return;
switch (info)
{
case XLOG_HEAP2_MULTI_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeMultiInsert(ctx, buf);
break;
case XLOG_HEAP2_NEW_CID:
{
xl_heap_new_cid *xlrec;
xlrec = (xl_heap_new_cid *) buf->record_data;
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
break;
}
case XLOG_HEAP2_REWRITE:
/*
* Although these records only exist to serve the needs of logical
* decoding, all the work happens as part of crash or archive
* recovery, so we don't need to do anything here.
*/
break;
/*
* Everything else here is just low level physical stuff we're
* not interested in.
*/
case XLOG_HEAP2_FREEZE_PAGE:
case XLOG_HEAP2_CLEAN:
case XLOG_HEAP2_CLEANUP_INFO:
case XLOG_HEAP2_VISIBLE:
case XLOG_HEAP2_LOCK_UPDATED:
break;
default:
elog(ERROR, "unexpected RM_HEAP2_ID record type: %u", info);
}
}
/*
* Handle rmgr HEAP_ID records for LogicalDecodingProcessRecord().
*
* Inserts, updates (HOT or not) and deletes are decoded if
* SnapBuildProcessChange() says so. In-place updates made by a transaction
* with a valid xid mark that transaction as catalog modifying. New-page and
* lock records are ignored; an unknown record type raises an error.
*
* Nothing is done until the snapshot builder has reached at least
* SNAPBUILD_FULL_SNAPSHOT.
*/
static void
DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
/* XLOG_HEAP_OPMASK isolates the heap operation code from the info bits */
uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
TransactionId xid = buf->record.xl_xid;
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
return;
switch (info)
{
case XLOG_HEAP_INSERT:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeInsert(ctx, buf);
break;
/*
* Treat HOT update as normal updates. There is no useful
* information in the fact that we could make it a HOT update
* locally and the WAL layout is compatible.
*/
case XLOG_HEAP_HOT_UPDATE:
case XLOG_HEAP_UPDATE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeUpdate(ctx, buf);
break;
case XLOG_HEAP_DELETE:
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeDelete(ctx, buf);
break;
case XLOG_HEAP_NEWPAGE:
/*
* This is only used in places like indexams and CLUSTER which
* don't contain changes relevant for logical replication.
*/
break;
case XLOG_HEAP_INPLACE:
/*
* Inplace updates are only ever performed on catalog tuples and
* can, per definition, not change tuple visibility. Since we
* don't decode catalog tuples, we're not interested in the
* record's contents.
*
* In-place updates can be used either by XID-bearing transactions
* (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
* transactions (e.g. VACUUM). In the former case, the commit
* record will include cache invalidations, so we mark the
* transaction as catalog modifying here. Currently that's
* redundant because the commit will do that as well, but once we
* support decoding in-progress relations, this will be important.
*/
if (!TransactionIdIsValid(xid))
break;
/* the return value is deliberately not checked: nothing is decoded here */
SnapBuildProcessChange(builder, xid, buf->origptr);
ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
break;
case XLOG_HEAP_LOCK:
/* we don't care about row level locks for now */
break;
default:
elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
break;
}
}
/*
 * Commit handling shared by all the different forms of commit records.
 *
 * The caller extracts the toplevel xid, the database oid (InvalidOid if the
 * record does not carry one), the commit time, the subtransaction xids and
 * the invalidation messages from the specific record format.
 */
static void
DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
			 TransactionId xid, Oid dboid,
			 TimestampTz commit_time,
			 int nsubxacts, TransactionId *sub_xids,
			 int ninval_msgs, SharedInvalidationMessage *msgs)
{
	int			subno;
	bool		skip;

	/*
	 * Invalidation messages have to be processed even if the transaction's
	 * contents turn out to be uninteresting, since the various caches need
	 * to always be consistent.
	 */
	if (ninval_msgs > 0)
	{
		ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr,
									  ninval_msgs, msgs);
		ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
	}

	SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid,
					   nsubxacts, sub_xids);

	/* ----
	 * Decide whether we are interested in this specific transaction, and
	 * tell the reorderbuffer to forget the content of the
	 * (sub-)transactions if not.
	 *
	 * There are basically two reasons we might not be interested in this
	 * transaction:
	 * 1) We might not be interested in decoding transactions up to this
	 *	  LSN. This can happen because we previously decoded it and now just
	 *	  are restarting or if we haven't assembled a consistent snapshot yet.
	 * 2) The transaction happened in another database.
	 *
	 * We can't just use ReorderBufferAbort() here, because we need to execute
	 * the transaction's invalidations.  This currently won't be needed if
	 * we're just skipping over the transaction because currently we only do
	 * so during startup, to get to the first transaction the client needs. As
	 * we have reset the catalog caches before starting to read WAL, and we
	 * haven't yet touched any catalogs, there can't be anything to invalidate.
	 * But if we're "forgetting" this commit because it happened in another
	 * database, the invalidations might be important, because they could be
	 * for shared catalogs and we might have loaded data into the relevant
	 * syscaches.
	 * ---
	 */
	skip = SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr);
	if (!skip)
		skip = (dboid != InvalidOid && dboid != ctx->slot->data.database);

	if (skip)
	{
		/* drop the subtransactions first, then the toplevel transaction */
		for (subno = 0; subno < nsubxacts; subno++)
			ReorderBufferForget(ctx->reorder, sub_xids[subno], buf->origptr);

		ReorderBufferForget(ctx->reorder, xid, buf->origptr);
		return;
	}

	/* tell the reorderbuffer about the surviving subtransactions */
	for (subno = 0; subno < nsubxacts; subno++)
		ReorderBufferCommitChild(ctx->reorder, xid, sub_xids[subno],
								 buf->origptr, buf->endptr);

	/* replay actions of all transaction + subtransactions in order */
	ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr,
						commit_time);
}
/*
 * Abort handling shared by the various forms of abort records: inform
 * snapbuild.c about the abort, then have reorderbuffer.c abort each
 * subtransaction followed by the toplevel transaction.
 */
static void
DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
			TransactionId *sub_xids, int nsubxacts)
{
	int			subno;

	SnapBuildAbortTxn(ctx->snapshot_builder, lsn, xid, nsubxacts, sub_xids);

	for (subno = 0; subno < nsubxacts; subno++)
		ReorderBufferAbort(ctx->reorder, sub_xids[subno], lsn);

	ReorderBufferAbort(ctx->reorder, xid, lsn);
}
/*
 * Turn an XLOG_HEAP_INSERT (not MULTI_INSERT!) record into a queued
 * reorderbuffer change.
 *
 * Inserts can contain the new tuple; it is only decoded when the record's
 * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set.
 */
static void
DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	XLogRecord *rec = &buf->record;
	xl_heap_insert *ins = (xl_heap_insert *) buf->record_data;
	ReorderBufferChange *change;

	/* only interested in our database */
	if (ins->target.node.dbNode != ctx->slot->data.database)
		return;

	change = ReorderBufferGetChange(ctx->reorder);
	change->action = REORDER_BUFFER_CHANGE_INSERT;
	memcpy(&change->tp.relnode, &ins->target.node, sizeof(RelFileNode));

	if (ins->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
	{
		/* the tuple (header + data) follows the fixed-size insert struct */
		char	   *tupdata = (char *) ins + SizeOfHeapInsert;
		Size		tuplen = rec->xl_len - SizeOfHeapInsert;

		Assert(rec->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader));

		change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
		DecodeXLogTuple(tupdata, tuplen, change->tp.newtuple);
	}

	ReorderBufferQueueChange(ctx->reorder, rec->xl_xid, buf->origptr, change);
}
/*
 * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
 * in the record, from wal into proper tuplebufs.
 *
 * Updates can possibly contain a new tuple and the old primary key.  Each of
 * them is stored as an xl_heap_header_len (a length followed by a tuple
 * header) that is directly followed by the tuple data.
 *
 * The length-prefixed headers are not necessarily stored aligned, in
 * particular the second one, which follows variable-length tuple data.  The
 * length prefix is therefore copied into aligned local storage before it is
 * read, rather than being dereferenced in place.
 */
static void
DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	XLogRecord *r = &buf->record;
	xl_heap_update *xlrec;
	xl_heap_header_len xlhdr;
	ReorderBufferChange *change;
	char	   *hdrstart;
	char	   *data;

	xlrec = (xl_heap_update *) buf->record_data;

	/* only interested in our database */
	if (xlrec->target.node.dbNode != ctx->slot->data.database)
		return;

	change = ReorderBufferGetChange(ctx->reorder);
	change->action = REORDER_BUFFER_CHANGE_UPDATE;
	memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode));

	/* the first length-prefixed header follows the fixed-size update struct */
	hdrstart = buf->record_data + SizeOfHeapUpdate;

	/* position of the tuple header inside that xl_heap_header_len */
	data = hdrstart + offsetof(xl_heap_header_len, header);

	if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
	{
		Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));

		/* fetch the length prefix without relying on its alignment */
		memcpy(&xlhdr, hdrstart, offsetof(xl_heap_header_len, header));

		change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);

		DecodeXLogTuple(data,
						xlhdr.t_len + SizeOfHeapHeader,
						change->tp.newtuple);
		/* skip over the rest of the tuple header */
		data += SizeOfHeapHeader;
		/* skip over the tuple data */
		data += xlhdr.t_len;
	}

	if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
	{
		/* the old tuple's length-prefixed header starts where we are now */
		hdrstart = data;
		memcpy(&xlhdr, hdrstart, offsetof(xl_heap_header_len, header));
		data = hdrstart + offsetof(xl_heap_header_len, header);

		change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);

		DecodeXLogTuple(data,
						xlhdr.t_len + SizeOfHeapHeader,
						change->tp.oldtuple);
	}

	ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
}
/*
 * Turn an XLOG_HEAP_DELETE record into a queued reorderbuffer change.
 *
 * Deletes can possibly contain the old primary key; it is only decoded when
 * the record's XLOG_HEAP_CONTAINS_OLD flag is set.
 */
static void
DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
	XLogRecord *rec = &buf->record;
	xl_heap_delete *del = (xl_heap_delete *) buf->record_data;
	ReorderBufferChange *change;

	/* only interested in our database */
	if (del->target.node.dbNode != ctx->slot->data.database)
		return;

	change = ReorderBufferGetChange(ctx->reorder);
	change->action = REORDER_BUFFER_CHANGE_DELETE;
	memcpy(&change->tp.relnode, &del->target.node, sizeof(RelFileNode));

	/* old primary key stored */
	if (del->flags & XLOG_HEAP_CONTAINS_OLD)
	{
		/* the tuple (header + data) follows the fixed-size delete struct */
		char	   *tupdata = (char *) del + SizeOfHeapDelete;
		Size		tuplen = rec->xl_len - SizeOfHeapDelete;

		Assert(rec->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader));

		change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
		DecodeXLogTuple(tupdata, tuplen, change->tp.oldtuple);
	}

	ReorderBufferQueueChange(ctx->reorder, rec->xl_xid, buf->origptr, change);
}
/*
* Decode XLOG_HEAP2_MULTI_INSERT record into multiple tuplebufs.
*
* One INSERT change is queued per tuple contained in the record.
*
* Currently MULTI_INSERT will always contain the full tuples.
*/
static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
XLogRecord *r = &buf->record;
xl_heap_multi_insert *xlrec;
int i;
char *data;
bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
xlrec = (xl_heap_multi_insert *) buf->record_data;
/* only interested in our database */
if (xlrec->node.dbNode != ctx->slot->data.database)
return;
/* start right behind the fixed-size part of the record */
data = buf->record_data + SizeOfHeapMultiInsert;
/*
* OffsetNumbers (which are not of interest to us) are stored when
* XLOG_HEAP_INIT_PAGE is not set -- skip over them.
*/
if (!isinit)
data += sizeof(OffsetNumber) * xlrec->ntuples;
for (i = 0; i < xlrec->ntuples; i++)
{
ReorderBufferChange *change;
xl_multi_insert_tuple *xlhdr;
int datalen;
ReorderBufferTupleBuf *tuple;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
memcpy(&change->tp.relnode, &xlrec->node, sizeof(RelFileNode));
/*
* CONTAINS_NEW_TUPLE will always be set currently as multi_insert
* isn't used for catalogs, but better be future proof.
*
* We decode the tuple in pretty much the same way as DecodeXLogTuple,
* but since the layout is slightly different, we can't use it here.
*/
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{
change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
tuple = change->tp.newtuple;
/* not a disk based tuple */
ItemPointerSetInvalid(&tuple->tuple.t_self);
/* the per-tuple header is read from the next SHORTALIGN'ed position */
xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
/* the tuple's data follows directly behind its per-tuple header */
data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
datalen = xlhdr->datalen;
/*
* We can only figure this out after reassembling the
* transactions.
*/
tuple->tuple.t_tableOid = InvalidOid;
tuple->tuple.t_data = &tuple->header;
tuple->tuple.t_len = datalen
+ offsetof(HeapTupleHeaderData, t_bits);
/* zero the header, then place the logged data from t_bits onwards */
memset(&tuple->header, 0, sizeof(HeapTupleHeaderData));
memcpy((char *) &tuple->header
+ offsetof(HeapTupleHeaderData, t_bits),
(char *) data,
datalen);
/* advance to the next tuple in the record */
data += datalen;
/* restore the header fields that were logged with the tuple */
tuple->header.t_infomask = xlhdr->t_infomask;
tuple->header.t_infomask2 = xlhdr->t_infomask2;
tuple->header.t_hoff = xlhdr->t_hoff;
}
ReorderBufferQueueChange(ctx->reorder, r->xl_xid,
buf->origptr, change);
}
}
/*
 * Build a tuplebuf from a HeapTuple as WAL logged by heap_insert,
 * heap_update and heap_delete (but not by heap_multi_insert).
 *
 * 'data' points at the logged xl_heap_header, which is directly followed by
 * the tuple data; 'len' is the combined size of both.  Both are record
 * specific and thus have to be computed by the caller.
 */
static void
DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
{
	xl_heap_header wal_hdr;
	HeapTupleHeaderData *hdr = &tuple->header;
	int			payload_len = len - SizeOfHeapHeader;

	Assert(payload_len >= 0);
	Assert(payload_len <= MaxHeapTupleSize);

	/* data is not stored aligned, copy to aligned storage */
	memcpy((char *) &wal_hdr, data, SizeOfHeapHeader);

	/* not a disk based tuple */
	ItemPointerSetInvalid(&tuple->tuple.t_self);

	/* we can only figure this out after reassembling the transactions */
	tuple->tuple.t_tableOid = InvalidOid;

	tuple->tuple.t_data = hdr;
	tuple->tuple.t_len = payload_len + offsetof(HeapTupleHeaderData, t_bits);

	/* start from an all-zeroes header, then place the logged tuple data */
	memset(hdr, 0, sizeof(HeapTupleHeaderData));
	memcpy((char *) hdr + offsetof(HeapTupleHeaderData, t_bits),
		   data + SizeOfHeapHeader,
		   payload_len);

	/* restore the header fields that were logged with the tuple */
	hdr->t_infomask = wal_hdr.t_infomask;
	hdr->t_infomask2 = wal_hdr.t_infomask2;
	hdr->t_hoff = wal_hdr.t_hoff;
}

View File

@ -0,0 +1,920 @@
/*-------------------------------------------------------------------------
* logical.c
* PostgreSQL logical decoding coordination
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logical/logical.c
*
* NOTES
* This file coordinates interaction between the various modules that
* together provide logical decoding, primarily by providing so
* called LogicalDecodingContexts. The goal is to encapsulate most of the
* internal complexity for consumers of logical decoding, so they can
* create and consume a changestream with a low amount of code.
*
* The idea is that a consumer provides three callbacks, one to read WAL,
* one to prepare a data write, and a final one for actually writing since
* their implementation depends on the type of consumer. Check
* logicalfuncs.c for an example implementation of a fairly simple consumer
* and an implementation of a WAL reading callback that's suitable for
* simpler consumers.
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include <sys/stat.h>
#include "miscadmin.h"
#include "access/xact.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/reorderbuffer.h"
#include "replication/snapbuild.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
/* data for errcontext callback */
typedef struct LogicalErrorCallbackState
{
LogicalDecodingContext *ctx;
const char *callback_name;
XLogRecPtr report_location;
} LogicalErrorCallbackState;
/* wrappers around output plugin callbacks */
static void output_plugin_error_callback(void *arg);
static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
bool is_init);
static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
XLogRecPtr commit_lsn);
static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
Relation relation, ReorderBufferChange *change);
static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
/*
* Make sure the current settings & environment are capable of doing logical
* decoding.
*
* After the general slot requirements have been checked, an error is raised
* if wal_level is below logical, if we are not connected to a database, or
* if recovery is in progress. The checks run in that order, so the first
* unmet requirement determines the error reported.
*/
void
CheckLogicalDecodingRequirements(void)
{
CheckSlotRequirements();
if (wal_level < WAL_LEVEL_LOGICAL)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical decoding requires wal_level >= logical")));
if (MyDatabaseId == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical decoding requires a database connection")));
/* ----
* TODO: We got to change that someday soon...
*
* There are basically three things missing to allow this:
* 1) We need to be able to correctly and quickly identify the timeline a
* LSN belongs to
* 2) We need to force hot_standby_feedback to be enabled at all times so
* the primary cannot remove rows we need.
* 3) support dropping replication slots referring to a database, in
* dbase_redo. There can't be any active ones due to HS recovery
* conflicts, so that should be relatively easy.
* ----
*/
if (RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("logical decoding cannot be used while in recovery")));
}
/*
 * Helper function for CreateInitialDecodingContext() and
 * CreateDecodingContext() performing common tasks.
 *
 * Creates a dedicated memory context below CurrentMemoryContext and
 * allocates the LogicalDecodingContext and its parts inside it: the output
 * plugin callbacks, the xlogreader, the reorderbuffer and the snapshot
 * builder.  The slot used is MyReplicationSlot.
 *
 * output_plugin_options are stored in the context for the output plugin
 * start_lsn and xmin_horizon are handed to the snapshot builder
 * read_page is the callback the xlogreader uses to read WAL
 * prepare_write, do_write are the callbacks used for writing out data
 *
 * Returns the new context.  Raises an error if the xlogreader cannot be
 * allocated.
 */
static LogicalDecodingContext *
StartupDecodingContext(List *output_plugin_options,
					   XLogRecPtr start_lsn,
					   TransactionId xmin_horizon,
					   XLogPageReadCB read_page,
					   LogicalOutputPluginWriterPrepareWrite prepare_write,
					   LogicalOutputPluginWriterWrite do_write)
{
	ReplicationSlot *slot;
	MemoryContext context,
				old_context;
	LogicalDecodingContext *ctx;

	/* shorter lines... */
	slot = MyReplicationSlot;

	context = AllocSetContextCreate(CurrentMemoryContext,
									"Changeset Extraction Context",
									ALLOCSET_DEFAULT_MINSIZE,
									ALLOCSET_DEFAULT_INITSIZE,
									ALLOCSET_DEFAULT_MAXSIZE);
	old_context = MemoryContextSwitchTo(context);
	ctx = palloc0(sizeof(LogicalDecodingContext));

	ctx->context = context;

	/* (re-)load output plugins, so we detect a bad (removed) output plugin now. */
	LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));

	/*
	 * Now that the slot's xmin has been set, we can announce ourselves as a
	 * logical decoding backend which doesn't need to be checked individually
	 * when computing the xmin horizon because the xmin is enforced via
	 * replication slots.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
	LWLockRelease(ProcArrayLock);

	ctx->slot = slot;

	/*
	 * The xlogreader reports allocation failure by returning NULL rather
	 * than by raising an error, so check for that before touching it.
	 */
	ctx->reader = XLogReaderAllocate(read_page, ctx);
	if (ctx->reader == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	ctx->reader->private_data = ctx;

	ctx->reorder = ReorderBufferAllocate();
	ctx->snapshot_builder =
		AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn);

	ctx->reorder->private_data = ctx;

	/* wrap output plugin callbacks, so we can add error context information */
	ctx->reorder->begin = begin_cb_wrapper;
	ctx->reorder->apply_change = change_cb_wrapper;
	ctx->reorder->commit = commit_cb_wrapper;

	ctx->out = makeStringInfo();
	ctx->prepare_write = prepare_write;
	ctx->write = do_write;

	ctx->output_plugin_options = output_plugin_options;

	MemoryContextSwitchTo(old_context);

	return ctx;
}
/*
 * Create a new decoding context, for a new logical slot.
 *
 * plugin contains the name of the output plugin
 * output_plugin_options contains options passed to the output plugin
 *		(note: this function currently hands NIL to StartupDecodingContext()
 *		instead of forwarding the list)
 * read_page, prepare_write, do_write are callbacks that have to be filled to
 *		perform the use-case dependent, actual, work.
 *
 * Needs to be called while in a memory context that's at least as long lived
 * as the decoding context because further memory contexts will be created
 * inside it.
 *
 * The currently acquired slot (MyReplicationSlot) is initialized here: the
 * plugin name is registered, restart_lsn and catalog_xmin are set, and the
 * slot is saved.
 *
 * Returns an initialized decoding context after calling the output plugin's
 * startup function.  Raises an ERROR if no slot is acquired, no plugin is
 * given, the slot is a physical one or belongs to another database, or the
 * current transaction has already been assigned a transaction id.
 */
LogicalDecodingContext *
CreateInitDecodingContext(char *plugin,
						  List *output_plugin_options,
						  XLogPageReadCB read_page,
						  LogicalOutputPluginWriterPrepareWrite prepare_write,
						  LogicalOutputPluginWriterWrite do_write)
{
	TransactionId xmin_horizon = InvalidTransactionId;
	ReplicationSlot *slot;
	LogicalDecodingContext *ctx;
	MemoryContext old_context;

	/* shorter lines... */
	slot = MyReplicationSlot;

	/* first some sanity checks that are unlikely to be violated */
	if (slot == NULL)
		elog(ERROR, "cannot perform logical decoding without a acquired slot");

	if (plugin == NULL)
		elog(ERROR, "cannot initialize logical decoding without a specified plugin");

	/* Make sure the passed slot is suitable. These are user facing errors. */
	if (slot->data.database == InvalidOid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("cannot use physical replication slot for logical decoding")));

	if (slot->data.database != MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("replication slot \"%s\" was not created in this database",
						NameStr(slot->data.name))));

	if (IsTransactionState() &&
		GetTopTransactionIdIfAny() != InvalidTransactionId)
		ereport(ERROR,
				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
				 errmsg("cannot create logical replication slot in transaction that has performed writes")));

	/* register output plugin name with slot */
	SpinLockAcquire(&slot->mutex);
	strncpy(NameStr(slot->data.plugin), plugin,
			NAMEDATALEN);
	/* strncpy does not terminate the result when the source is too long */
	NameStr(slot->data.plugin)[NAMEDATALEN - 1] = '\0';
	SpinLockRelease(&slot->mutex);

	/*
	 * The replication slot mechanism is used to prevent removal of required
	 * WAL. As there is no interlock between this and checkpoints, required
	 * WAL could be removed before ReplicationSlotsComputeRequiredLSN() has
	 * been called to prevent that. In the very unlikely case that this
	 * happens we'll just retry.
	 */
	while (true)
	{
		XLogSegNo	segno;

		/*
		 * Let's start with enough information if we can, so log a standby
		 * snapshot and start decoding at exactly that position.
		 */
		if (!RecoveryInProgress())
		{
			XLogRecPtr	flushptr;

			/* start at current insert position */
			slot->data.restart_lsn = GetXLogInsertRecPtr();

			/* make sure we have enough information to start */
			flushptr = LogStandbySnapshot();

			/* and make sure it's fsynced to disk */
			XLogFlush(flushptr);
		}
		else
			slot->data.restart_lsn = GetRedoRecPtr();

		/* prevent WAL removal as fast as possible */
		ReplicationSlotsComputeRequiredLSN();

		/*
		 * If all required WAL is still there, great, otherwise retry. The
		 * slot should prevent further removal of WAL, unless there's a
		 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
		 * the new restart_lsn above, so normally we should never need to loop
		 * more than twice.
		 */
		XLByteToSeg(slot->data.restart_lsn, segno);
		if (XLogGetLastRemovedSegno() < segno)
			break;
	}

	/* ----
	 * This is a bit tricky: We need to determine a safe xmin horizon to start
	 * decoding from, to avoid starting from a running xacts record referring
	 * to xids whose rows have been vacuumed or pruned
	 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
	 * without further interlock its return value might immediately be out of
	 * date.
	 *
	 * So we have to acquire the ProcArrayLock to prevent computation of new
	 * xmin horizons by other backends, get the safe decoding xid, and inform
	 * the slot machinery about the new limit. Once that's done the
	 * ProcArrayLock can be released as the slot machinery now is
	 * protecting against vacuum.
	 * ----
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

	slot->effective_catalog_xmin = GetOldestSafeDecodingTransactionId();
	slot->data.catalog_xmin = slot->effective_catalog_xmin;

	ReplicationSlotsComputeRequiredXmin(true);

	LWLockRelease(ProcArrayLock);

	/*
	 * tell the snapshot builder to only assemble a snapshot once reaching a
	 * running_xact's record with the respective xmin.
	 */
	xmin_horizon = slot->data.catalog_xmin;

	ReplicationSlotMarkDirty();
	ReplicationSlotSave();

	ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon,
								 read_page, prepare_write, do_write);

	/* call output plugin initialization callback */
	old_context = MemoryContextSwitchTo(ctx->context);
	if (ctx->callbacks.startup_cb != NULL)
		startup_cb_wrapper(ctx, &ctx->options, true);
	MemoryContextSwitchTo(old_context);

	return ctx;
}
/*
 * Create a new decoding context, for a logical slot that has previously been
 * used already.
 *
 * start_lsn contains the LSN of the last received data or InvalidXLogRecPtr
 * output_plugin_options contains options passed to the output plugin
 * read_page, prepare_write, do_write are callbacks that have to be filled to
 *		perform the use-case dependent, actual, work.
 *
 * Needs to be called while in a memory context that's at least as long lived
 * as the decoding context because further memory contexts will be created
 * inside it.
 *
 * Returns an initialized decoding context after calling the output plugin's
 * startup function.  Raises an ERROR if no slot is acquired, or if the slot
 * is a physical one or belongs to another database.
 */
LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,
					  List *output_plugin_options,
					  XLogPageReadCB read_page,
					  LogicalOutputPluginWriterPrepareWrite prepare_write,
					  LogicalOutputPluginWriterWrite do_write)
{
	LogicalDecodingContext *ctx;
	ReplicationSlot *slot;
	MemoryContext old_context;

	/* shorter lines... */
	slot = MyReplicationSlot;

	/* first some sanity checks that are unlikely to be violated */
	if (slot == NULL)
		elog(ERROR, "cannot perform logical decoding without a acquired slot");

	/* make sure the passed slot is suitable, these are user facing errors */
	if (slot->data.database == InvalidOid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 (errmsg("cannot use physical replication slot for logical decoding"))));

	if (slot->data.database != MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 (errmsg("replication slot \"%s\" was not created in this database",
						 NameStr(slot->data.name)))));

	if (start_lsn == InvalidXLogRecPtr)
	{
		/* continue from last position */
		start_lsn = slot->data.confirmed_flush;
	}
	else if (start_lsn < slot->data.confirmed_flush)
	{
		/*
		 * It might seem like we should error out in this case, but it's
		 * pretty common for a client to acknowledge a LSN it doesn't have to
		 * do anything for, and thus didn't store persistently, because the
		 * xlog records didn't result in anything relevant for logical
		 * decoding. Clients have to be able to do that to support
		 * synchronous replication.
		 *
		 * Report the position the caller asked for before forwarding it;
		 * otherwise the message would show the minimum twice.
		 */
		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
			 (uint32) (slot->data.confirmed_flush >> 32),
			 (uint32) slot->data.confirmed_flush);

		start_lsn = slot->data.confirmed_flush;
	}

	ctx = StartupDecodingContext(output_plugin_options,
								 start_lsn, InvalidTransactionId,
								 read_page, prepare_write, do_write);

	/*
	 * Call output plugin initialization callback.  The slot already exists,
	 * so this is not the initializing call: is_init is false here, in
	 * contrast to the slot-creation path.
	 */
	old_context = MemoryContextSwitchTo(ctx->context);
	if (ctx->callbacks.startup_cb != NULL)
		startup_cb_wrapper(ctx, &ctx->options, false);
	MemoryContextSwitchTo(old_context);

	ereport(LOG,
			(errmsg("starting logical decoding for slot %s",
					NameStr(slot->data.name)),
			 errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X",
					   (uint32) (slot->data.confirmed_flush >> 32),
					   (uint32) slot->data.confirmed_flush,
					   (uint32) (slot->data.restart_lsn >> 32),
					   (uint32) slot->data.restart_lsn)));

	return ctx;
}
/*
 * Report whether the snapshot builder of the given context has reached the
 * consistent state, i.e. whether a consistent initial decoding snapshot has
 * been built.
 */
bool
DecodingContextReady(LogicalDecodingContext *ctx)
{
	bool		consistent;

	consistent = (SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT);

	return consistent;
}
/*
 * Read WAL for the decoding slot, starting at its restart_lsn, until the
 * context is ready to start extracting changes.  On return the slot's
 * confirmed_flush is set to the end of the last record read.
 */
void
DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
{
	/* the first read starts at the slot's restart position */
	XLogRecPtr	read_from = ctx->slot->data.restart_lsn;

	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
		 (uint32) (ctx->slot->data.restart_lsn >> 32),
		 (uint32) ctx->slot->data.restart_lsn);

	/*
	 * Decode records one at a time until a consistent starting point has
	 * been found; at least one record is always processed.
	 */
	do
	{
		XLogRecord *rec;
		char	   *errtext = NULL;

		/*
		 * If the caller requires that interrupts be checked, the read_page
		 * callback should do so, as those will often wait: the read_page
		 * callback waits for new WAL.
		 */
		rec = XLogReadRecord(ctx->reader, read_from, &errtext);
		if (errtext)
			elog(ERROR, "%s", errtext);

		Assert(rec);

		/* only the very first read names an explicit position */
		read_from = InvalidXLogRecPtr;

		LogicalDecodingProcessRecord(ctx, rec);
	} while (!DecodingContextReady(ctx));

	ctx->slot->data.confirmed_flush = ctx->reader->EndRecPtr;
}
/*
 * Release a decoding context.  The output plugin's shutdown callback, if it
 * registered one, runs first; afterwards the reorder buffer, the snapshot
 * builder, the xlog reader and finally the context's memory context are
 * freed.
 */
void
FreeDecodingContext(LogicalDecodingContext *ctx)
{
	bool		has_shutdown_cb = (ctx->callbacks.shutdown_cb != NULL);

	/* give the plugin a chance to clean up while everything still exists */
	if (has_shutdown_cb)
	{
		shutdown_cb_wrapper(ctx);
	}

	ReorderBufferFree(ctx->reorder);
	FreeSnapshotBuilder(ctx->snapshot_builder);
	XLogReaderFree(ctx->reader);

	/* ctx itself was allocated in this memory context, so this goes last */
	MemoryContextDelete(ctx->context);
}
/*
 * Prepare a write via the context's prepare_write routine and remember that
 * a write has been prepared.  Raises an ERROR when the context currently
 * does not accept writes.
 */
void
OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
{
	if (ctx->accept_writes)
	{
		ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
		ctx->prepared_write = true;
		return;
	}

	elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
}
/*
 * Perform a previously prepared write via the context's write routine and
 * clear the "prepared" marker.  Raises an ERROR when no write has been
 * prepared with OutputPluginPrepareWrite().
 */
void
OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
{
	if (ctx->prepared_write)
	{
		ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
		ctx->prepared_write = false;
		return;
	}

	elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
}
/*
 * Load the output plugin library named by 'plugin', look up its
 * _PG_output_plugin_init function, let that function fill in *callbacks,
 * and verify that the begin, change and commit callbacks were provided.
 * Raises an ERROR if the init symbol or any of those callbacks is missing.
 */
static void
LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
{
	LogicalOutputPluginInit init_fn;

	init_fn = (LogicalOutputPluginInit)
		load_external_function(plugin, "_PG_output_plugin_init", false, NULL);

	if (init_fn == NULL)
	{
		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
	}

	/* ask the output plugin to fill the callback struct */
	init_fn(callbacks);

	/* the three callbacks below are mandatory */
	if (callbacks->begin_cb == NULL)
	{
		elog(ERROR, "output plugins have to register a begin callback");
	}
	if (callbacks->change_cb == NULL)
	{
		elog(ERROR, "output plugins have to register a change callback");
	}
	if (callbacks->commit_cb == NULL)
	{
		elog(ERROR, "output plugins have to register a commit callback");
	}
}
static void
output_plugin_error_callback(void *arg)
{
LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
/* not all callbacks have an associated LSN */
if (state->report_location != InvalidXLogRecPtr)
errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
NameStr(state->ctx->slot->data.name),
NameStr(state->ctx->slot->data.plugin),
state->callback_name,
(uint32)(state->report_location >> 32),
(uint32)state->report_location);
else
errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
NameStr(state->ctx->slot->data.name),
NameStr(state->ctx->slot->data.plugin),
state->callback_name);
}
/*
 * Invoke the output plugin's startup callback with an error context entry
 * identifying the callback.  Writes are not accepted during this callback.
 * The caller must have checked that a startup callback is registered.
 */
static void
startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
{
	LogicalErrorCallbackState cbstate;
	ErrorContextCallback errctx;

	/* describe what is about to run; there is no associated LSN */
	cbstate.report_location = InvalidXLogRecPtr;
	cbstate.callback_name = "startup";
	cbstate.ctx = ctx;

	/* push callback + info on the error context stack */
	errctx.previous = error_context_stack;
	errctx.arg = (void *) &cbstate;
	errctx.callback = output_plugin_error_callback;
	error_context_stack = &errctx;

	/* set output state */
	ctx->accept_writes = false;

	/* do the actual work: call callback */
	ctx->callbacks.startup_cb(ctx, opt, is_init);

	/* pop the error context stack */
	error_context_stack = errctx.previous;
}
/*
 * Invoke the output plugin's shutdown callback with an error context entry
 * identifying the callback.  Writes are not accepted during this callback.
 * The caller must have checked that a shutdown callback is registered.
 */
static void
shutdown_cb_wrapper(LogicalDecodingContext *ctx)
{
	LogicalErrorCallbackState cbstate;
	ErrorContextCallback errctx;

	/* describe what is about to run; there is no associated LSN */
	cbstate.report_location = InvalidXLogRecPtr;
	cbstate.callback_name = "shutdown";
	cbstate.ctx = ctx;

	/* push callback + info on the error context stack */
	errctx.previous = error_context_stack;
	errctx.arg = (void *) &cbstate;
	errctx.callback = output_plugin_error_callback;
	error_context_stack = &errctx;

	/* set output state */
	ctx->accept_writes = false;

	/* do the actual work: call callback */
	ctx->callbacks.shutdown_cb(ctx);

	/* pop the error context stack */
	error_context_stack = errctx.previous;
}
/*
 * Callbacks for ReorderBuffer which add in some more information and then
 * call the output plugin's callbacks.
 */

/*
 * Forward the begin of a transaction to the output plugin.  Error context
 * and the write location are both set to the transaction's first LSN.
 */
static void
begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
{
	LogicalDecodingContext *ctx = cache->private_data;
	LogicalErrorCallbackState cbstate;
	ErrorContextCallback errctx;

	/* describe what is about to run */
	cbstate.report_location = txn->first_lsn;
	cbstate.callback_name = "begin";
	cbstate.ctx = ctx;

	/* push callback + info on the error context stack */
	errctx.previous = error_context_stack;
	errctx.arg = (void *) &cbstate;
	errctx.callback = output_plugin_error_callback;
	error_context_stack = &errctx;

	/* set output state */
	ctx->write_location = txn->first_lsn;
	ctx->write_xid = txn->xid;
	ctx->accept_writes = true;

	/* do the actual work: call callback */
	ctx->callbacks.begin_cb(ctx, txn);

	/* pop the error context stack */
	error_context_stack = errctx.previous;
}
/*
 * Forward a transaction's commit to the output plugin.  Errors are reported
 * against the transaction's final_lsn, while output is attributed to its
 * end_lsn.
 */
static void
commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
				  XLogRecPtr commit_lsn)
{
	LogicalDecodingContext *ctx = cache->private_data;
	LogicalErrorCallbackState cbstate;
	ErrorContextCallback errctx;

	/* describe what is about to run */
	cbstate.report_location = txn->final_lsn;	/* beginning of commit record */
	cbstate.callback_name = "commit";
	cbstate.ctx = ctx;

	/* push callback + info on the error context stack */
	errctx.previous = error_context_stack;
	errctx.arg = (void *) &cbstate;
	errctx.callback = output_plugin_error_callback;
	error_context_stack = &errctx;

	/* set output state */
	ctx->write_location = txn->end_lsn; /* points to the end of the record */
	ctx->write_xid = txn->xid;
	ctx->accept_writes = true;

	/* do the actual work: call callback */
	ctx->callbacks.commit_cb(ctx, txn, commit_lsn);

	/* pop the error context stack */
	error_context_stack = errctx.previous;
}
/*
 * Forward a single change to the output plugin.  Both the error context and
 * the write location use the change's own LSN.
 */
static void
change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
				  Relation relation, ReorderBufferChange *change)
{
	LogicalDecodingContext *ctx = cache->private_data;
	LogicalErrorCallbackState cbstate;
	ErrorContextCallback errctx;

	/* describe what is about to run */
	cbstate.report_location = change->lsn;
	cbstate.callback_name = "change";
	cbstate.ctx = ctx;

	/* push callback + info on the error context stack */
	errctx.previous = error_context_stack;
	errctx.arg = (void *) &cbstate;
	errctx.callback = output_plugin_error_callback;
	error_context_stack = &errctx;

	/* set output state */
	ctx->accept_writes = true;
	ctx->write_xid = txn->xid;

	/*
	 * Report this change's lsn so replies from clients can give an
	 * up-to-date answer. This won't ever be enough (and shouldn't be!) to
	 * confirm receipt of this transaction, but it might allow another
	 * transaction's commit to be confirmed with one message.
	 */
	ctx->write_location = change->lsn;

	/* do the actual work: call callback */
	ctx->callbacks.change_cb(ctx, txn, relation, change);

	/* pop the error context stack */
	error_context_stack = errctx.previous;
}
/*
 * Propose a new catalog xmin horizon for historic snapshots in the current
 * replication slot.
 *
 * In most cases the xmin cannot be used immediately to advance the horizon;
 * it is remembered as a candidate that takes effect once the client has
 * confirmed receiving current_lsn via LogicalConfirmReceivedLocation().
 */
void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
{
	ReplicationSlot *slot = MyReplicationSlot;
	bool		apply_now = false;

	Assert(slot != NULL);

	SpinLockAcquire(&slot->mutex);

	/*
	 * Leave everything alone if the slot already has this or a newer xmin.
	 * This can happen if we restart decoding in a slot.
	 */
	if (!TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
	{
		/*
		 * If the client has already confirmed up to this lsn, the candidate
		 * can be marked as accepted right away. This too can happen if we
		 * restart decoding in a slot.
		 */
		bool		already_confirmed = (current_lsn <= slot->data.confirmed_flush);

		/*
		 * Otherwise only record a candidate if the previous one has been
		 * applied, or we might never end up updating if the receiver acks
		 * too slowly.
		 */
		if (already_confirmed ||
			slot->candidate_xmin_lsn == InvalidXLogRecPtr)
		{
			slot->candidate_catalog_xmin = xmin;
			slot->candidate_xmin_lsn = current_lsn;

			/* an already confirmed candidate can directly be used */
			apply_now = already_confirmed;
		}
	}

	SpinLockRelease(&slot->mutex);

	/* candidate already valid with the current flush position, apply */
	if (apply_now)
	{
		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
	}
}
/*
 * Mark the minimal LSN (restart_lsn) we need to read to replay all
 * transactions that have not yet committed at current_lsn.
 *
 * Just like LogicalIncreaseXminForSlot() this only takes effect when the
 * client has confirmed to have received current_lsn.
 *
 * current_lsn	position at which restart_lsn becomes a valid restart point
 * restart_lsn	proposed new restart position; both must be valid LSNs
 *
 * Exactly one of the following happens:
 *	- restart_lsn is not newer than the slot's current one: nothing changes;
 *	- the client has already confirmed current_lsn: the candidate is stored
 *	  and applied immediately;
 *	- no candidate is pending: the proposal is stored as the new candidate;
 *	- a candidate is pending: the proposal is dropped and that is logged.
 */
void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
{
	bool		updated_lsn = false;
	bool		stored_candidate = false;
	bool		rejected = false;
	XLogRecPtr	pending_restart_lsn = InvalidXLogRecPtr;
	XLogRecPtr	pending_restart_valid = InvalidXLogRecPtr;
	XLogRecPtr	confirmed_flush = InvalidXLogRecPtr;
	ReplicationSlot *slot;

	slot = MyReplicationSlot;

	Assert(slot != NULL);
	Assert(restart_lsn != InvalidXLogRecPtr);
	Assert(current_lsn != InvalidXLogRecPtr);

	SpinLockAcquire(&slot->mutex);

	/*
	 * Don't overwrite if we already have a newer restart lsn.  This has to
	 * skip all the branches below: storing the stale value as a candidate
	 * would later move the slot's restart_lsn backwards.
	 */
	if (restart_lsn <= slot->data.restart_lsn)
	{
	}

	/*
	 * We might have already flushed far enough to directly accept this lsn,
	 * in this case there is no need to check for existing candidate LSNs
	 */
	else if (current_lsn <= slot->data.confirmed_flush)
	{
		slot->candidate_restart_valid = current_lsn;
		slot->candidate_restart_lsn = restart_lsn;

		/* our candidate can directly be used */
		updated_lsn = true;
	}

	/*
	 * Only increase if the previous values have been applied, otherwise we
	 * might never end up updating if the receiver acks too slowly. A missed
	 * value here will just cause some extra effort after reconnecting.
	 */
	else if (slot->candidate_restart_valid == InvalidXLogRecPtr)
	{
		slot->candidate_restart_valid = current_lsn;
		slot->candidate_restart_lsn = restart_lsn;
		stored_candidate = true;
	}
	else
	{
		/* remember the values for the message emitted after unlocking */
		pending_restart_lsn = slot->candidate_restart_lsn;
		pending_restart_valid = slot->candidate_restart_valid;
		confirmed_flush = slot->data.confirmed_flush;
		rejected = true;
	}

	SpinLockRelease(&slot->mutex);

	/*
	 * Logging is done only now: elog() must not be called while a spinlock
	 * is held.
	 */
	if (stored_candidate)
	{
		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
			 (uint32) (current_lsn >> 32), (uint32) current_lsn);
	}
	else if (rejected)
	{
		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
			 (uint32) (current_lsn >> 32), (uint32) current_lsn,
			 (uint32) (pending_restart_lsn >> 32),
			 (uint32) pending_restart_lsn,
			 (uint32) (pending_restart_valid >> 32),
			 (uint32) pending_restart_valid,
			 (uint32) (confirmed_flush >> 32),
			 (uint32) confirmed_flush);
	}

	/* candidates are already valid with the current flush position, apply */
	if (updated_lsn)
		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
}
/*
* Handle a consumer's confirmation of having received all changes up to lsn.
*
* Records lsn as the slot's confirmed_flush position.  If a candidate catalog
* xmin and/or a candidate restart position has become valid at lsn, it is
* installed in the slot's persistent data and the slot is saved; only after
* that is effective_catalog_xmin advanced.
*/
void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)
{
Assert(lsn != InvalidXLogRecPtr);
/* Do an unlocked check for candidate_lsn first. */
if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
{
bool updated_xmin = false;
bool updated_restart = false;
/* use volatile pointer to prevent code rearrangement */
volatile ReplicationSlot *slot = MyReplicationSlot;
SpinLockAcquire(&slot->mutex);
slot->data.confirmed_flush = lsn;
/* if we're past the location required for bumping xmin, do so */
if (slot->candidate_xmin_lsn != InvalidXLogRecPtr &&
slot->candidate_xmin_lsn <= lsn)
{
/*
* We have to write the changed xmin to disk *before* we change
* the in-memory value, otherwise after a crash we wouldn't know
* that some catalog tuples might have been removed already.
*
* Ensure that by first writing to ->data.catalog_xmin and only
* updating ->effective_catalog_xmin once the new state is synced
* to disk. After a crash ->effective_catalog_xmin is set to
* ->data.catalog_xmin.
*/
if (TransactionIdIsValid(slot->candidate_catalog_xmin) &&
slot->data.catalog_xmin != slot->candidate_catalog_xmin)
{
slot->data.catalog_xmin = slot->candidate_catalog_xmin;
slot->candidate_catalog_xmin = InvalidTransactionId;
slot->candidate_xmin_lsn = InvalidXLogRecPtr;
updated_xmin = true;
}
}
/* likewise install a candidate restart position that became valid */
if (slot->candidate_restart_valid != InvalidXLogRecPtr &&
slot->candidate_restart_valid <= lsn)
{
Assert(slot->candidate_restart_lsn != InvalidXLogRecPtr);
slot->data.restart_lsn = slot->candidate_restart_lsn;
slot->candidate_restart_lsn = InvalidXLogRecPtr;
slot->candidate_restart_valid = InvalidXLogRecPtr;
updated_restart = true;
}
SpinLockRelease(&slot->mutex);
/* first write new xmin to disk, so we know what's up after a crash */
if (updated_xmin || updated_restart)
{
ReplicationSlotMarkDirty();
ReplicationSlotSave();
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
}
/*
* Now the new xmin is safely on disk, we can let the global value
* advance. We do not take ProcArrayLock or similar since we only
* advance xmin here and there's not much harm done by a concurrent
* computation missing that.
*
* NOTE(review): the required LSN is recomputed only in this branch,
* i.e. not when just the restart position changed - confirm that is
* intended.
*/
if (updated_xmin)
{
SpinLockAcquire(&slot->mutex);
slot->effective_catalog_xmin = slot->data.catalog_xmin;
SpinLockRelease(&slot->mutex);
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
}
}
else
{
/* no candidates pending: just remember the confirmed position */
volatile ReplicationSlot *slot = MyReplicationSlot;
SpinLockAcquire(&slot->mutex);
slot->data.confirmed_flush = lsn;
SpinLockRelease(&slot->mutex);
}
}

View File

@ -0,0 +1,509 @@
/*-------------------------------------------------------------------------
*
* logicalfuncs.c
*
* Support functions for using logical decoding and management of
* logical replication slots via SQL.
*
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logicalfuncs.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "mb/pg_wchar.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/resowner.h"
#include "utils/lsyscache.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/logicalfuncs.h"
#include "storage/fd.h"
/*
* Private data used by the output writer callbacks when writing plugin
* output into a tuplestore.
*/
typedef struct DecodingOutputState {
/* tuplestore that receives one row per output plugin write */
Tuplestorestate *tupstore;
/* row descriptor used when adding rows to tupstore */
TupleDesc tupdesc;
/* if false, output is asserted to be valid in the database encoding */
bool binary_output;
/* number of rows added to tupstore so far */
int64 returned_rows;
} DecodingOutputState;
/*
 * Prepare for an output plugin write: empty the context's output buffer so
 * the plugin starts from a clean slate.  lsn, xid and last_write are not
 * used by this writer.
 */
static void
LogicalOutputPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
						  bool last_write)
{
	StringInfo	outbuf = ctx->out;

	resetStringInfo(outbuf);
}
/*
 * Perform an output plugin write: append one row consisting of the LSN, the
 * transaction id and the contents of the context's output buffer to the
 * tuplestore found in the writer's private state.  Raises an ERROR if the
 * buffer is too large to fit into a single datum.
 */
static void
LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
				   bool last_write)
{
	DecodingOutputState *outstate;
	Datum		row[3];
	bool		isnull[3] = {false, false, false};

	/* SQL Datums can only be of a limited length... */
	if (ctx->out->len > MaxAllocSize - VARHDRSZ)
		elog(ERROR, "too much output for sql interface");

	outstate = (DecodingOutputState *) ctx->output_writer_private;

	/*
	 * When textual output was requested, ctx->out is expected to be in the
	 * database encoding; verify that in assert-enabled builds.
	 */
	Assert(outstate->binary_output ||
		   pg_verify_mbstr(GetDatabaseEncoding(),
						   ctx->out->data, ctx->out->len,
						   false));

	row[0] = LSNGetDatum(lsn);
	row[1] = TransactionIdGetDatum(xid);

	/* ick, but cstring_to_text_with_len works for bytea perfectly fine */
	row[2] = PointerGetDatum(
				cstring_to_text_with_len(ctx->out->data, ctx->out->len));

	tuplestore_putvalues(outstate->tupstore, outstate->tupdesc, row, isnull);
	outstate->returned_rows++;
}
/*
 * Read 'count' bytes of WAL of timeline 'tli', starting at 'startptr', into
 * 'buf'.  The segment file that was opened last is kept open in static
 * variables, together with its segment number and current offset, and is
 * reused by later calls.  Raises an ERROR if a segment cannot be opened,
 * positioned or read.
 *
 * TODO: This is duplicate code with pg_xlogdump, similar to walsender.c, but
 * we currently don't have the infrastructure (elog!) to share it.
 */
static void
XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
{
	/* state of the segment file kept open across calls */
	static int	sendFile = -1;
	static XLogSegNo sendSegNo = 0;
	static uint32 sendOff = 0;

	char	   *dst = buf;
	XLogRecPtr	pos = startptr;
	Size		remaining = count;

	while (remaining > 0)
	{
		uint32		startoff = pos % XLogSegSize;
		int			segbytes;
		int			readbytes;

		/* Switch to another logfile segment if the open one doesn't fit */
		if (sendFile < 0 || !XLByteInSeg(pos, sendSegNo))
		{
			char		path[MAXPGPATH];

			if (sendFile >= 0)
				close(sendFile);

			XLByteToSeg(pos, sendSegNo);
			XLogFilePath(path, tli, sendSegNo);

			sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
			if (sendFile < 0)
			{
				/* a missing file gets a dedicated message */
				if (errno == ENOENT)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("requested WAL segment %s has already been removed",
									path)));
				else
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open file \"%s\": %m",
									path)));
			}

			/* a freshly opened file is positioned at its start */
			sendOff = 0;
		}

		/* Need to seek in the file? */
		if (sendOff != startoff)
		{
			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
			{
				char		path[MAXPGPATH];

				XLogFilePath(path, tli, sendSegNo);

				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not seek in log segment %s to offset %u: %m",
								path, startoff)));
			}
			sendOff = startoff;
		}

		/* Never read past the end of the current segment */
		segbytes = (remaining > (XLogSegSize - startoff))
			? (XLogSegSize - startoff)
			: remaining;

		readbytes = read(sendFile, dst, segbytes);
		if (readbytes <= 0)
		{
			char		path[MAXPGPATH];

			XLogFilePath(path, tli, sendSegNo);

			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
							path, sendOff, (unsigned long) segbytes)));
		}

		/* Advance all cursors by what was actually read */
		dst += readbytes;
		pos += readbytes;
		sendOff += readbytes;
		remaining -= readbytes;
	}
}
/*
 * Raise an ERROR unless the current user is a superuser or has the
 * replication attribute.
 */
static void
check_permissions(void)
{
	if (superuser())
		return;

	if (has_rolreplication(GetUserId()))
		return;

	ereport(ERROR,
			(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
			 (errmsg("must be superuser or replication role to use replication slots"))));
}
/*
 * read_page callback for logical decoding contexts.
 *
 * Waits until WAL up to targetPagePtr + reqLen is available, then reads the
 * page at targetPagePtr into cur_page.  *pageTLI is set to the timeline the
 * page is read from.  Returns the number of valid bytes on the page, or -1
 * if not enough data is available.
 *
 * Public because it would likely be very helpful for someone writing another
 * output method outside walsender, e.g. in a bgworker.
 *
 * TODO: The walsender has its own version of this, but it relies on the
 * walsender's latch being set whenever WAL is flushed. No such infrastructure
 * exists for normal backends, so we have to do a check/sleep/repeat style of
 * loop for now.
 */
int
logical_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
	int reqLen, XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
{
	XLogRecPtr	available;
	XLogRecPtr	needed = targetPagePtr + reqLen;
	int			nvalid;

	for (;;)
	{
		/*
		 * TODO: we're going to have to do something more intelligent about
		 * timelines on standbys. Use readTimeLineHistory() and
		 * tliOfPointInHistory() to get the proper LSN? For now we'll catch
		 * that case earlier, but the code and TODO is left in here for when
		 * that changes.
		 */
		if (RecoveryInProgress())
			available = GetXLogReplayRecPtr(pageTLI);
		else
		{
			*pageTLI = ThisTimeLineID;
			available = GetFlushRecPtr();
		}

		if (needed <= available)
			break;

		/* not there yet: stay interruptible and poll again shortly */
		CHECK_FOR_INTERRUPTS();
		pg_usleep(1000L);
	}

	if (targetPagePtr + XLOG_BLCKSZ <= available)
	{
		/* more than one block available */
		nvalid = XLOG_BLCKSZ;
	}
	else if (targetPagePtr + reqLen > available)
	{
		/* not enough data there */
		return -1;
	}
	else
	{
		/* part of the page available */
		nvalid = available - targetPagePtr;
	}

	/* a whole block is read regardless of how much of it is reported valid */
	XLogRead(cur_page, *pageTLI, targetPagePtr, XLOG_BLCKSZ);

	return nvalid;
}
/*
 * Helper function for the various SQL callable logical decoding functions.
 *
 * Decodes WAL for the replication slot named by SQL argument 0 and returns
 * the resulting changes as a materialized tuplestore through
 * fcinfo->resultinfo.
 *
 * SQL arguments, as read below:
 *	0: name of the slot to decode from (must not be NULL)
 *	1: upto_lsn; decoding stops once a record ending at or after this LSN
 *	   has been processed.  NULL means no LSN limit.
 *	2: upto_nchanges; decoding stops once at least this many rows have been
 *	   returned.  NULL (or 0) means no row limit.
 *	3: text[] of alternating option-name / option-value elements handed to
 *	   the decoding context (must not be NULL, at most one-dimensional, no
 *	   NULL elements, even number of elements)
 *
 * confirm: if true, the position decoding reached is reported via
 *		LogicalConfirmReceivedLocation(), i.e. the changes are consumed;
 *		if false the caller only peeks.
 * binary: if false, an ERROR is raised unless the output plugin declared
 *		OUTPUT_PLUGIN_TEXTUAL_OUTPUT.
 *
 * Always returns (Datum) 0; the rows are passed back in rsinfo->setResult.
 */
static Datum
pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary)
{
	Name		name;
	XLogRecPtr	upto_lsn;
	int32		upto_nchanges;
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;
	XLogRecPtr	end_of_wal;
	XLogRecPtr	startptr;
	LogicalDecodingContext *ctx;
	ResourceOwner old_resowner = CurrentResourceOwner;
	ArrayType  *arr;
	Size		ndim;
	List	   *options = NIL;
	DecodingOutputState *p;

	/*
	 * This function handles NULL arguments itself (see the limits below), so
	 * the slot name has to be checked explicitly before it is dereferenced.
	 */
	if (PG_ARGISNULL(0))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("slot name must not be null")));
	name = PG_GETARG_NAME(0);

	if (PG_ARGISNULL(1))
		upto_lsn = InvalidXLogRecPtr;
	else
		upto_lsn = PG_GETARG_LSN(1);

	/* 0 means "no limit on the number of changes", see the loop below */
	if (PG_ARGISNULL(2))
		upto_nchanges = 0;
	else
		upto_nchanges = PG_GETARG_INT32(2);

	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not allowed in this context")));

	/* state to write output to */
	p = palloc0(sizeof(DecodingOutputState));

	p->binary_output = binary;

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	check_permissions();

	CheckLogicalDecodingRequirements();

	/* as with the slot name, a NULL options array must not be detoasted */
	if (PG_ARGISNULL(3))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("options array must not be null")));
	arr = PG_GETARG_ARRAYTYPE_P(3);
	ndim = ARR_NDIM(arr);

	/* the option list and the tuplestore are built in the per-query context */
	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	if (ndim > 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must be one-dimensional")));
	}
	else if (array_contains_nulls(arr))
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must not contain nulls")));
	}
	else if (ndim == 1)
	{
		int			nelems;
		Datum	   *datum_opts;
		int			i;

		Assert(ARR_ELEMTYPE(arr) == TEXTOID);

		deconstruct_array(arr, TEXTOID, -1, false, 'i',
						  &datum_opts, NULL, &nelems);

		if (nelems % 2 != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("array must have even number of elements")));

		/* elements come in (option name, option value) pairs */
		for (i = 0; i < nelems; i += 2)
		{
			char	   *optname = TextDatumGetCString(datum_opts[i]);
			char	   *optval = TextDatumGetCString(datum_opts[i + 1]);

			options = lappend(options,
							  makeDefElem(optname, (Node *) makeString(optval)));
		}
	}

	p->tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = p->tupstore;
	rsinfo->setDesc = p->tupdesc;

	/* compute the current end-of-wal */
	if (!RecoveryInProgress())
		end_of_wal = GetFlushRecPtr();
	else
		end_of_wal = GetXLogReplayRecPtr(NULL);

	ReplicationSlotAcquire(NameStr(*name));

	PG_TRY();
	{
		ctx = CreateDecodingContext(InvalidXLogRecPtr,
									options,
									logical_read_local_xlog_page,
									LogicalOutputPrepareWrite,
									LogicalOutputWrite);

		MemoryContextSwitchTo(oldcontext);

		/*
		 * Check whether the output plugin writes textual output if that's
		 * what we need.
		 */
		if (!binary &&
			ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("output plugin cannot produce text output")));

		ctx->output_writer_private = p;

		startptr = MyReplicationSlot->data.restart_lsn;

		CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding");

		/* invalidate non-timetravel entries */
		InvalidateSystemCaches();

		/*
		 * The first iteration reads at the slot's restart_lsn; afterwards
		 * startptr is invalid and the reader simply continues after the
		 * record it read last, until the end of WAL computed above.
		 */
		while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) ||
			   (ctx->reader->EndRecPtr && ctx->reader->EndRecPtr < end_of_wal))
		{
			XLogRecord *record;
			char	   *errm = NULL;

			record = XLogReadRecord(ctx->reader, startptr, &errm);
			if (errm)
				elog(ERROR, "%s", errm);

			startptr = InvalidXLogRecPtr;

			/*
			 * The {begin_txn,change,commit_txn}_wrapper callbacks above will
			 * store the description into our tuplestore.
			 */
			if (record != NULL)
				LogicalDecodingProcessRecord(ctx, record);

			/* check limits */
			if (upto_lsn != InvalidXLogRecPtr &&
				upto_lsn <= ctx->reader->EndRecPtr)
				break;
			if (upto_nchanges != 0 &&
				upto_nchanges <= p->returned_rows)
				break;
		}
	}
	PG_CATCH();
	{
		/* clear all timetravel entries */
		InvalidateSystemCaches();

		PG_RE_THROW();
	}
	PG_END_TRY();

	tuplestore_donestoring(p->tupstore);

	CurrentResourceOwner = old_resowner;

	/*
	 * Next time, start where we left off. (Hunting things, the family
	 * business..)
	 */
	if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm)
		LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr);

	/* free context, call shutdown callback */
	FreeDecodingContext(ctx);

	ReplicationSlotRelease();
	InvalidateSystemCaches();

	return (Datum) 0;
}
/*
 * SQL-callable: return the slot's change stream as text and consume it.
 */
Datum
pg_logical_slot_get_changes(PG_FUNCTION_ARGS)
{
	/* confirm = true, binary = false */
	return pg_logical_slot_get_changes_guts(fcinfo, true, false);
}
/*
 * SQL-callable: return the slot's change stream as text without consuming
 * it (peek only).
 */
Datum
pg_logical_slot_peek_changes(PG_FUNCTION_ARGS)
{
	/* confirm = false, binary = false */
	return pg_logical_slot_get_changes_guts(fcinfo, false, false);
}
/*
 * SQL-callable: return the slot's change stream in binary and consume it.
 */
Datum
pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS)
{
	/* confirm = true, binary = true */
	return pg_logical_slot_get_changes_guts(fcinfo, true, true);
}
/*
 * SQL-callable: return the slot's change stream in binary without consuming
 * it (peek only).
 */
Datum
pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS)
{
	/* confirm = false, binary = true */
	return pg_logical_slot_get_changes_guts(fcinfo, false, true);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -43,6 +43,7 @@
#include "miscadmin.h"
#include "replication/slot.h"
#include "storage/fd.h"
#include "storage/proc.h"
#include "storage/procarray.h"
/*
@ -82,6 +83,8 @@ ReplicationSlot *MyReplicationSlot = NULL;
/* GUCs */
int max_replication_slots = 0; /* the maximum number of replication slots */
static void ReplicationSlotDropAcquired(void);
/* internal persistency functions */
static void RestoreSlotFromDisk(const char *name);
static void CreateSlotOnDisk(ReplicationSlot *slot);
@ -190,11 +193,12 @@ ReplicationSlotValidateName(const char *name, int elevel)
* Create a new replication slot and mark it as used by this backend.
*
* name: Name of the slot
* db_specific: changeset extraction is db specific, if the slot is going to
* db_specific: logical decoding is db specific; if the slot is going to
* be used for that pass true, otherwise false.
*/
void
ReplicationSlotCreate(const char *name, bool db_specific)
ReplicationSlotCreate(const char *name, bool db_specific,
ReplicationSlotPersistency persistency)
{
ReplicationSlot *slot = NULL;
int i;
@ -246,6 +250,7 @@ ReplicationSlotCreate(const char *name, bool db_specific)
*/
Assert(!slot->in_use);
Assert(!slot->active);
slot->data.persistency = persistency;
slot->data.xmin = InvalidTransactionId;
slot->effective_xmin = InvalidTransactionId;
strncpy(NameStr(slot->data.name), name, NAMEDATALEN);
@ -348,14 +353,30 @@ ReplicationSlotRelease(void)
Assert(slot != NULL && slot->active);
/* Mark slot inactive. We're not freeing it, just disconnecting. */
if (slot->data.persistency == RS_EPHEMERAL)
{
/*
* Delete the slot. There is no !PANIC case where this is allowed to
* fail, all that may happen is an incomplete cleanup of the on-disk
* data.
*/
ReplicationSlotDropAcquired();
}
else
{
/* Mark slot inactive. We're not freeing it, just disconnecting. */
volatile ReplicationSlot *vslot = slot;
SpinLockAcquire(&slot->mutex);
vslot->active = false;
SpinLockRelease(&slot->mutex);
MyReplicationSlot = NULL;
}
MyReplicationSlot = NULL;
/* might not have been set when we've been a plain slot */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyPgXact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING;
LWLockRelease(ProcArrayLock);
}
/*
@ -364,52 +385,36 @@ ReplicationSlotRelease(void)
void
ReplicationSlotDrop(const char *name)
{
ReplicationSlot *slot = NULL;
int i;
bool active;
Assert(MyReplicationSlot == NULL);
ReplicationSlotAcquire(name);
ReplicationSlotDropAcquired();
}
/*
* Permanently drop the currently acquired replication slot which will be
* released by the point this function returns.
*/
static void
ReplicationSlotDropAcquired(void)
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
ReplicationSlot *slot = MyReplicationSlot;
ReplicationSlotValidateName(name, ERROR);
Assert(MyReplicationSlot != NULL);
/* slot isn't acquired anymore */
MyReplicationSlot = NULL;
/*
* If some other backend ran this code currently with us, we might both
* try to free the same slot at the same time. Or we might try to delete
* a slot with a certain name while someone else was trying to create a
* slot with the same name.
* If some other backend ran this code concurrently with us, we might try
* to delete a slot with a certain name while someone else was trying to
* create a slot with the same name.
*/
LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
/* Search for the named slot and mark it active if we find it. */
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
{
volatile ReplicationSlot *vslot = s;
SpinLockAcquire(&s->mutex);
active = vslot->active;
vslot->active = true;
SpinLockRelease(&s->mutex);
slot = s;
break;
}
}
LWLockRelease(ReplicationSlotControlLock);
/* If we did not find the slot or it was already active, error out. */
if (slot == NULL)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("replication slot \"%s\" does not exist", name)));
if (active)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("replication slot \"%s\" is already active", name)));
/* Generate pathnames. */
sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
@ -417,34 +422,40 @@ ReplicationSlotDrop(const char *name)
/*
* Rename the slot directory on disk, so that we'll no longer recognize
* this as a valid slot. Note that if this fails, we've got to mark the
* slot inactive again before bailing out.
* slot inactive before bailing out. If we're dropping an ephemeral slot,
* we better never fail hard as the caller won't expect the slot to
* survive and this might get called during error handling.
*/
if (rename(path, tmppath) != 0)
if (rename(path, tmppath) == 0)
{
/*
* We need to fsync() the directory we just renamed and its parent to
* make sure that our changes are on disk in a crash-safe fashion. If
* fsync() fails, we can't be sure whether the changes are on disk or
* not. For now, we handle that by panicking;
* StartupReplicationSlots() will try to straighten it out after
* restart.
*/
START_CRIT_SECTION();
fsync_fname(tmppath, true);
fsync_fname("pg_replslot", true);
END_CRIT_SECTION();
}
else
{
volatile ReplicationSlot *vslot = slot;
bool fail_softly = slot->data.persistency == RS_EPHEMERAL;
SpinLockAcquire(&slot->mutex);
vslot->active = false;
SpinLockRelease(&slot->mutex);
ereport(ERROR,
ereport(fail_softly ? WARNING : ERROR,
(errcode_for_file_access(),
errmsg("could not rename \"%s\" to \"%s\": %m",
path, tmppath)));
}
/*
* We need to fsync() the directory we just renamed and its parent to make
* sure that our changes are on disk in a crash-safe fashion. If fsync()
* fails, we can't be sure whether the changes are on disk or not. For
* now, we handle that by panicking; StartupReplicationSlots() will
* try to straighten it out after restart.
*/
START_CRIT_SECTION();
fsync_fname(tmppath, true);
fsync_fname("pg_replslot", true);
END_CRIT_SECTION();
/*
* The slot is definitely gone. Lock out concurrent scans of the array
* long enough to kill it. It's OK to clear the active flag here without
@ -461,7 +472,7 @@ ReplicationSlotDrop(const char *name)
* Slot is dead and doesn't prevent resource removal anymore, recompute
* limits.
*/
ReplicationSlotsComputeRequiredXmin();
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
/*
@ -518,22 +529,50 @@ ReplicationSlotMarkDirty(void)
}
}
/*
 * Convert the currently acquired slot, which must not be RS_PERSISTENT yet,
 * into a RS_PERSISTENT slot, then mark it dirty and save it so that the
 * change is not lost in an eventual crash.
 */
void
ReplicationSlotPersist(void)
{
	ReplicationSlot *slot = MyReplicationSlot;
	volatile ReplicationSlot *vslot = slot;

	Assert(slot != NULL);
	Assert(slot->data.persistency != RS_PERSISTENT);

	/* flip the persistency level while holding the slot's spinlock */
	SpinLockAcquire(&slot->mutex);
	vslot->data.persistency = RS_PERSISTENT;
	SpinLockRelease(&slot->mutex);

	ReplicationSlotMarkDirty();
	ReplicationSlotSave();
}
/*
* Compute the oldest xmin across all slots and store it in the ProcArray.
*/
void
ReplicationSlotsComputeRequiredXmin(void)
ReplicationSlotsComputeRequiredXmin(bool already_locked)
{
int i;
TransactionId agg_xmin = InvalidTransactionId;
TransactionId agg_catalog_xmin = InvalidTransactionId;
Assert(ReplicationSlotCtl != NULL);
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
if (!already_locked)
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
TransactionId effective_xmin;
TransactionId effective_catalog_xmin;
if (!s->in_use)
continue;
@ -543,6 +582,7 @@ ReplicationSlotsComputeRequiredXmin(void)
SpinLockAcquire(&s->mutex);
effective_xmin = vslot->effective_xmin;
effective_catalog_xmin = vslot->effective_catalog_xmin;
SpinLockRelease(&s->mutex);
}
@ -551,10 +591,18 @@ ReplicationSlotsComputeRequiredXmin(void)
(!TransactionIdIsValid(agg_xmin) ||
TransactionIdPrecedes(effective_xmin, agg_xmin)))
agg_xmin = effective_xmin;
}
LWLockRelease(ReplicationSlotControlLock);
ProcArraySetReplicationSlotXmin(agg_xmin);
/* check the catalog xmin */
if (TransactionIdIsValid(effective_catalog_xmin) &&
(!TransactionIdIsValid(agg_catalog_xmin) ||
TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
agg_catalog_xmin = effective_catalog_xmin;
}
if (!already_locked)
LWLockRelease(ReplicationSlotControlLock);
ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
}
/*
@ -595,6 +643,110 @@ ReplicationSlotsComputeRequiredLSN(void)
XLogSetReplicationSlotMinimumLSN(min_required);
}
/*
 * Compute the oldest WAL LSN required by *logical* decoding slots.
 *
 * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
 * slots exist.
 *
 * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
 * ignores physical replication slots.
 *
 * The results aren't required frequently, so we don't maintain a precomputed
 * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
 */
XLogRecPtr
ReplicationSlotsComputeLogicalRestartLSN(void)
{
	XLogRecPtr	oldest = InvalidXLogRecPtr;
	int			slotno;

	if (max_replication_slots <= 0)
		return InvalidXLogRecPtr;

	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);

	for (slotno = 0; slotno < max_replication_slots; slotno++)
	{
		volatile ReplicationSlot *cur = &ReplicationSlotCtl->replication_slots[slotno];
		XLogRecPtr	slot_restart;

		/*
		 * Skip unused entries (in_use cannot change while
		 * ReplicationSlotControlLock is held) and slots without a database,
		 * which aren't logical slots.
		 */
		if (!cur->in_use || cur->data.database == InvalidOid)
			continue;

		/* read once, it's ok if it increases while we're checking */
		SpinLockAcquire(&cur->mutex);
		slot_restart = cur->data.restart_lsn;
		SpinLockRelease(&cur->mutex);

		/* keep the smallest restart_lsn seen so far */
		if (oldest == InvalidXLogRecPtr || slot_restart < oldest)
			oldest = slot_restart;
	}

	LWLockRelease(ReplicationSlotControlLock);

	return oldest;
}
/*
 * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
 * passed database oid.
 *
 * dboid: oid of the database whose slots are to be counted
 * nslots: output, number of in-use slots belonging to that database
 * nactive: output, how many of those slots are currently active
 *
 * Returns true if there are any slots referencing the database.  Both output
 * counters are reset to zero first, so they are valid even when false is
 * returned.
 */
bool
ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
{
	int			i;

	*nslots = *nactive = 0;

	if (max_replication_slots <= 0)
		return false;

	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
	for (i = 0; i < max_replication_slots; i++)
	{
		volatile ReplicationSlot *s;

		s = &ReplicationSlotCtl->replication_slots[i];

		/* cannot change while ReplicationSlotControlLock is held */
		if (!s->in_use)
			continue;

		/* not database specific, skip */
		if (s->data.database == InvalidOid)
			continue;

		/*
		 * not our database, skip -- this has to be a test of its own; if it
		 * were nested under the previous one, slots of every other database
		 * would be counted as well
		 */
		if (s->data.database != dboid)
			continue;

		/* count slots with spinlock held */
		SpinLockAcquire(&s->mutex);
		(*nslots)++;
		if (s->active)
			(*nactive)++;
		SpinLockRelease(&s->mutex);
	}
	LWLockRelease(ReplicationSlotControlLock);

	return *nslots > 0;
}
/*
* Check whether the server's configuration supports using replication
* slots.
@ -723,7 +875,7 @@ StartupReplicationSlots(XLogRecPtr checkPointRedo)
return;
/* Now that we have recovered all the data, compute replication xmin */
ReplicationSlotsComputeRequiredXmin();
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
}
@ -1050,8 +1202,19 @@ RestoreSlotFromDisk(const char *name)
memcpy(&slot->data, &cp.slotdata,
sizeof(ReplicationSlotPersistentData));
/* Don't restore the slot if it's not marked as persistent. */
if (slot->data.persistency != RS_PERSISTENT)
return;
/* initialize in memory state */
slot->effective_xmin = cp.slotdata.xmin;
slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
slot->candidate_catalog_xmin = InvalidTransactionId;
slot->candidate_xmin_lsn = InvalidXLogRecPtr;
slot->candidate_restart_lsn = InvalidXLogRecPtr;
slot->candidate_restart_valid = InvalidXLogRecPtr;
slot->in_use = true;
slot->active = false;

View File

@ -15,13 +15,13 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "access/htup_details.h"
#include "replication/slot.h"
#include "replication/logical.h"
#include "replication/logicalfuncs.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "replication/slot.h"
Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS);
Datum pg_drop_replication_slot(PG_FUNCTION_ARGS);
static void
check_permissions(void)
@ -54,7 +54,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
elog(ERROR, "return type must be a row type");
/* acquire replication slot, this will check for conflicting names*/
ReplicationSlotCreate(NameStr(*name), false);
ReplicationSlotCreate(NameStr(*name), false, RS_PERSISTENT);
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
@ -69,6 +69,68 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(result);
}
/*
 * SQL function for creating a new logical replication slot.
 *
 * Argument 0 is the slot name, argument 1 the name of the output plugin.
 * Returns a two-column row: the slot's name as text and the slot's
 * confirmed_flush LSN.
 *
 * The slot is created as RS_EPHEMERAL and only made persistent once the
 * decoding start point has been found and the result row has been built.
 */
Datum
pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
{
	Name		slotname = PG_GETARG_NAME(0);
	Name		pluginname = PG_GETARG_NAME(1);
	LogicalDecodingContext *decoding_ctx = NULL;
	TupleDesc	rowdesc;
	HeapTuple	rowtuple;
	Datum		retval;
	Datum		values[2];
	bool		nulls[2];

	if (get_call_result_type(fcinfo, NULL, &rowdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	check_permissions();

	CheckLogicalDecodingRequirements();

	Assert(!MyReplicationSlot);

	/*
	 * Acquire a logical decoding slot, this will check for conflicting
	 * names.
	 */
	ReplicationSlotCreate(NameStr(*slotname), true, RS_EPHEMERAL);

	/* Create logical decoding context, to build the initial snapshot. */
	decoding_ctx = CreateInitDecodingContext(NameStr(*pluginname), NIL,
											 logical_read_local_xlog_page,
											 NULL, NULL);

	/* build initial snapshot, might take a while */
	DecodingContextFindStartpoint(decoding_ctx);

	/* neither result column is ever NULL */
	memset(nulls, 0, sizeof(nulls));
	values[0] = CStringGetTextDatum(NameStr(MyReplicationSlot->data.name));
	values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);

	/* don't need the decoding context anymore */
	FreeDecodingContext(decoding_ctx);

	rowtuple = heap_form_tuple(rowdesc, values, nulls);
	retval = HeapTupleGetDatum(rowtuple);

	/* ok, slot is now fully created, mark it as persistent */
	ReplicationSlotPersist();
	ReplicationSlotRelease();

	PG_RETURN_DATUM(retval);
}
/*
* SQL function for dropping a replication slot.
*/
@ -92,7 +154,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_REPLICATION_SLOTS_COLS 6
#define PG_GET_REPLICATION_SLOTS_COLS 8
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
@ -134,15 +196,16 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
for (slotno = 0; slotno < max_replication_slots; slotno++)
{
ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
Datum values[PG_STAT_GET_REPLICATION_SLOTS_COLS];
bool nulls[PG_STAT_GET_REPLICATION_SLOTS_COLS];
Datum values[PG_GET_REPLICATION_SLOTS_COLS];
bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
TransactionId xmin;
TransactionId catalog_xmin;
XLogRecPtr restart_lsn;
bool active;
Oid database;
NameData slot_name;
NameData plugin;
int i;
SpinLockAcquire(&slot->mutex);
@ -154,9 +217,11 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
else
{
xmin = slot->data.xmin;
catalog_xmin = slot->data.catalog_xmin;
database = slot->data.database;
restart_lsn = slot->data.restart_lsn;
namecpy(&slot_name, &slot->data.name);
namecpy(&plugin, &slot->data.plugin);
active = slot->active;
}
@ -166,19 +231,34 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
i = 0;
values[i++] = NameGetDatum(&slot_name);
if (database == InvalidOid)
nulls[i++] = true;
else
values[i++] = NameGetDatum(&plugin);
if (database == InvalidOid)
values[i++] = CStringGetTextDatum("physical");
else
values[i++] = CStringGetTextDatum("logical");
if (database == InvalidOid)
nulls[i++] = true;
else
values[i++] = database;
values[i++] = BoolGetDatum(active);
if (xmin != InvalidTransactionId)
values[i++] = TransactionIdGetDatum(xmin);
else
nulls[i++] = true;
if (catalog_xmin != InvalidTransactionId)
values[i++] = TransactionIdGetDatum(catalog_xmin);
else
nulls[i++] = true;
if (restart_lsn != InvalidTransactionId)
values[i++] = LSNGetDatum(restart_lsn);
else

View File

@ -1147,7 +1147,7 @@ XLogWalRcvSendHSFeedback(bool immed)
* everything else has been checked.
*/
if (hot_standby_feedback)
xmin = GetOldestXmin(true, false);
xmin = GetOldestXmin(NULL, false);
else
xmin = InvalidTransactionId;

View File

@ -55,6 +55,7 @@
#include "replication/basebackup.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
@ -434,7 +435,7 @@ StartReplication(StartReplicationCmd *cmd)
if (MyReplicationSlot->data.database != InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
(errmsg("cannot use a replication slot created for changeset extraction for streaming replication"))));
(errmsg("cannot use a logical replication slot for physical replication"))));
}
/*
@ -656,7 +657,9 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
sendTimeLineIsHistoric = false;
sendTimeLine = ThisTimeLineID;
ReplicationSlotCreate(cmd->slotname, cmd->kind == REPLICATION_KIND_LOGICAL);
ReplicationSlotCreate(cmd->slotname,
cmd->kind == REPLICATION_KIND_LOGICAL,
RS_PERSISTENT);
initStringInfo(&output_message);
@ -766,7 +769,7 @@ exec_replication_command(const char *cmd_string)
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
StartReplication(cmd);
else
elog(ERROR, "cannot handle changeset extraction yet");
elog(ERROR, "cannot handle logical decoding yet");
break;
}
@ -1017,7 +1020,7 @@ ProcessStandbyReplyMessage(void)
if (MyReplicationSlot && flushPtr != InvalidXLogRecPtr)
{
if (MyReplicationSlot->data.database != InvalidOid)
elog(ERROR, "cannot handle changeset extraction yet");
elog(ERROR, "cannot handle logical decoding yet");
else
PhysicalConfirmReceivedLocation(flushPtr);
}
@ -1050,7 +1053,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin)
if (changed)
{
ReplicationSlotMarkDirty();
ReplicationSlotsComputeRequiredXmin();
ReplicationSlotsComputeRequiredXmin(false);
}
}

View File

@ -50,11 +50,13 @@
#include "access/transam.h"
#include "access/xact.h"
#include "access/twophase.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@ -84,6 +86,8 @@ typedef struct ProcArrayStruct
/* oldest xmin of any replication slot */
TransactionId replication_slot_xmin;
/* oldest catalog xmin of any replication slot */
TransactionId replication_slot_catalog_xmin;
/*
* We declare pgprocnos[] as 1 entry because C wants a fixed-size array,
@ -1108,21 +1112,22 @@ TransactionIdIsActive(TransactionId xid)
* GetOldestXmin -- returns oldest transaction that was running
* when any current transaction was started.
*
* If allDbs is TRUE then all backends are considered; if allDbs is FALSE
* then only backends running in my own database are considered.
* If rel is NULL or a shared relation, all backends are considered, otherwise
* only backends running in this database are considered.
*
* If ignoreVacuum is TRUE then backends with the PROC_IN_VACUUM flag set are
* ignored.
*
* This is used by VACUUM to decide which deleted tuples must be preserved
* in a table. allDbs = TRUE is needed for shared relations, but allDbs =
* FALSE is sufficient for non-shared relations, since only backends in my
* own database could ever see the tuples in them. Also, we can ignore
* concurrently running lazy VACUUMs because (a) they must be working on other
* tables, and (b) they don't need to do snapshot-based lookups.
* This is used by VACUUM to decide which deleted tuples must be preserved in
* the passed in table. For shared relations backends in all databases must be
* considered, but for non-shared relations that's not required, since only
* backends in my own database could ever see the tuples in them. Also, we can
* ignore concurrently running lazy VACUUMs because (a) they must be working
* on other tables, and (b) they don't need to do snapshot-based lookups.
*
* This is also used to determine where to truncate pg_subtrans. allDbs
* must be TRUE for that case, and ignoreVacuum FALSE.
* This is also used to determine where to truncate pg_subtrans. For that
* backends in all databases have to be considered, so rel = NULL has to be
* passed in.
*
* Note: we include all currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
@ -1133,7 +1138,7 @@ TransactionIdIsActive(TransactionId xid)
* backwards on repeated calls. The calculated value is conservative, so that
* anything older is definitely not considered as running by anyone anymore,
* but the exact value calculated depends on a number of things. For example,
* if allDbs is FALSE and there are no transactions running in the current
* if rel is a non-shared relation and there are no transactions running in the current
* database, GetOldestXmin() returns latestCompletedXid. If a transaction
* begins after that, its xmin will include in-progress transactions in other
* databases that started earlier, so another call will return a lower value.
@ -1152,12 +1157,22 @@ TransactionIdIsActive(TransactionId xid)
* GetOldestXmin() move backwards, with no consequences for data integrity.
*/
TransactionId
GetOldestXmin(bool allDbs, bool ignoreVacuum)
GetOldestXmin(Relation rel, bool ignoreVacuum)
{
ProcArrayStruct *arrayP = procArray;
TransactionId result;
int index;
bool allDbs;
volatile TransactionId replication_slot_xmin = InvalidTransactionId;
volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
/*
* If we're not computing a relation specific limit, or if a shared
* relation has been passed in, backends in all databases have to be
* considered.
*/
allDbs = rel == NULL || rel->rd_rel->relisshared;
/* Cannot look for individual databases during recovery */
Assert(allDbs || !RecoveryInProgress());
@ -1180,6 +1195,13 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
volatile PGPROC *proc = &allProcs[pgprocno];
volatile PGXACT *pgxact = &allPgXact[pgprocno];
/*
* Backend is doing logical decoding which manages xmin separately,
* check below.
*/
if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
continue;
if (ignoreVacuum && (pgxact->vacuumFlags & PROC_IN_VACUUM))
continue;
@ -1211,6 +1233,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
/* fetch into volatile var while ProcArrayLock is held */
replication_slot_xmin = procArray->replication_slot_xmin;
replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
if (RecoveryInProgress())
{
@ -1259,6 +1282,18 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
NormalTransactionIdPrecedes(replication_slot_xmin, result))
result = replication_slot_xmin;
/*
* After locks have been released and defer_cleanup_age has been applied,
* check whether we need to back up further to make logical decoding
* possible. We need to do so if we're computing the global limit (rel =
* NULL) or if the passed relation is a catalog relation of some kind.
*/
if ((rel == NULL ||
RelationIsAccessibleInLogicalDecoding(rel)) &&
TransactionIdIsValid(replication_slot_catalog_xmin) &&
NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
result = replication_slot_catalog_xmin;
return result;
}
@ -1313,6 +1348,8 @@ GetMaxSnapshotSubxidCount(void)
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions, except those running LAZY VACUUM). This is
* the same computation done by GetOldestXmin(NULL, true).
* RecentGlobalDataXmin: the global xmin for non-catalog tables
* >= RecentGlobalXmin
*
* Note: this function should probably not be called with an argument that's
* not statically allocated (see xip allocation below).
@ -1329,6 +1366,7 @@ GetSnapshotData(Snapshot snapshot)
int subcount = 0;
bool suboverflowed = false;
volatile TransactionId replication_slot_xmin = InvalidTransactionId;
volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
Assert(snapshot != NULL);
@ -1397,6 +1435,13 @@ GetSnapshotData(Snapshot snapshot)
volatile PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId xid;
/*
* Backend is doing logical decoding which manages xmin
* separately, check below.
*/
if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
continue;
/* Ignore procs running LAZY VACUUM */
if (pgxact->vacuumFlags & PROC_IN_VACUUM)
continue;
@ -1509,6 +1554,7 @@ GetSnapshotData(Snapshot snapshot)
/* fetch into volatile var while ProcArrayLock is held */
replication_slot_xmin = procArray->replication_slot_xmin;
replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
if (!TransactionIdIsValid(MyPgXact->xmin))
MyPgXact->xmin = TransactionXmin = xmin;
@ -1533,6 +1579,17 @@ GetSnapshotData(Snapshot snapshot)
NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
RecentGlobalXmin = replication_slot_xmin;
/* Non-catalog tables can be vacuumed if older than this xid */
RecentGlobalDataXmin = RecentGlobalXmin;
/*
* Check whether there's a replication slot requiring an older catalog
* xmin.
*/
if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
RecentGlobalXmin = replication_slot_catalog_xmin;
RecentXmin = xmin;
snapshot->xmin = xmin;
@ -1633,9 +1690,11 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid)
* Similar to GetSnapshotData but returns more information. We include
* all PGXACTs with an assigned TransactionId, even VACUUM processes.
*
* We acquire XidGenLock, but the caller is responsible for releasing it.
* This ensures that no new XIDs enter the proc array until the caller has
* WAL-logged this snapshot, and releases the lock.
* We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
* releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
* array until the caller has WAL-logged this snapshot, and releases the
* lock. Acquiring ProcArrayLock ensures that no transactions commit until the
* lock is released.
*
* The returned data structure is statically allocated; caller should not
* modify it, and must not assume it is valid past the next call.
@ -1770,6 +1829,15 @@ GetRunningTransactionData(void)
}
}
/*
* It's important *not* to include the limits set by slots here because
* snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
* were to be included here the initial value could never increase because
* of a circular dependency where slots only increase their limits when
* running xacts increases oldestRunningXid and running xacts only
* increases if slots do.
*/
CurrentRunningXacts->xcnt = count - subcount;
CurrentRunningXacts->subxcnt = subcount;
CurrentRunningXacts->subxid_overflow = suboverflowed;
@ -1777,13 +1845,12 @@ GetRunningTransactionData(void)
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
/* We don't release XidGenLock here, the caller is responsible for that */
LWLockRelease(ProcArrayLock);
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
/* We don't release the locks here, the caller is responsible for that */
return CurrentRunningXacts;
}
@ -1852,6 +1919,92 @@ GetOldestActiveTransactionId(void)
return oldestRunningXid;
}
/*
* GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
*
* Returns the oldest xid that we can guarantee not to have been affected by
* vacuum, i.e. no rows >= that xid have been vacuumed away unless the
* transaction aborted. Note that the value can (and most of the time will) be
* much more conservative than what really has been affected by vacuum, but we
* currently don't have better data available.
*
* This is useful to initalize the cutoff xid after which a new changeset
* extraction replication slot can start decoding changes.
*
* Must be called with ProcArrayLock held either shared or exclusively,
* although most callers will want to use exclusive mode since it is expected
* that the caller will immediately use the xid to peg the xmin horizon.
*/
TransactionId
GetOldestSafeDecodingTransactionId(void)
{
ProcArrayStruct *arrayP = procArray;
TransactionId oldestSafeXid;
int index;
bool recovery_in_progress = RecoveryInProgress();
Assert(LWLockHeldByMe(ProcArrayLock));
/*
* Acquire XidGenLock, so no transactions can acquire an xid while we're
* running. If no transaction with xid were running concurrently a new xid
* could influence the the RecentXmin et al.
*
* We initialize the computation to nextXid since that's guaranteed to be
* a safe, albeit pessimal, value.
*/
LWLockAcquire(XidGenLock, LW_SHARED);
oldestSafeXid = ShmemVariableCache->nextXid;
/*
* If there's already a slot pegging the xmin horizon, we can start with
* that value, it's guaranteed to be safe since it's computed by this
* routine initally and has been enforced since.
*/
if (TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
oldestSafeXid))
oldestSafeXid = procArray->replication_slot_catalog_xmin;
/*
* If we're not in recovery, we walk over the procarray and collect the
* lowest xid. Since we're called with ProcArrayLock held and have
* acquired XidGenLock, no entries can vanish concurrently, since
* PGXACT->xid is only set with XidGenLock held and only cleared with
* ProcArrayLock held.
*
* In recovery we can't lower the safe value besides what we've computed
* above, so we'll have to wait a bit longer there. We unfortunately can
* *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
* machinery can miss values and return an older value than is safe.
*/
if (!recovery_in_progress)
{
/*
* Spin over procArray collecting all min(PGXACT->xid)
*/
for (index = 0; index < arrayP->numProcs; index++)
{
int pgprocno = arrayP->pgprocnos[index];
volatile PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
xid = pgxact->xid;
if (!TransactionIdIsNormal(xid))
continue;
if (TransactionIdPrecedes(xid, oldestSafeXid))
oldestSafeXid = xid;
}
}
LWLockRelease(XidGenLock);
return oldestSafeXid;
}
/*
* GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
* delaying checkpoint because they have critical actions in progress.
@ -2523,10 +2676,39 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
* replication slots.
*/
void
ProcArraySetReplicationSlotXmin(TransactionId xmin)
ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
bool already_locked)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/* a caller passing already_locked = true must really hold ProcArrayLock */
Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
if (!already_locked)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/* publish both horizons while ProcArrayLock is held */
procArray->replication_slot_xmin = xmin;
procArray->replication_slot_catalog_xmin = catalog_xmin;
/* only release the lock if it was acquired in this function */
if (!already_locked)
LWLockRelease(ProcArrayLock);
}
/*
* ProcArrayGetReplicationSlotXmin
*
* Return the current slot xmin limits. That's useful to be able to remove
* data that's older than those limits.
*/
void
ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
								TransactionId *catalog_xmin)
{
	TransactionId slot_xmin;
	TransactionId slot_catalog_xmin;

	/* Take a consistent copy of both limits under a shared lock. */
	LWLockAcquire(ProcArrayLock, LW_SHARED);
	slot_xmin = procArray->replication_slot_xmin;
	slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
	LWLockRelease(ProcArrayLock);

	/* Either output pointer may be NULL if the caller is not interested. */
	if (xmin)
		*xmin = slot_xmin;
	if (catalog_xmin)
		*catalog_xmin = slot_catalog_xmin;
}

View File

@ -800,7 +800,9 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record)
/*
* Log details of the current snapshot to WAL. This allows the snapshot state
* to be reconstructed on the standby.
* to be reconstructed on the standby and for logical decoding.
*
* This is used for Hot Standby as follows:
*
* We can move directly to STANDBY_SNAPSHOT_READY at startup if we
* start from a shutdown checkpoint because we know nothing was running
@ -854,6 +856,12 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record)
* Zero xids should no longer be possible, but we may be replaying WAL
* from a time when they were possible.
*
* For logical decoding only the running xacts information is needed;
* there's no need to look at the locking information, but it's logged anyway,
* as there's no independent knob to just enable logical decoding. For
* details of how this is used, check snapbuild.c's introductory comment.
*
*
* Returns the RecPtr of the last inserted record.
*/
XLogRecPtr
@ -879,8 +887,28 @@ LogStandbySnapshot(void)
* record we write, because standby will open up when it sees this.
*/
running = GetRunningTransactionData();
/*
* GetRunningTransactionData() acquired ProcArrayLock, we must release
* it. For Hot Standby this can be done before inserting the WAL record
* because ProcArrayApplyRecoveryInfo() rechecks the commit status using
* the clog. For logical decoding, though, the lock can't be released
* early because the clog might be "in the future" from the POV of the
* historic snapshot. This would allow for situations where we're waiting
* for the end of a transaction listed in the xl_running_xacts record
* which, according to the WAL, has committed before the xl_running_xacts
* record. Fortunately this routine isn't executed frequently, and it's
* only a shared lock.
*/
if (wal_level < WAL_LEVEL_LOGICAL)
LWLockRelease(ProcArrayLock);
recptr = LogCurrentRunningXacts(running);
/* Release lock if we kept it longer ... */
if (wal_level >= WAL_LEVEL_LOGICAL)
LWLockRelease(ProcArrayLock);
/* GetRunningTransactionData() acquired XidGenLock, we must release it */
LWLockRelease(XidGenLock);

View File

@ -781,10 +781,6 @@ ProcKill(int code, Datum arg)
/* Make sure we're out of the sync rep lists */
SyncRepCleanupAtProcExit();
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
#ifdef USE_ASSERT_CHECKING
if (assert_enabled)
{
@ -803,6 +799,10 @@ ProcKill(int code, Datum arg)
*/
LWLockReleaseAll();
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
/*
* Clear MyProc first; then disown the process latch. This is so that
* signal handlers won't try to clear the process latch after it's no

View File

@ -55,6 +55,7 @@
#include "pg_getopt.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "replication/slot.h"
#include "replication/walsender.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
@ -3853,6 +3854,16 @@ PostgresMain(int argc, char *argv[],
if (am_walsender)
WalSndErrorCleanup();
/*
* We can't release replication slots inside AbortTransaction() as we
* need to be able to start and abort transactions while having a slot
* acquired. But we never need to hold them across top level errors,
* so releasing here is fine. There's another cleanup in ProcKill()
* ensuring we'll correctly cleanup on FATAL errors as well.
*/
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.

View File

@ -512,7 +512,7 @@ RegisterSnapshotInvalidation(Oid dbId, Oid relId)
* Only the local caches are flushed; this does not transmit the message
* to other backends.
*/
static void
void
LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
if (msg->id >= 0)
@ -596,7 +596,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
* since that tells us we've lost some shared-inval messages and hence
* don't know what needs to be invalidated.
*/
static void
void
InvalidateSystemCaches(void)
{
int i;

View File

@ -73,6 +73,7 @@
#include "utils/memutils.h"
#include "utils/relmapper.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
@ -235,7 +236,7 @@ static void formrdesc(const char *relationName, Oid relationReltype,
bool isshared, bool hasoids,
int natts, const FormData_pg_attribute *attrs);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
@ -274,12 +275,13 @@ static void unlink_initfile(const char *initfilename);
* and must eventually be freed with heap_freetuple.
*/
static HeapTuple
ScanPgRelation(Oid targetRelId, bool indexOK)
ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
{
HeapTuple pg_class_tuple;
Relation pg_class_desc;
SysScanDesc pg_class_scan;
ScanKeyData key[1];
Snapshot snapshot;
/*
* If something goes wrong during backend startup, we might find ourselves
@ -305,9 +307,20 @@ ScanPgRelation(Oid targetRelId, bool indexOK)
* scan by setting indexOK == false.
*/
pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
/*
* The caller might need a tuple that's newer than the one the historic
* snapshot sees; currently the only case requiring this is looking up the
* relfilenode of non-mapped system relations during decoding.
*/
if (force_non_historic)
snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
else
snapshot = GetCatalogSnapshot(RelationRelationId);
pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
indexOK && criticalRelcachesBuilt,
NULL,
snapshot,
1, key);
pg_class_tuple = systable_getnext(pg_class_scan);
@ -836,7 +849,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
/*
* find the tuple in pg_class corresponding to the given relation id
*/
pg_class_tuple = ScanPgRelation(targetRelId, true);
pg_class_tuple = ScanPgRelation(targetRelId, true, false);
/*
* if no such tuple exists, return NULL
@ -989,8 +1002,42 @@ RelationInitPhysicalAddr(Relation relation)
relation->rd_node.dbNode = InvalidOid;
else
relation->rd_node.dbNode = MyDatabaseId;
if (relation->rd_rel->relfilenode)
{
/*
* Even if we are using a decoding snapshot that doesn't represent
* the current state of the catalog we need to make sure the
* filenode points to the current file since the older file will
* be gone (or truncated). The new file will still contain older
* rows so lookups in them will work correctly. This wouldn't work
* correctly if rewrites were allowed to change the schema in a
* noncompatible way, but those are prevented both on catalog
* tables and on user tables declared as additional catalog
* tables.
*/
if (HistoricSnapshotActive()
&& RelationIsAccessibleInLogicalDecoding(relation)
&& IsTransactionState())
{
HeapTuple phys_tuple;
Form_pg_class physrel;
phys_tuple = ScanPgRelation(RelationGetRelid(relation),
RelationGetRelid(relation) != ClassOidIndexId,
true);
if (!HeapTupleIsValid(phys_tuple))
elog(ERROR, "could not find pg_class entry for %u",
RelationGetRelid(relation));
physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
relation->rd_rel->reltablespace = physrel->reltablespace;
relation->rd_rel->relfilenode = physrel->relfilenode;
heap_freetuple(phys_tuple);
}
relation->rd_node.relNode = relation->rd_rel->relfilenode;
}
else
{
/* Consult the relation mapper */
@ -1742,7 +1789,7 @@ RelationReloadIndexInfo(Relation relation)
* for pg_class_oid_index ...
*/
indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
if (!HeapTupleIsValid(pg_class_tuple))
elog(ERROR, "could not find pg_class tuple for index %u",
RelationGetRelid(relation));

View File

@ -19,6 +19,10 @@
* have regd_count = 1 and are counted in RegisteredSnapshots, but are not
* tracked by any resource owner.
*
* The same is true for historic snapshots used during logical decoding;
* their lifetime is managed separately (as they live longer than one xact.c
* transaction).
*
* These arrangements let us reset MyPgXact->xmin when there are no snapshots
* referenced by this transaction. (One possible improvement would be to be
* able to advance Xmin when the snapshot with the earliest Xmin is no longer
@ -69,12 +73,13 @@
*/
static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
static SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
/* Pointers to valid snapshots */
static Snapshot CurrentSnapshot = NULL;
static Snapshot SecondarySnapshot = NULL;
static Snapshot CatalogSnapshot = NULL;
static Snapshot HistoricSnapshot = NULL;
/*
* Staleness detection for CatalogSnapshot.
@ -86,13 +91,18 @@ static bool CatalogSnapshotStale = true;
* for the convenience of TransactionIdIsInProgress: even in bootstrap
* mode, we don't want it to say that BootstrapTransactionId is in progress.
*
* RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no
* one tries to use a stale value. Readers should ensure that it has been set
* to something else before using it.
* RecentGlobalXmin and RecentGlobalDataXmin are initialized to
* InvalidTransactionId, to ensure that no one tries to use a stale
* value. Readers should ensure that it has been set to something else
* before using it.
*/
TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId;
TransactionId RecentGlobalXmin = InvalidTransactionId;
TransactionId RecentGlobalDataXmin = InvalidTransactionId;
/* (table, ctid) => (cmin, cmax) mapping during timetravel */
static HTAB *tuplecid_data = NULL;
/*
* Elements of the active snapshot stack.
@ -158,6 +168,18 @@ static void SnapshotResetXmin(void);
Snapshot
GetTransactionSnapshot(void)
{
/*
* Return historic snapshot if doing logical decoding. We'll never
* need a non-historic transaction snapshot in this (sub-)transaction, so
* there's no need to be careful to set one up for later calls to
* GetTransactionSnapshot().
*/
if (HistoricSnapshotActive())
{
Assert(!FirstSnapshotSet);
return HistoricSnapshot;
}
/* First call in transaction? */
if (!FirstSnapshotSet)
{
@ -214,6 +236,13 @@ GetTransactionSnapshot(void)
Snapshot
GetLatestSnapshot(void)
{
/*
* So far there are no cases requiring support for GetLatestSnapshot()
* during logical decoding, but it wouldn't be hard to add if
* required.
*/
Assert(!HistoricSnapshotActive());
/* If first call in transaction, go ahead and set the xact snapshot */
if (!FirstSnapshotSet)
return GetTransactionSnapshot();
@ -230,6 +259,26 @@ GetLatestSnapshot(void)
*/
Snapshot
GetCatalogSnapshot(Oid relid)
{
	/*
	 * While logical decoding has a historic snapshot installed, catalog
	 * scans must use it. Callers that need an up-to-date view despite that
	 * have to call GetNonHistoricCatalogSnapshot() directly.
	 */
	return HistoricSnapshotActive()
		? HistoricSnapshot
		: GetNonHistoricCatalogSnapshot(relid);
}
/*
* GetNonHistoricCatalogSnapshot
* Get a snapshot that is sufficiently up-to-date for scan of the system
* catalog with the specified OID, even while historic snapshots are set
* up.
*/
Snapshot
GetNonHistoricCatalogSnapshot(Oid relid)
{
/*
* If the caller is trying to scan a relation that has no syscache,
@ -303,6 +352,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid)
Assert(RegisteredSnapshots == 0);
Assert(FirstXactSnapshot == NULL);
Assert(HistoricSnapshotActive());
/*
* Even though we are not going to use the snapshot it computes, we must
@ -796,7 +846,7 @@ AtEOXact_Snapshot(bool isCommit)
* Returns the token (the file name) that can be used to import this
* snapshot.
*/
static char *
char *
ExportSnapshot(Snapshot snapshot)
{
TransactionId topXid;
@ -1258,3 +1308,45 @@ ThereAreNoPriorRegisteredSnapshots(void)
return false;
}
/*
* Set up a snapshot that replaces normal catalog snapshots and allows catalog
* access to behave just like it did at a certain point in the past.
*
* Needed for logical decoding.
*/
void
SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
{
	Assert(historic_snapshot != NULL);

	/* remember the (cmin, cmax) lookup hash that goes with the snapshot */
	tuplecid_data = tuplecids;

	/* from here on HistoricSnapshotActive() reports true */
	HistoricSnapshot = historic_snapshot;
}
/*
* Make catalog snapshots behave normally again.
*/
void
TeardownHistoricSnapshot(bool is_error)
{
	/*
	 * is_error is not consulted here: the snapshot pointer and the
	 * (cmin, cmax) hash are forgotten unconditionally.
	 */
	tuplecid_data = NULL;
	HistoricSnapshot = NULL;
}
bool
HistoricSnapshotActive(void)
{
return HistoricSnapshot != NULL;
}
/*
 * Return the (cmin, cmax) lookup hash registered together with the current
 * historic snapshot. May only be called while a historic snapshot is active.
 */
HTAB *
HistoricSnapshotGetTupleCids(void)
{
	HTAB	   *cid_map;

	Assert(HistoricSnapshotActive());

	cid_map = tuplecid_data;
	return cid_map;
}

View File

@ -62,6 +62,9 @@
#include "access/xact.h"
#include "storage/bufmgr.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/combocid.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
@ -73,7 +76,6 @@ SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast};
/* local functions */
static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
/*
* SetHintBits()
*
@ -1545,3 +1547,163 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
*/
return true;
}
/*
* Check whether the transaction id 'xid' is in the pre-sorted array 'xip'.
*/
static bool
TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
{
	void	   *match;

	/* binary search; relies on xip being sorted according to xidComparator */
	match = bsearch(&xid, xip, num, sizeof(TransactionId), xidComparator);

	return match != NULL;
}
/*
* See the comments for HeapTupleSatisfiesMVCC for the semantics this function
* obeys.
*
* Only usable on tuples from catalog tables!
*
* We don't need to support HEAP_MOVED_(IN|OFF) for now because we only support
* reading catalog pages which couldn't have been created in an older version.
*
* We don't set any hint bits in here as it seems unlikely to be beneficial as
* those should already be set by normal access and it seems to be too
* dangerous to do so as the semantics of doing so during timetravel are more
* complicated than when dealing "only" with the present.
*/
bool
HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
Buffer buffer)
{
HeapTupleHeader tuple = htup->t_data;
TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
/*
 * Step 1: decide whether the inserting transaction (xmin) is visible.
 * Every branch either returns false (tuple invisible) or falls through to
 * the xmax checks below.
 */
/* inserting transaction aborted */
if (HeapTupleHeaderXminInvalid(tuple))
{
Assert(!TransactionIdDidCommit(xmin));
return false;
}
/* check if it's one of our txids, toplevel is also in there */
else if (TransactionIdInArray(xmin, snapshot->subxip, snapshot->subxcnt))
{
bool resolved;
CommandId cmin = HeapTupleHeaderGetRawCommandId(tuple);
CommandId cmax = InvalidCommandId;
/*
 * Another transaction might have (tried to) delete this tuple, or
 * cmin/cmax was stored in a combocid. So we need to look up the
 * actual values externally.
 */
resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
htup, buffer,
&cmin, &cmax);
if (!resolved)
elog(ERROR, "could not resolve cmin/cmax of catalog tuple");
Assert(cmin != InvalidCommandId);
if (cmin >= snapshot->curcid)
return false; /* inserted after scan started */
/* fall through */
}
/* committed before our xmin horizon. Do a normal visibility check. */
else if (TransactionIdPrecedes(xmin, snapshot->xmin))
{
/* a set "xmin committed" hint bit must agree with the clog */
Assert(!(HeapTupleHeaderXminCommitted(tuple) &&
!TransactionIdDidCommit(xmin)));
/* check for hint bit first, consult clog afterwards */
if (!HeapTupleHeaderXminCommitted(tuple) &&
!TransactionIdDidCommit(xmin))
return false;
/* fall through */
}
/* beyond our xmax horizon, i.e. invisible */
else if (TransactionIdFollowsOrEquals(xmin, snapshot->xmax))
{
return false;
}
/* check if it's a committed transaction in [xmin, xmax) */
else if(TransactionIdInArray(xmin, snapshot->xip, snapshot->xcnt))
{
/* fall through */
}
/*
 * none of the above, i.e. between [xmin, xmax) but hasn't
 * committed. I.e. invisible.
 */
else
{
return false;
}
/*
 * Step 2: at this point we know xmin is visible, go on to check xmax.
 * From here on "return true" means the tuple is visible (the deletion is
 * not visible to the snapshot) and "return false" means it is not.
 */
/* xid invalid or aborted */
if (tuple->t_infomask & HEAP_XMAX_INVALID)
return true;
/* locked tuples are always visible */
else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
/*
 * We can see multis here if we're looking at user tables or if
 * somebody SELECT ... FOR SHARE/UPDATE a system table. In that case
 * replace the raw xmax by the xid HeapTupleGetUpdateXid() returns.
 */
else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
xmax = HeapTupleGetUpdateXid(tuple);
}
/* check if it's one of our txids, toplevel is also in there */
if (TransactionIdInArray(xmax, snapshot->subxip, snapshot->subxcnt))
{
bool resolved;
CommandId cmin;
CommandId cmax = HeapTupleHeaderGetRawCommandId(tuple);
/* Lookup actual cmin/cmax values */
resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
htup, buffer,
&cmin, &cmax);
if (!resolved)
elog(ERROR, "could not resolve combocid to cmax");
Assert(cmax != InvalidCommandId);
if (cmax >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
}
/* below xmin horizon, normal transaction state is valid */
else if (TransactionIdPrecedes(xmax, snapshot->xmin))
{
/* a set "xmax committed" hint bit must agree with the clog */
Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED &&
!TransactionIdDidCommit(xmax)));
/* check hint bit first */
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
return false;
/* check clog */
return !TransactionIdDidCommit(xmax);
}
/* above xmax horizon, we cannot possibly see the deleting transaction */
else if (TransactionIdFollowsOrEquals(xmax, snapshot->xmax))
return true;
/* xmax is between [xmin, xmax), check known committed array */
else if (TransactionIdInArray(xmax, snapshot->xip, snapshot->xcnt))
return false;
/* xmax is between [xmin, xmax), but known not to have committed yet */
else
return true;
}

View File

@ -198,7 +198,10 @@ static const char *subdirs[] = {
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp"
"pg_stat_tmp",
"pg_llog",
"pg_llog/snapshots",
"pg_llog/mappings"
};

View File

@ -164,8 +164,7 @@ extern void heap_restrpos(HeapScanDesc scan);
extern void heap_sync(Relation relation);
/* in heap/pruneheap.c */
extern void heap_page_prune_opt(Relation relation, Buffer buffer,
TransactionId OldestXmin);
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
bool report_stats, TransactionId *latestRemovedXid);

View File

@ -48,7 +48,7 @@
* the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to
* these, too.
*/
/* 0x00 is free, was XLOG_HEAP2_FREEZE */
#define XLOG_HEAP2_REWRITE 0x00
#define XLOG_HEAP2_CLEAN 0x10
#define XLOG_HEAP2_FREEZE_PAGE 0x20
#define XLOG_HEAP2_CLEANUP_INFO 0x30
@ -332,6 +332,17 @@ typedef struct xl_heap_new_cid
xl_heaptid target;
} xl_heap_new_cid;
/*
 * logical rewrite xlog record header
 *
 * NOTE(review): 'offset' is declared as off_t, whose width is platform
 * dependent; confirm that is acceptable for a struct used as an xlog record
 * header.
 */
typedef struct xl_heap_rewrite_mapping
{
TransactionId mapped_xid; /* xid that might need to see the row */
Oid mapped_db; /* DbOid or InvalidOid for shared rels */
Oid mapped_rel; /* Oid of the mapped relation */
off_t offset; /* How far have we written so far */
uint32 num_mappings; /* Number of in-memory mappings */
XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */
} xl_heap_rewrite_mapping;
#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
@ -341,6 +352,7 @@ extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r);
extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
TransactionId latestRemovedXid);

View File

@ -14,12 +14,14 @@
#define REWRITE_HEAP_H
#include "access/htup.h"
#include "storage/itemptr.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"
/* struct definition is private to rewriteheap.c */
typedef struct RewriteStateData *RewriteState;
extern RewriteState begin_heap_rewrite(Relation NewHeap,
extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
MultiXactId MultiXactCutoff, bool use_wal);
extern void end_heap_rewrite(RewriteState state);
@ -27,4 +29,29 @@ extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
HeapTuple newTuple);
extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
/*
 * On-Disk data format for an individual logical rewrite mapping.
 *
 * Because this is an on-disk format, the order and types of the fields must
 * not be changed casually.
 */
typedef struct LogicalRewriteMappingData
{
RelFileNode old_node; /* presumably the relfilenode before the rewrite - confirm in rewriteheap.c */
RelFileNode new_node; /* presumably the relfilenode after the rewrite - confirm in rewriteheap.c */
ItemPointerData old_tid; /* presumably the tuple's tid in old_node - confirm */
ItemPointerData new_tid; /* presumably the tuple's tid in new_node - confirm */
} LogicalRewriteMappingData;
/* ---
* The filename consists of the following components, separated by dashes
* (except that the two halves of the LSN are joined by an underscore):
* 1) database oid or InvalidOid for shared relations
* 2) the oid of the relation
* 3) upper 32bit of the LSN at which a rewrite started
* 4) lower 32bit of the LSN at which a rewrite started
* 5) xid we are mapping for
* 6) xid of the xact performing the mapping
* ---
*/
#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x"
void CheckPointLogicalRewriteHeap(void);
#endif /* REWRITE_HEAP_H */

View File

@ -63,6 +63,11 @@
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
(int32) ((id1) - (id2)) < 0)
/*
 * compare two XIDs already known to be normal; this is a macro for speed
 *
 * Evaluates to true when the difference id1 - id2, reinterpreted as a signed
 * 32 bit integer, is positive. Both arguments are asserted to be normal
 * XIDs. Note that each argument is evaluated more than once.
 */
#define NormalTransactionIdFollows(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
(int32) ((id1) - (id2)) > 0)
/* ----------
* Object ID (OID) zero is InvalidOid.
*

View File

@ -98,9 +98,34 @@
/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
/* Size of an indirect datum that contains an indirect TOAST pointer */
/* Size of an indirect datum that contains a standard TOAST pointer */
#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect))
/*
* Testing whether an externally-stored value is compressed now requires
* comparing extsize (the actual length of the external data) to rawsize
* (the original uncompressed datum's size). The latter includes VARHDRSZ
* overhead, the former doesn't. We never use compression unless it actually
* saves space, so we expect either equality or less-than.
*/
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
/*
* Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
* into a local "struct varatt_external" toast pointer. This should be
* just a memcpy, but some versions of gcc seem to produce broken code
* that assumes the datum contents are aligned. Introducing an explicit
* intermediate "varattrib_1b_e *" variable seems to fix it.
*/
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
do { \
varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
Assert(VARATT_IS_EXTERNAL(attre)); \
Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
/* ----------
* toast_insert_or_update -
*

View File

@ -288,6 +288,7 @@ extern int XLogFileOpen(XLogSegNo segno);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
extern XLogSegNo XLogGetLastRemovedSegno(void);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201403031
#define CATALOG_VERSION_NO 201403032
#endif

View File

@ -4804,8 +4804,18 @@ DATA(insert OID = 3779 ( pg_create_physical_replication_slot PGNSP PGUID 12 1 0
DESCR("create a physical replication slot");
DATA(insert OID = 3780 ( pg_drop_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "19" _null_ _null_ _null_ _null_ pg_drop_replication_slot _null_ _null_ _null_ ));
DESCR("drop a replication slot");
DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,25,26,16,28,3220}" "{o,o,o,o,o,o}" "{slot_name,slot_type,datoid,active,xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ ));
DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,19,25,26,16,28,28,3220}" "{o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,active,xmin,catalog_xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ ));
DESCR("information about replication slots currently in use");
DATA(insert OID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 2 0 2249 "19 19" "{19,19,25,3220}" "{i,i,o,o}" "{slotname,plugin,slotname,xlog_position}" _null_ pg_create_logical_replication_slot _null_ _null_ _null_ ));
DESCR("set up a logical replication slot");
DATA(insert OID = 3782 ( pg_logical_slot_get_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_changes _null_ _null_ _null_ ));
DESCR("get changes from replication slot");
DATA(insert OID = 3783 ( pg_logical_slot_get_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_binary_changes _null_ _null_ _null_ ));
DESCR("get binary changes from replication slot");
DATA(insert OID = 3784 ( pg_logical_slot_peek_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_changes _null_ _null_ _null_ ));
DESCR("peek at changes from replication slot");
DATA(insert OID = 3785 ( pg_logical_slot_peek_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_binary_changes _null_ _null_ _null_ ));
DESCR("peek at binary changes from replication slot");
/* event triggers */
DATA(insert OID = 3566 ( pg_event_trigger_dropped_objects PGNSP PGUID 12 10 100 0 0 f f f f t t s 0 0 2249 "" "{26,26,23,25,25,25,25}" "{o,o,o,o,o,o,o}" "{classid, objid, objsubid, object_type, schema_name, object_name, object_identity}" _null_ pg_event_trigger_dropped_objects _null_ _null_ _null_ ));

View File

@ -157,10 +157,10 @@ extern void vac_update_relstats(Relation relation,
bool hasindex,
TransactionId frozenxid,
MultiXactId minmulti);
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
extern void vacuum_set_xid_limits(Relation rel,
int freeze_min_age, int freeze_table_age,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
TransactionId *xidFullScanLimit,

View File

@ -0,0 +1,19 @@
/*-------------------------------------------------------------------------
* decode.h
* PostgreSQL WAL to logical transformation
*
* Portions Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
*/
#ifndef DECODE_H
#define DECODE_H
#include "access/xlogreader.h"
#include "replication/reorderbuffer.h"
#include "replication/logical.h"
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx,
XLogRecord *record);
#endif

View File

@ -0,0 +1,100 @@
/*-------------------------------------------------------------------------
* logical.h
* PostgreSQL logical decoding coordination
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
*/
#ifndef LOGICAL_H
#define LOGICAL_H
#include "replication/slot.h"
#include "access/xlog.h"
#include "access/xlogreader.h"
#include "replication/output_plugin.h"
struct LogicalDecodingContext;
typedef void (*LogicalOutputPluginWriterWrite) (
struct LogicalDecodingContext *lr,
XLogRecPtr Ptr,
TransactionId xid,
bool last_write
);
typedef LogicalOutputPluginWriterWrite LogicalOutputPluginWriterPrepareWrite;
/*
* State bundled together for logical decoding: the WAL reader, the
* replication slot, the reorder buffer and snapshot builder, the output
* plugin's callbacks and options, and the writer callbacks with their
* bookkeeping.
*/
typedef struct LogicalDecodingContext
{
/* memory context this is all allocated in */
MemoryContext context;
/* infrastructure pieces */
XLogReaderState *reader;
ReplicationSlot *slot;
struct ReorderBuffer *reorder;
struct SnapBuild *snapshot_builder;
/* callbacks provided by the output plugin (see OutputPluginCallbacks) */
OutputPluginCallbacks callbacks;
/* options set by the output plugin (see OutputPluginOptions) */
OutputPluginOptions options;
/*
* User-specified options
*/
List *output_plugin_options;
/*
* User-provided callbacks for writing/streaming out data.
*/
LogicalOutputPluginWriterPrepareWrite prepare_write;
LogicalOutputPluginWriterWrite write;
/*
* Output buffer.
*/
StringInfo out;
/*
* Private data pointer of the output plugin.
*/
void *output_plugin_private;
/*
* Private data pointer for the data writer.
*/
void *output_writer_private;
/*
* State for writing output.
* NOTE(review): the exact protocol between accept_writes, prepared_write
* and the write_location/write_xid pair is not visible in this header --
* confirm in logical.c before relying on it.
*/
bool accept_writes;
bool prepared_write;
XLogRecPtr write_location;
TransactionId write_xid;
} LogicalDecodingContext;
extern void CheckLogicalDecodingRequirements(void);
extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin,
List *output_plugin_options,
XLogPageReadCB read_page,
LogicalOutputPluginWriterPrepareWrite prepare_write,
LogicalOutputPluginWriterWrite do_write);
extern LogicalDecodingContext *CreateDecodingContext(
XLogRecPtr start_lsn,
List *output_plugin_options,
XLogPageReadCB read_page,
LogicalOutputPluginWriterPrepareWrite prepare_write,
LogicalOutputPluginWriterWrite do_write);
extern void DecodingContextFindStartpoint(LogicalDecodingContext *ctx);
extern bool DecodingContextReady(LogicalDecodingContext *ctx);
extern void FreeDecodingContext(LogicalDecodingContext *ctx);
extern void LogicalIncreaseXminForSlot(XLogRecPtr lsn, TransactionId xmin);
extern void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,
XLogRecPtr restart_lsn);
extern void LogicalConfirmReceivedLocation(XLogRecPtr lsn);
#endif

View File

@ -0,0 +1,24 @@
/*-------------------------------------------------------------------------
* logicalfuncs.h
* PostgreSQL WAL to logical transformation support functions
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
*/
#ifndef LOGICALFUNCS_H
#define LOGICALFUNCS_H
#include "replication/logical.h"
extern int logical_read_local_xlog_page(XLogReaderState *state,
XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr,
char *cur_page, TimeLineID *pageTLI);
extern Datum pg_logical_slot_get_changes(PG_FUNCTION_ARGS);
extern Datum pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS);
extern Datum pg_logical_slot_peek_changes(PG_FUNCTION_ARGS);
extern Datum pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS);
#endif

View File

@ -0,0 +1,98 @@
/*-------------------------------------------------------------------------
* output_plugin.h
* PostgreSQL Logical Decode Plugin Interface
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
*/
#ifndef OUTPUT_PLUGIN_H
#define OUTPUT_PLUGIN_H
#include "replication/reorderbuffer.h"
struct LogicalDecodingContext;
struct OutputPluginCallbacks;
/*
* Kind of output an output plugin produces (binary or textual); stored in
* OutputPluginOptions.output_type.
*/
typedef enum OutputPluginOutputType
{
OUTPUT_PLUGIN_BINARY_OUTPUT,
OUTPUT_PLUGIN_TEXTUAL_OUTPUT
} OutputPluginOutputType;
/*
* Options set by the output plugin, in the startup callback.
*/
typedef struct OutputPluginOptions
{
OutputPluginOutputType output_type;
} OutputPluginOptions;
/*
* Type of the shared library symbol _PG_output_plugin_init that is looked up
* when loading an output plugin shared library.
*/
typedef void (*LogicalOutputPluginInit)(struct OutputPluginCallbacks *cb);
/*
* Callback that gets called in a user-defined plugin.
* ctx->output_plugin_private can be set to some private data.
*
* "options" is where the plugin sets its OutputPluginOptions (e.g. the
* output type).
*
* "is_init" will be set to "true" if the decoding slot just got defined. When
* the same slot is used from there on, it will be "false".
*/
typedef void (*LogicalDecodeStartupCB) (
struct LogicalDecodingContext *ctx,
OutputPluginOptions *options,
bool is_init
);
/*
* Callback called for every (explicit or implicit) BEGIN of a successful
* transaction.
*/
typedef void (*LogicalDecodeBeginCB) (
struct LogicalDecodingContext *,
ReorderBufferTXN *txn);
/*
* Callback for every individual change in a successful transaction.
*/
typedef void (*LogicalDecodeChangeCB) (
struct LogicalDecodingContext *,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change
);
/*
* Called for every (explicit or implicit) COMMIT of a successful transaction.
*/
typedef void (*LogicalDecodeCommitCB) (
struct LogicalDecodingContext *,
ReorderBufferTXN *txn,
XLogRecPtr commit_lsn);
/*
* Called to shut down an output plugin.
*/
typedef void (*LogicalDecodeShutdownCB) (
struct LogicalDecodingContext *
);
/*
* Output plugin callbacks
*/
typedef struct OutputPluginCallbacks
{
LogicalDecodeStartupCB startup_cb;
LogicalDecodeBeginCB begin_cb;
LogicalDecodeChangeCB change_cb;
LogicalDecodeCommitCB commit_cb;
LogicalDecodeShutdownCB shutdown_cb;
} OutputPluginCallbacks;
/*
* Functions an output plugin calls around emitting data for ctx.
* NOTE(review): presumably these forward to the prepare_write / write
* callbacks of the decoding context; the implementation is not visible from
* this header -- confirm in logical.c.
*/
extern void OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx,
bool last_write);
extern void OutputPluginWrite(struct LogicalDecodingContext *ctx,
bool last_write);
#endif /* OUTPUT_PLUGIN_H */

View File

@ -0,0 +1,351 @@
/*
* reorderbuffer.h
* PostgreSQL logical replay/reorder buffer management.
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
* src/include/replication/reorderbuffer.h
*/
#ifndef REORDERBUFFER_H
#define REORDERBUFFER_H
#include "access/htup_details.h"
#include "lib/ilist.h"
#include "storage/sinval.h"
#include "utils/hsearch.h"
#include "utils/rel.h"
#include "utils/snapshot.h"
#include "utils/timestamp.h"
/* an individual tuple, stored in one chunk of memory */
typedef struct ReorderBufferTupleBuf
{
/* position in preallocated list */
slist_node node;
/* tuple, stored sequentially */
HeapTupleData tuple;
HeapTupleHeaderData header;
char data[MaxHeapTupleSize];
} ReorderBufferTupleBuf;
/* types of the change passed to a 'change' callback */
enum ReorderBufferChangeType
{
REORDER_BUFFER_CHANGE_INSERT,
REORDER_BUFFER_CHANGE_UPDATE,
REORDER_BUFFER_CHANGE_DELETE
};
/*
* a single 'change', can be an insert (with one tuple), an update (old, new),
* or a delete (old).
*
* The same struct is also used internally for other purposes but that should
* never be visible outside reorderbuffer.c.
*/
typedef struct ReorderBufferChange
{
XLogRecPtr lsn;
/* type of change */
union
{
enum ReorderBufferChangeType action;
/* do not leak internal enum values to the outside */
int action_internal;
};
/*
* Context data for the change, which part of the union is valid depends
* on action/action_internal.
*/
union
{
/* old, new tuples when action == *_INSERT|UPDATE|DELETE */
struct
{
/* relation that has been changed */
RelFileNode relnode;
/* valid for DELETE || UPDATE */
ReorderBufferTupleBuf *oldtuple;
/* valid for INSERT || UPDATE */
ReorderBufferTupleBuf *newtuple;
} tp;
/* new snapshot */
Snapshot snapshot;
/* new command id for existing snapshot in a catalog changing tx */
CommandId command_id;
/* new cid mapping for catalog changing transaction */
struct
{
RelFileNode node;
ItemPointerData tid;
CommandId cmin;
CommandId cmax;
CommandId combocid;
} tuplecid;
};
/*
* While in use, this is how a change is linked into a transaction;
* otherwise it links the change into the preallocated list.
*/
dlist_node node;
} ReorderBufferChange;
typedef struct ReorderBufferTXN
{
/*
* The transaction's transaction id; can be a toplevel or sub xid.
*/
TransactionId xid;
/* did the TX have catalog changes */
bool has_catalog_changes;
/*
* Do we know this is a subxact?
*/
bool is_known_as_subxact;
/*
* LSN of the first data-carrying WAL record with knowledge about this
* xid. This is allowed to *not* be the first record adorned with this xid,
* if the previous records aren't relevant for logical decoding.
*/
XLogRecPtr first_lsn;
/* ----
* LSN of the record that led to this xact being committed or
* aborted. This can be a
* * plain commit record
* * plain commit record, of a parent transaction
* * prepared transaction commit
* * plain abort record
* * prepared transaction abort
* * error during decoding
* ----
*/
XLogRecPtr final_lsn;
/*
* LSN pointing to the end of the commit record + 1.
*/
XLogRecPtr end_lsn;
/*
* LSN of the last position at which snapshot information resides, so we
* can restart decoding from there and fully recover this transaction from
* WAL.
*/
XLogRecPtr restart_decoding_lsn;
/*
* Commit time, only known when we read the actual commit record.
*/
TimestampTz commit_time;
/*
* Base snapshot or NULL.
*/
Snapshot base_snapshot;
XLogRecPtr base_snapshot_lsn;
/*
* How many ReorderBufferChange's do we have in this txn.
*
* Changes in subtransactions are *not* included but tracked separately.
*/
uint64 nentries;
/*
* How many of the above entries are stored in memory in contrast to being
* spilled to disk.
*/
uint64 nentries_mem;
/*
* List of ReorderBufferChange structs, including new Snapshots and new
* CommandIds
*/
dlist_head changes;
/*
* List of (relation, ctid) => (cmin, cmax) mappings for catalog tuples.
* Those are always assigned to the toplevel transaction. (Keep track of
* #entries to create a hash of the right size)
*/
dlist_head tuplecids;
uint64 ntuplecids;
/*
* On-demand built hash for looking up the above values.
*/
HTAB *tuplecid_hash;
/*
* Hash containing (potentially partial) toast entries. NULL if no toast
* tuples have been found for the current change.
*/
HTAB *toast_hash;
/*
* non-hierarchical list of subtransactions that are *not* aborted. Only
* used in toplevel transactions.
*/
dlist_head subtxns;
uint32 nsubtxns;
/*
* Stored cache invalidations. This is not a linked list because we get
* all the invalidations at once.
*/
uint32 ninvalidations;
SharedInvalidationMessage *invalidations;
/* ---
* Position in one of three lists:
* * list of subtransactions if we are *known* to be subxact
* * list of toplevel xacts (can be an as-yet unknown subxact)
* * list of preallocated ReorderBufferTXNs
* ---
*/
dlist_node node;
} ReorderBufferTXN;
/* so we can define the callbacks used inside struct ReorderBuffer itself */
typedef struct ReorderBuffer ReorderBuffer;
/* change callback signature */
typedef void (*ReorderBufferApplyChangeCB) (
ReorderBuffer *rb,
ReorderBufferTXN *txn,
Relation relation,
ReorderBufferChange *change);
/* begin callback signature */
typedef void (*ReorderBufferBeginCB) (
ReorderBuffer *rb,
ReorderBufferTXN *txn);
/* commit callback signature */
typedef void (*ReorderBufferCommitCB) (
ReorderBuffer *rb,
ReorderBufferTXN *txn,
XLogRecPtr commit_lsn);
struct ReorderBuffer
{
/*
* xid => ReorderBufferTXN lookup table
*/
HTAB *by_txn;
/*
* Transactions that could be a toplevel xact, ordered by LSN of the first
* record bearing that xid.
*/
dlist_head toplevel_by_lsn;
/*
* one-entry sized cache for by_txn. Very frequently the same txn gets
* looked up over and over again.
*/
TransactionId by_txn_last_xid;
ReorderBufferTXN *by_txn_last_txn;
/*
* Callbacks to be called when a transaction commits.
*/
ReorderBufferBeginCB begin;
ReorderBufferApplyChangeCB apply_change;
ReorderBufferCommitCB commit;
/*
* Pointer that will be passed untouched to the callbacks.
*/
void *private_data;
/*
* Private memory context.
*/
MemoryContext context;
/*
* Data structure slab cache.
*
* We allocate/deallocate some structures very frequently, to avoid bigger
* overhead we cache some unused ones here.
*
* The maximum number of cached entries is controlled by const variables
* at the top of reorderbuffer.c.
*/
/* cached ReorderBufferTXNs */
dlist_head cached_transactions;
Size nr_cached_transactions;
/* cached ReorderBufferChanges */
dlist_head cached_changes;
Size nr_cached_changes;
/* cached ReorderBufferTupleBufs */
slist_head cached_tuplebufs;
Size nr_cached_tuplebufs;
XLogRecPtr current_restart_decoding_lsn;
/* buffer for disk<->memory conversions */
char *outbuf;
Size outbufsize;
};
ReorderBuffer *ReorderBufferAllocate(void);
void ReorderBufferFree(ReorderBuffer *);
ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *);
void ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple);
ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *);
void ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *);
void ReorderBufferQueueChange(ReorderBuffer *, TransactionId, XLogRecPtr lsn, ReorderBufferChange *);
void ReorderBufferCommit(ReorderBuffer *, TransactionId,
XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
TimestampTz commit_time);
void ReorderBufferAssignChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn);
void ReorderBufferCommitChild(ReorderBuffer *, TransactionId, TransactionId,
XLogRecPtr commit_lsn, XLogRecPtr end_lsn);
void ReorderBufferAbort(ReorderBuffer *, TransactionId, XLogRecPtr lsn);
void ReorderBufferAbortOld(ReorderBuffer *, TransactionId xid);
void ReorderBufferForget(ReorderBuffer *, TransactionId, XLogRecPtr lsn);
void ReorderBufferSetBaseSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap);
void ReorderBufferAddSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap);
void ReorderBufferAddNewCommandId(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
CommandId cid);
void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
RelFileNode node, ItemPointerData pt,
CommandId cmin, CommandId cmax, CommandId combocid);
void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
Size nmsgs, SharedInvalidationMessage *msgs);
bool ReorderBufferIsXidKnown(ReorderBuffer *, TransactionId xid);
void ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn);
bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid);
bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid);
ReorderBufferTXN *ReorderBufferGetOldestTXN(ReorderBuffer *);
void ReorderBufferSetRestartPoint(ReorderBuffer *, XLogRecPtr ptr);
void StartupReorderBuffer(void);
#endif

View File

@ -16,6 +16,24 @@
#include "storage/shmem.h"
#include "storage/spin.h"
/*
* Behaviour of replication slots, upon release or crash.
*
* Slots marked as PERSISTENT are crashsafe and will not be dropped when
* released. Slots marked as EPHEMERAL will be dropped when released or after
* restarts.
*
* EPHEMERAL slots can be made PERSISTENT by calling ReplicationSlotPersist().
*/
typedef enum ReplicationSlotPersistency
{
RS_PERSISTENT,
RS_EPHEMERAL
} ReplicationSlotPersistency;
/*
* On-Disk data of a replication slot, preserved across restarts.
*/
typedef struct ReplicationSlotPersistentData
{
/* The slot's identifier */
@ -24,6 +42,11 @@ typedef struct ReplicationSlotPersistentData
/* database the slot is active on */
Oid database;
/*
* The slot's behaviour when being dropped (or restored after a crash).
*/
ReplicationSlotPersistency persistency;
/*
* xmin horizon for data
*
@ -32,9 +55,22 @@ typedef struct ReplicationSlotPersistentData
*/
TransactionId xmin;
/*
* xmin horizon for catalog tuples
*
* NB: This may represent a value that hasn't been written to disk yet;
* see notes for effective_xmin, below.
*/
TransactionId catalog_xmin;
/* oldest LSN that might be required by this replication slot */
XLogRecPtr restart_lsn;
/* oldest LSN that the client has acked receipt for */
XLogRecPtr confirmed_flush;
/* plugin name */
NameData plugin;
} ReplicationSlotPersistentData;
/*
@ -67,12 +103,26 @@ typedef struct ReplicationSlot
* same as the persistent value (data.xmin).
*/
TransactionId effective_xmin;
TransactionId effective_catalog_xmin;
/* data surviving shutdowns and crashes */
ReplicationSlotPersistentData data;
/* is somebody performing io on this slot? */
LWLock *io_in_progress_lock;
/* all the remaining data is only used for logical slots */
/* ----
* When the client has confirmed flushes >= candidate_xmin_lsn we can
* advance the catalog xmin; when candidate_restart_valid has been passed,
* restart_lsn can be increased.
* ----
*/
TransactionId candidate_catalog_xmin;
XLogRecPtr candidate_xmin_lsn;
XLogRecPtr candidate_restart_valid;
XLogRecPtr candidate_restart_lsn;
} ReplicationSlot;
/*
@ -97,8 +147,11 @@ extern Size ReplicationSlotsShmemSize(void);
extern void ReplicationSlotsShmemInit(void);
/* management of individual slots */
extern void ReplicationSlotCreate(const char *name, bool db_specific);
extern void ReplicationSlotCreate(const char *name, bool db_specific,
ReplicationSlotPersistency p);
extern void ReplicationSlotPersist(void);
extern void ReplicationSlotDrop(const char *name);
extern void ReplicationSlotAcquire(const char *name);
extern void ReplicationSlotRelease(void);
extern void ReplicationSlotSave(void);
@ -106,15 +159,20 @@ extern void ReplicationSlotMarkDirty(void);
/* misc stuff */
extern bool ReplicationSlotValidateName(const char *name, int elevel);
extern void ReplicationSlotsComputeRequiredXmin(void);
extern void ReplicationSlotsComputeRequiredXmin(bool already_locked);
extern void ReplicationSlotsComputeRequiredLSN(void);
extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void);
extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive);
extern void StartupReplicationSlots(XLogRecPtr checkPointRedo);
extern void CheckPointReplicationSlots(void);
extern void CheckSlotRequirements(void);
extern void ReplicationSlotAtProcExit(void);
/* SQL callable functions */
extern Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS);
extern Datum pg_create_logical_replication_slot(PG_FUNCTION_ARGS);
extern Datum pg_drop_replication_slot(PG_FUNCTION_ARGS);
extern Datum pg_get_replication_slots(PG_FUNCTION_ARGS);
#endif /* SLOT_H */

View File

@ -0,0 +1,83 @@
/*-------------------------------------------------------------------------
*
* snapbuild.h
* Exports from replication/logical/snapbuild.c.
*
* Copyright (c) 2012-2014, PostgreSQL Global Development Group
*
* src/include/replication/snapbuild.h
*
*-------------------------------------------------------------------------
*/
#ifndef SNAPBUILD_H
#define SNAPBUILD_H
#include "access/xlogdefs.h"
#include "utils/snapmgr.h"
/*
* States of the snapshot builder, from the initial state up to the point
* where a consistent snapshot is available.
*/
typedef enum
{
/*
* Initial state, we can't do much yet.
*/
SNAPBUILD_START,
/*
* We have collected enough information to decode tuples in transactions
* that started after this.
*
* Once we reached this we start to collect changes. We cannot apply them
* yet, because they might be based on transactions that were still running
* when we reached this state.
*/
SNAPBUILD_FULL_SNAPSHOT,
/*
* Found a point after reaching SNAPBUILD_FULL_SNAPSHOT where all
* transactions that were running at that point finished. Till we reach
* that we hold off calling any commit callbacks.
*/
SNAPBUILD_CONSISTENT
} SnapBuildState;
/* forward declare so we don't have to expose the struct to the public */
struct SnapBuild;
typedef struct SnapBuild SnapBuild;
/* forward declare so we don't have to include reorderbuffer.h */
struct ReorderBuffer;
/* forward declare so we don't have to include heapam_xlog.h */
struct xl_heap_new_cid;
struct xl_running_xacts;
extern void CheckPointSnapBuild(void);
extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *cache,
TransactionId xmin_horizon, XLogRecPtr start_lsn);
extern void FreeSnapshotBuilder(SnapBuild *cache);
extern void SnapBuildSnapDecRefcount(Snapshot snap);
extern const char *SnapBuildExportSnapshot(SnapBuild *snapstate);
extern void SnapBuildClearExportedSnapshot(void);
extern SnapBuildState SnapBuildCurrentState(SnapBuild *snapstate);
extern bool SnapBuildXactNeedsSkip(SnapBuild *snapstate, XLogRecPtr ptr);
extern void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn,
TransactionId xid, int nsubxacts,
TransactionId *subxacts);
extern void SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn,
TransactionId xid, int nsubxacts,
TransactionId *subxacts);
extern bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid,
XLogRecPtr lsn);
extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
XLogRecPtr lsn, struct xl_heap_new_cid *cid);
extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn,
struct xl_running_xacts *running);
extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn);
#endif /* SNAPBUILD_H */

View File

@ -116,6 +116,9 @@ typedef ItemPointerData *ItemPointer;
/*
* ItemPointerCopy
* Copies the contents of one disk item pointer to another.
*
* Should there ever be padding in an ItemPointer this would need to be handled
* differently as it's used as hash key.
*/
#define ItemPointerCopy(fromPointer, toPointer) \
( \

View File

@ -41,10 +41,12 @@ struct XidCache
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical decoding */
/* flags reset at EOXact */
#define PROC_VACUUM_STATE_MASK (0x0E)
#define PROC_VACUUM_STATE_MASK \
(PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND)
/*
* We allow a small number of "weak" relation locks (AccesShareLock,

View File

@ -15,6 +15,7 @@
#define PROCARRAY_H
#include "storage/standby.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
@ -50,8 +51,9 @@ extern RunningTransactions GetRunningTransactionData(void);
extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
extern TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum);
extern TransactionId GetOldestActiveTransactionId(void);
extern TransactionId GetOldestSafeDecodingTransactionId(void);
extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids);
extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids);
@ -77,6 +79,10 @@ extern void XidCacheRemoveRunningXids(TransactionId xid,
int nxids, const TransactionId *xids,
TransactionId latestXid);
extern void ProcArraySetReplicationSlotXmin(TransactionId xmin);
extern void ProcArraySetReplicationSlotXmin(TransactionId xmin,
TransactionId catalog_xmin, bool already_locked);
extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
TransactionId *catalog_xmin);
#endif /* PROCARRAY_H */

View File

@ -147,4 +147,6 @@ extern void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs
int nmsgs, bool RelcacheInitFileInval,
Oid dbid, Oid tsid);
extern void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg);
#endif /* SINVAL_H */

View File

@ -64,4 +64,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
extern void InvalidateSystemCaches(void);
#endif /* INVAL_H */

View File

@ -23,12 +23,14 @@ extern bool FirstSnapshotSet;
extern TransactionId TransactionXmin;
extern TransactionId RecentXmin;
extern TransactionId RecentGlobalXmin;
extern TransactionId RecentGlobalDataXmin;
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
extern Snapshot GetCatalogSnapshot(Oid relid);
extern Snapshot GetNonHistoricCatalogSnapshot(Oid relid);
extern void InvalidateCatalogSnapshot(void);
extern void PushActiveSnapshot(Snapshot snapshot);
@ -53,4 +55,13 @@ extern bool XactHasExportedSnapshots(void);
extern void DeleteAllExportedSnapshotFiles(void);
extern bool ThereAreNoPriorRegisteredSnapshots(void);
extern char *ExportSnapshot(Snapshot snapshot);
/* Support for catalog timetravel for logical decoding */
struct HTAB;
extern struct HTAB *HistoricSnapshotGetTupleCids(void);
extern void SetupHistoricSnapshot(Snapshot snapshot_now, struct HTAB *tuplecids);
extern void TeardownHistoricSnapshot(bool is_error);
extern bool HistoricSnapshotActive(void);
#endif /* SNAPMGR_H */

View File

@ -30,6 +30,22 @@ typedef struct SnapshotData *Snapshot;
typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup,
Snapshot snapshot, Buffer buffer);
/*
* Struct representing all kinds of possible snapshots.
*
* There are several different kinds of snapshots:
* * Normal MVCC snapshots
* * MVCC snapshots taken during recovery (in Hot-Standby mode)
* * Historic MVCC snapshots used during logical decoding
* * snapshots passed to HeapTupleSatisfiesDirty()
* * snapshots used for SatisfiesAny, Toast, Self where no members are
* accessed.
*
* TODO: It's probably a good idea to split this struct using a NodeTag
* similar to how parser and executor nodes are handled, with one type for
* each different kind of snapshot to avoid overloading the meaning of
* individual fields.
*/
typedef struct SnapshotData
{
SnapshotSatisfiesFunc satisfies; /* tuple test function */
@ -46,11 +62,23 @@ typedef struct SnapshotData
*/
TransactionId xmin; /* all XID < xmin are visible to me */
TransactionId xmax; /* all XID >= xmax are invisible to me */
TransactionId *xip; /* array of xact IDs in progress */
/*
* For normal MVCC snapshots this contains all the xact IDs that are in
* progress, unless the snapshot was taken during recovery in which case
* it's empty. For historic MVCC snapshots, the meaning is inverted,
* i.e. it contains *committed* transactions between xmin and xmax.
*/
TransactionId *xip;
uint32 xcnt; /* # of xact ids in xip[] */
/* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */
int32 subxcnt; /* # of xact ids in subxip[] */
TransactionId *subxip; /* array of subxact IDs in progress */
/*
* For non-historic MVCC snapshots, this contains subxact IDs that are in
* progress (and other transactions that are in progress if taken during
* recovery). For historic snapshots it contains *all* xids assigned to the
* replayed transaction, including the toplevel xid.
*/
TransactionId *subxip;
bool suboverflowed; /* has the subxip array overflowed? */
bool takenDuringRecovery; /* recovery-shaped snapshot? */
bool copied; /* false if it's a static snapshot */

View File

@ -22,6 +22,7 @@
extern PGDLLIMPORT SnapshotData SnapshotSelfData;
extern PGDLLIMPORT SnapshotData SnapshotAnyData;
extern PGDLLIMPORT SnapshotData SnapshotToastData;
extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
#define SnapshotSelf (&SnapshotSelfData)
#define SnapshotAny (&SnapshotAnyData)
@ -37,7 +38,8 @@ extern PGDLLIMPORT SnapshotData SnapshotToastData;
/* This macro encodes the knowledge of which snapshots are MVCC-safe */
#define IsMVCCSnapshot(snapshot) \
((snapshot)->satisfies == HeapTupleSatisfiesMVCC)
((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \
(snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC)
/*
* HeapTupleSatisfiesVisibility
@ -73,6 +75,8 @@ extern bool HeapTupleSatisfiesToast(HeapTuple htup,
Snapshot snapshot, Buffer buffer);
extern bool HeapTupleSatisfiesDirty(HeapTuple htup,
Snapshot snapshot, Buffer buffer);
extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup,
Snapshot snapshot, Buffer buffer);
/* Special "satisfies" routines with different APIs */
extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup,
@ -86,4 +90,13 @@ extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
/*
* To avoid leaking too much knowledge about reorderbuffer implementation
* details, this is implemented in reorderbuffer.c, not tqual.c.
*/
extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
Snapshot snapshot,
HeapTuple htup,
Buffer buffer,
CommandId *cmin, CommandId *cmax);
#endif /* TQUAL_H */

View File

@ -1368,13 +1368,15 @@ pg_prepared_xacts| SELECT p.transaction,
LEFT JOIN pg_authid u ON ((p.ownerid = u.oid)))
LEFT JOIN pg_database d ON ((p.dbid = d.oid)));
pg_replication_slots| SELECT l.slot_name,
l.plugin,
l.slot_type,
l.datoid,
d.datname AS database,
l.active,
l.xmin,
l.catalog_xmin,
l.restart_lsn
FROM (pg_get_replication_slots() l(slot_name, slot_type, datoid, active, xmin, restart_lsn)
FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, active, xmin, catalog_xmin, restart_lsn)
LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
pg_roles| SELECT pg_authid.rolname,
pg_authid.rolsuper,

View File

@ -940,6 +940,17 @@ LockTupleMode
LockingClause
LogOpts
LogStmtLevel
LogicalDecodeBeginCB
LogicalDecodeChangeCB
LogicalDecodeCleanupCB
LogicalDecodeCommitCB
LogicalDecodeInitCB
LogicalDecodingCheckpointData
LogicalDecodingContext
LogicalDecodingCtlData
LogicalDecodingSlot
LogicalOutputPluginWriterPrepareWrite
LogicalOutputPluginWriterWrite
LogicalTape
LogicalTapeSet
MAGIC
@ -1053,6 +1064,7 @@ OprInfo
OprProofCacheEntry
OprProofCacheKey
OutputContext
OutputPluginCallbacks
OverrideSearchPath
OverrideStackEntry
PACE_HEADER
@ -1468,6 +1480,21 @@ Relids
RelocationBufferInfo
RenameStmt
ReopenPtr
ReorderBuffer
ReorderBufferApplyChangeCB
ReorderBufferBeginCB
ReorderBufferChange
ReorderBufferChangeTypeInternal
ReorderBufferCommitCB
ReorderBufferDiskChange
ReorderBufferIterTXNEntry
ReorderBufferIterTXNState
ReorderBufferToastEnt
ReorderBufferTupleBuf
ReorderBufferTupleCidEnt
ReorderBufferTupleCidKey
ReorderBufferTXN
ReorderBufferTXNByIdEnt
ReplaceVarsFromTargetList_context
ReplaceVarsNoMatchOption
ResTarget
@ -1522,6 +1549,8 @@ SID_NAME_USE
SISeg
SMgrRelation
SMgrRelationData
SnapBuildAction
SnapBuildState
SOCKADDR
SOCKET
SPELL
@ -1613,6 +1642,8 @@ SlruSharedData
Snapshot
SnapshotData
SnapshotSatisfiesFunc
Snapstate
SnapstateOnDisk
SockAddr
Sort
SortBy
@ -1929,6 +1960,7 @@ XLogReaderState
XLogRecData
XLogRecPtr
XLogRecord
XLogRecordBuffer
XLogSegNo
XLogSource
XLogwrtResult
@ -2351,6 +2383,7 @@ symbol
tablespaceinfo
teReqs
teSection
TestDecodingData
temp_tablespaces_extra
text
timeKEY