pg_upgrade: run all data type checks per connection

The checks for data type usage were each connecting to all databases
in the cluster and running their query. On clusters which have a lot
of databases this can become unnecessarily expensive. This changes
the checks to all run within a single connection per database
instead, to minimize connection setup and teardown overhead.

Reviewed-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/BB4C76F-D416-4F9F-949E-DBE950D37787@yesql.se
This commit is contained in:
Daniel Gustafsson 2024-03-19 13:32:50 +01:00
parent 5577a71fb0
commit 347758b120
3 changed files with 506 additions and 521 deletions

View File

@ -10,6 +10,7 @@
#include "postgres_fe.h"
#include "catalog/pg_authid_d.h"
#include "catalog/pg_class_d.h"
#include "catalog/pg_collation.h"
#include "fe_utils/string_utils.h"
#include "mb/pg_wchar.h"
@ -23,13 +24,6 @@ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
static void check_for_user_defined_postfix_ops(ClusterInfo *cluster);
static void check_for_incompatible_polymorphics(ClusterInfo *cluster);
static void check_for_tables_with_oids(ClusterInfo *cluster);
static void check_for_composite_data_type_usage(ClusterInfo *cluster);
static void check_for_reg_data_type_usage(ClusterInfo *cluster);
static void check_for_aclitem_data_type_usage(ClusterInfo *cluster);
static void check_for_removed_data_type_usage(ClusterInfo *cluster,
const char *version,
const char *datatype);
static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(void);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
@ -38,6 +32,497 @@ static void check_new_cluster_subscription_configuration(void);
static void check_old_cluster_for_valid_slots(bool live_check);
static void check_old_cluster_subscription_state(void);
/*
 * DataTypesUsageChecks - definitions of data type checks for the old cluster
 * in order to determine if an upgrade can be performed. See the comment on
 * data_types_usage_checks below for a more detailed description.
 */
typedef struct
{
	/* Status line to print to the user; must not end with a newline */
	const char *status;
	/* Filename to store report to (appended to the log directory path) */
	const char *report_filename;
	/* Query to extract the oid of the datatype; must yield a column "oid" */
	const char *base_query;
	/* Text to store to report in case of error; should end with a newline */
	const char *report_text;
	/*
	 * The latest version where the check applies, or the special values
	 * MANUAL_CHECK / ALL_VERSIONS defined below.
	 */
	int			threshold_version;
	/*
	 * A function pointer for determining if the check applies; only
	 * consulted when threshold_version is MANUAL_CHECK.
	 */
	DataTypesUsageVersionCheck version_hook;
} DataTypesUsageChecks;
/*
 * Special values for threshold_version for indicating that a check applies to
 * all versions, or that a custom function needs to be invoked to determine
 * if the check applies.
 */
/* Run the check's version_hook to decide applicability */
#define MANUAL_CHECK 1
/* Run the check unconditionally, regardless of old cluster version */
#define ALL_VERSIONS -1
/*--
* Data type usage checks. Each check for problematic data type usage is
* defined in this array with metadata, SQL query for finding the data type
* and functionality for deciding if the check is applicable to the version
* of the old cluster. The struct members are described in detail below:
*
 * status			A one-line string which can be printed to the user to
* inform about progress. Should not end with newline.
* report_filename The filename in which the list of problems detected by
* the check will be printed.
* base_query A query which extracts the Oid of the datatype checked
* for.
* report_text The text which will be printed to the user to explain
* what the check did, and why it failed. The text should
* end with a newline, and does not need to refer to the
* report_filename as that is automatically appended to
* the report with the path to the log folder.
* threshold_version The major version of PostgreSQL for which to run the
* check. Iff the old cluster is less than, or equal to,
* the threshold version then the check will be executed.
* If the old version is greater than the threshold then
* the check is skipped. If the threshold_version is set
* to ALL_VERSIONS then it will be run unconditionally,
* if set to MANUAL_CHECK then the version_hook function
* will be executed in order to determine whether or not
* to run.
* version_hook A function pointer to a version check function of type
* DataTypesUsageVersionCheck which is used to determine
* if the check is applicable to the old cluster. If the
* version_hook returns true then the check will be run,
* else it will be skipped. The function will only be
* executed iff threshold_version is set to MANUAL_CHECK.
*/
static DataTypesUsageChecks data_types_usage_checks[] =
{
/*
* Look for composite types that were made during initdb *or* belong to
* information_schema; that's important in case information_schema was
* dropped and reloaded.
*
* The cutoff OID here should match the source cluster's value of
* FirstNormalObjectId. We hardcode it rather than using that C #define
* because, if that #define is ever changed, our own version's value is
* NOT what to use. Eventually we may need a test on the source cluster's
* version to select the correct value.
*/
{
.status = gettext_noop("Checking for system-defined composite types in user tables"),
.report_filename = "tables_using_composite.txt",
.base_query =
"SELECT t.oid FROM pg_catalog.pg_type t "
"LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
" WHERE typtype = 'c' AND (t.oid < 16384 OR nspname = 'information_schema')",
.report_text =
gettext_noop("Your installation contains system-defined composite types in user tables.\n"
"These type OIDs are not stable across PostgreSQL versions,\n"
"so this cluster cannot currently be upgraded. You can drop the\n"
"problem columns and restart the upgrade.\n"),
.threshold_version = ALL_VERSIONS
},
/*
* 9.3 -> 9.4 Fully implement the 'line' data type in 9.4, which
* previously returned "not enabled" by default and was only functionally
* enabled with a compile-time switch; as of 9.4 "line" has a different
* on-disk representation format.
*/
{
.status = gettext_noop("Checking for incompatible \"line\" data type"),
.report_filename = "tables_using_line.txt",
.base_query =
"SELECT 'pg_catalog.line'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"line\" data type in user tables.\n"
"this data type changed its internal and input/output format\n"
"between your old and new versions so this\n"
"cluster cannot currently be upgraded. You can\n"
"drop the problem columns and restart the upgrade.\n"),
.threshold_version = 903
},
/*
* pg_upgrade only preserves these system values: pg_class.oid pg_type.oid
* pg_enum.oid
*
* Many of the reg* data types reference system catalog info that is not
* preserved, and hence these data types cannot be used in user tables
* upgraded by pg_upgrade.
*/
{
.status = gettext_noop("Checking for reg* data types in user tables"),
.report_filename = "tables_using_reg.txt",
/*
* Note: older servers will not have all of these reg* types, so we
* have to write the query like this rather than depending on casts to
* regtype.
*/
.base_query =
"SELECT oid FROM pg_catalog.pg_type t "
"WHERE t.typnamespace = "
" (SELECT oid FROM pg_catalog.pg_namespace "
" WHERE nspname = 'pg_catalog') "
" AND t.typname IN ( "
/* pg_class.oid is preserved, so 'regclass' is OK */
" 'regcollation', "
" 'regconfig', "
" 'regdictionary', "
" 'regnamespace', "
" 'regoper', "
" 'regoperator', "
" 'regproc', "
" 'regprocedure' "
/* pg_authid.oid is preserved, so 'regrole' is OK */
/* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
" )",
.report_text =
gettext_noop("Your installation contains one of the reg* data types in user tables.\n"
"These data types reference system OIDs that are not preserved by\n"
"pg_upgrade, so this cluster cannot currently be upgraded. You can\n"
"drop the problem columns and restart the upgrade.\n"),
.threshold_version = ALL_VERSIONS
},
/*
* PG 16 increased the size of the 'aclitem' type, which breaks the
* on-disk format for existing data.
*/
{
.status = gettext_noop("Checking for incompatible \"aclitem\" data type"),
.report_filename = "tables_using_aclitem.txt",
.base_query =
"SELECT 'pg_catalog.aclitem'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"aclitem\" data type in user tables.\n"
"The internal format of \"aclitem\" changed in PostgreSQL version 16\n"
"so this cluster cannot currently be upgraded. You can drop the\n"
"problem columns and restart the upgrade.\n"),
.threshold_version = 1500
},
/*
* It's no longer allowed to create tables or views with "unknown"-type
* columns. We do not complain about views with such columns, because
* they should get silently converted to "text" columns during the DDL
* dump and reload; it seems unlikely to be worth making users do that by
* hand. However, if there's a table with such a column, the DDL reload
* will fail, so we should pre-detect that rather than failing
* mid-upgrade. Worse, if there's a matview with such a column, the DDL
* reload will silently change it to "text" which won't match the on-disk
* storage (which is like "cstring"). So we *must* reject that.
*/
{
.status = gettext_noop("Checking for invalid \"unknown\" user columns"),
.report_filename = "tables_using_unknown.txt",
.base_query =
"SELECT 'pg_catalog.unknown'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"unknown\" data type in user tables.\n"
"This data type is no longer allowed in tables, so this cluster\n"
"cannot currently be upgraded. You can drop the problem columns\n"
"and restart the upgrade.\n"),
.threshold_version = 906
},
/*
* PG 12 changed the 'sql_identifier' type storage to be based on name,
* not varchar, which breaks on-disk format for existing data. So we need
* to prevent upgrade when used in user objects (tables, indexes, ...). In
* 12, the sql_identifier data type was switched from name to varchar,
* which does affect the storage (name is by-ref, but not varlena). This
* means user tables using sql_identifier for columns are broken because
* the on-disk format is different.
*/
{
.status = gettext_noop("Checking for invalid \"sql_identifier\" user columns"),
.report_filename = "tables_using_sql_identifier.txt",
.base_query =
"SELECT 'information_schema.sql_identifier'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"sql_identifier\" data type in user tables.\n"
"The on-disk format for this data type has changed, so this\n"
"cluster cannot currently be upgraded. You can drop the problem\n"
"columns and restart the upgrade.\n"),
.threshold_version = 1100
},
/*
* JSONB changed its storage format during 9.4 beta, so check for it.
*/
{
.status = gettext_noop("Checking for incompatible \"jsonb\" data type in user tables"),
.report_filename = "tables_using_jsonb.txt",
.base_query =
"SELECT 'pg_catalog.jsonb'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"jsonb\" data type in user tables.\n"
"The internal format of \"jsonb\" changed during 9.4 beta so this\n"
"cluster cannot currently be upgraded. You can drop the problem \n"
"columns and restart the upgrade.\n"),
.threshold_version = MANUAL_CHECK,
.version_hook = jsonb_9_4_check_applicable
},
/*
* PG 12 removed types abstime, reltime, tinterval.
*/
{
.status = gettext_noop("Checking for removed \"abstime\" data type in user tables"),
.report_filename = "tables_using_abstime.txt",
.base_query =
"SELECT 'pg_catalog.abstime'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"abstime\" data type in user tables.\n"
"The \"abstime\" type has been removed in PostgreSQL version 12,\n"
"so this cluster cannot currently be upgraded. You can drop the\n"
"problem columns, or change them to another data type, and restart\n"
"the upgrade.\n"),
.threshold_version = 1100
},
{
.status = gettext_noop("Checking for removed \"reltime\" data type in user tables"),
.report_filename = "tables_using_reltime.txt",
.base_query =
"SELECT 'pg_catalog.reltime'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"reltime\" data type in user tables.\n"
"The \"reltime\" type has been removed in PostgreSQL version 12,\n"
"so this cluster cannot currently be upgraded. You can drop the\n"
"problem columns, or change them to another data type, and restart\n"
"the upgrade.\n"),
.threshold_version = 1100
},
{
.status = gettext_noop("Checking for removed \"tinterval\" data type in user tables"),
.report_filename = "tables_using_tinterval.txt",
.base_query =
"SELECT 'pg_catalog.tinterval'::pg_catalog.regtype AS oid",
.report_text =
gettext_noop("Your installation contains the \"tinterval\" data type in user tables.\n"
"The \"tinterval\" type has been removed in PostgreSQL version 12,\n"
"so this cluster cannot currently be upgraded. You can drop the\n"
"problem columns, or change them to another data type, and restart\n"
"the upgrade.\n"),
.threshold_version = 1100
},
/* End of checks marker, must remain last */
{
NULL, NULL, NULL, NULL, 0, NULL
}
};
/*
* check_for_data_types_usage()
* Detect whether there are any stored columns depending on given type(s)
*
* If so, write a report to the given file name and signal a failure to the
* user.
*
* The checks to run are defined in a DataTypesUsageChecks structure where
* each check has a metadata for explaining errors to the user, a base_query,
* a report filename and a function pointer hook for validating if the check
* should be executed given the cluster at hand.
*
* base_query should be a SELECT yielding a single column named "oid",
* containing the pg_type OIDs of one or more types that are known to have
* inconsistent on-disk representations across server versions.
*
* We check for the type(s) in tables, matviews, and indexes, but not views;
* there's no storage involved in a view.
*/
/*
 * Run every applicable check in "checks" against every database in the old
 * cluster, opening a single connection per database and executing all checks
 * over it.  Any hit is appended to the check's report file and accumulated
 * into a combined error report; if anything was found we pg_fatal() with the
 * full report after all databases have been scanned.
 */
static void
check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks * checks)
{
	bool		found = false;	/* true once any check hit in any database */
	bool	   *results;		/* per-check "has already reported" flags */
	PQExpBufferData report;		/* combined report; only init'd once found */
	DataTypesUsageChecks *tmp = checks;
	int			n_data_types_usage_checks = 0;

	prep_status("Checking for data type usage");

	/* Gather number of checks to perform (array is NULL-status terminated) */
	while (tmp->status != NULL)
	{
		n_data_types_usage_checks++;
		tmp++;
	}

	/* Prepare an array to store the results of checks in */
	results = pg_malloc0(sizeof(bool) * n_data_types_usage_checks);

	/*
	 * Connect to each database in the cluster and run all defined checks
	 * against that database before trying the next one.
	 */
	for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
	{
		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
		PGconn	   *conn = connectToServer(cluster, active_db->db_name);

		for (int checknum = 0; checknum < n_data_types_usage_checks; checknum++)
		{
			PGresult   *res;
			int			ntups;
			int			i_nspname;
			int			i_relname;
			int			i_attname;
			FILE	   *script = NULL;
			bool		db_used = false;	/* wrote "In database:" header yet? */
			char		output_path[MAXPGPATH];
			DataTypesUsageChecks *cur_check = &checks[checknum];

			if (cur_check->threshold_version == MANUAL_CHECK)
			{
				Assert(cur_check->version_hook);

				/*
				 * Make sure that the check applies to the current cluster
				 * version and skip if not. If no check hook has been defined
				 * we run the check for all versions.
				 */
				if (!cur_check->version_hook(cluster))
					continue;
			}
			else if (cur_check->threshold_version != ALL_VERSIONS)
			{
				/* Skip if the old cluster is newer than the threshold */
				if (GET_MAJOR_VERSION(cluster->major_version) > cur_check->threshold_version)
					continue;
			}
			else
				Assert(cur_check->threshold_version == ALL_VERSIONS);

			/* Report file lives in the pg_upgrade log directory */
			snprintf(output_path, sizeof(output_path), "%s/%s",
					 log_opts.basedir,
					 cur_check->report_filename);

			/*
			 * The type(s) of interest might be wrapped in a domain, array,
			 * composite, or range, and these container types can be nested
			 * (to varying extents depending on server version, but that's not
			 * of concern here). To handle all these cases we need a
			 * recursive CTE.
			 */
			res = executeQueryOrDie(conn,
									"WITH RECURSIVE oids AS ( "
			/* start with the type(s) returned by base_query */
									" %s "
									" UNION ALL "
									" SELECT * FROM ( "
			/* inner WITH because we can only reference the CTE once */
									" WITH x AS (SELECT oid FROM oids) "
			/* domains on any type selected so far */
									" SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
									" UNION ALL "
			/* arrays over any type selected so far */
									" SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' "
									" UNION ALL "
			/* composite types containing any type selected so far */
									" SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
									" WHERE t.typtype = 'c' AND "
									" t.oid = c.reltype AND "
									" c.oid = a.attrelid AND "
									" NOT a.attisdropped AND "
									" a.atttypid = x.oid "
									" UNION ALL "
			/* ranges containing any type selected so far */
									" SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x "
									" WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid"
									" ) foo "
									") "
			/* now look for stored columns of any such type */
									"SELECT n.nspname, c.relname, a.attname "
									"FROM pg_catalog.pg_class c, "
									" pg_catalog.pg_namespace n, "
									" pg_catalog.pg_attribute a "
									"WHERE c.oid = a.attrelid AND "
									" NOT a.attisdropped AND "
									" a.atttypid IN (SELECT oid FROM oids) AND "
									" c.relkind IN ("
									CppAsString2(RELKIND_RELATION) ", "
									CppAsString2(RELKIND_MATVIEW) ", "
									CppAsString2(RELKIND_INDEX) ") AND "
									" c.relnamespace = n.oid AND "
			/* exclude possible orphaned temp tables */
									" n.nspname !~ '^pg_temp_' AND "
									" n.nspname !~ '^pg_toast_temp_' AND "
			/* exclude system catalogs, too */
									" n.nspname NOT IN ('pg_catalog', 'information_schema')",
									cur_check->base_query);

			ntups = PQntuples(res);

			/*
			 * The datatype was found, so extract the data and log to the
			 * requested filename. We need to open the file for appending
			 * since the check might have already found the type in another
			 * database earlier in the loop.
			 */
			if (ntups)
			{
				/*
				 * Make sure we have a buffer to save reports to now that we
				 * found a first failing check.
				 */
				if (!found)
					initPQExpBuffer(&report);
				found = true;

				/*
				 * If this is the first time we see an error for the check in
				 * question then print a status message of the failure.
				 */
				if (!results[checknum])
				{
					pg_log(PG_REPORT, " failed check: %s", _(cur_check->status));
					appendPQExpBuffer(&report, "\n%s\n%s %s\n",
									  _(cur_check->report_text),
									  _("A list of the problem columns is in the file:"),
									  output_path);
				}
				results[checknum] = true;

				i_nspname = PQfnumber(res, "nspname");
				i_relname = PQfnumber(res, "relname");
				i_attname = PQfnumber(res, "attname");

				for (int rowno = 0; rowno < ntups; rowno++)
				{
					/* lazily open the report file on the first problem row */
					if (script == NULL && (script = fopen_priv(output_path, "a")) == NULL)
						pg_fatal("could not open file \"%s\": %m", output_path);
					if (!db_used)
					{
						fprintf(script, "In database: %s\n", active_db->db_name);
						db_used = true;
					}
					fprintf(script, " %s.%s.%s\n",
							PQgetvalue(res, rowno, i_nspname),
							PQgetvalue(res, rowno, i_relname),
							PQgetvalue(res, rowno, i_attname));
				}

				if (script)
				{
					fclose(script);
					script = NULL;
				}
			}
			PQclear(res);
		}
		PQfinish(conn);
	}

	/* Fail with the combined report only after scanning all databases */
	if (found)
		pg_fatal("Data type checks failed: %s", report.data);

	check_ok();
}
/*
* fix_path_separator
@ -110,8 +595,6 @@ check_and_dump_old_cluster(bool live_check)
check_is_install_user(&old_cluster);
check_proper_datallowconn(&old_cluster);
check_for_prepared_transactions(&old_cluster);
check_for_composite_data_type_usage(&old_cluster);
check_for_reg_data_type_usage(&old_cluster);
check_for_isn_and_int8_passing_mismatch(&old_cluster);
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 1700)
@ -129,22 +612,7 @@ check_and_dump_old_cluster(bool live_check)
check_old_cluster_subscription_state();
}
/*
* PG 16 increased the size of the 'aclitem' type, which breaks the
* on-disk format for existing data.
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1500)
check_for_aclitem_data_type_usage(&old_cluster);
/*
* PG 12 removed types abstime, reltime, tinterval.
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
{
check_for_removed_data_type_usage(&old_cluster, "12", "abstime");
check_for_removed_data_type_usage(&old_cluster, "12", "reltime");
check_for_removed_data_type_usage(&old_cluster, "12", "tinterval");
}
check_for_data_types_usage(&old_cluster, data_types_usage_checks);
/*
* PG 14 changed the function signature of encoding conversion functions.
@ -176,21 +644,12 @@ check_and_dump_old_cluster(bool live_check)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
check_for_tables_with_oids(&old_cluster);
/*
* PG 12 changed the 'sql_identifier' type storage to be based on name,
* not varchar, which breaks on-disk format for existing data. So we need
* to prevent upgrade when used in user objects (tables, indexes, ...).
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
old_11_check_for_sql_identifier_data_type_usage(&old_cluster);
/*
* Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged
* hash indexes
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906)
{
old_9_6_check_for_unknown_data_type_usage(&old_cluster);
if (user_opts.check)
old_9_6_invalidate_hash_indexes(&old_cluster, true);
}
@ -199,14 +658,6 @@ check_and_dump_old_cluster(bool live_check)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905)
check_for_pg_role_prefix(&old_cluster);
if (GET_MAJOR_VERSION(old_cluster.major_version) == 904 &&
old_cluster.controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER)
check_for_jsonb_9_4_usage(&old_cluster);
/* Pre-PG 9.4 had a different 'line' data type internal format */
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903)
old_9_3_check_for_line_data_type_usage(&old_cluster);
/*
* While not a check option, we do this now because this is the only time
* the old server is running.
@ -1122,220 +1573,6 @@ check_for_tables_with_oids(ClusterInfo *cluster)
}
/*
 * check_for_composite_data_type_usage()
 *	Check for system-defined composite types used in user tables.
 *
 * Rowtype OIDs of system catalogs and information_schema views can change
 * across major versions and, unlike user-defined types, there is no way to
 * force them to match in the new cluster.  Any user table referencing such
 * a rowtype therefore blocks the upgrade.
 */
static void
check_for_composite_data_type_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];
	char	   *query;
	Oid			first_user_oid;
	bool		problem_found;

	prep_status("Checking for system-defined composite types in user tables");

	snprintf(report_path, sizeof(report_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_composite.txt");

	/*
	 * Look for composite types that were made during initdb *or* belong to
	 * information_schema; that's important in case information_schema was
	 * dropped and reloaded.
	 *
	 * The cutoff OID here should match the source cluster's value of
	 * FirstNormalObjectId.  We hardcode it rather than using that C #define
	 * because, if that #define is ever changed, our own version's value is
	 * NOT what to use.  Eventually we may need a test on the source cluster's
	 * version to select the correct value.
	 */
	first_user_oid = 16384;

	query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t "
					 "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
					 " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')",
					 first_user_oid);

	problem_found = check_for_data_types_usage(cluster, query, report_path);
	free(query);

	if (!problem_found)
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains system-defined composite types in user tables.\n"
			 "These type OIDs are not stable across PostgreSQL versions,\n"
			 "so this cluster cannot currently be upgraded. You can\n"
			 "drop the problem columns and restart the upgrade.\n"
			 "A list of the problem columns is in the file:\n"
			 " %s", report_path);
}
/*
 * check_for_reg_data_type_usage()
 *
 * pg_upgrade preserves only these system-assigned values:
 *	pg_class.oid, pg_type.oid, pg_enum.oid
 *
 * Most reg* data types store references to catalog entries whose OIDs are
 * not preserved, so user tables containing such columns cannot survive an
 * upgrade and must be rejected here.
 */
static void
check_for_reg_data_type_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];
	bool		problem_found;

	prep_status("Checking for reg* data types in user tables");

	snprintf(report_path, sizeof(report_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_reg.txt");

	/*
	 * Note: older servers will not have all of these reg* types, so we have
	 * to write the query like this rather than depending on casts to regtype.
	 */
	problem_found =
		check_for_data_types_usage(cluster,
								   "SELECT oid FROM pg_catalog.pg_type t "
								   "WHERE t.typnamespace = "
								   " (SELECT oid FROM pg_catalog.pg_namespace "
								   " WHERE nspname = 'pg_catalog') "
								   " AND t.typname IN ( "
		/* pg_class.oid is preserved, so 'regclass' is OK */
								   " 'regcollation', "
								   " 'regconfig', "
								   " 'regdictionary', "
								   " 'regnamespace', "
								   " 'regoper', "
								   " 'regoperator', "
								   " 'regproc', "
								   " 'regprocedure' "
		/* pg_authid.oid is preserved, so 'regrole' is OK */
		/* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
								   " )",
								   report_path);

	if (!problem_found)
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains one of the reg* data types in user tables.\n"
			 "These data types reference system OIDs that are not preserved by\n"
			 "pg_upgrade, so this cluster cannot currently be upgraded. You can\n"
			 "drop the problem columns and restart the upgrade.\n"
			 "A list of the problem columns is in the file:\n"
			 " %s", report_path);
}
/*
 * check_for_aclitem_data_type_usage
 *
 * The storage format of aclitem changed in PG 16, so reject any user table
 * that stores columns of that type.
 *
 * NOTE(review): unlike most checks, the report path here is not prefixed
 * with log_opts.basedir, so the file lands relative to the current
 * directory — confirm this is intentional.
 */
static void
check_for_aclitem_data_type_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];

	prep_status("Checking for incompatible \"%s\" data type in user tables",
				"aclitem");

	snprintf(report_path, sizeof(report_path), "tables_using_aclitem.txt");

	if (!check_for_data_type_usage(cluster, "pg_catalog.aclitem", report_path))
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains the \"aclitem\" data type in user tables.\n"
			 "The internal format of \"aclitem\" changed in PostgreSQL version 16\n"
			 "so this cluster cannot currently be upgraded. You can drop the\n"
			 "problem columns and restart the upgrade. A list of the problem\n"
			 "columns is in the file:\n"
			 " %s", report_path);
}
/*
 * check_for_removed_data_type_usage
 *
 * Reject user tables that use an in-core data type removed in a later
 * release.  Callers supply the exact type name and the version in which
 * it was removed (used only for the error message).
 *
 * NOTE(review): the report path is not prefixed with log_opts.basedir, so
 * the file lands relative to the current directory — confirm intentional.
 */
static void
check_for_removed_data_type_usage(ClusterInfo *cluster, const char *version,
								  const char *datatype)
{
	char		report_path[MAXPGPATH];
	char		qualified_name[NAMEDATALEN];

	prep_status("Checking for removed \"%s\" data type in user tables",
				datatype);

	snprintf(report_path, sizeof(report_path), "tables_using_%s.txt",
			 datatype);
	snprintf(qualified_name, sizeof(qualified_name), "pg_catalog.%s", datatype);

	if (!check_for_data_type_usage(cluster, qualified_name, report_path))
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains the \"%s\" data type in user tables.\n"
			 "The \"%s\" type has been removed in PostgreSQL version %s,\n"
			 "so this cluster cannot currently be upgraded. You can drop the\n"
			 "problem columns, or change them to another data type, and restart\n"
			 "the upgrade. A list of the problem columns is in the file:\n"
			 " %s", datatype, datatype, version, report_path);
}
/*
 * check_for_jsonb_9_4_usage()
 *
 * The on-disk format of jsonb changed during the 9.4 beta cycle; reject
 * user tables storing jsonb columns from an affected cluster.
 */
static void
check_for_jsonb_9_4_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];

	prep_status("Checking for incompatible \"jsonb\" data type");

	snprintf(report_path, sizeof(report_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_jsonb.txt");

	if (!check_for_data_type_usage(cluster, "pg_catalog.jsonb", report_path))
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n"
			 "The internal format of \"jsonb\" changed during 9.4 beta so this\n"
			 "cluster cannot currently be upgraded. You can\n"
			 "drop the problem columns and restart the upgrade.\n"
			 "A list of the problem columns is in the file:\n"
			 " %s", report_path);
}
/*
* check_for_pg_role_prefix()
*

View File

@ -352,6 +352,9 @@ typedef struct
} OSInfo;
/* Function signature for data type check version hook */
typedef bool (*DataTypesUsageVersionCheck) (ClusterInfo *cluster);
/*
* Global variables
*/
@ -479,18 +482,10 @@ unsigned int str2uint(const char *str);
/* version.c */
bool check_for_data_types_usage(ClusterInfo *cluster,
const char *base_query,
const char *output_path);
bool check_for_data_type_usage(ClusterInfo *cluster,
const char *type_name,
const char *output_path);
void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
void old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster);
bool jsonb_9_4_check_applicable(ClusterInfo *cluster);
void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
bool check_mode);
void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
void report_extension_updates(ClusterInfo *cluster);
/* parallel.c */

View File

@ -9,235 +9,23 @@
#include "postgres_fe.h"
#include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h"
#include "pg_upgrade.h"
/*
 * check_for_data_types_usage()
 *	Detect whether there are any stored columns depending on given type(s)
 *
 * If so, write a report to the given file name, and return true.
 *
 * base_query should be a SELECT yielding a single column named "oid",
 * containing the pg_type OIDs of one or more types that are known to have
 * inconsistent on-disk representations across server versions.
 *
 * We check for the type(s) in tables, matviews, and indexes, but not views;
 * there's no storage involved in a view.
 */
bool
check_for_data_types_usage(ClusterInfo *cluster,
						   const char *base_query,
						   const char *output_path)
{
	bool		found = false;
	FILE	   *script = NULL;
	int			dbnum;

	/* Run the check once per database in the cluster. */
	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
	{
		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
		PQExpBufferData querybuf;
		PGresult   *res;
		bool		db_used = false;
		int			ntups;
		int			rowno;
		int			i_nspname,
					i_relname,
					i_attname;

		/*
		 * The type(s) of interest might be wrapped in a domain, array,
		 * composite, or range, and these container types can be nested (to
		 * varying extents depending on server version, but that's not of
		 * concern here).  To handle all these cases we need a recursive CTE.
		 */
		initPQExpBuffer(&querybuf);
		appendPQExpBuffer(&querybuf,
						  "WITH RECURSIVE oids AS ( "
		/* start with the type(s) returned by base_query */
						  " %s "
						  " UNION ALL "
						  " SELECT * FROM ( "
		/* inner WITH because we can only reference the CTE once */
						  " WITH x AS (SELECT oid FROM oids) "
		/* domains on any type selected so far */
						  " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
						  " UNION ALL "
		/* arrays over any type selected so far */
						  " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' "
						  " UNION ALL "
		/* composite types containing any type selected so far */
						  " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
						  " WHERE t.typtype = 'c' AND "
						  " t.oid = c.reltype AND "
						  " c.oid = a.attrelid AND "
						  " NOT a.attisdropped AND "
						  " a.atttypid = x.oid "
						  " UNION ALL "
		/* ranges containing any type selected so far */
						  " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x "
						  " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid"
						  " ) foo "
						  ") "
		/* now look for stored columns of any such type */
						  "SELECT n.nspname, c.relname, a.attname "
						  "FROM pg_catalog.pg_class c, "
						  " pg_catalog.pg_namespace n, "
						  " pg_catalog.pg_attribute a "
						  "WHERE c.oid = a.attrelid AND "
						  " NOT a.attisdropped AND "
						  " a.atttypid IN (SELECT oid FROM oids) AND "
						  " c.relkind IN ("
						  CppAsString2(RELKIND_RELATION) ", "
						  CppAsString2(RELKIND_MATVIEW) ", "
						  CppAsString2(RELKIND_INDEX) ") AND "
						  " c.relnamespace = n.oid AND "
		/* exclude possible orphaned temp tables */
						  " n.nspname !~ '^pg_temp_' AND "
						  " n.nspname !~ '^pg_toast_temp_' AND "
		/* exclude system catalogs, too */
						  " n.nspname NOT IN ('pg_catalog', 'information_schema')",
						  base_query);

		res = executeQueryOrDie(conn, "%s", querybuf.data);

		ntups = PQntuples(res);
		i_nspname = PQfnumber(res, "nspname");
		i_relname = PQfnumber(res, "relname");
		i_attname = PQfnumber(res, "attname");
		for (rowno = 0; rowno < ntups; rowno++)
		{
			found = true;

			/* Open the report file lazily, on the first offending column. */
			if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
				pg_fatal("could not open file \"%s\": %m", output_path);
			if (!db_used)
			{
				fprintf(script, "In database: %s\n", active_db->db_name);
				db_used = true;
			}
			fprintf(script, " %s.%s.%s\n",
					PQgetvalue(res, rowno, i_nspname),
					PQgetvalue(res, rowno, i_relname),
					PQgetvalue(res, rowno, i_attname));
		}

		PQclear(res);
		termPQExpBuffer(&querybuf);
		PQfinish(conn);
	}

	if (script)
		fclose(script);

	return found;
}
/*
 * check_for_data_type_usage()
 *	Detect whether there are any stored columns depending on the given type
 *
 * If so, write a report to the given file name, and return true.
 *
 * type_name should be a fully qualified type name.  This is just a
 * trivial wrapper around check_for_data_types_usage() to convert a
 * type name into a base query.
 */
bool
check_for_data_type_usage(ClusterInfo *cluster,
						  const char *type_name,
						  const char *output_path)
{
	/* Build a one-row base query resolving the type name to its OID. */
	char	   *query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid",
								 type_name);
	bool		result = check_for_data_types_usage(cluster, query,
													output_path);

	free(query);
	return result;
}
/*
 * old_9_3_check_for_line_data_type_usage()
 *	9.3 -> 9.4
 *	Fully implement the 'line' data type in 9.4, which previously returned
 *	"not enabled" by default and was only functionally enabled with a
 *	compile-time switch; as of 9.4 "line" has a different on-disk
 *	representation format.
 */
void
old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];

	prep_status("Checking for incompatible \"line\" data type");

	snprintf(report_path, sizeof(report_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_line.txt");

	/* No offending columns: report success and we're done. */
	if (!check_for_data_type_usage(cluster, "pg_catalog.line", report_path))
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains the \"line\" data type in user tables.\n"
			 "This data type changed its internal and input/output format\n"
			 "between your old and new versions so this\n"
			 "cluster cannot currently be upgraded. You can\n"
			 "drop the problem columns and restart the upgrade.\n"
			 "A list of the problem columns is in the file:\n"
			 " %s", report_path);
}
/*
 * old_9_6_check_for_unknown_data_type_usage()
 *	9.6 -> 10
 *	It's no longer allowed to create tables or views with "unknown"-type
 *	columns.  We do not complain about views with such columns, because
 *	they should get silently converted to "text" columns during the DDL
 *	dump and reload; it seems unlikely to be worth making users do that
 *	by hand.  However, if there's a table with such a column, the DDL
 *	reload will fail, so we should pre-detect that rather than failing
 *	mid-upgrade.  Worse, if there's a matview with such a column, the
 *	DDL reload will silently change it to "text" which won't match the
 *	on-disk storage (which is like "cstring").  So we *must* reject that.
 */
void
old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
{
	char		output_path[MAXPGPATH];

	prep_status("Checking for invalid \"unknown\" user columns");

	snprintf(output_path, sizeof(output_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_unknown.txt");

	if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path))
	{
		pg_log(PG_REPORT, "fatal");
		pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n"
				 "This data type is no longer allowed in tables, so this\n"
				 "cluster cannot currently be upgraded. You can\n"
				 "drop the problem columns and restart the upgrade.\n"
				 "A list of the problem columns is in the file:\n"
				 " %s", output_path);
	}
	else
		check_ok();

	/*
	 * Fix: removed a stray "return false;" here — this function is void, so
	 * returning a value is a constraint violation (the line was leftover
	 * diff residue from jsonb_9_4_check_applicable()).
	 */
}
/*
@ -351,41 +139,6 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
check_ok();
}
/*
 * old_11_check_for_sql_identifier_data_type_usage()
 *	11 -> 12
 *	In 12, the sql_identifier data type was switched from name to varchar,
 *	which does affect the storage (name is by-ref, but not varlena).  This
 *	means user tables using sql_identifier for columns are broken because
 *	the on-disk format is different.
 */
void
old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster)
{
	char		report_path[MAXPGPATH];

	prep_status("Checking for invalid \"sql_identifier\" user columns");

	snprintf(report_path, sizeof(report_path), "%s/%s",
			 log_opts.basedir,
			 "tables_using_sql_identifier.txt");

	/* No stored sql_identifier columns: all clear. */
	if (!check_for_data_type_usage(cluster, "information_schema.sql_identifier",
								   report_path))
	{
		check_ok();
		return;
	}

	pg_log(PG_REPORT, "fatal");
	pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n"
			 "The on-disk format for this data type has changed, so this\n"
			 "cluster cannot currently be upgraded. You can\n"
			 "drop the problem columns and restart the upgrade.\n"
			 "A list of the problem columns is in the file:\n"
			 " %s", report_path);
}
/*
* report_extension_updates()
* Report extensions that should be updated.