Postgres95 1.01 Distribution - Virgin Sources

Marc G. Fournier 1996-07-09 06:22:35 +00:00
commit d31084e9d1
868 changed files with 242656 additions and 0 deletions

src/Makefile (new file)

@@ -0,0 +1,48 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Build and install postgres.
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/Makefile,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $
#
# NOTES
# objdir - location of the objects and generated files (eg. obj)
#
#-------------------------------------------------------------------------
SUBDIR= backend libpq bin
FIND = find
# assuming gnu tar and split here
TAR = tar
SPLIT = split
ETAGS = etags
XARGS = xargs
ifeq ($(USE_TCL), true)
SUBDIR += libpgtcl
endif
include mk/postgres.subdir.mk
TAGS:
rm -f TAGS; \
for i in backend libpq bin; do \
$(FIND) $$i -name '*.[chyl]' -print | $(XARGS) $(ETAGS) -a ; \
done
# target to generate a backup tar file and split files that can be
# saved to 1.44M floppy
BACKUP:
rm -f BACKUP.filelist BACKUP.tgz; \
$(FIND) . -not -path '*obj/*' -not -path '*data/*' -type f -print > BACKUP.filelist; \
$(TAR) --files-from BACKUP.filelist -c -z -v -f BACKUP.tgz
$(SPLIT) --bytes=1400k BACKUP.tgz pgBACKUP.
.PHONY: TAGS
.PHONY: BACKUP

src/Makefile.global (new file)

@@ -0,0 +1,306 @@
#-------------------------------------------------------------------------
#
# Makefile.global--
# global configuration for the Makefiles
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/Attic/Makefile.global,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $
#
# NOTES
# This is seen by any Makefiles that include mk/postgres.mk. To
# override the default setting, create a Makefile.custom in this
# directory and put your defines there. (Makefile.custom is included
# at the end of this file.)
#
# If you change any of these defines you probably have to
# gmake clean; gmake
# since no dependencies are created for these. (Of course you can
# be crafty and check what files really depend on them and just remake
# those).
#
#-------------------------------------------------------------------------
##############################################################################
#
# CONFIGURATION SECTION
#
# Following are settings pertaining to the postgres build and
# installation. The most important one is obviously the name
# of the port.
# The name of the port. Valid choices are:
# alpha - DEC Alpha AXP on OSF/1 2.0
# hpux - HP PA-RISC on HP-UX 9.0
# sparc_solaris - SUN SPARC on Solaris 2.4
# sparc - SUN SPARC on SunOS 4.1.3
# ultrix4 - DEC MIPS on Ultrix 4.4
# linux - Intel x86 on Linux 1.2 and Linux ELF
# (For non-ELF Linux, you need to comment out
# "LINUX_ELF=1" in src/mk/port/postgres.mk.linux)
# BSD44_derived - OSs derived from 4.4-lite BSD (NetBSD, FreeBSD)
# bsdi - BSD/OS 2.0 and 2.01
# aix - IBM on AIX 3.2.5
# irix5 - SGI MIPS on IRIX 5.3
# Some hooks are provided for
# svr4 - Intel x86 on Intel SVR4
# next - Motorola MC68K or Intel x86 on NeXTSTEP 3.2
# but these are guaranteed not to work as of yet.
#
# XXX Note that you MUST set PORTNAME here (or on the command line) so
# that port-dependent variables are correctly set within this file.
# Makefile.custom does not take effect (for ifeq purposes)
# until after this file is processed!
# make sure that you have no whitespace after the PORTNAME setting
# or the makefiles can get confused
PORTNAME= alpha
# POSTGRESLOGIN is the login name of the user who gets special
# privileges within the database. By default it is "postgres", but
# you can change it to any existing login name (such as your own
# login if you are compiling a private version or don't have root
# access).
POSTGRESLOGIN= postgres
# For convenience, POSTGRESDIR is where DATADIR, BINDIR, and LIBDIR
# and other target destinations are rooted. Of course, each of these is
# changeable separately.
POSTGRESDIR= /private/postgres95
# SRCDIR specifies where the source files are.
SRCDIR= $(POSTGRESDIR)/src
# DATADIR specifies where the postmaster expects to find its database.
# This may be overridden by command line options or the PGDATA environment
# variable.
DATADIR= $(POSTGRESDIR)/data
# Where the postgres executables live (changeable by just putting them
# somewhere else and putting that directory in your shell PATH)
BINDIR= $(POSTGRESDIR)/bin
# Where libpq.a gets installed. You must put it where your loader will
# look for it if you wish to use the -lpq convention. Otherwise you
# can just put the absolute pathname to the library at the end of your
# command line.
LIBDIR= $(POSTGRESDIR)/lib
# This is the directory where IPC utilities ipcs and ipcrm are located
#
IPCSDIR= /usr/bin
# Where the man pages (suitable for use with "man") get installed.
POSTMANDIR= $(POSTGRESDIR)/man
# Where the formatted documents (e.g., the reference manual) get installed.
POSTDOCDIR= $(POSTGRESDIR)/doc
# Where the header files necessary to build frontend programs get installed.
HEADERDIR= $(POSTGRESDIR)/include
# NAMEDATALEN is the max length for system identifiers (e.g. table names,
# attribute names, function names, etc.)
#
# These MUST be set here. DO NOT COMMENT THESE OUT
# Setting these too high will result in excess space usage for system catalogs
# Setting them too low will make the system unusable.
# values between 16 and 64 that are multiples of four are recommended.
#
# NOTE also that databases with different NAMEDATALEN's cannot interoperate!
#
NAMEDATALEN = 32
# OIDNAMELEN should be set to NAMEDATALEN + sizeof(Oid)
OIDNAMELEN = 36
CFLAGS+= -DNAMEDATALEN=$(NAMEDATALEN) -DOIDNAMELEN=$(OIDNAMELEN)
##############################################################################
#
# FEATURES
#
# To disable a feature, comment out the entire definition
# (that is, prepend '#', don't set it to "0" or "no").
# Comment out ENFORCE_ALIGNMENT if you do NOT want unaligned access to
# multi-byte types to generate a bus error.
ENFORCE_ALIGNMENT= true
# Comment out CDEBUG to turn off debugging and sanity-checking.
#
# XXX on MIPS, use -g3 if you want to compile with -O
CDEBUG= -g
# turn this on if you prefer European style dates instead of American
# style dates
# EUROPEAN_DATES = 1
# Comment out PROFILE to disable profiling.
#
# XXX define on MIPS if you want to be able to use pixie.
# note that this disables dynamic loading!
#PROFILE= -p -non_shared
# About the use of readline in psql:
# psql does not require the GNU readline and history libraries. Hence, we
# do not compile with them by default. However, there are hooks in the
# program which support the use of GNU readline and history. Should you
# decide to use them, change USE_READLINE to true and change READLINE_INCDIR
# and READLINE_LIBDIR to reflect the location of the readline and history
# headers and libraries.
#
#USE_READLINE= true
# directories for the readline and history libraries.
READLINE_INCDIR= /usr/local/include
HISTORY_INCDIR= /usr/local/include
READLINE_LIBDIR= /usr/local/lib
HISTORY_LIBDIR= /usr/local/lib
# If you do not plan to use Host based authentication,
# comment out the following line
HBA = 1
ifdef HBA
HBAFLAGS= -DHBA
endif
# If you plan to use Kerberos for authentication...
#
# Comment out KRBVERS if you do not use Kerberos.
# Set KRBVERS to "4" for Kerberos v4, "5" for Kerberos v5.
# XXX Edit the default Kerberos variables below!
#
#KRBVERS= 5
# Globally pass Kerberos file locations.
# these are used in the postmaster and all libpq applications.
#
# Adjust KRBINCS and KRBLIBS to reflect where you have Kerberos
# include files and libraries installed.
# PG_KRB_SRVNAM is the name under which POSTGRES is registered in
# the Kerberos database (KDC).
# PG_KRB_SRVTAB is the location of the server's keytab file.
#
ifdef KRBVERS
KRBINCS= -I/usr/athena/include
KRBLIBS= -L/usr/athena/lib
KRBFLAGS+= $(KRBINCS) -DPG_KRB_SRVNAM='"postgres_dbms"'
ifeq ($(KRBVERS), 4)
KRBFLAGS+= -DKRB4
KRBFLAGS+= -DPG_KRB_SRVTAB='"/etc/srvtab"'
KRBLIBS+= -lkrb -ldes
else
ifeq ($(KRBVERS), 5)
KRBFLAGS+= -DKRB5
KRBFLAGS+= -DPG_KRB_SRVTAB='"FILE:/krb5/srvtab.postgres"'
KRBLIBS+= -lkrb5 -lcrypto -lcom_err -lisode
endif
endif
endif
#
# location of Tcl/Tk headers and libraries
#
# Uncomment this to build the tcl utilities.
USE_TCL= true
# customize these to your site's needs
#
TCL_INCDIR= /usr/local/devel/tcl7.4/include
TCL_LIBDIR= /usr/local/devel/tcl7.4/lib
TCL_LIB = -ltcl7.4
TK_INCDIR= /usr/local/devel/tk4.0/include
TK_LIBDIR= /usr/local/devel/tk4.0/lib
TK_LIB = -ltk4.0
#
# include port specific rules and variables. For instance:
#
# signal(2) handling - this is here because it affects some of
# the frontend commands as well as the backend server.
#
# Ultrix and SunOS provide BSD signal(2) semantics by default.
#
# SVID2 and POSIX signal(2) semantics differ from BSD signal(2)
# semantics. We can use the POSIX sigaction(2) on systems that
# allow us to request restartable signals (SA_RESTART).
#
# Some systems don't allow restartable signals at all unless we
# link to a special BSD library.
#
# We devoutly hope that there aren't any systems that provide
# neither POSIX signals nor BSD signals. The alternative
# is to do signal-handler reinstallation, which doesn't work well
# at all.
#
-include $(MKDIR)/port/postgres.mk.$(PORTNAME)
##############################################################################
#
# Flags for CC and LD. (depend on CDEBUG and PROFILE)
#
# Globally pass debugging/optimization/profiling flags based
# on the options selected above.
ifdef CDEBUG
CFLAGS+= $(CDEBUG)
LDFLAGS+= $(CDEBUG)
else
ifndef CFLAGS_OPT
CFLAGS_OPT= -O
endif
CFLAGS+= $(CFLAGS_OPT)
#
# Uncommenting this will make things go a LOT faster, but you will
# also lose a lot of useful error-checking.
#
CFLAGS+= -DNO_ASSERT_CHECKING
endif
ifdef PROFILE
CFLAGS+= $(PROFILE)
LDFLAGS+= $(PROFILE)
endif
# Globally pass PORTNAME
CFLAGS+= -DPORTNAME_$(PORTNAME)
# Globally pass the default TCP port for postmaster(1).
CFLAGS+= -DPOSTPORT='"5432"'
# include flags from mk/port/postgres.mk.$(PORTNAME)
CFLAGS+= $(CFLAGS_BE)
LDADD+= $(LDADD_BE)
LDFLAGS+= $(LDFLAGS_BE)
##############################################################################
#
# Miscellaneous configuration
#
# This is the time, in seconds, that a given backend server
# will wait on a lock before deciding to abort the transaction
# (this is what we do in lieu of deadlock detection).
#
# Low numbers are not recommended as they will tend to cause
# false aborts if many transactions are long-lived.
CFLAGS+= -DDEADLOCK_TIMEOUT=60
srcdir= $(SRCDIR)
includedir= $(HEADERDIR)
objdir= obj
##############################################################################
#
# Customization.
#
-include $(MKDIR)/../Makefile.custom
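
The NAMEDATALEN/OIDNAMELEN note above comes down to one piece of arithmetic: OIDNAMELEN must equal NAMEDATALEN plus sizeof(Oid), and Oid is a 4-byte unsigned integer on all of the listed ports, so 32 + 4 = 36. A minimal standalone sketch of that invariant (illustrative only; the Oid typedef here is an assumption standing in for the backend's own definition):

    #include <assert.h>

    typedef unsigned int Oid;       /* assumption: Oid is 4 bytes on these ports */

    #define NAMEDATALEN 32          /* mirrors the settings in Makefile.global */
    #define OIDNAMELEN  36          /* must be NAMEDATALEN + sizeof(Oid) */

    int
    main(void)
    {
        assert(OIDNAMELEN == NAMEDATALEN + (int) sizeof(Oid));  /* 32 + 4 == 36 */
        return 0;
    }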

src/backend/Makefile (new file)

@@ -0,0 +1,289 @@
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for the postgres backend (and the postmaster)
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
#
#-------------------------------------------------------------------------
#
# The following turns on intermediate linking of partial objects to speed
# the link cycle during development. (To turn this off, put "BIGOBJS=false"
# in your custom makefile, ../Makefile.custom.)
BIGOBJS= true
PROG= postgres
MKDIR= ../mk
include $(MKDIR)/postgres.mk
include $(CURDIR)/access/Makefile.inc
include $(CURDIR)/bootstrap/Makefile.inc
include $(CURDIR)/catalog/Makefile.inc
include $(CURDIR)/commands/Makefile.inc
include $(CURDIR)/executor/Makefile.inc
include $(CURDIR)/include/Makefile.inc
include $(CURDIR)/lib/Makefile.inc
include $(CURDIR)/libpq/Makefile.inc
include $(CURDIR)/main/Makefile.inc
include $(CURDIR)/nodes/Makefile.inc
include $(CURDIR)/optimizer/Makefile.inc
include $(CURDIR)/parser/Makefile.inc
include $(CURDIR)/port/Makefile.inc
include $(CURDIR)/postmaster/Makefile.inc
include $(CURDIR)/regex/Makefile.inc
include $(CURDIR)/rewrite/Makefile.inc
include $(CURDIR)/storage/Makefile.inc
include $(CURDIR)/tcop/Makefile.inc
include $(CURDIR)/tioga/Makefile.inc
include $(CURDIR)/utils/Makefile.inc
SRCS:= ${SRCS_ACCESS} ${SRCS_BOOTSTRAP} $(SRCS_CATALOG) ${SRCS_COMMANDS} \
${SRCS_EXECUTOR} $(SRCS_LIB) $(SRCS_LIBPQ) ${SRCS_MAIN} \
${SRCS_NODES} ${SRCS_OPTIMIZER} ${SRCS_PARSER} ${SRCS_PORT} \
$(SRCS_POSTMASTER) ${SRCS_REGEX} ${SRCS_REWRITE} ${SRCS_STORAGE} \
${SRCS_TCOP} ${SRCS_UTILS}
ifeq ($(BIGOBJS), true)
OBJS= ACCESS.o BOOTSTRAP.o COMMANDS.o EXECUTOR.o MAIN.o MISC.o NODES.o \
PARSER.o OPTIMIZER.o REGEX.o REWRITE.o STORAGE.o TCOP.o UTILS.o
CLEANFILES+= $(subst .s,.o,$(SRCS:.c=.o)) $(OBJS)
else
OBJS:= $(subst .s,.o,$(SRCS:%.c=$(objdir)/%.o))
CLEANFILES+= $(notdir $(OBJS))
endif
#############################################################################
#
# TIOGA stuff
#
ifdef TIOGA
SRCS+= $(SRCS_TIOGA)
ifeq ($(BIGOBJS), true)
TIOGA.o: $(SRCS_TIOGA:%.c=$(objdir)/%.o)
$(make_partial)
OBJS+= TIOGA.o
CLEANFILES+= $(SRCS_TIOGA:%.c=%.o) TIOGA.o
else
OBJS+= $(SRCS_TIOGA:%.c=$(objdir)/%.o)
endif
endif
#############################################################################
#
# Compiling the postgres backend.
#
CFLAGS+= -DPOSTGRESDIR='"$(POSTGRESDIR)"' \
-DPGDATADIR='"$(DATADIR)"' \
-I$(CURDIR)/. -I$(CURDIR)/$(objdir) \
-I$(CURDIR)/include \
-I$(CURDIR)/port/$(PORTNAME)
# turn this on if you prefer European style dates instead of American
# style dates
ifdef EUROPEAN_DATES
CFLAGS += -DEUROPEAN_STYLE
endif
# kerberos flags
ifdef KRBVERS
CFLAGS+= $(KRBFLAGS)
LDADD+= $(KRBLIBS)
endif
# host based access flags
ifdef HBA
CFLAGS+= $(HBAFLAGS)
endif
#
# All systems except NEXTSTEP require the math library.
# Loader flags for system-dependent libraries are appended in
# src/backend/port/$(PORTNAME)/Makefile.inc
#
ifneq ($(PORTNAME), next)
LDADD+= -lm
endif
# statically link in libc for linux
ifeq ($(PORTNAME), linux)
LDADD+= -lc
endif
postgres: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
$(CC) $(LDFLAGS) -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
# Make this target first if you are doing a parallel make.
# The targets in 'first' need to be made sequentially because of dependencies.
# Then, you can make 'all' with parallelism turned on.
first: $(POSTGRES_DEPEND)
#############################################################################
#
# Partial objects for platforms with slow linkers.
#
ifeq ($(BIGOBJS), true)
OBJS_ACCESS:= $(SRCS_ACCESS:%.c=$(objdir)/%.o)
OBJS_BOOTSTRAP:= $(SRCS_BOOTSTRAP:%.c=$(objdir)/%.o)
OBJS_CATALOG:= $(SRCS_CATALOG:%.c=$(objdir)/%.o)
OBJS_COMMANDS:= $(SRCS_COMMANDS:%.c=$(objdir)/%.o)
OBJS_EXECUTOR:= $(SRCS_EXECUTOR:%.c=$(objdir)/%.o)
OBJS_MAIN:= $(SRCS_MAIN:%.c=$(objdir)/%.o)
OBJS_POSTMASTER:= $(SRCS_POSTMASTER:%.c=$(objdir)/%.o)
OBJS_LIB:= $(SRCS_LIB:%.c=$(objdir)/%.o)
OBJS_LIBPQ:= $(SRCS_LIBPQ:%.c=$(objdir)/%.o)
OBJS_PORT:= $(addprefix $(objdir)/,$(subst .s,.o,$(SRCS_PORT:.c=.o)))
OBJS_NODES:= $(SRCS_NODES:%.c=$(objdir)/%.o)
OBJS_PARSER:= $(SRCS_PARSER:%.c=$(objdir)/%.o)
OBJS_OPTIMIZER:= $(SRCS_OPTIMIZER:%.c=$(objdir)/%.o)
OBJS_REGEX:= $(SRCS_REGEX:%.c=$(objdir)/%.o)
OBJS_REWRITE:= $(SRCS_REWRITE:%.c=$(objdir)/%.o)
OBJS_STORAGE:= $(SRCS_STORAGE:%.c=$(objdir)/%.o)
OBJS_TCOP:= $(SRCS_TCOP:%.c=$(objdir)/%.o)
OBJS_UTILS:= $(SRCS_UTILS:%.c=$(objdir)/%.o)
ACCESS.o: $(OBJS_ACCESS)
$(make_partial)
BOOTSTRAP.o: $(OBJS_BOOTSTRAP)
$(make_partial)
COMMANDS.o: $(OBJS_COMMANDS)
$(make_partial)
EXECUTOR.o: $(OBJS_EXECUTOR)
$(make_partial)
MAIN.o: $(OBJS_MAIN) $(OBJS_POSTMASTER)
$(make_partial)
MISC.o: $(OBJS_CATALOG) $(OBJS_LIB) $(OBJS_LIBPQ) $(OBJS_PORT)
$(make_partial)
NODES.o: $(OBJS_NODES)
$(make_partial)
PARSER.o: $(OBJS_PARSER)
$(make_partial)
OPTIMIZER.o: $(OBJS_OPTIMIZER)
$(make_partial)
REGEX.o: $(OBJS_REGEX)
$(make_partial)
REWRITE.o: $(OBJS_REWRITE)
$(make_partial)
STORAGE.o: $(OBJS_STORAGE)
$(make_partial)
TCOP.o: $(OBJS_TCOP)
$(make_partial)
UTILS.o: $(OBJS_UTILS)
$(make_partial)
endif
#############################################################################
#
# Installation.
#
# Install the bki files to the data directory. We also copy a version
# of them that has "PGUID" intact, so one can change the value of the
# postgres userid before running initdb in the case of customizing the
# binary release (i.e., fixing up PGUID w/o recompiling the system).
# Those files are copied out as foo.source. The program newbki(1) can
# be run later to reset the postgres login id (but it must be run before
# initdb is run, or after clearing the data directory with
# cleardbdir(1)). [newbki distributed with v4r2 but not with Postgres95.]
#
# NAMEDATALEN=`egrep "^#define NAMEDATALEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \
# OIDNAMELEN=`egrep "^#define OIDNAMELEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \
install: beforeinstall pg_id $(BKIFILES) postgres
$(INSTALL) $(INSTL_EXE_OPTS) $(objdir)/postgres $(DESTDIR)$(BINDIR)/postgres
@rm -f $(DESTDIR)$(BINDIR)/postmaster
cd $(DESTDIR)$(BINDIR); ln -s postgres postmaster
@cd $(objdir); \
PG_UID=`./pg_id $(POSTGRESLOGIN)`; \
POSTGRESLOGIN=$(POSTGRESLOGIN);\
echo "NAMEDATALEN = $(NAMEDATALEN)"; \
echo "OIDNAMELEN = $(OIDNAMELEN)"; \
case $$PG_UID in "NOUSER") \
echo "Warning: no account named $(POSTGRESLOGIN), using yours";\
POSTGRESLOGIN=`whoami`; \
PG_UID=`./pg_id`;; \
esac ;\
for bki in $(BKIFILES); do \
sed \
-e "s/postgres PGUID/$$POSTGRESLOGIN $$PG_UID/" \
-e "s/NAMEDATALEN/$(NAMEDATALEN)/g" \
-e "s/OIDNAMELEN/$(OIDNAMELEN)/g" \
-e "s/PGUID/$$PG_UID/" \
< $$bki > $$bki.sed ; \
echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki."; \
$(INSTALL) $(INSTLOPTS) \
$$bki.sed $(DESTDIR)$(DATADIR)/files/$$bki; \
rm -f $$bki.sed; \
echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki.source."; \
$(INSTALL) $(INSTLOPTS) \
$$bki $(DESTDIR)$(DATADIR)/files/$$bki.source; \
done;
@echo "Installing $(DATADIR)/pg_hba";
@cp $(srcdir)/libpq/pg_hba $(DATADIR)
@chmod 644 $(DATADIR)/pg_hba
# so we can get the UID of the postgres owner (w/o moving pg_id to
# src/tools). We just want the vanilla LDFLAGS for pg_id
IDLDFLAGS:= $(LDFLAGS)
ifeq ($(PORTNAME), hpux)
ifeq ($(CC), cc)
IDLDFLAGS+= -Aa -D_HPUX_SOURCE
endif
endif
pg_id: $(srcdir)/bin/pg_id/pg_id.c
$(CC) $(IDLDFLAGS) -o $(objdir)/$(@F) $<
CLEANFILES+= pg_id postgres
#############################################################################
#
# Support for code development.
#
#
# Build the file, "./ID", used by the "gid" (grep-for-identifier) tool
#
IDFILE= ID
.PHONY: $(IDFILE)
$(IDFILE):
$(CURDIR)/makeID $(PORTNAME)
#
# Special rule to generate cpp'd version of a .c file. This is
# especially useful given all the hellish macro processing going on.
# The cpp'd version has a .C suffix. To create foo.C from foo.c, just
# type
# bmake foo.C
#
%.cpp: %.c
$(CC) -E $(CFLAGS) $(<:.C=.c) | cat -s | cb | tr -s '\012*' '\012' > $(objdir)/$(@F)
cppall: $(SRCS:.c=.cpp)
#
# To use Purify (SunOS only), define PURIFY to be the path (and
# options) with which to invoke the Purify loader. Only the executable
# needs to be loaded with Purify.
#
# PURIFY = /usr/sww/bin/purify -cache-dir=/usr/local/postgres/src/backend/purify-cache
#.if defined(PURIFY)
#${PROG}: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
# ${PURIFY} ${CC} ${LDFLAGS} -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
#
#CLEANFILES+= .purify* .pure .lock.*.o *_pure_*.o *.pure_*link*
#.endif

src/backend/access/Makefile.inc (new file)

@@ -0,0 +1,35 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for the access methods module
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
#
#-------------------------------------------------------------------------
accdir=$(CURDIR)/access
VPATH:=$(VPATH):$(accdir):\
$(accdir)/common:$(accdir)/hash:$(accdir)/heap:$(accdir)/index:\
$(accdir)/rtree:$(accdir)/nbtree:$(accdir)/transam
SUBSRCS=
include $(accdir)/common/Makefile.inc
include $(accdir)/hash/Makefile.inc
include $(accdir)/heap/Makefile.inc
include $(accdir)/index/Makefile.inc
include $(accdir)/rtree/Makefile.inc
include $(accdir)/nbtree/Makefile.inc
include $(accdir)/transam/Makefile.inc
SRCS_ACCESS:= $(SUBSRCS)
HEADERS+= attnum.h funcindex.h genam.h hash.h \
heapam.h hio.h htup.h ibit.h iqual.h istrat.h \
itup.h nbtree.h printtup.h relscan.h rtree.h \
sdir.h skey.h strat.h transam.h tupdesc.h tupmacs.h \
valid.h xact.h

src/backend/access/attnum.h (new file)

@@ -0,0 +1,61 @@
/*-------------------------------------------------------------------------
*
* attnum.h--
* POSTGRES attribute number definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: attnum.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef ATTNUM_H
#define ATTNUM_H
#include "c.h"
/*
* user defined attribute numbers start at 1. -ay 2/95
*/
typedef int16 AttrNumber;
#define InvalidAttrNumber 0
/* ----------------
* support macros
* ----------------
*/
/*
* AttributeNumberIsValid --
* True iff the attribute number is valid.
*/
#define AttributeNumberIsValid(attributeNumber) \
((bool) ((attributeNumber) != InvalidAttrNumber))
/*
* AttrNumberIsForUserDefinedAttr --
* True iff the attribute number corresponds to a user-defined attribute.
*/
#define AttrNumberIsForUserDefinedAttr(attributeNumber) \
((bool) ((attributeNumber) > 0))
/*
* AttrNumberGetAttrOffset --
* Returns the attribute offset for an attribute number.
*
* Note:
* Assumes the attribute number is for a user-defined attribute.
*/
#define AttrNumberGetAttrOffset(attNum) \
(AssertMacro(AttrNumberIsForUserDefinedAttr(attNum)) ? \
((attNum - 1)) : 0)
/*
* AttributeOffsetGetAttributeNumber --
* Returns the attribute number for an attribute offset.
*/
#define AttrOffsetGetAttrNumber(attributeOffset) \
((AttrNumber) (1 + attributeOffset))
#endif /* ATTNUM_H */
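
A usage sketch only (not part of the distribution): the two offset macros above are inverses for user-defined attributes, which are numbered from 1 while C arrays are indexed from 0.

    #include "access/attnum.h"

    /* Round-trip an attribute number through the offset macros:
     * AttrNumberGetAttrOffset(3) yields 2, AttrOffsetGetAttrNumber(2) yields 3. */
    static AttrNumber
    attno_roundtrip(AttrNumber attno)
    {
        int offset = AttrNumberGetAttrOffset(attno);
        return AttrOffsetGetAttrNumber(offset);
    }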

src/backend/access/common/Makefile.inc (new file)

@@ -0,0 +1,16 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/common
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/common/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= heaptuple.c heapvalid.c indextuple.c indexvalid.c printtup.c \
scankey.c tupdesc.c

File diff suppressed because it is too large.

src/backend/access/common/heapvalid.c (new file)

@@ -0,0 +1,134 @@
/*-------------------------------------------------------------------------
*
* heapvalid.c--
* heap tuple qualification validity checking code
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "access/htup.h"
#include "access/skey.h"
#include "access/heapam.h"
#include "utils/tqual.h"
#include "access/valid.h" /* where the declarations go */
#include "access/xact.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "fmgr.h"
#include "utils/elog.h"
#include "utils/rel.h"
/* ----------------
* heap_keytest
*
* Test a heap tuple with respect to a scan key.
* ----------------
*/
bool
heap_keytest(HeapTuple t,
TupleDesc tupdesc,
int nkeys,
ScanKey keys)
{
bool isnull;
Datum atp;
int test;
for (; nkeys--; keys++) {
atp = (Datum)heap_getattr(t, InvalidBuffer,
keys->sk_attno,
tupdesc,
&isnull);
if (isnull)
/* XXX eventually should check if SK_ISNULL */
return false;
if (keys->sk_flags & SK_COMMUTE)
test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure,
keys->sk_argument, atp);
else
test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure,
atp, keys->sk_argument);
if (!test == !(keys->sk_flags & SK_NEGATE))
return false;
}
return true;
}
/* ----------------
* heap_tuple_satisfies
*
* Returns a valid HeapTuple if it satisfies the timequal and keytest.
* Returns NULL otherwise. Used to be heap_satisifies (sic) which
* returned a boolean. It now returns a tuple so that we can avoid doing two
* PageGetItem's per tuple.
*
* Complete check of validity including LP_CTUP and keytest.
* This should perhaps be combined with valid somehow in the
* future. (Also, additional rule tests/time range tests.)
*
* on 8/21/92 mao says: i rearranged the tests here to do keytest before
* SatisfiesTimeQual. profiling indicated that even for vacuumed relations,
* time qual checking was more expensive than key testing. time qual is
* least likely to fail, too. we should really add the time qual test to
* the restriction and optimize it in the normal way. this has interactions
* with joey's expensive function work.
* ----------------
*/
HeapTuple
heap_tuple_satisfies(ItemId itemId,
Relation relation,
PageHeader disk_page,
TimeQual qual,
int nKeys,
ScanKey key)
{
HeapTuple tuple;
bool res;
if (! ItemIdIsUsed(itemId))
return NULL;
tuple = (HeapTuple) PageGetItem((Page) disk_page, itemId);
if (key != NULL)
res = heap_keytest(tuple, RelationGetTupleDescriptor(relation),
nKeys, key);
else
res = TRUE;
if (res && (relation->rd_rel->relkind == RELKIND_UNCATALOGED
|| HeapTupleSatisfiesTimeQual(tuple,qual)))
return tuple;
return (HeapTuple) NULL;
}
/*
* TupleUpdatedByCurXactAndCmd() -- Returns true if this tuple has
* already been updated once by the current transaction/command
* pair.
*/
bool
TupleUpdatedByCurXactAndCmd(HeapTuple t)
{
if (TransactionIdEquals(t->t_xmax,
GetCurrentTransactionId()) &&
t->t_cmax == GetCurrentCommandId())
return true;
return false;
}
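
A caller-side sketch only, not code from this file: qualifying a heap tuple on "attribute 2 equals 42" with the scan-key machinery above. ScanKeyEntryInitialize() comes from access/common/scankey.c (later in this commit); the equality procedure is passed in as a parameter because its registered OID would have to come from the catalogs.

    #include "postgres.h"
    #include "access/htup.h"
    #include "access/skey.h"
    #include "access/valid.h"
    #include "utils/rel.h"

    static bool
    tuple_attr2_equals_42(Relation relation, HeapTuple tuple,
                          RegProcedure int4_eq_proc)  /* "=" proc for int4 */
    {
        ScanKeyData key;

        /* flags 0: no SK_COMMUTE, no SK_NEGATE; test attribute number 2 */
        ScanKeyEntryInitialize(&key, (bits16) 0, (AttrNumber) 2,
                               int4_eq_proc, Int32GetDatum(42));
        return heap_keytest(tuple, RelationGetTupleDescriptor(relation),
                            1, &key);
    }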

src/backend/access/common/indextuple.c (new file)

@@ -0,0 +1,427 @@
/*-------------------------------------------------------------------------
*
* indextuple.c--
* This file contains index tuple accessor and mutator routines,
* as well as a few various tuple utilities.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <string.h>
#include "c.h"
#include "access/ibit.h"
#include "access/itup.h" /* where the declarations go */
#include "access/heapam.h"
#include "access/genam.h"
#include "access/tupdesc.h"
#include "access/tupmacs.h"
#include "storage/itemptr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
static Size IndexInfoFindDataOffset(unsigned short t_info);
/* ----------------------------------------------------------------
* index_ tuple interface routines
* ----------------------------------------------------------------
*/
/* ----------------
* index_formtuple
* ----------------
*/
IndexTuple
index_formtuple(TupleDesc tupleDescriptor,
Datum value[],
char null[])
{
register char *tp; /* tuple pointer */
IndexTuple tuple; /* return tuple */
Size size, hoff;
int i;
unsigned short infomask = 0;
bool hasnull = false;
char tupmask = 0;
int numberOfAttributes = tupleDescriptor->natts;
if (numberOfAttributes > MaxIndexAttributeNumber)
elog(WARN, "index_formtuple: numberOfAttributes of %d > %d",
numberOfAttributes, MaxIndexAttributeNumber);
for (i = 0; i < numberOfAttributes && !hasnull; i++) {
if (null[i] != ' ') hasnull = true;
}
if (hasnull) infomask |= INDEX_NULL_MASK;
hoff = IndexInfoFindDataOffset(infomask);
size = hoff
+ ComputeDataSize(tupleDescriptor,
value, null);
size = DOUBLEALIGN(size); /* be conservative */
tp = (char *) palloc(size);
tuple = (IndexTuple) tp;
memset(tp,0,(int)size);
DataFill((char *)tp + hoff,
tupleDescriptor,
value,
null,
&tupmask,
(hasnull ? (bits8*)tp + sizeof(*tuple) : NULL));
/*
* We do this because DataFill wants to initialize a "tupmask" which
* is used for HeapTuples, but we want an indextuple infomask. The only
* "relevent" info is the "has variable attributes" field, which is in
* mask position 0x02. We have already set the null mask above.
*/
if (tupmask & 0x02) infomask |= INDEX_VAR_MASK;
/*
* Here we make sure that we can actually hold the size. We also want
* to make sure that size is not aligned oddly. This actually is a
* rather odd way to make sure the size is not too large overall.
*/
if (size & 0xE000)
elog(WARN, "index_formtuple: data takes %d bytes: too big", size);
infomask |= size;
/* ----------------
* initialize metadata
* ----------------
*/
tuple->t_info = infomask;
return (tuple);
}
/* ----------------
* fastgetiattr
*
* This is a newer version of fastgetiattr which attempts to be
* faster by caching attribute offsets in the attribute descriptor.
*
* an alternate way to speed things up would be to cache offsets
* with the tuple, but that seems more difficult unless you take
* the storage hit of actually putting those offsets into the
* tuple you send to disk. Yuck.
*
* This scheme will be slightly slower than that, but should
* perform well for queries which hit large #'s of tuples. After
* you cache the offsets once, examining all the other tuples using
* the same attribute descriptor will go much quicker. -cim 5/4/91
* ----------------
*/
char *
fastgetiattr(IndexTuple tup,
int attnum,
TupleDesc tupleDesc,
bool *isnull)
{
register char *tp; /* ptr to att in tuple */
register char *bp; /* ptr to att in tuple */
int slow; /* do we have to walk nulls? */
register int data_off; /* tuple data offset */
/* ----------------
* sanity checks
* ----------------
*/
Assert(PointerIsValid(isnull));
Assert(attnum > 0);
/* ----------------
* Three cases:
*
* 1: No nulls and no variable length attributes.
* 2: Has a null or a varlena AFTER att.
* 3: Has nulls or varlenas BEFORE att.
* ----------------
*/
*isnull = false;
data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup :
IndexInfoFindDataOffset(tup->t_info);
if (IndexTupleNoNulls(tup)) {
/* first attribute is always at position zero */
if (attnum == 1) {
return(fetchatt(&(tupleDesc->attrs[0]), (char *) tup + data_off));
}
attnum--;
if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
return(fetchatt(&(tupleDesc->attrs[attnum]),
(char *) tup + data_off +
tupleDesc->attrs[attnum]->attcacheoff));
}
tp = (char *) tup + data_off;
slow = 0;
}else { /* there's a null somewhere in the tuple */
bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are here! */
slow = 0;
/* ----------------
* check to see if desired att is null
* ----------------
*/
attnum--;
{
if (att_isnull(attnum, bp)) {
*isnull = true;
return NULL;
}
}
/* ----------------
* Now check to see if any preceding bits are null...
* ----------------
*/
{
register int i = 0; /* current offset in bp */
register int mask; /* bit in byte we're looking at */
register char n; /* current byte in bp */
register int byte, finalbit;
byte = attnum >> 3;
finalbit = attnum & 0x07;
for (; i <= byte; i++) {
n = bp[i];
if (i < byte) {
/* check for nulls in any "earlier" bytes */
if ((~n) != 0) {
slow++;
break;
}
} else {
/* check for nulls "before" final bit of last byte*/
mask = (finalbit << 1) - 1;
if ((~n) & mask)
slow++;
}
}
}
tp = (char *) tup + data_off;
}
/* now check for any non-fixed length attrs before our attribute */
if (!slow) {
if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
return(fetchatt(&(tupleDesc->attrs[attnum]),
tp + tupleDesc->attrs[attnum]->attcacheoff));
}else if (!IndexTupleAllFixed(tup)) {
register int j = 0;
for (j = 0; j < attnum && !slow; j++)
if (tupleDesc->attrs[j]->attlen < 1) slow = 1;
}
}
/*
* if slow is zero, and we got here, we know that we have a tuple with
* no nulls. We also know that we have to initialize the remainder of
* the attribute cached offset values.
*/
if (!slow) {
register int j = 1;
register long off;
/*
* need to set cache for some atts
*/
tupleDesc->attrs[0]->attcacheoff = 0;
while (tupleDesc->attrs[j]->attcacheoff > 0) j++;
off = tupleDesc->attrs[j-1]->attcacheoff +
tupleDesc->attrs[j-1]->attlen;
for (; j < attnum + 1; j++) {
/*
* Fix me when going to a machine with more than a four-byte
* word!
*/
switch(tupleDesc->attrs[j]->attlen)
{
case -1:
off = (tupleDesc->attrs[j]->attalign=='d')?
DOUBLEALIGN(off):INTALIGN(off);
break;
case sizeof(char):
break;
case sizeof(short):
off = SHORTALIGN(off);
break;
case sizeof(int32):
off = INTALIGN(off);
break;
default:
if (tupleDesc->attrs[j]->attlen > sizeof(int32))
off = (tupleDesc->attrs[j]->attalign=='d')?
DOUBLEALIGN(off) : LONGALIGN(off);
else
elog(WARN, "fastgetiattr: attribute %d has len %d",
j, tupleDesc->attrs[j]->attlen);
break;
}
tupleDesc->attrs[j]->attcacheoff = off;
off += tupleDesc->attrs[j]->attlen;
}
return(fetchatt( &(tupleDesc->attrs[attnum]),
tp + tupleDesc->attrs[attnum]->attcacheoff));
}else {
register bool usecache = true;
register int off = 0;
register int i;
/*
* Now we know that we have to walk the tuple CAREFULLY.
*/
for (i = 0; i < attnum; i++) {
if (!IndexTupleNoNulls(tup)) {
if (att_isnull(i, bp)) {
usecache = false;
continue;
}
}
if (usecache && tupleDesc->attrs[i]->attcacheoff > 0) {
off = tupleDesc->attrs[i]->attcacheoff;
if (tupleDesc->attrs[i]->attlen == -1)
usecache = false;
else
continue;
}
if (usecache) tupleDesc->attrs[i]->attcacheoff = off;
switch(tupleDesc->attrs[i]->attlen)
{
case sizeof(char):
off++;
break;
case sizeof(short):
off = SHORTALIGN(off) + sizeof(short);
break;
case -1:
usecache = false;
off = (tupleDesc->attrs[i]->attalign=='d')?
DOUBLEALIGN(off):INTALIGN(off);
off += VARSIZE(tp + off);
break;
default:
if (tupleDesc->attrs[i]->attlen > sizeof(int32))
off = (tupleDesc->attrs[i]->attalign=='d') ?
DOUBLEALIGN(off) + tupleDesc->attrs[i]->attlen :
LONGALIGN(off) + tupleDesc->attrs[i]->attlen;
else
elog(WARN, "fastgetiattr2: attribute %d has len %d",
i, tupleDesc->attrs[i]->attlen);
break;
}
}
return(fetchatt(&tupleDesc->attrs[attnum], tp + off));
}
}
/* ----------------
* index_getattr
* ----------------
*/
Datum
index_getattr(IndexTuple tuple,
AttrNumber attNum,
TupleDesc tupDesc,
bool *isNullOutP)
{
Assert (attNum > 0);
return (Datum)
fastgetiattr(tuple, attNum, tupDesc, isNullOutP);
}
RetrieveIndexResult
FormRetrieveIndexResult(ItemPointer indexItemPointer,
ItemPointer heapItemPointer)
{
RetrieveIndexResult result;
Assert(ItemPointerIsValid(indexItemPointer));
Assert(ItemPointerIsValid(heapItemPointer));
result = (RetrieveIndexResult) palloc(sizeof *result);
result->index_iptr = *indexItemPointer;
result->heap_iptr = *heapItemPointer;
return (result);
}
/*
* Takes an infomask as argument (primarily because this needs to be usable
* at index_formtuple time so enough space is allocated).
*
* Change me if adding an attribute to IndexTuples!!!!!!!!!!!
*/
static Size
IndexInfoFindDataOffset(unsigned short t_info)
{
if (!(t_info & INDEX_NULL_MASK))
return((Size) sizeof(IndexTupleData));
else {
Size size = sizeof(IndexTupleData);
if (t_info & INDEX_NULL_MASK) {
size += sizeof(IndexAttributeBitMapData);
}
return DOUBLEALIGN(size); /* be conservative */
}
}
/*
* Copies source into target. If *target == NULL, we palloc space; otherwise
* we assume we have space that is already palloc'ed.
*/
void
CopyIndexTuple(IndexTuple source, IndexTuple *target)
{
Size size;
IndexTuple ret;
size = IndexTupleSize(source);
if (*target == NULL) {
*target = (IndexTuple) palloc(size);
}
ret = *target;
memmove((char*)ret, (char*)source, size);
}
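
A caller-side sketch only: building a one-attribute index tuple with index_formtuple() above. The null[] array uses a space (' ') to mean "not null"; any other character marks the attribute as null, exactly as the hasnull loop tests. The single-int4 tuple descriptor is assumed to be supplied by the caller.

    #include "postgres.h"
    #include "access/tupdesc.h"
    #include "access/itup.h"

    static IndexTuple
    make_int4_index_tuple(TupleDesc tupdesc, int32 val)
    {
        Datum value[1];
        char  null[1];

        value[0] = Int32GetDatum(val);
        null[0]  = ' ';                 /* ' ' means the attribute is not null */
        return index_formtuple(tupdesc, value, null);
    }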

src/backend/access/common/indexvalid.c (new file)

@@ -0,0 +1,84 @@
/*-------------------------------------------------------------------------
*
* indexvalid.c--
* index tuple qualification validity checking code
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "executor/execdebug.h"
#include "access/genam.h"
#include "access/iqual.h" /* where the declarations go */
#include "access/itup.h"
#include "access/skey.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "utils/rel.h"
/* ----------------------------------------------------------------
* index scan key qualification code
* ----------------------------------------------------------------
*/
int NIndexTupleProcessed;
/* ----------------
* index_keytest
*
* old comments
* May eventually combine with other tests (like timeranges)?
* Should have Buffer buffer; as an argument and pass it to amgetattr.
* ----------------
*/
bool
index_keytest(IndexTuple tuple,
TupleDesc tupdesc,
int scanKeySize,
ScanKey key)
{
bool isNull;
Datum datum;
int test;
IncrIndexProcessed();
while (scanKeySize > 0) {
datum = index_getattr(tuple,
1,
tupdesc,
&isNull);
if (isNull) {
/* XXX eventually should check if SK_ISNULL */
return (false);
}
if (key[0].sk_flags & SK_COMMUTE) {
test = (int) (*(key[0].sk_func))
(DatumGetPointer(key[0].sk_argument),
datum);
} else {
test = (int) (*(key[0].sk_func))
(datum,
DatumGetPointer(key[0].sk_argument));
}
if (!test == !(key[0].sk_flags & SK_NEGATE)) {
return (false);
}
scanKeySize -= 1;
key++;
}
return (true);
}

src/backend/access/common/printtup.c (new file)

@@ -0,0 +1,306 @@
/*-------------------------------------------------------------------------
*
* printtup.c--
* Routines to print out tuples to the destination (binary or non-binary
* portals, frontend/interactive backend, etc.).
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <sys/file.h>
#include <stdio.h>
#include <string.h>
#include "postgres.h"
#include "access/heapam.h"
#include "access/htup.h"
#include "access/skey.h"
#include "access/printtup.h"
#include "access/tupdesc.h"
#include "storage/buf.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
#include "fmgr.h"
#include "utils/elog.h"
#include "utils/syscache.h"
#include "catalog/pg_type.h"
#include "libpq/libpq.h"
/* ----------------------------------------------------------------
* printtup / debugtup support
* ----------------------------------------------------------------
*/
/* ----------------
* typtoout - used by printtup and debugtup
* ----------------
*/
Oid
typtoout(Oid type)
{
HeapTuple typeTuple;
typeTuple = SearchSysCacheTuple(TYPOID,
ObjectIdGetDatum(type),
0, 0, 0);
if (HeapTupleIsValid(typeTuple))
return((Oid)
((TypeTupleForm) GETSTRUCT(typeTuple))->typoutput);
elog(WARN, "typtoout: Cache lookup of type %d failed", type);
return(InvalidOid);
}
Oid
gettypelem(Oid type)
{
HeapTuple typeTuple;
typeTuple = SearchSysCacheTuple(TYPOID,
ObjectIdGetDatum(type),
0,0,0);
if (HeapTupleIsValid(typeTuple))
return((Oid)
((TypeTupleForm) GETSTRUCT(typeTuple))->typelem);
elog(WARN, "typtoout: Cache lookup of type %d failed", type);
return(InvalidOid);
}
/* ----------------
* printtup
* ----------------
*/
void
printtup(HeapTuple tuple, TupleDesc typeinfo)
{
int i, j, k;
char *outputstr, *attr;
bool isnull;
Oid typoutput;
/* ----------------
* tell the frontend to expect new tuple data
* ----------------
*/
pq_putnchar("D", 1);
/* ----------------
* send a bitmap of which attributes are null
* ----------------
*/
j = 0;
k = 1 << 7;
for (i = 0; i < tuple->t_natts; ) {
attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull);
if (!isnull)
j |= k;
k >>= 1;
if (!(i & 7)) {
pq_putint(j, 1);
j = 0;
k = 1 << 7;
}
}
if (i & 7)
pq_putint(j, 1);
/* ----------------
* send the attributes of this tuple
* ----------------
*/
for (i = 0; i < tuple->t_natts; ++i) {
attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid);
if (!isnull && OidIsValid(typoutput)) {
outputstr = fmgr(typoutput, attr,
gettypelem(typeinfo->attrs[i]->atttypid));
pq_putint(strlen(outputstr)+4, 4);
pq_putnchar(outputstr, strlen(outputstr));
pfree(outputstr);
}
}
}
/* ----------------
* printatt
* ----------------
*/
static void
printatt(unsigned attributeId,
AttributeTupleForm attributeP,
char *value)
{
printf("\t%2d: %.*s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n",
attributeId,
NAMEDATALEN, /* attname is a char16 */
attributeP->attname.data,
value != NULL ? " = \"" : "",
value != NULL ? value : "",
value != NULL ? "\"" : "",
(unsigned int) (attributeP->atttypid),
attributeP->attlen,
attributeP->attbyval ? 't' : 'f');
}
/* ----------------
* showatts
* ----------------
*/
void
showatts(char *name, TupleDesc tupleDesc)
{
int i;
int natts = tupleDesc->natts;
AttributeTupleForm *attinfo = tupleDesc->attrs;
puts(name);
for (i = 0; i < natts; ++i)
printatt((unsigned) i+1, attinfo[i], (char *) NULL);
printf("\t----\n");
}
/* ----------------
* debugtup
* ----------------
*/
void
debugtup(HeapTuple tuple, TupleDesc typeinfo)
{
register int i;
char *attr, *value;
bool isnull;
Oid typoutput;
for (i = 0; i < tuple->t_natts; ++i) {
attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid);
if (!isnull && OidIsValid(typoutput)) {
value = fmgr(typoutput, attr,
gettypelem(typeinfo->attrs[i]->atttypid));
printatt((unsigned) i+1, typeinfo->attrs[i], value);
pfree(value);
}
}
printf("\t----\n");
}
/*#define IPORTAL_DEBUG*/
/* ----------------
* printtup_internal
* Protocol expects either T, D, C, E, or N.
* We use a different data prefix, e.g. 'B' instead of 'D' to
* indicate a tuple in internal (binary) form.
*
* This is same as printtup, except we don't use the typout func.
* ----------------
*/
void
printtup_internal(HeapTuple tuple, TupleDesc typeinfo)
{
int i, j, k;
char *attr;
bool isnull;
/* ----------------
* tell the frontend to expect new tuple data
* ----------------
*/
pq_putnchar("B", 1);
/* ----------------
* send a bitmap of which attributes are null
* ----------------
*/
j = 0;
k = 1 << 7;
for (i = 0; i < tuple->t_natts; ) {
attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull);
if (!isnull)
j |= k;
k >>= 1;
if (!(i & 7)) {
pq_putint(j, 1);
j = 0;
k = 1 << 7;
}
}
if (i & 7)
pq_putint(j, 1);
/* ----------------
* send the attributes of this tuple
* ----------------
*/
#ifdef IPORTAL_DEBUG
fprintf(stderr, "sending tuple with %d atts\n", tuple->t_natts);
#endif
for (i = 0; i < tuple->t_natts; ++i) {
int32 len = typeinfo->attrs[i]->attlen;
attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
if (!isnull) {
/* # of bytes, and opaque data */
if (len == -1) {
/* variable length, assume a varlena structure */
len = VARSIZE(attr) - VARHDRSZ;
pq_putint(len, sizeof(int32));
pq_putnchar(VARDATA(attr), len);
#ifdef IPORTAL_DEBUG
{
char *d = VARDATA(attr);
fprintf(stderr, "length %d data %x%x%x%x\n",
len, *d, *(d+1), *(d+2), *(d+3));
}
#endif
} else {
/* fixed size */
if (typeinfo->attrs[i]->attbyval) {
int8 i8;
int16 i16;
int32 i32;
pq_putint(len, sizeof(int32));
switch (len) {
case sizeof(int8):
i8 = DatumGetChar(attr);
pq_putnchar((char *) &i8, len);
break;
case sizeof(int16):
i16 = DatumGetInt16(attr);
pq_putnchar((char *) &i16, len);
break;
case sizeof(int32):
i32 = DatumGetInt32(attr);
pq_putnchar((char *) &i32, len);
break;
}
#ifdef IPORTAL_DEBUG
fprintf(stderr, "byval length %d data %d\n", len, attr);
#endif
} else {
pq_putint(len, sizeof(int32));
pq_putnchar(attr, len);
#ifdef IPORTAL_DEBUG
fprintf(stderr, "byref length %d data %x\n", len, attr);
#endif
}
}
}
}
}
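
An illustrative sketch of the wire format produced above, seen from the receiving side: after the one-byte message type ('D' or 'B'), printtup sends one bitmap byte per eight attributes, most significant bit first, and a set bit means "not null". A frontend could test a 1-based attribute number against that bitmap as follows (the function is hypothetical, not part of libpq):

    #include <stdbool.h>

    static bool
    attr_is_null(const unsigned char *bitmap, int attno)  /* attno is 1-based */
    {
        int i    = attno - 1;
        int byte = i >> 3;            /* one bitmap byte covers 8 attributes */
        int bit  = 7 - (i & 7);       /* high bit corresponds to attribute 1 */

        return (bitmap[byte] & (1 << bit)) == 0;   /* clear bit => null */
    }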

src/backend/access/common/scankey.c (new file)

@@ -0,0 +1,68 @@
/*-------------------------------------------------------------------------
*
* scan.c--
* scan direction and key code
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/scankey.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "access/sdir.h"
#include "access/attnum.h"
#include "access/skey.h"
#include "fmgr.h"
/*
* ScanKeyEntryIsLegal --
* True iff the scan key entry is legal.
*/
#define ScanKeyEntryIsLegal(entry) \
((bool) (AssertMacro(PointerIsValid(entry)) && \
AttributeNumberIsValid(entry->sk_attno)))
/*
* ScanKeyEntrySetIllegal --
* Marks a scan key entry as illegal.
*/
void
ScanKeyEntrySetIllegal(ScanKey entry)
{
Assert(PointerIsValid(entry));
entry->sk_flags = 0; /* just in case... */
entry->sk_attno = InvalidAttrNumber;
entry->sk_procedure = 0; /* should be InvalidRegProcedure */
}
/*
* ScanKeyEntryInitialize --
* Initializes a scan key entry.
*
* Note:
* Assumes the scan key entry is valid.
* Assumes the initialized scan key entry will be legal.
*/
void
ScanKeyEntryInitialize(ScanKey entry,
bits16 flags,
AttrNumber attributeNumber,
RegProcedure procedure,
Datum argument)
{
Assert(PointerIsValid(entry));
entry->sk_flags = flags;
entry->sk_attno = attributeNumber;
entry->sk_procedure = procedure;
entry->sk_argument = argument;
fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs);
Assert(ScanKeyEntryIsLegal(entry));
}

src/backend/access/common/tupdesc.c (new file)

@@ -0,0 +1,398 @@
/*-------------------------------------------------------------------------
*
* tupdesc.c--
* POSTGRES tuple descriptor support code
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* some of the executor utility code such as "ExecTypeFromTL" should be
* moved here.
*
*-------------------------------------------------------------------------
*/
#include <stdio.h> /* for sprintf() */
#include <ctype.h>
#include <string.h>
#include "postgres.h"
#include "nodes/pg_list.h"
#include "nodes/parsenodes.h"
#include "access/attnum.h"
#include "access/htup.h"
#include "access/tupdesc.h"
#include "utils/builtins.h"
#include "utils/elog.h" /* XXX generate exceptions instead */
#include "utils/palloc.h"
#include "utils/syscache.h"
#include "catalog/pg_type.h"
#include "nodes/primnodes.h"
#include "parser/catalog_utils.h"
/* ----------------------------------------------------------------
* CreateTemplateTupleDesc
*
* This function allocates and zeros a tuple descriptor structure.
* ----------------------------------------------------------------
*/
TupleDesc
CreateTemplateTupleDesc(int natts)
{
uint32 size;
TupleDesc desc;
/* ----------------
* sanity checks
* ----------------
*/
AssertArg(natts >= 1);
/* ----------------
* allocate enough memory for the tuple descriptor and
* zero it as TupleDescInitEntry assumes that the descriptor
* is filled with NULL pointers.
* ----------------
*/
size = natts * sizeof (AttributeTupleForm);
desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
desc->attrs = (AttributeTupleForm*) palloc(size);
memset(desc->attrs, 0, size);
desc->natts = natts;
return (desc);
}
/* ----------------------------------------------------------------
* CreateTupleDesc
*
* This function allocates a new TupleDesc from AttributeTupleForm array
* ----------------------------------------------------------------
*/
TupleDesc
CreateTupleDesc(int natts, AttributeTupleForm* attrs)
{
TupleDesc desc;
/* ----------------
* sanity checks
* ----------------
*/
AssertArg(natts >= 1);
desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
desc->attrs = attrs;
desc->natts = natts;
return (desc);
}
/* ----------------------------------------------------------------
* CreateTupleDescCopy
*
* This function creates a new TupleDesc by copying from an existing
* TupleDesc
*
* ----------------------------------------------------------------
*/
TupleDesc
CreateTupleDescCopy(TupleDesc tupdesc)
{
TupleDesc desc;
int i, size;
desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
desc->natts = tupdesc->natts;
size = desc->natts * sizeof (AttributeTupleForm);
desc->attrs = (AttributeTupleForm*) palloc(size);
for (i=0;i<desc->natts;i++) {
desc->attrs[i] =
(AttributeTupleForm)palloc(ATTRIBUTE_TUPLE_SIZE);
memmove(desc->attrs[i],
tupdesc->attrs[i],
ATTRIBUTE_TUPLE_SIZE);
}
return desc;
}
/* ----------------------------------------------------------------
* TupleDescInitEntry
*
* This function initializes a single attribute structure in
* a preallocated tuple descriptor.
* ----------------------------------------------------------------
*/
bool
TupleDescInitEntry(TupleDesc desc,
AttrNumber attributeNumber,
char *attributeName,
char *typeName,
int attdim,
bool attisset)
{
HeapTuple tuple;
TypeTupleForm typeForm;
AttributeTupleForm att;
/* ----------------
* sanity checks
* ----------------
*/
AssertArg(PointerIsValid(desc));
AssertArg(attributeNumber >= 1);
/* attributeName's are sometimes NULL,
from resdom's. I don't know why that is, though -- Jolly */
/* AssertArg(NameIsValid(attributeName));*/
/* AssertArg(NameIsValid(typeName));*/
AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1]));
/* ----------------
* allocate storage for this attribute
* ----------------
*/
att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
desc->attrs[attributeNumber - 1] = att;
/* ----------------
* initialize some of the attribute fields
* ----------------
*/
att->attrelid = 0; /* dummy value */
if (attributeName != NULL)
namestrcpy(&(att->attname), attributeName);
else
memset(att->attname.data,0,NAMEDATALEN);
att->attdefrel = 0; /* dummy value */
att->attnvals = 0; /* dummy value */
att->atttyparg = 0; /* dummy value */
att->attbound = 0; /* dummy value */
att->attcanindex = 0; /* dummy value */
att->attproc = 0; /* dummy value */
att->attcacheoff = -1;
att->attnum = attributeNumber;
att->attnelems = attdim;
att->attisset = attisset;
/* ----------------
* search the system cache for the type tuple of the attribute
* we are creating so that we can get the typeid and some other
* stuff.
*
* Note: in the special case of
*
* create EMP (name = char16, manager = EMP)
*
* RelationNameCreateHeapRelation() calls BuildDesc() which
* calls this routine and since EMP does not exist yet, the
* system cache lookup below fails. That's fine, but rather
* than doing an elog(WARN) we just leave that information
* uninitialized, return false, then fix things up later.
* -cim 6/14/90
* ----------------
*/
tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName),
0,0,0);
if (! HeapTupleIsValid(tuple)) {
/* ----------------
* here type info does not exist yet so we just fill
* the attribute with dummy information and return false.
* ----------------
*/
att->atttypid = InvalidOid;
att->attlen = (int16) 0;
att->attbyval = (bool) 0;
att->attalign = 'i';
return false;
}
/* ----------------
* type info exists so we initialize our attribute
* information from the type tuple we found..
* ----------------
*/
typeForm = (TypeTupleForm) GETSTRUCT(tuple);
att->atttypid = tuple->t_oid;
att->attalign = typeForm->typalign;
/* ------------------------
If this attribute is a set, what is really stored in the
attribute is the OID of a tuple in the pg_proc catalog.
The pg_proc tuple contains the query string which defines
this set - i.e., the query to run to get the set.
So the atttypid (just assigned above) refers to the type returned
by this query, but the actual length of this attribute is the
length (size) of an OID.
Why not just make the atttypid point to the OID type, instead
of the type the query returns? Because the executor uses the atttypid
to tell the front end what type will be returned (in BeginCommand),
and in the end the type returned will be the result of the query, not
an OID.
Why not wait until the return type of the set is known (i.e., the
recursive call to the executor to execute the set has returned)
before telling the front end what the return type will be? Because
the executor is a delicate thing, and making sure that the correct
order of front-end commands is maintained is messy, especially
considering that target lists may change as inherited attributes
are considered, etc. Ugh.
-----------------------------------------
*/
if (attisset) {
Type t = type("oid");
att->attlen = tlen(t);
att->attbyval = tbyval(t);
} else {
att->attlen = typeForm->typlen;
att->attbyval = typeForm->typbyval;
}
return true;
}
/* ----------------------------------------------------------------
* TupleDescMakeSelfReference
*
* This function initializes a "self-referential" attribute like
* manager in "create EMP (name=text, manager = EMP)".
* It calls TypeShellMake() which inserts a "shell" type
* tuple into pg_type. A self-reference is one kind of set, so
* its size and byval are the same as for a set. See the comments
* above in TupleDescInitEntry.
* ----------------------------------------------------------------
*/
static void
TupleDescMakeSelfReference(TupleDesc desc,
AttrNumber attnum,
char *relname)
{
AttributeTupleForm att;
Type t = type("oid");
att = desc->attrs[attnum-1];
att->atttypid = TypeShellMake(relname);
att->attlen = tlen(t);
att->attbyval = tbyval(t);
att->attnelems = 0;
}
/* ----------------------------------------------------------------
* BuildDescForRelation
*
* This is a general purpose function identical to BuildDesc
* but is used by the DefineRelation() code to catch the
* special case where you
*
* create FOO ( ..., x = FOO )
*
* here, the initial type lookup for "x = FOO" will fail
* because FOO isn't in the catalogs yet. But since we
* are creating FOO, instead of doing an elog() we add
* a shell type tuple to pg_type and fix things later
* in amcreate().
* ----------------------------------------------------------------
*/
TupleDesc
BuildDescForRelation(List *schema, char *relname)
{
int natts;
AttrNumber attnum;
List *p;
TupleDesc desc;
char *attname;
char *typename;
int attdim;
bool attisset;
/* ----------------
* allocate a new tuple descriptor
* ----------------
*/
natts = length(schema);
desc = CreateTemplateTupleDesc(natts);
attnum = 0;
typename = palloc(NAMEDATALEN+1);
foreach(p, schema) {
ColumnDef *entry;
List *arry;
/* ----------------
* for each entry in the list, get the name and type
* information from the list and have TupleDescInitEntry
* fill in the attribute information we need.
* ----------------
*/
attnum++;
entry = lfirst(p);
attname = entry->colname;
arry = entry->typename->arrayBounds;
attisset = entry->typename->setof;
if (arry != NIL) {
char buf[20];
attdim = length(arry);
/* array of XXX is _XXX (inherited from release 3) */
sprintf(buf, "_%.*s", NAMEDATALEN, entry->typename->name);
strcpy(typename, buf);
} else {
strcpy(typename, entry->typename->name);
attdim = 0;
}
if (! TupleDescInitEntry(desc, attnum, attname,
typename, attdim, attisset)) {
/* ----------------
* if TupleDescInitEntry() fails, it means there is
* no type in the system catalogs. So now we check if
* the type name equals the relation name. If so we
* have a self reference, otherwise it's an error.
* ----------------
*/
if (!strcmp(typename, relname)) {
TupleDescMakeSelfReference(desc, attnum, relname);
} else
elog(WARN, "DefineRelation: no such type %.*s",
NAMEDATALEN, typename);
}
/*
* this is for char() and varchar(). When an entry is of type
* char() or varchar(), typlen is set to the appropriate length,
* which we'll use here instead. (The catalog lookup only returns
* the length of bpchar and varchar which is not what we want!)
* - ay 6/95
*/
if (entry->typename->typlen > 0) {
desc->attrs[attnum - 1]->attlen = entry->typename->typlen;
}
}
return desc;
}
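
A caller-side sketch only: hand-building a two-column descriptor with the routines above. Type names are resolved through the system cache, so this only works inside a backend with the catalogs available; the column names and types are illustrative.

    #include "postgres.h"
    #include "access/attnum.h"
    #include "access/tupdesc.h"

    static TupleDesc
    make_example_tupdesc(void)
    {
        TupleDesc desc = CreateTemplateTupleDesc(2);

        /* attdim = 0 (not an array), attisset = false (not a set-valued column) */
        TupleDescInitEntry(desc, (AttrNumber) 1, "name",   "char16", 0, false);
        TupleDescInitEntry(desc, (AttrNumber) 2, "salary", "int4",   0, false);
        return desc;
    }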

src/backend/access/funcindex.h (new file)

@@ -0,0 +1,43 @@
/*-------------------------------------------------------------------------
*
* funcindex.h--
*
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: funcindex.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef _FUNC_INDEX_INCLUDED_
#define _FUNC_INDEX_INCLUDED_
#include "postgres.h"
typedef struct {
int nargs;
Oid arglist[8];
Oid procOid;
NameData funcName;
} FuncIndexInfo;
typedef FuncIndexInfo *FuncIndexInfoPtr;
/*
* some marginally useful macro definitions
*/
/* #define FIgetname(FINFO) (&((FINFO)->funcName.data[0]))*/
#define FIgetname(FINFO) (FINFO)->funcName.data
#define FIgetnArgs(FINFO) (FINFO)->nargs
#define FIgetProcOid(FINFO) (FINFO)->procOid
#define FIgetArg(FINFO, argnum) (FINFO)->arglist[argnum]
#define FIgetArglist(FINFO) (FINFO)->arglist
#define FIsetnArgs(FINFO, numargs) ((FINFO)->nargs = numargs)
#define FIsetProcOid(FINFO, id) ((FINFO)->procOid = id)
#define FIsetArg(FINFO, argnum, argtype) ((FINFO)->arglist[argnum] = argtype)
#define FIisFunctionalIndex(FINFO) (FINFO->procOid != InvalidOid)
#endif /* FUNCINDEX_H */
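
A usage sketch only: filling in a FuncIndexInfo with the accessor macros above. The procedure OID and argument type OID are assumed to come from a catalog lookup elsewhere.

    #include "access/funcindex.h"

    static void
    fill_funcindexinfo(FuncIndexInfo *fi, Oid procOid, Oid argType)
    {
        FIsetnArgs(fi, 1);              /* a one-argument index function */
        FIsetArg(fi, 0, argType);
        FIsetProcOid(fi, procOid);
    }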

src/backend/access/genam.h (new file)

@@ -0,0 +1,60 @@
/*-------------------------------------------------------------------------
*
* genam.h--
* POSTGRES general access method definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: genam.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef GENAM_H
#define GENAM_H
#include "postgres.h"
#include "access/attnum.h"
#include "access/htup.h"
#include "access/istrat.h"
#include "access/itup.h"
#include "access/relscan.h"
#include "access/skey.h"
#include "access/sdir.h"
#include "access/funcindex.h"
/* ----------------
* generalized index_ interface routines
* ----------------
*/
extern Relation index_open(Oid relationId);
extern Relation index_openr(char *relationName);
extern void index_close(Relation relation);
extern InsertIndexResult index_insert(Relation relation,
IndexTuple indexTuple);
extern void index_delete(Relation relation, ItemPointer indexItem);
extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd,
uint16 numberOfKeys, ScanKey key);
extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key);
extern void index_endscan(IndexScanDesc scan);
extern void index_markpos(IndexScanDesc scan);
extern void index_restrpos(IndexScanDesc scan);
extern RetrieveIndexResult index_getnext(IndexScanDesc scan,
ScanDirection direction);
extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
uint16 procnum);
extern Datum GetIndexValue(HeapTuple tuple, TupleDesc hTupDesc,
int attOff, AttrNumber attrNums[], FuncIndexInfo *fInfo,
bool *attNull, Buffer buffer);
/* in genam.c */
extern IndexScanDesc RelationGetIndexScan(Relation relation, bool scanFromEnd,
uint16 numberOfKeys, ScanKey key);
extern void IndexScanRestart(IndexScanDesc scan, bool scanFromEnd,
ScanKey key);
extern void IndexScanEnd(IndexScanDesc scan);
extern void IndexScanMarkPosition(IndexScanDesc scan);
extern void IndexScanRestorePosition(IndexScanDesc scan);
#endif /* GENAM_H */

336
src/backend/access/hash.h Normal file

@ -0,0 +1,336 @@
/*-------------------------------------------------------------------------
*
* hash.h--
* header file for postgres hash access method implementation
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: hash.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
*
*-------------------------------------------------------------------------
*/
#ifndef HASH_H
#define HASH_H
#include "access/itup.h"
/*
* An overflow page is a spare page allocated for storing data whose
* bucket doesn't have room to store it. We use overflow pages rather
* than just splitting the bucket because there is a linear order in
 * the way we split buckets. So if there isn't enough space in the
 * bucket itself, the data goes into an overflow page.
*
* Overflow page addresses are stored in form: (Splitnumber, Page offset).
*
 * A splitnumber is the number of the generation at which the table
 * doubles in size. The page offset is the ovflpage's offset within that
 * splitnumber; offsets start at 1.
*
 * An overflow address is built from a splitnumber S and a page offset O
 * with the macro OADDR_OF(S, O); OADDR_TO_BLKNO() then converts such an
 * address into an actual block number.
*/
typedef uint32 Bucket;
typedef bits16 OverflowPageAddress;
typedef uint32 SplitNumber;
typedef uint32 PageOffset;
/* A valid overflow address will always have a page offset >= 1 */
#define InvalidOvflAddress 0
#define SPLITSHIFT 11
#define SPLITMASK 0x7FF
#define SPLITNUM(N) ((SplitNumber)(((uint32)(N)) >> SPLITSHIFT))
#define OPAGENUM(N) ((PageOffset)((N) & SPLITMASK))
#define OADDR_OF(S,O) ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O)))
#define BUCKET_TO_BLKNO(B) \
((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1)
#define OADDR_TO_BLKNO(B) \
((BlockNumber) \
(BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))));
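/*
 * Editor's sketch (not part of the original header): a standalone program,
 * using only <stdint.h>/<stdio.h>, that mirrors the SPLITNUM/OPAGENUM/
 * OADDR_OF packing above with a concrete value.  All DEMO_*/demo_* names
 * are local to this example; compile it separately from the backend.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_SPLITSHIFT 11      /* low 11 bits: page offset within splitpoint */
#define DEMO_SPLITMASK  0x7FF

static uint16_t demo_oaddr_of(uint32_t s, uint32_t o)   /* cf. OADDR_OF(S,O) */
{
    return (uint16_t) ((s << DEMO_SPLITSHIFT) + o);
}

int main(void)
{
    uint16_t addr = demo_oaddr_of(3, 5);    /* splitnumber 3, page offset 5 */

    printf("addr=0x%04x splitnum=%u offset=%u\n",
           addr,
           (unsigned) (addr >> DEMO_SPLITSHIFT),    /* cf. SPLITNUM(): 3 */
           (unsigned) (addr & DEMO_SPLITMASK));     /* cf. OPAGENUM(): 5 */
    return 0;
}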
/*
* hasho_flag tells us which type of page we're looking at. For
* example, knowing overflow pages from bucket pages is necessary
* information when you're deleting tuples from a page. If all the
* tuples are deleted from an overflow page, the overflow is made
* available to other buckets by calling _hash_freeovflpage(). If all
* the tuples are deleted from a bucket page, no additional action is
* necessary.
*/
#define LH_UNUSED_PAGE (0)
#define LH_OVERFLOW_PAGE (1 << 0)
#define LH_BUCKET_PAGE (1 << 1)
#define LH_BITMAP_PAGE (1 << 2)
#define LH_META_PAGE (1 << 3)
typedef struct HashPageOpaqueData {
bits16 hasho_flag; /* is this page a bucket or ovfl */
Bucket hasho_bucket; /* bucket number this pg belongs to */
OverflowPageAddress hasho_oaddr; /* ovfl address of this ovfl pg */
BlockNumber hasho_nextblkno; /* next ovfl blkno */
BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */
} HashPageOpaqueData;
typedef HashPageOpaqueData *HashPageOpaque;
/*
* ScanOpaqueData is used to remember which buffers we're currently
* examining in the scan. We keep these buffers locked and pinned and
* recorded in the opaque entry of the scan in order to avoid doing a
* ReadBuffer() for every tuple in the index. This avoids semop() calls,
* which are expensive.
*/
typedef struct HashScanOpaqueData {
Buffer hashso_curbuf;
Buffer hashso_mrkbuf;
} HashScanOpaqueData;
typedef HashScanOpaqueData *HashScanOpaque;
/*
* Definitions for metapage.
*/
#define HASH_METAPAGE 0 /* metapage is always block 0 */
#define HASH_MAGIC 0x6440640
#define HASH_VERSION 0
/*
 * NCACHED is used to set the array sizes of spares[] & bitmaps[].
*
 * Spares[] is used to hold the number of overflow pages currently
* allocated at a certain splitpoint. For example, if spares[3] = 7
* then there are a maximum of 7 ovflpages available at splitpoint 3.
* The value in spares[] will change as ovflpages are added within
* a splitpoint.
*
* Within a splitpoint, one can find which ovflpages are available and
 * which are used by looking at the bitmaps that are stored on the ovfl
* pages themselves. There is at least one bitmap for every splitpoint's
* ovflpages. Bitmaps[] contains the ovflpage addresses of the ovflpages
* that hold the ovflpage bitmaps.
*
 * The size is restricted to NCACHED (32) because overflow page
 * addresses are 16 bits: the upper 5 bits hold the splitpoint and the
 * lower 11 the page number within the splitpoint. Since there are
 * only 5 bits to store the splitpoint, there can only be 32 splitpoints.
 * Both spares[] and bitmaps[] use splitpoints as their indices, so there
 * can only be 32 of them.
*/
#define NCACHED 32
typedef struct HashMetaPageData {
PageHeaderData hashm_phdr; /* pad for page header
(do not use) */
uint32 hashm_magic; /* magic no. for hash tables */
uint32 hashm_version; /* version ID */
uint32 hashm_nkeys; /* number of keys stored in
the table */
uint16 hashm_ffactor; /* fill factor */
uint16 hashm_bsize; /* bucket size (bytes) -
must be a power of 2 */
uint16 hashm_bshift; /* bucket shift */
uint16 hashm_bmsize; /* bitmap array size (bytes) -
must be a power of 2 */
uint32 hashm_maxbucket; /* ID of maximum bucket
in use */
uint32 hashm_highmask; /* mask to modulo into
entire table */
uint32 hashm_lowmask; /* mask to modulo into lower
half of table */
uint32 hashm_ovflpoint; /* pageno. from which ovflpgs
being allocated */
uint32 hashm_lastfreed; /* last ovflpage freed */
uint32 hashm_nmaps; /* Initial number of bitmaps */
uint32 hashm_spares[NCACHED]; /* spare pages available at
splitpoints */
BlockNumber hashm_mapp[NCACHED]; /* blknumbers of ovfl page
maps */
RegProcedure hashm_procid; /* hash procedure id from
pg_proc */
} HashMetaPageData;
typedef HashMetaPageData *HashMetaPage;
/* Short hands for accessing structure */
#define BSHIFT hashm_bshift
#define OVFL_POINT hashm_ovflpoint
#define LAST_FREED hashm_lastfreed
#define MAX_BUCKET hashm_maxbucket
#define FFACTOR hashm_ffactor
#define HIGH_MASK hashm_highmask
#define LOW_MASK hashm_lowmask
#define NKEYS hashm_nkeys
#define SPARES hashm_spares
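/*
 * Editor's sketch (not part of the original header): how the maxbucket/
 * highmask/lowmask fields above are typically combined to pick a bucket in
 * a linear hash table.  The real computation lives in _hash_call() and
 * _hash_expandtable() (not shown in this file), so the rule below is an
 * illustration of the data structure, not a quote of the backend code;
 * the demo_* names are local to this example.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t demo_bucket(uint32_t hashval,
                            uint32_t maxbucket,   /* hashm_maxbucket */
                            uint32_t highmask,    /* hashm_highmask  */
                            uint32_t lowmask)     /* hashm_lowmask   */
{
    uint32_t bucket = hashval & highmask;   /* modulo the doubled table size */

    if (bucket > maxbucket)
        bucket &= lowmask;                  /* that bucket has not split yet */
    return bucket;
}

int main(void)
{
    /* with the initial metapage values (maxbucket 1, highmask 3, lowmask 1) */
    printf("%u %u\n",
           demo_bucket(6, 1, 3, 1),   /* 6 & 3 = 2 > 1, so 6 & 1 = 0 */
           demo_bucket(7, 1, 3, 1));  /* 7 & 3 = 3 > 1, so 7 & 1 = 1 */
    return 0;
}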
extern bool BuildingHash;
typedef struct HashItemData {
IndexTupleData hash_itup;
} HashItemData;
typedef HashItemData *HashItem;
/*
* Constants
*/
#define DEFAULT_FFACTOR 300
#define SPLITMAX 8
#define BYTE_TO_BIT 3 /* 2^3 bits/byte */
#define INT_TO_BYTE 2 /* 2^2 bytes/int */
#define INT_TO_BIT 5 /* 2^5 bits/int */
#define ALL_SET ((uint32) ~0)
/*
* bitmap pages do not contain tuples. they do contain the standard
* page headers and trailers; however, everything in between is a
* giant bit array. the number of bits that fit on a page obviously
* depends on the page size and the header/trailer overhead.
*/
#define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize)
#define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT)
#define HashPageGetBitmap(pg) \
((uint32 *) (((char *) (pg)) + DOUBLEALIGN(sizeof(PageHeaderData))))
/*
* The number of bits in an ovflpage bitmap which
* tells which ovflpages are empty versus in use (NOT the number of
* bits in an overflow page *address* bitmap).
*/
#define BITS_PER_MAP 32 /* Number of bits in ovflpage bitmap */
/* Given the address of the beginning of a big map, clear/set the nth bit */
#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
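/*
 * Editor's sketch (not part of the original header): a standalone program
 * showing the same word/bit arithmetic as the SETBIT/CLRBIT/ISSET macros
 * above, with a bitmap held in an array of 32-bit words.  The DEMO_* names
 * are local to this example.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_BITS_PER_MAP 32

#define DEMO_SETBIT(A, N) ((A)[(N) / DEMO_BITS_PER_MAP] |=  (1U << ((N) % DEMO_BITS_PER_MAP)))
#define DEMO_CLRBIT(A, N) ((A)[(N) / DEMO_BITS_PER_MAP] &= ~(1U << ((N) % DEMO_BITS_PER_MAP)))
#define DEMO_ISSET(A, N)  ((A)[(N) / DEMO_BITS_PER_MAP] &   (1U << ((N) % DEMO_BITS_PER_MAP)))

int main(void)
{
    uint32_t map[4] = {0, 0, 0, 0};   /* 4 words = 128 bits, all clear */

    DEMO_SETBIT(map, 0);              /* word 0, bit 0 */
    DEMO_SETBIT(map, 37);             /* word 1, bit 5 */
    DEMO_CLRBIT(map, 0);

    printf("bit 0: %d  bit 37: %d  word 1: 0x%08x\n",
           DEMO_ISSET(map, 0) ? 1 : 0,    /* 0 */
           DEMO_ISSET(map, 37) ? 1 : 0,   /* 1 */
           (unsigned) map[1]);            /* 0x00000020 */
    return 0;
}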
/*
* page locking modes
*/
#define HASH_READ 0
#define HASH_WRITE 1
/*
* In general, the hash code tries to localize its knowledge about page
* layout to a couple of routines. However, we need a special value to
* indicate "no page number" in those places where we expect page numbers.
*/
#define P_NONE 0
/*
* Strategy number. There's only one valid strategy for hashing: equality.
*/
#define HTEqualStrategyNumber 1
#define HTMaxStrategyNumber 1
/*
* When a new operator class is declared, we require that the user supply
 * us with an amproc procedure for hashing a key of the new type.
* Since we only have one such proc in amproc, it's number 1.
*/
#define HASHPROC 1
/* public routines */
extern void hashbuild(Relation heap, Relation index, int natts,
AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
extern InsertIndexResult hashinsert(Relation rel, IndexTuple itup);
extern char *hashgettuple(IndexScanDesc scan, ScanDirection dir);
extern char *hashbeginscan(Relation rel, bool fromEnd, uint16 keysz,
ScanKey scankey);
extern void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
extern void hashendscan(IndexScanDesc scan);
extern void hashmarkpos(IndexScanDesc scan);
extern void hashrestrpos(IndexScanDesc scan);
extern void hashdelete(Relation rel, ItemPointer tid);
/* hashfunc.c */
extern uint32 hashint2(int16 key);
extern uint32 hashint4(uint32 key);
extern uint32 hashfloat4(float32 keyp);
extern uint32 hashfloat8(float64 keyp);
extern uint32 hashoid(Oid key);
extern uint32 hashchar(char key);
extern uint32 hashchar2(uint16 intkey);
extern uint32 hashchar4(uint32 intkey);
extern uint32 hashchar8(char *key);
extern uint32 hashchar16(char *key);
extern uint32 hashtext(struct varlena *key);
/* private routines */
/* hashinsert.c */
extern InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem);
/* hashovfl.c */
extern Buffer _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf);
extern Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf);
extern int32 _hash_initbitmap(Relation rel, HashMetaPage metap, int32 pnum,
int32 nbits, int32 ndx);
extern void _hash_squeezebucket(Relation rel, HashMetaPage metap,
Bucket bucket);
/* hashpage.c */
extern void _hash_metapinit(Relation rel);
extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _hash_relbuf(Relation rel, Buffer buf, int access);
extern void _hash_wrtbuf(Relation rel, Buffer buf);
extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
int to_access);
extern void _hash_pageinit(Page page, Size size);
extern void _hash_pagedel(Relation rel, ItemPointer tid);
extern void _hash_expandtable(Relation rel, Buffer metabuf);
/* hashscan.c */
extern void _hash_regscan(IndexScanDesc scan);
extern void _hash_dropscan(IndexScanDesc scan);
extern void _hash_adjscans(Relation rel, ItemPointer tid);
/* hashsearch.c */
extern void _hash_search(Relation rel, int keysz, ScanKey scankey,
Buffer *bufP, HashMetaPage metap);
extern RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir);
extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir,
Buffer metabuf);
/* hashstrat.c */
extern StrategyNumber _hash_getstrat(Relation rel, AttrNumber attno,
RegProcedure proc);
extern bool _hash_invokestrat(Relation rel, AttrNumber attno,
StrategyNumber strat, Datum left, Datum right);
/* hashutil.c */
extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
HashMetaPage metap);
extern void _hash_freeskey(ScanKey skey);
extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
extern HashItem _hash_formitem(IndexTuple itup);
extern Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key);
extern uint32 _hash_log2(uint32 num);
extern void _hash_checkpage(Page page, int flags);
#endif /* HASH_H */


@ -0,0 +1,18 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/hash (hash access method)
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= hash.c hashfunc.c hashinsert.c hashovfl.c hashpage.c hashscan.c \
hashsearch.c hashstrat.c hashutil.c


@ -0,0 +1,467 @@
/*-------------------------------------------------------------------------
*
* hash.c--
* Implementation of Margo Seltzer's Hashing package for postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* This file contains only the public interface routines.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/hash.h"
#include "access/funcindex.h"
#include "nodes/execnodes.h"
#include "nodes/plannodes.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "catalog/index.h"
bool BuildingHash = false;
/*
* hashbuild() -- build a new hash index.
*
* We use a global variable to record the fact that we're creating
* a new index. This is used to avoid high-concurrency locking,
* since the index won't be visible until this transaction commits
* and since building is guaranteed to be single-threaded.
*/
void
hashbuild(Relation heap,
Relation index,
int natts,
AttrNumber *attnum,
IndexStrategy istrat,
uint16 pcount,
Datum *params,
FuncIndexInfo *finfo,
PredInfo *predInfo)
{
HeapScanDesc hscan;
Buffer buffer;
HeapTuple htup;
IndexTuple itup;
TupleDesc htupdesc, itupdesc;
Datum *attdata;
bool *nulls;
InsertIndexResult res;
int nhtups, nitups;
int i;
HashItem hitem;
ExprContext *econtext;
TupleTable tupleTable;
TupleTableSlot *slot;
Oid hrelid, irelid;
Node *pred, *oldPred;
/* note that this is a new hash index */
BuildingHash = true;
pred = predInfo->pred;
oldPred = predInfo->oldPred;
/* initialize the hash index metadata page (if this is a new index) */
if (oldPred == NULL)
_hash_metapinit(index);
/* get tuple descriptors for heap and index relations */
htupdesc = RelationGetTupleDescriptor(heap);
itupdesc = RelationGetTupleDescriptor(index);
/* get space for data items that'll appear in the index tuple */
attdata = (Datum *) palloc(natts * sizeof(Datum));
nulls = (bool *) palloc(natts * sizeof(bool));
/*
* If this is a predicate (partial) index, we will need to evaluate the
* predicate using ExecQual, which requires the current tuple to be in a
* slot of a TupleTable. In addition, ExecQual must have an ExprContext
* referring to that slot. Here, we initialize dummy TupleTable and
* ExprContext objects for this purpose. --Nels, Feb '92
*/
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL) {
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
econtext = makeNode(ExprContext);
FillDummyExprContext(econtext, slot, htupdesc, buffer);
}
#endif /* OMIT_PARTIAL_INDEX */
/* start a heap scan */
hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
htup = heap_getnext(hscan, 0, &buffer);
/* build the index */
nhtups = nitups = 0;
for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
nhtups++;
/*
* If oldPred != NULL, this is an EXTEND INDEX command, so skip
* this tuple if it was already in the existing partial index
*/
if (oldPred != NULL) {
/*SetSlotContents(slot, htup); */
#ifndef OMIT_PARTIAL_INDEX
slot->val = htup;
if (ExecQual((List*)oldPred, econtext) == true) {
nitups++;
continue;
}
#endif /* OMIT_PARTIAL_INDEX */
}
/* Skip this tuple if it doesn't satisfy the partial-index predicate */
if (pred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/*SetSlotContents(slot, htup); */
slot->val = htup;
if (ExecQual((List*)pred, econtext) == false)
continue;
#endif /* OMIT_PARTIAL_INDEX */
}
nitups++;
/*
* For the current heap tuple, extract all the attributes
* we use in this index, and note which are null.
*/
for (i = 1; i <= natts; i++) {
int attoff;
bool attnull;
/*
* Offsets are from the start of the tuple, and are
* zero-based; indices are one-based. The next call
* returns i - 1. That's data hiding for you.
*/
/* attoff = i - 1 */
attoff = AttrNumberGetAttrOffset(i);
/* below, attdata[attoff] is set to some datum and
* attnull is changed to indicate whether or not the attribute
* is null for this tuple
*/
attdata[attoff] = GetIndexValue(htup,
htupdesc,
attoff,
attnum,
finfo,
&attnull,
buffer);
nulls[attoff] = (attnull ? 'n' : ' ');
}
/* form an index tuple and point it at the heap tuple */
itup = index_formtuple(itupdesc, attdata, nulls);
/*
* If the single index key is null, we don't insert it into
* the index. Hash tables support scans on '='.
* Relational algebra says that A = B
* returns null if either A or B is null. This
* means that no qualification used in an index scan could ever
* return true on a null attribute. It also means that indices
* can't be used by ISNULL or NOTNULL scans, but that's an
* artifact of the strategy map architecture chosen in 1986, not
* of the way nulls are handled here.
*/
if (itup->t_info & INDEX_NULL_MASK) {
pfree(itup);
continue;
}
itup->t_tid = htup->t_ctid;
hitem = _hash_formitem(itup);
res = _hash_doinsert(index, hitem);
pfree(hitem);
pfree(itup);
pfree(res);
}
/* okay, all heap tuples are indexed */
heap_endscan(hscan);
if (pred != NULL || oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
ExecDestroyTupleTable(tupleTable, true);
pfree(econtext);
#endif /* OMIT_PARTIAL_INDEX */
}
/*
* Since we just counted the tuples in the heap, we update its
* stats in pg_class to guarantee that the planner takes advantage
* of the index we just created. Finally, only update statistics
* during normal index definitions, not for indices on system catalogs
* created during bootstrap processing. We must close the relations
 * before updating statistics to guarantee that the relcache entries
* are flushed when we increment the command counter in UpdateStats().
*/
if (IsNormalProcessingMode())
{
hrelid = heap->rd_id;
irelid = index->rd_id;
heap_close(heap);
index_close(index);
UpdateStats(hrelid, nhtups, true);
UpdateStats(irelid, nitups, false);
if (oldPred != NULL) {
if (nitups == nhtups) pred = NULL;
UpdateIndexPredicate(irelid, oldPred, pred);
}
}
/* be tidy */
pfree(nulls);
pfree(attdata);
/* all done */
BuildingHash = false;
}
/*
* hashinsert() -- insert an index tuple into a hash table.
*
* Hash on the index tuple's key, find the appropriate location
* for the new tuple, put it there, and return an InsertIndexResult
* to the caller.
*/
InsertIndexResult
hashinsert(Relation rel, IndexTuple itup)
{
HashItem hitem;
InsertIndexResult res;
if (itup->t_info & INDEX_NULL_MASK)
return ((InsertIndexResult) NULL);
hitem = _hash_formitem(itup);
res = _hash_doinsert(rel, hitem);
pfree(hitem);
return (res);
}
/*
* hashgettuple() -- Get the next tuple in the scan.
*/
char *
hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
RetrieveIndexResult res;
/*
* If we've already initialized this scan, we can just advance it
* in the appropriate direction. If we haven't done so yet, we
* call a routine to get the first item in the scan.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
res = _hash_next(scan, dir);
else
res = _hash_first(scan, dir);
return ((char *) res);
}
/*
* hashbeginscan() -- start a scan on a hash index
*/
char *
hashbeginscan(Relation rel,
bool fromEnd,
uint16 keysz,
ScanKey scankey)
{
IndexScanDesc scan;
HashScanOpaque so;
scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer;
scan->opaque = so;
scan->flags = 0x0;
/* register scan in case we change pages it's using */
_hash_regscan(scan);
return ((char *) scan);
}
/*
* hashrescan() -- rescan an index relation
*/
void
hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
{
ItemPointer iptr;
HashScanOpaque so;
so = (HashScanOpaque) scan->opaque;
/* we hold a read lock on the current page in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
so->hashso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
so->hashso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* reset the scan key */
if (scan->numberOfKeys > 0) {
memmove(scan->keyData,
scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
}
}
/*
* hashendscan() -- close down a scan
*/
void
hashendscan(IndexScanDesc scan)
{
ItemPointer iptr;
HashScanOpaque so;
so = (HashScanOpaque) scan->opaque;
/* release any locks we still hold */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
so->hashso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
if (BufferIsValid(so->hashso_mrkbuf))
_hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
so->hashso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* don't need scan registered anymore */
_hash_dropscan(scan);
/* be tidy */
#ifdef PERFECT_MMGR
pfree (scan->opaque);
#endif /* PERFECT_MMGR */
}
/*
* hashmarkpos() -- save current scan position
*
*/
void
hashmarkpos(IndexScanDesc scan)
{
ItemPointer iptr;
HashScanOpaque so;
/* see if we ever call this code. if we do, then so_mrkbuf is a
* useful element in the scan->opaque structure. if this procedure
* is never called, so_mrkbuf should be removed from the scan->opaque
* structure.
*/
elog(NOTICE, "Hashmarkpos() called.");
so = (HashScanOpaque) scan->opaque;
/* release lock on old marked data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
so->hashso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentItemData and copy to currentMarkData */
if (ItemPointerIsValid(&(scan->currentItemData))) {
so->hashso_mrkbuf = _hash_getbuf(scan->relation,
BufferGetBlockNumber(so->hashso_curbuf),
HASH_READ);
scan->currentMarkData = scan->currentItemData;
}
}
/*
* hashrestrpos() -- restore scan to last saved position
*/
void
hashrestrpos(IndexScanDesc scan)
{
ItemPointer iptr;
HashScanOpaque so;
/* see if we ever call this code. if we do, then so_mrkbuf is a
* useful element in the scan->opaque structure. if this procedure
* is never called, so_mrkbuf should be removed from the scan->opaque
* structure.
*/
elog(NOTICE, "Hashrestrpos() called.");
so = (HashScanOpaque) scan->opaque;
/* release lock on current data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
so->hashso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentMarkData and copy to currentItemData */
if (ItemPointerIsValid(&(scan->currentMarkData))) {
so->hashso_curbuf =
_hash_getbuf(scan->relation,
BufferGetBlockNumber(so->hashso_mrkbuf),
HASH_READ);
scan->currentItemData = scan->currentMarkData;
}
}
/* stubs */
void
hashdelete(Relation rel, ItemPointer tid)
{
/* adjust any active scans that will be affected by this deletion */
_hash_adjscans(rel, tid);
/* delete the data from the page */
_hash_pagedel(rel, tid);
}


@ -0,0 +1,276 @@
/*-------------------------------------------------------------------------
*
* hashfunc.c--
* Comparison functions for hash access method.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
* defined on hash tables, they compute the hash value of the argument.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "utils/nabstime.h"
uint32 hashint2(int16 key)
{
return ((uint32) ~key);
}
uint32 hashint4(uint32 key)
{
return (~key);
}
/* Hash function from Chris Torek. */
uint32 hashfloat4(float32 keyp)
{
int len;
int loop;
uint32 h;
char *kp = (char *) keyp;
len = sizeof(float32data);
#define HASH4a h = (h << 5) - h + *kp++;
#define HASH4b h = (h << 5) + h + *kp++;
#define HASH4 HASH4b
h = 0;
if (len > 0) {
loop = (len + 8 - 1) >> 3;
switch (len & (8 - 1)) {
case 0:
do { /* All fall throughs */
HASH4;
case 7:
HASH4;
case 6:
HASH4;
case 5:
HASH4;
case 4:
HASH4;
case 3:
HASH4;
case 2:
HASH4;
case 1:
HASH4;
} while (--loop);
}
}
return (h);
}
uint32 hashfloat8(float64 keyp)
{
int len;
int loop;
uint32 h;
char *kp = (char *) keyp;
len = sizeof(float64data);
#define HASH4a h = (h << 5) - h + *kp++;
#define HASH4b h = (h << 5) + h + *kp++;
#define HASH4 HASH4b
h = 0;
if (len > 0) {
loop = (len + 8 - 1) >> 3;
switch (len & (8 - 1)) {
case 0:
do { /* All fall throughs */
HASH4;
case 7:
HASH4;
case 6:
HASH4;
case 5:
HASH4;
case 4:
HASH4;
case 3:
HASH4;
case 2:
HASH4;
case 1:
HASH4;
} while (--loop);
}
}
return (h);
}
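/*
 * Editor's note (not part of the original file): the unrolled switch used
 * by hashfloat4()/hashfloat8() above is just a fast way of applying the
 * HASH4b step, h = (h << 5) + h + c (i.e. h = 33*h + c), to each of the
 * len key bytes.  A plain loop computes the same value; this standalone
 * sketch (demo_* name local to the example) is illustrative only.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t demo_torek_hash(const char *kp, size_t len)
{
    uint32_t h = 0;

    while (len-- > 0)
        h = (h << 5) + h + (uint32_t) *kp++;   /* same step as HASH4b */
    return h;
}

int main(void)
{
    float f = 1.5f;

    /* hash the data bytes of the float, as hashfloat4() does */
    printf("0x%08x\n", (unsigned) demo_torek_hash((const char *) &f, sizeof(f)));
    return 0;
}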
uint32 hashoid(Oid key)
{
return ((uint32) ~key);
}
uint32 hashchar(char key)
{
int len;
uint32 h;
len = sizeof(char);
#define PRIME1 37
#define PRIME2 1048583
h = 0;
/* Convert char to integer */
h = h * PRIME1 ^ (key - ' ');
h %= PRIME2;
return (h);
}
uint32 hashchar2(uint16 intkey)
{
uint32 h;
int len;
char *key = (char *) &intkey;
h = 0;
len = sizeof(uint16);
/* Convert string to integer */
while (len--)
h = h * PRIME1 ^ (*key++ - ' ');
h %= PRIME2;
return (h);
}
uint32 hashchar4(uint32 intkey)
{
uint32 h;
int len;
char *key = (char *) &intkey;
h = 0;
len = sizeof(uint32);
/* Convert string to integer */
while (len--)
h = h * PRIME1 ^ (*key++ - ' ');
h %= PRIME2;
return (h);
}
uint32 hashchar8(char *key)
{
uint32 h;
int len;
h = 0;
len = sizeof(char8);
/* Convert string to integer */
while (len--)
h = h * PRIME1 ^ (*key++ - ' ');
h %= PRIME2;
return (h);
}
uint32 hashname(NameData *n)
{
uint32 h;
int len;
char *key;
key = n->data;
h = 0;
len = NAMEDATALEN;
/* Convert string to integer */
while (len--)
h = h * PRIME1 ^ (*key++ - ' ');
h %= PRIME2;
return (h);
}
uint32 hashchar16(char *key)
{
uint32 h;
int len;
h = 0;
len = sizeof(char16);
/* Convert string to integer */
while (len--)
h = h * PRIME1 ^ (*key++ - ' ');
h %= PRIME2;
return (h);
}
/*
* (Comment from the original db3 hashing code: )
*
* "This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
* units. On the first time through the loop we get the 'leftover bytes'
* (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
* all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
* this routine is heavily used enough, it's worth the ugly coding.
*
* "OZ's original sdbm hash"
*/
uint32 hashtext(struct varlena *key)
{
int keylen;
char *keydata;
uint32 n;
int loop;
keydata = VARDATA(key);
keylen = VARSIZE(key);
/* keylen includes the four bytes in which string keylength is stored */
keylen -= sizeof(VARSIZE(key));
#define HASHC n = *keydata++ + 65599 * n
n = 0;
if (keylen > 0) {
loop = (keylen + 8 - 1) >> 3;
switch (keylen & (8 - 1)) {
case 0:
do { /* All fall throughs */
HASHC;
case 7:
HASHC;
case 6:
HASHC;
case 5:
HASHC;
case 4:
HASHC;
case 3:
HASHC;
case 2:
HASHC;
case 1:
HASHC;
} while (--loop);
}
}
return (n);
}
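/*
 * Editor's note (not part of the original file): hashtext()'s unrolled
 * switch is equivalent to applying the HASHC step, n = c + 65599 * n, to
 * each of the keylen data bytes (VARSIZE minus the 4-byte length word).
 * A plain loop over an ordinary buffer illustrates the same computation;
 * this standalone sketch uses a local demo_* name and is illustrative only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t demo_sdbm_hash(const char *data, size_t len)
{
    uint32_t n = 0;

    while (len-- > 0)
        n = (uint32_t) *data++ + 65599u * n;   /* same step as HASHC */
    return n;
}

int main(void)
{
    const char *s = "postgres95";

    printf("0x%08x\n", (unsigned) demo_sdbm_hash(s, strlen(s)));
    return 0;
}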


@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
*
* hashinsert.c--
* Item insertion in hash tables for Postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/hash.h"
static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf);
static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
/*
* _hash_doinsert() -- Handle insertion of a single HashItem in the table.
*
* This routine is called by the public interface routines, hashbuild
* and hashinsert. By here, hashitem is filled in, and has a unique
* (xid, seqno) pair. The datum to be used as a "key" is in the
* hashitem.
*/
InsertIndexResult
_hash_doinsert(Relation rel, HashItem hitem)
{
Buffer buf;
Buffer metabuf;
BlockNumber blkno;
HashMetaPage metap;
IndexTuple itup;
InsertIndexResult res;
ScanKey itup_scankey;
int natts;
Page page;
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
/* we need a scan key to do our search, so build one */
itup = &(hitem->hash_itup);
if ((natts = rel->rd_rel->relnatts) != 1)
elog(WARN, "Hash indices valid for only one index key.");
itup_scankey = _hash_mkscankey(rel, itup, metap);
/*
* find the first page in the bucket chain containing this key and
* place it in buf. _hash_search obtains a read lock for us.
*/
_hash_search(rel, natts, itup_scankey, &buf, metap);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
/*
* trade in our read lock for a write lock so that we can do the
* insertion.
*/
blkno = BufferGetBlockNumber(buf);
_hash_relbuf(rel, buf, HASH_READ);
buf = _hash_getbuf(rel, blkno, HASH_WRITE);
/*
* XXX btree comment (haven't decided what to do in hash): don't
* think the bucket can be split while we're reading the metapage.
*
* If the page was split between the time that we surrendered our
* read lock and acquired our write lock, then this page may no
* longer be the right place for the key we want to insert.
*/
/* do the insertion */
res = _hash_insertonpg(rel, buf, natts, itup_scankey,
hitem, metabuf);
/* be tidy */
_hash_freeskey(itup_scankey);
return (res);
}
/*
* _hash_insertonpg() -- Insert a tuple on a particular page in the table.
*
 * This procedure does the following things:
*
* + if necessary, splits the target page.
* + inserts the tuple.
*
* On entry, we must have the right buffer on which to do the
* insertion, and the buffer must be pinned and locked. On return,
* we will have dropped both the pin and the write lock on the buffer.
*
*/
static InsertIndexResult
_hash_insertonpg(Relation rel,
Buffer buf,
int keysz,
ScanKey scankey,
HashItem hitem,
Buffer metabuf)
{
InsertIndexResult res;
Page page;
BlockNumber itup_blkno;
OffsetNumber itup_off;
int itemsz;
HashPageOpaque pageopaque;
bool do_expand = false;
Buffer ovflbuf;
HashMetaPage metap;
Bucket bucket;
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
bucket = pageopaque->hasho_bucket;
itemsz = IndexTupleDSize(hitem->hash_itup)
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
itemsz = DOUBLEALIGN(itemsz);
while (PageGetFreeSpace(page) < itemsz) {
/*
* no space on this page; check for an overflow page
*/
if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) {
/*
* ovfl page exists; go get it. if it doesn't have room,
* we'll find out next pass through the loop test above.
*/
ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
HASH_WRITE);
_hash_relbuf(rel, buf, HASH_WRITE);
buf = ovflbuf;
page = BufferGetPage(buf);
} else {
/*
* we're at the end of the bucket chain and we haven't
* found a page with enough room. allocate a new overflow
* page.
*/
do_expand = true;
ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
_hash_relbuf(rel, buf, HASH_WRITE);
buf = ovflbuf;
page = BufferGetPage(buf);
if (PageGetFreeSpace(page) < itemsz) {
/* it doesn't fit on an empty page -- give up */
elog(WARN, "hash item too large");
}
}
_hash_checkpage(page, LH_OVERFLOW_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(pageopaque->hasho_bucket == bucket);
}
itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
itup_blkno = BufferGetBlockNumber(buf);
/* by here, the new tuple is inserted */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
if (res != NULL) {
/*
* Increment the number of keys in the table.
* We switch lock access type just for a moment
* to allow greater accessibility to the metapage.
*/
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
HASH_READ, HASH_WRITE);
metap->hashm_nkeys += 1;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
HASH_WRITE, HASH_READ);
}
_hash_wrtbuf(rel, buf);
if (do_expand ||
(metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
> metap->hashm_ffactor) {
_hash_expandtable(rel, metabuf);
}
_hash_relbuf(rel, metabuf, HASH_READ);
return (res);
}
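/*
 * Editor's note (not part of the original file): a worked example of the
 * expansion test above, using the default fill factor (DEFAULT_FFACTOR is
 * 300 in hash.h) and the initial table of two buckets (hashm_maxbucket 1).
 * Because hashm_nkeys is bumped before the test, the insertion that brings
 * the key count to 602 is the first one whose average chain length exceeds
 * the fill factor and so triggers _hash_expandtable() here (ignoring the
 * do_expand path taken when an overflow page had to be added).  Standalone
 * sketch of the same integer arithmetic:
 */
#include <stdio.h>

int main(void)
{
    unsigned maxbucket = 1;     /* hashm_maxbucket: buckets 0 and 1 exist */
    unsigned ffactor   = 300;   /* hashm_ffactor (DEFAULT_FFACTOR)        */
    unsigned nkeys;

    for (nkeys = 601; nkeys <= 602; nkeys++)
        printf("nkeys=%u expand? %s\n", nkeys,
               (nkeys / (maxbucket + 1)) > ffactor ? "yes" : "no");
    /* prints: nkeys=601 expand? no, then nkeys=602 expand? yes */
    return 0;
}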
/*
* _hash_pgaddtup() -- add a tuple to a particular page in the index.
*
* This routine adds the tuple to the page as requested, and keeps the
* write lock and reference associated with the page's buffer. It is
* an error to call pgaddtup() without a write lock and reference.
*/
static OffsetNumber
_hash_pgaddtup(Relation rel,
Buffer buf,
int keysz,
ScanKey itup_scankey,
Size itemsize,
HashItem hitem)
{
OffsetNumber itup_off;
Page page;
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
(void) PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED);
/* write the buffer, but hold our lock */
_hash_wrtnorelbuf(rel, buf);
return (itup_off);
}


@ -0,0 +1,614 @@
/*-------------------------------------------------------------------------
*
* hashovfl.c--
* Overflow page management code for the Postgres hash access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/genam.h"
#include "access/hash.h"
static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp);
static uint32 _hash_firstfreebit(uint32 map);
/*
* _hash_addovflpage
*
* Add an overflow page to the page currently pointed to by the buffer
* argument 'buf'.
*
* *Metabufp has a read lock upon entering the function; buf has a
* write lock.
*
*/
Buffer
_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
{
OverflowPageAddress oaddr;
BlockNumber ovflblkno;
Buffer ovflbuf;
HashMetaPage metap;
HashPageOpaque ovflopaque;
HashPageOpaque pageopaque;
Page page;
Page ovflpage;
/* this had better be the last page in a bucket chain */
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));
metap = (HashMetaPage) BufferGetPage(*metabufp);
_hash_checkpage((Page) metap, LH_META_PAGE);
/* allocate an empty overflow page */
oaddr = _hash_getovfladdr(rel, metabufp);
if (oaddr == InvalidOvflAddress) {
elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
}
ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
Assert(BlockNumberIsValid(ovflblkno));
ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
Assert(BufferIsValid(ovflbuf));
ovflpage = BufferGetPage(ovflbuf);
/* initialize the new overflow page */
_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
ovflopaque->hasho_nextblkno = InvalidBlockNumber;
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
ovflopaque->hasho_oaddr = oaddr;
ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
_hash_wrtnorelbuf(rel, ovflbuf);
/* logically chain overflow page to previous page */
pageopaque->hasho_nextblkno = ovflblkno;
_hash_wrtnorelbuf(rel, buf);
return (ovflbuf);
}
/*
* _hash_getovfladdr()
*
* Find an available overflow page and return its address.
*
* When we enter this function, we have a read lock on *metabufp which
* we change to a write lock immediately. Before exiting, the write lock
* is exchanged for a read lock.
*
*/
static OverflowPageAddress
_hash_getovfladdr(Relation rel, Buffer *metabufp)
{
HashMetaPage metap;
Buffer mapbuf;
BlockNumber blkno;
PageOffset offset;
OverflowPageAddress oaddr;
SplitNumber splitnum;
uint32 *freep;
uint32 max_free;
uint32 bit;
uint32 first_page;
uint32 free_bit;
uint32 free_page;
uint32 in_use_bits;
uint32 i, j;
metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
splitnum = metap->OVFL_POINT;
max_free = metap->SPARES[splitnum];
free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
/* Look through all the free maps to find the first free block */
first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
for ( i = first_page; i <= free_page; i++ ) {
Page mappage;
blkno = metap->hashm_mapp[i];
mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
mappage = BufferGetPage(mapbuf);
_hash_checkpage(mappage, LH_BITMAP_PAGE);
freep = HashPageGetBitmap(mappage);
Assert(freep);
if (i == free_page)
in_use_bits = free_bit;
else
in_use_bits = BMPGSZ_BIT(metap) - 1;
if (i == first_page) {
bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
j = bit / BITS_PER_MAP;
bit = bit & ~(BITS_PER_MAP - 1);
} else {
bit = 0;
j = 0;
}
for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
if (freep[j] != ALL_SET)
goto found;
}
/* No Free Page Found - have to allocate a new page */
metap->LAST_FREED = metap->SPARES[splitnum];
metap->SPARES[splitnum]++;
offset = metap->SPARES[splitnum] -
(splitnum ? metap->SPARES[splitnum - 1] : 0);
#define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
if (offset > SPLITMASK) {
if (++splitnum >= NCACHED) {
elog(WARN, OVMSG);
}
metap->OVFL_POINT = splitnum;
metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
metap->SPARES[splitnum-1]--;
offset = 0;
}
/* Check if we need to allocate a new bitmap page */
if (free_bit == BMPGSZ_BIT(metap) - 1) {
/* won't be needing old map page */
_hash_relbuf(rel, mapbuf, HASH_WRITE);
free_page++;
if (free_page >= NCACHED) {
elog(WARN, OVMSG);
}
/*
* This is tricky. The 1 indicates that you want the new page
* allocated with 1 clear bit. Actually, you are going to
* allocate 2 pages from this map. The first is going to be
* the map page, the second is the overflow page we were
 * looking for. The init_bitmap routine automatically sets
 * the first bit of the new map page to indicate that the bitmap itself
* is in use. We would explicitly set the second bit, but
* don't have to if we tell init_bitmap not to leave it clear
* in the first place.
*/
if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
1, free_page)) {
elog(WARN, "overflow_page: problem with _hash_initbitmap.");
}
metap->SPARES[splitnum]++;
offset++;
if (offset > SPLITMASK) {
if (++splitnum >= NCACHED) {
elog(WARN, OVMSG);
}
metap->OVFL_POINT = splitnum;
metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
metap->SPARES[splitnum-1]--;
offset = 0;
}
} else {
/*
* Free_bit addresses the last used bit. Bump it to address
* the first available bit.
*/
free_bit++;
SETBIT(freep, free_bit);
_hash_wrtbuf(rel, mapbuf);
}
/* Calculate address of the new overflow page */
oaddr = OADDR_OF(splitnum, offset);
_hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
return (oaddr);
found:
bit = bit + _hash_firstfreebit(freep[j]);
SETBIT(freep, bit);
_hash_wrtbuf(rel, mapbuf);
/*
* Bits are addressed starting with 0, but overflow pages are addressed
 * beginning at 1. 'bit' is a bit address, so we need to increment
* it to convert it to a page number.
*/
bit = 1 + bit + (i * BMPGSZ_BIT(metap));
if (bit >= metap->LAST_FREED) {
metap->LAST_FREED = bit - 1;
}
/* Calculate the split number for this page */
for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
;
offset = (i ? bit - metap->SPARES[i - 1] : bit);
if (offset >= SPLITMASK) {
elog(WARN, OVMSG);
}
/* initialize this page */
oaddr = OADDR_OF(i, offset);
_hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
return (oaddr);
}
/*
* _hash_firstfreebit()
*
* Return the first bit that is not set in the argument 'map'. This
* function is used to find an available overflow page within a
* splitnumber.
*
*/
static uint32
_hash_firstfreebit(uint32 map)
{
uint32 i, mask;
mask = 0x1;
for (i = 0; i < BITS_PER_MAP; i++) {
if (!(mask & map))
return (i);
mask = mask << 1;
}
return (i);
}
/*
* _hash_freeovflpage() -
*
* Mark this overflow page as free and return a buffer with
* the page that follows it (which may be defined as
* InvalidBuffer).
*
*/
Buffer
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
{
HashMetaPage metap;
Buffer metabuf;
Buffer mapbuf;
BlockNumber prevblkno;
BlockNumber blkno;
BlockNumber nextblkno;
HashPageOpaque ovflopaque;
Page ovflpage;
Page mappage;
OverflowPageAddress addr;
SplitNumber splitnum;
uint32 *freep;
uint32 ovflpgno;
int32 bitmappage, bitmapbit;
Bucket bucket;
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
ovflpage = BufferGetPage(ovflbuf);
_hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
addr = ovflopaque->hasho_oaddr;
nextblkno = ovflopaque->hasho_nextblkno;
prevblkno = ovflopaque->hasho_prevblkno;
bucket = ovflopaque->hasho_bucket;
(void) memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
_hash_wrtbuf(rel, ovflbuf);
/*
* fix up the bucket chain. this is a doubly-linked list, so we
* must fix up the bucket chain members behind and ahead of the
* overflow page being deleted.
*
* XXX this should look like:
* - lock prev/next
* - modify/write prev/next (how to do write ordering with a
* doubly-linked list???)
* - unlock prev/next
*/
if (BlockNumberIsValid(prevblkno)) {
Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
Page prevpage = BufferGetPage(prevbuf);
HashPageOpaque prevopaque =
(HashPageOpaque) PageGetSpecialPointer(prevpage);
_hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
Assert(prevopaque->hasho_bucket == bucket);
prevopaque->hasho_nextblkno = nextblkno;
_hash_wrtbuf(rel, prevbuf);
}
if (BlockNumberIsValid(nextblkno)) {
Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
Page nextpage = BufferGetPage(nextbuf);
HashPageOpaque nextopaque =
(HashPageOpaque) PageGetSpecialPointer(nextpage);
_hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
Assert(nextopaque->hasho_bucket == bucket);
nextopaque->hasho_prevblkno = prevblkno;
_hash_wrtbuf(rel, nextbuf);
}
/*
* Fix up the overflow page bitmap that tracks this particular
* overflow page. The bitmap can be found in the MetaPageData
* array element hashm_mapp[bitmappage].
*/
splitnum = (addr >> SPLITSHIFT);
ovflpgno =
(splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
if (ovflpgno < metap->LAST_FREED) {
metap->LAST_FREED = ovflpgno;
}
bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
blkno = metap->hashm_mapp[bitmappage];
mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
mappage = BufferGetPage(mapbuf);
_hash_checkpage(mappage, LH_BITMAP_PAGE);
freep = HashPageGetBitmap(mappage);
CLRBIT(freep, bitmapbit);
_hash_wrtbuf(rel, mapbuf);
_hash_relbuf(rel, metabuf, HASH_WRITE);
/*
* now instantiate the page that replaced this one,
* if it exists, and return that buffer with a write lock.
*/
if (BlockNumberIsValid(nextblkno)) {
return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
} else {
return (InvalidBuffer);
}
}
/*
* _hash_initbitmap()
*
* Initialize a new bitmap page. The metapage has a write-lock upon
* entering the function.
*
* 'pnum' is the OverflowPageAddress of the new bitmap page.
* 'nbits' is how many bits to clear (i.e., make available) in the new
* bitmap page. the remainder of the bits (as well as the first bit,
* representing the bitmap page itself) will be set.
* 'ndx' is the 0-based offset of the new bitmap page within the
* metapage's array of bitmap page OverflowPageAddresses.
*/
#define INT_MASK ((1 << INT_TO_BIT) -1)
int32
_hash_initbitmap(Relation rel,
HashMetaPage metap,
int32 pnum,
int32 nbits,
int32 ndx)
{
Buffer buf;
BlockNumber blkno;
Page pg;
HashPageOpaque op;
uint32 *freep;
int clearbytes, clearints;
blkno = OADDR_TO_BLKNO(pnum);
buf = _hash_getbuf(rel, blkno, HASH_WRITE);
pg = BufferGetPage(buf);
_hash_pageinit(pg, BufferGetPageSize(buf));
op = (HashPageOpaque) PageGetSpecialPointer(pg);
op->hasho_oaddr = InvalidOvflAddress;
op->hasho_prevblkno = InvalidBlockNumber;
op->hasho_nextblkno = InvalidBlockNumber;
op->hasho_flag = LH_BITMAP_PAGE;
op->hasho_bucket = -1;
freep = HashPageGetBitmap(pg);
/* set all of the bits above 'nbits' to 1 */
clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
clearbytes = clearints << INT_TO_BYTE;
(void) memset((char *) freep, 0, clearbytes);
(void) memset(((char *) freep) + clearbytes, 0xFF,
BMPGSZ_BYTE(metap) - clearbytes);
freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);
/* bit 0 represents the new bitmap page */
SETBIT(freep, 0);
/* metapage already has a write lock */
metap->hashm_nmaps++;
metap->hashm_mapp[ndx] = blkno;
/* write out the new bitmap page (releasing its locks) */
_hash_wrtbuf(rel, buf);
return (0);
}
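/*
 * Editor's note (not part of the original file): a standalone sketch of the
 * final bit pattern _hash_initbitmap() leaves in the (single) partially
 * clear word when nbits = 2, the value _hash_metapinit() passes for the
 * first bitmap page.  ALL_SET << 2 clears bits 0 and 1, and SETBIT(freep, 0)
 * then reserves bit 0 for the bitmap page itself, so only bit 1 -- one
 * overflow page -- is left available.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t word  = 0;
    uint32_t nbits = 2;                  /* bits to leave clear initially   */

    word = 0xFFFFFFFFu << (nbits & 31);  /* ALL_SET << (nbits & INT_MASK)   */
    word |= 1u;                          /* SETBIT(freep, 0)                */

    printf("0x%08x\n", (unsigned) word); /* prints 0xfffffffd               */
    return 0;
}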
/*
* _hash_squeezebucket(rel, bucket)
*
 * Try to squeeze the tuples onto pages occurring earlier in the
* bucket chain in an attempt to free overflow pages. When we start
* the "squeezing", the page from which we start taking tuples (the
* "read" page) is the last bucket in the bucket chain and the page
* onto which we start squeezing tuples (the "write" page) is the
* first page in the bucket chain. The read page works backward and
* the write page works forward; the procedure terminates when the
* read page and write page are the same page.
*/
void
_hash_squeezebucket(Relation rel,
HashMetaPage metap,
Bucket bucket)
{
Buffer wbuf;
Buffer rbuf;
BlockNumber wblkno;
BlockNumber rblkno;
Page wpage;
Page rpage;
HashPageOpaque wopaque;
HashPageOpaque ropaque;
OffsetNumber woffnum;
OffsetNumber roffnum;
HashItem hitem;
int itemsz;
/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */
/*
* start squeezing into the base bucket page.
*/
wblkno = BUCKET_TO_BLKNO(bucket);
wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
wpage = BufferGetPage(wbuf);
_hash_checkpage(wpage, LH_BUCKET_PAGE);
wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
/*
* if there aren't any overflow pages, there's nothing to squeeze.
*/
if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) {
_hash_relbuf(rel, wbuf, HASH_WRITE);
return;
}
/*
* find the last page in the bucket chain by starting at the base
* bucket page and working forward.
*
* XXX if chains tend to be long, we should probably move forward
* using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when
* we reach the end. if they are short we probably don't care
* very much. if the hash function is working at all, they had
* better be short..
*/
ropaque = wopaque;
do {
rblkno = ropaque->hasho_nextblkno;
if (ropaque != wopaque) {
_hash_relbuf(rel, rbuf, HASH_WRITE);
}
rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
rpage = BufferGetPage(rbuf);
_hash_checkpage(rpage, LH_OVERFLOW_PAGE);
Assert(!PageIsEmpty(rpage));
ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
Assert(ropaque->hasho_bucket == bucket);
} while (BlockNumberIsValid(ropaque->hasho_nextblkno));
/*
* squeeze the tuples.
*/
roffnum = FirstOffsetNumber;
for(;;) {
hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
itemsz = IndexTupleDSize(hitem->hash_itup)
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
itemsz = DOUBLEALIGN(itemsz);
/*
* walk up the bucket chain, looking for a page big enough for
* this item.
*/
while (PageGetFreeSpace(wpage) < itemsz) {
wblkno = wopaque->hasho_nextblkno;
_hash_wrtbuf(rel, wbuf);
if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) {
_hash_wrtbuf(rel, rbuf);
/* wbuf is already released */
return;
}
wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
wpage = BufferGetPage(wbuf);
_hash_checkpage(wpage, LH_OVERFLOW_PAGE);
Assert(!PageIsEmpty(wpage));
wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
Assert(wopaque->hasho_bucket == bucket);
}
/*
* if we're here, we have found room so insert on the "write"
* page.
*/
woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
(void) PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);
/*
* delete the tuple from the "read" page.
* PageIndexTupleDelete repacks the ItemId array, so 'roffnum'
* will be "advanced" to the "next" ItemId.
*/
PageIndexTupleDelete(rpage, roffnum);
_hash_wrtnorelbuf(rel, rbuf);
/*
* if the "read" page is now empty because of the deletion,
* free it.
*/
if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) {
rblkno = ropaque->hasho_prevblkno;
Assert(BlockNumberIsValid(rblkno));
/*
* free this overflow page. the extra _hash_relbuf is
* because _hash_freeovflpage gratuitously returns the
* next page (we want the previous page and will get it
* ourselves later).
*/
rbuf = _hash_freeovflpage(rel, rbuf);
if (BufferIsValid(rbuf)) {
_hash_relbuf(rel, rbuf, HASH_WRITE);
}
if (rblkno == wblkno) {
/* rbuf is already released */
_hash_wrtbuf(rel, wbuf);
return;
}
rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
rpage = BufferGetPage(rbuf);
_hash_checkpage(rpage, LH_OVERFLOW_PAGE);
Assert(!PageIsEmpty(rpage));
ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
Assert(ropaque->hasho_bucket == bucket);
roffnum = FirstOffsetNumber;
}
}
}


@ -0,0 +1,669 @@
/*-------------------------------------------------------------------------
*
* hashpage.c--
* Hash table page management code for the Postgres hash access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
* data at high addresses includes information about the page including
* whether a page is an overflow page or a true bucket, the block
* numbers of the preceding and following pages, and the overflow
* address of the page if it is an overflow page.
*
* The first page in a hash relation, page zero, is special -- it stores
 * information describing the hash table; it is referred to as the
* "meta page." Pages one and higher store the actual data.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/genam.h"
#include "access/hash.h"
static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
/*
* We use high-concurrency locking on hash indices. There are two cases in
* which we don't do locking. One is when we're building the index.
* Since the creating transaction has not committed, no one can see
* the index, and there's no reason to share locks. The second case
* is when we're just starting up the database system. We use some
* special-purpose initialization code in the relation cache manager
* (see utils/cache/relcache.c) to allow us to do indexed scans on
* the system catalogs before we'd normally be able to. This happens
* before the lock table is fully initialized, so we can't use it.
* Strictly speaking, this violates 2pl, but we don't do 2pl on the
* system catalogs anyway.
*/
#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
/*
* _hash_metapinit() -- Initialize the metadata page of a hash index,
* the two buckets that we begin with and the initial
* bitmap page.
*/
void
_hash_metapinit(Relation rel)
{
HashMetaPage metap;
HashPageOpaque pageopaque;
Buffer metabuf;
Buffer buf;
Page pg;
int nbuckets;
uint32 nelem; /* number elements */
uint32 lg2nelem; /* _hash_log2(nelem) */
uint32 nblocks;
uint16 i;
/* can't be sharing this with anyone, now... */
if (USELOCKING)
RelationSetLockForWrite(rel);
if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
elog(WARN, "Cannot initialize non-empty hash table %s",
RelationGetRelationName(rel));
}
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
pg = BufferGetPage(metabuf);
metap = (HashMetaPage) pg;
_hash_pageinit(pg, BufferGetPageSize(metabuf));
metap->hashm_magic = HASH_MAGIC;
metap->hashm_version = HASH_VERSION;
metap->hashm_nkeys = 0;
metap->hashm_nmaps = 0;
metap->hashm_ffactor = DEFAULT_FFACTOR;
metap->hashm_bsize = BufferGetPageSize(metabuf);
metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
for (i = metap->hashm_bshift; i > 0; --i) {
if ((1 << i) < (metap->hashm_bsize -
(DOUBLEALIGN(sizeof(PageHeaderData)) +
DOUBLEALIGN(sizeof(HashPageOpaqueData))))) {
break;
}
}
Assert(i);
metap->hashm_bmsize = 1 << i;
metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
/*
* Make nelem = 2 rather than 0 so that we end up allocating space
* for the next greater power of two number of buckets.
*/
nelem = 2;
lg2nelem = 1; /*_hash_log2(MAX(nelem, 2)) */
nbuckets = 2; /*1 << lg2nelem */
memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */
metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */
metap->hashm_ovflpoint = 1; /* lg2nelem */
metap->hashm_lastfreed = 2;
metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */
metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_oaddr = InvalidOvflAddress;
pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_flag = LH_META_PAGE;
pageopaque->hasho_bucket = -1;
/*
* First bitmap page is at: splitpoint lg2nelem page offset 1 which
* turns out to be page 3. Couldn't initialize page 3 until we created
* the first two buckets above.
*/
if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
elog(WARN, "Problem with _hash_initbitmap.");
/* all done */
_hash_wrtnorelbuf(rel, metabuf);
/*
* initialize the first two buckets
*/
for (i = 0; i <= 1; i++) {
buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
pg = BufferGetPage(buf);
_hash_pageinit(pg, BufferGetPageSize(buf));
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_oaddr = InvalidOvflAddress;
pageopaque->hasho_prevblkno = InvalidBlockNumber;
pageopaque->hasho_nextblkno = InvalidBlockNumber;
pageopaque->hasho_flag = LH_BUCKET_PAGE;
pageopaque->hasho_bucket = i;
_hash_wrtbuf(rel, buf);
}
_hash_relbuf(rel, metabuf, HASH_WRITE);
if (USELOCKING)
RelationUnsetLockForWrite(rel);
}
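/*
 * Editor's sketch (illustrative, not part of the original source):
 * assuming HASH_METAPAGE is block 0 and the usual BUCKET_TO_BLKNO
 * layout, _hash_metapinit leaves a four-page index behind: the meta
 * page (block 0), buckets 0 and 1 (blocks 1 and 2), and the first
 * bitmap page (block 3, per the "splitpoint lg2nelem" comment above).
 * The interesting meta page fields end up as:
 */
#if 0
	Assert(metap->hashm_maxbucket == 1);	/* buckets 0 and 1 exist */
	Assert(metap->hashm_lowmask == 1);	/* nbuckets - 1 */
	Assert(metap->hashm_highmask == 3);	/* (nbuckets << 1) - 1 */
	Assert(metap->hashm_ovflpoint == 1);	/* current split point */
	Assert(metap->hashm_nkeys == 0);	/* no index tuples yet */
#endif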
/*
* _hash_getbuf() -- Get a buffer by block number for read or write.
*
* When this routine returns, the appropriate lock is set on the
 * requested buffer and its reference count is correct.
*
* XXX P_NEW is not used because, unlike the tree structures, we
* need the bucket blocks to be at certain block numbers. we must
* depend on the caller to call _hash_pageinit on the block if it
* knows that this is a new block.
*/
Buffer
_hash_getbuf(Relation rel, BlockNumber blkno, int access)
{
Buffer buf;
if (blkno == P_NEW) {
elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
}
switch (access) {
case HASH_WRITE:
case HASH_READ:
_hash_setpagelock(rel, blkno, access);
break;
default:
elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %.*s",
access, NAMEDATALEN, RelationGetRelationName(rel));
break;
}
buf = ReadBuffer(rel, blkno);
/* ref count and lock type are correct */
return (buf);
}
/*
* _hash_relbuf() -- release a locked buffer.
*/
void
_hash_relbuf(Relation rel, Buffer buf, int access)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
switch (access) {
case HASH_WRITE:
case HASH_READ:
_hash_unsetpagelock(rel, blkno, access);
break;
default:
elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %.*s",
access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
}
ReleaseBuffer(buf);
}
/*
* _hash_wrtbuf() -- write a hash page to disk.
*
* This routine releases the lock held on the buffer and our reference
* to it. It is an error to call _hash_wrtbuf() without a write lock
* or a reference to the buffer.
*/
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
WriteBuffer(buf);
_hash_unsetpagelock(rel, blkno, HASH_WRITE);
}
/*
* _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
* our reference or lock.
*
* It is an error to call _hash_wrtnorelbuf() without a write lock
* or a reference to the buffer.
*/
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
WriteNoReleaseBuffer(buf);
}
Page
_hash_chgbufaccess(Relation rel,
Buffer *bufp,
int from_access,
int to_access)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(*bufp);
switch (from_access) {
case HASH_WRITE:
_hash_wrtbuf(rel, *bufp);
break;
case HASH_READ:
_hash_relbuf(rel, *bufp, from_access);
break;
default:
elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %.*s",
from_access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
break;
}
*bufp = _hash_getbuf(rel, blkno, to_access);
return (BufferGetPage(*bufp));
}
/*
* _hash_pageinit() -- Initialize a new page.
*/
void
_hash_pageinit(Page page, Size size)
{
Assert(((PageHeader) page)->pd_lower == 0);
Assert(((PageHeader) page)->pd_upper == 0);
Assert(((PageHeader) page)->pd_special == 0);
/*
* Cargo-cult programming -- don't really need this to be zero, but
* creating new pages is an infrequent occurrence and it makes me feel
* good when I know they're empty.
*/
memset(page, 0, size);
PageInit(page, size, sizeof(HashPageOpaqueData));
}
static void
_hash_setpagelock(Relation rel,
BlockNumber blkno,
int access)
{
ItemPointerData iptr;
if (USELOCKING) {
ItemPointerSet(&iptr, blkno, 1);
switch (access) {
case HASH_WRITE:
RelationSetSingleWLockPage(rel, &iptr);
break;
case HASH_READ:
RelationSetSingleRLockPage(rel, &iptr);
break;
default:
elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %.*s",
access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
break;
}
}
}
static void
_hash_unsetpagelock(Relation rel,
BlockNumber blkno,
int access)
{
ItemPointerData iptr;
if (USELOCKING) {
ItemPointerSet(&iptr, blkno, 1);
switch (access) {
case HASH_WRITE:
RelationUnsetSingleWLockPage(rel, &iptr);
break;
case HASH_READ:
RelationUnsetSingleRLockPage(rel, &iptr);
break;
default:
elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %.*s",
access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
break;
}
}
}
void
_hash_pagedel(Relation rel, ItemPointer tid)
{
Buffer buf;
Buffer metabuf;
Page page;
BlockNumber blkno;
OffsetNumber offno;
HashMetaPage metap;
HashPageOpaque opaque;
blkno = ItemPointerGetBlockNumber(tid);
offno = ItemPointerGetOffsetNumber(tid);
buf = _hash_getbuf(rel, blkno, HASH_WRITE);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
PageIndexTupleDelete(page, offno);
_hash_wrtnorelbuf(rel, buf);
if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) {
buf = _hash_freeovflpage(rel, buf);
if (BufferIsValid(buf)) {
_hash_relbuf(rel, buf, HASH_WRITE);
}
} else {
_hash_relbuf(rel, buf, HASH_WRITE);
}
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
++metap->hashm_nkeys;
_hash_wrtbuf(rel, metabuf);
}
void
_hash_expandtable(Relation rel, Buffer metabuf)
{
HashMetaPage metap;
Bucket old_bucket;
Bucket new_bucket;
uint32 spare_ndx;
/* elog(DEBUG, "_hash_expandtable: expanding..."); */
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
new_bucket = ++metap->MAX_BUCKET;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
/*
* If the split point is increasing (MAX_BUCKET's log base 2
 * increases), we need to copy the current contents of the spare
* split bucket to the next bucket.
*/
spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
if (spare_ndx > metap->OVFL_POINT) {
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
metap->OVFL_POINT = spare_ndx;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
}
if (new_bucket > metap->HIGH_MASK) {
/* Starting a new doubling */
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
metap->LOW_MASK = metap->HIGH_MASK;
metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
}
/* Relocate records to the new bucket */
_hash_splitpage(rel, metabuf, old_bucket, new_bucket);
}
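/*
 * Editor's sketch (illustrative): the mask bookkeeping above, traced by
 * hand from the freshly initialized state (maxbucket 1, lowmask 1,
 * highmask 3, ovflpoint 1):
 *
 *	new_bucket = 2:	old = 2 & 1 = 0; _hash_log2(3) = 2 > 1, so the
 *			spare count is copied and ovflpoint becomes 2;
 *			2 <= highmask, masks stay 1/3; bucket 0 splits
 *			into buckets 0 and 2.
 *	new_bucket = 3:	old = 3 & 1 = 1; _hash_log2(4) = 2, no spare
 *			copy; masks stay 1/3; bucket 1 splits into 1 and 3.
 *	new_bucket = 4:	old = 4 & 1 = 0; _hash_log2(5) = 3 > 2, spare
 *			copy, ovflpoint becomes 3; 4 > highmask, so
 *			lowmask becomes 3 and highmask becomes 4 | 3 = 7;
 *			bucket 0 splits into 0 and 4.
 */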
/*
* _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket'
*
* this routine is actually misnamed -- we are splitting a bucket that
* consists of a base bucket page and zero or more overflow (bucket
* chain) pages.
*/
static void
_hash_splitpage(Relation rel,
Buffer metabuf,
Bucket obucket,
Bucket nbucket)
{
Bucket bucket;
Buffer obuf;
Buffer nbuf;
Buffer ovflbuf;
BlockNumber oblkno;
BlockNumber nblkno;
bool null;
Datum datum;
HashItem hitem;
HashPageOpaque oopaque;
HashPageOpaque nopaque;
HashMetaPage metap;
IndexTuple itup;
int itemsz;
OffsetNumber ooffnum;
OffsetNumber noffnum;
OffsetNumber omaxoffnum;
Page opage;
Page npage;
TupleDesc itupdesc;
/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
obucket, obucket, nbucket);
*/
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
/* get the buffers & pages */
oblkno = BUCKET_TO_BLKNO(obucket);
nblkno = BUCKET_TO_BLKNO(nbucket);
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
npage = BufferGetPage(nbuf);
/* initialize the new bucket */
_hash_pageinit(npage, BufferGetPageSize(nbuf));
nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
nopaque->hasho_prevblkno = InvalidBlockNumber;
nopaque->hasho_nextblkno = InvalidBlockNumber;
nopaque->hasho_flag = LH_BUCKET_PAGE;
nopaque->hasho_oaddr = InvalidOvflAddress;
nopaque->hasho_bucket = nbucket;
_hash_wrtnorelbuf(rel, nbuf);
/*
* make sure the old bucket isn't empty. advance 'opage' and
* friends through the overflow bucket chain until we find a
* non-empty page.
*
* XXX we should only need this once, if we are careful to
* preserve the invariant that overflow pages are never empty.
*/
_hash_checkpage(opage, LH_BUCKET_PAGE);
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
if (PageIsEmpty(opage)) {
oblkno = oopaque->hasho_nextblkno;
_hash_relbuf(rel, obuf, HASH_WRITE);
if (!BlockNumberIsValid(oblkno)) {
/*
* the old bucket is completely empty; of course, the new
* bucket will be as well, but since it's a base bucket
* page we don't care.
*/
_hash_relbuf(rel, nbuf, HASH_WRITE);
return;
}
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
_hash_checkpage(opage, LH_OVERFLOW_PAGE);
if (PageIsEmpty(opage)) {
elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
}
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
}
/*
* we are now guaranteed that 'opage' is not empty. partition the
* tuples in the old bucket between the old bucket and the new
* bucket, advancing along their respective overflow bucket chains
* and adding overflow pages as needed.
*/
ooffnum = FirstOffsetNumber;
omaxoffnum = PageGetMaxOffsetNumber(opage);
for (;;) {
/*
* at each iteration through this loop, each of these variables
* should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
*/
/* check if we're at the end of the page */
if (ooffnum > omaxoffnum) {
/* at end of page, but check for overflow page */
oblkno = oopaque->hasho_nextblkno;
if (BlockNumberIsValid(oblkno)) {
/*
* we ran out of tuples on this particular page, but
* we have more overflow pages; re-init values.
*/
_hash_wrtbuf(rel, obuf);
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
_hash_checkpage(opage, LH_OVERFLOW_PAGE);
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
/* we're guaranteed that an ovfl page has at least 1 tuple */
if (PageIsEmpty(opage)) {
elog(WARN, "_hash_splitpage: empty ovfl page %d!",
oblkno);
}
ooffnum = FirstOffsetNumber;
omaxoffnum = PageGetMaxOffsetNumber(opage);
} else {
/*
* we're at the end of the bucket chain, so now we're
* really done with everything. before quitting, call
* _hash_squeezebucket to ensure the tuples in the
* bucket (including the overflow pages) are packed as
* tightly as possible.
*/
_hash_wrtbuf(rel, obuf);
_hash_wrtbuf(rel, nbuf);
_hash_squeezebucket(rel, metap, obucket);
return;
}
}
/* hash on the tuple */
hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
itup = &(hitem->hash_itup);
itupdesc = RelationGetTupleDescriptor(rel);
datum = index_getattr(itup, 1, itupdesc, &null);
bucket = _hash_call(rel, metap, datum);
if (bucket == nbucket) {
/*
* insert the tuple into the new bucket. if it doesn't
* fit on the current page in the new bucket, we must
* allocate a new overflow page and place the tuple on
* that page instead.
*/
itemsz = IndexTupleDSize(hitem->hash_itup)
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
itemsz = DOUBLEALIGN(itemsz);
if (PageGetFreeSpace(npage) < itemsz) {
ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
_hash_wrtbuf(rel, nbuf);
nbuf = ovflbuf;
npage = BufferGetPage(nbuf);
_hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
}
noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
(void) PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
_hash_wrtnorelbuf(rel, nbuf);
/*
* now delete the tuple from the old bucket. after this
* section of code, 'ooffnum' will actually point to the
* ItemId to which we would point if we had advanced it
* before the deletion (PageIndexTupleDelete repacks the
* ItemId array). this also means that 'omaxoffnum' is
* exactly one less than it used to be, so we really can
* just decrement it instead of calling
* PageGetMaxOffsetNumber.
*/
PageIndexTupleDelete(opage, ooffnum);
_hash_wrtnorelbuf(rel, obuf);
omaxoffnum = OffsetNumberPrev(omaxoffnum);
/*
* tidy up. if the old page was an overflow page and it
* is now empty, we must free it (we want to preserve the
* invariant that overflow pages cannot be empty).
*/
if (PageIsEmpty(opage) &&
(oopaque->hasho_flag & LH_OVERFLOW_PAGE)) {
obuf = _hash_freeovflpage(rel, obuf);
/* check that we're not through the bucket chain */
if (BufferIsInvalid(obuf)) {
_hash_wrtbuf(rel, nbuf);
_hash_squeezebucket(rel, metap, obucket);
return;
}
/*
* re-init. again, we're guaranteed that an ovfl page
* has at least one tuple.
*/
opage = BufferGetPage(obuf);
_hash_checkpage(opage, LH_OVERFLOW_PAGE);
oblkno = BufferGetBlockNumber(obuf);
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
if (PageIsEmpty(opage)) {
elog(WARN, "_hash_splitpage: empty overflow page %d",
oblkno);
}
ooffnum = FirstOffsetNumber;
omaxoffnum = PageGetMaxOffsetNumber(opage);
}
} else {
/*
* the tuple stays on this page. we didn't move anything,
* so we didn't delete anything and therefore we don't
* have to change 'omaxoffnum'.
*
* XXX any hash value from [0, nbucket-1] will map to this
* bucket, which doesn't make sense to me.
*/
ooffnum = OffsetNumberNext(ooffnum);
}
}
/*NOTREACHED*/
}
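/*
 * Editor's note (illustrative): the loop above preserves the invariant
 * its comments rely on -- overflow pages are never left empty.  A tuple
 * that rehashes to the new bucket is first copied there (spilling onto a
 * fresh overflow page when PageGetFreeSpace says it will not fit) and
 * only then deleted from the old page; if that delete empties an old
 * overflow page, the page is freed on the spot; and whichever way the
 * walk ends, _hash_squeezebucket repacks the old bucket chain.
 */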


@ -0,0 +1,172 @@
/*-------------------------------------------------------------------------
*
* hashscan.c--
* manage scans on hash tables
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
* NOTES
* Because we can be doing an index scan on a relation while we
* update it, we need to avoid missing data that moves around in
* the index. The routines and global variables in this file
* guarantee that all scans in the local address space stay
* correctly positioned. This is all we need to worry about, since
* write locking guarantees that no one else will be on the same
* page at the same time as we are.
*
* The scheme is to manage a list of active scans in the current
* backend. Whenever we add or remove records from an index, we
* check the list of active scans to see if any has been affected.
* A scan is affected only if it is on the same relation, and the
* same page, as the update.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/hash.h"
static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
typedef struct HashScanListData {
IndexScanDesc hashsl_scan;
struct HashScanListData *hashsl_next;
} HashScanListData;
typedef HashScanListData *HashScanList;
static HashScanList HashScans = (HashScanList) NULL;
/*
* _Hash_regscan() -- register a new scan.
*/
void
_hash_regscan(IndexScanDesc scan)
{
HashScanList new_el;
new_el = (HashScanList) palloc(sizeof(HashScanListData));
new_el->hashsl_scan = scan;
new_el->hashsl_next = HashScans;
HashScans = new_el;
}
/*
* _hash_dropscan() -- drop a scan from the scan list
*/
void
_hash_dropscan(IndexScanDesc scan)
{
HashScanList chk, last;
last = (HashScanList) NULL;
for (chk = HashScans;
chk != (HashScanList) NULL && chk->hashsl_scan != scan;
chk = chk->hashsl_next) {
last = chk;
}
if (chk == (HashScanList) NULL)
elog(WARN, "hash scan list trashed; can't find 0x%lx", scan);
if (last == (HashScanList) NULL)
HashScans = chk->hashsl_next;
else
last->hashsl_next = chk->hashsl_next;
#ifdef PERFECT_MEM
pfree (chk);
#endif /* PERFECT_MEM */
}
void
_hash_adjscans(Relation rel, ItemPointer tid)
{
HashScanList l;
Oid relid;
relid = rel->rd_id;
for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) {
if (relid == l->hashsl_scan->relation->rd_id)
_hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid));
}
}
static void
_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
Buffer buf;
Buffer metabuf;
HashScanOpaque so;
if (!_hash_scantouched(scan, blkno, offno))
return;
metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);
so = (HashScanOpaque) scan->opaque;
buf = so->hashso_curbuf;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) {
_hash_step(scan, &buf, BackwardScanDirection, metabuf);
so->hashso_curbuf = buf;
}
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) {
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
_hash_step(scan, &buf, BackwardScanDirection, metabuf);
so->hashso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
}
}
static bool
_hash_scantouched(IndexScanDesc scan,
BlockNumber blkno,
OffsetNumber offno)
{
ItemPointer current;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return (true);
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return (true);
return (false);
}
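/*
 * Editor's sketch (illustrative): suppose a scan last returned the tuple
 * at (block 7, offset 5) and a delete removes the tuple at (block 7,
 * offset 3).  PageIndexTupleDelete repacks the line pointer array, so
 * every tuple at or beyond the deleted offset slides down one slot.
 * Because the scan's offset (5) is >= the deleted offset (3),
 * _hash_scandel steps the scan back one position, so its next forward
 * step lands on the tuple that slid into its old slot instead of
 * skipping it.
 */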


@ -0,0 +1,425 @@
/*-------------------------------------------------------------------------
*
* hashsearch.c--
* search code for postgres hash tables
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "fmgr.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/skey.h"
#include "access/sdir.h"
#include "access/hash.h"
/*
 * _hash_search() -- Finds the page/bucket that contains the
* scankey and loads it into *bufP. the buffer has a read lock.
*/
void
_hash_search(Relation rel,
int keysz,
ScanKey scankey,
Buffer *bufP,
HashMetaPage metap)
{
BlockNumber blkno;
Datum keyDatum;
Bucket bucket;
if (scankey == (ScanKey) NULL ||
(keyDatum = scankey[0].sk_argument) == (Datum) NULL) {
/*
* If the scankey argument is NULL, all tuples will satisfy
* the scan so we start the scan at the first bucket (bucket
* 0).
*/
bucket = 0;
} else {
bucket = _hash_call(rel, metap, keyDatum);
}
blkno = BUCKET_TO_BLKNO(bucket);
*bufP = _hash_getbuf(rel, blkno, HASH_READ);
}
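/*
 * Editor's note (illustrative): with no scan key -- an unqualified index
 * scan -- _hash_search simply hands back bucket 0's page, and the caller
 * relies on _hash_step's "allbuckets" behaviour to walk every bucket in
 * turn.  With a key, _hash_call picks the starting bucket from the key's
 * hash value.
 */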
/*
* _hash_next() -- Get the next item in a scan.
*
* On entry, we have a valid currentItemData in the scan, and a
* read lock on the page that contains that item. We do not have
* the page pinned. We return the next item in the scan. On
* exit, we have the page containing the next item locked but not
* pinned.
*/
RetrieveIndexResult
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
Relation rel;
Buffer buf;
Buffer metabuf;
Page page;
OffsetNumber offnum;
RetrieveIndexResult res;
ItemPointer current;
ItemPointer iptr;
HashItem hitem;
IndexTuple itup;
HashScanOpaque so;
rel = scan->relation;
so = (HashScanOpaque) scan->opaque;
current = &(scan->currentItemData);
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
/*
* XXX 10 may 91: somewhere there's a bug in our management of the
* cached buffer for this scan. wei discovered it. the following
* is a workaround so he can work until i figure out what's going on.
*/
if (!BufferIsValid(so->hashso_curbuf)) {
so->hashso_curbuf = _hash_getbuf(rel,
ItemPointerGetBlockNumber(current),
HASH_READ);
}
/* we still have the buffer pinned and locked */
buf = so->hashso_curbuf;
/*
* step to next valid tuple. note that _hash_step releases our
* lock on 'metabuf'; if we switch to a new 'buf' while looking
* for the next tuple, we come back with a lock on that buffer.
*/
if (!_hash_step(scan, &buf, dir, metabuf)) {
return ((RetrieveIndexResult) NULL);
}
/* if we're here, _hash_step found a valid tuple */
current = &(scan->currentItemData);
offnum = ItemPointerGetOffsetNumber(current);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
itup = &hitem->hash_itup;
iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
res = FormRetrieveIndexResult(current, iptr);
return (res);
}
static void
_hash_readnext(Relation rel,
Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
BlockNumber blkno;
blkno = (*opaquep)->hasho_nextblkno;
_hash_relbuf(rel, *bufp, HASH_READ);
*bufp = InvalidBuffer;
if (BlockNumberIsValid(blkno)) {
*bufp = _hash_getbuf(rel, blkno, HASH_READ);
*pagep = BufferGetPage(*bufp);
_hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
Assert(!PageIsEmpty(*pagep));
}
}
static void
_hash_readprev(Relation rel,
Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
BlockNumber blkno;
blkno = (*opaquep)->hasho_prevblkno;
_hash_relbuf(rel, *bufp, HASH_READ);
*bufp = InvalidBuffer;
if (BlockNumberIsValid(blkno)) {
*bufp = _hash_getbuf(rel, blkno, HASH_READ);
*pagep = BufferGetPage(*bufp);
_hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
if (PageIsEmpty(*pagep)) {
Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
_hash_relbuf(rel, *bufp, HASH_READ);
*bufp = InvalidBuffer;
}
}
}
/*
* _hash_first() -- Find the first item in a scan.
*
* Return the RetrieveIndexResult of the first item in the tree that
 * satisfies the qualification associated with the scan descriptor. On
* exit, the page containing the current index tuple is read locked
* and pinned, and the scan's opaque data entry is updated to
* include the buffer.
*/
RetrieveIndexResult
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
Relation rel;
Buffer buf;
Buffer metabuf;
Page page;
HashPageOpaque opaque;
HashMetaPage metap;
HashItem hitem;
IndexTuple itup;
ItemPointer current;
ItemPointer iptr;
OffsetNumber offnum;
RetrieveIndexResult res;
HashScanOpaque so;
rel = scan->relation;
so = (HashScanOpaque) scan->opaque;
current = &(scan->currentItemData);
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
/*
* XXX -- The attribute number stored in the scan key is the attno
* in the heap relation. We need to transmogrify this into
* the index relation attno here. For the moment, we have
* hardwired attno == 1.
*/
/* find the correct bucket page and load it into buf */
_hash_search(rel, 1, scan->keyData, &buf, metap);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
/*
* if we are scanning forward, we need to find the first non-empty
* page (if any) in the bucket chain. since overflow pages are
* never empty, this had better be either the bucket page or the
* first overflow page.
*
* if we are scanning backward, we always go all the way to the
* end of the bucket chain.
*/
if (PageIsEmpty(page)) {
if (BlockNumberIsValid(opaque->hasho_nextblkno)) {
_hash_readnext(rel, &buf, &page, &opaque);
} else {
ItemPointerSetInvalid(current);
so->hashso_curbuf = InvalidBuffer;
return ((RetrieveIndexResult) NULL);
}
}
if (ScanDirectionIsBackward(dir)) {
while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
_hash_readnext(rel, &buf, &page, &opaque);
}
}
if (!_hash_step(scan, &buf, dir, metabuf)) {
return ((RetrieveIndexResult) NULL);
}
/* if we're here, _hash_step found a valid tuple */
current = &(scan->currentItemData);
offnum = ItemPointerGetOffsetNumber(current);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
itup = &hitem->hash_itup;
iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
res = FormRetrieveIndexResult(current, iptr);
return (res);
}
/*
* _hash_step() -- step to the next valid item in a scan in the bucket.
*
* If no valid record exists in the requested direction, return
* false. Else, return true and set the CurrentItemData for the
* scan to the right thing.
*
* 'bufP' points to the buffer which contains the current page
* that we'll step through.
*
* 'metabuf' is released when this returns.
*/
bool
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
{
Relation rel;
ItemPointer current;
HashScanOpaque so;
int allbuckets;
HashMetaPage metap;
Buffer buf;
Page page;
HashPageOpaque opaque;
OffsetNumber maxoff;
OffsetNumber offnum;
Bucket bucket;
BlockNumber blkno;
HashItem hitem;
IndexTuple itup;
rel = scan->relation;
current = &(scan->currentItemData);
so = (HashScanOpaque) scan->opaque;
allbuckets = (scan->numberOfKeys < 1);
metap = (HashMetaPage) BufferGetPage(metabuf);
_hash_checkpage((Page) metap, LH_META_PAGE);
buf = *bufP;
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
/*
* If _hash_step is called from _hash_first, current will not be
* valid, so we can't dereference it. However, in that case, we
* presumably want to start at the beginning/end of the page...
*/
maxoff = PageGetMaxOffsetNumber(page);
if (ItemPointerIsValid(current)) {
offnum = ItemPointerGetOffsetNumber(current);
} else {
offnum = InvalidOffsetNumber;
}
/*
* 'offnum' now points to the last tuple we have seen (if any).
*
* continue to step through tuples until:
* 1) we get to the end of the bucket chain or
* 2) we find a valid tuple.
*/
do {
bucket = opaque->hasho_bucket;
switch (dir) {
case ForwardScanDirection:
if (offnum != InvalidOffsetNumber) {
offnum = OffsetNumberNext(offnum); /* move forward */
} else {
offnum = FirstOffsetNumber; /* new page */
}
while (offnum > maxoff) {
/*
* either this page is empty (maxoff ==
* InvalidOffsetNumber) or we ran off the end.
*/
_hash_readnext(rel, &buf, &page, &opaque);
if (BufferIsInvalid(buf)) { /* end of chain */
if (allbuckets && bucket < metap->hashm_maxbucket) {
++bucket;
blkno = BUCKET_TO_BLKNO(bucket);
buf = _hash_getbuf(rel, blkno, HASH_READ);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == bucket);
while (PageIsEmpty(page) &&
BlockNumberIsValid(opaque->hasho_nextblkno)) {
_hash_readnext(rel, &buf, &page, &opaque);
}
maxoff = PageGetMaxOffsetNumber(page);
offnum = FirstOffsetNumber;
} else {
maxoff = offnum = InvalidOffsetNumber;
break; /* while */
}
} else {
/* _hash_readnext never returns an empty page */
maxoff = PageGetMaxOffsetNumber(page);
offnum = FirstOffsetNumber;
}
}
break;
case BackwardScanDirection:
if (offnum != InvalidOffsetNumber) {
offnum = OffsetNumberPrev(offnum); /* move back */
} else {
offnum = maxoff; /* new page */
}
while (offnum < FirstOffsetNumber) {
/*
* either this page is empty (offnum ==
* InvalidOffsetNumber) or we ran off the end.
*/
_hash_readprev(rel, &buf, &page, &opaque);
if (BufferIsInvalid(buf)) { /* end of chain */
if (allbuckets && bucket > 0) {
--bucket;
blkno = BUCKET_TO_BLKNO(bucket);
buf = _hash_getbuf(rel, blkno, HASH_READ);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == bucket);
while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
_hash_readnext(rel, &buf, &page, &opaque);
}
maxoff = offnum = PageGetMaxOffsetNumber(page);
} else {
maxoff = offnum = InvalidOffsetNumber;
break; /* while */
}
} else {
/* _hash_readprev never returns an empty page */
maxoff = offnum = PageGetMaxOffsetNumber(page);
}
}
break;
default:
/* NoMovementScanDirection */
/* this should not be reached */
break;
}
/* we ran off the end of the world without finding a match */
if (offnum == InvalidOffsetNumber) {
_hash_relbuf(rel, metabuf, HASH_READ);
*bufP = so->hashso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(current);
return(false);
}
/* get ready to check this tuple */
hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
itup = &hitem->hash_itup;
} while (!_hash_checkqual(scan, itup));
/* if we made it to here, we've found a valid tuple */
_hash_relbuf(rel, metabuf, HASH_READ);
blkno = BufferGetBlockNumber(buf);
*bufP = so->hashso_curbuf = buf;
ItemPointerSet(current, blkno, offnum);
return(true);
}


@ -0,0 +1,104 @@
/*-------------------------------------------------------------------------
*
 * hashstrat.c--
 * Strategy map entries for the hash indexed access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/genam.h"
#include "access/hash.h"
/*
* only one valid strategy for hash tables: equality.
*/
static StrategyNumber HTNegate[1] = {
InvalidStrategy
};
static StrategyNumber HTCommute[1] = {
HTEqualStrategyNumber
};
static StrategyNumber HTNegateCommute[1] = {
InvalidStrategy
};
static StrategyEvaluationData HTEvaluationData = {
/* XXX static for simplicity */
HTMaxStrategyNumber,
(StrategyTransformMap)HTNegate,
(StrategyTransformMap)HTCommute,
(StrategyTransformMap)HTNegateCommute,
{NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
};
/* ----------------------------------------------------------------
* RelationGetHashStrategy
* ----------------------------------------------------------------
*/
StrategyNumber
_hash_getstrat(Relation rel,
AttrNumber attno,
RegProcedure proc)
{
StrategyNumber strat;
strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
Assert(StrategyNumberIsValid(strat));
return (strat);
}
bool
_hash_invokestrat(Relation rel,
AttrNumber attno,
StrategyNumber strat,
Datum left,
Datum right)
{
return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
left, right));
}


@ -0,0 +1,147 @@
/*-------------------------------------------------------------------------
*
 * hashutil.c--
 * Utility code for the Postgres hash implementation.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "fmgr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/iqual.h"
#include "access/hash.h"
ScanKey
_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
{
ScanKey skey;
TupleDesc itupdesc;
int natts;
AttrNumber i;
Datum arg;
RegProcedure proc;
bool null;
natts = rel->rd_rel->relnatts;
itupdesc = RelationGetTupleDescriptor(rel);
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
for (i = 0; i < natts; i++) {
arg = index_getattr(itup, i + 1, itupdesc, &null);
proc = metap->hashm_procid;
ScanKeyEntryInitialize(&skey[i],
0x0, (AttrNumber) (i + 1), proc, arg);
}
return (skey);
}
void
_hash_freeskey(ScanKey skey)
{
pfree(skey);
}
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
if (scan->numberOfKeys > 0)
return (index_keytest(itup,
RelationGetTupleDescriptor(scan->relation),
scan->numberOfKeys, scan->keyData));
else
return (true);
}
HashItem
_hash_formitem(IndexTuple itup)
{
int nbytes_hitem;
HashItem hitem;
Size tuplen;
/* disallow nulls in hash keys */
if (itup->t_info & INDEX_NULL_MASK)
elog(WARN, "hash indices cannot include null keys");
/* make a copy of the index tuple with room for the sequence number */
tuplen = IndexTupleSize(itup);
nbytes_hitem = tuplen +
(sizeof(HashItemData) - sizeof(IndexTupleData));
hitem = (HashItem) palloc(nbytes_hitem);
memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
return (hitem);
}
Bucket
_hash_call(Relation rel, HashMetaPage metap, Datum key)
{
uint32 n;
Bucket bucket;
RegProcedure proc;
proc = metap->hashm_procid;
n = (uint32) fmgr(proc, key);
bucket = n & metap->hashm_highmask;
if (bucket > metap->hashm_maxbucket)
bucket = bucket & metap->hashm_lowmask;
return (bucket);
}
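/*
 * Editor's sketch (illustrative): suppose the table has grown to
 * maxbucket = 4 with lowmask = 3 and highmask = 7.  A hash value of 12
 * gives 12 & 7 = 4, a live bucket, so bucket 4 is used.  A hash value of
 * 13 gives 13 & 7 = 5, which is beyond maxbucket, so we fall back to the
 * previous mask: 13 & 3 = 1, i.e. bucket 1 -- the bucket that will
 * eventually split into buckets 1 and 5.
 */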
/*
* _hash_log2 -- returns ceil(lg2(num))
*/
uint32
_hash_log2(uint32 num)
{
uint32 i, limit;
limit = 1;
for (i = 0; limit < num; limit = limit << 1, i++)
;
return (i);
}
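/*
 * Editor's note (illustrative): the loop counts doublings, so
 * _hash_log2(1) == 0, _hash_log2(2) == 1, _hash_log2(3) == 2,
 * _hash_log2(4) == 2 and _hash_log2(5) == 3 -- i.e. ceil(lg2(num)),
 * which is what the split-point arithmetic in _hash_expandtable expects.
 */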
/*
* _hash_checkpage -- sanity checks on the format of all hash pages
*/
void
_hash_checkpage(Page page, int flags)
{
PageHeader ph = (PageHeader) page;
HashPageOpaque opaque;
Assert(page);
Assert(ph->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
#if 1
Assert(ph->pd_upper <=
(BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
Assert(ph->pd_special ==
(BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
Assert(ph->pd_opaque.od_pagesize == BLCKSZ);
#endif
if (flags) {
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_flag & flags);
}
}


@ -0,0 +1,14 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/heap
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= heapam.c hio.c stats.c

File diff suppressed because it is too large


@ -0,0 +1,195 @@
/*-------------------------------------------------------------------------
*
* hio.c--
* POSTGRES heap access method input/output code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Id: hio.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <string.h>
#include "c.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "storage/itemptr.h"
#include "storage/off.h"
#include "utils/memutils.h"
#include "utils/elog.h"
#include "utils/rel.h"
/*
* amputunique - place tuple at tid
* Currently on errors, calls elog. Perhaps should return -1?
* Possible errors include the addition of a tuple to the page
* between the time the linep is chosen and the page is L_UP'd.
*
* This should be coordinated with the B-tree code.
* Probably needs to have an amdelunique to allow for
* internal index records to be deleted and reordered as needed.
* For the heap AM, this should never be needed.
*/
void
RelationPutHeapTuple(Relation relation,
BlockNumber blockIndex,
HeapTuple tuple)
{
Buffer buffer;
Page pageHeader;
BlockNumber numberOfBlocks;
OffsetNumber offnum;
unsigned int len;
ItemId itemId;
Item item;
/* ----------------
* increment access statistics
* ----------------
*/
IncrHeapAccessStat(local_RelationPutHeapTuple);
IncrHeapAccessStat(global_RelationPutHeapTuple);
Assert(RelationIsValid(relation));
Assert(HeapTupleIsValid(tuple));
numberOfBlocks = RelationGetNumberOfBlocks(relation);
Assert(blockIndex < numberOfBlocks);
buffer = ReadBuffer(relation, blockIndex);
#ifndef NO_BUFFERISVALID
if (!BufferIsValid(buffer)) {
elog(WARN, "RelationPutHeapTuple: no buffer for %ld in %s",
blockIndex, &relation->rd_rel->relname);
}
#endif
pageHeader = (Page)BufferGetPage(buffer);
len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */
Assert((int)len <= PageGetFreeSpace(pageHeader));
offnum = PageAddItem((Page)pageHeader, (Item)tuple,
tuple->t_len, InvalidOffsetNumber, LP_USED);
itemId = PageGetItemId((Page)pageHeader, offnum);
item = PageGetItem((Page)pageHeader, itemId);
ItemPointerSet(&((HeapTuple)item)->t_ctid, blockIndex, offnum);
WriteBuffer(buffer);
/* return an accurate tuple */
ItemPointerSet(&tuple->t_ctid, blockIndex, offnum);
}
/*
* The heap_insert routines "know" that a buffer page is initialized to
* zero when a BlockExtend operation is performed.
*/
#define PageIsNew(page) ((page)->pd_upper == 0)
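/*
 * Editor's note (illustrative): per the comment above, a freshly
 * extended block comes back zero-filled, so pd_upper stays 0 until
 * PageInit has been run on the page; that is the condition this macro
 * tests, and it is why RelationPutHeapTupleAtEnd below runs PageInit on
 * any page it detects as new before trusting PageGetFreeSpace.
 */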
/*
* This routine is another in the series of attempts to reduce the number
* of I/O's and system calls executed in the various benchmarks. In
* particular, this routine is used to append data to the end of a relation
* file without excessive lseeks. This code should do no more than 2 semops
* in the ideal case.
*
* Eventually, we should cache the number of blocks in a relation somewhere.
* Until that time, this code will have to do an lseek to determine the number
* of blocks in a relation.
*
* This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
* to do an append; it's possible to eliminate 2 of the semops if we do direct
* buffer stuff (!); the lseek and the write can go if we get
* RelationGetNumberOfBlocks to be useful.
*
* NOTE: This code presumes that we have a write lock on the relation.
*
* Also note that this routine probably shouldn't have to exist, and does
* screw up the call graph rather badly, but we are wasting so much time and
* system resources being massively general that we are losing badly in our
* performance benchmarks.
*/
void
RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
{
Buffer buffer;
Page pageHeader;
BlockNumber lastblock;
OffsetNumber offnum;
unsigned int len;
ItemId itemId;
Item item;
Assert(RelationIsValid(relation));
Assert(HeapTupleIsValid(tuple));
/*
* XXX This does an lseek - VERY expensive - but at the moment it
* is the only way to accurately determine how many blocks are in
* a relation. A good optimization would be to get this to actually
* work properly.
*/
lastblock = RelationGetNumberOfBlocks(relation);
if (lastblock == 0)
{
buffer = ReadBuffer(relation, lastblock);
pageHeader = (Page)BufferGetPage(buffer);
if (PageIsNew((PageHeader) pageHeader))
{
buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
pageHeader = (Page)BufferGetPage(buffer);
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
}
}
else
buffer = ReadBuffer(relation, lastblock - 1);
pageHeader = (Page)BufferGetPage(buffer);
len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */
/*
* Note that this is true if the above returned a bogus page, which
* it will do for a completely empty relation.
*/
if (len > PageGetFreeSpace(pageHeader))
{
buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
pageHeader = (Page)BufferGetPage(buffer);
PageInit(pageHeader, BufferGetPageSize(buffer), 0);
if (len > PageGetFreeSpace(pageHeader))
elog(WARN, "Tuple is too big: size %d", len);
}
offnum = PageAddItem((Page)pageHeader, (Item)tuple,
tuple->t_len, InvalidOffsetNumber, LP_USED);
itemId = PageGetItemId((Page)pageHeader, offnum);
item = PageGetItem((Page)pageHeader, itemId);
lastblock = BufferGetBlockNumber(buffer);
ItemPointerSet(&((HeapTuple)item)->t_ctid, lastblock, offnum);
/* return an accurate tuple */
ItemPointerSet(&tuple->t_ctid, lastblock, offnum);
WriteBuffer(buffer);
}
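/*
 * Editor's sketch (illustrative) of the append path above, in brief:
 *
 *	lastblock = RelationGetNumberOfBlocks(relation);   (one lseek)
 *	read the last block (or block 0 / a fresh P_NEW page, PageInit'd,
 *		if the relation is empty);
 *	if the DOUBLEALIGN'd tuple length does not fit, extend with P_NEW,
 *		PageInit the new page, and elog(WARN) only if the tuple is
 *		too big even for an empty page;
 *	PageAddItem, stamp t_ctid in both the stored and the caller's
 *		copy, then WriteBuffer.
 */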


@ -0,0 +1,329 @@
/*-------------------------------------------------------------------------
*
* stats.c--
* heap access method debugging statistic collection routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/stats.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
*
* NOTES
* initam should be moved someplace else.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "utils/memutils.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#include "utils/mcxt.h"
/* ----------------
* InitHeapAccessStatistics
* ----------------
*/
HeapAccessStatistics heap_access_stats = (HeapAccessStatistics) NULL;
void
InitHeapAccessStatistics()
{
MemoryContext oldContext;
HeapAccessStatistics stats;
/* ----------------
* make sure we don't initialize things twice
* ----------------
*/
if (heap_access_stats != NULL)
return;
/* ----------------
* allocate statistics structure from the top memory context
* ----------------
*/
oldContext = MemoryContextSwitchTo(TopMemoryContext);
stats = (HeapAccessStatistics)
palloc(sizeof(HeapAccessStatisticsData));
/* ----------------
* initialize fields to default values
* ----------------
*/
stats->global_open = 0;
stats->global_openr = 0;
stats->global_close = 0;
stats->global_beginscan = 0;
stats->global_rescan = 0;
stats->global_endscan = 0;
stats->global_getnext = 0;
stats->global_fetch = 0;
stats->global_insert = 0;
stats->global_delete = 0;
stats->global_replace = 0;
stats->global_markpos = 0;
stats->global_restrpos = 0;
stats->global_BufferGetRelation = 0;
stats->global_RelationIdGetRelation = 0;
stats->global_RelationIdGetRelation_Buf = 0;
stats->global_getreldesc = 0;
stats->global_heapgettup = 0;
stats->global_RelationPutHeapTuple = 0;
stats->global_RelationPutLongHeapTuple = 0;
stats->local_open = 0;
stats->local_openr = 0;
stats->local_close = 0;
stats->local_beginscan = 0;
stats->local_rescan = 0;
stats->local_endscan = 0;
stats->local_getnext = 0;
stats->local_fetch = 0;
stats->local_insert = 0;
stats->local_delete = 0;
stats->local_replace = 0;
stats->local_markpos = 0;
stats->local_restrpos = 0;
stats->local_BufferGetRelation = 0;
stats->local_RelationIdGetRelation = 0;
stats->local_RelationIdGetRelation_Buf = 0;
stats->local_getreldesc = 0;
stats->local_heapgettup = 0;
stats->local_RelationPutHeapTuple = 0;
stats->local_RelationPutLongHeapTuple = 0;
stats->local_RelationNameGetRelation = 0;
stats->global_RelationNameGetRelation = 0;
/* ----------------
* record init times
* ----------------
*/
time(&stats->init_global_timestamp);
time(&stats->local_reset_timestamp);
time(&stats->last_request_timestamp);
/* ----------------
* return to old memory context
* ----------------
*/
(void) MemoryContextSwitchTo(oldContext);
heap_access_stats = stats;
}
/* ----------------
* ResetHeapAccessStatistics
* ----------------
*/
void
ResetHeapAccessStatistics()
{
HeapAccessStatistics stats;
/* ----------------
* do nothing if stats aren't initialized
* ----------------
*/
if (heap_access_stats == NULL)
return;
stats = heap_access_stats;
/* ----------------
* reset local counts
* ----------------
*/
stats->local_open = 0;
stats->local_openr = 0;
stats->local_close = 0;
stats->local_beginscan = 0;
stats->local_rescan = 0;
stats->local_endscan = 0;
stats->local_getnext = 0;
stats->local_fetch = 0;
stats->local_insert = 0;
stats->local_delete = 0;
stats->local_replace = 0;
stats->local_markpos = 0;
stats->local_restrpos = 0;
stats->local_BufferGetRelation = 0;
stats->local_RelationIdGetRelation = 0;
stats->local_RelationIdGetRelation_Buf = 0;
stats->local_getreldesc = 0;
stats->local_heapgettup = 0;
stats->local_RelationPutHeapTuple = 0;
stats->local_RelationPutLongHeapTuple = 0;
/* ----------------
* reset local timestamps
* ----------------
*/
time(&stats->local_reset_timestamp);
time(&stats->last_request_timestamp);
}
/* ----------------
* GetHeapAccessStatistics
* ----------------
*/
HeapAccessStatistics
GetHeapAccessStatistics()
{
HeapAccessStatistics stats;
/* ----------------
* return nothing if stats aren't initialized
* ----------------
*/
if (heap_access_stats == NULL)
return NULL;
/* ----------------
* record the current request time
* ----------------
*/
time(&heap_access_stats->last_request_timestamp);
/* ----------------
* allocate a copy of the stats and return it to the caller.
* ----------------
*/
stats = (HeapAccessStatistics)
palloc(sizeof(HeapAccessStatisticsData));
memmove(stats,
heap_access_stats,
sizeof(HeapAccessStatisticsData));
return stats;
}
/* ----------------
* PrintHeapAccessStatistics
* ----------------
*/
void
PrintHeapAccessStatistics(HeapAccessStatistics stats)
{
/* ----------------
* return nothing if stats aren't valid
* ----------------
*/
if (stats == NULL)
return;
printf("======== heap am statistics ========\n");
printf("init_global_timestamp: %s",
ctime(&(stats->init_global_timestamp)));
printf("local_reset_timestamp: %s",
ctime(&(stats->local_reset_timestamp)));
printf("last_request_timestamp: %s",
ctime(&(stats->last_request_timestamp)));
printf("local/global_open: %6d/%6d\n",
stats->local_open, stats->global_open);
printf("local/global_openr: %6d/%6d\n",
stats->local_openr, stats->global_openr);
printf("local/global_close: %6d/%6d\n",
stats->local_close, stats->global_close);
printf("local/global_beginscan: %6d/%6d\n",
stats->local_beginscan, stats->global_beginscan);
printf("local/global_rescan: %6d/%6d\n",
stats->local_rescan, stats->global_rescan);
printf("local/global_endscan: %6d/%6d\n",
stats->local_endscan, stats->global_endscan);
printf("local/global_getnext: %6d/%6d\n",
stats->local_getnext, stats->global_getnext);
printf("local/global_fetch: %6d/%6d\n",
stats->local_fetch, stats->global_fetch);
printf("local/global_insert: %6d/%6d\n",
stats->local_insert, stats->global_insert);
printf("local/global_delete: %6d/%6d\n",
stats->local_delete, stats->global_delete);
printf("local/global_replace: %6d/%6d\n",
stats->local_replace, stats->global_replace);
printf("local/global_markpos: %6d/%6d\n",
stats->local_markpos, stats->global_markpos);
printf("local/global_restrpos: %6d/%6d\n",
stats->local_restrpos, stats->global_restrpos);
printf("================\n");
printf("local/global_BufferGetRelation: %6d/%6d\n",
stats->local_BufferGetRelation,
stats->global_BufferGetRelation);
printf("local/global_RelationIdGetRelation: %6d/%6d\n",
stats->local_RelationIdGetRelation,
stats->global_RelationIdGetRelation);
printf("local/global_RelationIdGetRelation_Buf: %6d/%6d\n",
stats->local_RelationIdGetRelation_Buf,
stats->global_RelationIdGetRelation_Buf);
printf("local/global_getreldesc: %6d/%6d\n",
stats->local_getreldesc, stats->global_getreldesc);
printf("local/global_heapgettup: %6d/%6d\n",
stats->local_heapgettup, stats->global_heapgettup);
printf("local/global_RelationPutHeapTuple: %6d/%6d\n",
stats->local_RelationPutHeapTuple,
stats->global_RelationPutHeapTuple);
printf("local/global_RelationPutLongHeapTuple: %6d/%6d\n",
stats->local_RelationPutLongHeapTuple,
stats->global_RelationPutLongHeapTuple);
printf("===================================\n");
printf("\n");
}
/* ----------------
* PrintAndFreeHeapAccessStatistics
* ----------------
*/
void
PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats)
{
PrintHeapAccessStatistics(stats);
if (stats != NULL)
pfree(stats);
}
/* ----------------------------------------------------------------
* access method initialization
* ----------------------------------------------------------------
*/
/* ----------------
* initam should someday be moved someplace else.
* ----------------
*/
void
initam()
{
/* ----------------
* initialize heap statistics.
* ----------------
*/
InitHeapAccessStatistics();
}

149
src/backend/access/heapam.h Normal file

@ -0,0 +1,149 @@
/*-------------------------------------------------------------------------
*
* heapam.h--
* POSTGRES heap access method definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: heapam.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef HEAPAM_H
#define HEAPAM_H
#include <sys/types.h>
#include "postgres.h"
#include "access/attnum.h"
#include "access/htup.h"
#include "access/relscan.h"
#include "access/skey.h"
#include "utils/tqual.h"
#include "access/tupdesc.h"
#include "storage/smgr.h"
#include "utils/rel.h"
/* ----------------------------------------------------------------
* heap access method statistics
* ----------------------------------------------------------------
*/
typedef struct HeapAccessStatisticsData {
time_t init_global_timestamp; /* time global statistics started */
time_t local_reset_timestamp; /* last time local reset was done */
time_t last_request_timestamp; /* last time stats were requested */
int global_open;
int global_openr;
int global_close;
int global_beginscan;
int global_rescan;
int global_endscan;
int global_getnext;
int global_fetch;
int global_insert;
int global_delete;
int global_replace;
int global_markpos;
int global_restrpos;
int global_BufferGetRelation;
int global_RelationIdGetRelation;
int global_RelationIdGetRelation_Buf;
int global_RelationNameGetRelation;
int global_getreldesc;
int global_heapgettup;
int global_RelationPutHeapTuple;
int global_RelationPutLongHeapTuple;
int local_open;
int local_openr;
int local_close;
int local_beginscan;
int local_rescan;
int local_endscan;
int local_getnext;
int local_fetch;
int local_insert;
int local_delete;
int local_replace;
int local_markpos;
int local_restrpos;
int local_BufferGetRelation;
int local_RelationIdGetRelation;
int local_RelationIdGetRelation_Buf;
int local_RelationNameGetRelation;
int local_getreldesc;
int local_heapgettup;
int local_RelationPutHeapTuple;
int local_RelationPutLongHeapTuple;
} HeapAccessStatisticsData;
typedef HeapAccessStatisticsData *HeapAccessStatistics;
#define IncrHeapAccessStat(x) \
(heap_access_stats == NULL ? 0 : (heap_access_stats->x)++)
extern HeapAccessStatistics heap_access_stats; /* in stats.c */
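/*
 * Editor's note (illustrative usage): call sites simply write, e.g.
 *
 *	IncrHeapAccessStat(local_RelationPutHeapTuple);
 *	IncrHeapAccessStat(global_RelationPutHeapTuple);
 *
 * as hio.c does; while heap_access_stats is still NULL (before
 * InitHeapAccessStatistics has run) the macro evaluates to 0, so the
 * counters are silently skipped.
 */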
/* ----------------
* function prototypes for heap access method
* ----------------
*/
/* heap_create, heap_creatr, and heap_destroy are declared in catalog/heap.h */
#include "catalog/heap.h"
/* heapam.c */
extern void doinsert(Relation relation, HeapTuple tup);
extern void SetHeapAccessMethodImmediateInvalidation(bool on);
extern Relation heap_open(Oid relationId);
extern Relation heap_openr(char *relationName);
extern void heap_close(Relation relation);
extern HeapScanDesc heap_beginscan(Relation relation, int atend,
TimeQual timeQual, unsigned nkeys, ScanKey key);
extern void heap_rescan(HeapScanDesc sdesc, bool scanFromEnd, ScanKey key);
extern void heap_endscan(HeapScanDesc sdesc);
extern HeapTuple heap_getnext(HeapScanDesc scandesc, int backw, Buffer *b);
extern HeapTuple heap_fetch(Relation relation, TimeQual timeQual,
ItemPointer tid, Buffer *b);
extern Oid heap_insert(Relation relation, HeapTuple tup);
extern void heap_delete(Relation relation, ItemPointer tid);
extern int heap_replace(Relation relation, ItemPointer otid,
HeapTuple tup);
extern void heap_markpos(HeapScanDesc sdesc);
extern void heap_restrpos(HeapScanDesc sdesc);
/* in common/heaptuple.c */
extern Size ComputeDataSize(TupleDesc tupleDesc, Datum value[], char nulls[]);
extern void DataFill(char *data, TupleDesc tupleDesc,
Datum value[], char nulls[], char *infomask,
bits8 bit[]);
extern int heap_attisnull(HeapTuple tup, int attnum);
extern int heap_sysattrlen(AttrNumber attno);
extern bool heap_sysattrbyval(AttrNumber attno);
extern char *heap_getsysattr(HeapTuple tup, Buffer b, int attnum);
extern char *fastgetattr(HeapTuple tup, unsigned attnum,
TupleDesc att, bool *isnull);
extern char *heap_getattr(HeapTuple tup, Buffer b, int attnum,
TupleDesc att, bool *isnull);
extern HeapTuple heap_copytuple(HeapTuple tuple);
extern void heap_deformtuple(HeapTuple tuple, TupleDesc tdesc,
Datum values[], char nulls[]);
extern HeapTuple heap_formtuple(TupleDesc tupleDescriptor,
Datum value[], char nulls[]);
extern HeapTuple heap_modifytuple(HeapTuple tuple, Buffer buffer,
Relation relation, Datum replValue[], char replNull[], char repl[]);
HeapTuple heap_addheader(uint32 natts, int structlen, char *structure);
/* in common/heap/stats.c */
extern void InitHeapAccessStatistics(void);
extern void ResetHeapAccessStatistics(void);
extern HeapAccessStatistics GetHeapAccessStatistics(void);
extern void PrintHeapAccessStatistics(HeapAccessStatistics stats);
extern void PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats);
extern void initam(void);
#endif /* HEAPAM_H */

26
src/backend/access/hio.h Normal file

@ -0,0 +1,26 @@
/*-------------------------------------------------------------------------
*
* hio.h--
* POSTGRES heap access method input/output definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: hio.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef HIO_H
#define HIO_H
#include "c.h"
#include "storage/block.h"
#include "access/htup.h"
#include "utils/rel.h"
extern void RelationPutHeapTuple(Relation relation, BlockNumber blockIndex,
HeapTuple tuple);
extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
#endif /* HIO_H */

115
src/backend/access/htup.h Normal file

@ -0,0 +1,115 @@
/*-------------------------------------------------------------------------
*
* htup.h--
* POSTGRES heap tuple definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: htup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef HTUP_H
#define HTUP_H
#include "access/attnum.h"
#include "storage/bufpage.h" /* just to reduce levels of #include */
#include "storage/itemptr.h"
#include "utils/nabstime.h"
#define MinHeapTupleBitmapSize 32 /* 8 * 4 */
/* check these, they are likely to be more severely limited by t_hoff */
#define MaxHeapAttributeNumber 1600 /* 8 * 200 */
/*
 * to avoid wasting space, the attributes should be laid out in such a
* way to reduce structure padding.
*/
typedef struct HeapTupleData {
unsigned int t_len; /* length of entire tuple */
ItemPointerData t_ctid; /* current TID of this tuple */
ItemPointerData t_chain; /* replaced tuple TID */
Oid t_oid; /* OID of this tuple -- 4 bytes */
CommandId t_cmin; /* insert CID stamp -- 2 bytes each */
CommandId t_cmax; /* delete CommandId stamp */
TransactionId t_xmin; /* insert XID stamp -- 4 bytes each */
TransactionId t_xmax; /* delete XID stamp */
AbsoluteTime t_tmin; /* time stamps -- 4 bytes each */
AbsoluteTime t_tmax;
int16 t_natts; /* number of attributes */
char t_vtype; /* not used - padding */
char t_infomask; /* whether tuple has null or variable
* length attributes
*/
uint8 t_hoff; /* sizeof tuple header */
bits8 t_bits[MinHeapTupleBitmapSize / 8];
/* bit map of domains */
/* MORE DATA FOLLOWS AT END OF STRUCT */
} HeapTupleData;
typedef HeapTupleData *HeapTuple;
#define SelfItemPointerAttributeNumber (-1)
#define ObjectIdAttributeNumber (-2)
#define MinTransactionIdAttributeNumber (-3)
#define MinCommandIdAttributeNumber (-4)
#define MaxTransactionIdAttributeNumber (-5)
#define MaxCommandIdAttributeNumber (-6)
#define ChainItemPointerAttributeNumber (-7)
#define AnchorItemPointerAttributeNumber (-8)
#define MinAbsoluteTimeAttributeNumber (-9)
#define MaxAbsoluteTimeAttributeNumber (-10)
#define VersionTypeAttributeNumber (-11)
#define FirstLowInvalidHeapAttributeNumber (-12)
/* ----------------
* support macros
* ----------------
*/
#define GETSTRUCT(TUP) (((char *)(TUP)) + ((HeapTuple)(TUP))->t_hoff)
/*
* BITMAPLEN(NATTS) -
* Computes minimum size of bitmap given number of domains.
*/
#define BITMAPLEN(NATTS) \
((((((int)(NATTS) - 1) >> 3) + 4 - (MinHeapTupleBitmapSize >> 3)) \
& ~03) + (MinHeapTupleBitmapSize >> 3))
/*
* HeapTupleIsValid
* True iff the heap tuple is valid.
*/
#define HeapTupleIsValid(tuple) PointerIsValid(tuple)
/*
* information stored in t_infomask:
*/
#define HEAP_HASNULL 0x01 /* has null attribute(s) */
#define HEAP_HASVARLENA 0x02 /* has variable length attribute(s) */
#define HeapTupleNoNulls(tuple) \
(!(((HeapTuple) (tuple))->t_infomask & HEAP_HASNULL))
#define HeapTupleAllFixed(tuple) \
(!(((HeapTuple) (tuple))->t_infomask & HEAP_HASVARLENA))
#endif /* HTUP_H */

34
src/backend/access/ibit.h Normal file

@ -0,0 +1,34 @@
/*-------------------------------------------------------------------------
*
* ibit.h--
* POSTGRES index valid attribute bit map definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: ibit.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef IBIT_H
#define IBIT_H
#include "c.h"
#include "utils/memutils.h"
typedef struct IndexAttributeBitMapData {
char bits[(MaxIndexAttributeNumber + MaxBitsPerByte - 1)
/ MaxBitsPerByte];
} IndexAttributeBitMapData;
typedef IndexAttributeBitMapData *IndexAttributeBitMap;
#define IndexAttributeBitMapSize sizeof(IndexAttributeBitMapData)
/*
* IndexAttributeBitMapIsValid --
* True iff attribute bit map is valid.
*/
#define IndexAttributeBitMapIsValid(bits) PointerIsValid(bits)
#endif /* IBIT_H */

14
src/backend/access/index/Makefile.inc Normal file

@ -0,0 +1,14 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/index
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/index/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= genam.c indexam.c istrat.c

275
src/backend/access/index/genam.c Normal file

@ -0,0 +1,275 @@
/*-------------------------------------------------------------------------
*
* genam.c--
* general index access method routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
*
* NOTES
* many of the old access method routines have been turned into
* macros and moved to genam.h -cim 4/30/91
*
*-------------------------------------------------------------------------
*/
/*
* OLD COMMENTS
* Scans are implemented as follows:
*
* `0' represents an invalid item pointer.
* `-' represents an unknown item pointer.
 * `X' represents a known item pointer.
* `+' represents known or invalid item pointers.
* `*' represents any item pointers.
*
* State is represented by a triple of these symbols in the order of
* previous, current, next. Note that the case of reverse scans works
* identically.
*
* State Result
* (1) + + - + 0 0 (if the next item pointer is invalid)
* (2) + X - (otherwise)
* (3) * 0 0 * 0 0 (no change)
* (4) + X 0 X 0 0 (shift)
* (5) * + X + X - (shift, add unknown)
*
* All other states cannot occur.
*
* Note:
 * It would be possible to cache the status of the previous and
* next item pointer using the flags.
* ----------------------------------------------------------------
*/
#include "postgres.h"
#include "access/attnum.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/itup.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "storage/bufmgr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "catalog/catname.h"
#include "catalog/pg_attribute.h"
#include "catalog/pg_index.h"
#include "catalog/pg_proc.h"
#include "catalog/index.h"
/* ----------------------------------------------------------------
* general access method routines
*
* All indexed access methods use an identical scan structure.
* We don't know how the various AMs do locking, however, so we don't
* do anything about that here.
*
* The intent is that an AM implementor will define a front-end routine
* that calls this one, to fill in the scan, and then does whatever kind
* of locking he wants.
* ----------------------------------------------------------------
*/
/* ----------------
* RelationGetIndexScan -- Create and fill an IndexScanDesc.
*
* This routine creates an index scan structure and sets its contents
* up correctly. This routine calls AMrescan to set up the scan with
* the passed key.
*
* Parameters:
* relation -- index relation for scan.
* scanFromEnd -- if true, begin scan at one of the index's
* endpoints.
* numberOfKeys -- count of scan keys (more than one won't
* necessarily do anything useful, yet).
* key -- the ScanKey for the starting position of the scan.
*
* Returns:
* An initialized IndexScanDesc.
*
* Side Effects:
* Bumps the ref count on the relation to keep it in the cache.
*
* ----------------
*/
IndexScanDesc
RelationGetIndexScan(Relation relation,
bool scanFromEnd,
uint16 numberOfKeys,
ScanKey key)
{
IndexScanDesc scan;
if (! RelationIsValid(relation))
elog(WARN, "RelationGetIndexScan: relation invalid");
scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
scan->relation = relation;
scan->opaque = NULL;
scan->numberOfKeys = numberOfKeys;
ItemPointerSetInvalid(&scan->previousItemData);
ItemPointerSetInvalid(&scan->currentItemData);
ItemPointerSetInvalid(&scan->nextItemData);
ItemPointerSetInvalid(&scan->previousMarkData);
ItemPointerSetInvalid(&scan->currentMarkData);
ItemPointerSetInvalid(&scan->nextMarkData);
if (numberOfKeys > 0) {
scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * numberOfKeys);
} else {
scan->keyData = NULL;
}
index_rescan(scan, scanFromEnd, key);
return (scan);
}
/* ----------------
* IndexScanRestart -- Restart an index scan.
*
* This routine isn't used by any existing access method. It's
* appropriate if relation level locks are what you want.
*
* Returns:
* None.
*
* Side Effects:
* None.
* ----------------
*/
void
IndexScanRestart(IndexScanDesc scan,
bool scanFromEnd,
ScanKey key)
{
if (! IndexScanIsValid(scan))
elog(WARN, "IndexScanRestart: invalid scan");
ItemPointerSetInvalid(&scan->previousItemData);
ItemPointerSetInvalid(&scan->currentItemData);
ItemPointerSetInvalid(&scan->nextItemData);
if (RelationGetNumberOfBlocks(scan->relation) == 0)
scan->flags = ScanUnmarked;
else if (scanFromEnd)
scan->flags = ScanUnmarked | ScanUncheckedPrevious;
else
scan->flags = ScanUnmarked | ScanUncheckedNext;
scan->scanFromEnd = (bool) scanFromEnd;
if (scan->numberOfKeys > 0)
memmove(scan->keyData,
key,
scan->numberOfKeys * sizeof(ScanKeyData));
}
/* ----------------
 * IndexScanEnd -- End an index scan.
*
* This routine is not used by any existing access method, but is
* suitable for use if you don't want to do sophisticated locking.
*
* Returns:
* None.
*
* Side Effects:
* None.
* ----------------
*/
void
IndexScanEnd(IndexScanDesc scan)
{
if (! IndexScanIsValid(scan))
elog(WARN, "IndexScanEnd: invalid scan");
pfree(scan);
}
/* ----------------
* IndexScanMarkPosition -- Mark current position in a scan.
*
* This routine isn't used by any existing access method, but is the
* one that AM implementors should use, if they don't want to do any
* special locking. If relation-level locking is sufficient, this is
* the routine for you.
*
* Returns:
* None.
*
* Side Effects:
* None.
* ----------------
*/
void
IndexScanMarkPosition(IndexScanDesc scan)
{
RetrieveIndexResult result;
if (scan->flags & ScanUncheckedPrevious) {
result =
index_getnext(scan, BackwardScanDirection);
if (result != NULL) {
scan->previousItemData = result->index_iptr;
} else {
ItemPointerSetInvalid(&scan->previousItemData);
}
} else if (scan->flags & ScanUncheckedNext) {
result = (RetrieveIndexResult)
index_getnext(scan, ForwardScanDirection);
if (result != NULL) {
scan->nextItemData = result->index_iptr;
} else {
ItemPointerSetInvalid(&scan->nextItemData);
}
}
scan->previousMarkData = scan->previousItemData;
scan->currentMarkData = scan->currentItemData;
scan->nextMarkData = scan->nextItemData;
scan->flags = 0x0; /* XXX should have a symbolic name */
}
/* ----------------
* IndexScanRestorePosition -- Restore position on a marked scan.
*
* This routine isn't used by any existing access method, but is the
* one that AM implementors should use if they don't want to do any
* special locking. If relation-level locking is sufficient, then
* this is the one you want.
*
* Returns:
* None.
*
* Side Effects:
* None.
* ----------------
*/
void
IndexScanRestorePosition(IndexScanDesc scan)
{
if (scan->flags & ScanUnmarked)
elog(WARN, "IndexScanRestorePosition: no mark to restore");
scan->previousItemData = scan->previousMarkData;
scan->currentItemData = scan->currentMarkData;
scan->nextItemData = scan->nextMarkData;
scan->flags = 0x0; /* XXX should have a symbolic name */
}
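/*
 * Illustrative sketch, not part of the distribution sources: the kind of
 * access-method "front-end" around RelationGetIndexScan() that the header
 * comment above describes (compare btbeginscan() in nbtree.c).  The name
 * toy_beginscan is hypothetical, and any AM-specific locking or scan
 * registration is left as a comment.
 */
char *
toy_beginscan(Relation rel,
	      bool fromEnd,
	      uint16 keysz,
	      ScanKey scankey)
{
    IndexScanDesc scan;

    /* the generic code allocates the scan descriptor and performs the
     * initial index_rescan() with the supplied key */
    scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);

    /* a real AM would take its own locks or register the scan here */

    return ((char *) scan);
}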

411
src/backend/access/index/indexam.c Normal file

@ -0,0 +1,411 @@
/*-------------------------------------------------------------------------
*
* indexam.c--
* general index access method routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relationId
 * index_openr - open an index relation by name
 * index_close - close an index relation
* index_beginscan - start a scan of an index
* index_rescan - restart a scan of an index
* index_endscan - end a scan
* index_insert - insert an index tuple into a relation
* index_delete - delete an item from an index relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
* index_getnext - get the next tuple from a scan
* ** index_fetch - retrieve tuple with tid
* ** index_replace - replace a tuple
* ** index_getattr - get an attribute from an index tuple
* index_getprocid - get a support procedure id from the rel tuple
*
* IndexScanIsValid - check index scan
*
* NOTES
* This file contains the index_ routines which used
* to be a scattered collection of stuff in access/genam.
*
* The ** routines: index_fetch, index_replace, and index_getattr
* have not yet been implemented. They may not be needed.
*
* old comments
* Scans are implemented as follows:
*
* `0' represents an invalid item pointer.
* `-' represents an unknown item pointer.
 * `X' represents a known item pointer.
* `+' represents known or invalid item pointers.
* `*' represents any item pointers.
*
* State is represented by a triple of these symbols in the order of
* previous, current, next. Note that the case of reverse scans works
* identically.
*
* State Result
* (1) + + - + 0 0 (if the next item pointer is invalid)
* (2) + X - (otherwise)
* (3) * 0 0 * 0 0 (no change)
* (4) + X 0 X 0 0 (shift)
* (5) * + X + X - (shift, add unknown)
*
* All other states cannot occur.
*
* Note: It would be possible to cache the status of the previous and
* next item pointer using the flags.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/attnum.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/itup.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/funcindex.h"
#include "storage/lmgr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/relcache.h"
#include "catalog/catname.h"
#include "catalog/pg_attribute.h"
#include "catalog/pg_index.h"
#include "catalog/pg_proc.h"
#include "catalog/index.h"
#include "fmgr.h"
/* ----------------
* undefine macros we aren't going to use that would otherwise
 * get in our way. delete is defined in c.h and the am's are
* defined in heapam.h
* ----------------
*/
#undef delete
#undef aminsert
#undef amdelete
#undef ambeginscan
#undef amrescan
#undef amendscan
#undef ammarkpos
#undef amrestrpos
#undef amgettuple
/* ----------------------------------------------------------------
* macros used in index_ routines
* ----------------------------------------------------------------
*/
#define RELATION_CHECKS \
Assert(RelationIsValid(relation)); \
Assert(PointerIsValid(relation->rd_am))
#define SCAN_CHECKS \
Assert(IndexScanIsValid(scan)); \
Assert(RelationIsValid(scan->relation)); \
Assert(PointerIsValid(scan->relation->rd_am))
#define GET_REL_PROCEDURE(x,y) \
CppConcat(procedure = relation->rd_am->,y); \
if (! RegProcedureIsValid(procedure)) \
elog(WARN, "index_%s: invalid %s regproc", \
CppAsString(x), CppAsString(y))
#define GET_SCAN_PROCEDURE(x,y) \
CppConcat(procedure = scan->relation->rd_am->,y); \
if (! RegProcedureIsValid(procedure)) \
elog(WARN, "index_%s: invalid %s regproc", \
CppAsString(x), CppAsString(y))
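/* ----------------
 * for reference (an added illustration, not in the original file):
 * with the CppConcat and CppAsString helpers, GET_REL_PROCEDURE(insert,aminsert)
 * as used in index_insert() below expands to roughly
 *
 *	procedure = relation->rd_am->aminsert;
 *	if (! RegProcedureIsValid(procedure))
 *	    elog(WARN, "index_insert: invalid aminsert regproc");
 * ----------------
 */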
/* ----------------------------------------------------------------
* index_ interface functions
* ----------------------------------------------------------------
*/
/* ----------------
* index_open - open an index relation by relationId
*
* presently the relcache routines do all the work we need
* to open/close index relations.
* ----------------
*/
Relation
index_open(Oid relationId)
{
return RelationIdGetRelation(relationId);
}
/* ----------------
 * index_openr - open an index relation by name
*
* presently the relcache routines do all the work we need
* to open/close index relations.
* ----------------
*/
Relation
index_openr(char *relationName)
{
return RelationNameGetRelation(relationName);
}
/* ----------------
 * index_close - close an index relation
*
* presently the relcache routines do all the work we need
* to open/close index relations.
* ----------------
*/
void
index_close(Relation relation)
{
(void) RelationClose(relation);
}
/* ----------------
* index_insert - insert an index tuple into a relation
* ----------------
*/
InsertIndexResult
index_insert(Relation relation,
IndexTuple indexTuple)
{
RegProcedure procedure;
InsertIndexResult specificResult;
RELATION_CHECKS;
GET_REL_PROCEDURE(insert,aminsert);
/* ----------------
* have the am's insert proc do all the work.
* ----------------
*/
specificResult = (InsertIndexResult)
fmgr(procedure, relation, indexTuple, NULL);
/* ----------------
* the insert proc is supposed to return a "specific result" and
* this routine has to return a "general result" so after we get
* something back from the insert proc, we allocate a
* "general result" and copy some crap between the two.
*
 * As far as I'm concerned all this result shit is needlessly
 * complicated and should be eliminated. -cim 1/19/91
*
* mao concurs. regardless of how we feel here, however, it is
* important to free memory we don't intend to return to anyone.
* 2/28/91
*
* this "general result" crap is now gone. -ay 3/6/95
* ----------------
*/
return (specificResult);
}
/* ----------------
* index_delete - delete an item from an index relation
* ----------------
*/
void
index_delete(Relation relation, ItemPointer indexItem)
{
RegProcedure procedure;
RELATION_CHECKS;
GET_REL_PROCEDURE(delete,amdelete);
(void) fmgr(procedure, relation, indexItem);
}
/* ----------------
* index_beginscan - start a scan of an index
* ----------------
*/
IndexScanDesc
index_beginscan(Relation relation,
bool scanFromEnd,
uint16 numberOfKeys,
ScanKey key)
{
IndexScanDesc scandesc;
RegProcedure procedure;
RELATION_CHECKS;
GET_REL_PROCEDURE(beginscan,ambeginscan);
RelationSetRIntentLock(relation);
scandesc = (IndexScanDesc)
fmgr(procedure, relation, scanFromEnd, numberOfKeys, key);
return scandesc;
}
/* ----------------
* index_rescan - restart a scan of an index
* ----------------
*/
void
index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key)
{
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(rescan,amrescan);
(void) fmgr(procedure, scan, scanFromEnd, key);
}
/* ----------------
* index_endscan - end a scan
* ----------------
*/
void
index_endscan(IndexScanDesc scan)
{
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(endscan,amendscan);
(void) fmgr(procedure, scan);
RelationUnsetRIntentLock(scan->relation);
}
/* ----------------
* index_markpos - mark a scan position
* ----------------
*/
void
index_markpos(IndexScanDesc scan)
{
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(markpos,ammarkpos);
(void) fmgr(procedure, scan);
}
/* ----------------
* index_restrpos - restore a scan position
* ----------------
*/
void
index_restrpos(IndexScanDesc scan)
{
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(restrpos,amrestrpos);
(void) fmgr(procedure, scan);
}
/* ----------------
* index_getnext - get the next tuple from a scan
*
 * A RetrieveIndexResult is an index tuple/heap tuple pair
* ----------------
*/
RetrieveIndexResult
index_getnext(IndexScanDesc scan,
ScanDirection direction)
{
RegProcedure procedure;
RetrieveIndexResult result;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(getnext,amgettuple);
/* ----------------
* have the am's gettuple proc do all the work.
* ----------------
*/
result = (RetrieveIndexResult)
fmgr(procedure, scan, direction);
return result;
}
/* ----------------
* index_getprocid
*
* Some indexed access methods may require support routines that are
* not in the operator class/operator model imposed by pg_am. These
* access methods may store the OIDs of registered procedures they
* need in pg_amproc. These registered procedure OIDs are ordered in
* a way that makes sense to the access method, and used only by the
* access method. The general index code doesn't know anything about
* the routines involved; it just builds an ordered list of them for
* each attribute on which an index is defined.
*
* This routine returns the requested procedure OID for a particular
* indexed attribute.
* ----------------
*/
RegProcedure
index_getprocid(Relation irel,
AttrNumber attnum,
uint16 procnum)
{
RegProcedure *loc;
int natts;
natts = irel->rd_rel->relnatts;
loc = irel->rd_support;
Assert(loc != NULL);
return (loc[(natts * (procnum - 1)) + (attnum - 1)]);
}
Datum
GetIndexValue(HeapTuple tuple,
TupleDesc hTupDesc,
int attOff,
AttrNumber attrNums[],
FuncIndexInfo *fInfo,
bool *attNull,
Buffer buffer)
{
Datum returnVal;
bool isNull;
if (PointerIsValid(fInfo) && FIgetProcOid(fInfo) != InvalidOid) {
int i;
Datum *attData = (Datum *)palloc(FIgetnArgs(fInfo)*sizeof(Datum));
for (i = 0; i < FIgetnArgs(fInfo); i++) {
attData[i] = (Datum) heap_getattr(tuple,
buffer,
attrNums[i],
hTupDesc,
attNull);
}
returnVal = (Datum)fmgr_array_args(FIgetProcOid(fInfo),
FIgetnArgs(fInfo),
(char **) attData,
&isNull);
pfree(attData);
*attNull = FALSE;
}else {
returnVal = (Datum) heap_getattr(tuple, buffer, attrNums[attOff],
hTupDesc, attNull);
}
return returnVal;
}
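/* ----------------
 * Illustrative sketch, not part of the distribution sources: how a caller
 * typically drives the interface routines above -- begin a scan, fetch
 * results until the access method returns NULL, then end the scan.  The
 * function name toy_scan_index is hypothetical; the index relation and keys
 * are assumed to have been set up by the caller, and each RetrieveIndexResult
 * is palloc'd by the AM, so it is pfree'd when no longer needed.
 * ----------------
 */
void
toy_scan_index(Relation indexRelation, uint16 nkeys, ScanKey keys)
{
    IndexScanDesc scan;
    RetrieveIndexResult result;

    scan = index_beginscan(indexRelation, false, nkeys, keys);

    while ((result = index_getnext(scan, ForwardScanDirection)) != NULL) {
	/* result->heap_iptr identifies the matching heap tuple;
	 * a real caller would fetch it from the heap relation here */
	pfree(result);
    }

    index_endscan(scan);
}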

679
src/backend/access/index/istrat.c Normal file

@ -0,0 +1,679 @@
/*-------------------------------------------------------------------------
*
* istrat.c--
* index scan strategy manipulation code and index strategy manipulation
* operator code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/index/Attic/istrat.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/attnum.h"
#include "access/heapam.h"
#include "access/istrat.h"
#include "access/itup.h" /* for MaxIndexAttributeNumber */
#include "access/skey.h"
#include "utils/tqual.h" /* for NowTimeQual */
#include "fmgr.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "catalog/catname.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_amproc.h"
#include "catalog/pg_index.h"
#include "catalog/pg_proc.h"
/* ----------------------------------------------------------------
* misc strategy support routines
* ----------------------------------------------------------------
*/
/*
* StrategyNumberIsValid
* StrategyNumberIsInBounds
* StrategyMapIsValid
* StrategyTransformMapIsValid
* IndexStrategyIsValid
*
* ... are now macros in istrat.h -cim 4/27/91
*/
/*
* StrategyMapGetScanKeyEntry --
 * Returns a scan key entry of an index strategy mapping member.
*
* Note:
* Assumes that the index strategy mapping is valid.
* Assumes that the index strategy number is valid.
* Bounds checking should be done outside this routine.
*/
ScanKey
StrategyMapGetScanKeyEntry(StrategyMap map,
StrategyNumber strategyNumber)
{
Assert(StrategyMapIsValid(map));
Assert(StrategyNumberIsValid(strategyNumber));
return (&map->entry[strategyNumber - 1]);
}
/*
* IndexStrategyGetStrategyMap --
* Returns an index strategy mapping of an index strategy.
*
* Note:
* Assumes that the index strategy is valid.
* Assumes that the number of index strategies is valid.
* Bounds checking should be done outside this routine.
*/
StrategyMap
IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
StrategyNumber maxStrategyNum,
AttrNumber attrNum)
{
Assert(IndexStrategyIsValid(indexStrategy));
Assert(StrategyNumberIsValid(maxStrategyNum));
Assert(AttributeNumberIsValid(attrNum));
maxStrategyNum = AMStrategies(maxStrategyNum); /* XXX */
return
&indexStrategy->strategyMapData[maxStrategyNum * (attrNum - 1)];
}
/*
* AttributeNumberGetIndexStrategySize --
* Computes the size of an index strategy.
*/
Size
AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
StrategyNumber maxStrategyNumber)
{
maxStrategyNumber = AMStrategies(maxStrategyNumber); /* XXX */
return
maxAttributeNumber * maxStrategyNumber * sizeof (ScanKeyData);
}
/*
* StrategyTransformMapIsValid is now a macro in istrat.h -cim 4/27/91
*/
/* ----------------
* StrategyOperatorIsValid
* ----------------
*/
bool
StrategyOperatorIsValid(StrategyOperator operator,
StrategyNumber maxStrategy)
{
return (bool)
(PointerIsValid(operator) &&
StrategyNumberIsInBounds(operator->strategy, maxStrategy) &&
!(operator->flags & ~(SK_NEGATE | SK_COMMUTE)));
}
/* ----------------
* StrategyTermIsValid
* ----------------
*/
bool
StrategyTermIsValid(StrategyTerm term,
StrategyNumber maxStrategy)
{
Index index;
if (! PointerIsValid(term) || term->degree == 0)
return false;
for (index = 0; index < term->degree; index += 1) {
if (! StrategyOperatorIsValid(&term->operatorData[index],
maxStrategy)) {
return false;
}
}
return true;
}
/* ----------------
* StrategyExpressionIsValid
* ----------------
*/
bool
StrategyExpressionIsValid(StrategyExpression expression,
StrategyNumber maxStrategy)
{
StrategyTerm *termP;
if (!PointerIsValid(expression))
return true;
if (!StrategyTermIsValid(expression->term[0], maxStrategy))
return false;
termP = &expression->term[1];
while (StrategyTermIsValid(*termP, maxStrategy))
termP += 1;
return (bool)
(! PointerIsValid(*termP));
}
/* ----------------
* StrategyEvaluationIsValid
* ----------------
*/
bool
StrategyEvaluationIsValid(StrategyEvaluation evaluation)
{
Index index;
if (! PointerIsValid(evaluation) ||
! StrategyNumberIsValid(evaluation->maxStrategy) ||
! StrategyTransformMapIsValid(evaluation->negateTransform) ||
! StrategyTransformMapIsValid(evaluation->commuteTransform) ||
! StrategyTransformMapIsValid(evaluation->negateCommuteTransform)) {
return false;
}
for (index = 0; index < evaluation->maxStrategy; index += 1) {
if (! StrategyExpressionIsValid(evaluation->expression[index],
evaluation->maxStrategy)) {
return false;
}
}
return true;
}
/* ----------------
* StrategyTermEvaluate
* ----------------
*/
static bool
StrategyTermEvaluate(StrategyTerm term,
StrategyMap map,
Datum left,
Datum right)
{
Index index;
long tmpres;
bool result;
StrategyOperator operator;
ScanKey entry;
for (index = 0, operator = &term->operatorData[0];
index < term->degree; index += 1, operator += 1) {
entry = &map->entry[operator->strategy - 1];
Assert(RegProcedureIsValid(entry->sk_procedure));
switch (operator->flags ^ entry->sk_flags) {
case 0x0:
tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
left, right);
break;
case SK_NEGATE:
tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
left, right);
break;
case SK_COMMUTE:
tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
right, left);
break;
case SK_NEGATE | SK_COMMUTE:
tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
right, left);
break;
default:
elog(FATAL, "StrategyTermEvaluate: impossible case %d",
operator->flags ^ entry->sk_flags);
}
result = (bool) tmpres;
if (!result)
return result;
}
return result;
}
/* ----------------
* RelationGetStrategy
* ----------------
*/
StrategyNumber
RelationGetStrategy(Relation relation,
AttrNumber attributeNumber,
StrategyEvaluation evaluation,
RegProcedure procedure)
{
StrategyNumber strategy;
StrategyMap strategyMap;
ScanKey entry;
Index index;
int numattrs;
Assert(RelationIsValid(relation));
numattrs = RelationGetNumberOfAttributes(relation);
Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */
Assert(AttributeNumberIsValid(attributeNumber));
Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
Assert(StrategyEvaluationIsValid(evaluation));
Assert(RegProcedureIsValid(procedure));
strategyMap =
IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
evaluation->maxStrategy,
attributeNumber);
/* get a strategy number for the procedure ignoring flags for now */
for (index = 0; index < evaluation->maxStrategy; index += 1) {
if (strategyMap->entry[index].sk_procedure == procedure) {
break;
}
}
if (index == evaluation->maxStrategy)
return InvalidStrategy;
strategy = 1 + index;
entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
Assert(!(entry->sk_flags & ~(SK_NEGATE | SK_COMMUTE)));
switch (entry->sk_flags & (SK_NEGATE | SK_COMMUTE)) {
case 0x0:
return strategy;
case SK_NEGATE:
strategy = evaluation->negateTransform->strategy[strategy - 1];
break;
case SK_COMMUTE:
strategy = evaluation->commuteTransform->strategy[strategy - 1];
break;
case SK_NEGATE | SK_COMMUTE:
strategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
break;
default:
elog(FATAL, "RelationGetStrategy: impossible case %d", entry->sk_flags);
}
if (! StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)) {
if (! StrategyNumberIsValid(strategy)) {
elog(WARN, "RelationGetStrategy: corrupted evaluation");
}
}
return strategy;
}
/* ----------------
* RelationInvokeStrategy
* ----------------
*/
bool /* XXX someday, this may return Datum */
RelationInvokeStrategy(Relation relation,
StrategyEvaluation evaluation,
AttrNumber attributeNumber,
StrategyNumber strategy,
Datum left,
Datum right)
{
StrategyNumber newStrategy;
StrategyMap strategyMap;
ScanKey entry;
StrategyTermData termData;
int numattrs;
Assert(RelationIsValid(relation));
Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */
numattrs = RelationGetNumberOfAttributes(relation);
Assert(StrategyEvaluationIsValid(evaluation));
Assert(AttributeNumberIsValid(attributeNumber));
Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
Assert(StrategyNumberIsInBounds(strategy, evaluation->maxStrategy));
termData.degree = 1;
strategyMap =
IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
evaluation->maxStrategy,
attributeNumber);
entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
if (RegProcedureIsValid(entry->sk_procedure)) {
termData.operatorData[0].strategy = strategy;
termData.operatorData[0].flags = 0x0;
return
StrategyTermEvaluate(&termData, strategyMap, left, right);
}
newStrategy = evaluation->negateTransform->strategy[strategy - 1];
if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
if (RegProcedureIsValid(entry->sk_procedure)) {
termData.operatorData[0].strategy = newStrategy;
termData.operatorData[0].flags = SK_NEGATE;
return
StrategyTermEvaluate(&termData, strategyMap, left, right);
}
}
newStrategy = evaluation->commuteTransform->strategy[strategy - 1];
if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
if (RegProcedureIsValid(entry->sk_procedure)) {
termData.operatorData[0].strategy = newStrategy;
termData.operatorData[0].flags = SK_COMMUTE;
return
StrategyTermEvaluate(&termData, strategyMap, left, right);
}
}
newStrategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
if (RegProcedureIsValid(entry->sk_procedure)) {
termData.operatorData[0].strategy = newStrategy;
termData.operatorData[0].flags = SK_NEGATE | SK_COMMUTE;
return
StrategyTermEvaluate(&termData, strategyMap, left, right);
}
}
if (PointerIsValid(evaluation->expression[strategy - 1])) {
StrategyTerm *termP;
termP = &evaluation->expression[strategy - 1]->term[0];
while (PointerIsValid(*termP)) {
Index index;
for (index = 0; index < (*termP)->degree; index += 1) {
entry = StrategyMapGetScanKeyEntry(strategyMap,
(*termP)->operatorData[index].strategy);
if (! RegProcedureIsValid(entry->sk_procedure)) {
break;
}
}
if (index == (*termP)->degree) {
return
StrategyTermEvaluate(*termP, strategyMap, left, right);
}
termP += 1;
}
}
elog(WARN, "RelationInvokeStrategy: cannot evaluate strategy %d",
strategy);
/* not reached, just to make compiler happy */
return FALSE;
}
/* ----------------
* OperatorRelationFillScanKeyEntry
* ----------------
*/
static void
OperatorRelationFillScanKeyEntry(Relation operatorRelation,
Oid operatorObjectId,
ScanKey entry)
{
HeapScanDesc scan;
ScanKeyData scanKeyData;
HeapTuple tuple;
ScanKeyEntryInitialize(&scanKeyData, 0,
ObjectIdAttributeNumber,
ObjectIdEqualRegProcedure,
ObjectIdGetDatum(operatorObjectId));
scan = heap_beginscan(operatorRelation, false, NowTimeQual,
1, &scanKeyData);
tuple = heap_getnext(scan, false, (Buffer *)NULL);
if (! HeapTupleIsValid(tuple)) {
elog(WARN, "OperatorObjectIdFillScanKeyEntry: unknown operator %lu",
(uint32) operatorObjectId);
}
entry->sk_flags = 0;
entry->sk_procedure =
((OperatorTupleForm) GETSTRUCT(tuple))->oprcode;
fmgr_info(entry->sk_procedure, &entry->sk_func, &entry->sk_nargs);
if (! RegProcedureIsValid(entry->sk_procedure)) {
elog(WARN,
"OperatorObjectIdFillScanKeyEntry: no procedure for operator %lu",
(uint32) operatorObjectId);
}
heap_endscan(scan);
}
/*
* IndexSupportInitialize --
* Initializes an index strategy and associated support procedures.
*/
void
IndexSupportInitialize(IndexStrategy indexStrategy,
RegProcedure *indexSupport,
Oid indexObjectId,
Oid accessMethodObjectId,
StrategyNumber maxStrategyNumber,
StrategyNumber maxSupportNumber,
AttrNumber maxAttributeNumber)
{
Relation relation;
Relation operatorRelation;
HeapScanDesc scan;
HeapTuple tuple;
ScanKeyData entry[2];
StrategyMap map;
AttrNumber attributeNumber;
int attributeIndex;
Oid operatorClassObjectId[ MaxIndexAttributeNumber ];
maxStrategyNumber = AMStrategies(maxStrategyNumber);
ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_index_indexrelid,
ObjectIdEqualRegProcedure,
ObjectIdGetDatum(indexObjectId));
relation = heap_openr(IndexRelationName);
scan = heap_beginscan(relation, false, NowTimeQual, 1, entry);
tuple = heap_getnext(scan, 0, (Buffer *)NULL);
if (! HeapTupleIsValid(tuple))
elog(WARN, "IndexSupportInitialize: corrupted catalogs");
/*
* XXX note that the following assumes the INDEX tuple is well formed and
* that the key[] and class[] are 0 terminated.
*/
for (attributeIndex=0; attributeIndex<maxAttributeNumber; attributeIndex++)
{
IndexTupleForm iform;
iform = (IndexTupleForm) GETSTRUCT(tuple);
if (!OidIsValid(iform->indkey[attributeIndex])) {
if (attributeIndex == 0) {
elog(WARN, "IndexSupportInitialize: no pg_index tuple");
}
break;
}
operatorClassObjectId[attributeIndex]
= iform->indclass[attributeIndex];
}
heap_endscan(scan);
heap_close(relation);
/* if support routines exist for this access method, load them */
if (maxSupportNumber > 0) {
ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_amproc_amid,
ObjectIdEqualRegProcedure,
ObjectIdGetDatum(accessMethodObjectId));
ScanKeyEntryInitialize(&entry[1], 0, Anum_pg_amproc_amopclaid,
ObjectIdEqualRegProcedure, 0);
/* relation = heap_openr(Name_pg_amproc); */
relation = heap_openr(AccessMethodProcedureRelationName);
for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
attributeNumber--) {
int16 support;
Form_pg_amproc form;
RegProcedure *loc;
loc = &indexSupport[((attributeNumber - 1) * maxSupportNumber)];
for (support = maxSupportNumber; --support >= 0; ) {
loc[support] = InvalidOid;
}
entry[1].sk_argument =
ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
HeapTupleIsValid(tuple)) {
form = (Form_pg_amproc) GETSTRUCT(tuple);
loc[(form->amprocnum - 1)] = form->amproc;
}
heap_endscan(scan);
}
heap_close(relation);
}
ScanKeyEntryInitialize(&entry[0], 0,
Anum_pg_amop_amopid,
ObjectIdEqualRegProcedure,
ObjectIdGetDatum(accessMethodObjectId));
ScanKeyEntryInitialize(&entry[1], 0,
Anum_pg_amop_amopclaid,
ObjectIdEqualRegProcedure, 0);
relation = heap_openr(AccessMethodOperatorRelationName);
operatorRelation = heap_openr(OperatorRelationName);
for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
attributeNumber--) {
StrategyNumber strategy;
entry[1].sk_argument =
ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
map = IndexStrategyGetStrategyMap(indexStrategy,
maxStrategyNumber,
attributeNumber);
for (strategy = 1; strategy <= maxStrategyNumber; strategy++)
ScanKeyEntrySetIllegal(StrategyMapGetScanKeyEntry(map, strategy));
scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
HeapTupleIsValid(tuple)) {
Form_pg_amop form;
form = (Form_pg_amop) GETSTRUCT(tuple);
OperatorRelationFillScanKeyEntry(operatorRelation,
form->amopopr,
StrategyMapGetScanKeyEntry(map, form->amopstrategy));
}
heap_endscan(scan);
}
heap_close(operatorRelation);
heap_close(relation);
}
/* ----------------
* IndexStrategyDisplay
* ----------------
*/
#ifdef ISTRATDEBUG
int
IndexStrategyDisplay(IndexStrategy indexStrategy,
StrategyNumber numberOfStrategies,
int numberOfAttributes)
{
StrategyMap strategyMap;
AttrNumber attributeNumber;
StrategyNumber strategyNumber;
for (attributeNumber = 1; attributeNumber <= numberOfAttributes;
attributeNumber += 1) {
strategyMap = IndexStrategyGetStrategyMap(indexStrategy,
numberOfStrategies,
attributeNumber);
for (strategyNumber = 1;
strategyNumber <= AMStrategies(numberOfStrategies);
strategyNumber += 1) {
printf(":att %d\t:str %d\t:opr 0x%x(%d)\n",
attributeNumber, strategyNumber,
strategyMap->entry[strategyNumber - 1].sk_procedure,
strategyMap->entry[strategyNumber - 1].sk_procedure);
}
}
}
#endif /* defined(ISTRATDEBUG) */

32
src/backend/access/iqual.h Normal file

@ -0,0 +1,32 @@
/*-------------------------------------------------------------------------
*
* iqual.h--
* Index scan key qualification definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: iqual.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef IQUAL_H
#define IQUAL_H
#include "c.h"
#include "storage/itemid.h"
#include "utils/rel.h"
#include "access/skey.h"
/* ----------------
* index tuple qualification support
* ----------------
*/
extern int NIndexTupleProcessed;
extern bool index_keytest(IndexTuple tuple, TupleDesc tupdesc,
int scanKeySize, ScanKey key);
#endif /* IQUAL_H */

80
src/backend/access/istrat.h Normal file

@ -0,0 +1,80 @@
/*-------------------------------------------------------------------------
*
* istrat.h--
* POSTGRES index strategy definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: istrat.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef ISTRAT_H
#define ISTRAT_H
#include "postgres.h"
#include "access/attnum.h"
#include "access/skey.h"
#include "access/strat.h"
#include "utils/rel.h" /* for Relation */
/*
* StrategyNumberIsValid --
* True iff the strategy number is valid.
*/
#define StrategyNumberIsValid(strategyNumber) \
((bool) ((strategyNumber) != InvalidStrategy))
/*
* StrategyNumberIsInBounds --
* True iff strategy number is within given bounds.
*
* Note:
* Assumes StrategyNumber is an unsigned type.
* Assumes the bounded interval to be (0,max].
*/
#define StrategyNumberIsInBounds(strategyNumber, maxStrategyNumber) \
((bool)(InvalidStrategy < (strategyNumber) && \
(strategyNumber) <= (maxStrategyNumber)))
/*
* StrategyMapIsValid --
* True iff the index strategy mapping is valid.
*/
#define StrategyMapIsValid(map) PointerIsValid(map)
/*
* IndexStrategyIsValid --
* True iff the index strategy is valid.
*/
#define IndexStrategyIsValid(s) PointerIsValid(s)
extern ScanKey StrategyMapGetScanKeyEntry(StrategyMap map,
StrategyNumber strategyNumber);
extern StrategyMap IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
StrategyNumber maxStrategyNum, AttrNumber attrNum);
extern Size
AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
StrategyNumber maxStrategyNumber);
extern bool StrategyOperatorIsValid(StrategyOperator operator,
StrategyNumber maxStrategy);
extern bool StrategyTermIsValid(StrategyTerm term,
StrategyNumber maxStrategy);
extern bool StrategyExpressionIsValid(StrategyExpression expression,
StrategyNumber maxStrategy);
extern bool StrategyEvaluationIsValid(StrategyEvaluation evaluation);
extern StrategyNumber RelationGetStrategy(Relation relation,
AttrNumber attributeNumber, StrategyEvaluation evaluation,
RegProcedure procedure);
extern bool RelationInvokeStrategy(Relation relation,
StrategyEvaluation evaluation, AttrNumber attributeNumber,
StrategyNumber strategy, Datum left, Datum right);
extern void IndexSupportInitialize(IndexStrategy indexStrategy,
RegProcedure *indexSupport, Oid indexObjectId,
Oid accessMethodObjectId, StrategyNumber maxStrategyNumber,
StrategyNumber maxSupportNumber, AttrNumber maxAttributeNumber);
#endif /* ISTRAT_H */
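/*
 * Illustrative sketch, not part of the distribution sources: the flat
 * layout implied by the accessors above.  An IndexStrategy is one
 * ScanKeyData entry per (attribute, strategy) pair, attribute-major, so
 * entry (attrNum, strategy) lives at index
 * maxStrategies * (attrNum - 1) + (strategy - 1).  The numbers below assume
 * a btree-like AM with 5 strategies and a 3-attribute index, ignoring the
 * AMStrategies() adjustment.
 */
#include <stdio.h>

int
main(void)
{
    int maxStrategies = 5;	/* e.g. BTMaxStrategyNumber */
    int attrNum, strategy;

    for (attrNum = 1; attrNum <= 3; attrNum++)
	for (strategy = 1; strategy <= maxStrategies; strategy++)
	    printf("att %d strat %d -> entry %d\n",
		   attrNum, strategy,
		   maxStrategies * (attrNum - 1) + (strategy - 1));
    return 0;
}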

104
src/backend/access/itup.h Normal file

@ -0,0 +1,104 @@
/*-------------------------------------------------------------------------
*
* itup.h--
* POSTGRES index tuple definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: itup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef ITUP_H
#define ITUP_H
#include "c.h"
#include "access/ibit.h"
#include "access/tupdesc.h" /* for TupleDesc */
#include "storage/itemptr.h"
#define MaxIndexAttributeNumber 7
typedef struct IndexTupleData {
ItemPointerData t_tid; /* reference TID to base tuple */
/*
 * t_info is laid out in the following fashion:
*
* 15th (leftmost) bit: "has nulls" bit
* 14th bit: "has varlenas" bit
* 13th bit: "has rules" bit - (removed ay 11/94)
* bits 12-0 bit: size of tuple.
*/
unsigned short t_info; /* various info about tuple */
/*
* please make sure sizeof(IndexTupleData) is MAXALIGN'ed.
* See IndexInfoFindDataOffset() for the reason.
*/
} IndexTupleData; /* MORE DATA FOLLOWS AT END OF STRUCT */
typedef IndexTupleData *IndexTuple;
typedef struct InsertIndexResultData {
ItemPointerData pointerData;
} InsertIndexResultData;
typedef InsertIndexResultData *InsertIndexResult;
typedef struct RetrieveIndexResultData {
ItemPointerData index_iptr;
ItemPointerData heap_iptr;
} RetrieveIndexResultData;
typedef RetrieveIndexResultData *RetrieveIndexResult;
/*-----------------
* PredInfo -
* used for partial indices
*-----------------
*/
typedef struct PredInfo {
Node *pred;
Node *oldPred;
} PredInfo;
/* ----------------
* externs
* ----------------
*/
#define INDEX_SIZE_MASK 0x1FFF
#define INDEX_NULL_MASK 0x8000
#define INDEX_VAR_MASK 0x4000
#define IndexTupleSize(itup) (((IndexTuple) (itup))->t_info & 0x1FFF)
#define IndexTupleDSize(itup) ((itup).t_info & 0x1FFF)
#define IndexTupleNoNulls(itup) (!(((IndexTuple) (itup))->t_info & 0x8000))
#define IndexTupleAllFixed(itup) (!(((IndexTuple) (itup))->t_info & 0x4000))
#define IndexTupleHasMinHeader(itup) (IndexTupleNoNulls(itup))
/* indextuple.h */
extern IndexTuple index_formtuple(TupleDesc tupleDescriptor,
Datum value[], char null[]);
extern char *fastgetiattr(IndexTuple tup, int attnum,
TupleDesc att, bool *isnull);
extern Datum index_getattr(IndexTuple tuple, AttrNumber attNum,
TupleDesc tupDesc, bool *isNullOutP);
extern RetrieveIndexResult
FormRetrieveIndexResult(ItemPointer indexItemPointer,
ItemPointer heapItemPointer);
extern void CopyIndexTuple(IndexTuple source, IndexTuple *target);
#endif /* ITUP_H */
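/*
 * Illustrative sketch, not part of the distribution sources: unpacking the
 * t_info bits documented above with the INDEX_*_MASK values; the sample
 * word is arbitrary.
 */
#include <stdio.h>

#define INDEX_SIZE_MASK 0x1FFF
#define INDEX_NULL_MASK 0x8000
#define INDEX_VAR_MASK 0x4000

int
main(void)
{
    unsigned short t_info = 0x8028;	/* "has nulls" set, size 40 */

    printf("size = %u bytes\n", (unsigned) (t_info & INDEX_SIZE_MASK));
    printf("has nulls = %s\n", (t_info & INDEX_NULL_MASK) ? "yes" : "no");
    printf("has varlenas = %s\n", (t_info & INDEX_VAR_MASK) ? "yes" : "no");
    return 0;
}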

264
src/backend/access/nbtree.h Normal file

@ -0,0 +1,264 @@
/*-------------------------------------------------------------------------
*
* nbtree.h--
* header file for postgres btree access method implementation.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef NBTREE_H
#define NBTREE_H
#include "access/attnum.h"
#include "access/itup.h"
#include "access/htup.h"
#include "access/tupdesc.h"
#include "access/istrat.h"
#include "access/funcindex.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "nodes/pg_list.h"
/*
* BTPageOpaqueData -- At the end of every page, we store a pointer
* to both siblings in the tree. See Lehman and Yao's paper for more
* info. In addition, we need to know what sort of page this is
* (leaf or internal), and whether the page is available for reuse.
*
* Lehman and Yao's algorithm requires a ``high key'' on every page.
* The high key on a page is guaranteed to be greater than or equal
* to any key that appears on this page. Our insertion algorithm
* guarantees that we can use the initial least key on our right
* sibling as the high key. We allocate space for the line pointer
* to the high key in the opaque data at the end of the page.
*
* Rightmost pages in the tree have no high key.
*/
typedef struct BTPageOpaqueData {
BlockNumber btpo_prev;
BlockNumber btpo_next;
uint16 btpo_flags;
#define BTP_LEAF (1 << 0)
#define BTP_ROOT (1 << 1)
#define BTP_FREE (1 << 2)
#define BTP_META (1 << 3)
} BTPageOpaqueData;
typedef BTPageOpaqueData *BTPageOpaque;
/*
* ScanOpaqueData is used to remember which buffers we're currently
* examining in the scan. We keep these buffers locked and pinned
* and recorded in the opaque entry of the scan in order to avoid
* doing a ReadBuffer() for every tuple in the index. This avoids
* semop() calls, which are expensive.
*/
typedef struct BTScanOpaqueData {
Buffer btso_curbuf;
Buffer btso_mrkbuf;
} BTScanOpaqueData;
typedef BTScanOpaqueData *BTScanOpaque;
/*
* BTItems are what we store in the btree. Each item has an index
* tuple, including key and pointer values. In addition, we must
* guarantee that all tuples in the index are unique, in order to
* satisfy some assumptions in Lehman and Yao. The way that we do
* this is by generating a new OID for every insertion that we do in
* the tree. This adds eight bytes to the size of btree index
* tuples. Note that we do not use the OID as part of a composite
* key; the OID only serves as a unique identifier for a given index
* tuple (logical position within a page).
*/
typedef struct BTItemData {
Oid bti_oid;
int32 bti_dummy; /* padding to make bti_itup
* align at 8-byte boundary
*/
IndexTupleData bti_itup;
} BTItemData;
typedef BTItemData *BTItem;
/*
* BTStackData -- As we descend a tree, we push the (key, pointer)
* pairs from internal nodes onto a private stack. If we split a
* leaf, we use this stack to walk back up the tree and insert data
* into parent nodes (and possibly to split them, too). Lehman and
* Yao's update algorithm guarantees that under no circumstances can
* our private stack give us an irredeemably bad picture up the tree.
* Again, see the paper for details.
*/
typedef struct BTStackData {
BlockNumber bts_blkno;
OffsetNumber bts_offset;
BTItem bts_btitem;
struct BTStackData *bts_parent;
} BTStackData;
typedef BTStackData *BTStack;
/*
* We need to be able to tell the difference between read and write
* requests for pages, in order to do locking correctly.
*/
#define BT_READ 0
#define BT_WRITE 1
/*
* Similarly, the difference between insertion and non-insertion binary
* searches on a given page makes a difference when we're descending the
* tree.
*/
#define BT_INSERTION 0
#define BT_DESCENT 1
/*
* In general, the btree code tries to localize its knowledge about
* page layout to a couple of routines. However, we need a special
* value to indicate "no page number" in those places where we expect
* page numbers.
*/
#define P_NONE 0
#define P_LEFTMOST(opaque) ((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)
#define P_HIKEY ((OffsetNumber) 1)
#define P_FIRSTKEY ((OffsetNumber) 2)
/*
* Strategy numbers -- ordering of these is <, <=, =, >=, >
*/
#define BTLessStrategyNumber 1
#define BTLessEqualStrategyNumber 2
#define BTEqualStrategyNumber 3
#define BTGreaterEqualStrategyNumber 4
#define BTGreaterStrategyNumber 5
#define BTMaxStrategyNumber 5
/*
* When a new operator class is declared, we require that the user
* supply us with an amproc procedure for determining whether, for
* two keys a and b, a < b, a = b, or a > b. This routine must
* return < 0, 0, > 0, respectively, in these three cases. Since we
* only have one such proc in amproc, it's number 1.
*/
#define BTORDER_PROC 1
/*
* prototypes for functions in nbtinsert.c
*/
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem);
extern bool _bt_itemcmp(Relation rel, Size keysz, BTItem item1, BTItem item2,
StrategyNumber strat);
/*
* prototypes for functions in nbtpage.c
*/
extern void _bt_metapinit(Relation rel);
extern void _bt_checkmeta(Relation rel);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf, int access);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_setpagelock(Relation rel, BlockNumber blkno, int access);
extern void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access);
extern void _bt_pagedel(Relation rel, ItemPointer tid);
/*
* prototypes for functions in nbtree.c
*/
extern bool BuildingBtree; /* in nbtree.c */
extern void btbuild(Relation heap, Relation index, int natts,
AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
extern InsertIndexResult btinsert(Relation rel, IndexTuple itup);
extern char *btgettuple(IndexScanDesc scan, ScanDirection dir);
extern char *btbeginscan(Relation rel, bool fromEnd, uint16 keysz,
ScanKey scankey);
extern void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
extern void btmovescan(IndexScanDesc scan, Datum v);
extern void btendscan(IndexScanDesc scan);
extern void btmarkpos(IndexScanDesc scan);
extern void btrestrpos(IndexScanDesc scan);
extern void btdelete(Relation rel, ItemPointer tid);
/*
* prototypes for functions in nbtscan.c
*/
extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan);
extern void _bt_adjscans(Relation rel, ItemPointer tid);
extern void _bt_scandel(IndexScanDesc scan, BlockNumber blkno,
OffsetNumber offno);
extern bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno,
OffsetNumber offno);
/*
* prototypes for functions in nbtsearch.c
*/
extern BTStack _bt_search(Relation rel, int keysz, ScanKey scankey,
Buffer *bufP);
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
ScanKey scankey, int access);
extern bool _bt_skeycmp(Relation rel, Size keysz, ScanKey scankey,
Page page, ItemId itemid, StrategyNumber strat);
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
ScanKey scankey, int srchtype);
extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
/*
* prototypes for functions in nbtstrat.c
*/
extern StrategyNumber _bt_getstrat(Relation rel, AttrNumber attno,
RegProcedure proc);
extern bool _bt_invokestrat(Relation rel, AttrNumber attno,
StrategyNumber strat, Datum left, Datum right);
/*
* prototypes for functions in nbtutils.c
*/
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
extern void _bt_orderkeys(Relation relation, uint16 *numberOfKeys,
ScanKey key);
extern bool _bt_checkqual(IndexScanDesc scan, IndexTuple itup);
extern BTItem _bt_formitem(IndexTuple itup);
/*
* prototypes for functions in nbtsort.c
*/
extern void *_bt_spoolinit(Relation index, int ntapes);
extern void _bt_spooldestroy(void *spool);
extern void _bt_spool(Relation index, BTItem btitem, void *spool);
extern void _bt_upperbuild(Relation index, BlockNumber blk, int level);
extern void _bt_leafbuild(Relation index, void *spool);
#endif /* NBTREE_H */
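/*
 * Illustrative sketch, not part of the distribution sources: how the
 * sibling pointer and P_HIKEY/P_FIRSTKEY conventions above fit together.
 * A rightmost page stores no high key, so its first data item sits at
 * offset P_HIKEY; every other page's data starts at P_FIRSTKEY.  The types
 * and macros are copied from this header so the example stands alone.
 */
#include <stdio.h>

typedef unsigned int BlockNumber;
typedef unsigned short OffsetNumber;
typedef unsigned short uint16;

typedef struct BTPageOpaqueData {
    BlockNumber btpo_prev;
    BlockNumber btpo_next;
    uint16 btpo_flags;
} BTPageOpaqueData;

#define P_NONE 0
#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)
#define P_HIKEY ((OffsetNumber) 1)
#define P_FIRSTKEY ((OffsetNumber) 2)

static OffsetNumber
first_data_offset(BTPageOpaqueData *opaque)
{
    /* rightmost pages have no high key, so data begins at P_HIKEY */
    return P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
}

int
main(void)
{
    BTPageOpaqueData rightmost = {7, P_NONE, 0};
    BTPageOpaqueData interior = {7, 9, 0};

    printf("rightmost page: data starts at offset %u\n",
	   (unsigned) first_data_offset(&rightmost));
    printf("interior page: data starts at offset %u\n",
	   (unsigned) first_data_offset(&interior));
    return 0;
}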

15
src/backend/access/nbtree/Makefile.inc Normal file

@ -0,0 +1,15 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/nbtree (btree access methods)
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= nbtcompare.c nbtinsert.c nbtpage.c nbtree.c nbtscan.c nbtsearch.c \
nbtstrat.c nbtutils.c nbtsort.c

68
src/backend/access/nbtree/README Normal file

@ -0,0 +1,68 @@
$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
This directory contains a correct implementation of Lehman and Yao's
btree management algorithm that supports concurrent access for Postgres.
We have made the following changes in order to incorporate their algorithm
into Postgres:
+ The requirement that all btree keys be unique is too onerous,
but the algorithm won't work correctly without it. As a result,
this implementation adds an OID (guaranteed to be unique) to
every key in the index. This guarantees uniqueness within a set
of duplicates. Space overhead is four bytes.
For this reason, when we're passed an index tuple to store by the
common access method code, we allocate a larger one and copy the
supplied tuple into it. No Postgres code outside of the btree
access method knows about this xid or sequence number.
+ Lehman and Yao don't require read locks, but assume that in-
memory copies of tree nodes are unshared. Postgres shares
in-memory buffers among backends. As a result, we do page-
level read locking on btree nodes in order to guarantee that
no record is modified while we are examining it. This reduces
concurrency but guarantees correct behavior.
+ Read locks on a page are held for as long as a scan has a pointer
to the page. However, locks are always surrendered before the
sibling page lock is acquired (for readers), so we remain deadlock-
free. I will do a formal proof if I get bored anytime soon.
In addition, the following things are handy to know:
+ Page zero of every btree is a meta-data page. This page stores
the location of the root page, a pointer to a list of free
pages, and other stuff that's handy to know.
+ This algorithm doesn't really work, since it requires ordered
writes, and UNIX doesn't support ordered writes.
+ There's one other case where we may screw up in this
implementation. When we start a scan, we descend the tree
to the key nearest the one in the qual, and once we get there,
position ourselves correctly for the qual type (eg, <, >=, etc).
If we happen to step off a page, decide we want to get back to
it, and fetch the page again, and if some bad person has split
the page and moved the last tuple we saw off of it, then the
code complains about botched concurrency in an elog(WARN, ...)
and gives up the ghost. This is the ONLY violation of Lehman
and Yao's guarantee of correct behavior that I am aware of in
this code.
Notes to operator class implementors:
With this implementation, we require the user to supply us with
a procedure for pg_amproc. This procedure should take two keys
A and B and return < 0, 0, or > 0 if A < B, A = B, or A > B,
respectively. See the contents of that relation for the btree
access method for some samples.
Notes to mao for implementation document:
On deletions, we need to adjust the position of active scans on
the index. The code in nbtscan.c handles this. We don't need to
do this for splits because of the way splits are handled; if they
happen behind us, we'll automatically go to the next page, and if
they happen in front of us, we're not affected by them. For
insertions, if we inserted a tuple behind the current scan location
on the current scan page, we move one space ahead.

173
src/backend/access/nbtree/nbtcompare.c Normal file

@ -0,0 +1,173 @@
/*-------------------------------------------------------------------------
*
* btcompare.c--
* Comparison functions for btree access method.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
* defined on btrees, they compute
*
* compare(a, b):
* < 0 if a < b,
* = 0 if a == b,
* > 0 if a > b.
*-------------------------------------------------------------------------
*/
#include <string.h>
#include "postgres.h"
#include "utils/nabstime.h"
int32
btint2cmp(int16 a, int16 b)
{
return ((int32) (a - b));
}
int32
btint4cmp(int32 a, int32 b)
{
return (a - b);
}
int32
btint24cmp(int16 a, int32 b)
{
return (((int32) a) - b);
}
int32
btint42cmp(int32 a, int16 b)
{
return (a - ((int32) b));
}
int32
btfloat4cmp(float32 a, float32 b)
{
if (*a > *b)
return (1);
else if (*a == *b)
return (0);
else
return (-1);
}
int32
btfloat8cmp(float64 a, float64 b)
{
if (*a > *b)
return (1);
else if (*a == *b)
return (0);
else
return (-1);
}
int32
btoidcmp(Oid a, Oid b)
{
if (a > b)
return (1);
else if (a == b)
return (0);
else
return (-1);
}
int32
btabstimecmp(AbsoluteTime a, AbsoluteTime b)
{
if (AbsoluteTimeIsBefore(a, b))
return (1);
else if (AbsoluteTimeIsBefore(b, a))
return (-1);
else
return (0);
}
int32
btcharcmp(char a, char b)
{
return ((int32) (a - b));
}
int32
btchar2cmp(uint16 a, uint16 b)
{
return (strncmp((char *) &a, (char *) &b, 2));
}
int32
btchar4cmp(uint32 a, uint32 b)
{
return (strncmp((char *) &a, (char *) &b, 4));
}
int32
btchar8cmp(char *a, char *b)
{
return (strncmp(a, b, 8));
}
int32
btchar16cmp(char *a, char *b)
{
return (strncmp(a, b, 16));
}
int32
btnamecmp(NameData *a, NameData *b)
{
return (strncmp(a->data, b->data, NAMEDATALEN));
}
int32
bttextcmp(struct varlena *a, struct varlena *b)
{
char *ap, *bp;
int len;
int res;
ap = VARDATA(a);
bp = VARDATA(b);
/* len is the length of the shorter of the two strings */
if ((len = VARSIZE(a)) > VARSIZE(b))
len = VARSIZE(b);
/* len includes the four bytes in which string length is stored */
len -= sizeof(VARSIZE(a));
/*
* If the two strings differ in the first len bytes, or if they're
* the same in the first len bytes and they're both len bytes long,
* we're done.
*/
res = 0;
if (len > 0) {
do {
res = (int) (*ap++ - *bp++);
len--;
} while (res == 0 && len != 0);
}
if (res != 0 || VARSIZE(a) == VARSIZE(b))
return (res);
/*
* The two strings are the same in the first len bytes, and they
* are of different lengths.
*/
if (VARSIZE(a) < VARSIZE(b))
return (-1);
else
return (1);
}

831
src/backend/access/nbtree/nbtinsert.c Normal file

@ -0,0 +1,831 @@
/*-------------------------------------------------------------------------
*
* btinsert.c--
* Item insertion in Lehman and Yao btrees for Postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/nbtree.h"
static InsertIndexResult _bt_insertonpg(Relation rel, Buffer buf, BTStack stack, int keysz, ScanKey scankey, BTItem btitem, BTItem afteritem);
static Buffer _bt_split(Relation rel, Buffer buf);
static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start, OffsetNumber maxoff, Size llimit);
static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem);
static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem);
static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, Oid bti_oid, BTItem newItem);
/*
* _bt_doinsert() -- Handle insertion of a single btitem in the tree.
*
* This routine is called by the public interface routines, btbuild
* and btinsert. By here, btitem is filled in, and has a unique
* (xid, seqno) pair.
*/
InsertIndexResult
_bt_doinsert(Relation rel, BTItem btitem)
{
ScanKey itup_scankey;
IndexTuple itup;
BTStack stack;
Buffer buf;
BlockNumber blkno;
int natts;
InsertIndexResult res;
itup = &(btitem->bti_itup);
/* we need a scan key to do our search, so build one */
itup_scankey = _bt_mkscankey(rel, itup);
natts = rel->rd_rel->relnatts;
/* find the page containing this key */
stack = _bt_search(rel, natts, itup_scankey, &buf);
blkno = BufferGetBlockNumber(buf);
/* trade in our read lock for a write lock */
_bt_relbuf(rel, buf, BT_READ);
buf = _bt_getbuf(rel, blkno, BT_WRITE);
/*
* If the page was split between the time that we surrendered our
* read lock and acquired our write lock, then this page may no
* longer be the right place for the key we want to insert. In this
* case, we need to move right in the tree. See Lehman and Yao for
* an excruciatingly precise description.
*/
buf = _bt_moveright(rel, buf, natts, itup_scankey, BT_WRITE);
/* do the insertion */
res = _bt_insertonpg(rel, buf, stack, natts, itup_scankey,
btitem, (BTItem) NULL);
/* be tidy */
_bt_freestack(stack);
_bt_freeskey(itup_scankey);
return (res);
}
/*
* _bt_insertonpg() -- Insert a tuple on a particular page in the index.
*
* This recursive procedure does the following things:
*
* + if necessary, splits the target page.
* + finds the right place to insert the tuple (taking into
* account any changes induced by a split).
* + inserts the tuple.
* + if the page was split, pops the parent stack, and finds the
* right place to insert the new child pointer (by walking
* right using information stored in the parent stack).
* + invoking itself with the appropriate tuple for the right
* child page on the parent.
*
* On entry, we must have the right buffer on which to do the
* insertion, and the buffer must be pinned and locked. On return,
* we will have dropped both the pin and the write lock on the buffer.
*
* The locking interactions in this code are critical. You should
* grok Lehman and Yao's paper before making any changes. In addition,
* you need to understand how we disambiguate duplicate keys in this
* implementation, in order to be able to find our location using
* L&Y "move right" operations. Since we may insert duplicate user
* keys, and since these dups may propagate up the tree, we use the
* 'afteritem' parameter to position ourselves correctly for the
* insertion on internal pages.
*/
static InsertIndexResult
_bt_insertonpg(Relation rel,
Buffer buf,
BTStack stack,
int keysz,
ScanKey scankey,
BTItem btitem,
BTItem afteritem)
{
InsertIndexResult res;
Page page;
Buffer rbuf;
Buffer pbuf;
Page rpage;
ScanKey newskey;
BTItem ritem;
BTPageOpaque rpageop;
BlockNumber rbknum, itup_blkno;
OffsetNumber itup_off;
int itemsz;
InsertIndexResult newres;
BTItem new_item = (BTItem) NULL;
BTItem lowLeftItem;
page = BufferGetPage(buf);
itemsz = IndexTupleDSize(btitem->bti_itup)
+ (sizeof(BTItemData) - sizeof(IndexTupleData));
itemsz = DOUBLEALIGN(itemsz); /* be safe, PageAddItem will do this
but we need to be consistent */
if (PageGetFreeSpace(page) < itemsz) {
/* split the buffer into left and right halves */
rbuf = _bt_split(rel, buf);
/* which new page (left half or right half) gets the tuple? */
if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) {
/* left page */
itup_off = _bt_pgaddtup(rel, buf, keysz, scankey,
itemsz, btitem, afteritem);
itup_blkno = BufferGetBlockNumber(buf);
} else {
/* right page */
itup_off = _bt_pgaddtup(rel, rbuf, keysz, scankey,
itemsz, btitem, afteritem);
itup_blkno = BufferGetBlockNumber(rbuf);
}
/*
* By here,
*
* + our target page has been split;
* + the original tuple has been inserted;
* + we have write locks on both the old (left half) and new
* (right half) buffers, after the split; and
* + we have the key we want to insert into the parent.
*
* Do the parent insertion. We need to hold onto the locks for
* the child pages until we locate the parent, but we can release
* them before doing the actual insertion (see Lehman and Yao for
* the reasoning).
*/
if (stack == (BTStack) NULL) {
/* create a new root node and release the split buffers */
_bt_newroot(rel, buf, rbuf);
_bt_relbuf(rel, buf, BT_WRITE);
_bt_relbuf(rel, rbuf, BT_WRITE);
} else {
/* form an index tuple that points at the new right page */
rbknum = BufferGetBlockNumber(rbuf);
rpage = BufferGetPage(rbuf);
rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
/*
* By convention, the first entry (0) on every
* non-rightmost page is the high key for that page. In
* order to get the lowest key on the new right page, we
* actually look at its second (1) entry.
*/
if (! P_RIGHTMOST(rpageop)) {
ritem = (BTItem) PageGetItem(rpage,
PageGetItemId(rpage, P_FIRSTKEY));
} else {
ritem = (BTItem) PageGetItem(rpage,
PageGetItemId(rpage, P_HIKEY));
}
/* get a unique btitem for this key */
new_item = _bt_formitem(&(ritem->bti_itup));
ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY);
/* find the parent buffer */
pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
/*
* If the key of new_item is < than the key of the item
* in the parent page pointing to the left page
* (stack->bts_btitem), we have to update the latter key;
* otherwise the keys on the parent page wouldn't be
* monotonically increasing after we inserted the new
* pointer to the right page (new_item). This only
* happens if our left page is the leftmost page and a
* new minimum key had been inserted before, which is not
* reflected in the parent page but didn't matter so
* far. If there are duplicate keys and this new minimum
* key spills over to our new right page, we get an
* inconsistency if we don't update the left key in the
* parent page.
*/
if (_bt_itemcmp(rel, keysz, stack->bts_btitem, new_item,
BTGreaterStrategyNumber)) {
lowLeftItem =
(BTItem) PageGetItem(page,
PageGetItemId(page, P_FIRSTKEY));
/* page must have right pointer after split */
_bt_updateitem(rel, keysz, pbuf, stack->bts_btitem->bti_oid,
lowLeftItem);
}
/* don't need the children anymore */
_bt_relbuf(rel, buf, BT_WRITE);
_bt_relbuf(rel, rbuf, BT_WRITE);
newskey = _bt_mkscankey(rel, &(new_item->bti_itup));
newres = _bt_insertonpg(rel, pbuf, stack->bts_parent,
keysz, newskey, new_item,
stack->bts_btitem);
/* be tidy */
pfree(newres);
pfree(newskey);
pfree(new_item);
}
} else {
itup_off = _bt_pgaddtup(rel, buf, keysz, scankey,
itemsz, btitem, afteritem);
itup_blkno = BufferGetBlockNumber(buf);
_bt_relbuf(rel, buf, BT_WRITE);
}
/* by here, the new tuple is inserted */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
return (res);
}
/*
* _bt_split() -- split a page in the btree.
*
* On entry, buf is the page to split, and is write-locked and pinned.
* Returns the new right sibling of buf, pinned and write-locked. The
* pin and lock on buf are maintained.
*/
static Buffer
_bt_split(Relation rel, Buffer buf)
{
Buffer rbuf;
Page origpage;
Page leftpage, rightpage;
BTPageOpaque ropaque, lopaque, oopaque;
Buffer sbuf;
Page spage;
BTPageOpaque sopaque;
Size itemsz;
ItemId itemid;
BTItem item;
OffsetNumber leftoff, rightoff;
OffsetNumber start;
OffsetNumber maxoff;
OffsetNumber firstright;
OffsetNumber i;
Size llimit;
rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
origpage = BufferGetPage(buf);
leftpage = PageGetTempPage(origpage, sizeof(BTPageOpaqueData));
rightpage = BufferGetPage(rbuf);
_bt_pageinit(rightpage, BufferGetPageSize(rbuf));
_bt_pageinit(leftpage, BufferGetPageSize(buf));
/* init btree private data */
oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage);
ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
/* if we're splitting this page, it won't be the root when we're done */
oopaque->btpo_flags &= ~BTP_ROOT;
lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags;
lopaque->btpo_prev = oopaque->btpo_prev;
ropaque->btpo_prev = BufferGetBlockNumber(buf);
lopaque->btpo_next = BufferGetBlockNumber(rbuf);
ropaque->btpo_next = oopaque->btpo_next;
/*
* If the page we're splitting is not the rightmost page at its
* level in the tree, then the first (0) entry on the page is the
* high key for the page. We need to copy that to the right
* half. Otherwise (meaning the rightmost page case), we should
* treat the line pointers beginning at zero as user data.
*
* We leave a blank space at the start of the line table for the
* left page. We'll come back later and fill it in with the high
* key item we get from the right page.
*/
leftoff = P_FIRSTKEY;
ropaque->btpo_next = oopaque->btpo_next;
if (! P_RIGHTMOST(oopaque)) {
/* splitting a non-rightmost page, start at the first data item */
start = P_FIRSTKEY;
/* copy the original high key to the new page */
itemid = PageGetItemId(origpage, P_HIKEY);
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(origpage, itemid);
(void) PageAddItem(rightpage, (Item) item, itemsz, P_HIKEY, LP_USED);
rightoff = P_FIRSTKEY;
} else {
/* splitting a rightmost page, "high key" is the first data item */
start = P_HIKEY;
/* the new rightmost page will not have a high key */
rightoff = P_HIKEY;
}
maxoff = PageGetMaxOffsetNumber(origpage);
llimit = PageGetFreeSpace(leftpage) / 2;
firstright = _bt_findsplitloc(rel, origpage, start, maxoff, llimit);
for (i = start; i <= maxoff; i = OffsetNumberNext(i)) {
itemid = PageGetItemId(origpage, i);
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(origpage, itemid);
/* decide which page to put it on */
if (i < firstright) {
(void) PageAddItem(leftpage, (Item) item, itemsz, leftoff,
LP_USED);
leftoff = OffsetNumberNext(leftoff);
} else {
(void) PageAddItem(rightpage, (Item) item, itemsz, rightoff,
LP_USED);
rightoff = OffsetNumberNext(rightoff);
}
}
/*
* Okay, page has been split, high key on right page is correct. Now
* set the high key on the left page to be the min key on the right
* page.
*/
if (P_RIGHTMOST(ropaque)) {
itemid = PageGetItemId(rightpage, P_HIKEY);
} else {
itemid = PageGetItemId(rightpage, P_FIRSTKEY);
}
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(rightpage, itemid);
/*
* We left a hole for the high key on the left page; fill it. The
* modal crap is to tell the page manager to put the new item on the
* page and not screw around with anything else. Whoever designed
* this interface has presumably crawled back into the dung heap they
* came from. No one here will admit to it.
*/
PageManagerModeSet(OverwritePageManagerMode);
(void) PageAddItem(leftpage, (Item) item, itemsz, P_HIKEY, LP_USED);
PageManagerModeSet(ShufflePageManagerMode);
/*
* By here, the original data page has been split into two new halves,
* and these are correct. The algorithm requires that the left page
* never move during a split, so we copy the new left page back on top
* of the original. Note that this is not a waste of time, since we
* also require (in the page management code) that the center of a
* page always be clean, and the most efficient way to guarantee this
* is just to compact the data by reinserting it into a new left page.
*/
PageRestoreTempPage(leftpage, origpage);
/* write these guys out */
_bt_wrtnorelbuf(rel, rbuf);
_bt_wrtnorelbuf(rel, buf);
/*
* Finally, we need to grab the right sibling (if any) and fix the
* prev pointer there. We are guaranteed that this is deadlock-free
* since no other writer will be holding a lock on that page while
* trying to move left, and all readers release locks on a page
* before trying to fetch its neighbors.
*/
if (! P_RIGHTMOST(ropaque)) {
sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE);
spage = BufferGetPage(sbuf);
sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
/* write and release the old right sibling */
_bt_wrtbuf(rel, sbuf);
}
/* split's done */
return (rbuf);
}
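/*
 * A minimal standalone sketch of the split layout produced above, under a
 * toy model: a "page" is just an array of int keys, slot 0 of a
 * non-rightmost page plays the role of the high key (P_HIKEY) with user
 * data starting at slot 1 (P_FIRSTKEY), and a rightmost page has no high
 * key.  toy_page and toy_split are hypothetical names; the real code goes
 * through PageAddItem/PageGetItem, item ids and _bt_findsplitloc.
 */
#include <stdio.h>

#define TOY_PAGE_MAX 16

typedef struct {
	int nitems;
	int keys[TOY_PAGE_MAX];
	int rightmost;		/* nonzero if the page has no right sibling */
} toy_page;

static void
toy_split(const toy_page *orig, toy_page *left, toy_page *right)
{
	int start = orig->rightmost ? 0 : 1;	/* skip the old high key, if any */
	int ndata = orig->nitems - start;
	int firstright = ndata / 2;		/* simplistic split point */
	int i;

	left->nitems = right->nitems = 0;
	left->rightmost = 0;			/* the left half gains a right sibling */
	right->rightmost = orig->rightmost;

	/* a non-rightmost original passes its high key to the right half */
	if (!orig->rightmost)
		right->keys[right->nitems++] = orig->keys[0];

	left->nitems = 1;			/* reserve slot 0 for the left high key */

	for (i = 0; i < ndata; i++) {
		int key = orig->keys[start + i];

		if (i < firstright)
			left->keys[left->nitems++] = key;
		else
			right->keys[right->nitems++] = key;
	}

	/* fill the hole: left high key = lowest data key on the right half */
	left->keys[0] = right->keys[right->rightmost ? 0 : 1];
}

int
main(void)
{
	toy_page orig = { 6, { 10, 20, 30, 40, 50, 60 }, 1 };
	toy_page left, right;

	toy_split(&orig, &left, &right);
	printf("left high key %d, first key on right %d\n",
	       left.keys[0],
	       right.keys[right.rightmost ? 0 : 1]);	/* prints 40 and 40 */
	return 0;
}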
/*
* _bt_findsplitloc() -- find a safe place to split a page.
*
* In order to guarantee the proper handling of searches for duplicate
* keys, the first duplicate in the chain must either be the first
* item on the page after the split, or the entire chain must be on
* one of the two pages. That is,
* [1 2 2 2 3 4 5]
* must become
* [1] [2 2 2 3 4 5]
* or
* [1 2 2 2] [3 4 5]
* but not
* [1 2 2] [2 3 4 5].
* However,
* [2 2 2 2 2 3 4]
* may be split as
* [2 2 2 2] [2 3 4].
*/
static OffsetNumber
_bt_findsplitloc(Relation rel,
Page page,
OffsetNumber start,
OffsetNumber maxoff,
Size llimit)
{
OffsetNumber i;
OffsetNumber saferight;
ItemId nxtitemid, safeitemid;
BTItem safeitem, nxtitem;
IndexTuple safetup, nxttup;
Size nbytes;
TupleDesc itupdesc;
int natts;
int attno;
Datum attsafe;
Datum attnext;
bool null;
itupdesc = RelationGetTupleDescriptor(rel);
natts = rel->rd_rel->relnatts;
saferight = start;
safeitemid = PageGetItemId(page, saferight);
nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData);
safeitem = (BTItem) PageGetItem(page, safeitemid);
safetup = &(safeitem->bti_itup);
i = OffsetNumberNext(start);
while (nbytes < llimit) {
/* check the next item on the page */
nxtitemid = PageGetItemId(page, i);
nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData));
nxtitem = (BTItem) PageGetItem(page, nxtitemid);
nxttup = &(nxtitem->bti_itup);
/* test against last known safe item */
for (attno = 1; attno <= natts; attno++) {
attsafe = index_getattr(safetup, attno, itupdesc, &null);
attnext = index_getattr(nxttup, attno, itupdesc, &null);
/*
* If the tuple we're looking at isn't equal to the last safe one
* we saw, then it's our new safe tuple.
*/
if (!_bt_invokestrat(rel, attno, BTEqualStrategyNumber,
attsafe, attnext)) {
safetup = nxttup;
saferight = i;
/* break is for the attno for loop */
break;
}
}
i = OffsetNumberNext(i);
}
/*
* If the chain of dups starts at the beginning of the page and extends
* past the halfway mark, we can split it in the middle.
*/
if (saferight == start)
saferight = i;
return (saferight);
}
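/*
 * A minimal standalone sketch of the duplicate rule described above,
 * assuming a page is just a sorted array of int keys and that the free
 * space limit can be modelled as a count of items.  toy_findsplitloc is a
 * hypothetical name; the real code compares attributes through
 * _bt_invokestrat and accounts for item sizes in bytes.  The returned
 * index is the first item that goes to the right page, and it only
 * advances when the key changes, so a run of equal keys is never cut --
 * unless the run starts at the beginning of the page, in which case it
 * may be split in the middle.
 */
#include <stdio.h>

static int
toy_findsplitloc(const int *keys, int nkeys, int llimit)
{
	int saferight = 0;	/* last boundary that does not cut a run */
	int i;

	for (i = 1; i < nkeys && i <= llimit; i++) {
		if (keys[i] != keys[saferight])
			saferight = i;	/* key changed: splitting here is safe */
	}
	/* a run that fills the whole left half may be split past the limit */
	if (saferight == 0)
		saferight = i;
	return saferight;
}

int
main(void)
{
	int a[] = { 1, 2, 2, 2, 3, 4, 5 };
	int b[] = { 2, 2, 2, 2, 2, 3, 4 };

	/* the run of 2s stays whole: boundary 1 gives [1] [2 2 2 3 4 5] */
	printf("%d\n", toy_findsplitloc(a, 7, 3));	/* prints 1 */
	/* a leading run may be cut: boundary 4 gives [2 2 2 2] [2 3 4] */
	printf("%d\n", toy_findsplitloc(b, 7, 3));	/* prints 4 */
	return 0;
}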
/*
* _bt_newroot() -- Create a new root page for the index.
*
* We've just split the old root page and need to create a new one.
* In order to do this, we add a new root page to the file, then lock
* the metadata page and update it. This is guaranteed to be deadlock-
* free, because all readers release their locks on the metadata page
* before trying to lock the root, and all writers lock the root before
* trying to lock the metadata page. We have a write lock on the old
* root page, so we have not introduced any cycles into the waits-for
* graph.
*
* On entry, lbuf (the old root) and rbuf (its new peer) are write-
* locked. We don't drop the locks in this routine; that's done by
* the caller. On exit, a new root page exists with entries for the
* two new children. The new root page is neither pinned nor locked.
*/
static void
_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{
Buffer rootbuf;
Page lpage, rpage, rootpage;
BlockNumber lbkno, rbkno;
BlockNumber rootbknum;
BTPageOpaque rootopaque;
ItemId itemid;
BTItem item;
Size itemsz;
BTItem new_item;
/* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
rootpage = BufferGetPage(rootbuf);
_bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
/* set btree special data */
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
rootopaque->btpo_flags |= BTP_ROOT;
/*
* Insert the internal tuple pointers.
*/
lbkno = BufferGetBlockNumber(lbuf);
rbkno = BufferGetBlockNumber(rbuf);
lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf);
/*
* step over the high key on the left page while building the
* left page pointer.
*/
itemid = PageGetItemId(lpage, P_FIRSTKEY);
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(lpage, itemid);
new_item = _bt_formitem(&(item->bti_itup));
ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_FIRSTKEY);
/*
* insert the left page pointer into the new root page. the root
* page is the rightmost page on its level so the "high key" item
* is the first data item.
*/
(void) PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED);
pfree(new_item);
/*
* the right page is the rightmost page on the second level, so
* the "high key" item is the first data item on that page as well.
*/
itemid = PageGetItemId(rpage, P_HIKEY);
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(rpage, itemid);
new_item = _bt_formitem(&(item->bti_itup));
ItemPointerSet(&(new_item->bti_itup.t_tid), rbkno, P_HIKEY);
/*
* insert the right page pointer into the new root page.
*/
(void) PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED);
pfree(new_item);
/* write and let go of the root buffer */
rootbknum = BufferGetBlockNumber(rootbuf);
_bt_wrtbuf(rel, rootbuf);
/* update metadata page with new root block number */
_bt_metaproot(rel, rootbknum);
}
/*
* _bt_pgaddtup() -- add a tuple to a particular page in the index.
*
* This routine adds the tuple to the page as requested, and keeps the
* write lock and reference associated with the page's buffer. It is
* an error to call pgaddtup() without a write lock and reference. If
* afteritem is non-null, it's the item that we expect our new item
* to follow. Otherwise, we do a binary search for the correct place
* and insert the new item there.
*/
static OffsetNumber
_bt_pgaddtup(Relation rel,
Buffer buf,
int keysz,
ScanKey itup_scankey,
Size itemsize,
BTItem btitem,
BTItem afteritem)
{
OffsetNumber itup_off;
OffsetNumber first;
Page page;
BTPageOpaque opaque;
BTItem chkitem;
Oid afteroid;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
first = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if (afteritem == (BTItem) NULL) {
itup_off = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION);
} else {
afteroid = afteritem->bti_oid;
itup_off = first;
do {
chkitem =
(BTItem) PageGetItem(page, PageGetItemId(page, itup_off));
itup_off = OffsetNumberNext(itup_off);
} while (chkitem->bti_oid != afteroid);
}
(void) PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED);
/* write the buffer, but hold our lock */
_bt_wrtnorelbuf(rel, buf);
return (itup_off);
}
/*
* _bt_goesonpg() -- Does a new tuple belong on this page?
*
* This is part of the complexity introduced by allowing duplicate
* keys into the index. The tuple belongs on this page if:
*
* + there is no page to the right of this one; or
* + it is less than the high key on the page; or
* + the item it is to follow ("afteritem") appears on this
* page.
*/
static bool
_bt_goesonpg(Relation rel,
Buffer buf,
Size keysz,
ScanKey scankey,
BTItem afteritem)
{
Page page;
ItemId hikey;
BTPageOpaque opaque;
BTItem chkitem;
OffsetNumber offnum, maxoff;
Oid afteroid;
bool found;
page = BufferGetPage(buf);
/* no right neighbor? */
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (P_RIGHTMOST(opaque))
return (true);
/*
* this is a non-rightmost page, so it must have a high key item.
*
* If the scan key is < the high key (the min key on the next page),
* then it for sure belongs here.
*/
hikey = PageGetItemId(page, P_HIKEY);
if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTLessStrategyNumber))
return (true);
/*
* If the scan key is > the high key, then it for sure doesn't belong
* here.
*/
if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTGreaterStrategyNumber))
return (false);
/*
* If we have no adjacency information, and the item is equal to the
* high key on the page (by here it is), then the item does not belong
* on this page.
*/
if (afteritem == (BTItem) NULL)
return (false);
/* damn, have to work for it. i hate that. */
afteroid = afteritem->bti_oid;
maxoff = PageGetMaxOffsetNumber(page);
/*
* Search the entire page for the afteroid. We need to do this, rather
* than doing a binary search and starting from there, because if the
* key we're searching for is the leftmost key in the tree at this
* level, then a binary search will do the wrong thing. Splits are
* pretty infrequent, so the cost isn't as bad as it could be.
*/
found = false;
for (offnum = P_FIRSTKEY;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum)) {
chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
if (chkitem->bti_oid == afteroid) {
found = true;
break;
}
}
return (found);
}
/*
* _bt_itemcmp() -- compare item1 to item2 using a requested
* strategy (<, <=, =, >=, >)
*
*/
bool
_bt_itemcmp(Relation rel,
Size keysz,
BTItem item1,
BTItem item2,
StrategyNumber strat)
{
TupleDesc tupDes;
IndexTuple indexTuple1, indexTuple2;
Datum attrDatum1, attrDatum2;
int i;
bool isNull;
bool compare;
tupDes = RelationGetTupleDescriptor(rel);
indexTuple1 = &(item1->bti_itup);
indexTuple2 = &(item2->bti_itup);
for (i = 1; i <= keysz; i++) {
attrDatum1 = index_getattr(indexTuple1, i, tupDes, &isNull);
attrDatum2 = index_getattr(indexTuple2, i, tupDes, &isNull);
compare = _bt_invokestrat(rel, i, strat, attrDatum1, attrDatum2);
if (!compare) {
return (false);
}
}
return (true);
}
/*
* _bt_updateitem() -- updates the key of the item identified by the
* oid with the key of newItem (done in place)
*
*/
static void
_bt_updateitem(Relation rel,
Size keysz,
Buffer buf,
Oid bti_oid,
BTItem newItem)
{
Page page;
OffsetNumber maxoff;
OffsetNumber i;
ItemPointerData itemPtrData;
BTItem item;
IndexTuple oldIndexTuple, newIndexTuple;
page = BufferGetPage(buf);
maxoff = PageGetMaxOffsetNumber(page);
/* locate item on the page */
i = P_HIKEY;
do {
item = (BTItem) PageGetItem(page, PageGetItemId(page, i));
i = OffsetNumberNext(i);
} while (i <= maxoff && item->bti_oid != bti_oid);
/* this should never happen (in theory) */
if (item->bti_oid != bti_oid) {
elog(FATAL, "_bt_getstackbuf was lying!!");
}
oldIndexTuple = &(item->bti_itup);
newIndexTuple = &(newItem->bti_itup);
/* keep the original item pointer */
ItemPointerCopy(&(oldIndexTuple->t_tid), &itemPtrData);
CopyIndexTuple(newIndexTuple, &oldIndexTuple);
ItemPointerCopy(&itemPtrData, &(oldIndexTuple->t_tid));
}

View File

@ -0,0 +1,523 @@
/*-------------------------------------------------------------------------
*
* btpage.c--
* BTree-specific page management code for the Postgres btree access
* method.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
* data at high addresses includes pointers to left and right siblings
* and flag data describing page state. The first page in a btree, page
* zero, is special -- it stores meta-information describing the tree.
* Pages one and higher store the actual tree data.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/genam.h"
#include "access/nbtree.h"
#define BTREE_METAPAGE 0
#define BTREE_MAGIC 0x053162
#define BTREE_VERSION 0
typedef struct BTMetaPageData {
uint32 btm_magic;
uint32 btm_version;
BlockNumber btm_root;
} BTMetaPageData;
#define BTPageGetMeta(p) \
((BTMetaPageData *) &((PageHeader) p)->pd_linp[0])
extern bool BuildingBtree;
/*
* We use high-concurrency locking on btrees. There are two cases in
* which we don't do locking. One is when we're building the btree.
* Since the creating transaction has not committed, no one can see
* the index, and there's no reason to share locks. The second case
* is when we're just starting up the database system. We use some
* special-purpose initialization code in the relation cache manager
* (see utils/cache/relcache.c) to allow us to do indexed scans on
* the system catalogs before we'd normally be able to. This happens
* before the lock table is fully initialized, so we can't use it.
* Strictly speaking, this violates 2pl, but we don't do 2pl on the
* system catalogs anyway, so I declare this to be okay.
*/
#define USELOCKING (!BuildingBtree && !IsInitProcessingMode())
/*
* _bt_metapinit() -- Initialize the metadata page of a btree.
*/
void
_bt_metapinit(Relation rel)
{
Buffer buf;
Page pg;
int nblocks;
BTMetaPageData metad;
BTPageOpaque op;
/* can't be sharing this with anyone, now... */
if (USELOCKING)
RelationSetLockForWrite(rel);
if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
elog(WARN, "Cannot initialize non-empty btree %s",
RelationGetRelationName(rel));
}
buf = ReadBuffer(rel, P_NEW);
pg = BufferGetPage(buf);
_bt_pageinit(pg, BufferGetPageSize(buf));
metad.btm_magic = BTREE_MAGIC;
metad.btm_version = BTREE_VERSION;
metad.btm_root = P_NONE;
memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad));
op = (BTPageOpaque) PageGetSpecialPointer(pg);
op->btpo_flags = BTP_META;
WriteBuffer(buf);
/* all done */
if (USELOCKING)
RelationUnsetLockForWrite(rel);
}
/*
* _bt_checkmeta() -- Verify that the metadata stored in a btree are
* reasonable.
*/
void
_bt_checkmeta(Relation rel)
{
Buffer metabuf;
Page metap;
BTMetaPageData *metad;
BTPageOpaque op;
int nblocks;
/* if the relation is empty, this is init time; don't complain */
if ((nblocks = RelationGetNumberOfBlocks(rel)) == 0)
return;
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
metap = BufferGetPage(metabuf);
op = (BTPageOpaque) PageGetSpecialPointer(metap);
if (!(op->btpo_flags & BTP_META)) {
elog(WARN, "Invalid metapage for index %s",
RelationGetRelationName(rel));
}
metad = BTPageGetMeta(metap);
if (metad->btm_magic != BTREE_MAGIC) {
elog(WARN, "Index %s is not a btree",
RelationGetRelationName(rel));
}
if (metad->btm_version != BTREE_VERSION) {
elog(WARN, "Version mismatch on %s: version %d file, version %d code",
RelationGetRelationName(rel),
metad->btm_version, BTREE_VERSION);
}
_bt_relbuf(rel, metabuf, BT_READ);
}
/*
* _bt_getroot() -- Get the root page of the btree.
*
* Since the root page can move around the btree file, we have to read
* its location from the metadata page, and then read the root page
* itself. If no root page exists yet, we have to create one. The
* standard class of race conditions exists here; I think I covered
* them all in the Hopi Indian rain dance of lock requests below.
*
* We pass in the access type (BT_READ or BT_WRITE), and return the
* root page's buffer with the appropriate lock type set. Reference
* count on the root page gets bumped by ReadBuffer. The metadata
* page is unlocked and unreferenced by this process when this routine
* returns.
*/
Buffer
_bt_getroot(Relation rel, int access)
{
Buffer metabuf;
Page metapg;
BTPageOpaque metaopaque;
Buffer rootbuf;
Page rootpg;
BTPageOpaque rootopaque;
BlockNumber rootblkno;
BTMetaPageData *metad;
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
metapg = BufferGetPage(metabuf);
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metapg);
/* if no root page initialized yet, do it */
if (metad->btm_root == P_NONE) {
/* turn our read lock in for a write lock */
_bt_relbuf(rel, metabuf, BT_READ);
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
metapg = BufferGetPage(metabuf);
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metapg);
/*
* Race condition: if someone else initialized the metadata between
* the time we released the read lock and acquired the write lock,
* above, we want to avoid doing it again.
*/
if (metad->btm_root == P_NONE) {
/*
* Get, initialize, write, and leave a lock of the appropriate
* type on the new root page. Since this is the first page in
* the tree, it's a leaf.
*/
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
rootblkno = BufferGetBlockNumber(rootbuf);
rootpg = BufferGetPage(rootbuf);
metad->btm_root = rootblkno;
_bt_pageinit(rootpg, BufferGetPageSize(rootbuf));
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
_bt_wrtnorelbuf(rel, rootbuf);
/* swap write lock for read lock, if appropriate */
if (access != BT_WRITE) {
_bt_setpagelock(rel, rootblkno, BT_READ);
_bt_unsetpagelock(rel, rootblkno, BT_WRITE);
}
/* okay, metadata is correct */
_bt_wrtbuf(rel, metabuf);
} else {
/*
* Metadata initialized by someone else. In order to guarantee
* no deadlocks, we have to release the metadata page and start
* all over again.
*/
_bt_relbuf(rel, metabuf, BT_WRITE);
return (_bt_getroot(rel, access));
}
} else {
rootbuf = _bt_getbuf(rel, metad->btm_root, access);
/* done with the meta page */
_bt_relbuf(rel, metabuf, BT_READ);
}
/*
* Race condition: If the root page split between the time we looked
* at the metadata page and got the root buffer, then we got the wrong
* buffer.
*/
rootpg = BufferGetPage(rootbuf);
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
if (!(rootopaque->btpo_flags & BTP_ROOT)) {
/* it happened, try again */
_bt_relbuf(rel, rootbuf, access);
return (_bt_getroot(rel, access));
}
/*
* By here, we have a correct lock on the root block, its reference
* count is correct, and we have no lock set on the metadata page.
* Return the root block.
*/
return (rootbuf);
}
/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
* When this routine returns, the appropriate lock is set on the
* requested buffer and its reference count is correct.
*/
Buffer
_bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
Buffer buf;
Page page;
/*
* If we want a new block, we can't set a lock of the appropriate type
* until we've instantiated the buffer.
*/
if (blkno != P_NEW) {
if (access == BT_WRITE)
_bt_setpagelock(rel, blkno, BT_WRITE);
else
_bt_setpagelock(rel, blkno, BT_READ);
buf = ReadBuffer(rel, blkno);
} else {
buf = ReadBuffer(rel, blkno);
blkno = BufferGetBlockNumber(buf);
page = BufferGetPage(buf);
_bt_pageinit(page, BufferGetPageSize(buf));
if (access == BT_WRITE)
_bt_setpagelock(rel, blkno, BT_WRITE);
else
_bt_setpagelock(rel, blkno, BT_READ);
}
/* ref count and lock type are correct */
return (buf);
}
/*
* _bt_relbuf() -- release a locked buffer.
*/
void
_bt_relbuf(Relation rel, Buffer buf, int access)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
/* access had better be one of read or write */
if (access == BT_WRITE)
_bt_unsetpagelock(rel, blkno, BT_WRITE);
else
_bt_unsetpagelock(rel, blkno, BT_READ);
ReleaseBuffer(buf);
}
/*
* _bt_wrtbuf() -- write a btree page to disk.
*
* This routine releases the lock held on the buffer and our reference
* to it. It is an error to call _bt_wrtbuf() without a write lock
* or a reference to the buffer.
*/
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
WriteBuffer(buf);
_bt_unsetpagelock(rel, blkno, BT_WRITE);
}
/*
* _bt_wrtnorelbuf() -- write a btree page to disk, but do not release
* our reference or lock.
*
* It is an error to call _bt_wrtnorelbuf() without a write lock
* or a reference to the buffer.
*/
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
WriteNoReleaseBuffer(buf);
}
/*
* _bt_pageinit() -- Initialize a new page.
*/
void
_bt_pageinit(Page page, Size size)
{
/*
* Cargo-cult programming -- don't really need this to be zero, but
* creating new pages is an infrequent occurrence and it makes me feel
* good when I know they're empty.
*/
memset(page, 0, size);
PageInit(page, size, sizeof(BTPageOpaqueData));
}
/*
* _bt_metaproot() -- Change the root page of the btree.
*
* Lehman and Yao require that the root page move around in order to
* guarantee deadlock-free short-term, fine-granularity locking. When
* we split the root page, we record the new parent in the metadata page
* for the relation. This routine does the work.
*
* No direct preconditions, but if you don't have a write lock on
* at least the old root page when you call this, you're making a big
* mistake. On exit, metapage data is correct and we no longer have
* a reference to or lock on the metapage.
*/
void
_bt_metaproot(Relation rel, BlockNumber rootbknum)
{
Buffer metabuf;
Page metap;
BTPageOpaque metaopaque;
BTMetaPageData *metad;
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
metap = BufferGetPage(metabuf);
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap);
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metap);
metad->btm_root = rootbknum;
_bt_wrtbuf(rel, metabuf);
}
/*
* _bt_getstackbuf() -- Walk back up the tree one step, and find the item
* we last looked at in the parent.
*
* This is possible because we save a bit image of the last item
* we looked at in the parent, and the update algorithm guarantees
* that if items above us in the tree move, they only move right.
*/
Buffer
_bt_getstackbuf(Relation rel, BTStack stack, int access)
{
Buffer buf;
BlockNumber blkno;
OffsetNumber start, offnum, maxoff;
OffsetNumber i;
Page page;
ItemId itemid;
BTItem item;
BTPageOpaque opaque;
blkno = stack->bts_blkno;
buf = _bt_getbuf(rel, blkno, access);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
maxoff = PageGetMaxOffsetNumber(page);
if (maxoff >= stack->bts_offset) {
itemid = PageGetItemId(page, stack->bts_offset);
item = (BTItem) PageGetItem(page, itemid);
/* if the item is where we left it, we're done */
if (item->bti_oid == stack->bts_btitem->bti_oid)
return (buf);
/* if the item has just moved right on this page, we're done */
for (i = OffsetNumberNext(stack->bts_offset);
i <= maxoff;
i = OffsetNumberNext(i)) {
itemid = PageGetItemId(page, i);
item = (BTItem) PageGetItem(page, itemid);
/* if the item is where we left it, we're done */
if (item->bti_oid == stack->bts_btitem->bti_oid)
return (buf);
}
}
/* by here, the item we're looking for moved right at least one page */
for (;;) {
blkno = opaque->btpo_next;
if (P_RIGHTMOST(opaque))
elog(FATAL, "my bits moved right off the end of the world!");
_bt_relbuf(rel, buf, access);
buf = _bt_getbuf(rel, blkno, access);
page = BufferGetPage(buf);
maxoff = PageGetMaxOffsetNumber(page);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/* if we have a right sibling, step over the high key */
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
/* see if it's on this page */
for (offnum = start;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum)) {
itemid = PageGetItemId(page, offnum);
item = (BTItem) PageGetItem(page, itemid);
if (item->bti_oid == stack->bts_btitem->bti_oid)
return (buf);
}
}
}
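/*
 * A minimal standalone sketch of the "walk right until we find the
 * remembered item" pattern used by _bt_getstackbuf above, assuming a toy
 * chain of parent pages that each hold a few oids.  toy_parent_page and
 * toy_find_right are hypothetical names; the real code follows btpo_next
 * block numbers, takes buffers and locks, and skips each page's high key.
 */
#include <stdio.h>
#include <stddef.h>

typedef struct toy_parent_page {
	int oids[4];
	int noids;
	struct toy_parent_page *next;	/* right sibling, NULL if rightmost */
} toy_parent_page;

static toy_parent_page *
toy_find_right(toy_parent_page *pg, int oid, int *offset)
{
	for (; pg != NULL; pg = pg->next) {	/* items only ever move right */
		int i;

		for (i = 0; i < pg->noids; i++) {
			if (pg->oids[i] == oid) {
				*offset = i;
				return pg;
			}
		}
	}
	return NULL;		/* "moved right off the end of the world" */
}

int
main(void)
{
	toy_parent_page p2 = { { 40, 50 }, 2, NULL };
	toy_parent_page p1 = { { 10, 20, 30 }, 3, &p2 };
	int off = -1;
	toy_parent_page *where = toy_find_right(&p1, 50, &off);

	printf("on second page: %d, offset %d\n", where == &p2, off);	/* 1, 1 */
	return 0;
}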
void
_bt_setpagelock(Relation rel, BlockNumber blkno, int access)
{
ItemPointerData iptr;
if (USELOCKING) {
ItemPointerSet(&iptr, blkno, P_HIKEY);
if (access == BT_WRITE)
RelationSetSingleWLockPage(rel, &iptr);
else
RelationSetSingleRLockPage(rel, &iptr);
}
}
void
_bt_unsetpagelock(Relation rel, BlockNumber blkno, int access)
{
ItemPointerData iptr;
if (USELOCKING) {
ItemPointerSet(&iptr, blkno, P_HIKEY);
if (access == BT_WRITE)
RelationUnsetSingleWLockPage(rel, &iptr);
else
RelationUnsetSingleRLockPage(rel, &iptr);
}
}
void
_bt_pagedel(Relation rel, ItemPointer tid)
{
Buffer buf;
Page page;
BlockNumber blkno;
OffsetNumber offno;
blkno = ItemPointerGetBlockNumber(tid);
offno = ItemPointerGetOffsetNumber(tid);
buf = _bt_getbuf(rel, blkno, BT_WRITE);
page = BufferGetPage(buf);
PageIndexTupleDelete(page, offno);
/* write the buffer and release the lock */
_bt_wrtbuf(rel, buf);
}

View File

@ -0,0 +1,516 @@
/*-------------------------------------------------------------------------
*
* btree.c--
* Implementation of Lehman and Yao's btree management algorithm for
* Postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
* NOTES
* This file contains only the public interface routines.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/nbtree.h"
#include "access/funcindex.h"
#include "nodes/execnodes.h"
#include "nodes/plannodes.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "catalog/index.h"
bool BuildingBtree = false;
bool FastBuild = false; /* turn this on to make bulk builds work */
/*
* btbuild() -- build a new btree index.
*
* We use a global variable to record the fact that we're creating
* a new index. This is used to avoid high-concurrency locking,
* since the index won't be visible until this transaction commits
* and since building is guaranteed to be single-threaded.
*/
void
btbuild(Relation heap,
Relation index,
int natts,
AttrNumber *attnum,
IndexStrategy istrat,
uint16 pcount,
Datum *params,
FuncIndexInfo *finfo,
PredInfo *predInfo)
{
HeapScanDesc hscan;
Buffer buffer;
HeapTuple htup;
IndexTuple itup;
TupleDesc htupdesc, itupdesc;
Datum *attdata;
bool *nulls;
InsertIndexResult res;
int nhtups, nitups;
int i;
BTItem btitem;
ExprContext *econtext;
TupleTable tupleTable;
TupleTableSlot *slot;
Oid hrelid, irelid;
Node *pred, *oldPred;
void *spool;
/* note that this is a new btree */
BuildingBtree = true;
pred = predInfo->pred;
oldPred = predInfo->oldPred;
/* initialize the btree index metadata page (if this is a new index) */
if (oldPred == NULL)
_bt_metapinit(index);
/* get tuple descriptors for heap and index relations */
htupdesc = RelationGetTupleDescriptor(heap);
itupdesc = RelationGetTupleDescriptor(index);
/* get space for data items that'll appear in the index tuple */
attdata = (Datum *) palloc(natts * sizeof(Datum));
nulls = (bool *) palloc(natts * sizeof(bool));
/*
* If this is a predicate (partial) index, we will need to evaluate the
* predicate using ExecQual, which requires the current tuple to be in a
* slot of a TupleTable. In addition, ExecQual must have an ExprContext
* referring to that slot. Here, we initialize dummy TupleTable and
* ExprContext objects for this purpose. --Nels, Feb '92
*/
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL) {
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
econtext = makeNode(ExprContext);
FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
}
#endif /* OMIT_PARTIAL_INDEX */
/* start a heap scan */
hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
htup = heap_getnext(hscan, 0, &buffer);
/* build the index */
nhtups = nitups = 0;
if (FastBuild) {
spool = _bt_spoolinit(index, 7);
res = (InsertIndexResult) NULL;
}
for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
nhtups++;
/*
* If oldPred != NULL, this is an EXTEND INDEX command, so skip
* this tuple if it was already in the existing partial index
*/
if (oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/*SetSlotContents(slot, htup);*/
slot->val = htup;
if (ExecQual((List*)oldPred, econtext) == true) {
nitups++;
continue;
}
#endif /* OMIT_PARTIAL_INDEX */
}
/* Skip this tuple if it doesn't satisfy the partial-index predicate */
if (pred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/* SetSlotContents(slot, htup); */
slot->val = htup;
if (ExecQual((List*)pred, econtext) == false)
continue;
#endif /* OMIT_PARTIAL_INDEX */
}
nitups++;
/*
* For the current heap tuple, extract all the attributes
* we use in this index, and note which are null.
*/
for (i = 1; i <= natts; i++) {
int attoff;
bool attnull;
/*
* Offsets are from the start of the tuple, and are
* zero-based; indices are one-based. The next call
* returns i - 1. That's data hiding for you.
*/
attoff = AttrNumberGetAttrOffset(i);
attdata[attoff] = GetIndexValue(htup,
htupdesc,
attoff,
attnum,
finfo,
&attnull,
buffer);
nulls[attoff] = (attnull ? 'n' : ' ');
}
/* form an index tuple and point it at the heap tuple */
itup = index_formtuple(itupdesc, attdata, nulls);
/*
* If the single index key is null, we don't insert it into
* the index. Btrees support scans on <, <=, =, >=, and >.
* Relational algebra says that A op B (where op is one of the
* operators above) returns null if either A or B is null. This
* means that no qualification used in an index scan could ever
* return true on a null attribute. It also means that indices
* can't be used by ISNULL or NOTNULL scans, but that's an
* artifact of the strategy map architecture chosen in 1986, not
* of the way nulls are handled here.
*/
if (itup->t_info & INDEX_NULL_MASK) {
pfree(itup);
continue;
}
itup->t_tid = htup->t_ctid;
btitem = _bt_formitem(itup);
/*
* if we are doing bottom-up btree build, we insert the index
* into a spool page for subsequent processing. otherwise, we
* insert into the btree.
*/
if (FastBuild) {
_bt_spool(index, btitem, spool);
} else {
res = _bt_doinsert(index, btitem);
}
pfree(btitem);
pfree(itup);
if (res) {
pfree(res);
}
}
/* okay, all heap tuples are indexed */
heap_endscan(hscan);
if (pred != NULL || oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
ExecDestroyTupleTable(tupleTable, true);
pfree(econtext);
#endif /* OMIT_PARTIAL_INDEX */
}
/*
* if we are doing bottom-up btree build, we now have a bunch of
* sorted runs in the spool pages. finish the build by (1)
* merging the runs, (2) inserting the sorted tuples into btree
* pages and (3) building the upper levels.
*/
if (FastBuild) {
_bt_spool(index, (BTItem) NULL, spool); /* flush spool */
_bt_leafbuild(index, spool);
_bt_spooldestroy(spool);
}
/*
* Since we just counted the tuples in the heap, we update its
* stats in pg_class to guarantee that the planner takes advantage
* of the index we just created. Finally, only update statistics
* during normal index definitions, not for indices on system catalogs
* created during bootstrap processing. We must close the relations
* before updating statistics to guarantee that the relcache entries
* are flushed when we increment the command counter in UpdateStats().
*/
if (IsNormalProcessingMode())
{
hrelid = heap->rd_id;
irelid = index->rd_id;
heap_close(heap);
index_close(index);
UpdateStats(hrelid, nhtups, true);
UpdateStats(irelid, nitups, false);
if (oldPred != NULL) {
if (nitups == nhtups) pred = NULL;
UpdateIndexPredicate(irelid, oldPred, pred);
}
}
/* be tidy */
pfree(nulls);
pfree(attdata);
/* all done */
BuildingBtree = false;
}
/*
* btinsert() -- insert an index tuple into a btree.
*
* Descend the tree recursively, find the appropriate location for our
* new tuple, put it there, set its unique OID as appropriate, and
* return an InsertIndexResult to the caller.
*/
InsertIndexResult
btinsert(Relation rel, IndexTuple itup)
{
BTItem btitem;
InsertIndexResult res;
if (itup->t_info & INDEX_NULL_MASK)
return ((InsertIndexResult) NULL);
btitem = _bt_formitem(itup);
res = _bt_doinsert(rel, btitem);
pfree(btitem);
return (res);
}
/*
* btgettuple() -- Get the next tuple in the scan.
*/
char *
btgettuple(IndexScanDesc scan, ScanDirection dir)
{
RetrieveIndexResult res;
/*
* If we've already initialized this scan, we can just advance it
* in the appropriate direction. If we haven't done so yet, we
* call a routine to get the first item in the scan.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
res = _bt_next(scan, dir);
else
res = _bt_first(scan, dir);
return ((char *) res);
}
/*
* btbeginscan() -- start a scan on a btree index
*/
char *
btbeginscan(Relation rel, bool fromEnd, uint16 keysz, ScanKey scankey)
{
IndexScanDesc scan;
StrategyNumber strat;
BTScanOpaque so;
/* first order the keys in the qualification */
if (keysz > 1)
_bt_orderkeys(rel, &keysz, scankey);
/* now get the scan */
scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
scan->opaque = so;
/* finally, be sure that the scan exploits the tree order */
scan->scanFromEnd = false;
scan->flags = 0x0;
if (keysz > 0) {
strat = _bt_getstrat(scan->relation, 1 /* XXX */,
scankey[0].sk_procedure);
if (strat == BTLessStrategyNumber
|| strat == BTLessEqualStrategyNumber)
scan->scanFromEnd = true;
} else {
scan->scanFromEnd = true;
}
/* register scan in case we change pages it's using */
_bt_regscan(scan);
return ((char *) scan);
}
/*
* btrescan() -- rescan an index relation
*/
void
btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* we hold a read lock on the current page in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* and we hold a read lock on the last marked item in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* reset the scan key */
if (scan->numberOfKeys > 0) {
memmove(scan->keyData,
scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
}
}
void
btmovescan(IndexScanDesc scan, Datum v)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
scan->keyData[0].sk_argument = v;
}
/*
* btendscan() -- close down a scan
*/
void
btendscan(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
if (BufferIsValid(so->btso_curbuf))
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
if (BufferIsValid(so->btso_mrkbuf))
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* don't need scan registered anymore */
_bt_dropscan(scan);
/* be tidy */
#ifdef PERFECT_MMGR
pfree (scan->opaque);
#endif /* PERFECT_MMGR */
}
/*
* btmarkpos() -- save current scan position
*/
void
btmarkpos(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release lock on old marked data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentItemData and copy to currentMarkData */
if (ItemPointerIsValid(&(scan->currentItemData))) {
so->btso_mrkbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_curbuf),
BT_READ);
scan->currentMarkData = scan->currentItemData;
}
}
/*
* btrestrpos() -- restore scan to last saved position
*/
void
btrestrpos(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release lock on current data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentMarkData and copy to currentItemData */
if (ItemPointerIsValid(&(scan->currentMarkData))) {
so->btso_curbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_mrkbuf),
BT_READ);
scan->currentItemData = scan->currentMarkData;
}
}
/* stubs */
void
btdelete(Relation rel, ItemPointer tid)
{
/* adjust any active scans that will be affected by this deletion */
_bt_adjscans(rel, tid);
/* delete the data from the page */
_bt_pagedel(rel, tid);
}

View File

@ -0,0 +1,164 @@
/*-------------------------------------------------------------------------
*
* btscan.c--
* manage scans on btrees.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
*
* NOTES
* Because we can be doing an index scan on a relation while we update
* it, we need to avoid missing data that moves around in the index.
* The routines and global variables in this file guarantee that all
* scans in the local address space stay correctly positioned. This
* is all we need to worry about, since write locking guarantees that
* no one else will be on the same page at the same time as we are.
*
* The scheme is to manage a list of active scans in the current backend.
* Whenever we add or remove records from an index, or whenever we
* split a leaf page, we check the list of active scans to see if any
* has been affected. A scan is affected only if it is on the same
* relation, and the same page, as the update.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/sdir.h"
#include "access/nbtree.h"
typedef struct BTScanListData {
IndexScanDesc btsl_scan;
struct BTScanListData *btsl_next;
} BTScanListData;
typedef BTScanListData *BTScanList;
static BTScanList BTScans = (BTScanList) NULL;
/*
* _bt_regscan() -- register a new scan.
*/
void
_bt_regscan(IndexScanDesc scan)
{
BTScanList new_el;
new_el = (BTScanList) palloc(sizeof(BTScanListData));
new_el->btsl_scan = scan;
new_el->btsl_next = BTScans;
BTScans = new_el;
}
/*
* _bt_dropscan() -- drop a scan from the scan list
*/
void
_bt_dropscan(IndexScanDesc scan)
{
BTScanList chk, last;
last = (BTScanList) NULL;
for (chk = BTScans;
chk != (BTScanList) NULL && chk->btsl_scan != scan;
chk = chk->btsl_next) {
last = chk;
}
if (chk == (BTScanList) NULL)
elog(WARN, "btree scan list trashed; can't find 0x%lx", scan);
if (last == (BTScanList) NULL)
BTScans = chk->btsl_next;
else
last->btsl_next = chk->btsl_next;
#ifdef PERFECT_MEM
pfree (chk);
#endif /* PERFECT_MEM */
}
void
_bt_adjscans(Relation rel, ItemPointer tid)
{
BTScanList l;
Oid relid;
relid = rel->rd_id;
for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next) {
if (relid == l->btsl_scan->relation->rd_id)
_bt_scandel(l->btsl_scan, ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid));
}
}
void
_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
Buffer buf;
BTScanOpaque so;
if (!_bt_scantouched(scan, blkno, offno))
return;
so = (BTScanOpaque) scan->opaque;
buf = so->btso_curbuf;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) {
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_curbuf = buf;
}
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) {
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
}
}
bool
_bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return (true);
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return (true);
return (false);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,134 @@
/*-------------------------------------------------------------------------
*
* btstrat.c--
* Strategy map entries for the btree indexed access method
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtstrat.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/genam.h"
#include "access/nbtree.h"
/*
* Note:
* StrategyNegate, StrategyCommute, and StrategyNegateCommute
* assume <, <=, ==, >=, > ordering.
*/
static StrategyNumber BTNegate[5] = {
BTGreaterEqualStrategyNumber,
BTGreaterStrategyNumber,
InvalidStrategy,
BTLessStrategyNumber,
BTLessEqualStrategyNumber
};
static StrategyNumber BTCommute[5] = {
BTGreaterStrategyNumber,
BTGreaterEqualStrategyNumber,
InvalidStrategy,
BTLessEqualStrategyNumber,
BTLessStrategyNumber
};
static StrategyNumber BTNegateCommute[5] = {
BTLessEqualStrategyNumber,
BTLessStrategyNumber,
InvalidStrategy,
BTGreaterStrategyNumber,
BTGreaterEqualStrategyNumber
};
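/*
 * A minimal standalone demo of how the three transform tables above are
 * read, assuming the usual strategy numbering 1..5 for <, <=, =, >=, >
 * and 0 for InvalidStrategy; each table is indexed by (strategy - 1).
 * The arrays below are local copies for illustration only.
 */
#include <stdio.h>

int
main(void)
{
	static const char *name[] = { "<", "<=", "=", ">=", ">" };
	static const int negate[5]         = { 4, 5, 0, 1, 2 };	/* mirrors BTNegate */
	static const int commute[5]        = { 5, 4, 0, 2, 1 };	/* mirrors BTCommute */
	static const int negate_commute[5] = { 2, 1, 0, 5, 4 };	/* mirrors BTNegateCommute */
	int s;

	/* e.g. NOT (a < b) is (a >= b); (a < b) commutes to (b > a) */
	for (s = 1; s <= 5; s++)
		printf("a %-2s b: negate %s, commute %s, negate+commute %s\n",
		       name[s - 1],
		       negate[s - 1] ? name[negate[s - 1] - 1] : "(invalid)",
		       commute[s - 1] ? name[commute[s - 1] - 1] : "(invalid)",
		       negate_commute[s - 1] ? name[negate_commute[s - 1] - 1] : "(invalid)");
	return 0;
}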
static uint16 BTLessTermData[] = { /* XXX type clash */
2,
BTLessStrategyNumber,
SK_NEGATE,
BTLessStrategyNumber,
SK_NEGATE | SK_COMMUTE
};
static uint16 BTLessEqualTermData[] = { /* XXX type clash */
2,
BTLessEqualStrategyNumber,
0x0,
BTLessEqualStrategyNumber,
SK_COMMUTE
};
static uint16 BTGreaterEqualTermData[] = { /* XXX type clash */
2,
BTGreaterEqualStrategyNumber,
0x0,
BTGreaterEqualStrategyNumber,
SK_COMMUTE
};
static uint16 BTGreaterTermData[] = { /* XXX type clash */
2,
BTGreaterStrategyNumber,
SK_NEGATE,
BTGreaterStrategyNumber,
SK_NEGATE | SK_COMMUTE
};
static StrategyTerm BTEqualExpressionData[] = {
(StrategyTerm)BTLessTermData, /* XXX */
(StrategyTerm)BTLessEqualTermData, /* XXX */
(StrategyTerm)BTGreaterEqualTermData, /* XXX */
(StrategyTerm)BTGreaterTermData, /* XXX */
NULL
};
static StrategyEvaluationData BTEvaluationData = {
/* XXX static for simplicity */
BTMaxStrategyNumber,
(StrategyTransformMap)BTNegate, /* XXX */
(StrategyTransformMap)BTCommute, /* XXX */
(StrategyTransformMap)BTNegateCommute, /* XXX */
{ NULL, NULL, (StrategyExpression)BTEqualExpressionData, NULL, NULL,
NULL,NULL,NULL,NULL,NULL,NULL,NULL}
};
/* ----------------------------------------------------------------
* RelationGetBTStrategy
* ----------------------------------------------------------------
*/
StrategyNumber
_bt_getstrat(Relation rel,
AttrNumber attno,
RegProcedure proc)
{
StrategyNumber strat;
strat = RelationGetStrategy(rel, attno, &BTEvaluationData, proc);
Assert(StrategyNumberIsValid(strat));
return (strat);
}
bool
_bt_invokestrat(Relation rel,
AttrNumber attno,
StrategyNumber strat,
Datum left,
Datum right)
{
return (RelationInvokeStrategy(rel, &BTEvaluationData, attno, strat,
left, right));
}

View File

@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
*
* btutils.c--
* Utility code for Postgres btree implementation.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <stdio.h>
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "fmgr.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "utils/datum.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/iqual.h"
#include "access/nbtree.h"
ScanKey
_bt_mkscankey(Relation rel, IndexTuple itup)
{
ScanKey skey;
TupleDesc itupdesc;
int natts;
int i;
Datum arg;
RegProcedure proc;
bool null;
natts = rel->rd_rel->relnatts;
itupdesc = RelationGetTupleDescriptor(rel);
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
for (i = 0; i < natts; i++) {
arg = index_getattr(itup, i + 1, itupdesc, &null);
proc = index_getprocid(rel, i + 1, BTORDER_PROC);
ScanKeyEntryInitialize(&skey[i],
0x0, (AttrNumber) (i + 1), proc, arg);
}
return (skey);
}
void
_bt_freeskey(ScanKey skey)
{
pfree(skey);
}
void
_bt_freestack(BTStack stack)
{
BTStack ostack;
while (stack != (BTStack) NULL) {
ostack = stack;
stack = stack->bts_parent;
pfree(ostack->bts_btitem);
pfree(ostack);
}
}
/*
* _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
*
* The order of the keys in the qual must match the ordering imposed by
* the index. This routine only needs to be called if there is
* more than one qual clause using this index.
*/
void
_bt_orderkeys(Relation relation, uint16 *numberOfKeys, ScanKey key)
{
ScanKey xform;
ScanKeyData *cur;
StrategyMap map;
int nbytes;
long test;
int i, j;
int init[BTMaxStrategyNumber+1];
/* haven't looked at any strategies yet */
for (i = 0; i <= BTMaxStrategyNumber; i++)
init[i] = 0;
/* get space for the modified array of keys */
nbytes = BTMaxStrategyNumber * sizeof(ScanKeyData);
xform = (ScanKey) palloc(nbytes);
memset(xform, 0, nbytes);
/* get the strategy map for this index/attribute pair */
/*
* XXX
* When we support multiple keys in a single index, this is what
* we'll want to do. At present, the planner is hosed, so we
* hard-wire the attribute number below. Postgres only does single-
* key indices...
* map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
* BTMaxStrategyNumber,
* key->data[0].attributeNumber);
*/
map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
BTMaxStrategyNumber,
1 /* XXX */ );
/* check each key passed in */
for (i = *numberOfKeys; --i >= 0; ) {
cur = &key[i];
for (j = BTMaxStrategyNumber; --j >= 0; ) {
if (cur->sk_procedure == map->entry[j].sk_procedure)
break;
}
/* have we seen one of these before? */
if (init[j]) {
/* yup, use the appropriate value */
test =
(long) FMGR_PTR2(cur->sk_func, cur->sk_procedure,
cur->sk_argument, xform[j].sk_argument);
if (test)
xform[j].sk_argument = cur->sk_argument;
} else {
/* nope, use this value */
memmove(&xform[j], cur, sizeof(*cur));
init[j] = 1;
}
}
/* if = has been specified, no other key will be used */
if (init[BTEqualStrategyNumber - 1]) {
init[BTLessStrategyNumber - 1] = 0;
init[BTLessEqualStrategyNumber - 1] = 0;
init[BTGreaterEqualStrategyNumber - 1] = 0;
init[BTGreaterStrategyNumber - 1] = 0;
}
/* only one of <, <= */
if (init[BTLessStrategyNumber - 1]
&& init[BTLessEqualStrategyNumber - 1]) {
ScanKeyData *lt, *le;
lt = &xform[BTLessStrategyNumber - 1];
le = &xform[BTLessEqualStrategyNumber - 1];
/*
* DO NOT use the cached function stuff here -- this is key
* ordering, happens only when the user expresses a hokey
* qualification, and gets executed only once, anyway. The
* transform maps are hard-coded, and can't be initialized
* in the correct way.
*/
test = (long) fmgr(le->sk_procedure, le->sk_argument, lt->sk_argument);
if (test)
init[BTLessEqualStrategyNumber - 1] = 0;
else
init[BTLessStrategyNumber - 1] = 0;
}
/* only one of >, >= */
if (init[BTGreaterStrategyNumber - 1]
&& init[BTGreaterEqualStrategyNumber - 1]) {
ScanKeyData *gt, *ge;
gt = &xform[BTGreaterStrategyNumber - 1];
ge = &xform[BTGreaterEqualStrategyNumber - 1];
/* see note above on function cache */
test = (long) fmgr(ge->sk_procedure, gt->sk_argument, gt->sk_argument);
if (test)
init[BTGreaterStrategyNumber - 1] = 0;
else
init[BTGreaterEqualStrategyNumber - 1] = 0;
}
/* okay, reorder and count */
j = 0;
for (i = BTMaxStrategyNumber; --i >= 0; )
if (init[i])
key[j++] = xform[i];
*numberOfKeys = j;
pfree(xform);
}
bool
_bt_checkqual(IndexScanDesc scan, IndexTuple itup)
{
if (scan->numberOfKeys > 0)
return (index_keytest(itup, RelationGetTupleDescriptor(scan->relation),
scan->numberOfKeys, scan->keyData));
else
return (true);
}
BTItem
_bt_formitem(IndexTuple itup)
{
int nbytes_btitem;
BTItem btitem;
Size tuplen;
extern Oid newoid();
/* disallow nulls in btree keys */
if (itup->t_info & INDEX_NULL_MASK)
elog(WARN, "btree indices cannot include null keys");
/* make a copy of the index tuple with room for the sequence number */
tuplen = IndexTupleSize(itup);
nbytes_btitem = tuplen +
(sizeof(BTItemData) - sizeof(IndexTupleData));
btitem = (BTItem) palloc(nbytes_btitem);
memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen);
btitem->bti_oid = newoid();
return (btitem);
}
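
Editor's note: a minimal standalone sketch (not part of nbtutils.c) of the
key-reduction idea behind _bt_orderkeys(): for a conjunctive qual on one
attribute, only the tighter of "<" / "<=" needs to survive, and an "=" key
would make everything else redundant. The real routine decides by invoking
the operators' own procedures through fmgr; this toy uses plain int
comparison, and all names below are hypothetical.

#include <stdio.h>
#include <string.h>

struct qual { const char *op; int arg; };

int main(void)
{
    /* qual: x < 10 AND x <= 7 AND x > 2 */
    struct qual quals[3] = { {"<", 10}, {"<=", 7}, {">", 2} };
    int lt = -1, le = -1, i;

    for (i = 0; i < 3; i++) {
        if (strcmp(quals[i].op, "<") == 0)  lt = i;
        if (strcmp(quals[i].op, "<=") == 0) le = i;
    }

    /* keep only the more restrictive upper bound: x <= 7 beats x < 10 */
    if (lt >= 0 && le >= 0) {
        if (quals[le].arg < quals[lt].arg)
            lt = -1;                    /* drop the "<" key */
        else
            le = -1;                    /* drop the "<=" key */
    }

    for (i = 0; i < 3; i++)
        if (i == lt || i == le || quals[i].op[0] == '>')
            printf("keep: x %s %d\n", quals[i].op, quals[i].arg);
    return 0;
}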

View File

@ -0,0 +1,26 @@
/*-------------------------------------------------------------------------
*
* printtup.h--
*
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: printtup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef PRINTTUP_H
#define PRINTTUP_H
#include "access/htup.h"
#include "access/tupdesc.h"
extern Oid typtoout(Oid type);
extern void printtup(HeapTuple tuple, TupleDesc typeinfo);
extern void showatts(char *name, TupleDesc attinfo);
extern void debugtup(HeapTuple tuple, TupleDesc typeinfo);
extern void printtup_internal(HeapTuple tuple, TupleDesc typeinfo);
extern Oid gettypelem(Oid type);
#endif /* PRINTTUP_H */

View File

@ -0,0 +1,87 @@
/*-------------------------------------------------------------------------
*
* relscan.h--
* POSTGRES internal relation scan descriptor definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: relscan.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef RELSCAN_H
#define RELSCAN_H
#include "c.h"
#include "access/skey.h"
#include "storage/buf.h"
#include "access/htup.h"
#include "storage/itemptr.h"
#include "utils/tqual.h"
#include "utils/rel.h"
typedef ItemPointerData MarkData;
typedef struct HeapScanDescData {
Relation rs_rd; /* pointer to relation descriptor */
HeapTuple rs_ptup; /* previous tuple in scan */
HeapTuple rs_ctup; /* current tuple in scan */
HeapTuple rs_ntup; /* next tuple in scan */
Buffer rs_pbuf; /* previous buffer in scan */
Buffer rs_cbuf; /* current buffer in scan */
Buffer rs_nbuf; /* next buffer in scan */
ItemPointerData rs_mptid; /* marked previous tid */
ItemPointerData rs_mctid; /* marked current tid */
ItemPointerData rs_mntid; /* marked next tid */
ItemPointerData rs_mcd; /* marked current delta XXX ??? */
bool rs_atend; /* restart scan at end? */
TimeQual rs_tr; /* time qualification */
uint16 rs_cdelta; /* current delta in chain */
uint16 rs_nkeys; /* number of attributes in keys */
ScanKey rs_key; /* key descriptors */
} HeapScanDescData;
typedef HeapScanDescData *HeapScanDesc;
typedef struct IndexScanDescData {
Relation relation; /* relation descriptor */
void *opaque; /* am-specific slot */
ItemPointerData previousItemData; /* previous index pointer */
ItemPointerData currentItemData; /* current index pointer */
ItemPointerData nextItemData; /* next index pointer */
MarkData previousMarkData; /* marked previous pointer */
MarkData currentMarkData; /* marked current pointer */
MarkData nextMarkData; /* marked next pointer */
uint8 flags; /* scan position flags */
bool scanFromEnd; /* restart scan at end? */
uint16 numberOfKeys; /* number of key attributes */
ScanKey keyData; /* key descriptor */
} IndexScanDescData;
typedef IndexScanDescData *IndexScanDesc;
/* ----------------
* IndexScanDescPtr is used in the executor where we have to
* keep track of several index scans when using several indices
* - cim 9/10/89
* ----------------
*/
typedef IndexScanDesc *IndexScanDescPtr;
/*
* HeapScanIsValid --
* True iff the heap scan is valid.
*/
#define HeapScanIsValid(scan) PointerIsValid(scan)
/*
* IndexScanIsValid --
* True iff the index scan is valid.
*/
#define IndexScanIsValid(scan) PointerIsValid(scan)
#endif /* RELSCAN_H */
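
Editor's note: the scan descriptors above carry both current positions and
"marked" copies (currentItemData vs. currentMarkData and friends) so a scan
can be marked and later restored. The sketch below shows just that pattern
on a stripped-down struct; it is not Postgres code and the names are
hypothetical.

#include <stdio.h>

typedef struct { unsigned block, offset; } ItemPos;

typedef struct {
    ItemPos current;          /* plays the role of currentItemData */
    ItemPos marked;           /* plays the role of currentMarkData */
} MiniScan;

static void mark(MiniScan *s)    { s->marked = s->current; }
static void restore(MiniScan *s) { s->current = s->marked; }

int main(void)
{
    MiniScan s = { {3, 7}, {0, 0} };
    mark(&s);                 /* remember position (3,7) */
    s.current.offset = 12;    /* the scan moves on */
    restore(&s);              /* and comes back to (3,7) */
    printf("current = (%u,%u)\n", s.current.block, s.current.offset);
    return 0;
}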

View File

@ -0,0 +1,98 @@
/*-------------------------------------------------------------------------
*
* rtree.h--
* common declarations for the rtree access method code.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: rtree.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef RTREE_H
#define RTREE_H
/* see rtstrat.c for what all this is about */
#define RTNStrategies 8
#define RTLeftStrategyNumber 1
#define RTOverLeftStrategyNumber 2
#define RTOverlapStrategyNumber 3
#define RTOverRightStrategyNumber 4
#define RTRightStrategyNumber 5
#define RTSameStrategyNumber 6
#define RTContainsStrategyNumber 7
#define RTContainedByStrategyNumber 8
#define RTNProcs 3
#define RT_UNION_PROC 1
#define RT_INTER_PROC 2
#define RT_SIZE_PROC 3
#define F_LEAF (1 << 0)
typedef struct RTreePageOpaqueData {
uint32 flags;
} RTreePageOpaqueData;
typedef RTreePageOpaqueData *RTreePageOpaque;
/*
* When we descend a tree, we keep a stack of parent pointers.
*/
typedef struct RTSTACK {
struct RTSTACK *rts_parent;
OffsetNumber rts_child;
BlockNumber rts_blk;
} RTSTACK;
/*
* When we're doing a scan, we need to keep track of the parent stack
* for the marked and current items. Also, rtrees have the following
* property: if you're looking for the box (1,1,2,2), on the internal
* nodes you have to search for all boxes that *contain* (1,1,2,2), and
* not the ones that match it. We have a private scan key for internal
* nodes in the opaque structure for rtrees for this reason. See
* access/index-rtree/rtscan.c and rtstrat.c for how it gets initialized.
*/
typedef struct RTreeScanOpaqueData {
struct RTSTACK *s_stack;
struct RTSTACK *s_markstk;
uint16 s_flags;
uint16 s_internalNKey;
ScanKey s_internalKey;
} RTreeScanOpaqueData;
typedef RTreeScanOpaqueData *RTreeScanOpaque;
/*
* When we're doing a scan and updating a tree at the same time, the
* updates may affect the scan. We use the flags entry of the scan's
* opaque space to record our actual position in response to updates
* that we can't handle simply by adjusting pointers.
*/
#define RTS_CURBEFORE ((uint16) (1 << 0))
#define RTS_MRKBEFORE ((uint16) (1 << 1))
/* root page of an rtree */
#define P_ROOT 0
/*
* When we update a relation on which we're doing a scan, we need to
* check the scan and fix it if the update affected any of the pages it
* touches. Otherwise, we can miss records that we should see. The only
* times we need to do this are for deletions and splits. See the code in
* rtscan.c for how the scan is fixed. These two constants tell us what sort
* of operation changed the index.
*/
#define RTOP_DEL 0
#define RTOP_SPLIT 1
/* defined in rtree.c */
extern void freestack(RTSTACK *s);
#endif /* RTREE_H */
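
Editor's note: RTSTACK above records, for each page on the path down the
tree, the parent block and the child slot we descended through, so a scan
can back up and resume (see rtget.c and freestack() in rtree.c). Below is a
small standalone sketch of that push-on-descend, pop-to-backtrack pattern;
the struct and function names are hypothetical.

#include <stdio.h>
#include <stdlib.h>

typedef struct MiniStack {
    struct MiniStack *parent;  /* like rts_parent */
    unsigned child;            /* like rts_child: slot we descended through */
    unsigned blk;              /* like rts_blk: block of that parent page */
} MiniStack;

static MiniStack *push(MiniStack *top, unsigned blk, unsigned child)
{
    MiniStack *s = malloc(sizeof(*s));
    s->parent = top;
    s->blk = blk;
    s->child = child;
    return s;
}

int main(void)
{
    MiniStack *top = NULL;

    /* descend the root (blk 0) via slot 2, then an internal page (blk 5) via slot 1 */
    top = push(top, 0, 2);
    top = push(top, 5, 1);

    /* backtrack: pop entries the way rtfirst()/freestack() walk rts_parent */
    while (top != NULL) {
        MiniStack *up = top->parent;
        printf("resume on block %u after slot %u\n", top->blk, top->child);
        free(top);
        top = up;
    }
    return 0;
}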

View File

@ -0,0 +1,14 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/rtree (R-Tree access method)
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= rtget.c rtproc.c rtree.c rtscan.c rtstrat.c

View File

@ -0,0 +1,320 @@
/*-------------------------------------------------------------------------
*
* rtget.c--
* fetch tuples from an rtree scan.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtget.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/iqual.h"
#include "access/rtree.h"
#include "access/sdir.h"
static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n,
ScanDirection dir);
static RetrieveIndexResult rtscancache(IndexScanDesc s, ScanDirection dir);
static RetrieveIndexResult rtfirst(IndexScanDesc s, ScanDirection dir);
static RetrieveIndexResult rtnext(IndexScanDesc s, ScanDirection dir);
static ItemPointer rtheapptr(Relation r, ItemPointer itemp);
RetrieveIndexResult
rtgettuple(IndexScanDesc s, ScanDirection dir)
{
RetrieveIndexResult res;
/* if we have it cached in the scan desc, just return the value */
if ((res = rtscancache(s, dir)) != (RetrieveIndexResult) NULL)
return (res);
/* not cached, so we'll have to do some work */
if (ItemPointerIsValid(&(s->currentItemData))) {
res = rtnext(s, dir);
} else {
res = rtfirst(s, dir);
}
return (res);
}
static RetrieveIndexResult
rtfirst(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
RetrieveIndexResult res;
RTreePageOpaque po;
RTreeScanOpaque so;
RTSTACK *stk;
BlockNumber blk;
IndexTuple it;
ItemPointer ip;
b = ReadBuffer(s->relation, P_ROOT);
p = BufferGetPage(b);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
so = (RTreeScanOpaque) s->opaque;
for (;;) {
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir))
n = findnext(s, p, maxoff, dir);
else
n = findnext(s, p, FirstOffsetNumber, dir);
while (n < FirstOffsetNumber || n > maxoff) {
ReleaseBuffer(b);
if (so->s_stack == (RTSTACK *) NULL)
return ((RetrieveIndexResult) NULL);
stk = so->s_stack;
b = ReadBuffer(s->relation, stk->rts_blk);
p = BufferGetPage(b);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
maxoff = PageGetMaxOffsetNumber(p);
if (ScanDirectionIsBackward(dir)) {
n = OffsetNumberPrev(stk->rts_child);
} else {
n = OffsetNumberNext(stk->rts_child);
}
so->s_stack = stk->rts_parent;
pfree(stk);
n = findnext(s, p, n, dir);
}
if (po->flags & F_LEAF) {
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
ip = (ItemPointer) palloc(sizeof(ItemPointerData));
memmove((char *) ip, (char *) &(it->t_tid),
sizeof(ItemPointerData));
ReleaseBuffer(b);
res = FormRetrieveIndexResult(&(s->currentItemData), ip);
return (res);
} else {
stk = (RTSTACK *) palloc(sizeof(RTSTACK));
stk->rts_child = n;
stk->rts_blk = BufferGetBlockNumber(b);
stk->rts_parent = so->s_stack;
so->s_stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
ReleaseBuffer(b);
b = ReadBuffer(s->relation, blk);
p = BufferGetPage(b);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
}
}
}
static RetrieveIndexResult
rtnext(IndexScanDesc s, ScanDirection dir)
{
Buffer b;
Page p;
OffsetNumber n;
OffsetNumber maxoff;
RetrieveIndexResult res;
RTreePageOpaque po;
RTreeScanOpaque so;
RTSTACK *stk;
BlockNumber blk;
IndexTuple it;
ItemPointer ip;
blk = ItemPointerGetBlockNumber(&(s->currentItemData));
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
if (ScanDirectionIsForward(dir)) {
n = OffsetNumberNext(n);
} else {
n = OffsetNumberPrev(n);
}
b = ReadBuffer(s->relation, blk);
p = BufferGetPage(b);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
so = (RTreeScanOpaque) s->opaque;
for (;;) {
maxoff = PageGetMaxOffsetNumber(p);
n = findnext(s, p, n, dir);
while (n < FirstOffsetNumber || n > maxoff) {
ReleaseBuffer(b);
if (so->s_stack == (RTSTACK *) NULL)
return ((RetrieveIndexResult) NULL);
stk = so->s_stack;
b = ReadBuffer(s->relation, stk->rts_blk);
p = BufferGetPage(b);
maxoff = PageGetMaxOffsetNumber(p);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir)) {
n = OffsetNumberPrev(stk->rts_child);
} else {
n = OffsetNumberNext(stk->rts_child);
}
so->s_stack = stk->rts_parent;
pfree(stk);
n = findnext(s, p, n, dir);
}
if (po->flags & F_LEAF) {
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
ip = (ItemPointer) palloc(sizeof(ItemPointerData));
memmove((char *) ip, (char *) &(it->t_tid),
sizeof(ItemPointerData));
ReleaseBuffer(b);
res = FormRetrieveIndexResult(&(s->currentItemData), ip);
return (res);
} else {
stk = (RTSTACK *) palloc(sizeof(RTSTACK));
stk->rts_child = n;
stk->rts_blk = BufferGetBlockNumber(b);
stk->rts_parent = so->s_stack;
so->s_stack = stk;
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
blk = ItemPointerGetBlockNumber(&(it->t_tid));
ReleaseBuffer(b);
b = ReadBuffer(s->relation, blk);
p = BufferGetPage(b);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
if (ScanDirectionIsBackward(dir)) {
n = PageGetMaxOffsetNumber(p);
} else {
n = FirstOffsetNumber;
}
}
}
}
static OffsetNumber
findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
{
OffsetNumber maxoff;
IndexTuple it;
RTreePageOpaque po;
RTreeScanOpaque so;
maxoff = PageGetMaxOffsetNumber(p);
po = (RTreePageOpaque) PageGetSpecialPointer(p);
so = (RTreeScanOpaque) s->opaque;
/*
* If we modified the index during the scan, we may be left with a
* pointer to a ghost tuple just before the scan position. If this is
* the case, back up one.
*/
if (so->s_flags & RTS_CURBEFORE) {
so->s_flags &= ~RTS_CURBEFORE;
n = OffsetNumberPrev(n);
}
while (n >= FirstOffsetNumber && n <= maxoff) {
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
if (po->flags & F_LEAF) {
if (index_keytest(it,
RelationGetTupleDescriptor(s->relation),
s->numberOfKeys, s->keyData))
break;
} else {
if (index_keytest(it,
RelationGetTupleDescriptor(s->relation),
so->s_internalNKey, so->s_internalKey))
break;
}
if (ScanDirectionIsBackward(dir)) {
n = OffsetNumberPrev(n);
} else {
n = OffsetNumberNext(n);
}
}
return (n);
}
static RetrieveIndexResult
rtscancache(IndexScanDesc s, ScanDirection dir)
{
RetrieveIndexResult res;
ItemPointer ip;
if (!(ScanDirectionIsNoMovement(dir)
&& ItemPointerIsValid(&(s->currentItemData)))) {
return ((RetrieveIndexResult) NULL);
}
ip = rtheapptr(s->relation, &(s->currentItemData));
if (ItemPointerIsValid(ip))
res = FormRetrieveIndexResult(&(s->currentItemData), ip);
else
res = (RetrieveIndexResult) NULL;
return (res);
}
/*
* rtheapptr returns the item pointer to the tuple in the heap relation
* for which itemp is the index relation item pointer.
*/
static ItemPointer
rtheapptr(Relation r, ItemPointer itemp)
{
Buffer b;
Page p;
IndexTuple it;
ItemPointer ip;
OffsetNumber n;
ip = (ItemPointer) palloc(sizeof(ItemPointerData));
if (ItemPointerIsValid(itemp)) {
b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp));
p = BufferGetPage(b);
n = ItemPointerGetOffsetNumber(itemp);
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
memmove((char *) ip, (char *) &(it->t_tid),
sizeof(ItemPointerData));
ReleaseBuffer(b);
} else {
ItemPointerSetInvalid(ip);
}
return (ip);
}

View File

@ -0,0 +1,150 @@
/*-------------------------------------------------------------------------
*
* rtproc.c--
* pg_amproc entries for rtrees.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtproc.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <math.h>
#include <string.h>
#include "postgres.h"
#include "utils/elog.h"
#include "utils/geo-decls.h"
#include "utils/palloc.h"
BOX *
rt_box_union(BOX *a, BOX *b)
{
BOX *n;
if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL)
elog(WARN, "Cannot allocate box for union");
n->xh = Max(a->xh, b->xh);
n->yh = Max(a->yh, b->yh);
n->xl = Min(a->xl, b->xl);
n->yl = Min(a->yl, b->yl);
return (n);
}
BOX *
rt_box_inter(BOX *a, BOX *b)
{
BOX *n;
if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL)
elog(WARN, "Cannot allocate box for union");
n->xh = Min(a->xh, b->xh);
n->yh = Min(a->yh, b->yh);
n->xl = Max(a->xl, b->xl);
n->yl = Max(a->yl, b->yl);
if (n->xh < n->xl || n->yh < n->yl) {
pfree(n);
return ((BOX *) NULL);
}
return (n);
}
void
rt_box_size(BOX *a, float *size)
{
if (a == (BOX *) NULL || a->xh <= a->xl || a->yh <= a->yl)
*size = 0.0;
else
*size = (float) ((a->xh - a->xl) * (a->yh - a->yl));
return;
}
/*
* rt_bigbox_size() -- Compute a size for big boxes.
*
* In an earlier release of the system, this routine did something
* different from rt_box_size. We now use floats, rather than ints,
* as the return type for the size routine, so we no longer need to
* have a special return type for big boxes.
*/
void
rt_bigbox_size(BOX *a, float *size)
{
rt_box_size(a, size);
}
POLYGON *
rt_poly_union(POLYGON *a, POLYGON *b)
{
POLYGON *p;
p = (POLYGON *)PALLOCTYPE(POLYGON);
if (!PointerIsValid(p))
elog(WARN, "Cannot allocate polygon for union");
memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */
p->size = sizeof(POLYGON);
p->npts = 0;
p->boundbox.xh = Max(a->boundbox.xh, b->boundbox.xh);
p->boundbox.yh = Max(a->boundbox.yh, b->boundbox.yh);
p->boundbox.xl = Min(a->boundbox.xl, b->boundbox.xl);
p->boundbox.yl = Min(a->boundbox.yl, b->boundbox.yl);
return p;
}
void
rt_poly_size(POLYGON *a, float *size)
{
double xdim, ydim;
if (a == (POLYGON *) NULL ||
a->boundbox.xh <= a->boundbox.xl ||
a->boundbox.yh <= a->boundbox.yl)
*size = 0.0;
else {
xdim = (a->boundbox.xh - a->boundbox.xl);
ydim = (a->boundbox.yh - a->boundbox.yl);
*size = (float) (xdim * ydim);
}
return;
}
POLYGON *
rt_poly_inter(POLYGON *a, POLYGON *b)
{
POLYGON *p;
p = (POLYGON *) PALLOCTYPE(POLYGON);
if (!PointerIsValid(p))
elog(WARN, "Cannot allocate polygon for intersection");
memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */
p->size = sizeof(POLYGON);
p->npts = 0;
p->boundbox.xh = Min(a->boundbox.xh, b->boundbox.xh);
p->boundbox.yh = Min(a->boundbox.yh, b->boundbox.yh);
p->boundbox.xl = Max(a->boundbox.xl, b->boundbox.xl);
p->boundbox.yl = Max(a->boundbox.yl, b->boundbox.yl);
if (p->boundbox.xh < p->boundbox.xl || p->boundbox.yh < p->boundbox.yl)
{
pfree(p);
return ((POLYGON *) NULL);
}
return (p);
}
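
Editor's note: a worked example you can check against rt_box_union,
rt_box_inter and rt_box_size above. The difference between the union's
size and the intersection's size is exactly the "wasted space" that
picksplit() in rtree.c uses to pick its two seeds. This is a standalone
sketch with a plain stand-in for BOX; names are hypothetical.

#include <stdio.h>

/* stand-in for BOX: (xl,yl) lower-left corner, (xh,yh) upper-right corner */
typedef struct { double xl, yl, xh, yh; } MiniBox;

static double box_size(MiniBox b)
{
    if (b.xh <= b.xl || b.yh <= b.yl)
        return 0.0;
    return (b.xh - b.xl) * (b.yh - b.yl);
}

static MiniBox box_union(MiniBox a, MiniBox b)
{
    MiniBox u;
    u.xl = a.xl < b.xl ? a.xl : b.xl;
    u.yl = a.yl < b.yl ? a.yl : b.yl;
    u.xh = a.xh > b.xh ? a.xh : b.xh;
    u.yh = a.yh > b.yh ? a.yh : b.yh;
    return u;
}

static MiniBox box_inter(MiniBox a, MiniBox b)
{
    MiniBox n;
    n.xl = a.xl > b.xl ? a.xl : b.xl;
    n.yl = a.yl > b.yl ? a.yl : b.yl;
    n.xh = a.xh < b.xh ? a.xh : b.xh;
    n.yh = a.yh < b.yh ? a.yh : b.yh;
    return n;               /* "empty" (size 0) if the boxes do not overlap */
}

int main(void)
{
    MiniBox a = {0, 0, 2, 2}, b = {1, 1, 3, 3};
    double u = box_size(box_union(a, b));     /* union (0,0,3,3) -> 9 */
    double i = box_size(box_inter(a, b));     /* inter (1,1,2,2) -> 1 */
    printf("union=%.0f inter=%.0f waste=%.0f\n", u, i, u - i);
    return 0;
}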

View File

@ -0,0 +1,955 @@
/*-------------------------------------------------------------------------
*
* rtree.c--
* interface routines for the postgres rtree indexed access method.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "utils/excid.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/rtree.h"
#include "access/rtscan.h"
#include "access/funcindex.h"
#include "access/tupdesc.h"
#include "nodes/execnodes.h"
#include "nodes/plannodes.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "catalog/index.h"
typedef struct SPLITVEC {
OffsetNumber *spl_left;
int spl_nleft;
char *spl_ldatum;
OffsetNumber *spl_right;
int spl_nright;
char *spl_rdatum;
} SPLITVEC;
typedef struct RTSTATE {
func_ptr unionFn; /* union function */
func_ptr sizeFn; /* size function */
func_ptr interFn; /* intersection function */
} RTSTATE;
/* non-export function prototypes */
static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup,
RTSTATE *rtstate);
static void rttighten(Relation r, RTSTACK *stk, char *datum, int att_size,
RTSTATE *rtstate);
static InsertIndexResult dosplit(Relation r, Buffer buffer, RTSTACK *stack,
IndexTuple itup, RTSTATE *rtstate);
static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup,
IndexTuple rtup, RTSTATE *rtstate);
static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt);
static void picksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup,
RTSTATE *rtstate);
static void RTInitBuffer(Buffer b, uint32 f);
static OffsetNumber choose(Relation r, Page p, IndexTuple it,
RTSTATE *rtstate);
static int nospace(Page p, IndexTuple it);
static void initRtstate(RTSTATE *rtstate, Relation index);
void
rtbuild(Relation heap,
Relation index,
int natts,
AttrNumber *attnum,
IndexStrategy istrat,
uint16 pcount,
Datum *params,
FuncIndexInfo *finfo,
PredInfo *predInfo)
{
HeapScanDesc scan;
Buffer buffer;
AttrNumber i;
HeapTuple htup;
IndexTuple itup;
TupleDesc hd, id;
InsertIndexResult res;
Datum *d;
bool *nulls;
int nb, nh, ni;
ExprContext *econtext;
TupleTable tupleTable;
TupleTableSlot *slot;
Oid hrelid, irelid;
Node *pred, *oldPred;
RTSTATE rtState;
initRtstate(&rtState, index);
/* rtrees only know how to do stupid locking now */
RelationSetLockForWrite(index);
pred = predInfo->pred;
oldPred = predInfo->oldPred;
/*
* We expect to be called exactly once for any index relation.
* If that's not the case, big trouble's what we have.
*/
if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0)
elog(WARN, "%s already contains data", index->rd_rel->relname.data);
/* initialize the root page (if this is a new index) */
if (oldPred == NULL) {
buffer = ReadBuffer(index, P_NEW);
RTInitBuffer(buffer, F_LEAF);
WriteBuffer(buffer);
}
/* init the tuple descriptors and get set for a heap scan */
hd = RelationGetTupleDescriptor(heap);
id = RelationGetTupleDescriptor(index);
d = (Datum *)palloc(natts * sizeof (*d));
nulls = (bool *)palloc(natts * sizeof (*nulls));
/*
* If this is a predicate (partial) index, we will need to evaluate the
* predicate using ExecQual, which requires the current tuple to be in a
* slot of a TupleTable. In addition, ExecQual must have an ExprContext
* referring to that slot. Here, we initialize dummy TupleTable and
* ExprContext objects for this purpose. --Nels, Feb '92
*/
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL) {
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
econtext = makeNode(ExprContext);
FillDummyExprContext(econtext, slot, hd, buffer);
}
#endif /* OMIT_PARTIAL_INDEX */
scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
htup = heap_getnext(scan, 0, &buffer);
/* count the tuples as we insert them */
nh = ni = 0;
for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) {
nh++;
/*
* If oldPred != NULL, this is an EXTEND INDEX command, so skip
* this tuple if it was already in the existing partial index
*/
if (oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/*SetSlotContents(slot, htup); */
slot->val = htup;
if (ExecQual((List*)oldPred, econtext) == true) {
ni++;
continue;
}
#endif /* OMIT_PARTIAL_INDEX */
}
/* Skip this tuple if it doesn't satisfy the partial-index predicate */
if (pred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/*SetSlotContents(slot, htup); */
slot->val = htup;
if (ExecQual((List*)pred, econtext) == false)
continue;
#endif /* OMIT_PARTIAL_INDEX */
}
ni++;
/*
* For the current heap tuple, extract all the attributes
* we use in this index, and note which are null.
*/
for (i = 1; i <= natts; i++) {
int attoff;
bool attnull;
/*
* Offsets are from the start of the tuple, and are
* zero-based; indices are one-based. The next call
* returns i - 1. That's data hiding for you.
*/
attoff = AttrNumberGetAttrOffset(i);
/*
d[attoff] = HeapTupleGetAttributeValue(htup, buffer,
*/
d[attoff] = GetIndexValue(htup,
hd,
attoff,
attnum,
finfo,
&attnull,
buffer);
nulls[attoff] = (attnull ? 'n' : ' ');
}
/* form an index tuple and point it at the heap tuple */
itup = index_formtuple(id, &d[0], nulls);
itup->t_tid = htup->t_ctid;
/*
* Since we already have the index relation locked, we
* call rtdoinsert directly. Normal access method calls
* dispatch through rtinsert, which locks the relation
* for write. This is the right thing to do if you're
* inserting single tups, but not when you're initializing
* the whole index at once.
*/
res = rtdoinsert(index, itup, &rtState);
pfree(itup);
pfree(res);
}
/* okay, all heap tuples are indexed */
heap_endscan(scan);
RelationUnsetLockForWrite(index);
if (pred != NULL || oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
ExecDestroyTupleTable(tupleTable, true);
pfree(econtext);
#endif /* OMIT_PARTIAL_INDEX */
}
/*
* Since we just counted the tuples in the heap, we update its
* stats in pg_relation to guarantee that the planner takes
* advantage of the index we just created. UpdateStats() does a
* CommandCounterIncrement(), which flushes changed entries from
* the system relcache. The act of constructing an index changes
* these heap and index tuples in the system catalogs, so they
* need to be flushed. We close them to guarantee that they
* will be.
*/
hrelid = heap->rd_id;
irelid = index->rd_id;
heap_close(heap);
index_close(index);
UpdateStats(hrelid, nh, true);
UpdateStats(irelid, ni, false);
if (oldPred != NULL) {
if (ni == nh) pred = NULL;
UpdateIndexPredicate(irelid, oldPred, pred);
}
/* be tidy */
pfree(nulls);
pfree(d);
}
/*
* rtinsert -- wrapper for rtree tuple insertion.
*
* This is the public interface routine for tuple insertion in rtrees.
* It doesn't do any work; just locks the relation and passes the buck.
*/
InsertIndexResult
rtinsert(Relation r, IndexTuple itup)
{
InsertIndexResult res;
RTSTATE rtState;
initRtstate(&rtState, r);
RelationSetLockForWrite(r);
res = rtdoinsert(r, itup, &rtState);
/* XXX two-phase locking -- don't unlock the relation until EOT */
return (res);
}
static InsertIndexResult
rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate)
{
Page page;
Buffer buffer;
BlockNumber blk;
IndexTuple which;
OffsetNumber l;
RTSTACK *stack;
InsertIndexResult res;
RTreePageOpaque opaque;
char *datum;
blk = P_ROOT;
buffer = InvalidBuffer;
stack = (RTSTACK *) NULL;
do {
/* let go of current buffer before getting next */
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
/* get next buffer */
buffer = ReadBuffer(r, blk);
page = (Page) BufferGetPage(buffer);
opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
if (!(opaque->flags & F_LEAF)) {
RTSTACK *n;
ItemId iid;
n = (RTSTACK *) palloc(sizeof(RTSTACK));
n->rts_parent = stack;
n->rts_blk = blk;
n->rts_child = choose(r, page, itup, rtstate);
stack = n;
iid = PageGetItemId(page, n->rts_child);
which = (IndexTuple) PageGetItem(page, iid);
blk = ItemPointerGetBlockNumber(&(which->t_tid));
}
} while (!(opaque->flags & F_LEAF));
if (nospace(page, itup)) {
/* need to do a split */
res = dosplit(r, buffer, stack, itup, rtstate);
freestack(stack);
WriteBuffer(buffer); /* don't forget to release buffer! */
return (res);
}
/* add the item and write the buffer */
if (PageIsEmpty(page)) {
l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
FirstOffsetNumber,
LP_USED);
} else {
l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
OffsetNumberNext(PageGetMaxOffsetNumber(page)),
LP_USED);
}
WriteBuffer(buffer);
datum = (((char *) itup) + sizeof(IndexTupleData));
/* now expand the page boundary in the parent to include the new child */
rttighten(r, stack, datum,
(IndexTupleSize(itup) - sizeof(IndexTupleData)), rtstate);
freestack(stack);
/* build and return an InsertIndexResult for this insertion */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
ItemPointerSet(&(res->pointerData), blk, l);
return (res);
}
static void
rttighten(Relation r,
RTSTACK *stk,
char *datum,
int att_size,
RTSTATE *rtstate)
{
char *oldud;
char *tdatum;
Page p;
float old_size, newd_size;
Buffer b;
if (stk == (RTSTACK *) NULL)
return;
b = ReadBuffer(r, stk->rts_blk);
p = BufferGetPage(b);
oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->rts_child));
oldud += sizeof(IndexTupleData);
(*rtstate->sizeFn)(oldud, &old_size);
datum = (char *) (*rtstate->unionFn)(oldud, datum);
(*rtstate->sizeFn)(datum, &newd_size);
if (newd_size != old_size) {
TupleDesc td = RelationGetTupleDescriptor(r);
if (td->attrs[0]->attlen < 0) {
/*
* This is an internal page, so 'oldud' had better be a
* union (constant-length) key, too. (See comment below.)
*/
Assert(VARSIZE(datum) == VARSIZE(oldud));
memmove(oldud, datum, VARSIZE(datum));
} else {
memmove(oldud, datum, att_size);
}
WriteBuffer(b);
/*
* The user may be defining an index on variable-sized data (like
* polygons). If so, we need to get a constant-sized datum for
* insertion on the internal page. We do this by calling the union
* proc, which is guaranteed to return a rectangle.
*/
tdatum = (char *) (*rtstate->unionFn)(datum, datum);
rttighten(r, stk->rts_parent, tdatum, att_size, rtstate);
pfree(tdatum);
} else {
ReleaseBuffer(b);
}
pfree(datum);
}
/*
* dosplit -- split a page in the tree.
*
* This is the quadratic-cost split algorithm Guttman describes in
* his paper. The reason we chose it is that you can implement this
* with less information about the data types on which you're operating.
*/
static InsertIndexResult
dosplit(Relation r,
Buffer buffer,
RTSTACK *stack,
IndexTuple itup,
RTSTATE *rtstate)
{
Page p;
Buffer leftbuf, rightbuf;
Page left, right;
ItemId itemid;
IndexTuple item;
IndexTuple ltup, rtup;
OffsetNumber maxoff;
OffsetNumber i;
OffsetNumber leftoff, rightoff;
BlockNumber lbknum, rbknum;
BlockNumber bufblock;
RTreePageOpaque opaque;
int blank;
InsertIndexResult res;
char *isnull;
SPLITVEC v;
TupleDesc tupDesc;
isnull = (char *) palloc(r->rd_rel->relnatts);
for (blank = 0; blank < r->rd_rel->relnatts; blank++)
isnull[blank] = ' ';
p = (Page) BufferGetPage(buffer);
opaque = (RTreePageOpaque) PageGetSpecialPointer(p);
/*
* The root of the tree is the first block in the relation. If
* we're about to split the root, we need to do some hocus-pocus
* to enforce this guarantee.
*/
if (BufferGetBlockNumber(buffer) == P_ROOT) {
leftbuf = ReadBuffer(r, P_NEW);
RTInitBuffer(leftbuf, opaque->flags);
lbknum = BufferGetBlockNumber(leftbuf);
left = (Page) BufferGetPage(leftbuf);
} else {
leftbuf = buffer;
IncrBufferRefCount(buffer);
lbknum = BufferGetBlockNumber(buffer);
left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
}
rightbuf = ReadBuffer(r, P_NEW);
RTInitBuffer(rightbuf, opaque->flags);
rbknum = BufferGetBlockNumber(rightbuf);
right = (Page) BufferGetPage(rightbuf);
picksplit(r, p, &v, itup, rtstate);
leftoff = rightoff = FirstOffsetNumber;
maxoff = PageGetMaxOffsetNumber(p);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
itemid = PageGetItemId(p, i);
item = (IndexTuple) PageGetItem(p, itemid);
if (i == *(v.spl_left)) {
(void) PageAddItem(left, (Item) item, IndexTupleSize(item),
leftoff, LP_USED);
leftoff = OffsetNumberNext(leftoff);
v.spl_left++; /* advance in left split vector */
} else {
(void) PageAddItem(right, (Item) item, IndexTupleSize(item),
rightoff, LP_USED);
rightoff = OffsetNumberNext(rightoff);
v.spl_right++; /* advance in right split vector */
}
}
/* build an InsertIndexResult for this insertion */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
/* now insert the new index tuple */
if (*(v.spl_left) != FirstOffsetNumber) {
(void) PageAddItem(left, (Item) itup, IndexTupleSize(itup),
leftoff, LP_USED);
leftoff = OffsetNumberNext(leftoff);
ItemPointerSet(&(res->pointerData), lbknum, leftoff);
} else {
(void) PageAddItem(right, (Item) itup, IndexTupleSize(itup),
rightoff, LP_USED);
rightoff = OffsetNumberNext(rightoff);
ItemPointerSet(&(res->pointerData), rbknum, rightoff);
}
if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) {
PageRestoreTempPage(left, p);
}
WriteBuffer(leftbuf);
WriteBuffer(rightbuf);
/*
* Okay, the page is split. We have three things left to do:
*
* 1) Adjust any active scans on this index to cope with changes
* we introduced in its structure by splitting this page.
*
* 2) "Tighten" the bounding box of the pointer to the left
* page in the parent node in the tree, if any. Since we
* moved a bunch of stuff off the left page, we expect it
* to get smaller. This happens in the internal insertion
* routine.
*
* 3) Insert a pointer to the right page in the parent. This
* may cause the parent to split. If it does, we need to
* repeat steps one and two for each split node in the tree.
*/
/* adjust active scans */
rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);
tupDesc = r->rd_att;
ltup = (IndexTuple) index_formtuple(tupDesc,
(Datum *) &(v.spl_ldatum), isnull);
rtup = (IndexTuple) index_formtuple(tupDesc,
(Datum *) &(v.spl_rdatum), isnull);
pfree(isnull);
/* set pointers to new child pages in the internal index tuples */
ItemPointerSet(&(ltup->t_tid), lbknum, 1);
ItemPointerSet(&(rtup->t_tid), rbknum, 1);
rtintinsert(r, stack, ltup, rtup, rtstate);
pfree(ltup);
pfree(rtup);
return (res);
}
static void
rtintinsert(Relation r,
RTSTACK *stk,
IndexTuple ltup,
IndexTuple rtup,
RTSTATE *rtstate)
{
IndexTuple old;
Buffer b;
Page p;
char *ldatum, *rdatum, *newdatum;
InsertIndexResult res;
if (stk == (RTSTACK *) NULL) {
rtnewroot(r, ltup, rtup);
return;
}
b = ReadBuffer(r, stk->rts_blk);
p = BufferGetPage(b);
old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child));
/*
* This is a hack. Right now, we force rtree keys to be constant size.
* To fix this, need delete the old key and add both left and right
* for the two new pages. The insertion of left may force a split if
* the new left key is bigger than the old key.
*/
if (IndexTupleSize(old) != IndexTupleSize(ltup))
elog(WARN, "Variable-length rtree keys are not supported.");
/* install pointer to left child */
memmove(old, ltup,IndexTupleSize(ltup));
if (nospace(p, rtup)) {
newdatum = (((char *) ltup) + sizeof(IndexTupleData));
rttighten(r, stk->rts_parent, newdatum,
(IndexTupleSize(ltup) - sizeof(IndexTupleData)), rtstate);
res = dosplit(r, b, stk->rts_parent, rtup, rtstate);
WriteBuffer(b); /* don't forget to release buffer! - 01/31/94 */
pfree(res);
} else {
(void) PageAddItem(p, (Item) rtup, IndexTupleSize(rtup),
PageGetMaxOffsetNumber(p), LP_USED);
WriteBuffer(b);
ldatum = (((char *) ltup) + sizeof(IndexTupleData));
rdatum = (((char *) rtup) + sizeof(IndexTupleData));
newdatum = (char *) (*rtstate->unionFn)(ldatum, rdatum);
rttighten(r, stk->rts_parent, newdatum,
(IndexTupleSize(rtup) - sizeof(IndexTupleData)), rtstate);
pfree(newdatum);
}
}
static void
rtnewroot(Relation r, IndexTuple lt, IndexTuple rt)
{
Buffer b;
Page p;
b = ReadBuffer(r, P_ROOT);
RTInitBuffer(b, 0);
p = BufferGetPage(b);
(void) PageAddItem(p, (Item) lt, IndexTupleSize(lt),
FirstOffsetNumber, LP_USED);
(void) PageAddItem(p, (Item) rt, IndexTupleSize(rt),
OffsetNumberNext(FirstOffsetNumber), LP_USED);
WriteBuffer(b);
}
static void
picksplit(Relation r,
Page page,
SPLITVEC *v,
IndexTuple itup,
RTSTATE *rtstate)
{
OffsetNumber maxoff;
OffsetNumber i, j;
IndexTuple item_1, item_2;
char *datum_alpha, *datum_beta;
char *datum_l, *datum_r;
char *union_d, *union_dl, *union_dr;
char *inter_d;
bool firsttime;
float size_alpha, size_beta, size_union, size_inter;
float size_waste, waste;
float size_l, size_r;
int nbytes;
OffsetNumber seed_1 = 0, seed_2 = 0;
OffsetNumber *left, *right;
maxoff = PageGetMaxOffsetNumber(page);
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
firsttime = true;
waste = 0.0;
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j));
datum_beta = ((char *) item_2) + sizeof(IndexTupleData);
/* compute the wasted space by unioning these guys */
union_d = (char *)(rtstate->unionFn)(datum_alpha, datum_beta);
(rtstate->sizeFn)(union_d, &size_union);
inter_d = (char *)(rtstate->interFn)(datum_alpha, datum_beta);
(rtstate->sizeFn)(inter_d, &size_inter);
size_waste = size_union - size_inter;
pfree(union_d);
if (inter_d != (char *) NULL)
pfree(inter_d);
/*
* are these a more promising split than what we've
* already seen?
*/
if (size_waste > waste || firsttime) {
waste = size_waste;
seed_1 = i;
seed_2 = j;
firsttime = false;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1));
datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
datum_l = (char *)(*rtstate->unionFn)(datum_alpha, datum_alpha);
(*rtstate->sizeFn)(datum_l, &size_l);
item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2));
datum_beta = ((char *) item_2) + sizeof(IndexTupleData);
datum_r = (char *)(*rtstate->unionFn)(datum_beta, datum_beta);
(*rtstate->sizeFn)(datum_r, &size_r);
/*
* Now split up the regions between the two seeds. An important
* property of this split algorithm is that the split vector v
* has the indices of items to be split in order in its left and
* right vectors. We exploit this property by doing a merge in
* the code that actually splits the page.
*
* For efficiency, we also place the new index tuple in this loop.
* This is handled at the very end, when we have placed all the
* existing tuples and i == maxoff + 1.
*/
maxoff = OffsetNumberNext(maxoff);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
/*
* If we've already decided where to place this item, just
* put it on the right list. Otherwise, we need to figure
* out which page needs the least enlargement in order to
* store the item.
*/
if (i == seed_1) {
*left++ = i;
v->spl_nleft++;
continue;
} else if (i == seed_2) {
*right++ = i;
v->spl_nright++;
continue;
}
/* okay, which page needs least enlargement? */
if (i == maxoff) {
item_1 = itup;
} else {
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
}
datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
union_dl = (char *)(*rtstate->unionFn)(datum_l, datum_alpha);
union_dr = (char *)(*rtstate->unionFn)(datum_r, datum_alpha);
(*rtstate->sizeFn)(union_dl, &size_alpha);
(*rtstate->sizeFn)(union_dr, &size_beta);
/* pick which page to add it to */
if (size_alpha - size_l < size_beta - size_r) {
pfree(datum_l);
pfree(union_dr);
datum_l = union_dl;
size_l = size_alpha;
*left++ = i;
v->spl_nleft++;
} else {
pfree(datum_r);
pfree(union_dl);
datum_r = union_dr;
size_r = size_alpha;
*right++ = i;
v->spl_nright++;
}
}
*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
v->spl_ldatum = datum_l;
v->spl_rdatum = datum_r;
}
static void
RTInitBuffer(Buffer b, uint32 f)
{
RTreePageOpaque opaque;
Page page;
Size pageSize;
pageSize = BufferGetPageSize(b);
page = BufferGetPage(b);
memset(page, 0, (int) pageSize);
PageInit(page, pageSize, sizeof(RTreePageOpaqueData));
opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
opaque->flags = f;
}
static OffsetNumber
choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate)
{
OffsetNumber maxoff;
OffsetNumber i;
char *ud, *id;
char *datum;
float usize, dsize;
OffsetNumber which;
float which_grow;
id = ((char *) it) + sizeof(IndexTupleData);
maxoff = PageGetMaxOffsetNumber(p);
which_grow = -1.0;
which = -1;
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
datum = (char *) PageGetItem(p, PageGetItemId(p, i));
datum += sizeof(IndexTupleData);
(*rtstate->sizeFn)(datum, &dsize);
ud = (char *) (*rtstate->unionFn)(datum, id);
(*rtstate->sizeFn)(ud, &usize);
pfree(ud);
if (which_grow < 0 || usize - dsize < which_grow) {
which = i;
which_grow = usize - dsize;
if (which_grow == 0)
break;
}
}
return (which);
}
static int
nospace(Page p, IndexTuple it)
{
return (PageGetFreeSpace(p) < IndexTupleSize(it));
}
void
freestack(RTSTACK *s)
{
RTSTACK *p;
while (s != (RTSTACK *) NULL) {
p = s->rts_parent;
pfree(s);
s = p;
}
}
char *
rtdelete(Relation r, ItemPointer tid)
{
BlockNumber blkno;
OffsetNumber offnum;
Buffer buf;
Page page;
/* must write-lock on delete */
RelationSetLockForWrite(r);
blkno = ItemPointerGetBlockNumber(tid);
offnum = ItemPointerGetOffsetNumber(tid);
/* adjust any scans that will be affected by this deletion */
rtadjscans(r, RTOP_DEL, blkno, offnum);
/* delete the index tuple */
buf = ReadBuffer(r, blkno);
page = BufferGetPage(buf);
PageIndexTupleDelete(page, offnum);
WriteBuffer(buf);
/* XXX -- two-phase locking, don't release the write lock */
return ((char *) NULL);
}
static void initRtstate(RTSTATE *rtstate, Relation index)
{
RegProcedure union_proc, size_proc, inter_proc;
func_ptr user_fn;
int pronargs;
union_proc = index_getprocid(index, 1, RT_UNION_PROC);
size_proc = index_getprocid(index, 1, RT_SIZE_PROC);
inter_proc = index_getprocid(index, 1, RT_INTER_PROC);
fmgr_info(union_proc, &user_fn, &pronargs);
rtstate->unionFn = user_fn;
fmgr_info(size_proc, &user_fn, &pronargs);
rtstate->sizeFn = user_fn;
fmgr_info(inter_proc, &user_fn, &pronargs);
rtstate->interFn = user_fn;
return;
}
#define RTDEBUG
#ifdef RTDEBUG
#include "utils/geo-decls.h"
void
_rtdump(Relation r)
{
Buffer buf;
Page page;
OffsetNumber offnum, maxoff;
BlockNumber blkno;
BlockNumber nblocks;
RTreePageOpaque po;
IndexTuple itup;
BlockNumber itblkno;
OffsetNumber itoffno;
char *datum;
char *itkey;
nblocks = RelationGetNumberOfBlocks(r);
for (blkno = 0; blkno < nblocks; blkno++) {
buf = ReadBuffer(r, blkno);
page = BufferGetPage(buf);
po = (RTreePageOpaque) PageGetSpecialPointer(page);
maxoff = PageGetMaxOffsetNumber(page);
printf("Page %d maxoff %d <%s>\n", blkno, maxoff,
(po->flags & F_LEAF ? "LEAF" : "INTERNAL"));
if (PageIsEmpty(page)) {
ReleaseBuffer(buf);
continue;
}
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum)) {
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
itblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid));
datum = ((char *) itup);
datum += sizeof(IndexTupleData);
itkey = (char *) box_out((BOX *) datum);
printf("\t[%d] size %d heap <%d,%d> key:%s\n",
offnum, IndexTupleSize(itup), itblkno, itoffno, itkey);
pfree(itkey);
}
ReleaseBuffer(buf);
}
}
#endif /* defined RTDEBUG */
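
Editor's note: choose() above descends by picking the child page whose
bounding region needs the least enlargement to absorb the new key
(usize - dsize). The standalone sketch below walks two toy children with
that rule; struct and function names are hypothetical.

#include <stdio.h>

typedef struct { double xl, yl, xh, yh; } MiniBox;

static double area(MiniBox b) { return (b.xh - b.xl) * (b.yh - b.yl); }

static double growth(MiniBox child, MiniBox newkey)
{
    MiniBox u;
    u.xl = child.xl < newkey.xl ? child.xl : newkey.xl;
    u.yl = child.yl < newkey.yl ? child.yl : newkey.yl;
    u.xh = child.xh > newkey.xh ? child.xh : newkey.xh;
    u.yh = child.yh > newkey.yh ? child.yh : newkey.yh;
    return area(u) - area(child);     /* usize - dsize in choose() */
}

int main(void)
{
    MiniBox newkey = {4, 4, 5, 5};
    MiniBox children[2] = { {0, 0, 3, 3}, {3, 3, 6, 6} };
    int i, best = 0;
    double g, bestg = -1.0;

    for (i = 0; i < 2; i++) {
        g = growth(children[i], newkey);
        if (bestg < 0 || g < bestg) { best = i; bestg = g; }
    }
    /* child 1 already covers (4,4,5,5), so it grows by 0 and wins */
    printf("insert under child %d (growth %.0f)\n", best, bestg);
    return 0;
}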

View File

@ -0,0 +1,392 @@
/*-------------------------------------------------------------------------
*
* rtscan.c--
* routines to manage scans on index relations
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/rel.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/rtree.h"
#include "access/rtstrat.h"
/* routines defined and used here */
static void rtregscan(IndexScanDesc s);
static void rtdropscan(IndexScanDesc s);
static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
OffsetNumber offnum);
static void adjuststack(RTSTACK *stk, BlockNumber blkno,
OffsetNumber offnum);
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
int op, BlockNumber blkno, OffsetNumber offnum);
/*
* Whenever we start an rtree scan in a backend, we register it in private
* space. Then if the rtree index gets updated, we check all registered
* scans and adjust them if the tuple they point at got moved by the
* update. We only need to do this in private space, because when we update
* an rtree we have a write lock on the tree, so no other process can have
* any locks at all on it. A single transaction can have write and read
* locks on the same object, so that's why we need to handle this case.
*/
typedef struct RTScanListData {
IndexScanDesc rtsl_scan;
struct RTScanListData *rtsl_next;
} RTScanListData;
typedef RTScanListData *RTScanList;
/* pointer to list of local scans on rtrees */
static RTScanList RTScans = (RTScanList) NULL;
IndexScanDesc
rtbeginscan(Relation r,
bool fromEnd,
uint16 nkeys,
ScanKey key)
{
IndexScanDesc s;
RelationSetLockForRead(r);
s = RelationGetIndexScan(r, fromEnd, nkeys, key);
rtregscan(s);
return (s);
}
void
rtrescan(IndexScanDesc s, bool fromEnd, ScanKey key)
{
RTreeScanOpaque p;
RegProcedure internal_proc;
int i;
if (!IndexScanIsValid(s)) {
elog(WARN, "rtrescan: invalid scan.");
return;
}
/*
* Clear all the pointers.
*/
ItemPointerSetInvalid(&s->previousItemData);
ItemPointerSetInvalid(&s->currentItemData);
ItemPointerSetInvalid(&s->nextItemData);
ItemPointerSetInvalid(&s->previousMarkData);
ItemPointerSetInvalid(&s->currentMarkData);
ItemPointerSetInvalid(&s->nextMarkData);
/*
* Set flags.
*/
if (RelationGetNumberOfBlocks(s->relation) == 0) {
s->flags = ScanUnmarked;
} else if (fromEnd) {
s->flags = ScanUnmarked | ScanUncheckedPrevious;
} else {
s->flags = ScanUnmarked | ScanUncheckedNext;
}
s->scanFromEnd = fromEnd;
if (s->numberOfKeys > 0) {
memmove(s->keyData,
key,
s->numberOfKeys * sizeof(ScanKeyData));
}
p = (RTreeScanOpaque) s->opaque;
if (p != (RTreeScanOpaque) NULL) {
freestack(p->s_stack);
freestack(p->s_markstk);
p->s_stack = p->s_markstk = (RTSTACK *) NULL;
p->s_flags = 0x0;
} else {
/* initialize opaque data */
p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
p->s_internalKey =
(ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);
p->s_stack = p->s_markstk = (RTSTACK *) NULL;
p->s_internalNKey = s->numberOfKeys;
p->s_flags = 0x0;
for (i = 0; i < s->numberOfKeys; i++)
p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument;
s->opaque = p;
if (s->numberOfKeys > 0) {
/*
* Scans on internal pages use different operators than they
* do on leaf pages. For example, if the user wants all boxes
* that exactly match (x1,y1,x2,y2), then on internal pages
* we need to find all boxes that contain (x1,y1,x2,y2).
*/
for (i = 0; i < s->numberOfKeys; i++) {
internal_proc = RTMapOperator(s->relation,
s->keyData[i].sk_attno,
s->keyData[i].sk_procedure);
ScanKeyEntryInitialize(&(p->s_internalKey[i]),
s->keyData[i].sk_flags,
s->keyData[i].sk_attno,
internal_proc,
s->keyData[i].sk_argument);
}
}
}
}
void
rtmarkpos(IndexScanDesc s)
{
RTreeScanOpaque p;
RTSTACK *o, *n, *tmp;
s->currentMarkData = s->currentItemData;
p = (RTreeScanOpaque) s->opaque;
if (p->s_flags & RTS_CURBEFORE)
p->s_flags |= RTS_MRKBEFORE;
else
p->s_flags &= ~RTS_MRKBEFORE;
o = (RTSTACK *) NULL;
n = p->s_stack;
/* copy the parent stack from the current item data */
while (n != (RTSTACK *) NULL) {
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
tmp->rts_child = n->rts_child;
tmp->rts_blk = n->rts_blk;
tmp->rts_parent = o;
o = tmp;
n = n->rts_parent;
}
freestack(p->s_markstk);
p->s_markstk = o;
}
void
rtrestrpos(IndexScanDesc s)
{
RTreeScanOpaque p;
RTSTACK *o, *n, *tmp;
s->currentItemData = s->currentMarkData;
p = (RTreeScanOpaque) s->opaque;
if (p->s_flags & RTS_MRKBEFORE)
p->s_flags |= RTS_CURBEFORE;
else
p->s_flags &= ~RTS_CURBEFORE;
o = (RTSTACK *) NULL;
n = p->s_markstk;
/* copy the parent stack from the current item data */
while (n != (RTSTACK *) NULL) {
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
tmp->rts_child = n->rts_child;
tmp->rts_blk = n->rts_blk;
tmp->rts_parent = o;
o = tmp;
n = n->rts_parent;
}
freestack(p->s_stack);
p->s_stack = o;
}
void
rtendscan(IndexScanDesc s)
{
RTreeScanOpaque p;
p = (RTreeScanOpaque) s->opaque;
if (p != (RTreeScanOpaque) NULL) {
freestack(p->s_stack);
freestack(p->s_markstk);
}
rtdropscan(s);
/* XXX don't unset read lock -- two-phase locking */
}
static void
rtregscan(IndexScanDesc s)
{
RTScanList l;
l = (RTScanList) palloc(sizeof(RTScanListData));
l->rtsl_scan = s;
l->rtsl_next = RTScans;
RTScans = l;
}
static void
rtdropscan(IndexScanDesc s)
{
RTScanList l;
RTScanList prev;
prev = (RTScanList) NULL;
for (l = RTScans;
l != (RTScanList) NULL && l->rtsl_scan != s;
l = l->rtsl_next) {
prev = l;
}
if (l == (RTScanList) NULL)
elog(WARN, "rtree scan list corrupted -- cannot find 0x%lx", s);
if (prev == (RTScanList) NULL)
RTScans = l->rtsl_next;
else
prev->rtsl_next = l->rtsl_next;
pfree(l);
}
void
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
{
RTScanList l;
Oid relid;
relid = r->rd_id;
for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next) {
if (l->rtsl_scan->relation->rd_id == relid)
rtadjone(l->rtsl_scan, op, blkno, offnum);
}
}
/*
* rtadjone() -- adjust one scan for update.
*
* By here, the scan passed in is on a modified relation. Op tells
* us what the modification is, and blkno and offind tell us what
* block and offset index were affected. This routine checks the
* current and marked positions, and the current and marked stacks,
* to see if any stored location needs to be changed because of the
* update. If so, we make the change here.
*/
static void
rtadjone(IndexScanDesc s,
int op,
BlockNumber blkno,
OffsetNumber offnum)
{
RTreeScanOpaque so;
adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
so = (RTreeScanOpaque) s->opaque;
if (op == RTOP_SPLIT) {
adjuststack(so->s_stack, blkno, offnum);
adjuststack(so->s_markstk, blkno, offnum);
}
}
/*
* adjustiptr() -- adjust current and marked item pointers in the scan
*
* Depending on the type of update and the place it happened, we
* need to do nothing, to back up one record, or to start over on
* the same page.
*/
static void
adjustiptr(IndexScanDesc s,
ItemPointer iptr,
int op,
BlockNumber blkno,
OffsetNumber offnum)
{
OffsetNumber curoff;
RTreeScanOpaque so;
if (ItemPointerIsValid(iptr)) {
if (ItemPointerGetBlockNumber(iptr) == blkno) {
curoff = ItemPointerGetOffsetNumber(iptr);
so = (RTreeScanOpaque) s->opaque;
switch (op) {
case RTOP_DEL:
/* back up one if we need to */
if (curoff >= offnum) {
if (curoff > FirstOffsetNumber) {
/* just adjust the item pointer */
ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
} else {
/* remember that we're before the current tuple */
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData))
so->s_flags |= RTS_CURBEFORE;
else
so->s_flags |= RTS_MRKBEFORE;
}
}
break;
case RTOP_SPLIT:
/* back to start of page on split */
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
if (iptr == &(s->currentItemData))
so->s_flags &= ~RTS_CURBEFORE;
else
so->s_flags &= ~RTS_MRKBEFORE;
break;
default:
elog(WARN, "Bad operation in rtree scan adjust: %d", op);
}
}
}
}
/*
* adjuststack() -- adjust the supplied stack for a split on a page in
* the index we're scanning.
*
* If a page on our parent stack has split, we need to back up to the
* beginning of the page and rescan it. The reason for this is that
* the split algorithm for rtrees doesn't order tuples in any useful
* way on a single page. This means that on a split, we may wind up
* looking at some heap tuples more than once. This is handled in the
* access method update code for heaps; if we've modified the tuple we
* are looking at already in this transaction, we ignore the update
* request.
*/
/*ARGSUSED*/
static void
adjuststack(RTSTACK *stk,
BlockNumber blkno,
OffsetNumber offnum)
{
while (stk != (RTSTACK *) NULL) {
if (stk->rts_blk == blkno)
stk->rts_child = FirstOffsetNumber;
stk = stk->rts_parent;
}
}
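
Editor's note: a small sketch of the RTOP_DEL branch of adjustiptr() above.
If a tuple at or before the scan's current offset is deleted on the block
the scan is sitting on, the position backs up by one so the next step does
not skip a tuple; if it is already at the first slot, a "before" flag stands
in for RTS_CURBEFORE. This is not Postgres code and the names are
hypothetical.

#include <stdio.h>

#define FIRST_OFF 1             /* stand-in for FirstOffsetNumber */

static unsigned adjust_on_delete(unsigned cur, unsigned deleted, int *before)
{
    if (cur >= deleted) {
        if (cur > FIRST_OFF)
            return cur - 1;     /* just step back one slot */
        *before = 1;            /* remember we are before the first slot */
        return FIRST_OFF;
    }
    return cur;                 /* deletion was after us: nothing to do */
}

int main(void)
{
    int before = 0;
    printf("%u\n", adjust_on_delete(5, 3, &before));  /* -> 4 */
    printf("%u\n", adjust_on_delete(1, 1, &before));  /* -> 1, before set */
    printf("%u\n", adjust_on_delete(2, 6, &before));  /* -> 2, unchanged */
    return 0;
}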

View File

@ -0,0 +1,239 @@
/*-------------------------------------------------------------------------
*
* rtstrat.c--
* strategy map data for rtrees.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtstrat.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "utils/rel.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "access/istrat.h"
#include "access/rtree.h"
/*
* Note: negate, commute, and negatecommute all assume that operators are
* ordered as follows in the strategy map:
*
* left, left-or-overlap, overlap, right-or-overlap, right, same,
* contains, contained-by
*
* The negate, commute, and negatecommute arrays are used by the planner
* to plan indexed scans over data that appears in the qualification in
* a boolean negation, or whose operands appear in the wrong order. For
* example, if the operator "<%" means "contains", and the user says
*
* where not rel.box <% "(10,10,20,20)"::box
*
* the planner can plan an index scan by noting that rtree indices have
* an operator in their operator class for negating <%.
*
* Similarly, if the user says something like
*
* where "(10,10,20,20)"::box <% rel.box
*
* the planner can see that the rtree index on rel.box has an operator in
* its opclass for commuting <%, and plan the scan using that operator.
* This added complexity in the access methods makes the planner a lot easier
* to write.
*/
/* if a op b, what operator tells us if (not a op b)? */
static StrategyNumber RTNegate[RTNStrategies] = {
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy
};
/* if a op_1 b, what is the operator op_2 such that b op_2 a? */
static StrategyNumber RTCommute[RTNStrategies] = {
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy
};
/* if a op_1 b, what is the operator op_2 such that (b !op_2 a)? */
static StrategyNumber RTNegateCommute[RTNStrategies] = {
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy,
InvalidStrategy
};
/*
* Now do the TermData arrays. These exist in case the user doesn't give
* us a full set of operators for a particular operator class. The idea
* is that by making multiple comparisons using any one of the supplied
* operators, we can decide whether two n-dimensional polygons are equal.
* For example, if a contains b and b contains a, we may conclude that
* a and b are equal.
*
* The presence of the TermData arrays in all this is a historical accident.
* Early in the development of the POSTGRES access methods, it was believed
* that writing functions was harder than writing arrays. This is wrong;
* TermData is hard to understand and hard to get right. In general, when
* someone populates a new operator class, they populate it completely. If
* Mike Hirohama had forced Cimarron Taylor to populate the strategy map
* for btree int2_ops completely in 1988, you wouldn't have to deal with
* all this now. Too bad for you.
*
* Since you can't necessarily do this in all cases (for example, you can't
* do it given only "intersects" or "disjoint"), TermData arrays for some
* operators don't appear below.
*
* Note that if you DO supply all the operators required in a given opclass
* by inserting them into the pg_opclass system catalog, you can get away
* without doing all this TermData stuff. Since the rtree code is intended
* to be a reference for access method implementors, I'm doing TermData
* correctly here.
*
* Note on style: these are all actually of type StrategyTermData, but
* since those have variable-length data at the end of the struct we can't
* properly initialize them if we declare them to be what they are.
*/
/* if you only have "contained-by", how do you determine equality? */
static uint16 RTContainedByTermData[] = {
2, /* make two comparisons */
RTContainedByStrategyNumber, /* use "a contained-by b" */
0x0, /* without any magic */
RTContainedByStrategyNumber, /* then use contained-by, */
SK_COMMUTE /* swapping a and b */
};
/* if you only have "contains", how do you determine equality? */
static uint16 RTContainsTermData[] = {
2, /* make two comparisons */
RTContainsStrategyNumber, /* use "a contains b" */
0x0, /* without any magic */
RTContainsStrategyNumber, /* then use contains again, */
SK_COMMUTE /* swapping a and b */
};
/* now put all that together in one place for the planner */
static StrategyTerm RTEqualExpressionData[] = {
(StrategyTerm) RTContainedByTermData,
(StrategyTerm) RTContainsTermData,
NULL
};
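/*
 * For illustration only (nothing below is compiled or referenced): the
 * uint16 arrays above are simply flattened StrategyTermData structures.
 * Assuming the usual back-to-back packing of uint16 fields (which is what
 * the cast to StrategyTerm relies on), RTContainedByTermData is equivalent
 * to a StrategyTermData filled in as
 *
 *	term.degree = 2;
 *	term.operatorData[0].strategy = RTContainedByStrategyNumber;
 *	term.operatorData[0].flags = 0x0;
 *	term.operatorData[1].strategy = RTContainedByStrategyNumber;
 *	term.operatorData[1].flags = SK_COMMUTE;
 *
 * but the variable-length operatorData array makes a static initializer of
 * the struct form impossible, which is the point of the "Note on style"
 * comment above.
 */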
/*
* If you were sufficiently attentive to detail, you would go through
* the ExpressionData pain above for every one of the seven strategies
* we defined. I am not. Now we declare the StrategyEvaluationData
* structure that gets shipped around to help the planner and the access
* method decide what sort of scan it should do, based on (a) what the
* user asked for, (b) what operators are defined for a particular opclass,
* and (c) the reams of information we supplied above.
*
* The idea of all of this initialized data is to make life easier on the
* user when he defines a new operator class to use this access method.
* By filling in all the data, we let him get away with leaving holes in his
* operator class, and still let him use the index. The added complexity
* in the access methods just isn't worth the trouble, though.
*/
static StrategyEvaluationData RTEvaluationData = {
RTNStrategies, /* # of strategies */
(StrategyTransformMap) RTNegate, /* how to do (not qual) */
(StrategyTransformMap) RTCommute, /* how to swap operands */
(StrategyTransformMap) RTNegateCommute, /* how to do both */
{
NULL, /* express left */
NULL, /* express overleft */
NULL, /* express over */
NULL, /* express overright */
NULL, /* express right */
(StrategyExpression) RTEqualExpressionData, /* express same */
NULL, /* express contains */
NULL, /* express contained-by */
NULL,
NULL,
NULL
}
};
/*
* Okay, now something peculiar to rtrees that doesn't apply to most other
* indexing structures: When we're searching a tree for a given value, we
* can't do the same sorts of comparisons on internal node entries as we
* do at leaves. The reason is that if we're looking for (say) all boxes
* that are the same as (0,0,10,10), then we need to find all leaf pages
* that overlap that region. So internally we search for overlap, and at
* the leaf we search for equality.
*
* This array maps leaf search operators to the internal search operators.
* We assume the normal ordering on operators:
*
* left, left-or-overlap, overlap, right-or-overlap, right, same,
* contains, contained-by
*/
static StrategyNumber RTOperMap[RTNStrategies] = {
RTOverLeftStrategyNumber,
RTOverLeftStrategyNumber,
RTOverlapStrategyNumber,
RTOverRightStrategyNumber,
RTOverRightStrategyNumber,
RTContainsStrategyNumber,
RTContainsStrategyNumber,
RTOverlapStrategyNumber
};
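/*
 * Reading the map above against the ordering just given: a leaf-level
 * "same" or "contains" search is run internally as "contains", a
 * "contained-by" search as "overlap", and the directional strategies as
 * their "overlapping" counterparts.  For example (a sketch only, using the
 * same one-based indexing as RTMapOperator below):
 *
 *	RTOperMap[RTContainedByStrategyNumber - 1] == RTOverlapStrategyNumber
 */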
StrategyNumber
RelationGetRTStrategy(Relation r,
AttrNumber attnum,
RegProcedure proc)
{
return (RelationGetStrategy(r, attnum, &RTEvaluationData, proc));
}
bool
RelationInvokeRTStrategy(Relation r,
AttrNumber attnum,
StrategyNumber s,
Datum left,
Datum right)
{
return (RelationInvokeStrategy(r, &RTEvaluationData, attnum, s,
left, right));
}
RegProcedure
RTMapOperator(Relation r,
AttrNumber attnum,
RegProcedure proc)
{
StrategyNumber procstrat;
StrategyMap strategyMap;
procstrat = RelationGetRTStrategy(r, attnum, proc);
strategyMap = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(r),
RTNStrategies,
attnum);
return (strategyMap->entry[RTOperMap[procstrat - 1] - 1].sk_procedure);
}

View File

@ -0,0 +1,17 @@
/*-------------------------------------------------------------------------
*
* rtscan.h--
* routines defined in access/rtree/rtscan.c
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: rtscan.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef RTSCAN_H
#define RTSCAN_H
void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum);
#endif /* RTSCAN_H */

View File

@ -0,0 +1,18 @@
/*-------------------------------------------------------------------------
*
* rtstrat.h--
* routines defined in access/rtree/rtstrat.c
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: rtstrat.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef RTSTRAT_H
#define RTSTRAT_H
extern RegProcedure RTMapOperator(Relation r, AttrNumber attnum,
RegProcedure proc);
#endif /* RTSTRAT_H */

57
src/backend/access/sdir.h Normal file
View File

@ -0,0 +1,57 @@
/*-------------------------------------------------------------------------
*
* sdir.h--
* POSTGRES scan direction definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: sdir.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef SDIR_H
#define SDIR_H
#include "c.h"
/*
* ScanDirection was an int8 for no apparent reason. I kept the original
* values because I'm not sure if I'll break anything otherwise. -ay 2/95
*/
typedef enum ScanDirection {
BackwardScanDirection = -1,
NoMovementScanDirection = 0,
ForwardScanDirection = 1
} ScanDirection;
/*
* ScanDirectionIsValid --
 *	True iff scan direction is valid.
*/
#define ScanDirectionIsValid(direction) \
((bool) (BackwardScanDirection <= direction && \
direction <= ForwardScanDirection))
/*
* ScanDirectionIsBackward --
 *	True iff scan direction is backward.
*/
#define ScanDirectionIsBackward(direction) \
((bool) (direction == BackwardScanDirection))
/*
* ScanDirectionIsNoMovement --
 *	True iff scan direction indicates no movement.
*/
#define ScanDirectionIsNoMovement(direction) \
((bool) (direction == NoMovementScanDirection))
/*
* ScanDirectionIsForward --
 *	True iff scan direction is forward.
*/
#define ScanDirectionIsForward(direction) \
((bool) (direction == ForwardScanDirection))
#endif /* SDIR_H */

52
src/backend/access/skey.h Normal file
View File

@ -0,0 +1,52 @@
/*-------------------------------------------------------------------------
*
* skey.h--
* POSTGRES scan key definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: skey.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*
* Note:
* Needs more accessor/assignment routines.
*-------------------------------------------------------------------------
*/
#ifndef SKEY_H
#define SKEY_H
#include "postgres.h"
#include "access/attnum.h"
typedef struct ScanKeyData {
bits16 sk_flags; /* flags */
AttrNumber sk_attno; /* domain number */
RegProcedure sk_procedure; /* procedure OID */
func_ptr sk_func;
int32 sk_nargs;
Datum sk_argument; /* data to compare */
} ScanKeyData;
typedef ScanKeyData *ScanKey;
#define SK_ISNULL 0x1
#define SK_UNARY 0x2
#define SK_NEGATE 0x4
#define SK_COMMUTE 0x8
#define ScanUnmarked 0x01
#define ScanUncheckedPrevious 0x02
#define ScanUncheckedNext 0x04
/*
* prototypes for functions in access/common/scankey.c
*/
extern void ScanKeyEntrySetIllegal(ScanKey entry);
extern void ScanKeyEntryInitialize(ScanKey entry, bits16 flags,
AttrNumber attributeNumber, RegProcedure procedure, Datum argument);
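/*
 * A minimal usage sketch (the RegProcedure value 66 below is hypothetical,
 * for illustration only): to build a key meaning "attribute 1 = 42" a
 * caller would write
 *
 *	ScanKeyData key;
 *
 *	ScanKeyEntryInitialize(&key, (bits16) 0x0, (AttrNumber) 1,
 *			       (RegProcedure) 66, Int32GetDatum(42));
 *
 * leaving sk_flags zero; flags such as SK_COMMUTE are set, for example, by
 * the rtree strategy term data in rtstrat.c to mark swapped operands.
 */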
#endif /* SKEY_H */

View File

@ -0,0 +1,86 @@
/*-------------------------------------------------------------------------
*
* strat.h--
* index strategy type definitions
* (separated out from original istrat.h to avoid circular refs)
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: strat.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef STRAT_H
#define STRAT_H
#include "postgres.h"
#include "access/attnum.h"
#include "access/skey.h"
typedef uint16 StrategyNumber;
#define InvalidStrategy 0
typedef struct StrategyTransformMapData {
StrategyNumber strategy[1]; /* VARIABLE LENGTH ARRAY */
} StrategyTransformMapData; /* VARIABLE LENGTH STRUCTURE */
typedef StrategyTransformMapData *StrategyTransformMap;
typedef struct StrategyOperatorData {
StrategyNumber strategy;
bits16 flags; /* scan qualification flags h/skey.h */
} StrategyOperatorData;
typedef StrategyOperatorData *StrategyOperator;
typedef struct StrategyTermData { /* conjunctive term */
uint16 degree;
StrategyOperatorData operatorData[1]; /* VARIABLE LENGTH */
} StrategyTermData; /* VARIABLE LENGTH STRUCTURE */
typedef StrategyTermData *StrategyTerm;
typedef struct StrategyExpressionData { /* disjunctive normal form */
StrategyTerm term[1]; /* VARIABLE LENGTH ARRAY */
} StrategyExpressionData; /* VARIABLE LENGTH STRUCTURE */
typedef StrategyExpressionData *StrategyExpression;
typedef struct StrategyEvaluationData {
StrategyNumber maxStrategy;
StrategyTransformMap negateTransform;
StrategyTransformMap commuteTransform;
StrategyTransformMap negateCommuteTransform;
StrategyExpression expression[12]; /* XXX VARIABLE LENGTH */
} StrategyEvaluationData; /* VARIABLE LENGTH STRUCTURE */
typedef StrategyEvaluationData *StrategyEvaluation;
/*
* StrategyTransformMapIsValid --
* Returns true iff strategy transformation map is valid.
*/
#define StrategyTransformMapIsValid(transform) PointerIsValid(transform)
#ifndef CorrectStrategies /* XXX this should be removable */
#define AMStrategies(foo) 12
#else /* !defined(CorrectStrategies) */
#define AMStrategies(foo) (foo)
#endif /* !defined(CorrectStrategies) */
typedef struct StrategyMapData {
ScanKeyData entry[1]; /* VARIABLE LENGTH ARRAY */
} StrategyMapData; /* VARIABLE LENGTH STRUCTURE */
typedef StrategyMapData *StrategyMap;
typedef struct IndexStrategyData {
StrategyMapData strategyMapData[1]; /* VARIABLE LENGTH ARRAY */
} IndexStrategyData; /* VARIABLE LENGTH STRUCTURE */
typedef IndexStrategyData *IndexStrategy;
#endif /*STRAT_H */

View File

@ -0,0 +1,213 @@
/*-------------------------------------------------------------------------
*
* transam.h--
* postgres transaction access method support code header
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: transam.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
* NOTES
 *	Transaction System Version 101 now supports proper oid
* generation and recording in the variable relation.
*
*-------------------------------------------------------------------------
*/
#ifndef TRANSAM_H
#define TRANSAM_H
/* ----------------
* transaction system version id
*
 *	this is stored in the first 4 bytes of the first page of the
 *	log, time and variable relations.  This is so that if we improve
* the format of the transaction log after postgres version 2, then
* people won't have to rebuild their databases.
*
* TRANS_SYSTEM_VERSION 100 means major version 1 minor version 0.
* Two databases with the same major version should be compatible,
* even if their minor versions differ.
* ----------------
*/
#define TRANS_SYSTEM_VERSION 101
/* ----------------
* transaction id status values
*
* someday we will use "11" = 3 = XID_INVALID to mean the
 *	start of run-length encoded log data.
* ----------------
*/
#define XID_COMMIT 2 /* transaction committed */
#define XID_ABORT 1 /* transaction aborted */
#define XID_INPROGRESS 0 /* transaction in progress */
#define XID_INVALID 3 /* other */
typedef unsigned char XidStatus; /* (2 bits) */
/* ----------------
* BitIndexOf computes the index of the Nth xid on a given block
* ----------------
*/
#define BitIndexOf(N) ((N) * 2)
/* ----------------
* transaction page definitions
* ----------------
*/
#define TP_DataSize BLCKSZ
#define TP_NumXidStatusPerBlock (TP_DataSize * 4)
#define TP_NumTimePerBlock (TP_DataSize / 4)
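/* ----------------
 *	worked example of the numbers above, assuming the usual BLCKSZ of
 *	8192 bytes: each xid status occupies 2 bits, so one log block holds
 *	TP_DataSize * 4 = 32768 statuses; each commit time is a 4-byte
 *	AbsoluteTime, so one time block holds TP_DataSize / 4 = 2048 commit
 *	times.  The bit pairs themselves are written by TransBlockSetXidStatus
 *	in transam/transsup.c as 10 = XID_COMMIT, 01 = XID_ABORT and
 *	00 = XID_INPROGRESS.
 * ----------------
 */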
/* ----------------
* LogRelationContents structure
*
* This structure describes the storage of the data in the
* first 128 bytes of the log relation. This storage is never
* used for transaction status because transaction id's begin
* their numbering at 512.
*
* The first 4 bytes of this relation store the version
 *	number of the transaction system.
* ----------------
*/
typedef struct LogRelationContentsData {
int TransSystemVersion;
} LogRelationContentsData;
typedef LogRelationContentsData *LogRelationContents;
/* ----------------
* TimeRelationContents structure
*
* This structure describes the storage of the data in the
* first 2048 bytes of the time relation. This storage is never
* used for transaction commit times because transaction id's begin
* their numbering at 512.
*
* The first 4 bytes of this relation store the version
 *	number of the transaction system.
* ----------------
*/
typedef struct TimeRelationContentsData {
int TransSystemVersion;
} TimeRelationContentsData;
typedef TimeRelationContentsData *TimeRelationContents;
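/* ----------------
 *	the 128 and 2048 byte figures above follow directly from the first
 *	transaction id of 512: 512 xids * 2 status bits = 1024 bits =
 *	128 bytes reserved in the log relation, and 512 xids * 4 bytes of
 *	commit time = 2048 bytes reserved in the time relation.
 * ----------------
 */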
/* ----------------
* VariableRelationContents structure
*
* The variable relation is a special "relation" which
 *	is used to store various system "variables" persistently.
* Unlike other relations in the system, this relation
* is updated in place whenever the variables change.
*
* The first 4 bytes of this relation store the version
 *	number of the transaction system.
*
* Currently, the relation has only one page and the next
* available xid, the last committed xid and the next
* available oid are stored there.
* ----------------
*/
typedef struct VariableRelationContentsData {
int TransSystemVersion;
TransactionId nextXidData;
TransactionId lastXidData;
Oid nextOid;
} VariableRelationContentsData;
typedef VariableRelationContentsData *VariableRelationContents;
/* ----------------
* extern declarations
* ----------------
*/
/*
* prototypes for functions in transam/transam.c
*/
extern int RecoveryCheckingEnabled();
extern void SetRecoveryCheckingEnabled(bool state);
extern bool TransactionLogTest(TransactionId transactionId, XidStatus status);
extern void TransactionLogUpdate(TransactionId transactionId,
XidStatus status);
extern AbsoluteTime TransactionIdGetCommitTime(TransactionId transactionId);
extern void TransRecover(Relation logRelation);
extern void InitializeTransactionLog();
extern bool TransactionIdDidCommit(TransactionId transactionId);
extern bool TransactionIdDidAbort(TransactionId transactionId);
extern bool TransactionIdIsInProgress(TransactionId transactionId);
extern void TransactionIdCommit(TransactionId transactionId);
extern void TransactionIdAbort(TransactionId transactionId);
extern void TransactionIdSetInProgress(TransactionId transactionId);
/* in transam/transsup.c */
extern void AmiTransactionOverride(bool flag);
extern void TransComputeBlockNumber(Relation relation,
TransactionId transactionId, BlockNumber *blockNumberOutP);
extern XidStatus TransBlockGetLastTransactionIdStatus(Block tblock,
TransactionId baseXid, TransactionId *returnXidP);
extern XidStatus TransBlockGetXidStatus(Block tblock,
TransactionId transactionId);
extern void TransBlockSetXidStatus(Block tblock,
TransactionId transactionId, XidStatus xstatus);
extern AbsoluteTime TransBlockGetCommitTime(Block tblock,
TransactionId transactionId);
extern void TransBlockSetCommitTime(Block tblock,
TransactionId transactionId, AbsoluteTime commitTime);
extern XidStatus TransBlockNumberGetXidStatus(Relation relation,
BlockNumber blockNumber, TransactionId xid, bool *failP);
extern void TransBlockNumberSetXidStatus(Relation relation,
BlockNumber blockNumber, TransactionId xid, XidStatus xstatus,
bool *failP);
extern AbsoluteTime TransBlockNumberGetCommitTime(Relation relation,
BlockNumber blockNumber, TransactionId xid, bool *failP);
extern void TransBlockNumberSetCommitTime(Relation relation,
BlockNumber blockNumber, TransactionId xid, AbsoluteTime xtime,
bool *failP);
extern void TransGetLastRecordedTransaction(Relation relation,
TransactionId xid, bool *failP);
/* in transam/varsup.c */
extern void VariableRelationGetNextXid(TransactionId *xidP);
extern void VariableRelationGetLastXid(TransactionId *xidP);
extern void VariableRelationPutNextXid(TransactionId xid);
extern void VariableRelationPutLastXid(TransactionId xid);
extern void VariableRelationGetNextOid(Oid *oid_return);
extern void VariableRelationPutNextOid(Oid *oidP);
extern void GetNewTransactionId(TransactionId *xid);
extern void UpdateLastCommittedXid(TransactionId xid);
extern void GetNewObjectIdBlock(Oid *oid_return, int oid_block_size);
extern void GetNewObjectId(Oid *oid_return);
/* ----------------
* global variable extern declarations
* ----------------
*/
/* in transam.c */
extern Relation LogRelation;
extern Relation TimeRelation;
extern Relation VariableRelation;
extern TransactionId cachedGetCommitTimeXid;
extern AbsoluteTime cachedGetCommitTime;
extern TransactionId cachedTestXid;
extern XidStatus cachedTestXidStatus;
extern TransactionId NullTransactionId;
extern TransactionId AmiTransactionId;
extern TransactionId FirstTransactionId;
extern int RecoveryCheckingEnableState;
/* in transsup.c */
extern bool AMI_OVERRIDE;
/* in varsup.c */
extern int OidGenLockId;
#endif /* TRANSAM_H */

View File

@ -0,0 +1,14 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for access/transam
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
#
#-------------------------------------------------------------------------
SUBSRCS+= transam.c transsup.c varsup.c xact.c xid.c

View File

@ -0,0 +1,675 @@
/*-------------------------------------------------------------------------
*
* transam.c--
* postgres transaction log/time interface routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
* NOTES
* This file contains the high level access-method interface to the
* transaction system.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "machine.h" /* in port/ directory (needed for BLCKSZ) */
#include "access/heapam.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "utils/memutils.h"
#include "utils/mcxt.h"
#include "utils/rel.h"
#include "utils/elog.h"
#include "utils/nabstime.h"
#include "catalog/catname.h"
#include "access/transam.h"
#include "access/xact.h"
#include "commands/vacuum.h" /* for VacuumRunning */
/* ----------------
* global variables holding pointers to relations used
* by the transaction system. These are initialized by
* InitializeTransactionLog().
* ----------------
*/
Relation LogRelation = (Relation) NULL;
Relation TimeRelation = (Relation) NULL;
Relation VariableRelation = (Relation) NULL;
/* ----------------
* global variables holding cached transaction id's and statuses.
* ----------------
*/
TransactionId cachedGetCommitTimeXid;
AbsoluteTime cachedGetCommitTime;
TransactionId cachedTestXid;
XidStatus cachedTestXidStatus;
/* ----------------
* transaction system constants
* ----------------
*/
/* ----------------------------------------------------------------
* transaction system constants
*
* read the comments for GetNewTransactionId in order to
* understand the initial values for AmiTransactionId and
* FirstTransactionId. -cim 3/23/90
* ----------------------------------------------------------------
*/
TransactionId NullTransactionId = (TransactionId) 0;
TransactionId AmiTransactionId = (TransactionId) 512;
TransactionId FirstTransactionId = (TransactionId) 514;
/* ----------------
* transaction recovery state variables
*
* When the transaction system is initialized, we may
 *	need to do recovery checking.  This decision is made
* by the postmaster or the user by supplying the backend
* with a special flag. In general, we want to do recovery
* checking whenever we are running without a postmaster
* or when the number of backends running under the postmaster
* goes from zero to one. -cim 3/21/90
* ----------------
*/
int RecoveryCheckingEnableState = 0;
/* ------------------
* spinlock for oid generation
* -----------------
*/
extern int OidGenLockId;
/* ----------------
* globals that must be reset at abort
* ----------------
*/
extern bool BuildingBtree;
/* ----------------
* recovery checking accessors
* ----------------
*/
int
RecoveryCheckingEnabled()
{
return RecoveryCheckingEnableState;
}
void
SetRecoveryCheckingEnabled(bool state)
{
RecoveryCheckingEnableState = (state == true);
}
/* ----------------------------------------------------------------
* postgres log/time access method interface
*
* TransactionLogTest
* TransactionLogUpdate
* ========
* these functions do work for the interface
* functions - they search/retrieve and append/update
* information in the log and time relations.
* ----------------------------------------------------------------
*/
/* --------------------------------
* TransactionLogTest
* --------------------------------
*/
bool /* true/false: does transaction id have specified status? */
TransactionLogTest(TransactionId transactionId, /* transaction id to test */
XidStatus status) /* transaction status */
{
BlockNumber blockNumber;
XidStatus xidstatus; /* recorded status of xid */
bool fail = false; /* success/failure */
/* ----------------
* during initialization consider all transactions
* as having been committed
* ----------------
*/
if (! RelationIsValid(LogRelation))
return (bool) (status == XID_COMMIT);
/* ----------------
* before going to the buffer manager, check our single
* item cache to see if we didn't just check the transaction
* status a moment ago.
* ----------------
*/
if (TransactionIdEquals(transactionId, cachedTestXid))
return (bool)
(status == cachedTestXidStatus);
/* ----------------
* compute the item pointer corresponding to the
* page containing our transaction id. We save the item in
* our cache to speed up things if we happen to ask for the
* same xid's status more than once.
* ----------------
*/
TransComputeBlockNumber(LogRelation, transactionId, &blockNumber);
xidstatus = TransBlockNumberGetXidStatus(LogRelation,
blockNumber,
transactionId,
&fail);
if (! fail) {
TransactionIdStore(transactionId, &cachedTestXid);
cachedTestXidStatus = xidstatus;
return (bool)
(status == xidstatus);
}
/* ----------------
* here the block didn't contain the information we wanted
* ----------------
*/
elog(WARN, "TransactionLogTest: failed to get xidstatus");
/*
* so lint is happy...
*/
return(false);
}
/* --------------------------------
* TransactionLogUpdate
* --------------------------------
*/
void
TransactionLogUpdate(TransactionId transactionId, /* trans id to update */
XidStatus status) /* new trans status */
{
BlockNumber blockNumber;
bool fail = false; /* success/failure */
AbsoluteTime currentTime; /* time of this transaction */
/* ----------------
* during initialization we don't record any updates.
* ----------------
*/
if (! RelationIsValid(LogRelation))
return;
/* ----------------
* get the transaction commit time
* ----------------
*/
currentTime = getSystemTime();
/* ----------------
* update the log relation
* ----------------
*/
TransComputeBlockNumber(LogRelation, transactionId, &blockNumber);
TransBlockNumberSetXidStatus(LogRelation,
blockNumber,
transactionId,
status,
&fail);
/* ----------------
* update (invalidate) our single item TransactionLogTest cache.
* ----------------
*/
TransactionIdStore(transactionId, &cachedTestXid);
cachedTestXidStatus = status;
/* ----------------
* now we update the time relation, if necessary
* (we only record commit times)
* ----------------
*/
if (RelationIsValid(TimeRelation) && status == XID_COMMIT) {
TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber);
TransBlockNumberSetCommitTime(TimeRelation,
blockNumber,
transactionId,
currentTime,
&fail);
/* ----------------
* update (invalidate) our single item GetCommitTime cache.
* ----------------
*/
TransactionIdStore(transactionId, &cachedGetCommitTimeXid);
cachedGetCommitTime = currentTime;
}
/* ----------------
* now we update the "last committed transaction" field
* in the variable relation if we are recording a commit.
* ----------------
*/
if (RelationIsValid(VariableRelation) && status == XID_COMMIT)
UpdateLastCommittedXid(transactionId);
}
/* --------------------------------
* TransactionIdGetCommitTime
* --------------------------------
*/
AbsoluteTime /* commit time of transaction id */
TransactionIdGetCommitTime(TransactionId transactionId) /* transaction id to test */
{
BlockNumber blockNumber;
AbsoluteTime commitTime; /* commit time */
bool fail = false; /* success/failure */
/* ----------------
* return invalid if we aren't running yet...
* ----------------
*/
if (! RelationIsValid(TimeRelation))
return INVALID_ABSTIME;
/* ----------------
* before going to the buffer manager, check our single
* item cache to see if we didn't just get the commit time
* a moment ago.
* ----------------
*/
if (TransactionIdEquals(transactionId, cachedGetCommitTimeXid))
return cachedGetCommitTime;
/* ----------------
* compute the item pointer corresponding to the
* page containing our transaction commit time
* ----------------
*/
TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber);
commitTime = TransBlockNumberGetCommitTime(TimeRelation,
blockNumber,
transactionId,
&fail);
/* ----------------
* update our cache and return the transaction commit time
* ----------------
*/
if (! fail) {
TransactionIdStore(transactionId, &cachedGetCommitTimeXid);
cachedGetCommitTime = commitTime;
return commitTime;
} else
return INVALID_ABSTIME;
}
/* ----------------------------------------------------------------
* transaction recovery code
* ----------------------------------------------------------------
*/
/* --------------------------------
* TransRecover
*
 *	perform transaction recovery checking.
*
 *	Note: this should only be performed if no other backends
* are running. This is known by the postmaster and
* conveyed by the postmaster passing a "do recovery checking"
* flag to the backend.
*
* here we get the last recorded transaction from the log,
* get the "last" and "next" transactions from the variable relation
 *	and then perform some integrity tests:
*
 *	1) No transaction may exist higher than the "next" available
* transaction recorded in the variable relation. If this is the
* case then it means either the log or the variable relation
* has become corrupted.
*
 *	2) The last committed transaction may not be higher than the
* next available transaction for the same reason.
*
 *	3) The last recorded transaction may not be lower than the
* last committed transaction. (the reverse is ok - it means
* that some transactions have aborted since the last commit)
*
* Here is what the proper situation looks like. The line
* represents the data stored in the log. 'c' indicates the
* transaction was recorded as committed, 'a' indicates an
 *	aborted transaction and '.' represents information not
* recorded. These may correspond to in progress transactions.
*
* c c a c . . a . . . . . . . . . .
* | |
* last next
*
* Since "next" is only incremented by GetNewTransactionId() which
* is called when transactions are started. Hence if there
* are commits or aborts after "next", then it means we committed
* or aborted BEFORE we started the transaction. This is the
* rational behind constraint (1).
*
* Likewise, "last" should never greater then "next" for essentially
* the same reason - it would imply we committed before we started.
* This is the reasoning for (2).
*
* (3) implies we may never have a situation such as:
*
* c c a c . . a c . . . . . . . . .
* | |
* last next
*
 *	where there is a 'c' greater than "last".
*
* Recovery checking is more difficult in the case where
* several backends are executing concurrently because the
* transactions may be executing in the other backends.
* So, we only do recovery stuff when the backend is explicitly
* passed a flag on the command line.
* --------------------------------
*/
void
TransRecover(Relation logRelation)
{
#if 0
/* ----------------
* first get the last recorded transaction in the log.
* ----------------
*/
TransGetLastRecordedTransaction(logRelation, logLastXid, &fail);
if (fail == true)
elog(WARN, "TransRecover: failed TransGetLastRecordedTransaction");
/* ----------------
* next get the "last" and "next" variables
* ----------------
*/
VariableRelationGetLastXid(&varLastXid);
VariableRelationGetNextXid(&varNextXid);
/* ----------------
 *	integrity test (1)
* ----------------
*/
if (TransactionIdIsLessThan(varNextXid, logLastXid))
elog(WARN, "TransRecover: varNextXid < logLastXid");
/* ----------------
 *	integrity test (2)
* ----------------
*/
/* ----------------
 *	integrity test (3)
* ----------------
*/
/* ----------------
* here we have a valid "
*
* **** RESUME HERE ****
* ----------------
*/
varNextXid = TransactionIdDup(varLastXid);
TransactionIdIncrement(&varNextXid);
VarPut(var, VAR_PUT_LASTXID, varLastXid);
VarPut(var, VAR_PUT_NEXTXID, varNextXid);
#endif
}
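/* --------------------------------
 *	a minimal sketch (commented out, like the body above) of what
 *	integrity tests (2) and (3) could look like, reusing the local
 *	variables of the #if 0 block:
 *
 *	if (TransactionIdIsLessThan(varNextXid, varLastXid))
 *	    elog(WARN, "TransRecover: varNextXid < varLastXid");
 *	if (TransactionIdIsLessThan(logLastXid, varLastXid))
 *	    elog(WARN, "TransRecover: logLastXid < varLastXid");
 * --------------------------------
 */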
/* ----------------------------------------------------------------
* Interface functions
*
* InitializeTransactionLog
* ========
* this function (called near cinit) initializes
* the transaction log, time and variable relations.
*
* TransactionId DidCommit
* TransactionId DidAbort
* TransactionId IsInProgress
* ========
* these functions test the transaction status of
* a specified transaction id.
*
* TransactionId Commit
* TransactionId Abort
* TransactionId SetInProgress
* ========
* these functions set the transaction status
* of the specified xid. TransactionIdCommit() also
* records the current time in the time relation
* and updates the variable relation counter.
*
* ----------------------------------------------------------------
*/
/*
* InitializeTransactionLog --
* Initializes transaction logging.
*/
void
InitializeTransactionLog()
{
Relation logRelation;
Relation timeRelation;
MemoryContext oldContext;
/* ----------------
* don't do anything during bootstrapping
* ----------------
*/
if (AMI_OVERRIDE)
return;
/* ----------------
* disable the transaction system so the access methods
* don't interfere during initialization.
* ----------------
*/
OverrideTransactionSystem(true);
/* ----------------
* make sure allocations occur within the top memory context
* so that our log management structures are protected from
* garbage collection at the end of every transaction.
* ----------------
*/
oldContext = MemoryContextSwitchTo(TopMemoryContext);
/* ----------------
* first open the log and time relations
* (these are created by amiint so they are guaranteed to exist)
* ----------------
*/
logRelation = heap_openr(LogRelationName);
timeRelation = heap_openr(TimeRelationName);
VariableRelation = heap_openr(VariableRelationName);
/* ----------------
* XXX TransactionLogUpdate requires that LogRelation
* and TimeRelation are valid so we temporarily set
* them so we can initialize things properly.
* This could be done cleaner.
* ----------------
*/
LogRelation = logRelation;
TimeRelation = timeRelation;
/* ----------------
* if we have a virgin database, we initialize the log and time
* relation by committing the AmiTransactionId (id 512) and we
* initialize the variable relation by setting the next available
* transaction id to FirstTransactionId (id 514). OID initialization
* happens as a side effect of bootstrapping in varsup.c.
* ----------------
*/
SpinAcquire(OidGenLockId);
if (!TransactionIdDidCommit(AmiTransactionId)) {
/* ----------------
* SOMEDAY initialize the information stored in
* the headers of the log/time/variable relations.
* ----------------
*/
TransactionLogUpdate(AmiTransactionId, XID_COMMIT);
VariableRelationPutNextXid(FirstTransactionId);
} else if (RecoveryCheckingEnabled()) {
/* ----------------
* if we have a pre-initialized database and if the
* perform recovery checking flag was passed then we
* do our database integrity checking.
* ----------------
*/
TransRecover(logRelation);
}
LogRelation = (Relation) NULL;
TimeRelation = (Relation) NULL;
SpinRelease(OidGenLockId);
/* ----------------
* now re-enable the transaction system
* ----------------
*/
OverrideTransactionSystem(false);
/* ----------------
* instantiate the global variables
* ----------------
*/
LogRelation = logRelation;
TimeRelation = timeRelation;
/* ----------------
* restore the memory context to the previous context
* before we return from initialization.
* ----------------
*/
MemoryContextSwitchTo(oldContext);
}
/* --------------------------------
* TransactionId DidCommit
* TransactionId DidAbort
* TransactionId IsInProgress
* --------------------------------
*/
/*
* TransactionIdDidCommit --
* True iff transaction associated with the identifier did commit.
*
* Note:
* Assumes transaction identifier is valid.
*/
bool /* true if given transaction committed */
TransactionIdDidCommit(TransactionId transactionId)
{
if (AMI_OVERRIDE)
return true;
return
TransactionLogTest(transactionId, XID_COMMIT);
}
/*
 * TransactionIdDidAbort --
* True iff transaction associated with the identifier did abort.
*
* Note:
* Assumes transaction identifier is valid.
* XXX Is this unneeded?
*/
bool /* true if given transaction aborted */
TransactionIdDidAbort(TransactionId transactionId)
{
if (AMI_OVERRIDE)
return false;
return
TransactionLogTest(transactionId, XID_ABORT);
}
bool /* true if given transaction neither committed nor aborted */
TransactionIdIsInProgress(TransactionId transactionId)
{
if (AMI_OVERRIDE)
return false;
return
TransactionLogTest(transactionId, XID_INPROGRESS);
}
/* --------------------------------
* TransactionId Commit
* TransactionId Abort
* TransactionId SetInProgress
* --------------------------------
*/
/*
* TransactionIdCommit --
* Commits the transaction associated with the identifier.
*
* Note:
* Assumes transaction identifier is valid.
*/
void
TransactionIdCommit(TransactionId transactionId)
{
if (AMI_OVERRIDE)
return;
/*
 * Within TransactionLogUpdate we call UpdateLastCommittedXid()
* which assumes we have exclusive access to pg_variable.
* Therefore we need to get exclusive access before calling
* TransactionLogUpdate. -mer 18 Aug 1992
*/
SpinAcquire(OidGenLockId);
TransactionLogUpdate(transactionId, XID_COMMIT);
SpinRelease(OidGenLockId);
}
/*
* TransactionIdAbort --
* Aborts the transaction associated with the identifier.
*
* Note:
* Assumes transaction identifier is valid.
*/
void
TransactionIdAbort(TransactionId transactionId)
{
BuildingBtree = false;
if (VacuumRunning)
vc_abort();
if (AMI_OVERRIDE)
return;
TransactionLogUpdate(transactionId, XID_ABORT);
}
void
TransactionIdSetInProgress(TransactionId transactionId)
{
if (AMI_OVERRIDE)
return;
TransactionLogUpdate(transactionId, XID_INPROGRESS);
}

View File

@ -0,0 +1,663 @@
/*-------------------------------------------------------------------------
*
* transsup.c--
* postgres transaction access method support code
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
* NOTES
* This file contains support functions for the high
* level access method interface routines found in transam.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "machine.h" /* in port/ directory (needed for BLCKSZ) */
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "utils/elog.h"
#include "utils/memutils.h"
#include "utils/nabstime.h"
#include "catalog/heap.h"
#include "access/transam.h" /* where the declarations go */
#include "access/xact.h" /* where the declarations go */
#include "storage/smgr.h"
/* ----------------------------------------------------------------
* general support routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* AmiTransactionOverride
*
* This function is used to manipulate the bootstrap flag.
* --------------------------------
*/
void
AmiTransactionOverride(bool flag)
{
AMI_OVERRIDE = flag;
}
/* --------------------------------
* TransComputeBlockNumber
* --------------------------------
*/
void
TransComputeBlockNumber(Relation relation, /* relation to test */
TransactionId transactionId, /* transaction id to test */
BlockNumber *blockNumberOutP)
{
long itemsPerBlock;
/* ----------------
* we calculate the block number of our transaction
* by dividing the transaction id by the number of
* transaction things per block.
* ----------------
*/
if (relation == LogRelation)
itemsPerBlock = TP_NumXidStatusPerBlock;
else if (relation == TimeRelation)
itemsPerBlock = TP_NumTimePerBlock;
else
elog(WARN, "TransComputeBlockNumber: unknown relation");
/* ----------------
* warning! if the transaction id's get too large
* then a BlockNumber may not be large enough to hold the results
* of our division.
*
* XXX this will all vanish soon when we implement an improved
* transaction id schema -cim 3/23/90
*
* This has vanished now that xid's are 4 bytes (no longer 5).
* -mer 5/24/92
* ----------------
*/
(*blockNumberOutP) = transactionId / itemsPerBlock;
}
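/* --------------------------------
 *	worked example (assuming BLCKSZ = 8192, hence 32768 statuses or
 *	2048 commit times per block): xid 70000 lives on block
 *	70000 / 32768 = 2 of the log relation but on block
 *	70000 / 2048 = 34 of the time relation.
 * --------------------------------
 */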
/* ----------------------------------------------------------------
* trans block support routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* TransBlockGetLastTransactionIdStatus
*
* This returns the status and transaction id of the last
* transaction information recorded on the given TransBlock.
* --------------------------------
*/
XidStatus
TransBlockGetLastTransactionIdStatus(Block tblock,
TransactionId baseXid,
TransactionId *returnXidP)
{
Index index;
Index maxIndex;
bits8 bit1;
bits8 bit2;
BitIndex offset;
XidStatus xstatus;
/* ----------------
* sanity check
* ----------------
*/
Assert((tblock != NULL));
/* ----------------
* search downward from the top of the block data, looking
 *	for the first non-in-progress transaction status.  Since we
 *	are scanning backward, this will be the last recorded transaction
* status on the block.
* ----------------
*/
maxIndex = TP_NumXidStatusPerBlock;
for (index = maxIndex-1; index>=0; index--) {
offset = BitIndexOf(index);
bit1 = ((bits8) BitArrayBitIsSet((BitArray) tblock, offset++)) << 1;
bit2 = (bits8) BitArrayBitIsSet((BitArray) tblock, offset);
xstatus = (bit1 | bit2) ;
/* ----------------
* here we have the status of some transaction, so test
* if the status is recorded as "in progress". If so, then
* we save the transaction id in the place specified by the caller.
* ----------------
*/
if (xstatus != XID_INPROGRESS) {
if (returnXidP != NULL) {
TransactionIdStore(baseXid, returnXidP);
TransactionIdAdd(returnXidP, index);
}
break;
}
}
/* ----------------
* if we get here and index is 0 it means we couldn't find
* a non-inprogress transaction on the block. For now we just
* return this info to the user. They can check if the return
* status is "in progress" to know this condition has arisen.
* ----------------
*/
if (index == 0) {
if (returnXidP != NULL)
TransactionIdStore(baseXid, returnXidP);
}
/* ----------------
* return the status to the user
* ----------------
*/
return xstatus;
}
/* --------------------------------
* TransBlockGetXidStatus
*
* This returns the status of the desired transaction
* --------------------------------
*/
XidStatus
TransBlockGetXidStatus(Block tblock,
TransactionId transactionId)
{
Index index;
bits8 bit1;
bits8 bit2;
BitIndex offset;
/* ----------------
* sanity check
* ----------------
*/
if (tblock == NULL) {
return XID_INVALID;
}
/* ----------------
* calculate the index into the transaction data where
* our transaction status is located
*
* XXX this will be replaced soon when we move to the
* new transaction id scheme -cim 3/23/90
*
* The old system has now been replaced. -mer 5/24/92
* ----------------
*/
index = transactionId % TP_NumXidStatusPerBlock;
/* ----------------
* get the data at the specified index
* ----------------
*/
offset = BitIndexOf(index);
bit1 = ((bits8) BitArrayBitIsSet((BitArray) tblock, offset++)) << 1;
bit2 = (bits8) BitArrayBitIsSet((BitArray) tblock, offset);
/* ----------------
* return the transaction status to the caller
* ----------------
*/
return (XidStatus)
(bit1 | bit2);
}
/* --------------------------------
* TransBlockSetXidStatus
*
* This sets the status of the desired transaction
* --------------------------------
*/
void
TransBlockSetXidStatus(Block tblock,
TransactionId transactionId,
XidStatus xstatus)
{
Index index;
BitIndex offset;
/* ----------------
* sanity check
* ----------------
*/
if (tblock == NULL)
return;
/* ----------------
* calculate the index into the transaction data where
 *	we should store our transaction status.
*
* XXX this will be replaced soon when we move to the
* new transaction id scheme -cim 3/23/90
*
* The new scheme is here -mer 5/24/92
* ----------------
*/
index = transactionId % TP_NumXidStatusPerBlock;
offset = BitIndexOf(index);
/* ----------------
* store the transaction value at the specified offset
* ----------------
*/
switch(xstatus) {
case XID_COMMIT: /* set 10 */
BitArraySetBit((BitArray) tblock, offset);
BitArrayClearBit((BitArray) tblock, offset + 1);
break;
case XID_ABORT: /* set 01 */
BitArrayClearBit((BitArray) tblock, offset);
BitArraySetBit((BitArray) tblock, offset + 1);
break;
case XID_INPROGRESS: /* set 00 */
BitArrayClearBit((BitArray) tblock, offset);
BitArrayClearBit((BitArray) tblock, offset + 1);
break;
default:
elog(NOTICE,
"TransBlockSetXidStatus: invalid status: %d (ignored)",
xstatus);
break;
}
}
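/* --------------------------------
 *	a minimal usage sketch of the two routines above (illustration only,
 *	not part of the access method proper): on a zeroed block every xid
 *	reads back as XID_INPROGRESS, and a status that is set can be read
 *	back with the corresponding get:
 *
 *	char	page[BLCKSZ];
 *
 *	memset(page, 0, sizeof(page));
 *	TransBlockSetXidStatus((Block) page, (TransactionId) 600, XID_COMMIT);
 *	Assert(TransBlockGetXidStatus((Block) page, (TransactionId) 600)
 *	       == XID_COMMIT);
 * --------------------------------
 */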
/* --------------------------------
* TransBlockGetCommitTime
*
* This returns the transaction commit time for the
* specified transaction id in the trans block.
* --------------------------------
*/
AbsoluteTime
TransBlockGetCommitTime(Block tblock,
TransactionId transactionId)
{
Index index;
AbsoluteTime *timeArray;
/* ----------------
* sanity check
* ----------------
*/
if (tblock == NULL)
return INVALID_ABSTIME;
/* ----------------
* calculate the index into the transaction data where
* our transaction commit time is located
*
* XXX this will be replaced soon when we move to the
* new transaction id scheme -cim 3/23/90
*
* The new scheme is here. -mer 5/24/92
* ----------------
*/
index = transactionId % TP_NumTimePerBlock;
/* ----------------
* return the commit time to the caller
* ----------------
*/
timeArray = (AbsoluteTime *) tblock;
return (AbsoluteTime)
timeArray[ index ];
}
/* --------------------------------
* TransBlockSetCommitTime
*
* This sets the commit time of the specified transaction
* --------------------------------
*/
void
TransBlockSetCommitTime(Block tblock,
TransactionId transactionId,
AbsoluteTime commitTime)
{
Index index;
AbsoluteTime *timeArray;
/* ----------------
* sanity check
* ----------------
*/
if (tblock == NULL)
return;
/* ----------------
* calculate the index into the transaction data where
 *	we should store our transaction commit time.
*
* XXX this will be replaced soon when we move to the
* new transaction id scheme -cim 3/23/90
*
* The new scheme is here. -mer 5/24/92
* ----------------
*/
index = transactionId % TP_NumTimePerBlock;
/* ----------------
* store the transaction commit time at the specified index
* ----------------
*/
timeArray = (AbsoluteTime *) tblock;
timeArray[ index ] = commitTime;
}
/* ----------------------------------------------------------------
* transam i/o support routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* TransBlockNumberGetXidStatus
* --------------------------------
*/
XidStatus
TransBlockNumberGetXidStatus(Relation relation,
BlockNumber blockNumber,
TransactionId xid,
bool *failP)
{
Buffer buffer; /* buffer associated with block */
Block block; /* block containing xstatus */
XidStatus xstatus; /* recorded status of xid */
bool localfail; /* bool used if failP = NULL */
/* ----------------
* SOMEDAY place a read lock on the log relation
* That someday is today 5 Aug 1991 -mer
* ----------------
*/
RelationSetLockForRead(relation);
/* ----------------
* get the page containing the transaction information
* ----------------
*/
buffer = ReadBuffer(relation, blockNumber);
block = BufferGetBlock(buffer);
/* ----------------
* get the status from the block. note, for now we always
* return false in failP.
* ----------------
*/
if (failP == NULL)
failP = &localfail;
(*failP) = false;
xstatus = TransBlockGetXidStatus(block, xid);
/* ----------------
* release the buffer and return the status
* ----------------
*/
ReleaseBuffer(buffer);
/* ----------------
* SOMEDAY release our lock on the log relation
* ----------------
*/
RelationUnsetLockForRead(relation);
return
xstatus;
}
/* --------------------------------
* TransBlockNumberSetXidStatus
* --------------------------------
*/
void
TransBlockNumberSetXidStatus(Relation relation,
BlockNumber blockNumber,
TransactionId xid,
XidStatus xstatus,
bool *failP)
{
Buffer buffer; /* buffer associated with block */
Block block; /* block containing xstatus */
bool localfail; /* bool used if failP = NULL */
/* ----------------
* SOMEDAY gain exclusive access to the log relation
*
* That someday is today 5 Aug 1991 -mer
* ----------------
*/
RelationSetLockForWrite(relation);
/* ----------------
* get the block containing the transaction status
* ----------------
*/
buffer = ReadBuffer(relation, blockNumber);
block = BufferGetBlock(buffer);
/* ----------------
* attempt to update the status of the transaction on the block.
* if we are successful, write the block. otherwise release the buffer.
* note, for now we always return false in failP.
* ----------------
*/
if (failP == NULL)
failP = &localfail;
(*failP) = false;
TransBlockSetXidStatus(block, xid, xstatus);
if ((*failP) == false)
WriteBuffer(buffer);
else
ReleaseBuffer(buffer);
/* ----------------
* SOMEDAY release our lock on the log relation
* ----------------
*/
RelationUnsetLockForWrite(relation);
}
/* --------------------------------
* TransBlockNumberGetCommitTime
* --------------------------------
*/
AbsoluteTime
TransBlockNumberGetCommitTime(Relation relation,
BlockNumber blockNumber,
TransactionId xid,
bool *failP)
{
Buffer buffer; /* buffer associated with block */
Block block; /* block containing commit time */
bool localfail; /* bool used if failP = NULL */
AbsoluteTime xtime; /* commit time */
/* ----------------
* SOMEDAY place a read lock on the time relation
*
* That someday is today 5 Aug. 1991 -mer
* ----------------
*/
RelationSetLockForRead(relation);
/* ----------------
* get the block containing the transaction information
* ----------------
*/
buffer = ReadBuffer(relation, blockNumber);
block = BufferGetBlock(buffer);
/* ----------------
* get the commit time from the block
* note, for now we always return false in failP.
* ----------------
*/
if (failP == NULL)
failP = &localfail;
(*failP) = false;
xtime = TransBlockGetCommitTime(block, xid);
/* ----------------
* release the buffer and return the commit time
* ----------------
*/
ReleaseBuffer(buffer);
/* ----------------
* SOMEDAY release our lock on the time relation
* ----------------
*/
RelationUnsetLockForRead(relation);
if ((*failP) == false)
return xtime;
else
return INVALID_ABSTIME;
}
/* --------------------------------
* TransBlockNumberSetCommitTime
* --------------------------------
*/
void
TransBlockNumberSetCommitTime(Relation relation,
BlockNumber blockNumber,
TransactionId xid,
AbsoluteTime xtime,
bool *failP)
{
Buffer buffer; /* buffer associated with block */
Block block; /* block containing commit time */
bool localfail; /* bool used if failP = NULL */
/* ----------------
* SOMEDAY gain exclusive access to the time relation
*
* That someday is today 5 Aug. 1991 -mer
* ----------------
*/
RelationSetLockForWrite(relation);
/* ----------------
* get the block containing our commit time
* ----------------
*/
buffer = ReadBuffer(relation, blockNumber);
block = BufferGetBlock(buffer);
/* ----------------
* attempt to update the commit time of the transaction on the block.
* if we are successful, write the block. otherwise release the buffer.
* note, for now we always return false in failP.
* ----------------
*/
if (failP == NULL)
failP = &localfail;
(*failP) = false;
TransBlockSetCommitTime(block, xid, xtime);
if ((*failP) == false)
WriteBuffer(buffer);
else
ReleaseBuffer(buffer);
/* ----------------
* SOMEDAY release our lock on the time relation
* ----------------
*/
RelationUnsetLockForWrite(relation);
}
/* --------------------------------
* TransGetLastRecordedTransaction
* --------------------------------
*/
void
TransGetLastRecordedTransaction(Relation relation,
TransactionId xid, /* return: transaction id */
bool *failP)
{
BlockNumber blockNumber; /* block number */
Buffer buffer; /* buffer associated with block */
Block block; /* block containing xid status */
BlockNumber n; /* number of blocks in the relation */
TransactionId baseXid;
(*failP) = false;
/* ----------------
* SOMEDAY gain exclusive access to the log relation
*
* That someday is today 5 Aug. 1991 -mer
* It looks to me like we only need to set a read lock here, despite
* the above comment about exclusive access. The block is never
* actually written into, we only check status bits.
* ----------------
*/
RelationSetLockForRead(relation);
/* ----------------
* we assume the last block of the log contains the last
* recorded transaction. If the relation is empty we return
* failure to the user.
* ----------------
*/
n = RelationGetNumberOfBlocks(relation);
if (n == 0) {
(*failP) = true;
return;
}
/* ----------------
* get the block containing the transaction information
* ----------------
*/
blockNumber = n-1;
buffer = ReadBuffer(relation, blockNumber);
block = BufferGetBlock(buffer);
/* ----------------
* get the last xid on the block
* ----------------
*/
baseXid = blockNumber * TP_NumXidStatusPerBlock;
/* XXX ???? xid won't get returned! - AY '94 */
(void) TransBlockGetLastTransactionIdStatus(block, baseXid, &xid);
ReleaseBuffer(buffer);
/* ----------------
* SOMEDAY release our lock on the log relation
* ----------------
*/
RelationUnsetLockForRead(relation);
}

View File

@ -0,0 +1,606 @@
/*-------------------------------------------------------------------------
*
* varsup.c--
* postgres variable relation support routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <math.h>
#include "postgres.h"
#include "machine.h" /* in port/ directory (needed for BLCKSZ) */
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h" /* for OIDGENLOCKID */
#include "utils/rel.h"
#include "utils/elog.h"
#include "access/heapam.h"
#include "access/transam.h" /* where the declarations go */
#include "access/xact.h" /* where the declarations go */
#include "catalog/catname.h"
/* ----------
* note: we reserve the first 16384 object ids for internal use.
* oid's less than this appear in the .bki files. the choice of
* 16384 is completely arbitrary.
* ----------
*/
#define BootstrapObjectIdData 16384
/* ---------------------
* spin lock for oid generation
* ---------------------
*/
int OidGenLockId;
/* ----------------------------------------------------------------
* variable relation query/update routines
* ----------------------------------------------------------------
*/
/* --------------------------------
* VariableRelationGetNextXid
* --------------------------------
*/
void
VariableRelationGetNextXid(TransactionId *xidP)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
 *	We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (! RelationIsValid(VariableRelation))
return;
/* ----------------
 *	read the variable page, get the nextXid field and
* release the buffer
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationGetNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(var->nextXidData, xidP);
ReleaseBuffer(buf);
}
/* --------------------------------
* VariableRelationGetLastXid
* --------------------------------
*/
void
VariableRelationGetLastXid(TransactionId *xidP)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
 *	We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (! RelationIsValid(VariableRelation))
return;
/* ----------------
 *	read the variable page, get the lastXid field and
* release the buffer
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationGetNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(var->lastXidData, xidP);
ReleaseBuffer(buf);
}
/* --------------------------------
* VariableRelationPutNextXid
* --------------------------------
*/
void
VariableRelationPutNextXid(TransactionId xid)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
 *	We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (! RelationIsValid(VariableRelation))
return;
/* ----------------
* read the variable page, update the nextXid field and
* write the page back out to disk.
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationPutNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(xid, &(var->nextXidData));
WriteBuffer(buf);
}
/* --------------------------------
* VariableRelationPutLastXid
* --------------------------------
*/
void
VariableRelationPutLastXid(TransactionId xid)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
 *	We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (! RelationIsValid(VariableRelation))
return;
/* ----------------
* read the variable page, update the lastXid field and
* force the page back out to disk.
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationPutLastXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
TransactionIdStore(xid, &(var->lastXidData));
WriteBuffer(buf);
}
/* --------------------------------
* VariableRelationGetNextOid
* --------------------------------
*/
void
VariableRelationGetNextOid(Oid *oid_return)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
 *	We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* if the variable relation is not initialized, then we
* assume we are running at bootstrap time and so we return
* an invalid object id -- during this time GetNextBootstrapObjectId
 *	should be called instead.
* ----------------
*/
if (! RelationIsValid(VariableRelation)) {
if (PointerIsValid(oid_return))
(*oid_return) = InvalidOid;
return;
}
/* ----------------
 *	read the variable page, get the nextOid field and
* release the buffer
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationGetNextXid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
if (PointerIsValid(oid_return)) {
/* ----------------
* nothing up my sleeve... what's going on here is that this code
* is guaranteed never to be called until all files in data/base/
* are created, and the template database exists. at that point,
* we want to append a pg_database tuple. the first time we do
* this, the oid stored in pg_variable will be bogus, so we use
* a bootstrap value defined at the top of this file.
*
* this comment no longer holds true. This code is called before
* all of the files in data/base are created and you can't rely
* on system oid's to be less than BootstrapObjectIdData. mer 9/18/91
* ----------------
*/
if (OidIsValid(var->nextOid))
(*oid_return) = var->nextOid;
else
(*oid_return) = BootstrapObjectIdData;
}
ReleaseBuffer(buf);
}
/* --------------------------------
* VariableRelationPutNextOid
* --------------------------------
*/
void
VariableRelationPutNextOid(Oid *oidP)
{
Buffer buf;
VariableRelationContents var;
/* ----------------
* We assume that a spinlock has been acquired to guarantee
* exclusive access to the variable relation.
* ----------------
*/
/* ----------------
* do nothing before things are initialized
* ----------------
*/
if (! RelationIsValid(VariableRelation))
return;
/* ----------------
* sanity check
* ----------------
*/
if (! PointerIsValid(oidP))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationPutNextOid: invalid oid pointer");
}
/* ----------------
* read the variable page, update the nextOid field and
* write the page back out to disk.
* ----------------
*/
buf = ReadBuffer(VariableRelation, 0);
if (! BufferIsValid(buf))
{
SpinRelease(OidGenLockId);
elog(WARN, "VariableRelationPutNextOid: ReadBuffer failed");
}
var = (VariableRelationContents) BufferGetBlock(buf);
var->nextOid = (*oidP);
WriteBuffer(buf);
}
/* ----------------------------------------------------------------
* transaction id generation support
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewTransactionId
*
* In the version 2 transaction system, transaction id's are
* restricted in several ways.
*
* First, all transaction id's are even numbers (4, 88, 121342, etc).
* This means the binary representation of the number will never
* have the least significant bit set. This means the binary
* representation will never indicate that the transaction id
* holds an XID when it in fact holds a commit time. This makes
* it possible for the vacuum daemon to discard information from the log and time
* relations for committed tuples. This is important when archiving
* tuples to an optical disk because tuples with commit times
* stored in their xid fields will not need to consult the log
* and time relations.
*
* Second, since we may someday perform compression of the data
* in the log and time relations, we cause the numbering of the
* transaction ids to begin at 512. This means that some space
* on the page of the log and time relations corresponding to
* transaction id's 0 - 510 will never be used. This space is
* in fact used to store the version number of the postgres
* transaction log and will someday store compression information
* about the log.
*
* Lastly, rather than access the variable relation each time
* a backend requests a new transaction id, we "prefetch" 32
* transaction id's by incrementing the nextXid stored in the
* var relation by 64 (remember only even xid's are legal) and then
* returning these id's one at a time until they are exhausted.
* This means we reduce the number of accesses to the variable
* relation by 32 for each backend.
*
* Note: 32 has no special significance. We don't want the
* number to be too large because when the backend
* terminates, we lose the xid's we cached.
*
* ----------------
*/
#define VAR_XID_PREFETCH 32
static int prefetched_xid_count = 0;
static TransactionId next_prefetched_xid;
void
GetNewTransactionId(TransactionId *xid)
{
TransactionId nextid;
/* ----------------
* during bootstrap initialization, we return the special
* bootstrap transaction id.
* ----------------
*/
if (AMI_OVERRIDE) {
TransactionIdStore(AmiTransactionId, xid);
return;
}
/* ----------------
* if we run out of prefetched xids, then we get some
* more before handing them out to the caller.
* ----------------
*/
if (prefetched_xid_count == 0) {
/* ----------------
* obtain exclusive access to the variable relation page
*
* get the "next" xid from the variable relation
* and save it in the prefetched id.
* ----------------
*/
SpinAcquire(OidGenLockId);
VariableRelationGetNextXid(&nextid);
TransactionIdStore(nextid, &next_prefetched_xid);
/* ----------------
* now advance the variable relation's next xid by the
* number of xids we just prefetched and reset the
* prefetched_xid_count.
* ----------------
*/
prefetched_xid_count = VAR_XID_PREFETCH;
TransactionIdAdd(&nextid, prefetched_xid_count);
VariableRelationPutNextXid(nextid);
SpinRelease(OidGenLockId);
}
/* ----------------
* return the next prefetched xid in the pointer passed by
* the user, advance next_prefetched_xid by one, and
* decrement the prefetch count.
*
* XXX Transaction Ids used to be even as the low order bit was
* used to determine commit status. This is no longer true so
* we now use even and odd transaction ids. -mer 5/26/92
* ----------------
*/
TransactionIdStore(next_prefetched_xid, xid);
TransactionIdAdd(&next_prefetched_xid, 1);
prefetched_xid_count--;
}
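The prefetch scheme above is just batch allocation from a shared counter: one spinlocked update of the pg_variable page buys VAR_XID_PREFETCH locally cached ids. A minimal standalone sketch of the same pattern follows (plain C, not backend code; the static variables stand in for the spinlock-protected nextXid field and the per-backend cache):

#include <stdio.h>

#define XID_PREFETCH 32

static unsigned long shared_next_xid = 512;  /* stand-in for pg_variable's nextXid */
static unsigned long next_prefetched_xid;
static int prefetched_xid_count = 0;

static unsigned long
get_new_xid(void)
{
    if (prefetched_xid_count == 0) {
        /* one trip to the shared counter buys XID_PREFETCH local ids */
        next_prefetched_xid = shared_next_xid;
        shared_next_xid += XID_PREFETCH;
        prefetched_xid_count = XID_PREFETCH;
    }
    prefetched_xid_count--;
    return next_prefetched_xid++;
}

int
main(void)
{
    int i;

    for (i = 0; i < 5; i++)
        printf("xid %lu\n", get_new_xid());
    return 0;
}

As the note above says, ids cached this way are simply lost if the backend exits before using them, which is why the batch is kept small.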
/* ----------------
* UpdateLastCommittedXid
* ----------------
*/
void
UpdateLastCommittedXid(TransactionId xid)
{
TransactionId lastid;
/* we assume that spinlock OidGenLockId has been acquired
* prior to entering this function
*/
/* ----------------
* get the "last committed" transaction id from
* the variable relation page.
* ----------------
*/
VariableRelationGetLastXid(&lastid);
/* ----------------
* if the transaction id is greater than the last committed
* transaction then we update the last committed transaction
* in the variable relation.
* ----------------
*/
if (TransactionIdIsLessThan(lastid, xid))
VariableRelationPutLastXid(xid);
}
/* ----------------------------------------------------------------
* object id generation support
* ----------------------------------------------------------------
*/
/* ----------------
* GetNewObjectIdBlock
*
* This support function is used to allocate a block of object ids
* of the given size. Applications wishing to do their own object
* id assignments should use this function.
* ----------------
*/
void
GetNewObjectIdBlock(Oid *oid_return, /* place to return the new object id */
int oid_block_size) /* number of oids desired */
{
Oid nextoid;
/* ----------------
* SOMEDAY obtain exclusive access to the variable relation page
* That someday is today -mer 6 Aug 1992
* ----------------
*/
SpinAcquire(OidGenLockId);
/* ----------------
* get the "next" oid from the variable relation
* and give it to the caller.
* ----------------
*/
VariableRelationGetNextOid(&nextoid);
if (PointerIsValid(oid_return))
(*oid_return) = nextoid;
/* ----------------
* now increment the variable relation's next oid
* field by the size of the oid block requested.
* ----------------
*/
nextoid += oid_block_size;
VariableRelationPutNextOid(&nextoid);
/* ----------------
* SOMEDAY relinquish our lock on the variable relation page
* That someday is today -mer 6 Apr 1992
* ----------------
*/
SpinRelease(OidGenLockId);
}
/* ----------------
* GetNewObjectId
*
* This function allocates and parses out object ids. Like
* GetNewTransactionId(), it "prefetches" 32 object ids by
* incrementing the nextOid stored in the var relation by 32 and then
* returning these id's one at a time until they are exhausted.
* This means we reduce the number of accesses to the variable
* relation by 32 for each backend.
*
* Note: 32 has no special significance. We don't want the
* number to be too large because when the backend
* terminates, we lose the oids we cached.
*
* ----------------
*/
#define VAR_OID_PREFETCH 32
static int prefetched_oid_count = 0;
static Oid next_prefetched_oid;
void
GetNewObjectId(Oid *oid_return) /* place to return the new object id */
{
/* ----------------
* if we run out of prefetched oids, then we get some
* more before handing them out to the caller.
* ----------------
*/
if (prefetched_oid_count == 0) {
int oid_block_size = VAR_OID_PREFETCH;
/* ----------------
* during bootstrap time, we want to allocate oids
* one at a time. Otherwise there might be some
* bootstrap oid's left in the block we prefetch which
* would be passed out after the variable relation was
* initialized. This would be bad.
* ----------------
*/
if (! RelationIsValid(VariableRelation))
VariableRelation = heap_openr(VariableRelationName);
/* ----------------
* get a new block of prefetched object ids.
* ----------------
*/
GetNewObjectIdBlock(&next_prefetched_oid, oid_block_size);
/* ----------------
* now reset the prefetched_oid_count.
* ----------------
*/
prefetched_oid_count = oid_block_size;
}
/* ----------------
* return the next prefetched oid in the pointer passed by
* the user and decrement the prefetch count.
* ----------------
*/
if (PointerIsValid(oid_return))
(*oid_return) = next_prefetched_oid;
next_prefetched_oid++;
prefetched_oid_count--;
}
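For illustration, a caller inside the backend might use GetNewObjectIdBlock() to do its own object id assignments, as its header comment suggests. The helper name, the bulk-load scenario, and the array of preassigned oids are assumptions made for this sketch, not code from the distribution; the extern declaration mirrors the definition above:

#include "postgres.h"

extern void GetNewObjectIdBlock(Oid *oid_return, int oid_block_size);

void
AssignOidsForBulkLoad(Oid *assigned, int ntuples)
{
    Oid first;
    int i;

    /* one spinlocked update of pg_variable reserves the whole range */
    GetNewObjectIdBlock(&first, ntuples);

    /* hand out [first, first + ntuples - 1] without touching pg_variable again */
    for (i = 0; i < ntuples; i++)
        assigned[i] = first + i;
}

The point of the block interface is that the spinlock and the pg_variable write are paid once per block rather than once per oid.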

File diff suppressed because it is too large

View File

@ -0,0 +1,156 @@
/*-------------------------------------------------------------------------
*
* xid.c--
* POSTGRES transaction identifier code.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/xid.c,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
*
* OLD COMMENTS
* XXX WARNING
* Much of this file will change when we change our representation
* of transaction ids -cim 3/23/90
*
* It is time to make the switch from 5 byte to 4 byte transaction ids
* This file was totally reworked. -mer 5/22/92
*
*-------------------------------------------------------------------------
*/
#include <stdio.h>
#include "postgres.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#include "utils/memutils.h"
#include "utils/nabstime.h"
extern TransactionId NullTransactionId;
extern TransactionId DisabledTransactionId;
extern TransactionId AmiTransactionId;
extern TransactionId FirstTransactionId;
/* ----------------------------------------------------------------
* TransactionIdIsValid
*
* Macro-ize me.
* ----------------------------------------------------------------
*/
bool
TransactionIdIsValid(TransactionId transactionId)
{
return ((bool) (transactionId != NullTransactionId) );
}
/* XXX char16 name for catalogs */
TransactionId
xidin(char *representation)
{
return (atol(representation));
}
/* XXX char16 name for catalogs */
char*
xidout(TransactionId transactionId)
{
/* return(TransactionIdFormString(transactionId)); */
char *representation;
/* maximum 32 bit unsigned integer representation takes 10 chars */
representation = palloc(11);
(void)sprintf(representation, "%u", transactionId);
return (representation);
}
/* ----------------------------------------------------------------
* StoreInvalidTransactionId
*
* Maybe do away with Pointer types in these routines.
* Macro-ize this one.
* ----------------------------------------------------------------
*/
void
StoreInvalidTransactionId(TransactionId *destination)
{
*destination = NullTransactionId;
}
/* ----------------------------------------------------------------
* TransactionIdStore
*
* Macro-ize this one.
* ----------------------------------------------------------------
*/
void
TransactionIdStore(TransactionId transactionId,
TransactionId *destination)
{
*destination = transactionId;
}
/* ----------------------------------------------------------------
* TransactionIdEquals
* ----------------------------------------------------------------
*/
bool
TransactionIdEquals(TransactionId id1, TransactionId id2)
{
return ((bool) (id1 == id2));
}
/* ----------------------------------------------------------------
* TransactionIdIsLessThan
* ----------------------------------------------------------------
*/
bool
TransactionIdIsLessThan(TransactionId id1, TransactionId id2)
{
return ((bool)(id1 < id2));
}
/* ----------------------------------------------------------------
* xideq
* ----------------------------------------------------------------
*/
/*
* xideq - returns 1, iff xid1 == xid2
* 0 else;
*/
bool
xideq(TransactionId xid1, TransactionId xid2)
{
return( (bool) (xid1 == xid2) );
}
/* ----------------------------------------------------------------
* TransactionIdIncrement
* ----------------------------------------------------------------
*/
void
TransactionIdIncrement(TransactionId *transactionId)
{
(*transactionId)++;
if (*transactionId == DisabledTransactionId)
elog(FATAL, "TransactionIdIncrement: exhausted XID's");
return;
}
/* ----------------------------------------------------------------
* TransactionIdAdd
* ----------------------------------------------------------------
*/
void
TransactionIdAdd(TransactionId *xid, int value)
{
*xid += value;
return;
}
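An illustrative round trip through the text conversion routines above. Backend context is assumed (palloc and elog come from the backend), the helper name and NOTICE message are invented for the example, and 512 is just the first post-bootstrap xid mentioned in varsup.c:

#include "postgres.h"
#include "utils/elog.h"

extern TransactionId xidin(char *representation);
extern char *xidout(TransactionId transactionId);

void
xid_text_roundtrip_example(void)
{
    TransactionId x = xidin("512");   /* text -> TransactionId */
    char *s = xidout(x);              /* TransactionId -> palloc'd text, "512" */

    elog(NOTICE, "xid as text: %s", s);
}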

View File

@ -0,0 +1,53 @@
/*-------------------------------------------------------------------------
*
* tupdesc.h--
* POSTGRES tuple descriptor definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: tupdesc.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef TUPDESC_H
#define TUPDESC_H
#include "postgres.h"
#include "access/attnum.h"
#include "nodes/pg_list.h" /* for List */
#include "catalog/pg_attribute.h"
/*
* a TupleDesc is an array of AttributeTupleForms, each of which is a
* pointer to an AttributeTupleForm
*/
/* typedef AttributeTupleForm *TupleDesc; */
/* a TupleDesc is a pointer to a structure which includes an array of */
/* AttributeTupleForms, i.e. pg_attribute information, and the size of */
/* the array, i.e. the number of attributes */
/* in short, a TupleDesc completely captures the attribute information */
/* for a tuple */
typedef struct tupleDesc {
int natts;
AttributeTupleForm *attrs;
} *TupleDesc;
extern TupleDesc CreateTemplateTupleDesc(int natts);
extern TupleDesc CreateTupleDesc(int natts, AttributeTupleForm *attrs);
extern TupleDesc CreateTupleDescCopy(TupleDesc tupdesc);
extern bool TupleDescInitEntry(TupleDesc desc,
AttrNumber attributeNumber,
char *attributeName,
char *typeName,
int attdim,
bool attisset);
extern TupleDesc BuildDescForRelation(List *schema, char *relname);
#endif /* TUPDESC_H */
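A hedged sketch of building a TupleDesc with the routines declared above. The helper name, attribute names, and type names are invented for the example; only the prototypes in this header are relied on, and TupleDescInitEntry() is assumed to fill in the pg_attribute information for each slot:

#include "access/tupdesc.h"

TupleDesc
make_example_tupdesc(void)
{
    TupleDesc desc = CreateTemplateTupleDesc(2);   /* natts = 2, empty attrs */

    /* (desc, attnum, attname, typname, attdim, attisset) per the prototype */
    TupleDescInitEntry(desc, (AttrNumber) 1, "id", "oid", 0, false);
    TupleDescInitEntry(desc, (AttrNumber) 2, "name", "char16", 0, false);

    return desc;
}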

View File

@ -0,0 +1,43 @@
/*-------------------------------------------------------------------------
*
* tupmacs.h--
* Tuple macros used by both index tuples and heap tuples.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: tupmacs.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef TUPMACS_H
#define TUPMACS_H
/*
* check to see if the ATT'th bit of an array of 8-bit bytes is set.
*/
#define att_isnull(ATT, BITS) (!((BITS)[(ATT) >> 3] & (1 << ((ATT) & 0x07))))
/*
* given a AttributeTupleForm and a pointer into a tuple's data
* area, return the correct value or pointer.
*
* note that T must already be properly LONGALIGN/SHORTALIGN'd for
* this to work correctly.
*
* the double-cast is to stop gcc from (correctly) complaining about
* casting integer types with size < sizeof(char *) to (char *).
* sign-extension may get weird if you use an integer type that
* isn't the same size as (char *) for the first cast. (on the other
* hand, it's safe to use another type for the (foo *)(T).)
*/
#define fetchatt(A, T) \
((*(A))->attbyval \
? ((*(A))->attlen > sizeof(int16) \
? (char *) (long) *((int32 *)(T)) \
: ((*(A))->attlen < sizeof(int16) \
? (char *) (long) *((char *)(T)) \
: (char *) (long) *((int16 *)(T)))) \
: (char *) (T))
#endif
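att_isnull() above just tests one bit of a per-tuple null bitmap, low bit first within each byte; a set bit means the attribute is present, a clear bit means it is null. A standalone sketch (the macro is copied from above so the example compiles on its own; the bitmap value is made up):

#include <stdio.h>

#define att_isnull(ATT, BITS) (!((BITS)[(ATT) >> 3] & (1 << ((ATT) & 0x07))))

int
main(void)
{
    unsigned char bits[1] = { 0x05 };   /* attributes 0 and 2 present, 1 null */

    printf("att 0 null? %d\n", att_isnull(0, bits));   /* prints 0 */
    printf("att 1 null? %d\n", att_isnull(1, bits));   /* prints 1 */
    printf("att 2 null? %d\n", att_isnull(2, bits));   /* prints 0 */
    return 0;
}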

View File

@ -0,0 +1,37 @@
/*-------------------------------------------------------------------------
*
* valid.h--
* POSTGRES tuple qualification validity definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: valid.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef VALID_H
#define VALID_H
#include "c.h"
#include "access/skey.h"
#include "storage/buf.h"
#include "utils/tqual.h"
#include "access/tupdesc.h"
#include "utils/rel.h"
#include "storage/bufpage.h"
/* ----------------
* extern decl's
* ----------------
*/
extern bool heap_keytest(HeapTuple t, TupleDesc tupdesc,
int nkeys, ScanKey keys);
extern HeapTuple heap_tuple_satisfies(ItemId itemId, Relation relation,
PageHeader disk_page, TimeQual qual, int nKeys, ScanKey key);
extern bool TupleUpdatedByCurXactAndCmd(HeapTuple t);
#endif /* VALID_H */

115
src/backend/access/xact.h Normal file
View File

@ -0,0 +1,115 @@
/*-------------------------------------------------------------------------
*
* xact.h--
* postgres transaction system header
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: xact.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef XACT_H
#define XACT_H
#include <signal.h>
#include "storage/ipc.h"
#include "miscadmin.h"
#include "utils/portal.h"
#include "utils/elog.h"
#include "utils/mcxt.h"
#include "utils/nabstime.h"
/* ----------------
* transaction state structure
* ----------------
*/
typedef struct TransactionStateData {
TransactionId transactionIdData;
CommandId commandId;
AbsoluteTime startTime;
int state;
int blockState;
} TransactionStateData;
/* ----------------
* transaction states
* ----------------
*/
#define TRANS_DEFAULT 0
#define TRANS_START 1
#define TRANS_INPROGRESS 2
#define TRANS_COMMIT 3
#define TRANS_ABORT 4
#define TRANS_DISABLED 5
/* ----------------
* transaction block states
* ----------------
*/
#define TBLOCK_DEFAULT 0
#define TBLOCK_BEGIN 1
#define TBLOCK_INPROGRESS 2
#define TBLOCK_END 3
#define TBLOCK_ABORT 4
#define TBLOCK_ENDABORT 5
typedef TransactionStateData *TransactionState;
/* ----------------
* extern definitions
* ----------------
*/
extern int TransactionFlushEnabled();
extern void SetTransactionFlushEnabled(bool state);
extern bool IsTransactionState(void);
extern bool IsAbortedTransactionBlockState(void);
extern void OverrideTransactionSystem(bool flag);
extern TransactionId GetCurrentTransactionId(void);
extern CommandId GetCurrentCommandId(void);
extern AbsoluteTime GetCurrentTransactionStartTime(void);
extern bool TransactionIdIsCurrentTransactionId(TransactionId xid);
extern bool CommandIdIsCurrentCommandId(CommandId cid);
extern void ClearCommandIdCounterOverflowFlag(void);
extern void CommandCounterIncrement(void);
extern void InitializeTransactionSystem(void);
extern void AtStart_Cache(void);
extern void AtStart_Locks(void);
extern void AtStart_Memory(void);
extern void RecordTransactionCommit(void);
extern void AtCommit_Cache(void);
extern void AtCommit_Locks(void);
extern void AtCommit_Memory(void);
extern void RecordTransactionAbort(void);
extern void AtAbort_Cache(void);
extern void AtAbort_Locks(void);
extern void AtAbort_Memory(void);
extern void StartTransaction(void);
extern bool CurrentXactInProgress(void);
extern void CommitTransaction(void);
extern void AbortTransaction(void);
extern void StartTransactionCommand(void);
extern void CommitTransactionCommand(void);
extern void AbortCurrentTransaction(void);
extern void BeginTransactionBlock(void);
extern void EndTransactionBlock(void);
extern void AbortTransactionBlock(void);
extern bool IsTransactionBlock();
extern void UserAbortTransactionBlock();
extern TransactionId DisabledTransactionId;
/* defined in xid.c */
extern bool TransactionIdIsValid(TransactionId transactionId);
extern void StoreInvalidTransactionId(TransactionId *destination);
extern void TransactionIdStore(TransactionId transactionId,
TransactionId *destination);
extern bool TransactionIdEquals(TransactionId id1, TransactionId id2);
extern bool TransactionIdIsLessThan(TransactionId id1, TransactionId id2);
extern void TransactionIdIncrement(TransactionId *transactionId);
extern void TransactionIdAdd(TransactionId *xid, int value);
#endif /* XACT_H */
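For illustration, a TransactionStateData for a top-level transaction that is in progress but not inside an explicit transaction block might look as follows. The field values are assumptions based only on the constants and comments in this header; the real structure is managed by the routines declared above:

#include "access/xact.h"

static TransactionStateData ExampleTransactionState = {
    512,              /* transactionIdData: an ordinary post-bootstrap xid */
    0,                /* commandId: first command of the transaction */
    0,                /* startTime: set by StartTransaction() in reality */
    TRANS_INPROGRESS, /* state */
    TBLOCK_DEFAULT    /* blockState: no BEGIN/END block in effect */
};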

View File

@ -0,0 +1,63 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for the bootstrap module
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/bootstrap/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
#
#
# Another kinda weird Makefile.inc cause we need two
# scanner/parsers in the backend and most yaccs and lexs
# don't have the prefix option.
#
# sed files are HACK CITY! - redo...
#
#-------------------------------------------------------------------------
bootdir= $(CURDIR)/bootstrap
VPATH:= $(VPATH):$(bootdir)
#BOOTYACCS= bootstrap_tokens.h bootparse.c
BOOTYACCS= bootparse.c
SRCS_BOOTSTRAP= bootparse.c bootscanner.c bootstrap.c
$(BOOTYACCS): bootparse.y
cd $(objdir); \
$(YACC) $(YFLAGS) $<; \
sed -f $(bootdir)/boot.sed < y.tab.c > bootparse.c; \
mv y.tab.h bootstrap_tokens.h; \
rm -f y.tab.c
$(objdir)/bootparse.o: bootparse.c
$(cc_inobjdir)
bootscanner.c: bootscanner.l
cd $(objdir); \
$(LEX) $<; \
sed -f $(bootdir)/boot.sed < lex.yy.c > bootscanner.c; \
rm -f lex.yy.c
$(objdir)/bootscanner.o: bootscanner.c
$(cc_inobjdir)
#
# The following ensures that y.tab.h gets made, since bootstrap.c
# includes it
#
bootstrap.o: $(BOOTYACCS)
POSTGRES_DEPEND+= $(BOOTYACCS) bootscanner.c
CLEANFILES+= bootscanner.c $(BOOTYACCS) y.tab.h y.output
HEADERS+= bootstrap.h

View File

@ -0,0 +1,9 @@
#
# lex.sed - sed rules to remove conflicts between the
# bootstrap backend interface LEX scanner and the
# normal backend SQL LEX scanner
#
# $Header: /cvsroot/pgsql/src/backend/bootstrap/Attic/boot.sed,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
#
s/^yy/Int_yy/g
s/\([^a-zA-Z0-9_]\)yy/\1Int_yy/g

View File

@ -0,0 +1,293 @@
%{
/*-------------------------------------------------------------------------
*
* backendparse.y--
* yacc parser grammar for the "backend" initialization program.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "access/heapam.h"
#include "access/tupdesc.h"
#include "bootstrap/bootstrap.h"
#include "utils/portal.h"
#include "storage/smgr.h"
#include "nodes/pg_list.h"
#include "catalog/catalog.h"
#include "catalog/catname.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "commands/rename.h"
#include "commands/defrem.h"
#include "access/transam.h"
#include "access/xact.h"
#define DO_START { StartTransactionCommand();\
}
#define DO_END { CommitTransactionCommand();\
if (!Quiet) { EMITPROMPT; }\
fflush(stdout); \
}
int num_tuples_read = 0;
static Oid objectid;
%}
%union {
List *list;
IndexElem *ielem;
char *str;
int ival;
}
%type <list> arg_list
%type <ielem> index_params index_on
%type <ival> const ident
%type <ival> optbootstrap optoideq tuple tuplelist
%token <ival> CONST ID
%token OPEN XCLOSE XCREATE INSERT_TUPLE
%token STRING XDEFINE
%token XDECLARE INDEX ON USING XBUILD INDICES
%token COMMA EQUALS LPAREN RPAREN
%token OBJ_ID XBOOTSTRAP NULLVAL
%start TopLevel
%nonassoc low
%nonassoc high
%%
TopLevel:
Queries
|
;
Queries:
Query
| Queries Query
;
Query :
OpenStmt
| CloseStmt
| CreateStmt
| InsertStmt
| DeclareIndexStmt
| BuildIndsStmt
;
OpenStmt:
OPEN ident
{
DO_START;
boot_openrel(LexIDStr($2));
DO_END;
}
;
CloseStmt:
XCLOSE ident %prec low
{
DO_START;
closerel(LexIDStr($2));
DO_END;
}
| XCLOSE %prec high
{
DO_START;
closerel(NULL);
DO_END;
}
;
CreateStmt:
XCREATE optbootstrap ident LPAREN
{
DO_START;
numattr=(int)0;
}
typelist
{
if (!Quiet) putchar('\n');
DO_END;
}
RPAREN
{
DO_START;
if ($2) {
extern Relation reldesc;
TupleDesc tupdesc;
if (reldesc) {
puts("create bootstrap: Warning, open relation");
puts("exists, closing first");
closerel(NULL);
}
if (DebugMode)
puts("creating bootstrap relation");
tupdesc = CreateTupleDesc(numattr,attrtypes);
reldesc = heap_creatr(LexIDStr($3),
DEFAULT_SMGR,
tupdesc);
if (DebugMode)
puts("bootstrap relation created ok");
} else {
Oid id;
TupleDesc tupdesc;
/* extern Oid heap_create();*/
tupdesc = CreateTupleDesc(numattr,attrtypes);
id = heap_create(LexIDStr($3),
NULL,
'n',
DEFAULT_SMGR,
tupdesc);
if (!Quiet)
printf("CREATED relation %s with OID %d\n",
LexIDStr($3), id);
}
DO_END;
if (DebugMode)
puts("Commit End");
}
;
InsertStmt:
INSERT_TUPLE optoideq
{
DO_START;
if (DebugMode)
printf("tuple %d<", $2);
num_tuples_read = 0;
}
LPAREN tuplelist RPAREN
{
if (num_tuples_read != numattr)
elog(WARN,"incorrect number of values for tuple");
if (reldesc == (Relation)NULL) {
elog(WARN,"must OPEN RELATION before INSERT\n");
err();
}
if (DebugMode)
puts("Insert Begin");
objectid = $2;
InsertOneTuple(objectid);
if (DebugMode)
puts("Insert End");
if (!Quiet) { putchar('\n'); }
DO_END;
if (DebugMode)
puts("Transaction End");
}
;
DeclareIndexStmt:
XDECLARE INDEX ident ON ident USING ident LPAREN index_params RPAREN
{
List *params;
DO_START;
params = lappend(NIL, (List*)$9);
DefineIndex(LexIDStr($5),
LexIDStr($3),
LexIDStr($7),
params, NIL, 0, NIL);
DO_END;
}
;
BuildIndsStmt:
XBUILD INDICES { build_indices(); }
index_params:
index_on ident
{
IndexElem *n = (IndexElem*)$1;
n->class = LexIDStr($2);
$$ = n;
}
index_on:
ident
{
IndexElem *n = makeNode(IndexElem);
n->name = LexIDStr($1);
$$ = n;
}
| ident LPAREN arg_list RPAREN
{
IndexElem *n = makeNode(IndexElem);
n->name = LexIDStr($1);
n->args = (List*)$3;
$$ = n;
}
arg_list:
ident
{
$$ = lappend(NIL, makeString(LexIDStr($1)));
}
| arg_list COMMA ident
{
$$ = lappend((List*)$1, makeString(LexIDStr($3)));
}
optbootstrap:
XBOOTSTRAP { $$ = 1; }
| { $$ = 0; }
;
typelist:
typething
| typelist COMMA typething
;
typething:
ident EQUALS ident
{
if(++numattr > MAXATTR)
elog(FATAL,"Too many attributes\n");
DefineAttr(LexIDStr($1),LexIDStr($3),numattr-1);
if (DebugMode)
printf("\n");
}
;
optoideq:
OBJ_ID EQUALS ident { $$ = atol(LexIDStr($3)); }
| { extern Oid newoid(); $$ = newoid(); }
;
tuplelist:
tuple
| tuplelist tuple
| tuplelist COMMA tuple
;
tuple:
ident {InsertOneValue(objectid, LexIDStr($1), num_tuples_read++); }
| const {InsertOneValue(objectid, LexIDStr($1), num_tuples_read++); }
| NULLVAL
{ InsertOneNull(num_tuples_read++); }
;
const :
CONST { $$=yylval.ival; }
;
ident :
ID { $$=yylval.ival; }
;
%%

View File

@ -0,0 +1,108 @@
%{
/*-------------------------------------------------------------------------
*
* bootscanner.lex--
* a lexical scanner for the bootstrap parser
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/bootstrap/bootscanner.l,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "bootstrap/bootstrap.h"
#include "utils/portal.h"
#include "access/xact.h"
#include "parser/scansup.h"
#include "bootstrap_tokens.h"
/* some versions of lex define this as a macro */
#if defined(yywrap)
#undef yywrap
#endif /* yywrap */
YYSTYPE yylval;
int yyline; /* keep track of the line number for error reporting */
%}
D [0-9]
oct \\{D}{D}{D}
Exp [Ee][-+]?{D}+
id ([A-Za-z0-9_]|{oct}|\-)+
sid \"([^\"])*\"
arrayid [A-Za-z0-9_]+\[{D}*\]
%%
open { return(OPEN); }
close { return(XCLOSE); }
create { return(XCREATE); }
OID { return(OBJ_ID); }
bootstrap { return(XBOOTSTRAP); }
_null_ { return(NULLVAL); }
insert { return(INSERT_TUPLE); }
"," { return(COMMA); }
"=" { return(EQUALS); }
"(" { return(LPAREN); }
")" { return(RPAREN); }
[\n] { yyline++; }
[\t] ;
" " ;
^\#[^\n]* ; /* drop everything after "#" for comments */
"declare" { return(XDECLARE); }
"build" { return(XBUILD); }
"indices" { return(INDICES); }
"index" { return(INDEX); }
"on" { return(ON); }
"using" { return(USING); }
{arrayid} {
yylval.ival = EnterString(MapArrayTypeName((char*)yytext));
return(ID);
}
{id} {
yylval.ival = EnterString(scanstr((char*)yytext));
return(ID);
}
{sid} {
yylval.ival = EnterString(scanstr((char*)yytext));
return(ID);
}
(-)?{D}+"."{D}*({Exp})? |
(-)?{D}*"."{D}+({Exp})? |
(-)?{D}+{Exp} {
yylval.ival = EnterString((char*)yytext);
return(CONST);
}
. {
printf("syntax error %d : -> %s\n", yyline, yytext);
}
%%
yywrap()
{
return 1;
}
yyerror(str)
char *str;
{
fprintf(stderr,"\tsyntax error %d : %s",yyline, str);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,78 @@
/*-------------------------------------------------------------------------
*
* bootstrap.h--
* include file for the bootstrapping code
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: bootstrap.h,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef BOOTSTRAP_H
#define BOOTSTRAP_H
#include <sys/file.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <ctype.h>
#include "access/htup.h"
#include "access/itup.h"
#include "access/relscan.h"
#include "access/skey.h"
#include "utils/tqual.h"
#include "storage/buf.h"
#include "storage/bufmgr.h" /* for BufferManagerFlush */
#include "utils/portal.h"
#include "utils/elog.h"
#include "utils/rel.h"
#define MAXATTR 40 /* max. number of attributes in a relation */
typedef struct hashnode {
int strnum; /* Index into string table */
struct hashnode *next;
} hashnode;
#define EMITPROMPT printf("> ")
extern Relation reldesc;
extern AttributeTupleForm attrtypes[MAXATTR];
extern int numattr;
extern int DebugMode;
extern int BootstrapMain(int ac, char *av[]);
extern void index_register(char *heap,
char *ind,
int natts,
AttrNumber *attnos,
uint16 nparams,
Datum *params,
FuncIndexInfo *finfo,
PredInfo *predInfo);
extern void err(void);
extern void InsertOneTuple(Oid objectid);
extern void closerel(char *name);
extern void boot_openrel(char *name);
extern char *LexIDStr(int ident_num);
extern void DefineAttr(char *name, char *type, int attnum);
extern void InsertOneValue(Oid objectid, char *value, int i);
extern void InsertOneNull(int i);
extern bool BootstrapAlreadySeen(Oid id);
extern void cleanup(void);
extern int gettype(char *type);
extern AttributeTupleForm AllocateAttribute(void);
extern char* MapArrayTypeName(char *s);
extern char* CleanUpStr(char *s);
extern int EnterString (char *str);
extern int CompHash (char *str, int len);
extern hashnode *FindStr (char *str, int length, hashnode *mderef);
extern hashnode *AddStr(char *str, int strlength, int mderef);
extern void build_indices(void);
#endif /* BOOTSTRAP_H */

View File

@ -0,0 +1,69 @@
#-------------------------------------------------------------------------
#
# Makefile.inc--
# Makefile for the system catalogs module
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
#
#-------------------------------------------------------------------------
catdir=$(CURDIR)/catalog
VPATH:=$(VPATH):$(catdir)
SRCS_CATALOG= catalog.c heap.c index.c indexing.c \
pg_aggregate.c pg_operator.c pg_proc.c pg_type.c
HEADERS+= catalog.h catname.h heap.h index.h indexing.h pg_aggregate.h \
pg_am.h pg_amop.h pg_amproc.h pg_attribute.h pg_database.h \
pg_defaults.h pg_demon.h pg_group.h pg_index.h pg_inheritproc.h \
pg_inherits.h pg_ipl.h pg_language.h pg_listener.h \
pg_log.h pg_magic.h pg_opclass.h pg_operator.h pg_parg.h \
pg_proc.h pg_class.h \
pg_rewrite.h pg_server.h pg_statistic.h pg_time.h pg_type.h \
pg_user.h pg_variable.h pg_version.h
#
# The following is to create the .bki files.
# TODO: sort headers, (figure some automatic way of determining
# the bki sources?)
#
# XXX - more grot. includes names and uid's in the header file. FIX THIS
# (not sure if i got this right - which do i need - or should i
# burn the whole damned thing)
#
ifdef ALLOW_PG_GROUP
BKIOPTS= -DALLOW_PG_GROUP
endif
GENBKI= $(catdir)/genbki.sh
BKIFILES= global1.bki local1_template1.bki
GLOBALBKI_SRCS= pg_database.h pg_demon.h pg_magic.h pg_defaults.h \
pg_variable.h pg_server.h pg_user.h pg_hosts.h \
pg_group.h pg_log.h pg_time.h
LOCALBKI_SRCS= pg_proc.h pg_type.h pg_attribute.h pg_class.h \
pg_inherits.h pg_index.h pg_version.h pg_statistic.h pg_operator.h \
pg_opclass.h pg_am.h pg_amop.h pg_amproc.h pg_language.h pg_parg.h \
pg_aggregate.h pg_ipl.h pg_inheritproc.h \
pg_rewrite.h pg_listener.h indexing.h
global1.bki: $(GENBKI) $(GLOBALBKI_SRCS)
sh $(SHOPTS) $(GENBKI) $(BKIOPTS) \
$(patsubst $(GENBKI),,$^) > $(objdir)/$(@F)
local1_template1.bki: $(GENBKI) $(LOCALBKI_SRCS)
sh $(SHOPTS) $(GENBKI) $(BKIOPTS) \
$(patsubst $(GENBKI),,$^) > $(objdir)/$(@F)
#${PROG}: ${BKIFILES}
#
CLEANFILES+= ${BKIFILES}

View File

@ -0,0 +1,66 @@
$Header: /cvsroot/pgsql/src/backend/catalog/README,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
This directory contains .c files that manipulate the system catalogs
as well as .h files that define the structure of the system catalogs.
When the compile-time scripts (such as Gen_fmgrtab.sh and genbki.sh)
execute, they grep the DATA statements out of the .h files and munge
these in order to generate the .bki files. The .bki files are then
used as input to initdb (which is just a wrapper around postgres
running single-user in bootstrapping mode) in order to generate the
initial (template) system catalog relation files.
-----------------------------------------------------------------
People who are going to hose around with the .h files should be aware
of the following facts:
- It is very important that the DATA statements be properly formatted
(e.g., no broken lines, proper use of white-space and _null_). The
scripts are line-oriented and break easily. In addition, the only
documentation on the proper format for them is the code in the
bootstrap/ directory. Just be careful when adding new DATA
statements.
- Some catalogs require that OIDs be preallocated to tuples because
certain catalogs contain circular references. For example, pg_type
contains pointers into pg_proc (pg_type.typinput), and pg_proc
contains back-pointers into pg_type (pg_proc.proargtypes). In these
cases, the references may be explicitly set by use of the "OID ="
clause of the .bki insert statement. If no such pointers are required
to a given tuple, then the OID may be set to the wildcard value 0
(i.e., the system generates a random OID in the usual way).
If you need to find a valid OID for a set of tuples that refer to each
other, use the unused_oids script. It generates inclusive ranges of
*unused* OIDs (i.e., the line "45-900" means OIDs 45 through 900 have
not been allocated yet). However, you should not rely 100% on this
script, since it only looks at the .h files in the catalog/ directory.
Do a pg_grepsrc (recursive grep) of the source tree to ensure that
there aren't any hidden crocks (i.e., explicit use of a numeric OID)
anywhere in the code.  An illustrative DATA statement is shown below.
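For illustration only, a preallocated OID in a DATA statement takes the form below; the column values are placeholders, not taken from any real catalog header, and per the note above an OID of 0 acts as the wildcard that lets the system assign one:

DATA(insert OID = 501 (  value1 value2 value3 ));
DATA(insert OID = 0   (  value1 value2 value3 ));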
-----------------------------------------------------------------
When munging the .c files, you should be aware of certain conventions:
- The system catalog cache code (and most catalog-munging code in
general) assumes that the fixed-length portion of all system catalog
tuples are in fact present. That is, only the variable-length
portions of a catalog tuple are assumed to be permitted to be
non-NULL. For example, if you set pg_type.typdelim to be NULL, a
piece of code will likely perform "typetup->typdelim" (or, worse,
"typetyp->typelem", which follows typdelim). This will result in
random errors or even segmentation violations. Hence, do NOT insert
catalog tuples that contain NULL attributes except in their
variable-length portions!
- Modification of the catalogs must be performed with the proper
updating of catalog indexes! That is, several catalogs have indexes
on them; when you munge them using the executor, the executor will
take care of doing the index updates, but if you make direct access
method calls to insert new or modified tuples into a heap, you must
also make the calls to insert the tuple into ALL of its indexes! If
not, the new tuple will generally be "invisible" to the system because
most of the accesses to the catalogs in question will be through the
associated indexes.

View File

@ -0,0 +1,205 @@
/*-------------------------------------------------------------------------
*
* catalog.c--
*
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/catalog/catalog.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <string.h> /* XXX */
#include "postgres.h"
#include "miscadmin.h" /* for DataDir */
#include "access/htup.h"
#include "storage/buf.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/syscache.h"
#include "catalog/catname.h" /* NameIs{,Shared}SystemRelationName */
#include "catalog/pg_attribute.h"
#include "catalog/pg_type.h"
#include "catalog/catalog.h"
#include "storage/bufmgr.h"
#include "access/transam.h"
#ifndef MAXPATHLEN
#define MAXPATHLEN 80
#endif
/*
* relpath - path to the relation
* Perhaps this should be in-line code in relopen().
*/
char *
relpath(char relname[])
{
char *path;
if (IsSharedSystemRelationName(relname)) {
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2);
sprintf(path, "%s/%.*s", DataDir, NAMEDATALEN, relname);
return (path);
}
return(relname);
}
/*
* issystem - returns non-zero iff relname is a system catalog
*
* We now make a new requirement where system catalog relns must begin
* with pg_ while user relns are forbidden to do so. Make the test
* trivial and instantaneous.
*
* XXX this is way bogus. -- pma
*/
bool
issystem(char relname[])
{
if (relname[0] && relname[1] && relname[2])
return (relname[0] == 'p' &&
relname[1] == 'g' &&
relname[2] == '_');
else
return FALSE;
}
/*
* IsSystemRelationName --
* True iff name is the name of a system catalog relation.
*
* We now make a new requirement where system catalog relns must begin
* with pg_ while user relns are forbidden to do so. Make the test
* trivial and instantaneous.
*
* XXX this is way bogus. -- pma
*/
bool
IsSystemRelationName(char *relname)
{
if (relname[0] && relname[1] && relname[2])
return (relname[0] == 'p' &&
relname[1] == 'g' &&
relname[2] == '_');
else
return FALSE;
}
/*
* IsSharedSystemRelationName --
* True iff name is the name of a shared system catalog relation.
*/
bool
IsSharedSystemRelationName(char *relname)
{
int i;
/*
* Quick out: if it's not a system relation, it can't be a shared
* system relation.
*/
if (!IsSystemRelationName(relname))
return FALSE;
i = 0;
while ( SharedSystemRelationNames[i] != NULL) {
if (strcmp(SharedSystemRelationNames[i],relname) == 0)
return TRUE;
i++;
}
return FALSE;
}
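A small usage sketch of the name-based tests above; backend context is assumed and the user relation name is an example. The shared/local split is taken from the GLOBALBKI_SRCS and LOCALBKI_SRCS lists in catalog/Makefile.inc:

#include "postgres.h"
#include "catalog/catalog.h"

void
catalog_name_checks_example(void)
{
    /* anything whose name starts with "pg_" is treated as a system catalog */
    Assert(IsSystemRelationName("pg_class"));
    Assert(! IsSystemRelationName("mytable"));

    /* only the relations in SharedSystemRelationNames (pg_database,
     * pg_log, pg_time, ...) are shared across databases */
    Assert(IsSharedSystemRelationName("pg_database"));
    Assert(! IsSharedSystemRelationName("pg_class"));
}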
/*
* newoid - returns a unique identifier across all catalogs.
*
* Object Id allocation is now done by GetNewObjectID in
* access/transam/varsup.c. oids are now allocated correctly.
*
* old comments:
* This needs to change soon; it fails if there is much more
* than one call per second when postgres restarts after it dies.
*
* The distribution of OID's should be done by the POSTMASTER.
* Also there needs to be a facility to preallocate OID's. Ie.,
* for a block of OID's to be declared as invalid ones to allow
* user programs to use them for temporary object identifiers.
*/
Oid newoid()
{
Oid lastoid;
GetNewObjectId(&lastoid);
if (! OidIsValid(lastoid))
elog(WARN, "newoid: GetNewObjectId returns invalid oid");
return lastoid;
}
/*
* fillatt - fills the ATTRIBUTE relation fields from the TYP
*
* Expects that the atttypid domain is set for each att[].
* Returns with the attnum, and attlen domains set.
* attnum, attproc, atttyparg, ... should be set by the user.
*
* In the future, attnum may not be set?!? or may be passed as an arg?!?
*
* Current implementation is very inefficient--should cache the
* information if this is at all possible.
*
* Check to see if this is really needed, and especially in the case
* of index tuples.
*/
void
fillatt(TupleDesc tupleDesc)
{
AttributeTupleForm *attributeP;
register TypeTupleForm typp;
HeapTuple tuple;
int i;
int natts = tupleDesc->natts;
AttributeTupleForm *att = tupleDesc->attrs;
if (natts < 0 || natts > MaxHeapAttributeNumber)
elog(WARN, "fillatt: %d attributes is too large", natts);
if (natts == 0) {
elog(DEBUG, "fillatt: called with natts == 0");
return;
}
attributeP = &att[0];
for (i = 0; i < natts;) {
tuple = SearchSysCacheTuple(TYPOID,
Int32GetDatum((*attributeP)->atttypid),
0,0,0);
if (!HeapTupleIsValid(tuple)) {
elog(WARN, "fillatt: unknown atttypid %ld",
(*attributeP)->atttypid);
} else {
(*attributeP)->attnum = (int16) ++i;
/* Check if the attr is a set before messing with the length
and byval, since those were already set in
TupleDescInitEntry. In fact, this seems redundant
here, but who knows what I'll break if I take it out...
same for char() and varchar() stuff. I share the same
sentiments. This function is poorly written anyway. -ay 6/95
*/
if (!(*attributeP)->attisset &&
(*attributeP)->atttypid!=BPCHAROID &&
(*attributeP)->atttypid!=VARCHAROID) {
typp = (TypeTupleForm) GETSTRUCT(tuple); /* XXX */
(*attributeP)->attlen = typp->typlen;
(*attributeP)->attbyval = typp->typbyval;
}
}
attributeP += 1;
}
}

View File

@ -0,0 +1,24 @@
/*-------------------------------------------------------------------------
*
* catalog.h--
* prototypes for functions in lib/catalog/catalog.c
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: catalog.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef CATALOG_H
#define CATALOG_H
#include "access/tupdesc.h"
extern char *relpath(char relname[]);
extern bool IsSystemRelationName(char *relname);
extern bool IsSharedSystemRelationName(char *relname);
extern Oid newoid(void);
extern void fillatt(TupleDesc att);
#endif /* CATALOG_H */

View File

@ -0,0 +1,52 @@
/*-------------------------------------------------------------------------
*
* catname.h--
* POSTGRES system catalog relation name definitions.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: catname.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef CATNAME_H
#define CATNAME_H
#include "postgres.h"
#define AggregateRelationName "pg_aggregate"
#define AccessMethodRelationName "pg_am"
#define AccessMethodOperatorRelationName "pg_amop"
#define AccessMethodProcedureRelationName "pg_amproc"
#define AttributeRelationName "pg_attribute"
#define DatabaseRelationName "pg_database"
#define DefaultsRelationName "pg_defaults"
#define DemonRelationName "pg_demon"
#define GroupRelationName "pg_group"
#define HostsRelationName "pg_hosts"
#define IndexRelationName "pg_index"
#define InheritProcedureRelationName "pg_inheritproc"
#define InheritsRelationName "pg_inherits"
#define InheritancePrecidenceListRelationName "pg_ipl"
#define LanguageRelationName "pg_language"
#define ListenerRelationName "pg_listener"
#define LogRelationName "pg_log"
#define MagicRelationName "pg_magic"
#define OperatorClassRelationName "pg_opclass"
#define OperatorRelationName "pg_operator"
#define ProcedureRelationName "pg_proc"
#define RelationRelationName "pg_class"
#define RewriteRelationName "pg_rewrite"
#define ServerRelationName "pg_server"
#define StatisticRelationName "pg_statistic"
#define TimeRelationName "pg_time"
#define TypeRelationName "pg_type"
#define UserRelationName "pg_user"
#define VariableRelationName "pg_variable"
#define VersionRelationName "pg_version"
extern char *SharedSystemRelationNames[];
#endif /* CATNAME_H */

View File

@ -0,0 +1,218 @@
#!/bin/sh
#-------------------------------------------------------------------------
#
# genbki.sh--
# shell script which generates .bki files from specially formatted .h
# files. These .bki files are used to initialize the postgres template
# database.
#
# Copyright (c) 1994, Regents of the University of California
#
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
#
# NOTES
# non-essential whitespace is removed from the generated file.
# if this is ever a problem, then the sed script at the very
# end can be changed into another awk script or something smarter..
#
#-------------------------------------------------------------------------
PATH=$PATH:/lib:/usr/ccs/lib # to find cpp
BKIOPTS=''
if [ $? != 0 ]
then
echo `basename $0`: Bad option
exit 1
fi
for opt in $*
do
case $opt in
-D) BKIOPTS="$BKIOPTS -D$2"; shift; shift;;
-D*) BKIOPTS="$BKIOPTS $1";shift;;
--) shift; break;;
esac
done
# ----------------
# collect nodefiles
# ----------------
SYSFILES=''
x=1
numargs=$#
while test $x -le $numargs ; do
SYSFILES="$SYSFILES $1"
x=`expr $x + 1`
shift
done
# ----------------
# strip comments and trash from .h before we generate
# the .bki file...
# ----------------
# also, change Oid to oid. -- AY 8/94.
# also, change NameData to name. -- jolly 8/21/95.
#
cat $SYSFILES | \
sed -e 's/\/\*.*\*\///g' \
-e 's/;[ ]*$//g' \
-e 's/\ Oid/\ oid/g' \
-e 's/\ NameData/\ name/g' \
-e 's/(NameData/(name/g' \
-e 's/(Oid/(oid/g' | \
awk '
# ----------------
# now use awk to process remaining .h file..
#
# nc is the number of catalogs
# inside is a variable set to 1 when we are scanning the
# contents of a catalog definition.
# inserting_data is a flag indicating when we are processing DATA lines.
# (i.e. have a relation open and need to close it)
# ----------------
BEGIN {
inside = 0;
raw = 0;
bootstrap = 0;
nc = 0;
reln_open = 0;
}
# ----------------
# anything in a BKI_BEGIN .. BKI_END block should be passed
# along without interpretation.
# ----------------
/^BKI_BEGIN/ { raw = 1; next; }
/^BKI_END/ { raw = 0; next; }
raw == 1 { print; next; }
# ----------------
# DATA() statements should get passed right through after
# stripping off the DATA( and the ) on the end.
# ----------------
/^DATA\(/ {
data = substr($0, 6, length($0) - 6);
print data;
next;
}
/^DECLARE_INDEX\(/ {
# ----
# end any prior catalog data insertions before starting a define index
# ----
if (reln_open == 1) {
# print "show";
print "close " catalog;
reln_open = 0;
}
data = substr($0, 15, length($0) - 15);
print "declare index " data
}
/^BUILD_INDICES/ { print "build indices"; }
# ----------------
# CATALOG() definitions take some more work.
# ----------------
/^CATALOG\(/ {
# ----
# end any prior catalog data insertions before starting a new one..
# ----
if (reln_open == 1) {
# print "show";
print "close " catalog;
reln_open = 0;
}
# ----
# get the name of the new catalog
# ----
pos = index($1,")");
catalog = substr($1,9,pos-9);
if ($0 ~ /BOOTSTRAP/) {
bootstrap = 1;
}
i = 1;
inside = 1;
nc++;
next;
}
# ----------------
# process the contents of the catalog definition
#
# attname[ x ] contains the attribute name for attribute x
# atttype[ x ] contains the attribute type for attribute x
# ----------------
inside == 1 {
# ----
# ignore a leading brace line..
# ----
if ($1 ~ /\{/)
next;
# ----
# if this is the last line, then output the bki catalog stuff.
# ----
if ($1 ~ /}/) {
if (bootstrap) {
print "create bootstrap " catalog;
} else {
print "create " catalog;
}
print "\t(";
for (j=1; j<i-1; j++) {
print "\t " attname[ j ] " = " atttype[ j ] " ,";
}
print "\t " attname[ j ] " = " atttype[ j ] ;
print "\t)";
if (! bootstrap) {
print "open " catalog;
}
i = 1;
reln_open = 1;
inside = 0;
bootstrap = 0;
next;
}
# ----
# if we are inside the catalog definition, then keep sucking up
# attribute names and types
# ----
if ($2 ~ /\[.*\]/) { # array attribute
idlen = index($2,"[") - 1;
atttype[ i ] = $1 "[]"; # variable-length only..
attname[ i ] = substr($2,1,idlen);
} else {
atttype[ i ] = $1;
attname[ i ] = $2;
}
i++;
next;
}
END {
if (reln_open == 1) {
# print "show";
print "close " catalog;
reln_open = 0;
}
}
' | \
cpp $BKIOPTS | \
sed -e '/^[ ]*$/d' \
-e 's/[ ][ ]*/ /g'
# ----------------
# all done
# ----------------
exit 0

1428
src/backend/catalog/heap.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,42 @@
/*-------------------------------------------------------------------------
*
* heap.h--
* prototypes for functions in lib/catalog/heap.c
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: heap.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef HEAP_H
#define HEAP_H
extern Relation heap_creatr(char *relname, unsigned smgr, TupleDesc att);
extern int RelationAlreadyExists(Relation pg_class_desc, char relname[]);
extern void addNewRelationType(char *typeName, Oid new_rel_oid);
extern void AddPgRelationTuple(Relation pg_class_desc,
Relation new_rel_desc, Oid new_rel_oid, int arch, unsigned natts);
extern Oid heap_create(char relname[],
char *typename,
int arch,
unsigned smgr, TupleDesc tupdesc);
extern void RelationRemoveInheritance(Relation relation);
extern void RelationRemoveIndexes(Relation relation);
extern void DeletePgRelationTuple(Relation rdesc);
extern void DeletePgAttributeTuples(Relation rdesc);
extern void DeletePgTypeTuple(Relation rdesc);
extern void heap_destroy(char relname[]);
extern void heap_destroyr(Relation r);
extern void InitTempRelList();
extern void AddToTempRelList(Relation r);
extern void RemoveFromTempRelList(Relation r);
extern void DestroyTempRels();
#endif /* HEAP_H */

1655
src/backend/catalog/index.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,59 @@
/*-------------------------------------------------------------------------
*
* index.h--
* prototypes for index.c.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: index.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef INDEX_H
#define INDEX_H
#include "access/funcindex.h"
#include "access/itup.h"
#include "nodes/execnodes.h"
extern Form_pg_am
AccessMethodObjectIdGetAccessMethodTupleForm(Oid accessMethodObjectId);
extern void
UpdateIndexPredicate(Oid indexoid, Node *oldPred, Node *predicate);
extern void InitIndexStrategy(int numatts,
Relation indexRelation,
Oid accessMethodObjectId);
extern void index_create(char *heapRelationName,
char* indexRelationName,
FuncIndexInfo *funcInfo,
Oid accessMethodObjectId,
int numatts,
AttrNumber attNums[],
Oid classObjectId[],
uint16 parameterCount,
Datum parameter[],
Node *predicate);
extern void index_destroy(Oid indexId);
extern void FormIndexDatum(int numberOfAttributes,
AttrNumber attributeNumber[], HeapTuple heapTuple,
TupleDesc heapDescriptor, Buffer buffer, Datum *datum,
char *nullv, FuncIndexInfoPtr fInfo);
extern void UpdateStats(Oid relid, long reltuples, bool hasindex);
extern void FillDummyExprContext(ExprContext *econtext, TupleTableSlot *slot,
TupleDesc tupdesc, Buffer buffer);
extern void index_build(Relation heapRelation, Relation indexRelation,
int numberOfAttributes, AttrNumber attributeNumber[],
uint16 parameterCount, Datum parameter[], FuncIndexInfo *funcInfo,
PredInfo *predInfo);
#endif /* INDEX_H */

View File

@ -0,0 +1,561 @@
/*-------------------------------------------------------------------------
*
* indexing.c--
* This file contains routines to support indices defined on system
* catalogs.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/elog.h"
#include "utils/oidcompos.h"
#include "utils/palloc.h"
#include "access/htup.h"
#include "access/heapam.h"
#include "access/genam.h"
#include "access/attnum.h"
#include "access/funcindex.h"
#include "access/skey.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "nodes/execnodes.h"
#include "catalog/catalog.h"
#include "catalog/catname.h"
#include "catalog/pg_index.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "catalog/pg_class.h"
#include "catalog/pg_attribute.h"
#include "utils/syscache.h"
#include "catalog/indexing.h"
#include "catalog/index.h"
/*
* Names of indices on the following system catalogs:
*
* pg_attribute
* pg_proc
* pg_type
* pg_naming
* pg_class
*/
/*
static NameData AttributeNameIndexData = { "pg_attnameind" };
static NameData AttributeNumIndexData = { "pg_attnumind" };
static NameData AttributeRelidIndexData= { "pg_attrelidind" };
static NameData ProcedureNameIndexData = { "pg_procnameind" };
static NameData ProcedureOidIndexData = { "pg_procidind" };
static NameData ProcedureSrcIndexData = { "pg_procsrcind" };
static NameData TypeNameIndexData = { "pg_typenameind" };
static NameData TypeOidIndexData = { "pg_typeidind" };
static NameData ClassNameIndexData = { "pg_classnameind" };
static NameData ClassOidIndexData = { "pg_classoidind" };
Name AttributeNameIndex = &AttributeNameIndexData;
Name AttributeNumIndex = &AttributeNumIndexData;
Name AttributeRelidIndex= &AttributeRelidIndexData;
Name ProcedureNameIndex = &ProcedureNameIndexData;
Name ProcedureOidIndex = &ProcedureOidIndexData;
Name ProcedureSrcIndex = &ProcedureSrcIndexData;
Name TypeNameIndex = &TypeNameIndexData;
Name TypeOidIndex = &TypeOidIndexData;
Name ClassNameIndex = &ClassNameIndexData;
Name ClassOidIndex = &ClassOidIndexData;
char *Name_pg_attr_indices[Num_pg_attr_indices] = {AttributeNameIndexData.data,
AttributeNumIndexData.data,
AttributeRelidIndexData.data};
char *Name_pg_proc_indices[Num_pg_proc_indices] = {ProcedureNameIndexData.data,
ProcedureOidIndexData.data,
ProcedureSrcIndexData.data};
char *Name_pg_type_indices[Num_pg_type_indices] = {TypeNameIndexData.data,
TypeOidIndexData.data};
char *Name_pg_class_indices[Num_pg_class_indices]= {ClassNameIndexData.data,
ClassOidIndexData.data};
*/
char *Name_pg_attr_indices[Num_pg_attr_indices] = {AttributeNameIndex,
AttributeNumIndex,
AttributeRelidIndex};
char *Name_pg_proc_indices[Num_pg_proc_indices] = { ProcedureNameIndex,
ProcedureOidIndex,
ProcedureSrcIndex};
char *Name_pg_type_indices[Num_pg_type_indices] = { TypeNameIndex,
TypeOidIndex};
char *Name_pg_class_indices[Num_pg_class_indices]= { ClassNameIndex,
ClassOidIndex};
static HeapTuple CatalogIndexFetchTuple(Relation heapRelation,
Relation idesc,
ScanKey skey);
/*
* Changes (appends) to catalogs can (and do) happen at various places
* throughout the code. We need a generic routine that will open all of
* the indices defined on a given catalog and return the relation descriptors
* associated with them.
*/
void
CatalogOpenIndices(int nIndices, char *names[], Relation idescs[])
{
int i;
for (i=0; i<nIndices; i++)
{
idescs[i] = index_openr(names[i]);
}
}
/*
* This is the inverse routine to CatalogOpenIndices()
*/
void
CatalogCloseIndices(int nIndices, Relation *idescs)
{
int i;
for (i=0; i<nIndices; i++)
index_close(idescs[i]);
}
/*
* For the same reasons outlined above CatalogOpenIndices() we need a routine
* that takes a new catalog tuple and inserts an associated index tuple into
* each catalog index.
*/
void
CatalogIndexInsert(Relation *idescs,
int nIndices,
Relation heapRelation,
HeapTuple heapTuple)
{
HeapTuple pgIndexTup;
TupleDesc heapDescriptor;
IndexTupleForm pgIndexP;
IndexTuple newIndxTup;
Datum datum;
int natts;
AttrNumber *attnumP;
FuncIndexInfo finfo, *finfoP;
char nulls[INDEX_MAX_KEYS];
int i;
heapDescriptor = RelationGetTupleDescriptor(heapRelation);
for (i=0; i<nIndices; i++)
{
TupleDesc indexDescriptor;
InsertIndexResult indexRes;
indexDescriptor = RelationGetTupleDescriptor(idescs[i]);
pgIndexTup = SearchSysCacheTuple(INDEXRELID,
Int32GetDatum(idescs[i]->rd_id),
0,0,0);
Assert(pgIndexTup);
pgIndexP = (IndexTupleForm)GETSTRUCT(pgIndexTup);
/*
* Compute the number of attributes we are indexing upon.
* very important - can't assume one if this is a functional
* index.
*/
for (attnumP=(&pgIndexP->indkey[0]), natts=0;
*attnumP != InvalidAttrNumber;
attnumP++, natts++)
;
if (pgIndexP->indproc != InvalidOid)
{
FIgetnArgs(&finfo) = natts;
natts = 1;
FIgetProcOid(&finfo) = pgIndexP->indproc;
*(FIgetname(&finfo)) = '\0';
finfoP = &finfo;
}
else
finfoP = (FuncIndexInfo *)NULL;
FormIndexDatum(natts,
(AttrNumber *)&pgIndexP->indkey[0],
heapTuple,
heapDescriptor,
InvalidBuffer,
&datum,
nulls,
finfoP);
newIndxTup = (IndexTuple)index_formtuple(indexDescriptor,
&datum,nulls);
Assert(newIndxTup);
/*
* Doing this structure assignment makes me quake in my boots when I
* think about portability.
*/
newIndxTup->t_tid = heapTuple->t_ctid;
indexRes = index_insert(idescs[i], newIndxTup);
if (indexRes) pfree(indexRes);
}
}
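For illustration, the pattern these routines support when a catalog tuple is appended directly with the heap access methods: insert the heap tuple, then insert a matching index tuple into every index on that catalog. The helper name is invented; Num_pg_proc_indices and Name_pg_proc_indices come from this file and catalog/indexing.h, the heapam signatures are assumed from their use elsewhere in the tree, and error handling is omitted:

#include "postgres.h"
#include "access/heapam.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"

void
append_pg_proc_tuple_example(HeapTuple tup)
{
    Relation pg_proc;
    Relation idescs[Num_pg_proc_indices];

    pg_proc = heap_openr(ProcedureRelationName);
    (void) heap_insert(pg_proc, tup);

    /* without these index insertions the new tuple would be "invisible"
     * to index-based catalog lookups (see catalog/README) */
    CatalogOpenIndices(Num_pg_proc_indices, Name_pg_proc_indices, idescs);
    CatalogIndexInsert(idescs, Num_pg_proc_indices, pg_proc, tup);
    CatalogCloseIndices(Num_pg_proc_indices, idescs);

    heap_close(pg_proc);
}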
/*
* This is needed at initialization when reldescs for some of the crucial
* system catalogs are created and nailed into the cache.
*/
bool
CatalogHasIndex(char *catName, Oid catId)
{
Relation pg_class;
HeapTuple htup;
Form_pg_class pgRelP;
int i;
Assert(IsSystemRelationName(catName));
/*
* If we're bootstrapping we don't have pg_class (or any indices).
*/
if (IsBootstrapProcessingMode())
return false;
if (IsInitProcessingMode()) {
for (i = 0; IndexedCatalogNames[i] != NULL; i++) {
if ( strcmp(IndexedCatalogNames[i], catName) == 0)
return (true);
}
return (false);
}
pg_class = heap_openr(RelationRelationName);
htup = ClassOidIndexScan(pg_class, catId);
heap_close(pg_class);
if (! HeapTupleIsValid(htup)) {
elog(NOTICE, "CatalogHasIndex: no relation with oid %d", catId);
return false;
}
pgRelP = (Form_pg_class)GETSTRUCT(htup);
return (pgRelP->relhasindex);
}
/*
* CatalogIndexFetchTuple() -- Get a tuple that satisfies a scan key
* from a catalog relation.
*
* Since the index may contain pointers to dead tuples, we need to
* iterate until we find a tuple that's valid and satisfies the scan
* key.
*/
static HeapTuple
CatalogIndexFetchTuple(Relation heapRelation,
Relation idesc,
ScanKey skey)
{
IndexScanDesc sd;
RetrieveIndexResult indexRes;
HeapTuple tuple;
Buffer buffer;
sd = index_beginscan(idesc, false, 1, skey);
tuple = (HeapTuple)NULL;
do {
indexRes = index_getnext(sd, ForwardScanDirection);
if (indexRes) {
ItemPointer iptr;
iptr = &indexRes->heap_iptr;
tuple = heap_fetch(heapRelation, NowTimeQual, iptr, &buffer);
pfree(indexRes);
} else
break;
} while (!HeapTupleIsValid(tuple));
if (HeapTupleIsValid(tuple)) {
tuple = heap_copytuple(tuple);
ReleaseBuffer(buffer);
}
index_endscan(sd);
if (sd->opaque)
pfree(sd->opaque);
pfree(sd);
return (tuple);
}
/*
* The remainder of the file is for individual index scan routines. Each
* index should be scanned according to how it was defined during bootstrap
* (that is, functional or normal) and what arguments the cache lookup
* requires. Each routine returns the heap tuple that qualifies.
*/
HeapTuple
AttributeNameIndexScan(Relation heapRelation,
Oid relid,
char *attname)
{
Relation idesc;
ScanKeyData skey;
OidName keyarg;
HeapTuple tuple;
keyarg = mkoidname(relid, attname);
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)OidNameEqRegProcedure,
(Datum)keyarg);
idesc = index_openr(AttributeNameIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
pfree(keyarg);
return tuple;
}
HeapTuple
AttributeNumIndexScan(Relation heapRelation,
Oid relid,
AttrNumber attnum)
{
Relation idesc;
ScanKeyData skey;
OidInt2 keyarg;
HeapTuple tuple;
keyarg = mkoidint2(relid, (uint16)attnum);
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)OidInt2EqRegProcedure,
(Datum)keyarg);
idesc = index_openr(AttributeNumIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
pfree(keyarg);
return tuple;
}
HeapTuple
ProcedureOidIndexScan(Relation heapRelation, Oid procId)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)ObjectIdEqualRegProcedure,
(Datum)procId);
idesc = index_openr(ProcedureOidIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
return tuple;
}
HeapTuple
ProcedureNameIndexScan(Relation heapRelation,
char *procName,
int nargs,
Oid *argTypes)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
IndexScanDesc sd;
RetrieveIndexResult indexRes;
Buffer buffer;
Form_pg_proc pgProcP;
bool bufferUsed = FALSE;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)NameEqualRegProcedure,
(Datum)procName);
idesc = index_openr(ProcedureNameIndex);
sd = index_beginscan(idesc, false, 1, &skey);
/*
* For now, we do the work usually done by CatalogIndexFetchTuple()
* by hand, so that we can check that the other keys match. When
* multi-key indices are added, they will be used here.
*/
do {
tuple = (HeapTuple)NULL;
if (bufferUsed) {
ReleaseBuffer(buffer);
bufferUsed = FALSE;
}
indexRes = index_getnext(sd, ForwardScanDirection);
if (indexRes) {
ItemPointer iptr;
iptr = &indexRes->heap_iptr;
tuple = heap_fetch(heapRelation, NowTimeQual, iptr, &buffer);
pfree(indexRes);
if (HeapTupleIsValid(tuple)) {
pgProcP = (Form_pg_proc)GETSTRUCT(tuple);
bufferUsed = TRUE;
}
} else
break;
} while (!HeapTupleIsValid(tuple) ||
pgProcP->pronargs != nargs ||
!oid8eq(&(pgProcP->proargtypes[0]), argTypes));
if (HeapTupleIsValid(tuple)) {
tuple = heap_copytuple(tuple);
ReleaseBuffer(buffer);
}
index_endscan(sd);
index_close(idesc);
return tuple;
}
HeapTuple
ProcedureSrcIndexScan(Relation heapRelation, text *procSrc)
{
Relation idesc;
IndexScanDesc sd;
ScanKeyData skey;
RetrieveIndexResult indexRes;
HeapTuple tuple;
Buffer buffer;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)Anum_pg_proc_prosrc,
(RegProcedure)TextEqualRegProcedure,
(Datum)procSrc);
idesc = index_openr(ProcedureSrcIndex);
sd = index_beginscan(idesc, false, 1, &skey);
indexRes = index_getnext(sd, ForwardScanDirection);
if (indexRes) {
ItemPointer iptr;
iptr = &indexRes->heap_iptr;
tuple = heap_fetch(heapRelation, NowTimeQual, iptr, &buffer);
pfree(indexRes);
} else
tuple = (HeapTuple)NULL;
if (HeapTupleIsValid(tuple)) {
tuple = heap_copytuple(tuple);
ReleaseBuffer(buffer);
}
index_endscan(sd);
return tuple;
}
HeapTuple
TypeOidIndexScan(Relation heapRelation, Oid typeId)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)ObjectIdEqualRegProcedure,
(Datum)typeId);
idesc = index_openr(TypeOidIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
return tuple;
}
HeapTuple
TypeNameIndexScan(Relation heapRelation, char *typeName)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)NameEqualRegProcedure,
(Datum)typeName);
idesc = index_openr(TypeNameIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
return tuple;
}
HeapTuple
ClassNameIndexScan(Relation heapRelation, char *relName)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)NameEqualRegProcedure,
(Datum)relName);
idesc = index_openr(ClassNameIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
return tuple;
}
HeapTuple
ClassOidIndexScan(Relation heapRelation, Oid relId)
{
Relation idesc;
ScanKeyData skey;
HeapTuple tuple;
ScanKeyEntryInitialize(&skey,
(bits16)0x0,
(AttrNumber)1,
(RegProcedure)ObjectIdEqualRegProcedure,
(Datum)relId);
idesc = index_openr(ClassOidIndex);
tuple = CatalogIndexFetchTuple(heapRelation, idesc, &skey);
index_close(idesc);
return tuple;
}
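/*
 * Editor's note -- an illustrative sketch, not part of the original file:
 * a typical caller of these scan routines opens the heap relation, scans
 * through the index, and closes the relation again, much as CatalogHasIndex()
 * does above.  The helper below is hypothetical and only shows that calling
 * pattern for a lookup of pg_class by relation name.
 */
#ifdef CATALOG_INDEXING_EXAMPLE
static bool
ExampleRelationExists(char *relName)
{
    Relation pg_class;
    HeapTuple htup;

    pg_class = heap_openr(RelationRelationName);
    htup = ClassNameIndexScan(pg_class, relName);
    heap_close(pg_class);

    /* ClassNameIndexScan returns an invalid tuple if no live match exists */
    return HeapTupleIsValid(htup);
}
#endif /* CATALOG_INDEXING_EXAMPLE */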

View File

@ -0,0 +1,103 @@
/*-------------------------------------------------------------------------
*
* indexing.h--
* This include provides some definitions to support indexing
* on system catalogs
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: indexing.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef INDEXING_H
#define INDEXING_H
#include "utils/rel.h"
/*
* Number of indices defined on each of the indexed system catalogs
*/
#define Num_pg_attr_indices 3
#define Num_pg_proc_indices 3
#define Num_pg_type_indices 2
#define Num_pg_class_indices 2
/*
* Names of indices on system catalogs
*/
#define AttributeNameIndex "pg_attnameind"
#define AttributeNumIndex "pg_attnumind"
#define AttributeRelidIndex "pg_attrelidind"
#define ProcedureNameIndex "pg_procnameind"
#define ProcedureOidIndex "pg_procidind"
#define ProcedureSrcIndex "pg_procsrcind"
#define TypeNameIndex "pg_typenameind"
#define TypeOidIndex "pg_typeidind"
#define ClassNameIndex "pg_classnameind"
#define ClassOidIndex "pg_classoidind"
extern char *Name_pg_attr_indices[];
extern char *Name_pg_proc_indices[];
extern char *Name_pg_type_indices[];
extern char *Name_pg_class_indices[];
extern char *IndexedCatalogNames[];
/*
* indexing.c prototypes
*
* Functions for each index to perform the necessary scan on a cache miss.
*/
extern void CatalogOpenIndices(int nIndices, char *names[], Relation idescs[]);
extern void CatalogCloseIndices(int nIndices, Relation *idescs);
extern void CatalogIndexInsert(Relation *idescs,
int nIndices,
Relation heapRelation,
HeapTuple heapTuple);
extern bool CatalogHasIndex(char *catName, Oid catId);
extern HeapTuple AttributeNameIndexScan(Relation heapRelation,
Oid relid,
char *attname);
extern HeapTuple AttributeNumIndexScan(Relation heapRelation,
Oid relid,
AttrNumber attnum);
extern HeapTuple ProcedureOidIndexScan(Relation heapRelation, Oid procId);
extern HeapTuple ProcedureNameIndexScan(Relation heapRelation,
char *procName, int nargs, Oid *argTypes);
extern HeapTuple ProcedureSrcIndexScan(Relation heapRelation, text *procSrc);
extern HeapTuple TypeOidIndexScan(Relation heapRelation, Oid typeId);
extern HeapTuple TypeNameIndexScan(Relation heapRelation, char *typeName);
extern HeapTuple ClassNameIndexScan(Relation heapRelation, char *relName);
extern HeapTuple ClassOidIndexScan(Relation heapRelation, Oid relId);
/*
* What follows are lines processed by genbki.sh to create the statements
* the bootstrap parser will turn into DefineIndex commands.
*
* The keyword is DECLARE_INDEX; everything after that is just like in a
* normal specification of the 'define index' POSTQUEL command.
*/
DECLARE_INDEX(pg_attnameind on pg_attribute using btree (mkoidname(attrelid, attname) oidname_ops));
DECLARE_INDEX(pg_attnumind on pg_attribute using btree (mkoidint2(attrelid, attnum) oidint2_ops));
DECLARE_INDEX(pg_attrelidind on pg_attribute using btree (attrelid oid_ops));
DECLARE_INDEX(pg_procidind on pg_proc using btree (Oid oid_ops));
DECLARE_INDEX(pg_procnameind on pg_proc using btree (proname name_ops));
DECLARE_INDEX(pg_procsrcind on pg_proc using btree (prosrc text_ops));
DECLARE_INDEX(pg_typeidind on pg_type using btree (Oid oid_ops));
DECLARE_INDEX(pg_typenameind on pg_type using btree (typname name_ops));
DECLARE_INDEX(pg_classnameind on pg_class using btree (relname name_ops));
DECLARE_INDEX(pg_classoidind on pg_class using btree (Oid oid_ops));
/* now build indices in the initialization scripts */
BUILD_INDICES
#endif /* INDEXING_H */

View File

@ -0,0 +1,325 @@
/*-------------------------------------------------------------------------
*
* pg_aggregate.c--
* routines to support manipulation of the pg_aggregate relation
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_aggregate.c,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
#include <string.h>
#include "postgres.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/skey.h"
#include "access/htup.h"
#include "access/tupdesc.h"
#include "utils/rel.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "fmgr.h"
#include "catalog/catname.h"
#include "utils/syscache.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "catalog/pg_aggregate.h"
/* ----------------
* AggregateCreate
*
* Aggregate overloading has been added. Instead of the full
* overload support we have for functions, aggregate overloading only
* applies to exact basetype matches. That is, we don't check the
* inheritance hierarchy.
*
* OLD COMMENTS:
* Currently, redefining aggregates using the same name is not
* supported. In such a case, a warning is printed that the
* aggregate already exists. If such is not the case, a new tuple
* is created and inserted in the aggregate relation. The fields
* of this tuple are aggregate name, owner id, 2 transition functions
* (called aggtransfn1 and aggtransfn2), final function (aggfinalfn),
* type of data on which aggtransfn1 operates (aggbasetype), return
* types of the two transition functions (aggtranstype1 and
* aggtranstype2), final return type (aggfinaltype), and initial values
* for the two state transition functions (agginitval1 and agginitval2).
* All types and functions must have been defined
* prior to defining the aggregate.
*
* ---------------
*/
void
AggregateCreate(char *aggName,
char *aggtransfn1Name,
char *aggtransfn2Name,
char *aggfinalfnName,
char *aggbasetypeName,
char *aggtransfn1typeName,
char *aggtransfn2typeName,
char *agginitval1,
char *agginitval2)
{
register int i;
Relation aggdesc;
HeapTuple tup;
char nulls[Natts_pg_aggregate];
Datum values[Natts_pg_aggregate];
Form_pg_proc proc;
Oid xfn1 = InvalidOid;
Oid xfn2 = InvalidOid;
Oid ffn = InvalidOid;
Oid xbase = InvalidOid;
Oid xret1 = InvalidOid;
Oid xret2 = InvalidOid;
Oid fret = InvalidOid;
Oid fnArgs[8];
TupleDesc tupDesc;
memset(fnArgs, 0, 8 * sizeof(Oid));
/* sanity checks */
if (!aggName)
elog(WARN, "AggregateCreate: no aggregate name supplied");
if (!aggtransfn1Name && !aggtransfn2Name)
elog(WARN, "AggregateCreate: aggregate must have at least one transition function");
tup = SearchSysCacheTuple(TYPNAME,
PointerGetDatum(aggbasetypeName),
0,0,0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: Type '%s' undefined",aggbasetypeName);
xbase = tup->t_oid;
if (aggtransfn1Name) {
tup = SearchSysCacheTuple(TYPNAME,
PointerGetDatum(aggtransfn1typeName),
0,0,0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: Type '%s' undefined",
aggtransfn1typeName);
xret1 = tup->t_oid;
fnArgs[0] = xret1;
fnArgs[1] = xbase;
tup = SearchSysCacheTuple(PRONAME,
PointerGetDatum(aggtransfn1Name),
Int32GetDatum(2),
PointerGetDatum(fnArgs),
0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: '%s('%s', '%s') does not exist",
aggtransfn1Name, aggtransfn1typeName, aggbasetypeName);
if (((Form_pg_proc) GETSTRUCT(tup))->prorettype != xret1)
elog(WARN, "AggregateCreate: return type of '%s' is not '%s'",
aggtransfn1Name,
aggtransfn1typeName);
xfn1 = tup->t_oid;
if (!OidIsValid(xfn1) || !OidIsValid(xret1) ||
!OidIsValid(xbase))
elog(WARN, "AggregateCreate: bogus function '%s'", aggfinalfnName);
}
if (aggtransfn2Name) {
tup = SearchSysCacheTuple(TYPNAME,
PointerGetDatum(aggtransfn2typeName),
0,0,0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: Type '%s' undefined",
aggtransfn2typeName);
xret2 = tup->t_oid;
fnArgs[0] = xret2;
fnArgs[1] = 0;
tup = SearchSysCacheTuple(PRONAME,
PointerGetDatum(aggtransfn2Name),
Int32GetDatum(1),
PointerGetDatum(fnArgs),
0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: '%s'('%s') does not exist",
aggtransfn2Name, aggtransfn2typeName);
if (((Form_pg_proc) GETSTRUCT(tup))->prorettype != xret2)
elog(WARN, "AggregateCreate: return type of '%s' is not '%s'",
aggtransfn2Name, aggtransfn2typeName);
xfn2 = tup->t_oid;
if (!OidIsValid(xfn2) || !OidIsValid(xret2))
elog(WARN, "AggregateCreate: bogus function '%s'",aggfinalfnName);
}
tup = SearchSysCacheTuple(AGGNAME, PointerGetDatum(aggName),
ObjectIdGetDatum(xbase),
0,0);
if (HeapTupleIsValid(tup))
elog(WARN,
"AggregateCreate: aggregate '%s' with base type '%s' already exists",
aggName, aggbasetypeName);
/* more sanity checks */
if (aggtransfn1Name && aggtransfn2Name && !aggfinalfnName)
elog(WARN, "AggregateCreate: Aggregate must have final function with both transition functions");
if ((!aggtransfn1Name || !aggtransfn2Name) && aggfinalfnName)
elog(WARN, "AggregateCreate: Aggregate cannot have final function without both transition functions");
if (aggfinalfnName) {
fnArgs[0] = xret1;
fnArgs[1] = xret2;
tup = SearchSysCacheTuple(PRONAME,
PointerGetDatum(aggfinalfnName),
Int32GetDatum(2),
PointerGetDatum(fnArgs),
0);
if(!HeapTupleIsValid(tup))
elog(WARN, "AggregateCreate: '%s'('%s','%s') does not exist",
aggfinalfnName, aggtransfn1typeName, aggtransfn2typeName);
ffn = tup->t_oid;
proc = (Form_pg_proc) GETSTRUCT(tup);
fret = proc->prorettype;
if (!OidIsValid(ffn) || !OidIsValid(fret))
elog(WARN, "AggregateCreate: bogus function '%s'", aggfinalfnName);
}
/*
* If transition function 2 is defined, it must have an initial value,
* whereas transition function 1 does not, which allows max and min
* aggregates to return NULL if they are evaluated on empty sets.
*/
if (OidIsValid(xfn2) && !agginitval2)
elog(WARN, "AggregateCreate: transition function 2 MUST have an initial value");
/* initialize nulls and values */
for(i=0; i < Natts_pg_aggregate; i++) {
nulls[i] = ' ';
values[i] = (Datum)NULL;
}
values[Anum_pg_aggregate_aggname-1] = PointerGetDatum(aggName);
values[Anum_pg_aggregate_aggowner-1] =
Int32GetDatum(GetUserId());
values[Anum_pg_aggregate_aggtransfn1-1] =
ObjectIdGetDatum(xfn1);
values[Anum_pg_aggregate_aggtransfn2-1] =
ObjectIdGetDatum(xfn2);
values[Anum_pg_aggregate_aggfinalfn-1] =
ObjectIdGetDatum(ffn);
values[Anum_pg_aggregate_aggbasetype-1] =
ObjectIdGetDatum(xbase);
if (!OidIsValid(xfn1)) {
values[Anum_pg_aggregate_aggtranstype1-1] =
ObjectIdGetDatum(InvalidOid);
values[Anum_pg_aggregate_aggtranstype2-1] =
ObjectIdGetDatum(xret2);
values[Anum_pg_aggregate_aggfinaltype-1] =
ObjectIdGetDatum(xret2);
}
else if (!OidIsValid(xfn2)) {
values[Anum_pg_aggregate_aggtranstype1-1] =
ObjectIdGetDatum(xret1);
values[Anum_pg_aggregate_aggtranstype2-1] =
ObjectIdGetDatum(InvalidOid);
values[Anum_pg_aggregate_aggfinaltype-1] =
ObjectIdGetDatum(xret1);
}
else {
values[Anum_pg_aggregate_aggtranstype1-1] =
ObjectIdGetDatum(xret1);
values[Anum_pg_aggregate_aggtranstype2-1] =
ObjectIdGetDatum(xret2);
values[Anum_pg_aggregate_aggfinaltype-1] =
ObjectIdGetDatum(fret);
}
if (agginitval1)
values[Anum_pg_aggregate_agginitval1-1] = PointerGetDatum(textin(agginitval1));
else
nulls[Anum_pg_aggregate_agginitval1-1] = 'n';
if (agginitval2)
values[Anum_pg_aggregate_agginitval2-1] = PointerGetDatum(textin(agginitval2));
else
nulls[Anum_pg_aggregate_agginitval2-1] = 'n';
if (!RelationIsValid(aggdesc = heap_openr(AggregateRelationName)))
elog(WARN, "AggregateCreate: could not open '%s'",
AggregateRelationName);
tupDesc = aggdesc->rd_att;
if (!HeapTupleIsValid(tup = heap_formtuple(tupDesc,
values,
nulls)))
elog(WARN, "AggregateCreate: heap_formtuple failed");
if (!OidIsValid(heap_insert(aggdesc, tup)))
elog(WARN, "AggregateCreate: heap_insert failed");
heap_close(aggdesc);
}
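/*
 * Editor's note -- an illustrative sketch, not part of the original file:
 * a user-level "define aggregate" for a sum-like aggregate over int4 boils
 * down to a call of roughly this shape.  The aggregate name "my_sum" and the
 * wrapper are hypothetical; "int4pl" and "int4" are existing builtins.  With
 * only transition function 1 supplied, the sanity checks above forbid a final
 * function and allow agginitval2 to stay NULL.
 */
#ifdef PG_AGGREGATE_EXAMPLE
static void
ExampleDefineSumAggregate(void)
{
    AggregateCreate("my_sum",		/* aggName */
		    "int4pl",		/* aggtransfn1Name: add next value to state */
		    (char *) NULL,	/* aggtransfn2Name: none */
		    (char *) NULL,	/* aggfinalfnName: none */
		    "int4",		/* aggbasetypeName */
		    "int4",		/* aggtransfn1typeName */
		    (char *) NULL,	/* aggtransfn2typeName */
		    "0",		/* agginitval1 */
		    (char *) NULL);	/* agginitval2 */
}
#endif /* PG_AGGREGATE_EXAMPLE */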
char *
AggNameGetInitVal(char *aggName, Oid basetype, int xfuncno, bool *isNull)
{
HeapTuple tup;
Relation aggRel;
int initValAttno;
Oid transtype;
text *textInitVal;
char *strInitVal, *initVal;
extern char *textout();
Assert(PointerIsValid(aggName));
Assert(PointerIsValid(isNull));
Assert(xfuncno == 1 || xfuncno == 2);
tup = SearchSysCacheTuple(AGGNAME,
PointerGetDatum(aggName),
PointerGetDatum(basetype),
0,0);
if (!HeapTupleIsValid(tup))
elog(WARN, "AggNameGetInitVal: cache lookup failed for aggregate '%s'",
aggName);
if (xfuncno == 1) {
transtype = ((Form_pg_aggregate) GETSTRUCT(tup))->aggtranstype1;
initValAttno = Anum_pg_aggregate_agginitval1;
}
else if (xfuncno == 2) {
transtype = ((Form_pg_aggregate) GETSTRUCT(tup))->aggtranstype2;
initValAttno = Anum_pg_aggregate_agginitval2;
}
aggRel = heap_openr(AggregateRelationName);
if (!RelationIsValid(aggRel))
elog(WARN, "AggNameGetInitVal: could not open \"%-.*s\"",
AggregateRelationName);
/*
* must use fastgetattr in case one or other of the init values is NULL
*/
textInitVal = (text *) fastgetattr(tup, initValAttno,
RelationGetTupleDescriptor(aggRel),
isNull);
if (!PointerIsValid(textInitVal))
*isNull = true;
if (*isNull) {
heap_close(aggRel);
return((char *) NULL);
}
strInitVal = textout(textInitVal);
heap_close(aggRel);
tup = SearchSysCacheTuple(TYPOID, ObjectIdGetDatum(transtype),
0,0,0);
if (!HeapTupleIsValid(tup)) {
pfree(strInitVal);
elog(WARN, "AggNameGetInitVal: cache lookup failed on aggregate transition function return type");
}
initVal = fmgr(((TypeTupleForm) GETSTRUCT(tup))->typinput, strInitVal, -1);
pfree(strInitVal);
return(initVal);
}
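/*
 * Editor's note -- an illustrative sketch, not part of the original file:
 * at aggregate setup time a caller would fetch the initial value for
 * transition state 1 roughly like this.  The wrapper is hypothetical; a NULL
 * return means the state starts out as a NULL value (as for max/min).
 */
#ifdef PG_AGGREGATE_EXAMPLE
static char *
ExampleFetchInitVal1(char *aggName, Oid basetype)
{
    bool isNull = false;
    char *initVal;

    initVal = AggNameGetInitVal(aggName, basetype, 1, &isNull);
    if (isNull)
	return (char *) NULL;	/* transition state 1 begins as NULL */
    return initVal;
}
#endif /* PG_AGGREGATE_EXAMPLE */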

View File

@ -0,0 +1,132 @@
/*-------------------------------------------------------------------------
*
* pg_aggregate.h--
* definition of the system "aggregate" relation (pg_aggregate)
* along with the relation's initial contents.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_aggregate.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
* information from the DATA() statements.
*
*-------------------------------------------------------------------------
*/
#ifndef PG_AGGREGATE_H
#define PG_AGGREGATE_H
/* ----------------
* postgres.h contains the system type definitions and the
* CATALOG(), BOOTSTRAP and DATA() sugar words so this file
* can be read by both genbki.sh and the C compiler.
* ----------------
*/
#include "postgres.h"
/* ----------------------------------------------------------------
* pg_aggregate definition.
*
* cpp turns this into typedef struct FormData_pg_aggregate
*
* aggname name of the aggregate
* aggtransfn1 transition function 1
* aggtransfn2 transition function 2
* aggfinalfn final function
* aggbasetype type of data on which aggregate operates
* aggtranstype1 output type for transition func 1
* aggtranstype2 output type for transition func 2
* aggfinaltype output type for final func
* agginitval1 initial aggregate value
* agginitval2 initial value for transition state 2
* ----------------------------------------------------------------
*/
CATALOG(pg_aggregate) {
NameData aggname;
Oid aggowner;
regproc aggtransfn1;
regproc aggtransfn2;
regproc aggfinalfn;
Oid aggbasetype;
Oid aggtranstype1;
Oid aggtranstype2;
Oid aggfinaltype;
text agginitval1; /* VARIABLE LENGTH FIELD */
text agginitval2; /* VARIABLE LENGTH FIELD */
} FormData_pg_aggregate;
/* ----------------
* Form_pg_aggregate corresponds to a pointer to a tuple with
* the format of pg_aggregate relation.
* ----------------
*/
typedef FormData_pg_aggregate *Form_pg_aggregate;
/* ----------------
* compiler constants for pg_aggregate
* ----------------
*/
#define Natts_pg_aggregate 11
#define Anum_pg_aggregate_aggname 1
#define Anum_pg_aggregate_aggowner 2
#define Anum_pg_aggregate_aggtransfn1 3
#define Anum_pg_aggregate_aggtransfn2 4
#define Anum_pg_aggregate_aggfinalfn 5
#define Anum_pg_aggregate_aggbasetype 6
#define Anum_pg_aggregate_aggtranstype1 7
#define Anum_pg_aggregate_aggtranstype2 8
#define Anum_pg_aggregate_aggfinaltype 9
#define Anum_pg_aggregate_agginitval1 10
#define Anum_pg_aggregate_agginitval2 11
/* ----------------
* initial contents of pg_aggregate
* ---------------
*/
DATA(insert OID = 0 ( avg PGUID int4pl int4inc int4div 23 23 23 23 0 0 ));
DATA(insert OID = 0 ( avg PGUID int2pl int2inc int2div 21 21 21 21 0 0 ));
DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div 700 700 700 700 0.0 0.0 ));
DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div 701 701 701 701 0.0 0.0 ));
DATA(insert OID = 0 ( sum PGUID int4pl - - 23 23 0 23 0 _null_ ));
DATA(insert OID = 0 ( sum PGUID int2pl - - 21 21 0 21 0 _null_ ));
DATA(insert OID = 0 ( sum PGUID float4pl - - 700 700 0 700 0.0 _null_ ));
DATA(insert OID = 0 ( sum PGUID float8pl - - 701 701 0 701 0.0 _null_ ));
DATA(insert OID = 0 ( max PGUID int4larger - - 23 23 0 23 _null_ _null_ ));
DATA(insert OID = 0 ( max PGUID int2larger - - 21 21 0 21 _null_ _null_ ));
DATA(insert OID = 0 ( max PGUID float4larger - - 700 700 0 700 _null_ _null_ ));
DATA(insert OID = 0 ( max PGUID float8larger - - 701 701 0 701 _null_ _null_ ));
DATA(insert OID = 0 ( min PGUID int4smaller - - 23 23 0 23 _null_ _null_ ));
DATA(insert OID = 0 ( min PGUID int2smaller - - 21 21 0 21 _null_ _null_ ));
DATA(insert OID = 0 ( min PGUID float4smaller - - 700 700 0 700 _null_ _null_ ));
DATA(insert OID = 0 ( min PGUID float8smaller - - 701 701 0 701 _null_ _null_ ));
DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 ));
/*
* prototypes for functions in pg_aggregate.c
*/
extern void AggregateCreate(char *aggName,
char *aggtransfn1Name,
char *aggtransfn2Name,
char *aggfinalfnName,
char *aggbasetypeName,
char *aggtransfn1typeName,
char *aggtransfn2typeName,
char *agginitval1,
char *agginitval2);
extern char *AggNameGetInitVal(char *aggName, Oid basetype,
int xfuncno, bool *isNull);
#endif /* PG_AGGREGATE_H */

115
src/backend/catalog/pg_am.h Normal file
View File

@ -0,0 +1,115 @@
/*-------------------------------------------------------------------------
*
* pg_am.h--
* definition of the system "am" relation (pg_am)
* along with the relation's initial contents.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_am.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
* information from the DATA() statements.
*
* XXX do NOT break up DATA() statements into multiple lines!
* the scripts are not as smart as you might think...
*
*-------------------------------------------------------------------------
*/
#ifndef PG_AM_H
#define PG_AM_H
/* ----------------
* postgres.h contains the system type definitions and the
* CATALOG(), BOOTSTRAP and DATA() sugar words so this file
* can be read by both genbki.sh and the C compiler.
* ----------------
*/
#include "postgres.h"
/* ----------------
* pg_am definition. cpp turns this into
* typedef struct FormData_pg_am
* ----------------
*/
CATALOG(pg_am) {
NameData amname;
Oid amowner;
char amkind;
int2 amstrategies;
int2 amsupport;
regproc amgettuple;
regproc aminsert;
regproc amdelete;
regproc amgetattr;
regproc amsetlock;
regproc amsettid;
regproc amfreetuple;
regproc ambeginscan;
regproc amrescan;
regproc amendscan;
regproc ammarkpos;
regproc amrestrpos;
regproc amopen;
regproc amclose;
regproc ambuild;
regproc amcreate;
regproc amdestroy;
} FormData_pg_am;
/* ----------------
* Form_pg_am corresponds to a pointer to a tuple with
* the format of pg_am relation.
* ----------------
*/
typedef FormData_pg_am *Form_pg_am;
/* ----------------
* compiler constants for pg_am
* ----------------
*/
#define Natts_pg_am 22
#define Anum_pg_am_amname 1
#define Anum_pg_am_amowner 2
#define Anum_pg_am_amkind 3
#define Anum_pg_am_amstrategies 4
#define Anum_pg_am_amsupport 5
#define Anum_pg_am_amgettuple 6
#define Anum_pg_am_aminsert 7
#define Anum_pg_am_amdelete 8
#define Anum_pg_am_amgetattr 9
#define Anum_pg_am_amsetlock 10
#define Anum_pg_am_amsettid 11
#define Anum_pg_am_amfreetuple 12
#define Anum_pg_am_ambeginscan 13
#define Anum_pg_am_amrescan 14
#define Anum_pg_am_amendscan 15
#define Anum_pg_am_ammarkpos 16
#define Anum_pg_am_amrestrpos 17
#define Anum_pg_am_amopen 18
#define Anum_pg_am_amclose 19
#define Anum_pg_am_ambuild 20
#define Anum_pg_am_amcreate 21
#define Anum_pg_am_amdestroy 22
/* ----------------
* initial contents of pg_am
* ----------------
*/
DATA(insert OID = 405 ( hash PGUID "o" 1 1 hashgettuple hashinsert hashdelete - - - - hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos - - hashbuild - - ));
DATA(insert OID = 402 ( rtree PGUID "o" 8 3 rtgettuple rtinsert rtdelete - - - - rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos - - rtbuild - - ));
DATA(insert OID = 403 ( btree PGUID "o" 5 1 btgettuple btinsert btdelete - - - - btbeginscan btrescan btendscan btmarkpos btrestrpos - - btbuild - - ));
#define BTREE_AM_OID 403
BKI_BEGIN
#ifdef NOBTREE
BKI_END
DATA(insert OID = 404 ( nobtree PGUID "o" 5 1 nobtgettuple nobtinsert nobtdelete - - - - nobtbeginscan nobtrescan nobtendscan nobtmarkpos nobtrestrpos - - nobtbuild - - ));
BKI_BEGIN
#endif /* NOBTREE */
BKI_END
#endif /* PG_AM_H */

View File

@ -0,0 +1,546 @@
/*-------------------------------------------------------------------------
*
* pg_amop.h--
* definition of the system "amop" relation (pg_amop)
* along with the relation's initial contents.
*
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_amop.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
* information from the DATA() statements.
*
*-------------------------------------------------------------------------
*/
#ifndef PG_AMOP_H
#define PG_AMOP_H
/* ----------------
* postgres.h contains the system type definitions and the
* CATALOG(), BOOTSTRAP and DATA() sugar words so this file
* can be read by both genbki.sh and the C compiler.
* ----------------
*/
#include "postgres.h"
#include "access/istrat.h"
/* ----------------
* pg_amop definition. cpp turns this into
* typedef struct FormData_pg_amop
* ----------------
*/
CATALOG(pg_amop) {
Oid amopid;
Oid amopclaid;
Oid amopopr;
int2 amopstrategy;
regproc amopselect;
regproc amopnpages;
} FormData_pg_amop;
/* ----------------
* Form_pg_amop corresponds to a pointer to a tuple with
* the format of pg_amop relation.
* ----------------
*/
typedef FormData_pg_amop *Form_pg_amop;
/* ----------------
* compiler constants for pg_amop
* ----------------
*/
/* #define Name_pg_amop "pg_amop" */
#define Natts_pg_amop 6
#define Anum_pg_amop_amopid 1
#define Anum_pg_amop_amopclaid 2
#define Anum_pg_amop_amopopr 3
#define Anum_pg_amop_amopstrategy 4
#define Anum_pg_amop_amopselect 5
#define Anum_pg_amop_amopnpages 6
/* ----------------
* initial contents of pg_amop
* ----------------
*/
/*
* rtree box_ops
*/
DATA(insert OID = 0 ( 402 422 493 1 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 494 2 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 500 3 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 495 4 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 496 5 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 499 6 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 498 7 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 422 497 8 rtsel rtnpage ));
/*
* rtree bigbox_ops
*/
DATA(insert OID = 0 ( 402 433 493 1 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 494 2 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 500 3 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 495 4 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 496 5 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 499 6 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 498 7 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 433 497 8 rtsel rtnpage ));
/*
* rtree poly_ops (supports polygons)
*/
DATA(insert OID = 0 ( 402 434 485 1 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 486 2 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 487 3 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 488 4 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 489 5 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 490 6 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 491 7 rtsel rtnpage ));
DATA(insert OID = 0 ( 402 434 492 8 rtsel rtnpage ));
/*
* nbtree int2_ops
*/
DATA(insert OID = 0 ( 403 421 95 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 421 522 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 421 94 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 421 524 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 421 520 5 btreesel btreenpage ));
/*
* nbtree float8_ops
*/
DATA(insert OID = 0 ( 403 423 672 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 423 673 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 423 670 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 423 675 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 423 674 5 btreesel btreenpage ));
/*
* nbtree int24_ops
*/
DATA(insert OID = 0 ( 403 424 534 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 424 540 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 424 532 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 424 542 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 424 536 5 btreesel btreenpage ));
/*
* nbtree int42_ops
*/
DATA(insert OID = 0 ( 403 425 535 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 425 541 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 425 533 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 425 543 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 425 537 5 btreesel btreenpage ));
/*
* nbtree int4_ops
*/
DATA(insert OID = 0 ( 403 426 97 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 426 523 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 426 96 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 426 525 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 426 521 5 btreesel btreenpage ));
/*
* nbtree oid_ops
*/
DATA(insert OID = 0 ( 403 427 609 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 427 611 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 427 607 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 427 612 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 427 610 5 btreesel btreenpage ));
/*
* nbtree float4_ops
*/
DATA(insert OID = 0 ( 403 428 622 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 428 624 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 428 620 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 428 625 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 428 623 5 btreesel btreenpage ));
/*
* nbtree char_ops
*/
DATA(insert OID = 0 ( 403 429 631 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 429 632 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 429 92 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 429 634 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 429 633 5 btreesel btreenpage ));
/*
* nbtree char2_ops
*/
DATA(insert OID = 0 ( 403 406 418 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 406 457 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 406 412 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 406 463 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 406 460 5 btreesel btreenpage ));
/*
* nbtree char4_ops
*/
DATA(insert OID = 0 ( 403 407 419 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 407 458 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 407 413 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 407 464 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 407 461 5 btreesel btreenpage ));
/*
* nbtree char8_ops
*/
DATA(insert OID = 0 ( 403 408 420 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 408 459 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 408 414 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 408 465 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 408 462 5 btreesel btreenpage ));
/*
* nbtree name_ops
*/
DATA(insert OID = 0 ( 403 409 660 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 409 661 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 409 93 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 409 663 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 409 662 5 btreesel btreenpage ));
/*
* nbtree char16_ops
*/
DATA(insert OID = 0 ( 403 430 645 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 430 646 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 430 99 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 430 648 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 430 647 5 btreesel btreenpage ));
/*
* nbtree text_ops
*/
DATA(insert OID = 0 ( 403 431 664 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 431 665 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 431 98 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 431 667 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 431 666 5 btreesel btreenpage ));
/*
* nbtree abstime_ops
*/
DATA(insert OID = 0 ( 403 432 562 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 432 564 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 432 560 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 432 565 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 432 563 5 btreesel btreenpage ));
/*
* nbtree oidint4_ops
*/
DATA(insert OID = 0 ( 403 435 930 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 435 931 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 435 932 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 435 933 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 435 934 5 btreesel btreenpage ));
/*
* nbtree oidint2_ops
*/
DATA(insert OID = 0 ( 403 437 830 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 437 831 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 437 832 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 437 833 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 437 834 5 btreesel btreenpage ));
/*
* nbtree oidname_ops
*/
DATA(insert OID = 0 ( 403 436 676 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 436 677 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 436 678 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 436 679 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 436 680 5 btreesel btreenpage ));
/*
* nbtree bpchar_ops
*/
DATA(insert OID = 0 ( 403 1076 1058 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1076 1059 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1076 1054 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1076 1061 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1076 1060 5 btreesel btreenpage ));
/*
* nbtree varchar_ops
*/
DATA(insert OID = 0 ( 403 1077 1066 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1077 1067 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1077 1062 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1077 1069 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1077 1068 5 btreesel btreenpage ));
/*
* nbtree date_ops
*/
DATA(insert OID = 0 ( 403 1114 1095 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1114 1096 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1114 1093 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1114 1098 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1114 1097 5 btreesel btreenpage ));
/*
* nbtree time_ops
*/
DATA(insert OID = 0 ( 403 1115 1110 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1115 1111 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1115 1108 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1115 1113 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 403 1115 1112 5 btreesel btreenpage ));
BKI_BEGIN
#ifdef NOBTREE
BKI_END
/*
* nobtree int2_ops
*/
DATA(insert OID = 0 ( 404 421 95 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 421 522 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 421 94 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 421 524 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 421 520 5 btreesel btreenpage ));
/*
* nobtree float8_ops
*/
DATA(insert OID = 0 ( 404 423 672 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 423 673 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 423 670 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 423 675 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 423 674 5 btreesel btreenpage ));
/*
* nobtree int24_ops
*/
DATA(insert OID = 0 ( 404 424 534 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 424 540 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 424 532 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 424 542 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 424 536 5 btreesel btreenpage ));
/*
* nobtree int42_ops
*/
DATA(insert OID = 0 ( 404 425 535 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 425 541 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 425 533 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 425 543 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 425 537 5 btreesel btreenpage ));
/*
* nobtree int4_ops
*/
DATA(insert OID = 0 ( 404 426 97 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 426 523 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 426 96 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 426 525 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 426 521 5 btreesel btreenpage ));
/*
* nobtree oid_ops
*/
DATA(insert OID = 0 ( 404 427 609 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 427 611 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 427 607 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 427 612 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 427 610 5 btreesel btreenpage ));
/*
* nobtree float4_ops
*/
DATA(insert OID = 0 ( 404 428 622 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 428 624 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 428 620 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 428 625 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 428 623 5 btreesel btreenpage ));
/*
* nobtree char_ops
*/
DATA(insert OID = 0 ( 404 429 631 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 429 632 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 429 92 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 429 634 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 429 633 5 btreesel btreenpage ));
/*
* nobtree char2_ops
*/
DATA(insert OID = 0 ( 404 406 418 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 406 457 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 406 412 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 406 463 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 406 460 5 btreesel btreenpage ));
/*
* nobtree char4_ops
*/
DATA(insert OID = 0 ( 404 407 419 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 407 458 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 407 413 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 407 464 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 407 461 5 btreesel btreenpage ));
/*
* nobtree char8_ops
*/
DATA(insert OID = 0 ( 404 408 420 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 408 459 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 408 414 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 408 465 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 408 462 5 btreesel btreenpage ));
/*
* nobtree char16_ops
*/
DATA(insert OID = 0 ( 404 430 645 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 430 646 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 430 99 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 430 648 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 430 647 5 btreesel btreenpage ));
/*
* nobtree name_ops
*/
DATA(insert OID = 0 ( 404 409 660 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 409 661 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 409 93 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 409 663 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 409 662 5 btreesel btreenpage ));
/*
* nobtree text_ops
*/
DATA(insert OID = 0 ( 404 431 664 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 431 665 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 431 98 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 431 667 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 431 666 5 btreesel btreenpage ));
/*
* nobtree abstime_ops
*/
DATA(insert OID = 0 ( 404 432 562 1 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 432 564 2 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 432 560 3 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 432 565 4 btreesel btreenpage ));
DATA(insert OID = 0 ( 404 432 563 5 btreesel btreenpage ));
BKI_BEGIN
#endif /* NOBTREE */
BKI_END
/*
* hash table int2_ops
*/
DATA(insert OID = 0 ( 405 421 94 1 btreesel btreenpage ));
/*
* hash table float8_ops
*/
DATA(insert OID = 0 ( 405 423 670 1 btreesel btreenpage ));
/*
* hash table int4_ops
*/
DATA(insert OID = 0 ( 405 426 96 1 hashsel hashnpage ));
/*
* hash table oid_ops
*/
DATA(insert OID = 0 ( 405 427 607 1 hashsel hashnpage ));
/*
* hash table float4_ops
*/
DATA(insert OID = 0 ( 405 428 620 1 hashsel hashnpage ));
/*
* hash table char_ops
*/
DATA(insert OID = 0 ( 405 429 92 1 hashsel hashnpage ));
/*
* hash table char2_ops
*/
DATA(insert OID = 0 ( 405 406 412 1 hashsel hashnpage ));
/*
* hash table char4_ops
*/
DATA(insert OID = 0 ( 405 407 413 1 hashsel hashnpage ));
/*
* hash table char8_ops
*/
DATA(insert OID = 0 ( 405 408 414 1 hashsel hashnpage ));
/*
* hash table char16_ops
*/
DATA(insert OID = 0 ( 405 430 99 1 hashsel hashnpage ));
/*
* hash table name_ops
*/
DATA(insert OID = 0 ( 405 409 93 1 hashsel hashnpage ));
/*
* hash table text_ops
*/
DATA(insert OID = 0 ( 405 431 98 1 hashsel hashnpage ));
/*
* hash table bpchar_ops
*/
DATA(insert OID = 0 ( 405 1076 1054 1 hashsel hashnpage ));
/*
* hash table varchar_ops
*/
DATA(insert OID = 0 ( 405 1077 1062 1 hashsel hashnpage ));
#endif /* PG_AMOP_H */

Some files were not shown because too many files have changed in this diff.