postgresql/contrib/dict_int/dict_int.c

/*-------------------------------------------------------------------------
 *
 * dict_int.c
 *	  Text search dictionary for integers
 *
 * Copyright (c) 2007-2020, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/dict_int/dict_int.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <limits.h>

#include "commands/defrem.h"
#include "tsearch/ts_public.h"

/* Module magic block for this library (macro supplied by the PostgreSQL headers). */
PG_MODULE_MAGIC;

/*
 * Settings of one integer dictionary instance.  Filled in by
 * dintdict_init() and read by dintdict_lexize().
 */
typedef struct DictInt
{
	int			maxlen;			/* tokens longer than this many bytes are
								 * trimmed or rejected */
	bool		rejectlong;		/* true: reject over-long tokens; false: trim
								 * them to maxlen */
} DictInt;


/*
 * Declare the two entry points defined below (dictionary init and lexize)
 * through the version-1 function info macro.
 */
PG_FUNCTION_INFO_V1(dintdict_init);
PG_FUNCTION_INFO_V1(dintdict_lexize);

/*
 * dintdict_init
 *		Build a DictInt from the dictionary's option list.
 *
 * Argument 0 is a List of DefElem options.  Recognized options:
 *		maxlen		- integer >= 1; defaults to 6
 *		rejectlong	- boolean; defaults to false
 *
 * Raises ERRCODE_INVALID_PARAMETER_VALUE for an unknown option name, for a
 * maxlen that is not a well-formed integer, or for a maxlen outside
 * 1 .. INT_MAX.  Returns a pointer to the palloc'd DictInt.
 */
Datum
dintdict_init(PG_FUNCTION_ARGS)
{
	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
	DictInt    *d;
	ListCell   *l;

	d = (DictInt *) palloc0(sizeof(DictInt));
	d->maxlen = 6;
	d->rejectlong = false;

	foreach(l, dictoptions)
	{
		DefElem    *defel = (DefElem *) lfirst(l);

		if (strcmp(defel->defname, "maxlen") == 0)
		{
			char	   *val = defGetString(defel);
			char	   *endptr;
			long		parsed;

			/*
			 * Use strtol() instead of atoi(): atoi() has undefined behavior
			 * when the value does not fit in an int, and it silently ignores
			 * trailing garbage (e.g. "6abc" would be taken as 6).
			 */
			errno = 0;
			parsed = strtol(val, &endptr, 10);

			/* Trailing whitespace stays acceptable, as it was with atoi() */
			if (endptr != val)
				endptr += strspn(endptr, " \t\n\r\f\v");

			if (endptr == val || *endptr != '\0')
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("invalid value for parameter \"maxlen\": \"%s\"",
								val)));

			/*
			 * Checked before the range test so that a negative overflow
			 * (LONG_MIN with ERANGE) still reports the ">= 1" message.
			 */
			if (parsed < 1)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("maxlen value has to be >= 1")));

			if (errno == ERANGE || parsed > INT_MAX)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("maxlen value \"%s\" is out of range", val)));

			d->maxlen = (int) parsed;
		}
		else if (strcmp(defel->defname, "rejectlong") == 0)
		{
			d->rejectlong = defGetBoolean(defel);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized intdict parameter: \"%s\"",
							defel->defname)));
		}
	}

	PG_RETURN_POINTER(d);
}

Datum
dintdict_lexize(PG_FUNCTION_ARGS)
{
	DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2));
	TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);

	res[1].lexeme = NULL;
	if (PG_GETARG_INT32(2) > d->maxlen)
	{
		if (d->rejectlong)
		{
			/* reject by returning void array */
			pfree(txt);
			res[0].lexeme = NULL;
		}
		else
		{
			/* trim integer */
			txt[d->maxlen] = '\0';
			res[0].lexeme = txt;
		}
	}
	else
	{
		res[0].lexeme = txt;
	}

	PG_RETURN_POINTER(res);
}
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`/*-------------------------------------------------------------------------`
			`*`
			`* dict_int.c`
			`* Text search dictionary for integers`
			`*`
Update copyrights for 2020 Backpatch-through: update all files in master, backpatch legal files through 9.4 2020-01-01 18:21:45 +01:00			`* Copyright (c) 2007-2020, PostgreSQL Global Development Group`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`*`
			`* IDENTIFICATION`
Remove cvs keywords from all files. 2010-09-20 22:08:53 +02:00			`* contrib/dict_int/dict_int.c`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#include "postgres.h"`

			`#include "commands/defrem.h"`
			`#include "tsearch/ts_public.h"`

			`PG_MODULE_MAGIC;`

pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`typedef struct`
			`{`
			`int maxlen;`
			`bool rejectlong;`
8.4 pgindent run, with new combined Linux/FreeBSD/MinGW typedef list provided by Andrew. 2009-06-11 16:49:15 +02:00			`} DictInt;`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00

			`PG_FUNCTION_INFO_V1(dintdict_init);`
			`PG_FUNCTION_INFO_V1(dintdict_lexize);`

			`Datum`
			`dintdict_init(PG_FUNCTION_ARGS)`
			`{`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`List dictoptions = (List ) PG_GETARG_POINTER(0);`
			`DictInt *d;`
			`ListCell *l;`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00
			`d = (DictInt *) palloc0(sizeof(DictInt));`
			`d->maxlen = 6;`
			`d->rejectlong = false;`

			`foreach(l, dictoptions)`
			`{`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`DefElem defel = (DefElem ) lfirst(l);`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se 2018-01-27 00:25:02 +01:00			`if (strcmp(defel->defname, "maxlen") == 0)`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`{`
			`d->maxlen = atoi(defGetString(defel));`
Ensure maxlen is at least 1 in dict_int The dict_int text search dictionary template accepts maxlen parameter, which is then used to cap the length of input strings. The value was not properly checked, and the code simply does txt[d->maxlen] = '\0'; to insert a terminator, leading to segfaults with negative values. This commit simply rejects values less than 1. The issue was there since dict_int was introduced in 9.3, so backpatch all the way back to 9.4 which is the oldest supported version. Reported-by: cili Discussion: https://postgr.es/m/16144-a36a5bef7657047d@postgresql.org Backpatch-through: 9.4 2019-12-03 16:55:51 +01:00
			`if (d->maxlen < 1)`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
			`errmsg("maxlen value has to be >= 1")));`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`}`
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se 2018-01-27 00:25:02 +01:00			`else if (strcmp(defel->defname, "rejectlong") == 0)`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`{`
			`d->rejectlong = defGetBoolean(defel);`
			`}`
			`else`
			`{`
			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
			`errmsg("unrecognized intdict parameter: \"%s\"",`
			`defel->defname)));`
			`}`
			`}`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`PG_RETURN_POINTER(d);`
			`}`

			`Datum`
			`dintdict_lexize(PG_FUNCTION_ARGS)`
			`{`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`DictInt d = (DictInt ) PG_GETARG_POINTER(0);`
			`char in = (char ) PG_GETARG_POINTER(1);`
			`char *txt = pnstrdup(in, PG_GETARG_INT32(2));`
Fix bogus code in contrib/ tsearch dictionary examples. Both dict_int and dict_xsyn were blithely assuming that whatever memory palloc gives back will be pre-zeroed. This would typically work for just about long enough to run their regression tests, and no longer :-(. The pre-9.0 code in dict_xsyn was even lamer than that, as it would happily give back a pointer to the result of palloc(0), encouraging its caller to access off the end of memory. Again, this would just barely fail to fail as long as memory contained nothing but zeroes. Per a report from Rodrigo Hjort that code based on these examples didn't work reliably. 2011-11-04 00:17:48 +01:00			`TSLexeme res = palloc0(sizeof(TSLexeme) 2);`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00
			`res[1].lexeme = NULL;`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`if (PG_GETARG_INT32(2) > d->maxlen)`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`{`
pgindent run for 8.3. 2007-11-15 22:14:46 +01:00			`if (d->rejectlong)`
Add sample text search dictionary templates and parsers, to replace the hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov. 2007-10-15 23:36:50 +02:00			`{`
			`/* reject by returning void array */`
			`pfree(txt);`
			`res[0].lexeme = NULL;`
			`}`
			`else`
			`{`
			`/* trim integer */`
			`txt[d->maxlen] = '\0';`
			`res[0].lexeme = txt;`
			`}`
			`}`
			`else`
			`{`
			`res[0].lexeme = txt;`
			`}`

			`PG_RETURN_POINTER(res);`
			`}`