postgresql/src/backend/tsearch/wparser.c

543 lines
13 KiB
C

/*-------------------------------------------------------------------------
*
* wparser.c
* Standard interface to word parser
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/tsearch/wparser.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "funcapi.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/fmgrprotos.h"
#include "utils/jsonfuncs.h"
#include "utils/varlena.h"
/******sql-level interface******/
typedef struct
{
int cur;
LexDescr *list;
} TSTokenTypeStorage;
/* state for ts_headline_json_* */
typedef struct HeadlineJsonState
{
HeadlineParsedText *prs;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
TSQuery query;
List *prsoptions;
bool transformed;
} HeadlineJsonState;
static text *headline_json_value(void *_state, char *elem_value, int elem_len);
static void
tt_setup_firstcall(FuncCallContext *funcctx, FunctionCallInfo fcinfo,
Oid prsid)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
TSTokenTypeStorage *st;
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
if (!OidIsValid(prs->lextypeOid))
elog(ERROR, "method lextype isn't defined for text search parser %u",
prsid);
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
st->cur = 0;
/* lextype takes one dummy argument */
st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
(Datum) 0));
funcctx->user_fctx = (void *) st;
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
funcctx->tuple_desc = tupdesc;
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
tt_process_call(FuncCallContext *funcctx)
{
TSTokenTypeStorage *st;
st = (TSTokenTypeStorage *) funcctx->user_fctx;
if (st->list && st->list[st->cur].lexid)
{
Datum result;
char *values[3];
char txtid[16];
HeapTuple tuple;
sprintf(txtid, "%d", st->list[st->cur].lexid);
values[0] = txtid;
values[1] = st->list[st->cur].alias;
values[2] = st->list[st->cur].descr;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = HeapTupleGetDatum(tuple);
pfree(values[1]);
pfree(values[2]);
st->cur++;
return result;
}
return (Datum) 0;
}
Datum
ts_token_type_byid(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
tt_setup_firstcall(funcctx, fcinfo, PG_GETARG_OID(0));
}
funcctx = SRF_PERCALL_SETUP();
if ((result = tt_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
Datum
ts_token_type_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *prsname = PG_GETARG_TEXT_PP(0);
Oid prsId;
funcctx = SRF_FIRSTCALL_INIT();
prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
tt_setup_firstcall(funcctx, fcinfo, prsId);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = tt_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
typedef struct
{
int type;
char *lexeme;
} LexemeEntry;
typedef struct
{
int cur;
int len;
LexemeEntry *list;
} PrsStorage;
static void
prs_setup_firstcall(FuncCallContext *funcctx, FunctionCallInfo fcinfo,
Oid prsid, text *txt)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
PrsStorage *st;
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
char *lex = NULL;
int llen = 0,
type = 0;
void *prsdata;
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st = (PrsStorage *) palloc(sizeof(PrsStorage));
st->cur = 0;
st->len = 16;
st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
PointerGetDatum(VARDATA_ANY(txt)),
Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
PointerGetDatum(prsdata),
PointerGetDatum(&lex),
PointerGetDatum(&llen)))) != 0)
{
if (st->cur >= st->len)
{
st->len = 2 * st->len;
st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
}
st->list[st->cur].lexeme = palloc(llen + 1);
memcpy(st->list[st->cur].lexeme, lex, llen);
st->list[st->cur].lexeme[llen] = '\0';
st->list[st->cur].type = type;
st->cur++;
}
FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
st->len = st->cur;
st->cur = 0;
funcctx->user_fctx = (void *) st;
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
funcctx->tuple_desc = tupdesc;
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
static Datum
prs_process_call(FuncCallContext *funcctx)
{
PrsStorage *st;
st = (PrsStorage *) funcctx->user_fctx;
if (st->cur < st->len)
{
Datum result;
char *values[2];
char tid[16];
HeapTuple tuple;
values[0] = tid;
sprintf(tid, "%d", st->list[st->cur].type);
values[1] = st->list[st->cur].lexeme;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = HeapTupleGetDatum(tuple);
pfree(values[1]);
st->cur++;
return result;
}
return (Datum) 0;
}
Datum
ts_parse_byid(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_PP(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, fcinfo, PG_GETARG_OID(0), txt);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
Datum
ts_parse_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *prsname = PG_GETARG_TEXT_PP(0);
text *txt = PG_GETARG_TEXT_PP(1);
Oid prsId;
funcctx = SRF_FIRSTCALL_INIT();
prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
prs_setup_firstcall(funcctx, fcinfo, prsId, txt);
}
funcctx = SRF_PERCALL_SETUP();
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)
{
Oid tsconfig = PG_GETARG_OID(0);
text *in = PG_GETARG_TEXT_PP(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
HeadlineParsedText prs;
List *prsoptions;
text *out;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
cfg = lookup_ts_config_cache(tsconfig);
prsobj = lookup_ts_parser_cache(cfg->prsId);
if (!OidIsValid(prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
hlparsetext(cfg->cfgId, &prs, query,
VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
if (opt)
prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
prsoptions = NIL;
FunctionCall3(&(prsobj->prsheadline),
PointerGetDatum(&prs),
PointerGetDatum(prsoptions),
PointerGetDatum(query));
out = generateHeadline(&prs);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
pfree(prs.startsel);
pfree(prs.stopsel);
PG_RETURN_POINTER(out);
}
Datum
ts_headline_byid(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
}
Datum
ts_headline_opt(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
{
Oid tsconfig = PG_GETARG_OID(0);
Jsonb *jb = PG_GETARG_JSONB_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
Jsonb *out;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(tsconfig);
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_jsonb_string_values(jb, state, action);
PG_FREE_IF_COPY(jb, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_JSONB_P(out);
}
Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
}
Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
{
Oid tsconfig = PG_GETARG_OID(0);
text *json = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
text *out;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(tsconfig);
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_json_string_values(json, state, action);
PG_FREE_IF_COPY(json, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_TEXT_P(out);
}
Datum
ts_headline_json(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
}
Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
/*
* Return headline in text from, generated from a json(b) element
*/
static text *
headline_json_value(void *_state, char *elem_value, int elem_len)
{
HeadlineJsonState *state = (HeadlineJsonState *) _state;
HeadlineParsedText *prs = state->prs;
TSConfigCacheEntry *cfg = state->cfg;
TSParserCacheEntry *prsobj = state->prsobj;
TSQuery query = state->query;
List *prsoptions = state->prsoptions;
prs->curwords = 0;
hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
FunctionCall3(&(prsobj->prsheadline),
PointerGetDatum(prs),
PointerGetDatum(prsoptions),
PointerGetDatum(query));
state->transformed = true;
return generateHeadline(prs);
}