Sync our Snowball stemmer dictionaries with current upstream

The main change is a new stemmer for Greek.  There are minor changes
in the Danish and French stemmers.

Author: Panagiotis Mavrogiorgos <pmav99@gmail.com>
This commit is contained in:
Peter Eisentraut 2019-07-04 13:10:41 +02:00
parent dedb6e0143
commit 7b925e1270
14 changed files with 5052 additions and 693 deletions

View File

@ -3810,6 +3810,7 @@ Parser: "pg_catalog.default"
pg_catalog | finnish_stem | snowball stemmer for finnish language
pg_catalog | french_stem | snowball stemmer for french language
pg_catalog | german_stem | snowball stemmer for german language
pg_catalog | greek_stem | snowball stemmer for greek language
pg_catalog | hungarian_stem | snowball stemmer for hungarian language
pg_catalog | indonesian_stem | snowball stemmer for indonesian language
pg_catalog | irish_stem | snowball stemmer for irish language

View File

@ -41,6 +41,7 @@ OBJS= $(WIN32RES) dict_snowball.o api.o utilities.o \
stem_UTF_8_finnish.o \
stem_UTF_8_french.o \
stem_UTF_8_german.o \
stem_UTF_8_greek.o \
stem_UTF_8_hungarian.o \
stem_UTF_8_indonesian.o \
stem_UTF_8_irish.o \
@ -69,6 +70,7 @@ LANGUAGES= \
finnish finnish \
french french \
german german \
greek greek \
hungarian hungarian \
indonesian indonesian \
irish irish \

View File

@ -29,8 +29,8 @@ We choose to include the derived files in the PostgreSQL distribution
because most installations will not have the Snowball compiler available.
We are currently synced with the Snowball git commit
1964ce688cbeca505263c8f77e16ed923296ce7a
of 2018-06-29.
4456b82c26c02493e8807a66f30593a98c5d2888
of 2019-06-24.
To update the PostgreSQL sources from a new Snowball version:
@ -57,7 +57,7 @@ do not require any changes.
4. Check whether any stemmer modules have been added or removed. If so, edit
the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the
stemmer_modules[] table in dict_snowball.c. You might also need to change
the LANGUAGES list in Makefile.
the LANGUAGES list in Makefile and tsearch_config_languages in initdb.c.
5. The various stopword files in stopwords/ must be downloaded
individually from pages on the snowballstem.org website.

View File

@ -50,6 +50,7 @@
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
#include "snowball/libstemmer/stem_UTF_8_french.h"
#include "snowball/libstemmer/stem_UTF_8_german.h"
#include "snowball/libstemmer/stem_UTF_8_greek.h"
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
#include "snowball/libstemmer/stem_UTF_8_indonesian.h"
#include "snowball/libstemmer/stem_UTF_8_irish.h"
@ -115,6 +116,7 @@ static const stemmer_module stemmer_modules[] =
STEMMER_MODULE(finnish, PG_UTF8, UTF_8),
STEMMER_MODULE(french, PG_UTF8, UTF_8),
STEMMER_MODULE(german, PG_UTF8, UTF_8),
STEMMER_MODULE(greek, PG_UTF8, UTF_8),
STEMMER_MODULE(hungarian, PG_UTF8, UTF_8),
STEMMER_MODULE(indonesian, PG_UTF8, UTF_8),
STEMMER_MODULE(irish, PG_UTF8, UTF_8),

View File

@ -124,6 +124,8 @@ static const struct among a_2[5] =
/* 4 */ { 4, s_2_4, -1, 2, 0}
};
static const unsigned char g_c[] = { 119, 223, 119, 1 };
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
@ -133,25 +135,25 @@ static const symbol s_1[] = { 'i', 'g' };
static const symbol s_2[] = { 'l', 0xF8, 's' };
static int r_mark_regions(struct SN_env * z) { /* forwardmode */
z->I[0] = z->l; /* $p1 = <integer expression>, line 31 */
{ int c_test1 = z->c; /* test, line 33 */
{ int ret = z->c + 3; /* hop, line 33 */
z->I[0] = z->l; /* $p1 = <integer expression>, line 33 */
{ int c_test1 = z->c; /* test, line 35 */
{ int ret = z->c + 3; /* hop, line 35 */
if (0 > ret || ret > z->l) return 0;
z->c = ret;
}
z->I[1] = z->c; /* setmark x, line 33 */
z->I[1] = z->c; /* setmark x, line 35 */
z->c = c_test1;
}
if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
{ /* gopast */ /* non v, line 34 */
if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 36 */
{ /* gopast */ /* non v, line 36 */
int ret = in_grouping(z, g_v, 97, 248, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 34 */
/* try, line 35 */
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 35 */
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 35 */
z->I[0] = z->c; /* setmark p1, line 36 */
/* try, line 37 */
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 37 */
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 37 */
lab0:
return 1;
}
@ -159,25 +161,25 @@ lab0:
static int r_main_suffix(struct SN_env * z) { /* backwardmode */
int among_var;
{ int mlimit1; /* setlimit, line 41 */
{ int mlimit1; /* setlimit, line 43 */
if (z->c < z->I[0]) return 0;
mlimit1 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 41 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 41 */
z->ket = z->c; /* [, line 43 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 43 */
among_var = find_among_b(z, a_0, 32);
if (!(among_var)) { z->lb = mlimit1; return 0; }
z->bra = z->c; /* ], line 41 */
z->bra = z->c; /* ], line 43 */
z->lb = mlimit1;
}
switch (among_var) { /* among, line 42 */
switch (among_var) { /* among, line 44 */
case 1:
{ int ret = slice_del(z); /* delete, line 48 */
{ int ret = slice_del(z); /* delete, line 50 */
if (ret < 0) return ret;
}
break;
case 2:
if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 50 */
{ int ret = slice_del(z); /* delete, line 50 */
if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 52 */
{ int ret = slice_del(z); /* delete, line 52 */
if (ret < 0) return ret;
}
break;
@ -186,23 +188,23 @@ static int r_main_suffix(struct SN_env * z) { /* backwardmode */
}
static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
{ int m_test1 = z->l - z->c; /* test, line 55 */
{ int m_test1 = z->l - z->c; /* test, line 57 */
{ int mlimit2; /* setlimit, line 56 */
{ int mlimit2; /* setlimit, line 58 */
if (z->c < z->I[0]) return 0;
mlimit2 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 56 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 56 */
z->ket = z->c; /* [, line 58 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 58 */
if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; }
z->bra = z->c; /* ], line 56 */
z->bra = z->c; /* ], line 58 */
z->lb = mlimit2;
}
z->c = z->l - m_test1;
}
if (z->c <= z->lb) return 0;
z->c--; /* next, line 62 */
z->bra = z->c; /* ], line 62 */
{ int ret = slice_del(z); /* delete, line 62 */
z->c--; /* next, line 64 */
z->bra = z->c; /* ], line 64 */
{ int ret = slice_del(z); /* delete, line 64 */
if (ret < 0) return ret;
}
return 1;
@ -210,35 +212,35 @@ static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
static int r_other_suffix(struct SN_env * z) { /* backwardmode */
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 66 */
z->ket = z->c; /* [, line 66 */
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 66 */
z->bra = z->c; /* ], line 66 */
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 66 */
{ int ret = slice_del(z); /* delete, line 66 */
{ int m1 = z->l - z->c; (void)m1; /* do, line 68 */
z->ket = z->c; /* [, line 68 */
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 68 */
z->bra = z->c; /* ], line 68 */
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 68 */
{ int ret = slice_del(z); /* delete, line 68 */
if (ret < 0) return ret;
}
lab0:
z->c = z->l - m1;
}
{ int mlimit2; /* setlimit, line 67 */
{ int mlimit2; /* setlimit, line 69 */
if (z->c < z->I[0]) return 0;
mlimit2 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 67 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 67 */
z->ket = z->c; /* [, line 69 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 69 */
among_var = find_among_b(z, a_2, 5);
if (!(among_var)) { z->lb = mlimit2; return 0; }
z->bra = z->c; /* ], line 67 */
z->bra = z->c; /* ], line 69 */
z->lb = mlimit2;
}
switch (among_var) { /* among, line 68 */
switch (among_var) { /* among, line 70 */
case 1:
{ int ret = slice_del(z); /* delete, line 70 */
{ int ret = slice_del(z); /* delete, line 72 */
if (ret < 0) return ret;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 70 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 70 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 72 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 72 */
if (ret == 0) goto lab1;
if (ret < 0) return ret;
}
@ -247,7 +249,7 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
}
break;
case 2:
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */
if (ret < 0) return ret;
}
break;
@ -257,60 +259,60 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
static int r_undouble(struct SN_env * z) { /* backwardmode */
{ int mlimit1; /* setlimit, line 76 */
{ int mlimit1; /* setlimit, line 78 */
if (z->c < z->I[0]) return 0;
mlimit1 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 76 */
if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit1; return 0; } /* non v, line 76 */
z->bra = z->c; /* ], line 76 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
z->ket = z->c; /* [, line 78 */
if (in_grouping_b(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } /* grouping c, line 78 */
z->bra = z->c; /* ], line 78 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 78 */
if (z->S[0] == 0) return -1; /* -> ch, line 78 */
z->lb = mlimit1;
}
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
{ int ret = slice_del(z); /* delete, line 78 */
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 79 */
{ int ret = slice_del(z); /* delete, line 80 */
if (ret < 0) return ret;
}
return 1;
}
extern int danish_ISO_8859_1_stem(struct SN_env * z) { /* forwardmode */
{ int c1 = z->c; /* do, line 84 */
{ int ret = r_mark_regions(z); /* call mark_regions, line 84 */
{ int c1 = z->c; /* do, line 86 */
{ int ret = r_mark_regions(z); /* call mark_regions, line 86 */
if (ret == 0) goto lab0;
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
z->lb = z->c; z->c = z->l; /* backwards, line 87 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
{ int ret = r_main_suffix(z); /* call main_suffix, line 86 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 88 */
{ int ret = r_main_suffix(z); /* call main_suffix, line 88 */
if (ret == 0) goto lab1;
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 87 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 89 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 89 */
if (ret == 0) goto lab2;
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
{ int ret = r_other_suffix(z); /* call other_suffix, line 88 */
{ int m4 = z->l - z->c; (void)m4; /* do, line 90 */
{ int ret = r_other_suffix(z); /* call other_suffix, line 90 */
if (ret == 0) goto lab3;
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
{ int ret = r_undouble(z); /* call undouble, line 89 */
{ int m5 = z->l - z->c; (void)m5; /* do, line 91 */
{ int ret = r_undouble(z); /* call undouble, line 91 */
if (ret == 0) goto lab4;
if (ret < 0) return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -124,6 +124,8 @@ static const struct among a_2[5] =
/* 4 */ { 5, s_2_4, -1, 2, 0}
};
static const unsigned char g_c[] = { 119, 223, 119, 1 };
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
@ -133,25 +135,25 @@ static const symbol s_1[] = { 'i', 'g' };
static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
static int r_mark_regions(struct SN_env * z) { /* forwardmode */
z->I[0] = z->l; /* $p1 = <integer expression>, line 31 */
{ int c_test1 = z->c; /* test, line 33 */
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); /* hop, line 33 */
z->I[0] = z->l; /* $p1 = <integer expression>, line 33 */
{ int c_test1 = z->c; /* test, line 35 */
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); /* hop, line 35 */
if (ret < 0) return 0;
z->c = ret;
}
z->I[1] = z->c; /* setmark x, line 33 */
z->I[1] = z->c; /* setmark x, line 35 */
z->c = c_test1;
}
if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
{ /* gopast */ /* non v, line 34 */
if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 36 */
{ /* gopast */ /* non v, line 36 */
int ret = in_grouping_U(z, g_v, 97, 248, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 34 */
/* try, line 35 */
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 35 */
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 35 */
z->I[0] = z->c; /* setmark p1, line 36 */
/* try, line 37 */
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 37 */
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 37 */
lab0:
return 1;
}
@ -159,25 +161,25 @@ lab0:
static int r_main_suffix(struct SN_env * z) { /* backwardmode */
int among_var;
{ int mlimit1; /* setlimit, line 41 */
{ int mlimit1; /* setlimit, line 43 */
if (z->c < z->I[0]) return 0;
mlimit1 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 41 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 41 */
z->ket = z->c; /* [, line 43 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 43 */
among_var = find_among_b(z, a_0, 32);
if (!(among_var)) { z->lb = mlimit1; return 0; }
z->bra = z->c; /* ], line 41 */
z->bra = z->c; /* ], line 43 */
z->lb = mlimit1;
}
switch (among_var) { /* among, line 42 */
switch (among_var) { /* among, line 44 */
case 1:
{ int ret = slice_del(z); /* delete, line 48 */
{ int ret = slice_del(z); /* delete, line 50 */
if (ret < 0) return ret;
}
break;
case 2:
if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 50 */
{ int ret = slice_del(z); /* delete, line 50 */
if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 52 */
{ int ret = slice_del(z); /* delete, line 52 */
if (ret < 0) return ret;
}
break;
@ -186,25 +188,25 @@ static int r_main_suffix(struct SN_env * z) { /* backwardmode */
}
static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
{ int m_test1 = z->l - z->c; /* test, line 55 */
{ int m_test1 = z->l - z->c; /* test, line 57 */
{ int mlimit2; /* setlimit, line 56 */
{ int mlimit2; /* setlimit, line 58 */
if (z->c < z->I[0]) return 0;
mlimit2 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 56 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 56 */
z->ket = z->c; /* [, line 58 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 58 */
if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; }
z->bra = z->c; /* ], line 56 */
z->bra = z->c; /* ], line 58 */
z->lb = mlimit2;
}
z->c = z->l - m_test1;
}
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) return 0;
z->c = ret; /* next, line 62 */
z->c = ret; /* next, line 64 */
}
z->bra = z->c; /* ], line 62 */
{ int ret = slice_del(z); /* delete, line 62 */
z->bra = z->c; /* ], line 64 */
{ int ret = slice_del(z); /* delete, line 64 */
if (ret < 0) return ret;
}
return 1;
@ -212,35 +214,35 @@ static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
static int r_other_suffix(struct SN_env * z) { /* backwardmode */
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 66 */
z->ket = z->c; /* [, line 66 */
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 66 */
z->bra = z->c; /* ], line 66 */
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 66 */
{ int ret = slice_del(z); /* delete, line 66 */
{ int m1 = z->l - z->c; (void)m1; /* do, line 68 */
z->ket = z->c; /* [, line 68 */
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 68 */
z->bra = z->c; /* ], line 68 */
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 68 */
{ int ret = slice_del(z); /* delete, line 68 */
if (ret < 0) return ret;
}
lab0:
z->c = z->l - m1;
}
{ int mlimit2; /* setlimit, line 67 */
{ int mlimit2; /* setlimit, line 69 */
if (z->c < z->I[0]) return 0;
mlimit2 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 67 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 67 */
z->ket = z->c; /* [, line 69 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 69 */
among_var = find_among_b(z, a_2, 5);
if (!(among_var)) { z->lb = mlimit2; return 0; }
z->bra = z->c; /* ], line 67 */
z->bra = z->c; /* ], line 69 */
z->lb = mlimit2;
}
switch (among_var) { /* among, line 68 */
switch (among_var) { /* among, line 70 */
case 1:
{ int ret = slice_del(z); /* delete, line 70 */
{ int ret = slice_del(z); /* delete, line 72 */
if (ret < 0) return ret;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 70 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 70 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 72 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 72 */
if (ret == 0) goto lab1;
if (ret < 0) return ret;
}
@ -249,7 +251,7 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
}
break;
case 2:
{ int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
{ int ret = slice_from_s(z, 4, s_2); /* <-, line 74 */
if (ret < 0) return ret;
}
break;
@ -259,60 +261,60 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
static int r_undouble(struct SN_env * z) { /* backwardmode */
{ int mlimit1; /* setlimit, line 76 */
{ int mlimit1; /* setlimit, line 78 */
if (z->c < z->I[0]) return 0;
mlimit1 = z->lb; z->lb = z->I[0];
z->ket = z->c; /* [, line 76 */
if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit1; return 0; } /* non v, line 76 */
z->bra = z->c; /* ], line 76 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
z->ket = z->c; /* [, line 78 */
if (in_grouping_b_U(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } /* grouping c, line 78 */
z->bra = z->c; /* ], line 78 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 78 */
if (z->S[0] == 0) return -1; /* -> ch, line 78 */
z->lb = mlimit1;
}
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
{ int ret = slice_del(z); /* delete, line 78 */
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 79 */
{ int ret = slice_del(z); /* delete, line 80 */
if (ret < 0) return ret;
}
return 1;
}
extern int danish_UTF_8_stem(struct SN_env * z) { /* forwardmode */
{ int c1 = z->c; /* do, line 84 */
{ int ret = r_mark_regions(z); /* call mark_regions, line 84 */
{ int c1 = z->c; /* do, line 86 */
{ int ret = r_mark_regions(z); /* call mark_regions, line 86 */
if (ret == 0) goto lab0;
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
z->lb = z->c; z->c = z->l; /* backwards, line 87 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
{ int ret = r_main_suffix(z); /* call main_suffix, line 86 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 88 */
{ int ret = r_main_suffix(z); /* call main_suffix, line 88 */
if (ret == 0) goto lab1;
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 87 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 89 */
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 89 */
if (ret == 0) goto lab2;
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
{ int ret = r_other_suffix(z); /* call other_suffix, line 88 */
{ int m4 = z->l - z->c; (void)m4; /* do, line 90 */
{ int ret = r_other_suffix(z); /* call other_suffix, line 90 */
if (ret == 0) goto lab3;
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
{ int ret = r_undouble(z); /* call undouble, line 89 */
{ int m5 = z->l - z->c; (void)m5; /* do, line 91 */
{ int ret = r_undouble(z); /* call undouble, line 91 */
if (ret == 0) goto lab4;
if (ret < 0) return ret;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -59,31 +59,49 @@ extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
/* Code for character groupings: utf8 cases */
/*
 * Decode one UTF-8 encoded character reading forwards from p[c].
 *
 *   p     text buffer
 *   c     index of the first byte of the character
 *   l     forward limit; bytes at index >= l are never read
 *   slot  receives the decoded value
 *
 * Returns the number of bytes consumed (1 to 4), or 0 when c >= l and
 * nothing can be read.  The length is chosen from the lead byte alone;
 * continuation bytes are masked with 0x3F but not validated.  If the limit
 * is reached in the middle of a sequence, the function returns through the
 * shorter branch using only the bytes read so far.
 */
static int get_utf8(const symbol * p, int c, int l, int * slot) {
    int b0, b1, b2;
    if (c >= l) return 0;
    b0 = p[c++];
    if (b0 < 0xC0 || c == l) {   /* 1100 0000 */
        *slot = b0;
        return 1;
    }
    b1 = p[c++] & 0x3F;
    if (b0 < 0xE0 || c == l) {   /* 1110 0000 */
        *slot = (b0 & 0x1F) << 6 | b1;
        return 2;
    }
    b2 = p[c++] & 0x3F;
    if (b0 < 0xF0 || c == l) {   /* 1111 0000 */
        *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
        return 3;
    }
    /*
     * A 4-byte lead byte is 11110xxx: its payload is the low three bits,
     * so the mask is 0x7.  (0xE would drop bit 0 and keep bit 3.)
     */
    *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F);
    return 4;
}
/*
 * Decode one UTF-8 encoded character reading backwards, ending just
 * before p[c].
 *
 *   p     text buffer
 *   c     index one past the last byte of the character
 *   lb    backward limit; bytes at index < lb are never read
 *   slot  receives the decoded value
 *
 * Returns the number of bytes consumed (1 to 4), or 0 when c <= lb and
 * nothing can be read.  Bytes are accumulated from the last one towards
 * the lead byte: `a` collects the low-order payload bits gathered so far
 * and `b` holds the byte most recently read.  Reaching lb in the middle of
 * a sequence ends decoding through the shorter branch.
 */
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
    int a, b;
    if (c <= lb) return 0;
    b = p[--c];
    if (b < 0x80 || c == lb) {   /* 1000 0000 */
        *slot = b;
        return 1;
    }
    a = b & 0x3F;
    b = p[--c];
    if (b >= 0xC0 || c == lb) {   /* 1100 0000 */
        *slot = (b & 0x1F) << 6 | a;
        return 2;
    }
    a |= (b & 0x3F) << 6;
    b = p[--c];
    if (b >= 0xE0 || c == lb) {   /* 1110 0000 */
        *slot = (b & 0xF) << 12 | a;
        return 3;
    }
    /*
     * The remaining byte is the 4-byte lead 11110xxx: its payload is the
     * low three bits, so the mask is 0x7.  (0xE would drop bit 0 and keep
     * bit 3.)
     */
    *slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a;
    return 4;
}
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
@ -230,8 +248,13 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
@ -360,9 +383,8 @@ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const
z->l += adjustment;
if (z->c >= c_ket)
z->c += adjustment;
else
if (z->c > c_bra)
z->c = c_bra;
else if (z->c > c_bra)
z->c = c_bra;
}
if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
if (adjptr != NULL)

View File

@ -716,6 +716,8 @@ static const struct tsearch_config_match tsearch_config_languages[] =
{"french", "French"},
{"german", "de"},
{"german", "German"},
{"greek", "el"},
{"greek", "Greek"},
{"hungarian", "hu"},
{"hungarian", "Hungarian"},
{"indonesian", "id"},

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201906161
#define CATALOG_VERSION_NO 201907041
#endif

View File

@ -19,8 +19,15 @@ struct SN_env {
unsigned char * B;
};
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
extern void SN_close_env(struct SN_env * z, int S_size);
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,16 @@
/* This file was generated automatically by the Snowball to ISO C compiler */
/* http://snowballstem.org/ */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * greek_UTF_8_create_env(void);
extern void greek_UTF_8_close_env(struct SN_env * z);
extern int greek_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif