Add KOI8/WIN/ALT support

This commit is contained in:
Tatsuo Ishii 1999-03-24 07:02:17 +00:00
parent eb42c1c762
commit 5ae9d85f77
2 changed files with 296 additions and 37 deletions

View File

@ -2,7 +2,7 @@
* conversion between client encoding and server internal encoding
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
* $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
* $Id: conv.c,v 1.6 1999/03/24 07:02:16 ishii Exp $
*/
#include <stdio.h>
#include <string.h>
@ -588,6 +588,262 @@ mic2ascii(unsigned char *mic, unsigned char *p, int len)
*p = '\0';
}
/*
* Cyrillic support
* currently supported Cyrillic encodings:
*
* KOI8-R (this is the charset for the mule internal code
* for Cyrillic)
* ISO-8859-5
* Microsoft's CP1251(windows-1251)
* Alternativny Variant (MS-DOS CP866)
*/
/*
 * koi2mic: KOI8-R to Mule internal code
 *
 * l is the KOI8-R source string, p the destination buffer and len the
 * length passed through to latin2mic().  KOI8-R is itself the charset
 * used by the mule internal code for Cyrillic, so no translation table
 * is needed: the work is delegated to latin2mic() with LC_KOI8_R as the
 * leading character.
 * NOTE(review): latin2mic() is defined outside this view; its exact
 * output format and buffer requirements should be confirmed there.
 */
static void
koi2mic(unsigned char *l, unsigned char *p, int len)
{
latin2mic(l, p, len, LC_KOI8_R);
}
/*
 * mic2koi: Mule internal code to KOI8-R
 *
 * mic is the mule internal code source string, p the destination buffer
 * and len the length passed through to mic2latin().  KOI8-R is itself
 * the charset used by the mule internal code for Cyrillic, so no
 * translation table is needed: the work is delegated to mic2latin()
 * with LC_KOI8_R as the leading character.
 * NOTE(review): mic2latin() is defined outside this view; its handling
 * of other leading characters should be confirmed there.
 */
static void
mic2koi(unsigned char *mic, unsigned char *p, int len)
{
mic2latin(mic, p, len, LC_KOI8_R);
}
/*
 * latin2mic_with_table: generic single-byte charset conversion from a
 * local charset to the mule internal code, driven by a conversion table.
 *
 * The table is indexed by (source byte - 128), i.e. its first entry
 * describes local code 0x80.  Each entry holds the second byte of the
 * mule internal code for that local character, or 0 when the character
 * has no equivalent.
 *
 * Bytes below 128 are copied unchanged.  A mapped byte is written as the
 * pair (lc, table entry); an unmapped byte is replaced by a space.
 * Conversion stops after len source bytes or at the first NUL byte,
 * whichever comes first, and the output is always NUL-terminated.
 * The destination may need up to two bytes per source byte plus the
 * terminator.
 */
static void
latin2mic_with_table(
	unsigned char *l,		/* local charset string (source) */
	unsigned char *p,		/* pointer to store mule internal code
							 * (destination) */
	int len,				/* length of l */
	int lc,					/* leading character of p */
	unsigned char *tab		/* code conversion table */
)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		unsigned char src = *l++;
		unsigned char mapped;

		if (src == '\0')
			break;				/* end of source string */

		if (src < 128)
		{
			*p++ = src;			/* plain ASCII passes through */
			continue;
		}

		mapped = tab[src - 128];
		if (mapped == 0)
		{
			*p++ = ' ';			/* cannot convert */
			continue;
		}

		*p++ = lc;
		*p++ = mapped;
	}
	*p = '\0';
}
/*
 * mic2latin_with_table: generic single-byte charset conversion from the
 * mule internal code to a local charset, driven by a conversion table.
 *
 * The table is indexed by (second byte of the mule internal code - 128),
 * i.e. its first entry describes trail byte 0x80.  Each entry holds the
 * corresponding code point in the local charset, or 0 when the character
 * has no equivalent.
 *
 * Bytes below 128 are copied unchanged.  A (lc, trail) pair is replaced
 * by the table entry for the trail byte, or by a space if the entry is 0.
 * Any other byte >= 128 is treated as bogus and replaced by a space.
 * Conversion stops after len source bytes or at the first NUL byte,
 * whichever comes first, and the output is always NUL-terminated.
 *
 * A leading byte that is not followed by a valid trail byte (the source
 * ends, or the next byte is NUL/ASCII) is replaced by a space and the
 * following byte is NOT consumed.  This keeps the table index in range
 * and keeps the scan from running past the end of the source.
 */
static void
mic2latin_with_table(
	unsigned char *mic,		/* mule internal code (source) */
	unsigned char *p,		/* local code (destination) */
	int len,				/* length of mic */
	int lc,					/* leading character */
	unsigned char *tab		/* code conversion table */
)
{
	unsigned char c1, c2;

	while (len-- > 0 && (c1 = *mic++)) {
		if (c1 < 128) {
			*p++ = c1;
		} else if (c1 == lc) {
			/*
			 * Check len first so that we never look at a byte beyond
			 * the declared source length.
			 */
			if (len <= 0 || *mic < 128) {
				*p++ = ' ';		/* truncated or malformed sequence */
				continue;
			}
			c1 = *mic++;
			len--;
			c2 = tab[c1 - 128];
			if (c2) {
				*p++ = c2;
			} else {
				*p++ = ' ';		/* cannot convert */
			}
		} else {
			*p++ = ' ';			/* bogus character */
		}
	}
	*p = '\0';
}
/*
 * iso2mic: ISO-8859-5 to Mule internal code
 *
 * l is the ISO-8859-5 source string, p the destination buffer and len
 * the length of l.  Conversion is done by latin2mic_with_table() using
 * LC_KOI8_R as the leading character.
 */
static void
iso2mic(unsigned char *l, unsigned char *p, int len)
{
	/*
	 * ISO-8859-5 -> KOI8-R, indexed by (source byte - 0x80).  A zero entry
	 * means "no mapping".  Only source bytes 0xb0..0xef have mappings.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char iso2koi[] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	};

	latin2mic_with_table(l, p, len, LC_KOI8_R, iso2koi);
}
/*
 * mic2iso: Mule internal code to ISO-8859-5
 *
 * mic is the mule internal code source string, p the destination buffer
 * and len the length of mic.  Conversion is done by
 * mic2latin_with_table() using LC_KOI8_R as the leading character.
 */
static void
mic2iso(unsigned char *mic, unsigned char *p, int len)
{
	/*
	 * KOI8-R -> ISO-8859-5, indexed by (trail byte - 0x80).  A zero entry
	 * means "no mapping".  Only trail bytes 0xc0..0xff have mappings.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char koi2iso[] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
		0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,
		0xdf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xd6, 0xd2,
		0xec, 0xeb, 0xd7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
		0xce, 0xb0, 0xb1, 0xc6, 0xb4, 0xb5, 0xc4, 0xb3,
		0xc5, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
		0xbf, 0xcf, 0xc0, 0xc1, 0xc2, 0xc3, 0xb6, 0xb2,
		0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca
	};

	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2iso);
}
/*
 * win2mic: CP1251 to Mule internal code
 *
 * l is the CP1251 source string, p the destination buffer and len the
 * length of l.  Conversion is done by latin2mic_with_table() using
 * LC_KOI8_R as the leading character.
 */
static void
win2mic(unsigned char *l, unsigned char *p, int len)
{
	/*
	 * CP1251 -> KOI8-R, indexed by (source byte - 0x80).  A zero entry
	 * means "no mapping".  Source bytes 0x80..0x9f have no mapping,
	 * 0xa0..0xbf are sparsely mapped and 0xc0..0xff are fully mapped.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char win2koi[] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
		0xb3, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0xb7,
		0x00, 0x00, 0xb6, 0xa6, 0xad, 0x00, 0x00, 0x00,
		0xa3, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x00, 0xa7,
		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1
	};

	latin2mic_with_table(l, p, len, LC_KOI8_R, win2koi);
}
/*
 * mic2win: Mule internal code to CP1251
 *
 * mic is the mule internal code source string, p the destination buffer
 * and len the length of mic.  Conversion is done by
 * mic2latin_with_table() using LC_KOI8_R as the leading character.
 */
static void
mic2win(unsigned char *mic, unsigned char *p, int len)
{
	/*
	 * KOI8-R -> CP1251, indexed by (trail byte - 0x80).  A zero entry
	 * means "no mapping".  Trail bytes 0x80..0x9f have no mapping,
	 * 0xa0..0xbf are sparsely mapped and 0xc0..0xff are fully mapped.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char koi2win[] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0xb8, 0xba, 0x00, 0xb3, 0xbf,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00,
		0x00, 0x00, 0x00, 0xa8, 0xaa, 0x00, 0xb2, 0xaf,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xa5, 0x00, 0x00,
		0xfe, 0xe0, 0xe1, 0xf6, 0xe4, 0xe5, 0xf4, 0xe3,
		0xf5, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
		0xef, 0xff, 0xf0, 0xf1, 0xf2, 0xf3, 0xe6, 0xe2,
		0xfc, 0xfb, 0xe7, 0xf8, 0xfd, 0xf9, 0xf7, 0xfa,
		0xde, 0xc0, 0xc1, 0xd6, 0xc4, 0xc5, 0xd4, 0xc3,
		0xd5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
		0xcf, 0xdf, 0xd0, 0xd1, 0xd2, 0xd3, 0xc6, 0xc2,
		0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda
	};

	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win);
}
/*
 * alt2mic: CP866 to Mule internal code
 *
 * l is the CP866 source string, p the destination buffer and len the
 * length of l.  Conversion is done by latin2mic_with_table() using
 * LC_KOI8_R as the leading character.
 */
static void
alt2mic(unsigned char *l, unsigned char *p, int len)
{
	/*
	 * CP866 -> KOI8-R, indexed by (source byte - 0x80).  A zero entry
	 * means "no mapping".  Source bytes 0x80..0xaf and 0xe0..0xef are
	 * fully mapped; 0xb0..0xdf and 0xf0..0xff are sparsely mapped.
	 * NOTE(review): 0xbd maps to itself, unlike every other entry in
	 * 0xb0..0xdf -- confirm this is intended.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char alt2koi[] = {
		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
		0xb3, 0xa3, 0xb4, 0xa4, 0xb7, 0xa7, 0x00, 0x00,
		0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	};

	latin2mic_with_table(l, p, len, LC_KOI8_R, alt2koi);
}
/*
 * mic2alt: Mule internal code to CP866
 *
 * mic is the mule internal code source string, p the destination buffer
 * and len the length of mic.  Conversion is done by
 * mic2latin_with_table() using LC_KOI8_R as the leading character.
 */
static void
mic2alt(unsigned char *mic, unsigned char *p, int len)
{
	/*
	 * KOI8-R -> CP866, indexed by (trail byte - 0x80).  A zero entry
	 * means "no mapping".  Trail bytes 0x80..0x9f have no mapping,
	 * 0xa0..0xbf are sparsely mapped and 0xc0..0xff are fully mapped.
	 * NOTE(review): 0xad and 0xbd map to themselves, unlike the other
	 * entries in 0xa0..0xbf -- confirm this is intended.
	 *
	 * The element type must be unsigned char: the entries exceed 0x7f and
	 * the table is passed to an unsigned char * parameter.
	 */
	static unsigned char koi2alt[] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0xf1, 0xf3, 0x00, 0xf9, 0xf5,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00,
		0x00, 0x00, 0x00, 0xf0, 0xf2, 0x00, 0xf8, 0xf4,
		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
		0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3,
		0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
		0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2,
		0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
		0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83,
		0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
		0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82,
		0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
	};

	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2alt);
}
/*
* end of Cyrillic support
*/
pg_encoding_conv_tbl pg_conv_tbl[] = {
{SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii}, /* SQL/ACII */
{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp}, /* EUC_JP */
@ -600,7 +856,10 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
{LATIN2, "LATIN2", 0, latin22mic, mic2latin2}, /* ISO 8859 Latin 2 */
{LATIN3, "LATIN3", 0, latin32mic, mic2latin3}, /* ISO 8859 Latin 3 */
{LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */
{LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */
{LATIN5, "LATIN5", 0, iso2mic, mic2iso}, /* ISO 8859 Latin 5 */
{KOI8, "KOI8", 0, koi2mic, mic2koi}, /* KOI8-R */
{WIN, "WIN", 0, win2mic, mic2win}, /* CP1251 */
{ALT, "ALT", 0, alt2mic, mic2alt}, /* CP866 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */
{BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */
{-1, "", 0, 0, 0} /* end mark */

View File

@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multi-byte streams.
* Tatsuo Ishii
* $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
* $Id: wchar.c,v 1.6 1999/03/24 07:02:17 ishii Exp $
*/
#include "mb/pg_wchar.h"
@ -416,40 +416,40 @@ pg_big5_mblen(const unsigned char *s)
}
pg_wchar_tbl pg_wchar_table[] = {
{pg_ascii2wchar_with_len, pg_ascii_mblen},
{pg_eucjp2wchar_with_len, pg_eucjp_mblen},
{pg_euccn2wchar_with_len, pg_euccn_mblen},
{pg_euckr2wchar_with_len, pg_euckr_mblen},
{pg_euctw2wchar_with_len, pg_euctw_mblen},
{pg_utf2wchar_with_len, pg_utf_mblen},
{pg_mule2wchar_with_len, pg_mule_mblen},
{pg_latin12wchar_with_len, pg_latin1_mblen},
{pg_latin12wchar_with_len, pg_latin1_mblen},
{pg_latin12wchar_with_len, pg_latin1_mblen},
{pg_latin12wchar_with_len, pg_latin1_mblen},
{pg_latin12wchar_with_len, pg_latin1_mblen},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, pg_sjis_mblen},
{0, pg_big5_mblen}
{pg_ascii2wchar_with_len, pg_ascii_mblen}, /* 0 */
{pg_eucjp2wchar_with_len, pg_eucjp_mblen}, /* 1 */
{pg_euccn2wchar_with_len, pg_euccn_mblen}, /* 2 */
{pg_euckr2wchar_with_len, pg_euckr_mblen}, /* 3 */
{pg_euctw2wchar_with_len, pg_euctw_mblen}, /* 4 */
{pg_utf2wchar_with_len, pg_utf_mblen}, /* 5 */
{pg_mule2wchar_with_len, pg_mule_mblen}, /* 6 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 7 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 8 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 9 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 10 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 11 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 12 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 13 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 14 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 15 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 16 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 17 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 18 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 19 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 20 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 21 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 22 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 23 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 24 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 25 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 26 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 27 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 28 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 29 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 30 */
{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 31 */
{0, pg_sjis_mblen}, /* 32 */
{0, pg_big5_mblen} /* 33 */
};
/* returns the byte length of a word for mule internal code */