postgresql/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl

72 lines
1.6 KiB
Perl
Executable File

#! /usr/bin/perl
#
# Copyright (c) 2001-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
#
# Generate UTF-8 <--> EUC_TW code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain CNS11643.TXT from
# the organization's ftp site.
#
# CNS11643.TXT format:
# CNS11643 code in hex (3 bytes)
# (I guess the first byte means the plane No.)
# UCS-2 code in hex
# # and Unicode name (not used in this script)
use strict;
use warnings;
use convutils;
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl';
my $mapping = &read_source("CNS11643.TXT");
my @extras;
foreach my $i (@$mapping)
{
my $ucs = $i->{ucs};
my $code = $i->{code};
my $origcode = $i->{code};
my $plane = ($code & 0x1f0000) >> 16;
if ($plane > 16)
{
printf STDERR "Warning: invalid plane No.$plane. ignored\n";
next;
}
if ($plane == 1)
{
$code = ($code & 0xffff) | 0x8080;
}
else
{
$code = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
}
$i->{code} = $code;
# Some codes are mapped twice in the EUC_TW to UTF-8 table.
if ($origcode >= 0x12121 && $origcode <= 0x20000)
{
push @extras,
{
ucs => $i->{ucs},
code => ($i->{code} + 0x8ea10000),
rest => $i->{rest},
direction => TO_UNICODE,
f => $i->{f},
l => $i->{l}
};
}
}
push @$mapping, @extras;
print_conversion_tables($this_script, "EUC_TW", $mapping);