406 lines
9.9 KiB
Prolog
Executable File
406 lines
9.9 KiB
Prolog
Executable File
#! /usr/local/bin/perl -w
|
|
# $Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $
|
|
#
|
|
# Copyright (c) 2001 Japan Network Information Center. All rights reserved.
|
|
#
|
|
# By using this file, you agree to the terms and conditions set forth bellow.
|
|
#
|
|
# LICENSE TERMS AND CONDITIONS
|
|
#
|
|
# The following License Terms and Conditions apply, unless a different
|
|
# license is obtained from Japan Network Information Center ("JPNIC"),
|
|
# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
|
|
# Chiyoda-ku, Tokyo 101-0047, Japan.
|
|
#
|
|
# 1. Use, Modification and Redistribution (including distribution of any
|
|
# modified or derived work) in source and/or binary forms is permitted
|
|
# under this License Terms and Conditions.
|
|
#
|
|
# 2. Redistribution of source code must retain the copyright notices as they
|
|
# appear in each source code file, this License Terms and Conditions.
|
|
#
|
|
# 3. Redistribution in binary form must reproduce the Copyright Notice,
|
|
# this License Terms and Conditions, in the documentation and/or other
|
|
# materials provided with the distribution. For the purposes of binary
|
|
# distribution the "Copyright Notice" refers to the following language:
|
|
# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
|
|
#
|
|
# 4. The name of JPNIC may not be used to endorse or promote products
|
|
# derived from this Software without specific prior written approval of
|
|
# JPNIC.
|
|
#
|
|
# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
|
#
|
|
|
|
use v5.6.0; # for pack('U')
|
|
use bytes;
|
|
|
|
use lib qw(.);
|
|
|
|
use SparseMap;
|
|
use Getopt::Long;
|
|
|
|
# Identification string of this generator, embedded in the generated output.
# The substitution rewrites the surrounding '$' delimiters as '$-' / '-$'
# (presumably so that RCS/CVS keyword expansion leaves the copy in the
# generated file alone -- TODO confirm).
my $myid = '$Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $';
$myid =~ s/\$([^\$]+)\$/\$-$1-\$/;

# Bit widths handed to SparseMap as BITS for each table, and also published
# to the generated code as <NAME>_BITS_<n> macros by bits_definition().
my @map_bits = (9, 7, 5);
my @proh_bits = (7, 7, 7);
my @unas_bits = (7, 7, 7);
my @bidi_bits = (9, 7, 5);

# Bidi type names; the index of a name in this list is the value stored in
# the bidi map for a code point of that type.
my @bidi_types = ('OTHERS', 'R_AL', 'L');

# Directory holding the nameprep.<version>.* input files (-dir option).
my $dir = '.';
my @versions = ();

# usage() dies by itself, so there is no need to wrap it in another die.
GetOptions('dir=s', \$dir) or usage();
@versions = @ARGV;

print_header();

bits_definition("MAP", @map_bits);
bits_definition("PROH", @proh_bits);
bits_definition("UNAS", @unas_bits);
bits_definition("BIDI", @bidi_bits);

# Use a named lexical rather than $_ as the loop variable: the generators
# read their input with while (<...>), which assigns to the global $_ and
# would otherwise overwrite the list element $_ is aliased to.
foreach my $version (@versions) {
    generate_data($version);
}
|
|
|
|
# Terminate the program with a one-line synopsis of the command line.
# Never returns.
sub usage {
    my $synopsis = "Usage: $0 [-dir dir] version..\n";
    die $synopsis;
}
|
|
|
|
# Emit all four tables (mapping, prohibited, unassigned, bidi) for one
# nameprep version.  Input files are looked up as
# "$dir/nameprep.<version>.<kind>".
sub generate_data {
    my ($version) = @_;
    my $prefix = "$dir/nameprep.$version";

    generate_mapdata($version, "$prefix.map");
    generate_prohibiteddata($version, "$prefix.prohibited");
    generate_unassigneddata($version, "$prefix.unassigned");
    generate_bididata($version, "$prefix.bidi");
}
|
|
|
|
#
# Generate mapping data.
#
# Arguments: the nameprep version name and the path of its mapping file.
#
# If the first line of the file is "% SAME-AS <other>" and <other> is one
# of the versions being generated in this run, only aliases to <other>'s
# tables are printed.  Otherwise every non-comment, non-blank line is fed
# to register_map() and the resulting tables are printed by generate_map().
# Dies if the file cannot be opened.
#
sub generate_mapdata {
    my $version = shift;
    my $file = shift;

    my $map = SparseMap::Int->new(BITS => [@map_bits],
                                  MAX => 0x110000,
                                  MAPALL => 1,
                                  DEFAULT => 0);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # The buffer starts with one dummy byte, so the first real entry gets
    # offset 1 (presumably to keep offset 0, the map's DEFAULT, meaning
    # "no mapping" -- TODO confirm against the consumer of the table).
    my $mapbuf = "\0";
    my %maphash = ();

    # The read loop assigns to the global $_; keep that from leaking into
    # the caller, which may have $_ aliased to something it still needs.
    local $_;
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # NB: no "> 0" here.  "grep {...} @versions > 0" parses as
            # "grep {...} (@versions > 0)" and never looks at the list.
            if (grep { $_ eq $same_as } @versions) {
                generate_map_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_map($map, \$mapbuf, \%maphash, $_);
    }
    close $fh;
    generate_map($version, $map, \$mapbuf);
}
|
|
|
|
#
# Generate prohibited character data.
#
# Arguments: the nameprep version name and the path of its prohibited
# character file.
#
# If the first line of the file is "% SAME-AS <other>" and <other> is one
# of the versions being generated in this run, only aliases to <other>'s
# tables are printed.  Otherwise every non-comment, non-blank line is fed
# to register_prohibited() and the table is printed by
# generate_prohibited().  Dies if the file cannot be opened.
#
sub generate_prohibiteddata {
    my $version = shift;
    my $file = shift;

    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # The read loop assigns to the global $_; keep that from leaking into
    # the caller, which may have $_ aliased to something it still needs.
    local $_;
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # NB: no "> 0" here.  "grep {...} @versions > 0" parses as
            # "grep {...} (@versions > 0)" and never looks at the list.
            if (grep { $_ eq $same_as } @versions) {
                generate_prohibited_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_prohibited($proh, $_);
    }
    close $fh;
    generate_prohibited($version, $proh);
}
|
|
|
|
#
# Generate unassigned codepoint data.
#
# Arguments: the nameprep version name and the path of its unassigned
# code point file.
#
# If the first line of the file is "% SAME-AS <other>" and <other> is one
# of the versions being generated in this run, only aliases to <other>'s
# tables are printed.  Otherwise every non-comment, non-blank line is fed
# to register_unassigned() and the table is printed by
# generate_unassigned().  Dies if the file cannot be opened.
#
sub generate_unassigneddata {
    my $version = shift;
    my $file = shift;

    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # The read loop assigns to the global $_; keep that from leaking into
    # the caller, which may have $_ aliased to something it still needs.
    local $_;
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # NB: no "> 0" here.  "grep {...} @versions > 0" parses as
            # "grep {...} (@versions > 0)" and never looks at the list.
            if (grep { $_ eq $same_as } @versions) {
                generate_unassigned_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_unassigned($unas, $_);
    }
    close $fh;
    generate_unassigned($version, $unas);
}
|
|
|
|
#
# Generate data of bidi "R" or "AL" characters.
#
# Arguments: the nameprep version name and the path of its bidi file.
#
# If the first line of the file is "% SAME-AS <other>" and <other> is one
# of the versions being generated in this run, only aliases to <other>'s
# bidi tables are printed.  A "% BIDI_TYPE <name>" line selects the type
# (an index into @bidi_types) applied to the code point lines that follow;
# the type is 0 until the first such line.  Every other non-comment,
# non-blank line is fed to register_bidi().  Dies if the file cannot be
# opened or if a BIDI_TYPE name is not listed in @bidi_types.
#
sub generate_bididata {
    my $version = shift;
    my $file = shift;

    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $type = 0;

    # The read loop assigns to the global $_; keep that from leaking into
    # the caller, which may have $_ aliased to something it still needs.
    local $_;
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # NB: no "> 0" here.  "grep {...} @versions > 0" parses as
            # "grep {...} (@versions > 0)" and never looks at the list.
            if (grep { $_ eq $same_as } @versions) {
                # Alias the *bidi* tables (this used to emit the
                # unassigned-table aliases by mistake).
                generate_bidi_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        if (/^%\s*BIDI_TYPE\s+(\S+)$/) {
            my $name = $1;
            # Index of the first entry of @bidi_types equal to $name.
            my ($index) = grep { $bidi_types[$_] eq $name } 0 .. $#bidi_types;
            die "unrecognized line: $_" unless defined $index;
            $type = $index;
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_bidi($bidi, $type, $_);
    }
    close $fh;

    generate_bidi($version, $bidi);
}
|
|
|
|
# Print the comment block that opens the generated file: an "$Id$" keyword
# line, the identification string of this generator ($myid), and a
# "do not edit" notice, followed by one blank line.
sub print_header {
    # The '$' signs of the Id keyword stay backslash-escaped so that this
    # script's own source does not contain a literal RCS keyword here.
    print "/* \$Id\$ */\n"
        . "/* $myid */\n"
        . "/*\n"
        . "* Do not edit this file!\n"
        . "* This file is generated from NAMEPREP specification.\n"
        . "*/\n"
        . "\n";
}
|
|
|
|
# Print one "#define <NAME>_BITS_<n>\t<value>" line for each bit width
# given, numbering them from 0 in argument order, then a blank line.
#
# Arguments: the macro name prefix, followed by the list of bit widths.
sub bits_definition {
    my ($name, @widths) = @_;

    for my $index (0 .. $#widths) {
        print "#define ${name}_BITS_$index\t$widths[$index]\n";
    }
    print "\n";
}
|
|
|
|
# Register one line of a mapping file.
#
# Arguments:
#   $map     - map object; receives add(<source code point>, <offset>)
#   $bufref  - reference to the byte buffer holding all mapping targets
#   $hashref - reference to a hash from packed target to buffer offset,
#              used to store each distinct target only once
#   $line    - input line of the form "<hex>; <hex> <hex> ...; ..."
#
# The target code points are packed as 32-bit little-endian values with
# the trailing NUL bytes removed, and stored in the buffer as a one-byte
# length followed by the packed bytes.  Dies with "unrecognized line" if
# the first field does not contain exactly one code point.
sub register_map {
    my ($map, $bufref, $hashref, $line) = @_;

    my ($source_field, $target_field) = split /;/, $line;
    my @fcode = map { hex($_) } split ' ', $source_field;
    my @tcode = map { hex($_) } split ' ', $target_field;

    my $packed = pack('V*', @tcode);
    $packed =~ s/\000+$//;

    # Append the target to the buffer only the first time it is seen.
    unless (exists $hashref->{$packed}) {
        $hashref->{$packed} = length $$bufref;
        $$bufref .= pack('C', length($packed)) . $packed;
    }
    my $offset = $hashref->{$packed};

    die "unrecognized line: $line" if @fcode != 1;
    $map->add($fcode[0], $offset);
}
|
|
|
|
# Print the mapping tables of one version: whatever the map object's
# cprog() produces under the name "nameprep_<version>_map", followed by
# the target buffer as the C array "nameprep_<version>_map_data".
#
# Arguments: version name, map object, reference to the target buffer.
sub generate_map {
    my $version = shift;
    my $map = shift;
    my $bufref = shift;

    $map->fix();
    print $map->cprog(NAME => "nameprep_${version}_map");

    print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n";
    print_uchararray(${$bufref});
    print "};\n\n";
}
|
|
|
|
# Print macros that make the mapping tables of $version aliases of those
# of $refversion (imap, table and data), followed by a blank line.
sub generate_map_ref {
    my ($version, $refversion) = @_;

    foreach my $suffix (qw(imap table data)) {
        print "#define nameprep_${version}_map_$suffix"
            . " nameprep_${refversion}_map_$suffix\n";
    }
    print "\n";
}
|
|
|
|
# Print the bytes of the string given as first argument as the body of a
# C array initializer: decimal values formatted with "%3d, ", twelve per
# line, each line starting with a tab.  A final newline is always printed,
# even for an empty string.
sub print_uchararray {
    my @bytes = unpack 'C*', $_[0];

    my @rows;
    while (my @row = splice(@bytes, 0, 12)) {
        push @rows, "\t" . join('', map { sprintf("%3d, ", $_) } @row);
    }
    print join("\n", @rows) . "\n";
}
|
|
|
|
# Register one line of a prohibited-character file in the given bit map.
# Thin wrapper around register_bitmap(); remaining arguments are passed on.
sub register_prohibited {
    my ($proh, @rest) = @_;
    register_bitmap($proh, @rest);
}
|
|
|
|
# Register one line of an unassigned-codepoint file in the given bit map.
# Thin wrapper around register_bitmap(); remaining arguments are passed on.
sub register_unassigned {
    my ($unas, @rest) = @_;
    register_bitmap($unas, @rest);
}
|
|
|
|
# Register one line of a bidi file in the given integer map, storing $type
# for the code points on that line.  Thin wrapper around register_intmap();
# remaining arguments are passed on.
sub register_bidi {
    my ($bidi, $type, @rest) = @_;
    register_intmap($bidi, $type, @rest);
}
|
|
|
|
# Print the prohibited-character table of one version under the name
# "nameprep_<version>_prohibited", followed by a blank line.
sub generate_prohibited {
    my $version = shift;
    my $proh = shift;

    my $name = "nameprep_${version}_prohibited";
    generate_bitmap($proh, $name);
    print "\n";
}
|
|
|
|
# Print macros that make the prohibited-character tables of $version
# aliases of those of $refversion (imap and bitmap), followed by a blank
# line.
sub generate_prohibited_ref {
    my ($version, $refversion) = @_;

    foreach my $suffix (qw(imap bitmap)) {
        print "#define nameprep_${version}_prohibited_$suffix"
            . " nameprep_${refversion}_prohibited_$suffix\n";
    }
    print "\n";
}
|
|
|
|
# Print the unassigned-codepoint table of one version under the name
# "nameprep_<version>_unassigned", followed by a blank line.
sub generate_unassigned {
    my $version = shift;
    my $unas = shift;

    my $name = "nameprep_${version}_unassigned";
    generate_bitmap($unas, $name);
    print "\n";
}
|
|
|
|
# Print macros that make the unassigned-codepoint tables of $version
# aliases of those of $refversion (imap and bitmap), followed by a blank
# line.
sub generate_unassigned_ref {
    my ($version, $refversion) = @_;

    foreach my $suffix (qw(imap bitmap)) {
        print "#define nameprep_${version}_unassigned_$suffix"
            . " nameprep_${refversion}_unassigned_$suffix\n";
    }
    print "\n";
}
|
|
|
|
# Print the bidi tables of one version: whatever the map object's cprog()
# produces under the name "nameprep_<version>_bidi", followed by the C
# array "nameprep_<version>_bidi_data", which lists one
# idn_biditype_<lower-cased name> entry per element of @bidi_types, in
# order.
#
# Arguments: version name, bidi map object.
sub generate_bidi {
    my $version = shift;
    my $bidi = shift;

    $bidi->fix();
    print $bidi->cprog(NAME => "nameprep_${version}_bidi");

    my @entries = map { sprintf("\tidn_biditype_%s, \n", lc($_)) } @bidi_types;

    print "\n";
    print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n";
    print join('', @entries);
    print "};\n\n";
}
|
|
|
|
# Print macros that make the bidi tables of $version aliases of those of
# $refversion, followed by a blank line.
#
# generate_bidi() emits a "nameprep_<version>_bidi_data" array in addition
# to the map object's own output, so the data array is aliased here too,
# mirroring what generate_map_ref() does for "_map_data".
sub generate_bidi_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap
#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table
#define nameprep_${version}_bidi_data nameprep_${refversion}_bidi_data

END
}
|
|
|
|
# Register the code point, or range of code points, found at the start of
# $line in a bit map.
#
# Arguments:
#   $map  - map object; receives add(<code point>, ...)
#   $line - input line starting with "<hex>" or "<hex>-<hex>"
#
# A single code point results in add(<cp>); a range results in one add()
# call with every code point from start to end inclusive.  Dies with
# "unrecognized line" if $line does not start with a hexadecimal number.
sub register_bitmap {
    my $map = shift;
    my $line = shift;

    # Match the argument itself, not whatever happens to be in $_.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : undef;
    if (defined $end) {
        $map->add($start .. $end);
    } else {
        $map->add($start);
    }
}
|
|
|
|
# Register the code point, or range of code points, found at the start of
# $line in an integer map, associating each with $value.
#
# Arguments:
#   $map   - map object; receives add(<code point>, $value) per code point
#   $value - value to store
#   $line  - input line starting with "<hex>" or "<hex>-<hex>"
#
# Dies with "unrecognized line" if $line does not start with a hexadecimal
# number.
sub register_intmap {
    my $map = shift;
    my $value = shift;
    my $line = shift;

    # Match the argument itself, not whatever happens to be in $_.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : $start;
    foreach my $code ($start .. $end) {
        $map->add($code, $value);
    }
}
|
|
|
|
# Finalize a map object with fix() and print what its cprog() produces
# under the given name.
#
# Arguments: map object, name passed to cprog() as NAME.
sub generate_bitmap {
    my ($map, $name) = @_;

    $map->fix();
    #$map->stat();
    print $map->cprog(NAME => $name);
}
|