#!/usr/bin/perl
#
#	Character Set Table Generator 1.0
#	(c) 1998 Martin Mares
#
#	This program can be freely distributed and used according to the terms
#	of the GNU General Public License.
#
#	File: charset/misc/gen-charconv (formerly charset/misc/tabgen; the
#	rename did not change the contents).
#
#	Input:  a list of charset table files, one name per line, read from the
#	        files named on the command line or from standard input.  Blank
#	        lines and lines starting with '#' are ignored.  Each table file
#	        holds tab-separated "<byte>\t<unicode>[\t<comment>]" lines with
#	        hexadecimal values.
#	        "unicode/UnicodeData.txt" (required) and "misc/user_expand"
#	        (optional) are read relative to the current directory.
#	Output: C source with the conversion tables on standard output,
#	        progress messages on standard error.
#

# Internal codes 0..255 are mapped to UniCode 0..255
# Internal code 256 is the replacement character (U#FFFD)

use strict;
use warnings;

my $REPLACEMENT_UNI = 0xFFFD;	# Unicode replacement character
my $REPLACEMENT_X   = 256;	# its internal code

# Return the separator to print after element $index of a table that is
# laid out with $per_row elements on each line.
sub _row_sep {
	my ($index, $per_row) = @_;
	return ($index % $per_row == $per_row - 1) ? "\n" : " ";
}

print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";

print STDERR "Charset list...\n";

my @charsets;
while (my $line = <>) {
	chomp $line;
	# Skip blank lines and comments.
	next if $line =~ /^\s*$/ || $line =~ /^#/;
	push @charsets, $line;
}
my $ncs = scalar @charsets;

print STDERR "Found $ncs charsets, counting unique codes...\n";

# %u2x maps a Unicode value to its internal code, @x2u is the inverse.
my (%u2x, @x2u);
for my $code (0 .. 255) {
	$u2x{$code} = $code;
	$x2u[$code] = $code;
}
my $unique = $REPLACEMENT_X;
$u2x{$REPLACEMENT_UNI} = $unique;
$x2u[$unique++] = $REPLACEMENT_UNI;

# $cx[$charset]{$internal_code} is the byte that encodes the code in that
# charset; when several bytes map to the same code, the highest byte wins.
my @cx;

print "static unsigned short int input_to_x[$ncs][256] = {\n";
for my $cs (0 .. $#charsets) {
	my $file = $charsets[$cs];
	print "\n/* $file */\n{\n";

	my @uni_of;	# byte value -> hexadecimal Unicode string from the table
	open(my $table, '<', $file) or die "Error opening $file: $!";
	while (my $line = <$table>) {
		chomp $line;
		next if $line =~ /^\s*$/ || $line =~ /^#/;
		my ($byte, $uni) = split /\t/, $line;
		# A line without a Unicode field defines no mapping.
		next unless defined $uni;
		$uni_of[hex $byte] = $uni;
	}
	close $table;

	for my $byte (0 .. 255) {
		# Bytes missing from the table map to the replacement character.
		my $u = hex(defined $uni_of[$byte] ? $uni_of[$byte] : "FFFD");
		if (!defined $u2x{$u}) {
			$x2u[$unique] = $u;
			$u2x{$u} = $unique++;
		}
		my $x = $u2x{$u};
		print "$x,", _row_sep($byte, 16);
		$cx[$cs]{$x} = $byte;
	}
	print "},\n";
}
print "};\n\n";

print STDERR "$unique unique codes...\n";

print "static unsigned short int x_to_uni[$unique] = {\n";
for my $x (0 .. $unique - 1) {
	print "$x2u[$x],", _row_sep($x, 16);
}
print "\n" if $unique % 16;
print "};\n\n";

print STDERR "UNICODE table...\n";

# Mark every 256-character Unicode page that contains a used character.
my @page_used;
for my $u (@x2u) {
	$page_used[int($u / 256)] = 1;
}
for my $page (0 .. 255) {
	next unless $page_used[$page];
	print "static unsigned short int uni_to_x_$page\[256\] = {\n";
	for my $low (0 .. 255) {
		my $u = 256 * $page + $low;
		my $x = defined $u2x{$u} ? $u2x{$u} : $REPLACEMENT_X;
		print "$x,", _row_sep($low, 16);
	}
	print "};\n\n";
}

print "static unsigned short int *uni_to_x[256] = {\n";
# Unused pages are pointed at page 255, so that page must contain nothing
# but the replacement character.
for my $u (0xFF00 .. 0xFFFF) {
	if (defined $u2x{$u} && $u != $REPLACEMENT_UNI) {
		die "Invalid replacement strategy!";
	}
}
for my $page (0 .. 255) {
	print "uni_to_x_", ($page_used[$page] ? $page : "255"), ",", _row_sep($page, 4);
}
print "};\n\n";

print STDERR "UniData file...\n";

# %expand maps a Unicode value to a space-separated list of decimal Unicode
# values that may be substituted for it.
my %expand;
open(my $unidata, '<', "unicode/UnicodeData.txt") or die "No UnicodeData file";
while (my $line = <$unidata>) {
	chomp $line;
	# Field 0 is the code point, field 5 its decomposition.
	my ($num, undef, undef, undef, undef, $decomp) = split /;/, $line;
	next unless defined $decomp && $decomp ne "";
	$decomp =~ s/^<.*> *//;		# drop the leading <tag>
	# Spaces (U+0020) are left out of the expansion.
	$expand{ hex($num) } =
		join " ", map { hex($_) } grep { $_ ne "0020" } split / /, $decomp;
}
close $unidata;

print STDERR "Character expansions\n";
# User-supplied expansions are optional and override those from UnicodeData.
if (open(my $extra, '<', "misc/user_expand")) {
	while (my $line = <$extra>) {
		chomp $line;
		next if $line =~ /^\s*$/ || $line =~ /^#/;
		$line =~ s/0x([0-9a-zA-Z]+)/hex($1)/ge;	# 0x... -> decimal
		$line =~ /^(\S+)\s+(.*)$/ or die "Syntax error in user expansions";
		$expand{$1} = $2;
	}
	close $extra;
}

print "static unsigned short int x_to_output[$ncs][$unique] = {\n";
# Values below 256 in x_to_output are single output bytes; values from 256
# up are positions of length-prefixed strings, numbered from 256.
my $pstr = 256;
my (%string, %strval);
for my $cs (0 .. $#charsets) {
	print "\n/* $charsets[$cs] */\n{\n";
	for my $x (0 .. $unique - 1) {
		# Repeatedly replace characters the charset cannot encode by
		# their expansions until nothing changes any more.
		my $seq = $x2u[$x];
		my $prev;
		do {
			$prev = $seq;
			my @parts;
			for my $code (split / /, $prev) {
				my $k = $u2x{$code};
				if (defined $k && defined $cx[$cs]{$k}) {
					push @parts, $code;
				} elsif (defined $expand{$code}) {
					push @parts, $expand{$code};
				}
				# Otherwise the character is dropped.
			}
			$seq = join " ", @parts;
		} while ($prev ne $seq);

		# Encode the resulting sequence in the target charset.
		my $out = "";
		for my $code (split / /, $prev) {
			my $k = $u2x{$code};
			next unless defined $k && $k != $REPLACEMENT_X;
			my $byte = $cx[$cs]{$k};
			$out .= pack("C", $byte) if defined $byte;
		}

		my $z;
		if (length($out) == 1) {
			$z = unpack("C", $out);
		} else {
			# Empty and multi-byte results go to the string table;
			# identical strings share one entry.
			if (!defined $string{$out}) {
				$string{$out} = $pstr;
				$strval{$pstr} = $out;
				$pstr += 1 + length($out);
			}
			$z = $string{$out};
		}
		print "$z,", _row_sep($x, 16);
	}
	print "\n" if $unique % 16;
	print "},\n";
}
print "};\n\n";

print STDERR "And Tubular Bells...\n";
print "static unsigned char string_table[] = {\n";
my $pos = 256;
while ($pos < $pstr) {
	my $w = $strval{$pos};
	# Each entry is its length followed by its bytes.
	print length($w), ",";
	for my $byte (unpack("C*", $w)) {
		print " $byte,";
	}
	print "\n";
	$pos += 1 + length($w);
}
print "};\n";

print STDERR "Done.\n";