#!/usr/bin/perl
#
#  Character Set Table Generator 1.0
#  (c) 1998 Martin Mares
#
#  This program can be freely distributed and used according to the terms
#  of the GNU General Public License.
#
#  Internal codes 0..255 are mapped to Unicode 0..255
#  Internal code 256 is the replacement character (U+FFFD)
#
#  Input:  a list of charset mapping file names (one per line) read from the
#          files named on the command line or from standard input.  Blank
#          lines and lines starting with '#' are ignored.  Each mapping file
#          holds tab-separated "<hex charset code>\t<hex Unicode value>" lines.
#          Additionally reads unicode/UnicodeData.txt (mandatory) and
#          misc/user_expand (optional).
#  Output: C source with the conversion tables on standard output, progress
#          messages on standard error.

use strict;
use warnings;

# Separator to print after table entry number $index when $per_row entries
# are placed on each output row: a newline after the last entry of a row,
# a single space otherwise.
sub _sep {
    my ($index, $per_row) = @_;
    return ($index % $per_row == $per_row - 1) ? "\n" : " ";
}

print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";

# ---------------------------------------------------------------------------
# Read the list of charset mapping files.
# ---------------------------------------------------------------------------
print STDERR "Charset list...\n";
my @charsets;
while (my $line = <>) {
    chomp $line;
    next if $line =~ /^\s*$/ || $line =~ /^#/;
    # Three-argument open (used below) does not ignore surrounding
    # whitespace the way two-argument open does, so trim it here.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    push @charsets, $line;
}
my $ncs = @charsets;

# ---------------------------------------------------------------------------
# Seed the internal code space: 0..255 map to themselves, 256 is U+FFFD.
# %u2x maps a Unicode value to its internal code, @x2u is the inverse.
# ---------------------------------------------------------------------------
print STDERR "Found $ncs charsets, counting unique codes...\n";
my %u2x;
my @x2u;
for my $code (0 .. 255) {
    $u2x{$code} = $code;
    $x2u[$code] = $code;
}
my $unique = 256;
$u2x{0xFFFD} = $unique;
$x2u[$unique++] = 0xFFFD;

# ---------------------------------------------------------------------------
# input_to_x: for every charset, charset byte -> internal code.
# $cx[$cs]{$internal} remembers the charset byte that produces $internal
# (the highest such byte wins when several bytes map to the same code).
# ---------------------------------------------------------------------------
my @cx;
print "static unsigned short int input_to_x[$ncs][256] = {\n";
for my $cs (0 .. $ncs - 1) {
    my $file = $charsets[$cs];
    print "\n/* $file */\n{\n";

    open(my $fh, '<', $file) or die "Error opening $file";
    my @uni_of;    # charset byte -> Unicode value as a hex string
    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^\s*$/ || $line =~ /^#/;
        my ($code, $uni) = split /\t/, $line;
        $uni_of[hex $code] = $uni;
    }
    close $fh;

    for my $code (0 .. 255) {
        # Bytes without a mapping become the replacement character.
        my $u = hex(defined $uni_of[$code] ? $uni_of[$code] : "FFFD");
        if (!defined $u2x{$u}) {
            $x2u[$unique] = $u;
            $u2x{$u} = $unique++;
        }
        my $x = $u2x{$u};
        print "$x,", _sep($code, 16);
        $cx[$cs]{$x} = $code;
    }
    print "},\n";
}
print "};\n\n";

# ---------------------------------------------------------------------------
# x_to_uni: internal code -> Unicode value.
# ---------------------------------------------------------------------------
print STDERR "$unique unique codes...\n";
print "static unsigned short int x_to_uni[$unique] = {\n";
for my $i (0 .. $unique - 1) {
    print "$x2u[$i],", _sep($i, 16);
}
print "\n" if $unique % 16;
print "};\n\n";

# ---------------------------------------------------------------------------
# uni_to_x: two-level table, one 256-entry page per used high byte.
# ---------------------------------------------------------------------------
print STDERR "UNICODE table...\n";
my @pg;    # $pg[$page] is true when some known Unicode value lies in $page
for my $i (0 .. $unique - 1) {
    $pg[int($x2u[$i] / 256)] = 1;
}
for my $page (0 .. 255) {
    next unless $pg[$page];
    print "static unsigned short int uni_to_x_$page\[256\] = {\n";
    for my $j (0 .. 255) {
        my $u = 256 * $page + $j;
        my $x = defined($u2x{$u}) ? $u2x{$u} : 256;
        print "$x,", _sep($j, 16);
    }
    print "};\n\n";
}

print "static unsigned short int *uni_to_x[256] = {\n";
# Unused pages are redirected to page 255, so apart from U+FFFD itself that
# page must not contain any mapped character.
for my $u (hex("FF00") .. hex("FFFF")) {
    if (defined $u2x{$u} && $u != 0xFFFD) {
        die "Invalid replacement strategy!";
    }
}
for my $page (0 .. 255) {
    print "uni_to_x_", ($pg[$page] ? $page : "255"), ",", _sep($page, 4);
}
print "};\n\n";

# ---------------------------------------------------------------------------
# Character expansions: %expand maps a decimal Unicode value to a
# space-separated list of decimal Unicode values that may replace it.
# ---------------------------------------------------------------------------
print STDERR "UniData file...\n";
my %expand;
open(my $unidata, '<', "unicode/UnicodeData.txt") or die "No UnicodeData file";
while (my $line = <$unidata>) {
    chomp $line;
    # Field 0 is the code point, field 5 the decomposition.
    my ($num, $exp) = (split /;/, $line)[0, 5];
    next unless defined $exp && $exp ne "";
    $exp =~ s/^<.*> *//g;    # drop a leading "<tag>" of the decomposition
    my @codes;
    foreach my $x (split(/ /, $exp)) {
        push @codes, hex $x if $x ne "0020";    # spaces are not expanded to
    }
    $expand{hex $num} = join(" ", @codes);
}
close $unidata;

print STDERR "Character expansions\n";
if (open(my $extra, '<', "misc/user_expand")) {
    while (my $line = <$extra>) {
        chomp $line;
        next if $line =~ /^\s*$/ || $line =~ /^#/;
        $line =~ s/0x([0-9a-zA-Z]+)/hex($1)/ge;
        ($line =~ /^(\S+)\s+(.*)$/) || die "Syntax error in user expansions";
        $expand{$1} = $2;
    }
    close $extra;
}

# ---------------------------------------------------------------------------
# x_to_output: for every charset, internal code -> output byte, or (when the
# result is not exactly one byte) an offset >= 256 into string_table.
# ---------------------------------------------------------------------------
print "static unsigned short int x_to_output[$ncs][$unique] = {\n";
my $pstr = 256;    # offset the next new string will get
my %string;        # output string -> its offset
my %strval;        # offset -> output string
for my $cs (0 .. $ncs - 1) {
    print "\n/* $charsets[$cs] */\n{\n";
    for my $i (0 .. $unique - 1) {
        # Repeatedly replace characters the charset cannot represent by
        # their expansions until nothing changes; characters with neither a
        # representation nor an expansion are dropped.
        # NOTE: a cyclic definition in misc/user_expand would keep this loop
        # running forever.
        my $seq = $x2u[$i];
        my $prev;
        do {
            $prev = $seq;
            my @parts;
            foreach my $x (split(/ /, $prev)) {
                my $k = $u2x{$x};
                if (defined $k && defined $cx[$cs]{$k}) {
                    push @parts, $x;
                }
                elsif (defined($k = $expand{$x})) {
                    push @parts, $k;
                }
            }
            $seq = join(" ", @parts);
        } while ($prev ne $seq);

        # Translate the surviving Unicode values to charset bytes; the
        # replacement character (internal code 256) is never emitted.
        my $out = "";
        foreach my $x (split(/ /, $seq)) {
            my $k = $u2x{$x};
            next unless defined $k && $k != 256;
            my $byte = $cx[$cs]{$k};
            $out .= pack("C", $byte) if defined $byte;
        }

        my $z;
        if (length($out) == 1) {
            $z = unpack("C", $out);
        }
        else {
            if (!defined $string{$out}) {
                $string{$out} = $pstr;
                $strval{$pstr} = $out;
                $pstr += 1 + length($out);    # length byte + contents
            }
            $z = $string{$out};
        }
        print "$z,", _sep($i, 16);
    }
    print "\n" if $unique % 16;
    print "},\n";
}
print "};\n\n";

# ---------------------------------------------------------------------------
# string_table: length-prefixed strings, addressed by (offset - 256).
# ---------------------------------------------------------------------------
print STDERR "And Tubular Bells...\n";
print "static unsigned char string_table[] = {\n";
my $pos = 256;
while ($pos < $pstr) {
    my $w = $strval{$pos};
    print length($w), ",";
    # "C*" so that every byte counted in the length prefix is emitted.
    foreach my $byte (unpack("C*", $w)) {
        print " $byte,";
    }
    print "\n";
    $pos += 1 + length $w;
}
print "};\n";
print STDERR "Done.\n";