3 # Character Set Table Generator 1.0
4 # (c) 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 # This program can be freely distributed and used according to the terms
7 # of the GNU General Public License.
10 # Internal codes 0..255 are mapped to Unicode 0..255
11 # Internal code 256 is the replacement character (U+FFFD)
# Main script body: writes C source (static lookup tables) to STDOUT and
# progress messages to STDERR.  Several loop headers, loop bodies and closing
# braces of the original program are not visible in this excerpt; comments
# below describe only the statements that are shown.
15 print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";
# --- Phase 1: read the list of charsets into @charsets, counting them in $ncs.
17 print STDERR "Charset list...\n";
# Skip lines that match /^\w*$/ or that start with '#'.
# NOTE(review): the user-expansion reader further down uses /^\s*$/ for the
# same kind of skip; \w here also skips lines made only of word characters --
# presumably \s (blank line) was intended, confirm.
21 (/^\w*$/ || /^#/) && next;
22 $charsets[$ncs++] = $_;
25 print STDERR "Found $ncs charsets, counting unique codes...\n";
# --- Phase 2: seed the internal code space.
# %u2x maps a Unicode value to an internal code, @x2u is the reverse mapping.
# Codes 0..255 map to themselves.
27 for($unique=0; $unique<256; $unique++) {
28 $u2x{$unique} = $unique;
29 $x2u[$unique] = $unique;
# The replacement character 0xFFFD takes the next internal code and $unique
# is advanced past it (presumably this runs after the loop above has closed,
# giving it code 256 as the file header states -- closing brace not visible).
31 $u2x{0xFFFD} = $unique;
32 $x2u[$unique++] = 0xFFFD;
# --- Phase 3: emit input_to_x, one row of 256 entries per charset.
33 print "static unsigned short int input_to_x[$ncs][256] = {\n";
34 for($x=0; $x<$ncs; $x++) {
# $a is used as the charset file name; its assignment is not visible here
# (presumably $charsets[$x] -- confirm).
36 print "\n/* $a */\n{\n";
# NOTE(review): two-argument open on a bareword handle; the die message does
# not include $!, so the reason for a failure is not reported.
37 open (A, $a) || die "Error opening $a";
# Same skip rule as in the charset list reader (see the \w vs \s note there).
40 (/^\w*$/ || /^#/) && next;
# Each data line is split on tabs into three fields.
41 ($i, $u, $c) = split /\t/;
# For every byte value, take the hex string stored in $cc[$x][$i], falling
# back to "FFFD" when there is none, and convert it to a number.
45 for($i=0; $i<256; $i++) {
46 $u = hex((defined $cc[$x][$i]) ? $cc[$x][$i] : "FFFD");
# Values not yet present in %u2x are handled here; the branch body is not
# visible (presumably it allocates a new internal code from $unique).
47 if (!defined $u2x{$u}) {
# Print one table entry; a newline follows every 16th entry.  The assignment
# of $o is not visible in this excerpt.
52 print "$o,", ($i % 16 == 15) ? "\n" : " ";
60 print STDERR "$unique unique codes...\n";
# --- Phase 4: emit x_to_uni from @x2u, 16 entries per output line.
62 print "static unsigned short int x_to_uni[$unique] = {\n";
63 for($i=0; $i<$unique; $i++) {
64 print "$x2u[$i],", ($i % 16 == 15) ? "\n" : " ";
# Terminate the last output row when it was not a full 16 entries.
66 if ($i % 16) { print "\n"; }
# --- Phase 5: emit the uni_to_x_<page> tables, 256 entries each.
69 print STDERR "UNICODE table...\n";
# Loop over all internal codes; its body is not visible (presumably it fills
# @pg, which is tested when the pointer table is printed below -- confirm).
70 for($i=0; $i<$unique; $i++) {
75 for($i=0; $i<256; $i++) {
77 print "static unsigned short int uni_to_x_$i\[256\] = {\n";
78 for($j=0; $j<256; $j++) {
# Look $u up in %u2x; values without an internal code become 256.  How $u is
# derived from $i and $j is not visible here.
80 $u = defined($u2x{$u}) ? $u2x{$u} : 256;
81 print "$u,", ($j % 16 == 15) ? "\n" : " ";
# --- Phase 6: emit the uni_to_x table of pointers to the per-page tables.
86 print "static unsigned short int *uni_to_x[256] = {\n";
# Abort if any value in 0xFF00..0xFFFF other than 0xFFFD has an internal
# code.  Entries with a false $pg[$i] are pointed at uni_to_x_255 below, so
# presumably that table must hold nothing but the replacement -- confirm.
87 for($i=hex "FF00"; $i<=hex "FFFF"; $i++) {
88 if (defined $u2x{$i} && $i != 0xFFFD) { die "Invalid replacement strategy!"; }
# Four pointers per output line; uni_to_x_255 is used when $pg[$i] is false.
90 for($i=0; $i<256; $i++) {
91 print "uni_to_x_", $pg[$i] ? $i : "255", ",", ($i % 4 == 3) ? "\n" : " ";
# --- Phase 7: read expansion data into %expand.
95 print STDERR "UniData file...\n";
# NOTE(review): the input path is hard-coded; two-argument open on a bareword
# handle, and the die message does not include $!.
96 open (U, "/tmp/unicode") || die "No UnicodeData file";
# Split a ';'-separated record: field 0 is the code, field 1 the name, fields
# 2-4 are discarded and field 5 is kept in $exp (presumably the decomposition
# mapping of UnicodeData.txt -- confirm).
99 ($num,$name,$_,$_,$_,$exp) = split /;/;
# Strip a leading "<...>" tag and any spaces after it.
# NOTE(review): /g has no effect on a pattern anchored with ^.
101 $exp =~ s/^<.*> *//g;
# Convert each space-separated hex value to decimal and append it to $a.
103 foreach $x (split (/ /, $exp)) {
105 $a = $a . " " . hex $x;
# Store $a under the numeric code, then remove the leading space from the
# stored copy (the substitution acts on the hash element, not on $a).
108 ($expand{hex $num} = $a) =~ s/^ //;
# --- Phase 8: optional user-supplied expansions.
113 print STDERR "Character expansions\n";
# The file is optional: nothing dies when it cannot be opened.
114 if (open(EXTRA, "misc/user_expand")) {
117 (/^\s*$/ || /^#/) && next;
# Rewrite every 0x... literal on the line as its decimal value.
# NOTE(review): the class [0-9a-zA-Z] also accepts the letters g-z, which are
# not hex digits -- presumably [0-9a-fA-F] was meant, confirm.
118 s/0x([0-9a-zA-Z]+)/hex($1)/ge;
# A line must be a non-blank token, whitespace, then the rest of the line.
119 (/^(\S+)\s+(.*)$/) || die "Syntax error in user expansions";
# --- Phase 9: emit x_to_output, one row of $unique entries per charset.
124 print "static unsigned short int x_to_output[$ncs][$unique] = {\n";
126 for($c=0; $c<$ncs; $c++) {
127 print "\n/* $charsets[$c] */\n{\n";
128 for($i=0; $i<$unique; $i++) {
# First pass over the space-separated values in $r (its origin is not visible
# here): a value is tested for an internal code that has an entry in
# $cx[$c], otherwise for an entry in %expand.  Both branch bodies are not
# visible in this excerpt.
133 foreach $x (split (/ /, $r)) {
134 if (defined($k = $u2x{$x}) && defined $cx[$c]{$k}) {
136 } elsif (defined($k = $expand{$x})) {
# Second pass: build the string $u by appending, for every value whose
# internal code is not 256 and has an entry in $cx[$c], that entry packed as
# a single unsigned char.
143 foreach $x (split (/ /, $r)) {
144 if (defined($k = $u2x{$x})) {
145 if ($k != 256 && defined ($k = $cx[$c]{$k})) {
146 $u = $u . pack("C", $k);
# A one-character result is emitted directly as its numeric value.
150 if (length($u) == 1) {
151 $z = unpack("C", $u);
# Strings not yet recorded in %string are handled here (body not visible);
# $pstr advances by the string length plus one (presumably the extra one is
# the length prefix written to string_table below -- confirm).
153 if (!defined($string{$u})) {
156 $pstr += 1 + length($u);
160 print "$z,", ($i % 16 == 15) ? "\n" : " ";
# Terminate the last output row when it was not a full 16 entries.
162 if ($i % 16) { print "\n"; }
# --- Phase 10: emit string_table: for each string $w its length, followed by
# its characters unpacked as unsigned chars ("C256" reads at most 256).
167 print STDERR "And Tubular Bells...\n";
168 print "static unsigned char string_table[] = {\n";
172 print length $w, ",";
173 foreach $x (unpack("C256", $w)) {
181 print STDERR "Done.\n";