mj.ucw.cz Git - libucw.git/blob - charset/misc/import-recode
Added user unaccenting rules for stroked letters. Not tested yet.
[libucw.git] / charset / misc / import-recode
1 #!/usr/bin/perl
2 #
3 #  Use `recode` to create a translation table
4 #  (c) 2003, Robert Spalek <robert@ucw.cz>
5 #
6
use strict;
use warnings;
use open IN => ":utf8";

# For every charset named on the command line:
#   1. run `recode` over tmp/tab256 to convert it from that charset to UTF-8,
#   2. parse each output line of the form "<2 hex digits>\t<0 or 1 character>"
#      into a table indexed by the byte value,
#   3. pipe the table as "<byte>\t<code point>" lines (both hexadecimal) into
#      ./mkuni, whose standard output is redirected to tmp/<charset>.
# Dies if recode cannot be started, a line cannot be parsed, or no line at
# all was read for a charset.
foreach my $charset (@ARGV)
{
        print "Charset: $charset\n";

        # The command contains a shell redirection, so it is deliberately run
        # through the shell; $charset is interpolated as given on the command line.
        open(my $fi, '-|', "recode -s -f $charset/..utf-8/ <tmp/tab256")
                || die "Recoding error: $!";

        # Fresh table per charset: otherwise entries of a previously processed
        # charset would leak into this one and hide an empty recode output.
        my @recode;

        while (my $line = <$fi>)
        {
                # chomp (not chop): a final line without a trailing newline
                # must not lose its data character.
                chomp $line;
                my ($number, $char) = $line =~ /^([0-9A-F]{2})\t(.?)$/
                        or die "Cannot parse $line";
                # An empty character column marks the byte as unmapped (-1).
                $recode[hex $number] = $char ne "" ? ord $char : -1;
        }
        @recode || die "Empty recoding table";

        # Line feed and carriage return are always mapped to themselves,
        # overriding whatever was parsed for them.
        $recode[10] = 10;
        $recode[13] = 13;

        # Start mkuni only once the table is known to be usable, so that a
        # failed run does not truncate an existing tmp/<charset>.
        open(my $fo, '|-', "./mkuni >tmp/$charset")
                || die "Cannot run ./mkuni: $!";

        for my $i (0 .. $#recode)
        {
                my $code = $recode[$i];
                # Skip bytes recode never reported (undef) as well as bytes
                # explicitly recorded as unmapped (-1).
                next unless defined $code && $code >= 0;
                printf $fo "%02X\t%04X\n", $i, $code;
        }

        # NOTE(review): as in the original, the exit status of neither pipe is
        # checked here; confirm whether recode/mkuni failures should be fatal.
        close($fo);
        close($fi);
}