mj.ucw.cz Git - libucw.git/commitdiff
added tools for stealing translation tables from recode
author Robert Spalek <robert@ucw.cz>
Fri, 27 Jun 2003 12:27:49 +0000 (12:27 +0000)
committer Robert Spalek <robert@ucw.cz>
Fri, 27 Jun 2003 12:27:49 +0000 (12:27 +0000)
sanity checks:
- after extraction, the iso-8859-{1,2} tables are identical to the tables
  imported by MJ
- the cp1250 table is quite different from the existing win-1250 table, but I
  do not know which one is right

charset/misc/mkcharset [new file with mode: 0755]
charset/misc/mktab256 [new file with mode: 0755]

diff --git a/charset/misc/mkcharset b/charset/misc/mkcharset
new file mode 100755 (executable)
index 0000000..b927536
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+#
+#  Use `recode` to create a translation table
+#  (c) 2003, Robert Spalek <robert@ucw.cz>
+#
+
+use open IN => ":utf8";
+
+foreach $charset (@ARGV)
+{
+       # Fresh table per charset, so no entries leak in from the previous one.
+       my @recode;
+       print "Charset: $charset\n";
+       open(fi, "recode -s -f $charset/..utf-8/ <tmp/tab256 |") || die "Recoding error";
+       open(fo, "| ./mkuni >tmp/$charset") || die;
+
+       # Each input line is "<two hex digits>\t<one recoded character>".
+       while (<fi>)
+       {
+               chomp;
+               (($number, $char) = /^([0-9A-F]{2})\t(.)$/) || die "Cannot parse $_";
+               $recode[hex $number] = ord $char;
+       }
+       @recode || die "Empty recoding table";
+       # LF and CR always map to themselves, whatever was parsed above.
+       @recode[10, 13] = (10, 13);
+
+       # Emit "<hex byte>\t<hex code point>"; bytes with no parsed line print as 0000.
+       printf fo "%02X\t%04X\n", $_, $recode[$_] for 0 .. $#recode;
+       close(fo);
+       close(fi);
+}
diff --git a/charset/misc/mktab256 b/charset/misc/mktab256
new file mode 100755 (executable)
index 0000000..8cfc6e9
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/perl
+#
+#  Simply create a table of all 256 characters (except LF and CR)
+#  (c) 2003, Robert Spalek <robert@ucw.cz>
+#
+
+use open OUT => ":raw";
+
+open(fo, '>tmp/tab256') || die "Cannot create tmp/tab256: $!";
+foreach $i (0 .. 255)
+{
+       next if $i==10 || $i==13;
+       printf fo "%02X\t%c\n", $i, $i;
+}
+close(fo) || die "Cannot write tmp/tab256: $!";   # buffered write errors surface at close