#!/usr/bin/perl
#
#   Split Unicode Data File
#   (c) 1997 Martin Mares
#
#   Reads the semicolon-separated Unicode data file /tmp/unicode and writes
#   three tab-separated tables:
#
#     misc/u-cat    <code> TAB <character class flags, joined by "|">
#     misc/u-upper  <code> TAB 0x<upper-case mapping>
#     misc/u-lower  <code> TAB 0x<lower-case mapping>
#
#   Empty lines and lines starting with "#" in the input are skipped.
#   A record is written to a table only when it has something to say there
#   (a non-empty class string, resp. a non-empty case mapping).
#

use strict;
use warnings;

open(my $in,        '<', "/tmp/unicode") or die "Unable to open UniCode data file: $!";
open(my $cat_out,   '>', "misc/u-cat")   or die "cat file open: $!";
open(my $upper_out, '>', "misc/u-upper") or die "upper file open: $!";
open(my $lower_out, '>', "misc/u-lower") or die "lower file open: $!";

while (my $line = <$in>) {
    chomp $line;
    next if $line =~ /^$/ || $line =~ /^#/;

    # Only four of the fields are used: the code point (0), the general
    # category (2) and the upper/lower case mappings (12, 13).
    my ($code, $cat, $upper, $lower) = (split /;/, $line)[0, 2, 12, 13];

    # split drops trailing empty fields and short records yield undef;
    # treat every missing field as empty.
    for my $field ($code, $cat, $upper, $lower) {
        $field = "" unless defined $field;
    }

    # Map the category / code point to the class flags. The first matching
    # rule wins, so the order of the tests is significant.
    my $ccat;
    if ($cat =~ /^C/) {
        $ccat = "_C_CTRL";
    }
    elsif ($cat =~ /^Z/) {
        $ccat = "_C_BLANK";
    }
    elsif ($cat =~ /^Ll/) {
        $ccat = "_C_LOWER";
    }
    elsif ($cat =~ /^Lu/) {
        $ccat = "_C_UPPER";
    }
    elsif ($code ge "0030" && $code le "0039") {
        # '0'..'9'
        $ccat = "_C_DIGIT|_C_XDIGIT";
    }
    elsif ($code eq "005F") {
        # '_'
        $ccat = "_C_INNER";
    }
    else {
        $ccat = "";
    }

    # 'A'..'F' and 'a'..'f' are hex digits in addition to being letters.
    if (   ($code ge "0041" && $code le "0046")
        || ($code ge "0061" && $code le "0066"))
    {
        # Avoid a leading "|" should the letter not have been classified.
        $ccat = $ccat eq "" ? "_C_XDIGIT" : $ccat . "|_C_XDIGIT";
    }

    print $cat_out   "$code\t$ccat\n"    if $ccat ne "";
    print $upper_out "$code\t0x$upper\n" if $upper ne "";
    print $lower_out "$code\t0x$lower\n" if $lower ne "";
}

close($in);

# Buffered write errors only surface at close time, so check them.
close($cat_out)   or die "cat file close: $!";
close($upper_out) or die "upper file close: $!";
close($lower_out) or die "lower file close: $!";