From: Martin Mares Date: Sun, 8 Jan 2012 22:29:10 +0000 (+0100) Subject: Gramatikator X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=ff59732f70ffe465defda1d7f717028ee29b835d;p=misc.git Gramatikator --- diff --git a/gramm b/gramm new file mode 100755 index 0000000..4efc61a --- /dev/null +++ b/gramm @@ -0,0 +1,78 @@ +#!/usr/bin/perl + +readgram( < male | 300#female | 500#expobj COMMA Experimental object +male -> title first mid last post +female -> titlef firstf mid lastf post +expobj -> firstf mid lastf | first mid last +first -> Joachim | Maledictus | Octavianus | Marcus | Greebo | Graham | John | Stanislaw | Bastian Baltazar | Pedro | Alfred | Gonzales 'Speedy' | Karl | Anthony | Lobsang | Niemand | Siegfried | Magdulan | Ulthen | Siricitan | Quido | Vasilij | Diabetes +firstf -> Jacquelinne | Melinda | Pythia | Catherine | Magrat | Granny | Esmeralda | Mona +last -> 5000#lastu | Trestivec | Larva | Sodny | Draselny | Melitus | Hydrogenes +lastf -> 5000#lastu | Trestivcova | Larvova | Sodna | Draselna | Melita | Hydrogena | Ogg | Weatherwax | Lisa +lastu -> Aurelius | Woshtepp | von Draczek | Portius | Simon | de Ath | Faust | Bux | Jimenez | de la Cruz | Mosquitto | Thorndyke | Smith | Marx | Crowley | O'Body | Rondevald | Umwald | Sleedill | Morbispictus +title -> 3000#Mgr. | 2000#Dr. | Prof. | 2000#Mg. | Sir | PUDr. | 3000# +titlef -> 3000#Mgr. | 2000#Dr. | Prof. | 2000#Mg. | Lady | JUDr. | 3000# +post -> COMMA Dr. h. c. | COMMA Laureate of Nobel Prize | COMMA Certified Idiot | COMMA Archmage | COMMA Polyhistor | 3000# +secretary -> firstf mid lastf sec | 100#first mid last sec +sec -> COMMA Secretary +chief -> Prof. first mid last COMMA Director of the Laboratory +mid -> A. | B. | C. | D. | E. | F. | G. | H. | I. | J. | K. | L. | M. | N. | O. | P. | Q. | R. | S. | T. | U. | V. | W. | X. | Y. | Z. | 26000# +EOF +); + +print generate('chief'), "\n"; +$ee = 1+rrr(3); +for($e=0; $e<$ee; $e++) { print generate('secretary'), "\n"; } +$ee = 5+rrr(20); +for($e=0; $e<$ee; $e++) { print generate('name'), "\n"; } +exit 0; + +sub rrr { + my $l = shift @_; + return int($l*rand); +} + +sub readgram { + my $l = shift @_; + my $a; + for $a (split(/\n/, $l)) { + ($a =~ /^(\w+) -> (.*)$/) || die "Invalid rule: $a"; + $gr{$1} = $2; + } +} + +sub generate { + my $w = shift @_; + my $f=''; + my $t, $z, $r, $a, $i; + for(;;) { + if ($w !~ /^\s*([.'a-zA-Z0-9_-]+)(.*)$/) { +# print "DONE\n"; + last; + } + $w = $2; + $t = $1; + if (!defined $gr{$t}) { +# print "Terminal <$t>\n"; + $f="$f $t"; + } else { + $z=0; + $r=$gr{$t}; + foreach $a (split(/\|/,$r)) { + $z += ($a =~ /^(.*)#/) ? $1 : 1000; + } + $i=int($z*rand); + $z=0; + foreach $a (split(/\|/,$r)) { + $z += ($a =~ /^(.*)#/) ? $1 : 1000; + if ($i < $z) { $A=$a; last; } + } + $A =~ s/.*\#//; +# print "Expanding <$t> -> <$A>\n"; + $w = "$A $w"; + } + } + $f =~ s/^\s+//; + $f =~ s/\s*COMMA/,/g; + return $f; +}