#!/usr/athena/bin/perl -w
#
# Devanagari input preprocessor based on ITRANS http://www.aczone.com/itrans
#
# Copyright (C) 2002-2005, Arun A Tharuvai
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Usage: reads ITRANS-encoded text from the files named on the command
# line (or from standard input) and writes the Devanagari rendering to
# standard output as UTF-8, one output line per input line.

use charnames ":full";
use strict;
use warnings;

# Independent (word-initial / stand-alone) vowels plus a few stand-alone
# signs, keyed by their ITRANS spelling.
my %indvowels = (
    'a'    => "\N{DEVANAGARI LETTER A}",
    'A'    => "\N{DEVANAGARI LETTER AA}",
    'aa'   => "\N{DEVANAGARI LETTER AA}",
    'i'    => "\N{DEVANAGARI LETTER I}",
    'ii'   => "\N{DEVANAGARI LETTER II}",
    'I'    => "\N{DEVANAGARI LETTER II}",
    'u'    => "\N{DEVANAGARI LETTER U}",
    'uu'   => "\N{DEVANAGARI LETTER UU}",
    'U'    => "\N{DEVANAGARI LETTER UU}",
    'RRi'  => "\N{DEVANAGARI LETTER VOCALIC R}",
    'R^i'  => "\N{DEVANAGARI LETTER VOCALIC R}",
    'RRI'  => "\N{DEVANAGARI LETTER VOCALIC RR}",
    'R^I'  => "\N{DEVANAGARI LETTER VOCALIC RR}",
    'LLi'  => "\N{DEVANAGARI LETTER VOCALIC L}",
    'L^i'  => "\N{DEVANAGARI LETTER VOCALIC L}",
    'LLI'  => "\N{DEVANAGARI LETTER VOCALIC LL}",
    'L^I'  => "\N{DEVANAGARI LETTER VOCALIC LL}",
    # Independent forms need the LETTER code points; the dependent
    # VOWEL SIGN forms belong in %depvowels only.
    'a.c'  => "\N{DEVANAGARI LETTER CANDRA E}",
    'e'    => "\N{DEVANAGARI LETTER E}",
    'ai'   => "\N{DEVANAGARI LETTER AI}",
    'A.c'  => "\N{DEVANAGARI LETTER CANDRA O}",
    'aa.c' => "\N{DEVANAGARI LETTER CANDRA O}",
    'o'    => "\N{DEVANAGARI LETTER O}",
    'au'   => "\N{DEVANAGARI LETTER AU}",
    'aM'   => "\N{DEVANAGARI LETTER A}\N{DEVANAGARI SIGN ANUSVARA}",
    'aH'   => "\N{DEVANAGARI LETTER A}\N{DEVANAGARI SIGN VISARGA}",
    'a.N'  => "\N{DEVANAGARI LETTER A}\N{DEVANAGARI SIGN CANDRABINDU}",
    '.'    => "\N{DEVANAGARI DANDA}",
    '..'   => "\N{DEVANAGARI DOUBLE DANDA}",
    'AUM'  => "\N{DEVANAGARI OM}",
    '.a'   => "\N{DEVANAGARI SIGN AVAGRAHA}",
);

# Dependent vowel signs (matras) that may follow a consonant.  A plain
# 'a' is deliberately absent: it is the inherent vowel and produces no
# sign (see detvowel).
my %depvowels = (
    'A'    => "\N{DEVANAGARI VOWEL SIGN AA}",
    'aa'   => "\N{DEVANAGARI VOWEL SIGN AA}",
    'i'    => "\N{DEVANAGARI VOWEL SIGN I}",
    'ii'   => "\N{DEVANAGARI VOWEL SIGN II}",
    'I'    => "\N{DEVANAGARI VOWEL SIGN II}",
    'u'    => "\N{DEVANAGARI VOWEL SIGN U}",
    'uu'   => "\N{DEVANAGARI VOWEL SIGN UU}",
    'U'    => "\N{DEVANAGARI VOWEL SIGN UU}",
    'RRi'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC R}",
    'R^i'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC R}",
    'RRI'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC RR}",
    'R^I'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC RR}",
    'LLi'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC L}",
    'L^i'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC L}",
    'LLI'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC LL}",
    'L^I'  => "\N{DEVANAGARI VOWEL SIGN VOCALIC LL}",
    'a.c'  => "\N{DEVANAGARI VOWEL SIGN CANDRA E}",
    'e'    => "\N{DEVANAGARI VOWEL SIGN E}",
    'ai'   => "\N{DEVANAGARI VOWEL SIGN AI}",
    'A.c'  => "\N{DEVANAGARI VOWEL SIGN CANDRA O}",
    'aa.c' => "\N{DEVANAGARI VOWEL SIGN CANDRA O}",
    'o'    => "\N{DEVANAGARI VOWEL SIGN O}",
    'au'   => "\N{DEVANAGARI VOWEL SIGN AU}",
    'aM'   => "\N{DEVANAGARI SIGN ANUSVARA}",
    'aH'   => "\N{DEVANAGARI SIGN VISARGA}",
    'a.N'  => "\N{DEVANAGARI SIGN CANDRABINDU}",
);

# Consonants (and a few fixed conjuncts), keyed by their ITRANS spelling.
my %consonants = (
    'k'   => "\N{DEVANAGARI LETTER KA}",
    'kh'  => "\N{DEVANAGARI LETTER KHA}",
    'g'   => "\N{DEVANAGARI LETTER GA}",
    'gh'  => "\N{DEVANAGARI LETTER GHA}",
    '~N'  => "\N{DEVANAGARI LETTER NGA}",
    'ch'  => "\N{DEVANAGARI LETTER CA}",
    'Ch'  => "\N{DEVANAGARI LETTER CHA}",
    'j'   => "\N{DEVANAGARI LETTER JA}",
    'jh'  => "\N{DEVANAGARI LETTER JHA}",    # ITRANS spells JHA as "jh"
    '~n'  => "\N{DEVANAGARI LETTER NYA}",
    'T'   => "\N{DEVANAGARI LETTER TTA}",
    'Th'  => "\N{DEVANAGARI LETTER TTHA}",
    'D'   => "\N{DEVANAGARI LETTER DDA}",
    'Dh'  => "\N{DEVANAGARI LETTER DDHA}",
    'N'   => "\N{DEVANAGARI LETTER NNA}",
    't'   => "\N{DEVANAGARI LETTER TA}",
    'th'  => "\N{DEVANAGARI LETTER THA}",
    'd'   => "\N{DEVANAGARI LETTER DA}",
    'dh'  => "\N{DEVANAGARI LETTER DHA}",
    'n'   => "\N{DEVANAGARI LETTER NA}",
    'p'   => "\N{DEVANAGARI LETTER PA}",
    'ph'  => "\N{DEVANAGARI LETTER PHA}",
    'b'   => "\N{DEVANAGARI LETTER BA}",
    'bh'  => "\N{DEVANAGARI LETTER BHA}",
    'm'   => "\N{DEVANAGARI LETTER MA}",
    'y'   => "\N{DEVANAGARI LETTER YA}",
    'r'   => "\N{DEVANAGARI LETTER RA}",
    'l'   => "\N{DEVANAGARI LETTER LA}",
    'v'   => "\N{DEVANAGARI LETTER VA}",
    'w'   => "\N{DEVANAGARI LETTER VA}",
    'sh'  => "\N{DEVANAGARI LETTER SHA}",
    'Sh'  => "\N{DEVANAGARI LETTER SSA}",
    's'   => "\N{DEVANAGARI LETTER SA}",
    'h'   => "\N{DEVANAGARI LETTER HA}",
    'L'   => "\N{DEVANAGARI LETTER LLA}",
    'ld'  => "\N{DEVANAGARI LETTER LLA}",
    'x'   => "\N{DEVANAGARI LETTER KA}\N{DEVANAGARI SIGN VIRAMA}\N{DEVANAGARI LETTER SSA}",
    'GY'  => "\N{DEVANAGARI LETTER JA}\N{DEVANAGARI SIGN VIRAMA}\N{DEVANAGARI LETTER NYA}",
    'dny' => "\N{DEVANAGARI LETTER JA}\N{DEVANAGARI SIGN VIRAMA}\N{DEVANAGARI LETTER NYA}",
    'q'   => "\N{DEVANAGARI LETTER QA}",
    'K'   => "\N{DEVANAGARI LETTER KHHA}",
    'G'   => "\N{DEVANAGARI LETTER GHHA}",
    'z'   => "\N{DEVANAGARI LETTER ZA}",
    'J'   => "\N{DEVANAGARI LETTER ZA}",
    'f'   => "\N{DEVANAGARI LETTER FA}",
    '.D'  => "\N{DEVANAGARI LETTER DDDHA}",
    '.Dh' => "\N{DEVANAGARI LETTER RHA}",
    'Y'   => "\N{DEVANAGARI LETTER YYA}",
    'R'   => "\N{DEVANAGARI LETTER RRA}",
);

# Length of the longest ITRANS spelling in any table; bounds how far the
# tokenizer has to look ahead.
my $MAX_KEY_LENGTH = 0;
for my $key (keys %indvowels, keys %depvowels, keys %consonants) {
    $MAX_KEY_LENGTH = length $key if length $key > $MAX_KEY_LENGTH;
}

# detvowel($l1, $l2, $l3, $l4)
#
# Decide what follows a consonant.  The arguments are the (up to four)
# input characters immediately after the consonant; any of them may be
# undef when the consonant sits near the end of the line.
#
# Returns a two-element list ($used, $outstr):
#   $used   - number of input characters consumed (0..4)
#   $outstr - text to append after the consonant: a dependent vowel sign,
#             the empty string for the inherent 'a', or a VIRAMA when no
#             vowel follows at all.
sub detvowel {
    my ($l1, $l2, $l3, $l4) = @_;

    # Only the leading run of defined characters can form a key.
    my @lookahead;
    for my $char ($l1, $l2, $l3, $l4) {
        last unless defined $char;
        push @lookahead, $char;
    }

    # Longest spelling wins, so that e.g. "A.c" is not cut short at "A".
    for (my $len = scalar @lookahead; $len >= 1; $len--) {
        my $key = join '', @lookahead[0 .. $len - 1];
        return ($len, $depvowels{$key}) if exists $depvowels{$key};
    }

    # A bare 'a' is the inherent vowel: consume it, emit nothing.
    return (1, "") if @lookahead && $lookahead[0] eq 'a';

    # No vowel follows: suppress the inherent vowel with a virama.
    return (0, "\N{DEVANAGARI SIGN VIRAMA}");
}

# _longest_token(\@chars, $pos)
#
# Find the longest consonant or independent-vowel spelling that starts at
# index $pos of @chars.  Returns ($length, $devanagari, $is_consonant), or
# an empty list when nothing in the tables matches at that position.
sub _longest_token {
    my ($chars, $pos) = @_;

    my $remaining = scalar(@{$chars}) - $pos;
    my $longest = $remaining < $MAX_KEY_LENGTH ? $remaining : $MAX_KEY_LENGTH;

    for (my $len = $longest; $len >= 1; $len--) {
        my $key = join '', @{$chars}[$pos .. $pos + $len - 1];
        return ($len, $consonants{$key}, 1) if exists $consonants{$key};
        return ($len, $indvowels{$key},  0) if exists $indvowels{$key};
    }

    return;
}

# transliterate_line($text)
#
# Convert one line of ITRANS text (without its line terminator) to
# Devanagari and return the result.  Characters that are not part of any
# known spelling (whitespace, digits, punctuation, ...) are copied through
# unchanged and stay in their original position.
sub transliterate_line {
    my ($text) = @_;
    return '' unless defined $text;

    my @chars  = split //, $text;
    my $size   = scalar @chars;
    my $outstr = '';
    my $pos    = 0;

    while ($pos < $size) {
        my ($len, $glyphs, $is_consonant) = _longest_token(\@chars, $pos);

        if (!defined $len) {
            # Unmapped character: pass through verbatim, in order.
            $outstr .= $chars[$pos];
            $pos++;
            next;
        }

        $outstr .= $glyphs;
        $pos += $len;
        next unless $is_consonant;

        # A consonant is followed by a vowel sign, the inherent 'a', or
        # nothing (virama).  Elements past the end of @chars read as undef.
        my ($used, $sign) = detvowel(
            $chars[$pos], $chars[$pos + 1], $chars[$pos + 2], $chars[$pos + 3],
        );
        $outstr .= $sign;
        $pos += $used;
    }

    return $outstr;
}

# main()
#
# Filter loop: transliterate every input line and print it as UTF-8.
sub main {
    # The output consists of code points above U+00FF; without an encoding
    # layer every print would raise a "Wide character" warning.
    binmode(STDOUT, ':encoding(UTF-8)');

    while (my $line = <>) {
        # Drop the terminator so that exactly one newline is written back.
        chomp $line;

        # Treat the input as UTF-8 so that non-ASCII pass-through text is
        # not double-encoded on output.  If the bytes are not valid UTF-8
        # the string is left untouched and handled as Latin-1.
        utf8::decode($line);

        print transliterate_line($line), "\n";
    }

    return;
}

# Run as a filter when executed directly; stay quiet when loaded via
# require/do so the subs can be exercised on their own.
main() unless caller;