#!/usr/athena/bin/perl -w
#
# Bengali/Bangla/Assamese input preprocessor based on ITRANS http://www.aczone.com/itrans
#
# Copyright (C) 2002-2005, Arun A Tharuvai <aatharuv@MIT.eDU>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   


use charnames ":full";
use strict;

# Independent vowels: the standalone letter emitted when a vowel is not
# attached to a preceding consonant.
#
# Each row below names one Bengali output string followed by every ITRANS
# spelling that produces it; the rows are flattened into a
# spelling => string hash.
#
# ITRANS codes that are currently disabled (no entry is generated for them):
#   'a.c'           BENGALI VOWEL SIGN CANDRA E
#   'A.c', 'aa.c'   BENGALI VOWEL SIGN CANDRA O
#   '.'             BENGALI DANDA
#   '..'            BENGALI DOUBLE DANDA
#   'AUM'           BENGALI OM
#   '.a'            BENGALI SIGN AVAGRAHA
my %indvowels = map {
    my ($letter, @spellings) = @$_;
    map { ($_ => $letter) } @spellings;
} (
    # Simple vowels; the long ones accept a doubled or a capital spelling.
    [ "\N{BENGALI LETTER A}",          'a' ],
    [ "\N{BENGALI LETTER AA}",         'A', 'aa' ],
    [ "\N{BENGALI LETTER I}",          'i' ],
    [ "\N{BENGALI LETTER II}",         'ii', 'I' ],
    [ "\N{BENGALI LETTER U}",          'u' ],
    [ "\N{BENGALI LETTER UU}",         'uu', 'U' ],

    # Vocalic R / L, short and long.
    [ "\N{BENGALI LETTER VOCALIC R}",  'RRi', 'R^i' ],
    [ "\N{BENGALI LETTER VOCALIC RR}", 'RRI', 'R^I' ],
    [ "\N{BENGALI LETTER VOCALIC L}",  'LLi', 'L^i' ],
    [ "\N{BENGALI LETTER VOCALIC LL}", 'LLI', 'L^I' ],

    # Mid vowels and diphthongs.
    [ "\N{BENGALI LETTER E}",          'e' ],
    [ "\N{BENGALI LETTER AI}",         'ai' ],
    [ "\N{BENGALI LETTER O}",          'o' ],
    [ "\N{BENGALI LETTER AU}",         'au' ],

    # Letter A carrying a modifier sign.
    [ "\N{BENGALI LETTER A}\N{BENGALI SIGN ANUSVARA}",    'aM' ],
    [ "\N{BENGALI LETTER A}\N{BENGALI SIGN VISARGA}",     'aH' ],
    [ "\N{BENGALI LETTER A}\N{BENGALI SIGN CANDRABINDU}", 'a.N' ],
);

# Dependent vowel signs (matras): what is appended to a consonant when the
# given ITRANS vowel spelling follows it.  Keys are one to three characters
# long and are looked up by detvowel().
#
# There is deliberately no entry for a bare 'a': detvowel() treats it as the
# consonant's inherent vowel and emits nothing for it.  The 'aM', 'aH' and
# 'a.N' entries therefore consist of the modifier sign alone.
my %depvowels = (
		 'A' => "\N{BENGALI VOWEL SIGN AA}", 'aa' => "\N{BENGALI VOWEL SIGN AA}",
		 'i' => "\N{BENGALI VOWEL SIGN I}",
		 'ii' => "\N{BENGALI VOWEL SIGN II}", 'I' => "\N{BENGALI VOWEL SIGN II}",
		 'u' => "\N{BENGALI VOWEL SIGN U}",
		 'uu' => "\N{BENGALI VOWEL SIGN UU}", 'U' => "\N{BENGALI VOWEL SIGN UU}",
		 'RRi' => "\N{BENGALI VOWEL SIGN VOCALIC R}", 'R^i' => "\N{BENGALI VOWEL SIGN VOCALIC R}",
		 'RRI' => "\N{BENGALI VOWEL SIGN VOCALIC RR}", 'R^I' => "\N{BENGALI VOWEL SIGN VOCALIC RR}",
		 'LLi' => "\N{BENGALI VOWEL SIGN VOCALIC L}", 'L^i' => "\N{BENGALI VOWEL SIGN VOCALIC L}",
		 'LLI' => "\N{BENGALI VOWEL SIGN VOCALIC LL}", 'L^I' => "\N{BENGALI VOWEL SIGN VOCALIC LL}",
#		 'a.c' => "\N{BENGALI VOWEL SIGN CANDRA E}",
		 'e' => "\N{BENGALI VOWEL SIGN E}",
		 'ai' => "\N{BENGALI VOWEL SIGN AI}",
#		 'A.c' => "\N{BENGALI VOWEL SIGN CANDRA O}", 'aa.c' => "\N{BENGALI VOWEL SIGN CANDRA O}",
		 'o'  => "\N{BENGALI VOWEL SIGN O}",
		 'au' => "\N{BENGALI VOWEL SIGN AU}",
		 # The \N escape needs its backslash; without it the literal text
		 # "N{BENGALI SIGN ANUSVARA}" would be written to the output.
		 'aM' => "\N{BENGALI SIGN ANUSVARA}",
		 'aH' => "\N{BENGALI SIGN VISARGA}",
		 'a.N' => "\N{BENGALI SIGN CANDRABINDU}",
		 );

# Consonants: ITRANS spelling => Bengali letter (or a ready-made conjunct).
# Keys are one to three characters long.  The vowel sign or virama that
# follows a consonant is chosen separately by detvowel().
#
# Entries that are commented out are ITRANS codes currently left unmapped.
my %consonants = ( 'k' => "\N{BENGALI LETTER KA}",
		   'kh' => "\N{BENGALI LETTER KHA}",
		   'g' => "\N{BENGALI LETTER GA}",
		   'gh' => "\N{BENGALI LETTER GHA}",
		   '~N' => "\N{BENGALI LETTER NGA}",

		   'ch' => "\N{BENGALI LETTER CA}",
		   'Ch' => "\N{BENGALI LETTER CHA}",
		   'j' => "\N{BENGALI LETTER JA}",
		   # ITRANS spells this letter 'jh'.  Without this key "jh" is
		   # read as 'j' followed by 'h' and comes out as JA+VIRAMA+HA.
		   'jh' => "\N{BENGALI LETTER JHA}",
		   # Legacy spelling accepted by earlier versions of this table;
		   # kept so that existing input keeps producing the same letter.
		   'hh' => "\N{BENGALI LETTER JHA}",
		   '~n' => "\N{BENGALI LETTER NYA}",

		   'T'=> "\N{BENGALI LETTER TTA}",
		   'Th'=> "\N{BENGALI LETTER TTHA}",
		   'D'=> "\N{BENGALI LETTER DDA}",
		   'Dh'=> "\N{BENGALI LETTER DDHA}",
		   'N'=> "\N{BENGALI LETTER NNA}",

		   't'=> "\N{BENGALI LETTER TA}",
		   'th'=> "\N{BENGALI LETTER THA}",
		   'd'=> "\N{BENGALI LETTER DA}",
		   'dh'=> "\N{BENGALI LETTER DHA}",
		   'n' => "\N{BENGALI LETTER NA}",

		   'p' => "\N{BENGALI LETTER PA}",
		   'ph' => "\N{BENGALI LETTER PHA}",
		   'b' => "\N{BENGALI LETTER BA}",
		   'bh' => "\N{BENGALI LETTER BHA}",
		   'm' => "\N{BENGALI LETTER MA}",

		   'y' => "\N{BENGALI LETTER YA}",
		   'r' => "\N{BENGALI LETTER RA}",
		   'l' => "\N{BENGALI LETTER LA}",
#		   'v' => "\N{BENGALI LETTER VA}",     'w' => "\N{BENGALI LETTER VA}",
		   'sh' => "\N{BENGALI LETTER SHA}",

		   'Sh' => "\N{BENGALI LETTER SSA}",
		   's' => "\N{BENGALI LETTER SA}",
		   'h' => "\N{BENGALI LETTER HA}",
#		   'L' => "\N{BENGALI LETTER LLA}", 'ld' => "\N{BENGALI LETTER LLA}",

		   # Conjuncts spelled as a single ITRANS code: the letters are
		   # joined with an explicit virama.
		   'x' => "\N{BENGALI LETTER KA}\N{BENGALI SIGN VIRAMA}\N{BENGALI LETTER SSA}",
		   'GY' => "\N{BENGALI LETTER JA}\N{BENGALI SIGN VIRAMA}\N{BENGALI LETTER NYA}",
		   'dny' => "\N{BENGALI LETTER JA}\N{BENGALI SIGN VIRAMA}\N{BENGALI LETTER NYA}",

#		   'q' => "\N{BENGALI LETTER QA}",
#		   'K' => "\N{BENGALI LETTER KHHA}",
#		   'G' => "\N{BENGALI LETTER GHHA}",
#		   'z' => "\N{BENGALI LETTER ZA}", 'J' => "\N{BENGALI LETTER ZA}",
#		   'f' => "\N{BENGALI LETTER FA}",
#		   '.D' => "\N{BENGALI LETTER DDDHA}",
		   '.Dh' => "\N{BENGALI LETTER RHA}",
		   'Y' => "\N{BENGALI LETTER YYA}",
		   'R' => "\N{BENGALI LETTER RRA}",
		   );
# detvowel($l1, $l2, $l3, $l4)
#
# Decide what has to follow a consonant that was just emitted, given the
# input characters that come right after it.
#
# Parameters:
#   $l1 .. $l3  the next three input characters, one per argument.  Slots
#               that lie past the end of the line may be undef or ''.
#   $l4         accepted for call compatibility; it is not examined.
#
# Returns the list ($used, $outstr):
#   $used    how many of the look-ahead characters were consumed (0 to 3);
#   $outstr  the text to append after the consonant:
#              - the %depvowels entry, when the look-ahead starts with one
#                of its spellings;
#              - the empty string, when the look-ahead starts with a plain
#                'a' (the consonant's inherent vowel needs no sign);
#              - a virama otherwise, with nothing consumed.
sub detvowel {
    my ($l1,$l2,$l3,$l4) = @_;

    # The caller peeks beyond the end of the line, so slots can be undef.
    # Normalise them: under -w every concatenation and comparison below
    # would otherwise warn, and "$l1 . undef" would collapse to the
    # one-character key while still being counted as two characters.
    for my $slot ($l1, $l2, $l3) {
	$slot = '' unless defined $slot;
    }

    # Try the longest spelling first so that a short key can never shadow a
    # longer one that starts with it.  Empty slots are always trailing, so
    # the length of the matched key is the number of characters consumed.
    for my $key ($l1 . $l2 . $l3, $l1 . $l2, $l1) {
	next unless defined $depvowels{$key};
	return (length($key), $depvowels{$key});
    }

    # A bare 'a' is the inherent vowel: consume it, emit nothing.
    return (1, "") if $l1 eq 'a';

    # No vowel follows: suppress the inherent vowel with a virama.
    return (0, "\N{BENGALI SIGN VIRAMA}");
}

# Main filter: read ITRANS text line by line from the files named on the
# command line (or standard input) and write the Bengali transliteration to
# standard output, one output line per input line.
#
# At every position the longest spelling (three, then two, then one
# character) found in %consonants or %indvowels is taken.  A consonant is
# followed by whatever detvowel() selects from the characters after it.
# Anything that is not a known spelling (white space, digits, punctuation,
# ...) is copied through unchanged, in its original position.

# The output contains characters above U+00FF; give STDOUT an encoding layer
# so they are written as UTF-8 without "Wide character" warnings.
binmode(STDOUT, ':utf8');

while (<>) {
    # Drop the line terminator; exactly one newline is written back below.
    # (Keeping it and appending another one would double-space the output.)
    chomp;

    # Treat the input as UTF-8 text when it is valid UTF-8, so that
    # non-ASCII characters that are merely passed through are not encoded a
    # second time on output.  Input that is not valid UTF-8 is left as is.
    utf8::decode($_) unless utf8::is_utf8($_);

    my $line = $_;
    my $size = length $line;
    my $outstr = "";
    my $counter = 0;

  POSITION:
    while ($counter < $size) {
	for my $len (3, 2, 1) {
	    # Never build a candidate that runs past the end of the line.
	    next if $counter + $len > $size;
	    my $token = substr($line, $counter, $len);

	    if (defined $consonants{$token}) {
		# Hand detvowel() the next four characters, padded with
		# empty strings so that it never sees undef.
		my @ahead = split //, substr($line, $counter + $len, 4);
		push @ahead, '' while @ahead < 4;
		my ($used, $sign) = detvowel(@ahead);

		$outstr .= $consonants{$token} . $sign;
		$counter += $len + $used;
		next POSITION;
	    }

	    if (defined $indvowels{$token}) {
		$outstr .= $indvowels{$token};
		$counter += $len;
		next POSITION;
	    }
	}

	# Not part of any spelling: keep the character where it stands.
	# (Printing it straight away would move it in front of the rest of
	# the line, which is only written once the line is complete.)
	$outstr .= substr($line, $counter, 1);
	$counter++;
    }
    print "${outstr}\n";
}
