#!/usr/athena/bin/perl #!/afs/athena.mit.edu/contrib/perl5/bin/perl5.6.0 # #-w RD_TRACE # #/usr/athena/bin/perl # #-I /afs/athena.mit.edu/user/o/c/ocschwar/perllib #push @INC, "/afs/athena.mit.edu/user/o/c/ocschwar/perllib"; use Parse::RecDescent; use Getopt::Std; use Lingua::EN::Numbers::Ordinate qw(ordinate); getopts('Tto:'); if ($opt_T ) { $::RD_TRACE = 1; } else { undef $::RD_TRACE ; } $::RD_HINT = 1; $Parse::RecDescent::skip = '\s*'; # (First effort: January 2000) # _D_escriptive _e_nglish for _C_ _S_tatements and _S_ubroutines # # # converts ANSI-C code to descriptive English sentences. # Next version will do it BACKWARDS as well. # # License: GPL # # Copyright 2000 Omri Schwarz # (Look up the GPL to know which rights are reserved and how) # # todo: 1. the entire syntax for pointers to functions. # 2. preprocessor directives. (getting there) # So, the problem with handling CPP directives is when they # interrupt something. I'm open to ideas. # # 3. CPP directives with no whitespace pryor to # carriage return suffer. must fix! # # 4. functions to handle the nesting levels (ordinal number generator and CPP stack) # 6. change returns to prints where appropriate. my $Grammar = q[ { my @defined_types = ('FILE') ; my ($basic,$add_on , @basics, $rule_context, $rule_name, $nonsyntactic, @macros); } startrule0 : startrule { print $item[-1]; } startrule0(?) startrule : ( comment | global_var_declaration | function_definition | function_prototype |preproc[matchrule => 'startrule'] ) #note dollar item brace __RULE__ brace is your friend. # # TODO: preproc statements could come anywhere. # conditionals are the hard part. # preproc needs to inherit enough context information to know what it is interrupting # and handle it correctly. Done! Yeeeha! # # Also, there is still some drudge work to do here. preproc : definition | undefinition | inclusion | line | error | pragma | preproc_conditional[matchrule => $arg{matchrule}] { $return = $item[-1]; # print STDERR "\npreproc_conditional $return"; } | '#' /^*\n/ #{ # print STDERR "CPP comment\n"; #} definition : /\\n*/ macro_definition | /\\s*?\\n\#/ 'define' identifier token_sequence(?) .../\\s*?\\n/ { my $token_sequence = join('',@{$item{token_sequence}}); $return = "\nNote: we define $item{identifier} to mean \"$token_sequence\".\n"; } # @item gets used here. be careful. macro_definition : '#' 'define' identifier '(' ')' token_sequence "\n" { my @symbols = @{$item[-4]}; my $last ; $return = "\nNote: we define the macro $item[3] "; push @macros,$item[3]; if ($#symbols > 0 ) { $last = pop @symbols; $return .= "with the symbol set '" . join("', '",@symbols) . ", and '$last' "; } elsif ($#symbols > 0 ) { $return .= "with the symbol set '$symbols[0]' and '$symbols[1]' "; } else { $return .= "with the symbol '$symbols[0]' "; } $return .= "to use the token sequence \"$item{token_sequence}\".\n"; } undefinition : ("\n")(s?) '#' 'undef' identifier { $return = "\nNote: here we annul the definition of \"$item{identifier}\".\n"; } # inclusion : # (/\\n+?/)(?) #{print STDERR "..\n" . substr($text,0,40) . "\n.."; } /\\s*?\\n\#/ #{print STDERR ".\n" . substr($text,0,40) . "\n."; } 'include' '<' filename '>' .../\\s*?\\n/ { $return = "\nNote: This program makes use of the system file '$item{filename}'.\n"; # print STDERR $return ; } | #(/\\n+/)(?) /\\s*?\\n\#/ 'include' '"' filename '"' .../\\s*?\\n/ { $return = "\nNote: This program makes use of the user file '$item{filename}'.\n"; print STDERR $return ; } | # (/\\n+/)(?) /\\s*?\\n\#/ 'include' token #token_sequence { $return = "\nNote: This program imports code noted by the token \"$item{token}\".\n"; } filename : /[_\\.\\-\\w\\/]+/ line : '#' 'line' constant ('"' filename '"' { $return = "and filename $item{filename}"; } )(?) /\\n+/ { $return = "\nNote: for debugging, this is line number $item{constant}". join('', @{$item[-1]}). ".\n"; } error : '#' 'error' token_sequence(?) { $return = "\nNote: compilation should stop here.\n" . "The message is \"$item{token_sequence}\".\n"; } pragma : '#' 'pragma' token_sequence(?) { my $pragma = join('',@{$item[-1]}); if ($pragma) {$pragma = ' "' . $pragma .'"';} $return = "\nNote: a compiler-dependent pragma$pragma is added here.\n"; } preproc_conditional : /\\n*/ if_line[matchrule => $arg{matchrule}] { $rule_name = $arg{matchrule}; # print STDERR "\n--" . substr($text,0,40)."\n--" ; } (s?) { $return = $item{if_line} . join('',@{$item[-1]}); print STDERR "matched matchrule $arg{matchrule}". substr($text,0,40)."\n$return\n---\n"; } (elif_parts[matchrule => $rule_name])(?) (else_parts[matchrule => $rule_name])(?) { #print STDERR "got here safe $rule_name .\n"; $return .= join('',@{$item[-2]}) . join('',@{$item[-1]}); } /\\n*/ '#' 'endif' { # print STDERR "found endif\n"; $return .= "\nNote: This ends a conditional inclusion section.\n"; } if_line : '#' 'ifdef' { print STDERR "try ifdef\n"; } identifier .../\\n+/ { print STDERR "trying #ifdef long\n"; $return = "\nNote: The current context is interrupted.\n"; $return .= "The next section is used only if $item{identifier} is defined.\n"; } | '#' 'ifndef' identifier /\\n+/ { $return = "\nNote: The current context is interrupted.\n"; $return .= "The next section is used only if $item{identifier} is NOT defined.\n"; } | '#' 'if' { print STDERR "try #if"; } constant_expression "\n" { $return = "\nNote: The current context is interrupted.\n"; $return .= "The next section is used only if we meet this macro condition:\n"; $return .= "\"$item{constant_expression}\".\n"; } elif_parts : ( # elif_line '#' 'elif' constant_expression { $return = "\nNote: we interrupt the current context again.\n"; $return .= "Instead of the previous precondition, we include "; $return .= "the following text based on this condition: \"$item{constant_expression}\"."; # $rule_name = $arg{matchrule}; } ( )[matchrule => $arg{matchrule}](s?) { $return .= join('',@{$item[-1]}); } )(s) else_parts : (/\\n+/)(?) '#' 'else' { $rule_name = $arg{matchrule}; } ()[matchrule => $arg{matchrule}](s?) { $return = "\nNote: we interrupt the current context once more.\n" . "The following section gets included if the previous precondition fails.\n"; $return .= join('',@{$item[-1]}); } token_sequence : token(s) { $return = join(' ',@{$item[1]}); } token : /\\\\\\n/ |/\\S+/ { $return = $item[-1]; $return =~ s/\"/\\\"/; # escaping all quotes. } global_var_declaration : declaration # resetting skip in case we're in a CPP zone function_definition : declaration_specifiers(?) declarator[context => 'function_definition'] '(' {print STDERR "function_definition1 \n";} parameter_type_list(?) ')' {print STDERR "function_definition2 \n";} '{' declaration_list(?) {print STDERR "function_definition2 \n";} statement_list(?) '}' { my $declaration_specifiers = join('', @{$item{declaration_specifiers}}); my $parameter_list = join('', @{$item{parameter_type_list}}); my $declaration_list = join('',@{$item{declaration_list}}); my $statement_list = join('',@{$item{statement_list}}); my $return_type = $item{declarator}; my $name = $item{declarator} ; $name =~ s/^.*?'/'/; $return_type =~ s/\'.*\'//; if ($return_type =~ /\w/ ) { $return_type .= "to a "; $return_type .= $declaration_specifiers; } else { $return_type = $declaration_specifiers; } $return = "\nThis is the definition of the function $name.\n"; if ($declaration_specifiers) { $return .= "It returns a value of the type $return_type.\n"; } if ($parameter_list) { $return .= "The function $name has the following parameters:\n"; $return .= $parameter_list . ".\n\n" ; } if ($declaration_list) { $return .= "The local variables for the function $name begin here:\n" . $declaration_list ; } if ($statement_list ) { $return .= "And now comes the instruction set for the function $name.\n" . $statement_list ; } $return .= "\nAnd here ends the definition of the function $name.\n"; # $return .= $item{compound_statement}; } function_prototype : declaration_specifiers(?) declarator[context => 'function_prototype'] '(' parameter_type_list(?) ')' ';' { my $declaration_specifiers = join('', @{$item{declaration_specifiers}}); my $parameter_list = join('', @{$item{parameter_type_list}}); $return = "Here is a prototype definition for a function named "; my $return_type = $item{declarator}; my $name = $item{declarator} ; $name =~ s/^.*?'/'/; $return_type =~ s/\'.*\'//; if ($return_type =~ /\w/ ) { $return_type .= "to a "; $return_type .= $declaration_specifiers; } else { $return_type = $declaration_specifiers; } $return .= "$name.\n"; $return .= "It has the return value \'$return_type\'.\n"; if ($parameter_list) { $return .= "The function $name has the following parameters:\n"; $return .= $parameter_list . ".\n" ; } } # note that we just dumped context inheritance here. # We might need to do something mor nuanced. compound_statement : '{' declaration_list(?) statement_list(?) '}' { my $declaration_list = join('',@{$item{declaration_list}}); my $statement_list = join('',@{$item{statement_list}}); $return = "A compound statement begins here.\n" ; if ($declaration_list) { $return .= "Some variables are declared.\n" . $declaration_list; } if ($statement_list ) { $return .= "And now some statements.\n" . $statement_list; } $return .= "A compound block of instructions "; if ($arg{context}) { if ($arg{context} =~ /^[aeiou]/i) { $return .= "for an $arg{context} " ; } else { $return .= "for a $arg{context} " ; } } $return .= "ends here.\n"; print STDERR "\ncompound statement in $arg{context}\n$item{statement_list}\n \n$return\ndone\n"; } statement_list : comment(?) preproc[matchrule => 'statement'](?) statement { my $preproc = join('',@{$item{preproc}}); my $comment = join('',@{$item{comment}}); $return = $item{statement}; if ($comment) { $return = $comment . $return; } if ($preproc) { $return = $preproc . $return; } print STDERR "statement_list $return"; } statement_list(?) # { $return .= join('',@{$item{statement_list}}); } statement : jump_statement { $return = $item{jump_statement}; }# | compound_statement[context => $arg{context} , name => $arg{context} ] # | iteration_statement # | selection_statement # | labeled_statement # | expression_statement # This sucker is the hard one. It has the assignments and function calls. #unfolding this: iteration_statement : 'for' '(' for_initialization(?) ';'{print STDERR "forloop start @{$item{for_initialization}}\n";} for_expression(?) ';' {print STDERR "forloop1 start @{$item{for_expression}}\n";} for_increment(?) ')' {print STDERR "forloop2 start @{$item{for_increment}}\n";} statement[context => 'for loop'] ### { my ($initialization, $item_expression, $increment); $initialization = join('',@{$item{for_initialization}}); $item_expression = join('',@{$item{for_expression}}); $increment = join('',@{$item{for_increment}}); $return = "Here we set up for an iteration loop.\n"; if ($initialization) { $return .= "We initialize by performing this instruction:\n$initialization.\n"; } if ($item_expression) { $return .= "We continue the loop as long as the following expression comes out positive:\n\"$item_expression\".\n"; } if ($increment) { $return .= "At the end of each repetition we perform this to increment things:\n$increment.\n"; } $return .= "This is the loop:\n" . $item{statement} ; } | 'while' '(' expression ')' #{print STDERR "trying while"; } statement[context => 'while loop'] { $return = "Here we go into a repetition loop which we stay in as long as the following expression evaluates as positive:\n'$item{expression}'\n"; $return .= $item{statement} . "Now ends the repetition loop.\n"; # print STDERR "while.$return\n.\n"; } | 'do' statement[context => 'do loop'] 'while' '(' expression ')' ';' { $return = "Do the following:\n$item{statement}\nDo this as long as '$item{expression}' evaluates to a positive number.\n"; } selection_statement : 'if' '(' expression[context => 'if block'] ')' {print STDERR "ifblock 0 $item{expression}\n";} statement[context => 'if block'] { $return = "Execute the following part if the expression \"$item{expression}\" evaluates as positive:\n$item{statement}"; } ('else' statement[context => 'else block'] { $return = "If the expression evaluated as negative, execute this section:\n$item{statement}"; })(?) ### { $return .= join('',@{$item[-1]}); } | 'switch' '(' expression ')' statement[context => 'switch'] { $return = "This section is controlled by a switch based on the expression \'$item{expression}\':\n$item{statement}"; } jump_statement : 'break' ';' { $return = "Here we break from the current loop.\n"; } | 'continue' ';' { $return = "Here we return to the top of the current loop and continue it.\n"; } | 'return' (expression[context => 'return'])(?) ';' { my $item_expression = join('',@{$item{expression}}) ; $return = "Here we end the current subroutine.\n"; if (length $item_expression) { $return .= "We return the following value: \"$item_expression\".\n"; } } | 'goto' identifier ';' comment(?) { $return = "Pray for forgiveness from the Demiurge.\n Then, go to the section marked $item{identifier}.\n"; if ($item{comment}) { $return .= $item{comment}; } } # todo: any reason why it's using @item? expression_statement : expression[context => 'statement'](?) ';' { my $item_expression = join('',@{$item[1]}); if (!$item_expression) { $return = "There is a no-op here.\n"; } else { $return = $item_expression.".\n" ; } } labeled_statement : identifier ':' statement # { $return = "The following statement is preceded by the label $item{identifier}.\n$item{statement}"; } | 'case' constant_expression ':' statement[context => 'case'] { $return = "In the case it has the value $item{constant_expression}, do this:\n$item{statement}"; } | 'default' ':' statement { $return = "In the default case, do this:\n$item{statement}"; } for_initialization : expression[context => 'statement'] for_expression : expression[context => 'for_expression'] for_increment : expression[context => 'statement'] expression : $arg{context}] ',' assignment_expression[context => $arg{context}] > { # $return = join(" We're not done yet.\n",@{$item[-1]}); $return = join(". We're not done yet. ",@{$item[-1]}); # print STDERR "expb by comma $return\n"; } #NOTE : here we make the operand of the assignment expression double-quoted. # we have to select a quoting policy for the identifiers and for the # function arguments. assignment_expression : # we have to break inheritance here. unary_expression[context => 'assignment_expression'] assignment_operator[context => $arg{context}] assignment_expression[context => 'assignment_expression'] { my ($foo,$assignment_operator); my $assignment_expression = $item{assignment_expression}; $assignment_operator = $item{assignment_operator}; if ($arg{context} eq 'statement' ) { $return .= "${$item{assignment_operator}}[0] $item{unary_expression}${$item{assignment_operator}}[1] \"$assignment_expression\""; } else { $return = "$item{unary_expression}, $assignment_operator $assignment_expression"; } $nonsyntactic = ''; } | conditional_expression[context => $arg{context}] conditional_expression : logical_OR_AND_expression[context => $arg{context}] # | logical_OR_AND_expression[context => $arg{context}] '?' expression[context => 'conditional_expression1'] ':' conditional_expression[context => 'conditional_expression2'] { $return = "the choice dependent on the value of $item{logical_OR_expression}" . " comprising of $item{expression} or $item{conditional_expression}"; } assignment_operator : '=' { if ($arg{context} eq 'statement') { $return = ['Assign to', ' the value' ] ; } else { $return = ', which is assigned to be '; } # print STDERR '='; } | '+=' { if ($arg{context} eq 'statement') { $return = ['Increase ' , ' by'] } else { $return = 'which is increased by '; } } | '-=' { if ($arg{context} eq 'statement') { $return = ['Decrease ' , ' by']; } else { $return = 'which is decreased by '; } } | '*=' { if ($arg{context} eq 'statement') { $return = ['Multiply ' , ' by'] } else { $return = 'which is multiplied by '; } } | '/=' { if ($arg{context} eq 'statement') { $return = ['Divide ' , ' by' ]; } else { $return = 'which is divided by '; } } | '%=' { if ($arg{context} eq 'statement') { $return = ['Pare down ', ' to modulo '] ; } else { $return = 'which is reduced to modulo '; } } | '<<=' { if ($arg{context} eq 'statement') { $return = ['Bit shift ', ' left by']; } else { $return = 'which is bitshifted left by '; } } | '>>=' { if ($arg{context} eq 'statement') { $return = ['Bit shift ', ' right by']; } else { $return = 'which is bitshifted right by '; } } | '&=' { if ($arg{context} eq 'statement') { $return = ['Bit mask ', ' down by' ]; } else { $return = 'which is bitmasked down by '; } } | '^=' { if ($arg{context} eq 'statement') { $return = ['Bit mask ',' in an exclusive-or operation by']; } else { $return = 'which is xorred by '; } } | '|=' { if ($arg{context} eq 'statement') { $return = ['Bit mask ', ' up by']; } else { $return = 'which is bitmasked up by '; } } constant_expression : conditional_expression # logical_OR_AND_expression : # relational_expression[context => $arg{context}] # # ...';' #{ # $return = $item{relational_expression}; #} $arg{context}] log_OR_AND_bit_or_and_eq rel_add_mul_shift_expression[context => 'logical_OR_AND_expression'] > { if ($arg{context} eq 'for_expression') { print STDERR "hmm2\n";} my @ands = @{$item[1]}; $return = join ('' , @ands) ; # if ($return =~ /bit_reverse/) { print STDERR "logical or2.$return.\n";} } log_OR_AND_bit_or_and_eq : '||' {$return = ' logically orred by ';} | '&&' {$return = ' logically anded by ';} | '|' {$return = ' bitwise orred by ';} | '&' {$return = ' bitwise anded by ';} | '^' {$return = ' bitwise xorred by ';} | '==' { $return = ' checked for equality against ' ; } | '!=' { $return = ' checked for inequality against ' ; } rel_mul_add_ex_op : '+' {$return = ' plus ';} | '-' {$return = ' minus '; } | '*' {$return = ' times '; } | '/' {$return = ' divided by '; } | '%' {$return = ' modulo '; } |'<<' {$return = ' shifted left by ';} | '>>' {$return = ' shifted right by ';} | '>=' {$return = ' checked to be greater than or equal to ';} | "<=" {$return = ' checked to be less than or equal to ';} | '>' {$return = ' checked to be greater than ';} | '<' {$return = ' checked to be less than ';} rel_add_mul_shift_expression :# {print STDERR "got to add_mul.._ex\n$text\n.";} cast_expression[context => $arg{context}] ...';' { $return = $item{cast_expression} ; } | $arg{context}] rel_mul_add_ex_op cast_expression[context => 'add_mul_shift_expression'] > { my @ands = @{$item[1]}; $return = join ('' , @ands); } cast_expression : '(' type_name {print STDERR "recast $item{type_name}\n"; } ')' cast_expression[context => 'recast'] { $return = "a casting into the type \'$item{type_name}\' of $item{cast_expression}"; } | unary_expression[context => $arg{context}] { $return = $item{unary_expression}; } #( ...closure )(?) #{ # if ($arg{context} eq 'statement' #&& !($return =~ /^Perform/) # ) { # if (${$item[-1]}[0]) { # $return .= ".\n"; # } # } #} closure : ',' | ';' | ')' declaration_list :# declaration(s) { # join the declarations together. $return = join('',@{$item{declaration}}); print STDERR "declaration_list $return"; } declaration : declaration_specifiers #{ # if ($item{declaration_specifiers} =~ /byte/) { # print STDERR "trying declaration\n"; # } #} init_declarator_list(?) ';' comment(?) { my $init_declaration_list = join('',@{$item{init_declarator_list}}); if ($item{declaration_specifiers} =~ /type definition/) { $init_declaration_list =~ s/the variable//g; $return = "Note a $item{declaration_specifiers}, and call it$init_declaration_list.\n"; $init_declaration_list =~ s/[\'\s]//g; push @defined_types, split(',',$init_declaration_list) ; # print STDERR join(".\n.",@defined_types); my $foo = pop @defined_types; $foo =~ s/^ and//; push @defined_types, $foo; } else { $return = "Specifying the type $item{declaration_specifiers}, allocate "; $return .= $init_declaration_list . ".\n"; } $return .= join('',@{$item{comment}}); if ($return =~ /csstab1/ ) { print STDERR "declaration done \n$return\n"; } } init_declarator_list : { my @init_decl_list = @{$item[1]}; my $last = ''; if ($#init_decl_list > 1) { $last = pop @init_decl_list; $return = 'the variables ' . join(', ', @init_decl_list) . ', and ' . $last; } elsif ( $#init_decl_list == 1 ) { $return = 'the variables ' . $init_decl_list[0] . ' and ' .$init_decl_list[1]; } else { $return = 'the variable ' . $init_decl_list[0]; } } init_declarator : declarator[context => 'init_declarator'] { $return = $item{declarator}; if ($item{declarator} =~ /csstab/ ) { print STDERR "\ninit_declarator $item{declarator} \n"; } } (# { print STDERR "intialized allocation\n"; } '=' initializer)(?) { my $init = join('',@{$item[-1]}); $return = $item{declarator}; if (length $init) { if ($return =~ /defined to be an array with the size/ ) { $return .= " and initialized to $init"; } else { $return .= " (initialized to $init)"; } } } initializer: comment(?) assignment_expression comment(?) { $return = $item[2]; if (join('',@{$item[1]})) { $return = '['.join('',@{$item[1]}).']' . $return; } if (join('',@{$item[1]})) { $return .= join('',@{$item[-1]}); } } | '{' comment(?) initializer_list (',' )(?) '}' { print STDERR "intialized allocation4\n"; $return = 'the set ' . $item{initializer_list}; } initializer_list : { my @inits = @{$item[1]} ; if ($#inits >1) { my $init = pop @inits; $return = join(', ',@inits) . ', and ' .$init; }elsif ($#inits == 1) { $return = $inits[0] . ' and ' . $inits[1]; } else { $return = $inits[0]; } } unary_expression : postfix_expression[context => $arg{context}] { $return = $item{postfix_expression}; } | '++' unary_expression { if ($arg{context} eq 'statement' ) { $return = "Uptick $item{unary_expression}"; } else { $return = "the now upticked $item{unary_expression}"; } # print STDERR "uptick $return\n"; } | '--' unary_expression { if ($arg{context} eq 'statement' ) { $return = "Downtick $item{unary_expression}"; } else { $return = "the now downticked $item{unary_expression}"; } # print STDERR "downtick $return\n"; } | unary_operator cast_expression[context => $arg{context}] # { $return = $item{unary_operator} . $item{cast_expression}; } |'sizeof' unary_expression { $return = "the memory size of $item{unary_expression}"; } |'sizeof' '(' type_name ')' { $return = "the memory size of the datatype $item{type_name}"; } unary_operator : '&' {$return = 'the memory location of ';} | '*' {$return = 'the memory contents of ';} | '+' {$return = 'the value of ';} | '-' ...constant {$return = 'negative ';} | '-' {$return = 'minus ';} | '~' {$return = "the one's complement of ";} | '!' {$return = 'the logical negation of ';} # here we have several problems. postfix_expression : primary_expression[context => $arg{context}] { # must be global. use stack to prevent disasters. # Todo: this is just a Bad Idea, TM. $return needs to be turned to an anonymous hash with the # arguments doing the right thing and then the last action assembles the sucker. push @basics, $basic; $basic = $item{primary_expression}; $add_on = 0 ; $return = $item{primary_expression}; 1; } ( # function call '(' argument_expression_list(?) ')' { # we're in an un-named sub rule. This is where things get hard. my $arg_exp_list = join('',@{$item{argument_expression_list}}); if ($arg_exp_list) { $return = " as applied to the argument$arg_exp_list"; } else { $return = "without any arguments"; } } )(?) { my $args = join('',@{$item[-1]}); if ($args) { # print STDERR "\nfunction $args\n"; $return = ''; if ($arg{context} eq 'statement') { $return = "Perform "; } # is this function call involving a pointer to a function? if ($basic =~ /parenthetical/) { $return .= "the function pointed by $basic"; } else { $return .= "the function $basic"; } # To discriminate between macros and functions. foreach (@macros) { if ($basic eq "\`$_'") { $return =~ s/function/macro/; } } if ($args =~ /^ as applied to the arg/) { $return .= $args; } # if ($arg{context} eq 'statement') { # $return .= ".\n"; # } } 1; } # array reference and plain expression ( '[' expression[context => 'array_address'] ']' {$return = $item{expression};} )(s?) { my $item_expression = ''; if (@{$item[-1]}) { $item_expression=join(',',@{$item[-1]}); $basic =~ s/^\`//; $basic =~ s/\'$//; } if ( length $item_expression) { $return = "array $basic\'s element at address ($item_expression)"; } } # struct dereferences: ( '.' identifier )(?) { # capitalize when necessary! my $identifier = join('',@{$item[-1]}); if ($identifier) { if ($arg{context} eq 'statement') { $return = 'S'; } else { $return = 's'; } $return .= "tructure $basic's member $identifier"; } } ( '->' identifier )(?) # capitalize when necessary! { my $identifier2 = join('',@{$item[-1]}); if (length $identifier2) { if ($arg{context} eq 'statement') { $return = 'The '; } else { $return = 'the '; } # todo: apply same approach one rank above.... $return .= "member $identifier2 of the structure pointed by $basic"; # print STDERR "checking basic .$basic.\n"; # $identifier2 = ; } } ( '++')(?) { my $increment = join('',@{$item[-1]}); if ($increment) { if ($arg{context} eq 'statement') { $return = "Increment $basic up by one"; } else { $return = "$return (which is incremented up by one)"; } # print STDERR "++$return"; } } ( '--' )(?) { my $increment = join('',@{$item[-1]}); if ($increment) { if ($arg{context} eq 'statement') { $return = "Increment $basic down by one"; } else { $return = "$return (which is incremented down by one)"; } # print STDERR "++$return"; } $basic = pop @basics; 1; } # having done the simplest cases, we go to the catch all for left recursions. | primary_expression postfix_suffix(s) { # todo: test this. formulate a syntax setup. print STDERR "Danger Will Robinson! Untested code testing!!\n"; $return = $item{primary_expression} . "'s " . join('',@{$item{postfix_suffix}}); } postfix_suffix : ('[' expression ']')(s) | '.' identifier | '->' identifier | '++' | '--' argument_expression_list : { my @arg_exp_list = @{$item[1]}; my $last = ''; if ($#arg_exp_list > 1) { $last = pop @arg_exp_list; $return = 's \'' . join('\', \'', @arg_exp_list) . '\', and \'' . $last . '\''; } elsif ( $#arg_exp_list == 1 ) { $return = 's \'' . $arg_exp_list[0] . '\' and ' . "'$arg_exp_list[1]'"; } else { $return = ' ' . "\'$arg_exp_list[0]\'"; } } narrow_closure : ';' | ',' | '->' primary_expression : '(' expression ')' (...narrow_closure)(?) { my $expression = $item{expression} ; my $repeats = 1 ; my $ending = 1 ; if ($expression =~ /^the (\\d+)-layered parenthetical expression/) { $repeats= $1 +1 ; $expression =~ s/^the \\d+-layered parenthetical expression //; } elsif ($expression =~ /^the parenthetical expression/) { $repeats =2 ; $expression =~ s/^the parenthetical expression //; } if ($expression =~ / now$/) { $ending ++; $expression =~ s/ now$//; $expression .= " (now drop $ending layers of context)" ; } elsif ($expression =~ /now drop (\\d+) layers of context\)$/ ) { $ending =~ $1 +1; $expression =~ s/\\d+ layers of context\)$/$ending layers of context \)/; } else { $expression .= ' now'; } if ($repeats > 1) { $return = "the $repeats-layered parenthetical expression $expression"; } else { $return = "the parenthetical expression $expression"; } if (@{$item[-1]}) { $return =~ s/ now$//; } } | constant # | string | identifier { # todo: is this where the quotation marks belong? $return = "`$item{identifier}'"; } string : m{".*?[^\"]"} constant : /-?[0-9]*\\.[0-9]+f?/ { if ($item[1] =~ /\\D/) { $return = "the floating point number $item[1]"; } else { $return = $item[1]; } # print STDERR "floater found" } | /0x[0-9a-fA-F]+/ ('L')(?) { if ($item[-1]) { $return = 'the long ' ."hexadecimal number $item[1]"; } else { $return = 'the ' . "hexadecimal number $item[1]"; } } | /0\\d+/ { $return = "the octal number $item[1]"; } # integer_constant |/-?[0-9]+[lu]?/i { $return = $item[-1]; $return =~ s/[Uu]$/\(unsigned\)/; $return =~ s/[Ll]$/\(long\)/; } | #character_constant m{'.*?[^\']'} |enumeration_constant # needs more. declarator : direct_declarator | pointer direct_declarator { $return = "$item{pointer} $item{direct_declarator}"; } # todo: learn what these recastings actually accomplish... page 122 K&R # IMPORTANT : add type checking. direct_declarator : identifier[context => 'direct_declarator'] { $return = "\'$item{identifier}\'"; } ( '[' constant_expression(?) ']' { if (@{$item{constant_expression}}) { $return = join('',@{$item{constant_expression}}); } else { $return = 'nil'; } } )(s?) { my @array = @{$item[-1]}; if (@array) { $return .= ', defined to be an array with the size ' . join(' by ' , @array); } # print STDERR "array declaration $return\n"; 1; } # | '(' declarator ')' #{ $return = $item{declarator}; }# # # | (identifier)(s?) ('(' declarator ')')(s?) ('[' constant_expression(?) ']' )(s) #{ # # this is an array allocation... # # todo: if we've reached here, we have a problem. # my @array_sizes = @{$item[-1]}; # my @declarators = @{$item[2]}; # my @identifiers = @{$item[1]}; # $return = "oh, boy."; # print STDERR "direct_declarator ???$return\n...\n$text"; #} # # | (identifier)(s?) ('(' declarator ')')(s?) '(' parameter_type_list ')' # | (identifier)(s?) ('(' declarator ')')(s?) '(' identifier_list(?) ')' identifier_list : (identifier ',')(s?) identifier { my @identifier_list = @{$item[1]}; if ($#identifier_list > 1) { $return = join(', ', @identifier_list) . ', and ' . $item{identifier}; } elsif ( $#identifier_list == 1 ) { $return = $identifier_list[1] . ' and ' . $item{identifier}; } else { $return = $item{identifier}; } } parameter_type_list : parameter_list | parameter_list ',' '...' { $return = $item{parameter_list} . ', and possibly other arguments'; } parameter_list : { my @parameter_list = @{$item[1]}; if ($#parameter_list > 1) { $return = pop(@parameter_list); $return = join(', ', @parameter_list) . ', and ' . $return; } elsif ( $#parameter_list == 1 ) { $return = $parameter_list[0] . ' and ' .$parameter_list[1]; } else { $return = $parameter_list[0]; } } parameter_declaration : declaration_specifiers declarator { $return = $item{declaration_specifiers} .' ' . $item{declarator}; # print STDERR "parameter declaration $return\n"; } | declaration_specifiers abstract_declarator(?) abstract_declarator : pointer | pointer(?) direct_abstract_declarator { $return = join('',@{$item{pointer}}) . $item{direct_abstract_declarator}; } # # This is going to require some work handling correctly. direct_abstract_declarator: '(' abstract_declarator ')' | '[' ']' | '[' constant_expression ']' | DAD '[' ']' | DAD '[' constant_expression ']' | '(' ')' | '(' parameter_type_list ')' | DAD '(' ')' | DAD '(' parameter_type_list ')' DAD: #macro for direct_abstract_declarator ( '(' abstract_declarator ')' )(s?) ( '[' ']' )(s?) ( '[' constant_expression ']' )(s?) ( '(' ')' )(s?) ( '(' parameter_type_list ')' )(s?) identifier : ...!reserved identifier_word { $return = $item{identifier_word}; } pointer : ('*')(s) { my $size = $#{$item[1]} +1 ; if ($size > 1) { $return = Lingua::EN::Numbers::Ordinate::ordinate($size) . '-level pointer' ; # $return = &ordinal($size) . '-level pointer' ; } else { $return = 'pointer'; } } | '*' type_qualifier_list(?) pointer { $return = 'a pointer to a ' . $item{type_qualifier_list} . $item{pointer} ; } integer_constant: /[0-9]+/ type_qualifier_list : type_qualifier(s) { $return = join(' ', @{$item{type_qualifier}}); } declaration_specifiers : comment(?) type_specifier ...identifier { $return = join('',@{$item{comment}}) . $item{type_specifier}; } | comment(?) storage_class_specifier declaration_specifiers(?) { my $decl_spec = join(' ',@{$item{declaration_specifiers}}); $return = join('',@{$item{comment}}) . $item{storage_class_specifier} ; if ($decl_spec) {$return .= ' ' . $decl_spec; } } | comment(?) type_specifier declaration_specifiers(?) { my $decl_spec = join(' ',@{$item{declaration_specifiers}}); $return = join('',@{$item{comment}}) . $item{type_specifier} ; if ($decl_spec) {$return .= ' ' . $decl_spec; } } | comment(?) type_qualifier declaration_specifiers(?) { my $decl_spec = $return = join('',@{$item{comment}}) . $item{type_qualifier} . join(' ',@{$item{declaration_specifiers}}); } storage_class_specifier : auto | 'extern' { $return = "(declared elsewhere)"; } | 'static' { $return = "(this declaration is not to be shared)"; } | register | 'typedef' { $return = 'type definition of' ; } type_qualifier : const | 'volatile' const : 'const' { $return = "constant"; } type_specifier : 'double' ('float')(?) { $return = 'double precision floating point number'; } | short | long | 'char' { $return = 'character'; } | 'int' { $return = 'integer'; } | float | 'void' | 'signed' | 'unsigned' | struct_or_union_specifier | enum_specifier | typedef_name # this is the one that has to be fixed! typedef_name : identifier { my $answer = 0; foreach (@defined_types) { if ($item{identifier} eq $_) { $answer = 1; $return = $item{identifier}; } } if (!$answer) { undef $answer; } $answer; } # TODO: this is going to be a hard one. struct_or_union_specifier : comment(?) struct_or_union identifier(?) '{' struct_declaration_list '}' { my $identifier = join('',@{$item{identifier}}); $return = join('',@{$item{comment}}) . $item{struct_or_union} ; if ($identifier) { $return .= "which is called $item{identifier}, " ;} $return .= " which contains the following:\n" . $item{struct_declaration_list}; print STDERR "struct_or_union1 $return\n"; } | struct_or_union identifier { $return = "the $item{struct_or_union} $item{identifier}"; print STDERR "struct_or_union_spec2 $return\n"; } struct_declaration_list : struct_declaration(s) { my $finaldec; my @declarations = @{$item{struct_declaration}}; if ($#declarations > 1 ) { $finaldec = pop @declarations; $return = join(', ', @declarations ) . ', and ' . $finaldec ; } elsif ($#declarations == 1 ) { $return = join(' and ', @declarations ); } else { $return = $declarations[0]; } } struct_declaration : comment(?) specifier_qualifier_list struct_declarator_list ';' { $return = join('',@{$item{comment}}). $item{specifier_qualifier_list} . ' ' . $item{struct_declarator_list}; # print STDERR "struct_declaration $return\n"; } #current drudge type_name : specifier_qualifier_list abstract_declarator(?) { $return = $item{specifier_qualifier_list} . join('',@{$item{abstract_declarator}}); print STDERR "type_name $return "; } specifier_qualifier_list : type_specifier specifier_qualifier_list(?) { $return = $item{type_specifier} . join('',@{$item{specifier_qualifier_list}} ); # print STDERR "specifier_qualifier_list $return "; } struct_declarator_list : struct_declarator | struct_declarator ',' struct_declarator_list { $return = $item{struct_declarator} . join('',@{$item{struct_declarator_list}}); } struct_declarator : declarator | declarator(?) ':' constant_expression { $return = join('',@{$item{declarator}}) . " which is set off the bit field $item{constant_expression}"; } struct_or_union : comment(?) ( 'struct' { $return = 'structure'; } | 'union' ) comment(?) { shift @item; foreach (@item) { if (ref($_)) { $return .= join('',@{$_}); } else { $return .= $_; } } } enum_specifier: 'enum' identifier(?) '{' enumerator_list '}' { $return = 'enumeration ' ; if (@{$item{identifier}}){ $return .= 'identified as ' . join('',@{$item{identifier}}) . ' '; } $return .= 'comprising of ' . $item{enumerator_list} ; } | 'enum' identifier enumerator_list : (enumerator ',')(s?) enumerator { my @enumerator_list = @{$item[1]}; if ($#enumerator_list > 1) { $return = join(', ', @enumerator_list) . ', and ' . $item{enumerator}; } elsif ( $#enumerator_list == 1 ) { $return = $enumerator_list[1] . ' and ' . $item{enumerator}; } else { $return = $item{enumerator_declaration}; } } enumerator : identifier ( '=' constant_expression )(?) { $return = $item[1]; if (@{$item[-1]}) { $return .= 'marking ' . join('', @{$item[-1]}); } } short : 'short' { $return = 'two byte integer'; } long : 'long' { $return = 'eight byte integer'; } auto : 'auto' { $return = "(high priority variable)"; } register : 'register' { $return = "(should remain in the register)"; } float : 'float' { $return = 'floating point number'; } comment : comment_c {$return = $item{comment_c}; } | comment_cxx {$return = $item{comment_cxx}; } comment_c : m{/\*(.*?)\*/}s # /\/\**?\*\// { $return = $item[1]; $return =~ s/^\/\*//; $return =~ s/\*\/$//; $return =~ s/\"/\\\"/g; # reverse script issue number one. $return = "\nThe author adds this comment here:\n\"" . $return . "\"\nNow back to the code.\n"; # print STDERR "$return comment here."; } comment_cxx : m{\/\/(.*?)\n} { $return = $item[1]; $return =~ s/^\/\///; $return = "\nThe author adds this quick comment here:\n" . $return . "\nNow back to the code.\n"; } identifier_word : / [a-z_] # LEADING ALPHA OR UNDERSCORE [a-z0-9_]* # THEN DIGITS ALSO ALLOWED /ix # CASE/SPACE/COMMENT INSENSITIVE # Note: in my opinion, exit deserves special treatment in the script. # then again, I hope to do printf parsing in the script # at some point in the future. # reserved: 'int' | 'double' | 'short' | 'volatile' | 'register' | 'float' | 'signed' | 'unsigned' | 'char' | 'for' | 'if' | 'switch' | 'while' | 'do' | 'case' | 'extern' | 'void' | 'exit' | 'return' | 'auto' | 'break' | 'const' | 'continue' | 'default' | 'else' | 'enum' | 'struct' | 'goto' | 'long' | 'register' | 'sizeof' | 'static' | 'typedef' | "union" ] ; # the actual execution instructions. print STDERR "Looking at this fool grammar.\n"; # # This may be necessary.. # $::RD_AUTOACTION = q { [@item] }; $parser = new Parse::RecDescent ($Grammar) or die "Bad grammar!\n"; my $arg; if ($opt_o) { open(OUTFILE, ">>$opt_o"); *STDOUT = *OUTFILE{IO}; } foreach $arg (@ARGV) { print STDERR "Opening file. $arg\n"; open(CFILE,"$arg") or die "Could not open $arg.\n"; while () { $text .= $_; } close(CFILE); print STDERR "parsing...\n"; # for debugging... if ($opt_t) { $::RD_TRACE = 1; } else { undef $::RD_TRACE ; } print "This file was generated by DECSS: Descriptive English for C Statements and Subroutines.\n"; print "The C source file for it was called '$ARGV[0]'.\n\n\n"; defined $parser->startrule0(\$text) or die "Bad text!".substr( $text,0,200)."\n"; print "----\n"; } print substr($text,0,100);