Viewing file: soen229/math2.pl | Back to directory listing
Author: Loren Segal | Last modified: February 20 2006 07:00 pm | Download

#!/usr/bin/perl -w
# @ Author: Loren Segal
# @ Date: March 2005
#
# MATH - Arithmetic parser and calculator
#
# Usage: ./math.pl [expression]
#        OR perl math.pl [expression]
# (If an expression is not given in command line, it will be prompted via the script)
#
# Description:
# This program was designed to parse a math expression (text format) as a binary tree with
# operator precedence. It supports a symbol file with defined constants, as long as they are
# listed in the following format:
#
# NAME: value=NUMBER
#
# Where NAME must be an identifier ([a-z_]\w*) and NUMBER must be a strict number: \d+(\.\d+)?
#
use strict;
 
## Global constants and variables
my $IDENTIFIER = qr/[a-z_]\w*/i;                # Identifier match
my $NUMBER = qr/\d*\.?\d*/;                   # Number match, error checking is needed when using this
my $STRICTNUMBER = qr/\d+(?:\.\d+)?/;         # Strict number match
my $VALUE = qr/$IDENTIFIER|$NUMBER/;            # Value match, this is a number or identifier
my $OPERATOR = qr/[\+\-\/\*]/;                  # Operators, +-/*
my %MATHTABLE = (                               # Math Table is a list of precedence along with the operator type
        '+' => { NAME => 'add', VALUE => 1 },
        '-' => { NAME => 'sub', VALUE => 2 },
        '*' => { NAME => 'mul', VALUE => 3 },
        '/' => { NAME => 'div', VALUE => 4 }
);
my %SYMTAB = ();
my $ERROR_FLAG = 0;                             # Keep track of syntax errors, 0 = clean expression, 1 = error
## End globals
 
## Prototypes
# Arithmetic prototypes
sub addNode($$$$);
sub insertNode($$);
sub collapseTree($);
sub main($);
sub lex($);
sub syntax_error($);
# Symbol table prototypes
sub init_symtable($);
sub lookup_symbol();
## End prototypes
 
## MAIN PROGRAM SUBROUTINE (called at the end of source)
sub main($)
{
        my($math) = @_;
        $| = 1;                                 # Auto flush the output (some print statements in main get outputted after,
                                                #                        for some reason).
        init_symtable("symbolfile.txt");        # Load the symbol file
        # Stop if we have symbol table errors
        die("% Found symbol table error(s), cannot compute expression.\n") if $ERROR_FLAG;
        if ($math eq '')                        # Take user input if nothing was
        {                                       # entered via command line.
                print "Enter math formula: ";
                $math = <STDIN>;
        }
        else { print "Math operation: $math\n"; }       # Otherwise print the statement being performed
        $math =~ s/\s*//g;                              # Get rid of spaces, they are useless.
        my $answer = do_math($math);                    # Call the main math function
        print "\nAnswer: $answer\n";                    # Spit out the answer to the screen.
}
## END MAIN
 
## Function: init_symtable(FILENAME)
#
## Description: Opens the symbol file and adds the value key to the symbol table for the name
#
sub init_symtable($)
{
        local $_;
        my $line = 0;
        # No need for an individual init_symtable() function that only calls open(), we can do it here
        open(SYMTAB, $_[0]) or die("% Critical Error: could not open symbol file '$_[0]': $!\n");
        # Parse the symbol file
        for (<SYMTAB>)
        {
                chomp;
                next if (!$_ || m/^\s*#.*/);                            # ignore commented and empty lines
                                                                        # comments start with #
                $line++;                                                # keep track of line number for errors
                my ($name) = m/^\s*($IDENTIFIER):/;                     # Get the name at the beginning of the line
                my ($value) = m/(?:\s|:)value=($STRICTNUMBER)(?:\s|$)/; # Find the value= key, that's the only one we need
                if (!$name || !$value)
                {
                        # Give an error if a line of the symbol table file is corrupted or improperly defined.
                        my $reason;
                        if ($name && !$value) { $reason = "missing value key"; }
                        else { $reason = "illegal definition syntax"; }
                        syntax_error("% Error: symbol table has invalid definition at line $line: $reason");
                }
                $SYMTAB{$name} = $value;        # Add the symbol directly to symtab,
                                                # no need for a function in this simple case
        }
        close(SYMTAB);
}
 
## Function: addNode(NODE, VALUE, LEFT_VALUE, RIGHT_VALUE)
#
## Description: Creates a node reference with a value which can then be attached
#               to a tree. Also makes a left and right child if they are defined.
#
sub addNode($$$$)
{
        my ($node, $value, $left, $right) = @_;
        $node->{VALUE} = $value;
        $node->{LEFT}{VALUE} = $left if (defined($left));
        $node->{RIGHT}{VALUE} = $right if (defined($right));
        return $node;
}
 
## Function: insertNode(NODE, VALUE)
#
## Description: Inserts a node inside a tree by replacing the node with a value and setting
#               the old node as a left child of the new one.
#
sub insertNode($$)
{
        my($node, $value) = @_;
        my %tmpnode = %$node;
        addNode($node, $value, undef, undef);
        $node->{LEFT} = \%tmpnode;
}
 
## Function: collapseTree(ROOT)
#
## Description: Resursively travels through the tree starting from root and performs the mathematical
#               operations when it encounters a operator node with two number children. It then stores
#               this calculated value as the new node value and collapses the children. This recursive
#               collapsing creates an end result of the final value being collapsed into the root of the
#               tree.
#
sub collapseTree($)
{
        my($node,$collapse) = @_;
        # Recursive traversal to the end of the tree, collapse all left and right children if we have any
        collapseTree($node->{LEFT}) if ($node->{LEFT});
        collapseTree($node->{RIGHT}) if ($node->{RIGHT});
        return if (!defined($node->{VALUE}));
        # If we're here we've reached a bottom node that is non-empty
        if (defined($MATHTABLE{$node->{VALUE}}{NAME}))
        {
                # For each operator type, perform the respective math function. i'll steer away from eval()
                # just because in a case like this it might be considered a 'cheat'. The point here is to
                # replace the current node's value (which is an operator if we reached this code) with the value
                # of left node, operator, right node (left times right, left divided by right, etc.)
                my ($l,$r,$op) = ($node->{LEFT}{VALUE}, $node->{RIGHT}{VALUE}, $node->{VALUE});
                if ($op eq '*') { $node->{VALUE} = $l * $r; }
                if ($op eq '/') { $node->{VALUE} = $l / $r; }
                if ($op eq '+') { $node->{VALUE} = $l + $r; }
                if ($op eq '-') { $node->{VALUE} = $l - $r; }
                # Collapse the children, for memory purposes
                $node->{LEFT} = undef;
                $node->{RIGHT} = undef;
        }
}
 
## Function: do_math(MATH)
#
## Description: This subroutine basically does nothing but builds a tree from the string input
#               and collapses it, returning the answer which is located at the root node after
#               the tree has been collapsed. This is just an interface to using the tree subs in
#               a more intuitive fashion.
#
sub do_math($)
{
        # Perform the lexical analysis on the expression
        my $root = lex($_[0]);
        # Stop if there are errors from the lexical analysis
        die("% Found syntax errors, cannot compute expression.\n") if $ERROR_FLAG;
        # Otherwise collapse the tree and return the root value
        collapseTree($root);
        return $root->{VALUE};
}
 
## Function: lex(EXPRESSION)
#
## Description: This subroutine will analyze a mathematical expression and bust it up into a binary
#               tree based on operator precedence. If an operator has equal or lower precedence than
#               the previous operator, it will be inserted above the operator with higher precedence in
#               the tree. This will guarantee that a bottom up node traversal will compute the operations
#               with highest precedence first.
#
sub lex($)
{
        local ($_) = @_;
        my $root = {};
        my $node = $root;
        my $prevnode = undef;
        my $rest = undef;
        my $last;
 
        # Basic syntax check: look for consecutive operators in a row
        while (s/($OPERATOR)$OPERATOR+/$1/)
        {
                syntax_error("% Syntax error at term '$&': found ".length($&)." consecutive operators.");
        }
        # Basic syntax check: We also don't want operators at the beginning or end
        while (s/^$OPERATOR// || s/$OPERATOR$//)
        {
                syntax_error("% Syntax error at term '$&': invalid operator at beginning/end of expression");
        }
        # Main lexical analysis loop
        while (!($_ eq ''))
        {
                if (m/^($VALUE)($OPERATOR)?/)
                {
                        $rest = $'; #'
                        my($expr, $op) = ($1, $2);
 
                        # Error checking
                        if ($expr =~ m/^$IDENTIFIER$/)  # Check for identifier
                        {
                                # Don't allow NUMBER followed by IDENTIFIER or vice versa
                                if ($last && $last =~ m/^$VALUE$/)
                                {
                                        syntax_error("% Syntax error at '$last$expr': ambiguous expression format or invalid symbol/value.");
                                }
                                # If the symbol is not defined then we have an error
                                # (a lookup_symbol() function is not necessary, defined() does the work we need.)
                                syntax_error("% Syntax error at term '$expr': undefined symbol value.") if (!defined($SYMTAB{$expr}));
 
                                $expr = $SYMTAB{$expr};         # Othwerwise replace the symbol with its value
                        }
                        elsif ($expr eq '.')
                        {
                                # Number match allows for a single ., but it is not considered an 'invalid floating point format'
                                syntax_error("% Syntax error at term '.': invalid character.");
                        }
                        elsif ($expr =~ m/^$NUMBER$/ && $expr =~ m/^\./ || $expr =~ m/\.$/) # Check for number
                        {
                                # This number began or ended with '.', which is illegal according to the requirements.
                                # So we have to generate an error message.
                                syntax_error("% Syntax error at term '$expr': invalid floating point format.");
                        }
 
                        if ($op) # The match had an operator following the value
                        {
                                if (!defined($node->{PARENT}{VALUE})
                                        || $MATHTABLE{$op}{VALUE} > $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
                                {
                                        # In this case, the operator had higher precendence than the last,
                                        # and therefore must be added below the last node.
                                        addNode($node, $op, $expr, undef);
                                }
                                else
                                {
                                        # If the operator has a lower (or equal) precendence than the last operator,
                                        # we must float up the tree until the condition is no longer met. I call this
                                        # the buoyancy effect.
                                        $node->{VALUE} = $expr;
                                        while ($node->{PARENT}{VALUE}
                                                && $MATHTABLE{$op}{VALUE} <= $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
                                        {
                                                $node = $node->{PARENT};        # Rise above parent node...
                                        }
                                        # We can finally add the node, but we have to insert it before the current node
                                        # So we need a special function that will re-add the current node as a left child
                                        insertNode($node, $op);
                                }
                                $last = $op; # Our last token was an operator
                        }
                        else
                        {
                                # If all we matched was a number (and the end of the line), stick it into the current
                                # nodes value. This will most likely take place in a right child, though it is at the
                                # root if the input is solely a number.
                                $node->{VALUE} = $expr;
                                $last = $expr; # Last token was a number
                        }
                }
                else
                {
                        # Found no valid tokens (not number, operator or identifier)
                        syntax_error("% Syntax error at character '".substr($_,0,1)."': invalid character.\n");
                }
                $_ = $rest;                     # Goto next token
                $prevnode = $node;              # Keep track of the last node
                $node->{RIGHT} = {};            # Create a new node from the current node's right leg
                $node = $node->{RIGHT};         # Change current node to right leg
                $node->{PARENT} = $prevnode;    # Mark the parent node
        }
        return $root;
}
 
## Function: syntax_error(MESSAGE)
#
## Description: Reports a syntax error and sets the error flag. Does not halt execution
#
sub syntax_error($)
{
        print STDERR $_[0]."\n";
        $ERROR_FLAG = 1;
}
 
## BEGIN PROGRAM LISTING
# Call main with command line as parameter
main(join("", @ARGV));
## END LISTING