#!/usr/bin/perl -w
# @ Author: Loren Segal
# @ Date: March 2005
#
# MATH - Arithmetic parser and calculator
#
# Usage: ./math.pl [expression]
# OR perl math.pl [expression]
# (If an expression is not given in command line, it will be prompted via the script)
#
# Description:
# This program was designed to parse a math expression (text format) as a binary tree with
# operator precedence. It supports a symbol file with defined constants, as long as they are
# listed in the following format:
#
# NAME: value=NUMBER
#
# Where NAME must be an identifier ([a-z_]\w*) and NUMBER must be a strict number: \d+(\.\d+)?
#
use strict;
## Global constants and variables
my $IDENTIFIER = qr/[a-z_]\w*/i; # Identifier match
my $NUMBER = qr/\d*\.?\d*/; # Number match, error checking is needed when using this
my $STRICTNUMBER = qr/\d+(?:\.\d+)?/; # Strict number match
my $VALUE = qr/$IDENTIFIER|$NUMBER/; # Value match, this is a number or identifier
my $OPERATOR = qr/[\+\-\/\*]/; # Operators, +-/*
my %MATHTABLE = ( # Math Table is a list of precedence along with the operator type
'+' => { NAME => 'add', VALUE => 1 },
'-' => { NAME => 'sub', VALUE => 2 },
'*' => { NAME => 'mul', VALUE => 3 },
'/' => { NAME => 'div', VALUE => 4 }
);
my %SYMTAB = ();
my $ERROR_FLAG = 0; # Keep track of syntax errors, 0 = clean expression, 1 = error
## End globals
## Prototypes
# Arithmetic prototypes
sub addNode($$$$);
sub insertNode($$);
sub collapseTree($);
sub main($);
sub lex($);
sub syntax_error($);
# Symbol table prototypes
sub init_symtable($);
sub lookup_symbol();
## End prototypes
## MAIN PROGRAM SUBROUTINE (called at the end of source)
sub main($)
{
my($math) = @_;
$| = 1; # Auto flush the output (some print statements in main get outputted after,
# for some reason).
init_symtable("symbolfile.txt"); # Load the symbol file
# Stop if we have symbol table errors
die("% Found symbol table error(s), cannot compute expression.\n") if $ERROR_FLAG;
if ($math eq '') # Take user input if nothing was
{ # entered via command line.
print "Enter math formula: ";
$math = <STDIN>;
}
else { print "Math operation: $math\n"; } # Otherwise print the statement being performed
$math =~ s/\s*//g; # Get rid of spaces, they are useless.
my $answer = do_math($math); # Call the main math function
print "\nAnswer: $answer\n"; # Spit out the answer to the screen.
}
## END MAIN
## Function: init_symtable(FILENAME)
#
## Description: Opens the symbol file and adds the value key to the symbol table for the name
#
sub init_symtable($)
{
local $_;
my $line = 0;
# No need for an individual init_symtable() function that only calls open(), we can do it here
open(SYMTAB, $_[0]) or die("% Critical Error: could not open symbol file '$_[0]': $!\n");
# Parse the symbol file
for (<SYMTAB>)
{
chomp;
next if (!$_ || m/^\s*#.*/); # ignore commented and empty lines
# comments start with #
$line++; # keep track of line number for errors
my ($name) = m/^\s*($IDENTIFIER):/; # Get the name at the beginning of the line
my ($value) = m/(?:\s|:)value=($STRICTNUMBER)(?:\s|$)/; # Find the value= key, that's the only one we need
if (!$name || !$value)
{
# Give an error if a line of the symbol table file is corrupted or improperly defined.
my $reason;
if ($name && !$value) { $reason = "missing value key"; }
else { $reason = "illegal definition syntax"; }
syntax_error("% Error: symbol table has invalid definition at line $line: $reason");
}
$SYMTAB{$name} = $value; # Add the symbol directly to symtab,
# no need for a function in this simple case
}
close(SYMTAB);
}
## Function: addNode(NODE, VALUE, LEFT_VALUE, RIGHT_VALUE)
#
## Description: Creates a node reference with a value which can then be attached
# to a tree. Also makes a left and right child if they are defined.
#
sub addNode($$$$)
{
my ($node, $value, $left, $right) = @_;
$node->{VALUE} = $value;
$node->{LEFT}{VALUE} = $left if (defined($left));
$node->{RIGHT}{VALUE} = $right if (defined($right));
return $node;
}
## Function: insertNode(NODE, VALUE)
#
## Description: Inserts a node inside a tree by replacing the node with a value and setting
# the old node as a left child of the new one.
#
sub insertNode($$)
{
my($node, $value) = @_;
my %tmpnode = %$node;
addNode($node, $value, undef, undef);
$node->{LEFT} = \%tmpnode;
}
## Function: collapseTree(ROOT)
#
## Description: Resursively travels through the tree starting from root and performs the mathematical
# operations when it encounters a operator node with two number children. It then stores
# this calculated value as the new node value and collapses the children. This recursive
# collapsing creates an end result of the final value being collapsed into the root of the
# tree.
#
sub collapseTree($)
{
my($node,$collapse) = @_;
# Recursive traversal to the end of the tree, collapse all left and right children if we have any
collapseTree($node->{LEFT}) if ($node->{LEFT});
collapseTree($node->{RIGHT}) if ($node->{RIGHT});
return if (!defined($node->{VALUE}));
# If we're here we've reached a bottom node that is non-empty
if (defined($MATHTABLE{$node->{VALUE}}{NAME}))
{
# For each operator type, perform the respective math function. i'll steer away from eval()
# just because in a case like this it might be considered a 'cheat'. The point here is to
# replace the current node's value (which is an operator if we reached this code) with the value
# of left node, operator, right node (left times right, left divided by right, etc.)
my ($l,$r,$op) = ($node->{LEFT}{VALUE}, $node->{RIGHT}{VALUE}, $node->{VALUE});
if ($op eq '*') { $node->{VALUE} = $l * $r; }
if ($op eq '/') { $node->{VALUE} = $l / $r; }
if ($op eq '+') { $node->{VALUE} = $l + $r; }
if ($op eq '-') { $node->{VALUE} = $l - $r; }
# Collapse the children, for memory purposes
$node->{LEFT} = undef;
$node->{RIGHT} = undef;
}
}
## Function: do_math(MATH)
#
## Description: This subroutine basically does nothing but builds a tree from the string input
# and collapses it, returning the answer which is located at the root node after
# the tree has been collapsed. This is just an interface to using the tree subs in
# a more intuitive fashion.
#
sub do_math($)
{
# Perform the lexical analysis on the expression
my $root = lex($_[0]);
# Stop if there are errors from the lexical analysis
die("% Found syntax errors, cannot compute expression.\n") if $ERROR_FLAG;
# Otherwise collapse the tree and return the root value
collapseTree($root);
return $root->{VALUE};
}
## Function: lex(EXPRESSION)
#
## Description: This subroutine will analyze a mathematical expression and bust it up into a binary
# tree based on operator precedence. If an operator has equal or lower precedence than
# the previous operator, it will be inserted above the operator with higher precedence in
# the tree. This will guarantee that a bottom up node traversal will compute the operations
# with highest precedence first.
#
sub lex($)
{
local ($_) = @_;
my $root = {};
my $node = $root;
my $prevnode = undef;
my $rest = undef;
my $last;
# Basic syntax check: look for consecutive operators in a row
while (s/($OPERATOR)$OPERATOR+/$1/)
{
syntax_error("% Syntax error at term '$&': found ".length($&)." consecutive operators.");
}
# Basic syntax check: We also don't want operators at the beginning or end
while (s/^$OPERATOR// || s/$OPERATOR$//)
{
syntax_error("% Syntax error at term '$&': invalid operator at beginning/end of expression");
}
# Main lexical analysis loop
while (!($_ eq ''))
{
if (m/^($VALUE)($OPERATOR)?/)
{
$rest = $'; #'
my($expr, $op) = ($1, $2);
# Error checking
if ($expr =~ m/^$IDENTIFIER$/) # Check for identifier
{
# Don't allow NUMBER followed by IDENTIFIER or vice versa
if ($last && $last =~ m/^$VALUE$/)
{
syntax_error("% Syntax error at '$last$expr': ambiguous expression format or invalid symbol/value.");
}
# If the symbol is not defined then we have an error
# (a lookup_symbol() function is not necessary, defined() does the work we need.)
syntax_error("% Syntax error at term '$expr': undefined symbol value.") if (!defined($SYMTAB{$expr}));
$expr = $SYMTAB{$expr}; # Othwerwise replace the symbol with its value
}
elsif ($expr eq '.')
{
# Number match allows for a single ., but it is not considered an 'invalid floating point format'
syntax_error("% Syntax error at term '.': invalid character.");
}
elsif ($expr =~ m/^$NUMBER$/ && $expr =~ m/^\./ || $expr =~ m/\.$/) # Check for number
{
# This number began or ended with '.', which is illegal according to the requirements.
# So we have to generate an error message.
syntax_error("% Syntax error at term '$expr': invalid floating point format.");
}
if ($op) # The match had an operator following the value
{
if (!defined($node->{PARENT}{VALUE})
|| $MATHTABLE{$op}{VALUE} > $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
{
# In this case, the operator had higher precendence than the last,
# and therefore must be added below the last node.
addNode($node, $op, $expr, undef);
}
else
{
# If the operator has a lower (or equal) precendence than the last operator,
# we must float up the tree until the condition is no longer met. I call this
# the buoyancy effect.
$node->{VALUE} = $expr;
while ($node->{PARENT}{VALUE}
&& $MATHTABLE{$op}{VALUE} <= $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
{
$node = $node->{PARENT}; # Rise above parent node...
}
# We can finally add the node, but we have to insert it before the current node
# So we need a special function that will re-add the current node as a left child
insertNode($node, $op);
}
$last = $op; # Our last token was an operator
}
else
{
# If all we matched was a number (and the end of the line), stick it into the current
# nodes value. This will most likely take place in a right child, though it is at the
# root if the input is solely a number.
$node->{VALUE} = $expr;
$last = $expr; # Last token was a number
}
}
else
{
# Found no valid tokens (not number, operator or identifier)
syntax_error("% Syntax error at character '".substr($_,0,1)."': invalid character.\n");
}
$_ = $rest; # Goto next token
$prevnode = $node; # Keep track of the last node
$node->{RIGHT} = {}; # Create a new node from the current node's right leg
$node = $node->{RIGHT}; # Change current node to right leg
$node->{PARENT} = $prevnode; # Mark the parent node
}
return $root;
}
## Function: syntax_error(MESSAGE)
#
## Description: Reports a syntax error and sets the error flag. Does not halt execution
#
sub syntax_error($)
{
print STDERR $_[0]."\n";
$ERROR_FLAG = 1;
}
## BEGIN PROGRAM LISTING
# Call main with command line as parameter
main(join("", @ARGV));
## END LISTING Powered by
GeSHi Syntax Highlighting software.
Author of all (other) material unless otherwise specified:
Loren Segal. Copyright 2005.