Viewing file: soen229/tcompile.pl | Back to directory listing
Author: Loren Segal | Last modified: February 20 2006 07:00 pm | Download

#!/usr/bin/perl -w
# @ Author: Loren Segal
# @ Date: March 2005
#
# DESCRIPTION:
# ------------
# This is the final version of the T compiler for the SOEN229 Team Project. This script will generate output T assembly code for a designated
# simulator script depending on the given source input. Usage of the script is as follows:
#
#       perl tcompile.pl <source_file.t>
#               -- OR --
#       ./tcompile.pl <source_file.t>
#
# The script will generate the file source_file.tsm (where source_file is the name of the input file minus the extension)
#
# Also keep in mind that the symbol file must be in the same directory as the compiler under the name 'symbolfile.txt'
#
#
use strict;     # strict programming style
 
# General functions
sub main();
sub usage();
sub tsm_print($);
sub tsm_buffer_print;   # takes variable length arguments
# Symbol table functions
sub init_symtable($);
sub load_symtable();
sub lookup_symbol($);
# Code parsing and parse_ functions
sub code_parse($);
sub syntax_error($);
sub expect_error($$);
sub parse_expr($);
sub parse_identifier($);
sub parse_assignment($);
sub parse_seperator($);
sub parse_newline($);
sub parse_statement();
#################
## Math module ##
#################
sub math_addNode($$$$);
sub math_insertNode($$);
sub math_collapseTree($);
sub math_lex($);
sub do_math($);
 
###
## GLOBAL VARIABLES
###
my(%SYMTAB);                    # Global Symbol Table
my(%PARSETABLE) = (
  'IDENTIFIER' => {
    'regex'    => qr/[a-z_]\w*/i,
    'callback' => \&parse_identifier
  },
  'UNSIGNED_VALUE' => {
    'regex'    => qr/\d+(?:\.\d+)?/,
    'callback' => \&parse_expr
  },
  'END_STATEMENT' => {
    'regex'    => qr/;|$/,
    'callback' => \&parse_seperator
  },
  'NEWLINE' => {
    'regex'    => qr/\r?\n/,
    'callback' => \&parse_newline
  },
  'ASSIGNMENT' => {
    'regex'    => qr/=/,
    'callback' => \&parse_assignment
  },
  'ARITHMETIC' => {
    'regex'    => qr/[\(\)\+\-\*\/]/,
    'callback' => \&parse_expr
  }
);
my $STATE = {};                                 # STATE keeps track of various statement values
my @TSM_BUFFER;                                 # All TSM statements of the source file
my $LINE_NUMBER = 1;                            # Keep track of line numbers
my $ERROR_FLAG = 0;                             # Keep track of syntax errors, 0 = clean expression, 1 = error
my $IDENTIFIER = qr/[a-z_]\w*/i;                # Identifier match
my $NUMBER = qr/\d*\.?\d+/i;                    # Number match, error checking is needed when using this
my $STRICTNUMBER = qr/\d+(?:\.\d+)?/;           # Strict number match
my $VALUE = qr/$IDENTIFIER|$NUMBER/;            # Value match, this is a number or identifier
my $OPERATOR = qr/[+-\/*]/;                     # Operators, +-/*
my $BRACKET = qr/\(\s*(?:$VALUE\s*|$OPERATOR\s*|[\(\)]\s*)+?\s*\)/;
my $EXPR = qr/$VALUE|$BRACKET/;
my %MATHTABLE = (
        '+' => { NAME => 'add', VALUE => 1 },
        '-' => { NAME => 'sub', VALUE => 2 },
        '*' => { NAME => 'mul', VALUE => 3 },
        '/' => { NAME => 'div', VALUE => 4 }
);
 
sub main()
{
        my $file = $ARGV[0] or usage();
        init_symtable("symbolfile.txt");                                # Load the symbol file
        load_symtable();                                                # Load symbol values
        code_parse($file);                                              # Parse the input source file
        tsm_buffer_print($file.($file =~ /\.t$/ ? 'sm' : '.tsm'));      # Print buffer to .tsm output file
        tsm_buffer_print();                                             # Print buffer to screen for debugging
}
 
## Function: usage()
#
## Description: lists the required parameters to use the script file and quits
#
sub usage()
{
        die  "usage: perl tcompile.pl <source_file.t>\n";
}
 
####
### SYMBOL TABLE FUNCTIONS
####
 
## Function: init_symbtable(FILENAME)
#
## Description: Opens the symbol table for reading under the filehandle SYMTAB
#
sub init_symtable($)
{
  open(SYMTAB, $_[0]) or die("% Error: could not open file '$_[0]': $!\n");
}
 
## Function: load_symtable()
#
## Description: Adds all of the key/value pairs to their
#               appropriate symbol name in the SYMTAB.
#
sub load_symtable()
{
  local $_;
  my($line, $valid) = (0, 1);
  for (<SYMTAB>)
  {
    chomp;
    next if (!$_ || m/^\s*#.*/);          # ignore commented and empty lines
                                          # -- NOT required by assignment.
    $line++;
    my($name) = m/^\s*(\w+):\s*\w+=\w+/;  # matches-> NAME: *=* ...
    my(@matches) = m/(\w*)=(\w*)/g;       # matches-> *=*
    $valid = 1;
    for(my($i) = 0; $i <= $#matches; $i++)
    {
      if ($matches[$i] eq '') { $valid = 0; last; }
    }
    if (!$name || !$valid)
    {
      # Give an error if a line of the symbol table file is corrupted or
      # improperly defined.
      die("% Error: symbol table has invalid definition at line $line.\n");
    }
    # Add the symbol- it is valid.
    add_symbol($name, @matches);
  }
  close(SYMTAB);
}
 
## Function: add_symbol(NAME, HASH_VALUES)
#
## Description:
# Adds a symbol NAME to the SYMTABLE along with its key/value pairs defined by
# HASH_VALUES
#
sub add_symbol($@)
{
  my($name, %symbols) = @_;
  return if (!values(%symbols));
  $symbols{count} = 0;       # Force count to be initialized at 0,
                             # even if it is not declared or set as something
                             # else in the symbol file, which is not legal.
  $SYMTAB{$name} = \%symbols;
}
 
## Function: lookup_symbol(NAME)
#
## Description:
# Looks up a symbol in SYMTAB and returns true or false (undef) if it exists
# or not, respectively.
#
sub lookup_symbol($)
{
        return (defined($SYMTAB{$_[0]}) ? !undef : undef);
}
 
 
## Function: code_parse(FILENAME)
#
## Description:
# This is the true heart of the program. All of the lexical analysis is done
# here. This function will make use of the %PARSETABLE along with all of its
# grammar definitions and callback functions. This function will also
# automatically report all recognized tokens unless otherwise specified.
#
sub code_parse($)
{
        local $_;
        my($file, $found_match) = @_;   # file is arg1, found_match will not be set
 
        # Open file, read into one big scalar, and close
        open(CODE, $file) or die "% Error: could not open file '$file': $!\n";
        $_ = join "", <CODE>;
        close CODE;
 
        $LINE_NUMBER = 1;
        # Basic syntax check, cannot begin code with a seperator
        if (s/^\s+;//)
        {
                syntax_error("Invalid statement seperator at beginning of file.");
        }
        # Main code parsing loop
        while ($_ ne '')
        {
                $found_match = 0;
                while (my($name, $values) = each(%PARSETABLE))
                {
                        s/^([ \t]*)//;                  # Get rid of extraneous spaces.
                        $STATE->{space} = $1 if ($1);
                        last if (!defined($_));         # If we removed the last of $_, leave.
                        my($re, $callback) = ($values->{regex}, $values->{callback});
                        if (m/^$re/i) {
                                $found_match = 1;
                                $STATE->{token} = $&;   # update token in STATE
 
                                # Get the current symbol type (update name if it's a keyword)
                                # and check if it throws an expect_error()
                                if (lookup_symbol($&) && $SYMTAB{$&}{type} eq 'keyword')
                                {
                                        $name = $SYMTAB{$&}{subtype};
                                }
                                # throw a possible expect_error()
                                expect_error($STATE->{next},$name);
                                $STATE->{next} = '' unless ($name eq 'NEWLINE');
 
                                # Call callback function for any data handling.
                                &$callback($&) if ($callback && defined(&$callback));
 
                                # Log syntax state changes / update current syntax state
                                $STATE->{last_token} = $STATE->{token} unless ($name eq 'NEWLINE');
 
                                $_ = $';  #' Goto next token
                        }
                }
                # A character/token was found that is not in the grammar of the language.
                if (!$found_match && $_)
                {
                        s/\S+(?=;|$)|\S+//;
                        syntax_error("Invalid token in context: '$&'");
                }
        }
        # throw a possible expect_error() for the end of the file
        expect_error($STATE->{next}, "END_OF_FILE");
        # there is still a statement to parse, parse it
        if ($STATE->{type} || $STATE->{next})
        {
                parse_statement();
        }
        # If we the child of a parent's $STATE, then we are in an unclosed if/while block
        while ($STATE->{parent}{type})
        {
                syntax_error("Premature EOF found: $STATE->{parent}{type} was not closed.");
                $STATE = $STATE->{parent};
        }
        # Print the end of assembly code
        tsm_print("halt\n");
}
 
## Function: expect_error(REASON)
#
## Description:
# This function is meant to print more intuitive error messages for the 'expecting' error catching system.
# If the token found was not the type token that was expected by the last token, the parser will call this function.
# Multiple token types can be expected. Order with multiple tokens, however, is important. Only the first token type
# will be printed as being expected- this is for clarity purposes. Also because one token is usually more expected than another
# depending on the situation (endif is more expected than an else inside an if block).
#
sub expect_error($$)
{
        my($expecting, $found) = @_;
        return if (!$expecting); # not expecting anything, leave
 
        # Replace common names with their definitions
        # Common names are: EXPRESSION, STATEMENT, END_STATEMENT
        $expecting =~ s/(EXPRESSION)/$1,IDENTIFIER,ARITHMETIC,UNSIGNED_VALUE/;
        $expecting =~ s/(STATEMENT)/$1,IDENTIFIER,IF_BEGIN,IF_ELSE,WHILE_BEGIN/;
        $expecting =~ s/(END_STATEMENT)/$1,END_OF_FILE/;
 
        # Check if the matched token is part of the expected types
        if (!$expecting || grep(/(?:^|,)$found(?:$|,)/i, "$expecting,NEWLINE"))
        {
                # The token we found is what we were expecting it to be
                return;
        }
        elsif (grep(/^ASSIGNMENT,.+/,$expecting))
        {
                # Sit this error out, do_math will catch it with much more precision
                syntax_error("Variable '$STATE->{last_token}' used without having been declared.");
        }
        else
        {
                # The token we found is not part of the expected types, throw the error
                $expecting =~ s/,.+//;
                $expecting =~ s/IDENTIFIER/statement/;
                syntax_error("Expecting $expecting, found $found".  ($STATE->{token} && $STATE->{token} =~ m/^$IDENTIFIER$/ ? ": '$STATE->{token}'" : ""));
 
                # If we're throwing an error for an some form of statement ending we have
                # to manually leave the if/while block because it won't be handled by parse_statement
                if ($STATE->{parent}{type} && (($found eq 'IF_END' && $STATE->{parent}{type} eq 'IF_BLOCK') ||
                                               ($found eq 'WHILE_END' && $STATE->{parent}{type} eq 'WHILE_BLOCK')))
                {
                        $STATE = $STATE->{parent};
                }
        }
}
 
## Function: syntax_error(REASON)
#
## Description:
# This function will throw an error and halt output of the T assembly file as well as halt error output until
# the next statement.
#
sub syntax_error($)
{
        print STDERR "% Parse error on line $LINE_NUMBER: $_[0]\n" if (!$STATE->{ERROR});
        #if (!$ERROR_FLAG) { tsm_buffer_print(); }      # DEBUGGING ONLY
        $ERROR_FLAG = 1;
        $STATE->{ERROR} = 1;
}
 
 
## Function: tsm_print(DATA)
#
## Description:
# This function prints the assembled T code to the output buffer
# but makes sure there were no previous errors in the code.
#
sub tsm_print($)
{
        push @TSM_BUFFER, $_[0] if (!$ERROR_FLAG);
}
 
## Function: tsm_buffer_print(FILEHANDLE)
#
## Description: Prints the buffer to the selected file or STDOUT if no filename is present
#
sub tsm_buffer_print($)
{
        # Don't print output if there is a parsing error
        return if ($ERROR_FLAG);
        my($file) = @_;
        # Join buffer by UNIX style newlines
        my $tsm_output = join("\n",@TSM_BUFFER);
        if ($file)
        {
                open OUT, ">$file";
                print OUT $tsm_output;
                close OUT;
        }
        else
        {
                print STDOUT $tsm_output;
        }
}
 
###
## CUSTOM PARSING AND CALLBACK FUNCTIONS
###
 
## Function: parse_expr(EXPRESSION)
#
## Description: Custom callback for all token types that fall under the EXPRESSION common name category.
#               This function is generally meant to update the state machine for the next expected token
#               as well as add all valid expression tokens to the statement's 'parameter list'.
#
sub parse_expr($)
{
        my($match) = @_;
        if ($STATE->{type})
        {
                # Update the next expected token depending on if we're in a while condition, if condition or assignment/print statement
                if ($STATE->{type} eq 'IF_CONDITION') { $STATE->{next} = 'IF_CONDITION_END,EXPRESSION'; }
                elsif ($STATE->{type} eq 'WHILE_CONDITION') { $STATE->{next} = 'WHILE_CONDITION_END,EXPRESSION'; }
                else {
                        # This will match an assignment or print statement, we're only looking for an end statement here
                        # or more of an expr depending on if we have received any part of the expr yet
 
                        #print "Found $STATE->{type} $STATE->{parent} and $match\n";
                        if (!$STATE->{parameter}) { $STATE->{next} = 'EXPRESSION,END_STATEMENT'; }
                        else { $STATE->{next} = 'END_STATEMENT,EXPRESSION'; }
                        if ($STATE->{parent}{type})
                        {
                                # However, if we're in a if/while block, we have to add the possibility of catching an if/while end or else
                                if ($STATE->{parent}{type} eq 'IF_BLOCK') { $STATE->{next} = 'IF_END,IF_ELSE,'.$STATE->{next}; }
                                if ($STATE->{parent}{type} eq 'WHILE_BLOCK') { $STATE->{next} = 'WHILE_END,'.$STATE->{next}; }
                        }
                }
                if (lookup_symbol($match) && $SYMTAB{$match}{type} ne 'variable')
                {
                        # We're inside an expression but a keyword or function call was used- this should generate an error
                        if ($SYMTAB{$match}{type} eq 'keyword') { expect_error($STATE->{next}, $SYMTAB{$match}{subtype}); }
                        else { syntax_error("Cannot use $SYMTAB{$match}{type} '$match' in $STATE->{type}"); }
                }
                else
                {
                        #print "param $match in type $STATE->{type}\n";
                        #
                        # Add the token to the parameter list with possible spaces
                        my $s = ($STATE->{space} ? $STATE->{space} : '');
                        $STATE->{parameter} .= "$s$match";
                        $STATE->{space} = '';
                }
        }
}
 
## Function: parse_identifier(IDENTIFIER)
#
## Description: Probably the most important callback function- handles all identifiers, specifically including
#               keywords, which are used to control the if/while block structure of the STATE variable, also to control
#               the expected tokens in program flow for the if/while structure. All identifiers are also passed to parse_expr()
#
sub parse_identifier($)
{
        my($match) = @_;
        # parse the identifier as an expression as well
        parse_expr($match);
 
        # Begin identifier checks
        if (!lookup_symbol($match))
        {
                # Identifier has not yet been defined, the next token should be of type ASSIGNMENT
                # But if we're inside a STATEMENT, also allow the statement to end (END_STATEMENT) and let
                # the expression parser (aka  the math module) handle the undefined variable.
                $STATE->{next} = 'ASSIGNMENT';
                $STATE->{next} .= ',END_STATEMENT' if ($STATE->{type});
                return;
        }
        elsif (!$STATE->{type} && $SYMTAB{$match}{type} eq 'function')
        {
                # The identifier is a function (most likely print), set the STATE type
                $STATE->{type} = 'FUNCTION';
                $STATE->{lvalue} = $match;
        }
        elsif (!$STATE->{ERROR} && $SYMTAB{$match}{type} eq 'keyword')  # If there was an error in this statement don't handle it
        {                                                               # this will stop keywords like do from opening while blocks if they
                                                                        # are not placed properly.
                # Identifier is a keyword, update the counter
                $SYMTAB{$match}{count}++;
                my $subtype = $SYMTAB{$match}{subtype};
                # Find out what kind of keyword
                if (!$STATE->{type} && $subtype eq 'IF_BEGIN') # We've begun an if block (cannot be inside a STATE type)
                {
                        $STATE->{type} = 'IF_BLOCK';
                        $STATE->{number} = $SYMTAB{$match}{count};      # label depends on the symbol count
                        $STATE->{else} = undef;                         # prepare for a possible else block
                        # Jump into a new child STATE
                        my $t = $STATE;
                        $STATE = { parent => $t, type => 'IF_CONDITION', next => 'EXPRESSION' };
                }
                elsif ($subtype eq 'IF_CONDITION_END') # We've ended an if block condition
                {
                        if (!$STATE->{type} || ($STATE->{type} && $STATE->{type} ne 'IF_CONDITION'))
                        {
                                syntax_error("Cannot use $subtype outside IF_CONDITION");
                        }
                        else
                        {
                                do_math($STATE->{parameter});                   # Parse the condition
                                tsm_print("gofalse __ifend_$STATE->{parent}{number}");  # goto the end of the if block (or else) if false
 
                                # Clear the STATE
                                $STATE = { parent => $STATE->{parent}, next => 'STATEMENT,IF_END,IF_ELSE' };
                        }
                }
                elsif ($subtype eq 'IF_ELSE') # We've begun an if-else block
                {
                        if (!$STATE->{parent}{type} || (!$STATE->{parent}{type} && $STATE->{parent}{type} ne 'IF_BLOCK'))
                        {
                                syntax_error("Cannot use $subtype outside IF_BLOCK");
                        }
                        else
                        {
                                parse_statement(); # parse the statement if there is one
                                $STATE->{parent}{else} = 1;
                                tsm_print("goto __ifend1_$STATE->{parent}{number}");    # Goto the end of the if block before jumping to else
                                tsm_print("label __ifend_$STATE->{parent}{number}");    # Else block label
                                $STATE->{next} = 'STATEMENT,IF_END';                    # Expecting an endif or statement
                        }
                }
                elsif ($subtype eq 'IF_END') # Ending an if block
                {
                        if (!$STATE->{parent}{type} || ($STATE->{parent}{type} && $STATE->{parent}{type} ne 'IF_BLOCK'))
                        {
                                syntax_error("Cannot use $subtype outside IF_BLOCK");
                        }
                        else
                        {
                                parse_statement(); # parse the statement if there is one
                                # Jump back to parent's STATE
                                $STATE = $STATE->{parent};
                                my $c = ($STATE->{else} ? '1' : '');
                                tsm_print("label __ifend".$c."_$STATE->{number}");      # Label for the end of the if block
 
                                # Clear the current state since we just ended a statement
                                # but give it a fake type in case parse_statement() is called on the endif/endwhile
                                $STATE = { type => $subtype, parent => $STATE->{parent} };
                        }
                }
                elsif (!$STATE->{type} && $subtype eq 'WHILE_BEGIN') # We've begun a while block (cannot be inside a STATE type)
                {
                        $STATE->{type} = 'WHILE_BLOCK';
                        $STATE->{number} = $SYMTAB{$match}{count};      # label depends on the symbol count
                        # Create a new child STATE symbolizing the new block
                        my $t = $STATE;
                        $STATE = { parent => $t, type => 'WHILE_CONDITION', next => 'EXPRESSION' };
                }
                elsif ($subtype eq 'WHILE_CONDITION_END') # Ending a while condition
                {
                        if (!$STATE->{type} || ($STATE->{type} && $STATE->{type} ne 'WHILE_CONDITION'))
                        {
                                syntax_error("Cannot use $subtype outside WHILE_CONDITION");
                        }
                        else
                        {
                                tsm_print("label __whilebegin_$STATE->{parent}{number}");       # Label for the while condition (beginning of while block)
                                do_math($STATE->{parameter});                                   # Parse the expression (condition)
                                tsm_print("gofalse __whileend_$STATE->{parent}{number}");       # Jump to end of block if false
 
                                # Clear the STATE
                                $STATE = { parent => $STATE->{parent}, next => 'STATEMENT,WHILE_END' };
                        }
                }
                elsif ($subtype eq 'WHILE_END') # Ending a while block
                {
                        if (!$STATE->{parent}{type} || ($STATE->{parent}{type} && $STATE->{parent}{type} ne 'WHILE_BLOCK'))
                        {
                                syntax_error("Cannot use $subtype outside WHILE_BLOCK");
                        }
                        else
                        {
                                parse_statement(); # parse the statement if there is one
                                # Jump back to parent's STATE
                                $STATE = $STATE->{parent};
                                tsm_print("goto __whilebegin_$STATE->{number}");        # Goto the beginning of the while (re-evaluate condition)
                                tsm_print("label __whileend_$STATE->{number}");         # Label for the end of the while block
 
                                # Clear the current state since we just ended a statement
                                # but give it a fake type in case parse_statement() is called on the endif/endwhile
                                $STATE = { type => $subtype, parent => $STATE->{parent} };
                        }
                }
        }
}
 
## Function: parse_assignment(T_EQUALS)
#
## Description: Custom callback when an EQUALS token is found. Updates the STATE variable to log which
#               variable is being set, and does syntax checking on if the variable name is valid.
#
sub parse_assignment($)
{
        my($match) = @_;
        if (lookup_symbol($STATE->{last_token}) && $SYMTAB{$STATE->{last_token}}{type} eq 'keyword')
        {
                # Variable name is actually a keyword, illegal.
                syntax_error("Cannot use defined keyword '$STATE->{last_token}' as a variable name.");
        }
        elsif ($STATE->{last_token} !~ m/^$IDENTIFIER$/)
        {
                # Variable name is not a valid identifier (m/[a-z_]\w*/i)
                syntax_error("Invalid assignment to non-identifier '$STATE->{last_token}'.");
        }
        else
        {
                if ($STATE->{type})
                {
                        # The variable name is valid, but we're already inside a statement
                        syntax_error("Cannot perform an assignment within a statement.");
                }
                else
                {
                        # Everything checked out ok, set the STATE type and log the variable name
                        $STATE->{type} = 'ASSIGNMENT';
                        $STATE->{lvalue} = $STATE->{last_token};
                }
        }
}
 
## Function: parse_newline(NEWLINE)
#
## Description: Custom callback for the newline. Mainly for incrementing the line counter.
#
sub parse_newline($)
{
        $LINE_NUMBER++;
}
 
## Function: parse_seperator(SEPERATOR)
#
## Description: Custom callback to force the end of a statement. Force the current statement to be parsed
#               and update the next expected token to be a new statement.
sub parse_seperator($)
{
        parse_statement();
        $STATE->{next} = 'STATEMENT';
        $STATE->{ERROR} = 0;
}
 
## Function: parse_statement()
#
## Description: Called at the end of a statement to clear the STATE and handle any generic statement types
#               (function calls and assignments). This function is sometimes called after a statement has already
#               been handled, so it does not always contain information to parse.
#
sub parse_statement()
{
        if ($STATE->{type})
        {
                if ($STATE->{type} eq 'ASSIGNMENT')
                {
                        # Call math module to print the formula in T assembly
                        do_math($STATE->{parameter});           # Parse the param list as an expression (push values to stack)
                        tsm_print("lvalue $STATE->{lvalue}");   # Push the symbol name to the stack
                        tsm_print("assign");                    # Assign its value
                        # Add the symbol to the symtable for future lookups
                        add_symbol($STATE->{lvalue}, ( type => 'variable' ));
                }
                if ($STATE->{type} eq 'FUNCTION')
                {
                        # The current statement is a function (or a print statement, the only defined function)
                        do_math($STATE->{parameter});   # Parse the paramater list as an expression
                        tsm_print($STATE->{lvalue});    # Push the function name to the stack
                }
        }
 
        # parse_statement() will trigger on statements that have already been handled
        # so return if we already handled it, so as not to create a new state
        return if (!$STATE->{type});
 
        # Create a clean STATE but preserve parent and set next token to be a statement
        my $t = $STATE;
        $STATE = { parent => $t->{parent}, next => 'STATEMENT' };
}
 
 
##########################################################
# MATH MODULE
##########################################################
 
 
## Function: do_math(MATH)
#
## Description: This subroutine basically does nothing but builds a tree from the string input
#               and collapses it, returning the answer which is located at the root node after
#               the tree has been collapsed. This is just an interface to using the tree subs in
#               a more intuitive fashion.
#
sub do_math($)
{
        my $root = math_lex($_[0]);
        math_collapseTree($root);
        return $root->{VALUE};
}
 
## Function: math_addNode(NODE, VALUE, LEFT_VALUE, RIGHT_VALUE)
#
## Description: Creates a node reference with a value which can then be attached
#               to a tree. Also makes a left and right child if they are defined.
#
sub math_addNode($$$$)
{
        my ($node, $value, $left, $right) = @_;
        $node->{VALUE} = $value;
        $node->{LEFT}{VALUE} = $left if (defined($left));
        $node->{RIGHT}{VALUE} = $right if (defined($right));
        return $node;
}
 
## Function: math_insertNode(NODE, VALUE)
#
## Description: Inserts a node inside a tree by replacing the node with a value and setting
#               the old node as a left child of the new one.
#
sub math_insertNode($$)
{
        my($node, $value) = @_;
        my %tmpnode = %$node;
        math_addNode($node, $value, undef, undef);
        $node->{LEFT} = \%tmpnode;
}
 
## Function: math_collapseTree(ROOT)
#
## Description: Resursively travels through the tree starting from root and performs the mathematical
#               operations when it encounters a operator node with two number children. It then stores
#               this calculated value as the new node value and collapses the children. This recursive
#               collapsing creates an end result of the final value being collapsed into the root of the
#               tree.
#
sub math_collapseTree($)
{
        my($node,$collapse) = @_;
        # Recursive traversal to the end of the tree, collapse all left and right children if we have any
        math_collapseTree($node->{LEFT}) if ($node->{LEFT});
        math_collapseTree($node->{RIGHT}) if ($node->{RIGHT});
        return if (!defined($node->{VALUE}));
        # If we're here we've reached a bottom node that is non-empty
        if ($node->{VALUE} =~ m/^$NUMBER$/)
        {
                tsm_print("push $node->{VALUE}");
        }
        elsif ($node->{VALUE} =~ m/^$IDENTIFIER$/)
        {
                tsm_print("rvalue $node->{VALUE}");
        }
        elsif ($node->{VALUE} =~ m/^$OPERATOR$/)
        {
                tsm_print($MATHTABLE{$node->{VALUE}}{NAME});
        }
        $node = undef;
}
 
## Function: math_lex(EXPRESSION)
#
## Description: This subroutine will analyze a mathematical expression and bust it up into a binary
#               tree based on operator precedence. If an operator has equal or lower precedence than
#               the previous operator, it will be inserted above the operator with higher precedence in
#               the tree. This will guarantee that a bottom up node traversal will compute the operations
#               with highest precedence first.
#
sub math_lex($)
{
        local ($_) = @_;
        my $complete = $_;
        my $root = {};
        my $node = $root;
        my $prevnode = undef;
        my $rest = undef;
        my $last;
 
        # Basic syntax check: no input
        if (!defined($_))
        {
                syntax_error("Invalid expression or statement in $STATE->{type}.");
                return;
        }
 
        # Basic syntax check: look for consecutive operators in a row
        while (s/($OPERATOR)$OPERATOR+/$1/)
        {
                syntax_error("Syntax error at term '$&': found ".length($&)." consecutive operators.");
        }
        # Basic syntax check: We also don't want operators at the beginning or end
        while (s/^$OPERATOR// || s/$OPERATOR$//)
        {
                syntax_error("Syntax error at term '$&': invalid operator at beginning/end of expression");
        }
        # Basic syntax check: Make sure all opened brackets are closed
        if (tr/(/(/ != tr/)/)/)
        {
                syntax_error("Syntax error in expression: unmatched brackets.");
        }
 
        # Main lexical analysis loop
        while ($_ ne '')
        {
                if (m/^\s+/)
                {
                        $rest = $';
                }
                elsif (m/^($EXPR)\s*($OPERATOR)?\s*/)
                {
                        $rest = $'; #'
                        my($expr, $op) = ($1, $2);
 
                        # Error checking
                        if ($expr =~ m/^$EXPR$/)
                        {
                                # Don't allow VALUE followed by VALUE without an operator sitting in between
                                if ($last && $last !~ m/^$OPERATOR$/)
                                {
                                        $complete =~ m/\Q$last\E\s*\Q$expr\E/;
                                        syntax_error("Ambiguous expression format at '$&'.");
                                }
                                if ($expr =~ m/^$IDENTIFIER$/)  # Check for identifier
                                {
                                        # If the symbol is not defined then we have an error
                                        syntax_error("Variable '$expr' used without having been declared.") if (!defined($SYMTAB{$expr}));
                                        # Or if the symbol is a keyword
                                        if (defined($SYMTAB{$expr}) && $SYMTAB{$expr}{type} ne 'variable')
                                        {
                                                syntax_error("Keyword '$expr' cannot be used in an expression.");
                                        }
                                }
                        }
                        elsif ($expr eq '.')
                        {
                                # Number match allows for a single ., but it is not considered an 'invalid floating point format'
                                syntax_error("Invalid character at '.'");
                        }
                        elsif ($expr =~ m/^$NUMBER$/ && $expr =~ m/^\./ || $expr =~ m/\.$/) # Check for number
                        {
                                # This number began or ended with '.', which is illegal according to the requirements.
                                # So we have to generate an error message.
                                syntax_error("Invalid floating point format at '$expr'.");
                        }
 
                        if (m/^\((.+?)\)/)
                        {
                                if ($op)
                                {
                                        # Add the operator node and recursively parse the bracket under it
                                        math_addNode($node, $op, undef, undef);
                                        $node->{LEFT} = math_lex($1);
                                }
                                else
                                {
                                        # We're at the end of the statement, parse the bracket recursively and add it to the end
                                        $node->{PARENT}{RIGHT} = math_lex($1);
                                }
                        }
                        elsif ($op) # The match had an operator following the value
                        {
                                if (!defined($node->{PARENT}{VALUE})
                                        || $MATHTABLE{$op}{VALUE} > $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
                                {
                                        # In this case, the operator had higher precendence than the last,
                                        # and therefore must be added below the last node.
                                        math_addNode($node, $op, $expr, undef);
                                }
                                else
                                {
                                        # If the operator has a lower (or equal) precendence than the last operator,
                                        # we must float up the tree until the condition is no longer met. I call this
                                        # the buoyancy effect.
                                        $node->{VALUE} = $expr;
                                        while ($node->{PARENT}{VALUE}
                                                && $MATHTABLE{$op}{VALUE} <= $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
                                        {
                                                $node = $node->{PARENT};        # Rise above parent node...
                                        }
                                        # We can finally add the node, but we have to insert it before the current node
                                        # So we need a special function that will re-add the current node as a left child
                                        math_insertNode($node, $op);
                                }
                                $last = $op; # Our last token was an operator
                        }
                        else
                        {
                                # If all we matched was a number (and the end of the line), stick it into the current
                                # nodes value. This will most likely take place in a right child, though it is at the
                                # root if the input is solely a number.
                                $node->{VALUE} = $expr;
                                $last = $expr; # Last token was a number
                        }
                        $prevnode = $node;              # Keep track of the last node
                        $node->{RIGHT} = {};            # Create a new node from the current node's right leg
                        $node = $node->{RIGHT};         # Change current node to right leg
                        $node->{PARENT} = $prevnode;    # Mark the parent node
                }
                else
                {
                        # Found no valid tokens (not number, operator or identifier)
                        syntax_error("Invalid character at '".substr($_,0,1)."'.\n");
                }
                if ($STATE->{ERROR})
                {
                        $rest = '';             # If an error occured clear the expression and go to the next statement
                }
 
                $_ = $rest;                     # Goto next token
        }
        return $root;
}
 
###
## START OF PROGRAM
###
main();                         # call main()
## end of program