#!/usr/bin/perl -w
# @ Author: Loren Segal
# @ Date: March 2005
#
# DESCRIPTION:
# ------------
# This is the final version of the T compiler for the SOEN229 Team Project. This script will generate output T assembly code for a designated
# simulator script depending on the given source input. Usage of the script is as follows:
#
# perl tcompile.pl <source_file.t>
# -- OR --
# ./tcompile.pl <source_file.t>
#
# The script will generate the file source_file.tsm (where source_file is the name of the input file minus the extension)
#
# Also keep in mind that the symbol file must be in the same directory as the compiler under the name 'symbolfile.txt'
#
#
use strict; # strict programming style
# General functions
sub main();
sub usage();
sub tsm_print($);
sub tsm_buffer_print; # takes variable length arguments
# Symbol table functions
sub init_symtable($);
sub load_symtable();
sub lookup_symbol($);
# Code parsing and parse_ functions
sub code_parse($);
sub syntax_error($);
sub expect_error($$);
sub parse_expr($);
sub parse_identifier($);
sub parse_assignment($);
sub parse_seperator($);
sub parse_newline($);
sub parse_statement();
#################
## Math module ##
#################
sub math_addNode($$$$);
sub math_insertNode($$);
sub math_collapseTree($);
sub math_lex($);
sub do_math($);
###
## GLOBAL VARIABLES
###
my(%SYMTAB); # Global Symbol Table
my(%PARSETABLE) = (
'IDENTIFIER' => {
'regex' => qr/[a-z_]\w*/i,
'callback' => \&parse_identifier
},
'UNSIGNED_VALUE' => {
'regex' => qr/\d+(?:\.\d+)?/,
'callback' => \&parse_expr
},
'END_STATEMENT' => {
'regex' => qr/;|$/,
'callback' => \&parse_seperator
},
'NEWLINE' => {
'regex' => qr/\r?\n/,
'callback' => \&parse_newline
},
'ASSIGNMENT' => {
'regex' => qr/=/,
'callback' => \&parse_assignment
},
'ARITHMETIC' => {
'regex' => qr/[\(\)\+\-\*\/]/,
'callback' => \&parse_expr
}
);
my $STATE = {}; # STATE keeps track of various statement values
my @TSM_BUFFER; # All TSM statements of the source file
my $LINE_NUMBER = 1; # Keep track of line numbers
my $ERROR_FLAG = 0; # Keep track of syntax errors, 0 = clean expression, 1 = error
my $IDENTIFIER = qr/[a-z_]\w*/i; # Identifier match
my $NUMBER = qr/\d*\.?\d+/i; # Number match, error checking is needed when using this
my $STRICTNUMBER = qr/\d+(?:\.\d+)?/; # Strict number match
my $VALUE = qr/$IDENTIFIER|$NUMBER/; # Value match, this is a number or identifier
my $OPERATOR = qr/[+-\/*]/; # Operators, +-/*
my $BRACKET = qr/\(\s*(?:$VALUE\s*|$OPERATOR\s*|[\(\)]\s*)+?\s*\)/;
my $EXPR = qr/$VALUE|$BRACKET/;
my %MATHTABLE = (
'+' => { NAME => 'add', VALUE => 1 },
'-' => { NAME => 'sub', VALUE => 2 },
'*' => { NAME => 'mul', VALUE => 3 },
'/' => { NAME => 'div', VALUE => 4 }
);
sub main()
{
my $file = $ARGV[0] or usage();
init_symtable("symbolfile.txt"); # Load the symbol file
load_symtable(); # Load symbol values
code_parse($file); # Parse the input source file
tsm_buffer_print($file.($file =~ /\.t$/ ? 'sm' : '.tsm')); # Print buffer to .tsm output file
tsm_buffer_print(); # Print buffer to screen for debugging
}
## Function: usage()
#
## Description: lists the required parameters to use the script file and quits
#
sub usage()
{
die "usage: perl tcompile.pl <source_file.t>\n";
}
####
### SYMBOL TABLE FUNCTIONS
####
## Function: init_symbtable(FILENAME)
#
## Description: Opens the symbol table for reading under the filehandle SYMTAB
#
sub init_symtable($)
{
open(SYMTAB, $_[0]) or die("% Error: could not open file '$_[0]': $!\n");
}
## Function: load_symtable()
#
## Description: Adds all of the key/value pairs to their
# appropriate symbol name in the SYMTAB.
#
sub load_symtable()
{
local $_;
my($line, $valid) = (0, 1);
for (<SYMTAB>)
{
chomp;
next if (!$_ || m/^\s*#.*/); # ignore commented and empty lines
# -- NOT required by assignment.
$line++;
my($name) = m/^\s*(\w+):\s*\w+=\w+/; # matches-> NAME: *=* ...
my(@matches) = m/(\w*)=(\w*)/g; # matches-> *=*
$valid = 1;
for(my($i) = 0; $i <= $#matches; $i++)
{
if ($matches[$i] eq '') { $valid = 0; last; }
}
if (!$name || !$valid)
{
# Give an error if a line of the symbol table file is corrupted or
# improperly defined.
die("% Error: symbol table has invalid definition at line $line.\n");
}
# Add the symbol- it is valid.
add_symbol($name, @matches);
}
close(SYMTAB);
}
## Function: add_symbol(NAME, HASH_VALUES)
#
## Description:
# Adds a symbol NAME to the SYMTABLE along with its key/value pairs defined by
# HASH_VALUES
#
sub add_symbol($@)
{
my($name, %symbols) = @_;
return if (!values(%symbols));
$symbols{count} = 0; # Force count to be initialized at 0,
# even if it is not declared or set as something
# else in the symbol file, which is not legal.
$SYMTAB{$name} = \%symbols;
}
## Function: lookup_symbol(NAME)
#
## Description:
# Looks up a symbol in SYMTAB and returns true or false (undef) if it exists
# or not, respectively.
#
sub lookup_symbol($)
{
return (defined($SYMTAB{$_[0]}) ? !undef : undef);
}
## Function: code_parse(FILENAME)
#
## Description:
# This is the true heart of the program. All of the lexical analysis is done
# here. This function will make use of the %PARSETABLE along with all of its
# grammar definitions and callback functions. This function will also
# automatically report all recognized tokens unless otherwise specified.
#
sub code_parse($)
{
local $_;
my($file, $found_match) = @_; # file is arg1, found_match will not be set
# Open file, read into one big scalar, and close
open(CODE, $file) or die "% Error: could not open file '$file': $!\n";
$_ = join "", <CODE>;
close CODE;
$LINE_NUMBER = 1;
# Basic syntax check, cannot begin code with a seperator
if (s/^\s+;//)
{
syntax_error("Invalid statement seperator at beginning of file.");
}
# Main code parsing loop
while ($_ ne '')
{
$found_match = 0;
while (my($name, $values) = each(%PARSETABLE))
{
s/^([ \t]*)//; # Get rid of extraneous spaces.
$STATE->{space} = $1 if ($1);
last if (!defined($_)); # If we removed the last of $_, leave.
my($re, $callback) = ($values->{regex}, $values->{callback});
if (m/^$re/i) {
$found_match = 1;
$STATE->{token} = $&; # update token in STATE
# Get the current symbol type (update name if it's a keyword)
# and check if it throws an expect_error()
if (lookup_symbol($&) && $SYMTAB{$&}{type} eq 'keyword')
{
$name = $SYMTAB{$&}{subtype};
}
# throw a possible expect_error()
expect_error($STATE->{next},$name);
$STATE->{next} = '' unless ($name eq 'NEWLINE');
# Call callback function for any data handling.
&$callback($&) if ($callback && defined(&$callback));
# Log syntax state changes / update current syntax state
$STATE->{last_token} = $STATE->{token} unless ($name eq 'NEWLINE');
$_ = $'; #' Goto next token
}
}
# A character/token was found that is not in the grammar of the language.
if (!$found_match && $_)
{
s/\S+(?=;|$)|\S+//;
syntax_error("Invalid token in context: '$&'");
}
}
# throw a possible expect_error() for the end of the file
expect_error($STATE->{next}, "END_OF_FILE");
# there is still a statement to parse, parse it
if ($STATE->{type} || $STATE->{next})
{
parse_statement();
}
# If we the child of a parent's $STATE, then we are in an unclosed if/while block
while ($STATE->{parent}{type})
{
syntax_error("Premature EOF found: $STATE->{parent}{type} was not closed.");
$STATE = $STATE->{parent};
}
# Print the end of assembly code
tsm_print("halt\n");
}
## Function: expect_error(REASON)
#
## Description:
# This function is meant to print more intuitive error messages for the 'expecting' error catching system.
# If the token found was not the type token that was expected by the last token, the parser will call this function.
# Multiple token types can be expected. Order with multiple tokens, however, is important. Only the first token type
# will be printed as being expected- this is for clarity purposes. Also because one token is usually more expected than another
# depending on the situation (endif is more expected than an else inside an if block).
#
sub expect_error($$)
{
my($expecting, $found) = @_;
return if (!$expecting); # not expecting anything, leave
# Replace common names with their definitions
# Common names are: EXPRESSION, STATEMENT, END_STATEMENT
$expecting =~ s/(EXPRESSION)/$1,IDENTIFIER,ARITHMETIC,UNSIGNED_VALUE/;
$expecting =~ s/(STATEMENT)/$1,IDENTIFIER,IF_BEGIN,IF_ELSE,WHILE_BEGIN/;
$expecting =~ s/(END_STATEMENT)/$1,END_OF_FILE/;
# Check if the matched token is part of the expected types
if (!$expecting || grep(/(?:^|,)$found(?:$|,)/i, "$expecting,NEWLINE"))
{
# The token we found is what we were expecting it to be
return;
}
elsif (grep(/^ASSIGNMENT,.+/,$expecting))
{
# Sit this error out, do_math will catch it with much more precision
syntax_error("Variable '$STATE->{last_token}' used without having been declared.");
}
else
{
# The token we found is not part of the expected types, throw the error
$expecting =~ s/,.+//;
$expecting =~ s/IDENTIFIER/statement/;
syntax_error("Expecting $expecting, found $found". ($STATE->{token} && $STATE->{token} =~ m/^$IDENTIFIER$/ ? ": '$STATE->{token}'" : ""));
# If we're throwing an error for an some form of statement ending we have
# to manually leave the if/while block because it won't be handled by parse_statement
if ($STATE->{parent}{type} && (($found eq 'IF_END' && $STATE->{parent}{type} eq 'IF_BLOCK') ||
($found eq 'WHILE_END' && $STATE->{parent}{type} eq 'WHILE_BLOCK')))
{
$STATE = $STATE->{parent};
}
}
}
## Function: syntax_error(REASON)
#
## Description:
# This function will throw an error and halt output of the T assembly file as well as halt error output until
# the next statement.
#
sub syntax_error($)
{
print STDERR "% Parse error on line $LINE_NUMBER: $_[0]\n" if (!$STATE->{ERROR});
#if (!$ERROR_FLAG) { tsm_buffer_print(); } # DEBUGGING ONLY
$ERROR_FLAG = 1;
$STATE->{ERROR} = 1;
}
## Function: tsm_print(DATA)
#
## Description:
# This function prints the assembled T code to the output buffer
# but makes sure there were no previous errors in the code.
#
sub tsm_print($)
{
push @TSM_BUFFER, $_[0] if (!$ERROR_FLAG);
}
## Function: tsm_buffer_print(FILEHANDLE)
#
## Description: Prints the buffer to the selected file or STDOUT if no filename is present
#
sub tsm_buffer_print($)
{
# Don't print output if there is a parsing error
return if ($ERROR_FLAG);
my($file) = @_;
# Join buffer by UNIX style newlines
my $tsm_output = join("\n",@TSM_BUFFER);
if ($file)
{
open OUT, ">$file";
print OUT $tsm_output;
close OUT;
}
else
{
print STDOUT $tsm_output;
}
}
###
## CUSTOM PARSING AND CALLBACK FUNCTIONS
###
## Function: parse_expr(EXPRESSION)
#
## Description: Custom callback for all token types that fall under the EXPRESSION common name category.
# This function is generally meant to update the state machine for the next expected token
# as well as add all valid expression tokens to the statement's 'parameter list'.
#
sub parse_expr($)
{
my($match) = @_;
if ($STATE->{type})
{
# Update the next expected token depending on if we're in a while condition, if condition or assignment/print statement
if ($STATE->{type} eq 'IF_CONDITION') { $STATE->{next} = 'IF_CONDITION_END,EXPRESSION'; }
elsif ($STATE->{type} eq 'WHILE_CONDITION') { $STATE->{next} = 'WHILE_CONDITION_END,EXPRESSION'; }
else {
# This will match an assignment or print statement, we're only looking for an end statement here
# or more of an expr depending on if we have received any part of the expr yet
#print "Found $STATE->{type} $STATE->{parent} and $match\n";
if (!$STATE->{parameter}) { $STATE->{next} = 'EXPRESSION,END_STATEMENT'; }
else { $STATE->{next} = 'END_STATEMENT,EXPRESSION'; }
if ($STATE->{parent}{type})
{
# However, if we're in a if/while block, we have to add the possibility of catching an if/while end or else
if ($STATE->{parent}{type} eq 'IF_BLOCK') { $STATE->{next} = 'IF_END,IF_ELSE,'.$STATE->{next}; }
if ($STATE->{parent}{type} eq 'WHILE_BLOCK') { $STATE->{next} = 'WHILE_END,'.$STATE->{next}; }
}
}
if (lookup_symbol($match) && $SYMTAB{$match}{type} ne 'variable')
{
# We're inside an expression but a keyword or function call was used- this should generate an error
if ($SYMTAB{$match}{type} eq 'keyword') { expect_error($STATE->{next}, $SYMTAB{$match}{subtype}); }
else { syntax_error("Cannot use $SYMTAB{$match}{type} '$match' in $STATE->{type}"); }
}
else
{
#print "param $match in type $STATE->{type}\n";
#
# Add the token to the parameter list with possible spaces
my $s = ($STATE->{space} ? $STATE->{space} : '');
$STATE->{parameter} .= "$s$match";
$STATE->{space} = '';
}
}
}
## Function: parse_identifier(IDENTIFIER)
#
## Description: Probably the most important callback function- handles all identifiers, specifically including
# keywords, which are used to control the if/while block structure of the STATE variable, also to control
# the expected tokens in program flow for the if/while structure. All identifiers are also passed to parse_expr()
#
sub parse_identifier($)
{
my($match) = @_;
# parse the identifier as an expression as well
parse_expr($match);
# Begin identifier checks
if (!lookup_symbol($match))
{
# Identifier has not yet been defined, the next token should be of type ASSIGNMENT
# But if we're inside a STATEMENT, also allow the statement to end (END_STATEMENT) and let
# the expression parser (aka the math module) handle the undefined variable.
$STATE->{next} = 'ASSIGNMENT';
$STATE->{next} .= ',END_STATEMENT' if ($STATE->{type});
return;
}
elsif (!$STATE->{type} && $SYMTAB{$match}{type} eq 'function')
{
# The identifier is a function (most likely print), set the STATE type
$STATE->{type} = 'FUNCTION';
$STATE->{lvalue} = $match;
}
elsif (!$STATE->{ERROR} && $SYMTAB{$match}{type} eq 'keyword') # If there was an error in this statement don't handle it
{ # this will stop keywords like do from opening while blocks if they
# are not placed properly.
# Identifier is a keyword, update the counter
$SYMTAB{$match}{count}++;
my $subtype = $SYMTAB{$match}{subtype};
# Find out what kind of keyword
if (!$STATE->{type} && $subtype eq 'IF_BEGIN') # We've begun an if block (cannot be inside a STATE type)
{
$STATE->{type} = 'IF_BLOCK';
$STATE->{number} = $SYMTAB{$match}{count}; # label depends on the symbol count
$STATE->{else} = undef; # prepare for a possible else block
# Jump into a new child STATE
my $t = $STATE;
$STATE = { parent => $t, type => 'IF_CONDITION', next => 'EXPRESSION' };
}
elsif ($subtype eq 'IF_CONDITION_END') # We've ended an if block condition
{
if (!$STATE->{type} || ($STATE->{type} && $STATE->{type} ne 'IF_CONDITION'))
{
syntax_error("Cannot use $subtype outside IF_CONDITION");
}
else
{
do_math($STATE->{parameter}); # Parse the condition
tsm_print("gofalse __ifend_$STATE->{parent}{number}"); # goto the end of the if block (or else) if false
# Clear the STATE
$STATE = { parent => $STATE->{parent}, next => 'STATEMENT,IF_END,IF_ELSE' };
}
}
elsif ($subtype eq 'IF_ELSE') # We've begun an if-else block
{
if (!$STATE->{parent}{type} || (!$STATE->{parent}{type} && $STATE->{parent}{type} ne 'IF_BLOCK'))
{
syntax_error("Cannot use $subtype outside IF_BLOCK");
}
else
{
parse_statement(); # parse the statement if there is one
$STATE->{parent}{else} = 1;
tsm_print("goto __ifend1_$STATE->{parent}{number}"); # Goto the end of the if block before jumping to else
tsm_print("label __ifend_$STATE->{parent}{number}"); # Else block label
$STATE->{next} = 'STATEMENT,IF_END'; # Expecting an endif or statement
}
}
elsif ($subtype eq 'IF_END') # Ending an if block
{
if (!$STATE->{parent}{type} || ($STATE->{parent}{type} && $STATE->{parent}{type} ne 'IF_BLOCK'))
{
syntax_error("Cannot use $subtype outside IF_BLOCK");
}
else
{
parse_statement(); # parse the statement if there is one
# Jump back to parent's STATE
$STATE = $STATE->{parent};
my $c = ($STATE->{else} ? '1' : '');
tsm_print("label __ifend".$c."_$STATE->{number}"); # Label for the end of the if block
# Clear the current state since we just ended a statement
# but give it a fake type in case parse_statement() is called on the endif/endwhile
$STATE = { type => $subtype, parent => $STATE->{parent} };
}
}
elsif (!$STATE->{type} && $subtype eq 'WHILE_BEGIN') # We've begun a while block (cannot be inside a STATE type)
{
$STATE->{type} = 'WHILE_BLOCK';
$STATE->{number} = $SYMTAB{$match}{count}; # label depends on the symbol count
# Create a new child STATE symbolizing the new block
my $t = $STATE;
$STATE = { parent => $t, type => 'WHILE_CONDITION', next => 'EXPRESSION' };
}
elsif ($subtype eq 'WHILE_CONDITION_END') # Ending a while condition
{
if (!$STATE->{type} || ($STATE->{type} && $STATE->{type} ne 'WHILE_CONDITION'))
{
syntax_error("Cannot use $subtype outside WHILE_CONDITION");
}
else
{
tsm_print("label __whilebegin_$STATE->{parent}{number}"); # Label for the while condition (beginning of while block)
do_math($STATE->{parameter}); # Parse the expression (condition)
tsm_print("gofalse __whileend_$STATE->{parent}{number}"); # Jump to end of block if false
# Clear the STATE
$STATE = { parent => $STATE->{parent}, next => 'STATEMENT,WHILE_END' };
}
}
elsif ($subtype eq 'WHILE_END') # Ending a while block
{
if (!$STATE->{parent}{type} || ($STATE->{parent}{type} && $STATE->{parent}{type} ne 'WHILE_BLOCK'))
{
syntax_error("Cannot use $subtype outside WHILE_BLOCK");
}
else
{
parse_statement(); # parse the statement if there is one
# Jump back to parent's STATE
$STATE = $STATE->{parent};
tsm_print("goto __whilebegin_$STATE->{number}"); # Goto the beginning of the while (re-evaluate condition)
tsm_print("label __whileend_$STATE->{number}"); # Label for the end of the while block
# Clear the current state since we just ended a statement
# but give it a fake type in case parse_statement() is called on the endif/endwhile
$STATE = { type => $subtype, parent => $STATE->{parent} };
}
}
}
}
## Function: parse_assignment(T_EQUALS)
#
## Description: Custom callback when an EQUALS token is found. Updates the STATE variable to log which
# variable is being set, and does syntax checking on if the variable name is valid.
#
sub parse_assignment($)
{
my($match) = @_;
if (lookup_symbol($STATE->{last_token}) && $SYMTAB{$STATE->{last_token}}{type} eq 'keyword')
{
# Variable name is actually a keyword, illegal.
syntax_error("Cannot use defined keyword '$STATE->{last_token}' as a variable name.");
}
elsif ($STATE->{last_token} !~ m/^$IDENTIFIER$/)
{
# Variable name is not a valid identifier (m/[a-z_]\w*/i)
syntax_error("Invalid assignment to non-identifier '$STATE->{last_token}'.");
}
else
{
if ($STATE->{type})
{
# The variable name is valid, but we're already inside a statement
syntax_error("Cannot perform an assignment within a statement.");
}
else
{
# Everything checked out ok, set the STATE type and log the variable name
$STATE->{type} = 'ASSIGNMENT';
$STATE->{lvalue} = $STATE->{last_token};
}
}
}
## Function: parse_newline(NEWLINE)
#
## Description: Custom callback for the newline. Mainly for incrementing the line counter.
#
sub parse_newline($)
{
$LINE_NUMBER++;
}
## Function: parse_seperator(SEPERATOR)
#
## Description: Custom callback to force the end of a statement. Force the current statement to be parsed
# and update the next expected token to be a new statement.
sub parse_seperator($)
{
parse_statement();
$STATE->{next} = 'STATEMENT';
$STATE->{ERROR} = 0;
}
## Function: parse_statement()
#
## Description: Called at the end of a statement to clear the STATE and handle any generic statement types
# (function calls and assignments). This function is sometimes called after a statement has already
# been handled, so it does not always contain information to parse.
#
sub parse_statement()
{
if ($STATE->{type})
{
if ($STATE->{type} eq 'ASSIGNMENT')
{
# Call math module to print the formula in T assembly
do_math($STATE->{parameter}); # Parse the param list as an expression (push values to stack)
tsm_print("lvalue $STATE->{lvalue}"); # Push the symbol name to the stack
tsm_print("assign"); # Assign its value
# Add the symbol to the symtable for future lookups
add_symbol($STATE->{lvalue}, ( type => 'variable' ));
}
if ($STATE->{type} eq 'FUNCTION')
{
# The current statement is a function (or a print statement, the only defined function)
do_math($STATE->{parameter}); # Parse the paramater list as an expression
tsm_print($STATE->{lvalue}); # Push the function name to the stack
}
}
# parse_statement() will trigger on statements that have already been handled
# so return if we already handled it, so as not to create a new state
return if (!$STATE->{type});
# Create a clean STATE but preserve parent and set next token to be a statement
my $t = $STATE;
$STATE = { parent => $t->{parent}, next => 'STATEMENT' };
}
##########################################################
# MATH MODULE
##########################################################
## Function: do_math(MATH)
#
## Description: This subroutine basically does nothing but builds a tree from the string input
# and collapses it, returning the answer which is located at the root node after
# the tree has been collapsed. This is just an interface to using the tree subs in
# a more intuitive fashion.
#
sub do_math($)
{
my $root = math_lex($_[0]);
math_collapseTree($root);
return $root->{VALUE};
}
## Function: math_addNode(NODE, VALUE, LEFT_VALUE, RIGHT_VALUE)
#
## Description: Creates a node reference with a value which can then be attached
# to a tree. Also makes a left and right child if they are defined.
#
sub math_addNode($$$$)
{
my ($node, $value, $left, $right) = @_;
$node->{VALUE} = $value;
$node->{LEFT}{VALUE} = $left if (defined($left));
$node->{RIGHT}{VALUE} = $right if (defined($right));
return $node;
}
## Function: math_insertNode(NODE, VALUE)
#
## Description: Inserts a node inside a tree by replacing the node with a value and setting
# the old node as a left child of the new one.
#
sub math_insertNode($$)
{
my($node, $value) = @_;
my %tmpnode = %$node;
math_addNode($node, $value, undef, undef);
$node->{LEFT} = \%tmpnode;
}
## Function: math_collapseTree(ROOT)
#
## Description: Resursively travels through the tree starting from root and performs the mathematical
# operations when it encounters a operator node with two number children. It then stores
# this calculated value as the new node value and collapses the children. This recursive
# collapsing creates an end result of the final value being collapsed into the root of the
# tree.
#
sub math_collapseTree($)
{
my($node,$collapse) = @_;
# Recursive traversal to the end of the tree, collapse all left and right children if we have any
math_collapseTree($node->{LEFT}) if ($node->{LEFT});
math_collapseTree($node->{RIGHT}) if ($node->{RIGHT});
return if (!defined($node->{VALUE}));
# If we're here we've reached a bottom node that is non-empty
if ($node->{VALUE} =~ m/^$NUMBER$/)
{
tsm_print("push $node->{VALUE}");
}
elsif ($node->{VALUE} =~ m/^$IDENTIFIER$/)
{
tsm_print("rvalue $node->{VALUE}");
}
elsif ($node->{VALUE} =~ m/^$OPERATOR$/)
{
tsm_print($MATHTABLE{$node->{VALUE}}{NAME});
}
$node = undef;
}
## Function: math_lex(EXPRESSION)
#
## Description: This subroutine will analyze a mathematical expression and bust it up into a binary
# tree based on operator precedence. If an operator has equal or lower precedence than
# the previous operator, it will be inserted above the operator with higher precedence in
# the tree. This will guarantee that a bottom up node traversal will compute the operations
# with highest precedence first.
#
sub math_lex($)
{
local ($_) = @_;
my $complete = $_;
my $root = {};
my $node = $root;
my $prevnode = undef;
my $rest = undef;
my $last;
# Basic syntax check: no input
if (!defined($_))
{
syntax_error("Invalid expression or statement in $STATE->{type}.");
return;
}
# Basic syntax check: look for consecutive operators in a row
while (s/($OPERATOR)$OPERATOR+/$1/)
{
syntax_error("Syntax error at term '$&': found ".length($&)." consecutive operators.");
}
# Basic syntax check: We also don't want operators at the beginning or end
while (s/^$OPERATOR// || s/$OPERATOR$//)
{
syntax_error("Syntax error at term '$&': invalid operator at beginning/end of expression");
}
# Basic syntax check: Make sure all opened brackets are closed
if (tr/(/(/ != tr/)/)/)
{
syntax_error("Syntax error in expression: unmatched brackets.");
}
# Main lexical analysis loop
while ($_ ne '')
{
if (m/^\s+/)
{
$rest = $';
}
elsif (m/^($EXPR)\s*($OPERATOR)?\s*/)
{
$rest = $'; #'
my($expr, $op) = ($1, $2);
# Error checking
if ($expr =~ m/^$EXPR$/)
{
# Don't allow VALUE followed by VALUE without an operator sitting in between
if ($last && $last !~ m/^$OPERATOR$/)
{
$complete =~ m/\Q$last\E\s*\Q$expr\E/;
syntax_error("Ambiguous expression format at '$&'.");
}
if ($expr =~ m/^$IDENTIFIER$/) # Check for identifier
{
# If the symbol is not defined then we have an error
syntax_error("Variable '$expr' used without having been declared.") if (!defined($SYMTAB{$expr}));
# Or if the symbol is a keyword
if (defined($SYMTAB{$expr}) && $SYMTAB{$expr}{type} ne 'variable')
{
syntax_error("Keyword '$expr' cannot be used in an expression.");
}
}
}
elsif ($expr eq '.')
{
# Number match allows for a single ., but it is not considered an 'invalid floating point format'
syntax_error("Invalid character at '.'");
}
elsif ($expr =~ m/^$NUMBER$/ && $expr =~ m/^\./ || $expr =~ m/\.$/) # Check for number
{
# This number began or ended with '.', which is illegal according to the requirements.
# So we have to generate an error message.
syntax_error("Invalid floating point format at '$expr'.");
}
if (m/^\((.+?)\)/)
{
if ($op)
{
# Add the operator node and recursively parse the bracket under it
math_addNode($node, $op, undef, undef);
$node->{LEFT} = math_lex($1);
}
else
{
# We're at the end of the statement, parse the bracket recursively and add it to the end
$node->{PARENT}{RIGHT} = math_lex($1);
}
}
elsif ($op) # The match had an operator following the value
{
if (!defined($node->{PARENT}{VALUE})
|| $MATHTABLE{$op}{VALUE} > $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
{
# In this case, the operator had higher precendence than the last,
# and therefore must be added below the last node.
math_addNode($node, $op, $expr, undef);
}
else
{
# If the operator has a lower (or equal) precendence than the last operator,
# we must float up the tree until the condition is no longer met. I call this
# the buoyancy effect.
$node->{VALUE} = $expr;
while ($node->{PARENT}{VALUE}
&& $MATHTABLE{$op}{VALUE} <= $MATHTABLE{$node->{PARENT}{VALUE}}{VALUE})
{
$node = $node->{PARENT}; # Rise above parent node...
}
# We can finally add the node, but we have to insert it before the current node
# So we need a special function that will re-add the current node as a left child
math_insertNode($node, $op);
}
$last = $op; # Our last token was an operator
}
else
{
# If all we matched was a number (and the end of the line), stick it into the current
# nodes value. This will most likely take place in a right child, though it is at the
# root if the input is solely a number.
$node->{VALUE} = $expr;
$last = $expr; # Last token was a number
}
$prevnode = $node; # Keep track of the last node
$node->{RIGHT} = {}; # Create a new node from the current node's right leg
$node = $node->{RIGHT}; # Change current node to right leg
$node->{PARENT} = $prevnode; # Mark the parent node
}
else
{
# Found no valid tokens (not number, operator or identifier)
syntax_error("Invalid character at '".substr($_,0,1)."'.\n");
}
if ($STATE->{ERROR})
{
$rest = ''; # If an error occured clear the expression and go to the next statement
}
$_ = $rest; # Goto next token
}
return $root;
}
###
## START OF PROGRAM
###
main(); # call main()
## end of program Powered by
GeSHi Syntax Highlighting software.
Author of all (other) material unless otherwise specified:
Loren Segal. Copyright 2005.