algebrite
Version:
Computer Algebra System in Coffeescript
828 lines (684 loc) • 21.7 kB
text/coffeescript
# This scanner uses the recursive descent method.
#
# The char pointers token_str and scan_str are pointers to the input string as
# in the following example.
#
# | g | a | m | m | a | | a | l | p | h | a |
# ^ ^
# token_str scan_str
#
# The char pointer token_buf points to a malloc buffer.
#
# | g | a | m | m | a | \0 |
# ^
# token_buf
#
# In the sequence of method invocations for scanning,
# first we do the calls for scanning the operands
# of the operators of least precedence.
# So, since precedence in maths goes something like
# (form high to low) exponents, mult/div, plus/minus
# so we scan first for terms, then factors, then powers.
# That's the general idea, but of course we also have to deal
# with things like parens, non-commutative
# dot (or inner) product, assignments and tests,
# function calls etc.
# Note that a^1/2 is, correctly, a/2, not, incorrectly, sqrt(a),
# see comment in related test in power.coffee for more about this.
# Notes:
#
# Formerly add() and multiply() were used to construct expressions but
# this preevaluation caused problems.
#
# For example, suppose A has the floating point value inf.
#
# Before, the expression A/A resulted in 1 because the scanner would
# divide the symbols.
#
# After removing add() and multiply(), A/A results in nan which is the
# correct result.
#
# The functions negate() and inverse() are used but they do not cause
# problems with preevaluation of symbols.
T_INTEGER = 1001
T_DOUBLE = 1002
T_SYMBOL = 1003
T_FUNCTION = 1004
T_NEWLINE = 1006
T_STRING = 1007
T_GTEQ = 1008
T_LTEQ = 1009
T_EQ = 1010
T_NEQ = 1011
T_QUOTASSIGN = 1012
token = ""
newline_flag = 0
meta_mode = 0
input_str = 0
scan_str = 0
token_str = 0
token_buf = 0
lastFoundSymbol = null
symbolsRightOfAssignment = null
symbolsLeftOfAssignment = null
isSymbolLeftOfAssignment = null
scanningParameters = null
functionInvokationsScanningStack = null
skipRootVariableToBeSolved = false
assignmentFound = null
# Returns number of chars scanned and expr on stack.
# Returns zero when nothing left to scan.
# takes a string
scanned = ""
scan = (s) ->
if DEBUG then console.log "#### scanning " + s
#if s=="y=x"
# debugger
#if s=="y"
# debugger
#if s=="i=sqrt(-1)"
# debugger
lastFoundSymbol = null
symbolsRightOfAssignment = []
symbolsLeftOfAssignment = []
isSymbolLeftOfAssignment = true
scanningParameters = []
functionInvokationsScanningStack = [""]
assignmentFound = false
scanned = s
meta_mode = 0
expanding++
input_str = 0
scan_str = 0
get_next_token()
if (token == "")
push(symbol(NIL))
expanding--
return 0
scan_stmt()
expanding--
if !assignmentFound
symbolsInExpressionsWithoutAssignments = symbolsInExpressionsWithoutAssignments.concat symbolsLeftOfAssignment
return token_str - input_str
# takes a string
scan_meta = (s) ->
scanned = s
meta_mode = 1
expanding++
input_str = 0
scan_str = 0
get_next_token()
if (token == "")
push(symbol(NIL))
expanding--
return 0
scan_stmt()
expanding--
return token_str - input_str
scan_stmt = ->
scan_relation()
assignmentIsOfQuotedType = false
if token == T_QUOTASSIGN
assignmentIsOfQuotedType = true
if (token == T_QUOTASSIGN or token == '=')
symbolLeftOfAssignment = lastFoundSymbol
if DEBUG then console.log("assignment!")
assignmentFound = true
isSymbolLeftOfAssignment = false
get_next_token()
push_symbol(SETQ)
swap()
# if it's a := then add a quote
if (assignmentIsOfQuotedType)
push_symbol(QUOTE)
scan_relation()
# if it's a := then you have to list
# together the quote and its argument
if assignmentIsOfQuotedType
list(2)
list(3)
isSymbolLeftOfAssignment = true
if codeGen
# in case of re-assignment, the symbol on the
# left will also be in the set of the symbols
# on the right. In that case just remove it from
# the symbols on the right.
indexOfSymbolLeftOfAssignment = symbolsRightOfAssignment.indexOf(symbolLeftOfAssignment)
if indexOfSymbolLeftOfAssignment != -1
symbolsRightOfAssignment.splice(indexOfSymbolLeftOfAssignment, 1)
symbolsHavingReassignments.push symbolLeftOfAssignment
# print out the immediate dependencies
if DEBUG
console.log "locally, " + symbolLeftOfAssignment + " depends on: "
for i in symbolsRightOfAssignment
console.log " " + i
# ok add the local dependencies to the existing
# dependencies of this left-value symbol
# create the exiting dependencies list if it doesn't exist
symbolsDependencies[symbolLeftOfAssignment] ?= []
existingDependencies = symbolsDependencies[symbolLeftOfAssignment]
# copy over the new dependencies to the existing
# dependencies avoiding repetitions
for i in symbolsRightOfAssignment
if existingDependencies.indexOf(i) == -1
existingDependencies.push i
symbolsRightOfAssignment = []
scan_relation = ->
scan_expression()
switch (token)
when T_EQ
push_symbol(TESTEQ)
swap()
get_next_token()
scan_expression()
list(3)
when T_NEQ
push_symbol(NOT)
swap()
push_symbol(TESTEQ)
swap()
get_next_token()
scan_expression()
list(3)
list(2)
when T_LTEQ
push_symbol(TESTLE)
swap()
get_next_token()
scan_expression()
list(3)
when T_GTEQ
push_symbol(TESTGE)
swap()
get_next_token()
scan_expression()
list(3)
when '<'
push_symbol(TESTLT)
swap()
get_next_token()
scan_expression()
list(3)
when '>'
push_symbol(TESTGT)
swap()
get_next_token()
scan_expression()
list(3)
scan_expression = ->
h = tos
switch token
when '+'
get_next_token()
scan_term()
when '-'
get_next_token()
scan_term()
negate()
else
scan_term()
while (newline_flag == 0 && (token == '+' || token == '-'))
if (token == '+')
get_next_token()
scan_term()
else
get_next_token()
scan_term()
negate()
if (tos - h > 1)
list(tos - h)
push_symbol(ADD)
swap()
cons()
is_factor = ->
if token.charCodeAt?(0) == dotprod_unicode
return 1
switch (token)
when '*', '/'
return 1
when '(', T_SYMBOL, T_FUNCTION, T_INTEGER, T_DOUBLE, T_STRING
if (newline_flag) # implicit mul can't cross line
scan_str = token_str # better error display
return 0
else
return 1
return 0
simplify_1_in_products = (tos,h) ->
if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
pop()
# calculate away consecutive constants
multiply_consecutive_constants = (tos,h)->
if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
multiply()
scan_term = ->
h = tos
scan_factor()
if parse_time_simplifications
simplify_1_in_products(tos,h)
while (is_factor())
if (token == '*')
get_next_token()
scan_factor()
else if (token == '/')
# in case of 1/... then
# we scanned the 1, we get rid
# of it because otherwise it becomes
# an extra factor that wasn't there and
# things like
# 1/(2*a) become 1*(1/(2*a))
simplify_1_in_products(tos,h)
get_next_token()
scan_factor()
inverse()
else if (token.charCodeAt?(0) == dotprod_unicode)
get_next_token()
push_symbol(INNER)
swap()
scan_factor()
list(3)
else
scan_factor()
if parse_time_simplifications
multiply_consecutive_constants(tos,h)
simplify_1_in_products(tos,h)
if (h == tos)
push_integer(1)
else if (tos - h > 1)
list(tos - h)
push_symbol(MULTIPLY)
swap()
cons()
scan_power = ->
if (token == '^')
get_next_token()
push_symbol(POWER)
swap()
scan_factor()
list(3)
scan_index = (h) ->
#console.log "[ as index"
get_next_token()
push_symbol(INDEX)
swap()
scan_expression()
while (token == ',')
get_next_token()
scan_expression()
if (token != ']')
scan_error("] expected")
get_next_token()
list(tos - h)
scan_factor = ->
h = tos
#console.log "scan_factor token: " + token
firstFactorIsNumber = false
if (token == '(')
scan_subexpr()
else if (token == T_SYMBOL)
scan_symbol()
else if (token == T_FUNCTION)
scan_function_call_with_function_name()
else if token == '['
#console.log "[ as tensor"
#debugger
scan_tensor()
else if (token == T_INTEGER)
firstFactorIsNumber = true
bignum_scan_integer(token_buf)
get_next_token()
else if (token == T_DOUBLE)
firstFactorIsNumber = true
bignum_scan_float(token_buf)
get_next_token()
else if (token == T_STRING)
scan_string()
else
scan_error("syntax error")
# after the main initial part of the factor that
# we just scanned above,
# we can get an arbitrary about of appendages
# of the form ...[...](...)...
# If the main part is not a number, then these are all, respectively,
# - index references (as opposed to tensor definition) and
# - function calls without an explicit function name
# (instead of subexpressions or parameters of function
# definitions or function calls with an explicit function
# name), respectively
while token == '[' or token == '(' and newline_flag == 0 and !firstFactorIsNumber
if token == '['
scan_index(h)
else if token == '('
#console.log "( as function call without function name "
scan_function_call_without_function_name()
while (token == '!')
get_next_token()
push_symbol(FACTORIAL)
swap()
list(2)
# in theory we could already count the
# number of transposes and simplify them
# away, but it's not that clean to have
# multiple places where that happens, and
# the parser is not the place.
while (token.charCodeAt?(0) == transpose_unicode)
get_next_token()
push_symbol(TRANSPOSE)
swap()
list(2)
scan_power()
addSymbolRightOfAssignment = (theSymbol) ->
if predefinedSymbolsInGlobalScope_doNotTrackInDependencies.indexOf(theSymbol) == -1 and
symbolsRightOfAssignment.indexOf(theSymbol) == -1 and
symbolsRightOfAssignment.indexOf("'"+theSymbol) == -1 and
!skipRootVariableToBeSolved
if DEBUG then console.log("... adding symbol: " + theSymbol + " to the set of the symbols right of assignment")
prefixVar = ""
for i in [1...functionInvokationsScanningStack.length]
if functionInvokationsScanningStack[i] != ""
prefixVar += functionInvokationsScanningStack[i] + "_" + i + "_"
theSymbol = prefixVar + theSymbol
symbolsRightOfAssignment.push theSymbol
addSymbolLeftOfAssignment = (theSymbol) ->
if predefinedSymbolsInGlobalScope_doNotTrackInDependencies.indexOf(theSymbol) == -1 and
symbolsLeftOfAssignment.indexOf(theSymbol) == -1 and
symbolsLeftOfAssignment.indexOf("'"+theSymbol) == -1 and
!skipRootVariableToBeSolved
if DEBUG then console.log("... adding symbol: " + theSymbol + " to the set of the symbols left of assignment")
prefixVar = ""
for i in [1...functionInvokationsScanningStack.length]
if functionInvokationsScanningStack[i] != ""
prefixVar += functionInvokationsScanningStack[i] + "_" + i + "_"
theSymbol = prefixVar + theSymbol
symbolsLeftOfAssignment.push theSymbol
scan_symbol = ->
if (token != T_SYMBOL)
scan_error("symbol expected")
if (meta_mode && token_buf.length == 1)
switch (token_buf[0])
when 'a'
push(symbol(METAA))
when 'b'
push(symbol(METAB))
when 'x'
push(symbol(METAX))
else
push(usr_symbol(token_buf))
else
push(usr_symbol(token_buf))
#console.log "found symbol: " + token_buf
if scanningParameters.length == 0
if DEBUG then console.log "out of scanning parameters, processing " + token_buf
lastFoundSymbol = token_buf
if isSymbolLeftOfAssignment
addSymbolLeftOfAssignment token_buf
else
if DEBUG then console.log "still scanning parameters, skipping " + token_buf
if isSymbolLeftOfAssignment
addSymbolRightOfAssignment "'" + token_buf
if DEBUG then console.log("found symbol: " + token_buf + " left of assignment: " + isSymbolLeftOfAssignment)
# if we were looking at the right part of an assignment while we
# found the symbol, then add it to the "symbolsRightOfAssignment"
# set (we check for duplications)
if !isSymbolLeftOfAssignment
addSymbolRightOfAssignment token_buf
get_next_token()
scan_string = ->
new_string(token_buf)
get_next_token()
scan_function_call_with_function_name = ->
if DEBUG then console.log "-- scan_function_call_with_function_name start"
n = 1 # the parameter number as we scan parameters
p = new U()
p = usr_symbol(token_buf)
push(p)
get_next_token() # function name
functionName = token_buf
if functionName == "roots" or functionName == "defint" or functionName == "sum" or functionName == "product" or functionName == "for"
functionInvokationsScanningStack.push token_buf
lastFoundSymbol = token_buf
if !isSymbolLeftOfAssignment
addSymbolRightOfAssignment token_buf
get_next_token() # 1st parameter
scanningParameters.push true
if (token != ')')
scan_stmt()
n++
while (token == ',')
get_next_token()
# roots' disappearing variable, if there, is the second one
if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("roots") != -1
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
skipRootVariableToBeSolved = true
# sums' disappearing variable, is alsways the second one
if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("sum") != -1
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("sum_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
skipRootVariableToBeSolved = true
# product's disappearing variable, is alsways the second one
if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("product") != -1
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("product_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
skipRootVariableToBeSolved = true
# for's disappearing variable, is alsways the second one
if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("for") != -1
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("for_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
skipRootVariableToBeSolved = true
# defint's disappearing variables can be in positions 2,5,8...
if functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("defint") != -1 and
(n == 2 or (n>2 and ((n-2) % 3 == 0)))
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("defint_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
skipRootVariableToBeSolved = true
scan_stmt()
skipRootVariableToBeSolved = false
n++
# todo refactor this, there are two copies
# this catches the case where the "roots" variable is not specified
if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("roots") != -1
symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_" + "x")).test(x)
scanningParameters.pop()
for i in [0..symbolsRightOfAssignment.length]
if symbolsRightOfAssignment[i]?
if functionName == "roots"
symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
if functionName == "defint"
symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("defint_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
if functionName == "sum"
symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("sum_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
if functionName == "product"
symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("product_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
if functionName == "for"
symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("for_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
if (token != ')')
scan_error(") expected")
get_next_token()
list(n)
if functionName == "roots" or functionName == "defint" or functionName == "sum" or functionName == "product" or functionName == "for"
functionInvokationsScanningStack.pop()
if functionName == symbol(PATTERN).printname
patternHasBeenFound = true
if DEBUG then console.log "-- scan_function_call_with_function_name end"
scan_function_call_without_function_name = ->
if DEBUG then console.log "-- scan_function_call_without_function_name start"
# the function will have to be looked up
# at runtime
push_symbol(EVAL)
swap()
list(2)
n = 1 # the parameter number as we scan parameters
get_next_token() # left paren
scanningParameters.push true
if (token != ')')
scan_stmt()
n++
while (token == ',')
get_next_token()
scan_stmt()
n++
scanningParameters.pop()
if (token != ')')
scan_error(") expected")
get_next_token()
list(n)
if DEBUG then console.log "-- scan_function_call_without_function_name end: " + stack[tos-1]
# scan subexpression
scan_subexpr = ->
n = 0
if (token != '(')
scan_error("( expected")
get_next_token()
scan_stmt()
if (token != ')')
scan_error(") expected")
get_next_token()
scan_tensor = ->
n = 0
if (token != '[')
scan_error("[ expected")
get_next_token()
#console.log "scanning the next statement"
scan_stmt()
n = 1
while (token == ',')
get_next_token()
scan_stmt()
n++
#console.log "building tensor with elements number: " + n
build_tensor(n)
if (token != ']')
scan_error("] expected")
get_next_token()
scan_error = (errmsg) ->
errorMessage = ""
# try not to put question mark on orphan line
while (input_str != scan_str)
if ((scanned[input_str] == '\n' || scanned[input_str] == '\r') && input_str + 1 == scan_str)
break
errorMessage += scanned[input_str++]
errorMessage += " ? "
while (scanned[input_str] && (scanned[input_str] != '\n' && scanned[input_str] != '\r'))
errorMessage += scanned[input_str++]
errorMessage += '\n'
stop(errmsg)
# There are n expressions on the stack, possibly tensors.
#
# This function assembles the stack expressions into a single tensor.
#
# For example, at the top level of the expression ((a,b),(c,d)), the vectors
# (a,b) and (c,d) would be on the stack.
# takes an integer
build_tensor = (n) ->
# int i, j, k, ndim, nelem
i = 0
save()
p2 = alloc_tensor(n)
p2.tensor.ndim = 1
p2.tensor.dim[0] = n
for i in [0...n]
p2.tensor.elem[i] = stack[tos-n+i]
check_tensor_dimensions p2
moveTos tos - n
push(p2)
restore()
get_next_token = ->
newline_flag = 0
while (1)
get_token()
if (token != T_NEWLINE)
break
newline_flag = 1
if DEBUG then console.log "get_next_token token: " + token
#if token == ')'
# debugger
get_token = ->
# skip spaces
while (isspace(scanned[scan_str]))
if (scanned[scan_str] == '\n' || scanned[scan_str] == '\r')
token = T_NEWLINE
scan_str++
return
scan_str++
token_str = scan_str
# end of string?
if (scan_str == scanned.length)
token = ""
return
# number?
if (isdigit(scanned[scan_str]) || scanned[scan_str] == '.')
while (isdigit(scanned[scan_str]))
scan_str++
if (scanned[scan_str] == '.')
scan_str++
while (isdigit(scanned[scan_str]))
scan_str++
if (scanned[scan_str] == 'e' && (scanned[scan_str+1] == '+' || scanned[scan_str+1] == '-' || isdigit(scanned[scan_str+1])))
scan_str += 2
while (isdigit(scanned[scan_str]))
scan_str++
token = T_DOUBLE
else
token = T_INTEGER
update_token_buf(token_str, scan_str)
return
# symbol?
if (isalpha(scanned[scan_str]))
while (isalnumorunderscore(scanned[scan_str]))
scan_str++
if (scanned[scan_str] == '(')
token = T_FUNCTION
else
token = T_SYMBOL
update_token_buf(token_str, scan_str)
return
# string ?
if (scanned[scan_str] == '"')
scan_str++
while (scanned[scan_str] != '"')
#if (scan_str == scanned.length || scanned[scan_str] == '\n' || scanned[scan_str] == '\r')
if (scan_str == scanned.length - 1)
scan_str++
scan_error("runaway string")
scan_str--
scan_str++
scan_str++
token = T_STRING
update_token_buf(token_str + 1, scan_str - 1)
return
# comment?
if (scanned[scan_str] == '#' || scanned[scan_str] == '-' && scanned[scan_str+1] == '-')
while (scanned[scan_str] && scanned[scan_str] != '\n' && scanned[scan_str] != '\r')
scan_str++
if (scanned[scan_str])
scan_str++
token = T_NEWLINE
return
# quote-assignment
if (scanned[scan_str] == ':' && scanned[scan_str+1] == '=')
scan_str += 2
token = T_QUOTASSIGN
return
# relational operator?
if (scanned[scan_str] == '=' && scanned[scan_str+1] == '=')
scan_str += 2
token = T_EQ
return
# != operator. It's a little odd because
# "!" is not a "not", which would make things consistent.
# (it's used for factorial).
# An alternative would be to use "<>" but it's not used
# a lot in other languages...
if (scanned[scan_str] == '!' && scanned[scan_str+1] == '=')
scan_str += 2
token = T_NEQ
return
if (scanned[scan_str] == '<' && scanned[scan_str+1] == '=')
scan_str += 2
token = T_LTEQ
return
if (scanned[scan_str] == '>' && scanned[scan_str+1] == '=')
scan_str += 2
token = T_GTEQ
return
# single char token
token = scanned[scan_str++]
# both strings
update_token_buf = (a,b) ->
token_buf = scanned.substring(a,b)
$.scan = scan