Syntactic analysis
Lexical analysis
# A simple scanner:
# takes an input string,
# returns a list of tokens.
class SyntaxError = {}
function syntax_error(s)
return table SyntaxError{value = "Syntax error: {}."%[s]}
end
function scan(s)
a = []
n = len(s)
i = 0
while i<n
if s[i] in "+-*/^(){}[];,"
a.push(s[i])
i+=1
elif s[i].isalpha()
j = i
while i<n and s[i].isalnum()
i+=1
end
a.push(s[j..i-1])
elif s[i].isdigit()
j = i
while i<n and s[i].isdigit()
i+=1
end
a.push(int(s[j..i-1]))
elif s[i].isspace()
i+=1
else
raise syntax_error(
"unexpected character: '{}'"%[s[i]])
end
end
a.push(null)
return a
end
while true
s = input("> ")
try
print(scan(s))
catch e if e: SyntaxError
print(e.value)
end
end
A recursive descent parser
# The function ast(a) is a parser that takes a vector of tokens,
# and returns an abstract syntax tree consisting of S-expressions
# of the form [operator,arg0,arg1,...].
# Formal grammar
# by production rules in EBNF
# atom = number | identifier | "(" expression ")";
# power = atom ["^" negation];
# negation = ["-"] power;
# multiplication = negation {("*" | "/") negation};
# addition = multiplication {("+" | "-") multiplication};
# expression = addition;
# Example input:
# "2*(x+y)^2+4*x".
function expect(a,i)
if a[i] is null
raise syntax_error("unexpected end of input")
else
return a[i]
end
end
function atom(a,i)
t = expect(a,i)
if t: Int
return i+1,a[i]
elif t: String and t[0].isalpha()
return i+1,a[i]
elif t=="("
i,x = expression(a,i+1)
if expect(a,i)!=")"
raise syntax_error(
"expected ')', but got '{}'"%[a[i]])
end
return i+1,x
else
raise syntax_error(
"unexpected symbol: '{}'"%[t])
end
end
function power(a,i)
i,x = atom(a,i)
if a[i]=="^"
i,y = negation(a,i+1)
return i,["^",x,y]
else
return i,x
end
end
function negation(a,i)
if a[i]=="-"
i,x = power(a,i+1)
return i,["~",x]
else
return power(a,i)
end
end
function multiplication(a,i)
i,x = negation(a,i)
op = a[i]
while op=="*" or op=="/"
i,y = negation(a,i+1)
x = [op,x,y]
op = a[i]
end
return i,x
end
function addition(a,i)
i,x = multiplication(a,i)
op = a[i]
while op=="+" or op=="-"
i,y = multiplication(a,i+1)
x = [op,x,y]
op = a[i]
end
return i,x
end
function expression(a,i)
return addition(a,i)
end
function ast(a)
i,t = expression(a,0)
if a[i] is null
return t
else
raise syntax_error(
"unexpected symbol: '{}'"%[a[i]])
end
end
while true
s = input("> ")
try
a = scan(s)
print(ast(a))
catch e if e: SyntaxError
print(e.value)
end
end
A simple evaluator
function eval(t)
if t: Int
return t
elif t: List
op = t[0]
if op=="+"
return eval(t[1])+eval(t[2])
elif op=="-"
return eval(t[1])-eval(t[2])
elif op=="*"
return eval(t[1])*eval(t[2])
elif op=="/"
return eval(t[1])/eval(t[2])
elif op=="^"
return eval(t[1])^eval(t[2])
elif op=="~"
return -eval(t[1])
else
abort("unimplemented")
end
else
abort("unimplemented")
end
end
while true
s = input("> ")
try
a = scan(s)
t = ast(a)
print(eval(t))
catch e if e: SyntaxError
print(e.value)
end
end
# It is possible to use a dispatch table
dispatch = {
"+": |x,y| x+y, "-": |x,y| x-y,
"*": |x,y| x*y, "/": |x,y| x/y,
"^": |x,y| x^y, "~": |x| -x
}
eval = |t| (t if t: Int else
dispatch[t[0]](*t[1..].map(eval)))
Using a parser generator
# The standard library contains a simple parser generator.
use string.syntax: parser, SyntaxError
ast = parser{
scan = {symbols = list("+-*/^()")},
transform = {
atom = "", power_expr = "", neg_expr = "",
add_expr = "", mul_expr = "",
add = "+", sub = "-", mul = "*", div = "/",
power = "^", neg = "~"
},
grammar = "
atom = number | identifier | '(' add_expr ')';
power = atom '^' neg_expr;
power_expr = power | atom;
neg = '-' power_expr;
neg_expr = neg | power_expr;
mul = '*' neg_expr;
div = '/' neg_expr;
mul_expr = neg_expr {mul | div};
add = '+' mul_expr;
sub = '-' mul_expr;
add_expr = mul_expr {add | sub};
ast = add_expr;
"
}
while true
s = input("> ")
try
t = ast(s)
print(t)
catch e if e: SyntaxError
print(e.text)
end
end