Syntactic analysis

Table of contents

  1. Lexical analysis
  2. A recursive descent parser
  3. A simple evaluator
  4. Using a parser generator

Lexical analysis

# A simple scanner:
# takes an input string,
# returns a list of tokens.

class SyntaxError = {}

function syntax_error(s)
   return table SyntaxError{value = "Syntax error: {}."%[s]}
end

function scan(s)
   a = []
   n = len(s)
   i = 0
   while i<n
      if s[i] in "+-*/^(){}[];,"
         a.push(s[i])
         i+=1
      elif s[i].isalpha()
         j = i
         while i<n and s[i].isalnum()
            i+=1
         end
         a.push(s[j..i-1])
      elif s[i].isdigit()
         j = i
         while i<n and s[i].isdigit()
            i+=1
         end
         a.push(int(s[j..i-1]))
      elif s[i].isspace()
         i+=1
      else
         raise syntax_error(
            "unexpected character: '{}'"%[s[i]])
      end
   end
   a.push(null)
   return a
end

while true
   s = input("> ")
   try
      print(scan(s))
   catch e if e: SyntaxError
      print(e.value)
   end
end

A recursive descent parser

# The function ast(a) is a parser that takes a vector of tokens,
# and returns an abstract syntax tree consisting of S-expressions
# of the form [operator,arg0,arg1,...].

# Formal grammar
# by production rules in EBNF

# atom = number | identifier | "(" expression ")";
# power = atom ["^" negation];
# negation = ["-"] power;
# multiplication = negation {("*" | "/") negation};
# addition = multiplication {("+" | "-") multiplication};
# expression = addition;

# Example input:
# "2*(x+y)^2+4*x".


function expect(a,i)
   if a[i] is null
      raise syntax_error("unexpected end of input")
   else
      return a[i]
   end
end

function atom(a,i)
   t = expect(a,i)
   if t: Int
      return i+1,a[i]
   elif t: String and t[0].isalpha()
      return i+1,a[i]
   elif t=="("
      i,x = expression(a,i+1)
      if expect(a,i)!=")"
         raise syntax_error(
            "expected ')', but got '{}'"%[a[i]])
      end
      return i+1,x
   else
      raise syntax_error(
         "unexpected symbol: '{}'"%[t])
   end
end

function power(a,i)
   i,x = atom(a,i)
   if a[i]=="^"
      i,y = negation(a,i+1)
      return i,["^",x,y]
   else
      return i,x
   end
end

function negation(a,i)
   if a[i]=="-"
      i,x = power(a,i+1)
      return i,["~",x]
   else
      return power(a,i)
   end
end

function multiplication(a,i)
   i,x = negation(a,i)
   op = a[i]
   while op=="*" or op=="/"
      i,y = negation(a,i+1)
      x = [op,x,y]
      op = a[i]
   end
   return i,x
end

function addition(a,i)
   i,x = multiplication(a,i)
   op = a[i]
   while op=="+" or op=="-"
      i,y = multiplication(a,i+1)
      x = [op,x,y]
      op = a[i]
   end
   return i,x
end

function expression(a,i)
   return addition(a,i)
end

function ast(a)
   i,t = expression(a,0)
   if a[i] is null
      return t
   else
      raise syntax_error(
         "unexpected symbol: '{}'"%[a[i]])
   end
end

while true
   s = input("> ")
   try
      a = scan(s)
      print(ast(a))
   catch e if e: SyntaxError
      print(e.value)
   end
end

A simple evaluator

function eval(t)
   if t: Int
      return t
   elif t: List
      op = t[0]
      if op=="+"
         return eval(t[1])+eval(t[2])
      elif op=="-"
         return eval(t[1])-eval(t[2])
      elif op=="*"
         return eval(t[1])*eval(t[2])
      elif op=="/"
         return eval(t[1])/eval(t[2])
      elif op=="^"
         return eval(t[1])^eval(t[2])
      elif op=="~"
         return -eval(t[1])
      else
         abort("unimplemented")
      end
   else
      abort("unimplemented")
   end
end

while true
   s = input("> ")
   try
      a = scan(s)
      t = ast(a)
      print(eval(t))
   catch e if e: SyntaxError
      print(e.value)
   end
end


# It is possible to use a dispatch table

dispatch = {
  "+": |x,y| x+y, "-": |x,y| x-y,
  "*": |x,y| x*y, "/": |x,y| x/y,
  "^": |x,y| x^y, "~": |x| -x
}

eval = |t| (t if t: Int else
  dispatch[t[0]](*t[1..].map(eval)))

Using a parser generator

# The standard library contains a simple parser generator.

use string.syntax: parser, SyntaxError

ast = parser{
   scan = {symbols = list("+-*/^()")},
   transform = {
      atom = "", power_expr = "", neg_expr = "",
      add_expr = "", mul_expr = "",
      add = "+", sub = "-", mul = "*", div = "/",
      power = "^", neg = "~"
   },
   grammar = "
      atom = number | identifier | '(' add_expr ')';
      power = atom '^' neg_expr;
      power_expr = power | atom;
      neg = '-' power_expr;
      neg_expr = neg | power_expr;
      mul = '*' neg_expr;
      div = '/' neg_expr;
      mul_expr = neg_expr {mul | div};
      add = '+' mul_expr;
      sub = '-' mul_expr;
      add_expr = mul_expr {add | sub};
      ast = add_expr;
   "
}

while true
   s = input("> ")
   try
      t = ast(s)
      print(t)
   catch e if e: SyntaxError
      print(e.text)
   end
end