--- Parser generator. --

A parser is created by

--
--

p = Parser {grammar}

--
--

and called with

--
--

result = p:parse (start_token, token_list[, -- from])

--
--

where start_token is the non-terminal at which to start parsing -- in the grammar, token_list is a list of tokens of the form

--
--

{ty = "token_type", tok = "token_text"}

--
--

and from is the token in the list from which to start (the -- default value is 1).

--

The output of the parser is a tree, each of whose -- nodes is of the form:

--
--

{ty = symbol, node1 = tree1, -- node2 = tree2, ... [, list]}

--
--

where each nodei is a symbolic name, and -- list is the list of trees returned if the corresponding token was a -- list token.

--

A grammar is a table of rules of the form

--
--

non-terminal = {production1, -- production2, ...}

--
--

plus a special item

--
--

lexemes = Set {"class1", "class2", -- ...}

--
--

Each production gives a form that a non-terminal may take. A -- production has the form

--
--

production = {"token1", "token2", -- ..., [action][,abstract]}

--
--

A production

-- --

Each token may be

-- --

The parse tree for a literal string or lexeme class is the string -- that was matched. The parse tree for a non-terminal is a table of -- the form

--
--

{ty = "non_terminal_name", tree1, -- tree2, ...}

--
--

where the treei are the parse trees for the -- corresponding terminals and non-terminals.

--

An action is of the form

--
--

action = function (tree, token, pos) ... return tree_ -- end

--
--

It is passed the parse tree for the current node, the token list, -- and the current position in the token list, and returns a new parse -- tree.

--

An abstract syntax rule is of the form

--
--

name = {i1, i2, ...}

--
--

where i1, i2, -- ... are numbers. This results in a parse tree of the form

--
--

{ty = "name"; treei1, -- treei2, ...}

--
--

If a production has no abstract syntax rule, the result is the -- parse node for the current node.

--

FIXME: Give lexemes as an extra argument to Parser? --
FIXME: Rename second argument to parse method to "tokens"? --
FIXME: Make start_token an optional argument to parse? (swap with -- token list) and have it default to the first non-terminal?

module ("parser", package.seeall) require "object" Parser = Object {_init = {"grammar"}} --- Parser constructor -- @param grammar parser grammar -- @return parser function Parser:_clone (grammar) local init = table.permute (self._init, grammar) -- Reformat the abstract syntax rules for rname, rule in pairs (init.grammar) do if name ~= "lexemes" then for pnum, prod in ipairs (rule) do local abstract for i, v in pairs (prod) do if type (i) == "string" and i ~= "action" then if abstract then print (prod) die ("more than one abstract rule for " .. rname .. "." .. tostring (pnum)) else if type (v) ~= "table" then die ("bad abstract syntax rule of type " .. type (v)) end abstract = {ty = i, template = v} prod[i] = nil end end end if abstract then prod.abstract = abstract end end end end local object = table.merge (self, init) return setmetatable (object, object) end --- Parse a token list. -- @param start the token at which to start -- @param token the list of tokens -- @param from the index of the token to start from (default: 1) -- @return parse tree function Parser:parse (start, token, from) local grammar = self.grammar -- for consistency and brevity local rule, symbol -- functions called before they are defined -- Try to parse an optional symbol. -- @param sym the symbol being tried -- @param from the index of the token to start from -- @return the resulting parse tree, or false if empty -- @return the index of the first unused token, or false to -- indicate failure local function optional (sym, from) local tree, to = symbol (sym, from) if to then return tree, to else return false, from end end -- Try to parse a list of symbols. -- @param sym the symbol being tried -- @param sep the list separator -- @param from the index of the token to start from -- @return the resulting parse tree, or false if empty -- @return the index of the first unused token, or false to -- indicate failure local function list (sym, sep, from) local tree, to tree, from = symbol (sym, from) local list = {tree} if from == false then return list, false end to = from repeat if sep ~= "" then tree, from = symbol (sep, from) end if from then tree, from = symbol (sym, from) if from then table.insert (list, tree) to = from end end until from == false return list, to end -- Try to parse a given symbol. -- @param sym the symbol being tried -- @param from the index of the token to start from -- @return tree the resulting parse tree, or false if empty -- @return the index of the first unused token, or false to -- indicate failure symbol = function (sym, from) -- declared at the top if string.sub (sym, -4, -1) == "_opt" then -- optional symbol return optional (string.sub (sym, 1, -5), from) elseif string.find (sym, "_list.-$") then -- list local _, _, subsym, sep = string.find (sym, "^(.*)_list_?(.-)$") return list (subsym, sep, from) elseif grammar[sym] then -- non-terminal return rule (sym, from) elseif token[from] and -- not end of token list ((grammar.lexemes[sym] and sym == token[from].ty) or -- lexeme sym == token[from].tok) -- literal terminal then return token[from].tok, from + 1 -- advance to next token else return false, false end end -- Try a production. -- @param name the name of the current rule -- @param prod the production (list of symbols) being tried -- @param from the index of the token to start from -- @return the parse tree (incomplete if to is false) -- @return the index of the first unused token, or false to -- indicate failure local function production (name, prod, from) local tree = {ty = name} local to = from for _, prod in ipairs (prod) do local sym sym, to = symbol (prod, to) if to then table.insert (tree, sym) else return tree, false end end if prod.action then tree = prod.action (tree, token, to) end if prod.abstract then local ntree = {} ntree.ty = prod.abstract.ty for i, n in prod.abstract.template do ntree[i] = tree[n] end tree = ntree end return tree, to end -- Parse according to a particular rule. -- @param name the name of the rule to try -- @param from the index of the token to start from -- @return parse tree -- @return the index of the first unused token, or false to -- indicate failure rule = function (name, from) -- declared at the top local alt = grammar[name] local tree, to for _, alt in ipairs (alt) do tree, to = production (name, alt, from) if to then return tree, to end end return tree, false end return rule (start, 1, from or 1) end