--[[ LuaSrcDiet License ------------------ LuaSrcDiet is licensed under the terms of the MIT license reproduced below. This means that LuaSrcDiet is free software and can be used for both academic and commercial purposes at absolutely no cost. Think of LuaSrcDiet as a compiler or a text filter; whatever that is processed by LuaSrcDiet is not affected by its license. It does not add anything new into your source code; it only transforms code that already exist. Hence, there is no need to tag this license onto Lua programs that are only processed. Given the liberal terms of this kind of license, the primary purpose is just to claim authorship of LuaSrcDiet. Parts of LuaSrcDiet is based on Lua 5 code. See the file COPYRIGHT_Lua51 (Lua 5.1.4) for Lua 5's license. =============================================================================== Copyright (C) 2005-2008,2011 Kein-Hong Man Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. =============================================================================== (end of COPYRIGHT) ]] return loadstring( [=[ --#!/usr/bin/env lua --[[-------------------------------------------------------------------- LuaSrcDiet Compresses Lua source code by removing unnecessary characters. For Lua 5.1.x source code. Copyright (c) 2008,2011 Kein-Hong Man The COPYRIGHT file describes the conditions under which this software may be distributed. ----------------------------------------------------------------------]] --[[-------------------------------------------------------------------- -- NOTES: -- * Remember to update version and date information below (MSG_TITLE) -- * TODO: passing data tables around is a horrific mess -- * TODO: to implement pcall() to properly handle lexer etc. 
errors -- * TODO: need some automatic testing for a semblance of sanity -- * TODO: the plugin module is highly experimental and unstable ----------------------------------------------------------------------]] -- standard libraries, functions local string = string local math = math local table = table local require = require local print = print local sub = string.sub local gmatch = string.gmatch local match = string.match -- modules incorporated as preload functions follows local preload = package.preload local base = _G local plugin_info = { html = "html generates a HTML file for checking globals", sloc = "sloc calculates SLOC for given source file", } local p_embedded = { 'html', 'sloc', } -- preload function for module llex preload.llex = function() --start of inserted module module "llex" local string = base.require "string" local find = string.find local match = string.match local sub = string.sub ---------------------------------------------------------------------- -- initialize keyword list, variables ---------------------------------------------------------------------- local kw = {} for v in string.gmatch([[ and break do else elseif end false for function if in local nil not or repeat return then true until while]], "%S+") do kw[v] = true end -- see init() for module variables (externally visible): -- tok, seminfo, tokln local z, -- source stream sourceid, -- name of source I, -- position of lexer buff, -- buffer for strings ln -- line number ---------------------------------------------------------------------- -- add information to token listing ---------------------------------------------------------------------- local function addtoken(token, info) local i = #tok + 1 tok[i] = token seminfo[i] = info tokln[i] = ln end ---------------------------------------------------------------------- -- handles line number incrementation and end-of-line characters ---------------------------------------------------------------------- local function inclinenumber(i, is_tok) local sub = sub local old = sub(z, i, i) i = i + 1 -- skip '\n' or '\r' local c = sub(z, i, i) if (c == "\n" or c == "\r") and (c ~= old) then i = i + 1 -- skip '\n\r' or '\r\n' old = old..c end if is_tok then addtoken("TK_EOL", old) end ln = ln + 1 I = i return i end ---------------------------------------------------------------------- -- initialize lexer for given source _z and source name _sourceid ---------------------------------------------------------------------- function init(_z, _sourceid) z = _z -- source sourceid = _sourceid -- name of source I = 1 -- lexer's position in source ln = 1 -- line number tok = {} -- lexed token list* seminfo = {} -- lexed semantic information list* tokln = {} -- line numbers for messages* -- (*) externally visible thru' module -------------------------------------------------------------------- -- initial processing (shbang handling) -------------------------------------------------------------------- local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)") if p then -- skip first line I = I + #q addtoken("TK_COMMENT", q) if #r > 0 then inclinenumber(I, true) end end end ---------------------------------------------------------------------- -- returns a chunk name or id, no truncation for long names ---------------------------------------------------------------------- function chunkid() if sourceid and match(sourceid, "^[=@]") then return sub(sourceid, 2) -- remove first char end return "[string]" end ---------------------------------------------------------------------- -- formats 
error message and throws error -- * a simplified version, does not report what token was responsible ---------------------------------------------------------------------- function errorline(s, line) local e = error or base.error e(string.format("%s:%d: %s", chunkid(), line or ln, s)) end local errorline = errorline ------------------------------------------------------------------------ -- count separators ("=") in a long string delimiter ------------------------------------------------------------------------ local function skip_sep(i) local sub = sub local s = sub(z, i, i) i = i + 1 local count = #match(z, "=*", i) i = i + count I = i return (sub(z, i, i) == s) and count or (-count) - 1 end ---------------------------------------------------------------------- -- reads a long string or long comment ---------------------------------------------------------------------- local function read_long_string(is_str, sep) local i = I + 1 -- skip 2nd '[' local sub = sub local c = sub(z, i, i) if c == "\r" or c == "\n" then -- string starts with a newline? i = inclinenumber(i) -- skip it end while true do local p, q, r = find(z, "([\r\n%]])", i) -- (long range match) if not p then errorline(is_str and "unfinished long string" or "unfinished long comment") end i = p if r == "]" then -- delimiter test if skip_sep(i) == sep then buff = sub(z, buff, I) I = I + 1 -- skip 2nd ']' return buff end i = I else -- newline buff = buff.."\n" i = inclinenumber(i) end end--while end ---------------------------------------------------------------------- -- reads a string ---------------------------------------------------------------------- local function read_string(del) local i = I local find = find local sub = sub while true do local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range match) if p then if r == "\n" or r == "\r" then errorline("unfinished string") end i = p if r == "\\" then -- handle escapes i = i + 1 r = sub(z, i, i) if r == "" then break end -- (EOZ error) p = find("abfnrtv\n\r", r, 1, true) ------------------------------------------------------ if p then -- special escapes if p > 7 then i = inclinenumber(i) else i = i + 1 end ------------------------------------------------------ elseif find(r, "%D") then -- other non-digits i = i + 1 ------------------------------------------------------ else -- \xxx sequence local p, q, s = find(z, "^(%d%d?%d?)", i) i = q + 1 if s + 1 > 256 then -- UCHAR_MAX errorline("escape sequence too large") end ------------------------------------------------------ end--if p else i = i + 1 if r == del then -- ending delimiter I = i return sub(z, buff, i - 1) -- return string end end--if r else break -- (error) end--if p end--while errorline("unfinished string") end ------------------------------------------------------------------------ -- main lexer function ------------------------------------------------------------------------ function llex() local find = find local match = match while true do--outer local i = I -- inner loop allows break to be used to nicely section tests while true do--inner ---------------------------------------------------------------- local p, _, r = find(z, "^([_%a][_%w]*)", i) if p then I = i + #r if kw[r] then addtoken("TK_KEYWORD", r) -- reserved word (keyword) else addtoken("TK_NAME", r) -- identifier end break -- (continue) end ---------------------------------------------------------------- local p, _, r = find(z, "^(%.?)%d", i) if p then -- numeral if r == "." 
then i = i + 1 end local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i) i = q + 1 if #r == 1 then -- optional exponent if match(z, "^[%+%-]", i) then -- optional sign i = i + 1 end end local _, q = find(z, "^[_%w]*", i) I = q + 1 local v = sub(z, p, q) -- string equivalent if not base.tonumber(v) then -- handles hex test also errorline("malformed number") end addtoken("TK_NUMBER", v) break -- (continue) end ---------------------------------------------------------------- local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i) if p then if t == "\n" or t == "\r" then -- newline inclinenumber(i, true) else I = q + 1 -- whitespace addtoken("TK_SPACE", r) end break -- (continue) end ---------------------------------------------------------------- local r = match(z, "^%p", i) if r then buff = i local p = find("-[\"\'.=<>~", r, 1, true) if p then -- two-level if block for punctuation/symbols -------------------------------------------------------- if p <= 2 then if p == 1 then -- minus local c = match(z, "^%-%-(%[?)", i) if c then i = i + 2 local sep = -1 if c == "[" then sep = skip_sep(i) end if sep >= 0 then -- long comment addtoken("TK_LCOMMENT", read_long_string(false, sep)) else -- short comment I = find(z, "[\n\r]", i) or (#z + 1) addtoken("TK_COMMENT", sub(z, buff, I - 1)) end break -- (continue) end -- (fall through for "-") else -- [ or long string local sep = skip_sep(i) if sep >= 0 then addtoken("TK_LSTRING", read_long_string(true, sep)) elseif sep == -1 then addtoken("TK_OP", "[") else errorline("invalid long string delimiter") end break -- (continue) end -------------------------------------------------------- elseif p <= 5 then if p < 5 then -- strings I = i + 1 addtoken("TK_STRING", read_string(r)) break -- (continue) end r = match(z, "^%.%.?%.?", i) -- .|..|... 
dots -- (fall through) -------------------------------------------------------- else -- relational r = match(z, "^%p=?", i) -- (fall through) end end I = i + #r addtoken("TK_OP", r) -- for other symbols, fall through break -- (continue) end ---------------------------------------------------------------- local r = sub(z, i, i) if r ~= "" then I = i + 1 addtoken("TK_OP", r) -- other single-char tokens break end addtoken("TK_EOS", "") -- end of stream, return -- exit here ---------------------------------------------------------------- end--while inner end--while outer end --end of inserted module end -- preload function for module lparser preload.lparser = function() --start of inserted module module "lparser" local string = base.require "string" --[[-------------------------------------------------------------------- -- variable and data structure initialization ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initialization: main variables ---------------------------------------------------------------------- local toklist, -- grammar-only token tables (token table, seminfolist, -- semantic information table, line number toklnlist, -- table, cross-reference table) xreflist, tpos, -- token position line, -- start line # for error messages lastln, -- last line # for ambiguous syntax chk tok, seminfo, ln, xref, -- token, semantic info, line nameref, -- proper position of token fs, -- current function state top_fs, -- top-level function state globalinfo, -- global variable information table globallookup, -- global variable name lookup table localinfo, -- local variable information table ilocalinfo, -- inactive locals (prior to activation) ilocalrefs, -- corresponding references to activate statinfo -- statements labeled by type -- forward references for local functions local explist1, expr, block, exp1, body, chunk ---------------------------------------------------------------------- -- initialization: data structures ---------------------------------------------------------------------- local gmatch = string.gmatch local block_follow = {} -- lookahead check in chunk(), returnstat() for v in gmatch("else elseif end until ", "%S+") do block_follow[v] = true end local binopr_left = {} -- binary operators, left priority local binopr_right = {} -- binary operators, right priority for op, lt, rt in gmatch([[ {+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7} {^ 10 9}{.. 
5 4} {~= 3 3}{== 3 3} {< 3 3}{<= 3 3}{> 3 3}{>= 3 3} {and 2 2}{or 1 1} ]], "{(%S+)%s(%d+)%s(%d+)}") do binopr_left[op] = lt + 0 binopr_right[op] = rt + 0 end local unopr = { ["not"] = true, ["-"] = true, ["#"] = true, } -- unary operators local UNARY_PRIORITY = 8 -- priority for unary operators --[[-------------------------------------------------------------------- -- support functions ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- formats error message and throws error (duplicated from llex) -- * a simplified version, does not report what token was responsible ---------------------------------------------------------------------- local function errorline(s, line) local e = error or base.error e(string.format("(source):%d: %s", line or ln, s)) end ---------------------------------------------------------------------- -- handles incoming token, semantic information pairs -- * NOTE: 'nextt' is named 'next' originally ---------------------------------------------------------------------- -- reads in next token local function nextt() lastln = toklnlist[tpos] tok, seminfo, ln, xref = toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos] tpos = tpos + 1 end -- peek at next token (single lookahead for table constructor) local function lookahead() return toklist[tpos] end ---------------------------------------------------------------------- -- throws a syntax error, or if token expected is not there ---------------------------------------------------------------------- local function syntaxerror(msg) local tok = tok if tok ~= "" and tok ~= "" then if tok == "" then tok = seminfo end tok = "'"..tok.."'" end errorline(msg.." near "..tok) end local function error_expected(token) syntaxerror("'"..token.."' expected") end ---------------------------------------------------------------------- -- tests for a token, returns outcome -- * return value changed to boolean ---------------------------------------------------------------------- local function testnext(c) if tok == c then nextt(); return true end end ---------------------------------------------------------------------- -- check for existence of a token, throws error if not found ---------------------------------------------------------------------- local function check(c) if tok ~= c then error_expected(c) end end ---------------------------------------------------------------------- -- verify existence of a token, then skip it ---------------------------------------------------------------------- local function checknext(c) check(c); nextt() end ---------------------------------------------------------------------- -- throws error if condition not matched ---------------------------------------------------------------------- local function check_condition(c, msg) if not c then syntaxerror(msg) end end ---------------------------------------------------------------------- -- verifies token conditions are met or else throw error ---------------------------------------------------------------------- local function check_match(what, who, where) if not testnext(what) then if where == ln then error_expected(what) else syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")") end end end ---------------------------------------------------------------------- -- expect that token is a name, return the name ---------------------------------------------------------------------- local function str_checkname() check("") 
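------------------------------------------------------------------------
-- Illustrative note (comment only, not part of the original module): the
-- two priority tables built earlier drive subexpr() further below, e.g.
--   binopr_left[".."] = 5, binopr_right[".."] = 4
-- and the lower right-hand priority is what makes '..' right-associative
-- (the recursive subexpr() call may still consume another '..'), while
-- '+' with 6/6 parses left-associatively and '^' with 10/9 binds tightest.
------------------------------------------------------------------------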
local ts = seminfo nameref = xref nextt() return ts end ---------------------------------------------------------------------- -- adds given string s in string pool, sets e as VK ---------------------------------------------------------------------- local function codestring(e, s) e.k = "VK" end ---------------------------------------------------------------------- -- consume a name token, adds it to string pool ---------------------------------------------------------------------- local function checkname(e) codestring(e, str_checkname()) end --[[-------------------------------------------------------------------- -- variable (global|local|upvalue) handling -- * to track locals and globals, variable management code needed -- * entry point is singlevar() for variable lookups -- * lookup tables (bl.locallist) are maintained awkwardly in the basic -- block data structures, PLUS the function data structure (this is -- an inelegant hack, since bl is nil for the top level of a function) ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- register a local variable, create local variable object, set in -- to-activate variable list -- * used in new_localvarliteral(), parlist(), fornum(), forlist(), -- localfunc(), localstat() ---------------------------------------------------------------------- local function new_localvar(name, special) local bl = fs.bl local locallist -- locate locallist in current block object or function root object if bl then locallist = bl.locallist else locallist = fs.locallist end -- build local variable information object and set localinfo local id = #localinfo + 1 localinfo[id] = { -- new local variable object name = name, -- local variable name xref = { nameref }, -- xref, first value is declaration decl = nameref, -- location of declaration, = xref[1] } if special then -- "self" must be not be changed localinfo[id].isself = true end -- this can override a local with the same name in the same scope -- but first, keep it inactive until it gets activated local i = #ilocalinfo + 1 ilocalinfo[i] = id ilocalrefs[i] = locallist end ---------------------------------------------------------------------- -- actually activate the variables so that they are visible -- * remember Lua semantics, e.g. RHS is evaluated first, then LHS -- * used in parlist(), forbody(), localfunc(), localstat(), body() ---------------------------------------------------------------------- local function adjustlocalvars(nvars) local sz = #ilocalinfo -- i goes from left to right, in order of local allocation, because -- of something like: local a,a,a = 1,2,3 which gives a = 3 while nvars > 0 do nvars = nvars - 1 local i = sz - nvars local id = ilocalinfo[i] -- local's id local obj = localinfo[id] local name = obj.name -- name of local obj.act = xref -- set activation location ilocalinfo[i] = nil local locallist = ilocalrefs[i] -- ref to lookup table to update ilocalrefs[i] = nil local existing = locallist[name] -- if existing, remove old first! 
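-- Illustrative example (comment only, not from the original source): given
--   local x = 1            -- say this becomes localinfo[1]
--   local x = 2            -- localinfo[2], same name, same scope
-- activating the second 'x' finds the first one still in locallist, so the
-- code just below records localinfo[1].rem = -2; a negative id marks
-- "removed by shadowing" as opposed to removal at a scope exit.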
if existing then -- do not overlap, set special obj = localinfo[existing] -- form of rem, as -id obj.rem = -id end locallist[name] = id -- activate, now visible to Lua end end ---------------------------------------------------------------------- -- remove (deactivate) variables in current scope (before scope exits) -- * zap entire locallist tables since we are not allocating registers -- * used in leaveblock(), close_func() ---------------------------------------------------------------------- local function removevars() local bl = fs.bl local locallist -- locate locallist in current block object or function root object if bl then locallist = bl.locallist else locallist = fs.locallist end -- enumerate the local list at current scope and deactivate 'em for name, id in base.pairs(locallist) do local obj = localinfo[id] obj.rem = xref -- set deactivation location end end ---------------------------------------------------------------------- -- creates a new local variable given a name -- * skips internal locals (those starting with '('), so internal -- locals never needs a corresponding adjustlocalvars() call -- * special is true for "self" which must not be optimized -- * used in fornum(), forlist(), parlist(), body() ---------------------------------------------------------------------- local function new_localvarliteral(name, special) if string.sub(name, 1, 1) == "(" then -- can skip internal locals return end new_localvar(name, special) end ---------------------------------------------------------------------- -- search the local variable namespace of the given fs for a match -- * returns localinfo index -- * used only in singlevaraux() ---------------------------------------------------------------------- local function searchvar(fs, n) local bl = fs.bl local locallist if bl then locallist = bl.locallist while locallist do if locallist[n] then return locallist[n] end -- found bl = bl.prev locallist = bl and bl.locallist end end locallist = fs.locallist return locallist[n] or -1 -- found or not found (-1) end ---------------------------------------------------------------------- -- handle locals, globals and upvalues and related processing -- * search mechanism is recursive, calls itself to search parents -- * used only in singlevar() ---------------------------------------------------------------------- local function singlevaraux(fs, n, var) if fs == nil then -- no more levels? 
var.k = "VGLOBAL" -- default is global variable return "VGLOBAL" else local v = searchvar(fs, n) -- look up at current level if v >= 0 then var.k = "VLOCAL" var.id = v -- codegen may need to deal with upvalue here return "VLOCAL" else -- not found at current level; try upper one if singlevaraux(fs.prev, n, var) == "VGLOBAL" then return "VGLOBAL" end -- else was LOCAL or UPVAL, handle here var.k = "VUPVAL" -- upvalue in this level return "VUPVAL" end--if v end--if fs end ---------------------------------------------------------------------- -- consume a name token, creates a variable (global|local|upvalue) -- * used in prefixexp(), funcname() ---------------------------------------------------------------------- local function singlevar(v) local name = str_checkname() singlevaraux(fs, name, v) ------------------------------------------------------------------ -- variable tracking ------------------------------------------------------------------ if v.k == "VGLOBAL" then -- if global being accessed, keep track of it by creating an object local id = globallookup[name] if not id then id = #globalinfo + 1 globalinfo[id] = { -- new global variable object name = name, -- global variable name xref = { nameref }, -- xref, first value is declaration } globallookup[name] = id -- remember it else local obj = globalinfo[id].xref obj[#obj + 1] = nameref -- add xref end else -- local/upvalue is being accessed, keep track of it local id = v.id local obj = localinfo[id].xref obj[#obj + 1] = nameref -- add xref end end --[[-------------------------------------------------------------------- -- state management functions with open/close pairs ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- enters a code unit, initializes elements ---------------------------------------------------------------------- local function enterblock(isbreakable) local bl = {} -- per-block state bl.isbreakable = isbreakable bl.prev = fs.bl bl.locallist = {} fs.bl = bl end ---------------------------------------------------------------------- -- leaves a code unit, close any upvalues ---------------------------------------------------------------------- local function leaveblock() local bl = fs.bl removevars() fs.bl = bl.prev end ---------------------------------------------------------------------- -- opening of a function -- * top_fs is only for anchoring the top fs, so that parser() can -- return it to the caller function along with useful output -- * used in parser() and body() ---------------------------------------------------------------------- local function open_func() local new_fs -- per-function state if not fs then -- top_fs is created early new_fs = top_fs else new_fs = {} end new_fs.prev = fs -- linked list of function states new_fs.bl = nil new_fs.locallist = {} fs = new_fs end ---------------------------------------------------------------------- -- closing of a function -- * used in parser() and body() ---------------------------------------------------------------------- local function close_func() removevars() fs = fs.prev end --[[-------------------------------------------------------------------- -- other parsing functions -- * for table constructor, parameter list, argument list ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- parse a function name suffix, for function call specifications -- * used in primaryexp(), 
funcname() ---------------------------------------------------------------------- local function field(v) -- field -> ['.' | ':'] NAME local key = {} nextt() -- skip the dot or colon checkname(key) v.k = "VINDEXED" end ---------------------------------------------------------------------- -- parse a table indexing suffix, for constructors, expressions -- * used in recfield(), primaryexp() ---------------------------------------------------------------------- local function yindex(v) -- index -> '[' expr ']' nextt() -- skip the '[' expr(v) checknext("]") end ---------------------------------------------------------------------- -- parse a table record (hash) field -- * used in constructor() ---------------------------------------------------------------------- local function recfield(cc) -- recfield -> (NAME | '['exp1']') = exp1 local key, val = {}, {} if tok == "" then checkname(key) else-- tok == '[' yindex(key) end checknext("=") expr(val) end ---------------------------------------------------------------------- -- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH) -- * note: retained in this skeleton because it modifies cc.v.k -- * used in constructor() ---------------------------------------------------------------------- local function closelistfield(cc) if cc.v.k == "VVOID" then return end -- there is no list item cc.v.k = "VVOID" end ---------------------------------------------------------------------- -- parse a table list (array) field -- * used in constructor() ---------------------------------------------------------------------- local function listfield(cc) expr(cc.v) end ---------------------------------------------------------------------- -- parse a table constructor -- * used in funcargs(), simpleexp() ---------------------------------------------------------------------- local function constructor(t) -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}' -- field -> recfield | listfield -- fieldsep -> ',' | ';' local line = ln local cc = {} cc.v = {} cc.t = t t.k = "VRELOCABLE" cc.v.k = "VVOID" checknext("{") repeat if tok == "}" then break end -- closelistfield(cc) here local c = tok if c == "" then -- may be listfields or recfields if lookahead() ~= "=" then -- look ahead: expression? listfield(cc) else recfield(cc) end elseif c == "[" then -- constructor_item -> recfield recfield(cc) else -- constructor_part -> listfield listfield(cc) end until not testnext(",") and not testnext(";") check_match("}", "{", line) -- lastlistfield(cc) here end ---------------------------------------------------------------------- -- parse the arguments (parameters) of a function declaration -- * used in body() ---------------------------------------------------------------------- local function parlist() -- parlist -> [ param { ',' param } ] local nparams = 0 if tok ~= ")" then -- is 'parlist' not empty? repeat local c = tok if c == "" then -- param -> NAME new_localvar(str_checkname()) nparams = nparams + 1 elseif c == "..." then nextt() fs.is_vararg = true else syntaxerror(" or '...' 
expected") end until fs.is_vararg or not testnext(",") end--if adjustlocalvars(nparams) end ---------------------------------------------------------------------- -- parse the parameters of a function call -- * contrast with parlist(), used in function declarations -- * used in primaryexp() ---------------------------------------------------------------------- local function funcargs(f) local args = {} local line = ln local c = tok if c == "(" then -- funcargs -> '(' [ explist1 ] ')' if line ~= lastln then syntaxerror("ambiguous syntax (function call x new statement)") end nextt() if tok == ")" then -- arg list is empty? args.k = "VVOID" else explist1(args) end check_match(")", "(", line) elseif c == "{" then -- funcargs -> constructor constructor(args) elseif c == "" then -- funcargs -> STRING codestring(args, seminfo) nextt() -- must use 'seminfo' before 'next' else syntaxerror("function arguments expected") return end--if c f.k = "VCALL" end --[[-------------------------------------------------------------------- -- mostly expression functions ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- parses an expression in parentheses or a single variable -- * used in primaryexp() ---------------------------------------------------------------------- local function prefixexp(v) -- prefixexp -> NAME | '(' expr ')' local c = tok if c == "(" then local line = ln nextt() expr(v) check_match(")", "(", line) elseif c == "" then singlevar(v) else syntaxerror("unexpected symbol") end--if c end ---------------------------------------------------------------------- -- parses a prefixexp (an expression in parentheses or a single -- variable) or a function call specification -- * used in simpleexp(), assignment(), expr_stat() ---------------------------------------------------------------------- local function primaryexp(v) -- primaryexp -> -- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } prefixexp(v) while true do local c = tok if c == "." then -- field field(v) elseif c == "[" then -- '[' exp1 ']' local key = {} yindex(key) elseif c == ":" then -- ':' NAME funcargs local key = {} nextt() checkname(key) funcargs(v) elseif c == "(" or c == "" or c == "{" then -- funcargs funcargs(v) else return end--if c end--while end ---------------------------------------------------------------------- -- parses general expression types, constants handled here -- * used in subexpr() ---------------------------------------------------------------------- local function simpleexp(v) -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... | -- constructor | FUNCTION body | primaryexp local c = tok if c == "" then v.k = "VKNUM" elseif c == "" then codestring(v, seminfo) elseif c == "nil" then v.k = "VNIL" elseif c == "true" then v.k = "VTRUE" elseif c == "false" then v.k = "VFALSE" elseif c == "..." then -- vararg check_condition(fs.is_vararg == true, "cannot use '...' outside a vararg function"); v.k = "VVARARG" elseif c == "{" then -- constructor constructor(v) return elseif c == "function" then nextt() body(v, false, ln) return else primaryexp(v) return end--if c nextt() end ------------------------------------------------------------------------ -- Parse subexpressions. Includes handling of unary operators and binary -- operators. 
A subexpr is given the rhs priority level of the operator -- immediately left of it, if any (limit is -1 if none,) and if a binop -- is found, limit is compared with the lhs priority level of the binop -- in order to determine which executes first. -- * recursively called -- * used in expr() ------------------------------------------------------------------------ local function subexpr(v, limit) -- subexpr -> (simpleexp | unop subexpr) { binop subexpr } -- * where 'binop' is any binary operator with a priority -- higher than 'limit' local op = tok local uop = unopr[op] if uop then nextt() subexpr(v, UNARY_PRIORITY) else simpleexp(v) end -- expand while operators have priorities higher than 'limit' op = tok local binop = binopr_left[op] while binop and binop > limit do local v2 = {} nextt() -- read sub-expression with higher priority local nextop = subexpr(v2, binopr_right[op]) op = nextop binop = binopr_left[op] end return op -- return first untreated operator end ---------------------------------------------------------------------- -- Expression parsing starts here. Function subexpr is entered with the -- left operator (which is non-existent) priority of -1, which is lower -- than all actual operators. Expr information is returned in parm v. -- * used in cond(), explist1(), index(), recfield(), listfield(), -- prefixexp(), while_stat(), exp1() ---------------------------------------------------------------------- -- this is a forward-referenced local function expr(v) -- expr -> subexpr subexpr(v, 0) end --[[-------------------------------------------------------------------- -- third level parsing functions ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- parse a variable assignment sequence -- * recursively called -- * used in expr_stat() ------------------------------------------------------------------------ local function assignment(v) local e = {} local c = v.v.k check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL" or c == "VINDEXED", "syntax error") if testnext(",") then -- assignment -> ',' primaryexp assignment local nv = {} -- expdesc nv.v = {} primaryexp(nv.v) -- lparser.c deals with some register usage conflict here assignment(nv) else -- assignment -> '=' explist1 checknext("=") explist1(e) return -- avoid default end e.k = "VNONRELOC" end ---------------------------------------------------------------------- -- parse a for loop body for both versions of the for loop -- * used in fornum(), forlist() ---------------------------------------------------------------------- local function forbody(nvars, isnum) -- forbody -> DO block checknext("do") enterblock(false) -- scope for declared variables adjustlocalvars(nvars) block() leaveblock() -- end of scope for declared variables end ---------------------------------------------------------------------- -- parse a numerical for loop, calls forbody() -- * used in for_stat() ---------------------------------------------------------------------- local function fornum(varname) -- fornum -> NAME = exp1, exp1 [, exp1] DO body local line = line new_localvarliteral("(for index)") new_localvarliteral("(for limit)") new_localvarliteral("(for step)") new_localvar(varname) checknext("=") exp1() -- initial value checknext(",") exp1() -- limit if testnext(",") then exp1() -- optional step else -- default step = 1 end forbody(1, true) end ---------------------------------------------------------------------- -- parse a generic for loop, 
calls forbody() -- * used in for_stat() ---------------------------------------------------------------------- local function forlist(indexname) -- forlist -> NAME {, NAME} IN explist1 DO body local e = {} -- create control variables new_localvarliteral("(for generator)") new_localvarliteral("(for state)") new_localvarliteral("(for control)") -- create declared variables new_localvar(indexname) local nvars = 1 while testnext(",") do new_localvar(str_checkname()) nvars = nvars + 1 end checknext("in") local line = line explist1(e) forbody(nvars, false) end ---------------------------------------------------------------------- -- parse a function name specification -- * used in func_stat() ---------------------------------------------------------------------- local function funcname(v) -- funcname -> NAME {field} [':' NAME] local needself = false singlevar(v) while tok == "." do field(v) end if tok == ":" then needself = true field(v) end return needself end ---------------------------------------------------------------------- -- parse the single expressions needed in numerical for loops -- * used in fornum() ---------------------------------------------------------------------- -- this is a forward-referenced local function exp1() -- exp1 -> expr local e = {} expr(e) end ---------------------------------------------------------------------- -- parse condition in a repeat statement or an if control structure -- * used in repeat_stat(), test_then_block() ---------------------------------------------------------------------- local function cond() -- cond -> expr local v = {} expr(v) -- read condition end ---------------------------------------------------------------------- -- parse part of an if control structure, including the condition -- * used in if_stat() ---------------------------------------------------------------------- local function test_then_block() -- test_then_block -> [IF | ELSEIF] cond THEN block nextt() -- skip IF or ELSEIF cond() checknext("then") block() -- 'then' part end ---------------------------------------------------------------------- -- parse a local function statement -- * used in local_stat() ---------------------------------------------------------------------- local function localfunc() -- localfunc -> NAME body local v, b = {} new_localvar(str_checkname()) v.k = "VLOCAL" adjustlocalvars(1) body(b, false, ln) end ---------------------------------------------------------------------- -- parse a local variable declaration statement -- * used in local_stat() ---------------------------------------------------------------------- local function localstat() -- localstat -> NAME {',' NAME} ['=' explist1] local nvars = 0 local e = {} repeat new_localvar(str_checkname()) nvars = nvars + 1 until not testnext(",") if testnext("=") then explist1(e) else e.k = "VVOID" end adjustlocalvars(nvars) end ---------------------------------------------------------------------- -- parse a list of comma-separated expressions -- * used in return_stat(), localstat(), funcargs(), assignment(), -- forlist() ---------------------------------------------------------------------- -- this is a forward-referenced local function explist1(e) -- explist1 -> expr { ',' expr } expr(e) while testnext(",") do expr(e) end end ---------------------------------------------------------------------- -- parse function declaration body -- * used in simpleexp(), localfunc(), func_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function 
body(e, needself, line) -- body -> '(' parlist ')' chunk END open_func() checknext("(") if needself then new_localvarliteral("self", true) adjustlocalvars(1) end parlist() checknext(")") chunk() check_match("end", "function", line) close_func() end ---------------------------------------------------------------------- -- parse a code block or unit -- * used in do_stat(), while_stat(), forbody(), test_then_block(), -- if_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function block() -- block -> chunk enterblock(false) chunk() leaveblock() end --[[-------------------------------------------------------------------- -- second level parsing functions, all with '_stat' suffix -- * since they are called via a table lookup, they cannot be local -- functions (a lookup table of local functions might be smaller...) -- * stat() -> *_stat() ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initial parsing for a for loop, calls fornum() or forlist() -- * removed 'line' parameter (used to set debug information only) -- * used in stat() ---------------------------------------------------------------------- local function for_stat() -- stat -> for_stat -> FOR (fornum | forlist) END local line = line enterblock(true) -- scope for loop and control variables nextt() -- skip 'for' local varname = str_checkname() -- first variable name local c = tok if c == "=" then fornum(varname) elseif c == "," or c == "in" then forlist(varname) else syntaxerror("'=' or 'in' expected") end check_match("end", "for", line) leaveblock() -- loop scope (`break' jumps to this point) end ---------------------------------------------------------------------- -- parse a while-do control structure, body processed by block() -- * used in stat() ---------------------------------------------------------------------- local function while_stat() -- stat -> while_stat -> WHILE cond DO block END local line = line nextt() -- skip WHILE cond() -- parse condition enterblock(true) checknext("do") block() check_match("end", "while", line) leaveblock() end ---------------------------------------------------------------------- -- parse a repeat-until control structure, body parsed by chunk() -- * originally, repeatstat() calls breakstat() too if there is an -- upvalue in the scope block; nothing is actually lexed, it is -- actually the common code in breakstat() for closing of upvalues -- * used in stat() ---------------------------------------------------------------------- local function repeat_stat() -- stat -> repeat_stat -> REPEAT block UNTIL cond local line = line enterblock(true) -- loop block enterblock(false) -- scope block nextt() -- skip REPEAT chunk() check_match("until", "repeat", line) cond() -- close upvalues at scope level below leaveblock() -- finish scope leaveblock() -- finish loop end ---------------------------------------------------------------------- -- parse an if control structure -- * used in stat() ---------------------------------------------------------------------- local function if_stat() -- stat -> if_stat -> IF cond THEN block -- {ELSEIF cond THEN block} [ELSE block] END local line = line local v = {} test_then_block() -- IF cond THEN block while tok == "elseif" do test_then_block() -- ELSEIF cond THEN block end if tok == "else" then nextt() -- skip ELSE block() -- 'else' part end check_match("end", "if", line) end 
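------------------------------------------------------------------------
-- Illustrative sketch (not part of lparser): the loop statements above all
-- call enterblock(true), so a 'break' can locate its target by walking the
-- bl.prev chain; break_stat() below performs exactly this search. The
-- helper name here is hypothetical and the block is otherwise inert.
------------------------------------------------------------------------
do
  local function find_breakable(bl)
    while bl and not bl.isbreakable do   -- skip scope-only blocks (if/do/...)
      bl = bl.prev
    end
    return bl                            -- nil means "no loop to break"
  end
end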
---------------------------------------------------------------------- -- parse a return statement -- * used in stat() ---------------------------------------------------------------------- local function return_stat() -- stat -> return_stat -> RETURN explist local e = {} nextt() -- skip RETURN local c = tok if block_follow[c] or c == ";" then -- return no values else explist1(e) -- optional return values end end ---------------------------------------------------------------------- -- parse a break statement -- * used in stat() ---------------------------------------------------------------------- local function break_stat() -- stat -> break_stat -> BREAK local bl = fs.bl nextt() -- skip BREAK while bl and not bl.isbreakable do -- find a breakable block bl = bl.prev end if not bl then syntaxerror("no loop to break") end end ---------------------------------------------------------------------- -- parse a function call with no returns or an assignment statement -- * the struct with .prev is used for name searching in lparse.c, -- so it is retained for now; present in assignment() also -- * used in stat() ---------------------------------------------------------------------- local function expr_stat() local id = tpos - 1 -- stat -> expr_stat -> func | assignment local v = {} v.v = {} primaryexp(v.v) if v.v.k == "VCALL" then -- stat -> func -- call statement uses no results statinfo[id] = "call" else -- stat -> assignment v.prev = nil assignment(v) statinfo[id] = "assign" end end ---------------------------------------------------------------------- -- parse a function statement -- * used in stat() ---------------------------------------------------------------------- local function function_stat() -- stat -> function_stat -> FUNCTION funcname body local line = line local v, b = {}, {} nextt() -- skip FUNCTION local needself = funcname(v) body(b, needself, line) end ---------------------------------------------------------------------- -- parse a simple block enclosed by a DO..END pair -- * used in stat() ---------------------------------------------------------------------- local function do_stat() -- stat -> do_stat -> DO block END local line = line nextt() -- skip DO block() check_match("end", "do", line) end ---------------------------------------------------------------------- -- parse a statement starting with LOCAL -- * used in stat() ---------------------------------------------------------------------- local function local_stat() -- stat -> local_stat -> LOCAL FUNCTION localfunc -- -> LOCAL localstat nextt() -- skip LOCAL if testnext("function") then -- local function? 
localfunc() else localstat() end end --[[-------------------------------------------------------------------- -- main functions, top level parsing functions -- * accessible functions are: init(lexer), parser() -- * [entry] -> parser() -> chunk() -> stat() ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initial parsing for statements, calls '_stat' suffixed functions -- * used in chunk() ---------------------------------------------------------------------- local stat_call = { -- lookup for calls in stat() ["if"] = if_stat, ["while"] = while_stat, ["do"] = do_stat, ["for"] = for_stat, ["repeat"] = repeat_stat, ["function"] = function_stat, ["local"] = local_stat, ["return"] = return_stat, ["break"] = break_stat, } local function stat() -- stat -> if_stat while_stat do_stat for_stat repeat_stat -- function_stat local_stat return_stat break_stat -- expr_stat line = ln -- may be needed for error messages local c = tok local fn = stat_call[c] -- handles: if while do for repeat function local return break if fn then statinfo[tpos - 1] = c fn() -- return or break must be last statement if c == "return" or c == "break" then return true end else expr_stat() end return false end ---------------------------------------------------------------------- -- parse a chunk, which consists of a bunch of statements -- * used in parser(), body(), block(), repeat_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function chunk() -- chunk -> { stat [';'] } local islast = false while not islast and not block_follow[tok] do islast = stat() testnext(";") end end ---------------------------------------------------------------------- -- performs parsing, returns parsed data structure ---------------------------------------------------------------------- function parser() open_func() fs.is_vararg = true -- main func. is always vararg nextt() -- read first token chunk() check("") close_func() return { -- return everything globalinfo = globalinfo, localinfo = localinfo, statinfo = statinfo, toklist = toklist, seminfolist = seminfolist, toklnlist = toklnlist, xreflist = xreflist, } end ---------------------------------------------------------------------- -- initialization function ---------------------------------------------------------------------- function init(tokorig, seminfoorig, toklnorig) tpos = 1 -- token position top_fs = {} -- reset top level function state ------------------------------------------------------------------ -- set up grammar-only token tables; impedance-matching... -- note that constants returned by the lexer is source-level, so -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING) ------------------------------------------------------------------ local j = 1 toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {} for i = 1, #tokorig do local tok = tokorig[i] local yep = true if tok == "TK_KEYWORD" or tok == "TK_OP" then tok = seminfoorig[i] elseif tok == "TK_NAME" then tok = "" seminfolist[j] = seminfoorig[i] elseif tok == "TK_NUMBER" then tok = "" seminfolist[j] = 0 -- fake! elseif tok == "TK_STRING" or tok == "TK_LSTRING" then tok = "" seminfolist[j] = "" -- fake! 
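-- Illustrative example (comment only): a lexed stream such as
--   TK_KEYWORD "local", TK_SPACE " ", TK_NAME "x", TK_OP "=", TK_NUMBER "42"
-- is reduced here to the grammar-only stream
--   "local", name token (seminfo "x"), "=", number token (fake seminfo 0)
-- i.e. whitespace and comments are dropped and constant values are faked,
-- since the parser only tracks names and statement structure.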
elseif tok == "TK_EOS" then tok = "" else -- non-grammar tokens; ignore them yep = false end if yep then -- set rest of the information toklist[j] = tok toklnlist[j] = toklnorig[i] xreflist[j] = i j = j + 1 end end--for ------------------------------------------------------------------ -- initialize data structures for variable tracking ------------------------------------------------------------------ globalinfo, globallookup, localinfo = {}, {}, {} ilocalinfo, ilocalrefs = {}, {} statinfo = {} -- experimental end --end of inserted module end -- preload function for module optlex preload.optlex = function() --start of inserted module module "optlex" local string = base.require "string" local match = string.match local sub = string.sub local find = string.find local rep = string.rep local print ------------------------------------------------------------------------ -- variables and data structures ------------------------------------------------------------------------ -- error function, can override by setting own function into module error = base.error warn = {} -- table for warning flags local stoks, sinfos, stoklns -- source lists local is_realtoken = { -- significant (grammar) tokens TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, TK_STRING = true, TK_LSTRING = true, TK_OP = true, TK_EOS = true, } local is_faketoken = { -- whitespace (non-grammar) tokens TK_COMMENT = true, TK_LCOMMENT = true, TK_EOL = true, TK_SPACE = true, } local opt_details -- for extra information ------------------------------------------------------------------------ -- true if current token is at the start of a line -- * skips over deleted tokens via recursion ------------------------------------------------------------------------ local function atlinestart(i) local tok = stoks[i - 1] if i <= 1 or tok == "TK_EOL" then return true elseif tok == "" then return atlinestart(i - 1) end return false end ------------------------------------------------------------------------ -- true if current token is at the end of a line -- * skips over deleted tokens via recursion ------------------------------------------------------------------------ local function atlineend(i) local tok = stoks[i + 1] if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then return true elseif tok == "" then return atlineend(i + 1) end return false end ------------------------------------------------------------------------ -- counts comment EOLs inside a long comment -- * in order to keep line numbering, EOLs need to be reinserted ------------------------------------------------------------------------ local function commenteols(lcomment) local sep = #match(lcomment, "^%-%-%[=*%[") local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims local i, c = 1, 0 while true do local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) if not p then break end -- if no matches, done i = p + 1 c = c + 1 if #s > 0 and r ~= s then -- skip CRLF or LFCR i = i + 1 end end return c end ------------------------------------------------------------------------ -- compares two tokens (i, j) and returns the whitespace required -- * see documentation for a reference table of interactions -- * only two grammar/real tokens are being considered -- * if "", no separation is needed -- * if " ", then at least one whitespace (or EOL) is required -- * NOTE: this doesn't work at the start or the end or for EOS! 
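-- Illustrative examples (comment only, not exhaustive) of the pair rules
-- implemented in checkpair() below:
--   "local" "x"   -> " "  keyword next to a name must stay separated
--   "x" "="       -> ""   name next to an operator needs nothing
--   "<" "="       -> " "  otherwise the pair would re-lex as the token "<="
--   ".." "."      -> " "  otherwise the pair would re-lex as "..."
--   a string literal next to anything -> "" (its delimiter separates it)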
------------------------------------------------------------------------ local function checkpair(i, j) local match = match local t1, t2 = stoks[i], stoks[j] -------------------------------------------------------------------- if t1 == "TK_STRING" or t1 == "TK_LSTRING" or t2 == "TK_STRING" or t2 == "TK_LSTRING" then return "" -------------------------------------------------------------------- elseif t1 == "TK_OP" or t2 == "TK_OP" then if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then return "" end if t1 == "TK_OP" and t2 == "TK_OP" then -- for TK_OP/TK_OP pairs, see notes in technotes.txt local op, op2 = sinfos[i], sinfos[j] if (match(op, "^%.%.?$") and match(op2, "^%.")) or (match(op, "^[~=<>]$") and op2 == "=") or (op == "[" and (op2 == "[" or op2 == "=")) then return " " end return "" end -- "TK_OP" + "TK_NUMBER" case local op = sinfos[i] if t2 == "TK_OP" then op = sinfos[j] end if match(op, "^%.%.?%.?$") then return " " end return "" -------------------------------------------------------------------- else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then return " " -------------------------------------------------------------------- end end ------------------------------------------------------------------------ -- repack tokens, removing deletions caused by optimization process ------------------------------------------------------------------------ local function repack_tokens() local dtoks, dinfos, dtoklns = {}, {}, {} local j = 1 for i = 1, #stoks do local tok = stoks[i] if tok ~= "" then dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i] j = j + 1 end end stoks, sinfos, stoklns = dtoks, dinfos, dtoklns end ------------------------------------------------------------------------ -- number optimization -- * optimization using string formatting functions is one way of doing -- this, but here, we consider all cases and handle them separately -- (possibly an idiotic approach...) -- * scientific notation being generated is not in canonical form, this -- may or may not be a bad thing -- * note: intermediate portions need to fit into a normal number range -- * optimizations can be divided based on number patterns: -- * hexadecimal: -- (1) no need to remove leading zeros, just skip to (2) -- (2) convert to integer if size equal or smaller -- * change if equal size -> lose the 'x' to reduce entropy -- (3) number is then processed as an integer -- (4) note: does not make 0[xX] consistent -- * integer: -- (1) note: includes anything with trailing ".", ".0", ... -- (2) remove useless fractional part, if present, e.g. 123.000 -- (3) remove leading zeros, e.g. 000123 -- (4) switch to scientific if shorter, e.g. 123000 -> 123e3 -- * with fraction: -- (1) split into digits dot digits -- (2) if no integer portion, take as zero (can omit later) -- (3) handle degenerate .000 case, after which the fractional part -- must be non-zero (if zero, it's matched as an integer) -- (4) remove trailing zeros for fractional portion -- (5) p.q where p > 0 and q > 0 cannot be shortened any more -- (6) otherwise p == 0 and the form is .q, e.g. .000123 -- (7) if scientific shorter, convert, e.g. 
.000123 -> 123e-6 -- * scientific: -- (1) split into (digits dot digits) [eE] ([+-] digits) -- (2) if significand has ".", shift it out so it becomes an integer -- (3) if significand is zero, just use zero -- (4) remove leading zeros for significand -- (5) shift out trailing zeros for significand -- (6) examine exponent and determine which format is best: -- integer, with fraction, scientific ------------------------------------------------------------------------ local function do_number(i) local before = sinfos[i] -- 'before' local z = before -- working representation local y -- 'after', if better -------------------------------------------------------------------- if match(z, "^0[xX]") then -- hexadecimal number local v = base.tostring(base.tonumber(z)) if #v <= #z then z = v -- change to integer, AND continue else return -- no change; stick to hex end end -------------------------------------------------------------------- if match(z, "^%d+%.?0*$") then -- integer or has useless frac z = match(z, "^(%d+)%.?0*$") -- int portion only if z + 0 > 0 then z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros local v = #match(z, "0*$") local nv = base.tostring(v) if v > #nv + 1 then -- scientific is shorter z = sub(z, 1, #z - v).."e"..nv end y = z else y = "0" -- basic zero end -------------------------------------------------------------------- elseif not match(z, "[eE]") then -- number with fraction part local p, q = match(z, "^(%d*)%.(%d+)$") -- split if p == "" then p = 0 end -- int part zero if q + 0 == 0 and p == 0 then y = "0" -- degenerate .000 case else -- now, q > 0 holds and p is a number local v = #match(q, "0*$") -- remove trailing zeros if v > 0 then q = sub(q, 1, #q - v) end -- if p > 0, nothing else we can do to simplify p.q case if p + 0 > 0 then y = p.."."..q else y = "."..q -- tentative, e.g. .000123 local v = #match(q, "^0*") -- # leading spaces local w = #q - v -- # significant digits local nv = base.tostring(#q) -- e.g. compare 123e-6 versus .000123 if w + 2 + #nv < 1 + #q then y = sub(q, -w).."e-"..nv end end end -------------------------------------------------------------------- else -- scientific number local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$") ex = base.tonumber(ex) -- if got ".", shift out fractional portion of significand local p, q = match(sig, "^(%d*)%.(%d*)$") if p then ex = ex - #q sig = p..q end if sig + 0 == 0 then y = "0" -- basic zero else local v = #match(sig, "^0*") -- remove leading zeros sig = sub(sig, v + 1) v = #match(sig, "0*$") -- shift out trailing zeros if v > 0 then sig = sub(sig, 1, #sig - v) ex = ex + v end -- examine exponent and determine which format is best local nex = base.tostring(ex) if ex == 0 then -- it's just an integer y = sig elseif ex > 0 and (ex <= 1 + #nex) then -- a number y = sig..rep("0", ex) elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123 v = #sig + ex y = sub(sig, 1, v).."."..sub(sig, v + 1) elseif ex < 0 and (#nex >= -ex - #sig) then -- e.g. compare 1234e-5 versus .01234 -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig -- -> #nex >= -ex - #sig v = -ex - #sig y = "."..rep("0", v)..sig else -- non-canonical scientific representation y = sig.."e"..ex end end--if sig end -------------------------------------------------------------------- if y and y ~= sinfos[i] then if opt_details then print(" (line "..stoklns[i]..") "..sinfos[i].." 
-> "..y) opt_details = opt_details + 1 end sinfos[i] = y end end ------------------------------------------------------------------------ -- string optimization -- * note: works on well-formed strings only! -- * optimizations on characters can be summarized as follows: -- \a\b\f\n\r\t\v -- no change -- \\ -- no change -- \"\' -- depends on delim, other can remove \ -- \[\] -- remove \ -- \ -- general escape, remove \ -- \ -- normalize the EOL only -- \ddd -- if \a\b\f\n\r\t\v, change to latter -- if other < ascii 32, keep ddd but zap leading zeros -- but cannot have following digits -- if >= ascii 32, translate it into the literal, then also -- do escapes for \\,\",\' cases -- -- no change -- * switch delimiters if string becomes shorter ------------------------------------------------------------------------ local function do_string(I) local info = sinfos[I] local delim = sub(info, 1, 1) -- delimiter used local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> ' local z = sub(info, 2, -2) -- actual string local i = 1 local c_delim, c_ndelim = 0, 0 -- "/' counts -------------------------------------------------------------------- while i <= #z do local c = sub(z, i, i) ---------------------------------------------------------------- if c == "\\" then -- escaped stuff local j = i + 1 local d = sub(z, j, j) local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true) ------------------------------------------------------------ if not p then -- \ -- remove \ z = sub(z, 1, i - 1)..sub(z, j) i = i + 1 ------------------------------------------------------------ elseif p <= 8 then -- \a\b\f\n\r\t\v\\ i = i + 2 -- no change ------------------------------------------------------------ elseif p <= 10 then -- \ -- normalize EOL local eol = sub(z, j, j + 1) if eol == "\r\n" or eol == "\n\r" then z = sub(z, 1, i).."\n"..sub(z, j + 2) elseif p == 10 then -- \r case z = sub(z, 1, i).."\n"..sub(z, j + 1) end i = i + 2 ------------------------------------------------------------ elseif p <= 12 then -- \"\' -- remove \ for ndelim if d == delim then c_delim = c_delim + 1 i = i + 2 else c_ndelim = c_ndelim + 1 z = sub(z, 1, i - 1)..sub(z, j) i = i + 1 end ------------------------------------------------------------ else -- \ddd -- various steps local s = match(z, "^(%d%d?%d?)", j) j = i + 1 + #s -- skip to location local cv = s + 0 local cc = string.char(cv) local p = find("\a\b\f\n\r\t\v", cc, 1, true) if p then -- special escapes s = "\\"..sub("abfnrtv", p, p) elseif cv < 32 then -- normalized \ddd if match(sub(z, j, j), "%d") then -- if a digit follows, \ddd cannot be shortened s = "\\"..s else s = "\\"..cv end elseif cc == delim then -- \ s = "\\"..cc c_delim = c_delim + 1 elseif cc == "\\" then -- \\ s = "\\\\" else -- literal character s = cc if cc == ndelim then c_ndelim = c_ndelim + 1 end end z = sub(z, 1, i - 1)..s..sub(z, j) i = i + #s ------------------------------------------------------------ end--if p ---------------------------------------------------------------- else-- c ~= "\\" -- -- no change i = i + 1 if c == ndelim then -- count ndelim, for switching delimiters c_ndelim = c_ndelim + 1 end ---------------------------------------------------------------- end--if c end--while -------------------------------------------------------------------- -- switching delimiters, a long-winded derivation: -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes -- simplifying the condition (1)>(2) --> c_delim > c_ndelim 
if c_delim > c_ndelim then i = 1 while i <= #z do local p, q, r = find(z, "([\'\"])", i) if not p then break end if r == delim then -- \ -> z = sub(z, 1, p - 2)..sub(z, p) i = p else-- r == ndelim -- -> \ z = sub(z, 1, p - 1).."\\"..sub(z, p) i = p + 2 end end--while delim = ndelim -- actually change delimiters end -------------------------------------------------------------------- z = delim..z..delim if z ~= sinfos[I] then if opt_details then print(" (line "..stoklns[I]..") "..sinfos[I].." -> "..z) opt_details = opt_details + 1 end sinfos[I] = z end end ------------------------------------------------------------------------ -- long string optimization -- * note: warning flagged if trailing whitespace found, not trimmed -- * remove first optional newline -- * normalize embedded newlines -- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------ local function do_lstring(I) local info = sinfos[I] local delim1 = match(info, "^%[=*%[") -- cut out delimiters local sep = #delim1 local delim2 = sub(info, -sep, -1) local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims local y = "" local i = 1 -------------------------------------------------------------------- while true do local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) -- deal with a single line local ln if not p then ln = sub(z, i) elseif p >= i then ln = sub(z, i, p - 1) end if ln ~= "" then -- flag a warning if there are trailing spaces, won't optimize! if match(ln, "%s+$") then warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I] end y = y..ln end if not p then -- done if no more EOLs break end -- deal with line endings, normalize them i = p + 1 if p then if #s > 0 and r ~= s then -- skip CRLF or LFCR i = i + 1 end -- skip first newline, which can be safely deleted if not(i == 1 and i == p) then y = y.."\n" end end end--while -------------------------------------------------------------------- -- handle possible deletion of one or more '=' separators if sep >= 3 then local chk, okay = sep - 1 -- loop to test ending delimiter with less of '=' down to zero while chk >= 2 do local delim = "%]"..rep("=", chk - 2).."%]" if not match(y, delim) then okay = chk end chk = chk - 1 end if okay then -- change delimiters sep = rep("=", okay - 2) delim1, delim2 = "["..sep.."[", "]"..sep.."]" end end -------------------------------------------------------------------- sinfos[I] = delim1..y..delim2 end ------------------------------------------------------------------------ -- long comment optimization -- * note: does not remove first optional newline -- * trim trailing whitespace -- * normalize embedded newlines -- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------ local function do_lcomment(I) local info = sinfos[I] local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters local sep = #delim1 local delim2 = sub(info, -sep, -1) local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims local y = "" local i = 1 -------------------------------------------------------------------- while true do local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) -- deal with a single line, extract and check trailing whitespace local ln if not p then ln = sub(z, i) elseif p >= i then ln = sub(z, i, p - 1) end if ln ~= "" then -- trim trailing whitespace if non-empty line local ws = match(ln, "%s*$") if #ws > 0 then ln = sub(ln, 1, -(ws + 1)) end y = y..ln end if not p then -- done if no 
more EOLs break end -- deal with line endings, normalize them i = p + 1 if p then if #s > 0 and r ~= s then -- skip CRLF or LFCR i = i + 1 end y = y.."\n" end end--while -------------------------------------------------------------------- -- handle possible deletion of one or more '=' separators sep = sep - 2 if sep >= 3 then local chk, okay = sep - 1 -- loop to test ending delimiter with less of '=' down to zero while chk >= 2 do local delim = "%]"..rep("=", chk - 2).."%]" if not match(y, delim) then okay = chk end chk = chk - 1 end if okay then -- change delimiters sep = rep("=", okay - 2) delim1, delim2 = "--["..sep.."[", "]"..sep.."]" end end -------------------------------------------------------------------- sinfos[I] = delim1..y..delim2 end ------------------------------------------------------------------------ -- short comment optimization -- * trim trailing whitespace ------------------------------------------------------------------------ local function do_comment(i) local info = sinfos[i] local ws = match(info, "%s*$") -- just look from end of string if #ws > 0 then info = sub(info, 1, -(ws + 1)) -- trim trailing whitespace end sinfos[i] = info end ------------------------------------------------------------------------ -- returns true if string found in long comment -- * this is a feature to keep copyright or license texts ------------------------------------------------------------------------ local function keep_lcomment(opt_keep, info) if not opt_keep then return false end -- option not set local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters local sep = #delim1 local delim2 = sub(info, -sep, -1) local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims if find(z, opt_keep, 1, true) then -- try to match return true end end ------------------------------------------------------------------------ -- main entry point -- * currently, lexer processing has 2 passes -- * processing is done on a line-oriented basis, which is easier to -- grok due to the next point... 
-- * since there are various options that can be enabled or disabled, -- processing is a little messy or convoluted ------------------------------------------------------------------------ function optimize(option, toklist, semlist, toklnlist) -------------------------------------------------------------------- -- set option flags -------------------------------------------------------------------- local opt_comments = option["opt-comments"] local opt_whitespace = option["opt-whitespace"] local opt_emptylines = option["opt-emptylines"] local opt_eols = option["opt-eols"] local opt_strings = option["opt-strings"] local opt_numbers = option["opt-numbers"] local opt_x = option["opt-experimental"] local opt_keep = option.KEEP opt_details = option.DETAILS and 0 -- upvalues for details display print = print or base.print if opt_eols then -- forced settings, otherwise won't work properly opt_comments = true opt_whitespace = true opt_emptylines = true elseif opt_x then opt_whitespace = true end -------------------------------------------------------------------- -- variable initialization -------------------------------------------------------------------- stoks, sinfos, stoklns -- set source lists = toklist, semlist, toklnlist local i = 1 -- token position local tok, info -- current token local prev -- position of last grammar token -- on same line (for TK_SPACE stuff) -------------------------------------------------------------------- -- changes a token, info pair -------------------------------------------------------------------- local function settoken(tok, info, I) I = I or i stoks[I] = tok or "" sinfos[I] = info or "" end -------------------------------------------------------------------- -- experimental optimization for ';' operator -------------------------------------------------------------------- if opt_x then while true do tok, info = stoks[i], sinfos[i] if tok == "TK_EOS" then -- end of stream/pass break elseif tok == "TK_OP" and info == ";" then -- ';' operator found, since it is entirely optional, set it -- as a space to let whitespace optimization do the rest settoken("TK_SPACE", " ") end i = i + 1 end repack_tokens() end -------------------------------------------------------------------- -- processing loop (PASS 1) -------------------------------------------------------------------- i = 1 while true do tok, info = stoks[i], sinfos[i] ---------------------------------------------------------------- local atstart = atlinestart(i) -- set line begin flag if atstart then prev = nil end ---------------------------------------------------------------- if tok == "TK_EOS" then -- end of stream/pass break ---------------------------------------------------------------- elseif tok == "TK_KEYWORD" or -- keywords, identifiers, tok == "TK_NAME" or -- operators tok == "TK_OP" then -- TK_KEYWORD and TK_OP can't be optimized without a big -- optimization framework; it would be more of an optimizing -- compiler, not a source code compressor -- TK_NAME that are locals needs parser to analyze/optimize prev = i ---------------------------------------------------------------- elseif tok == "TK_NUMBER" then -- numbers if opt_numbers then do_number(i) -- optimize end prev = i ---------------------------------------------------------------- elseif tok == "TK_STRING" or -- strings, long strings tok == "TK_LSTRING" then if opt_strings then if tok == "TK_STRING" then do_string(i) -- optimize else do_lstring(i) -- optimize end end prev = i ---------------------------------------------------------------- 
elseif tok == "TK_COMMENT" then -- short comments if opt_comments then if i == 1 and sub(info, 1, 1) == "#" then -- keep shbang comment, trim whitespace do_comment(i) else -- safe to delete, as a TK_EOL (or TK_EOS) always follows settoken() -- remove entirely end elseif opt_whitespace then -- trim whitespace only do_comment(i) end ---------------------------------------------------------------- elseif tok == "TK_LCOMMENT" then -- long comments if keep_lcomment(opt_keep, info) then ------------------------------------------------------------ -- if --keep, we keep a long comment if is found; -- this is a feature to keep copyright or license texts if opt_whitespace then -- trim whitespace only do_lcomment(i) end prev = i elseif opt_comments then local eols = commenteols(info) ------------------------------------------------------------ -- prepare opt_emptylines case first, if a disposable token -- follows, current one is safe to dump, else keep a space; -- it is implied that the operation is safe for '-', because -- current is a TK_LCOMMENT, and must be separate from a '-' if is_faketoken[stoks[i + 1]] then settoken() -- remove entirely tok = "" else settoken("TK_SPACE", " ") end ------------------------------------------------------------ -- if there are embedded EOLs to keep and opt_emptylines is -- disabled, then switch the token into one or more EOLs if not opt_emptylines and eols > 0 then settoken("TK_EOL", rep("\n", eols)) end ------------------------------------------------------------ -- if optimizing whitespaces, force reinterpretation of the -- token to give a chance for the space to be optimized away if opt_whitespace and tok ~= "" then i = i - 1 -- to reinterpret end ------------------------------------------------------------ else -- disabled case if opt_whitespace then -- trim whitespace only do_lcomment(i) end prev = i end ---------------------------------------------------------------- elseif tok == "TK_EOL" then -- line endings if atstart and opt_emptylines then settoken() -- remove entirely elseif info == "\r\n" or info == "\n\r" then -- normalize the rest of the EOLs for CRLF/LFCR only -- (note that TK_LCOMMENT can change into several EOLs) settoken("TK_EOL", "\n") end ---------------------------------------------------------------- elseif tok == "TK_SPACE" then -- whitespace if opt_whitespace then if atstart or atlineend(i) then -- delete leading and trailing whitespace settoken() -- remove entirely else ------------------------------------------------------------ -- at this point, since leading whitespace have been removed, -- there should be a either a real token or a TK_LCOMMENT -- prior to hitting this whitespace; the TK_LCOMMENT case -- only happens if opt_comments is disabled; so prev ~= nil local ptok = stoks[prev] if ptok == "TK_LCOMMENT" then -- previous TK_LCOMMENT can abut with anything settoken() -- remove entirely else -- prev must be a grammar token; consecutive TK_SPACE -- tokens is impossible when optimizing whitespace local ntok = stoks[i + 1] if is_faketoken[ntok] then -- handle special case where a '-' cannot abut with -- either a short comment or a long comment if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and ptok == "TK_OP" and sinfos[prev] == "-" then -- keep token else settoken() -- remove entirely end else--is_realtoken -- check a pair of grammar tokens, if can abut, then -- delete space token entirely, otherwise keep one space local s = checkpair(prev, i + 1) if s == "" then settoken() -- remove entirely else settoken("TK_SPACE", " ") end end 
end ------------------------------------------------------------ end end ---------------------------------------------------------------- else error("unidentified token encountered") end ---------------------------------------------------------------- i = i + 1 end--while repack_tokens() -------------------------------------------------------------------- -- processing loop (PASS 2) -------------------------------------------------------------------- if opt_eols then i = 1 -- aggressive EOL removal only works with most non-grammar tokens -- optimized away because it is a rather simple scheme -- basically -- it just checks 'real' token pairs around EOLs if stoks[1] == "TK_COMMENT" then -- first comment still existing must be shbang, skip whole line i = 3 end while true do tok, info = stoks[i], sinfos[i] -------------------------------------------------------------- if tok == "TK_EOS" then -- end of stream/pass break -------------------------------------------------------------- elseif tok == "TK_EOL" then -- consider each TK_EOL local t1, t2 = stoks[i - 1], stoks[i + 1] if is_realtoken[t1] and is_realtoken[t2] then -- sanity check local s = checkpair(i - 1, i + 1) if s == "" or t2 == "TK_EOS" then settoken() -- remove entirely end end end--if tok -------------------------------------------------------------- i = i + 1 end--while repack_tokens() end -------------------------------------------------------------------- if opt_details and opt_details > 0 then print() end -- spacing return stoks, sinfos, stoklns end --end of inserted module end -- preload function for module optparser preload.optparser = function() --start of inserted module module "optparser" local string = base.require "string" local table = base.require "table" ---------------------------------------------------------------------- -- Letter frequencies for reducing symbol entropy (fixed version) -- * Might help a wee bit when the output file is compressed -- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies -- * We use letter frequencies according to a Linotype keyboard, plus -- the underscore, and both lower case and upper case letters. -- * The arrangement below (LC, underscore, %d, UC) is arbitrary. 
-- * This is certainly not optimal, but is quick-and-dirty and the -- process has no significant overhead ---------------------------------------------------------------------- local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ" local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ" -- names or identifiers that must be skipped -- * the first two lines are for keywords local SKIP_NAME = {} for v in string.gmatch([[ and break do else elseif end false for function if in local nil not or repeat return then true until while self]], "%S+") do SKIP_NAME[v] = true end ------------------------------------------------------------------------ -- variables and data structures ------------------------------------------------------------------------ local toklist, seminfolist, -- token lists (lexer output) tokpar, seminfopar, xrefpar, -- token lists (parser output) globalinfo, localinfo, -- variable information tables statinfo, -- statment type table globaluniq, localuniq, -- unique name tables var_new, -- index of new variable names varlist -- list of output variables ---------------------------------------------------------------------- -- preprocess information table to get lists of unique names ---------------------------------------------------------------------- local function preprocess(infotable) local uniqtable = {} for i = 1, #infotable do -- enumerate info table local obj = infotable[i] local name = obj.name -------------------------------------------------------------------- if not uniqtable[name] then -- not found, start an entry uniqtable[name] = { decl = 0, token = 0, size = 0, } end -------------------------------------------------------------------- local uniq = uniqtable[name] -- count declarations, tokens, size uniq.decl = uniq.decl + 1 local xref = obj.xref local xcount = #xref uniq.token = uniq.token + xcount uniq.size = uniq.size + xcount * #name -------------------------------------------------------------------- if obj.decl then -- if local table, create first,last pairs obj.id = i obj.xcount = xcount if xcount > 1 then -- if ==1, means local never accessed obj.first = xref[2] obj.last = xref[xcount] end -------------------------------------------------------------------- else -- if global table, add a back ref uniq.id = i end -------------------------------------------------------------------- end--for return uniqtable end ---------------------------------------------------------------------- -- calculate actual symbol frequencies, in order to reduce entropy -- * this may help further reduce the size of compressed sources -- * note that since parsing optimizations is put before lexing -- optimizations, the frequency table is not exact! -- * yes, this will miss --keep block comments too... 
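----------------------------------------------------------------------
-- illustrative sketch of the effect (hypothetical counts, not from the
-- original sources): suppose the frequency pass below finds, say,
--   freq of 'i' = 512, 'n' = 431, 'e' = 397, 'q' = 3, ...
-- then resort(LETTERS) reorders the candidate characters by those
-- counts, so LETTERS might begin "ine..." instead of "eta...", and
-- new_var_name() will hand out 'i' and 'n' before rarer letters;
-- the renamed source then tends to compress slightly better
----------------------------------------------------------------------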
---------------------------------------------------------------------- local function recalc_for_entropy(option) local byte = string.byte local char = string.char -- table of token classes to accept in calculating symbol frequency local ACCEPT = { TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, TK_STRING = true, TK_LSTRING = true, } if not option["opt-comments"] then ACCEPT.TK_COMMENT = true ACCEPT.TK_LCOMMENT = true end -------------------------------------------------------------------- -- create a new table and remove any original locals by filtering -------------------------------------------------------------------- local filtered = {} for i = 1, #toklist do filtered[i] = seminfolist[i] end for i = 1, #localinfo do -- enumerate local info table local obj = localinfo[i] local xref = obj.xref for j = 1, obj.xcount do local p = xref[j] filtered[p] = "" -- remove locals end end -------------------------------------------------------------------- local freq = {} -- reset symbol frequency table for i = 0, 255 do freq[i] = 0 end for i = 1, #toklist do -- gather symbol frequency local tok, info = toklist[i], filtered[i] if ACCEPT[tok] then for j = 1, #info do local c = byte(info, j) freq[c] = freq[c] + 1 end end--if end--for -------------------------------------------------------------------- -- function to re-sort symbols according to actual frequencies -------------------------------------------------------------------- local function resort(symbols) local symlist = {} for i = 1, #symbols do -- prepare table to sort local c = byte(symbols, i) symlist[i] = { c = c, freq = freq[c], } end table.sort(symlist, -- sort selected symbols function(v1, v2) return v1.freq > v2.freq end ) local charlist = {} -- reconstitute the string for i = 1, #symlist do charlist[i] = char(symlist[i].c) end return table.concat(charlist) end -------------------------------------------------------------------- LETTERS = resort(LETTERS) -- change letter arrangement ALPHANUM = resort(ALPHANUM) end ---------------------------------------------------------------------- -- returns a string containing a new local variable name to use, and -- a flag indicating whether it collides with a global variable -- * trapping keywords and other names like 'self' is done elsewhere ---------------------------------------------------------------------- local function new_var_name() local var local cletters, calphanum = #LETTERS, #ALPHANUM local v = var_new if v < cletters then -- single char v = v + 1 var = string.sub(LETTERS, v, v) else -- longer names local range, sz = cletters, 1 -- calculate # chars fit repeat v = v - range range = range * calphanum sz = sz + 1 until range > v local n = v % cletters -- left side cycles faster v = (v - n) / cletters -- do first char first n = n + 1 var = string.sub(LETTERS, n, n) while sz > 1 do local m = v % calphanum v = (v - m) / calphanum m = m + 1 var = var..string.sub(ALPHANUM, m, m) sz = sz - 1 end end var_new = var_new + 1 return var, globaluniq[var] ~= nil end ---------------------------------------------------------------------- -- calculate and print some statistics -- * probably better in main source, put here for now ---------------------------------------------------------------------- local function stats_summary(globaluniq, localuniq, afteruniq, option) local print = print or base.print local fmt = string.format local opt_details = option.DETAILS if option.QUIET then return end local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed decl_g, decl_li, decl_lo, decl_ti, 
decl_to, token_g, token_li, token_lo, token_ti, token_to, size_g, size_li, size_lo, size_ti, size_to = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 local function avg(c, l) -- safe average function if c == 0 then return 0 end return l / c end -------------------------------------------------------------------- -- collect statistics (note: globals do not have declarations!) -------------------------------------------------------------------- for name, uniq in base.pairs(globaluniq) do uniq_g = uniq_g + 1 token_g = token_g + uniq.token size_g = size_g + uniq.size end for name, uniq in base.pairs(localuniq) do uniq_li = uniq_li + 1 decl_li = decl_li + uniq.decl token_li = token_li + uniq.token size_li = size_li + uniq.size end for name, uniq in base.pairs(afteruniq) do uniq_lo = uniq_lo + 1 decl_lo = decl_lo + uniq.decl token_lo = token_lo + uniq.token size_lo = size_lo + uniq.size end uniq_ti = uniq_g + uniq_li decl_ti = decl_g + decl_li token_ti = token_g + token_li size_ti = size_g + size_li uniq_to = uniq_g + uniq_lo decl_to = decl_g + decl_lo token_to = token_g + token_lo size_to = size_g + size_lo -------------------------------------------------------------------- -- detailed stats: global list -------------------------------------------------------------------- if opt_details then local sorted = {} -- sort table of unique global names by size for name, uniq in base.pairs(globaluniq) do uniq.name = name sorted[#sorted + 1] = uniq end table.sort(sorted, function(v1, v2) return v1.size > v2.size end ) local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s" local hl = string.rep("-", 44) print("*** global variable list (sorted by size) ***\n"..hl) print(fmt(tabf1, "Token", "Input", "Input", "Global")) print(fmt(tabf1, "Count", "Bytes", "Average", "Name")) print(hl) for i = 1, #sorted do local uniq = sorted[i] print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name)) end print(hl) print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL")) print(hl.."\n") -------------------------------------------------------------------- -- detailed stats: local list -------------------------------------------------------------------- local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s" local hl = string.rep("-", 70) print("*** local variable list (sorted by allocation order) ***\n"..hl) print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global")) print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name")) print(hl) for i = 1, #varlist do -- iterate according to order assigned local name = varlist[i] local uniq = afteruniq[name] local old_t, old_s = 0, 0 for j = 1, #localinfo do -- find corresponding old names and calculate local obj = localinfo[j] if obj.name == name then old_t = old_t + obj.xcount old_s = old_s + obj.xcount * #obj.oldname end end print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s), uniq.size, avg(uniq.token, uniq.size), name)) end print(hl) print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li), size_lo, avg(token_lo, size_lo), "TOTAL")) print(hl.."\n") end--if opt_details -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f" local hl = string.rep("-", 58) print("*** local variable optimization summary ***\n"..hl) print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", 
"Average")) print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes")) print(hl) print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g))) print(hl) print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li))) print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti))) print(hl) print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo))) print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to))) print(hl.."\n") end ---------------------------------------------------------------------- -- delete a token and adjust all relevant tables -- * horribly inefficient... luckily it's an off-line processor -- * currently invalidates globalinfo and localinfo (not updated), -- so any other optimization is done after processing locals -- (of course, we can also lex the source data again...) ---------------------------------------------------------------------- local function del_token(id) if id < 1 or id >= #tokpar then return -- ignore if invalid (id == #tokpar is token) end local i2 = xrefpar[id] -- position in lexer lists local idend, i2end = -- final indices #tokpar, #toklist for i = id + 1, idend do -- shift parser tables tokpar[i - 1] = tokpar[i] seminfopar[i - 1] = seminfopar[i] xrefpar[i - 1] = xrefpar[i] - 1 statinfo[i - 1] = statinfo[i] end tokpar[idend] = nil seminfopar[idend] = nil xrefpar[idend] = nil statinfo[idend] = nil for i = i2 + 1, i2end do -- shift lexer tables toklist[i - 1] = toklist[i] seminfolist[i - 1] = seminfolist[i] end toklist[i2end] = nil seminfolist[i2end] = nil end ---------------------------------------------------------------------- -- experimental optimization for f("string") statements -- * safe to delete parentheses without adding whitespace, as both -- kinds of strings can abut with anything else ---------------------------------------------------------------------- local function optimize_func1() ------------------------------------------------------------------ local function is_strcall(j) -- find f("string") pattern local t1 = tokpar[j + 1] or "" local t2 = tokpar[j + 2] or "" local t3 = tokpar[j + 3] or "" if t1 == "(" and t2 == "" and t3 == ")" then return true end end ------------------------------------------------------------------ local starti = 1 while true do local i, found = starti, false while i <= #tokpar do -- scan for function pattern local id = statinfo[i] if id == "call" and is_strcall(i) then -- found, delete () del_token(i + 1) -- '(' del_token(i + 2) -- ')' (index shifted by -1) found = true starti = i + 2 end i = i + 1 end if not found then break end end end ---------------------------------------------------------------------- -- local variable optimization ---------------------------------------------------------------------- local function optimize_locals(option) var_new = 0 -- reset variable name allocator varlist = {} ------------------------------------------------------------------ -- preprocess global/local tables, handle entropy reduction ------------------------------------------------------------------ globaluniq = preprocess(globalinfo) localuniq = preprocess(localinfo) if option["opt-entropy"] then -- for entropy improvement recalc_for_entropy(option) end ------------------------------------------------------------------ -- build initial declared object table, then sort according to -- token count, this might help assign more tokens to more common -- variable 
names such as 'e' thus possibly reducing entropy -- * an object knows its localinfo index via its 'id' field -- * special handling for "self" special local (parameter) here ------------------------------------------------------------------ local object = {} for i = 1, #localinfo do object[i] = localinfo[i] end table.sort(object, -- sort largest first function(v1, v2) return v1.xcount > v2.xcount end ) ------------------------------------------------------------------ -- the special "self" function parameters must be preserved -- * the allocator below will never use "self", so it is safe to -- keep those implicit declarations as-is ------------------------------------------------------------------ local temp, j, gotself = {}, 1, false for i = 1, #object do local obj = object[i] if not obj.isself then temp[j] = obj j = j + 1 else gotself = true end end object = temp ------------------------------------------------------------------ -- a simple first-come first-served heuristic name allocator, -- note that this is in no way optimal... -- * each object is a local variable declaration plus existence -- * the aim is to assign short names to as many tokens as possible, -- so the following tries to maximize name reuse -- * note that we preserve sort order ------------------------------------------------------------------ local nobject = #object while nobject > 0 do local varname, gcollide repeat varname, gcollide = new_var_name() -- collect a variable name until not SKIP_NAME[varname] -- skip all special names varlist[#varlist + 1] = varname -- keep a list local oleft = nobject ------------------------------------------------------------------ -- if variable name collides with an existing global, the name -- cannot be used by a local when the name is accessed as a global -- during which the local is alive (between 'act' to 'rem'), so -- we drop objects that collides with the corresponding global ------------------------------------------------------------------ if gcollide then -- find the xref table of the global local gref = globalinfo[globaluniq[varname].id].xref local ngref = #gref -- enumerate for all current objects; all are valid at this point for i = 1, nobject do local obj = object[i] local act, rem = obj.act, obj.rem -- 'live' range of local -- if rem < 0, it is a -id to a local that had the same name -- so follow rem to extend it; does this make sense? while rem < 0 do rem = localinfo[-rem].rem end local drop for j = 1, ngref do local p = gref[j] if p >= act and p <= rem then drop = true end -- in range? 
end if drop then obj.skip = true oleft = oleft - 1 end end--for end--if gcollide ------------------------------------------------------------------ -- now the first unassigned local (since it's sorted) will be the -- one with the most tokens to rename, so we set this one and then -- eliminate all others that collides, then any locals that left -- can then reuse the same variable name; this is repeated until -- all local declaration that can use this name is assigned -- * the criteria for local-local reuse/collision is: -- A is the local with a name already assigned -- B is the unassigned local under consideration -- => anytime A is accessed, it cannot be when B is 'live' -- => to speed up things, we have first/last accesses noted ------------------------------------------------------------------ while oleft > 0 do local i = 1 while object[i].skip do -- scan for first object i = i + 1 end ------------------------------------------------------------------ -- first object is free for assignment of the variable name -- [first,last] gives the access range for collision checking ------------------------------------------------------------------ oleft = oleft - 1 local obja = object[i] i = i + 1 obja.newname = varname obja.skip = true obja.done = true local first, last = obja.first, obja.last local xref = obja.xref ------------------------------------------------------------------ -- then, scan all the rest and drop those colliding -- if A was never accessed then it'll never collide with anything -- otherwise trivial skip if: -- * B was activated after A's last access (last < act) -- * B was removed before A's first access (first > rem) -- if not, see detailed skip below... ------------------------------------------------------------------ if first and oleft > 0 then -- must have at least 1 access local scanleft = oleft while scanleft > 0 do while object[i].skip do -- next valid object i = i + 1 end scanleft = scanleft - 1 local objb = object[i] i = i + 1 local act, rem = objb.act, objb.rem -- live range of B -- if rem < 0, extend range of rem thru' following local while rem < 0 do rem = localinfo[-rem].rem end -------------------------------------------------------- if not(last < act or first > rem) then -- possible collision -------------------------------------------------------- -- B is activated later than A or at the same statement, -- this means for no collision, A cannot be accessed when B -- is alive, since B overrides A (or is a peer) -------------------------------------------------------- if act >= obja.act then for j = 1, obja.xcount do -- ... then check every access local p = xref[j] if p >= act and p <= rem then -- A accessed when B live! 
oleft = oleft - 1 objb.skip = true break end end--for -------------------------------------------------------- -- A is activated later than B, this means for no collision, -- A's access is okay since it overrides B, but B's last -- access need to be earlier than A's activation time -------------------------------------------------------- else if objb.last and objb.last >= obja.act then oleft = oleft - 1 objb.skip = true end end end -------------------------------------------------------- if oleft == 0 then break end end end--if first ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all possible locals to one variable name, the -- unassigned locals/objects have the skip field reset and the table -- is compacted, to hopefully reduce iteration time ------------------------------------------------------------------ local temp, j = {}, 1 for i = 1, nobject do local obj = object[i] if not obj.done then obj.skip = false temp[j] = obj j = j + 1 end end object = temp -- new compacted object table nobject = #object -- objects left to process ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all locals with new variable names, we can -- patch in the new names, and reprocess to get 'after' stats ------------------------------------------------------------------ for i = 1, #localinfo do -- enumerate all locals local obj = localinfo[i] local xref = obj.xref if obj.newname then -- if got new name, patch it in for j = 1, obj.xcount do local p = xref[j] -- xrefs indexes the token list seminfolist[p] = obj.newname end obj.name, obj.oldname -- adjust names = obj.newname, obj.name else obj.oldname = obj.name -- for cases like 'self' end end ------------------------------------------------------------------ -- deal with statistics output ------------------------------------------------------------------ if gotself then -- add 'self' to end of list varlist[#varlist + 1] = "self" end local afteruniq = preprocess(localinfo) stats_summary(globaluniq, localuniq, afteruniq, option) end ---------------------------------------------------------------------- -- main entry point ---------------------------------------------------------------------- function optimize(option, _toklist, _seminfolist, xinfo) -- set tables toklist, seminfolist -- from lexer = _toklist, _seminfolist tokpar, seminfopar, xrefpar -- from parser = xinfo.toklist, xinfo.seminfolist, xinfo.xreflist globalinfo, localinfo, statinfo -- from parser = xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo ------------------------------------------------------------------ -- optimize locals ------------------------------------------------------------------ if option["opt-locals"] then optimize_locals(option) end ------------------------------------------------------------------ -- other optimizations ------------------------------------------------------------------ if option["opt-experimental"] then -- experimental optimize_func1() end end --end of inserted module end -- preload function for module equiv preload.equiv = function() --start of inserted module module "equiv" local string = base.require "string" local loadstring = base.loadstring local sub = string.sub local match = string.match local dump = string.dump local byte = string.byte --[[-------------------------------------------------------------------- -- variable and data 
initialization ----------------------------------------------------------------------]] local is_realtoken = { -- significant (grammar) tokens TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, TK_STRING = true, TK_LSTRING = true, TK_OP = true, TK_EOS = true, } local option, llex, warn --[[-------------------------------------------------------------------- -- functions ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- initialization function ------------------------------------------------------------------------ function init(_option, _llex, _warn) option = _option llex = _llex warn = _warn end ------------------------------------------------------------------------ -- function to build lists containing a 'normal' lexer stream ------------------------------------------------------------------------ local function build_stream(s) llex.init(s) llex.llex() local stok, sseminfo -- source list (with whitespace elements) = llex.tok, llex.seminfo local tok, seminfo -- processed list (real elements only) = {}, {} for i = 1, #stok do local t = stok[i] if is_realtoken[t] then tok[#tok + 1] = t seminfo[#seminfo + 1] = sseminfo[i] end end--for return tok, seminfo end ------------------------------------------------------------------------ -- test source (lexer stream) equivalence ------------------------------------------------------------------------ function source(z, dat) -------------------------------------------------------------------- -- function to return a dumped string for seminfo compares -------------------------------------------------------------------- local function dumpsem(s) local sf = loadstring("return "..s, "z") if sf then return dump(sf) end end -------------------------------------------------------------------- -- mark and optionally report non-equivalence -------------------------------------------------------------------- local function bork(msg) if option.DETAILS then base.print("SRCEQUIV: "..msg) end warn.SRC_EQUIV = true end -------------------------------------------------------------------- -- get lexer streams for both source strings, compare -------------------------------------------------------------------- local tok1, seminfo1 = build_stream(z) -- original local tok2, seminfo2 = build_stream(dat) -- compressed -------------------------------------------------------------------- -- compare shbang lines ignoring EOL -------------------------------------------------------------------- local sh1 = match(z, "^(#[^\r\n]*)") local sh2 = match(dat, "^(#[^\r\n]*)") if sh1 or sh2 then if not sh1 or not sh2 or sh1 ~= sh2 then bork("shbang lines different") end end -------------------------------------------------------------------- -- compare by simple count -------------------------------------------------------------------- if #tok1 ~= #tok2 then bork("count "..#tok1.." "..#tok2) return end -------------------------------------------------------------------- -- compare each element the best we can -------------------------------------------------------------------- for i = 1, #tok1 do local t1, t2 = tok1[i], tok2[i] local s1, s2 = seminfo1[i], seminfo2[i] if t1 ~= t2 then -- by type bork("type ["..i.."] "..t1.." 
"..t2) break end if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then if t1 == "TK_NAME" and option["opt-locals"] then -- can't compare identifiers of locals that are optimized elseif s1 ~= s2 then -- by semantic info (simple) bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) break end elseif t1 == "TK_EOS" then -- no seminfo to compare else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING" -- compare 'binary' form, so dump a function local s1b,s2b = dumpsem(s1), dumpsem(s2) if not s1b or not s2b or s1b ~= s2b then bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) break end end end--for -------------------------------------------------------------------- -- successful comparison if end is reached with no borks -------------------------------------------------------------------- end ------------------------------------------------------------------------ -- test binary chunk equivalence ------------------------------------------------------------------------ function binary(z, dat) local TNIL = 0 local TBOOLEAN = 1 local TNUMBER = 3 local TSTRING = 4 -------------------------------------------------------------------- -- mark and optionally report non-equivalence -------------------------------------------------------------------- local function bork(msg) if option.DETAILS then base.print("BINEQUIV: "..msg) end warn.BIN_EQUIV = true end -------------------------------------------------------------------- -- function to remove shbang line so that loadstring runs -------------------------------------------------------------------- local function zap_shbang(s) local shbang = match(s, "^(#[^\r\n]*\r?\n?)") if shbang then -- cut out shbang s = sub(s, #shbang + 1) end return s end -------------------------------------------------------------------- -- attempt to compile, then dump to get binary chunk string -------------------------------------------------------------------- local cz = loadstring(zap_shbang(z), "z") if not cz then bork("failed to compile original sources for binary chunk comparison") return end local cdat = loadstring(zap_shbang(dat), "z") if not cdat then bork("failed to compile compressed result for binary chunk comparison") end -- if loadstring() works, dump assuming string.dump() is error-free local c1 = { i = 1, dat = dump(cz) } c1.len = #c1.dat local c2 = { i = 1, dat = dump(cdat) } c2.len = #c2.dat -------------------------------------------------------------------- -- support functions to handle binary chunk reading -------------------------------------------------------------------- local endian, sz_int, sz_sizet, -- sizes of data types sz_inst, sz_number, getint, getsizet -------------------------------------------------------------------- local function ensure(c, sz) -- check if bytes exist if c.i + sz - 1 > c.len then return end return true end -------------------------------------------------------------------- local function skip(c, sz) -- skip some bytes if not sz then sz = 1 end c.i = c.i + sz end -------------------------------------------------------------------- local function getbyte(c) -- return a byte value local i = c.i if i > c.len then return end local d = sub(c.dat, i, i) c.i = i + 1 return byte(d) end -------------------------------------------------------------------- local function getint_l(c) -- return an int value (little-endian) local n, scale = 0, 1 if not ensure(c, sz_int) then return end for j = 1, sz_int do n = n + scale * getbyte(c) scale = scale * 256 end return n end -------------------------------------------------------------------- 
local function getint_b(c) -- return an int value (big-endian) local n = 0 if not ensure(c, sz_int) then return end for j = 1, sz_int do n = n * 256 + getbyte(c) end return n end -------------------------------------------------------------------- local function getsizet_l(c) -- return a size_t value (little-endian) local n, scale = 0, 1 if not ensure(c, sz_sizet) then return end for j = 1, sz_sizet do n = n + scale * getbyte(c) scale = scale * 256 end return n end -------------------------------------------------------------------- local function getsizet_b(c) -- return a size_t value (big-endian) local n = 0 if not ensure(c, sz_sizet) then return end for j = 1, sz_sizet do n = n * 256 + getbyte(c) end return n end -------------------------------------------------------------------- local function getblock(c, sz) -- return a block (as a string) local i = c.i local j = i + sz - 1 if j > c.len then return end local d = sub(c.dat, i, j) c.i = i + sz return d end -------------------------------------------------------------------- local function getstring(c) -- return a string local n = getsizet(c) if not n then return end if n == 0 then return "" end return getblock(c, n) end -------------------------------------------------------------------- local function goodbyte(c1, c2) -- compare byte value local b1, b2 = getbyte(c1), getbyte(c2) if not b1 or not b2 or b1 ~= b2 then return end return b1 end -------------------------------------------------------------------- local function badbyte(c1, c2) -- compare byte value local b = goodbyte(c1, c2) if not b then return true end end -------------------------------------------------------------------- local function goodint(c1, c2) -- compare int value local i1, i2 = getint(c1), getint(c2) if not i1 or not i2 or i1 ~= i2 then return end return i1 end -------------------------------------------------------------------- -- recursively-called function to compare function prototypes -------------------------------------------------------------------- local function getfunc(c1, c2) -- source name (ignored) if not getstring(c1) or not getstring(c2) then bork("bad source name"); return end -- linedefined (ignored) if not getint(c1) or not getint(c2) then bork("bad linedefined"); return end -- lastlinedefined (ignored) if not getint(c1) or not getint(c2) then bork("bad lastlinedefined"); return end if not (ensure(c1, 4) and ensure(c2, 4)) then bork("prototype header broken") end -- nups (compared) if badbyte(c1, c2) then bork("bad nups"); return end -- numparams (compared) if badbyte(c1, c2) then bork("bad numparams"); return end -- is_vararg (compared) if badbyte(c1, c2) then bork("bad is_vararg"); return end -- maxstacksize (compared) if badbyte(c1, c2) then bork("bad maxstacksize"); return end -- code (compared) local ncode = goodint(c1, c2) if not ncode then bork("bad ncode"); return end local code1 = getblock(c1, ncode * sz_inst) local code2 = getblock(c2, ncode * sz_inst) if not code1 or not code2 or code1 ~= code2 then bork("bad code block"); return end -- constants (compared) local nconst = goodint(c1, c2) if not nconst then bork("bad nconst"); return end for i = 1, nconst do local ctype = goodbyte(c1, c2) if not ctype then bork("bad const type"); return end if ctype == TBOOLEAN then if badbyte(c1, c2) then bork("bad boolean value"); return end elseif ctype == TNUMBER then local num1 = getblock(c1, sz_number) local num2 = getblock(c2, sz_number) if not num1 or not num2 or num1 ~= num2 then bork("bad number value"); return end elseif ctype == TSTRING 
then local str1 = getstring(c1) local str2 = getstring(c2) if not str1 or not str2 or str1 ~= str2 then bork("bad string value"); return end end end -- prototypes (compared recursively) local nproto = goodint(c1, c2) if not nproto then bork("bad nproto"); return end for i = 1, nproto do if not getfunc(c1, c2) then bork("bad function prototype"); return end end -- debug information (ignored) -- lineinfo (ignored) local sizelineinfo1 = getint(c1) if not sizelineinfo1 then bork("bad sizelineinfo1"); return end local sizelineinfo2 = getint(c2) if not sizelineinfo2 then bork("bad sizelineinfo2"); return end if not getblock(c1, sizelineinfo1 * sz_int) then bork("bad lineinfo1"); return end if not getblock(c2, sizelineinfo2 * sz_int) then bork("bad lineinfo2"); return end -- locvars (ignored) local sizelocvars1 = getint(c1) if not sizelocvars1 then bork("bad sizelocvars1"); return end local sizelocvars2 = getint(c2) if not sizelocvars2 then bork("bad sizelocvars2"); return end for i = 1, sizelocvars1 do if not getstring(c1) or not getint(c1) or not getint(c1) then bork("bad locvars1"); return end end for i = 1, sizelocvars2 do if not getstring(c2) or not getint(c2) or not getint(c2) then bork("bad locvars2"); return end end -- upvalues (ignored) local sizeupvalues1 = getint(c1) if not sizeupvalues1 then bork("bad sizeupvalues1"); return end local sizeupvalues2 = getint(c2) if not sizeupvalues2 then bork("bad sizeupvalues2"); return end for i = 1, sizeupvalues1 do if not getstring(c1) then bork("bad upvalues1"); return end end for i = 1, sizeupvalues2 do if not getstring(c2) then bork("bad upvalues2"); return end end return true end -------------------------------------------------------------------- -- parse binary chunks to verify equivalence -- * for headers, handle sizes to allow a degree of flexibility -- * assume a valid binary chunk is generated, since it was not -- generated via external means -------------------------------------------------------------------- if not (ensure(c1, 12) and ensure(c2, 12)) then bork("header broken") end skip(c1, 6) -- skip signature(4), version, format endian = getbyte(c1) -- 1 = little endian sz_int = getbyte(c1) -- get data type sizes sz_sizet = getbyte(c1) sz_inst = getbyte(c1) sz_number = getbyte(c1) skip(c1) -- skip integral flag skip(c2, 12) -- skip other header (assume similar) if endian == 1 then -- set for endian sensitive data we need getint = getint_l getsizet = getsizet_l else getint = getint_b getsizet = getsizet_b end getfunc(c1, c2) -- get prototype at root if c1.i ~= c1.len + 1 then bork("inconsistent binary chunk1"); return elseif c2.i ~= c2.len + 1 then bork("inconsistent binary chunk2"); return end -------------------------------------------------------------------- -- successful comparison if end is reached with no borks -------------------------------------------------------------------- end --end of inserted module end -- preload function for module plugin/html preload["plugin/html"] = function() --start of inserted module module "plugin/html" local string = base.require "string" local table = base.require "table" local io = base.require "io" ------------------------------------------------------------------------ -- constants and configuration ------------------------------------------------------------------------ local HTML_EXT = ".html" local ENTITIES = { ["&"] = "&", ["<"] = "<", [">"] = ">", ["'"] = "'", ["\""] = """, } -- simple headers and footers local HEADER = [[ %s
<style type="text/css">
%s</style>
<body>
<pre class="code">
]]
local FOOTER = [[
</pre>
</body>
</html>
]] -- for more, please see wikimain.css from the Lua wiki site local STYLESHEET = [[ BODY { background: white; color: navy; } pre.code { color: black; } span.comment { color: #00a000; } span.string { color: #009090; } span.keyword { color: black; font-weight: bold; } span.number { color: #993399; } span.operator { } span.name { } span.global { color: #ff0000; font-weight: bold; } span.local { color: #0000ff; font-weight: bold; } ]] ------------------------------------------------------------------------ -- option handling, plays nice with --quiet option ------------------------------------------------------------------------ local option -- local reference to list of options local srcfl, destfl -- filenames local toklist, seminfolist, toklnlist -- token data local function print(...) -- handle quiet option if option.QUIET then return end base.print(...) end ------------------------------------------------------------------------ -- initialization ------------------------------------------------------------------------ function init(_option, _srcfl, _destfl) option = _option srcfl = _srcfl local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") local basename, extension = srcfl, "" if extb and extb > 1 then basename = string.sub(srcfl, 1, extb - 1) extension = string.sub(srcfl, extb, exte) end destfl = basename..HTML_EXT if option.OUTPUT_FILE then destfl = option.OUTPUT_FILE end if srcfl == destfl then base.error("output filename identical to input filename") end end ------------------------------------------------------------------------ -- message display, post-load processing ------------------------------------------------------------------------ function post_load(z) print([[ HTML plugin module for LuaSrcDiet ]]) print("Exporting: "..srcfl.." -> "..destfl.."\n") end ------------------------------------------------------------------------ -- post-lexing processing, can work on lexer table output ------------------------------------------------------------------------ function post_lex(_toklist, _seminfolist, _toklnlist) toklist, seminfolist, toklnlist = _toklist, _seminfolist, _toklnlist end ------------------------------------------------------------------------ -- escape the usual suspects for HTML/XML ------------------------------------------------------------------------ local function do_entities(z) local i = 1 while i <= #z do local c = string.sub(z, i, i) local d = ENTITIES[c] if d then c = d z = string.sub(z, 1, i - 1)..c..string.sub(z, i + 1) end i = i + #c end--while return z end ------------------------------------------------------------------------ -- save source code to file ------------------------------------------------------------------------ local function save_file(fname, dat) local OUTF = io.open(fname, "wb") if not OUTF then base.error("cannot open \""..fname.."\" for writing") end local status = OUTF:write(dat) if not status then base.error("cannot write to \""..fname.."\"") end OUTF:close() end ------------------------------------------------------------------------ -- post-parsing processing, gives globalinfo, localinfo ------------------------------------------------------------------------ function post_parse(globalinfo, localinfo) local html = {} local function add(s) -- html helpers html[#html + 1] = s end local function span(class, s) add(''..s..'') end ---------------------------------------------------------------------- for i = 1, #globalinfo do -- mark global identifiers as TK_GLOBAL local obj = globalinfo[i] local xref = obj.xref for j = 1, #xref do 
local p = xref[j] toklist[p] = "TK_GLOBAL" end end--for ---------------------------------------------------------------------- for i = 1, #localinfo do -- mark local identifiers as TK_LOCAL local obj = localinfo[i] local xref = obj.xref for j = 1, #xref do local p = xref[j] toklist[p] = "TK_LOCAL" end end--for ---------------------------------------------------------------------- add(string.format(HEADER, -- header and leading stuff do_entities(srcfl), STYLESHEET)) for i = 1, #toklist do -- enumerate token list local tok, info = toklist[i], seminfolist[i] if tok == "TK_KEYWORD" then span("keyword", info) elseif tok == "TK_STRING" or tok == "TK_LSTRING" then span("string", do_entities(info)) elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then span("comment", do_entities(info)) elseif tok == "TK_GLOBAL" then span("global", info) elseif tok == "TK_LOCAL" then span("local", info) elseif tok == "TK_NAME" then span("name", info) elseif tok == "TK_NUMBER" then span("number", info) elseif tok == "TK_OP" then span("operator", do_entities(info)) elseif tok ~= "TK_EOS" then -- TK_EOL, TK_SPACE add(info) end end--for add(FOOTER) save_file(destfl, table.concat(html)) option.EXIT = true end --end of inserted module end -- preload function for module plugin/sloc preload["plugin/sloc"] = function() --start of inserted module module "plugin/sloc" local string = base.require "string" local table = base.require "table" ------------------------------------------------------------------------ -- initialization ------------------------------------------------------------------------ local option -- local reference to list of options local srcfl -- source file name function init(_option, _srcfl, _destfl) option = _option option.QUIET = true srcfl = _srcfl end ------------------------------------------------------------------------ -- splits a block into a table of lines (minus EOLs) ------------------------------------------------------------------------ local function split(blk) local lines = {} local i, nblk = 1, #blk while i <= nblk do local p, q, r, s = string.find(blk, "([\r\n])([\r\n]?)", i) if not p then p = nblk + 1 end lines[#lines + 1] = string.sub(blk, i, p - 1) i = p + 1 if p < nblk and q > p and r ~= s then -- handle Lua-style CRLF, LFCR i = i + 1 end end return lines end ------------------------------------------------------------------------ -- post-lexing processing, can work on lexer table output ------------------------------------------------------------------------ function post_lex(toklist, seminfolist, toklnlist) local lnow, sloc = 0, 0 local function chk(ln) -- if a new line, count it as an SLOC if ln > lnow then -- new line # must be > old line # sloc = sloc + 1; lnow = ln end end for i = 1, #toklist do -- enumerate over all tokens local tok, info, ln = toklist[i], seminfolist[i], toklnlist[i] -------------------------------------------------------------------- if tok == "TK_KEYWORD" or tok == "TK_NAME" or -- significant tok == "TK_NUMBER" or tok == "TK_OP" then chk(ln) -------------------------------------------------------------------- -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop -- is needed in order to mark off lines one-by-one. Since llex.lua -- currently returns the line number of the last part of the string, -- we must subtract in order to get the starting line number. 
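    -- Worked example (added note): a string spanning source lines 5-7 is
    -- reported by llex at line 7; split() yields 3 pieces, so
    -- ln = 7 - 3 + 1 = 5, and the loops below then count lines 5, 6 and 7
    -- in turn (TK_LSTRING additionally skips pieces that are blank lines).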
    --------------------------------------------------------------------
    elseif tok == "TK_STRING" then      -- possible multi-line
      local t = split(info)
      ln = ln - #t + 1
      for j = 1, #t do
        chk(ln); ln = ln + 1
      end
    --------------------------------------------------------------------
    elseif tok == "TK_LSTRING" then     -- possible multi-line
      local t = split(info)
      ln = ln - #t + 1
      for j = 1, #t do
        if t[j] ~= "" then chk(ln) end
        ln = ln + 1
      end
    --------------------------------------------------------------------
    -- other tokens are comments or whitespace and are ignored
    --------------------------------------------------------------------
    end
  end--for
  base.print(srcfl..": "..sloc)         -- display result
  option.EXIT = true
end
--end of inserted module
end

-- support modules
local llex = require "llex"
local lparser = require "lparser"
local optlex = require "optlex"
local optparser = require "optparser"
local equiv = require "equiv"
local plugin

--[[--------------------------------------------------------------------
-- messages and textual data
----------------------------------------------------------------------]]

local MSG_TITLE = [[
LuaSrcDiet: Puts your Lua 5.1 source code on a diet
Version 0.12.0 (20110913)  Copyright (c) 2005-2008,2011 Kein-Hong Man
The COPYRIGHT file describes the conditions under which this
software may be distributed.
]]

local MSG_USAGE = [[
usage: LuaSrcDiet [options] [filenames]

example:
  >LuaSrcDiet myscript.lua -o myscript_.lua

options:
  -v, --version       prints version information
  -h, --help          prints usage information
  -o <file>           specify file name to write output
  -s <suffix>         suffix for output files (default '_')
  --keep <msg>        keep block comment with <msg> inside
  --plugin <module>   run <module> in plugin/ directory
  -                   stop handling arguments

  (optimization levels)
  --none              all optimizations off (normalizes EOLs only)
  --basic             lexer-based optimizations only
  --maximum           maximize reduction of source

  (informational)
  --quiet             process files quietly
  --read-only         read file and print token stats only
  --dump-lexer        dump raw tokens from lexer to stdout
  --dump-parser       dump variable tracking tables from parser
  --details           extra info (strings, numbers, locals)

features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]

------------------------------------------------------------------------
-- optimization options, for ease of switching on and off
-- * positive to enable optimization, negative (no) to disable
-- * these options should follow --opt-* and --noopt-* style for now
------------------------------------------------------------------------

local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence'
--opt-experimental,'apply experimental optimizations'
]]

-- preset configuration
local DEFAULT_CONFIG = [[
  --opt-comments --opt-whitespace --opt-emptylines
  --opt-numbers --opt-locals
  --opt-srcequiv --opt-binequiv
]]
-- override configurations
-- * MUST explicitly enable/disable everything for
--   total option replacement
local BASIC_CONFIG = [[
  --opt-comments --opt-whitespace --opt-emptylines
  --noopt-eols --noopt-strings --noopt-numbers
  --noopt-locals --noopt-entropy
  --opt-srcequiv
--opt-binequiv ]] local MAXIMUM_CONFIG = [[ --opt-comments --opt-whitespace --opt-emptylines --opt-eols --opt-strings --opt-numbers --opt-locals --opt-entropy --opt-srcequiv --opt-binequiv ]] local NONE_CONFIG = [[ --noopt-comments --noopt-whitespace --noopt-emptylines --noopt-eols --noopt-strings --noopt-numbers --noopt-locals --noopt-entropy --opt-srcequiv --opt-binequiv ]] local DEFAULT_SUFFIX = "_" -- default suffix for file renaming local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins --[[-------------------------------------------------------------------- -- startup and initialize option list handling ----------------------------------------------------------------------]] -- simple error message handler; change to error if traceback wanted local function die(msg) print("LuaSrcDiet (error): "..msg); os.exit(1) end --die = error--DEBUG if not match(_VERSION, "5.1", 1, 1) then -- sanity check die("requires Lua 5.1 to run") end ------------------------------------------------------------------------ -- prepares text for list of optimizations, prepare lookup table ------------------------------------------------------------------------ local MSG_OPTIONS = "" do local WIDTH = 24 local o = {} for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do local msg = " "..op msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n" MSG_OPTIONS = MSG_OPTIONS..msg o[op] = true o["--no"..sub(op, 3)] = true end OPTION = o -- replace OPTION with lookup table end MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG) if p_embedded then -- embedded plugins local EMBED_INFO = "\nembedded plugins:\n" for i = 1, #p_embedded do local p = p_embedded[i] EMBED_INFO = EMBED_INFO.." "..plugin_info[p].."\n" end MSG_USAGE = MSG_USAGE..EMBED_INFO end ------------------------------------------------------------------------ -- global variable initialization, option set handling ------------------------------------------------------------------------ local suffix = DEFAULT_SUFFIX -- file suffix local option = {} -- program options local stat_c, stat_l -- statistics tables -- function to set option lookup table based on a text list of options -- note: additional forced settings for --opt-eols is done in optlex.lua local function set_options(CONFIG) for op in gmatch(CONFIG, "(%-%-%S+)") do if sub(op, 3, 4) == "no" and -- handle negative options OPTION["--"..sub(op, 5)] then option[sub(op, 5)] = false else option[sub(op, 3)] = true end end end --[[-------------------------------------------------------------------- -- support functions ----------------------------------------------------------------------]] -- list of token types, parser-significant types are up to TTYPE_GRAMMAR -- while the rest are not used by parsers; arranged for stats display local TTYPES = { "TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar "TK_STRING", "TK_LSTRING", "TK_OP", "TK_EOS", "TK_COMMENT", "TK_LCOMMENT", -- non-grammar "TK_EOL", "TK_SPACE", } local TTYPE_GRAMMAR = 7 local EOLTYPES = { -- EOL names for token dump ["\n"] = "LF", ["\r"] = "CR", ["\n\r"] = "LFCR", ["\r\n"] = "CRLF", } ------------------------------------------------------------------------ -- read source code from file ------------------------------------------------------------------------ local function load_file(fname) local INF = io.open(fname, "rb") if not INF then die('cannot open "'..fname..'" for reading') end local dat = INF:read("*a") if not dat then die('cannot read from "'..fname..'"') end INF:close() return dat end 
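-- Illustration only (added comment, not executed): set_options() above flips
-- one flag per "--opt-*"/"--noopt-*" word found in a config string, so a call
-- such as
--   set_options("--noopt-locals --opt-eols")
-- would leave option["opt-locals"] = false and option["opt-eols"] = true;
-- the --basic/--maximum/--none presets are applied the same way in main().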
------------------------------------------------------------------------ -- save source code to file ------------------------------------------------------------------------ local function save_file(fname, dat) local OUTF = io.open(fname, "wb") if not OUTF then die('cannot open "'..fname..'" for writing') end local status = OUTF:write(dat) if not status then die('cannot write to "'..fname..'"') end OUTF:close() end ------------------------------------------------------------------------ -- functions to deal with statistics ------------------------------------------------------------------------ -- initialize statistics table local function stat_init() stat_c, stat_l = {}, {} for i = 1, #TTYPES do local ttype = TTYPES[i] stat_c[ttype], stat_l[ttype] = 0, 0 end end -- add a token to statistics table local function stat_add(tok, seminfo) stat_c[tok] = stat_c[tok] + 1 stat_l[tok] = stat_l[tok] + #seminfo end -- do totals for statistics table, return average table local function stat_calc() local function avg(c, l) -- safe average function if c == 0 then return 0 end return l / c end local stat_a = {} local c, l = 0, 0 for i = 1, TTYPE_GRAMMAR do -- total grammar tokens local ttype = TTYPES[i] c = c + stat_c[ttype]; l = l + stat_l[ttype] end stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l stat_a.TOTAL_TOK = avg(c, l) c, l = 0, 0 for i = 1, #TTYPES do -- total all tokens local ttype = TTYPES[i] c = c + stat_c[ttype]; l = l + stat_l[ttype] stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype]) end stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l stat_a.TOTAL_ALL = avg(c, l) return stat_a end --[[-------------------------------------------------------------------- -- main tasks ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- a simple token dumper, minimal translation of seminfo data ------------------------------------------------------------------------ local function dump_tokens(srcfl) -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist = llex.tok, llex.seminfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] if tok == "TK_OP" and string.byte(seminfo) < 32 then seminfo = "(".. string.byte(seminfo)..")" elseif tok == "TK_EOL" then seminfo = EOLTYPES[seminfo] else seminfo = "'"..seminfo.."'" end print(tok.." 
"..seminfo) end--for end ---------------------------------------------------------------------- -- parser dump; dump globalinfo and localinfo tables ---------------------------------------------------------------------- local function dump_parser(srcfl) local print = print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist, toklnlist = llex.tok, llex.seminfo, llex.tokln -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- lparser.init(toklist, seminfolist, toklnlist) local xinfo = lparser.parser() local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local hl = string.rep("-", 72) print("*** Local/Global Variable Tracker Tables ***") print(hl.."\n GLOBALS\n"..hl) -- global tables have a list of xref numbers only for i = 1, #globalinfo do local obj = globalinfo[i] local msg = "("..i..") '"..obj.name.."' -> " local xref = obj.xref for j = 1, #xref do msg = msg..xref[j].." " end print(msg) end -- local tables have xref numbers and a few other special -- numbers that are specially named: decl (declaration xref), -- act (activation xref), rem (removal xref) print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl) for i = 1, #localinfo do local obj = localinfo[i] local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl.. " act:"..obj.act.." rem:"..obj.rem if obj.isself then msg = msg.." isself" end msg = msg.." -> " local xref = obj.xref for j = 1, #xref do msg = msg..xref[j].." 
" end print(msg) end print(hl.."\n") end ------------------------------------------------------------------------ -- reads source file(s) and reports some statistics ------------------------------------------------------------------------ local function read_only(srcfl) local print = print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist = llex.tok, llex.seminfo print(MSG_TITLE) print("Statistics for: "..srcfl.."\n") -------------------------------------------------------------------- -- collect statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat_a = stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local fmt = string.format local function figures(tt) return stat_c[tt], stat_l[tt], stat_a[tt] end local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f" local hl = string.rep("-", 42) print(fmt(tabf1, "Lexical", "Input", "Input", "Input")) print(fmt(tabf1, "Elements", "Count", "Bytes", "Average")) print(hl) for i = 1, #TTYPES do local ttype = TTYPES[i] print(fmt(tabf2, ttype, figures(ttype))) if ttype == "TK_EOS" then print(hl) end end print(hl) print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) print(hl.."\n") end ------------------------------------------------------------------------ -- process source file(s), write output and reports some statistics ------------------------------------------------------------------------ local function process_file(srcfl, destfl) local function print(...) -- handle quiet option if option.QUIET then return end _G.print(...) 
end if plugin and plugin.init then -- plugin init option.EXIT = false plugin.init(option, srcfl, destfl) if option.EXIT then return end end print(MSG_TITLE) -- title message -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) if plugin and plugin.post_load then -- plugin post-load z = plugin.post_load(z) or z if option.EXIT then return end end llex.init(z) llex.llex() local toklist, seminfolist, toklnlist = llex.tok, llex.seminfo, llex.tokln if plugin and plugin.post_lex then -- plugin post-lex plugin.post_lex(toklist, seminfolist, toklnlist) if option.EXIT then return end end -------------------------------------------------------------------- -- collect 'before' statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat1_a = stat_calc() local stat1_c, stat1_l = stat_c, stat_l -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- optparser.print = print -- hack lparser.init(toklist, seminfolist, toklnlist) local xinfo = lparser.parser() if plugin and plugin.post_parse then -- plugin post-parse plugin.post_parse(xinfo.globalinfo, xinfo.localinfo) if option.EXIT then return end end optparser.optimize(option, toklist, seminfolist, xinfo) if plugin and plugin.post_optparse then -- plugin post-optparse plugin.post_optparse() if option.EXIT then return end end -------------------------------------------------------------------- -- do lexer optimization here, save output file -------------------------------------------------------------------- local warn = optlex.warn -- use this as a general warning lookup optlex.print = print -- hack toklist, seminfolist, toklnlist = optlex.optimize(option, toklist, seminfolist, toklnlist) if plugin and plugin.post_optlex then -- plugin post-optlex plugin.post_optlex(toklist, seminfolist, toklnlist) if option.EXIT then return end end local dat = table.concat(seminfolist) -- depending on options selected, embedded EOLs in long strings and -- long comments may not have been translated to \n, tack a warning if string.find(dat, "\r\n", 1, 1) or string.find(dat, "\n\r", 1, 1) then warn.MIXEDEOL = true end -------------------------------------------------------------------- -- test source and binary chunk equivalence -------------------------------------------------------------------- equiv.init(option, llex, warn) equiv.source(z, dat) equiv.binary(z, dat) local smsg = "before and after lexer streams are NOT equivalent!" local bmsg = "before and after binary chunks are NOT equivalent!" 
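  -- The plugin hooks called above trace the whole pipeline; a minimal plugin
  -- (sketch only; the module name "plugin/noop" is hypothetical) would be:
  --   module "plugin/noop"
  --   function init(option, srcfl, destfl) end
  --   function post_load(z) return z end            -- may rewrite the source
  --   function post_lex(toklist, seminfolist, toklnlist) end
  --   function post_parse(globalinfo, localinfo) end
  --   function post_optparse() end
  --   function post_optlex(toklist, seminfolist, toklnlist) end
  -- Every hook is optional, and setting option.EXIT = true inside any of
  -- them makes process_file() return early, as the embedded html and sloc
  -- plugins do.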
-- for reporting, die if option was selected, else just warn if warn.SRC_EQUIV then if option["opt-srcequiv"] then die(smsg) end else print("*** SRCEQUIV: token streams are sort of equivalent") if option["opt-locals"] then print("(but no identifier comparisons since --opt-locals enabled)") end print() end if warn.BIN_EQUIV then if option["opt-binequiv"] then die(bmsg) end else print("*** BINEQUIV: binary chunks are sort of equivalent") print() end -------------------------------------------------------------------- -- save optimized source stream to output file -------------------------------------------------------------------- save_file(destfl, dat) -------------------------------------------------------------------- -- collect 'after' statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat_a = stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- print("Statistics for: "..srcfl.." -> "..destfl.."\n") local fmt = string.format local function figures(tt) return stat1_c[tt], stat1_l[tt], stat1_a[tt], stat_c[tt], stat_l[tt], stat_a[tt] end local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s", "%-16s%8d%8d%10.2f%8d%8d%10.2f" local hl = string.rep("-", 68) print("*** lexer-based optimizations summary ***\n"..hl) print(fmt(tabf1, "Lexical", "Input", "Input", "Input", "Output", "Output", "Output")) print(fmt(tabf1, "Elements", "Count", "Bytes", "Average", "Count", "Bytes", "Average")) print(hl) for i = 1, #TTYPES do local ttype = TTYPES[i] print(fmt(tabf2, ttype, figures(ttype))) if ttype == "TK_EOS" then print(hl) end end print(hl) print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) print(hl) -------------------------------------------------------------------- -- report warning flags from optimizing process -------------------------------------------------------------------- if warn.LSTRING then print("* WARNING: "..warn.LSTRING) elseif warn.MIXEDEOL then print("* WARNING: ".."output still contains some CRLF or LFCR line endings") elseif warn.SRC_EQUIV then print("* WARNING: "..smsg) elseif warn.BIN_EQUIV then print("* WARNING: "..bmsg) end print() end --[[-------------------------------------------------------------------- -- main functions ----------------------------------------------------------------------]] local arg = {...} -- program arguments local fspec = {} set_options(DEFAULT_CONFIG) -- set to default options at beginning ------------------------------------------------------------------------ -- per-file handling, ship off to tasks ------------------------------------------------------------------------ local function do_files(fspec) for i = 1, #fspec do local srcfl = fspec[i] local destfl ------------------------------------------------------------------ -- find and replace extension for filenames ------------------------------------------------------------------ local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") local basename, extension = srcfl, "" if extb and extb > 1 then basename = sub(srcfl, 1, extb - 1) extension = sub(srcfl, extb, exte) end destfl = basename..suffix..extension if #fspec == 1 and option.OUTPUT_FILE then destfl = option.OUTPUT_FILE end if srcfl == destfl then die("output filename identical to input filename") end 
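    -- Worked example of the renaming above (added note): with the default
    -- suffix "_", "myscript.lua" splits into basename "myscript" plus
    -- extension ".lua", so destfl becomes "myscript_.lua" unless -o
    -- supplies another name.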
------------------------------------------------------------------ -- perform requested operations ------------------------------------------------------------------ if option.DUMP_LEXER then dump_tokens(srcfl) elseif option.DUMP_PARSER then dump_parser(srcfl) elseif option.READ_ONLY then read_only(srcfl) else process_file(srcfl, destfl) end end--for end ------------------------------------------------------------------------ -- main function (entry point is after this definition) ------------------------------------------------------------------------ local function main() local argn, i = #arg, 1 if argn == 0 then option.HELP = true end -------------------------------------------------------------------- -- handle arguments -------------------------------------------------------------------- while i <= argn do local o, p = arg[i], arg[i + 1] local dash = match(o, "^%-%-?") if dash == "-" then -- single-dash options if o == "-h" then option.HELP = true; break elseif o == "-v" then option.VERSION = true; break elseif o == "-s" then if not p then die("-s option needs suffix specification") end suffix = p i = i + 1 elseif o == "-o" then if not p then die("-o option needs a file name") end option.OUTPUT_FILE = p i = i + 1 elseif o == "-" then break -- ignore rest of args else die("unrecognized option "..o) end elseif dash == "--" then -- double-dash options if o == "--help" then option.HELP = true; break elseif o == "--version" then option.VERSION = true; break elseif o == "--keep" then if not p then die("--keep option needs a string to match for") end option.KEEP = p i = i + 1 elseif o == "--plugin" then if not p then die("--plugin option needs a module name") end if option.PLUGIN then die("only one plugin can be specified") end option.PLUGIN = p plugin = require(PLUGIN_SUFFIX..p) i = i + 1 elseif o == "--quiet" then option.QUIET = true elseif o == "--read-only" then option.READ_ONLY = true elseif o == "--basic" then set_options(BASIC_CONFIG) elseif o == "--maximum" then set_options(MAXIMUM_CONFIG) elseif o == "--none" then set_options(NONE_CONFIG) elseif o == "--dump-lexer" then option.DUMP_LEXER = true elseif o == "--dump-parser" then option.DUMP_PARSER = true elseif o == "--details" then option.DETAILS = true elseif OPTION[o] then -- lookup optimization options set_options(o) else die("unrecognized option "..o) end else fspec[#fspec + 1] = o -- potential filename end i = i + 1 end--while if option.HELP then print(MSG_TITLE..MSG_USAGE); return true elseif option.VERSION then print(MSG_TITLE); return true end if #fspec > 0 then if #fspec > 1 and option.OUTPUT_FILE then die("with -o, only one source file can be specified") end do_files(fspec) return true else die("nothing to do!") end end -- entry point -> main() -> do_files() if not main() then die("Please run with option -h or --help for usage information") end -- end of script ]=], "LuaSrcDiet")
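-- Note: loadstring() only compiles the embedded source; the caller receives
-- the resulting chunk (named "LuaSrcDiet" in error messages) and runs it with
-- command-line style arguments, which the chunk reads via {...}. A possible
-- embedding (hypothetical; assumes this file is reachable on package.path as
-- LuaSrcDiet.lua) could be:
--   local diet = require "LuaSrcDiet"
--   diet("myscript.lua", "-o", "myscript_.lua")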