--- Escape-sequence substitution helpers (the escape set handled here is
-- the JSON one: \" \\ \/ \b \f \n \r \t and \uXXXX).
-- Needs Lua 5.3+ because it uses the native bitwise operators.

local inf = math.huge
local byte, char, sub = string.byte, string.char, string.sub
local setmetatable = setmetatable

-- From here on no global is reachable; only the locals captured above.
local _ENV = nil

-- Hex digit lookup indexed by (byte value - 47), so '0' (48) maps to slot 1.
-- Every non-hex byte in the covered range yields `inf`, which poisons the
-- arithmetic sum below and lets a single comparison detect a bad digit.
local hextbl = {
  0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inf, inf, inf, inf, inf, inf,
  inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
  inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
  inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
}
-- Any index outside 1..64 (bytes below '0' or above 'o') is also "not hex".
hextbl.__index = function()
  return inf
end
setmetatable(hextbl, hextbl)

--- Build a pair of escape-decoding helpers that share surrogate state.
-- @tparam function myerror error reporter, called with a message string.
--   The code below does not handle `myerror` returning normally, so it is
--   expected to raise.
-- @treturn table `{ subst = function, surrogateok = function }`
return function(myerror)
  -- Single-character escapes; an unknown selector is reported via myerror.
  local escapetbl = {
    ['"'] = '"',
    ['\\'] = '\\',
    ['/'] = '/',
    ['b'] = '\b',
    ['f'] = '\f',
    ['n'] = '\n',
    ['r'] = '\r',
    ['t'] = '\t'
  }
  escapetbl.__index = function()
    myerror("invalid escape sequence")
  end
  setmetatable(escapetbl, escapetbl)

  -- 0: no pending surrogate; 0xD800..0xDBFF: a high surrogate is waiting
  -- for its low half; 1: a low surrogate arrived with no high half.
  local surrogateprev = 0

  --- Decode one escape sequence.
  -- @tparam string ch the escape selector character (e.g. 'n' or 'u')
  -- @tparam string rest the text following the selector; for 'u' it must
  --   start with four hex digits
  -- @treturn string the decoded character(s) followed by the unconsumed
  --   part of `rest`; the empty string when a high surrogate was buffered
  --   and nothing follows its four digits
  local function subst(ch, rest)
    local u8
    if ch == 'u' then
      local c1, c2, c3, c4 = byte(rest, 1, 4)
      if not c4 then
        -- Fewer than four bytes follow the selector: report it through
        -- myerror instead of failing on nil arithmetic below.
        myerror("invalid unicode charcode")
      end
      -- multiplications should not be lshift since cn may be inf
      local ucode = hextbl[c1-47] * 0x1000 +
                    hextbl[c2-47] * 0x100 +
                    hextbl[c3-47] * 0x10 +
                    hextbl[c4-47]
      if ucode == inf then
        myerror("invalid unicode charcode")
      end
      rest = sub(rest, 5)
      if ucode < 0x80 then -- 1byte
        u8 = char(ucode)
      elseif ucode < 0x800 then -- 2byte
        u8 = char(0xC0 + (ucode >> 6), 0x80 + (ucode & 0x3F))
      elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3byte
        u8 = char(0xE0 + (ucode >> 12),
                  0x80 + (ucode >> 6 & 0x3F),
                  0x80 + (ucode & 0x3F))
      elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
        if surrogateprev == 0 then
          surrogateprev = ucode
          -- Only acceptable when nothing trails the digits; otherwise the
          -- pending state trips the check after this if-chain.
          if rest == '' then
            return ''
          end
        end
      else -- surrogate pair 2nd
        if surrogateprev == 0 then
          -- Low half without a high half: mark the state as invalid.
          surrogateprev = 1
        else
          -- Combine both halves into one code point above U+FFFF.
          ucode = 0x10000 + (surrogateprev - 0xD800 << 10) + (ucode - 0xDC00)
          surrogateprev = 0
          u8 = char(0xF0 + (ucode >> 18),
                    0x80 + (ucode >> 12 & 0x3F),
                    0x80 + (ucode >> 6 & 0x3F),
                    0x80 + (ucode & 0x3F))
        end
      end
    end
    -- Any escape other than the matching low half while a surrogate is
    -- pending (or a stray low half) is an error.
    if surrogateprev ~= 0 then
      myerror("invalid surrogate pair")
    end
    return (u8 or escapetbl[ch]) .. rest
  end

  --- Report whether no surrogate half is currently pending.
  -- @treturn boolean true when the surrogate state is clear
  local function surrogateok()
    return surrogateprev == 0
  end

  return {
    subst = subst,
    surrogateok = surrogateok
  }
end