local json = require("json")
local lunit = require("lunit")
local testutil = require("testutil")
local string = require("string")

local encode = json.encode
-- DECODE NOT 'local' due to requirement for testutil to access it
decode = json.decode.getDecoder(false)

-- Captured as a local before module() replaces this chunk's environment.
local error = error

module("lunit-strings", lunit.testcase, package.seeall)

--- Recursively assert that two values are structurally equal.
-- Non-table expectations are compared with assert_equal. Table expectations
-- require `t` to be a table holding exactly the same keys, with every value
-- compared recursively.
-- @param expect expected value; table keys must be strings, numbers or booleans
-- @param t      actual value under test
local function assert_table_equal(expect, t)
	if type(expect) ~= 'table' then
		return assert_equal(expect, t)
	end
	-- Report a non-table actual value as a test failure instead of letting
	-- the indexing below raise an unrelated "attempt to index" error.
	if type(t) ~= 'table' then
		fail("expected a table but found a " .. type(t) .. " value")
	end
	for k, v in pairs(expect) do
		local key_type = type(k)
		if key_type ~= 'string' and key_type ~= 'number' and key_type ~= 'boolean' then
			error("INVALID expected table key")
		end
		local found = t[k]
		if found == nil then
			fail(tostring(k) .. " not found but expected")
		end
		assert_table_equal(v, found)
	end
	-- Second pass: reject keys present in the actual table only.
	for k in pairs(t) do
		if nil == expect[k] then
			fail(tostring(k) .. " found but not expected")
		end
	end
end

--- Per-test fixture: reinstall the global decoder.
function setup()
	-- Ensure that the decoder is reset
	_G["decode"] = json.decode.getDecoder(false)
end

--- Single-quoted strings must raise an error when strings.strict_quotes is
-- true and must decode when it is false.
function test_strict_quotes()
	local opts = {
		strings = {
			strict_quotes = true
		}
	}
	assert_error(function()
		local decoder = json.decode.getDecoder(opts)
		decoder("'hello'")
	end)
	opts.strings.strict_quotes = false
	assert_equal("hello", json.decode.getDecoder(opts)("'hello'"))
	-- Quote test
	assert_equal("he'\"llo'", json.decode.getDecoder(opts)("'he\\'\"llo\\''"))
end

-- Pairs of { JSON string literal using \u escapes, expected UTF-8 bytes }.
local utf16_matches = {
	-- 1-byte
	{ '"\\u0000"', string.char(0x00) },
	{ '"\\u007F"', string.char(0x7F) },
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte - currently not handled
	--{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	--{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}

--- Every entry of utf16_matches must decode to its expected byte sequence.
function test_utf16_decode()
	for i, v in ipairs(utf16_matches) do
		-- Test that the default \u decoder outputs UTF8
		-- The index is prefixed to both sides so a failure message shows
		-- which entry mismatched.
		local num = tostring(i) .. ' '
		assert_equal(num .. v[2], num .. json.decode(v[1]))
	end
end

local BOM = string.char(0xEF, 0xBB, 0xBF)
-- BOM skipping tests - here due to relation to UTF8/16
-- Each entry is { input prefixed with the BOM bytes, expected decoded value }.
local BOM_skip_tests = {
	{ BOM .. '"x"', "x" },
	{ BOM .. '["\\uFFFF",true]', { string.char(0xEF, 0xBF, 0xBF), true } },
	-- Other uses of unicode spaces
}

--- Input that starts with the BOM bytes must decode to the expected value.
function test_bom_skip()
	for i, v in ipairs(BOM_skip_tests) do
		assert_table_equal(v[2], json.decode(v[1]))
	end
end

-- Unicode whitespace codepoints gleaned from unicode.org
local WHITESPACES = {
	"\\u0009", -- \t
	"\\u000A", -- \n
	"\\u000B", -- \v
	"\\u000C", -- \f
	"\\u000D", -- \r
	"\\u0020", -- space
	"\\u0085",
	"\\u00A0",
	"\\u1680",
	"\\u180E",
	"\\u2000",
	"\\u2001",
	"\\u2002",
	"\\u2003",
	"\\u2004",
	"\\u2005",
	"\\u2006",
	"\\u2007",
	"\\u2008",
	"\\u2009",
	"\\u200A",
	"\\u200B", -- addition, zero-width space
	"\\u2028",
	"\\u2029",
	"\\u202F",
	"\\u205F",
	"\\u3000",
	"\\uFEFF" -- Zero-width non-breaking space (BOM)
}

-- Documents in which every "%WS%" marker is replaced by one whitespace
-- sequence before decoding.
local inject_ws_values = {
	"%WS%true",
	" %WS%'the%WS blob' %WS%",
	"%WS%{ key: %WS%\"valueMan\",%WS% key2:%WS%4.4}",
	"%WS%false%WS%"
}

--- Each whitespace codepoint, injected at the marked positions, must still
-- leave the document decodable to a non-nil value.
function test_whitespace_ignore()
	for _, escaped in ipairs(WHITESPACES) do
		-- Turn the \uXXXX escape into the bytes the decoder produces for it.
		local ws = json.decode('"' .. escaped .. '"')
		-- A replacement function inserts the text literally; a replacement
		-- string would have '%' interpreted by gsub.
		local function literal_ws()
			return ws
		end
		for _, template in ipairs(inject_ws_values) do
			local document = template:gsub("%%WS%%", literal_ws)
			assert_true(nil ~= json.decode(document))
		end
	end
end

--- Every single-byte string (0..255) must survive a round trip through the
-- default encoder and decoder.
function test_u_encoding()
	local encoder = json.encode.getEncoder()
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end

--- Same round trip as test_u_encoding, with the encoder built using
-- strings.xEncode = true.
function test_x_encoding()
	local encoder = json.encode.getEncoder({ strings = { xEncode = true } })
	local decoder = json.decode.getDecoder()
	for i = 0, 255 do
		local char = string.char(i)
		assert_equal(char, decoder(encoder(char)))
	end
end

-- Pairs of { expected JSON literal, UTF-8 input bytes } for the custom
-- string-processor encoder test.
local multibyte_encoding_values = {
	-- 2-byte
	{ '"\\u0080"', string.char(0xC2, 0x80) },
	{ '"\\u00A2"', string.char(0xC2, 0xA2) },
	{ '"\\u07FF"', string.char(0xDF, 0xBF) },
	-- 3-byte
	{ '"\\u0800"', string.char(0xE0, 0xA0, 0x80) },
	{ '"\\u20AC"', string.char(0xE2, 0x82, 0xAC) },
	{ '"\\uFEFF"', string.char(0xEF, 0xBB, 0xBF) },
	{ '"\\uFFFF"', string.char(0xEF, 0xBF, 0xBF) },
	-- 4-byte (surrogate pairs)
	{ '"\\uD800\\uDC00"', string.char(0xF0, 0x90, 0x80, 0x80) },
	{ '"\\uDBFF\\uDFFF"', string.char(0xF4, 0x8F, 0xBF, 0xBF) }
}

--- An encoder configured with a strings.processor callback must produce the
-- expected literal for every multibyte sample.
-- NOTE(review): "utf8_processor" is not visible in this file; presumably its
-- process() rewrites UTF-8 sequences as \u escapes - confirm.
function test_custom_encoding()
	local function processor(s)
		return require("utf8_processor").process(s)
	end
	local encoder = json.encode.getEncoder({
		strings = {
			processor = processor
		}
	})
	for i, v in ipairs(multibyte_encoding_values) do
		local encoded = encoder(v[2])
		assert_equal(v[1], encoded, "Failed to encode value using custom encoder")
	end
end

--- Every single-byte string must survive a round trip through the strict
-- encoder and strict decoder presets.
function test_strict_decoding()
	local encoder = json.encode.getEncoder(json.encode.strict)
	local decoder = json.decode.getDecoder(json.decode.strict)
	for i = 0, 255 do
		local char = string.char(i)
		-- Must wrap character in array due to decoder strict-ness
		assert_equal(char, decoder(encoder({char}))[1])
	end
end