-----------------------------------------------------------------------
-- Character set converter for SciTE Exporter
-- Version 1.0.1, 20070805
--
-- Copyright 2006-2007 by Kein-Hong Man
-- All Rights Reserved
--
-- Permission to use, copy, modify, and distribute this software and
-- its documentation for any purpose and without fee is hereby granted,
-- provided that the above copyright notice appear in all copies and
-- that both that copyright notice and this permission notice appear
-- in supporting documentation.
--
-- KEIN-HONG MAN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
-- INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
-- NO EVENT SHALL KEIN-HONG MAN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
-- CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-- OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-- NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
-- WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--
-----------------------------------------------------------------------
-- USAGE
-- * Select the source charset first with exportutil.CharsetFromSet();
--   the exporter then calls the installed converter function,
--   exportutil.CharsetToUTF8().
-- * The default converter is a pass-through, equivalent to "iso-8859-1"
-- * Currently implemented charsets are:
--       iso-8859-1     alias: default
--       windows-1252   alias: cp1252
-- * Used by UTF-8 targets only: ABW, SXW, ODT
-----------------------------------------------------------------------

-----------------------------------------------------------------------
-- Sanity check: this script extends the exportutil table, so stop
-- with a clear message if that table has not been created yet.
-----------------------------------------------------------------------
if not exportutil then
  error("SciTE_ExportCharset: exportutil not defined")
end

-----------------------------------------------------------------------
-- exportutil.CharsetFromSet
--
-- Sets the character set of the source.
-----------------------------------------------------------------------
-- Parameters:
--   charset  name of the source character set, or one of its aliases
--            (see the tables below); strings are matched
--            case-insensitively
-- Effect:
--   installs the matching converter as exportutil.CharsetToUTF8
-- Errors:
--   raises "unrecognized charset <name>" when the name is neither a
--   known charset nor a known alias (this includes a nil argument)
-----------------------------------------------------------------------
function exportutil.CharsetFromSet(charset)
  -- alternative names accepted for the implemented charsets
  local aliases = {
    ["default"] = "iso-8859-1",
    ["cp1252"] = "windows-1252",
  }
  ---------------------------------------------------------------
  -- Unicode code points for the windows-1252 bytes 0x80..0x9F;
  -- entry [1] corresponds to byte 0x80, entry [32] to byte 0x9F.
  ---------------------------------------------------------------
  local windows_1252 = {
    -- Data from apr-iconv.
    -- 0xFFFE encodings changed to a question mark instead.
    -- Source value: 0x80
    -- 0x20AC, 0xFFFE, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    -- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFE, 0x017D, 0xFFFE,
    8364, 63, 8218, 402, 8222, 8230, 8224, 8225,
    710, 8240, 352, 8249, 338, 63, 381, 63,
    -- Source value: 0x90
    -- 0xFFFE, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    -- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFE, 0x017E, 0x0178,
    63, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
    732, 8482, 353, 8250, 339, 63, 382, 376,
  }
  -- each converter maps one 8-bit character value (a number) to the
  -- number returned to the exporter
  local converters = {
    -------------------------------------------------------------
    -- pass-through: the value is returned unchanged
    ["iso-8859-1"] = function(c)
      return c
    end,
    -------------------------------------------------------------
    -- only 128..159 differ from the pass-through; they are looked
    -- up in the table above (128 - 127 = index 1)
    ["windows-1252"] = function(c)
      if c < 128 or c >= 160 then return c end
      return windows_1252[c - 127]
    end,
    -------------------------------------------------------------
  }
  -- Normalise the lookup key but keep the caller's original value,
  -- so that the error message below can always report it.
  local key = charset
  if type(key) == "string" then
    key = string.lower(key)
  end
  -- check source charset, optionally lookup alias
  local converter
  if key ~= nil then
    converter = converters[key]
    if not converter then
      local canonical = aliases[key]
      if canonical ~= nil then
        converter = converters[canonical]
      end
    end
  end
  -- set converter function
  if not converter then
    -- tostring() keeps the message intact for nil or non-string input
    error("exportutil.CharsetFromSet: unrecognized charset "
          ..tostring(charset))
  end
  exportutil.CharsetToUTF8 = converter
end

-----------------------------------------------------------------------
-- exportutil.CharsetToUTF8
--
-- Converts an 8-bit character value of the selected source charset to
-- the numeric value handed to the UTF-8 exporters. For windows-1252
-- this is the Unicode code point listed in the table above.
-- NOTE(review): the converters return numbers, not encoded byte
-- strings; presumably the caller does the UTF-8 encoding -- confirm.
-----------------------------------------------------------------------
exportutil.CharsetFromSet("default")  -- initializes the function
-- function exportutil.CharsetToUTF8(c)
--   this is the default translator, a pass-through (or iso-8859-1)

-- end of script