#!Lua-5.0.exe
-- Convert accented characters from a given string and return
-- HTML entities in the output string.
-- by Philippe Lhoste http://Phi.Lho.free.fr
-- v. 2.0 -- 2003/06/10 -- Better algorithm, using regular expression
-- v. 1.0 -- 2003/04/19 -- Naive implementation

-- Map from a single input byte to the HTML entity that replaces it.
-- The encoders below work one byte at a time, so every key must be exactly
-- one byte long. Keys are written as decimal escapes (Windows-1252 byte
-- values) so that this table does not depend on the encoding in which the
-- source file itself happens to be saved.
-- '<' and '>' are not encoded; their entries are kept disabled:
--   ['<'] = "&lt;", ['>'] = "&gt;",
local entities =
{
  ['&'] = "&amp;",
  -- French entities (the most common ones)
  ['\224'] = "&agrave;",
  ['\226'] = "&acirc;",
  ['\233'] = "&eacute;",
  ['\232'] = "&egrave;",
  ['\234'] = "&ecirc;",
  ['\235'] = "&euml;",
  ['\238'] = "&icirc;",
  ['\239'] = "&iuml;",
  ['\244'] = "&ocirc;",
  ['\246'] = "&ouml;",
  ['\249'] = "&ugrave;",
  ['\251'] = "&ucirc;",
  ['\255'] = "&yuml;",
  ['\192'] = "&Agrave;",
  ['\194'] = "&Acirc;",
  ['\201'] = "&Eacute;",
  ['\200'] = "&Egrave;",
  ['\202'] = "&Ecirc;",
  ['\203'] = "&Euml;",
  ['\206'] = "&Icirc;",
  ['\207'] = "&Iuml;",
  ['\212'] = "&Ocirc;",
  ['\214'] = "&Ouml;",
  ['\217'] = "&Ugrave;",
  ['\219'] = "&Ucirc;",
  ['\231'] = "&ccedil;",
  ['\199'] = "&Ccedil;",
  ['\159'] = "&Yuml;",
  ['\171'] = "&laquo;",
  ['\187'] = "&raquo;",
  ['\169'] = "&copy;",
  ['\174'] = "&reg;",
  ['\230'] = "&aelig;",
  ['\198'] = "&AElig;",
  ['\140'] = "&OElig;", -- Not understood by all browsers
  ['\156'] = "&oelig;", -- Not understood by all browsers
}

-- Return the given one-byte string as a numerical entity ("&#nnn;") when its
-- byte value is above 127, or unchanged otherwise.
local function EncodeHighAscii(char)
  local code = string.byte(char)
  if code > 127 then
    return string.format("&#%d;", code)
  end
  return char
end

-- Return the named entity for a one-byte string, or the byte itself when the
-- table has no entry for it.
local function EncodeToEntity(char)
  return entities[char] or char
end

-- Escape a one-byte key so it is always taken literally in a Lua pattern
-- ('%' followed by a non-alphanumeric character stands for that character).
local function EscapeForPattern(char)
  local escaped = string.gsub(char, "(%W)", "%%%1")
  return escaped
end

-- Pattern matching any single byte that has a named entity: "([...])".
-- Built once at load time; the table is never modified afterwards.
local knownCharsPattern
do
  local parts = {}
  local count = 0
  for char in pairs(entities) do
    count = count + 1
    parts[count] = EscapeForPattern(char)
  end
  knownCharsPattern = "([" .. table.concat(parts) .. "])"
end

-- Naive implementation: one gsub pass per known entity.
-- @param toEncode  string to encode (single-byte encoding)
-- @return the encoded string, or '' when toEncode is not a string
function EncodeEntities1(toEncode)
  if type(toEncode) ~= "string" then
    return ''
  end

  -- First encode '&' char, to avoid re-encoding already encoded chars
  local encodedString = string.gsub(toEncode, '&', "&amp;")
  -- Encode known entities; '&' is skipped because it was handled above and
  -- replacing it again would corrupt the entities inserted so far
  for char, entity in pairs(entities) do
    if char ~= '&' then
      encodedString = string.gsub(encodedString, EscapeForPattern(char), entity)
    end
  end
  -- Encode unknown high Ascii characters to numerical entities
  encodedString = string.gsub(encodedString, '(.)', EncodeHighAscii)
  return encodedString
end

-- Two-pass implementation: one gsub replaces every known character using a
-- character class, a second gsub converts the remaining high Ascii bytes.
-- @param toEncode  string to encode (single-byte encoding)
-- @return the encoded string, or '' when toEncode is not a string
function EncodeEntities2(toEncode)
  if type(toEncode) ~= "string" then
    return ''
  end

  -- Encode known characters to entities
  local encodedString = string.gsub(toEncode, knownCharsPattern, EncodeToEntity)
  -- Encode unknown high Ascii characters to numerical entities
  encodedString = string.gsub(encodedString, '(.)', EncodeHighAscii)
  return encodedString
end

-- Single-pass implementation: every byte is looked up in the entity table
-- and falls back to a numerical entity when it is above 127.
-- @param toEncode  string to encode (single-byte encoding)
-- @return the encoded string, or '' when toEncode is not a string
function EncodeEntities3(toEncode)
  if type(toEncode) ~= "string" then
    return ''
  end

  local EncodeChar = function (char)
    return entities[char] or EncodeHighAscii(char)
  end

  -- Note from the original author: '(.)' is to be replaced
  -- with '([^%c%s%w%p])'
  local encodedString = string.gsub(toEncode, '(.)', EncodeChar)
  return encodedString
end