-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
-- XML LPeg lexer.
--
-- Builds the token patterns for XML and returns them as a lexer module table
-- with `_rules`, `_tokenstyles` and `_foldsymbols` fields.

local l = require('lexer')
local token, word_match = l.token, l.word_match
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

local M = {_NAME = 'xml'}

-- Whitespace.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments and CDATA.
-- The closing delimiter is optional (`^-1`), so an unterminated comment or
-- CDATA section is still matched as a single token up to the end of input.
local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
local cdata = token('cdata', '<![CDATA[' * (l.any - ']]>')^0 * P(']]>')^-1)

-- Strings.
-- Quoted text only counts as a string when the previous character is '=',
-- i.e. when it is an attribute value.
local sq_str = l.delimited_range("'", false, true)
local dq_str = l.delimited_range('"', false, true)
local string = l.last_char_includes('=') * token(l.STRING, sq_str + dq_str)

-- Zero-width check that the current position lies inside a tag.
-- Used as a trailing guard so that '=' and numbers in character data are not
-- highlighted. Returns `index` (succeed, consume nothing) or nil (fail).
local in_tag = P(function(input, index)
  local before = input:sub(1, index - 1)
  -- Each `find` is truncated to its first result: `s` is the position of a
  -- '<' with no '>' after it, `e` the position of a '>' with no '<' after it.
  local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
  if s and e then return s > e and index or nil end
  if s then return index end
  -- No open '<' was found behind us; accept only if a '>' is reached ahead
  -- before any '<'.
  return input:find('^[^<]->', index) and index or nil
end)

-- Numbers.
-- Bare attribute values such as width=100 or width=50%.
local number = l.last_char_includes('=') *
               token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag

-- NOTE(review): `alpha` is not referenced below (`identifier` uses `l.alpha`);
-- it is kept as found.
local alpha = R('az', 'AZ', '\127\255')
local word_char = l.alnum + S('_-:.??')
local identifier = (l.alpha + S('_-:.??')) * word_char^0
local namespace = token(l.OPERATOR, ':') * token('namespace', identifier)

-- NOTE(review): the element, attribute and tag definitions below were lost
-- from this copy of the file and have been reconstructed from the rule names,
-- token styles and fold symbols that refer to them. Confirm against upstream.

-- Elements.
-- A name that directly follows '<' or '</'.
local element = l.last_char_includes('</') * token('element', identifier) *
                namespace^-1

-- Attributes.
-- A name that is followed by '=' (optionally separated by whitespace).
local attribute = token('attribute', identifier) * namespace^-1 *
                  #(l.space^0 * '=')

-- Tags.
-- The punctuation that opens or closes a tag: '<', '</', '>' and '/>'.
local tag = token('tag', '<' * P('/')^-1 + P('/')^-1 * '>')

-- Equals.
local equals = token(l.OPERATOR, '=') * in_tag

-- Entities.
-- Only the five predefined XML entities are recognized.
local entity = token('entity', '&' * word_match{
  'lt', 'gt', 'amp', 'apos', 'quot'
} * ';')

-- Doctypes and other markup tags.
local doctype = token('doctype', P('<!DOCTYPE')) * ws *
                token('doctype', identifier) * (ws * identifier)^-1 *
                (1 - P('>'))^0 * token('doctype', '>')

-- Processing instructions.
-- As with comments, the closing '?>' is optional.
local proc_insn = token('proc_insn', P('<?') * (1 - P('?>'))^0 * P('?>')^-1)

-- Rules are tried in order, so the multi-character constructs that begin with
-- '<' (comment, cdata, doctype, proc_insn) must come before the generic tag.
M._rules = {
  {'whitespace', ws},
  {'comment', comment},
  {'cdata', cdata},
  {'doctype', doctype},
  {'proc_insn', proc_insn},
  {'tag', tag},
  {'element', element},
  {'attribute', attribute},
  {'equals', equals},
  {'string', string},
  {'number', number},
  {'entity', entity},
}

M._tokenstyles = {
  tag = l.STYLE_KEYWORD,
  element = l.STYLE_KEYWORD,
  namespace = l.STYLE_CLASS,
  attribute = l.STYLE_TYPE,
  cdata = l.STYLE_COMMENT,
  entity = l.STYLE_OPERATOR,
  doctype = l.STYLE_COMMENT,
  proc_insn = l.STYLE_COMMENT,
  --markup = l.STYLE_COMMENT
}

-- Fold points. `_patterns` are Lua patterns (hence the %-escapes); the
-- per-token tables map the matched text to a fold level change.
M._foldsymbols = {
  _patterns = {'</?', '/>', '<!%-%-', '%-%->', '<!%[CDATA%[', '%]%]>'},
  tag = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
  [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
  cdata = {['<![CDATA['] = 1, [']]>'] = -1}
}

return M