--string module for codedit (Cosmin Apreutesei, public domain).
--deals specifically with tabs, spaces, lines and words.

local glue = require'glue'
local utf8 = require'utf8'

--the module table is produced by glue.update({}, utf8); the functions below
--rely on it exposing byte_indices, byte_index, byte_indices_reverse and sub
--(presumably copied over from the utf8 module -- confirm against glue.update).
local str = glue.update({}, utf8)

--tabs and whitespace ----------------------------------------------------------------------------------------------------

--check for an ascii char at a byte index without string creation:
--the byte at index i of s is compared with the first byte of c.
--raises 'out of range' unless 1 <= i <= #s.
function str.isascii(s, i, c)
	local in_range = i >= 1 and i <= #s
	assert(in_range, 'out of range')
	local actual, wanted = s:byte(i), c:byte(1)
	return actual == wanted
end

--check if the char at byte index i is a tab
function str.istab(s, i)
	return str.isascii(s, i, '\t')
end

--check if the char at byte index i is a space char
function str.isspacechar(s, i)
	return str.isascii(s, i, ' ')
end

--check if the char at byte index i is a whitespace char (a space or a tab)
function str.isspace(s, i)
	if str.isspacechar(s, i) then
		return true
	end
	return str.istab(s, i)
end

--char index of the next non-space char after some char (nil if none).
--if after_ci is omitted, the first non-space char in the string is returned.
function str.next_nonspace(s, after_ci)
	local skip = after_ci or 0
	local char_pos = 0
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		if char_pos > skip then
			if not str.isspace(s, byte_pos) then
				return char_pos
			end
		end
	end
end

--char index of the next double-space char after some char (nil if none).
--if after_ci is omitted, the first double-space char in the string is returned.
--the index returned is that of the second of two consecutive whitespace chars
--which are both located after after_ci.
function str.next_double_space(s, after_ci)
	local skip = after_ci or 0
	local char_pos = 0
	local prev_qualifies = false
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		--a char only counts if it is whitespace and lies past the skipped prefix.
		local qualifies = char_pos > skip and str.isspace(s, byte_pos)
		if qualifies and prev_qualifies then
			return char_pos
		end
		prev_qualifies = qualifies
	end
end

--char index of the last non-space char before some char (nil if none).
--if before_ci is omitted, the last non-space char in the string is returned.
function str.prev_nonspace(s, before_ci)
	--with no limit given, every char of the string is considered.
	local limit = before_ci or 1/0
	local char_pos = 0
	local last_nonspace --stays nil until a non-whitespace char is seen
	for byte_pos in str.byte_indices(s) do
		char_pos = char_pos + 1
		if char_pos >= limit then
			break
		end
		if not str.isspace(s, byte_pos) then
			last_nonspace = char_pos
		end
	end
	return last_nonspace
end

--left trim of space and tab characters.
--returns '' when the string holds no non-whitespace char.
function str.ltrim(s)
	local first = str.next_nonspace(s)
	if not first then
		return ''
	end
	return str.sub(s, first) or ''
end

--right trim of space and tab characters.
--returns '' when the string holds no non-whitespace char.
function str.rtrim(s)
	local last = str.prev_nonspace(s)
	if not last then
		return ''
	end
	return str.sub(s, 1, last) or ''
end

--number of tabs and of spaces in indentation, i.e. in the leading run of
--whitespace chars; counting stops at the first non-whitespace char.
--TODO: use this
function str.indent_counts(s)
	local tab_count, space_count = 0, 0
	for byte_pos in str.byte_indices(s) do
		if str.istab(s, byte_pos) then
			tab_count = tab_count + 1
		elseif str.isspace(s, byte_pos) then
			space_count = space_count + 1
		else
			return tab_count, space_count
		end
	end
	return tab_count, space_count
end

--lines ------------------------------------------------------------------------------------------------------------------

--return the index where the next line starts (unimportant) and the indices of the line starting at a given index.
--the last line is the substring after the last line terminator to the end of the string (see tests).
--returns nothing when i is past that last line.
function str.next_line_indices(s, i)
	local len = #s
	i = i or 1
	if i == len + 1 then
		--string ended with newline, or string is empty: iterate one more empty line
		return 1/0, i, i - 1
	end
	if i > len then
		return
	end
	--first capture: position right after the line's contents;
	--second capture: position right after the optional \r and optional \n.
	local j, nexti = s:match('^[^\r\n]*()\r?\n?()', i)
	if nexti > len and j == nexti then
		--string ends without a newline, mark that by setting nexti to inf
		nexti = 1/0
	end
	return nexti, i, j - 1
end

--iterate lines, returning the index where the next line starts (unimportant) and the indices of each line.
--meant for use in a generic for: for nexti, i, j in str.line_indices(s) do ... end
function str.line_indices(s)
	return str.next_line_indices, s
end

--return the index where the next line starts (unimportant) and the contents of the line starting at a given index.
--the last line is the substring after the last line terminator to the end of the string (see tests).
function str.next_line(s, i)
	local nexti, starti, endi = str.next_line_indices(s, i)
	if nexti then
		return nexti, s:sub(starti, endi)
	end
end

--iterate lines, returning the index where the next line starts (unimportant) and the contents of each line.
--meant for use in a generic for: for nexti, line in str.lines(s) do ... end
function str.lines(s)
	return str.next_line, s
end

--number of lines in s, as iterated by str.line_indices.
function str.line_count(s)
	local count = 0
	for _ in str.line_indices(s) do
		count = count + 1
	end
	return count
end

--words ------------------------------------------------------------------------------------------------------------------

--true if the pattern word_chars is found in s when searching from byte index i.
--NOTE(review): the search is not anchored here; presumably callers pass a
--pattern starting with '^' so that only the char at i is tested -- confirm.
function str.isword(s, i, word_chars)
	local found_at = s:find(word_chars, i)
	return found_at ~= nil
end

--from a char index, search forwards for:
--1) 1..n spaces followed by a non-space char
--2) 1..n word chars or non-word chars followed by case 1
--3) 1..n word chars followed by a non-word char
--4) 1..n non-word chars followed by a word char
--if the next break should be on a different line, return nil.
function str.next_word_break(s, first_ci, word_chars)
	if first_ci < 1 then
		return 1
	end
	local firsti = str.byte_index(s, first_ci)
	if not firsti then
		return
	end

	--the class of the starting char decides which kind of run is being skipped.
	local expect
	if str.isspace(s, firsti) then
		expect = 'space'
	elseif str.isword(s, firsti, word_chars) then
		expect = 'word'
	else
		expect = 'nonword'
	end

	local ci = first_ci
	for i in str.byte_indices(s, firsti) do
		ci = ci + 1
		local on_space = str.isspace(s, i)
		if expect == 'space' then
			--case 1: the run of spaces ends at the first non-space char.
			if not on_space then
				return ci
			end
		elseif on_space then
			--case 2 -> case 1: spaces after a word or non-word run are skipped too.
			expect = 'space'
		else
			--cases 3 and 4: break where the char class changes.
			local class = str.isword(s, i, word_chars) and 'word' or 'nonword'
			if class ~= expect then
				return ci
			end
		end
	end
	--no break found: report the position right after the last char visited.
	return ci + 1
end

--from a char index, search backwards for:
--1) 1..n spaces followed by 1..n words or non-words
--2) 1 words or non-words followed by case 1
--3) 2..n words or non-words followed by a char of a different class
--in other words: look back until the char type changes from the type at firsti or of the prev. char, and skip spaces.
--if the prev. break should be on a different line, return nil.
function str.prev_word_break(s, first_ci, word_chars)
	if first_ci <= 1 then
		return
	end

	--classify the char at byte index i as 'space', 'word' or 'nonword'.
	local function class_at(i)
		if str.isspace(s, i) then
			return 'space'
		elseif str.isword(s, i, word_chars) then
			return 'word'
		end
		return 'nonword'
	end

	local firsti = str.byte_index(s, first_ci)
	--when first_ci has no byte index, 'prev' matches no class, so the class
	--of the first char visited gets adopted below.
	local expect
	if firsti then
		expect = class_at(firsti)
	else
		expect = 'prev'
	end

	local lasti = firsti
	local ci = first_ci
	for i in str.byte_indices_reverse(s, firsti) do
		ci = ci - 1
		local class = class_at(i)
		if expect == 'space' then
			--skipping spaces: switch to the class of the first non-space char met.
			if class ~= 'space' then
				expect = class
			end
		elseif expect ~= class then
			if lasti ~= firsti then
				--the class changed after at least one step: the break is one char ahead.
				return ci + 1
			end
			--class change on the very first step: adopt the class of the prev. char.
			expect = class
		end
		lasti = i
	end
	return 1
end

--tests ------------------------------------------------------------------------------------------------------------------

if not ... then

	assert(str.next_nonspace('') == nil)
	assert(str.next_nonspace(' ') == nil)
	assert(str.next_nonspace(' x') == 2)
	assert(str.next_nonspace(' x ') == 2)
	assert(str.next_nonspace('x ') == 1)

	assert(str.prev_nonspace('') == nil)
	assert(str.prev_nonspace(' ') == nil)
	assert(str.prev_nonspace('x') == 1)
	assert(str.prev_nonspace('x ') == 1)
	assert(str.prev_nonspace(' x ') == 2)

	assert(str.rtrim('abc \t ') == 'abc')
	assert(str.rtrim(' \t abc x \t ') == ' \t abc x')
	assert(str.rtrim('abc') == 'abc')
	assert(str.rtrim(' ') == '')
	assert(str.rtrim('') == '')

	--check that iterating the lines of text yields exactly the strings in expected.
	local function assert_lines(text, expected)
		local got = {}
		local n = 0
		for _, line in str.lines(text) do
			n = n + 1
			local want = expected[n]
			assert(want == line, n .. ': "' .. line .. '" ~= "' .. tostring(want) .. '"')
			got[n] = line
		end
		assert(n == #expected, n .. ' ~= ' .. #expected .. ': ' .. table.concat(got, ', '))
	end

	--each case: {input text, expected lines}
	local line_cases = {
		{'', {''}},
		{' ', {' '}},
		{'x\ny', {'x', 'y'}},
		{'x\ny\n', {'x', 'y', ''}},
		{'x\n\ny', {'x', '', 'y'}},
		{'\n', {'', ''}},
		{'\n\r\n', {'','',''}},
		{'\r\n\n', {'','',''}},
		{'\n\r', {'','',''}},
		{'\n\r\n\r', {'','','',''}},
		{'\n\n\r', {'','','',''}},
	}
	for _, case in ipairs(line_cases) do
		assert_lines(case[1], case[2])
	end

	--TODO: next_word_break, prev_word_break

end

--when run as a script (no arguments passed to the chunk), also load the demo.
if not ... then
	require'codedit_demo'
end

return str