--[[ * Diff Match and Patch * * Copyright 2006 Google Inc. * http://code.google.com/p/google-diff-match-patch/ * * Based on the Javascript implementation by Neil Fraser * Ported to Lua by Duncan Cross * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. --]] require 'bit' -- local band, bor, lshift = bit.band, bit.bor, bit.lshift local type, setmetatable, ipairs, select, pairs = type, setmetatable, ipairs, select, pairs local rawget, unpack, tonumber, error = rawget, unpack, tonumber, error local strsub, strbyte, strchar, gmatch, gsub = string.sub, string.byte, string.char, string.gmatch, string.gsub local strmatch, strfind, strformat, strreverse = string.match, string.find, string.format, string.reverse local tinsert, tremove, tconcat, tsort = table.insert, table.remove, table.concat, table.sort local max, min, floor, ceil, abs = math.max, math.min, math.floor, math.ceil, math.abs local clock = os.clock module 'diff_match_patch' -- Utility functions local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]' local function percentEncode_replace(v) return strformat('%%%02X', strbyte(v)) end local function tsplice(t,idx,deletions, ...) local insertions = select('#', ...) for i = 1, deletions do tremove(t,idx) end for i = insertions, 1, -1 do -- do not remove parentheses around select tinsert(t,idx,(select(i,...))) end end local function tsub(t,i,j) if (i < 0) then local sz = #t i = sz + 1 + i if j and (j < 0) then j = sz + 1 + j end elseif j and (j < 0) then j = #t + 1 + j end return {unpack(t,i,j)} end local function strelement(str,i) return strsub(str,i,i) end local function telement(t,i) if (i < 0) then i = #t + 1 + i end return t[i] end local function tprepend(t,v) tinsert(t,1,v) return t end local function tappend(t,v) t[#t+1] = v return t end local function strappend(str,v) return str .. v end local function strprepend(str,v) return v .. str end local function indexOf(a,b,start) if (#b == 0) then return nil end -- do not remove parentheses return (strfind(a,b,start,true)) end local function lastIndexOf(a,b, start) for i = (start or #a),1,-1 do if strsub(a,i,i+#b-1) == b then return i end end return nil end local function getMaxBits() local maxbits = 0 local oldi, newi = 1, 2 while (oldi ~= newi) do maxbits = maxbits + 1 oldi = newi newi = lshift(newi, 1) end return maxbits end local htmlEncode_pattern = '[&<>\n]' local htmlEncode_replace = { ['&'] = '&', ['<'] = '<', ['>'] = '>', ['\n'] = '¶
' } -- Public API Functions -- (Exported at the end of the script) local diff_main, diff_cleanupSemantic, diff_cleanupEfficiency, diff_levenshtein, diff_prettyHtml local match_main local patch_make, patch_toText, patch_fromText, patch_apply --[[ * The data structure representing a diff is an array of tuples: * {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}} * which means: delete 'Hello', add 'Goodbye' and keep ' world.' --]] local DIFF_DELETE = -1 local DIFF_INSERT = 1 local DIFF_EQUAL = 0 -- Number of seconds to map a diff before giving up (0 for infinity). local Diff_Timeout = 1.0 -- Cost of an empty edit operation in terms of edit characters. local Diff_EditCost = 4 -- The size beyond which the double-ended diff activates. -- Double-ending is twice as fast, but less accurate. local Diff_DualThreshold = 32 -- At what point is no match declared (0.0 = perfection, 1.0 = very loose). local Match_Threshold = 0.5 -- How far to search for a match (0 = exact location, 1000+ = broad match). -- A match this many characters away from the expected location will add -- 1.0 to the score (0.0 is a perfect match). local Match_Distance = 1000 -- When deleting a large block of text (over ~64 characters), how close does -- the contents have to match the expected contents. (0.0 = perfection, -- 1.0 = very loose). Note that Match_Threshold controls how closely the -- end points of a delete need to match. local Patch_DeleteThreshold = 0.5 -- Chunk size for context length. local Patch_Margin = 4 -- How many bits in a number? local Match_MaxBits = getMaxBits() function settings(new) if new then Diff_Timeout = new.Diff_Timeout or Diff_Timeout Diff_EditCost = new.Diff_EditCost or Diff_EditCost Diff_DualThreshold = new.Diff_DualThreshold or Diff_DualThreshold Match_Threshold = new.Match_Threshold or Match_Threshold Match_Distance = new.Match_Distance or Match_Distance Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold Patch_Margin = new.Patch_Margin or Patch_Margin Match_MaxBits = new.Match_MaxBits or Match_MaxBits else return { Diff_Timeout = Diff_Timeout; Diff_EditCost = Diff_EditCost; Diff_DualThreshold = Diff_DualThreshold; Match_Threshold = Match_Threshold; Match_Distance = Match_Distance; Patch_DeleteThreshold = Patch_DeleteThreshold; Patch_Margin = Patch_Margin; Match_MaxBits = Match_MaxBits; } end end -- --------------------------------------------------------------------------- -- DIFF API -- --------------------------------------------------------------------------- -- The private diff functions local _diff_compute, _diff_map, _diff_path1, _diff_path2, _diff_halfMatchI, _diff_halfMatch, _diff_cleanupSemanticScore, _diff_cleanupSemanticLossless, _diff_cleanupMerge, _diff_commonPrefix, _diff_commonSuffix, _diff_xIndex, _diff_text1, _diff_text2, _diff_toDelta, _diff_fromDelta, _diff_toLines, _diff_fromLines --[[ * Find the differences between two texts. Simplifies the problem by stripping * any common prefix or suffix off the texts before diffing. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @param {boolean} opt_checklines Optional speedup flag. If present and false, * then don't run a line-level diff first to identify the changed areas. * Defaults to true, which does a faster, slightly less optimal diff * @return {Array.>} Array of diff tuples. --]] function diff_main(text1, text2, opt_checklines) -- Check for equality (speedup) if (text1 == text2) then return {{DIFF_EQUAL, text1}} end if (opt_checklines == nil) then opt_checklines = true end local checklines = opt_checklines -- Trim off common prefix (speedup) local commonlength = _diff_commonPrefix(text1, text2) local commonprefix if (commonlength > 0) then commonprefix = strsub(text1, 1, commonlength) text1 = strsub(text1, commonlength + 1) text2 = strsub(text2, commonlength + 1) end -- Trim off common suffix (speedup) commonlength = _diff_commonSuffix(text1, text2) local commonsuffix if (commonlength > 0) then commonsuffix = strsub(text1, -commonlength) text1 = strsub(text1, 1, -commonlength-1) text2 = strsub(text2, 1, -commonlength-1) end -- Compute the diff on the middle block local diffs = _diff_compute(text1, text2, checklines) -- Restore the prefix and suffix if commonprefix then tinsert(diffs, 1, {DIFF_EQUAL, commonprefix}) end if commonsuffix then diffs[#diffs+1] = {DIFF_EQUAL, commonsuffix} end _diff_cleanupMerge(diffs) return diffs end --[[ * Reduce the number of edits by eliminating semantically trivial equalities. * @param {Array.>} diffs Array of diff tuples. --]] function diff_cleanupSemantic(diffs) local changes = false local equalities = {} -- Stack of indices where equalities are found. local equalitiesLength = 0 -- Keeping our own length var is faster. local lastequality = nil -- Always equal to equalities[equalitiesLength][2] local pointer = 1 -- Index of current position. -- Number of characters that changed prior to the equality. local length_changes1 = 0 -- Number of characters that changed after the equality. local length_changes2 = 0 while diffs[pointer] do if (diffs[pointer][1] == DIFF_EQUAL) then -- equality found equalitiesLength = equalitiesLength + 1 equalities[equalitiesLength] = pointer length_changes1 = length_changes2 length_changes2 = 0 lastequality = diffs[pointer][2] else -- an insertion or deletion length_changes2 = length_changes2 + #(diffs[pointer][2]) if lastequality and (#lastequality > 0) and (#lastequality <= length_changes1) and (#lastequality <= length_changes2) then -- Duplicate record tinsert(diffs, equalities[equalitiesLength], {DIFF_DELETE, lastequality}) -- Change second copy to insert. diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT -- Throw away the equality we just deleted. equalitiesLength = equalitiesLength - 1 -- Throw away the previous equality (it needs to be reevaluated). equalitiesLength = equalitiesLength - 1 pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 length_changes1, length_changes2 = 0, 0 -- Reset the counters. lastequality = nil changes = true end end pointer = pointer + 1 end if changes then _diff_cleanupMerge(diffs) end _diff_cleanupSemanticLossless(diffs) end --[[ * Reduce the number of edits by eliminating operationally trivial equalities. * @param {Array.>} diffs Array of diff tuples. --]] function diff_cleanupEfficiency(diffs) local changes = false -- Stack of indices where equalities are found. local equalities = {} -- Keeping our own length var is faster. local equalitiesLength = 0 -- Always equal to diffs[equalities[equalitiesLength]][2] local lastequality = '' -- Index of current position. local pointer = 1 -- The following four are really booleans but are stored as numbers because -- they are used at one point like this: -- -- (pre_ins + pre_del + post_ins + post_del) == 3 -- -- ...i.e. checking that 3 of them are true and 1 of them is false. -- Is there an insertion operation before the last equality. local pre_ins = 0 -- Is there a deletion operation before the last equality. local pre_del = 0 -- Is there an insertion operation after the last equality. local post_ins = 0 -- Is there a deletion operation after the last equality. local post_del = 0 while diffs[pointer] do if (diffs[pointer][1] == DIFF_EQUAL) then -- equality found local diffText = diffs[pointer][2] if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then -- Candidate found. equalitiesLength = equalitiesLength + 1 equalities[equalitiesLength] = pointer pre_ins, pre_del = post_ins, post_del lastequality = diffText else -- Not a candidate, and can never become one. equalitiesLength = 0 lastequality = '' end post_ins, post_del = 0, 0 else -- an insertion or deletion if (diffs[pointer][1] == DIFF_DELETE) then post_del = 1 else post_ins = 1 end --[[ * Five types to be split: * ABXYCD * AXCD * ABXC * AXCD * ABXC --]] if (#lastequality > 0) and ( (pre_ins+pre_del+post_ins+post_del == 4) or ( (#lastequality < Diff_EditCost/2) and (pre_ins+pre_del+post_ins+post_del == 3) )) then -- Duplicate record tinsert(diffs, equalities[equalitiesLength], {DIFF_DELETE,lastequality}) -- Change second copy to insert. diffs[equalities[equalitiesLength]+1][1] = DIFF_INSERT -- Throw away the equality we just deleted; equalitiesLength = equalitiesLength - 1 lastequality = '' if (pre_ins == 1) and (pre_del == 1) then -- No changes made which could affect previous entry, keep going. post_ins, post_del = 1, 1 equalitiesLength = 0 else -- Throw away the previous equality; equalitiesLength = equalitiesLength - 1 pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 post_ins, post_del = 0, 0 end changes = true end end pointer = pointer + 1 end if changes then _diff_cleanupMerge(diffs) end end --[[ * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param {Array.>} diffs Array of diff tuples. * @return {number} Number of changes. --]] function diff_levenshtein(diffs) local levenshtein = 0 local insertions, deletions = 0, 0 for x, diff in ipairs(diffs) do local op, data = diff[1], diff[2] if (op == DIFF_INSERT) then insertions = insertions + #data elseif (op == DIFF_DELETE) then deletions = deletions + #data else--if (op == DIFF_EQUAL) then -- A deletion and an insertion is one substitution. levenshtein = levenshtein + max(insertions, deletions) insertions = 0 deletions = 0 end end levenshtein = levenshtein + max(insertions, deletions) return levenshtein end --[[ * Convert a diff array into a pretty HTML report. * @param {Array.>} diffs Array of diff tuples. * @return {string} HTML representation. --]] function diff_prettyHtml(diffs) local html = {} local i = 0 for x,diff in ipairs(diffs) do local op = diff[1] -- Operation (insert, delete, equal) local data = diff[2] -- Text of change. local text = gsub(data, htmlEncode_pattern, htmlEncode_replace) if (op == DIFF_INSERT) then html[x] = '' .. text .. '' elseif (op == DIFF_DELETE) then html[x] = '~~' .. text .. '~~' else--if (op == DIFF_EQUAL) then html[x] = '' .. text .. '' end if (op ~= DIFF_DELETE) then i = i + #data end end return tconcat(html) end -- --------------------------------------------------------------------------- -- UNOFFICIAL/PRIVATE DIFF FUNCTIONS -- --------------------------------------------------------------------------- --[[ * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @param {boolean} checklines Speedup flag. If false, then don't run a * line-level diff first to identify the changed areas. * If true, then run a faster, slightly less optimal diff * @return {Array.>} Array of diff tuples. * @private --]] function _diff_compute(text1, text2, checklines) if (#text1 == 0) then -- Just add some text (speedup) return {{DIFF_INSERT, text2}} end if (#text2 == 0) then -- Just delete some text (speedup) return {{DIFF_DELETE, text1}} end local diffs local longtext = (#text1 > #text2) and text1 or text2 local shorttext = (#text1 > #text2) and text2 or text1 local i = indexOf(longtext, shorttext) if (i ~= nil) then -- Shorter text is inside the longer text (speedup) diffs = { {DIFF_INSERT, strsub(longtext,1,i-1)}, {DIFF_EQUAL, shorttext}, {DIFF_INSERT, strsub(longtext,i + #shorttext)} } -- Swap insertions for deletions if diff is reversed. if (#text1 > #text2) then diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE end return diffs end longtext, shorttext = nil, nil -- Garbage collect -- Check to see if the problem can be split in two. do local text1_a, text1_b, text2_a, text2_b, mid_common = _diff_halfMatch(text1, text2) if text1_a then -- A half-match was found, sort out the return data. -- Send both pairs off for separate processing. local diffs_a = diff_main(text1_a, text2_a, checklines) local diffs_b = diff_main(text1_b, text2_b, checklines) -- Merge the results. local diffs_a_len = #diffs_a diffs = diffs_a diffs[diffs_a_len+1] = {DIFF_EQUAL, mid_common} for i,b_diff in ipairs(diffs_b) do diffs[diffs_a_len+1+i] = b_diff end return diffs end end -- Perform a real diff. if checklines and (#text1 < 100 or #text2 < 100) then -- Too trivial for the overhead. checklines = false end local linearray if checklines then -- Scan the text on a line-by-line basis first. text1, text2, linearray = _diff_toLines(text1, text2) end diffs = _diff_map(text1, text2) if (diffs == nil) then -- No acceptable result. diffs = {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}} end if checklines then -- Convert the diff back to original text. _diff_fromLines(diffs, linearray) -- Eliminate freak matches (e.g. blank lines) diff_cleanupSemantic(diffs) -- Rediff any replacement blocks, this time character-by-character. -- Add a dummy entry at the end. diffs[#diffs+1] = {DIFF_EQUAL, ''} local pointer = 1 local count_delete = 0 local count_insert = 0 local text_delete = '' local text_insert = '' while (pointer <= #diffs) do local diff_type = diffs[pointer][1] if (diff_type == DIFF_INSERT) then count_insert = count_insert + 1 text_insert = text_insert .. diffs[pointer][2] elseif (diff_type == DIFF_DELETE) then count_delete = count_delete + 1 text_delete = text_delete .. diffs[pointer][2] else--if (diff_type == DIFF_EQUAL) then -- Upon reaching an equality, check for prior redundancies. if (count_delete >= 1) and (count_insert >= 1) then -- Delete the offending records and add the merged ones. local a = diff_main(text_delete, text_insert, false) pointer = pointer - count_delete - count_insert for i = 1, count_delete + count_insert do tremove(diffs, pointer) end for j = #a, 1, -1 do tinsert(diffs, pointer, a[j]) end pointer = pointer + #a end count_insert = 0 count_delete = 0 text_delete = '' text_insert = '' end pointer = pointer + 1 end diffs[#diffs] = nil -- Remove the dummy entry at the end. end return diffs end function _diff_toLines(text1, text2) local lineArray = {} -- e.g. lineArray[4] == 'Hello\n' local lineHash = {} -- e.g. lineHash['Hello\n'] == 4 --[[ * Split a text into an array of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * Modifies linearray and linehash through being a closure. * @param {string} text String to encode * @return {string} Encoded string * @private --]] local _diff_toLinesMunge = function(text) local lines = {} -- Walk the text, pulling out a substring for each line. -- text.split('\n') would would temporarily double our memory footprint. -- Modifying text would create many large strings to garbage collect. local lineStart = 1 local lineEnd = 0 -- Keeping our own length variable is faster than looking it up. local lineArrayLength = #lineArray while (lineEnd < #text) do lineEnd = indexOf(text, '\n', lineStart) or #text local line = strsub(text, lineStart, lineEnd)-- +1) lineStart = lineEnd + 1 local hash = lineHash[line] if hash then lines[#lines+1] = hash else lineArrayLength = lineArrayLength + 1 lines[#lines+1] = lineArrayLength lineHash[line] = lineArrayLength lineArray[lineArrayLength] = line end end return lines end local lines1 = _diff_toLinesMunge(text1) local lines2 = _diff_toLinesMunge(text2) return lines1, lines2, lineArray end function _diff_fromLines(diffs, lineArray) for x,diff in ipairs(diffs) do local lines = diff[2] local text = {} for idx,line in ipairs(lines) do text[idx] = lineArray[line] end diff[2] = tconcat(text) end end --[[ * Explore the intersection points between the two texts. * @param {string} text1 Old string to be diffed. * @param {string} text2 New string to be diffed. * @return {Array.>?} Array of diff tuples or nil if no * diff available. * @private --]] function _diff_map(text1, text2) -- Don't run for too long. local ms_end = clock() + Diff_Timeout -- Cache the text lengths to prevent multiple calls. local text1_length = #text1 local text2_length = #text2 local _sub, _element if type(text1) == 'table' then _sub, _element = tsub, telement else _sub, _element = strsub, strelement end local max_d = text1_length + text2_length - 1 local doubleEnd = (Diff_DualThreshold*2 < max_d) local v_map1 = {} local v_map2 = {} local v1 = {} local v2 = {} v1[1] = 1 v2[1] = 1 local x, y local footstep -- Used to track overlapping paths. local footsteps = {} local done = false -- If the total number of characters is odd, then -- the front path will collide with the reverse path. local front = (((text1_length+text2_length) % 2) == 1) for d = 0, max_d-1 do -- Bail out if timeout reached. if (Diff_Timeout > 0) and (clock() > ms_end) then return nil end -- Walk the front path one step. v_map1[d+1] = {} for k = -d, d, 2 do if (k == -d) or ((k ~= d) and (v1[k-1] < v1[k+1])) then x = v1[k+1] else x = v1[k-1] + 1 end y = x - k if doubleEnd then footstep = x..','..y if front and footsteps[footstep] then done = true end if not front then footsteps[footstep] = d end end while (not done) and (x <= text1_length) and (y <= text2_length) and (_element(text1,x) == _element(text2,y)) do x = x + 1 y = y + 1 if doubleEnd then footstep = x..','..y if front and footsteps[footstep] then done = true end if not front then footsteps[footstep] = d end end end v1[k] = x v_map1[d+1][x..','..y] = true if (x == text1_length+1) and (y == text2_length+1) then -- Reached the end in single-path mode. return _diff_path1(v_map1, text1, text2) elseif done then -- Front path ran over reverse path. for i = footsteps[footstep] + 2, #v_map2 do v_map2[i] = nil end local a = _diff_path1(v_map1, _sub(text1,1,x-1), _sub(text2,1,y-1) ) local a_len = #a local a2 = _diff_path2(v_map2, _sub(text1,x), _sub(text2,y) ) for i,v in ipairs(a2) do a[a_len+i] = v end return a end end if doubleEnd then -- Walk the reverse path one step. v_map2[d+1] = {} for k = -d, d, 2 do if (k == -d) or ((k ~= d) and (v2[k-1] < v2[k+1])) then x = v2[k+1] else x = v2[k-1] + 1 end y = x - k footstep = (text1_length + 2 - x)..','..(text2_length + 2 - y) if (not front) and footsteps[footstep] then done = true end if front then footsteps[footstep] = d end while (not done) and (x <= text1_length) and (y <= text2_length) and (_element(text1,-x) == _element(text2,-y)) do x, y = x+1, y+1 footstep = (text1_length + 2 - x)..','..(text2_length + 2 - y) if (not front) and footsteps[footstep] then done = true end if front then footsteps[footstep] = d end end v2[k] = x v_map2[d+1][x..','..y] = true if done then -- Reverse path ran over front path. for i = footsteps[footstep]+2, #v_map1 do v_map1[i] = nil end local a = _diff_path1(v_map1, _sub(text1, 1, -x), _sub(text2, 1, -y)) -- LUANOTE: #text1-x+2 instead of -x+1 because if x is -1, -- you get the whole string instead of the empty string local appending = _diff_path2(v_map2, _sub(text1, #text1-x+2), _sub(text2, #text2-y+2)) local a_len = #a for i,v in ipairs(appending) do a[a_len+i] = v end return a end end end end -- Number of diffs equals number of characters, no commonality at all. return nil end --[[ * Work from the middle back to the start to determine the path. * @param {Array.