--[[
* Diff Match and Patch
*
* Copyright 2006 Google Inc.
* http://code.google.com/p/google-diff-match-patch/
*
* Based on the Javascript implementation by Neil Fraser
* Ported to Lua by Duncan Cross
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
--]]
require 'bit' --
local band, bor, lshift
= bit.band, bit.bor, bit.lshift
local type, setmetatable, ipairs, select, pairs
= type, setmetatable, ipairs, select, pairs
local rawget, unpack, tonumber, error
= rawget, unpack, tonumber, error
local strsub, strbyte, strchar, gmatch, gsub
= string.sub, string.byte, string.char, string.gmatch, string.gsub
local strmatch, strfind, strformat, strreverse
= string.match, string.find, string.format, string.reverse
local tinsert, tremove, tconcat, tsort
= table.insert, table.remove, table.concat, table.sort
local max, min, floor, ceil, abs
= math.max, math.min, math.floor, math.ceil, math.abs
local clock = os.clock
module 'diff_match_patch'
-- Utility functions
local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]'
local function percentEncode_replace(v)
return strformat('%%%02X', strbyte(v))
end
local function tsplice(t,idx,deletions, ...)
local insertions = select('#', ...)
for i = 1, deletions do
tremove(t,idx)
end
for i = insertions, 1, -1 do
-- do not remove parentheses around select
tinsert(t,idx,(select(i,...)))
end
end
local function tsub(t,i,j)
if (i < 0) then
local sz = #t
i = sz + 1 + i
if j and (j < 0) then
j = sz + 1 + j
end
elseif j and (j < 0) then
j = #t + 1 + j
end
return {unpack(t,i,j)}
end
local function strelement(str,i)
return strsub(str,i,i)
end
local function telement(t,i)
if (i < 0) then
i = #t + 1 + i
end
return t[i]
end
local function tprepend(t,v)
tinsert(t,1,v)
return t
end
local function tappend(t,v)
t[#t+1] = v
return t
end
local function strappend(str,v)
return str .. v
end
local function strprepend(str,v)
return v .. str
end
local function indexOf(a,b,start)
if (#b == 0) then
return nil
end
-- do not remove parentheses
return (strfind(a,b,start,true))
end
local function lastIndexOf(a,b, start)
for i = (start or #a),1,-1 do
if strsub(a,i,i+#b-1) == b then
return i
end
end
return nil
end
local function getMaxBits()
local maxbits = 0
local oldi, newi = 1, 2
while (oldi ~= newi) do
maxbits = maxbits + 1
oldi = newi
newi = lshift(newi, 1)
end
return maxbits
end
local htmlEncode_pattern = '[&<>\n]'
local htmlEncode_replace = {
['&'] = '&', ['<'] = '<', ['>'] = '>', ['\n'] = '¶ '
}
-- Public API Functions
-- (Exported at the end of the script)
local diff_main,
diff_cleanupSemantic,
diff_cleanupEfficiency,
diff_levenshtein,
diff_prettyHtml
local match_main
local patch_make,
patch_toText,
patch_fromText,
patch_apply
--[[
* The data structure representing a diff is an array of tuples:
* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}}
* which means: delete 'Hello', add 'Goodbye' and keep ' world.'
--]]
local DIFF_DELETE = -1
local DIFF_INSERT = 1
local DIFF_EQUAL = 0
-- Number of seconds to map a diff before giving up (0 for infinity).
local Diff_Timeout = 1.0
-- Cost of an empty edit operation in terms of edit characters.
local Diff_EditCost = 4
-- The size beyond which the double-ended diff activates.
-- Double-ending is twice as fast, but less accurate.
local Diff_DualThreshold = 32
-- At what point is no match declared (0.0 = perfection, 1.0 = very loose).
local Match_Threshold = 0.5
-- How far to search for a match (0 = exact location, 1000+ = broad match).
-- A match this many characters away from the expected location will add
-- 1.0 to the score (0.0 is a perfect match).
local Match_Distance = 1000
-- When deleting a large block of text (over ~64 characters), how close does
-- the contents have to match the expected contents. (0.0 = perfection,
-- 1.0 = very loose). Note that Match_Threshold controls how closely the
-- end points of a delete need to match.
local Patch_DeleteThreshold = 0.5
-- Chunk size for context length.
local Patch_Margin = 4
-- How many bits in a number?
local Match_MaxBits = getMaxBits()
function settings(new)
if new then
Diff_Timeout = new.Diff_Timeout or Diff_Timeout
Diff_EditCost = new.Diff_EditCost or Diff_EditCost
Diff_DualThreshold = new.Diff_DualThreshold or Diff_DualThreshold
Match_Threshold = new.Match_Threshold or Match_Threshold
Match_Distance = new.Match_Distance or Match_Distance
Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold
Patch_Margin = new.Patch_Margin or Patch_Margin
Match_MaxBits = new.Match_MaxBits or Match_MaxBits
else
return {
Diff_Timeout = Diff_Timeout;
Diff_EditCost = Diff_EditCost;
Diff_DualThreshold = Diff_DualThreshold;
Match_Threshold = Match_Threshold;
Match_Distance = Match_Distance;
Patch_DeleteThreshold = Patch_DeleteThreshold;
Patch_Margin = Patch_Margin;
Match_MaxBits = Match_MaxBits;
}
end
end
-- ---------------------------------------------------------------------------
-- DIFF API
-- ---------------------------------------------------------------------------
-- The private diff functions
local _diff_compute,
_diff_map,
_diff_path1,
_diff_path2,
_diff_halfMatchI,
_diff_halfMatch,
_diff_cleanupSemanticScore,
_diff_cleanupSemanticLossless,
_diff_cleanupMerge,
_diff_commonPrefix,
_diff_commonSuffix,
_diff_xIndex,
_diff_text1,
_diff_text2,
_diff_toDelta,
_diff_fromDelta,
_diff_toLines,
_diff_fromLines
--[[
* Find the differences between two texts. Simplifies the problem by stripping
* any common prefix or suffix off the texts before diffing.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} opt_checklines Optional speedup flag. If present and false,
* then don't run a line-level diff first to identify the changed areas.
* Defaults to true, which does a faster, slightly less optimal diff
* @return {Array.>} Array of diff tuples.
--]]
function diff_main(text1, text2, opt_checklines)
-- Check for equality (speedup)
if (text1 == text2) then
return {{DIFF_EQUAL, text1}}
end
if (opt_checklines == nil) then
opt_checklines = true
end
local checklines = opt_checklines
-- Trim off common prefix (speedup)
local commonlength = _diff_commonPrefix(text1, text2)
local commonprefix
if (commonlength > 0) then
commonprefix = strsub(text1, 1, commonlength)
text1 = strsub(text1, commonlength + 1)
text2 = strsub(text2, commonlength + 1)
end
-- Trim off common suffix (speedup)
commonlength = _diff_commonSuffix(text1, text2)
local commonsuffix
if (commonlength > 0) then
commonsuffix = strsub(text1, -commonlength)
text1 = strsub(text1, 1, -commonlength-1)
text2 = strsub(text2, 1, -commonlength-1)
end
-- Compute the diff on the middle block
local diffs = _diff_compute(text1, text2, checklines)
-- Restore the prefix and suffix
if commonprefix then
tinsert(diffs, 1, {DIFF_EQUAL, commonprefix})
end
if commonsuffix then
diffs[#diffs+1] = {DIFF_EQUAL, commonsuffix}
end
_diff_cleanupMerge(diffs)
return diffs
end
--[[
* Reduce the number of edits by eliminating semantically trivial equalities.
* @param {Array.>} diffs Array of diff tuples.
--]]
function diff_cleanupSemantic(diffs)
local changes = false
local equalities = {} -- Stack of indices where equalities are found.
local equalitiesLength = 0 -- Keeping our own length var is faster.
local lastequality = nil -- Always equal to equalities[equalitiesLength][2]
local pointer = 1 -- Index of current position.
-- Number of characters that changed prior to the equality.
local length_changes1 = 0
-- Number of characters that changed after the equality.
local length_changes2 = 0
while diffs[pointer] do
if (diffs[pointer][1] == DIFF_EQUAL) then -- equality found
equalitiesLength = equalitiesLength + 1
equalities[equalitiesLength] = pointer
length_changes1 = length_changes2
length_changes2 = 0
lastequality = diffs[pointer][2]
else -- an insertion or deletion
length_changes2 = length_changes2 + #(diffs[pointer][2])
if lastequality and (#lastequality > 0)
and (#lastequality <= length_changes1)
and (#lastequality <= length_changes2) then
-- Duplicate record
tinsert(diffs, equalities[equalitiesLength],
{DIFF_DELETE, lastequality})
-- Change second copy to insert.
diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
-- Throw away the equality we just deleted.
equalitiesLength = equalitiesLength - 1
-- Throw away the previous equality (it needs to be reevaluated).
equalitiesLength = equalitiesLength - 1
pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
length_changes1, length_changes2 = 0, 0 -- Reset the counters.
lastequality = nil
changes = true
end
end
pointer = pointer + 1
end
if changes then
_diff_cleanupMerge(diffs)
end
_diff_cleanupSemanticLossless(diffs)
end
--[[
* Reduce the number of edits by eliminating operationally trivial equalities.
* @param {Array.>} diffs Array of diff tuples.
--]]
function diff_cleanupEfficiency(diffs)
local changes = false
-- Stack of indices where equalities are found.
local equalities = {}
-- Keeping our own length var is faster.
local equalitiesLength = 0
-- Always equal to diffs[equalities[equalitiesLength]][2]
local lastequality = ''
-- Index of current position.
local pointer = 1
-- The following four are really booleans but are stored as numbers because
-- they are used at one point like this:
--
-- (pre_ins + pre_del + post_ins + post_del) == 3
--
-- ...i.e. checking that 3 of them are true and 1 of them is false.
-- Is there an insertion operation before the last equality.
local pre_ins = 0
-- Is there a deletion operation before the last equality.
local pre_del = 0
-- Is there an insertion operation after the last equality.
local post_ins = 0
-- Is there a deletion operation after the last equality.
local post_del = 0
while diffs[pointer] do
if (diffs[pointer][1] == DIFF_EQUAL) then -- equality found
local diffText = diffs[pointer][2]
if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
-- Candidate found.
equalitiesLength = equalitiesLength + 1
equalities[equalitiesLength] = pointer
pre_ins, pre_del = post_ins, post_del
lastequality = diffText
else
-- Not a candidate, and can never become one.
equalitiesLength = 0
lastequality = ''
end
post_ins, post_del = 0, 0
else -- an insertion or deletion
if (diffs[pointer][1] == DIFF_DELETE) then
post_del = 1
else
post_ins = 1
end
--[[
* Five types to be split:
* ABXYCD
* AXCD
* ABXC
* AXCD
* ABXC
--]]
if (#lastequality > 0) and (
(pre_ins+pre_del+post_ins+post_del == 4)
or
(
(#lastequality < Diff_EditCost/2)
and
(pre_ins+pre_del+post_ins+post_del == 3)
)) then
-- Duplicate record
tinsert(diffs, equalities[equalitiesLength], {DIFF_DELETE,lastequality})
-- Change second copy to insert.
diffs[equalities[equalitiesLength]+1][1] = DIFF_INSERT
-- Throw away the equality we just deleted;
equalitiesLength = equalitiesLength - 1
lastequality = ''
if (pre_ins == 1) and (pre_del == 1) then
-- No changes made which could affect previous entry, keep going.
post_ins, post_del = 1, 1
equalitiesLength = 0
else
-- Throw away the previous equality;
equalitiesLength = equalitiesLength - 1
pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
post_ins, post_del = 0, 0
end
changes = true
end
end
pointer = pointer + 1
end
if changes then
_diff_cleanupMerge(diffs)
end
end
--[[
* Compute the Levenshtein distance; the number of inserted, deleted or
* substituted characters.
* @param {Array.>} diffs Array of diff tuples.
* @return {number} Number of changes.
--]]
function diff_levenshtein(diffs)
local levenshtein = 0
local insertions, deletions = 0, 0
for x, diff in ipairs(diffs) do
local op, data = diff[1], diff[2]
if (op == DIFF_INSERT) then
insertions = insertions + #data
elseif (op == DIFF_DELETE) then
deletions = deletions + #data
else--if (op == DIFF_EQUAL) then
-- A deletion and an insertion is one substitution.
levenshtein = levenshtein + max(insertions, deletions)
insertions = 0
deletions = 0
end
end
levenshtein = levenshtein + max(insertions, deletions)
return levenshtein
end
--[[
* Convert a diff array into a pretty HTML report.
* @param {Array.>} diffs Array of diff tuples.
* @return {string} HTML representation.
--]]
function diff_prettyHtml(diffs)
local html = {}
local i = 0
for x,diff in ipairs(diffs) do
local op = diff[1] -- Operation (insert, delete, equal)
local data = diff[2] -- Text of change.
local text = gsub(data, htmlEncode_pattern, htmlEncode_replace)
if (op == DIFF_INSERT) then
html[x] = ''
.. text .. ''
elseif (op == DIFF_DELETE) then
html[x] = ''
.. text .. ''
else--if (op == DIFF_EQUAL) then
html[x] = '' .. text .. ''
end
if (op ~= DIFF_DELETE) then
i = i + #data
end
end
return tconcat(html)
end
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
-- ---------------------------------------------------------------------------
--[[
* Find the differences between two texts. Assumes that the texts do not
* have any common prefix or suffix.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} checklines Speedup flag. If false, then don't run a
* line-level diff first to identify the changed areas.
* If true, then run a faster, slightly less optimal diff
* @return {Array.>} Array of diff tuples.
* @private
--]]
function _diff_compute(text1, text2, checklines)
if (#text1 == 0) then
-- Just add some text (speedup)
return {{DIFF_INSERT, text2}}
end
if (#text2 == 0) then
-- Just delete some text (speedup)
return {{DIFF_DELETE, text1}}
end
local diffs
local longtext = (#text1 > #text2) and text1 or text2
local shorttext = (#text1 > #text2) and text2 or text1
local i = indexOf(longtext, shorttext)
if (i ~= nil) then
-- Shorter text is inside the longer text (speedup)
diffs = {
{DIFF_INSERT, strsub(longtext,1,i-1)},
{DIFF_EQUAL, shorttext},
{DIFF_INSERT, strsub(longtext,i + #shorttext)}
}
-- Swap insertions for deletions if diff is reversed.
if (#text1 > #text2) then
diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE
end
return diffs
end
longtext, shorttext = nil, nil -- Garbage collect
-- Check to see if the problem can be split in two.
do
local
text1_a, text1_b,
text2_a, text2_b,
mid_common = _diff_halfMatch(text1, text2)
if text1_a then
-- A half-match was found, sort out the return data.
-- Send both pairs off for separate processing.
local diffs_a = diff_main(text1_a, text2_a, checklines)
local diffs_b = diff_main(text1_b, text2_b, checklines)
-- Merge the results.
local diffs_a_len = #diffs_a
diffs = diffs_a
diffs[diffs_a_len+1] = {DIFF_EQUAL, mid_common}
for i,b_diff in ipairs(diffs_b) do
diffs[diffs_a_len+1+i] = b_diff
end
return diffs
end
end
-- Perform a real diff.
if checklines and (#text1 < 100 or #text2 < 100) then
-- Too trivial for the overhead.
checklines = false
end
local linearray
if checklines then
-- Scan the text on a line-by-line basis first.
text1, text2, linearray = _diff_toLines(text1, text2)
end
diffs = _diff_map(text1, text2)
if (diffs == nil) then
-- No acceptable result.
diffs = {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
end
if checklines then
-- Convert the diff back to original text.
_diff_fromLines(diffs, linearray)
-- Eliminate freak matches (e.g. blank lines)
diff_cleanupSemantic(diffs)
-- Rediff any replacement blocks, this time character-by-character.
-- Add a dummy entry at the end.
diffs[#diffs+1] = {DIFF_EQUAL, ''}
local pointer = 1
local count_delete = 0
local count_insert = 0
local text_delete = ''
local text_insert = ''
while (pointer <= #diffs) do
local diff_type = diffs[pointer][1]
if (diff_type == DIFF_INSERT) then
count_insert = count_insert + 1
text_insert = text_insert .. diffs[pointer][2]
elseif (diff_type == DIFF_DELETE) then
count_delete = count_delete + 1
text_delete = text_delete .. diffs[pointer][2]
else--if (diff_type == DIFF_EQUAL) then
-- Upon reaching an equality, check for prior redundancies.
if (count_delete >= 1) and (count_insert >= 1) then
-- Delete the offending records and add the merged ones.
local a = diff_main(text_delete, text_insert, false)
pointer = pointer - count_delete - count_insert
for i = 1, count_delete + count_insert do
tremove(diffs, pointer)
end
for j = #a, 1, -1 do
tinsert(diffs, pointer, a[j])
end
pointer = pointer + #a
end
count_insert = 0
count_delete = 0
text_delete = ''
text_insert = ''
end
pointer = pointer + 1
end
diffs[#diffs] = nil -- Remove the dummy entry at the end.
end
return diffs
end
function _diff_toLines(text1, text2)
local lineArray = {} -- e.g. lineArray[4] == 'Hello\n'
local lineHash = {} -- e.g. lineHash['Hello\n'] == 4
--[[
* Split a text into an array of strings. Reduce the texts to a string of
* hashes where each Unicode character represents one line.
* Modifies linearray and linehash through being a closure.
* @param {string} text String to encode
* @return {string} Encoded string
* @private
--]]
local _diff_toLinesMunge = function(text)
local lines = {}
-- Walk the text, pulling out a substring for each line.
-- text.split('\n') would would temporarily double our memory footprint.
-- Modifying text would create many large strings to garbage collect.
local lineStart = 1
local lineEnd = 0
-- Keeping our own length variable is faster than looking it up.
local lineArrayLength = #lineArray
while (lineEnd < #text) do
lineEnd = indexOf(text, '\n', lineStart) or #text
local line = strsub(text, lineStart, lineEnd)-- +1)
lineStart = lineEnd + 1
local hash = lineHash[line]
if hash then
lines[#lines+1] = hash
else
lineArrayLength = lineArrayLength + 1
lines[#lines+1] = lineArrayLength
lineHash[line] = lineArrayLength
lineArray[lineArrayLength] = line
end
end
return lines
end
local lines1 = _diff_toLinesMunge(text1)
local lines2 = _diff_toLinesMunge(text2)
return lines1, lines2, lineArray
end
function _diff_fromLines(diffs, lineArray)
for x,diff in ipairs(diffs) do
local lines = diff[2]
local text = {}
for idx,line in ipairs(lines) do
text[idx] = lineArray[line]
end
diff[2] = tconcat(text)
end
end
--[[
* Explore the intersection points between the two texts.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @return {Array.>?} Array of diff tuples or nil if no
* diff available.
* @private
--]]
function _diff_map(text1, text2)
-- Don't run for too long.
local ms_end = clock() + Diff_Timeout
-- Cache the text lengths to prevent multiple calls.
local text1_length = #text1
local text2_length = #text2
local _sub, _element
if type(text1) == 'table' then
_sub, _element = tsub, telement
else
_sub, _element = strsub, strelement
end
local max_d = text1_length + text2_length - 1
local doubleEnd = (Diff_DualThreshold*2 < max_d)
local v_map1 = {}
local v_map2 = {}
local v1 = {}
local v2 = {}
v1[1] = 1
v2[1] = 1
local x, y
local footstep -- Used to track overlapping paths.
local footsteps = {}
local done = false
-- If the total number of characters is odd, then
-- the front path will collide with the reverse path.
local front = (((text1_length+text2_length) % 2) == 1)
for d = 0, max_d-1 do
-- Bail out if timeout reached.
if (Diff_Timeout > 0) and (clock() > ms_end) then
return nil
end
-- Walk the front path one step.
v_map1[d+1] = {}
for k = -d, d, 2 do
if (k == -d) or ((k ~= d) and (v1[k-1] < v1[k+1])) then
x = v1[k+1]
else
x = v1[k-1] + 1
end
y = x - k
if doubleEnd then
footstep = x..','..y
if front and footsteps[footstep] then
done = true
end
if not front then
footsteps[footstep] = d
end
end
while (not done) and (x <= text1_length) and (y <= text2_length)
and (_element(text1,x) == _element(text2,y)) do
x = x + 1
y = y + 1
if doubleEnd then
footstep = x..','..y
if front and footsteps[footstep] then
done = true
end
if not front then
footsteps[footstep] = d
end
end
end
v1[k] = x
v_map1[d+1][x..','..y] = true
if (x == text1_length+1) and (y == text2_length+1) then
-- Reached the end in single-path mode.
return _diff_path1(v_map1, text1, text2)
elseif done then
-- Front path ran over reverse path.
for i = footsteps[footstep] + 2, #v_map2 do
v_map2[i] = nil
end
local a = _diff_path1(v_map1,
_sub(text1,1,x-1),
_sub(text2,1,y-1)
)
local a_len = #a
local a2 = _diff_path2(v_map2,
_sub(text1,x),
_sub(text2,y)
)
for i,v in ipairs(a2) do
a[a_len+i] = v
end
return a
end
end
if doubleEnd then
-- Walk the reverse path one step.
v_map2[d+1] = {}
for k = -d, d, 2 do
if (k == -d) or ((k ~= d) and (v2[k-1] < v2[k+1])) then
x = v2[k+1]
else
x = v2[k-1] + 1
end
y = x - k
footstep
= (text1_length + 2 - x)..','..(text2_length + 2 - y)
if (not front) and footsteps[footstep] then
done = true
end
if front then
footsteps[footstep] = d
end
while (not done) and (x <= text1_length) and (y <= text2_length)
and (_element(text1,-x) == _element(text2,-y)) do
x, y = x+1, y+1
footstep = (text1_length + 2 - x)..','..(text2_length + 2 - y)
if (not front) and footsteps[footstep] then
done = true
end
if front then
footsteps[footstep] = d
end
end
v2[k] = x
v_map2[d+1][x..','..y] = true
if done then
-- Reverse path ran over front path.
for i = footsteps[footstep]+2, #v_map1 do
v_map1[i] = nil
end
local a = _diff_path1(v_map1, _sub(text1, 1, -x), _sub(text2, 1, -y))
-- LUANOTE: #text1-x+2 instead of -x+1 because if x is -1,
-- you get the whole string instead of the empty string
local appending = _diff_path2(v_map2,
_sub(text1, #text1-x+2), _sub(text2, #text2-y+2))
local a_len = #a
for i,v in ipairs(appending) do
a[a_len+i] = v
end
return a
end
end
end
end
-- Number of diffs equals number of characters, no commonality at all.
return nil
end
--[[
* Work from the middle back to the start to determine the path.
* @param {Array.