Exploring Diff Algorithms: Edit Distance, Longest Common Subsequence, and Myers Algorithm with TypeScript Implementations
This article examines the principles behind diff tools by implementing three approaches—Levenshtein edit distance, longest common subsequence, and the Myers algorithm—in TypeScript, detailing dynamic-programming solutions, back-tracing techniques, and visualizations to illustrate how strings can be transformed efficiently.
Recently I used an online json diff tool and became curious about how such diff functionality is implemented, so I did a small investigation.
The runnable code for the three implementations is provided below.
Shortest Edit Distance
Character‑by‑character comparison and multi‑line text comparison differ only in the granularity of the smallest unit, so we start with character comparison.
I first thought of the shortest edit distance algorithm. Although the textbook solution does not directly give the transformation steps, the computed distance implies the process, so we can retrieve it.
The classic solution uses dynamic programming; the core is a matrix storing edit distances. Usually a one‑dimensional array suffices, but to record the exact operations we use a two‑dimensional array.
We first write code following the standard approach.
function
minDistance
(
word1:
string
, word2:
string
) {
const
m = word1.length
const
n = word2.length
const
dp =
Array
.from({ length: m +
1
},
()
=>
Array
.from({ length: n +
1
},
()
=>
0
))
for
(
let
i =
1
; i <= m; i++) {
dp[i][
0
] = i
}
for
(
let
i =
1
; i <= n; i++) {
dp[
0
][i] = i
}
for
(
let
i =
1
; i <= m; i++) {
for
(
let
j =
1
; j <= n; j++) {
if
(word1[i -
1
] === word2[j -
1
]) {
dp[i][j] = dp[i -
1
][j -
1
]
}
else
{
dp[i][j] =
Math
.min(dp[i -
1
][j], dp[i][j -
1
], dp[i -
1
][j -
1
]) +
1
}
}
}
console
.log(dp)
}For the call minDistance('horse', 'ros') , the dp matrix is formatted as:
[
[0, 1, 2, 3]
[1, 1, 2, 3]
[2, 2, 1, 2]
[3, 2, 2, 2]
[4, 3, 3, 2]
[5, 4, 4, 3]
]The value dp[5][3] is the shortest edit distance; the other entries represent intermediate states. By back‑tracking this matrix we can reconstruct the exact sequence of edit operations.
Transforming one string into another requires three primitive operations: delete, insert, and keep unchanged. The algorithm does not have an explicit “keep” operation, but a “replace” can be expressed as a delete followed by an insert.
For example, dp[1][1] corresponds to changing the character h into r , which is a replace operation because neither a pure insert nor a pure delete matches the surrounding values.
Iterating through dp and applying the above logic yields the full edit script.
enum EMOpType {
/** Append */
Append,
/** Delete */
Delete,
/** Hold */
Hold
}
function minDistance(word1: string, word2: string) {
// ...
console.log(dp)
const ops: { type: EMOpType; index: number; otherIndex?: number }[] = []
let i = m
let j = n
while (i >= 0 || j >= 0) {
if (j && dp[i][j] === dp[i][j - 1] + 1) {
ops.push({ type: EMOpType.Append, index: j - 1 })
j--
} else if (i && dp[i][j] === dp[i - 1][j] + 1) {
ops.push({ type: EMOpType.Delete, index: i - 1 })
i--
} else if (i && j && dp[i][j] === dp[i - 1][j - 1] + 1) {
ops.push({ type: EMOpType.Delete, index: i - 1 })
ops.push({ type: EMOpType.Append, index: j - 1 })
i--
j--
} else if (i && j && dp[i][j] === dp[i - 1][j - 1]) {
ops.push({ type: EMOpType.Hold, index: i - 1 })
i--
j--
} else {
i--
j--
}
}
}Visualization (green = insertion, red = deletion, black = unchanged) illustrates the process.
Longest Common Subsequence
The longest common subsequence (LCS) can also be used to extract a concrete transformation process.
First we present the textbook solution, again using a two‑dimensional array.
function longestCommonSubsequence(word1: string, word2: string) {
const len1 = word1.length
const len2 = word2.length
const dp = Array.from({ length: len1 + 1 }, () => Array.from({ length: len2 + 1 }, () => 0))
for (let i = 1; i <= len1; i++) {
for (let j = 1; j <= len2; j++) {
if (word1[i - 1] === word2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1
} else {
dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1])
}
}
}
console.log(dp)
}Converting a string by deleting everything and inserting the target is wasteful; by keeping the LCS unchanged we reduce the number of operations.
For the call longestCommonSubsequence('horse', 'ros') , the dp matrix is:
[0, 0, 0, 0]
[0, 0, 0, 0]
[0, 0, 1, 1]
[0, 1, 1, 1]
[0, 1, 1, 2]
[0, 1, 1, 2]Back‑tracking from dp[len1][len2] (value 2) reveals the sequence of moves, distinguishing whether the current cell came from the diagonal, left, or top neighbor.
The analysis shows that the final step originates from the top neighbor, indicating a deletion operation that removes the character e from horse .
type TBackTrace = { type: EOpType; index: number; str: string }
function backTraceLSC(dp: number[][], word1: string, word2: string) {
const ses: TBackTrace[] = []
let dpXIndex = dp.length - 1
let dpYIndex = dp[0].length - 1
let top = -1
let left = -1
let topLeft = -1
let current = -1
while (dpXIndex !== 0 || dpYIndex !== 0) {
if (dpXIndex === 0) {
ses.push({ type: EOpType.InsertBefore, index: dpYIndex - 1, str: word2[dpYIndex - 1] })
dpYIndex--
continue
}
if (dpYIndex === 0) {
ses.push({ type: EOpType.Delete, index: dpXIndex - 1, str: word1[dpXIndex - 1] })
dpXIndex--
continue
}
top = dp[dpXIndex - 1][dpYIndex]
left = dp[dpXIndex][dpYIndex - 1]
topLeft = dp[dpXIndex - 1][dpYIndex - 1]
current = dp[dpXIndex][dpYIndex]
if (top === left && top === topLeft && current - 1 === topLeft) {
ses.push({ type: EOpType.Hold, index: dpXIndex - 1, str: word1[dpXIndex - 1] })
dpXIndex--
dpYIndex--
continue
}
if (left == current) {
ses.push({ type: EOpType.InsertBefore, index: dpYIndex, str: word2[dpYIndex - 1] })
dpYIndex--
continue
}
if (top === current) {
ses.push({ type: EOpType.Delete, index: dpXIndex - 1, str: word1[dpXIndex - 1] })
dpXIndex--
continue
}
}
return ses.reverse()
}Visualization again highlights insertions in green, deletions in red, and unchanged characters in black.
Myers Algorithm
Although the previous two methods can produce an edit script, they are not optimal in time or space. The Myers algorithm offers better complexity and yields the shortest edit path with an intuitive diff.
Below are TypeScript implementations of the Myers algorithm, adapted from Go versions.
enum Operation {
INSERT = 1,
DELETE = 2,
MOVE = 3
}
function reverse(s: Operation[]) {
const result: Operation[] = []
const len = s.length
for (let index = 0; index < s.length; index++) {
result[len - 1 - index] = s[index]
}
return result
}
function shortestEditScript(src: string, dst: string) {
const n = src.length
const m = dst.length
const max = n + m
const trace: Record
[] = []
let x: number
let y: number
let isDone = false
for (let d = 0; d <= max; d++) {
const v: Record
= {}
trace.push(v)
if (d === 0) {
let t = 0
while (n > t && m > t && src[t] === dst[t]) { t++ }
v[0] = t
if (t === n && t === m) { break }
continue
}
const lastV = trace[d - 1]
for (let k = -d; k <= d; k += 2) {
if (k === -d || (k !== d && (lastV?.[k - 1] || 0) < (lastV?.[k + 1] || 0))) {
x = lastV?.[k + 1] || 0
} else {
x = (lastV?.[k - 1] || 0) + 1
}
y = x - k
while (x < n && y < m && src[x] === dst[y]) { x++; y++ }
v[k] = x
if (x === n && y === m) { isDone = true; break }
}
if (isDone) break
}
const script: Operation[] = []
x = n; y = m
let k: number
let prevK: number
let prevX: number
let prevY: number
for (let d = trace.length - 1; d > 0; d--) {
k = x - y
const lastV = trace[d - 1]
if (k === -d || (k !== -d && lastV[k - 1] < lastV[k + 1])) { prevK = k + 1 } else { prevK = k - 1 }
prevX = lastV[prevK]
prevY = prevX - prevK
while (x > prevX && y > prevY) { script.push(Operation.MOVE); x--; y-- }
if (x === prevX) { script.push(Operation.INSERT) } else { script.push(Operation.DELETE) }
x = prevX; y = prevY
}
if (trace[0][0] !== 0) { for (let i = 0; i < trace[0][0]; i++) { script.push(Operation.MOVE) } }
return reverse(script)
}Visualization (green = insertion, red = deletion, black = unchanged) demonstrates the algorithm’s output.
Conclusion
This brief exploration shows the fundamentals of diff algorithms; applying them in production requires substantial work, and many editors such as monaco‑editor or CodeMirror already provide robust diff capabilities.
Rare Earth Juejin Tech Community
Juejin, a tech community that helps developers grow.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.