Commit dc2d28f - fangorn/ex_git_objectstore


      fangorn/ex_git_objectstore

public

ref:dc2d28f203e0402512ba03c409d58b4ee4651b30

fix(diff): linear-space Myers (Myers 1986 §4b)

Replaces the previous hand-rolled Myers, which kept every V-table from every `d` iteration alive in a list (O(D²) memory) and used a `Map` for V (O(log n) per access). On large inputs the BEAM allocator climbed into the GBs and got cgroup-killed under load — verified against chiron PR #68 (1700× peak memory reduction in this commit alone).

This commit is the actual linear-space variant from Myers' 1986 paper §4b: divide-and-conquer at the middle snake. Memory becomes O(N+M) total — each bisect call holds two V tables for its lifetime, then GCs them before recursing. Recursion depth is O(log(N+M)).

Translation reference: Google diff_match_patch's `diff_bisect`, itself a faithful port of Myers §4b, cross-checked against git xdiff's `xdl_split` (https://github.com/git/git/blob/master/xdiff/xdiffi.c).

Verified:
- All 11 Myers unit tests pass byte-identical to before.
- Stress test (10k-line × ~33%-diff input) peaks at ~9 MB process heap vs unbounded growth in old / stdlib variants. The 200 MB stress-test bound is now real.
- Full ex_git_objectstore suite (903 tests) green.

Critical correctness rules from the paper that earlier attempts got wrong:
- Δ = N - M parity drives WHICH sweep checks overlap (front when Δ odd, reverse when Δ even). Doing both is wrong.
- Reverse-frame ↔ forward-frame mapping: `k_other = delta - k_self` (minus, not plus).
- When bisect runs out of d iterations without finding overlap (tiny inputs, no commonality), fall back to splitting at the top-right corner so both halves are STRICTLY smaller — otherwise the recursion can re-call itself on the same range.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

SHA: dc2d28f203e0402512ba03c409d58b4ee4651b30

Author: Cole Christensen <cole.christensen@macmillan.com>

Date: 2026-04-29 04:57

Parents: dbd335e

2 files changed +586 -99

Type

Type
	lib/ex_git_objectstore/diff/myers.ex	+516 −99
@@ -14,9 +14,34 @@ defmodule ExGitObjectstore.Diff.Myers do @moduledoc """ Myers diff algorithm for computing the shortest edit script between two sequences. Linear-space Myers diff (Myers 1986 §4b — "An O(ND) Difference Algorithm and Its Variations", Algorithmica 1(2):251-266). Divide-and-conquer at the middle snake: find the split point (x, y) where the optimal edit script crosses the middle of the edit graph, then recurse on a[0..x) vs b[0..y) and a[x..n) vs b[y..m). The snake's equalities fall out of the recursion naturally because they appear in both halves. Memory: O(N+M) total. Each recursion frame holds two V tables of size ~max_d (= (n+m+1)/2 in that frame) for the duration of one bisect; once bisect returns, the V tables are GC'd before recursing. Recursion depth is O(log(N+M)) on average. This module is a careful Elixir translation of Google diff_match_patch's `diff_bisect` (https://github.com/google/diff-match-patch), which is itself a faithful port of Myers §4b. The C reference is git xdiff's `xdl_split` / `xdl_recs_cmp` (https://github.com/git/git/blob/master/xdiff/xdiffi.c). ## Why we needed this Uses the standard forward-only approach with trace recording and backtracking. The previous hand-rolled implementation kept every V-table from every `d` iteration in a list (O(D²) memory) and used a `Map` for V (O(log n) per access). On large inputs the BEAM allocator climbed into the GBs and cgroup-killed under load. Stdlib's `List.myers_difference/2` is also naive Myers (one path-with-suffix per diagonal kept alive simultaneously — O(D × max(D, N+M)) memory) and exhibits the same problem. This module is the proper linear-space variant, the same algorithm git uses by default in its `xdiff` library. 
""" @type edit :: {:eq, term()} \| {:ins, term()} \| {:del, term()} @@ -27,11 +52,12 @@ """ @spec diff(list(), list()) :: [edit()] def diff(a, b) when is_list(a) and is_list(b) do n = length(a) m = length(b) a_vec = List.to_tuple(a) b_vec = List.to_tuple(b) n = tuple_size(a_vec) m = tuple_size(b_vec) diff_range(a_vec, b_vec, 0, n, 0, m) ses(a_vec, b_vec, n, m) end @doc """ @@ -39,138 +65,529 @@ """ @spec diff_lines(String.t(), String.t()) :: [edit()] def diff_lines(text_a, text_b) do lines_a = String.split(text_a, "\n", trim: false) lines_b = String.split(text_b, "\n", trim: false) diff(String.split(text_a, "\n", trim: false), String.split(text_b, "\n", trim: false)) diff(lines_a, lines_b) end # ── Recursive driver ────────────────────────────────────────────────── # defp ses(_a, _b, 0, 0), do: [] # Compute edits for a[a_lo..a_hi) vs b[b_lo..b_hi). Strips common # prefix/suffix first (huge speedup on typical code diffs), then either # bottoms out or bisects and recurses on the two halves. 
defp diff_range(a, b, a_lo, a_hi, b_lo, b_hi) do {prefix_eqs, a_lo, b_lo} = strip_prefix(a, b, a_lo, a_hi, b_lo, b_hi, []) {suffix_eqs, a_hi, b_hi} = strip_suffix(a, b, a_lo, a_hi, b_lo, b_hi, []) defp ses(a, b, n, m) do max = n + m v0 = %{1 => 0} # Forward pass: build trace of V snapshots # trace stores V state AFTER processing each d value # We also need v0 (the initial state before d=0) case find_d(a, b, n, m, max, 0, v0, [v0]) do {:found, trace} -> backtrack(trace, a, b, n, m) middle = cond do a_lo == a_hi and b_lo == b_hi -> [] :not_found -> [] end end a_lo == a_hi -> for i <- b_lo..(b_hi - 1)//1, do: {:ins, elem(b, i)} defp find_d(_a, _b, _n, _m, max, d, _v, _trace) when d > max, do: :not_found b_lo == b_hi -> for i <- a_lo..(a_hi - 1)//1, do: {:del, elem(a, i)} true -> {x, y} = bisect(a, b, a_lo, a_hi, b_lo, b_hi) defp find_d(a, b, n, m, max, d, v, trace) do v_new = process_diag(a, b, n, m, d, -d, v) left = diff_range(a, b, a_lo, x, b_lo, y) right = diff_range(a, b, x, a_hi, y, b_hi) left ++ right end prefix_eqs ++ middle ++ suffix_eqs end_x = Map.get(v_new, n - m, -1) end if end_x >= n do # trace already has [v0, ...] 
accumulated in reverse # Add v_new and reverse to get [v0, v_after_d0, v_after_d1, ..., v_after_dD] {:found, Enum.reverse([v_new \| trace])} defp strip_prefix(a, b, a_lo, a_hi, b_lo, b_hi, acc) when a_lo < a_hi and b_lo < b_hi do if elem(a, a_lo) == elem(b, b_lo) do strip_prefix(a, b, a_lo + 1, a_hi, b_lo + 1, b_hi, [{:eq, elem(a, a_lo)} \| acc]) else find_d(a, b, n, m, max, d + 1, v_new, [v_new \| trace]) {Enum.reverse(acc), a_lo, b_lo} end end defp process_diag(_a, _b, _n, _m, d, k, v) when k > d, do: v defp strip_prefix(_a, _b, a_lo, _a_hi, b_lo, _b_hi, acc), do: {Enum.reverse(acc), a_lo, b_lo} defp strip_suffix(a, b, a_lo, a_hi, b_lo, b_hi, acc) when a_lo < a_hi and b_lo < b_hi do if elem(a, a_hi - 1) == elem(b, b_hi - 1) do defp process_diag(a, b, n, m, d, k, v) do x = if k == -d or (k != d and Map.get(v, k - 1, 0) < Map.get(v, k + 1, 0)) do Map.get(v, k + 1, 0) else Map.get(v, k - 1, 0) + 1 end y = x - k strip_suffix(a, b, a_lo, a_hi - 1, b_lo, b_hi - 1, [{:eq, elem(a, a_hi - 1)} \| acc]) {x, _y} = snake(a, b, n, m, x, y) v = Map.put(v, k, x) process_diag(a, b, n, m, d, k + 2, v) end defp snake(a, b, n, m, x, y) when x < n and y < m do if elem(a, x) == elem(b, y) do snake(a, b, n, m, x + 1, y + 1) else {acc, a_hi, b_hi} {x, y} end end defp strip_suffix(_a, _b, _a_lo, a_hi, _b_lo, b_hi, acc), do: {acc, a_hi, b_hi} defp snake(_a, _b, _n, _m, x, y), do: {x, y} # Backtrack through V snapshots to reconstruct edit script. # ── Bisect — find the middle-snake split point ──────────────────────── # # trace = [v_initial, v_after_d0, v_after_d1, ..., v_after_dD] # Translation of diff_match_patch's diff_bisect (Python). Returns # `{x, y}` in ABSOLUTE coordinates (within the original a/b) such that # the optimal edit script from `a[a_lo..a_hi)` to `b[b_lo..b_hi)` passes # through (x, y) in the middle of the edit graph. # trace has D+2 elements. trace[0] = initial V = %{1 => 0}. # trace[d+1] = V after processing d. 
# # For backtracking at step d (1-indexed edit step): # - The end point at this step is determined by trace[d+1] at diagonal k # - The previous state is trace[d] (which is V after processing d-1) # # We start at (n, m) and work backwards d = D, D-1, ..., 1 defp backtrack(trace, a, b, n, m) do trace_arr = List.to_tuple(trace) # V tables are stored as Maps keyed by k_offset = v_offset + k. Sentinel # `-1` means "not yet reached" (distinct from "reached at x=0"). Map is # used for clarity; could be swapped for `:atomics` for speed once # correctness is established. defp bisect(a, b, a_lo, a_hi, b_lo, b_hi) do n = a_hi - a_lo m = b_hi - b_lo max_d = div(n + m + 1, 2) v_offset = max_d # Init: V_f[1] = 0 means "forward path on diagonal 1 is at x=0 (start)". # Same for V_r. All other entries default to -1 (not reached). v1 = %{(v_offset + 1) => 0} v2 = %{(v_offset + 1) => 0} # D = number of edits = tuple_size - 2 (since we have D+2 elements) d_max = tuple_size(trace_arr) - 2 do_backtrack(trace_arr, a, b, d_max, n, m, []) delta = n - m front? = rem(delta, 2) != 0 bisect_d(a, b, a_lo, b_lo, n, m, max_d, v_offset, delta, front?, v1, v2, 0, 0, 0, 0, 0) end # bisect_d/17: search depth d. For each d, walk the forward path one step # then the reverse path one step. Check overlap on the appropriate side # (forward when delta is odd, reverse when delta is even). defp bisect_d( _a, _b, a_lo, b_lo, n, _m, max_d, _v_off, _delta, _front?, _v1, _v2, d, _k1s, _k1e, _k2s, _k2e ) when d >= max_d do # No middle snake found within max_d iterations — happens when D >= 2 but # max_d is too small (e.g. tiny inputs like n=m=1 with no match) or no # commonality at all. Fall back to "split at the top-right corner": left # half becomes (full a, empty b) → all deletes, right half becomes # Base case: d=0, just emit the initial snake from (0,0) to (x, y) defp do_backtrack(_trace, a, _b, 0, x, _y, edits) do # (empty a, full b) → all inserts. 
Both halves are STRICTLY smaller, so # the recursion terminates. {a_lo + n, b_lo} diag(a, 0, x) ++ edits end defp bisect_d( a, b, a_lo, b_lo, n, m, max_d, v_off, delta, front?, v1, v2, d, k1s, k1e, k2s, k2e ) do case forward_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, -d + k1s, k1s, k1e ) do {:found, x, y} -> {a_lo + x, b_lo + y} {:cont, v1_new, k1s_new, k1e_new} -> case reverse_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1_new, v2, d, -d + k2s, k2s, k2e ) do {:found, x, y} -> {a_lo + x, b_lo + y} {:cont, v2_new, k2s_new, k2e_new} -> bisect_d( a, b, a_lo, b_lo, n, m, max_d, v_off, delta, front?, v1_new, v2_new, d + 1, k1s_new, k1e_new, k2s_new, k2e_new ) end end end # Forward sweep: walk diagonals k = -d+k1s, -d+k1s+2, ..., d-k1e. # Returns {:found, x, y} on overlap (when delta is odd), else # {:cont, v1, k1s, k1e} with possibly-adjusted bounds. defp forward_sweep( _a, _b, _a_lo, _b_lo, _n, _m, _v_off, _delta, _front?, v1, _v2, d, k1, k1s, k1e ) when k1 > d - k1e do {:cont, v1, k1s, k1e} end defp forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1, k1s, k1e) do k1_off = v_off + k1 # Pick predecessor — down (k+1) preferred over right (k-1) when tied. x1 = cond do k1 == -d -> Map.get(v1, k1_off + 1, -1) \|> max(0) k1 == d -> defp do_backtrack(trace, a, b, d, x, y, edits) do k = x - y # V state from previous round (after processing d-1) v_prev = elem(trace, d) Map.get(v1, k1_off - 1, -1) + 1 # Determine which direction we took at step d prev_k = if k == -d or (k != d and Map.get(v_prev, k - 1, 0) < Map.get(v_prev, k + 1, 0)) do k + 1 else k - 1 Map.get(v1, k1_off - 1, -1) < Map.get(v1, k1_off + 1, -1) -> Map.get(v1, k1_off + 1, -1) true -> Map.get(v1, k1_off - 1, -1) + 1 end y1 = x1 - k1 {x1, y1} = snake_forward(a, b, a_lo, b_lo, n, m, x1, y1) v1 = Map.put(v1, k1_off, x1) cond do x1 > n -> # Ran off the right edge — stop extending this side, shrink k range. 
forward_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e + 2 ) y1 > m -> # Ran off the bottom — shrink the OTHER end of the k range. forward_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s + 2, k1e ) front? -> # Overlap check on forward sweep (delta is odd). k2_off = v_off + delta - k1 if k2_off >= 0 and k2_off < 2 * v_off + 1 do v2_x = Map.get(v2, k2_off, -1) if v2_x != -1 do # Reverse coordinate → forward: x2_forward = n - v2_x x2_fwd = n - v2_x if x1 >= x2_fwd do {:found, x1, y1} else forward_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e ) end else forward_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e ) end else forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e) end true -> forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e) end end # Reverse sweep: same shape, mirrored. Snake compares from the END going # backward. 
defp reverse_sweep( _a, _b, _a_lo, _b_lo, _n, _m, _v_off, _delta, _front?, _v1, v2, d, k2, k2s, k2e ) when k2 > d - k2e do {:cont, v2, k2s, k2e} end defp reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2, k2s, k2e) do k2_off = v_off + k2 x2 = cond do k2 == -d -> Map.get(v2, k2_off + 1, -1) \|> max(0) k2 == d -> Map.get(v2, k2_off - 1, -1) + 1 # End point of previous round on diagonal prev_k prev_x = Map.get(v_prev, prev_k, 0) prev_y = prev_x - prev_k # Mid point after the edit (before snake) {mid_x, edit} = if prev_k > k do # Insert (moved down: y+1, x stays) {prev_x, {:ins, elem(b, prev_y)}} else # Delete (moved right: x+1, y stays) {prev_x + 1, {:del, elem(a, prev_x)}} Map.get(v2, k2_off - 1, -1) < Map.get(v2, k2_off + 1, -1) -> Map.get(v2, k2_off + 1, -1) true -> Map.get(v2, k2_off - 1, -1) + 1 end y2 = x2 - k2 {x2, y2} = snake_reverse(a, b, a_lo, b_lo, n, m, x2, y2) v2 = Map.put(v2, k2_off, x2) cond do x2 > n -> reverse_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e + 2 ) # Snake from mid to (x, y) — these are all :eq edits = diag(a, mid_x, x) ++ edits y2 > m -> reverse_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s + 2, k2e ) not front? -> # Overlap check on reverse sweep (delta is even). k1_off = v_off + delta - k2 if k1_off >= 0 and k1_off < 2 * v_off + 1 do v1_x = Map.get(v1, k1_off, -1) if v1_x != -1 do x1_fwd = v1_x x2_fwd = n - x2 if x1_fwd >= x2_fwd do # Use forward coordinates of the meeting point. 
y1_fwd = x1_fwd - (k1_off - v_off) {:found, x1_fwd, y1_fwd} else reverse_sweep( # Prepend the edit edits = [edit \| edits] a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e ) end else reverse_sweep( a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e ) end else reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e) end do_backtrack(trace, a, b, d - 1, prev_x, prev_y, edits) true -> reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e) end end # ── Snakes ──────────────────────────────────────────────────────────── # # snake_forward: extend (x, y) forward (in local coordinates within the defp diag(a, x_start, x_end) do diag_acc(a, x_start, x_end, []) # bisect range) as long as a[a_lo + x] == b[b_lo + y]. defp snake_forward(a, b, a_lo, b_lo, n, m, x, y) when x < n and y < m do if elem(a, a_lo + x) == elem(b, b_lo + y) do snake_forward(a, b, a_lo, b_lo, n, m, x + 1, y + 1) else {x, y} end end defp snake_forward(_a, _b, _a_lo, _b_lo, _n, _m, x, y), do: {x, y} defp diag_acc(_a, x, x, acc), do: Enum.reverse(acc) # snake_reverse: extend (x, y) backward — local x, y are STEPS BACK from # the (n, m) corner. Compare a[a_lo + n - 1 - x] vs b[b_lo + m - 1 - y]. defp snake_reverse(a, b, a_lo, b_lo, n, m, x, y) when x < n and y < m do ai = a_lo + n - 1 - x bi = b_lo + m - 1 - y if elem(a, ai) == elem(b, bi) do snake_reverse(a, b, a_lo, b_lo, n, m, x + 1, y + 1) else {x, y} end defp diag_acc(a, x_start, x_end, acc) when x_start < x_end do diag_acc(a, x_start + 1, x_end, [{:eq, elem(a, x_start)} \| acc]) end defp snake_reverse(_a, _b, _a_lo, _b_lo, _n, _m, x, y), do: {x, y} end

lib/ex_git_objectstore/diff/myers.ex

+516 −99

@@ -14,9 +14,34 @@
defmodule ExGitObjectstore.Diff.Myers do
  @moduledoc """
  Myers diff algorithm for computing the shortest edit script between two sequences.
  Linear-space Myers diff (Myers 1986 §4b — "An O(ND) Difference Algorithm
  and Its Variations", Algorithmica 1(2):251-266).
  Divide-and-conquer at the middle snake: find the split point (x, y) where
  the optimal edit script crosses the middle of the edit graph, then recurse
  on a[0..x) vs b[0..y) and a[x..n) vs b[y..m). The snake's equalities fall
  out of the recursion naturally because they appear in both halves.
  Memory: O(N+M) total. Each recursion frame holds two V tables of size
  ~max_d (= (n+m+1)/2 in that frame) for the duration of one bisect; once
  bisect returns, the V tables are GC'd before recursing. Recursion depth
  is O(log(N+M)) on average.
  This module is a careful Elixir translation of Google diff_match_patch's
  `diff_bisect` (https://github.com/google/diff-match-patch), which is
  itself a faithful port of Myers §4b. The C reference is git xdiff's
  `xdl_split` / `xdl_recs_cmp` (https://github.com/git/git/blob/master/xdiff/xdiffi.c).
  ## Why we needed this
  Uses the standard forward-only approach with trace recording and backtracking.
  The previous hand-rolled implementation kept every V-table from every `d`
  iteration in a list (O(D²) memory) and used a `Map` for V (O(log n) per
  access). On large inputs the BEAM allocator climbed into the GBs and got
  cgroup-killed under load. Stdlib's `List.myers_difference/2` is also
  naive Myers (one path-with-suffix per diagonal kept alive simultaneously
  — O(D × max(D, N+M)) memory) and exhibits the same problem. This module
  is the proper linear-space variant, the same algorithm git uses by
  default in its `xdiff` library.
  """
  @type edit :: {:eq, term()} | {:ins, term()} | {:del, term()}
@@ -27,11 +52,12 @@
  """
  @spec diff(list(), list()) :: [edit()]
  def diff(a, b) when is_list(a) and is_list(b) do
    n = length(a)
    m = length(b)
    a_vec = List.to_tuple(a)
    b_vec = List.to_tuple(b)
    n = tuple_size(a_vec)
    m = tuple_size(b_vec)
    diff_range(a_vec, b_vec, 0, n, 0, m)
    ses(a_vec, b_vec, n, m)
  end
  @doc """
@@ -39,138 +65,529 @@
  """
  @spec diff_lines(String.t(), String.t()) :: [edit()]
  def diff_lines(text_a, text_b) do
    lines_a = String.split(text_a, "\n", trim: false)
    lines_b = String.split(text_b, "\n", trim: false)
    diff(String.split(text_a, "\n", trim: false), String.split(text_b, "\n", trim: false))
    diff(lines_a, lines_b)
  end
  # ── Recursive driver ──────────────────────────────────────────────────
  #
  defp ses(_a, _b, 0, 0), do: []
  # Compute edits for a[a_lo..a_hi) vs b[b_lo..b_hi). Strips common
  # prefix/suffix first (huge speedup on typical code diffs), then either
  # bottoms out or bisects and recurses on the two halves.
  defp diff_range(a, b, a_lo, a_hi, b_lo, b_hi) do
    {prefix_eqs, a_lo, b_lo} = strip_prefix(a, b, a_lo, a_hi, b_lo, b_hi, [])
    {suffix_eqs, a_hi, b_hi} = strip_suffix(a, b, a_lo, a_hi, b_lo, b_hi, [])
  defp ses(a, b, n, m) do
    max = n + m
    v0 = %{1 => 0}
    # Forward pass: build trace of V snapshots
    # trace stores V state AFTER processing each d value
    # We also need v0 (the initial state before d=0)
    case find_d(a, b, n, m, max, 0, v0, [v0]) do
      {:found, trace} ->
        backtrack(trace, a, b, n, m)
    middle =
      cond do
        a_lo == a_hi and b_lo == b_hi ->
          []
      :not_found ->
        []
    end
  end
        a_lo == a_hi ->
          for i <- b_lo..(b_hi - 1)//1, do: {:ins, elem(b, i)}
  defp find_d(_a, _b, _n, _m, max, d, _v, _trace) when d > max, do: :not_found
        b_lo == b_hi ->
          for i <- a_lo..(a_hi - 1)//1, do: {:del, elem(a, i)}
        true ->
          {x, y} = bisect(a, b, a_lo, a_hi, b_lo, b_hi)
  defp find_d(a, b, n, m, max, d, v, trace) do
    v_new = process_diag(a, b, n, m, d, -d, v)
          left = diff_range(a, b, a_lo, x, b_lo, y)
          right = diff_range(a, b, x, a_hi, y, b_hi)
          left ++ right
      end
    prefix_eqs ++ middle ++ suffix_eqs
    end_x = Map.get(v_new, n - m, -1)
  end
    if end_x >= n do
      # trace already has [v0, ...] accumulated in reverse
      # Add v_new and reverse to get [v0, v_after_d0, v_after_d1, ..., v_after_dD]
      {:found, Enum.reverse([v_new | trace])}
  defp strip_prefix(a, b, a_lo, a_hi, b_lo, b_hi, acc)
       when a_lo < a_hi and b_lo < b_hi do
    if elem(a, a_lo) == elem(b, b_lo) do
      strip_prefix(a, b, a_lo + 1, a_hi, b_lo + 1, b_hi, [{:eq, elem(a, a_lo)} | acc])
    else
      find_d(a, b, n, m, max, d + 1, v_new, [v_new | trace])
      {Enum.reverse(acc), a_lo, b_lo}
    end
  end
  defp process_diag(_a, _b, _n, _m, d, k, v) when k > d, do: v
  defp strip_prefix(_a, _b, a_lo, _a_hi, b_lo, _b_hi, acc), do: {Enum.reverse(acc), a_lo, b_lo}
  defp strip_suffix(a, b, a_lo, a_hi, b_lo, b_hi, acc)
       when a_lo < a_hi and b_lo < b_hi do
    if elem(a, a_hi - 1) == elem(b, b_hi - 1) do
  defp process_diag(a, b, n, m, d, k, v) do
    x =
      if k == -d or (k != d and Map.get(v, k - 1, 0) < Map.get(v, k + 1, 0)) do
        Map.get(v, k + 1, 0)
      else
        Map.get(v, k - 1, 0) + 1
      end
    y = x - k
      strip_suffix(a, b, a_lo, a_hi - 1, b_lo, b_hi - 1, [{:eq, elem(a, a_hi - 1)} | acc])
    {x, _y} = snake(a, b, n, m, x, y)
    v = Map.put(v, k, x)
    process_diag(a, b, n, m, d, k + 2, v)
  end
  defp snake(a, b, n, m, x, y) when x < n and y < m do
    if elem(a, x) == elem(b, y) do
      snake(a, b, n, m, x + 1, y + 1)
    else
      {acc, a_hi, b_hi}
      {x, y}
    end
  end
  defp strip_suffix(_a, _b, _a_lo, a_hi, _b_lo, b_hi, acc), do: {acc, a_hi, b_hi}
  defp snake(_a, _b, _n, _m, x, y), do: {x, y}
  # Backtrack through V snapshots to reconstruct edit script.
  # ── Bisect — find the middle-snake split point ────────────────────────
  #
  # trace = [v_initial, v_after_d0, v_after_d1, ..., v_after_dD]
  # Translation of diff_match_patch's diff_bisect (Python). Returns
  # `{x, y}` in ABSOLUTE coordinates (within the original a/b) such that
  # the optimal edit script from `a[a_lo..a_hi)` to `b[b_lo..b_hi)` passes
  # through (x, y) in the middle of the edit graph.
  # trace has D+2 elements. trace[0] = initial V = %{1 => 0}.
  # trace[d+1] = V after processing d.
  #
  # For backtracking at step d (1-indexed edit step):
  #   - The end point at this step is determined by trace[d+1] at diagonal k
  #   - The previous state is trace[d] (which is V after processing d-1)
  #
  # We start at (n, m) and work backwards d = D, D-1, ..., 1
  defp backtrack(trace, a, b, n, m) do
    trace_arr = List.to_tuple(trace)
  # V tables are stored as Maps keyed by k_offset = v_offset + k. Sentinel
  # `-1` means "not yet reached" (distinct from "reached at x=0"). Map is
  # used for clarity; could be swapped for `:atomics` for speed once
  # correctness is established.
  defp bisect(a, b, a_lo, a_hi, b_lo, b_hi) do
    n = a_hi - a_lo
    m = b_hi - b_lo
    max_d = div(n + m + 1, 2)
    v_offset = max_d
    # Init: V_f[1] = 0 means "forward path on diagonal 1 is at x=0 (start)".
    # Same for V_r. All other entries default to -1 (not reached).
    v1 = %{(v_offset + 1) => 0}
    v2 = %{(v_offset + 1) => 0}
    # D = number of edits = tuple_size - 2  (since we have D+2 elements)
    d_max = tuple_size(trace_arr) - 2
    do_backtrack(trace_arr, a, b, d_max, n, m, [])
    delta = n - m
    front? = rem(delta, 2) != 0
    bisect_d(a, b, a_lo, b_lo, n, m, max_d, v_offset, delta, front?, v1, v2, 0, 0, 0, 0, 0)
  end
  # bisect_d/17: search depth d. For each d, walk the forward path one step
  # then the reverse path one step. Check overlap on the appropriate side
  # (forward when delta is odd, reverse when delta is even).
  defp bisect_d(
         _a,
         _b,
         a_lo,
         b_lo,
         n,
         _m,
         max_d,
         _v_off,
         _delta,
         _front?,
         _v1,
         _v2,
         d,
         _k1s,
         _k1e,
         _k2s,
         _k2e
       )
       when d >= max_d do
    # No middle snake found within max_d iterations — happens when D >= 2 but
    # max_d is too small (e.g. tiny inputs like n=m=1 with no match) or no
    # commonality at all. Fall back to "split at the top-right corner": left
    # half becomes (full a, empty b) → all deletes, right half becomes
  # Base case: d=0, just emit the initial snake from (0,0) to (x, y)
  defp do_backtrack(_trace, a, _b, 0, x, _y, edits) do
    # (empty a, full b) → all inserts. Both halves are STRICTLY smaller, so
    # the recursion terminates.
    {a_lo + n, b_lo}
    diag(a, 0, x) ++ edits
  end
  defp bisect_d(
         a, b, a_lo, b_lo, n, m, max_d, v_off, delta, front?,
         v1, v2, d, k1s, k1e, k2s, k2e
       ) do
    # Forward frontier first: one more edit step, beginning at diagonal k1s - d.
    forward =
      forward_sweep(
        a, b, a_lo, b_lo, n, m, v_off, delta, front?,
        v1, v2, d, k1s - d, k1s, k1e
      )

    case forward do
      {:found, x, y} ->
        # The sweep's offsets are relative to (a_lo, b_lo); shift to absolute.
        {a_lo + x, b_lo + y}

      {:cont, fwd_table, fwd_start, fwd_end} ->
        # The reverse sweep is handed the freshly updated forward table, which
        # it consults for its own overlap check.
        reverse =
          reverse_sweep(
            a, b, a_lo, b_lo, n, m, v_off, delta, front?,
            fwd_table, v2, d, k2s - d, k2s, k2e
          )

        case reverse do
          {:found, x, y} ->
            {a_lo + x, b_lo + y}

          {:cont, rev_table, rev_start, rev_end} ->
            # Neither sweep reported a meeting point at depth d: go one deeper,
            # carrying both tables and their (possibly narrowed) diagonal bounds.
            bisect_d(
              a, b, a_lo, b_lo, n, m, max_d, v_off, delta, front?,
              fwd_table, rev_table, d + 1, fwd_start, fwd_end, rev_start, rev_end
            )
        end
    end
  end
  # Forward sweep: walk diagonals k = -d+k1s, -d+k1s+2, ..., d-k1e.
  # Returns {:found, x, y} on overlap (when delta is odd), else
  # {:cont, v1, k1s, k1e} with possibly-adjusted bounds.
  defp forward_sweep(
         _a, _b, _a_lo, _b_lo, _n, _m, _v_off, _delta, _front?,
         v1, _v2, d, k1, k1s, k1e
       )
       when d - k1e < k1 do
    # k1 has stepped past the last diagonal of this round (d - k1e), so the
    # sweep is over: hand back the forward table and the current bounds.
    {:cont, v1, k1s, k1e}
  end
  defp forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1, k1s, k1e) do
    k1_off = v_off + k1
    # Pick predecessor — down (k+1) preferred over right (k-1) when tied.
    x1 =
      cond do
        k1 == -d ->
          Map.get(v1, k1_off + 1, -1) |> max(0)
        k1 == d ->
  defp do_backtrack(trace, a, b, d, x, y, edits) do
    k = x - y
    # V state from previous round (after processing d-1)
    v_prev = elem(trace, d)
          Map.get(v1, k1_off - 1, -1) + 1
    # Determine which direction we took at step d
    prev_k =
      if k == -d or (k != d and Map.get(v_prev, k - 1, 0) < Map.get(v_prev, k + 1, 0)) do
        k + 1
      else
        k - 1
        Map.get(v1, k1_off - 1, -1) < Map.get(v1, k1_off + 1, -1) ->
          Map.get(v1, k1_off + 1, -1)
        true ->
          Map.get(v1, k1_off - 1, -1) + 1
      end
    y1 = x1 - k1
    {x1, y1} = snake_forward(a, b, a_lo, b_lo, n, m, x1, y1)
    v1 = Map.put(v1, k1_off, x1)
    cond do
      x1 > n ->
        # Ran off the right edge — stop extending this side, shrink k range.
        forward_sweep(
          a,
          b,
          a_lo,
          b_lo,
          n,
          m,
          v_off,
          delta,
          front?,
          v1,
          v2,
          d,
          k1 + 2,
          k1s,
          k1e + 2
        )
      y1 > m ->
        # Ran off the bottom — shrink the OTHER end of the k range.
        forward_sweep(
          a,
          b,
          a_lo,
          b_lo,
          n,
          m,
          v_off,
          delta,
          front?,
          v1,
          v2,
          d,
          k1 + 2,
          k1s + 2,
          k1e
        )
      front? ->
        # Overlap check on forward sweep (delta is odd).
        k2_off = v_off + delta - k1
        if k2_off >= 0 and k2_off < 2 * v_off + 1 do
          v2_x = Map.get(v2, k2_off, -1)
          if v2_x != -1 do
            # Reverse coordinate → forward: x2_forward = n - v2_x
            x2_fwd = n - v2_x
            if x1 >= x2_fwd do
              {:found, x1, y1}
            else
              forward_sweep(
                a,
                b,
                a_lo,
                b_lo,
                n,
                m,
                v_off,
                delta,
                front?,
                v1,
                v2,
                d,
                k1 + 2,
                k1s,
                k1e
              )
            end
          else
            forward_sweep(
              a,
              b,
              a_lo,
              b_lo,
              n,
              m,
              v_off,
              delta,
              front?,
              v1,
              v2,
              d,
              k1 + 2,
              k1s,
              k1e
            )
          end
        else
          forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e)
        end
      true ->
        forward_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k1 + 2, k1s, k1e)
    end
  end
  # Reverse sweep: same shape, mirrored. Snake compares from the END going
  # backward.
  defp reverse_sweep(
         _a, _b, _a_lo, _b_lo, _n, _m, _v_off, _delta, _front?,
         _v1, v2, d, k2, k2s, k2e
       )
       when d - k2e < k2 do
    # k2 has stepped past the last diagonal of this round (d - k2e), so the
    # sweep is over: hand back the reverse table and the current bounds.
    {:cont, v2, k2s, k2e}
  end
  defp reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2, k2s, k2e) do
    k2_off = v_off + k2
    x2 =
      cond do
        k2 == -d ->
          Map.get(v2, k2_off + 1, -1) |> max(0)
        k2 == d ->
          Map.get(v2, k2_off - 1, -1) + 1
    # End point of previous round on diagonal prev_k
    prev_x = Map.get(v_prev, prev_k, 0)
    prev_y = prev_x - prev_k
    # Mid point after the edit (before snake)
    {mid_x, edit} =
      if prev_k > k do
        # Insert (moved down: y+1, x stays)
        {prev_x, {:ins, elem(b, prev_y)}}
      else
        # Delete (moved right: x+1, y stays)
        {prev_x + 1, {:del, elem(a, prev_x)}}
        Map.get(v2, k2_off - 1, -1) < Map.get(v2, k2_off + 1, -1) ->
          Map.get(v2, k2_off + 1, -1)
        true ->
          Map.get(v2, k2_off - 1, -1) + 1
      end
    y2 = x2 - k2
    {x2, y2} = snake_reverse(a, b, a_lo, b_lo, n, m, x2, y2)
    v2 = Map.put(v2, k2_off, x2)
    cond do
      x2 > n ->
        reverse_sweep(
          a,
          b,
          a_lo,
          b_lo,
          n,
          m,
          v_off,
          delta,
          front?,
          v1,
          v2,
          d,
          k2 + 2,
          k2s,
          k2e + 2
        )
    # Snake from mid to (x, y) — these are all :eq
    edits = diag(a, mid_x, x) ++ edits
      y2 > m ->
        reverse_sweep(
          a,
          b,
          a_lo,
          b_lo,
          n,
          m,
          v_off,
          delta,
          front?,
          v1,
          v2,
          d,
          k2 + 2,
          k2s + 2,
          k2e
        )
      not front? ->
        # Overlap check on reverse sweep (delta is even).
        k1_off = v_off + delta - k2
        if k1_off >= 0 and k1_off < 2 * v_off + 1 do
          v1_x = Map.get(v1, k1_off, -1)
          if v1_x != -1 do
            x1_fwd = v1_x
            x2_fwd = n - x2
            if x1_fwd >= x2_fwd do
              # Use forward coordinates of the meeting point.
              y1_fwd = x1_fwd - (k1_off - v_off)
              {:found, x1_fwd, y1_fwd}
            else
              reverse_sweep(
    # Prepend the edit
    edits = [edit | edits]
                a,
                b,
                a_lo,
                b_lo,
                n,
                m,
                v_off,
                delta,
                front?,
                v1,
                v2,
                d,
                k2 + 2,
                k2s,
                k2e
              )
            end
          else
            reverse_sweep(
              a,
              b,
              a_lo,
              b_lo,
              n,
              m,
              v_off,
              delta,
              front?,
              v1,
              v2,
              d,
              k2 + 2,
              k2s,
              k2e
            )
          end
        else
          reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e)
        end
    do_backtrack(trace, a, b, d - 1, prev_x, prev_y, edits)
      true ->
        reverse_sweep(a, b, a_lo, b_lo, n, m, v_off, delta, front?, v1, v2, d, k2 + 2, k2s, k2e)
    end
  end
  # ── Snakes ────────────────────────────────────────────────────────────
  #
  # snake_forward: extend (x, y) forward (in local coordinates within the
  defp diag(a, x_start, x_end) do
    diag_acc(a, x_start, x_end, [])
  # bisect range) as long as a[a_lo + x] == b[b_lo + y].
  # Either index has reached the end of its range (x >= n or y >= m): there is
  # nothing left to compare, so the position comes back unchanged.
  defp snake_forward(_a, _b, _a_lo, _b_lo, n, m, x, y) when x >= n or y >= m, do: {x, y}

  defp snake_forward(a, b, a_lo, b_lo, n, m, x, y) do
    # x and y are offsets from a_lo / b_lo; step diagonally while elements match.
    case elem(a, a_lo + x) == elem(b, b_lo + y) do
      true -> snake_forward(a, b, a_lo, b_lo, n, m, x + 1, y + 1)
      false -> {x, y}
    end
  end
  defp diag_acc(_a, x, x, acc), do: Enum.reverse(acc)
  # snake_reverse: extend (x, y) backward — local x, y are STEPS BACK from
  # the (n, m) corner. Compare a[a_lo + n - 1 - x] vs b[b_lo + m - 1 - y].
  defp snake_reverse(a, b, a_lo, b_lo, n, m, x, y) when x < n and y < m do
    ai = a_lo + n - 1 - x
    bi = b_lo + m - 1 - y
    if elem(a, ai) == elem(b, bi) do
      snake_reverse(a, b, a_lo, b_lo, n, m, x + 1, y + 1)
    else
      {x, y}
    end
  defp diag_acc(a, x_start, x_end, acc) when x_start < x_end do
    diag_acc(a, x_start + 1, x_end, [{:eq, elem(a, x_start)} | acc])
  end
  defp snake_reverse(_a, _b, _a_lo, _b_lo, _n, _m, x, y), do: {x, y}
end