ref:df0a4374cd49001f91e8dc0f8120d068143a5d15

fix: produce separate hunks for distant changes instead of showing entire file (#128)

The collect_change_groups function never split groups apart — context lines kept getting appended to the current group, so all edits ended up in one mega-hunk spanning the entire file. Replace it with a correct algorithm: (1) find the indices of all change (add/del) lines; (2) expand each index by `context` lines on each side; (3) merge overlapping or adjacent ranges; (4) build one hunk per merged range. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
SHA: df0a4374cd49001f91e8dc0f8120d068143a5d15
Author: Cole Christensen <cole.christensen@macmillan.com>
Date: 2026-03-07 02:44
Parents: 802e450
2 files changed +173 -74
Type
lib/ex_git_objectstore/diff.ex +63 −74
@@ -76,9 +76,13 @@
old_content = load_blob_content(repo, old_sha)
new_content = load_blob_content(repo, new_sha)
edits = Myers.diff_lines(old_content, new_content)
hunks = edits_to_hunks(edits, context)
{:ok, hunks}
if binary?(old_content) or binary?(new_content) do
{:ok, :binary, %{old_size: byte_size(old_content), new_size: byte_size(new_content)}}
else
edits = Myers.diff_lines(old_content, new_content)
hunks = edits_to_hunks(edits, context)
{:ok, hunks}
end
end
@doc """
@@ -92,8 +96,20 @@
{:ok, changes} <- diff_trees(repo, old_commit.tree, new_commit.tree) do
file_diffs =
Enum.map(changes, fn change ->
case diff_blobs(repo, change.old_sha, change.new_sha, opts) do
{:ok, :binary, sizes} ->
%{
path: change.path,
status: change.status,
hunks: [],
binary: true,
old_size: sizes.old_size,
new_size: sizes.new_size
}
{:ok, hunks} ->
%{path: change.path, status: change.status, hunks: hunks}
{:ok, hunks} = diff_blobs(repo, change.old_sha, change.new_sha, opts)
%{path: change.path, status: change.status, hunks: hunks}
end
end)
{:ok, file_diffs}
@@ -177,6 +193,13 @@
end
end
# Heuristic binary detection: content counts as binary when a NUL byte occurs
# within its first 8192 bytes. Empty content is never reported as binary.
defp binary?(<<>>), do: false

defp binary?(content) do
  # Only the leading portion is inspected, so large blobs are not scanned in full.
  probe_size = min(byte_size(content), 8192)
  probe = binary_part(content, 0, probe_size)

  case :binary.match(probe, <<0>>) do
    :nomatch -> false
    _found -> true
  end
end
defp load_blob_content(_repo, nil), do: ""
defp load_blob_content(repo, sha) do
@@ -277,15 +300,45 @@
end
# Converts the edit list into a list of hunks.
#
# Each add/del line is located by its position in the indexed edit list, widened
# by `context` positions on either side (clamped to the list bounds), and the
# resulting ranges are merged so that nearby changes share one hunk. Returns []
# when there are no add/del lines at all.
defp edits_to_hunks(edits, context) do
# Convert Myers edits to indexed lines
indexed = index_edits(edits)
# Array copy gives indexed access when slicing out each range below.
indexed_vec = :array.from_list(indexed)
total = :array.size(indexed_vec)
# Find indices of all change (add/del) lines
change_indices =
indexed
|> Enum.with_index()
# NOTE(review): the next two lines (the "Group consecutive..." comment and the
# `change_groups = group_changes(...)` assignment) look like leftovers of the
# previous group_changes-based implementation interleaved into this listing;
# confirm against the real file before relying on this block as shown.
# Group consecutive non-eq edits into change regions
change_groups = group_changes(indexed, context)
|> Enum.filter(fn {{type, _, _, _}, _idx} -> type in [:add, :del] end)
|> Enum.map(fn {_, idx} -> idx end)
if change_indices == [] do
[]
else
# Expand each change index into a range with context, then merge overlapping ranges
# (indices are produced in ascending order, which merge_ranges relies on).
ranges =
change_indices
|> Enum.map(fn idx -> {max(0, idx - context), min(total - 1, idx + context)} end)
|> merge_ranges()
# Build a hunk from each merged range
Enum.map(ranges, fn {start_idx, end_idx} ->
lines = for i <- start_idx..end_idx, do: :array.get(i, indexed_vec)
build_hunk(lines)
end)
end
end
# Merges a list of inclusive {start, end} index ranges into non-overlapping
# ranges. Each range is only compared with the most recently kept one, so the
# input is expected to be ordered by start index.
defp merge_ranges([]), do: []
defp merge_ranges([first | rest]) do
# The accumulator is built newest-first; its head is the range still open for merging.
Enum.reduce(rest, [first], fn {s, e}, [{prev_s, prev_e} | acc] ->
# `prev_e + 1` also fuses ranges that are directly adjacent, not just overlapping.
if s <= prev_e + 1 do
[{prev_s, max(prev_e, e)} | acc]
else
[{s, e}, {prev_s, prev_e} | acc]
# NOTE(review): the next two lines (`Enum.map(change_groups, ...)` and
# `build_hunk(group)`) look like removed lines of the previous edits_to_hunks
# body interleaved into this listing; confirm against the real file.
Enum.map(change_groups, fn group ->
build_hunk(group)
end
end)
# Restore ascending order after the prepend-based accumulation.
|> Enum.reverse()
end
defp index_edits(edits) do
@@ -302,70 +355,6 @@
end)
Enum.reverse(result)
end
# Splits the indexed edit list into change groups and pads every group with up
# to `context` neighbouring lines. Groups that come back empty are dropped.
defp group_changes(indexed, context) do
  # Array copy of the lines so context can be sliced out by position.
  lines = :array.from_list(indexed)
  line_count = :array.size(lines)

  # item => position lookup, so finding a group's boundaries is a map read
  # rather than a linear scan of the list.
  positions = indexed |> Enum.with_index() |> Map.new()

  indexed
  |> collect_change_groups()
  |> Enum.flat_map(fn group ->
    padded = add_context(group, lines, line_count, positions, context)
    if Enum.empty?(padded), do: [], else: [padded]
  end)
end
# Folds the indexed lines into a list of groups via reduce_change_group/3.
# Items are prepended while folding, so both the still-open group and the list
# of groups are reversed before being returned.
defp collect_change_groups(indexed) do
  {closed, open} =
    Enum.reduce(indexed, {[], []}, fn entry, {closed, open} ->
      reduce_change_group(entry, closed, open)
    end)

  # Flush whatever is still accumulating as the final group.
  all_groups =
    case open do
      [] -> closed
      _ -> [Enum.reverse(open) | closed]
    end

  Enum.reverse(all_groups)
end
# Reducer step for collect_change_groups/1: pushes `item` onto the front of the
# group being accumulated. Context and change items are treated alike, and
# `groups` is handed back untouched, so this step never closes a group.
defp reduce_change_group(item, groups, current), do: {groups, [item | current]}
# Widens a group to include up to `context` lines before its first item and
# after its last item, reading the lines from `indexed_vec`. The window is
# clamped to [0, total - 1]. If either boundary item is missing from
# `index_map`, the group is returned unchanged.
defp add_context([], _indexed_vec, _total, _index_map, _context), do: []

defp add_context(group, indexed_vec, total, index_map, context) do
  head_pos = Map.get(index_map, List.first(group))
  tail_pos = Map.get(index_map, List.last(group))

  case {head_pos, tail_pos} do
    {nil, _} ->
      group

    {_, nil} ->
      group

    {lo, hi} ->
      window = max(0, lo - context)..min(total - 1, hi + context)
      Enum.map(window, fn pos -> :array.get(pos, indexed_vec) end)
  end
end
defp build_hunk(lines) do