ref:33706a51e3339957845721edf05914bf0f869dde

fix: prevent duplicate enqueue at merge convergence + match git tie-breaking

- Add `seen` MapSet (like git's SEEN flag) to track commits at enqueue time, not pop time. Prevents common ancestors from being enqueued and read twice when two branches converge.
- Change insert_sorted from >= to > to match git's commit_list_insert_by_date(): same-timestamp inserts go after existing entries (FIFO within a tick), not before.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
SHA: 33706a51e3339957845721edf05914bf0f869dde
Author: Cole Christensen <cole.christensen@macmillan.com>
Date: 2026-02-26 21:06
Parents: 0df729d
1 files changed +28 -29
Type
lib/ex_git_objectstore/walk.ex +28 −29
@@ -59,8 +59,8 @@
end
case seed_queue(repo, start_sha) do
{:ok, queue} ->
{:ok, commits} = walk(repo, queue, MapSet.new(), [], 0, emit_limit)
{:ok, queue, seen} ->
{:ok, commits} = walk(repo, queue, seen, [], 0, emit_limit)
{:ok, apply_skip_and_limit(commits, skip, max_count)}
{:error, _} = err ->
@@ -82,11 +82,13 @@
end
# Read the starting commit and create the initial single-element queue.
# Returns the queue and a `seen` set (like git's SEEN flag) to prevent
# duplicate enqueuing at merge convergence points.
defp seed_queue(repo, sha) do
case ObjectResolver.read(repo, sha) do
{:ok, %Commit{} = commit} ->
ts = parse_timestamp(commit.committer)
{:ok, [{ts, sha, commit}]}
{:ok, [{ts, sha, commit}], MapSet.new([sha])}
{:error, _} = err ->
err
@@ -132,52 +134,48 @@
# -- Log walk (priority queue, like git's default `git log`) --
#
# Algorithm (mirrors git's revision.c get_revision_1 + process_parents):
# 1. Seed a date-sorted queue with the start commit
# 2. Pop the highest-timestamp entry (newest)
# 3. If already visited, skip; otherwise mark visited and emit
# 1. Seed a date-sorted queue with the start commit, mark it SEEN
# 2. Pop the highest-timestamp entry (newest) and emit it
# 3. For each parent: if not SEEN, mark SEEN and insert_sorted into queue
# 4. Repeat until queue empty or emit_limit reached
# 4. For each parent: read commit, get timestamp, insert_sorted into queue
# 5. Repeat until queue empty or emit_limit reached
#
# `seen` tracks all SHAs that have been enqueued (like git's SEEN flag),
# preventing duplicate enqueuing at merge convergence points.
# The queue is a descending-sorted list — insert_sorted maintains the
# invariant, equivalent to git's commit_list_insert_by_date().
defp walk(_repo, [], _visited, acc, _count, _limit) do
defp walk(_repo, [], _seen, acc, _count, _limit) do
{:ok, Enum.reverse(acc)}
end
defp walk(_repo, _queue, _visited, acc, count, limit)
defp walk(_repo, _queue, _seen, acc, count, limit)
when is_integer(limit) and count >= limit do
{:ok, Enum.reverse(acc)}
end
defp walk(repo, [{_ts, sha, commit} | rest], seen, acc, count, limit) do
{queue, seen} = enqueue_parents(repo, commit.parents, rest, seen)
walk(repo, queue, seen, [{sha, commit} | acc], count + 1, limit)
defp walk(repo, [{_ts, sha, commit} | rest], visited, acc, count, limit) do
if MapSet.member?(visited, sha) do
walk(repo, rest, visited, acc, count, limit)
else
visited = MapSet.put(visited, sha)
queue = enqueue_parents(repo, commit.parents, rest, visited)
walk(repo, queue, visited, [{sha, commit} | acc], count + 1, limit)
end
end
defp enqueue_parents(_repo, [], queue, seen), do: {queue, seen}
defp enqueue_parents(_repo, [], queue, _visited), do: queue
defp enqueue_parents(repo, [parent_sha | rest_parents], queue, visited) do
defp enqueue_parents(repo, [parent_sha | rest_parents], queue, seen) do
if MapSet.member?(seen, parent_sha) do
enqueue_parents(repo, rest_parents, queue, seen)
if MapSet.member?(visited, parent_sha) do
enqueue_parents(repo, rest_parents, queue, visited)
else
queue = enqueue_one_parent(repo, parent_sha, queue)
enqueue_parents(repo, rest_parents, queue, visited)
{queue, seen} = enqueue_one_parent(repo, parent_sha, queue, seen)
enqueue_parents(repo, rest_parents, queue, seen)
end
end
defp enqueue_one_parent(repo, sha, queue, seen) do
defp enqueue_one_parent(repo, sha, queue) do
case ObjectResolver.read(repo, sha) do
{:ok, %Commit{} = commit} ->
ts = parse_timestamp(commit.committer)
{insert_sorted(queue, {ts, sha, commit}), MapSet.put(seen, sha)}
insert_sorted(queue, {ts, sha, commit})
_ ->
queue
{queue, seen}
end
end
@@ -241,12 +239,13 @@
# -- Shared: sorted insertion into a descending-by-timestamp list --
#
# Works with any tuple where elem(0) is the timestamp.
# Matches git's commit_list_insert_by_date(): uses strict `>` so that
# same-timestamp items insert after existing entries (FIFO within a tick).
# Equivalent to git's commit_list_insert_by_date().
defp insert_sorted([], item), do: [item]
defp insert_sorted([head | rest] = list, item) do
if elem(item, 0) > elem(head, 0) do
if elem(item, 0) >= elem(head, 0) do
[item | list]
else
[head | insert_sorted(rest, item)]