@@ -16,7 +16,11 @@
@moduledoc """
Commit graph traversal — implements `git log` style iteration.
Supports topological + time ordering with configurable limits.
Uses a date-sorted priority queue (matching git's `commit_list_insert_by_date`
algorithm): seed the queue with the starting commit, pop the most recent,
enqueue its parents maintaining sort order, repeat. This guarantees that the
first N pops always produce the same N commits regardless of how many total
are requested, making offset-based pagination stable across merge histories.
"""
alias ExGitObjectstore.Object.Commit
@@ -34,6 +38,10 @@
Walk the commit graph starting from the given SHA.
Returns commits in reverse chronological order (newest first).
Uses a priority queue ordered by committer timestamp so that commits
are always emitted newest-first. The first N results are identical
regardless of the total requested, making skip/limit pagination stable.
## Options
* `:max_count` — maximum number of commits to return (default: all)
* `:skip` — number of commits to skip (default: 0)
@@ -44,20 +52,23 @@
max_count = Keyword.get(opts, :max_count, :infinity)
skip = Keyword.get(opts, :skip, 0)
emit_limit =
limit =
case max_count do
:infinity -> :infinity
n -> n + skip
end
case seed_queue(repo, start_sha) do
case walk(repo, [start_sha], MapSet.new(), [], 0, limit) do
{:ok, all_commits} ->
{:ok, apply_skip_and_limit(all_commits, skip, max_count)}
{:ok, queue} ->
{:ok, commits} = walk(repo, queue, MapSet.new(), [], 0, emit_limit)
{:ok, apply_skip_and_limit(commits, skip, max_count)}
{:error, _} = err ->
err
end
end
defp apply_skip_and_limit(commits, 0, :infinity), do: commits
defp apply_skip_and_limit(commits, skip, max_count) do
commits
@@ -70,5 +81,17 @@
end)
end
# Reads the starting commit and builds the initial single-element queue.
#
# Returns `{:ok, [{timestamp, sha, commit}]}` when `sha` resolves to a commit;
# `timestamp` is `parse_timestamp/1` applied to the commit's committer field.
# An `{:error, _}` from `ObjectResolver.read/2` is passed through unchanged.
# Any other successfully read object yields `{:error, :not_a_commit}` rather
# than raising `CaseClauseError`.
defp seed_queue(repo, sha) do
  case ObjectResolver.read(repo, sha) do
    {:ok, %Commit{committer: committer} = commit} ->
      {:ok, [{parse_timestamp(committer), sha, commit}]}

    # NOTE(review): assumes the resolver can return non-commit objects
    # (e.g. when the caller passes a tree/blob/tag SHA) — confirm.
    {:ok, _other_object} ->
      {:error, :not_a_commit}

    {:error, _reason} = error ->
      error
  end
end
@doc """
Find the merge base (lowest common ancestor) of two commits.
@@ -106,11 +129,61 @@
end
end
# -- Log walk (priority queue, like git's default `git log`) --
#
# Algorithm (mirrors git's revision.c get_revision_1 + process_parents):
# 1. Seed a date-sorted queue with the start commit
# 2. Pop the highest-timestamp entry (newest)
# 3. If already visited, skip; otherwise mark visited and emit
# 4. For each parent: read commit, get timestamp, insert_sorted into queue
# 5. Repeat until queue empty or emit_limit reached
#
# The queue is a descending-sorted list — insert_sorted maintains the
# invariant, equivalent to git's commit_list_insert_by_date().
# Drains the date-sorted queue, emitting each commit at most once.
#
# `acc` is built newest-last by prepending, so it is reversed on exit to give
# the order in which commits were popped. Stops when the queue is empty or,
# for an integer `limit`, once `count` emitted commits reach it.
defp walk(_repo, [], _visited, acc, _count, _limit), do: {:ok, Enum.reverse(acc)}

defp walk(_repo, _queue, _visited, acc, count, limit)
     when is_integer(limit) and count >= limit,
     do: {:ok, Enum.reverse(acc)}

defp walk(repo, [{_ts, sha, commit} | pending], seen, emitted, count, limit) do
  case MapSet.member?(seen, sha) do
    true ->
      # The same commit can be queued more than once (e.g. via two children);
      # only the first pop emits it.
      walk(repo, pending, seen, emitted, count, limit)

    false ->
      seen = MapSet.put(seen, sha)
      pending = enqueue_parents(repo, commit.parents, pending, seen)
      walk(repo, pending, seen, [{sha, commit} | emitted], count + 1, limit)
  end
end
# -- Private --
# Adds each parent SHA that is not yet in `visited` to the queue, in list
# order, and returns the resulting queue. Parents already visited are skipped
# without being read.
defp enqueue_parents(_repo, [], queue, _visited), do: queue

# NOTE(review): the next three comment lines describe the merge-base walk,
# not enqueue_parents/4 — they look misplaced here.
# Priority-queue merge base: walk commits in timestamp order (highest first).
# Each commit is tagged as reachable from :a, :b, or :both.
# First commit popped that is :both is the merge base.
defp enqueue_parents(repo, [parent_sha | remaining], queue, visited) do
  next_queue =
    if MapSet.member?(visited, parent_sha),
      do: queue,
      else: enqueue_one_parent(repo, parent_sha, queue)

  enqueue_parents(repo, remaining, next_queue, visited)
end
# Reads the commit for `sha` and inserts `{timestamp, sha, commit}` into the
# sorted queue. If the read does not produce a commit (error or any other
# result) the queue is returned unchanged, i.e. that parent is silently
# dropped from the walk.
defp enqueue_one_parent(repo, sha, queue) do
  with {:ok, %Commit{} = commit} <- ObjectResolver.read(repo, sha) do
    entry = {parse_timestamp(commit.committer), sha, commit}
    insert_sorted(queue, entry)
  else
    _ -> queue
  end
end
# -- Merge base walk (priority queue with reachability tracking) --
defp merge_base_walk(_repo, [], _reachable), do: {:error, :no_merge_base}
defp merge_base_walk(repo, [{_ts, sha} | rest], reachable) do
@@ -128,7 +201,7 @@
{:ok, %Commit{} = commit} ->
{new_queue, new_reachable} =
Enum.reduce(commit.parents, {rest, reachable}, fn parent, {q, r} ->
enqueue_merge_parent(repo, parent, side, q, r)
enqueue_parent(repo, parent, side, q, r)
end)
merge_base_walk(repo, new_queue, new_reachable)
@@ -138,14 +211,22 @@
end
end
defp enqueue_merge_parent(repo, parent, side, queue, reachable) do
defp enqueue_parent(repo, parent, side, queue, reachable) do
existing = Map.get(reachable, parent)
new_side = compute_new_side(existing, side)
reachable = Map.put(reachable, parent, new_side)
needs_enqueue = existing == nil or (existing != new_side and new_side == :both)
if needs_enqueue do
case ObjectResolver.read(repo, parent) do
{:ok, %Commit{} = pc} ->
enqueue_commit(repo, parent, queue, reachable)
ts = parse_timestamp(pc.committer)
queue = insert_sorted(queue, {ts, parent})
{queue, reachable}
_ ->
{queue, reachable}
end
else
{queue, reachable}
end
@@ -157,66 +238,19 @@
# When the incoming side is identical to the recorded one, keep it;
# any other combination reaching these clauses is marked :both.
defp compute_new_side(existing, side) when existing === side, do: side
defp compute_new_side(_existing, _side), do: :both
# Reads `parent` and, when it is a commit, inserts `{timestamp, parent}` into
# the sorted queue. `reachable` is returned untouched in every case; if the
# read does not produce a commit the queue is returned unchanged as well.
defp enqueue_commit(repo, parent, queue, reachable) do
  with {:ok, %Commit{} = pc} <- ObjectResolver.read(repo, parent) do
    entry = {parse_timestamp(pc.committer), parent}
    {insert_sorted(queue, entry), reachable}
  else
    _ -> {queue, reachable}
  end
end

# -- Shared: sorted insertion into a descending-by-timestamp list --
#
# Works with any tuple where elem(0) is the timestamp.
# Equivalent to git's commit_list_insert_by_date().
# Insert into a descending-sorted list by timestamp
defp insert_sorted([], item), do: [item]
defp insert_sorted([head | rest] = list, item) do
if elem(item, 0) >= elem(head, 0) do
defp insert_sorted([{ts_head, _} = head | rest] = _list, {ts, _} = item) do
[item | list]
if ts >= ts_head do
[item, head | rest]
else
[head | insert_sorted(rest, item)]
end
end
# List-based walk: pops the head SHA, reads it, and pushes its parents onto
# the FRONT of the pending list (so expansion is depth-first), collecting each
# commit once. The collected commits are ordered by `sort_by_time/1` on exit.
# A read error aborts the walk and is returned as-is.
defp walk(_repo, [], _visited, acc, _count, _limit) do
  {:ok, sort_by_time(acc)}
end

defp walk(_repo, _queue, _visited, acc, count, limit)
     when is_integer(limit) and count >= limit do
  {:ok, sort_by_time(acc)}
end

defp walk(repo, [sha | pending], seen, collected, count, limit) do
  case MapSet.member?(seen, sha) do
    true ->
      walk(repo, pending, seen, collected, count, limit)

    false ->
      case ObjectResolver.read(repo, sha) do
        {:error, _} = failure ->
          failure

        {:ok, %Commit{} = commit} ->
          walk(
            repo,
            commit.parents ++ pending,
            MapSet.put(seen, sha),
            [{sha, commit} | collected],
            count + 1,
            limit
          )
      end
  end
end
# Orders `{sha, commit}` pairs newest-first by committer timestamp.
#
# The input is the walk accumulator (most recently collected first), so it is
# reversed back into walk order before sorting. Each entry's position in that
# order is the tiebreaker: with a descending sort on `{timestamp, -position}`,
# entries sharing a timestamp keep their walk order.
defp sort_by_time(commits) do
  in_walk_order =
    commits
    |> Enum.reverse()
    |> Enum.with_index()

  sort_key = fn {{_sha, commit}, position} ->
    {parse_timestamp(commit.committer), -position}
  end

  for {entry, _position} <- Enum.sort_by(in_walk_order, sort_key, :desc), do: entry
end
defp parse_timestamp(author_or_committer) when is_binary(author_or_committer) do