ref:2b54c14bba04f24906f8e2d3318424d8e050cbb8

feat: top-level Graph-aware API (ahead_behind, commits_between, rebuild_graph) (#17)

PR 3 of 4 for fangorn/ex_git_objectstore#26. This is the entry point Anvil will call. Each query tries the persisted commit-graph first and falls back to the existing `cat_object` walker when the graph isn't built yet or doesn't cover one of the query SHAs. No behavior change for existing callers of `ancestor?/3` — only its speed. **Stacked on #16** — rebase-clean when that merges. ## New modules - **`ExGitObjectstore.Graph.Cache`** — `:persistent_term`-backed in-process cache keyed by `{storage_module, repo_prefix}`. Reads are lock-free and zero-copy. `put/2` triggers a global GC scan so it's only called on rebuild or first lazy-load. - **`ExGitObjectstore.Graph.Fallback`** — reference walker implementations of `ahead_behind`, `commits_between`, `ancestor?`. Bounded by `:max_walk` (default 10k) so a pathological history returns `{:error, :walk_limit_exceeded}` instead of running forever. ## New public API ```elixir ahead_behind(repo, base, head) :: {:ok, %{ahead, behind}} | {:error, _} commits_between(repo, base, head) :: {:ok, [sha]} | {:error, _} ancestor?(repo, anc, desc) :: {:ok, bool} | {:error, _} # existing, now graph-aware rebuild_graph(repo) :: :ok | {:error, _} ``` All three queries follow the same routing: load-or-fetch-cached graph, verify both SHAs are members, answer from graph. On any failure (missing graph, missing member, corrupt blob), fall back to `Graph.Fallback`. Callers see one stable contract. `rebuild_graph` builds from refs, persists to storage, and seeds the cache. ## Tests 23 new tests on top of PR #16: - 6 Cache (hit/miss, overwrite, delete, per-repo keying) - 10 Fallback (edge cases, ahead_behind, commits_between ordering, ancestor?
semantics, walk-limit ceiling, missing-commit error) - 7 integration tests exercising all routing paths: - no graph built — fallback - after `rebuild_graph` — graph path answers + cache seeded - stale graph (commit pushed after build) — falls back because new SHA isn't in graph Full suite: **704 tests, 0 failures** (was 681 on #16). Credo: unchanged from main. ## What's next Anvil PR: swap `lib/anvil/git/objectstore.ex` `ahead_behind` / `commits_between` to call `ExGitObjectstore` directly. Add a `mix anvil.graphs.rebuild` task so operators can seed graphs in dev / staging / prod (no push-hook wiring yet — tracked separately). ## Deployment notes - First query after deploy with no graph: walker path, same perf as today. - After one `mix anvil.graphs.rebuild` per repo: graph path on every subsequent query (until a push introduces commits not in the graph, at which point that specific query falls back until the next rebuild). - No incremental update in this PR — graph staleness after push is handled by the fallback, not by invalidation.
SHA: 2b54c14bba04f24906f8e2d3318424d8e050cbb8
Author: Anvil <noreply@anvil.fangorn.io>
Date: 2026-04-18 18:31
Parents: 37bb565
8 files changed +841 -42
Type
lib/ex_git_objectstore.ex +138 −6
@@ -54,7 +54,7 @@
moduledoc for details.
"""
alias ExGitObjectstore.{Merge, Object, ObjectResolver, Ref, Repo, Walk}
alias ExGitObjectstore.{Graph, Merge, Object, ObjectResolver, Ref, Repo, Telemetry, Walk}
alias ExGitObjectstore.Object.{Blob, Commit, Tree}
@type sha :: String.t()
@@ -681,13 +681,145 @@
True if `ancestor` is an ancestor of `descendant` (inclusive — a commit is
its own ancestor).
Uses the persisted commit-graph index when available (see
`ExGitObjectstore.Graph`). Falls back to a cat_object-based walker when
the graph isn't built or doesn't yet cover one of the SHAs.
Implemented by checking `merge_base(ancestor, descendant) == ancestor`.
Emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :ancestor?` and `path: :graph | :fallback`.
"""
@spec ancestor?(Repo.t(), sha(), sha()) :: {:ok, boolean()} | {:error, term()}
def ancestor?(%Repo{} = repo, ancestor_sha, descendant_sha) do
  # Route through the shared graph -> fallback machinery. The graph closure
  # only answers when both SHAs are members of the loaded graph; otherwise
  # `graph_result/3` signals `:fallback` and the walker closure runs instead.
  routed_query(
    repo,
    :ancestor?,
    fn graph ->
      graph_result(
        graph,
        [ancestor_sha, descendant_sha],
        &Graph.ancestor?(&1, ancestor_sha, descendant_sha)
      )
    end,
    fn -> Graph.Fallback.ancestor?(repo, ancestor_sha, descendant_sha) end
  )
end
@doc """
Reports how far `head_sha` and `base_sha` have diverged.

`:ahead` is the number of commits reachable from `head_sha` but not from
`base_sha`; `:behind` is the count in the opposite direction. This mirrors
the output of `git rev-list --count --left-right base...head`.

The answer comes from the persisted commit-graph index when one is
available. When the graph isn't built, or doesn't yet cover one of the
SHAs, a cat_object-based walker is used instead.

Every call emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :ahead_behind` and `path: :graph | :fallback`.
"""
@spec ahead_behind(Repo.t(), sha(), sha()) ::
        {:ok, %{ahead: non_neg_integer(), behind: non_neg_integer()}} | {:error, term()}
def ahead_behind(%Repo{} = repo, base_sha, head_sha) do
  # Answer from the graph, but only when both endpoints are members of it.
  via_graph = fn graph ->
    graph_result(graph, [base_sha, head_sha], fn covered ->
      Graph.ahead_behind(covered, base_sha, head_sha)
    end)
  end

  # Reference walker used whenever the graph can't answer.
  via_walker = fn -> Graph.Fallback.ahead_behind(repo, base_sha, head_sha) end

  routed_query(repo, :ahead_behind, via_graph, via_walker)
end
@doc """
Lists the commits reachable from `head_sha` but not from `base_sha`,
newest-first.

The list is empty when `head_sha` is an ancestor of (or equal to)
`base_sha`.

The persisted commit-graph index answers the query when available;
otherwise a cat_object-based walker is used.

Every call emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :commits_between` and `path: :graph | :fallback`.
"""
@spec commits_between(Repo.t(), sha(), sha()) :: {:ok, [sha()]} | {:error, term()}
def commits_between(%Repo{} = repo, base_sha, head_sha) do
  # Answer from the graph, but only when both endpoints are members of it.
  via_graph = fn graph ->
    graph_result(graph, [base_sha, head_sha], fn covered ->
      Graph.commits_between(covered, base_sha, head_sha)
    end)
  end

  # Reference walker used whenever the graph can't answer.
  via_walker = fn -> Graph.Fallback.commits_between(repo, base_sha, head_sha) end

  routed_query(repo, :commits_between, via_graph, via_walker)
end
@doc """
Rebuilds the commit-graph index for `repo` from all refs, persists it to
storage, and seeds the in-process cache.

Safe to call from any process; callers should serialize concurrent
rebuilds externally.
"""
@spec rebuild_graph(Repo.t()) :: :ok | {:error, term()}
def rebuild_graph(%Repo{} = repo) do
  repo
  |> Graph.build()
  |> store_and_cache_graph(repo)
end

# Saves a freshly built graph and, once the save reports `:ok`, seeds the
# cache (whose result becomes the return value). Any other build or save
# result is handed back to the caller unchanged.
defp store_and_cache_graph({:ok, graph}, repo) do
  case Graph.save(repo, graph) do
    :ok -> Graph.Cache.put(repo, graph)
    not_saved -> not_saved
  end
end

defp store_and_cache_graph(build_failure, _repo), do: build_failure
# -- Graph → fallback routing --
#
# `query_fun` receives the loaded graph and must return one of:
#
#   * `{:ok, result}`    — the graph answered the query
#   * `:fallback`        — the graph is loaded but doesn't cover the query
#   * `{:error, reason}` — the graph said no; propagated as-is
#
# `fallback_fun` runs only when the graph can't answer: it is missing from
# storage, the cache missed and the load failed, or `query_fun` signalled
# `:fallback`.
#
# The whole routed call is wrapped in a telemetry span; the stop metadata
# carries the `:path` that produced the result.
defp routed_query(%Repo{} = repo, operation, query_fun, fallback_fun) do
  base_meta = %{operation: operation, repo_id: repo.id}

  span_body = fn ->
    {outcome, taken_path} = run_routed(repo, query_fun, fallback_fun)
    {outcome, Map.put(base_meta, :path, taken_path)}
  end

  Telemetry.span([:ex_git_objectstore, :graph, :query], base_meta, span_body)
end
# Picks the path for one query and returns `{result, path}` where `path` is
# `:graph` or `:fallback`. A graph that fails to load is treated the same as
# a graph that declines the query: the walker answers.
defp run_routed(repo, query_fun, fallback_fun) do
  graph_answer =
    case load_or_fetch_graph(repo) do
      {:ok, graph} -> query_fun.(graph)
      {:error, _} -> :fallback
    end

  case graph_answer do
    :fallback -> {fallback_fun.(), :fallback}
    {:ok, _} -> {graph_answer, :graph}
    {:error, _} -> {graph_answer, :graph}
  end
end
# Guards a graph query on membership: if any SHA in `required_shas` is
# absent from the graph, returns `:fallback` so the caller routes to its
# walker; otherwise returns whatever `graph_fun.(graph)` produces.
defp graph_result(graph, required_shas, graph_fun) do
  uncovered? = Enum.any?(required_shas, fn sha -> !Graph.member?(graph, sha) end)

  if uncovered?, do: :fallback, else: graph_fun.(graph)
end
# Returns the graph for `repo`, preferring the in-process cache. On a cache
# miss the graph is loaded from storage and, if that succeeds, written to
# the cache before being returned.
defp load_or_fetch_graph(repo) do
  case Graph.Cache.fetch(repo) do
    :miss -> load_and_cache_graph(repo)
    {:ok, _graph} = hit -> hit
  end
end

# Cache-miss path: a load error is returned untouched and nothing is cached.
defp load_and_cache_graph(repo) do
  case Graph.load(repo) do
    {:error, _} = failure ->
      failure

    {:ok, graph} = loaded ->
      :ok = Graph.Cache.put(repo, graph)
      loaded
  end
end