Commit 1f149a1 - fangorn/ex_git_objectstore


      fangorn/ex_git_objectstore

public

ref:1f149a1f725cda30241b369a6a5bb3f38001552d

perf: graph cache mtime freshness + walk-base-once in fallback (#26)

Two related fixes from investigating fangorn/anvil's 2.6 s PR-list mount. Adds Storage.blob_fingerprint optional callback (Filesystem returns mtime+size, others :unsupported) so Graph.Cache.fetch/2 can return :stale when the on-disk graph has been rewritten by a separate VM. Without this, 'mix anvil.graphs.rebuild' from a separate shell never reaches the running app's persistent_term cache — that's why the rebuild was 'briefly effective then gone'. Adds Graph.Fallback.ahead_behind_many/4 (walks ancestors(base) once and reuses it across heads). The old fill_per_head/4 was O(N · |ancestors(base)|) which on fangorn/anvil with ~50 PRs against a few-hundred-commit base was the actual reason the slow path was multi-second, not just slow. API change: Graph.Cache.fetch/1 -> fetch/2 and put/2 -> put/3 (added fingerprint arg; pass Cache.no_fingerprint() for old behaviour). ## Test plan - [x] 924 tests / 0 failures - [x] New tests cover walk-once correctness (vs per-call), cache freshness transitions, and an end-to-end "separate-process rewrite triggers reload" integration test. - [x] mix format --check-formatted clean. - [x] No new credo issues in changed files.

SHA: 1f149a1f725cda30241b369a6a5bb3f38001552d

Author: Anvil <noreply@anvil.fangorn.io>

Date: 2026-05-06 01:54

Parents: c38581b

9 files changed +451 -57

Type

Type
	lib/ex_git_objectstore.ex	+38 −10
@@ -775,13 +775,23 @@ end) end # Heads not covered by the graph (or queries against a graph-less repo) # land here. Naively iterating per-head re-walks ancestors(base_sha) for # every head — that's O(N · \|ancestors(base)\|), which on a 50-PR / # 400-commit base materially shows up as a multi-second LiveView mount. # `Graph.Fallback.ahead_behind_many/4` walks base once and reuses the # ancestor set, restoring graceful degradation. defp fill_per_head(_repo, _base_sha, [], acc), do: acc defp fill_per_head(repo, base_sha, head_shas, acc) do Enum.reduce(head_shas, acc, fn head_sha, acc -> case ahead_behind(repo, base_sha, head_sha) do {:ok, counts} -> Map.put(acc, head_sha, counts) {:error, _} -> acc end end) case Graph.Fallback.ahead_behind_many(repo, base_sha, head_shas) do {:ok, by_head} -> Map.merge(acc, by_head) # Base walk failed — preserve prior behaviour by returning whatever # the graph path already filled in. The caller's contract is "missing # heads are omitted," so dropping all of them on a base failure is # consistent. {:error, _} -> acc end end @doc """ @@ -815,6 +825,6 @@ def rebuild_graph(%Repo{} = repo) do with {:ok, graph} <- Graph.build(repo), :ok <- Graph.save(repo, graph) do Graph.Cache.put(repo, graph, current_graph_fingerprint(repo)) Graph.Cache.put(repo, graph) end end @@ -863,18 +873,36 @@ end defp load_or_fetch_graph(repo) do case Graph.Cache.fetch(repo) do fp = current_graph_fingerprint(repo) case Graph.Cache.fetch(repo, fp) do {:ok, graph} -> {:ok, graph} other when other in [:miss, :stale] -> :miss -> case Graph.load(repo) do {:ok, graph} -> :ok = Graph.Cache.put(repo, graph) :ok = Graph.Cache.put(repo, graph, fp) {:ok, graph} {:error, _} = err -> err end end end # The fingerprint is `:no_fingerprint` when the storage backend can't # cheaply detect changes (S3, Memory). Cache stays valid until an # explicit `Graph.Cache.delete/1` (writers must coordinate). 
For # Filesystem, a `mtime+size` tuple from `stat/2` does the job — one # syscall per query, far cheaper than reloading the graph. defp current_graph_fingerprint(%Repo{storage: {mod, _}} = repo) do if function_exported?(mod, :blob_fingerprint, 3) do case Repo.storage_call(repo, :blob_fingerprint, [Graph.blob_key()]) do {:ok, fp} -> fp {:error, _} -> Graph.Cache.no_fingerprint() end else Graph.Cache.no_fingerprint() end end

lib/ex_git_objectstore.ex

+38 −10

@@ -775,13 +775,23 @@
    end)
  end
  # Heads not covered by the graph (or queries against a graph-less repo)
  # land here. Naively iterating per-head re-walks ancestors(base_sha) for
  # every head — that's O(N · |ancestors(base)|), which on a 50-PR /
  # 400-commit base materially shows up as a multi-second LiveView mount.
  # `Graph.Fallback.ahead_behind_many/4` walks base once and reuses the
  # ancestor set, restoring graceful degradation.
  # Empty head list: nothing left to fill in, so the accumulator is returned
  # unchanged.
  defp fill_per_head(_repo, _base_sha, [], acc), do: acc
  defp fill_per_head(repo, base_sha, head_shas, acc) do
    # NOTE(review): this is a diff view without +/- markers, so the old and
    # new bodies appear back to back. Per the commit message, the
    # `Enum.reduce` below is the OLD body removed by this commit: it calls
    # `ahead_behind/3` once per head, adding each `{:ok, counts}` to the
    # accumulator under its head SHA and skipping heads that error.
    Enum.reduce(head_shas, acc, fn head_sha, acc ->
      case ahead_behind(repo, base_sha, head_sha) do
        {:ok, counts} -> Map.put(acc, head_sha, counts)
        {:error, _} -> acc
      end
    end)
    # NOTE(review): the `case` below is the NEW body added by this commit: a
    # single batched call for all heads whose `{:ok, by_head}` map is merged
    # into `acc` (on key collisions `Map.merge/2` keeps the `by_head` value).
    case Graph.Fallback.ahead_behind_many(repo, base_sha, head_shas) do
      {:ok, by_head} -> Map.merge(acc, by_head)
      # Base walk failed — preserve prior behaviour by returning whatever
      # the graph path already filled in. The caller's contract is "missing
      # heads are omitted," so dropping all of them on a base failure is
      # consistent.
      {:error, _} -> acc
    end
  end
  @doc """
@@ -815,6 +825,6 @@
  def rebuild_graph(%Repo{} = repo) do
    # Build and persist first; the cache is only touched when both steps
    # succeed. Otherwise `with` returns the first non-matching result as-is.
    with {:ok, graph} <- Graph.build(repo),
         :ok <- Graph.save(repo, graph) do
      # NOTE(review): this is a diff view without +/- markers. Per the commit
      # message (`put/2 -> put/3`), the three-argument call is the line added
      # by this commit and the two-argument call after it is the line removed.
      # The fingerprint is read after `Graph.save/2` has returned `:ok`,
      # presumably so it describes the blob that was just written.
      Graph.Cache.put(repo, graph, current_graph_fingerprint(repo))
      Graph.Cache.put(repo, graph)
    end
  end
@@ -863,18 +873,36 @@
  end
  defp load_or_fetch_graph(repo) do
    # NOTE(review): this is a diff view without +/- markers, so old and new
    # lines are interleaved and the block does not parse as shown. Per the
    # commit message (`Graph.Cache.fetch/1 -> fetch/2`, `put/2 -> put/3`, and
    # the new `:stale` result), the one-argument `fetch`, the bare `:miss ->`
    # clause and the two-argument `put` are the removed lines; the
    # fingerprint-aware line next to each one is its replacement.
    case Graph.Cache.fetch(repo) do
    # The fingerprint is read once, before the cache lookup, and the same
    # value is reused for the `put` after a reload.
    fp = current_graph_fingerprint(repo)
    case Graph.Cache.fetch(repo, fp) do
      {:ok, graph} ->
        {:ok, graph}
      # Both `:miss` and `:stale` lead to a reload via `Graph.load/1`.
      other when other in [:miss, :stale] ->
      :miss ->
        case Graph.load(repo) do
          {:ok, graph} ->
            # `:ok =` asserts the cache write succeeded; any other return
            # raises a MatchError.
            :ok = Graph.Cache.put(repo, graph)
            :ok = Graph.Cache.put(repo, graph, fp)
            {:ok, graph}
          {:error, _} = err ->
            # Load failures are handed back to the caller; nothing is cached.
            err
        end
    end
  end
  # Returns the fingerprint of the persisted graph blob, which
  # `load_or_fetch_graph/1` passes to `Graph.Cache.fetch/2` and
  # `Graph.Cache.put/3`.
  #
  # The fingerprint is `:no_fingerprint` when the storage backend can't
  # cheaply detect changes (S3, Memory). Cache stays valid until an
  # explicit `Graph.Cache.delete/1` (writers must coordinate). For
  # Filesystem, a `mtime+size` tuple from `stat/2` does the job — one
  # syscall per query, far cheaper than reloading the graph.
  #
  # Falls back to `Graph.Cache.no_fingerprint()` when the backend does not
  # export `blob_fingerprint/3` or when the callback returns `{:error, _}`.
  defp current_graph_fingerprint(%Repo{storage: {mod, _}} = repo) do
    # `function_exported?/3` does not load the module: for a backend module
    # that has not been loaded yet it returns false even if the callback
    # exists, which would silently disable freshness checks. Loading the
    # module first makes the capability probe reliable.
    if Code.ensure_loaded?(mod) and function_exported?(mod, :blob_fingerprint, 3) do
      case Repo.storage_call(repo, :blob_fingerprint, [Graph.blob_key()]) do
        {:ok, fp} -> fp
        # Best effort: a failed fingerprint read degrades to "no fingerprint"
        # rather than failing the whole graph query.
        {:error, _} -> Graph.Cache.no_fingerprint()
      end
    else
      Graph.Cache.no_fingerprint()
    end
  end