fangorn/ex_git_objectstore
public
ref:main
# Copyright 2026 Cole Christensen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Baseline benchmark for PR 1 of the commit-graph work (ex_git_objectstore#26).
#
# Measures the cost of the current per-commit-object walker on a 5000-commit
# linear chain, and times Graph.Builder.build/1 on the same repo. The walker
# being measured is the one replicated from Anvil's
# lib/anvil/git/objectstore.ex `collect_ancestors` (the hot path that has been
# saturating prod CPU — see fangorn/anvil#55).
#
# Run with: mix run bench/graph_build.exs
alias ExGitObjectstore.{Graph, Object}
alias ExGitObjectstore.Object.{Commit, Tree}
alias ExGitObjectstore.{ObjectResolver, Repo, Storage.Memory}
defmodule Bench.Walker do
@moduledoc false
# Replicates Anvil.Git.Objectstore.collect_ancestors/3 — reads each commit
# via ObjectResolver.read/2 and enqueues its parents. Purely a baseline to
# compare against; not intended for production use.
def walk_all_ancestors(repo, tip) do
do_walk(repo, [tip], MapSet.new())
end
defp do_walk(_repo, [], visited), do: visited
defp do_walk(repo, [sha | rest], visited) do
if MapSet.member?(visited, sha) do
do_walk(repo, rest, visited)
else
visited = MapSet.put(visited, sha)
parents =
case ObjectResolver.read(repo, sha) do
{:ok, %Commit{parents: ps}} -> ps
_ -> []
end
do_walk(repo, parents ++ rest, visited)
end
end
end
defmodule Bench.Fixture do
@moduledoc false
def build_linear_chain(repo, n) do
{:ok, tree_sha} = Object.write(repo, Tree.new([]))
Enum.reduce(1..n, nil, fn i, prev ->
parents = if prev, do: [prev], else: []
ident = "A <a@a.com> #{1_700_000_000 + i} +0000"
commit = %Commit{
tree: tree_sha,
parents: parents,
author: ident,
committer: ident,
message: "c\n"
}
{:ok, sha} = Object.write(repo, commit)
sha
end)
end
end
defmodule Bench.Run do
@moduledoc false
def now_us, do: :erlang.monotonic_time(:microsecond)
def time(label, fun) do
t0 = now_us()
result = fun.()
t1 = now_us()
elapsed_ms = (t1 - t0) / 1000
padded = String.pad_trailing(label, 45)
IO.puts(" #{padded} #{Float.round(elapsed_ms, 2) |> :erlang.float_to_binary(decimals: 2)} ms")
{result, elapsed_ms}
end
def run(n) do
IO.puts("\n=== Commit-graph baseline — #{n} commits, linear chain ===\n")
{:ok, mem_pid} = Memory.start_link()
repo = Repo.new("bench", storage: {Memory, Memory.config(mem_pid)})
:ok = ExGitObjectstore.init(repo)
tip =
time("Fixture: write #{n} commits", fn ->
Bench.Fixture.build_linear_chain(repo, n)
end)
|> elem(0)
:ok = ExGitObjectstore.create_branch(repo, "main", tip)
# 1) Today's walker (pure Elixir cat_object-per-commit BFS).
{visited, walker_ms} =
time("Walker: collect_ancestors (tip→roots)", fn ->
Bench.Walker.walk_all_ancestors(repo, tip)
end)
IO.puts(" (walker visited #{MapSet.size(visited)} commits)")
# 2) PR 1's builder.
{{:ok, graph}, build_ms} =
time("Builder.build (from all refs)", fn -> Graph.build(repo) end)
IO.puts(" (graph contains #{Graph.size(graph)} commits)")
# 3) Persist + reload (steady-state fast path — no cat_object calls).
{:ok, load_ms} =
time("Graph.save + Graph.load (persist + rehydrate)", fn ->
:ok = Graph.save(repo, graph)
{:ok, _} = Graph.load(repo)
:ok
end)
|> then(fn {_, ms} -> {:ok, ms} end)
# 4) Query paths on a loaded graph vs. the walker.
root = walk_to_root(repo, tip)
{_, walker_ab_ms} =
time("Walker: full ancestry scan (for ahead_behind)", fn ->
Bench.Walker.walk_all_ancestors(repo, tip)
Bench.Walker.walk_all_ancestors(repo, root)
end)
{_, graph_ab_ms} =
time("Graph.ahead_behind (in-memory)", fn ->
{:ok, _} = Graph.ahead_behind(graph, root, tip)
end)
{_, graph_between_ms} =
time("Graph.commits_between (in-memory)", fn ->
{:ok, _} = Graph.commits_between(graph, root, tip)
end)
IO.puts("\n walker_ms / build_ms = #{Float.round(walker_ms / max(build_ms, 0.1), 2)}x")
IO.puts(" walker_ms / load_ms = #{Float.round(walker_ms / max(load_ms, 0.1), 2)}x")
IO.puts(
" walker_ab_ms / graph_ab_ms = #{Float.round(walker_ab_ms / max(graph_ab_ms, 0.01), 2)}x"
)
_ = graph_between_ms
IO.puts("")
end
defp walk_to_root(repo, sha) do
case ObjectResolver.read(repo, sha) do
{:ok, %Commit{parents: []}} -> sha
{:ok, %Commit{parents: [p | _]}} -> walk_to_root(repo, p)
end
end
end
# Default: 5000 commits, override via CLI arg.
n =
case System.argv() do
[arg | _] -> String.to_integer(arg)
_ -> 5_000
end
Bench.Run.run(n)