@@ -453,12 +453,16 @@
# -- Object Collection (shared logic with v1 UploadPack) --
defp collect_objects(repo, wants, haves) do
# `opts` is a map of walker options. Currently the only option is
# `:skip_blobs` (bool) — when true, blob objects are not read from
# storage and not included in the pack. Lets `filter=blob:none` skip
# all blob I/O instead of loading then discarding.
defp collect_objects(repo, wants, haves, opts) do
have_set = MapSet.new(haves)
try do
{objects, _visited} =
Enum.reduce(wants, {[], MapSet.new()}, fn sha, {acc, visited} ->
{new_objects, visited} = collect_reachable(repo, sha, have_set, visited, opts)
{new_objects, visited} = collect_reachable(repo, sha, have_set, visited)
{new_objects ++ acc, visited}
end)
@@ -469,29 +473,29 @@
end
end
# Collects the objects reachable from `sha`, threading the `visited` set.
#
# Returns `{objects, visited}`. A sha that is already in `exclude_set` or
# in `visited` contributes no objects and leaves `visited` unchanged.
# Otherwise the sha is marked visited *before* the object is read, and
# the work is delegated to `collect_reachable_object/5`. `opts` (the
# walker options map) is passed through untouched.
defp collect_reachable(repo, sha, exclude_set, visited, opts) do
  if MapSet.member?(exclude_set, sha) or MapSet.member?(visited, sha) do
    {[], visited}
  else
    visited = MapSet.put(visited, sha)
    collect_reachable_object(repo, sha, exclude_set, visited, opts)
  end
end
defp collect_reachable_object(repo, sha, exclude_set, visited, opts) do
defp collect_reachable_object(repo, sha, exclude_set, visited) do
case ObjectResolver.read(repo, sha) do
{:ok, %Commit{} = commit} ->
collect_commit_objects(repo, sha, commit, exclude_set, visited, opts)
collect_commit_objects(repo, sha, commit, exclude_set, visited)
{:ok, %Tree{} = tree} ->
collect_tree_entry_objects(repo, tree, sha, visited, 0)
collect_tree_entry_objects(repo, tree, sha, visited, 0, opts)
{:ok, %Blob{content: content}} ->
{[{:blob, content, sha}], visited}
if opts[:skip_blobs], do: {[], visited}, else: {[{:blob, content, sha}], visited}
{:ok, %Tag{} = tag} ->
tag_data = Object.encode_content_only(tag)
{target_objs, visited} = collect_reachable(repo, tag.object, exclude_set, visited, opts)
{target_objs, visited} = collect_reachable(repo, tag.object, exclude_set, visited)
{[{:tag, tag_data, sha} | target_objs], visited}
{:error, _} ->
@@ -499,11 +503,11 @@
end
end
defp collect_commit_objects(repo, sha, commit, exclude_set, visited) do
{tree_objects, visited} = collect_tree_objects(repo, commit.tree, visited, 0)
defp collect_commit_objects(repo, sha, commit, exclude_set, visited, opts) do
{tree_objects, visited} = collect_tree_objects(repo, commit.tree, visited, 0, opts)
{parent_objects, visited} =
Enum.reduce(commit.parents, {[], visited}, fn parent_sha, {acc, vis} ->
{objs, vis} = collect_reachable(repo, parent_sha, exclude_set, vis, opts)
{objs, vis} = collect_reachable(repo, parent_sha, exclude_set, vis)
{objs ++ acc, vis}
end)
@@ -516,18 +520,18 @@
{objects, visited}
end
# Depth-limit clause: raises (a RuntimeError with the message below) as
# soon as the recursion depth exceeds `@max_tree_depth`. All other
# arguments, including the walker `opts`, are ignored here.
defp collect_tree_objects(_repo, _tree_sha, _visited, depth, _opts)
     when depth > @max_tree_depth do
  raise "max_tree_depth_exceeded"
end
defp collect_tree_objects(repo, tree_sha, visited, depth, opts) do
defp collect_tree_objects(repo, tree_sha, visited, depth) do
if MapSet.member?(visited, tree_sha) do
{[], visited}
else
case ObjectResolver.read(repo, tree_sha) do
{:ok, %Tree{} = tree} ->
collect_tree_entry_objects(repo, tree, tree_sha, visited, depth)
collect_tree_entry_objects(repo, tree, tree_sha, visited, depth, opts)
_ ->
{[], visited}
@@ -535,35 +539,41 @@
end
end
# Emits the pack entry for an already-loaded tree plus the entries
# collected from each of its children.
#
# Marks `tree_sha` as visited, encodes the tree, then folds over
# `tree.entries` via `collect_single_tree_entry/6`, forwarding `depth`
# and the walker `opts`. Returns `{objects, visited}` where the tree
# entry itself comes first, followed by the reversed child accumulator.
defp collect_tree_entry_objects(repo, %Tree{} = tree, tree_sha, visited, depth, opts) do
  visited = MapSet.put(visited, tree_sha)
  tree_content = Tree.encode_content(tree)

  {child_objects, visited} =
    Enum.reduce(tree.entries, {[], visited}, fn entry, {acc, vis} ->
      collect_single_tree_entry(repo, entry, acc, vis, depth, opts)
    end)

  # Children are accumulated by prepending, so flip them once at the end.
  {[{:tree, tree_content, tree_sha} | Enum.reverse(child_objects)], visited}
end
# Handles one entry of a tree, returning the updated `{acc, vis}` pair.
#
# Sub-tree entries (mode "40000") recurse through `collect_tree_objects/5`
# one level deeper; the objects found are prepended to `acc`.
defp collect_single_tree_entry(repo, %{mode: "40000", sha: sha}, acc, vis, depth, opts) do
  {objs, vis} = collect_tree_objects(repo, sha, vis, depth + 1, opts)
  {objs ++ acc, vis}
end

# Every other entry is handled as a potential blob:
#   * already visited       -> nothing to do;
#   * `opts[:skip_blobs]`   -> mark visited, never touch storage;
#   * otherwise             -> mark visited and read it via `read_blob_entry/4`.
defp collect_single_tree_entry(repo, %{sha: sha}, acc, vis, _depth, opts) do
  cond do
    MapSet.member?(vis, sha) ->
      {acc, vis}

    opts[:skip_blobs] ->
      # Don't read blob content from storage at all. Mark visited so
      # we don't re-check on every tree that references this blob.
      {acc, MapSet.put(vis, sha)}

    true ->
      read_blob_entry(repo, sha, acc, MapSet.put(vis, sha))
  end
end
# Reads `sha` through `ObjectResolver.read/2`. When the result is a blob,
# its `{:blob, content, sha}` entry is prepended to `acc`; any other
# result leaves `acc` as it was. `vis` is returned unchanged either way.
defp read_blob_entry(repo, sha, acc, vis) do
  with {:ok, %Blob{content: content}} <- ObjectResolver.read(repo, sha) do
    {[{:blob, content, sha} | acc], vis}
  else
    _ -> {acc, vis}
  end
end
@@ -630,16 +640,27 @@
# otherwise fall through to the standard full-reachability walk.
# Objects are then passed through the filter (if any).
defp collect_objects_maybe_shallow(repo, wants, haves, shallow_opts, filter_spec) do
  # Decide once, from the filter spec, whether the walkers may skip
  # blobs entirely; both walk variants receive the same options map.
  walk_opts = %{skip_blobs: early_skip_blobs?(filter_spec)}

  result =
    case shallow_opts do
      # No shallow options: full reachability walk, wrapped so both
      # branches yield the same `%{objects:, new_shallow:, unshallow:}` map.
      nil -> collect_objects(repo, wants, haves, walk_opts) |> wrap_full()
      _ -> walk_shallow(repo, wants, shallow_opts, walk_opts)
    end

  # Anything other than `{:ok, walk}` falls through `with` unchanged.
  with {:ok, walk} <- result do
    {:ok, %{walk | objects: apply_filter(walk.objects, filter_spec, repo)}}
  end
end
# Tells the walker whether the given filter spec rules out every blob,
# in which case blob content never needs to be read from storage.
#
# True for `:blob_none`, for an `{:object_type, type}` spec whose type
# is not `:blob`, and for a `{:combine, specs}` where at least one
# member spec is itself true. Everything else (including `nil`) is false.
defp early_skip_blobs?(filter_spec) do
  case filter_spec do
    :blob_none -> true
    {:object_type, type} when type != :blob -> true
    {:combine, specs} -> Enum.any?(specs, &early_skip_blobs?/1)
    _ -> false
  end
end
# Lifts a successful full-walk result (`{:ok, objects}`) into the walk
# map shape with empty `new_shallow` / `unshallow` lists. Any other
# value is handed back untouched.
defp wrap_full(result) do
  case result do
    {:ok, objects} -> {:ok, %{objects: objects, new_shallow: [], unshallow: []}}
    other -> other
  end
end
@@ -647,16 +668,30 @@
# Runs the collected pack entries through the filter spec.
#
# With no spec (`nil`) the list is returned as-is. Otherwise each entry
# is kept only if `Filter.include?/3` accepts it for the context built
# by `filter_ctx_for/2`.
defp apply_filter(objects, nil, _repo), do: objects

defp apply_filter(objects, spec, repo) do
  # Tree depth and blob path are only needed for certain filter specs.
  # Walking every commit's root tree is expensive, so skip when the
  # active spec doesn't use that metadata and hand the context builder
  # empty maps instead.
  meta =
    if filter_needs_tree_metadata?(spec),
      do: compute_tree_metadata(repo, objects),
      else: %{depths: %{}, paths: %{}}

  Enum.filter(objects, fn entry ->
    Filter.include?(spec, filter_ctx_for(entry, meta), repo)
  end)
end
# Reports whether a filter spec depends on per-object tree metadata.
# `{:tree_depth, _}` reads `ctx.tree_depth` and `{:sparse_oid, _}` reads
# `ctx.path`; a `{:combine, specs}` needs the metadata when any member
# does. All remaining specs decide from the type and size already
# present in the pack entry tuple, so they answer false.
defp filter_needs_tree_metadata?(spec) do
  case spec do
    {:tree_depth, _} -> true
    {:sparse_oid, _} -> true
    {:combine, specs} -> Enum.any?(specs, &filter_needs_tree_metadata?/1)
    _ -> false
  end
end
defp filter_ctx_for({:blob, content, sha}, meta) do
%{
type: :blob,
@@ -756,7 +791,7 @@
# excluded (become client's shallow boundaries); `unshallow` lists
# client boundaries whose parents are now included (client can drop
# its prior shallow marker).
defp walk_shallow(repo, wants, opts) do
defp walk_shallow(repo, wants, opts, walk_opts) do
excluded = build_excluded_set(repo, opts.not_refs)
# `budget` is the number of additional commits this branch can
@@ -765,7 +800,8 @@
# budget at its parent (depth counted from the boundary).
initial_budget = opts.depth || :infinity
init_queue = Enum.map(wants, fn w -> {w, initial_budget} end)
init_queue =
Enum.reduce(wants, :queue.new(), fn w, q -> :queue.in({w, initial_budget}, q) end)
state = %{
repo: repo,
@@ -778,7 +814,7 @@
}
final = walk_shallow_loop(init_queue, state)
objects = collect_shallow_objects(repo, Enum.reverse(final.walked_commits), walk_opts)
objects = collect_shallow_objects(repo, Enum.reverse(final.walked_commits))
{:ok,
%{
@@ -790,20 +826,21 @@
e -> {:error, Exception.message(e)}
end
# Drives the shallow walk off an Erlang `:queue` so that enqueueing and
# dequeueing are cheap. (The previous list-based loop appended with
# `rest ++ parent_items`, which cost O(n) per enqueue and went quadratic
# on repositories with long histories.)
#
# Pops the next `{sha, budget}` item and hands it, together with the
# remaining queue, to `step_shallow/4`; an empty queue ends the walk and
# returns the accumulated `state`.
defp walk_shallow_loop(queue, state) do
  popped = :queue.out(queue)

  case popped do
    {{:value, {sha, budget}}, remaining} ->
      step_shallow(sha, budget, remaining, state)

    {:empty, _drained} ->
      state
  end
end
# Processes one dequeued `{sha, budget}` item of the shallow walk.
#
# `rest` is the remaining queue. The item is dropped, and the loop
# simply continues with `rest`, when the sha was already visited, is in
# the excluded set, or its budget is exhausted. Otherwise the commit is
# visited via `visit_shallow_commit/4`.
defp step_shallow(sha, budget, rest, state) do
  cond do
    MapSet.member?(state.visited, sha) -> walk_shallow_loop(rest, state)
    MapSet.member?(state.excluded, sha) -> walk_shallow_loop(rest, state)
    budget_exhausted?(budget) -> walk_shallow_loop(rest, state)
    true -> visit_shallow_commit(sha, budget, rest, state)
  end
end
@@ -838,8 +875,9 @@
if walk_parents? do
state = maybe_unshallow(state, sha)
parent_items = Enum.map(parents, fn p -> {p, next_budget(sha, budget, state.opts)} end)
walk_shallow_loop(rest ++ parent_items, state)
parent_budget = next_budget(sha, budget, state.opts)
next_queue = Enum.reduce(parents, rest, fn p, q -> :queue.in({p, parent_budget}, q) end)
walk_shallow_loop(next_queue, state)
else
# Boundary commit: we stop here, parents are excluded from the pack.
walk_shallow_loop(rest, %{state | new_shallow: MapSet.put(state.new_shallow, sha)})
@@ -918,14 +956,14 @@
# each commit itself, its root tree, and all reachable sub-trees
# and blobs. Parents are NOT followed — the shallow walk already
# decided which commits are in scope.
defp collect_shallow_objects(repo, commits) do
defp collect_shallow_objects(repo, commits, walk_opts) do
{objects, _visited} =
Enum.reduce(commits, {[], MapSet.new()}, fn {sha, commit}, {acc, visited} ->
if MapSet.member?(visited, sha) do
{acc, visited}
else
visited = MapSet.put(visited, sha)
{tree_objs, visited} = collect_tree_objects(repo, commit.tree, visited, 0)
{tree_objs, visited} = collect_tree_objects(repo, commit.tree, visited, 0, walk_opts)
commit_entry = {:commit, Object.encode_content_only(commit), sha}
{[commit_entry | tree_objs ++ acc], visited}
end