@@ -53,16 +53,26 @@
@doc """
Parse all objects from a packfile binary, resolving deltas.
Returns a list of `%{type: atom, data: binary, offset: integer}`.
## Options
- `:external_resolver` — a function `(sha :: String.t()) -> {:ok, {type_atom, data}} | {:error, term()}`
used to resolve REF_DELTA bases not found in the pack (thin pack support).
"""
@spec parse(binary()) :: {:ok, [pack_object()]} | {:error, term()}
@spec parse(binary(), keyword()) :: {:ok, [pack_object()]} | {:error, term()}
def parse(pack_data, opts \\ [])
def parse(
<<@pack_signature, version::unsigned-big-32, count::unsigned-big-32, _rest::binary>> =
pack_data
pack_data,
opts
)
when version in [2, 3] do
if count > @max_pack_objects do
{:error, {:pack_too_large, count}}
else
external_resolver = Keyword.get(opts, :external_resolver)
# Verify trailing checksum (PK1)
case verify_pack_checksum(pack_data) do
:ok ->
@@ -74,7 +84,7 @@
case parse_entries(entries_data, count, [], %{}, 12) do
{:ok, entries, cache} ->
resolve_delta_entries(entries, pack_data, cache)
resolve_delta_entries(entries, pack_data, cache, external_resolver)
{:error, _} = err ->
err
@@ -86,7 +96,7 @@
end
end
def parse(_), do: {:error, :invalid_pack_header}
def parse(_, _opts), do: {:error, :invalid_pack_header}
@doc """
Verify the trailing SHA-1 checksum of a pack file.
@@ -121,7 +131,7 @@
@spec read_object(binary(), non_neg_integer(), map()) ::
{:ok, {atom(), binary()}} | {:error, term()}
def read_object(pack_data, offset, cache) do
do_read_object(pack_data, offset, cache, 0)
do_read_object(pack_data, offset, cache, 0, nil)
end
# -- Private --
@@ -153,27 +163,167 @@
end
end
# Resolve delta entries in two phases.
#
# Phase 1: every entry that is not a REF_DELTA. Full objects pass through
# unchanged; OFS_DELTAs are resolved through `do_read_object/5`. The
# `external_resolver` is forwarded to that call so that an OFS_DELTA whose
# base chain runs through a REF_DELTA with an out-of-pack base (thin pack)
# can still be resolved instead of failing the whole parse.
#
# Phase 2: REF_DELTA entries. A SHA→{type, data} map is built from the
# Phase 1 results (so bases that were stored as OFS_DELTAs are addressable by
# SHA) and handed to `resolve_ref_deltas/6` together with `external_resolver`.
#
# Arguments:
#   * `entries`           - parsed pack entries; each has `:type` and `:offset`
#   * `pack_data`         - the full pack binary
#   * `cache`             - lookup cache passed through to the object readers
#   * `external_resolver` - `nil` or a 1-arity function taking a hex SHA
#
# Returns `{:ok, entries}` or the first `{:error, reason}` encountered.
# The Phase 1 results (in their original relative order) are passed to
# `resolve_ref_deltas/6` as the `resolved:` seed.
defp resolve_delta_entries(entries, pack_data, cache, external_resolver) do
  {ref_deltas, others} = Enum.split_with(entries, &(&1.type == :ref_delta))

  phase1_result =
    Enum.reduce_while(others, {:ok, []}, fn
      %{type: :ofs_delta, offset: offset}, {:ok, acc} ->
        case do_read_object(pack_data, offset, cache, 0, external_resolver) do
          {:ok, {resolved_type, resolved_data}} ->
            resolved = %{type: resolved_type, data: resolved_data, offset: offset}
            {:cont, {:ok, [resolved | acc]}}

          {:error, _} = err ->
            {:halt, err}
        end

      entry, {:ok, acc} ->
        # Already a full object; nothing to resolve.
        {:cont, {:ok, [entry | acc]}}
    end)

  case phase1_result do
    {:ok, reversed_others} ->
      # The accumulator was built by prepending; restore input order.
      resolved_others = Enum.reverse(reversed_others)

      case ref_deltas do
        [] ->
          # No REF_DELTAs: skip hashing every object for the SHA map.
          {:ok, resolved_others}

        _ ->
          sha_map = build_sha_data_map(resolved_others)

          resolve_ref_deltas(ref_deltas, pack_data, cache, sha_map, external_resolver,
            resolved: resolved_others
          )
      end

    {:error, _} = err ->
      err
  end
end
# Index resolved pack entries by object id.
#
# For each entry the id is the lowercase hex SHA-1 of
# "<type> <byte size>\0" followed by the entry data. The result maps
# that id to `{type, data}`. When two entries hash to the same id the
# later one in `entries` wins.
defp build_sha_data_map(entries) do
  Map.new(entries, fn entry ->
    kind = Atom.to_string(entry.type)
    header = "#{kind} #{byte_size(entry.data)}\0"

    # :crypto.hash/2 accepts iodata, so header and payload need not be joined.
    object_id =
      :sha
      |> :crypto.hash([header, entry.data])
      |> Base.encode16(case: :lower)

    {object_id, {entry.type, entry.data}}
  end)
end
# Resolve REF_DELTA entries in repeated passes.
#
# Each pass tries every pending entry via `resolve_single_ref_delta/5`, which
# answers `{:ok, entry}`, `:pending` (base not available yet) or an error.
# Entries resolved in one pass are hashed and added to the SHA map so that
# they can serve as bases in the next pass.
#
# Arguments:
#   * `ref_deltas`   - REF_DELTA entries still to resolve
#   * `pack_data`    - the full pack binary
#   * `cache`        - passed through to `resolve_single_ref_delta/5`
#   * `sha_map`      - hex SHA => `{type, data}` for objects resolved so far
#   * `ext_resolver` - passed through to `resolve_single_ref_delta/5`
#   * `resolved:`    - entries resolved so far; new ones are appended to it
#
# Returns `{:ok, resolved}` once nothing is pending,
# `{:error, {:ref_delta_base_not_found, message}}` when a pass makes no
# progress, or any `{:error, _}` returned or thrown during resolution.
defp resolve_ref_deltas([], _pack_data, _cache, _sha_map, _ext_resolver, resolved: resolved) do
  {:ok, resolved}
end

defp resolve_ref_deltas(ref_deltas, pack_data, cache, sha_map, ext_resolver, resolved: resolved) do
  {newly_resolved, still_pending} =
    Enum.reduce(ref_deltas, {[], []}, fn entry, {done, pending} ->
      case resolve_single_ref_delta(entry, pack_data, cache, sha_map, ext_resolver) do
        {:ok, resolved_entry} -> {[resolved_entry | done], pending}
        :pending -> {done, [entry | pending]}
        {:error, _} = err -> throw(err)
      end
    end)

  case {newly_resolved, still_pending} do
    {[], [first_pending | _]} ->
      # No progress in this pass: the remaining bases can never appear.
      {:error,
       {:ref_delta_base_not_found, "unresolvable REF_DELTA at offset #{first_pending.offset}"}}

    _ ->
      # Make this pass's results addressable by SHA for the next pass.
      new_sha_map =
        Enum.reduce(newly_resolved, sha_map, fn entry, acc ->
          type_str = Atom.to_string(entry.type)
          raw = "#{type_str} #{byte_size(entry.data)}\0" <> entry.data
          sha = :sha |> :crypto.hash(raw) |> Base.encode16(case: :lower)
          Map.put(acc, sha, {entry.type, entry.data})
        end)

      # `newly_resolved` was built by prepending; reverse before appending.
      all_resolved = resolved ++ Enum.reverse(newly_resolved)

      resolve_ref_deltas(still_pending, pack_data, cache, new_sha_map, ext_resolver,
        resolved: all_resolved
      )
  end
catch
  # Errors are signalled with `throw` from inside the reducer above (and may
  # be thrown by `resolve_single_ref_delta/5`); turn them back into a return.
  {:error, _} = err -> err
end
# Try to resolve a single REF_DELTA entry
defp resolve_single_ref_delta(entry, pack_data, _cache, sha_map, ext_resolver) do
<<_::binary-size(entry.offset), data::binary>> = pack_data
case parse_object_header(data) do
{:ok, @obj_ref_delta, _size, header_len, _rest} ->
data_start = entry.offset + header_len
<<_::binary-size(data_start), base_sha_bin::binary-size(20), compressed::binary>> =
pack_data
base_sha = Base.encode16(base_sha_bin, case: :lower)
case decompress_data(compressed) do
{:ok, delta_data, _} ->
# Try sha_map first (objects resolved in earlier phases)
case Map.get(sha_map, base_sha) do
{base_type, base_data} ->
case Delta.apply(base_data, delta_data) do
{:ok, result} ->
{:ok, %{type: base_type, data: result, offset: entry.offset}}
{:error, _} = err ->
throw(err)
end
# Resolve any delta entries by re-reading them with read_object which handles
# delta resolution. Non-delta entries pass through unchanged.
# The cache contains {:sha, sha} => offset mappings built during parse_entries
# to avoid redundant build_sha_index calls for REF_DELTA lookups.
defp resolve_delta_entries(entries, pack_data, cache) do
Enum.reduce_while(entries, {:ok, []}, fn entry, {:ok, acc} ->
if entry.type in [:ofs_delta, :ref_delta] do
case read_object(pack_data, entry.offset, cache) do
{:ok, {resolved_type, resolved_data}} ->
resolved = %{type: resolved_type, data: resolved_data, offset: entry.offset}
{:cont, {:ok, [resolved | acc]}}
nil ->
# Try external resolver (thin pack support)
if is_function(ext_resolver) do
case ext_resolver.(base_sha) do
{:ok, {base_type, base_data}} ->
case Delta.apply(base_data, delta_data) do
{:ok, result} ->
{:ok, %{type: base_type, data: result, offset: entry.offset}}
{:error, _} = err ->
throw(err)
end
{:error, _} ->
:pending
end
else
:pending
end
end
{:error, _} = err ->
{:halt, err}
throw(err)
end
else
{:cont, {:ok, [entry | acc]}}
end
end)
|> case do
{:ok, reversed} -> {:ok, Enum.reverse(reversed)}
{:error, _} = err -> err
_ ->
{:error, {:unexpected_type_at_ref_delta_offset, entry.offset}}
end
end
@@ -238,21 +388,22 @@
end
end
defp do_read_object(_pack_data, _offset, _cache, depth, _external_resolver)
when depth > @max_delta_depth do
defp do_read_object(_pack_data, _offset, _cache, depth) when depth > @max_delta_depth do
{:error, :max_delta_depth_exceeded}
end
defp do_read_object(pack_data, offset, cache, depth) do
defp do_read_object(pack_data, offset, cache, depth, external_resolver) do
case Map.get(cache, offset) do
{type, data} ->
{:ok, {type, data}}
nil ->
resolve_object(pack_data, offset, cache, depth)
resolve_object(pack_data, offset, cache, depth, external_resolver)
end
end
defp resolve_object(pack_data, offset, cache, depth) do
defp resolve_object(pack_data, offset, cache, depth, external_resolver) do
<<_skip::binary-size(offset), data::binary>> = pack_data
case parse_object_header(data) do
@@ -284,7 +435,13 @@
with {:ok, delta_data, _} <- decompress_data(compressed),
{:ok, {base_type, base_data}} <-
do_read_object(
do_read_object(pack_data, base_offset, cache, depth + 1),
pack_data,
base_offset,
cache,
depth + 1,
external_resolver
),
{:ok, result} <- Delta.apply(base_data, delta_data) do
{:ok, {base_type, result}}
end
@@ -300,12 +457,30 @@
base_sha = Base.encode16(base_sha_bin, case: :lower)
with {:ok, delta_data, _} <- decompress_data(compressed),
{:ok, base_offset} <- find_sha_offset(pack_data, base_sha, cache),
{:ok, {base_type, base_data}} <-
do_read_object(pack_data, base_offset, cache, depth + 1),
{:ok, result} <- Delta.apply(base_data, delta_data) do
{:ok, {base_type, result}}
with {:ok, delta_data, _} <- decompress_data(compressed) do
case find_sha_offset(pack_data, base_sha, cache) do
{:ok, base_offset} ->
with {:ok, {base_type, base_data}} <-
do_read_object(
pack_data,
base_offset,
cache,
depth + 1,
external_resolver
),
{:ok, result} <- Delta.apply(base_data, delta_data) do
{:ok, {base_type, result}}
end
{:error, {:ref_delta_base_not_found, _}} when is_function(external_resolver) ->
with {:ok, {base_type, base_data}} <- external_resolver.(base_sha),
{:ok, result} <- Delta.apply(base_data, delta_data) do
{:ok, {base_type, result}}
end
{:error, _} = err ->
err
end
end
_ ->