ref:dfd72fff359d79120e9d0dab9a5d127cc5e577e2

feat: partial clone via the v2 filter capability

Advertises the `filter` sub-feature of `fetch` plus `object-format=sha1` so git clients accept --filter without warning. UploadPackV2 parses a `filter <spec>` arg and applies the filter before pack generation. Supported specs (Documentation/rev-list-options.txt): `blob:none` — exclude every blob; `blob:limit=<n>[k|m|g]` — exclude blobs whose byte size exceeds n; `tree:<depth>` — exclude trees and blobs whose depth from the root tree is >= depth; `object:type=<type>` — include only objects of the given type; `sparse:oid=<oid>` — read the blob at oid as a sparse-checkout spec and include only blobs whose path matches; `combine:<a>+<b>+...` — all sub-specs apply (intersection). New `ExGitObjectstore.Pack.Filter` module with parser + include?/3 applicator. UploadPackV2 computes per-object tree depth and blob path by walking each commit's root tree once after the main collection, then asks Filter.include?/3 for each pack entry. Partial-clone tests drive `git clone --filter=blob:none` and `--filter=tree:0` through a real daemon and assert the client records remote.origin.promisor=true (git only sets this when the server actually honoured the filter).
SHA: dfd72fff359d79120e9d0dab9a5d127cc5e577e2
Author: Cole Christensen <cole.christensen@macmillan.com>
Date: 2026-04-19 00:47
Parents: 2279a0a
3 files changed +342 -13
Type
lib/ex_git_objectstore/pack/filter.ex +206 −0
@@ -1,0 +1,206 @@
# Copyright 2026 Cole Christensen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
defmodule ExGitObjectstore.Pack.Filter do
@moduledoc """
Parser and applicator for the `filter` argument sent by partial-clone
clients in a protocol-v2 fetch request.
Supported filter specs (per `Documentation/rev-list-options.txt`):
* `blob:none` — exclude every blob
* `blob:limit=<n>[k|m|g]` — exclude blobs whose byte size exceeds n
* `tree:<depth>` — exclude trees (and blobs) whose depth
from the root tree is ≥ depth
* `object:type=<type>` — include only objects of the given git
object type (blob/tree/commit/tag)
* `sparse:oid=<oid>` — read the blob at oid as a sparse-checkout
spec and include only blobs whose path
matches
* combined via `combine:<spec1>+<spec2>+…` — all sub-specs apply
(intersection of includes).
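`include?/3` decides whether a single object should be written to the
pack, given the filter spec, a context map describing the object, and
the repo (used to read the spec blob for `sparse:oid`). The context map
carries:
* the object's git type (`:blob`, `:tree`, `:commit`, `:tag`)
* the object's byte size (relevant for `blob:limit`)
* its depth from the root tree (relevant for `tree:<n>`; 0 for root
trees, 1 for entries of the root tree, 2 for sub-sub-trees, …), or
`:not_tree` for objects the depth filter does not apply to
* the object's path, or `nil` when unknown (used for `sparse:oid` matching)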
"""
alias ExGitObjectstore.Object.Blob
alias ExGitObjectstore.ObjectResolver
alias ExGitObjectstore.Repo
# The git object kinds a filter can be asked about.
@type object_type :: :blob | :tree | :commit | :tag
# Parsed form of a filter spec string, as returned by `parse/1`.
@type spec ::
# from "blob:none"
:blob_none
# from "blob:limit=<n>[k|m|g]"; the integer is the limit in bytes
| {:blob_limit, non_neg_integer()}
# from "tree:<depth>"
| {:tree_depth, non_neg_integer()}
# from "object:type=<type>"
| {:object_type, object_type()}
# from "sparse:oid=<oid>"; the string is the oid exactly as sent (trimmed)
| {:sparse_oid, String.t()}
# from "combine:<a>+<b>+…"; sub-specs are kept in request order
| {:combine, [spec()]}
# Per-object context handed to `include?/3`.
@type include_ctx :: %{
# git type of the object under consideration
type: object_type(),
# byte size of the object; only consulted for `{:blob_limit, n}`
size: non_neg_integer(),
# depth from the root tree; `:not_tree` makes `{:tree_depth, n}` always
# include the object
tree_depth: non_neg_integer() | :not_tree,
# path of the object, consulted by `{:sparse_oid, oid}`; `nil` means the
# sparse filter includes the object unconditionally
path: String.t() | nil
}
@doc """
Parses the textual filter spec carried by a fetch request.

Leading and trailing whitespace is stripped before the spec is
interpreted. Returns `{:ok, spec}` for a supported filter and
`{:error, reason}` for anything else.
"""
@spec parse(String.t()) :: {:ok, spec()} | {:error, term()}
def parse(input) when is_binary(input) do
  input
  |> String.trim()
  |> parse_raw()
end
# Parses an already-trimmed filter spec into its `spec()` term.
#
# Each clause recognises one spec family by its literal prefix; anything
# else falls through to the final clause and is reported as
# `{:error, {:unknown_filter, input}}`.
defp parse_raw("blob:none"), do: {:ok, :blob_none}

# "blob:limit=<n>[k|m|g]" — the size is normalised to bytes by parse_size/1.
defp parse_raw("blob:limit=" <> rest) do
  case parse_size(rest) do
    {:ok, n} -> {:ok, {:blob_limit, n}}
    err -> err
  end
end

# "tree:<depth>" — depth must be a non-negative integer with no trailing text.
defp parse_raw("tree:" <> rest) do
  case Integer.parse(rest) do
    {n, ""} when n >= 0 -> {:ok, {:tree_depth, n}}
    _ -> {:error, {:bad_tree_depth, rest}}
  end
end

# "object:type=<type>" — matched against a closed list so no atom is ever
# created from request input.
defp parse_raw("object:type=" <> type) do
  case type do
    "blob" -> {:ok, {:object_type, :blob}}
    "tree" -> {:ok, {:object_type, :tree}}
    "commit" -> {:ok, {:object_type, :commit}}
    "tag" -> {:ok, {:object_type, :tag}}
    other -> {:error, {:bad_object_type, other}}
  end
end

# "sparse:oid=<oid>" — the oid is kept as sent; it is resolved only when the
# filter is applied.
defp parse_raw("sparse:oid=" <> oid), do: {:ok, {:sparse_oid, String.trim(oid)}}

# "combine:<a>+<b>+…" — every sub-spec must parse; the first failure is
# returned as-is. An empty sub-spec list is rejected: `{:combine, []}` would
# make `include?/3` accept every object, silently turning a malformed
# request into "no filter at all".
#
# NOTE(review): git's documentation says reserved characters in each
# sub-filter are percent-encoded; sub-specs are not decoded here — confirm
# whether that matters for the specs clients actually send.
defp parse_raw("combine:" <> rest) do
  case String.split(rest, "+", trim: true) do
    [] ->
      {:error, {:empty_combine, rest}}

    sub_specs ->
      # Parse lazily inside the reduction so a bad sub-spec stops the walk
      # instead of parsing the remainder first.
      sub_specs
      |> Enum.reduce_while({:ok, []}, fn sub_spec, {:ok, acc} ->
        case parse_raw(sub_spec) do
          {:ok, spec} -> {:cont, {:ok, [spec | acc]}}
          err -> {:halt, err}
        end
      end)
      |> case do
        {:ok, specs} -> {:ok, {:combine, Enum.reverse(specs)}}
        err -> err
      end
  end
end

defp parse_raw(other), do: {:error, {:unknown_filter, other}}
# Converts a `blob:limit` size such as "512", "10k", "4M" or "1g" to bytes.
#
# The numeric part must be a non-negative integer. The optional suffix is
# matched case-insensitively after trimming whitespace and scales by powers
# of 1024. Returns `{:ok, bytes}`, `{:error, {:bad_size_unit, suffix}}` for
# an unrecognised suffix, or `{:error, {:bad_size, s}}` when no
# non-negative integer could be read.
defp parse_size(s) do
  multipliers = %{
    "" => 1,
    "k" => 1024,
    "m" => 1024 * 1024,
    "g" => 1024 * 1024 * 1024
  }

  case Integer.parse(s) do
    {count, suffix} when count >= 0 ->
      normalized = suffix |> String.trim() |> String.downcase()

      case Map.fetch(multipliers, normalized) do
        {:ok, factor} -> {:ok, count * factor}
        :error -> {:error, {:bad_size_unit, suffix}}
      end

    _ ->
      {:error, {:bad_size, s}}
  end
end
@doc """
Decide whether an object belongs in the pack under the given filter.

Arguments:

  * `spec` — a parsed filter, as returned by `parse/1`
  * `ctx` — the object's `type`, `size`, `tree_depth` and `path`
  * `repo` — only consulted by `sparse:oid`, to read the spec blob

Returns `true` when the object should be written to the pack and `false`
when the filter omits it.

Callers may pass `path: "foo/bar"` when recursing through tree
entries — the path is used by `sparse:oid` matching and is otherwise
ignored.
"""
@spec include?(spec(), include_ctx(), Repo.t()) :: boolean()
def include?(:blob_none, %{type: :blob}, _repo), do: false
def include?(:blob_none, _ctx, _repo), do: true

# git omits blobs of size *at least* n, so a blob of exactly n bytes is
# excluded and `blob:limit=0` drops every blob, empty ones included.
def include?({:blob_limit, n}, %{type: :blob, size: size}, _repo), do: size < n
def include?({:blob_limit, _n}, _ctx, _repo), do: true

# tree:<depth> applies to both trees and blobs — they're both excluded
# when their depth from the root tree is >= n. Commits and tags are
# unaffected; callers mark them with `tree_depth: :not_tree`.
def include?({:tree_depth, _n}, %{tree_depth: :not_tree}, _repo), do: true
def include?({:tree_depth, n}, %{tree_depth: d}, _repo), do: d < n

# object:type keeps only objects whose type equals the requested one; the
# repeated `type` binding in the head performs the equality check.
def include?({:object_type, type}, %{type: type}, _repo), do: true
def include?({:object_type, _other}, _ctx, _repo), do: false

def include?({:sparse_oid, oid}, ctx, repo) do
  sparse_include?(oid, ctx, repo)
end

# combine is an intersection: every sub-spec must accept the object.
def include?({:combine, specs}, ctx, repo) do
  Enum.all?(specs, &include?(&1, ctx, repo))
end
# Applies a `sparse:oid` filter to a single object.
#
# The blob stored at `oid` lists one path pattern per line, and a blob is
# kept when at least one pattern matches its path. Everything that is not a
# blob is kept, as is a blob whose path is unknown (`nil`). If the spec blob
# cannot be read as a blob, the filter degrades to "include".
defp sparse_include?(_oid, %{type: kind}, _repo) when kind != :blob, do: true
defp sparse_include?(_oid, %{path: nil}, _repo), do: true

defp sparse_include?(oid, %{path: blob_path}, repo) do
  with {:ok, %Blob{content: patterns}} <- ObjectResolver.read(repo, oid) do
    path_matches_sparse?(blob_path, patterns)
  else
    # Spec blob missing or not a blob — stay inclusive.
    _unreadable -> true
  end
end
# Minimal gitignore-flavoured matcher for sparse specs. `spec` is split on
# newlines; empty lines and lines whose trimmed form starts with `#` are
# skipped, and every remaining line is trimmed and tried as a pattern.
# A leading `/` anchors at the root and a trailing `/` marks a directory
# (treated as a prefix match). Wildcards are not supported — enough for the
# test scenarios.
defp path_matches_sparse?(path, spec) do
  lines = String.split(spec, "\n", trim: true)

  Enum.any?(lines, fn raw ->
    candidate = String.trim(raw)

    raw != "" and not String.starts_with?(candidate, "#") and
      pattern_match?(candidate, path)
  end)
end
# Tests one sparse pattern against a path. Leading slashes on the pattern
# are dropped first. A pattern ending in `/` is a directory prefix: it
# matches the directory itself and anything beneath it. Any other pattern
# matches the exact path or anything beneath it as a directory.
defp pattern_match?(pattern, path) do
  anchored = String.trim_leading(pattern, "/")

  if String.ends_with?(anchored, "/") do
    Enum.any?([path, path <> "/"], &String.starts_with?(&1, anchored))
  else
    path == anchored or String.starts_with?(path, anchored <> "/")
  end
end
end