@@ -1,0 +1,206 @@
# Copyright 2026 Cole Christensen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
defmodule ExGitObjectstore.Pack.Filter do
  @moduledoc """
  Parser and applicator for the `filter` argument sent by partial-clone
  clients in a protocol-v2 fetch request.

  Supported filter specs (per `Documentation/rev-list-options.txt`):

    * `blob:none` — exclude every blob
    * `blob:limit=<n>[k|m|g]` — exclude blobs whose byte size is at least
      `n` (so `blob:limit=0` excludes every blob)
    * `tree:<depth>` — exclude trees (and blobs) whose depth from the root
      tree is ≥ depth
    * `object:type=<type>` — include only objects of the given git object
      type (blob/tree/commit/tag)
    * `sparse:oid=<oid>` — read the blob at oid as a sparse-checkout spec
      and include only blobs whose path matches
    * `combine:<spec1>+<spec2>+…` — all sub-specs apply (intersection of
      includes); at least one sub-spec is required

  `include?/3` decides whether a single object should be written to the
  pack, given:

    * the parsed filter spec
    * a context map describing the object:
        * `:type` — the object's git type (`:blob`, `:tree`, `:commit`,
          `:tag`)
        * `:size` — the object's byte size (relevant for `blob:limit`)
        * `:tree_depth` — its depth from the root tree (relevant for
          `tree:<n>`; 0 for root trees, 1 for entries of the root tree,
          2 for sub-sub-trees, …) or `:not_tree` for commits and tags
        * `:path` — optional path, used for `sparse:oid` matching
    * the repository, used only to read the `sparse:oid` spec blob
  """

  alias ExGitObjectstore.Object.Blob
  alias ExGitObjectstore.ObjectResolver
  alias ExGitObjectstore.Repo

  @type object_type :: :blob | :tree | :commit | :tag

  @type spec ::
          :blob_none
          | {:blob_limit, non_neg_integer()}
          | {:tree_depth, non_neg_integer()}
          | {:object_type, object_type()}
          | {:sparse_oid, String.t()}
          | {:combine, [spec()]}

  @type include_ctx :: %{
          type: object_type(),
          size: non_neg_integer(),
          tree_depth: non_neg_integer() | :not_tree,
          path: String.t() | nil
        }

  # Closed mapping from wire names to type atoms; avoids creating atoms
  # from client-supplied input.
  @object_types %{"blob" => :blob, "tree" => :tree, "commit" => :commit, "tag" => :tag}

  # Multipliers for the optional size suffix of `blob:limit=<n>[k|m|g]`.
  @size_units %{"" => 1, "k" => 1024, "m" => 1024 * 1024, "g" => 1024 * 1024 * 1024}

  @doc """
  Parse a filter spec string.

  Surrounding whitespace is ignored. Returns `{:ok, spec}` on success and
  `{:error, reason}` otherwise, where `reason` is a tagged tuple naming the
  offending part of the input (for example `{:unknown_filter, input}` or
  `{:bad_tree_depth, rest}`).
  """
  @spec parse(String.t()) :: {:ok, spec()} | {:error, term()}
  def parse(input) when is_binary(input) do
    input
    |> String.trim()
    |> parse_raw()
  end

  defp parse_raw("blob:none"), do: {:ok, :blob_none}

  defp parse_raw("blob:limit=" <> rest) do
    with {:ok, limit} <- parse_size(rest) do
      {:ok, {:blob_limit, limit}}
    end
  end

  defp parse_raw("tree:" <> rest) do
    case Integer.parse(rest) do
      {depth, ""} when depth >= 0 -> {:ok, {:tree_depth, depth}}
      _ -> {:error, {:bad_tree_depth, rest}}
    end
  end

  defp parse_raw("object:type=" <> type) do
    case Map.fetch(@object_types, type) do
      {:ok, type_atom} -> {:ok, {:object_type, type_atom}}
      :error -> {:error, {:bad_object_type, type}}
    end
  end

  defp parse_raw("sparse:oid=" <> oid) do
    # An empty object name can never resolve; reject it here instead of
    # letting it degrade into the "couldn't read spec blob" fallback that
    # includes everything.
    case String.trim(oid) do
      "" -> {:error, {:bad_sparse_oid, oid}}
      trimmed -> {:ok, {:sparse_oid, trimmed}}
    end
  end

  defp parse_raw("combine:" <> rest) do
    case String.split(rest, "+", trim: true) do
      # `combine:` with no sub-specs would otherwise yield a filter that
      # matches every object, silently disabling filtering.
      [] -> {:error, {:empty_combine, rest}}
      sub_specs -> parse_combined(sub_specs)
    end
  end

  defp parse_raw(other), do: {:error, {:unknown_filter, other}}

  # Parses each sub-spec of a `combine:` filter in order, stopping at the
  # first one that fails and returning its error unchanged.
  defp parse_combined(sub_specs) do
    parsed =
      Enum.reduce_while(sub_specs, {:ok, []}, fn raw, {:ok, acc} ->
        case parse_raw(raw) do
          {:ok, spec} -> {:cont, {:ok, [spec | acc]}}
          {:error, _reason} = err -> {:halt, err}
        end
      end)

    with {:ok, specs} <- parsed do
      {:ok, {:combine, Enum.reverse(specs)}}
    end
  end

  # Parses `<n>[k|m|g]` (suffix is case-insensitive) into a byte count.
  defp parse_size(raw) do
    case Integer.parse(raw) do
      {n, unit} when n >= 0 ->
        normalized = unit |> String.trim() |> String.downcase()

        case Map.fetch(@size_units, normalized) do
          {:ok, factor} -> {:ok, n * factor}
          :error -> {:error, {:bad_size_unit, unit}}
        end

      _ ->
        {:error, {:bad_size, raw}}
    end
  end

  @doc """
  Decide whether to include an object in the pack. Returns a boolean.

  `ctx` describes the object (`:type`, `:size`, `:tree_depth`, and
  optionally `:path`). Callers may pass `path: "foo/bar"` when recursing
  through tree entries — the path is used by `sparse:oid` matching and is
  otherwise ignored. A missing or `nil` path makes `sparse:oid` include the
  object.

  `repo` is only consulted for `sparse:oid` filters, to read the spec blob.
  """
  @spec include?(spec(), include_ctx(), Repo.t()) :: boolean()
  def include?(:blob_none, %{type: :blob}, _repo), do: false
  def include?(:blob_none, _ctx, _repo), do: true

  # git omits blobs of size *at least* n, so a blob of exactly n bytes is
  # excluded and `blob:limit=0` excludes every blob.
  def include?({:blob_limit, n}, %{type: :blob, size: size}, _repo), do: size < n
  def include?({:blob_limit, _n}, _ctx, _repo), do: true

  # tree:<depth> applies to both trees and blobs — they're both excluded
  # when their depth from the root tree is >= n. Commits and tags are
  # unaffected.
  def include?({:tree_depth, _n}, %{tree_depth: :not_tree}, _repo), do: true
  def include?({:tree_depth, n}, %{tree_depth: depth}, _repo), do: depth < n

  # The repeated `type` binding only matches when the object's type equals
  # the requested one.
  def include?({:object_type, type}, %{type: type}, _repo), do: true
  def include?({:object_type, _other}, _ctx, _repo), do: false

  def include?({:sparse_oid, oid}, ctx, repo), do: sparse_include?(oid, ctx, repo)

  def include?({:combine, specs}, ctx, repo) do
    Enum.all?(specs, &include?(&1, ctx, repo))
  end

  # Sparse-checkout spec: the blob at `oid` contains one path pattern
  # per line. A blob is included if any pattern matches its path.
  # Non-blob objects are always included (this filter only reshapes
  # which blobs are fetched).
  defp sparse_include?(_oid, %{type: type}, _repo) when type != :blob, do: true

  defp sparse_include?(oid, ctx, repo) do
    # `:path` is optional context: an absent key is treated like `nil`
    # (nothing to match against, so stay inclusive) rather than crashing.
    case Map.get(ctx, :path) do
      nil -> true
      path -> blob_path_in_sparse_spec?(oid, path, repo)
    end
  end

  # Reads the spec blob and matches `path` against it.
  defp blob_path_in_sparse_spec?(oid, path, repo) do
    case ObjectResolver.read(repo, oid) do
      {:ok, %Blob{content: content}} ->
        path_matches_sparse?(path, content)

      _ ->
        # Couldn't read the spec blob — fall back to inclusive.
        true
    end
  end

  # Very small subset of gitignore-style sparse matching: each non-empty,
  # non-comment line is a pattern. Supports a leading `/` as absolute
  # prefix and a trailing `/` to denote "directory" (which we treat as
  # a prefix match). No wildcard support — enough for the test scenarios.
  defp path_matches_sparse?(path, spec) do
    spec
    |> String.split("\n", trim: true)
    # Trim before filtering so whitespace-only lines (and stray `\r` from
    # CRLF files) are dropped instead of becoming empty patterns.
    |> Stream.map(&String.trim/1)
    |> Stream.reject(fn line -> line == "" or String.starts_with?(line, "#") end)
    |> Enum.any?(&pattern_match?(&1, path))
  end

  defp pattern_match?(pattern, path) do
    pattern = String.trim_leading(pattern, "/")

    if String.ends_with?(pattern, "/") do
      # Directory pattern: matches anything underneath it, plus a path
      # equal to the directory name itself. Appending "/" to the path
      # covers both cases with one prefix check.
      String.starts_with?(path <> "/", pattern)
    else
      # Plain pattern: exact path, or anything below it as a directory.
      path == pattern or String.starts_with?(path, pattern <> "/")
    end
  end
end