@@ -1,0 +1,361 @@
# Copyright 2026 Cole Christensen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
defmodule ExGitObjectstore.Protocol.UploadPackV2 do
@moduledoc """
State machine for the git upload-pack protocol version 2 (server side).
Git protocol v2 differs from v1 in that:
- The initial advertisement lists capabilities, not refs
- Clients send explicit commands (ls-refs, fetch); want/have negotiation happens inside the fetch command instead of directly after a ref advertisement
- Each command is self-contained with its own request/response cycle
Flow:
1. Server advertises capabilities (version 2, ls-refs, fetch)
2. Client sends a command (ls-refs or fetch) as pkt-line encoded data
3. Server processes the command and returns the result
4. Client may send additional commands (ls-refs is typically followed by fetch)
This is a pure functional state machine — no processes.
"""
alias ExGitObjectstore.{ObjectResolver, Ref, Repo}
alias ExGitObjectstore.Object
alias ExGitObjectstore.Object.{Blob, Commit, Tag, Tree}
alias ExGitObjectstore.Pack.Writer
alias ExGitObjectstore.Protocol.PktLine
# Upper bound on directory nesting followed while walking a tree; a walk that
# would go deeper raises (see collect_tree_objects/4).
@max_tree_depth 64
# Protocol state. `repo` is the repository being served. `phase` is `:command`
# while commands are accepted and becomes `:done` once a fetch has been
# answered or a request could not be parsed / named an unknown command.
@type state :: %__MODULE__{
repo: Repo.t(),
phase: :command | :done
}
defstruct [:repo, phase: :command]
@doc """
Start a protocol v2 upload-pack session for `repo`.

Returns `{advertisement, state}`: the pkt-line encoded capability
advertisement to send to the client, and a fresh state machine in the
`:command` phase.
"""
@spec init(Repo.t()) :: {binary(), state()}
def init(%Repo{} = repo) do
  {build_capability_advertisement(), %__MODULE__{repo: repo, phase: :command}}
end
@doc """
Process one client request and produce the server's reply.

Returns `{response_data, new_state}`.

  * `ls-refs` is answered and leaves the machine in the `:command` phase.
  * `fetch` is answered and moves the machine to `:done`.
  * A request that cannot be parsed, or names an unknown command, is answered
    with a flush packet and also moves the machine to `:done`.
  * Once `:done`, every further call returns an empty binary and the state
    unchanged.
"""
@spec feed(state(), binary()) :: {binary(), state()}
def feed(%__MODULE__{phase: :done} = state, _data), do: {<<>>, state}

def feed(%__MODULE__{phase: :command, repo: repo} = state, data) do
  finished = %{state | phase: :done}

  case parse_command(data) do
    {:error, _} -> {PktLine.flush(), finished}
    {:fetch, args} -> {handle_fetch(repo, args), finished}
    {:ls_refs, args} -> {handle_ls_refs(repo, args), state}
  end
end
@doc """
Report whether the exchange has finished.

Returns `true` only for a state whose phase is `:done`; any other value
yields `false`.
"""
@spec done?(state()) :: boolean()
def done?(state), do: match?(%__MODULE__{phase: :done}, state)
# -- Capability Advertisement --
# Builds the initial v2 advertisement: one pkt-line per capability followed by
# a flush packet, flattened into a single binary.
#
# NOTE(review): `fetch=shallow` is advertised here, but the fetch handler in
# this module only reads `want`/`have` lines — confirm shallow requests are
# handled elsewhere or drop the feature from the advertisement.
defp build_capability_advertisement do
  capability_lines =
    for capability <- ["version 2", "ls-refs", "fetch=shallow", "server-option"] do
      PktLine.encode(capability)
    end

  IO.iodata_to_binary([capability_lines, PktLine.flush()])
end
# -- Command Parsing --
# Decodes the raw request into pkt-line packets and hands them to
# dispatch_command/1. A decode failure is passed through unchanged as
# `{:error, reason}`; any bytes left over after decoding are ignored.
defp parse_command(data) do
  data
  |> PktLine.decode()
  |> case do
    {:error, _} = failure -> failure
    {:ok, packets, _rest} -> dispatch_command(packets)
  end
end
# Identifies the requested command from the decoded packets.
#
# The first `command=<name>` data packet selects the command; every other data
# packet is returned as an argument line (see extract_args/1).
#
# Returns `{:ls_refs, args}`, `{:fetch, args}` or `{:error, :unknown_command}`
# (also used when no `command=` packet is present).
defp dispatch_command(packets) do
  command =
    Enum.find_value(packets, fn
      # A text pkt-line may carry a trailing LF. Strip it so "command=fetch\n"
      # is recognised, matching how want/have values are trimmed elsewhere in
      # this module.
      {:data, "command=" <> name} -> String.trim_trailing(name, "\n")
      _ -> nil
    end)

  args = extract_args(packets)

  case command do
    "ls-refs" -> {:ls_refs, args}
    "fetch" -> {:fetch, args}
    _ -> {:error, :unknown_command}
  end
end
# Returns the payload of every data packet except the `command=` line, in the
# order received. Non-data packets (anything not shaped `{:data, _}`) are
# dropped.
defp extract_args(packets) do
  packets
  |> Enum.reduce([], fn
    {:data, "command=" <> _}, kept -> kept
    {:data, payload}, kept -> [payload | kept]
    _other, kept -> kept
  end)
  |> Enum.reverse()
end
# -- ls-refs Command --
# Answers an `ls-refs` command.
#
# `args` are the request's argument lines. Each `ref-prefix <prefix>` line
# narrows the listing; with no such line every ref from list_refs/2 is
# returned. The reply is one `<sha> <ref>` pkt-line per ref followed by a flush
# packet, as a single binary.
defp handle_ls_refs(repo, args) do
  # Match the keyword exactly once. String.trim_leading/2 would strip every
  # repeated occurrence of "ref-prefix " and so mangle the requested prefix.
  # A trailing LF from the pkt-line is not part of the prefix either.
  prefixes =
    for "ref-prefix " <> prefix <- args, do: String.trim_trailing(prefix, "\n")

  ref_lines =
    for {ref, sha} <- list_refs(repo, prefixes), do: PktLine.encode("#{sha} #{ref}")

  IO.iodata_to_binary([ref_lines, PktLine.flush()])
end
# Lists `{ref, sha}` pairs, optionally restricted to refs whose name starts
# with at least one of `prefixes`. An empty prefix list means no restriction.
defp list_refs(repo, prefixes) do
  repo
  |> list_all_refs()
  |> restrict_to_prefixes(prefixes)
end

# Keeps only the refs whose name begins with one of the given prefixes.
defp restrict_to_prefixes(refs, []), do: refs

defp restrict_to_prefixes(refs, prefixes) do
  Enum.filter(refs, fn {name, _sha} ->
    Enum.any?(prefixes, fn prefix -> String.starts_with?(name, prefix) end)
  end)
end
# Collects the refs under refs/heads/ and refs/tags/ (in that order) and
# returns them sorted by ref name.
defp list_all_refs(repo) do
  ["refs/heads/", "refs/tags/"]
  |> Enum.flat_map(fn namespace -> list_refs_safe(repo, namespace) end)
  |> Enum.sort_by(fn {name, _sha} -> name end)
end
# Best-effort wrapper around Ref.list/2: returns the refs on `{:ok, refs}` and
# an empty list for any other result.
defp list_refs_safe(repo, prefix) do
  with {:ok, found} <- Ref.list(repo, prefix) do
    found
  else
    _ -> []
  end
end
# -- fetch Command --
# Answers a `fetch` command.
#
# `args` are the request's argument lines; `want <sha>` lines name the objects
# to send and `have <sha>` lines name objects the client already holds.
#
# Reply layout (single binary):
#   [acknowledgments section] "packfile" <pack on sideband channel 1> flush
#
# If object collection fails, the reply is a lone flush packet.
defp handle_fetch(repo, args) do
  wants = extract_shas(args, "want ")
  haves = extract_shas(args, "have ")

  # Protocol v2: when the client ends negotiation with "done", the
  # acknowledgments section MUST be omitted and the packfile section follows
  # directly. Clients that sent "done" do not expect an "acknowledgments"
  # header and reject the response if one is present.
  negotiation_finished? = Enum.any?(args, fn line -> String.trim(line) == "done" end)

  ack_section =
    if negotiation_finished? do
      <<>>
    else
      build_acknowledgments(repo, haves)
    end

  case collect_objects(repo, wants, haves) do
    {:ok, objects} ->
      {pack_data, _pack_sha} = Writer.generate(objects)

      IO.iodata_to_binary([
        ack_section,
        PktLine.encode("packfile"),
        PktLine.encode_sideband(1, pack_data),
        PktLine.flush()
      ])

    {:error, _reason} ->
      PktLine.flush()
  end
end
# Builds the `acknowledgments` section of a fetch response.
#
# With no haves (e.g. an initial clone) there is nothing to acknowledge and an
# empty binary is returned. Otherwise the section is:
#
#   "acknowledgments"
#   "ACK <sha>" for every have that can be read from `repo`, or "NAK" if none
#   "ready"
#   delim-pkt
#
# The section always ends with a delim packet, i.e. it announces that another
# section (the packfile) follows in the same response. Clients only accept a
# packfile after an acknowledgments section that contained "ready", and expect
# a flush otherwise. "ready" is therefore sent in the NAK case as well, where
# the server simply sends everything reachable from the wants.
defp build_acknowledgments(_repo, []), do: <<>>

defp build_acknowledgments(repo, haves) do
  common =
    Enum.filter(haves, fn sha ->
      match?({:ok, _}, ObjectResolver.read(repo, sha))
    end)

  status_lines =
    case common do
      [] -> [PktLine.encode("NAK")]
      _ -> Enum.map(common, fn sha -> PktLine.encode("ACK #{sha}") end)
    end

  IO.iodata_to_binary([
    PktLine.encode("acknowledgments"),
    status_lines,
    PktLine.encode("ready"),
    PktLine.delim()
  ])
end
# Returns the values of all argument lines that start with `prefix`
# (e.g. "want " or "have "), with the prefix removed and surrounding
# whitespace (including a trailing LF) trimmed. Order is preserved.
defp extract_shas(args, prefix) do
  for line <- args, String.starts_with?(line, prefix) do
    # Remove the keyword exactly once. String.trim_leading/2 would keep
    # stripping repeated occurrences and could alter the value itself.
    line
    |> String.replace_prefix(prefix, "")
    |> String.trim()
  end
end
# -- Object Collection (shared logic with v1 UploadPack) --
# Gathers every object to be packed for the given wants, skipping any sha
# listed in `haves`.
#
# Returns `{:ok, objects}`, or `{:error, message}` when the walk raises (for
# example when the tree depth limit is exceeded).
defp collect_objects(repo, wants, haves) do
  excluded = MapSet.new(haves)

  try do
    {gathered, _seen} =
      for want <- wants, reduce: {[], MapSet.new()} do
        {gathered, seen} ->
          {found, seen} = collect_reachable(repo, want, excluded, seen)
          {found ++ gathered, seen}
      end

    {:ok, Enum.reverse(gathered)}
  rescue
    error -> {:error, Exception.message(error)}
  end
end
# Entry point of the object walk for a single sha. Shas that are excluded or
# already visited contribute nothing; otherwise the sha is marked visited and
# its object is expanded.
defp collect_reachable(repo, sha, exclude_set, visited) do
  cond do
    MapSet.member?(exclude_set, sha) ->
      {[], visited}

    MapSet.member?(visited, sha) ->
      {[], visited}

    true ->
      collect_reachable_object(repo, sha, exclude_set, MapSet.put(visited, sha))
  end
end
# Reads the object for `sha` and expands it according to its type:
#   * blob   - the blob itself
#   * tag    - the tag followed by whatever its target expands to
#   * tree   - the tree and its entries, starting at depth 0
#   * commit - delegated to collect_commit_objects/5
# An object that cannot be read contributes nothing.
defp collect_reachable_object(repo, sha, exclude_set, visited) do
  repo
  |> ObjectResolver.read(sha)
  |> case do
    {:error, _} ->
      {[], visited}

    {:ok, %Blob{content: content}} ->
      {[{:blob, content, sha}], visited}

    {:ok, %Tag{} = tag} ->
      encoded_tag = Object.encode_content_only(tag)
      {target_objects, seen} = collect_reachable(repo, tag.object, exclude_set, visited)
      {[{:tag, encoded_tag, sha} | target_objects], seen}

    {:ok, %Tree{} = tree} ->
      collect_tree_entry_objects(repo, tree, sha, visited, 0)

    {:ok, %Commit{} = commit} ->
      collect_commit_objects(repo, sha, commit, exclude_set, visited)
  end
end
# Expands a commit: its root tree first, then each parent in turn. The result
# lists the commit itself, then the tree objects, then the parents' objects.
defp collect_commit_objects(repo, sha, commit, exclude_set, visited) do
  {tree_objects, visited} = collect_tree_objects(repo, commit.tree, visited, 0)

  {parent_objects, visited} =
    for parent <- commit.parents, reduce: {[], visited} do
      {gathered, seen} ->
        {found, seen} = collect_reachable(repo, parent, exclude_set, seen)
        {found ++ gathered, seen}
    end

  commit_entry = {:commit, Object.encode_content_only(commit), sha}
  {[commit_entry | tree_objects ++ Enum.reverse(parent_objects)], visited}
end
# Expands the tree identified by `tree_sha`. Raises once `depth` exceeds
# @max_tree_depth. A tree that was already visited, cannot be read, or does not
# resolve to a tree object contributes nothing.
defp collect_tree_objects(_repo, _tree_sha, _visited, depth) when depth > @max_tree_depth do
  raise "max_tree_depth_exceeded"
end

defp collect_tree_objects(repo, tree_sha, visited, depth) do
  with false <- MapSet.member?(visited, tree_sha),
       {:ok, %Tree{} = tree} <- ObjectResolver.read(repo, tree_sha) do
    collect_tree_entry_objects(repo, tree, tree_sha, visited, depth)
  else
    _ -> {[], visited}
  end
end
# Marks the tree as visited and walks its entries. The result lists the tree
# itself first, followed by the objects gathered from its entries.
defp collect_tree_entry_objects(repo, %Tree{} = tree, tree_sha, visited, depth) do
  encoded_tree = Tree.encode_content(tree)
  start = {[], MapSet.put(visited, tree_sha)}

  {children, seen} =
    Enum.reduce(tree.entries, start, fn entry, {gathered, seen} ->
      collect_single_tree_entry(repo, entry, gathered, seen, depth)
    end)

  {[{:tree, encoded_tree, tree_sha} | Enum.reverse(children)], seen}
end
# Adds the objects contributed by one tree entry to the accumulator.
#
# Entries with mode "40000" are sub-trees and are walked one level deeper. Any
# other entry is read once; it is added only when it resolves to a blob, and
# is marked visited either way.
defp collect_single_tree_entry(repo, %{mode: "40000", sha: sha}, acc, vis, depth) do
  {subtree_objects, vis} = collect_tree_objects(repo, sha, vis, depth + 1)
  {subtree_objects ++ acc, vis}
end

defp collect_single_tree_entry(repo, %{sha: sha}, acc, vis, _depth) do
  if MapSet.member?(vis, sha) do
    {acc, vis}
  else
    seen = MapSet.put(vis, sha)

    gathered =
      case ObjectResolver.read(repo, sha) do
        {:ok, %Blob{content: content}} -> [{:blob, content, sha} | acc]
        _ -> acc
      end

    {gathered, seen}
  end
end
end