ref:main
Coverage: 72.5%
37/51 lines covered
1
# Copyright 2026 Cole Christensen
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
# http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
15
defmodule ExGitObjectstore do
16
@moduledoc """
17
Pure Elixir Git library with pluggable object storage (S3, filesystem, memory).
18
19
Provides git operations without requiring libgit2, git CLI, or any NIF.
20
All git data (objects, refs, packs) is stored via a pluggable storage backend.
21
22
## Building objects
23
24
* `write_tree/2` — write a tree from a list of entries
25
* `commit_tree/3` — build and store a commit pointing at a tree
26
27
## Merge / rebase toolkit
28
29
Complete set of primitives for performing merges and rebases in-process
30
without a working directory:
31
32
* `merge_branches/4` — three-way merge two refs into a merge commit (two parents)
33
* `squash_merge/4` — three-way merge producing a single-parent commit
34
* `cherry_pick/3` — replay a single commit onto a new parent
35
* `rebase_commits/4` — replay a list of commits onto a new base
36
37
## Ref operations
38
39
* `create_branch/3`, `delete_branch/2`
40
* `update_branch/4` — atomic compare-and-swap ref update
41
42
## Graph queries
43
44
* `merge_base/3` — lowest common ancestor of two commits
45
* `ancestor?/3` — true if A is an ancestor of B
46
47
## Commit-graph index
48
49
`ExGitObjectstore.Graph` provides an optional persisted commit-graph
50
index with topological generation numbers and corrected commit dates.
51
Once built and saved (`Graph.build/1`, `Graph.save/2`), it is loaded
52
wholesale into memory for fast ancestry / ahead-behind queries without
53
per-commit object reads. See that module and the `Graph.BinaryFormat`
54
moduledoc for details.
55
"""
56
57
alias ExGitObjectstore.{Graph, Merge, Object, ObjectResolver, Ref, Repo, Telemetry, Walk}
58
alias ExGitObjectstore.Object.{Blob, Commit, Tree}
59
60
@type sha :: String.t()
61
@type ref_name :: String.t()
62
63
@typedoc """
64
A structured author/committer identity.
65
66
The `:when` DateTime's `utc_offset + std_offset` produces the timezone;
67
sub-minute offsets are rounded to the minute, matching git's own behavior.
68
"""
69
@type identity :: %{
70
required(:name) => String.t(),
71
required(:email) => String.t(),
72
required(:when) => DateTime.t()
73
}
74
75
@typedoc """
76
Either a structured `identity/0` map or a pre-formatted git wire-format
77
string like `"Name <email> 1234567890 +0000"`. Accepted by `commit_tree/3`
78
and related APIs. Raw strings pass through unchanged; maps are formatted
79
via `format_identity/1`. Pass-through is useful for cherry-pick and rebase,
80
where preserving the original commit's author string byte-for-byte avoids
81
any parse/format round-trip loss.
82
"""
83
@type identity_or_raw :: identity() | String.t()
84
85
@doc """
Initializes a new, empty repository.

Writes a symbolic `HEAD` pointing at `refs/heads/main` via `Ref.put_head/2`
and returns that call's result.
"""
@spec init(Repo.t()) :: :ok | {:error, term()}
def init(%Repo{} = repo), do: Ref.put_head(repo, "ref: refs/heads/main")
92
93
@doc """
Returns `true` when the repository has a readable `HEAD`, `false` otherwise.

Any `{:error, _}` from `Ref.get_head/1` is reported as `false`.
"""
@spec exists?(Repo.t()) :: boolean()
def exists?(%Repo{} = repo) do
  head = Ref.get_head(repo)

  case head do
    {:error, _reason} -> false
    {:ok, _head} -> true
  end
end
103
104
@doc """
Reads the git object identified by `sha` and returns it parsed.

Thin wrapper around `ObjectResolver.read/2`.
"""
@spec cat_object(Repo.t(), sha()) :: {:ok, Object.t()} | {:error, term()}
def cat_object(%Repo{} = repo, sha), do: ObjectResolver.read(repo, sha)
111
112
@doc """
Stores a git object and returns the SHA it was written under.

Thin wrapper around `Object.write/2`.
"""
@spec write_object(Repo.t(), Object.t()) :: {:ok, sha()} | {:error, term()}
def write_object(%Repo{} = repo, object), do: Object.write(repo, object)
119
120
@doc """
Resolves a branch name, tag name, or SHA to a commit SHA.

Thin wrapper around `Ref.resolve/2`.
"""
@spec resolve(Repo.t(), ref_name() | sha()) :: {:ok, sha()} | {:error, term()}
def resolve(%Repo{} = repo, ref_or_sha), do: Ref.resolve(repo, ref_or_sha)
127
128
@doc """
Get the default branch name.

Reads `HEAD` and returns:

  * `{:ok, branch}` when `HEAD` is a symbolic ref under `refs/heads/`
    (the `ref: refs/heads/` prefix is stripped);
  * `{:ok, sha}` when `HEAD` holds a 40-byte value (detached `HEAD`);
  * `{:error, {:invalid_head, head}}` when `HEAD` has any other content,
    for example a symbolic ref outside `refs/heads/`;
  * `{:error, reason}` when `HEAD` cannot be read.
"""
@spec default_branch(Repo.t()) :: {:ok, String.t()} | {:error, term()}
def default_branch(%Repo{} = repo) do
  case Ref.get_head(repo) do
    {:ok, "ref: refs/heads/" <> branch} ->
      {:ok, branch}

    {:ok, sha} when byte_size(sha) == 40 ->
      {:ok, sha}

    # Unrecognised HEAD content previously raised CaseClauseError; report it
    # through the error tuple the spec already promises.
    {:ok, other} ->
      {:error, {:invalid_head, other}}

    {:error, _} = err ->
      err
  end
end
139
140
@doc """
Lists every branch, i.e. every ref under `refs/heads/`.

Returns whatever `Ref.list/2` returns for that prefix.
"""
@spec branches(Repo.t()) :: {:ok, [{String.t(), sha()}]} | {:error, term()}
def branches(%Repo{} = repo), do: Ref.list(repo, "refs/heads/")
147
148
@doc """
Lists every tag, i.e. every ref under `refs/tags/`.

Returns whatever `Ref.list/2` returns for that prefix.
"""
@spec tags(Repo.t()) :: {:ok, [{String.t(), sha()}]} | {:error, term()}
def tags(%Repo{} = repo), do: Ref.list(repo, "refs/tags/")
155
156
@doc """
Creates the branch `name` pointing at `sha`.

Writes `refs/heads/<name>` through `Ref.put/4` with no expected old value.
"""
@spec create_branch(Repo.t(), String.t(), sha()) :: :ok | {:error, term()}
def create_branch(%Repo{} = repo, name, sha) do
  branch_ref = "refs/heads/#{name}"
  Ref.put(repo, branch_ref, sha, nil)
end
163
164
@doc """
Deletes the branch `name` by removing `refs/heads/<name>` via `Ref.delete/2`.
"""
@spec delete_branch(Repo.t(), String.t()) :: :ok | {:error, term()}
def delete_branch(%Repo{} = repo, name) do
  branch_ref = "refs/heads/#{name}"
  Ref.delete(repo, branch_ref)
end
171
172
@doc """
Creates the lightweight tag `name` pointing at `sha`.

Writes `refs/tags/<name>` through `Ref.put/4` with no expected old value.
"""
@spec create_tag(Repo.t(), String.t(), sha()) :: :ok | {:error, term()}
def create_tag(%Repo{} = repo, name, sha) do
  tag_ref = "refs/tags/#{name}"
  Ref.put(repo, tag_ref, sha, nil)
end
179
180
@doc """
Deletes the tag `name` by removing `refs/tags/<name>` via `Ref.delete/2`.
"""
@spec delete_tag(Repo.t(), String.t()) :: :ok | {:error, term()}
def delete_tag(%Repo{} = repo, name) do
  tag_ref = "refs/tags/#{name}"
  Ref.delete(repo, tag_ref)
end
187
188
@doc """
Get a commit by SHA or ref.

## Returns

  * `{:ok, {sha, commit}}` — the resolved SHA and the parsed commit.
  * `{:error, :not_a_commit}` — the SHA resolved to an object that is not a
    commit (e.g. a blob or tree SHA was passed).
  * `{:error, reason}` — resolution or storage failure.
"""
@spec commit(Repo.t(), sha() | ref_name()) :: {:ok, {sha(), Commit.t()}} | {:error, term()}
def commit(%Repo{} = repo, ref_or_sha) do
  with {:ok, sha} <- resolve(repo, ref_or_sha) do
    # An explicit case (rather than a second `with` clause) keeps a non-commit
    # `{:ok, object}` from falling through as if it were a successful result.
    case cat_object(repo, sha) do
      {:ok, %Commit{} = commit} -> {:ok, {sha, commit}}
      {:ok, _other} -> {:error, :not_a_commit}
      {:error, _} = err -> err
    end
  end
end
198
199
@doc """
Returns the commit log reachable from a ref or SHA.

The starting point is resolved with `resolve/2`; the walk itself is done by
`Walk.log/3`, which receives `opts` unchanged.

## Options
  * `:max_count` — maximum number of commits (default: all)
  * `:skip` — skip N commits (default: 0)
"""
@spec log(Repo.t(), sha() | ref_name(), keyword()) ::
        {:ok, [{sha(), Commit.t()}]} | {:error, term()}
def log(%Repo{} = repo, ref_or_sha, opts \\ []) do
  case resolve(repo, ref_or_sha) do
    {:ok, start_sha} -> Walk.log(repo, start_sha, opts)
    unresolved -> unresolved
  end
end
213
214
@doc """
First page of a paginated log, returned as `{:ok, commits, cursor}`.

Hand the cursor to `log_continue/3` to fetch the next page. Each page is
O(page_size).
"""
@spec log_page(Repo.t(), sha() | ref_name(), keyword()) ::
        {:ok, [{sha(), Commit.t()}], [sha()]} | {:error, term()}
def log_page(%Repo{} = repo, ref_or_sha, opts \\ []) do
  case resolve(repo, ref_or_sha) do
    {:ok, start_sha} -> Walk.log_page(repo, start_sha, opts)
    unresolved -> unresolved
  end
end
225
226
@doc """
Fetches the next page of a paginated log from `cursor`.

Returns `{:ok, [], []}` once the log is exhausted. Thin wrapper around
`Walk.log_continue/3`.
"""
@spec log_continue(Repo.t(), [sha()], keyword()) ::
        {:ok, [{sha(), Commit.t()}], [sha()]} | {:error, term()}
def log_continue(%Repo{} = repo, cursor, opts \\ []),
  do: Walk.log_continue(repo, cursor, opts)
234
235
@doc """
Get a tree listing at a given path for a ref or SHA.

Path `"/"` (or `""`) returns the commit's root tree; any other path is split
on `"/"` and walked from the root tree.

## Returns

  * `{:ok, tree}` — the tree at `path`.
  * `{:error, :not_a_commit}` — `ref_or_sha` resolved to an object that is
    not a commit.
  * `{:error, :not_a_tree}` — the commit's `tree` SHA points at an object
    that is not a tree.
  * `{:error, reason}` — resolution, storage, or path-walk failure.
"""
@spec tree(Repo.t(), sha() | ref_name(), String.t()) ::
        {:ok, Tree.t()} | {:error, term()}
def tree(%Repo{} = repo, ref_or_sha, path \\ "/") do
  with {:ok, sha} <- resolve(repo, ref_or_sha),
       {:ok, %Commit{tree: tree_sha}} <- cat_object(repo, sha) do
    case cat_object(repo, tree_sha) do
      {:ok, %Tree{} = root_tree} ->
        if path in ["/", ""] do
          {:ok, root_tree}
        else
          walk_tree_path(repo, root_tree, String.split(path, "/", trim: true))
        end

      {:ok, _other} ->
        {:error, :not_a_tree}

      {:error, _} = err ->
        err
    end
  else
    # `resolve/2`'s `{:ok, sha}` pattern accepts every ok-tuple, so an
    # `{:ok, _}` reaching this point can only be a non-commit object. Without
    # this clause it would be returned to the caller as a success.
    {:ok, _not_a_commit} -> {:error, :not_a_commit}
    other -> other
  end
end
252
253
@doc """
Returns the content of the blob stored under `sha`.

Yields `{:error, :not_a_blob}` when the object exists but is of another
type; read errors from `cat_object/2` are returned unchanged.
"""
@spec blob(Repo.t(), sha()) :: {:ok, binary()} | {:error, term()}
def blob(%Repo{} = repo, sha) do
  object = cat_object(repo, sha)

  case object do
    {:error, _reason} = failure -> failure
    {:ok, %Blob{content: bytes}} -> {:ok, bytes}
    {:ok, _non_blob} -> {:error, :not_a_blob}
  end
end
264
265
@doc """
Returns the size in bytes of the blob stored under `sha`.

Currently implemented by reading the whole blob through `blob/2` and
measuring it — could be optimized with pack header parsing.
"""
@spec blob_size(Repo.t(), sha()) :: {:ok, non_neg_integer()} | {:error, term()}
def blob_size(%Repo{} = repo, sha) do
  # `blob/2` only ever returns `{:ok, _}` or `{:error, _}`; errors fall through.
  with {:ok, bytes} <- blob(repo, sha) do
    {:ok, byte_size(bytes)}
  end
end
276
277
@doc """
Batch variant of `blob_size/2`.

Resolves the sizes of many blobs in one call, running the backend reads with
bounded concurrency. The result is a map of `sha => size` holding only the
shas that resolved successfully. A sha that cannot be resolved (missing
object, `:not_a_blob`, storage error) or whose task exits (for example on
timeout) is silently left out of the map.

Input shas are deduplicated before dispatch, so repeating a sha costs the
same as passing it once.

## Options

  * `:max_concurrency` — maximum number of parallel backend reads.
    Defaults to 16. Match this to your storage backend's sweet spot —
    S3 likes higher concurrency, local filesystem benefits less.
  * `:timeout` — per-sha timeout in milliseconds. Defaults to 30_000.

## Why a dedicated bulk API

Anvil's tree-rendering path (and similar UIs) repeatedly call `blob_size/2`
once per entry in an `Enum.map`. On an S3-backed store each call is a
separate network round-trip — a 1000-file directory listing costs ~100s at
100 ms/call. `blob_sizes/2` issues the lookups in parallel so the same
workload drops to a few seconds.

This version does *not* reduce the cost of a single lookup: every sha is
still measured by reading the full blob via `blob/2`. Backends that want to
compute sizes without transferring content (e.g. parsing pack headers or
using S3 HEAD requests once the format supports it) can override this
function in a future revision.

## Examples

    iex> ExGitObjectstore.blob_sizes(repo, [sha1, sha2, missing_sha])
    {:ok, %{^sha1 => 42, ^sha2 => 100}} # missing_sha silently dropped

    iex> ExGitObjectstore.blob_sizes(repo, [])
    {:ok, %{}}
"""
@spec blob_sizes(Repo.t(), [sha()], keyword()) ::
        {:ok, %{optional(sha()) => non_neg_integer()}}
def blob_sizes(repo, shas, opts \\ [])

def blob_sizes(%Repo{}, [], _opts), do: {:ok, %{}}

def blob_sizes(%Repo{} = repo, shas, opts) when is_list(shas) do
  stream_opts = [
    max_concurrency: Keyword.get(opts, :max_concurrency, 16),
    timeout: Keyword.get(opts, :timeout, 30_000),
    on_timeout: :kill_task,
    ordered: false
  ]

  measure = fn sha ->
    case blob_size(repo, sha) do
      {:ok, size} -> {sha, size}
      {:error, _} -> :skip
    end
  end

  outcomes =
    shas
    |> Enum.uniq()
    |> Task.async_stream(measure, stream_opts)

  # Only `{:ok, {sha, size}}` satisfies the generator pattern; `{:ok, :skip}`
  # results and `{:exit, _}` entries (killed or crashed tasks) are filtered out.
  sizes = for {:ok, {sha, size}} <- outcomes, into: %{}, do: {sha, size}

  {:ok, sizes}
end
351
352
@doc """
Three-way merge of two commits.

Delegates to `Merge.merge_commits/4`, which finds the merge base (LCA) and
merges the trees. Returns `{:ok, merged_tree_sha}` on a clean merge, or
`{:error, {:conflicts, [conflict]}}` if there are conflicts.
"""
@spec merge_commits(Repo.t(), sha(), sha(), keyword()) ::
        {:ok, sha()} | {:error, term()}
def merge_commits(%Repo{} = repo, ours_sha, theirs_sha, opts \\ []),
  do: Merge.merge_commits(repo, ours_sha, theirs_sha, opts)
364
365
@doc """
Three-way merge of tree objects, given the base, ours, and theirs tree SHAs.

Thin wrapper around `Merge.merge_trees/4`.
"""
@spec merge_trees(Repo.t(), sha(), sha(), sha()) ::
        {:ok, sha()} | {:error, term()}
def merge_trees(%Repo{} = repo, base_tree_sha, ours_tree_sha, theirs_tree_sha),
  do: Merge.merge_trees(repo, base_tree_sha, ours_tree_sha, theirs_tree_sha)
373
374
@doc """
Writes a tree built from a list of entries and returns its SHA.

The entries are handed to `Tree.new/1`, which validates and canonicalizes
them, and the resulting tree is stored with `Object.write/2`. Each entry is a
map with `:mode`, `:name`, and `:sha` — see `ExGitObjectstore.Object.Tree`
for the allowed mode values.

## Example

    {:ok, tree_sha} = ExGitObjectstore.write_tree(repo, [
      %{mode: "100644", name: "README.md", sha: readme_blob_sha},
      %{mode: "40000", name: "src", sha: src_tree_sha}
    ])
"""
@spec write_tree(Repo.t(), [Tree.entry()]) :: {:ok, sha()} | {:error, term()}
def write_tree(%Repo{} = repo, entries) when is_list(entries) do
  tree = Tree.new(entries)
  Object.write(repo, tree)
end
392
393
@doc """
Builds a commit object pointing at `tree_sha`, writes it, and returns its SHA.

The tree and every parent SHA are checked for existence in storage before
anything is written.

## Options

  * `:parents` — list of parent commit SHAs. Empty list for a root commit.
  * `:author` (required) — an `identity_or_raw/0`. An `identity/0` map is
    formatted to git wire format; a pre-formatted string is used as-is
    (useful for cherry-pick to preserve the author verbatim).
  * `:committer` — same shape as `:author`. Defaults to `:author`.
  * `:message` (required) — commit message. A trailing newline is added if
    missing, per git convention.
  * `:gpgsig` — optional detached GPG signature to embed in the commit.

## Errors

  * `{:error, {:missing_option, :author | :message}}` — required option omitted.
  * `{:error, {:missing_tree, sha}}` — tree SHA not in storage.
  * `{:error, {:not_a_tree, sha}}` — SHA exists but isn't a tree.
  * `{:error, {:missing_parent, sha}}` — parent SHA not in storage.
  * `{:error, {:not_a_commit_parent, sha}}` — parent SHA exists but isn't a commit.

## Example

    {:ok, commit_sha} =
      ExGitObjectstore.commit_tree(repo, tree_sha,
        parents: [base_sha, head_sha],
        author: %{name: "Alice", email: "a@x.com", when: DateTime.utc_now()},
        message: "Merge head into base"
      )
"""
@spec commit_tree(Repo.t(), sha(),
        parents: [sha()],
        author: identity_or_raw(),
        committer: identity_or_raw(),
        message: String.t(),
        gpgsig: String.t() | nil
      ) :: {:ok, sha()} | {:error, term()}
def commit_tree(%Repo{} = repo, tree_sha, opts) when is_binary(tree_sha) and is_list(opts) do
  parents = Keyword.get(opts, :parents, [])

  with {:ok, author} <- fetch_required(opts, :author),
       {:ok, message} <- fetch_required(opts, :message),
       :ok <- validate_tree_exists(repo, tree_sha),
       :ok <- validate_parents_exist(repo, parents) do
    # The committer falls back to the author when not given explicitly.
    committer = Keyword.get(opts, :committer, author)

    Object.write(repo, %Commit{
      tree: tree_sha,
      parents: parents,
      author: format_identity(author),
      committer: format_identity(committer),
      message: ensure_trailing_newline(message),
      gpgsig: Keyword.get(opts, :gpgsig)
    })
  end
end
451
452
@doc """
Merge `theirs` into `ours`, creating a merge commit.

Resolves both refs (or SHAs) to commits, performs a three-way merge against
their merge base, and creates a new commit with the merged tree and both
commits as parents. On conflict, returns without writing the merge commit.

**Does not update any ref.** The returned SHA is only written as a commit
object; persisting it to a branch is the caller's responsibility (e.g.
`create_branch/3` or a storage-level CAS on `refs/heads/<branch>`).

Tag refs resolve transitively to their target commit via `resolve/2`.

Identities are passed straight to `commit_tree/3`, so both a structured
`identity/0` map and a pre-formatted wire string (`identity_or_raw/0`) are
accepted.

## Options

  * `:author` (required) — identity for the merge commit author.
  * `:committer` — defaults to `:author`.
  * `:message` — merge commit message. Defaults to
    `"Merge <theirs_ref> into <ours_ref>\\n"`.

## Returns

  * `{:ok, merge_commit_sha}` on clean merge.
  * `{:error, {:conflicts, [%{path, base, ours, theirs}]}}` on conflict
    (no commit is written).
  * `{:error, {:missing_option, :author}}` — required option omitted.
  * `{:error, reason}` for resolution or storage failures.

## Example

    {:ok, merge_sha} =
      ExGitObjectstore.merge_branches(repo, "main", "feature",
        author: %{name: "Alice", email: "a@x.com", when: DateTime.utc_now()}
      )
"""
# The identity options are typed `identity_or_raw/0` to match what
# `commit_tree/3` (and `squash_merge/4`) accept; the narrower `identity/0`
# made Dialyzer reject valid raw-string identities.
@spec merge_branches(Repo.t(), ref_name() | sha(), ref_name() | sha(),
        author: identity_or_raw(),
        committer: identity_or_raw(),
        message: String.t()
      ) :: {:ok, sha()} | {:error, term()}
def merge_branches(%Repo{} = repo, ours_ref, theirs_ref, opts) when is_list(opts) do
  with {:ok, author} <- fetch_required(opts, :author),
       {:ok, ours_sha} <- resolve(repo, ours_ref),
       {:ok, theirs_sha} <- resolve(repo, theirs_ref),
       {:ok, merged_tree_sha} <- Merge.merge_commits(repo, ours_sha, theirs_sha) do
    committer = Keyword.get(opts, :committer, author)

    # Built lazily so the ref names are only interpolated when no message
    # was supplied.
    message =
      Keyword.get_lazy(opts, :message, fn ->
        "Merge #{theirs_ref} into #{ours_ref}\n"
      end)

    commit_tree(repo, merged_tree_sha,
      parents: [ours_sha, theirs_sha],
      author: author,
      committer: committer,
      message: message
    )
  end
end
514
515
# -- Squash / cherry-pick / rebase --
516
517
@doc """
Squash-merges `head` into `base`, writing a single-parent commit.

Runs the same three-way merge as `merge_branches/4`, but the resulting commit
lists only `base` as its parent — `head`'s history is collapsed into one
commit.

**Does not update any ref.** The caller is responsible for persisting the
returned SHA (typically by updating `base`'s branch ref).

## Options

  * `:author` (required) — `identity_or_raw/0`.
  * `:committer` — defaults to `:author`.
  * `:message` — defaults to `"Squash merge of <head_ref> into <base_ref>\\n"`.

## Returns

  * `{:ok, squash_commit_sha}` on clean merge.
  * `{:error, {:conflicts, [...]}}` on conflict (no commit written).
  * `{:error, {:missing_option, :author}}`, or a resolution/storage error.
"""
@spec squash_merge(Repo.t(), ref_name() | sha(), ref_name() | sha(),
        author: identity_or_raw(),
        committer: identity_or_raw(),
        message: String.t()
      ) :: {:ok, sha()} | {:error, term()}
def squash_merge(%Repo{} = repo, base_ref, head_ref, opts) when is_list(opts) do
  with {:ok, author} <- fetch_required(opts, :author),
       {:ok, base_sha} <- resolve(repo, base_ref),
       {:ok, head_sha} <- resolve(repo, head_ref),
       {:ok, merged_tree_sha} <- Merge.merge_commits(repo, base_sha, head_sha) do
    # Only evaluated when the caller supplied no `:message`.
    default_message = fn -> "Squash merge of #{head_ref} into #{base_ref}\n" end

    commit_opts = [
      parents: [base_sha],
      author: author,
      committer: Keyword.get(opts, :committer, author),
      message: Keyword.get_lazy(opts, :message, default_message)
    ]

    commit_tree(repo, merged_tree_sha, commit_opts)
  end
end
563
564
@doc """
Cherry-picks a single commit onto a new parent.

Runs a three-way merge with `commit`'s first parent as the base, `onto`'s
tree as "ours", and `commit`'s tree as "theirs". On success a new commit is
written with `onto` as its sole parent, keeping `commit`'s original author
(and message, unless overridden).

The GPG signature of the original commit is **not** copied — cherry-picking
rewrites the commit, which invalidates any signature over the old content.

## Options

  * `:onto` (required) — SHA of the commit that will become the new parent.
  * `:committer` (required) — `identity_or_raw/0` for the cherry-picker.
  * `:author` — override the author. Default: preserve `commit`'s author.
  * `:message` — override the message. Default: preserve `commit`'s message.

## Errors

  * `{:error, :cannot_cherry_pick_root}` — `commit` has no parents.
  * `{:error, {:merge_commit_needs_mainline, sha}}` — `commit` is a merge
    commit (has 2+ parents); cherry-picking it requires choosing a mainline
    parent, which isn't yet supported.
  * `{:error, {:conflicts, [...]}}` — three-way merge conflict.
  * Other errors as `commit_tree/3`.

## Example

    {:ok, new_sha} =
      ExGitObjectstore.cherry_pick(repo, commit_sha,
        onto: base_tip_sha,
        committer: %{name: "Bot", email: "bot@x.com", when: DateTime.utc_now()}
      )
"""
@spec cherry_pick(Repo.t(), sha(),
        onto: sha(),
        committer: identity_or_raw(),
        author: identity_or_raw(),
        message: String.t()
      ) :: {:ok, sha()} | {:error, term()}
def cherry_pick(%Repo{} = repo, commit_sha, opts)
    when is_binary(commit_sha) and is_list(opts) do
  with {:ok, onto_sha} <- fetch_required(opts, :onto),
       {:ok, committer} <- fetch_required(opts, :committer),
       {:ok, %Commit{} = picked} <- read_commit(repo, commit_sha),
       {:ok, base_sha} <- cherry_pick_parent(picked, commit_sha),
       {:ok, %Commit{tree: base_tree}} <- read_commit(repo, base_sha),
       {:ok, %Commit{tree: ours_tree}} <- read_commit(repo, onto_sha),
       # base = picked commit's parent, ours = onto, theirs = picked commit
       {:ok, merged_tree_sha} <- Merge.merge_trees(repo, base_tree, ours_tree, picked.tree) do
    commit_tree(repo, merged_tree_sha,
      parents: [onto_sha],
      author: Keyword.get(opts, :author, picked.author),
      committer: committer,
      message: Keyword.get(opts, :message, picked.message)
    )
  end
end
625
626
@doc """
Replays a list of commits onto `onto` and returns the final tip SHA.

Each commit is cherry-picked in order. The replay halts on the first error
(typically a conflict) and returns it without writing any more commits. The
commits cherry-picked before the halt stay in storage as unreferenced
objects (unreachable garbage unless the caller does something with them,
which is the normal outcome of an aborted rebase).

**Does not update any ref.** The returned tip is the caller's to persist.

Typical usage: pass the commits returned by a range walk (e.g. commits
reachable from `head` but not `base`, oldest-first).

## Options

  * `:committer` (required) — `identity_or_raw/0` used for every replayed
    commit's committer field. The author of each commit is preserved.

## Returns

  * `{:ok, new_tip_sha}` — cleanly replayed all commits.
  * `{:ok, ^onto}` if `commits` is empty.
  * `{:error, {:conflicts, [...]}}` at the first conflicting commit.
  * Other errors propagated from `cherry_pick/3`.
"""
@spec rebase_commits(Repo.t(), [sha()], sha(), committer: identity_or_raw()) ::
        {:ok, sha()} | {:error, term()}
def rebase_commits(%Repo{} = repo, commits, onto, opts)
    when is_list(commits) and is_binary(onto) and is_list(opts) do
  with {:ok, committer} <- fetch_required(opts, :committer) do
    # The accumulator is `{:ok, tip}`; each step either advances the tip or
    # halts with the error.
    Enum.reduce_while(commits, {:ok, onto}, fn commit_sha, tip ->
      rebase_step(repo, committer, commit_sha, tip)
    end)
  end
end
660
661
# One `Enum.reduce_while/3` step of a rebase: cherry-picks `commit_sha` onto
# the current tip, continuing with the new tip or halting with the error.
defp rebase_step(repo, committer, commit_sha, {:ok, current_tip}) do
  picked = cherry_pick(repo, commit_sha, onto: current_tip, committer: committer)

  case picked do
    {:error, _reason} = failure -> {:halt, failure}
    {:ok, _new_tip} = advanced -> {:cont, advanced}
  end
end
667
668
# -- Graph queries --
669
670
@doc """
Returns the lowest common ancestor of two commits.

Thin wrapper around `ExGitObjectstore.Walk.merge_base/3`.
"""
@spec merge_base(Repo.t(), sha(), sha()) :: {:ok, sha()} | {:error, term()}
def merge_base(%Repo{} = repo, sha_a, sha_b), do: Walk.merge_base(repo, sha_a, sha_b)
679
680
@doc """
True if `ancestor` is an ancestor of `descendant` (inclusive — a commit is
its own ancestor).

Answers from the persisted commit-graph index when it is available and
contains both SHAs (see `ExGitObjectstore.Graph`); otherwise falls back to
the cat_object-based walker in `Graph.Fallback`.

Emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :ancestor?` and `path: :graph | :fallback`.
"""
@spec ancestor?(Repo.t(), sha(), sha()) :: {:ok, boolean()} | {:error, term()}
def ancestor?(%Repo{} = repo, ancestor_sha, descendant_sha) do
  required = [ancestor_sha, descendant_sha]

  from_graph = fn graph ->
    graph_result(graph, required, &Graph.ancestor?(&1, ancestor_sha, descendant_sha))
  end

  from_walker = fn -> Graph.Fallback.ancestor?(repo, ancestor_sha, descendant_sha) end

  routed_query(repo, :ancestor?, from_graph, from_walker)
end
706
707
@doc """
Counts the commits reachable from `head_sha` but not from `base_sha`
(`:ahead`) and vice versa (`:behind`). Equivalent to the output of
`git rev-list --count --left-right base...head`.

Answers from the persisted commit-graph index when it is available and
contains both SHAs; otherwise falls back to the cat_object-based walker in
`Graph.Fallback`.

Emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :ahead_behind` and `path: :graph | :fallback`.
"""
@spec ahead_behind(Repo.t(), sha(), sha()) ::
        {:ok, %{ahead: non_neg_integer(), behind: non_neg_integer()}} | {:error, term()}
def ahead_behind(%Repo{} = repo, base_sha, head_sha) do
  required = [base_sha, head_sha]

  from_graph = fn graph ->
    graph_result(graph, required, &Graph.ahead_behind(&1, base_sha, head_sha))
  end

  from_walker = fn -> Graph.Fallback.ahead_behind(repo, base_sha, head_sha) end

  routed_query(repo, :ahead_behind, from_graph, from_walker)
end
731
732
@doc """
Like `ahead_behind/3`, but for many heads against a single base.

The point of the batch form is to walk `ancestors(base)` once and reuse it
across every head instead of re-walking it per call. For workloads where
`head_shas` are many small offsets from a common base (e.g. a PR-list page
where every PR has `base = main`), this turns `O(N · |ancestors(base)|)`
into `O(|ancestors(base)| + Σ head walks)`.

Returns `{:ok, %{head_sha => %{ahead: N, behind: M}}}`. When the graph is
loaded and contains `base_sha`, heads it can answer come from
`Graph.ahead_behind_many/3`; heads missing from that answer are filled in by
the batched fallback walker (`Graph.Fallback.ahead_behind_many`). When the
graph is unavailable or does not contain `base_sha`, every head goes through
the fallback walker. Heads the fallback cannot produce are left out of the
map.

Emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :ahead_behind_many`. `path` is `:graph` when the batched graph
path was used and `:fallback` when every head went through the walker.
"""
@spec ahead_behind_many(Repo.t(), sha(), [sha()]) ::
        {:ok, %{sha() => %{ahead: non_neg_integer(), behind: non_neg_integer()}}}
        | {:error, term()}
def ahead_behind_many(%Repo{} = repo, base_sha, head_shas) when is_list(head_shas) do
  metadata = %{operation: :ahead_behind_many, repo_id: repo.id}

  # Shared outcome for every case in which the graph cannot serve `base_sha`.
  walk_everything = fn ->
    by_head = fill_per_head(repo, base_sha, head_shas, %{})
    {{:ok, by_head}, Map.put(metadata, :path, :fallback)}
  end

  Telemetry.span([:ex_git_objectstore, :graph, :query], metadata, fn ->
    case load_or_fetch_graph(repo) do
      {:error, _} ->
        walk_everything.()

      {:ok, graph} ->
        if Graph.member?(graph, base_sha) do
          {:ok, from_graph} = Graph.ahead_behind_many(graph, base_sha, head_shas)
          uncovered = Enum.reject(head_shas, &Map.has_key?(from_graph, &1))
          by_head = fill_per_head(repo, base_sha, uncovered, from_graph)
          {{:ok, by_head}, Map.put(metadata, :path, :graph)}
        else
          walk_everything.()
        end
    end
  end)
end
777
778
# Fills `acc` with ahead/behind results for heads the graph did not cover
# (or for every head when the repo has no usable graph).
#
# Iterating naively per head would re-walk ancestors(base_sha) for each one —
# O(N · |ancestors(base)|), which on a 50-PR / 400-commit base materially
# shows up as a multi-second LiveView mount. The batched
# `Graph.Fallback.ahead_behind_many` call walks base once and reuses the
# ancestor set, restoring graceful degradation.
defp fill_per_head(_repo, _base_sha, [], acc), do: acc

defp fill_per_head(repo, base_sha, head_shas, acc) do
  walked = Graph.Fallback.ahead_behind_many(repo, base_sha, head_shas)

  case walked do
    # Base walk failed — keep whatever the graph path already filled in. The
    # caller's contract is "missing heads are omitted", so dropping all of
    # them on a base failure is consistent.
    {:error, _} -> acc
    {:ok, by_head} -> Map.merge(acc, by_head)
  end
end
796
797
@doc """
Lists the commits reachable from `head_sha` but not from `base_sha`,
newest-first. Empty when `head_sha` is an ancestor of (or equal to)
`base_sha`.

Answers from the persisted commit-graph index when it is available and
contains both SHAs; otherwise falls back to the cat_object-based walker in
`Graph.Fallback`.

Emits `[:ex_git_objectstore, :graph, :query]` telemetry with
`operation: :commits_between` and `path: :graph | :fallback`.
"""
@spec commits_between(Repo.t(), sha(), sha()) :: {:ok, [sha()]} | {:error, term()}
def commits_between(%Repo{} = repo, base_sha, head_sha) do
  required = [base_sha, head_sha]

  from_graph = fn graph ->
    graph_result(graph, required, &Graph.commits_between(&1, base_sha, head_sha))
  end

  from_walker = fn -> Graph.Fallback.commits_between(repo, base_sha, head_sha) end

  routed_query(repo, :commits_between, from_graph, from_walker)
end
818
819
@doc """
Rebuilds the commit-graph index for `repo` from all refs, persists it to
storage, and seeds the in-process cache with it.

Safe to call from any process; callers should serialize concurrent rebuilds
externally. A failure from `Graph.build/1` or `Graph.save/2` is returned
as-is and the cache is left untouched.
"""
@spec rebuild_graph(Repo.t()) :: :ok | {:error, term()}
def rebuild_graph(%Repo{} = repo) do
  with {:ok, graph} <- Graph.build(repo),
       :ok <- Graph.save(repo, graph) do
    # Fingerprint is taken after the save so it describes the blob just written.
    fingerprint = current_graph_fingerprint(repo)
    Graph.Cache.put(repo, graph, fingerprint)
  end
end
831
832
# -- Graph → fallback routing --
#
# `query_fun.(graph)` must return one of:
#
#   * `{:ok, result}`    — the graph answered the query;
#   * `:fallback`        — the graph is loaded but doesn't cover the query;
#   * `{:error, reason}` — the graph said no; propagated as-is.
#
# `fallback_fun` runs only when the graph can't answer: it is missing from
# storage, the cache missed and the load failed, or `query_fun` signalled
# `:fallback`.
#
# The whole routing runs inside a telemetry span whose stop metadata carries
# the `:path` (`:graph` or `:fallback`) that produced the result.

defp routed_query(%Repo{} = repo, operation, query_fun, fallback_fun) do
  event = [:ex_git_objectstore, :graph, :query]
  metadata = %{operation: operation, repo_id: repo.id}

  Telemetry.span(event, metadata, fn ->
    {answer, path} = run_routed(repo, query_fun, fallback_fun)
    {answer, Map.put(metadata, :path, path)}
  end)
end
850
851
# Runs `query_fun` against the graph when one can be loaded, otherwise (or
# when `query_fun` asks for it) runs `fallback_fun`. Returns `{result, path}`
# where `path` names the route taken.
defp run_routed(repo, query_fun, fallback_fun) do
  case load_or_fetch_graph(repo) do
    {:error, _} ->
      {fallback_fun.(), :fallback}

    {:ok, graph} ->
      case query_fun.(graph) do
        :fallback -> {fallback_fun.(), :fallback}
        # Both successes and errors from the graph count as graph answers.
        {tag, _} = answer when tag in [:ok, :error] -> {answer, :graph}
      end
  end
end
864
865
# Calls `graph_fun` with the graph only when every SHA in `required_shas` is
# a member of it; otherwise returns `:fallback` so the caller routes the
# query to its walker.
defp graph_result(graph, required_shas, graph_fun) do
  case Enum.all?(required_shas, &Graph.member?(graph, &1)) do
    true -> graph_fun.(graph)
    false -> :fallback
  end
end
874
875
# Returns the commit graph for `repo`: from the in-process cache when the
# cached entry matches the current fingerprint, otherwise loaded from storage
# (and then cached under that fingerprint). Load errors are returned as-is.
defp load_or_fetch_graph(repo) do
  fingerprint = current_graph_fingerprint(repo)

  case Graph.Cache.fetch(repo, fingerprint) do
    {:ok, _graph} = hit ->
      hit

    status when status == :miss or status == :stale ->
      case Graph.load(repo) do
        {:error, _} = failure ->
          failure

        {:ok, loaded} = success ->
          :ok = Graph.Cache.put(repo, loaded, fingerprint)
          success
      end
  end
end
893
894
# Returns the fingerprint used to validate the cached commit graph.
#
# The fingerprint is `Graph.Cache.no_fingerprint()` when the storage backend
# can't cheaply detect changes (S3, Memory) — i.e. it doesn't export
# `blob_fingerprint/3` — or when the fingerprint call fails. In that case the
# cache stays valid until an explicit `Graph.Cache.delete/1` (writers must
# coordinate). For Filesystem, a `mtime+size` tuple from `stat/2` does the
# job — one syscall per query, far cheaper than reloading the graph.
defp current_graph_fingerprint(%Repo{storage: {mod, _}} = repo) do
  # `function_exported?/3` does not load the module: for a backend that has
  # not been loaded yet (lazy code loading in dev/test) it returns false and
  # the backend would be treated as having no fingerprint. Load it first.
  if Code.ensure_loaded?(mod) and function_exported?(mod, :blob_fingerprint, 3) do
    case Repo.storage_call(repo, :blob_fingerprint, [Graph.blob_key()]) do
      {:ok, fp} -> fp
      {:error, _} -> Graph.Cache.no_fingerprint()
    end
  else
    Graph.Cache.no_fingerprint()
  end
end
909
910
# -- Ref update --
911
912
@doc """
Point the branch `name` at `new_sha`, optionally guarded by compare-and-swap.

The ref that gets written is `refs/heads/<name>`.

  * With `expected_old_sha` left as `nil` (the default) the update is
    unconditional, and the branch is created if it doesn't exist.
  * With `expected_old_sha` provided, the update succeeds only if the ref
    currently equals that SHA; otherwise the storage backend returns
    `:cas_failed`.

## Example

    # unconditional
    :ok = ExGitObjectstore.update_branch(repo, "main", new_sha)

    # CAS — fails with {:error, :cas_failed} if ref has moved
    case ExGitObjectstore.update_branch(repo, "main", new_sha, observed_sha) do
      :ok -> ...
      {:error, :cas_failed} -> # concurrent push, retry or abort
    end
"""
@spec update_branch(Repo.t(), String.t(), sha(), sha() | nil) :: :ok | {:error, term()}
def update_branch(%Repo{} = repo, name, new_sha, expected_old_sha \\ nil)
    when is_binary(name) and is_binary(new_sha) do
  branch_ref = "refs/heads/" <> name
  Ref.put(repo, branch_ref, new_sha, expected_old_sha)
end
937
938
# -- Identity helpers (public) --
939
940
@doc """
Render an identity as a git wire-format string.

Input that is already a raw string is returned unchanged, so callers holding
a pre-formatted identity (e.g. a commit's `:author` field) can go through
the same API as callers holding a structured one.

## Examples

    iex> ExGitObjectstore.format_identity(%{
    ...>   name: "Alice", email: "a@x.com", when: ~U[2026-01-01 00:00:00Z]
    ...> })
    "Alice <a@x.com> 1767225600 +0000"

    iex> ExGitObjectstore.format_identity("Alice <a@x.com> 1767225600 +0000")
    "Alice <a@x.com> 1767225600 +0000"
"""
@spec format_identity(identity_or_raw()) :: String.t()
def format_identity(value) do
  do_format_identity(value)
end
959
960
@doc """
Parse a git wire-format identity string back into a structured identity.

Returns `{:ok, %{name, email, when}}` or `{:error, :invalid_identity}`. The
error is returned both for strings that do not have the
`Name <email> <unix-seconds> <+HHMM | -HHMM>` shape and for timestamps that
fall outside the range `DateTime` can represent.

The returned `DateTime` preserves the original timezone offset; a `+0000`
input produces a UTC DateTime, and non-zero offsets are represented via
`utc_offset` on the DateTime struct. The wall-clock fields hold the local
time at that offset, so `DateTime.to_unix/1` gives back the parsed timestamp.
"""
@spec parse_identity(String.t()) :: {:ok, identity()} | {:error, :invalid_identity}
def parse_identity(str) when is_binary(str) do
  with [_, name, email, ts_str, sign, hh_str, mm_str] <-
         Regex.run(~r/^(.+?)\s+<([^>]*)>\s+(\d+)\s+([+-])(\d{2})(\d{2})$/, str),
       magnitude = String.to_integer(hh_str) * 3600 + String.to_integer(mm_str) * 60,
       offset_sec = if(sign == "-", do: -magnitude, else: magnitude),
       # Build the wall clock for the *local* time (UTC + offset). Together
       # with `utc_offset` below this keeps the struct self-consistent:
       # `DateTime.to_unix/1` subtracts the offset again and returns the
       # original timestamp. The non-raising `from_unix/1` turns an
       # out-of-range timestamp into an error tuple instead of an exception.
       {:ok, local} <- DateTime.from_unix(String.to_integer(ts_str) + offset_sec) do
    # Git identities carry only a numeric offset, never a zone name, so the
    # zone labels are kept at their UTC placeholders.
    dt = %{
      local
      | utc_offset: offset_sec,
        std_offset: 0,
        zone_abbr: "UTC",
        time_zone: "Etc/UTC"
    }

    {:ok, %{name: name, email: email, when: dt}}
  else
    _ -> {:error, :invalid_identity}
  end
end
994
995
# -- Option fetching & validation helpers --
996
997
# Looks up a mandatory option in the keyword list `opts`.
#
# Returns `{:ok, value}` when `key` is present, otherwise
# `{:error, {:missing_option, key}}`.
defp fetch_required(opts, key) do
  with :error <- Keyword.fetch(opts, key) do
    {:error, {:missing_option, key}}
  end
end
1003
1004
# Reads the object stored under `sha` and insists that it is a commit.
#
# Returns `{:ok, %Commit{}}` on success, `{:error, {:not_a_commit, sha}}`
# when the object exists but is of another kind, and passes any error from
# `cat_object/2` through unchanged.
defp read_commit(repo, sha) do
  lookup = cat_object(repo, sha)

  case lookup do
    {:error, _} -> lookup
    {:ok, %Commit{}} -> lookup
    {:ok, _other} -> {:error, {:not_a_commit, sha}}
  end
end
1011
1012
# Selects the parent a cherry-pick should be computed against.
#
#   * exactly one parent -> `{:ok, parent}`
#   * no parents         -> `{:error, :cannot_cherry_pick_root}`
#   * anything longer    -> `{:error, {:merge_commit_needs_mainline, sha}}`
defp cherry_pick_parent(%Commit{parents: [only_parent]}, _sha) do
  {:ok, only_parent}
end

defp cherry_pick_parent(%Commit{parents: []}, _sha) do
  {:error, :cannot_cherry_pick_root}
end

defp cherry_pick_parent(%Commit{parents: [_ | _]}, sha) do
  {:error, {:merge_commit_needs_mainline, sha}}
end
1017
1018
# Single code path behind the public API and internal callers: a binary is
# taken to be an already-formatted identity and returned as-is, while a
# `%{name, email, when}` map is rendered as
# "<name> <<email>> <unix seconds> <tz offset>".
defp do_format_identity(str) when is_binary(str), do: str

defp do_format_identity(%{name: name, email: email, when: %DateTime{} = dt})
     when is_binary(name) and is_binary(email) do
  unix_seconds = dt |> DateTime.to_unix() |> Integer.to_string()

  IO.iodata_to_binary([name, " <", email, "> ", unix_seconds, " ", format_tz_offset(dt)])
end
1028
1029
# Renders a DateTime's combined offset (`utc_offset + std_offset`, in
# seconds) as a sign followed by two-digit hours and two-digit minutes,
# e.g. "+0000". A zero offset gets a "+" sign.
defp format_tz_offset(%DateTime{utc_offset: utc, std_offset: std}) do
  offset = utc + std
  magnitude = abs(offset)

  sign =
    case offset do
      negative when negative < 0 -> "-"
      _ -> "+"
    end

  hours = div(magnitude, 3600)
  minutes = magnitude |> rem(3600) |> div(60)

  sign <> pad2(hours) <> pad2(minutes)
end
1038
1039
# Renders `n` as text, prefixing a single "0" when `n` is below 10 so that
# one-digit values come out two characters wide.
defp pad2(n) do
  if n < 10 do
    "0" <> to_string(n)
  else
    Integer.to_string(n)
  end
end
1041
1042
# Returns `message` unchanged when it already ends in "\n"; otherwise returns
# it with a single "\n" appended.
defp ensure_trailing_newline(message) when is_binary(message) do
  case String.ends_with?(message, "\n") do
    true -> message
    false -> message <> "\n"
  end
end
1045
1046
# Confirms that `sha` resolves to a tree object.
#
# Returns `:ok` for a tree, `{:error, {:not_a_tree, sha}}` for any other
# object kind, and `{:error, {:missing_tree, sha}}` when the lookup itself
# fails (the underlying error reason is discarded).
defp validate_tree_exists(repo, sha) do
  case cat_object(repo, sha) do
    {:error, _} -> {:error, {:missing_tree, sha}}
    {:ok, %Tree{}} -> :ok
    {:ok, _other} -> {:error, {:not_a_tree, sha}}
  end
end
1053
1054
# Confirms that every SHA in `parents` resolves to a commit.
#
# Parents are checked in order and checking stops at the first problem:
# `{:error, {:not_a_commit_parent, sha}}` for an object of another kind,
# `{:error, {:missing_parent, sha}}` when the lookup fails. Returns `:ok`
# when all parents pass (trivially so for an empty list).
defp validate_parents_exist(_repo, []), do: :ok

defp validate_parents_exist(repo, parents) when is_list(parents) do
  # `find_value/3` yields the first truthy callback result, so a valid parent
  # maps to `nil` (keep scanning) and a bad one maps to its error tuple.
  Enum.find_value(parents, :ok, fn sha ->
    case cat_object(repo, sha) do
      {:ok, %Commit{}} -> nil
      {:ok, _other} -> {:error, {:not_a_commit_parent, sha}}
      {:error, _} -> {:error, {:missing_parent, sha}}
    end
  end)
end
1065
1066
# Walk into nested tree directories by path components.
#
# Starting from `tree`, each component is looked up by name among the current
# tree's entries:
#
#   * no components left       -> `{:ok, tree}`
#   * no entry with that name  -> `{:error, {:path_not_found, component}}`
#   * entry with mode "40000"  -> its object is read and the walk continues
#                                 inside it with the remaining components
#   * other entry, last component -> the result of `cat_object/2` for it
#   * other entry, more components remaining ->
#                                 `{:error, {:not_a_directory, component}}`
#
# Errors from `cat_object/2` are passed through unchanged.
defp walk_tree_path(_repo, tree, []), do: {:ok, tree}

defp walk_tree_path(repo, %Tree{entries: entries}, [component | rest]) do
  case Enum.find(entries, &(&1.name == component)) do
    nil ->
      {:error, {:path_not_found, component}}

    %{mode: "40000", sha: sha} ->
      case cat_object(repo, sha) do
        {:ok, %Tree{} = subtree} ->
          walk_tree_path(repo, subtree, rest)

        # A directory entry whose SHA resolves to something other than a tree
        # used to fall through this `case` and raise `CaseClauseError`.
        # Report it as an error tuple, using the same reason shape as
        # `validate_tree_exists/2`.
        {:ok, _other} ->
          {:error, {:not_a_tree, sha}}

        {:error, _} = err ->
          err
      end

    %{sha: sha} when rest == [] ->
      # Leaf entry — return the blob/tree
      cat_object(repo, sha)

    _ ->
      {:error, {:not_a_directory, component}}
  end
end
1088
end
1089
1090