Skip to content

Commit dc0f91a

Browse files
committed
feat: add LFS snapshot support to git strategy
When lfs-snapshot-enabled is true, cachew generates a separate LFS object snapshot (lfs-snapshot.tar.zst) alongside the regular git snapshot. cachew runs git lfs fetch, archives .git/lfs/objects/, and serves the resulting archive at GET /git/{repo}/lfs-snapshot.tar.zst. Adds snapshot.CreatePaths for archiving named paths within a base directory (for example, a single subdirectory) with their relative path prefixes preserved in the tar. LFS snapshot jobs are scheduled both on startup discovery and on the first clone of a new repo.
1 parent 75c122b commit dc0f91a

7 files changed

Lines changed: 257 additions & 72 deletions

File tree

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ ARG TARGETARCH
66
SHELL ["/bin/sh", "-o", "pipefail", "-c"]
77

88
# Install runtime dependencies for git operations and TLS
9-
RUN apk add --no-cache ca-certificates curl git git-daemon tzdata zstd && \
9+
RUN apk add --no-cache ca-certificates curl git git-daemon git-lfs tzdata zstd && \
1010
addgroup -g 1000 cachew && \
1111
adduser -D -u 1000 -G cachew cachew
1212

internal/gitclone/command.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111
"github.com/alecthomas/errors"
1212
)
1313

14-
func (r *Repository) gitCommand(ctx context.Context, args ...string) (*exec.Cmd, error) {
14+
// GitCommand returns a git subprocess configured with repository-scoped
15+
// authentication and any per-URL git config overrides disabled.
16+
func (r *Repository) GitCommand(ctx context.Context, args ...string) (*exec.Cmd, error) {
1517
repoURL := r.upstreamURL
1618
var token string
1719
if r.credentialProvider != nil && strings.Contains(repoURL, "github.com") {

internal/gitclone/command_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func TestGitCommand(t *testing.T) {
5151
credentialProvider: nil,
5252
}
5353

54-
cmd, err := repo.gitCommand(ctx, "version")
54+
cmd, err := repo.GitCommand(ctx, "version")
5555
assert.NoError(t, err)
5656

5757
assert.NotZero(t, cmd)
@@ -70,7 +70,7 @@ func TestGitCommandWithEmptyURL(t *testing.T) {
7070
credentialProvider: nil,
7171
}
7272

73-
cmd, err := repo.gitCommand(ctx, "version")
73+
cmd, err := repo.GitCommand(ctx, "version")
7474
assert.NoError(t, err)
7575

7676
assert.NotZero(t, cmd)
@@ -124,7 +124,7 @@ func TestGitCommandWithCredentialProvider(t *testing.T) {
124124
},
125125
}
126126

127-
cmd, err := repo.gitCommand(ctx, "version")
127+
cmd, err := repo.GitCommand(ctx, "version")
128128
assert.NoError(t, err)
129129
assert.NotZero(t, cmd)
130130

internal/gitclone/manager.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ func (r *Repository) executeClone(ctx context.Context) error {
502502
r.upstreamURL, r.path,
503503
}
504504

505-
cmd, err := r.gitCommand(cloneCtx, args...)
505+
cmd, err := r.GitCommand(cloneCtx, args...)
506506
if err != nil {
507507
return errors.Wrap(err, "create git command")
508508
}
@@ -583,8 +583,7 @@ func (r *Repository) fetchInternal(ctx context.Context, timeout time.Duration, e
583583
}
584584
args = append(args, "fetch", "--prune", "--prune-tags")
585585

586-
// #nosec G204 - r.path is controlled by us
587-
cmd, err := r.gitCommand(fetchCtx, args...)
586+
cmd, err := r.GitCommand(fetchCtx, args...)
588587
if err != nil {
589588
return errors.Wrap(err, "create git command")
590589
}
@@ -682,7 +681,7 @@ func (r *Repository) GetLocalRefs(ctx context.Context) (map[string]string, error
682681

683682
func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, error) {
684683
// #nosec G204 - r.upstreamURL is controlled by us
685-
cmd, err := r.gitCommand(ctx, "ls-remote", r.upstreamURL)
684+
cmd, err := r.GitCommand(ctx, "ls-remote", r.upstreamURL)
686685
if err != nil {
687686
return nil, errors.Wrap(err, "create git command")
688687
}

internal/snapshot/snapshot.go

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,54 @@ import (
2525
// Exclude patterns use tar's --exclude syntax.
2626
// threads controls zstd parallelism; 0 uses all available CPU cores.
2727
func Create(ctx context.Context, remote cache.Cache, key cache.Key, directory string, ttl time.Duration, excludePatterns []string, threads int) error {
28+
return CreatePaths(ctx, remote, key, directory, filepath.Base(directory), []string{"."}, ttl, excludePatterns, threads)
29+
}
30+
31+
// CreatePaths archives named paths within baseDir using tar with zstd compression,
32+
// then uploads the resulting archive to the cache.
33+
//
34+
// The archive preserves all file permissions, ownership, and symlinks.
35+
// Each entry in includePaths is archived relative to baseDir and must exist.
36+
// This allows callers to archive either an entire directory with "." or a
37+
// specific subtree such as "lfs" while preserving that relative path prefix.
38+
// Exclude patterns use tar's --exclude syntax.
39+
// threads controls zstd parallelism; 0 uses all available CPU cores.
40+
func CreatePaths(ctx context.Context, remote cache.Cache, key cache.Key, baseDir, archiveName string, includePaths []string, ttl time.Duration, excludePatterns []string, threads int) error {
2841
if threads <= 0 {
2942
threads = runtime.NumCPU()
3043
}
3144

32-
// Verify directory exists
33-
if info, err := os.Stat(directory); err != nil {
34-
return errors.Wrap(err, "failed to stat directory")
45+
if len(includePaths) == 0 {
46+
return errors.New("includePaths must not be empty")
47+
}
48+
49+
if info, err := os.Stat(baseDir); err != nil {
50+
return errors.Wrap(err, "failed to stat base directory")
3551
} else if !info.IsDir() {
36-
return errors.Errorf("not a directory: %s", directory)
52+
return errors.Errorf("not a directory: %s", baseDir)
53+
}
54+
for _, path := range includePaths {
55+
targetPath := filepath.Join(baseDir, path)
56+
if _, err := os.Stat(targetPath); err != nil {
57+
return errors.Wrapf(err, "failed to stat include path %q", path)
58+
}
3759
}
3860

3961
headers := make(http.Header)
4062
headers.Set("Content-Type", "application/zstd")
41-
headers.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(directory)+".tar.zst"))
63+
headers.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", archiveName+".tar.zst"))
4264

4365
wc, err := remote.Create(ctx, key, headers, ttl)
4466
if err != nil {
4567
return errors.Wrap(err, "failed to create object")
4668
}
4769

48-
tarArgs := []string{"-cpf", "-", "-C", directory}
70+
tarArgs := []string{"-cpf", "-", "-C", baseDir}
4971
for _, pattern := range excludePatterns {
5072
tarArgs = append(tarArgs, "--exclude", pattern)
5173
}
52-
tarArgs = append(tarArgs, ".")
74+
tarArgs = append(tarArgs, "--")
75+
tarArgs = append(tarArgs, includePaths...)
5376

5477
tarCmd := exec.CommandContext(ctx, "tar", tarArgs...)
5578
zstdCmd := exec.CommandContext(ctx, "zstd", "-c", fmt.Sprintf("-T%d", threads)) //nolint:gosec // threads is a validated integer, not user input

internal/strategy/git/git.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type Config struct {
3838
SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd workstation snapshots. 0 disables snapshots." default:"0"`
3939
MirrorSnapshotInterval time.Duration `hcl:"mirror-snapshot-interval,optional" help:"How often to generate mirror snapshots for pod bootstrap. 0 uses snapshot-interval. Defaults to 2h." default:"2h"`
4040
RepackInterval time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"`
41+
LFSSnapshotEnabled bool `hcl:"lfs-snapshot-enabled,optional" help:"When true, also generate a separate LFS object snapshot at /git/{repo}/lfs-snapshot.tar.zst on each snapshot interval. Requires git-lfs and a configured GitHub App." default:"false"`
4142
// ServerURL is embedded as remote.origin.url in snapshots so git pull goes through cachew.
4243
ServerURL string `hcl:"server-url,optional" help:"Base URL of this cachew instance, embedded in snapshot remote URLs." default:"${CACHEW_URL}"`
4344
ZstdThreads int `hcl:"zstd-threads,optional" help:"Threads for zstd compression/decompression (0 = all CPU cores)." default:"0"`
@@ -151,6 +152,9 @@ func New(
151152

152153
if s.config.SnapshotInterval > 0 {
153154
s.scheduleSnapshotJobs(repo)
155+
if s.config.LFSSnapshotEnabled {
156+
s.scheduleLFSSnapshotJobs(repo)
157+
}
154158
}
155159
if s.config.RepackInterval > 0 {
156160
s.scheduleRepackJobs(repo)
@@ -219,6 +223,11 @@ func (s *Strategy) handleRequest(w http.ResponseWriter, r *http.Request) {
219223
return
220224
}
221225

226+
if strings.HasSuffix(pathValue, "/lfs-snapshot.tar.zst") {
227+
s.handleLFSSnapshotRequest(w, r, host, pathValue)
228+
return
229+
}
230+
222231
service := r.URL.Query().Get("service")
223232
isReceivePack := service == "git-receive-pack" || strings.HasSuffix(pathValue, "/git-receive-pack")
224233

@@ -497,6 +506,9 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) {
497506

498507
if s.config.SnapshotInterval > 0 {
499508
s.scheduleSnapshotJobs(repo)
509+
if s.config.LFSSnapshotEnabled {
510+
s.scheduleLFSSnapshotJobs(repo)
511+
}
500512
}
501513
if s.config.RepackInterval > 0 {
502514
s.scheduleRepackJobs(repo)
@@ -524,6 +536,9 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) {
524536

525537
if s.config.SnapshotInterval > 0 {
526538
s.scheduleSnapshotJobs(repo)
539+
if s.config.LFSSnapshotEnabled {
540+
s.scheduleLFSSnapshotJobs(repo)
541+
}
527542
}
528543
if s.config.RepackInterval > 0 {
529544
s.scheduleRepackJobs(repo)

0 commit comments

Comments
 (0)