Skip to content

Commit a424ba1

Browse files
dcantah and fidencio committed
Linux: Make copyFile sparse aware
Today the Linux implementation just uses io.Copy, which does not handle sparse files, so sparse files are expanded to their full size when copied. Co-authored-by: Fabiano Fidêncio <ffidencio@nvidia.com> Signed-off-by: Danny Canter <danny@dcantah.dev>
1 parent cad00c0 commit a424ba1

3 files changed

Lines changed: 215 additions & 1 deletion

File tree

fs/copy_linux.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,122 @@ import (
2626
"golang.org/x/sys/unix"
2727
)
2828

29+
// maxCopyChunk caps the number of bytes requested from a single
// copy_file_range call at 1 GiB, so the length still fits in an int on
// 32-bit architectures.
const maxCopyChunk = 1 << 30
32+
33+
// copyFile copies a file from source to target preserving sparse file holes.
34+
//
35+
// If the filesystem does not support SEEK_DATA/SEEK_HOLE, it falls back
36+
// to a plain io.Copy.
37+
func copyFile(target, source string) error {
38+
src, err := os.Open(source)
39+
if err != nil {
40+
return fmt.Errorf("failed to open source %s: %w", source, err)
41+
}
42+
defer src.Close()
43+
44+
fi, err := src.Stat()
45+
if err != nil {
46+
return fmt.Errorf("failed to stat source %s: %w", source, err)
47+
}
48+
size := fi.Size()
49+
50+
tgt, err := os.Create(target)
51+
if err != nil {
52+
return fmt.Errorf("failed to open target %s: %w", target, err)
53+
}
54+
defer tgt.Close()
55+
56+
if err := tgt.Truncate(size); err != nil {
57+
return fmt.Errorf("failed to truncate target %s: %w", target, err)
58+
}
59+
60+
srcFd := int(src.Fd())
61+
62+
// Try a SEEK_DATA to check if the filesystem supports it.
63+
// If not, fall back to a plain copy.
64+
if _, err := unix.Seek(srcFd, 0, unix.SEEK_DATA); err != nil {
65+
// ENXIO means no data in the file at all. In other words it's entirely sparse.
66+
// The truncated target is already correct.
67+
if errors.Is(err, syscall.ENXIO) {
68+
return nil
69+
}
70+
71+
if errors.Is(err, syscall.EOPNOTSUPP) || errors.Is(err, syscall.ENOTSUP) || errors.Is(err, syscall.EINVAL) {
72+
// Filesystem doesn't support SEEK_DATA/SEEK_HOLE. Fall back to a plain copy.
73+
src.Close()
74+
tgt.Close()
75+
return openAndCopyFile(target, source)
76+
}
77+
78+
return fmt.Errorf("failed to seek data in source %s: %w", source, err)
79+
}
80+
81+
// Copy data regions from source to target, skipping holes.
82+
var offset int64
83+
tgtFd := int(tgt.Fd())
84+
85+
for offset < size {
86+
dataStart, err := unix.Seek(srcFd, offset, unix.SEEK_DATA)
87+
if err != nil {
88+
// No more data past offset. Remainder of file is a hole.
89+
if errors.Is(err, syscall.ENXIO) {
90+
break
91+
}
92+
return fmt.Errorf("SEEK_DATA failed at offset %d: %w", offset, err)
93+
}
94+
95+
// Find the end of this data region (start of next hole).
96+
holeStart, err := unix.Seek(srcFd, dataStart, unix.SEEK_HOLE)
97+
if err != nil {
98+
// ENXIO shouldn't happen after a successful SEEK_DATA, but
99+
// treat it as data extending to end of file.
100+
if errors.Is(err, syscall.ENXIO) {
101+
holeStart = size
102+
} else {
103+
return fmt.Errorf("SEEK_HOLE failed at offset %d: %w", dataStart, err)
104+
}
105+
}
106+
107+
// Copy the data region [dataStart, holeStart).
108+
srcOff := dataStart
109+
tgtOff := dataStart
110+
remain := holeStart - dataStart
111+
112+
for remain > 0 {
113+
chunk := remain
114+
if chunk > maxCopyChunk {
115+
chunk = maxCopyChunk
116+
}
117+
118+
n, err := unix.CopyFileRange(srcFd, &srcOff, tgtFd, &tgtOff, int(chunk), 0)
119+
if err != nil {
120+
// Fall back to a plain copy if copy_file_range is not supported
121+
// across the source and target filesystems.
122+
if errors.Is(err, syscall.EXDEV) || errors.Is(err, syscall.ENOSYS) || errors.Is(err, syscall.EOPNOTSUPP) {
123+
src.Close()
124+
tgt.Close()
125+
return openAndCopyFile(target, source)
126+
}
127+
return fmt.Errorf("copy_file_range failed: %w", err)
128+
}
129+
if n == 0 {
130+
return fmt.Errorf("copy_file_range returned 0 with %d bytes remaining", remain)
131+
}
132+
remain -= int64(n)
133+
}
134+
135+
offset = holeStart
136+
}
137+
138+
if err := tgt.Sync(); err != nil {
139+
return fmt.Errorf("failed to sync target %s: %w", target, err)
140+
}
141+
142+
return nil
143+
}
144+
29145
func copyFileInfo(fi os.FileInfo, src, name string) error {
30146
st := fi.Sys().(*syscall.Stat_t)
31147
if err := os.Lchown(name, int(st.Uid), int(st.Gid)); err != nil {

fs/copy_linux_test.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,115 @@
1919
package fs
2020

2121
import (
22+
"bytes"
2223
"io"
2324
"math/rand"
2425
"os"
2526
"os/exec"
2627
"path/filepath"
28+
"syscall"
2729
"testing"
2830

2931
"github.com/containerd/continuity/testutil"
3032
"github.com/containerd/continuity/testutil/loopback"
3133
)
3234

35+
func TestCopyFileSparse(t *testing.T) {
36+
dir := t.TempDir()
37+
38+
type testCase struct {
39+
name string
40+
// parts alternates: data length, hole length, data length, ...
41+
// A 0 data length at the start means the file begins with a hole.
42+
parts []int64
43+
}
44+
45+
tests := []testCase{
46+
{
47+
name: "DataHoleData",
48+
parts: []int64{4096, 1024 * 1024, 4096},
49+
},
50+
{
51+
name: "HoleOnly",
52+
parts: []int64{0, 1024 * 1024, 1},
53+
},
54+
{
55+
name: "HoleAtStart",
56+
parts: []int64{0, 1024 * 1024, 4096},
57+
},
58+
{
59+
name: "HoleAtEnd",
60+
parts: []int64{4096, 1024 * 1024},
61+
},
62+
{
63+
name: "MultipleHoles",
64+
parts: []int64{4096, 512 * 1024, 4096, 512 * 1024, 4096},
65+
},
66+
{
67+
name: "NoHoles",
68+
parts: []int64{64 * 1024},
69+
},
70+
}
71+
72+
for _, tc := range tests {
73+
t.Run(tc.name, func(t *testing.T) {
74+
srcPath := filepath.Join(dir, tc.name+"-src")
75+
dstPath := filepath.Join(dir, tc.name+"-dst")
76+
77+
applier := createSparseFile(tc.name+"-src", 42, 0o644, tc.parts...)
78+
if err := applier.Apply(dir); err != nil {
79+
t.Fatal(err)
80+
}
81+
82+
if err := CopyFile(dstPath, srcPath); err != nil {
83+
t.Fatalf("CopyFile failed: %v", err)
84+
}
85+
86+
// Verify content matches exactly.
87+
srcData, err := os.ReadFile(srcPath)
88+
if err != nil {
89+
t.Fatal(err)
90+
}
91+
dstData, err := os.ReadFile(dstPath)
92+
if err != nil {
93+
t.Fatal(err)
94+
}
95+
if !bytes.Equal(srcData, dstData) {
96+
t.Fatal("source and destination file contents differ")
97+
}
98+
99+
// Verify sparseness is preserved: destination should not use
100+
// significantly more blocks than the source.
101+
srcStat, err := os.Stat(srcPath)
102+
if err != nil {
103+
t.Fatal(err)
104+
}
105+
dstStat, err := os.Stat(dstPath)
106+
if err != nil {
107+
t.Fatal(err)
108+
}
109+
110+
srcBlocks := srcStat.Sys().(*syscall.Stat_t).Blocks
111+
dstBlocks := dstStat.Sys().(*syscall.Stat_t).Blocks
112+
113+
t.Logf("src size=%d blocks=%d, dst size=%d blocks=%d",
114+
srcStat.Size(), srcBlocks, dstStat.Size(), dstBlocks)
115+
116+
if srcStat.Size() != dstStat.Size() {
117+
t.Fatalf("size mismatch: src=%d dst=%d", srcStat.Size(), dstStat.Size())
118+
}
119+
120+
// Allow some slack for filesystem metadata, but destination
121+
// should not use more than 10% extra blocks.
122+
maxBlocks := srcBlocks + srcBlocks/10 + 8
123+
if dstBlocks > maxBlocks {
124+
t.Fatalf("destination is not sparse: src blocks=%d, dst blocks=%d (max allowed=%d)",
125+
srcBlocks, dstBlocks, maxBlocks)
126+
}
127+
})
128+
}
129+
}
130+
33131
func TestCopyReflinkWithXFS(t *testing.T) {
34132
testutil.RequiresRoot(t)
35133
mnt := t.TempDir()

fs/copy_nondarwin.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build !darwin
1+
//go:build !darwin && !linux
22

33
/*
44
Copyright The containerd Authors.

0 commit comments

Comments
 (0)