Skip to content

Commit 5f3f9b5

Browse files
authored
Merge pull request #2581 from micromaomao/tingmao/msrc-1
Merge various fixes for C-LCOW
2 parents 5a0252a + be72bb7 commit 5f3f9b5

16 files changed

Lines changed: 1779 additions & 180 deletions

internal/guest/runtime/hcsv2/uvm.go

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"os/exec"
1515
"path"
1616
"path/filepath"
17+
"regexp"
1718
"strings"
1819
"sync"
1920
"syscall"
@@ -40,6 +41,7 @@ import (
4041
"github.com/Microsoft/hcsshim/internal/guest/storage/pmem"
4142
"github.com/Microsoft/hcsshim/internal/guest/storage/scsi"
4243
"github.com/Microsoft/hcsshim/internal/guest/transport"
44+
"github.com/Microsoft/hcsshim/internal/guestpath"
4345
"github.com/Microsoft/hcsshim/internal/log"
4446
"github.com/Microsoft/hcsshim/internal/logfields"
4547
"github.com/Microsoft/hcsshim/internal/oci"
@@ -54,6 +56,23 @@ import (
5456
// for V2 where the specific message is targeted at the UVM itself.
5557
const UVMContainerID = "00000000-0000-0000-0000-000000000000"
5658

59+
// Prevent path traversal via malformed container / sandbox IDs. Container IDs
60+
// can be either UVMContainerID, or a 64 character hex string. This is also used
61+
// to check that sandbox IDs (which is also used in paths) are valid, which has
62+
// the same format.
63+
const validContainerIDRegexRaw = `[0-9a-fA-F]{64}`
64+
65+
var validContainerIDRegex = regexp.MustCompile("^" + validContainerIDRegexRaw + "$")
66+
67+
// idType just changes the error message
68+
func checkValidContainerID(id string, idType string) error {
69+
if id == UVMContainerID || validContainerIDRegex.MatchString(id) {
70+
return nil
71+
}
72+
73+
return errors.Errorf("invalid %s id: %s (must match %s)", idType, id, validContainerIDRegex.String())
74+
}
75+
5776
// VirtualPod represents a virtual pod that shares a UVM/Sandbox with other pods
5877
type VirtualPod struct {
5978
VirtualSandboxID string
@@ -245,12 +264,68 @@ func setupSandboxLogDir(sandboxID, virtualSandboxID string) error {
245264
// TODO: unify workload and standalone logic for non-sandbox features (e.g., block devices, huge pages, uVM mounts)
246265
// TODO(go1.24): use [os.Root] instead of `!strings.HasPrefix(<path>, <root>)`
247266

267+
// Returns whether this host has a security policy set, i.e. if it's running
268+
// confidential containers.
269+
func (h *Host) HasSecurityPolicy() bool {
270+
return len(h.securityOptions.PolicyEnforcer.EncodedSecurityPolicy()) > 0
271+
}
272+
273+
// For confidential containers, make sure that the host can't use unexpected
274+
// bundle paths / scratch dir / rootfs
275+
func checkContainerSettings(sandboxID, containerID string, settings *prot.VMHostedContainerSettingsV2) error {
276+
if settings.OCISpecification == nil {
277+
return errors.Errorf("OCISpecification is nil")
278+
}
279+
if settings.OCISpecification.Root == nil {
280+
return errors.Errorf("OCISpecification.Root is nil")
281+
}
282+
283+
// matches with CreateContainer / createLinuxContainerDocument in internal/hcsoci
284+
containerRootInUVM := path.Join(guestpath.LCOWRootPrefixInUVM, containerID)
285+
if settings.OCIBundlePath != containerRootInUVM {
286+
return errors.Errorf("OCIBundlePath %q must equal expected %q",
287+
settings.OCIBundlePath, containerRootInUVM)
288+
}
289+
expectedContainerRootfs := path.Join(containerRootInUVM, guestpath.RootfsPath)
290+
if settings.OCISpecification.Root.Path != expectedContainerRootfs {
291+
return errors.Errorf("OCISpecification.Root.Path %q must equal expected %q",
292+
settings.OCISpecification.Root.Path, expectedContainerRootfs)
293+
}
294+
295+
// matches with MountLCOWLayers
296+
scratchDirPath := settings.ScratchDirPath
297+
expectedScratchDirPathNonShared := path.Join(containerRootInUVM, guestpath.ScratchDir, containerID)
298+
expectedScratchDirPathShared := path.Join(guestpath.LCOWRootPrefixInUVM, sandboxID, guestpath.ScratchDir, containerID)
299+
if scratchDirPath != expectedScratchDirPathNonShared &&
300+
scratchDirPath != expectedScratchDirPathShared {
301+
return errors.Errorf("ScratchDirPath %q must be either %q or %q",
302+
scratchDirPath, expectedScratchDirPathNonShared, expectedScratchDirPathShared)
303+
}
304+
305+
if settings.OCISpecification.Hooks != nil {
306+
return errors.Errorf("OCISpecification.Hooks must be nil.")
307+
}
308+
309+
return nil
310+
}
311+
248312
func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VMHostedContainerSettingsV2) (_ *Container, err error) {
249313
criType, isCRI := settings.OCISpecification.Annotations[annotations.KubernetesContainerType]
250314

251315
// Check for virtual pod annotation
252316
virtualPodID, isVirtualPod := settings.OCISpecification.Annotations[annotations.VirtualPodID]
253317

318+
if h.HasSecurityPolicy() {
319+
if err = checkValidContainerID(id, "container"); err != nil {
320+
return nil, err
321+
}
322+
if virtualPodID != "" {
323+
if err = checkValidContainerID(virtualPodID, "virtual pod"); err != nil {
324+
return nil, err
325+
}
326+
}
327+
}
328+
254329
// Special handling for virtual pod sandbox containers:
255330
// The first container in a virtual pod (containerID == virtualPodID) should be treated as a sandbox
256331
// even if the CRI annotation might indicate otherwise due to host-side UVM setup differences
@@ -374,6 +449,11 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
374449
case "container":
375450
sid, ok := settings.OCISpecification.Annotations[annotations.KubernetesSandboxID]
376451
sandboxID = sid
452+
if h.HasSecurityPolicy() {
453+
if err = checkValidContainerID(sid, "sandbox"); err != nil {
454+
return nil, err
455+
}
456+
}
377457
if !ok || sid == "" {
378458
return nil, errors.Errorf("unsupported 'io.kubernetes.cri.sandbox-id': '%s'", sid)
379459
}
@@ -383,7 +463,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
383463

384464
// Add SEV device when security policy is not empty, except when privileged annotation is
385465
// set to "true", in which case all UVMs devices are added.
386-
if len(h.securityOptions.PolicyEnforcer.EncodedSecurityPolicy()) > 0 && !oci.ParseAnnotationsBool(ctx,
466+
if h.HasSecurityPolicy() && !oci.ParseAnnotationsBool(ctx,
387467
settings.OCISpecification.Annotations, annotations.LCOWPrivileged, false) {
388468
if err := specGuest.AddDevSev(ctx, settings.OCISpecification); err != nil {
389469
log.G(ctx).WithError(err).Debug("failed to add SEV device")
@@ -429,6 +509,12 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
429509
})
430510
}
431511

512+
if h.HasSecurityPolicy() {
513+
if err = checkContainerSettings(sandboxID, id, settings); err != nil {
514+
return nil, err
515+
}
516+
}
517+
432518
user, groups, umask, err := h.securityOptions.PolicyEnforcer.GetUserInfo(settings.OCISpecification.Process, settings.OCISpecification.Root.Path)
433519
if err != nil {
434520
return nil, err
@@ -586,6 +672,12 @@ func writeSpecToFile(ctx context.Context, configFile string, spec *specs.Spec) e
586672
}
587673

588674
func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) (retErr error) {
675+
if h.HasSecurityPolicy() {
676+
if err := checkValidContainerID(containerID, "container"); err != nil {
677+
return err
678+
}
679+
}
680+
589681
switch req.ResourceType {
590682
case guestresource.ResourceTypeSCSIDevice:
591683
return modifySCSIDevice(ctx, req.RequestType, req.Settings.(*guestresource.SCSIDevice))
@@ -670,6 +762,12 @@ func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *
670762
}
671763

672764
func (h *Host) modifyContainerSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) error {
765+
if h.HasSecurityPolicy() {
766+
if err := checkValidContainerID(containerID, "container"); err != nil {
767+
return err
768+
}
769+
}
770+
673771
c, err := h.GetCreatedContainer(containerID)
674772
if err != nil {
675773
return err
@@ -1041,6 +1139,9 @@ func modifyMappedVirtualDisk(
10411139
if err != nil {
10421140
return err
10431141
}
1142+
if mvd.Filesystem != "" && mvd.Filesystem != "ext4" {
1143+
return errors.Errorf("filesystem must be ext4 for read-only scsi mounts")
1144+
}
10441145
}
10451146
}
10461147
switch rt {
@@ -1057,6 +1158,11 @@ func modifyMappedVirtualDisk(
10571158
if err != nil {
10581159
return errors.Wrapf(err, "mounting scsi device controller %d lun %d onto %s denied by policy", mvd.Controller, mvd.Lun, mvd.MountPath)
10591160
}
1161+
} else {
1162+
err = securityPolicy.EnforceRWDeviceMountPolicy(ctx, mvd.MountPath, mvd.Encrypted, mvd.EnsureFilesystem, mvd.Filesystem)
1163+
if err != nil {
1164+
return errors.Wrapf(err, "mounting scsi device controller %d lun %d onto %s denied by policy", mvd.Controller, mvd.Lun, mvd.MountPath)
1165+
}
10601166
}
10611167
config := &scsi.Config{
10621168
Encrypted: mvd.Encrypted,
@@ -1075,6 +1181,10 @@ func modifyMappedVirtualDisk(
10751181
if err := securityPolicy.EnforceDeviceUnmountPolicy(ctx, mvd.MountPath); err != nil {
10761182
return fmt.Errorf("unmounting scsi device at %s denied by policy: %w", mvd.MountPath, err)
10771183
}
1184+
} else {
1185+
if err := securityPolicy.EnforceRWDeviceUnmountPolicy(ctx, mvd.MountPath); err != nil {
1186+
return fmt.Errorf("unmounting scsi device at %s denied by policy: %w", mvd.MountPath, err)
1187+
}
10781188
}
10791189
config := &scsi.Config{
10801190
Encrypted: mvd.Encrypted,
@@ -1173,8 +1283,42 @@ func modifyCombinedLayers(
11731283
scratchEncrypted bool,
11741284
securityPolicy securitypolicy.SecurityPolicyEnforcer,
11751285
) (err error) {
1286+
isConfidential := len(securityPolicy.EncodedSecurityPolicy()) > 0
1287+
containerID := cl.ContainerID
1288+
11761289
switch rt {
11771290
case guestrequest.RequestTypeAdd:
1291+
if isConfidential {
1292+
if err := checkValidContainerID(containerID, "container"); err != nil {
1293+
return err
1294+
}
1295+
1296+
// We check this regardless of what the policy says, as long as we're in
1297+
// confidential mode. This matches with checkContainerSettings called for
1298+
// container creation request.
1299+
expectedContainerRootfs := path.Join(guestpath.LCOWRootPrefixInUVM, containerID, guestpath.RootfsPath)
1300+
if cl.ContainerRootPath != expectedContainerRootfs {
1301+
return fmt.Errorf("combined layers target %q does not match expected path %q",
1302+
cl.ContainerRootPath, expectedContainerRootfs)
1303+
}
1304+
1305+
if cl.ScratchPath != "" {
1306+
// At this point, we do not know what the sandbox ID would be yet, so we
1307+
// have to allow anything reasonable.
1308+
scratchDirRegexStr := fmt.Sprintf(
1309+
"^%s/%s/%s/%s$",
1310+
guestpath.LCOWRootPrefixInUVM,
1311+
validContainerIDRegexRaw,
1312+
guestpath.ScratchDir,
1313+
containerID,
1314+
)
1315+
scratchDirRegex := regexp.MustCompile(scratchDirRegexStr)
1316+
if !scratchDirRegex.MatchString(cl.ScratchPath) {
1317+
return fmt.Errorf("scratch path %q must match regex %q",
1318+
cl.ScratchPath, scratchDirRegexStr)
1319+
}
1320+
}
1321+
}
11781322
layerPaths := make([]string, len(cl.Layers))
11791323
for i, layer := range cl.Layers {
11801324
layerPaths[i] = layer.Path
@@ -1195,12 +1339,14 @@ func modifyCombinedLayers(
11951339
}
11961340
}
11971341

1198-
if err := securityPolicy.EnforceOverlayMountPolicy(ctx, cl.ContainerID, layerPaths, cl.ContainerRootPath); err != nil {
1342+
if err := securityPolicy.EnforceOverlayMountPolicy(ctx, containerID, layerPaths, cl.ContainerRootPath); err != nil {
11991343
return fmt.Errorf("overlay creation denied by policy: %w", err)
12001344
}
12011345

12021346
return overlay.MountLayer(ctx, layerPaths, upperdirPath, workdirPath, cl.ContainerRootPath, readonly)
12031347
case guestrequest.RequestTypeRemove:
1348+
// cl.ContainerID is not set on remove requests, but rego checks that we can
1349+
// only umount previously mounted targets anyway
12041350
if err := securityPolicy.EnforceOverlayUnmountPolicy(ctx, cl.ContainerRootPath); err != nil {
12051351
return errors.Wrap(err, "overlay removal denied by policy")
12061352
}

internal/guestpath/paths.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,17 @@ const (
2727
// LCOWMountPathPrefixFmt is the path format in the LCOW UVM where
2828
// non-global mounts, such as Plan9 mounts are added
2929
LCOWMountPathPrefixFmt = "/mounts/m%d"
30-
// LCOWGlobalMountPrefixFmt is the path format in the LCOW UVM where global
31-
// mounts are added
32-
LCOWGlobalMountPrefixFmt = "/run/mounts/m%d"
30+
// LCOWGlobalScsiMountPrefixFmt is the path format in the LCOW UVM where
31+
// global desk mounts are added
32+
LCOWGlobalScsiMountPrefixFmt = "/run/mounts/scsi/m%d"
3333
// LCOWGlobalDriverPrefixFmt is the path format in the LCOW UVM where drivers
3434
// are mounted as read/write
3535
LCOWGlobalDriverPrefixFmt = "/run/drivers/%s"
36-
// WCOWGlobalMountPrefixFmt is the path prefix format in the WCOW UVM where
37-
// mounts are added
38-
WCOWGlobalMountPrefixFmt = "C:\\mounts\\m%d"
36+
// WCOWGlobalScsiMountPrefixFmt is the path prefix format in the WCOW UVM
37+
// where global desk mounts are added
38+
WCOWGlobalScsiMountPrefixFmt = `c:\mounts\scsi\m%d`
3939
// RootfsPath is part of the container's rootfs path
4040
RootfsPath = "rootfs"
41+
// ScratchDir is the name of the directory used for overlay upper and work
42+
ScratchDir = "scratch"
4143
)

internal/layers/lcow.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ func MountLCOWLayers(
159159
// handles the case where we want to share a scratch disk for multiple containers instead
160160
// of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and
161161
// work directories colliding in the UVM.
162-
containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), "scratch", containerID)
162+
// Note that in the shared scratch case, AddVirtualDisk above is a no-op and
163+
// will return the existing mount.
164+
containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), guestpath.ScratchDir, containerID)
163165

164166
defer func() {
165167
if err != nil {

internal/uvm/start.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/Microsoft/hcsshim/internal/gcs"
1414
"github.com/Microsoft/hcsshim/internal/gcs/prot"
15+
"github.com/Microsoft/hcsshim/internal/guestpath"
1516
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
1617
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
1718
"github.com/Microsoft/hcsshim/internal/log"
@@ -247,9 +248,9 @@ func (uvm *UtilityVM) Start(ctx context.Context) (err error) {
247248
} else {
248249
gb = scsi.NewHCSGuestBackend(uvm.hcsSystem, uvm.OS())
249250
}
250-
guestMountFmt := `c:\mounts\scsi\m%d`
251+
guestMountFmt := guestpath.WCOWGlobalScsiMountPrefixFmt
251252
if uvm.OS() == "linux" {
252-
guestMountFmt = "/run/mounts/scsi/m%d"
253+
guestMountFmt = guestpath.LCOWGlobalScsiMountPrefixFmt
253254
}
254255
mgr, err := scsi.NewManager(
255256
scsi.NewHCSHostBackend(uvm.hcsSystem),

pkg/securitypolicy/api.rego

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,25 @@ package api
33
version := "@@API_VERSION@@"
44

55
enforcement_points := {
6-
"mount_device": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}},
7-
"mount_overlay": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}},
8-
"mount_cims": {"introducedVersion": "0.11.0", "default_results": {"allowed": false}},
9-
"registry_changes": {"introducedVersion": "0.10.0", "default_results": {"allowed": false}},
10-
"create_container": {"introducedVersion": "0.1.0", "default_results": {"allowed": false, "env_list": null, "allow_stdio_access": false}},
11-
"unmount_device": {"introducedVersion": "0.2.0", "default_results": {"allowed": true}},
12-
"unmount_overlay": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
13-
"exec_in_container": {"introducedVersion": "0.2.0", "default_results": {"allowed": true, "env_list": null}},
14-
"exec_external": {"introducedVersion": "0.3.0", "default_results": {"allowed": true, "env_list": null, "allow_stdio_access": false}},
15-
"shutdown_container": {"introducedVersion": "0.4.0", "default_results": {"allowed": true}},
16-
"signal_container_process": {"introducedVersion": "0.5.0", "default_results": {"allowed": true}},
17-
"plan9_mount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
18-
"plan9_unmount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}},
19-
"get_properties": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}},
20-
"dump_stacks": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}},
21-
"runtime_logging": {"introducedVersion": "0.8.0", "default_results": {"allowed": true}},
22-
"load_fragment": {"introducedVersion": "0.9.0", "default_results": {"allowed": false, "add_module": false}},
23-
"scratch_mount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}},
24-
"scratch_unmount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}},
6+
"mount_device": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}, "use_framework": false},
7+
"rw_mount_device": {"introducedVersion": "0.11.0", "default_results": {}, "use_framework": true},
8+
"mount_overlay": {"introducedVersion": "0.1.0", "default_results": {"allowed": false}, "use_framework": false},
9+
"mount_cims": {"introducedVersion": "0.11.0", "default_results": {"allowed": false}, "use_framework": false},
10+
"registry_changes": {"introducedVersion": "0.10.0", "default_results": {"allowed": false}, "use_framework": false},
11+
"create_container": {"introducedVersion": "0.1.0", "default_results": {"allowed": false, "env_list": null, "allow_stdio_access": false}, "use_framework": false},
12+
"unmount_device": {"introducedVersion": "0.2.0", "default_results": {"allowed": true}, "use_framework": false},
13+
"rw_unmount_device": {"introducedVersion": "0.11.0", "default_results": {}, "use_framework": true},
14+
"unmount_overlay": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
15+
"exec_in_container": {"introducedVersion": "0.2.0", "default_results": {"allowed": true, "env_list": null}, "use_framework": false},
16+
"exec_external": {"introducedVersion": "0.3.0", "default_results": {"allowed": true, "env_list": null, "allow_stdio_access": false}, "use_framework": false},
17+
"shutdown_container": {"introducedVersion": "0.4.0", "default_results": {"allowed": true}, "use_framework": false},
18+
"signal_container_process": {"introducedVersion": "0.5.0", "default_results": {"allowed": true}, "use_framework": false},
19+
"plan9_mount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
20+
"plan9_unmount": {"introducedVersion": "0.6.0", "default_results": {"allowed": true}, "use_framework": false},
21+
"get_properties": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}, "use_framework": false},
22+
"dump_stacks": {"introducedVersion": "0.7.0", "default_results": {"allowed": true}, "use_framework": false},
23+
"runtime_logging": {"introducedVersion": "0.8.0", "default_results": {"allowed": true}, "use_framework": false},
24+
"load_fragment": {"introducedVersion": "0.9.0", "default_results": {"allowed": false, "add_module": false}, "use_framework": false},
25+
"scratch_mount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}, "use_framework": false},
26+
"scratch_unmount": {"introducedVersion": "0.10.0", "default_results": {"allowed": true}, "use_framework": false},
2527
}

pkg/securitypolicy/api_test.rego

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ package api
33
version := "0.0.2"
44

55
enforcement_points := {
6-
"__fixture_for_future_test__": {"introducedVersion": "100.0.0", "default_results": {"allowed": true}},
7-
"__fixture_for_allowed_test_true__": {"introducedVersion": "0.0.2", "default_results": {"allowed": true}},
8-
"__fixture_for_allowed_test_false__": {"introducedVersion": "0.0.2", "default_results": {"allowed": false}},
9-
"__fixture_for_allowed_extra__": {"introducedVersion": "0.0.1", "default_results": {"allowed": false, "__test__": "test"}}
6+
"__fixture_for_future_test__": {"introducedVersion": "100.0.0", "default_results": {"allowed": true}, "use_framework": false},
7+
"__fixture_for_allowed_test_true__": {"introducedVersion": "0.0.2", "default_results": {"allowed": true}, "use_framework": false},
8+
"__fixture_for_allowed_test_false__": {"introducedVersion": "0.0.2", "default_results": {"allowed": false}, "use_framework": false},
9+
"__fixture_for_allowed_extra__": {"introducedVersion": "0.0.1", "default_results": {"allowed": false, "__test__": "test"}, "use_framework": false}
1010
}

0 commit comments

Comments
 (0)