@@ -14,6 +14,7 @@ import (
1414 "os/exec"
1515 "path"
1616 "path/filepath"
17+ "regexp"
1718 "strings"
1819 "sync"
1920 "syscall"
@@ -40,6 +41,7 @@ import (
4041 "github.com/Microsoft/hcsshim/internal/guest/storage/pmem"
4142 "github.com/Microsoft/hcsshim/internal/guest/storage/scsi"
4243 "github.com/Microsoft/hcsshim/internal/guest/transport"
44+ "github.com/Microsoft/hcsshim/internal/guestpath"
4345 "github.com/Microsoft/hcsshim/internal/log"
4446 "github.com/Microsoft/hcsshim/internal/logfields"
4547 "github.com/Microsoft/hcsshim/internal/oci"
@@ -54,6 +56,23 @@ import (
5456// for V2 where the specific message is targeted at the UVM itself.
5557const UVMContainerID = "00000000-0000-0000-0000-000000000000"
5658
// validContainerIDRegexRaw is the unanchored pattern for a container ID: a
// 64 character hex string. It is kept as a separate constant so that it can be
// embedded into larger patterns.
//
// Checking IDs against this pattern prevents path traversal via malformed
// container / sandbox IDs. Container IDs can be either UVMContainerID, or a 64
// character hex string. Sandbox IDs (which are also used in paths) have the
// same format and are validated with the same pattern.
const validContainerIDRegexRaw = `[0-9a-fA-F]{64}`

// validContainerIDRegex matches a string that consists of exactly one ID in
// the validContainerIDRegexRaw format and nothing else.
var validContainerIDRegex = regexp.MustCompile(`^` + validContainerIDRegexRaw + `$`)
66+
67+ // idType just changes the error message
68+ func checkValidContainerID (id string , idType string ) error {
69+ if id == UVMContainerID || validContainerIDRegex .MatchString (id ) {
70+ return nil
71+ }
72+
73+ return errors .Errorf ("invalid %s id: %s (must match %s)" , idType , id , validContainerIDRegex .String ())
74+ }
75+
5776// VirtualPod represents a virtual pod that shares a UVM/Sandbox with other pods
5877type VirtualPod struct {
5978 VirtualSandboxID string
@@ -245,12 +264,68 @@ func setupSandboxLogDir(sandboxID, virtualSandboxID string) error {
245264// TODO: unify workload and standalone logic for non-sandbox features (e.g., block devices, huge pages, uVM mounts)
246265// TODO(go1.24): use [os.Root] instead of `!strings.HasPrefix(<path>, <root>)`
247266
267+ // Returns whether this host has a security policy set, i.e. if it's running
268+ // confidential containers.
269+ func (h * Host ) HasSecurityPolicy () bool {
270+ return len (h .securityOptions .PolicyEnforcer .EncodedSecurityPolicy ()) > 0
271+ }
272+
273+ // For confidential containers, make sure that the host can't use unexpected
274+ // bundle paths / scratch dir / rootfs
275+ func checkContainerSettings (sandboxID , containerID string , settings * prot.VMHostedContainerSettingsV2 ) error {
276+ if settings .OCISpecification == nil {
277+ return errors .Errorf ("OCISpecification is nil" )
278+ }
279+ if settings .OCISpecification .Root == nil {
280+ return errors .Errorf ("OCISpecification.Root is nil" )
281+ }
282+
283+ // matches with CreateContainer / createLinuxContainerDocument in internal/hcsoci
284+ containerRootInUVM := path .Join (guestpath .LCOWRootPrefixInUVM , containerID )
285+ if settings .OCIBundlePath != containerRootInUVM {
286+ return errors .Errorf ("OCIBundlePath %q must equal expected %q" ,
287+ settings .OCIBundlePath , containerRootInUVM )
288+ }
289+ expectedContainerRootfs := path .Join (containerRootInUVM , guestpath .RootfsPath )
290+ if settings .OCISpecification .Root .Path != expectedContainerRootfs {
291+ return errors .Errorf ("OCISpecification.Root.Path %q must equal expected %q" ,
292+ settings .OCISpecification .Root .Path , expectedContainerRootfs )
293+ }
294+
295+ // matches with MountLCOWLayers
296+ scratchDirPath := settings .ScratchDirPath
297+ expectedScratchDirPathNonShared := path .Join (containerRootInUVM , guestpath .ScratchDir , containerID )
298+ expectedScratchDirPathShared := path .Join (guestpath .LCOWRootPrefixInUVM , sandboxID , guestpath .ScratchDir , containerID )
299+ if scratchDirPath != expectedScratchDirPathNonShared &&
300+ scratchDirPath != expectedScratchDirPathShared {
301+ return errors .Errorf ("ScratchDirPath %q must be either %q or %q" ,
302+ scratchDirPath , expectedScratchDirPathNonShared , expectedScratchDirPathShared )
303+ }
304+
305+ if settings .OCISpecification .Hooks != nil {
306+ return errors .Errorf ("OCISpecification.Hooks must be nil." )
307+ }
308+
309+ return nil
310+ }
311+
248312func (h * Host ) CreateContainer (ctx context.Context , id string , settings * prot.VMHostedContainerSettingsV2 ) (_ * Container , err error ) {
249313 criType , isCRI := settings .OCISpecification .Annotations [annotations .KubernetesContainerType ]
250314
251315 // Check for virtual pod annotation
252316 virtualPodID , isVirtualPod := settings .OCISpecification .Annotations [annotations .VirtualPodID ]
253317
318+ if h .HasSecurityPolicy () {
319+ if err = checkValidContainerID (id , "container" ); err != nil {
320+ return nil , err
321+ }
322+ if virtualPodID != "" {
323+ if err = checkValidContainerID (virtualPodID , "virtual pod" ); err != nil {
324+ return nil , err
325+ }
326+ }
327+ }
328+
254329 // Special handling for virtual pod sandbox containers:
255330 // The first container in a virtual pod (containerID == virtualPodID) should be treated as a sandbox
256331 // even if the CRI annotation might indicate otherwise due to host-side UVM setup differences
@@ -374,6 +449,11 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
374449 case "container" :
375450 sid , ok := settings .OCISpecification .Annotations [annotations .KubernetesSandboxID ]
376451 sandboxID = sid
452+ if h .HasSecurityPolicy () {
453+ if err = checkValidContainerID (sid , "sandbox" ); err != nil {
454+ return nil , err
455+ }
456+ }
377457 if ! ok || sid == "" {
378458 return nil , errors .Errorf ("unsupported 'io.kubernetes.cri.sandbox-id': '%s'" , sid )
379459 }
@@ -383,7 +463,7 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
383463
384464 // Add SEV device when security policy is not empty, except when privileged annotation is
385465 // set to "true", in which case all UVMs devices are added.
386- if len ( h . securityOptions . PolicyEnforcer . EncodedSecurityPolicy ()) > 0 && ! oci .ParseAnnotationsBool (ctx ,
466+ if h . HasSecurityPolicy () && ! oci .ParseAnnotationsBool (ctx ,
387467 settings .OCISpecification .Annotations , annotations .LCOWPrivileged , false ) {
388468 if err := specGuest .AddDevSev (ctx , settings .OCISpecification ); err != nil {
389469 log .G (ctx ).WithError (err ).Debug ("failed to add SEV device" )
@@ -429,6 +509,12 @@ func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VM
429509 })
430510 }
431511
512+ if h .HasSecurityPolicy () {
513+ if err = checkContainerSettings (sandboxID , id , settings ); err != nil {
514+ return nil , err
515+ }
516+ }
517+
432518 user , groups , umask , err := h .securityOptions .PolicyEnforcer .GetUserInfo (settings .OCISpecification .Process , settings .OCISpecification .Root .Path )
433519 if err != nil {
434520 return nil , err
@@ -586,6 +672,12 @@ func writeSpecToFile(ctx context.Context, configFile string, spec *specs.Spec) e
586672}
587673
588674func (h * Host ) modifyHostSettings (ctx context.Context , containerID string , req * guestrequest.ModificationRequest ) (retErr error ) {
675+ if h .HasSecurityPolicy () {
676+ if err := checkValidContainerID (containerID , "container" ); err != nil {
677+ return err
678+ }
679+ }
680+
589681 switch req .ResourceType {
590682 case guestresource .ResourceTypeSCSIDevice :
591683 return modifySCSIDevice (ctx , req .RequestType , req .Settings .(* guestresource.SCSIDevice ))
@@ -670,6 +762,12 @@ func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *
670762}
671763
672764func (h * Host ) modifyContainerSettings (ctx context.Context , containerID string , req * guestrequest.ModificationRequest ) error {
765+ if h .HasSecurityPolicy () {
766+ if err := checkValidContainerID (containerID , "container" ); err != nil {
767+ return err
768+ }
769+ }
770+
673771 c , err := h .GetCreatedContainer (containerID )
674772 if err != nil {
675773 return err
@@ -1041,6 +1139,9 @@ func modifyMappedVirtualDisk(
10411139 if err != nil {
10421140 return err
10431141 }
1142+ if mvd .Filesystem != "" && mvd .Filesystem != "ext4" {
1143+ return errors .Errorf ("filesystem must be ext4 for read-only scsi mounts" )
1144+ }
10441145 }
10451146 }
10461147 switch rt {
@@ -1057,6 +1158,11 @@ func modifyMappedVirtualDisk(
10571158 if err != nil {
10581159 return errors .Wrapf (err , "mounting scsi device controller %d lun %d onto %s denied by policy" , mvd .Controller , mvd .Lun , mvd .MountPath )
10591160 }
1161+ } else {
1162+ err = securityPolicy .EnforceRWDeviceMountPolicy (ctx , mvd .MountPath , mvd .Encrypted , mvd .EnsureFilesystem , mvd .Filesystem )
1163+ if err != nil {
1164+ return errors .Wrapf (err , "mounting scsi device controller %d lun %d onto %s denied by policy" , mvd .Controller , mvd .Lun , mvd .MountPath )
1165+ }
10601166 }
10611167 config := & scsi.Config {
10621168 Encrypted : mvd .Encrypted ,
@@ -1075,6 +1181,10 @@ func modifyMappedVirtualDisk(
10751181 if err := securityPolicy .EnforceDeviceUnmountPolicy (ctx , mvd .MountPath ); err != nil {
10761182 return fmt .Errorf ("unmounting scsi device at %s denied by policy: %w" , mvd .MountPath , err )
10771183 }
1184+ } else {
1185+ if err := securityPolicy .EnforceRWDeviceUnmountPolicy (ctx , mvd .MountPath ); err != nil {
1186+ return fmt .Errorf ("unmounting scsi device at %s denied by policy: %w" , mvd .MountPath , err )
1187+ }
10781188 }
10791189 config := & scsi.Config {
10801190 Encrypted : mvd .Encrypted ,
@@ -1173,8 +1283,42 @@ func modifyCombinedLayers(
11731283 scratchEncrypted bool ,
11741284 securityPolicy securitypolicy.SecurityPolicyEnforcer ,
11751285) (err error ) {
1286+ isConfidential := len (securityPolicy .EncodedSecurityPolicy ()) > 0
1287+ containerID := cl .ContainerID
1288+
11761289 switch rt {
11771290 case guestrequest .RequestTypeAdd :
1291+ if isConfidential {
1292+ if err := checkValidContainerID (containerID , "container" ); err != nil {
1293+ return err
1294+ }
1295+
1296+ // We check this regardless of what the policy says, as long as we're in
1297+ // confidential mode. This matches with checkContainerSettings called for
1298+ // container creation request.
1299+ expectedContainerRootfs := path .Join (guestpath .LCOWRootPrefixInUVM , containerID , guestpath .RootfsPath )
1300+ if cl .ContainerRootPath != expectedContainerRootfs {
1301+ return fmt .Errorf ("combined layers target %q does not match expected path %q" ,
1302+ cl .ContainerRootPath , expectedContainerRootfs )
1303+ }
1304+
1305+ if cl .ScratchPath != "" {
1306+ // At this point, we do not know what the sandbox ID would be yet, so we
1307+ // have to allow anything reasonable.
1308+ scratchDirRegexStr := fmt .Sprintf (
1309+ "^%s/%s/%s/%s$" ,
1310+ guestpath .LCOWRootPrefixInUVM ,
1311+ validContainerIDRegexRaw ,
1312+ guestpath .ScratchDir ,
1313+ containerID ,
1314+ )
1315+ scratchDirRegex := regexp .MustCompile (scratchDirRegexStr )
1316+ if ! scratchDirRegex .MatchString (cl .ScratchPath ) {
1317+ return fmt .Errorf ("scratch path %q must match regex %q" ,
1318+ cl .ScratchPath , scratchDirRegexStr )
1319+ }
1320+ }
1321+ }
11781322 layerPaths := make ([]string , len (cl .Layers ))
11791323 for i , layer := range cl .Layers {
11801324 layerPaths [i ] = layer .Path
@@ -1195,12 +1339,14 @@ func modifyCombinedLayers(
11951339 }
11961340 }
11971341
1198- if err := securityPolicy .EnforceOverlayMountPolicy (ctx , cl . ContainerID , layerPaths , cl .ContainerRootPath ); err != nil {
1342+ if err := securityPolicy .EnforceOverlayMountPolicy (ctx , containerID , layerPaths , cl .ContainerRootPath ); err != nil {
11991343 return fmt .Errorf ("overlay creation denied by policy: %w" , err )
12001344 }
12011345
12021346 return overlay .MountLayer (ctx , layerPaths , upperdirPath , workdirPath , cl .ContainerRootPath , readonly )
12031347 case guestrequest .RequestTypeRemove :
1348+ // cl.ContainerID is not set on remove requests, but rego checks that we can
1349+ // only umount previously mounted targets anyway
12041350 if err := securityPolicy .EnforceOverlayUnmountPolicy (ctx , cl .ContainerRootPath ); err != nil {
12051351 return errors .Wrap (err , "overlay removal denied by policy" )
12061352 }
0 commit comments