diff --git a/compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java b/compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java index dc39bce94f..5f18eb7fd3 100755 --- a/compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java +++ b/compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java @@ -52,6 +52,10 @@ public class VmGlobalConfig { @GlobalConfigValidation(validValues = {"true", "false"}) @BindResourceConfig(value = {VmInstanceVO.class}) public static GlobalConfig VM_PORT_OFF = new GlobalConfig(CATEGORY, "vmPortOff"); + @GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "enable PMU for VM, disabled by default on aarch64 to avoid Kunpeng-920 kernel panic") + @GlobalConfigValidation(validValues = {"true", "false"}) + @BindResourceConfig(value = {VmInstanceVO.class, ClusterVO.class}) + public static GlobalConfig VM_PMU = new GlobalConfig(CATEGORY, "vm.pmu"); @GlobalConfigValidation(validValues = {"true", "false"}) @BindResourceConfig(value = {VmInstanceVO.class, ClusterVO.class}) public static GlobalConfig EMULATE_HYPERV = new GlobalConfig(CATEGORY, "emulateHyperV"); diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java index 2a36bb5aba..36c5d35ecd 100755 --- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java +++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java @@ -2312,6 +2312,8 @@ public static class StartVmCmd extends vdiCmd implements VmAddOnsCmd { private boolean consoleLogToFile; @GrayVersion(value = "5.0.0") private boolean acpi; + @GrayVersion(value = "5.5.12") + private boolean pmu = true; @GrayVersion(value = "5.0.0") private boolean x2apic = true; // cpuid hypervisor feature @@ -2835,6 +2837,14 @@ public void setAcpi(boolean acpi) { this.acpi = acpi; } + public boolean isPmu() { + return pmu; + } + + public void setPmu(boolean pmu) { + this.pmu = pmu; + } + public boolean getX2apic() { 
return x2apic; } diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java index b8b38d1b80..96c2bd7bfa 100755 --- a/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java +++ b/plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java @@ -66,6 +66,7 @@ import org.zstack.header.message.MessageReply; import org.zstack.header.message.NeedReplyMessage; import org.zstack.header.network.l2.*; +import org.zstack.header.os.OSArchitecture; import org.zstack.header.network.l3.L3NetworkInventory; import org.zstack.header.network.l3.L3NetworkVO; import org.zstack.header.rest.JsonAsyncRESTCallback; @@ -4573,6 +4574,14 @@ protected void startVm(final VmInstanceSpec spec, final NeedReplyMessage msg, fi cmd.setCreatePaused(true); } cmd.setAcpi(true); + // aarch64 only: disable PMU by default to avoid a kernel panic on new Kunpeng-920 (7270Z/5230Z), + // where the PMMIR_EL1 register is not supported by KVM. See ZSTAC-76375. + // GlobalConfig vm.pmu defaults to false; users can re-enable it via ResourceConfig. Other architectures keep the StartVmCmd default (pmu = true). 
+ if (OSArchitecture.AARCH64.normalizedArchName().equals(architecture)) { + Boolean pmuEnabled = rcf.getResourceConfigValue( + VmGlobalConfig.VM_PMU, spec.getVmInventory().getUuid(), Boolean.class); + cmd.setPmu(Boolean.TRUE.equals(pmuEnabled)); + } GuestOsCharacter.Config config = GuestOsHelper.getInstance().getGuestOsCharacter( spec.getVmInventory().getArchitecture(), diff --git a/test/src/test/groovy/org/zstack/test/integration/kvm/vm/VmPmuConfigCase.groovy b/test/src/test/groovy/org/zstack/test/integration/kvm/vm/VmPmuConfigCase.groovy new file mode 100644 index 0000000000..5fa2bd9400 --- /dev/null +++ b/test/src/test/groovy/org/zstack/test/integration/kvm/vm/VmPmuConfigCase.groovy @@ -0,0 +1,194 @@ +package org.zstack.test.integration.kvm.vm + +import org.springframework.http.HttpEntity +import org.zstack.compute.vm.VmGlobalConfig +import org.zstack.kvm.KVMAgentCommands +import org.zstack.kvm.KVMConstant +import org.zstack.sdk.GlobalConfigInventory +import org.zstack.sdk.VmInstanceInventory +import org.zstack.test.integration.kvm.KvmTest +import org.zstack.testlib.EnvSpec +import org.zstack.testlib.SubCase +import org.zstack.utils.data.SizeUnit +import org.zstack.utils.gson.JSONObjectUtil + +/** + * Test VM PMU configuration. + * See ZSTAC-76375: Kunpeng-920 7270Z kernel panic due to PMMIR_EL1. 
+ */ +class VmPmuConfigCase extends SubCase { + EnvSpec env + + @Override + void clean() { + env.delete() + } + + @Override + void setup() { + useSpring(KvmTest.springSpec) + } + + @Override + void environment() { + env = env { + instanceOffering { + name = "instanceOffering" + memory = SizeUnit.GIGABYTE.toByte(2) + cpu = 1 + } + + sftpBackupStorage { + name = "sftp" + url = "/sftp" + username = "root" + password = "password" + hostname = "localhost" + + image { + name = "image1" + url = "http://zstack.org/download/test.qcow2" + } + } + + zone { + name = "zone" + description = "test" + + cluster { + name = "cluster" + hypervisorType = "KVM" + + kvm { + name = "kvm" + managementIp = "localhost" + username = "root" + password = "password" + } + + attachPrimaryStorage("local") + attachL2Network("l2") + } + + attachBackupStorage("sftp") + + localPrimaryStorage { + name = "local" + url = "/local_ps" + } + + l2NoVlanNetwork { + name = "l2" + physicalInterface = "eth0" + + l3Network { + name = "l3" + + ip { + startIp = "192.168.100.10" + endIp = "192.168.100.100" + netmask = "255.255.255.0" + gateway = "192.168.100.1" + } + } + } + } + } + } + + @Override + void test() { + env.create() + testPmuGlobalConfigExists() + testPmuDefaultOnX86() + testPmuResourceConfigOverride() + } + + void testPmuGlobalConfigExists() { + def configs = queryGlobalConfig { + conditions = ["category=${VmGlobalConfig.CATEGORY}", "name=${VmGlobalConfig.VM_PMU.name}"] + } + + assert configs.size() == 1 : "vm.pmu GlobalConfig should exist" + def config = configs[0] as GlobalConfigInventory + assert config.defaultValue == "false" : "vm.pmu should default to false" + } + + void testPmuDefaultOnX86() { + def image = env.inventoryByName("image1") + def l3 = env.inventoryByName("l3") + def instance = env.inventoryByName("instanceOffering") + + KVMAgentCommands.StartVmCmd startCmd = null + env.afterSimulator(KVMConstant.KVM_START_VM_PATH) { KVMAgentCommands.StartVmResponse rsp, HttpEntity e -> + startCmd = 
JSONObjectUtil.toObject(e.body, KVMAgentCommands.StartVmCmd.class) + return rsp + } + + def vm = createVmInstance { + name = "test-pmu-x86" + imageUuid = image.uuid + l3NetworkUuids = [l3.uuid] + instanceOfferingUuid = instance.uuid + } as VmInstanceInventory + + assert startCmd != null + // On x86 (non-aarch64), PMU code path is not triggered, + // so StartVmCmd.pmu stays at its field default (true) + if ("x86_64".equals(vm.architecture) || vm.architecture == null) { + assert startCmd.pmu == true : "x86 VM should have PMU enabled by default" + } else if ("aarch64".equals(vm.architecture)) { + assert startCmd.pmu == false : "aarch64 VM should have PMU disabled by default" + } + + destroyVmInstance { uuid = vm.uuid } + expungeVmInstance { uuid = vm.uuid } + } + + void testPmuResourceConfigOverride() { + def image = env.inventoryByName("image1") + def l3 = env.inventoryByName("l3") + def instance = env.inventoryByName("instanceOffering") + + def vm = createVmInstance { + name = "test-pmu-override" + imageUuid = image.uuid + l3NetworkUuids = [l3.uuid] + instanceOfferingUuid = instance.uuid + } as VmInstanceInventory + + // Set vm.pmu=true via ResourceConfig (different from default false) + updateResourceConfig { + category = VmGlobalConfig.CATEGORY + name = VmGlobalConfig.VM_PMU.name + value = "true" + resourceUuid = vm.uuid + } + + KVMAgentCommands.StartVmCmd startCmd = null + env.afterSimulator(KVMConstant.KVM_START_VM_PATH) { KVMAgentCommands.StartVmResponse rsp, HttpEntity e -> + startCmd = JSONObjectUtil.toObject(e.body, KVMAgentCommands.StartVmCmd.class) + return rsp + } + + rebootVmInstance { uuid = vm.uuid } + + assert startCmd != null + // On x86, PMU stays true regardless of ResourceConfig (code only reads for aarch64) + // This verifies the ResourceConfig record exists and reboot doesn't crash + assert startCmd.pmu == true : "PMU should be true after reboot on x86" + + // Verify ResourceConfig was persisted + def configs = queryResourceConfig { + conditions 
= [ + "category=${VmGlobalConfig.CATEGORY}", + "name=${VmGlobalConfig.VM_PMU.name}", + "resourceUuid=${vm.uuid}" + ] + } + assert configs.size() == 1 : "ResourceConfig should be persisted" + + destroyVmInstance { uuid = vm.uuid } + expungeVmInstance { uuid = vm.uuid } + } +}