diff --git a/src/arch/src/aarch64/layout.rs b/src/arch/src/aarch64/layout.rs index 401925d6e..1d0303924 100644 --- a/src/arch/src/aarch64/layout.rs +++ b/src/arch/src/aarch64/layout.rs @@ -75,7 +75,7 @@ pub const FDT_MAX_SIZE: usize = 0x20_0000; pub const IRQ_BASE: u32 = 32; /// Last usable interrupt on aarch64. -pub const IRQ_MAX: u32 = 159; +pub const IRQ_MAX: u32 = 223; /// Timer interrupts pub const GTIMER_SEC: u32 = 13; diff --git a/src/arch/src/lib.rs b/src/arch/src/lib.rs index 10f4de073..ce5bb8479 100644 --- a/src/arch/src/lib.rs +++ b/src/arch/src/lib.rs @@ -50,8 +50,8 @@ pub mod x86_64; #[cfg(target_arch = "x86_64")] pub use crate::x86_64::{ arch_memory_regions, configure_system, layout::CMDLINE_MAX_SIZE, layout::FIRMWARE_SIZE, - layout::FIRMWARE_START, layout::IRQ_BASE, layout::IRQ_MAX, layout::MMIO_MEM_START, - layout::RESET_VECTOR, Error, + layout::FIRMWARE_START, layout::IRQ_BASE, layout::IRQ_MAX, layout::IRQ_MAX_SPLIT, + layout::MMIO_MEM_START, layout::RESET_VECTOR, Error, }; /// Type for returning public functions outcome. diff --git a/src/arch/src/x86_64/layout.rs b/src/arch/src/x86_64/layout.rs index c626d68d7..488fe8dc0 100644 --- a/src/arch/src/x86_64/layout.rs +++ b/src/arch/src/x86_64/layout.rs @@ -25,8 +25,12 @@ pub const HIMEM_START: u64 = 0x0010_0000; //1 MB. // Typically, on x86 systems 16 IRQs are used (0-15). /// First usable IRQ ID for virtio device interrupts on x86_64. pub const IRQ_BASE: u32 = 5; -/// Last usable IRQ ID for virtio device interrupts on x86_64. +/// Last usable IRQ ID for virtio device interrupts on x86_64 when using +/// KVM's in-kernel IOAPIC (hardcoded to 24 pins by KVM_IOAPIC_NUM_PINS). pub const IRQ_MAX: u32 = 15; +/// Last usable IRQ ID when using the userspace split irqchip, which +/// emulates an IOAPIC with `IOAPIC_NUM_PINS` redirection entries. +pub const IRQ_MAX_SPLIT: u32 = 223; /// Address for the TSS setup. 
pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; diff --git a/src/cpuid/src/common.rs b/src/cpuid/src/common.rs index b09232edb..80fab58fe 100644 --- a/src/cpuid/src/common.rs +++ b/src/cpuid/src/common.rs @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 #[cfg(target_arch = "x86")] -use std::arch::x86::{CpuidResult, __cpuid_count, __get_cpuid_max}; +use std::arch::x86::{__cpuid_count, __get_cpuid_max, CpuidResult}; #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::{CpuidResult, __cpuid_count, __get_cpuid_max}; +use std::arch::x86_64::{__cpuid_count, __get_cpuid_max, CpuidResult}; use crate::cpu_leaf::*; diff --git a/src/devices/src/legacy/ioapic.rs b/src/devices/src/legacy/ioapic.rs index af752c906..f586dec2b 100644 --- a/src/devices/src/legacy/ioapic.rs +++ b/src/devices/src/legacy/ioapic.rs @@ -17,7 +17,7 @@ use crate::Error as DeviceError; const IOAPIC_BASE: u32 = 0xfec0_0000; const APIC_DEFAULT_ADDRESS: u32 = 0xfee0_0000; -const IOAPIC_NUM_PINS: usize = 24; +const IOAPIC_NUM_PINS: usize = 256; const IO_REG_SEL: u64 = 0x00; const IO_WIN: u64 = 0x10; @@ -114,7 +114,9 @@ impl IoApic { cap: KVM_CAP_SPLIT_IRQCHIP, ..Default::default() }; - cap.args[0] = 24; + // args[0] is the number of GSIs reserved for the userspace IOAPIC; + // must match the emulated IOAPIC's pin count. + cap.args[0] = IOAPIC_NUM_PINS as u64; vm.enable_cap(&cap)?; } diff --git a/src/devices/src/legacy/kvmgicv2.rs b/src/devices/src/legacy/kvmgicv2.rs index fcf764f8e..76fe2c031 100644 --- a/src/devices/src/legacy/kvmgicv2.rs +++ b/src/devices/src/legacy/kvmgicv2.rs @@ -57,7 +57,9 @@ impl KvmGicV2 { }; device_fd.set_device_attr(&attr).unwrap(); - let nr_irqs: u32 = arch::aarch64::layout::IRQ_MAX - arch::aarch64::layout::IRQ_BASE + 1; + // GIC nr_irqs includes 32 private interrupts (SGIs + PPIs), so we need + // the SPI count plus 32, rounded up to a multiple of 32. 
+ let nr_irqs: u32 = (arch::aarch64::layout::IRQ_MAX + 1).div_ceil(32) * 32; let nr_irqs_ptr = &nr_irqs as *const u32; let attr = kvm_bindings::kvm_device_attr { group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, diff --git a/src/devices/src/legacy/kvmgicv3.rs b/src/devices/src/legacy/kvmgicv3.rs index a25bbb330..8c50319e4 100644 --- a/src/devices/src/legacy/kvmgicv3.rs +++ b/src/devices/src/legacy/kvmgicv3.rs @@ -57,7 +57,9 @@ impl KvmGicV3 { }; device_fd.set_device_attr(&attr)?; - let nr_irqs: u32 = arch::aarch64::layout::IRQ_MAX - arch::aarch64::layout::IRQ_BASE + 1; + // GIC nr_irqs includes 32 private interrupts (SGIs + PPIs), so we need + // the SPI count plus 32, rounded up to a multiple of 32. + let nr_irqs: u32 = (arch::aarch64::layout::IRQ_MAX + 1).div_ceil(32) * 32; let nr_irqs_ptr = &nr_irqs as *const u32; let attr = kvm_bindings::kvm_device_attr { group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, diff --git a/src/devices/src/virtio/snd/worker.rs b/src/devices/src/virtio/snd/worker.rs index b269c9503..e7950d557 100644 --- a/src/devices/src/virtio/snd/worker.rs +++ b/src/devices/src/virtio/snd/worker.rs @@ -586,7 +586,7 @@ impl SndWorker { { self.streams.write().unwrap()[stream_id as usize] .buffers - .extend(std::mem::take(&mut buffers).into_iter()); + .extend(std::mem::take(&mut buffers)); state = IoState::Done; } IoState::Ready if descriptor.len as usize != size_of::() => { diff --git a/src/krun/src/api/builder.rs b/src/krun/src/api/builder.rs index fb7387d1c..6a3319a41 100644 --- a/src/krun/src/api/builder.rs +++ b/src/krun/src/api/builder.rs @@ -332,6 +332,7 @@ impl VmBuilder { vmr.set_vm_config(&vm_config) .map_err(|err| map_vm_config_error(&self.machine, err))?; vmr.nested_enabled = self.machine.nested_virt; + vmr.split_irqchip = self.machine.split_irqchip; // Apply filesystem configuration #[cfg(not(feature = "tee"))] diff --git a/src/krun/src/api/builders.rs b/src/krun/src/api/builders.rs index c2dfb6e52..49315fecc 100644 --- 
a/src/krun/src/api/builders.rs +++ b/src/krun/src/api/builders.rs @@ -42,6 +42,7 @@ pub struct MachineBuilder { pub(crate) memory_mib: usize, pub(crate) hyperthreading: bool, pub(crate) nested_virt: bool, + pub(crate) split_irqchip: bool, } //-------------------------------------------------------------------------------------------------- @@ -296,6 +297,7 @@ impl MachineBuilder { memory_mib: 512, hyperthreading: false, nested_virt: false, + split_irqchip: false, } } @@ -322,6 +324,19 @@ impl MachineBuilder { self.nested_virt = enabled; self } + + /// Enable the userspace split irqchip on x86_64. + /// + /// The default in-kernel IOAPIC is hardcoded by KVM to 24 pins, leaving + /// only 11 IRQs for virtio-mmio devices. The userspace split irqchip + /// emulates a larger IOAPIC (256 pins) and raises the usable range to + /// 219 IRQs, which is needed for VMs with many virtio-mmio devices + /// (e.g. lots of virtio-fs mounts or block devices). No effect on + /// aarch64 or riscv64. + pub fn split_irqchip(mut self, enabled: bool) -> Self { + self.split_irqchip = enabled; + self + } } impl Default for MachineBuilder { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 000e88d61..d053cb263 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -33,15 +33,13 @@ use crate::vmm_config::net::NetBuilder; use devices::legacy::Cmos; #[cfg(all(target_os = "linux", target_arch = "riscv64"))] use devices::legacy::KvmAia; -#[cfg(target_arch = "x86_64")] -use devices::legacy::KvmIoapic; use devices::legacy::Serial; #[cfg(target_os = "macos")] use devices::legacy::VcpuList; #[cfg(target_os = "macos")] use devices::legacy::{GicV3, HvfGicV3}; #[cfg(target_arch = "x86_64")] -use devices::legacy::{IoApic, IrqChipT}; +use devices::legacy::{IoApic, IrqChipT, KvmIoapic}; use devices::legacy::{IrqChip, IrqChipDevice}; #[cfg(all(target_os = "linux", target_arch = "aarch64"))] use devices::legacy::{KvmGicV2, KvmGicV3}; @@ -801,10 +799,18 @@ pub fn build_microvm( 
// Instantiate the MMIO device manager. // 'mmio_base' address has to be an address which is protected by the kernel // and is architectural specific. + #[cfg(target_arch = "x86_64")] + let irq_max = if vm_resources.split_irqchip { + arch::IRQ_MAX_SPLIT + } else { + arch::IRQ_MAX + }; + #[cfg(not(target_arch = "x86_64"))] + let irq_max = arch::IRQ_MAX; #[allow(unused_mut)] let mut mmio_device_manager = MMIODeviceManager::new( &mut (arch::MMIO_MEM_START.clone()), - (arch::IRQ_BASE, arch::IRQ_MAX), + (arch::IRQ_BASE, irq_max), ); #[cfg(target_os = "macos")] @@ -819,6 +825,8 @@ pub fn build_microvm( // while on aarch64 we need to do it the other way around. #[cfg(target_arch = "x86_64")] { + // Userspace split irqchip is required for >11 virtio IRQs, since KVM's + // in-kernel IOAPIC is hardcoded at 24 pins (KVM_IOAPIC_NUM_PINS). let ioapic: Box<dyn IrqChipT> = if vm_resources.split_irqchip { Box::new( IoApic::new(vm.fd(), _sender.clone()) diff --git a/src/vmm/src/device_manager/kvm/mmio.rs b/src/vmm/src/device_manager/kvm/mmio.rs index 8e97aedc4..4dbb93f46 100644 --- a/src/vmm/src/device_manager/kvm/mmio.rs +++ b/src/vmm/src/device_manager/kvm/mmio.rs @@ -443,7 +443,7 @@ mod tests { #[cfg(target_arch = "aarch64")] let _gic = KvmGicV3::new(vm.fd(), 1).unwrap(); - let mut cmdline = kernel_cmdline::Cmdline::new(4096); + let mut cmdline = kernel_cmdline::Cmdline::new(16384); for _i in arch::IRQ_BASE..=arch::IRQ_MAX { device_manager